Perspective Annotator

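The PerspectiveAnnotator sends text to the Perspective service and stores the scores it returns (for example the probabilities for toxicity, insult, or profanity). It can score a document as a whole or each annotation of a given type, such as each sentence.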

import os
from gatenlp import Document
from gatenlp.processing.client.perspective import PerspectiveAnnotator
from gatenlp.lib_spacy import AnnSpacy
# The Perspective API key is read from the environment; a missing
# PERSPECTIVE_KEY variable raises KeyError here.
apikey = os.environ["PERSPECTIVE_KEY"]

# Example texts, from harmless to clearly abusive. The second text is spelled
# out line by line so that the trailing spaces and the indentation of its
# continuation lines stay visible.
example_texts = [
    "Barack Obama visited Microsoft in New York last May.",
    (
        "This is just some example text. \n"
        "      Has a sentence that talks about shit in general. \n"
        "      And another talking about 💩💩💩💩 in general. This guy is a moron."
    ),
    "What a stupid bitch she is.",
    "I am going to kill you, asshole!",
]
docs = [Document(text) for text in example_texts]

# spaCy is used only to add sentence annotations; every other kind of
# annotation is switched off. The sentences are needed later for scoring
# the texts sentence by sentence.
anntr = AnnSpacy(
    add_tokens=False,
    add_entities=False,
    add_sentences=True,
    add_nounchunks=False,
    add_deps=False,
)
for doc in docs:
    doc = anntr(doc)

# Both Perspective annotators request the same attributes; each one gets its
# own copy of the list.
attributes = ("TOXICITY", "INSULT", "PROFANITY")

# Annotator that scores each document as a whole.
perspective_doc = PerspectiveAnnotator(
    auth_token=apikey,
    requested_attributes=list(attributes),
)
# Annotator that scores each annotation of type "Sentence".
perspective_sent = PerspectiveAnnotator(
    auth_token=apikey,
    ann_type="Sentence",
    requested_attributes=list(attributes),
)

# Apply the document-level annotator, report the resulting document features,
# then apply the sentence-level annotator to the same document.
for idx, doc in enumerate(docs):
    # Start from empty document features so the printout shows only what
    # this run of the document-level annotator assigned.
    doc.features.clear()
    perspective_doc(doc)
    print(f"Document {idx} : {doc.features!s}")
    perspective_sent(doc)
2022-11-09 22:02:56,809|INFO|googleapiclient.discovery_cache|file_cache is only supported with oauth2client<4.0.0
2022-11-09 22:02:56,943|INFO|googleapiclient.discovery_cache|file_cache is only supported with oauth2client<4.0.0


Document 0 : Features({'PROFANITY_PROBABILITY': 0.014634945, 'INSULT_PROBABILITY': 0.009051885, 'TOXICITY_PROBABILITY': 0.017843807, 'languages': ['en']})
Document 1 : Features({'PROFANITY_PROBABILITY': 0.7246992, 'TOXICITY_PROBABILITY': 0.8540474, 'INSULT_PROBABILITY': 0.802824, 'languages': ['en']})
Document 2 : Features({'TOXICITY_PROBABILITY': 0.96426713, 'INSULT_PROBABILITY': 0.9343686, 'PROFANITY_PROBABILITY': 0.89909166, 'languages': ['en']})
Document 3 : Features({'PROFANITY_PROBABILITY': 0.87472564, 'TOXICITY_PROBABILITY': 0.9817629, 'INSULT_PROBABILITY': 0.8390655, 'languages': ['en']})
# Show each annotated document. NOTE(review): presumably each expression below
# is the last line of its own notebook cell, which is what makes the notebook
# render the document; run as a plain script these lines have no visible effect.
docs[0]
docs[1]
docs[2]
docs[3]

Notebook last updated

import gatenlp
# Record which gatenlp version this notebook was last run with.
print(f"NB last updated with gatenlp version {gatenlp.__version__}")
NB last updated with gatenlp version 1.0.8a1