edu.stanford.nlp.pipeline.SentimentAnnotator Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of stanford-corenlp Show documentation

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7

Show newest version

package edu.stanford.nlp.pipeline;

import java.util.*;

import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
import edu.stanford.nlp.sentiment.CollapseUnaryTransformer;
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
import edu.stanford.nlp.sentiment.SentimentCostAndGradient;
import edu.stanford.nlp.sentiment.SentimentModel;
import edu.stanford.nlp.sentiment.SentimentUtils;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.ArraySet;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.IntPair;

/**
 * This annotator attaches a binarized tree with sentiment annotations
 * to each sentence.  It requires there to already be binarized trees
 * attached to the sentence, which is best done in the
 * ParserAnnotator.
 * 

 * The tree will be attached to each sentence in the
 * SentencesAnnotation via the SentimentCoreAnnotations.SentimentAnnotatedTree
 * annotation.  The class name for the top level class is also set
 * using the SentimentCoreAnnotations.SentimentClass annotation.
 * 

 * The reason the decision was made to do the binarization in the
 * ParserAnnotator is because it may require specific options set in
 * the parser.  An alternative would be to do the binarization here,
 * which would require at a minimum the HeadFinder used in the parser.
 *
 * @author John Bauer
 */
public class SentimentAnnotator implements Annotator {

  private static final String DEFAULT_MODEL = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz";

  private final String modelPath;
  private final SentimentModel model;
  private final CollapseUnaryTransformer transformer = new CollapseUnaryTransformer();

  public SentimentAnnotator(String name, Properties props) {
    this.modelPath = props.getProperty(name + ".model", DEFAULT_MODEL);
    if (modelPath == null) {
      throw new IllegalArgumentException("No model specified for Sentiment annotator");
    }
    this.model = SentimentModel.loadSerialized(modelPath);
  }

  @Override
  public Set> requirementsSatisfied() {
    return Collections.emptySet();
  }

  @Override
  public Set> requires() {
    return Collections.unmodifiableSet(new ArraySet<>(Arrays.asList(
        CoreAnnotations.PartOfSpeechAnnotation.class,
        TreeCoreAnnotations.TreeAnnotation.class,
        TreeCoreAnnotations.BinarizedTreeAnnotation.class,
        CoreAnnotations.CategoryAnnotation.class
    )));
  }

  @Override
  public void annotate(Annotation annotation) {
    if (annotation.containsKey(CoreAnnotations.SentencesAnnotation.class)) {
      // TODO: parallelize
      List sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
      for (CoreMap sentence : sentences) {
        Tree binarized = sentence.get(TreeCoreAnnotations.BinarizedTreeAnnotation.class);
        if (binarized == null) {
          throw new AssertionError("Binarized sentences not built by parser");
        }
        Tree collapsedUnary = transformer.transformTree(binarized);
        SentimentCostAndGradient scorer = new SentimentCostAndGradient(model, null);
        scorer.forwardPropagateTree(collapsedUnary);
        sentence.set(SentimentCoreAnnotations.SentimentAnnotatedTree.class, collapsedUnary);
        int sentiment = RNNCoreAnnotations.getPredictedClass(collapsedUnary);
        sentence.set(SentimentCoreAnnotations.SentimentClass.class, SentimentUtils.sentimentString(model, sentiment));
        Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
        if (tree != null) {
          collapsedUnary.setSpans();
          // map the sentiment annotations onto the tree
          Map spanSentiment = Generics.newHashMap();
          for (Tree bt : collapsedUnary) {
            IntPair p = bt.getSpan();
            int sen = RNNCoreAnnotations.getPredictedClass(bt);
            String sentStr = SentimentUtils.sentimentString(model, sen);
            if ( ! spanSentiment.containsKey(p)) {
              // we'll take the first = highest one discovered
              spanSentiment.put(p, sentStr);
            }
          }
          if (((CoreLabel) tree.label()).containsKey(CoreAnnotations.SpanAnnotation.class)) {
            throw new IllegalStateException("This code assumes you don't have SpanAnnotation");
          }
          tree.setSpans();
          for (Tree t : tree) {
            IntPair p = t.getSpan();
            String str = spanSentiment.get(p);
            if (str != null) {
              CoreLabel cl = (CoreLabel) t.label();
              cl.set(SentimentCoreAnnotations.SentimentClass.class, str);
              cl.remove(CoreAnnotations.SpanAnnotation.class);
            }
          }
        }
      }
    } else {
      throw new RuntimeException("unable to find sentences in: " + annotation);
    }
  }

}