edu.stanford.nlp.pipeline.ParserAnnotatorUtils Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of stanford-corenlp Show documentation

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7

Show newest version

package edu.stanford.nlp.pipeline; 
import edu.stanford.nlp.util.logging.Redwood;

import java.util.List;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.util.CoreMap;


/** @author David McClosky */
public class ParserAnnotatorUtils  {

  /** A logger for this class */
  private static Redwood.RedwoodChannels log = Redwood.channels(ParserAnnotatorUtils.class);

  private ParserAnnotatorUtils() {} // static methods

  /** Put the tree in the CoreMap for the sentence, also add any
   *  dependency graphs to the sentence, and fill in missing tag annotations.
   *
   *  Thread safety note: nothing special is done to ensure the thread
   *  safety of the GrammaticalStructureFactory.  However, both the
   *  EnglishGrammaticalStructureFactory and the
   *  ChineseGrammaticalStructureFactory are thread safe.
   */
  public static void fillInParseAnnotations(boolean verbose, boolean buildGraphs,
                                            GrammaticalStructureFactory gsf, CoreMap sentence,
                                            List trees, GrammaticalStructure.Extras extras) {

    boolean first = true;
    for (Tree tree : trees) {

      // make sure all tree nodes are CoreLabels
      // TODO: why isn't this always true? something fishy is going on
      Trees.convertToCoreLabels(tree);

      // index nodes, i.e., add start and end token positions to all nodes
      // this is needed by other annotators down stream, e.g., the NFLAnnotator
      tree.indexSpans(0);

      if (first) {
        sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
        if (verbose) {
          log.info("Tree is:");
          tree.pennPrint(System.err);
        }

        setMissingTags(sentence, tree);

        if (buildGraphs) {
          // generate the dependency graph
          // unfortunately, it is necessary to make the
          // GrammaticalStructure three times, as the dependency
          // conversion changes the given data structure
          SemanticGraph deps = SemanticGraphFactory.generateCollapsedDependencies(gsf.newGrammaticalStructure(tree), extras);
          SemanticGraph uncollapsedDeps = SemanticGraphFactory.generateUncollapsedDependencies(gsf.newGrammaticalStructure(tree), extras);
          SemanticGraph ccDeps = SemanticGraphFactory.generateCCProcessedDependencies(gsf.newGrammaticalStructure(tree), extras);
          SemanticGraph enhancedDeps = SemanticGraphFactory.generateEnhancedDependencies(gsf.newGrammaticalStructure(tree));
          SemanticGraph enhancedPlusPlusDeps = SemanticGraphFactory.generateEnhancedPlusPlusDependencies(gsf.newGrammaticalStructure(tree));


          if (verbose) {
            log.info("SDs:");
            log.info(deps.toString(SemanticGraph.OutputFormat.LIST));
          }
          sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, deps);
          sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, uncollapsedDeps);
          sentence.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, ccDeps);
          sentence.set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, enhancedDeps);
          sentence.set(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class, enhancedPlusPlusDeps);
        }

        first = false;
      }
    }
    if (trees.size() > 1) {
      sentence.set(TreeCoreAnnotations.KBestTreesAnnotation.class, trees);
    }
  }

  /**
   * Set the tags of the original tokens and the leaves if they
   * aren't already set.
   */
  public static void setMissingTags(CoreMap sentence, Tree tree) {
    List taggedWords = null;
    List leaves = null;
    List tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    for (int i = 0, size = tokens.size(); i < size; ++i) {
      CoreLabel token = tokens.get(i);
      if (token.tag() == null) {
        if (taggedWords == null) {
          taggedWords = tree.taggedYield();
        }
        if (leaves == null) {
          leaves = tree.yield();
        }
        token.setTag(taggedWords.get(i).tag());
        Label leaf = leaves.get(i);
        if (leaf instanceof HasTag) {
          ((HasTag) leaf).setTag(taggedWords.get(i).tag());
        }
      }
    }
  }

}