All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.stanford.nlp.pipeline.ParserAnnotatorUtils Maven / Gradle / Ivy

package edu.stanford.nlp.pipeline;

import java.util.List;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.util.CoreMap;


/** @author David McClosky */
public class ParserAnnotatorUtils {

  /** Private constructor: this class only offers static methods, so it is never instantiated. */
  private ParserAnnotatorUtils() {} // static methods

  /** Put the tree in the CoreMap for the sentence, also add any
   *  dependency graphs to the sentence, and fill in missing tag annotations.
   *
   *  Every tree in {@code trees} is converted to use CoreLabels and has its
   *  spans indexed, but only the first tree is stored as the sentence's
   *  {@code TreeAnnotation} and used for tags and dependency graphs.  When
   *  more than one tree is given, the whole list is additionally stored
   *  under {@code KBestTreesAnnotation}.
   *
   *  Thread safety note: nothing special is done to ensure the thread
   *  safety of the GrammaticalStructureFactory.  However, both the
   *  EnglishGrammaticalStructureFactory and the
   *  ChineseGrammaticalStructureFactory are thread safe.
   *
   *  @param verbose if true, print the first tree and its collapsed
   *                 dependencies to {@code System.err}
   *  @param buildGraphs if true, build the collapsed, uncollapsed (basic) and
   *                     CC-processed dependency graphs from the first tree
   *                     and attach them to the sentence
   *  @param gsf factory used to create a GrammaticalStructure from the first
   *             tree; only used when {@code buildGraphs} is true
   *  @param sentence the CoreMap that receives the annotations
   *  @param trees the parse trees for the sentence; the first one is treated
   *               as the primary parse
   *  @param extras passed through to the SemanticGraphFactory when the
   *                dependency graphs are generated
   */
  public static void fillInParseAnnotations(boolean verbose, boolean buildGraphs,
                                            GrammaticalStructureFactory gsf, CoreMap sentence,
                                            List<Tree> trees, GrammaticalStructure.Extras extras) {

    boolean first = true;
    for (Tree tree : trees) {

      // make sure all tree nodes are CoreLabels
      // TODO: why isn't this always true? something fishy is going on
      Trees.convertToCoreLabels(tree);

      // index nodes, i.e., add start and end token positions to all nodes
      // this is needed by other annotators down stream, e.g., the NFLAnnotator
      tree.indexSpans(0);

      // only the first tree is attached to the sentence as its parse
      if (first) {
        sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
        if (verbose) {
          System.err.println("Tree is:");
          tree.pennPrint(System.err);
        }

        setMissingTags(sentence, tree);

        if (buildGraphs) {
          // generate the dependency graph
          // unfortunately, it is necessary to make the
          // GrammaticalStructure three times, as the dependency
          // conversion changes the given data structure
          SemanticGraph deps = SemanticGraphFactory.generateCollapsedDependencies(gsf.newGrammaticalStructure(tree), extras);
          SemanticGraph uncollapsedDeps = SemanticGraphFactory.generateUncollapsedDependencies(gsf.newGrammaticalStructure(tree), extras);
          SemanticGraph ccDeps = SemanticGraphFactory.generateCCProcessedDependencies(gsf.newGrammaticalStructure(tree), extras);

          if (verbose) {
            System.err.println("SDs:");
            System.err.println(deps.toString(SemanticGraph.OutputFormat.LIST));
          }
          sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, deps);
          sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, uncollapsedDeps);
          sentence.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, ccDeps);
        }

        first = false;
      }
    }
    // keep the full k-best list only when there actually are alternatives
    if (trees.size() > 1) {
      sentence.set(TreeCoreAnnotations.KBestTreesAnnotation.class, trees);
    }
  }

  /**
   * Set the tags of the original tokens and the leaves if they
   * aren't already set.
   */
  public static void setMissingTags(CoreMap sentence, Tree tree) {
    List taggedWords = null;
    List




© 2015 - 2024 Weber Informatics LLC | Privacy Policy