edu.stanford.nlp.pipeline.ParserAnnotatorUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation
Show all versions of stanford-parser Show documentation
Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
package edu.stanford.nlp.pipeline;
import edu.stanford.nlp.util.logging.Redwood;
import java.util.List;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.util.CoreMap;
/** @author David McClosky */
public class ParserAnnotatorUtils {
/** A logger for this class; created once and never reassigned. */
private static final Redwood.RedwoodChannels log = Redwood.channels(ParserAnnotatorUtils.class);

/** Private constructor: this class only exposes static methods and is not meant to be instantiated. */
private ParserAnnotatorUtils() {} // static methods
/**
 * Put the first tree in the CoreMap for the sentence, also add any
 * dependency graphs to the sentence, and fill in missing tag annotations.
 * Every tree in {@code trees} is converted to CoreLabels and span-indexed;
 * only the first one is stored as the sentence's tree and used to build the
 * dependency graphs. When more than one tree is given, the whole list is
 * additionally stored as the k-best trees annotation.
 *
 * Thread safety note: nothing special is done to ensure the thread
 * safety of the GrammaticalStructureFactory. However, both the
 * EnglishGrammaticalStructureFactory and the
 * ChineseGrammaticalStructureFactory are thread safe.
 *
 * @param verbose if true, log the tree (pretty-printed to {@code System.err})
 *                and the collapsed dependencies
 * @param buildGraphs if true, generate the dependency graphs from the first
 *                    tree and attach them to the sentence
 * @param gsf factory used to build a GrammaticalStructure from the tree;
 *            only dereferenced when {@code buildGraphs} is true
 * @param sentence the CoreMap that receives the annotations
 * @param trees the parse trees for the sentence; the first is treated as the
 *              primary tree
 * @param extras passed through to the collapsed, uncollapsed and
 *               CC-processed dependency generators
 */
public static void fillInParseAnnotations(boolean verbose, boolean buildGraphs,
    GrammaticalStructureFactory gsf, CoreMap sentence,
    List<Tree> trees, GrammaticalStructure.Extras extras) {

  boolean first = true;
  for (Tree tree : trees) {

    // make sure all tree nodes are CoreLabels
    // TODO: why isn't this always true? something fishy is going on
    Trees.convertToCoreLabels(tree);

    // index nodes, i.e., add start and end token positions to all nodes
    // this is needed by other annotators down stream, e.g., the NFLAnnotator
    tree.indexSpans(0);

    // only the first tree becomes the sentence's tree and the source of the graphs
    if (first) {
      sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
      if (verbose) {
        log.info("Tree is:");
        tree.pennPrint(System.err);
      }

      setMissingTags(sentence, tree);

      if (buildGraphs) {
        // generate the dependency graphs
        // unfortunately, it is necessary to make a fresh
        // GrammaticalStructure for each graph (five in total), as the
        // dependency conversion changes the given data structure
        SemanticGraph deps = SemanticGraphFactory.generateCollapsedDependencies(gsf.newGrammaticalStructure(tree), extras);
        SemanticGraph uncollapsedDeps = SemanticGraphFactory.generateUncollapsedDependencies(gsf.newGrammaticalStructure(tree), extras);
        SemanticGraph ccDeps = SemanticGraphFactory.generateCCProcessedDependencies(gsf.newGrammaticalStructure(tree), extras);
        SemanticGraph enhancedDeps = SemanticGraphFactory.generateEnhancedDependencies(gsf.newGrammaticalStructure(tree));
        SemanticGraph enhancedPlusPlusDeps = SemanticGraphFactory.generateEnhancedPlusPlusDependencies(gsf.newGrammaticalStructure(tree));

        if (verbose) {
          log.info("SDs:");
          log.info(deps.toString(SemanticGraph.OutputFormat.LIST));
        }

        sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, deps);
        sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, uncollapsedDeps);
        sentence.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, ccDeps);
        sentence.set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, enhancedDeps);
        sentence.set(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class, enhancedPlusPlusDeps);
      }

      first = false;
    }
  }

  // a single tree is already stored above; keep the full list only for k-best output
  if (trees.size() > 1) {
    sentence.set(TreeCoreAnnotations.KBestTreesAnnotation.class, trees);
  }
}
/**
* Set the tags of the original tokens and the leaves if they
* aren't already set.
*/
public static void setMissingTags(CoreMap sentence, Tree tree) {
List taggedWords = null;
List