edu.stanford.nlp.pipeline.TextOutputter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-corenlp Show documentation
Show all versions of stanford-corenlp Show documentation
Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.
package edu.stanford.nlp.pipeline;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import edu.stanford.nlp.coref.CorefCoreAnnotations;
import edu.stanford.nlp.coref.data.CorefChain;
import edu.stanford.nlp.ie.machinereading.structure.EntityMention;
import edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations;
import edu.stanford.nlp.ie.machinereading.structure.RelationMention;
import edu.stanford.nlp.ie.util.RelationTriple;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.naturalli.NaturalLogicAnnotations;
import edu.stanford.nlp.naturalli.OpenIE;
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;
/**
* @author John Bauer
*/
public class TextOutputter extends AnnotationOutputter {
public TextOutputter() {}
/** {@inheritDoc} */
@Override
public void print(Annotation annotation, OutputStream stream, Options options) throws IOException {
PrintWriter os = new PrintWriter(IOUtils.encodedOutputStreamWriter(stream, options.encoding));
print(annotation, os, options);
}
/**
* The meat of the outputter
*/
private static void print(Annotation annotation, PrintWriter pw, Options options) throws IOException {
double beam = options.beamPrintingOption;
List sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
// Display docid if available
String docId = annotation.get(CoreAnnotations.DocIDAnnotation.class);
if (docId != null) {
List tokens = annotation.get(CoreAnnotations.TokensAnnotation.class);
int nSentences = (sentences != null)? sentences.size():0;
int nTokens = (tokens != null)? tokens.size():0;
pw.printf("Document: ID=%s (%d sentences, %d tokens)%n", docId, nSentences, nTokens);
}
// Display doctitle if available
String docTitle = annotation.get(CoreAnnotations.DocTitleAnnotation.class);
if (docTitle != null) {
pw.printf("Document Title: %s%n", docTitle);
}
// Display docdate if available
String docDate = annotation.get(CoreAnnotations.DocDateAnnotation.class);
if (docDate != null) {
pw.printf("Document Date: %s%n", docDate);
}
// Display doctype if available
String docType = annotation.get(CoreAnnotations.DocTypeAnnotation.class);
if (docType != null) {
pw.printf("Document Type: %s%n", docType);
}
// Display docsourcetype if available
String docSourceType = annotation.get(CoreAnnotations.DocSourceTypeAnnotation.class);
if (docSourceType != null) {
pw.printf("Document Source Type: %s%n", docSourceType);
}
// display each sentence in this annotation
if (sentences != null) {
for (int i = 0, sz = sentences.size(); i < sz; i ++) {
CoreMap sentence = sentences.get(i);
List tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
String sentiment = sentence.get(SentimentCoreAnnotations.SentimentClass.class);
if (sentiment == null) {
sentiment = "";
} else {
sentiment = ", sentiment: " + sentiment;
}
pw.printf("Sentence #%d (%d tokens%s):%n", (i + 1), tokens.size(), sentiment);
String text = sentence.get(CoreAnnotations.TextAnnotation.class);
pw.println(text);
// display the token-level annotations
String[] tokenAnnotations = {
"Text", "PartOfSpeech", "Lemma", "Answer", "NamedEntityTag",
"CharacterOffsetBegin", "CharacterOffsetEnd", "NormalizedNamedEntityTag",
"Timex", "TrueCase", "TrueCaseText", "SentimentClass", "WikipediaEntity" };
for (CoreLabel token: tokens) {
pw.print(token.toShorterString(tokenAnnotations));
pw.println();
}
// display the parse tree for this sentence
Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
if (tree != null) {
options.constituentTreePrinter.printTree(tree, pw);
}
// It is possible to turn off the semantic graphs, in which
// case we don't want to recreate them using the dependency
// printer. This might be relevant if using CoreNLP for a
// language which doesn't have dependencies, for example.
if (sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class) != null) {
pw.print(sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class).toList());
pw.println();
}
// display MachineReading entities and relations
List entities = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
if (entities != null) {
pw.println("Extracted the following MachineReading entity mentions:");
for (EntityMention e : entities) {
pw.print('\t');
pw.println(e);
}
}
List relations = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
if (relations != null){
pw.println("Extracted the following MachineReading relation mentions:");
for (RelationMention r: relations) {
if (r.printableObject(beam)) {
pw.println(r);
}
}
}
// display OpenIE triples
Collection openieTriples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
if (openieTriples != null && openieTriples.size() > 0) {
pw.println("Extracted the following Open IE triples:");
for (RelationTriple triple : openieTriples) {
pw.println(OpenIE.tripleToString(triple, docId, sentence));
}
}
// display KBP triples
Collection kbpTriples = sentence.get(CoreAnnotations.KBPTriplesAnnotation.class);
if (kbpTriples != null && kbpTriples.size() > 0) {
pw.println("Extracted the following KBP triples:");
for (RelationTriple triple : kbpTriples) {
pw.println(triple.toString());
}
}
}
}
// display the old-style doc-level coref annotations
// this is not supported anymore!
//String corefAnno = annotation.get(CorefPLAnnotation.class);
//if(corefAnno != null) os.println(corefAnno);
// display the new-style coreference graph
Map corefChains =
annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
if (corefChains != null && sentences != null) {
for (CorefChain chain : corefChains.values()) {
CorefChain.CorefMention representative =
chain.getRepresentativeMention();
boolean outputHeading = false;
for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
if (mention == representative)
continue;
if (!outputHeading) {
outputHeading = true;
pw.println("Coreference set:");
}
// all offsets start at 1!
pw.printf("\t(%d,%d,[%d,%d]) -> (%d,%d,[%d,%d]), that is: \"%s\" -> \"%s\"%n",
mention.sentNum,
mention.headIndex,
mention.startIndex,
mention.endIndex,
representative.sentNum,
representative.headIndex,
representative.startIndex,
representative.endIndex,
mention.mentionSpan,
representative.mentionSpan);
}
}
}
pw.flush();
}
/** Static helper */
public static void prettyPrint(Annotation annotation, OutputStream stream, StanfordCoreNLP pipeline) {
prettyPrint(annotation, new PrintWriter(stream), pipeline);
}
/** Static helper */
public static void prettyPrint(Annotation annotation, PrintWriter pw, StanfordCoreNLP pipeline) {
try {
TextOutputter.print(annotation, pw, getOptions(pipeline));
// already flushed
// don't close, might not want to close underlying stream
} catch (IOException e) {
throw new RuntimeIOException(e);
}
}
}