// Hosting projects for download requires a lot of resources. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
// You can buy this project and then download and modify it as often as you want.
package edu.stanford.nlp.trees;
import edu.stanford.nlp.ling.*;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.trees.international.pennchinese.ChineseEnglishWordMap;
import edu.stanford.nlp.util.*;
import edu.stanford.nlp.util.XMLUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.*;
import java.util.*;
import java.util.function.Function;
import java.util.function.Predicate;
/**
* A class for customizing the print method(s) for a
* {@code edu.stanford.nlp.trees.Tree} as the output of the
* parser. This class supports printing in multiple ways and altering
* behavior via properties specified at construction.
*
* @author Roger Levy
* @author Christopher Manning
* @author Galen Andrew
*/
public class TreePrint {
/** A logger for this class. Declared {@code final}: it is initialized here and is not meant to be rebound. */
private static final Redwood.RedwoodChannels log = Redwood.channels(TreePrint.class);
// TODO: Add support for makeCopulaHead as an outputFormatOption here.
/** The format name {@code "rootSymbolOnly"}; it is one of the entries of {@link #outputTreeFormats}. */
public static final String rootLabelOnlyFormat = "rootSymbolOnly";
/**
* The marker string {@code "=H"}.
* NOTE(review): presumably appended to the labels of head nodes when the
* markHeadNodes option is set -- confirm against the printing code.
*/
public static final String headMark = "=H";
/**
* The legal output tree formats. The main constructor rejects any requested
* format name that is not an element of this array.
*/
public static final String[] outputTreeFormats = {
"penn",
"oneline",
rootLabelOnlyFormat,
"words",
"wordsAndTags",
"dependencies",
"typedDependencies",
"typedDependenciesCollapsed",
"latexTree",
"xmlTree",
"collocations",
"semanticGraph",
"conllStyleDependencies",
"conll2007"
};
private final Properties formats;
private final Properties options;
private final boolean markHeadNodes; // = false;
private final boolean lexicalize; // = false;
private final boolean removeEmpty;
private final boolean ptb2text;
private final boolean transChinese; // = false;
private final boolean basicDependencies;
private final boolean collapsedDependencies;
private final boolean nonCollapsedDependencies;
private final boolean nonCollapsedDependenciesSeparated;
private final boolean CCPropagatedDependencies;
private final boolean treeDependencies;
private final boolean includeTags;
private final HeadFinder hf;
private final TreebankLanguagePack tlp;
private final WordStemmer stemmer;
private final Predicate> dependencyFilter;
private final Predicate> dependencyWordFilter;
private final GrammaticalStructureFactory gsf;
/** Pool use of one WordNetConnection. I don't really know if
* Dan Bikel's WordNet code is thread safe, but it definitely doesn't
* close its files, and too much of our code makes TreePrint objects and
* then drops them on the floor, and so we run out of file handles.
* That is, if this variable isn't static, code crashes.
* Maybe we should change this code to use jwnl(x)?
* CDM July 2006.
* <p>
* The field is static, so a single connection is shared by all TreePrint
* instances. It has no initializer here.
*/
private static WordNetConnection wnc;
/** This PrintWriter is used if and only if the user doesn't pass one in to a
* call to printTree(). It prints to System.out and is created with
* automatic line flushing enabled.
*/
private final PrintWriter pw = new PrintWriter(System.out, true);
/** Construct a new TreePrint that will print the given formats.
* Warning! This is the anglocentric constructor.
* It will work correctly only for English.
* It delegates to the three-argument constructor with an empty options
* string and a new {@code PennTreebankLanguagePack}.
*
* @param formats The formats to print the tree in, as a comma separated
* list (for instance, "penn" or "words,typedDependencies").
*/
public TreePrint(String formats) {
this(formats, "", new PennTreebankLanguagePack());
}
/** Make a TreePrint instance with no options specified.
* Delegates to the three-argument constructor with an empty options string.
*
* @param formats The formats to print the tree in, as a comma separated list
* @param tlp The TreebankLanguagePack to use
*/
public TreePrint(String formats, TreebankLanguagePack tlp) {
this(formats, "", tlp);
}
/** Make a TreePrint instance. This one uses the default tlp headFinder.
* Both head finders passed on to the five-argument constructor come from
* {@code tlp}: {@code tlp.headFinder()} and
* {@code tlp.typedDependencyHeadFinder()}.
*
* @param formats The formats to print the tree in, as a comma separated list
* @param options Options that additionally specify how trees are printed
* @param tlp The TreebankLanguagePack to use; must not be null, since it is
* dereferenced here to obtain the head finders
*/
public TreePrint(String formats, String options, TreebankLanguagePack tlp) {
this(formats, options, tlp, tlp.headFinder(), tlp.typedDependencyHeadFinder());
}
/**
 * Make a TreePrint instance.
 *
 * @param formatString A comma separated list of ways to print each Tree.
 *                For instance, "penn" or "words,typedDependencies".
 *                Known formats are: oneline, penn, latexTree, xmlTree, words,
 *                wordsAndTags, rootSymbolOnly, dependencies,
 *                typedDependencies, typedDependenciesCollapsed,
 *                collocations, semanticGraph, conllStyleDependencies,
 *                conll2007. The last two are both tab-separated values
 *                formats. The latter has a lot more columns filled with
 *                underscores. All of them print a blank line after
 *                the output except for oneline. oneline is also not
 *                meaningful in XML output (it is ignored: use penn instead).
 *                (Use of typedDependenciesCollapsed is deprecated. It
 *                works but we recommend instead selecting a type of
 *                dependencies using the optionsString argument. Note in
 *                particular that typedDependenciesCollapsed does not do
 *                CC propagation, which we generally recommend.)
 * @param optionsString Options that additionally specify how trees are to
 *                be printed (for instance, whether stemming should be done).
 *                Known options are: {@code stem, lexicalize, markHeadNodes,
 *                xml, removeTopBracket, transChinese,
 *                includePunctuationDependencies, basicDependencies, treeDependencies,
 *                CCPropagatedDependencies, collapsedDependencies, nonCollapsedDependencies,
 *                nonCollapsedDependenciesSeparated, includeTags}.
 *                This constructor additionally reads {@code ptb2text} and {@code noempty}.
 * @param tlp The TreebankLanguagePack used to do things like delete
 *                or ignore punctuation in output
 * @param hf The HeadFinder used in printing output
 * @param typedDependencyHF The HeadFinder handed to the language pack's
 *                grammatical structure factory; only used when a typed-dependency
 *                format (or conll2007 with a supporting language pack) is requested
 * @throws RuntimeException if {@code formatString} names a format that is not
 *                listed in {@link #outputTreeFormats}
 */
public TreePrint(String formatString, String optionsString, TreebankLanguagePack tlp, HeadFinder hf, HeadFinder typedDependencyHF) {
  formats = StringUtils.stringToProperties(formatString);
  options = StringUtils.stringToProperties(optionsString);

  // Reject unknown format names up front, before any other state is set up.
  List<String> okOutputs = Arrays.asList(outputTreeFormats);
  for (Object formObj : formats.keySet()) {
    String format = (String) formObj;
    if ( ! okOutputs.contains(format)) {
      throw new RuntimeException("Error: output tree format " + format + " not supported. Known formats are: " + okOutputs);
    }
  }

  this.hf = hf;
  this.tlp = tlp;

  final boolean includePunctuationDependencies = propertyToBoolean(this.options,
      "includePunctuationDependencies");
  final boolean generateOriginalDependencies = tlp.generateOriginalDependencies();

  Predicate<String> puncFilter;
  if (includePunctuationDependencies) {
    dependencyFilter = Filters.acceptFilter();
    dependencyWordFilter = Filters.acceptFilter();
    puncFilter = Filters.acceptFilter();
  } else {
    dependencyFilter = new Dependencies.DependentPuncTagRejectFilter<>(tlp.punctuationTagRejectFilter());
    dependencyWordFilter = new Dependencies.DependentPuncWordRejectFilter<>(tlp.punctuationWordRejectFilter());
    // Universal dependencies filter punctuation by tags; original dependencies filter by words.
    puncFilter = generateOriginalDependencies ? tlp.punctuationWordRejectFilter() : tlp.punctuationTagRejectFilter();
  }

  if (propertyToBoolean(this.options, "stem")) {
    stemmer = new WordStemmer();
  } else {
    stemmer = null;
  }

  // A grammatical structure factory is only built for the formats that need typed dependencies.
  if (formats.containsKey("typedDependenciesCollapsed") ||
      formats.containsKey("typedDependencies") ||
      (formats.containsKey("conll2007") && tlp.supportsGrammaticalStructures())) {
    gsf = tlp.grammaticalStructureFactory(puncFilter, typedDependencyHF);
  } else {
    gsf = null;
  }

  lexicalize = propertyToBoolean(this.options, "lexicalize");
  markHeadNodes = propertyToBoolean(this.options, "markHeadNodes");
  transChinese = propertyToBoolean(this.options, "transChinese");
  ptb2text = propertyToBoolean(this.options, "ptb2text");
  // ptb2text implies removal of empties.
  removeEmpty = propertyToBoolean(this.options, "noempty") || ptb2text;

  basicDependencies = propertyToBoolean(this.options, "basicDependencies");
  collapsedDependencies = propertyToBoolean(this.options, "collapsedDependencies");
  nonCollapsedDependencies = propertyToBoolean(this.options, "nonCollapsedDependencies");
  nonCollapsedDependenciesSeparated = propertyToBoolean(this.options, "nonCollapsedDependenciesSeparated");
  treeDependencies = propertyToBoolean(this.options, "treeDependencies");

  includeTags = propertyToBoolean(this.options, "includeTags");

  // if no option format for the dependencies is specified, CCPropagated is the default
  if ( ! basicDependencies && ! collapsedDependencies && ! nonCollapsedDependencies && ! nonCollapsedDependenciesSeparated && ! treeDependencies) {
    CCPropagatedDependencies = true;
  } else {
    CCPropagatedDependencies = propertyToBoolean(this.options, "CCPropagatedDependencies");
  }
}
/**
 * Reads a boolean flag from a set of properties.
 *
 * @param prop The properties to look the key up in
 * @param key The name of the flag
 * @return {@code true} only if the property is present and its value equals
 *         "true", ignoring case; {@code false} for a missing key or any other value
 */
private static boolean propertyToBoolean(Properties prop, String key) {
  final String rawValue = prop.getProperty(key);
  return "true".equalsIgnoreCase(rawValue);
}
/**
 * Prints the tree to this instance's default PrintWriter, by handing it
 * to the two-argument {@code printTree}.
 *
 * @param t The tree to display
 */
public void printTree(Tree t) {
  this.printTree(t, this.pw);
}
/**
 * Prints the tree to the given writer, using the empty string as the
 * sentence ID.
 *
 * @param t The tree to display
 * @param pw The PrintWriter to print it to
 */
public void printTree(final Tree t, PrintWriter pw) {
  final String emptyId = "";
  this.printTree(t, emptyId, pw);
}
/**
 * Prints the tree according to the options specified for this instance.
 * If the tree {@code t} is {@code null}, then the code prints
 * a line indicating a skipped tree. Under the XML option this is
 * an {@code s} element with the {@code skipped} attribute having
 * value {@code true}, and, otherwise, it is the token
 * {@code SENTENCE_SKIPPED_OR_UNPARSABLE}.
 * <p>
 * Under the XML option a non-null tree is wrapped in an {@code s} element.
 * In both XML cases the element carries an {@code id} attribute when
 * {@code id} is neither null nor empty, and the element is followed by a
 * blank line.
 *
 * @param t The tree to display
 * @param id A name for this sentence
 * @param pw Where to display the tree
 */
public void printTree(final Tree t, final String id, final PrintWriter pw) {
  final boolean inXml = propertyToBoolean(options, "xml");
  // The id is XML-escaped so that an arbitrary sentence name cannot break the tag.
  final String idAttribute = (inXml && id != null && ! id.isEmpty())
      ? " id=\"" + XMLUtils.escapeXML(id) + '"'
      : "";

  if (t == null) {
    // Parsing didn't succeed.
    if (inXml) {
      pw.println("<s" + idAttribute + " skipped=\"true\"/>");
      pw.println();
    } else {
      pw.println("SENTENCE_SKIPPED_OR_UNPARSABLE");
    }
    return;
  }

  if (inXml) {
    pw.println("<s" + idAttribute + ">");
  }
  printTreeInternal(t, pw, inXml);
  if (inXml) {
    pw.println("</s>");
    pw.println();
  }
}
/**
* Prints the trees according to the options specified for this instance.
* If the tree {@code t} is {@code null}, then the code prints
* a line indicating a skipped tree. Under the XML option this is
* an {@code s} element with the {@code skipped} attribute having
* value {@code true}, and, otherwise, it is the token
* {@code SENTENCE_SKIPPED_OR_UNPARSABLE}.
*
* @param trees The list of trees to display
* @param id A name for this sentence
* @param pw Where to dislay the tree
*/
public void printTrees(final List> trees, final String id, final PrintWriter pw) {
final boolean inXml = propertyToBoolean(options, "xml");
int ii = 0; // incremented before used, so first tree is numbered 1
for (ScoredObject tp : trees) {
ii++;
Tree t = tp.object();
double score = tp.score();
if (t == null) {
// Parsing didn't succeed.
if (inXml) {
pw.print("");
pw.println();
} else {
pw.println("SENTENCE_SKIPPED_OR_UNPARSABLE Parse #" + ii + " with score " + score);
}
} else {
if (inXml) {
pw.print("");
} else {
pw.print("# Parse ");
pw.print(ii);
pw.print(" with score ");
pw.println(score);
}
printTreeInternal(t, pw, inXml);
if (inXml) {
pw.println("");
pw.println();
}
}
}
}
/** Print the internal part of a tree having already identified it.
* The ID and outer XML element is printed wrapping this method, but none
* of the internal content.
*
* @param t The tree to print. Now known to be non-null
* @param pw Where to print it to
* @param inXml Whether to use XML style printing
*/
private void printTreeInternal(final Tree t, final PrintWriter pw, final boolean inXml) {
Tree outputTree = t;
if (formats.containsKey("conll2007") || removeEmpty) {
outputTree = outputTree.prune(new BobChrisTreeNormalizer.EmptyFilter());
}
if (formats.containsKey("words")) {
if (inXml) {
ArrayList