edu.stanford.nlp.parser.lexparser.EvaluateTreebank Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of stanford-parser Show documentation
Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.
package edu.stanford.nlp.parser.lexparser;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import edu.stanford.nlp.io.NullOutputStream;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.parser.common.NoSuchParseException;
import edu.stanford.nlp.parser.common.ParserGrammar;
import edu.stanford.nlp.parser.common.ParserQuery;
import edu.stanford.nlp.parser.common.ParsingThreadsafeProcessor;
import edu.stanford.nlp.parser.metrics.AbstractEval;
import edu.stanford.nlp.parser.metrics.BestOfTopKEval;
import edu.stanford.nlp.parser.metrics.Eval;
import edu.stanford.nlp.parser.metrics.Evalb;
import edu.stanford.nlp.parser.metrics.EvalbByCat;
import edu.stanford.nlp.parser.metrics.FilteredEval;
import edu.stanford.nlp.parser.metrics.LeafAncestorEval;
import edu.stanford.nlp.parser.metrics.ParserQueryEval;
import edu.stanford.nlp.parser.metrics.TaggingEval;
import edu.stanford.nlp.parser.metrics.TopMatchEval;
import edu.stanford.nlp.parser.metrics.UnlabeledAttachmentEval;
import edu.stanford.nlp.trees.LeftHeadFinder;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.TreePrint;
import edu.stanford.nlp.trees.TreeTransformer;
import java.util.function.Function;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.ScoredObject;
import edu.stanford.nlp.util.Timing;
import edu.stanford.nlp.util.concurrent.MulticoreWrapper;
public class EvaluateTreebank {
private final Options op;
private final TreeTransformer debinarizer;
private final TreeTransformer subcategoryStripper;
private final TreeTransformer collinizer;
private final TreeTransformer boundaryRemover;
private final ParserGrammar pqFactory;
// private final Lexicon lex;
List evals = null;
List parserQueryEvals = null;
private final boolean summary;
private final boolean tsv;
// no annotation
private final TreeAnnotatorAndBinarizer binarizerOnly;
AbstractEval pcfgLB = null;
AbstractEval pcfgChildSpecific = null;
LeafAncestorEval pcfgLA = null;
AbstractEval pcfgCB = null;
AbstractEval pcfgDA = null;
AbstractEval pcfgTA = null;
AbstractEval depDA = null;
AbstractEval depTA = null;
AbstractEval factLB = null;
AbstractEval factChildSpecific = null;
LeafAncestorEval factLA = null;
AbstractEval factCB = null;
AbstractEval factDA = null;
AbstractEval factTA = null;
AbstractEval pcfgRUO = null;
AbstractEval pcfgCUO = null;
AbstractEval pcfgCatE = null;
AbstractEval.ScoreEval pcfgLL = null;
AbstractEval.ScoreEval depLL = null;
AbstractEval.ScoreEval factLL = null;
AbstractEval kGoodLB = null;
private final List topKEvals = new ArrayList();
private int kbestPCFG = 0;
private int numSkippedEvals = 0;
private boolean saidMemMessage = false;
/**
* The tagger optionally used before parsing.
*
* We keep it here as a function rather than a MaxentTagger so that
* we can distribute a version of the parser that doesn't include
* the entire tagger.
*/
protected final Function, List> tagger;
/**
 * Evaluates using the options, lexicon, and tagger belonging to the
 * given parser (the parser itself serves as the ParserQuery factory).
 */
public EvaluateTreebank(LexicalizedParser parser) {
this(parser.getOp(), parser.lex, parser);
}
/**
 * Evaluates with an explicit lexicon and ParserQuery factory; the tagger
 * is obtained from the factory via {@code pqFactory.loadTagger()}.
 */
public EvaluateTreebank(Options op, Lexicon lex, ParserGrammar pqFactory) {
this(op, lex, pqFactory, pqFactory.loadTagger());
}
/**
 * Builds an evaluator from explicit components.  Which metrics run is
 * controlled by boolean/string properties in {@code op.testOptions.evals}.
 *
 * (The generic type arguments on {@code tagger} are restored here; the
 * HTML-extracted copy of this file had stripped them, leaving invalid
 * syntax {@code Function,List>}.)
 *
 * @param op parser options; {@code testOptions.evals} selects the metrics
 * @param lex lexicon, passed to the tagging evaluations
 * @param pqFactory factory for ParserQuery objects; also supplies extra evals
 * @param tagger tagger optionally applied to the input before parsing
 */
public EvaluateTreebank(Options op, Lexicon lex, ParserGrammar pqFactory,
                        Function<List<? extends HasWord>, List<TaggedWord>> tagger) {
  this.op = op;
  this.debinarizer = new Debinarizer(op.forceCNF);
  this.subcategoryStripper = op.tlpParams.subcategoryStripper();

  this.evals = Generics.newArrayList();
  evals.addAll(pqFactory.getExtraEvals());
  this.parserQueryEvals = pqFactory.getParserQueryEvals();

  // this.lex = lex;
  this.pqFactory = pqFactory;
  this.tagger = tagger;

  collinizer = op.tlpParams.collinizer();
  boundaryRemover = new BoundaryRemover();

  boolean runningAverages = Boolean.parseBoolean(op.testOptions.evals.getProperty("runningAverages"));
  summary = Boolean.parseBoolean(op.testOptions.evals.getProperty("summary"));
  tsv = Boolean.parseBoolean(op.testOptions.evals.getProperty("tsv"));

  // Use a left head finder for binarization when training was left-to-right.
  if (!op.trainOptions.leftToRight) {
    binarizerOnly = new TreeAnnotatorAndBinarizer(op.tlpParams, op.forceCNF, false, false, op);
  } else {
    binarizerOnly = new TreeAnnotatorAndBinarizer(op.tlpParams.headFinder(), new LeftHeadFinder(), op.tlpParams, op.forceCNF, false, false, op);
  }

  // Each metric below is instantiated only if its property is set.
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgLB"))) {
    pcfgLB = new Evalb("pcfg LP/LR", runningAverages);
  }
  // TODO: might be nice to allow more than one child-specific scorer
  if (op.testOptions.evals.getProperty("pcfgChildSpecific") != null) {
    String filter = op.testOptions.evals.getProperty("pcfgChildSpecific");
    pcfgChildSpecific = FilteredEval.childFilteredEval("pcfg children matching " + filter + " LP/LR", runningAverages, op.langpack(), filter);
  }
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgLA"))) {
    pcfgLA = new LeafAncestorEval("pcfg LeafAncestor");
  }
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgCB"))) {
    pcfgCB = new Evalb.CBEval("pcfg CB", runningAverages);
  }
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgDA"))) {
    pcfgDA = new UnlabeledAttachmentEval("pcfg DA", runningAverages, op.langpack().headFinder());
  }
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgTA"))) {
    pcfgTA = new TaggingEval("pcfg Tag", runningAverages, lex);
  }
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("depDA"))) {
    depDA = new UnlabeledAttachmentEval("dep DA", runningAverages, null, op.langpack().punctuationWordRejectFilter());
  }
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("depTA"))) {
    depTA = new TaggingEval("dep Tag", runningAverages, lex);
  }
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("factLB"))) {
    factLB = new Evalb("factor LP/LR", runningAverages);
  }
  if (op.testOptions.evals.getProperty("factChildSpecific") != null) {
    String filter = op.testOptions.evals.getProperty("factChildSpecific");
    factChildSpecific = FilteredEval.childFilteredEval("fact children matching " + filter + " LP/LR", runningAverages, op.langpack(), filter);
  }
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("factLA"))) {
    factLA = new LeafAncestorEval("factor LeafAncestor");
  }
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("factCB"))) {
    factCB = new Evalb.CBEval("fact CB", runningAverages);
  }
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("factDA"))) {
    factDA = new UnlabeledAttachmentEval("factor DA", runningAverages, null);
  }
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("factTA"))) {
    factTA = new TaggingEval("factor Tag", runningAverages, lex);
  }
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgRUO"))) {
    pcfgRUO = new AbstractEval.RuleErrorEval("pcfg Rule under/over");
  }
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgCUO"))) {
    pcfgCUO = new AbstractEval.CatErrorEval("pcfg Category under/over");
  }
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgCatE"))) {
    pcfgCatE = new EvalbByCat("pcfg Category Eval", runningAverages);
  }
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgLL"))) {
    pcfgLL = new AbstractEval.ScoreEval("pcfgLL", runningAverages);
  }
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("depLL"))) {
    depLL = new AbstractEval.ScoreEval("depLL", runningAverages);
  }
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("factLL"))) {
    factLL = new AbstractEval.ScoreEval("factLL", runningAverages);
  }
  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("topMatch"))) {
    evals.add(new TopMatchEval("topMatch", runningAverages));
  }
  // this one is for the various k Good/Best options. Just for individual results
  kGoodLB = new Evalb("kGood LP/LR", false);

  if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgTopK"))) {
    topKEvals.add(new BestOfTopKEval(new Evalb("pcfg top k comparisons", false), new Evalb("pcfg top k LP/LR", runningAverages)));
  }
  // Keep enough k-best PCFG parses to satisfy both the top-k evals and any
  // requested k-best printing.
  if (topKEvals.size() > 0) {
    kbestPCFG = op.testOptions.evalPCFGkBest;
  }
  if (op.testOptions.printPCFGkBest > 0) {
    kbestPCFG = Math.max(kbestPCFG, op.testOptions.printPCFGkBest);
  }
}
/**
 * Returns the labeled-bracketing F1 (as a percent), preferring the
 * factored-parse eval over the PCFG eval; 0.0 when neither is enabled.
 */
public double getLBScore() {
  AbstractEval preferred = (factLB != null) ? factLB : pcfgLB;
  return (preferred != null) ? preferred.getEvalbF1Percent() : 0.0;
}
/**
 * Returns the tagging-accuracy F1 (as a percent), preferring the
 * factored-parse eval over the PCFG eval; 0.0 when neither is enabled.
 */
public double getTagScore() {
  AbstractEval preferred = (factTA != null) ? factTA : pcfgTA;
  return (preferred != null) ? preferred.getEvalbF1Percent() : 0.0;
}
/**
 * Recursively overwrites every node's score with NaN so that scores are
 * suppressed when the tree is printed.
 *
 * TODO: The printing architecture should be fixed up in the trees package
 * sometime.
 *
 * @param tree root of the subtree to clear; modified in place
 */
private static void nanScores(Tree tree) {
  tree.setScore(Double.NaN);
  for (Tree child : tree.children()) {
    nanScores(child);
  }
}
/**
* Returns the input sentence for the parser.
*/
private List getInputSentence(Tree t) {
if (op.testOptions.forceTags) {
if (op.testOptions.preTag) {
List s = tagger.apply(t.yieldWords());
if(op.testOptions.verbose) {
System.err.println("Guess tags: "+Arrays.toString(s.toArray()));
System.err.println("Gold tags: "+t.labeledYield().toString());
}
return Sentence.toCoreLabelList(s);
} else if(op.testOptions.noFunctionalForcing) {
ArrayList extends HasWord> s = t.taggedYield();
for (HasWord word : s) {
String tag = ((HasTag) word).tag();
tag = tag.split("-")[0];
((HasTag) word).setTag(tag);
}
return Sentence.toCoreLabelList(s);
} else {
return Sentence.toCoreLabelList(t.taggedYield());
}
} else {
return Sentence.toCoreLabelList(t.yieldWords());
}
}
public void processResults(ParserQuery pq, Tree goldTree, PrintWriter pwErr, PrintWriter pwOut, PrintWriter pwFileOut, PrintWriter pwStats, TreePrint treePrint) {
if (pq.saidMemMessage()) {
saidMemMessage = true;
}
Tree tree;
List extends HasWord> sentence = pq.originalSentence();
try {
tree = pq.getBestParse();
} catch (NoSuchParseException e) {
tree = null;
}
List> kbestPCFGTrees = null;
if (tree != null && kbestPCFG > 0) {
kbestPCFGTrees = pq.getKBestPCFGParses(kbestPCFG);
}
//combo parse goes to pwOut (System.out)
if (op.testOptions.verbose) {
pwOut.println("ComboParser best");
Tree ot = tree;
if (ot != null && ! op.tlpParams.treebankLanguagePack().isStartSymbol(ot.value())) {
ot = ot.treeFactory().newTreeNode(op.tlpParams.treebankLanguagePack().startSymbol(), Collections.singletonList(ot));
}
treePrint.printTree(ot, pwOut);
} else {
treePrint.printTree(tree, pwOut);
}
// **OUTPUT**
// print various n-best like outputs (including 1-best)
// print various statistics
if (tree != null) {
if(op.testOptions.printAllBestParses) {
List> parses = pq.getBestPCFGParses();
int sz = parses.size();
if (sz > 1) {
pwOut.println("There were " + sz + " best PCFG parses with score " + parses.get(0).score() + '.');
Tree transGoldTree = collinizer.transformTree(goldTree);
int iii = 0;
for (ScoredObject sot : parses) {
iii++;
Tree tb = sot.object();
Tree tbd = debinarizer.transformTree(tb);
tbd = subcategoryStripper.transformTree(tbd);
pq.restoreOriginalWords(tbd);
pwOut.println("PCFG Parse #" + iii + " with score " + tbd.score());
tbd.pennPrint(pwOut);
Tree tbtr = collinizer.transformTree(tbd);
// pwOut.println("Tree size = " + tbtr.size() + "; depth = " + tbtr.depth());
kGoodLB.evaluate(tbtr, transGoldTree, pwErr);
}
}
}
// Huang and Chiang (2006) Algorithm 3 output from the PCFG parser
else if (op.testOptions.printPCFGkBest > 0 && op.testOptions.outputkBestEquivocation == null) {
List> trees = kbestPCFGTrees.subList(0, op.testOptions.printPCFGkBest);
Tree transGoldTree = collinizer.transformTree(goldTree);
int i = 0;
for (ScoredObject tp : trees) {
i++;
pwOut.println("PCFG Parse #" + i + " with score " + tp.score());
Tree tbd = tp.object();
tbd.pennPrint(pwOut);
Tree tbtr = collinizer.transformTree(tbd);
kGoodLB.evaluate(tbtr, transGoldTree, pwErr);
}
}
// Chart parser (factored) n-best list
else if (op.testOptions.printFactoredKGood > 0 && pq.hasFactoredParse()) {
// DZ: debug n best trees
List> trees = pq.getKGoodFactoredParses(op.testOptions.printFactoredKGood);
Tree transGoldTree = collinizer.transformTree(goldTree);
int ii = 0;
for (ScoredObject tp : trees) {
ii++;
pwOut.println("Factored Parse #" + ii + " with score " + tp.score());
Tree tbd = tp.object();
tbd.pennPrint(pwOut);
Tree tbtr = collinizer.transformTree(tbd);
kGoodLB.evaluate(tbtr, transGoldTree, pwOut);
}
}
//1-best output
else if(pwFileOut != null) {
pwFileOut.println(tree.toString());
}
//Print the derivational entropy
if(op.testOptions.outputkBestEquivocation != null && op.testOptions.printPCFGkBest > 0) {
List> trees = kbestPCFGTrees.subList(0, op.testOptions.printPCFGkBest);
double[] logScores = new double[trees.size()];
int treeId = 0;
for(ScoredObject kBestTree : trees)
logScores[treeId++] = kBestTree.score();
//Re-normalize
double entropy = 0.0;
double denom = ArrayMath.logSum(logScores);
for (double logScore : logScores) {
double logPr = logScore - denom;
entropy += Math.exp(logPr) * (logPr / Math.log(2));
}
entropy *= -1; //Convert to bits
pwStats.printf("%f\t%d\t%d\n", entropy,trees.size(),sentence.size());
}
}
// **EVALUATION**
// Perform various evaluations specified by the user
if (tree != null) {
//Strip subcategories and remove punctuation for evaluation
tree = subcategoryStripper.transformTree(tree);
Tree treeFact = collinizer.transformTree(tree);
//Setup the gold tree
if (op.testOptions.verbose) {
pwOut.println("Correct parse");
treePrint.printTree(goldTree, pwOut);
}
Tree transGoldTree = collinizer.transformTree(goldTree);
if(transGoldTree != null)
transGoldTree = subcategoryStripper.transformTree(transGoldTree);
//Can't do evaluation in these two cases
if (transGoldTree == null) {
pwErr.println("Couldn't transform gold tree for evaluation, skipping eval. Gold tree was:");
goldTree.pennPrint(pwErr);
numSkippedEvals++;
return;
} else if (treeFact == null) {
pwErr.println("Couldn't transform hypothesis tree for evaluation, skipping eval. Tree was:");
tree.pennPrint(pwErr);
numSkippedEvals++;
return;
} else if(treeFact.yield().size() != transGoldTree.yield().size()) {
List