
package edu.stanford.nlp.parser.lexparser;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;

import edu.stanford.nlp.io.NullOutputStream;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.parser.common.NoSuchParseException;
import edu.stanford.nlp.parser.common.ParserGrammar;
import edu.stanford.nlp.parser.common.ParserQuery;
import edu.stanford.nlp.parser.common.ParserUtils;
import edu.stanford.nlp.parser.common.ParsingThreadsafeProcessor;
import edu.stanford.nlp.parser.metrics.AbstractEval;
import edu.stanford.nlp.parser.metrics.BestOfTopKEval;
import edu.stanford.nlp.parser.metrics.Eval;
import edu.stanford.nlp.parser.metrics.Evalb;
import edu.stanford.nlp.parser.metrics.EvalbByCat;
import edu.stanford.nlp.parser.metrics.FilteredEval;
import edu.stanford.nlp.parser.metrics.LeafAncestorEval;
import edu.stanford.nlp.parser.metrics.ParserQueryEval;
import edu.stanford.nlp.parser.metrics.TaggingEval;
import edu.stanford.nlp.parser.metrics.TopMatchEval;
import edu.stanford.nlp.parser.metrics.UnlabeledAttachmentEval;
import edu.stanford.nlp.trees.LeftHeadFinder;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.TreePrint;
import edu.stanford.nlp.trees.TreeTransformer;
import java.util.function.Function;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.ScoredObject;
import edu.stanford.nlp.util.Timing;
import edu.stanford.nlp.util.concurrent.MulticoreWrapper;
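
/*
 * Usage sketch: assuming the usual LexicalizedParser entry points and that
 * testOnTreebank(Treebank) is the driver method (neither is shown in this
 * listing), and with purely illustrative model and treebank paths:
 *
 *   LexicalizedParser parser = LexicalizedParser.loadModel("englishPCFG.ser.gz");
 *   Treebank testTreebank = parser.getOp().tlpParams.testMemoryTreebank();
 *   testTreebank.loadPath("/path/to/test/trees");
 *   EvaluateTreebank evaluator = new EvaluateTreebank(parser);
 *   double labeledF1 = evaluator.testOnTreebank(testTreebank);
 */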

public class EvaluateTreebank {

  private final Options op;
  private final TreeTransformer debinarizer;
  private final TreeTransformer subcategoryStripper;
  private final TreeTransformer collinizer;
  private final TreeTransformer boundaryRemover;

  private final ParserGrammar pqFactory;

  // private final Lexicon lex;

  List<Eval> evals = null;
  List<ParserQueryEval> parserQueryEvals = null;

  private final boolean summary;
  private final boolean tsv;

  // no annotation
  private final TreeAnnotatorAndBinarizer binarizerOnly;

  AbstractEval pcfgLB = null;
  AbstractEval pcfgChildSpecific = null;
  LeafAncestorEval pcfgLA = null;
  AbstractEval pcfgCB = null;
  AbstractEval pcfgDA = null;
  AbstractEval pcfgTA = null;
  AbstractEval depDA = null;
  AbstractEval depTA = null;
  AbstractEval factLB = null;
  AbstractEval factChildSpecific = null;
  LeafAncestorEval factLA = null;
  AbstractEval factCB = null;
  AbstractEval factDA = null;
  AbstractEval factTA = null;
  AbstractEval pcfgRUO = null;
  AbstractEval pcfgCUO = null;
  AbstractEval pcfgCatE = null;
  AbstractEval.ScoreEval pcfgLL = null;
  AbstractEval.ScoreEval depLL = null;
  AbstractEval.ScoreEval factLL = null;
  AbstractEval kGoodLB = null;

  private final List<BestOfTopKEval> topKEvals = new ArrayList<>();

  private int kbestPCFG = 0;
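
  /*
   * Note: which of the evaluators above get instantiated is controlled by the
   * Properties in op.testOptions.evals (see the constructor below). A sketch of a
   * typical invocation, assuming the standard -evals, -loadFromSerializedFile and
   * -testTreebank flags of the LexicalizedParser command line:
   *
   *   java edu.stanford.nlp.parser.lexparser.LexicalizedParser \
   *     -loadFromSerializedFile englishPCFG.ser.gz \
   *     -evals "factLB=true,factTA=true,summary=true,tsv=true" \
   *     -testTreebank /path/to/test/trees
   */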

  private int numSkippedEvals = 0;

  private boolean saidMemMessage = false;
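
  /*
   * Sketch of injecting a tagger for the preTag option: per the design note on the
   * tagger field below, the tagger is held as a plain Function so that the parser
   * can ship without one; a caller that does have a POS tagger on its classpath can
   * pass it to the four-argument constructor. MaxentTagger and tagSentence are the
   * standard Stanford tagger API, but the model path and the op/lex/parser
   * variables here are assumed from the surrounding context.
   *
   *   MaxentTagger maxent = new MaxentTagger("english-left3words-distsim.tagger");
   *   Function<List<? extends HasWord>, List<TaggedWord>> tagFunction = maxent::tagSentence;
   *   EvaluateTreebank eval = new EvaluateTreebank(op, lex, parser, tagFunction);
   */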

  /**
   * The tagger optionally used before parsing.
   * <br>
   * We keep it here as a function rather than a MaxentTagger so that
   * we can distribute a version of the parser that doesn't include
   * the entire tagger.
   */
  protected final Function<List<? extends HasWord>, List<TaggedWord>> tagger;

  public EvaluateTreebank(LexicalizedParser parser) {
    this(parser.getOp(), parser.lex, parser);
  }

  public EvaluateTreebank(Options op, Lexicon lex, ParserGrammar pqFactory) {
    this(op, lex, pqFactory, pqFactory.loadTagger());
  }

  public EvaluateTreebank(Options op, Lexicon lex, ParserGrammar pqFactory, Function<List<? extends HasWord>, List<TaggedWord>> tagger) {
    this.op = op;
    this.debinarizer = new Debinarizer(op.forceCNF);
    this.subcategoryStripper = op.tlpParams.subcategoryStripper();

    this.evals = Generics.newArrayList();
    evals.addAll(pqFactory.getExtraEvals());
    this.parserQueryEvals = pqFactory.getParserQueryEvals();

    // this.lex = lex;
    this.pqFactory = pqFactory;
    this.tagger = tagger;

    collinizer = op.tlpParams.collinizer();
    boundaryRemover = new BoundaryRemover();

    boolean runningAverages = Boolean.parseBoolean(op.testOptions.evals.getProperty("runningAverages"));
    summary = Boolean.parseBoolean(op.testOptions.evals.getProperty("summary"));
    tsv = Boolean.parseBoolean(op.testOptions.evals.getProperty("tsv"));

    if (!op.trainOptions.leftToRight) {
      binarizerOnly = new TreeAnnotatorAndBinarizer(op.tlpParams, op.forceCNF, false, false, op);
    } else {
      binarizerOnly = new TreeAnnotatorAndBinarizer(op.tlpParams.headFinder(), new LeftHeadFinder(), op.tlpParams, op.forceCNF, false, false, op);
    }

    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgLB"))) {
      pcfgLB = new Evalb("pcfg LP/LR", runningAverages);
    }
    // TODO: might be nice to allow more than one child-specific scorer
    if (op.testOptions.evals.getProperty("pcfgChildSpecific") != null) {
      String filter = op.testOptions.evals.getProperty("pcfgChildSpecific");
      pcfgChildSpecific = FilteredEval.childFilteredEval("pcfg children matching " + filter + " LP/LR", runningAverages, op.langpack(), filter);
    }
    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgLA"))) {
      pcfgLA = new LeafAncestorEval("pcfg LeafAncestor");
    }
    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgCB"))) {
      pcfgCB = new Evalb.CBEval("pcfg CB", runningAverages);
    }
    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgDA"))) {
      pcfgDA = new UnlabeledAttachmentEval("pcfg DA", runningAverages, op.langpack().headFinder());
    }
    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgTA"))) {
      pcfgTA = new TaggingEval("pcfg Tag", runningAverages, lex);
    }
    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("depDA"))) {
      depDA = new UnlabeledAttachmentEval("dep DA", runningAverages, null, op.langpack().punctuationWordRejectFilter());
    }
    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("depTA"))) {
      depTA = new TaggingEval("dep Tag", runningAverages, lex);
    }
    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("factLB"))) {
      factLB = new Evalb("factor LP/LR", runningAverages);
    }
    if (op.testOptions.evals.getProperty("factChildSpecific") != null) {
      String filter = op.testOptions.evals.getProperty("factChildSpecific");
      factChildSpecific = FilteredEval.childFilteredEval("fact children matching " + filter + " LP/LR", runningAverages, op.langpack(), filter);
    }
    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("factLA"))) {
      factLA = new LeafAncestorEval("factor LeafAncestor");
    }
    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("factCB"))) {
      factCB = new Evalb.CBEval("fact CB", runningAverages);
    }
    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("factDA"))) {
      factDA = new UnlabeledAttachmentEval("factor DA", runningAverages, null);
    }
    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("factTA"))) {
      factTA = new TaggingEval("factor Tag", runningAverages, lex);
    }
    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgRUO"))) {
      pcfgRUO = new AbstractEval.RuleErrorEval("pcfg Rule under/over");
    }
    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgCUO"))) {
      pcfgCUO = new AbstractEval.CatErrorEval("pcfg Category under/over");
    }
    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgCatE"))) {
      pcfgCatE = new EvalbByCat("pcfg Category Eval", runningAverages);
    }
    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgLL"))) {
      pcfgLL = new AbstractEval.ScoreEval("pcfgLL", runningAverages);
    }
    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("depLL"))) {
      depLL = new AbstractEval.ScoreEval("depLL", runningAverages);
    }
    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("factLL"))) {
      factLL = new AbstractEval.ScoreEval("factLL", runningAverages);
    }
    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("topMatch"))) {
      evals.add(new TopMatchEval("topMatch", runningAverages));
    }
    // this one is for the various k Good/Best options.  Just for individual results
    kGoodLB = new Evalb("kGood LP/LR", false);

    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("pcfgTopK"))) {
      topKEvals.add(new BestOfTopKEval(new Evalb("pcfg top k comparisons", false), new Evalb("pcfg top k LP/LR", runningAverages)));
    }

    if (topKEvals.size() > 0) {
      kbestPCFG = op.testOptions.evalPCFGkBest;
    }
    if (op.testOptions.printPCFGkBest > 0) {
      kbestPCFG = Math.max(kbestPCFG, op.testOptions.printPCFGkBest);
    }
  }

  public double getLBScore() {
    if (factLB != null) {
      return factLB.getEvalbF1Percent();
    }
    if (pcfgLB != null) {
      return pcfgLB.getEvalbF1Percent();
    }
    return 0.0;
  }

  public double getTagScore() {
    if (factTA != null) {
      return factTA.getEvalbF1Percent();
    }
    if (pcfgTA != null) {
      return pcfgTA.getEvalbF1Percent();
    }
    return 0.0;
  }

  /**
   * Remove tree scores, so they don't print.
   * <br>
   * TODO: The printing architecture should be fixed up in the trees package
   * sometime.
   */
  private static void nanScores(Tree tree) {
    tree.setScore(Double.NaN);
    Tree[] kids = tree.children();
    for (Tree kid : kids) {
      nanScores(kid);
    }
  }

  /**
   * Returns the input sentence for the parser.
   */
  private List<CoreLabel> getInputSentence(Tree t) {
    if (op.testOptions.forceTags) {
      if (op.testOptions.preTag) {
        List<TaggedWord> s = tagger.apply(t.yieldWords());
        if (op.testOptions.verbose) {
          System.err.println("Guess tags: " + Arrays.toString(s.toArray()));
          System.err.println("Gold tags: " + t.labeledYield().toString());
        }
        return Sentence.toCoreLabelList(s);
      } else if (op.testOptions.noFunctionalForcing) {
        ArrayList<? extends HasWord> s = t.taggedYield();
        for (HasWord word : s) {
          String tag = ((HasTag) word).tag();
          tag = tag.split("-")[0];
          ((HasTag) word).setTag(tag);
        }
        return Sentence.toCoreLabelList(s);
      } else {
        return Sentence.toCoreLabelList(t.taggedYield());
      }
    } else {
      return Sentence.toCoreLabelList(t.yieldWords());
    }
  }

  public void processResults(ParserQuery pq, Tree goldTree, PrintWriter pwErr, PrintWriter pwOut, PrintWriter pwFileOut, PrintWriter pwStats, TreePrint treePrint) {
    if (pq.saidMemMessage()) {
      saidMemMessage = true;
    }

    Tree tree;
    List<? extends HasWord> sentence = pq.originalSentence();
    try {
      tree = pq.getBestParse();
    } catch (NoSuchParseException e) {
      tree = null;
    }

    List<ScoredObject<Tree>> kbestPCFGTrees = null;
    if (tree != null && kbestPCFG > 0) {
      kbestPCFGTrees = pq.getKBestPCFGParses(kbestPCFG);
    }

    //combo parse goes to pwOut (System.out)
    if (op.testOptions.verbose) {
      pwOut.println("ComboParser best");
      Tree ot = tree;
      if (ot != null && ! op.tlpParams.treebankLanguagePack().isStartSymbol(ot.value())) {
        ot = ot.treeFactory().newTreeNode(op.tlpParams.treebankLanguagePack().startSymbol(), Collections.singletonList(ot));
      }
      treePrint.printTree(ot, pwOut);
    } else {
      treePrint.printTree(tree, pwOut);
    }

    // **OUTPUT**
    // print various n-best like outputs (including 1-best)
    // print various statistics
    if (tree != null) {
      if (op.testOptions.printAllBestParses) {
        List<ScoredObject<Tree>> parses = pq.getBestPCFGParses();
        int sz = parses.size();
        if (sz > 1) {
          pwOut.println("There were " + sz + " best PCFG parses with score " + parses.get(0).score() + '.');
          Tree transGoldTree = collinizer.transformTree(goldTree);
          int iii = 0;
          for (ScoredObject<Tree> sot : parses) {
            iii++;
            Tree tb = sot.object();
            Tree tbd = debinarizer.transformTree(tb);
            tbd = subcategoryStripper.transformTree(tbd);
            pq.restoreOriginalWords(tbd);
            pwOut.println("PCFG Parse #" + iii + " with score " + tbd.score());
            tbd.pennPrint(pwOut);
            Tree tbtr = collinizer.transformTree(tbd);
            // pwOut.println("Tree size = " + tbtr.size() + "; depth = " + tbtr.depth());
            kGoodLB.evaluate(tbtr, transGoldTree, pwErr);
          }
        }
      }
      // Huang and Chiang (2006) Algorithm 3 output from the PCFG parser
      else if (op.testOptions.printPCFGkBest > 0 && op.testOptions.outputkBestEquivocation == null) {
        List<ScoredObject<Tree>> trees = kbestPCFGTrees.subList(0, op.testOptions.printPCFGkBest);
        Tree transGoldTree = collinizer.transformTree(goldTree);
        int i = 0;
        for (ScoredObject<Tree> tp : trees) {
          i++;
          pwOut.println("PCFG Parse #" + i + " with score " + tp.score());
          Tree tbd = tp.object();
          tbd.pennPrint(pwOut);
          Tree tbtr = collinizer.transformTree(tbd);
          kGoodLB.evaluate(tbtr, transGoldTree, pwErr);
        }
      }
      // Chart parser (factored) n-best list
      else if (op.testOptions.printFactoredKGood > 0 && pq.hasFactoredParse()) {
        // DZ: debug n best trees
        List<ScoredObject<Tree>> trees = pq.getKGoodFactoredParses(op.testOptions.printFactoredKGood);
        Tree transGoldTree = collinizer.transformTree(goldTree);
        int ii = 0;
        for (ScoredObject<Tree> tp : trees) {
          ii++;
          pwOut.println("Factored Parse #" + ii + " with score " + tp.score());
          Tree tbd = tp.object();
          tbd.pennPrint(pwOut);
          Tree tbtr = collinizer.transformTree(tbd);
          kGoodLB.evaluate(tbtr, transGoldTree, pwOut);
        }
      }
      //1-best output
      else if (pwFileOut != null) {
        pwFileOut.println(tree.toString());
      }

      //Print the derivational entropy
      if (op.testOptions.outputkBestEquivocation != null && op.testOptions.printPCFGkBest > 0) {
        List<ScoredObject<Tree>> trees = kbestPCFGTrees.subList(0, op.testOptions.printPCFGkBest);

        double[] logScores = new double[trees.size()];
        int treeId = 0;
        for (ScoredObject<Tree> kBestTree : trees) {
          logScores[treeId++] = kBestTree.score();
        }

        // Re-normalize the k-best log scores and compute the entropy
        // H = -sum_i p_i * log2(p_i), with p_i = exp(logScores[i] - logSum(logScores))
        double entropy = 0.0;
        double denom = ArrayMath.logSum(logScores);
        for (double logScore : logScores) {
          double logPr = logScore - denom;
          entropy += Math.exp(logPr) * (logPr / Math.log(2));
        }
        entropy *= -1; //Convert to bits

        pwStats.printf("%f\t%d\t%d\n", entropy, trees.size(), sentence.size());
      }
    }

    // **EVALUATION**
    // Perform various evaluations specified by the user
    if (tree != null) {
      //Strip subcategories and remove punctuation for evaluation
      tree = subcategoryStripper.transformTree(tree);
      Tree treeFact = collinizer.transformTree(tree);

      //Setup the gold tree
      if (op.testOptions.verbose) {
        pwOut.println("Correct parse");
        treePrint.printTree(goldTree, pwOut);
      }
      Tree transGoldTree = collinizer.transformTree(goldTree);
      if (transGoldTree != null) {
        transGoldTree = subcategoryStripper.transformTree(transGoldTree);
      }

      //Can't do evaluation in these two cases
      if (transGoldTree == null) {
        pwErr.println("Couldn't transform gold tree for evaluation, skipping eval. Gold tree was:");
        goldTree.pennPrint(pwErr);
        numSkippedEvals++;
        return;
      } else if (treeFact == null) {
        pwErr.println("Couldn't transform hypothesis tree for evaluation, skipping eval. Tree was:");
        tree.pennPrint(pwErr);
        numSkippedEvals++;
        return;
      } else if (treeFact.yield().size() != transGoldTree.yield().size()) {
        List



