edu.stanford.nlp.parser.metrics.Evalb Maven / Gradle / Ivy
Show all versions of stanford-parser Show documentation
package edu.stanford.nlp.parser.metrics;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Properties;
import java.util.Set;
import edu.stanford.nlp.international.Language;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.SentenceUtils;
import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams;
import edu.stanford.nlp.trees.Constituent;
import edu.stanford.nlp.trees.ConstituentFactory;
import edu.stanford.nlp.trees.LabeledScoredConstituentFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Triple;
/**
* A Java re-implementation of the evalb bracket scoring metric (Collins, 1997) that accepts Unicode input.
* "Collinization" should be performed on input trees prior to invoking the package programmatically.
* "Collinization" refers to normalization of trees for things not counted in evaluation,
* such as equivalencing PRT and ADVP, which has standardly been done in English evaluation.
* A main method is provided that performs Collinization according to language specific settings.
*
* This implementation assumes that the guess/gold input files are of equal length, and have one tree per
* line.
*
* This implementation was last validated against EVALB20080701 (http://nlp.cs.nyu.edu/evalb/)
* by Spence Green on 22 Jan. 2010. Notwithstanding this, Sekine and Collins' EVALB script has been
* the common standard for constituency evaluation of parsers for the last decade. We always validate
* any numbers we report with it, and we suggest that you do the same.
*
* @author Dan Klein
* @author Spence Green
*/
public class Evalb extends AbstractEval {
/** A logger for this class. Declared final so the shared channel cannot be reassigned. */
private static final Redwood.RedwoodChannels log = Redwood.channels(Evalb.class);
/** Factory handed to {@code Tree.constituents} when collecting constituents; assigned once in the constructor. */
private final ConstituentFactory cf;
/**
 * Creates the metric and sets up the constituent factory it uses.
 *
 * @param str forwarded unchanged to the superclass constructor
 * @param runningAverages forwarded unchanged to the superclass constructor
 */
public Evalb(String str, boolean runningAverages) {
  super(str, runningAverages);
  this.cf = new LabeledScoredConstituentFactory();
}
/**
 * Collects the constituents of a tree for bracket scoring.
 * evalb only evaluates phrasal categories, thus constituents() does not
 * return objects for terminals and pre-terminals.
 *
 * @param tree the tree whose constituents are wanted; may be {@code null}
 * @return a freshly created set containing the constituents of {@code tree};
 *         nothing is added to it when {@code tree} is {@code null}
 */
@Override
protected Set<Constituent> makeObjects(Tree tree) {
  Set<Constituent> constituents = Generics.newHashSet();
  if (tree != null) {
    constituents.addAll(tree.constituents(cf));
  }
  return constituents;
}
/**
 * Scores a guess tree against a gold tree.
 * If either tree is {@code null}, a message is printed to standard error and
 * nothing is scored. If the two yields differ in size, a warning and both yields
 * are logged, and the pair is still passed on to the superclass for scoring.
 *
 * @param guess the tree to be scored
 * @param gold the reference tree
 * @param pw writer forwarded to the superclass evaluation
 */
@Override
public void evaluate(Tree guess, Tree gold, PrintWriter pw) {
  final boolean missingTree = (gold == null) || (guess == null);
  if (missingTree) {
    System.err.printf("%s: Cannot compare against a null gold or guess tree!\n", this.getClass().getName());
    return;
  }

  final int guessLength = guess.yield().size();
  final int goldLength = gold.yield().size();
  if (guessLength != goldLength) {
    // Only a warning: the pair is still scored below.
    log.info("Warning: yield differs:");
    log.info("Guess: " + SentenceUtils.listToString(guess.yield()));
    log.info("Gold: " + SentenceUtils.listToString(gold.yield()));
  }

  super.evaluate(guess, gold, pw);
}
/**
 * Crossing-brackets evaluation. For each evaluated tree pair it counts how many
 * constituents of the first tree cross the constituent set of the second tree,
 * and keeps running totals from which AvgCB and ZeroCB are reported.
 */
public static class CBEval extends Evalb {

  /** Sum of crossing-constituent counts over all evaluated pairs. */
  private double cb = 0.0;
  /** Number of pairs passed to {@link #checkCrossing}. */
  private double num = 0.0;
  /** Number of evaluated pairs that had no crossing constituent. */
  private double zeroCB = 0.0;

  public CBEval(String str, boolean runningAverages) {
    super(str, runningAverages);
  }

  /**
   * Counts the constituents in {@code s1} that cross {@code s2} and folds the
   * result into the running totals.
   *
   * @param s1 constituents whose crossings are counted
   * @param s2 constituents that each member of {@code s1} is tested against
   */
  protected void checkCrossing(Set<Constituent> s1, Set<Constituent> s2) {
    double crossings = 0.0;
    for (Constituent constit : s1) {
      if (constit.crosses(s2)) {
        crossings += 1.0;
      }
    }
    if (crossings == 0.0) {
      zeroCB += 1.0;
    }
    cb += crossings;
    num += 1.0;
  }

  /**
   * Updates the crossing statistics for one tree pair and, when running averages
   * are enabled and a writer is supplied, prints the current figures.
   *
   * @param t1 tree whose constituents are checked for crossings
   * @param t2 tree whose constituents they are checked against
   * @param pw destination for the running-average line; may be {@code null}
   */
  @Override
  public void evaluate(Tree t1, Tree t2, PrintWriter pw) {
    Set<Constituent> b1 = makeObjects(t1);
    Set<Constituent> b2 = makeObjects(t2);
    checkCrossing(b1, b2);
    if (pw != null && runningAverages) {
      // NOTE(review): getNum() is inherited, while the pairs counted here go into this
      // class's private num field. If getNum() reads a separate superclass counter,
      // the N printed below will not match the pairs evaluated - confirm against the superclass.
      pw.println("AvgCB: " + scaled(cb, num) +
                 " ZeroCB: " + scaled(zeroCB, num) + " N: " + getNum());
    }
  }

  /**
   * Prints the accumulated AvgCB and ZeroCB figures, prefixed by this metric's name.
   *
   * @param verbose not used by this implementation
   * @param pw destination for the summary line
   */
  @Override
  public void display(boolean verbose, PrintWriter pw) {
    pw.println(str + " AvgCB: " + scaled(cb, num) +
               " ZeroCB: " + scaled(zeroCB, num));
  }

  /**
   * Returns {@code 100 * total / count}, truncated (not rounded) to two decimal places.
   * This is the single place that holds the formatting arithmetic shared by
   * {@link #evaluate} and {@link #display}.
   */
  private static double scaled(double total, double count) {
    return ((int) (10000.0 * total / count)) / 100.0;
  }
}
/**
 * Required argument count for the command line. main() rejects fewer raw arguments
 * than this, and requires exactly this many positional arguments (the gold file
 * and the guess file) once options have been parsed.
 */
private static final int minArgs = 2;
/**
 * Builds the command-line help text printed when the arguments are invalid.
 *
 * @return the usage message, one option per line, using the platform line separator
 */
private static String usage() {
  // Same separator that the %n conversions in the format string below expand to.
  final String nl = System.lineSeparator();
  StringBuilder sb = new StringBuilder();
  sb.append(String.format("Usage: java %s [OPTS] gold guess%n%n", Evalb.class.getName()));
  sb.append("Options:").append(nl);
  sb.append(" -v : Verbose mode.").append(nl);
  sb.append(" -l lang : Select language settings from ").append(Language.langList).append(nl);
  sb.append(" -y num : Skip gold trees with yields longer than num.").append(nl);
  sb.append(" -s num : Sort the trees by F1 and output the num lowest F1 trees.").append(nl);
  sb.append(" -c : Compute LP/LR/F1 by category.").append(nl);
  sb.append(" -f regex : Compute category level evaluation for categories that match this regex.").append(nl);
  // -e carries a value (main reads it as the encoding name), so show its argument.
  sb.append(" -e enc : Input encoding.").append(nl);
  return sb.toString();
}
/**
 * Declares the command-line options that main() accepts. Each option name maps to
 * an integer handed to {@code StringUtils.argsToProperties}; presumably this is the
 * number of values the option consumes (0 for boolean switches) - confirm against
 * that method's documentation.
 *
 * @return a new map from option name to its declared argument count
 */
private static Map<String, Integer> optionArgDefs() {
  Map<String, Integer> argCounts = Generics.newHashMap();
  argCounts.put("v", 0);
  argCounts.put("l", 1);
  argCounts.put("y", 1);
  argCounts.put("s", 1);
  argCounts.put("c", 0);
  // main() reads -e as the encoding name (default "UTF-8"), so it takes one value
  // like -l, -y, -s and -f; it was previously declared with 0.
  argCounts.put("e", 1);
  argCounts.put("f", 1);
  return argCounts;
}
/**
* Run the Evalb scoring metric on guess/gold input. The default language is English.
*
* @param args
*/
public static void main(String[] args) {
if (args.length < minArgs) {
log.info(usage());
System.exit(-1);
}
Properties options = StringUtils.argsToProperties(args, optionArgDefs());
Language language = PropertiesUtils.get(options, "l", Language.English, Language.class);
final TreebankLangParserParams tlpp = language.params;
final int maxGoldYield = PropertiesUtils.getInt(options, "y", Integer.MAX_VALUE);
final boolean VERBOSE = PropertiesUtils.getBool(options, "v", false);
final boolean sortByF1 = PropertiesUtils.hasProperty(options, "s");
int worstKTreesToEmit = PropertiesUtils.getInt(options, "s", 0);
PriorityQueue> queue = sortByF1 ? new PriorityQueue<>(2000, new F1Comparator()) : null;
boolean doCatLevel = PropertiesUtils.getBool(options, "c", false);
String labelRegex = options.getProperty("f", null);
String encoding = options.getProperty("e", "UTF-8");
String[] parsedArgs = options.getProperty("","").split("\\s+");
if (parsedArgs.length != minArgs) {
log.info(usage());
System.exit(-1);
}
String goldFile = parsedArgs[0];
String guessFile = parsedArgs[1];
// Command-line has been parsed. Configure the metric for evaluation.
tlpp.setInputEncoding(encoding);
final PrintWriter pwOut = tlpp.pw();
final Treebank guessTreebank = tlpp.diskTreebank();
guessTreebank.loadPath(guessFile);
pwOut.println("GUESS TREEBANK:");
pwOut.println(guessTreebank.textualSummary());
final Treebank goldTreebank = tlpp.diskTreebank();
goldTreebank.loadPath(goldFile);
pwOut.println("GOLD TREEBANK:");
pwOut.println(goldTreebank.textualSummary());
final Evalb metric = new Evalb("Evalb LP/LR", true);
final EvalbByCat evalbCat = (doCatLevel) ? new EvalbByCat("EvalbByCat LP/LR", true, labelRegex) : null;
final TreeTransformer tc = tlpp.collinizer();
//The evalb ref implementation assigns status for each tree pair as follows:
//
// 0 - Ok (yields match)
// 1 - length mismatch
// 2 - null parse e.g. (()).
//
//In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
final Iterator goldItr = goldTreebank.iterator();
final Iterator guessItr = guessTreebank.iterator();
int goldLineId = 0;
int guessLineId = 0;
int skippedGuessTrees = 0;
while( guessItr.hasNext() && goldItr.hasNext() ) {
Tree guessTree = guessItr.next();
List