All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.stanford.nlp.parser.metrics.Evalb Maven / Gradle / Ivy

Go to download

Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.

There is a newer version: 3.9.2
Show newest version
package edu.stanford.nlp.parser.metrics;

import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Properties;
import java.util.Set;

import edu.stanford.nlp.international.Languages;
import edu.stanford.nlp.international.Languages.Language;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.Sentence;
import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams;
import edu.stanford.nlp.trees.Constituent;
import edu.stanford.nlp.trees.ConstituentFactory;
import edu.stanford.nlp.trees.LabeledScoredConstituentFactory;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.Triple;

/**
 * A Java re-implementation of the evalb bracket scoring metric (Collins, 1997) that accepts Unicode input.
 * "Collinization" should be performed on input trees prior to invoking the package programmatically.
 * "Collinization" refers to normalization of trees for things not counted in evaluation,
 * such as equivalencing PRT and ADVP, which has standardly been done in English evaluation.
 * A main method is provided that performs Collinization according to language specific settings.
 * <p>
 * This implementation assumes that the guess/gold input files are of equal length, and have one tree per
 * line.
 * <p>
 * This implementation was last validated against EVALB20080701 (http://nlp.cs.nyu.edu/evalb/)
 * by Spence Green on 22 Jan. 2010. Notwithstanding this, Sekine and Collins' EVALB script has been
 * the common standard for constituency evaluation of parsers for the last decade. We always validate
 * any numbers we report with it, and we suggest that you do the same.
 *
 * @author Dan Klein
 * @author Spence Green
 */
public class Evalb extends AbstractEval {

  /** Factory handed to {@code Tree.constituents} when extracting the brackets of a tree. */
  private final ConstituentFactory cf;

  /**
   * Creates a bracket-scoring metric.
   *
   * @param str             label for this metric; forwarded to the superclass
   * @param runningAverages forwarded to the superclass; {@link CBEval} uses it to decide
   *                        whether to print running figures after each tree pair
   */
  public Evalb(String str, boolean runningAverages) {
    super(str, runningAverages);
    cf = new LabeledScoredConstituentFactory();
  }

  /**
   * evalb only evaluates phrasal categories, thus constituents() does not
   * return objects for terminals and pre-terminals.
   *
   * @param tree the tree to extract constituents from; may be {@code null}
   * @return a fresh set of the tree's constituents, empty when {@code tree} is {@code null}
   */
  @Override
  protected Set<Constituent> makeObjects(Tree tree) {
    Set<Constituent> set = Generics.newHashSet();
    if (tree != null) {
      set.addAll(tree.constituents(cf));
    }
    return set;
  }

  /**
   * Scores one guess/gold tree pair by delegating to the superclass.
   * A {@code null} tree on either side is reported on stderr and the pair is skipped.
   * A yield-length mismatch only produces a warning; the pair is still scored.
   *
   * @param guess the hypothesized tree
   * @param gold  the reference tree
   * @param pw    writer passed through to the superclass
   */
  @Override
  public void evaluate(Tree guess, Tree gold, PrintWriter pw) {
    if (gold == null || guess == null) {
      System.err.printf("%s: Cannot compare against a null gold or guess tree!\n", this.getClass().getName());
      return;
    }

    if (guess.yield().size() != gold.yield().size()) {
      System.err.println("Warning: yield differs:");
      System.err.println("Guess: " + Sentence.listToString(guess.yield()));
      System.err.println("Gold: " + Sentence.listToString(gold.yield()));
    }

    super.evaluate(guess, gold, pw);
  }

  /**
   * Crossing-brackets variant: for each tree pair, counts how many constituents of the
   * first tree cross the constituent set of the second, and tracks how many pairs had
   * no crossings at all.
   */
  public static class CBEval extends Evalb {

    /** Total number of crossing constituents over all evaluated pairs. */
    private double cb = 0.0;
    /** Number of pairs evaluated by this object. */
    private double num = 0.0;
    /** Number of pairs with zero crossing constituents. */
    private double zeroCB = 0.0;

    /**
     * Counts the members of {@code s1} that cross {@code s2} and updates the running totals.
     *
     * @param s1 constituents whose crossings are counted
     * @param s2 constituents they are tested against
     */
    protected void checkCrossing(Set<Constituent> s1, Set<Constituent> s2) {
      double c = 0.0;
      for (Constituent constit : s1) {
        if (constit.crosses(s2)) {
          c += 1.0;
        }
      }
      if (c == 0.0) {
        zeroCB += 1.0;
      }
      cb += c;
      num += 1.0;
    }

    /**
     * Updates the crossing statistics for one tree pair and, when running averages are
     * enabled and a writer is supplied, prints the current figures.
     *
     * @param t1 tree whose constituents are checked for crossings
     * @param t2 tree whose constituents they are checked against
     * @param pw destination for the running figures; may be {@code null}
     */
    @Override
    public void evaluate(Tree t1, Tree t2, PrintWriter pw) {
      Set<Constituent> b1 = makeObjects(t1);
      Set<Constituent> b2 = makeObjects(t2);
      checkCrossing(b1, b2);
      if (pw != null && runningAverages) {
        // NOTE(review): getNum() is inherited, while this class keeps its own private
        // 'num' counter -- confirm that "N" reports the count that is intended here.
        pw.println("AvgCB: " + scaled(cb, num) +
                   " ZeroCB: " + scaled(zeroCB, num) + " N: " + getNum());
      }
    }

    /**
     * Prints the accumulated crossing figures, prefixed with this metric's label.
     *
     * @param verbose not used by this implementation
     * @param pw      destination writer
     */
    @Override
    public void display(boolean verbose, PrintWriter pw) {
      pw.println(str + " AvgCB: " + scaled(cb, num) +
                 " ZeroCB: " + scaled(zeroCB, num));
    }

    /** Returns {@code 100 * total / count}, truncated to two decimal places. */
    private static double scaled(double total, double count) {
      return ((int) (10000.0 * total / count)) / 100.0;
    }

    /**
     * @param str             label for this metric; forwarded to {@link Evalb}
     * @param runningAverages whether {@link #evaluate} prints figures after each pair
     */
    public CBEval(String str, boolean runningAverages) {
      super(str, runningAverages);
    }

  }

  /** Number of positional arguments (gold file, guess file) the command line must supply. */
  private static final int minArgs = 2;

  /** Builds the command-line help text. */
  private static String usage() {
    StringBuilder sb = new StringBuilder();
    String nl = System.getProperty("line.separator");
    sb.append(String.format("Usage: java %s [OPTS] gold guess%n%n", Evalb.class.getName()));
    sb.append("Options:").append(nl);
    sb.append(" -v : Verbose mode.").append(nl);
    sb.append(" -l lang : Select language settings from ").append(Languages.listOfLanguages()).append(nl);
    sb.append(" -y num : Skip gold trees with yields longer than num.").append(nl);
    sb.append(" -s num : Sort the trees by F1 and output the num lowest F1 trees.").append(nl);
    sb.append(" -c : Compute LP/LR/F1 by category.").append(nl);
    sb.append(" -f regex : Compute category level evaluation for categories that match this regex.").append(nl);
    sb.append(" -e enc : Input encoding (default UTF-8).").append(nl);
    return sb.toString();
  }

  /**
   * Maps each command-line flag to the number of arguments it consumes.
   *
   * @return a fresh, mutable flag-to-arity map
   */
  private static Map<String,Integer> optionArgDefs() {
    Map<String,Integer> optionArgDefs = Generics.newHashMap();
    optionArgDefs.put("v", 0);
    optionArgDefs.put("l", 1);
    optionArgDefs.put("y", 1);
    optionArgDefs.put("s", 1);
    optionArgDefs.put("c", 0);
    // -e carries the encoding name, which main reads as the option's value,
    // so it must consume one argument.
    optionArgDefs.put("e", 1);
    optionArgDefs.put("f", 1);
    return optionArgDefs;
  }

  /**
   * Run the Evalb scoring metric on guess/gold input. The default language is English.
* * @param args */ public static void main(String[] args) { if (args.length < minArgs) { System.err.println(usage()); System.exit(-1); } Properties options = StringUtils.argsToProperties(args, optionArgDefs()); Language language = PropertiesUtils.get(options, "l", Language.English, Language.class); final TreebankLangParserParams tlpp = Languages.getLanguageParams(language); final int maxGoldYield = PropertiesUtils.getInt(options, "y", Integer.MAX_VALUE); final boolean VERBOSE = PropertiesUtils.getBool(options, "v", false); final boolean sortByF1 = PropertiesUtils.hasProperty(options, "s"); int worstKTreesToEmit = PropertiesUtils.getInt(options, "s", 0); PriorityQueue> queue = sortByF1 ? new PriorityQueue>(2000, new F1Comparator()) : null; boolean doCatLevel = PropertiesUtils.getBool(options, "c", false); String labelRegex = options.getProperty("f", null); String encoding = options.getProperty("e", "UTF-8"); String[] parsedArgs = options.getProperty("","").split("\\s+"); if (parsedArgs.length != minArgs) { System.err.println(usage()); System.exit(-1); } String goldFile = parsedArgs[0]; String guessFile = parsedArgs[1]; // Command-line has been parsed. Configure the metric for evaluation. tlpp.setInputEncoding(encoding); final PrintWriter pwOut = tlpp.pw(); final Treebank guessTreebank = tlpp.diskTreebank(); guessTreebank.loadPath(guessFile); pwOut.println("GUESS TREEBANK:"); pwOut.println(guessTreebank.textualSummary()); final Treebank goldTreebank = tlpp.diskTreebank(); goldTreebank.loadPath(goldFile); pwOut.println("GOLD TREEBANK:"); pwOut.println(goldTreebank.textualSummary()); final Evalb metric = new Evalb("Evalb LP/LR", true); final EvalbByCat evalbCat = (doCatLevel) ? new EvalbByCat("EvalbByCat LP/LR", true, labelRegex) : null; final TreeTransformer tc = tlpp.collinizer(); //The evalb ref implementation assigns status for each tree pair as follows: // // 0 - Ok (yields match) // 1 - length mismatch // 2 - null parse e.g. (()). 
// //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation. final Iterator goldItr = goldTreebank.iterator(); final Iterator guessItr = guessTreebank.iterator(); int goldLineId = 0; int guessLineId = 0; int skippedGuessTrees = 0; while( guessItr.hasNext() && goldItr.hasNext() ) { Tree guessTree = guessItr.next(); List





© 2015 - 2024 Weber Informatics LLC | Privacy Policy