![JAR search and dependency download from the Maven repository](/logo.png)
edu.berkeley.nlp.PCFGLA.GrammarTester Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of berkeleyparser Show documentation
The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
The newest version!
package edu.berkeley.nlp.PCFGLA;
import java.io.File;
import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.zip.GZIPInputStream;
import edu.berkeley.nlp.PCFGLA.Corpus.TreeBankType;
import edu.berkeley.nlp.PCFGLA.smoothing.SmoothAcrossParentSubstate;
import edu.berkeley.nlp.PCFGLA.smoothing.Smoother;
import edu.berkeley.nlp.parser.EnglishPennTreebankParseEvaluator;
import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.syntax.Trees;
import edu.berkeley.nlp.util.Numberer;
import edu.berkeley.nlp.util.Pair;
/**
 * Loads one or more serialized PCFG-LA grammars and evaluates them by parsing
 * a section of the Penn Treebank (or another corpus), reporting
 * labeled-constituent F1 scores.
 *
 * @author Slav Petrov
 */
public class GrammarTester implements Callable{
// Optional hook: when non-null, main() skips the built-in parser construction
// and (in principle) delegates to this factory instead.
public static ParserFactory externalParserFactory = null;
/**
 * Factory interface that lets external code supply its own parser
 * implementation; checked in main() before the built-in parsers are built.
 */
public static interface ParserFactory {
/** Creates a parser for the given grammar, lexicon and span predictor. */
public ConstrainedArrayParser newParser(Grammar gr, Lexicon lex,
SpanPredictor sp);
}
/**
 * Command-line options for GrammarTester.  Fields are populated reflectively
 * by OptionParser from the @Option annotations; each usage string doubles as
 * the per-flag documentation.
 */
public static class Options {
    @Option(name = "-in", required = true, usage = "Input File for Grammar (Required)\n")
    public String inFileName;
    @Option(name = "-path", usage = "Path to Corpus (Default: null)\n")
    public String path = null;
    // Fixed usage text: "CHNINESE" typo, and the stated default now matches
    // the actual field default (WSJ, not ENGLISH).
    @Option(name = "-treebank", usage = "Language: WSJ, CHINESE, GERMAN, CONLL, SINGLEFILE (Default: WSJ)")
    public TreeBankType treebank = TreeBankType.WSJ;
    @Option(name = "-maxL", usage = "Maximum sentence length (Default <=40)")
    public int maxSentenceLength = 40;
    @Option(name = "-section", usage = "On which part of the WSJ to test: train/dev/test (Default: dev)")
    public String section = "dev";
    @Option(name = "-maxS", usage = "Maximum number of sentences (Default all)")
    public int maxSentences = 1000000;
    @Option(name = "-parser", usage = "Parser type: c-to-f, plain, kbest, basic, maxderivation")
    public String parser = "c-to-f";
    @Option(name = "-k", usage = "k for k-best parsing")
    public int k = 1;
    @Option(name = "-cons", usage = "Constraints for plain parser")
    public String cons = null;
    @Option(name = "-viterbi", usage = "Compute viterbi derivation instead of max-rule parse (Default: max-rule)")
    public boolean viterbi = false;
    @Option(name = "-allowAllSubstates", usage = "Don't prune at the substate level")
    public boolean allowAllSubstates = false;
    @Option(name = "-unaryPenalty", usage = "Unary penalty (Default: 1.0)")
    public double unaryPenalty = 1.0;
    @Option(name = "-finalLevel", usage = "Parse with projected grammar from this level (Default: -1 = input grammar)")
    public int finalLevel = -1;
    @Option(name = "-verbose", usage = "Verbose/Quiet (Default: Quiet)\n")
    public boolean verbose = false;
    @Option(name = "-accurate", usage = "Set thresholds for accuracy. (Default: set thresholds for efficiency)")
    public boolean accurate = false;
    @Option(name = "-useGoldPOS", usage = "Use gold part of speech tags (Default: false)")
    public boolean useGoldPOS = false;
    // static (not per-instance) so call() can read it as GrammarTester.Options.smooth
    @Option(name = "-smooth", usage = "Smooth the parameters before parsing")
    public static boolean smooth = false;
    @Option(name = "-doNOTprojectConstraints", usage = "Do NOT project constraints")
    public boolean doNOTprojectConstraints = false;
    @Option(name = "-nThreads", usage = "Parse in parallel using this many threads (Default: 1).")
    public int nThreads = 1;
    // Fixed usage text: it was copy-pasted from -nThreads and described threading.
    @Option(name = "-filterTrees", usage = "Filter the test trees (as for conditional training) before parsing (Default: false).")
    public boolean filterTrees = false;
    @Option(name = "-filterAllUnaries", usage="Mark any unary parent with a ^u")
    public boolean filterAllUnaries = false;
    @Option(name = "-filterStupidFrickinWHNP", usage="Temp hack!")
    public boolean filterStupidFrickinWHNP = false;
    @Option(name = "-printGoldTree", usage="Print (flat) gold tree")
    public boolean printGoldTree = false;
    @Option(name = "-computeConstraints", usage="Compute constraints from the given grammar (rather than loading with -cons)")
    public boolean computeConstraints = false;
    @Option(name = "-evaluateConstraints", usage="Evaluate search errors from constraints")
    public boolean evaluateConstraints = false;
    @Option(name = "-logT", usage="Threshold for constraints")
    public double logT = -10;
    @Option(name="-printAllKBest", usage="Print every kBest parse")
    public boolean printAllKBest = false;
    @Option(name="-testAll", usage="Test all grammar files starting with this name")
    public boolean testAll = false;
    @Option(name="-filePath", usage="Path for grammars to be tested")
    public String filePath = null;
    // Fixed usage text: it was copy-pasted from -nThreads; this flag sizes the
    // executor pool used when testing multiple grammars (-testAll).
    @Option(name = "-nProcess", usage = "When testing multiple grammars (-testAll), evaluate this many grammars in parallel (Default: 1).")
    public int nProcess = 1;
    @Option(name = "-lowercase", usage = "Lowercase all words in the treebank")
    public boolean lowercase = false;
    @Option(name = "-allSubstatesAllowed", usage = "When using constraints whether to prune on the substate level")
    public boolean allSubstatesAllowed = false;
    @Option(name = "-printAllF1", usage = "Print all F1 scores (when using testAll)")
    public boolean printAllF1 = false;
    @Option(name = "-nGrammars", usage = "Use a product model based on that many grammars")
    public int nGrammars = 1;
}
// Per-instance state used when this class runs as a Callable: the -testAll
// path submits one GrammarTester per grammar file to an executor pool.
List> testTrees; // gold trees to parse; NOTE(review): generics lost in extraction — presumably List<Tree<String>>, confirm against upstream source
boolean[][][][][] cons; // per-sentence pruning constraints; may be null
String fileName; // serialized grammar file that call() loads
int maxSentenceLength; // sentences longer than this are skipped
/**
 * Command-line entry point.  Loads a serialized grammar (or a product of
 * several grammars), reads the requested treebank section, builds the parser
 * variant selected with -parser, parses every sentence up to -maxL words and
 * reports labeled-constituent precision/recall/F1.
 *
 * NOTE(review): this file was recovered from an HTML page; all generic type
 * parameters (and some whole code fragments) were stripped.  Lines flagged
 * below are corrupted and will not compile as-is.
 *
 * @param args command-line flags, parsed reflectively into {@link Options}
 */
public static void main(String[] args){
    OptionParser optParser = new OptionParser(Options.class);
    Options opts = (Options) optParser.parse(args, true);
    // provide feedback on command-line arguments
    System.out.println("Calling with " + optParser.getPassedInOptions());
    String path = opts.path;
    // int lang = opts.lang;
    System.out.println("Loading trees from "+path+" and using treebank type "+opts.treebank);
    int maxSentenceLength = opts.maxSentenceLength;
    System.out.println("Will remove sentences with more than "+maxSentenceLength+" words.");
    // int nbest = Integer.parseInt(CommandLineUtils.getValueOrUseDefault(input, "-N","1"));
    // Which part of the corpus to parse: dev, final (test) or train.
    String testSetString = opts.section;
    boolean devTestSet = testSetString.equals("dev");
    boolean finalTestSet = testSetString.equals("final");
    boolean trainTestSet = testSetString.equals("train");
    if (!(devTestSet || finalTestSet || trainTestSet)) {
        System.out.println("I didn't understand dev/final test set argument "+testSetString);
        System.exit(1);
    }
    System.out.println(" using "+testSetString+" test set");
    // Pruning constraints: either computed here via ParserConstrainer, or
    // loaded from the file given with -cons.
    boolean[][][][][] cons = null;
    if (opts.computeConstraints)
    {
        String[] args1 = new String[0];
        String dirName = ".";
        String baseName="tmp";
        String[] consArgsTrain = addOptions(args1, new String[]{"-logT", "" + opts.logT,"-maxL", "" + opts.maxSentenceLength,"-path",opts.path, "-filterStupidFrickinWHNP", opts.filterStupidFrickinWHNP ? "true" : "false","-markUnaryParents", "true", "-out", dirName+"/"+baseName+"0_" + opts.section, "-in", opts.inFileName, "-section", opts.section, "-nChunks", "1", "-outputLog", dirName+"/"+baseName+".cons.log"});
        ParserConstrainer.main(consArgsTrain);
        // The constrainer writes its output next to dirName/baseName; point -cons at it.
        opts.cons = dirName+"/"+baseName+"0_" + opts.section + "-0.data";
    }
    if (opts.cons!=null) cons = ParserConstrainer.loadData(opts.cons);
    Corpus corpus = new Corpus(path,opts.treebank,1.0,!trainTestSet);
    List> testTrees = null; // NOTE(review): generics stripped — presumably List<Tree<String>>
    if (devTestSet)
        testTrees = corpus.getDevTestingTrees();
    if (finalTestSet)
        testTrees = corpus.getFinalTestingTrees();
    if (trainTestSet)
        testTrees = corpus.getTrainTrees();
    // for (Tree tree : testTrees){
    // System.out.println(tree);
    // }
    if (opts.lowercase){
        System.out.println("Lowercasing the treebank.");
        Corpus.lowercaseWords(testTrees);
    }
    // With -testAll the grammar name is relative to -filePath.
    String inFileName = (opts.testAll) ? opts.filePath+"/"+opts.inFileName : opts.inFileName;
    if (inFileName==null) {
        throw new Error("Did not provide a grammar.");
    }
    System.out.println("Loading grammar from "+inFileName+".");
    int finalLevel = opts.finalLevel;
    if (finalLevel!=-1) System.out.println("Parsing with projected grammar from level "+finalLevel+".");
    boolean viterbiParse = opts.viterbi;
    if (viterbiParse) System.out.println("Computing viterbi derivation instead of max-rule parse.");
    // CoarseToFineMaxRuleParser parser = new CoarseToFineTwoChartsParser(grammar, lexicon, opts.unaryPenalty,finalLevel,viterbiParse,false,false,opts.accurate);
    boolean doVariational = false;
    boolean useGoldPOS = opts.useGoldPOS;
    ConstrainedArrayParser parser = null;
    // Main evaluator ignores ROOT/PSEUDO labels and punctuation tags.
    EnglishPennTreebankParseEvaluator.LabeledConstituentEval eval = new EnglishPennTreebankParseEvaluator.LabeledConstituentEval(new HashSet(Arrays.asList(new String[] {"ROOT","PSEUDO"})), new HashSet(Arrays.asList(new String[] {"''", "``", ".", ":", ","})));
    // tmpEval is only used for oracle selection among k-best candidates.
    EnglishPennTreebankParseEvaluator.LabeledConstituentEval tmpEval = null;
    System.out.println("The computed F1,LP,LR scores are just a rough guide. They are typically 0.1-0.2 lower than the official EVALB scores.");
    // for (Tree testTree : testTrees) {
    // System.out.println(testTree);
    // } System.exit(0);
    if (externalParserFactory != null) {
        // parser = externalParserFactory.newParser(grammar, lexicon, spanPredictor);
    } else {
        if (opts.nGrammars != 1){
            // Product-of-grammars model: load nGrammars grammars named <in>.0, <in>.1, ...
            Grammar[] grammars = new Grammar[opts.nGrammars];
            Lexicon[] lexicons = new Lexicon[opts.nGrammars];
            Binarization bin = null;
            for (int nGr = 0; nGr < opts.nGrammars; nGr++){
                inFileName = opts.inFileName+"."+nGr;
                ParserData pData = ParserData.Load(inFileName);
                Numberer.setNumberers(pData.getNumbs());
                // NOTE(review): this null check is dead — pData was already dereferenced above.
                if (pData==null) {
                    System.out.println("Failed to load grammar from file"+inFileName+".");
                    System.exit(1);
                }
                grammars[nGr] = pData.getGrammar();
                lexicons[nGr] = pData.getLexicon();
                Numberer.setNumberers(pData.getNumbs());
                bin = pData.getBinarization();
            }
            parser = new CoarseToFineMaxRuleProductParser(grammars, lexicons, opts.unaryPenalty,-1,opts.viterbi,false,false, opts.accurate, false, true, true);
            parser.binarization = bin;
        } else {
            ParserData pData = ParserData.Load(inFileName);
            if (pData==null) {
                System.out.println("Failed to load grammar from file"+inFileName+".");
                System.exit(1);
            }
            Grammar grammar = pData.getGrammar();
            grammar.splitRules();
            Lexicon lexicon = pData.getLexicon();
            SpanPredictor spanPredictor = pData.getSpanPredictor();
            if (opts.smooth){
                System.out.println("Smoothing only lexicon.");
                // Smoother grSmoother = new SmoothAcrossParentBits(0.01,grammar.splitTrees);
                // grammar.setSmoother(grSmoother);
                // grammar.smooth(false);
                // Smoother lexSmoother = new SmoothAcrossParentBits(0.01,grammar.splitTrees);
                Smoother lexSmoother = new SmoothAcrossParentSubstate(0.01);
                lexicon.setSmoother(lexSmoother);
            }
            Numberer.setNumberers(pData.getNumbs());
            // Instantiate the parser variant requested with -parser.
            if ("plain".equals(opts.parser)){
                testTrees = Corpus.filterTreesForConditional(testTrees,opts.filterAllUnaries,opts.filterStupidFrickinWHNP,false);
                grammar.clearUnaryIntermediates();
                if (grammar instanceof HierarchicalAdaptiveGrammar){
                    lexicon.explicitlyComputeScores(grammar.finalLevel);
                    parser = new ConstrainedHierarchicalTwoChartParser(grammar, lexicon, spanPredictor, grammar.finalLevel);
                }else
                    parser = new ConstrainedTwoChartsParser(grammar, lexicon, spanPredictor);
                if (opts.viterbi) parser.viterbi = true;
            }
            else if ("basic".equals(opts.parser)){
                parser = new ConstrainedArrayParser(grammar, lexicon, grammar.numSubStates);
            }
            else if ("kbest".equals(opts.parser)){
                parser = new CoarseToFineNBestParser(grammar, lexicon, opts.k, opts.unaryPenalty,finalLevel,viterbiParse,false,false,opts.accurate, doVariational, useGoldPOS, true);
                tmpEval = new EnglishPennTreebankParseEvaluator.LabeledConstituentEval(Collections.singleton("ROOT"), new HashSet(Arrays.asList(new String[] {"''", "``", ".", ":", ","})));
            }
            else if ("maxderivation".equals(opts.parser)){
                parser = new CoarseToFineMaxRuleDerivationParser(grammar, lexicon, opts.unaryPenalty,finalLevel,viterbiParse,false,false,opts.accurate, doVariational, useGoldPOS, true);
            }
            else parser = new CoarseToFineMaxRuleParser(grammar, lexicon, opts.unaryPenalty,finalLevel,viterbiParse,false,false,opts.accurate, doVariational, useGoldPOS, true);
            parser.binarization = pData.getBinarization();
        }
    }
    boolean kBestParsing ="kbest".equals(opts.parser);
    if (opts.allSubstatesAllowed) System.out.println("All substates are allowed.");
    if (opts.filterTrees) testTrees = Corpus.filterTreesForConditional(testTrees,opts.filterAllUnaries,opts.filterStupidFrickinWHNP,false);
    // ---- Multi-threaded parsing path (exits the JVM when done) ----
    if (opts.nThreads > 1){
        System.out.println("Parsing with "+opts.nThreads+" threads in parallel.");
        MultiThreadedParserWrapper m_parser = new MultiThreadedParserWrapper(parser, opts.nThreads);
        int treeNumber = 0;
        // Drop over-long sentences up front so result indices line up with testTrees.
        ArrayList> newList = new ArrayList>();
        for (Tree testTree : testTrees) {
            List testSentence = testTree.getYield();
            int sentenceLength = testSentence.size();
            if (sentenceLength > maxSentenceLength) continue;
            newList.add(testTree);
        }
        testTrees = newList;
        for (Tree testTree : testTrees) {
            List testSentence = testTree.getYield();
            int sentenceLength = testSentence.size();
            if (sentenceLength > maxSentenceLength){
                System.out.println("()\n");
                continue;
            }
            // m_parser.waitUntilFreeThread();
            m_parser.parseThisSentence(testSentence);
            // Drain any finished parses and score them against the gold trees.
            while (m_parser.hasNext()){
                List> parsedTrees = m_parser.getNext();
                Tree tTree = testTrees.get(treeNumber++);
                Tree bestTree = null;
                if (kBestParsing){
                    // Oracle selection: keep the candidate with the highest F1 against gold.
                    double bestFscore = -1;
                    for (Tree pTree : parsedTrees){
                        pTree = TreeAnnotations.unAnnotateTree(pTree, false);
                        double f1 = tmpEval.evaluate(pTree, tTree, false);
                        if (f1>bestFscore) {
                            bestTree = pTree;
                            bestFscore = f1;
                        }
                    }
                }
                else {
                    bestTree = parsedTrees.get(0);
                    bestTree = TreeAnnotations.unAnnotateTree(bestTree, false);
                }
                if (!bestTree.getChildren().isEmpty()) {
                    System.out.println(bestTree.getChildren().get(0));
                } else System.out.println("()\n");
                eval.evaluate(bestTree, tTree);
            }
        }
        // All sentences submitted; wait for the remaining workers and score their output.
        while (!m_parser.isDone()){
            while (m_parser.hasNext()){
                List> parsedTrees = m_parser.getNext();
                Tree tTree = testTrees.get(treeNumber++);
                Tree bestTree = null;
                if (kBestParsing){
                    double bestFscore = -1;
                    for (Tree pTree : parsedTrees){
                        pTree = TreeAnnotations.unAnnotateTree(pTree, false);
                        if (opts.printAllKBest)
                            System.out.println("\t" + pTree);
                        double f1 = tmpEval.evaluate(pTree, tTree, false);
                        if (f1>bestFscore) {
                            bestTree = pTree;
                            bestFscore = f1;
                        }
                    }
                }
                else {
                    bestTree = parsedTrees.get(0);
                    bestTree = TreeAnnotations.unAnnotateTree(bestTree, false);
                }
                if (!bestTree.getChildren().isEmpty()) {
                    System.out.println(bestTree.getChildren().get(0));
                } else System.out.println("()\n");
                if (opts.printGoldTree) System.out.println(tTree.getChildren().get(0));
                eval.evaluate(bestTree, tTree);
            }
        }
        System.out.println("Parsed "+treeNumber+" sentences.");
        eval.display(true);
        System.out.println("The computed F1,LP,LR scores are just a rough guide. They are typically 0.1-0.2 lower than the official EVALB scores.");
        System.exit(0);
    }
    // ---- Single-threaded path: one grammar ----
    if (!opts.testAll){
        int i = 0;
        int totalGoldPruned = 0;
        int totalPruned = 0;
        for (Tree testTree : testTrees) {
            List testSentence = testTree.getYield();
            int sentenceLength = testSentence.size();
            if( sentenceLength > maxSentenceLength) {
                System.out.println("()\n");
                continue;
            }
            // System.out.println("Gold: "+testTree);
            // if (true) continue;
            List posTags = null;
            if (useGoldPOS) posTags = testTree.getPreTerminalYield();
            // if (true){
            // NOTE(review): the next line is extraction-damaged — it fuses leftover
            // commented-out code with what was presumably the declaration
            // "Tree<String> parsedTree = null;" (and likely an "allowedStates"
            // setup block); `parsedTree`/`allowedStates` used below are otherwise undeclared.
            // for (int ii=0; ii parsedTree = null;
            if (kBestParsing){
                List> list = parser.getKBestConstrainedParses(testSentence, posTags, opts.k);
                double bestFscore = 0;
                for (Tree tree : list){
                    Tree tmp = TreeAnnotations.unAnnotateTree(tree, false);
                    if (opts.printAllKBest)
                        System.out.println("\t"+tmp);
                    double f1 = tmpEval.evaluate(tmp, testTree, false);
                    if (f1>bestFscore) {
                        parsedTree = tmp;
                        bestFscore = f1;
                    }
                }
                if (parsedTree==null) parsedTree = new Tree("ROOT");
            }
            else {
                parsedTree = parser.getBestConstrainedParse(testSentence,posTags,allowedStates);
                if (opts.verbose) System.out.println("Annotated result:\n"+Trees.PennTreeRenderer.render(parsedTree));
                parsedTree = TreeAnnotations.unAnnotateTree(parsedTree, false);
                if (useGoldPOS && parsedTree.getChildren().isEmpty()){ // parse error when using goldPOS, try without
                    parsedTree = parser.getBestConstrainedParse(testSentence,null,allowedStates);
                    parsedTree = TreeAnnotations.unAnnotateTree(parsedTree, false);
                }
            }
            // if (outFile!=null) output.write(parsedTree+"\n");
            if (!parsedTree.getChildren().isEmpty()) {
                System.out.println(parsedTree.getChildren().get(0));
            } else System.out.println("()\nLength: "+sentenceLength);//System.out.println(testTree);//
            int numGoldPruned = 0;
            int numPruned = 0;
            // Optionally measure search errors introduced by the pruning constraints.
            if (opts.evaluateConstraints && cons != null)
            {
                numGoldPruned = countPrunedNodes(testTree, allowedStates, Numberer.getGlobalNumberer("tags"), false, 0, testTree.getYield().size());
                numPruned = countPrunedNodes(allowedStates, Numberer.getGlobalNumberer("tags"), false, 0, testTree.getYield().size());
                System.out.println("Pruned " + numGoldPruned + " constituents.");
                totalGoldPruned += numGoldPruned;
                totalPruned += numPruned;
            }
            if (opts.printGoldTree) System.out.println("Gold: " + testTree.getChildren().get(0));
            eval.evaluate(parsedTree, testTree);
            if (++i > opts.maxSentences) break;
        }
        if (opts.evaluateConstraints)
            System.out.println("Pruned total of " + totalGoldPruned + " gold constituents out of a total of " + totalPruned +" constituents pruned.");
        eval.display(true);
        System.out.println("The computed F1,LP,LR scores are just a rough guide. They are typically 0.1-0.2 lower than the official EVALB scores.");
    } else {
        // ---- -testAll path: evaluate every matching grammar file, keep the best ----
        int k=0;
        for (Tree testTree : testTrees) {
            List testSentence = testTree.getYield();
            int sentenceLength = testSentence.size();
            if( sentenceLength > maxSentenceLength) {
                System.out.println("()\n");
                continue;
            }
            boolean[][][][] allowedStates = null;
            if (cons!=null) {
                if (cons[k]==null) {
                    k++;
                    continue;
                }
                if (!opts.doNOTprojectConstraints) parser.projectConstraints(cons[k], opts.allSubstatesAllowed);
            }
            k++;
        }
        File[] fileList = null;
        final String fileName = opts.inFileName;
        if (opts.testAll){
            // Collect all grammar files whose names start with -in, oldest first.
            FilenameFilter filter = new FilenameFilter(){
                public boolean accept(File arg0, String arg1) {
                    return arg1.startsWith(fileName);
                } };
            fileList = new File(opts.filePath).listFiles(filter);
            Comparator DATE_COMPARE = new Comparator()
            {
                private Date d1 = new Date();
                private Date d2 = new Date();
                public int compare(Object file1, Object file2)
                {
                    d1.setTime(((File) file1).lastModified());
                    d2.setTime(((File) file2).lastModified());
                    return d1.compareTo(d2);
                }
            };
            Arrays.sort(fileList,DATE_COMPARE);
        } else {
            fileList = new File[1];
        }
        int nProcess = opts.nProcess;
        double bestF1 = -1;
        String bestGrammar = null;
        ExecutorService pool = Executors.newFixedThreadPool(nProcess);
        Future[] submits = new Future[nProcess];
        // NOTE(review): the next several lines are extraction-damaged — the body of
        // the for-loop that submits GrammarTester Callables to the pool was lost;
        // what remains fuses the loop header with the Future-result handling
        // (presumably inside a try block matching the catch clauses below).
        for (int f=0; f,String> res = (Pair,String>) submits[i].get();
        System.out.print(res.getSecond()+"\t");
        double thisF1 = res.getFirst().display(true);
        if (opts.printAllF1)
            System.out.println(res.getSecond() + " had F1 " + thisF1);
        if (thisF1 > bestF1){
            bestF1 = thisF1;
            bestGrammar = res.getSecond();
        }
        }
        } catch (ExecutionException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
        }
        System.out.println("The best F1 was: "+bestF1);
        System.out.println("The best grammar was: "+bestGrammar);
        // Promote the winning grammar to the canonical -in name.
        File finalGrammar = new File(bestGrammar);
        finalGrammar.renameTo(new File(opts.filePath+"/"+opts.inFileName));
        pool.shutdown();
    }
    if (!opts.testAll) System.exit(0);
}
/**
 * Builds a tester instance for the -testAll path: call() will load the
 * grammar in {@code fName} and parse {@code tT} under constraints {@code c}.
 *
 * @param fName serialized grammar file to load in call()
 * @param tT gold trees to parse (generics lost in extraction)
 * @param maxL sentences longer than this are skipped
 * @param c per-sentence pruning constraints, may be null
 */
GrammarTester(String fName, List> tT, int maxL, boolean[][][][][] c){
    this.fileName = fName;
    this.maxSentenceLength = maxL;
    this.testTrees = tT;
    this.cons = c;
}
/**
 * Counts how many chart cells in the span [start, end) have at least one
 * state whose substates are all pruned (i.e. no substate is allowed).
 *
 * @param allowedStates chart of allowed substates indexed [begin][end][state][substate]
 * @param globalNumberer unused here; kept for signature compatibility
 * @param b unused flag; kept for signature compatibility
 * @param start first word index of the sentence span
 * @param end one past the last word index
 * @return number of fully-pruned (state, span) entries
 */
private static int countPrunedNodes(boolean[][][][] allowedStates,
        Numberer globalNumberer, boolean b, int start, int end) {
    int prunedCount = 0;
    // Walk every span [begin, stop) with begin < stop <= end.
    for (int begin = start; begin < end; ++begin) {
        for (int stop = begin + 1; stop <= end; ++stop) {
            boolean[][] cell = allowedStates[begin][stop];
            for (int state = 0; state < cell.length; ++state) {
                // A state with no allowed substate has been pruned entirely.
                if (!hasTrue(cell[state])) {
                    ++prunedCount;
                }
            }
        }
    }
    return prunedCount;
}
/**
 * Deserializes a gzip-compressed, Java-serialized {@code List[][][]} from
 * {@code fileName}.
 *
 * Fixed: the streams were only closed on the success path; try-with-resources
 * now closes them on every path (the original leaked file handles whenever
 * reading or deserialization failed).
 *
 * @param fileName path to the gzipped serialized data
 * @return the deserialized array, or {@code null} on any I/O or
 *         class-resolution failure (preserving the original contract)
 */
public static List[][][] loadData(String fileName) {
    try (ObjectInputStream in = new ObjectInputStream(
            new GZIPInputStream(new FileInputStream(fileName)))) {
        return (List[][][]) in.readObject();
    } catch (IOException e) {
        // Same diagnostics and null-return behavior as before.
        System.out.println("IOException\n"+e);
        return null;
    } catch (ClassNotFoundException e) {
        System.out.println("Class not found!");
        return null;
    }
}
private static String[] addOptions(String[] a, String[] b) {
String[] res = new String[a.length+b.length];
for (int i=0; i state;
String asString = (String)tagNumberer.object(state);
String unannotatedLabel = asString;
if (!isPreTerminal)
unannotatedLabel = TreeAnnotations.unAnnotateTree(new Tree(asString, Collections.singletonList(new Tree("FakeLabel"))), false).getLabel();
if (unannotatedLabel.equals(label))
{
if (hasTrue(allowed))
return true;
}
}
return false;
}
/**
 * Recursively counts gold-tree constituents that the pruning constraints
 * would disallow: a node is counted when no state matching its label has an
 * allowed substate for the node's span [from, to).
 *
 * @param tree gold (sub)tree being checked
 * @param cons chart of allowed substates indexed [from][to][state][substate]
 * @param tagNumberer maps state indices to label strings
 * @param splitRoot unused here; passed as true on recursive calls
 * @param from first word index covered by this subtree
 * @param to one past the last word index covered by this subtree
 * @return number of pruned gold constituents in this subtree
 */
private static int countPrunedNodes (Tree tree, boolean[][][][] cons, Numberer tagNumberer,boolean splitRoot, int from, int to){
int total = 0;
// Count this node if its label has no allowed substate over its span.
if (!isAllowed(tree.getLabel(),tagNumberer,cons[from][to],tree.isPreTerminal()))
{
total += 1;
}
if (tree.isPreTerminal()) {
return total;
}
// if (label<0) label =0;
//// System.out.println(label + " " +tree.getLabel());
// if (label>=numStates.length){
//// System.err.println("Have never seen this state before: "+tree.getLabel());
//// StateSet newState = new StateSet(zero, one, tree.getLabel().intern(),(short)from,(short)to);
//// return new Tree(newState);
// }
// short nodeNumStates = allSplitTheSame ? numStates[0] : numStates[label];
// if (!splitRoot) nodeNumStates = 1;
// StateSet newState = new StateSet(label, nodeNumStates, null, (short)from , (short)to);
// Tree newTree = new Tree(newState);
// List> newChildren = new ArrayList>();
// Recurse into the children, advancing `from` by each child's yield length.
for (Tree child : tree.getChildren()) {
short length = (short) child.getYield().size();
total += countPrunedNodes(child, cons, tagNumberer, true, from, from+length);
from += length;
}
return total;
}
/**
 * Returns true if the array contains at least one {@code true} entry;
 * a {@code null} array yields false.
 */
public static boolean hasTrue(boolean[] a)
{
    if (a == null) {
        return false;
    }
    for (boolean flag : a) {
        if (flag) {
            return true;
        }
    }
    return false;
}
/**
 * Callable entry point for the -testAll path: loads the grammar stored in
 * {@link #fileName}, parses {@link #testTrees} (honoring {@link #cons} and
 * {@link #maxSentenceLength}) and returns the populated evaluator paired
 * with the grammar file name, so the caller can compare F1 across grammars.
 *
 * NOTE(review): the return type's generics were lost in extraction —
 * presumably Pair&lt;LabeledConstituentEval&lt;String&gt;, String&gt;.
 */
public Pair, String> call() throws Exception {
// Evaluator ignores the ROOT label and punctuation tags.
EnglishPennTreebankParseEvaluator.LabeledConstituentEval eval = new EnglishPennTreebankParseEvaluator.LabeledConstituentEval(Collections.singleton("ROOT"), new HashSet(Arrays.asList(new String[] {"''", "``", ".", ":", ","})));
ParserData pData = ParserData.Load(fileName);
if (pData==null) {
System.out.println("Failed to load grammar from file"+fileName+".");
System.exit(1);
}
Grammar grammar = pData.getGrammar();
grammar.splitRules();
Lexicon lexicon = pData.getLexicon();
grammar.clearUnaryIntermediates();
lexicon.explicitlyComputeScores(grammar.finalLevel);
if (GrammarTester.Options.smooth){
System.out.println("Smoothing only the lexicon.");
Smoother lexSmoother = new SmoothAcrossParentSubstate(0.01);
lexicon.setSmoother(lexSmoother);
}
SpanPredictor spanPredictor = pData.getSpanPredictor();
ConstrainedArrayParser parser = null;// new
// ConstrainedHierarchicalTwoChartParser
// (grammar, lexicon, spanPredictor,
// grammar.finalLevel);
// Pick the chart parser matching the grammar type (mirrors the "plain" path in main()).
if (grammar instanceof HierarchicalAdaptiveGrammar) {
lexicon.explicitlyComputeScores(grammar.finalLevel);
parser = new ConstrainedHierarchicalTwoChartParser(grammar, lexicon,
spanPredictor, grammar.finalLevel);
} else
parser = new ConstrainedTwoChartsParser(grammar, lexicon, spanPredictor);
int i=0;
// Parse each tree under its per-sentence constraints and accumulate scores.
for (Tree testTree : testTrees) {
List testSentence = testTree.getYield();
int sentenceLength = testSentence.size();
if(sentenceLength > maxSentenceLength) continue;
Tree parsedTree = null;
boolean[][][][] con = (cons==null) ? null : cons[i];
parsedTree = parser.getBestConstrainedParse(testSentence,null,con);
parsedTree = TreeAnnotations.unAnnotateTree(parsedTree, false);
eval.evaluate(parsedTree, testTree, false);
i++;
}
return new Pair,String>(eval,fileName);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy