All downloads are free. Search and download functionality uses the official Maven repository.

edu.berkeley.nlp.PCFGLA.TreeOracle Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
/**
 * 
 */
package edu.berkeley.nlp.PCFGLA;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;

import edu.berkeley.nlp.parser.EnglishPennTreebankParseEvaluator;
import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.syntax.Trees.PennTreeReader;

/**
 * @author petrov
 *
 */
public class TreeOracle {
	
	public static class Options {
		@Option(name = "-nbestFile", usage = "File with nbest lists")
		public String nbestFile;

		@Option(name = "-goldFile", usage = "File with gold trees")
		public String goldFile;
	}
	
	public static void main(String[] args) {
		OptionParser optParser = new OptionParser(Options.class);
		Options opts = (Options) optParser.parse(args, true);
		// provide feedback on command-line arguments
		System.err.println("Calling with " + optParser.getPassedInOptions());

		int totalTrees = 0;
		int goldTrees = 0;
    EnglishPennTreebankParseEvaluator.LabeledConstituentEval eval = new EnglishPennTreebankParseEvaluator.LabeledConstituentEval(new HashSet(Arrays.asList(new String[] {"ROOT","PSEUDO"})), new HashSet(Arrays.asList(new String[] {"''", "``", ".", ":", ","})));

		try{
			BufferedReader nbestData = new BufferedReader(new InputStreamReader(new FileInputStream(opts.nbestFile), "UTF-8"));
			BufferedReader goldData = new BufferedReader(new InputStreamReader(new FileInputStream(opts.goldFile), "UTF-8"));
			
			String line = "";
			List> nbestList = new LinkedList>();
    	while ((line = nbestData.readLine()) != null) {
  			Tree tree = PennTreeReader.parseEasy(line);
    		if (line.equals("\n") || tree==null || tree.getYield().get(0).equals("") ) { // done with the block
    			Tree bestTree = null;
    			double bestF1 = -1;
    			Tree goldTree = PennTreeReader.parseEasy(goldData.readLine());
//    			System.err.println(goldTree);
    			for (Tree candidateTree : nbestList){
//    				System.err.println(candidateTree);
    				if (candidateTree.getYield().size()==0) continue;
      			double f1 = eval.evaluate(candidateTree, goldTree, false);
      			totalTrees++;
    				if (f1 > bestF1) {
    					bestF1 = f1;
    					bestTree = candidateTree;
    				}
    			}
    			if (bestTree == null) {
    				System.out.println("(())");
    			} else {
    				System.out.println(bestTree);
    				goldTrees++;
    			}
    			nbestList = new LinkedList>();
    		} else {
    			nbestList.add(tree);
    		}
    	}
		}catch (Exception ex) {
			ex.printStackTrace();
		}
		
		System.err.println("Average nbest list length:"+(double)totalTrees/(double)goldTrees);
		System.exit(0);

	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy