All downloads are FREE. Search and download functionality uses the official Maven repository.

edu.berkeley.nlp.discPCFG.ConstrainedParsingObjectiveFunction Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
package edu.berkeley.nlp.discPCFG;
///**
// * 
// */
//package edu.berkeley.nlp.classify;
//
//import java.io.FileInputStream;
//import java.io.IOException;
//import java.io.ObjectInputStream;
//import java.util.ArrayList;
//import java.util.Arrays;
//import java.util.List;
//import java.util.zip.GZIPInputStream;
//
//import edu.berkeley.nlp.syntax.StateSet;
//import edu.berkeley.nlp.syntax.Tree;
//import edu.berkeley.nlp.math.DoubleArrays;
//import edu.berkeley.nlp.math.SloppyMath;
//import edu.berkeley.nlp.oldPCFGLA.ArrayParser;
//import edu.berkeley.nlp.oldPCFGLA.Binarization;
//import edu.berkeley.nlp.oldPCFGLA.ConstrainedCoarseToFineParser;
//import edu.berkeley.nlp.oldPCFGLA.ConstrainedCoarseToFineTwoChartsParser;
//import edu.berkeley.nlp.oldPCFGLA.Grammar;
//import edu.berkeley.nlp.oldPCFGLA.Lexicon;
//import edu.berkeley.nlp.oldPCFGLA.ParserData;
//import edu.berkeley.nlp.oldPCFGLA.StateSetTreeList;
//import edu.berkeley.nlp.util.Numberer;
//import fig.basic.Pair;
//
///**
// * @author petrov
// *
// */
//public class ConstrainedParsingObjectiveFunction extends ParsingObjectiveFunction implements ObjectiveFunction{
//
//	/**
//	 * @param grammar
//	 * @param lexicon
//	 * @param trainingTrees
//	 * @param sigma
//	 */
//	String baseFileName;
//	ConstrainedCoarseToFineParser parser;
//	List[][][] constraintsCollection;
//
//	public ConstrainedParsingObjectiveFunction(Grammar grammar, int[][][] grammarIndex, Lexicon lexicon, 
//			int[][] lexiconIndex, StateSetTreeList trainingTrees, 
//			double sigma, String fileName, boolean regularize, boolean alwaysUpdate, int nGrW, int nLexW) {
//    this.sigma = sigma;
//    this.trainingTrees = trainingTrees;
//		this.dim = nGrW+nLexW;
//		this.alwaysUpdate = alwaysUpdate;
//		if (alwaysUpdate) System.out.println("Will update gold counts in every round.");
//
//		this.grammar = grammar;
//		this.lexicon = lexicon;
//  	this.startIndexGrammar = grammarIndex;
//  	this.startIndexLexicon = lexiconIndex;
//
//  	this.parser = new ConstrainedCoarseToFineTwoChartsParser(grammar, lexicon, 1.0,-1,false,false,false);
//  	this.baseFileName = fileName;
//  	
//  	constraintsCollection = loadData(fileName);
//  	this.REGULARIZE = regularize;
//  	
//  	this.expectedGoldCounts = DoubleArrays.constantArray(0.0, dimension());
//  	goldParser = new ArrayParser(grammar.copyGrammar(), lexicon.copyLexicon());
//  	this.updateGoldCounts = true;
//  	
//  	this.nGrammarWeights = nGrW;
//  	this.nLexiconWeights = nLexW;
//  	
////  	boolean hardCounts = false, noSmoothing = true, debugOutput = false;
////  	int treeNumber = 0;
////  	for (Tree stateSetTree : trainingTrees) {
////  		//parser.doGoldInsideOutsideScores(stateSetTree, sentence);
////  		goldParser.doInsideOutsideScores(stateSetTree, noSmoothing, debugOutput);
////  		StateSet node = stateSetTree.getLabel();
////    	double tree_score = node.getIScore(0);
////  		int tree_scale = node.getIScale();
////  		goldLLs[treeNumber++] = Math.log(tree_score) + (100*tree_scale);
////    	parser.incrementExpectedGoldCounts(expectedGoldCounts, stateSetTree, grammar, startIndex, hardCounts, tree_score, tree_scale);
////  	}  	
////  	
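////  	for (int i=0; i< ... [extraction gap: the text from this '<' up to the '>' that closed the next method's return type was stripped as markup; the rest of this loop and the end of the constructor are missing]
//  }
//
//  // NOTE(review): signature reconstructed; return type inferred from "return new Pair(objective, derivatives)" at the end of this method -- confirm against the original source.
//  public Pair<Double,double[]> calculate() {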
//  	//goldParser = new ArrayParser(grammar, lexicon);
//  	parser.updateFinalGrammarAndLexicon(grammar,lexicon);
//    System.out.print("In Constrained-Calculate");
//    double objective = -1.0;
//  	double[] expectedCounts = DoubleArrays.constantArray(0.0, dimension());
//  	deltas = DoubleArrays.constantArray(0.0, dimension());
//  	//HERE
//  	if (updateGoldCounts||alwaysUpdate) {
//  		this.expectedGoldCounts = DoubleArrays.constantArray(0.0, dimension());
//  		goldParser = new ArrayParser(grammar, lexicon);
//  		System.out.println("Will update gold counts in this round.");
//  	}
//
//  	int nInvalidTrees = 0, maxInvalidTrees = 50000, nValidTrees = 0;
//  	boolean tooManyInvalidTrees = false;
//  	int i = 0;
//  	
//  	// load first constraints file
//  	int treeNumber = 0;
//  	int cIndex = 0;
//    //String fileName = baseFileName+"-"+cIndex+".data";
//    cIndex++;
//    
//  	for (Tree stateSetTree : trainingTrees) {
//
//  		if(nInvalidTrees>maxInvalidTrees) {
//  			tooManyInvalidTrees = true;
//  			break;
//  		}
//  		
//  		List<StateSet> yield = stateSetTree.getYield();
//    	List sentence = new ArrayList(yield.size());
//    	for (StateSet el : yield){ sentence.add(el.getWord()); }
//    	
//    	//goldParser.doInsideOutsideScores(stateSetTree,true,false);   
//    	//grammar.tallyStateSetTree(stateSetTree, grammar);
//    	parser.setConstraints(constraintsCollection[treeNumber]);
//    	treeNumber++;
//    	Pair chart = parser.doInsideOutsideScores(sentence,stateSetTree);
//    	double allLL = parser.getLogInsideScore();
//
//    	
//    	if (Double.isInfinite(allLL)) {
//    		System.out.println("Couldn't compute a parse. allLL:"+allLL+"\n"+sentence);
//    		//allLL = -1000;
//    		nInvalidTrees++;
//    		continue;
//    	}
//    	
//    	parser.incrementExpectedCounts(expectedCounts, grammar, startIndexGrammar, lexicon, startIndexLexicon, sentence, false);
//
//    	
//  		//parser.doGoldInsideOutsideScores(stateSetTree, sentence);
//  		
//    	
//    	//goldParser = new ArrayParser(grammar, lexicon);
//    	boolean hardCounts = false, noSmoothing = true, debugOutput = false;
//    	goldParser.doInsideOutsideScores(stateSetTree,noSmoothing,debugOutput);
//    		
//    	    	
//    	StateSet node = stateSetTree.getLabel();
//    	double tree_score = node.getIScore(0);
//  		int tree_scale = node.getIScale();
//  		//HERE
//  		if (updateGoldCounts||alwaysUpdate) parser.incrementExpectedGoldCounts(expectedGoldCounts, stateSetTree, grammar, startIndexGrammar, lexicon, startIndexLexicon, hardCounts, tree_score, tree_scale);
//    	
//  		
//  		//System.out.println("\nSum Gold: "+DoubleArrays.sum(expectedGoldCounts)+" Sum Emp: "+DoubleArrays.sum(expectedCounts));
//      
//  		
//  		
//    	//double goldLL1 = parser.getLogInsideScore(); //Math.log(stateSetTree.getLabel().getIScore(0)) + (100*stateSetTree.getLabel().getIScale());//System.out.println(stateSetTree);
//    	double goldLL = Math.log(tree_score) + (100*tree_scale);//goldLLs[treeNumber-2];//
//    	
//    	//if (goldLL!=goldLL1)	System.out.println("Different LL "+goldLL+", "+goldLL1+" for tree "+stateSetTree);
//    		
//    		
//    	if (Double.isInfinite(goldLL)) {
//    		System.out.println("Couldn't score the gold parse. goldLL:"+goldLL+"\n"+sentence);
//    		//goldLL = -10000;
//    		nInvalidTrees++;
//    		continue;
//    	}
// 
//    		
//    	
//    	if (goldLL > allLL){
//    		System.out.println("Something is wrong! The gold LL is " + goldLL + " and the all LL is " + allLL+"\n"+sentence);
//    		nInvalidTrees++;
//    		//continue;
//    	}
//    	
//     	//System.out.println("0: "+stateSetTree.getChildren().get(0).getChildren().get(0).getLabel().getIScore(0)+
//    //			"1: "+stateSetTree.getChildren().get(0).getChildren().get(0).getLabel().getIScore(1));
// 	
//    	objective += (goldLL - allLL);
//    	nValidTrees++;
//    	
////    	System.out.println("gLL " + goldLL + " aLL "+allLL+" COND LL "+(goldLL-allLL));
//       	
//    	if (i++ % 100 == 0) System.out.print(".");
//    	if (i % 1000 == 0) System.out.print(i);
//    	/*if (treeNumber % 2000 == 0) {
//    		fileName = baseFileName+"-"+cIndex+".data";
//        cIndex++;
//        treeNumber=0;
//    		constraintsCollection = loadData(fileName);
//    	}*/
//    	
//    	//lexicon.trainTree(stateSetTree, -1, previousLexicon, secondHalf,false);
//    	
//    }
//    
//  	updateGoldCounts = false;
//  	//if (firstTime) firstTime = false;
//  	
//    System.out.print("done.\nThe objective was "+objective);
//
//    
//    double[] derivatives = computeDerivatives(expectedGoldCounts, expectedCounts);
//     
//    if (REGULARIZE){
//    	objective = regularize(objective, derivatives);
//    }		    
//	  System.out.print(" and is "+objective+" after regularization.");
//	  System.out.print(" Sum Gold: "+DoubleArrays.sum(expectedGoldCounts)+" Sum Emp: "+DoubleArrays.sum(expectedCounts)+"\n");
//    
//    objective *= -1.0; // flip sign since we are working with a minimizer rather than with a maximizer
//    for (int index = 0; index < lastX.length; index++) {
//      // 'x' and 'derivatives' have same layout
//      derivatives[index] *= -1.0;
//      double val = Math.log(expectedGoldCounts[index]/expectedCounts[index]);
//      val = (SloppyMath.isVeryDangerous(val)) ? 0 : val;
//      deltas[index] = val;
//    }
//    System.out.print(" Sum Derivatives: "+DoubleArrays.sum(derivatives));
//    
//////    
////    System.out.println("Exp GOLD: "+Arrays.toString(expectedGoldCounts));
////  	System.out.println("Expected: "+Arrays.toString(expectedCounts));
////  	
////    System.out.println("Weights:   "+Arrays.toString(lastX));
////  	
////    System.out.println("Derivatives: "+Arrays.toString(derivatives));
////    System.out.println(grammar);
//////  	
//    
//    if (tooManyInvalidTrees || nValidTrees < 2){
//    	return failedSearchResult();
//    }
//   
//    ParserData pData = new ParserData(lexicon, grammar, Numberer.getNumberers(), grammar.numSubStates, 1, 0, Binarization.RIGHT);
//
//    String outFileName = "tmp"+objective+".gr";
//    //System.out.println("Saving grammar to "+outFileName+".");
//    //if 
//    pData.Save(outFileName); //System.out.println("Saving successful.");
//    //else System.out.println("Saving failed!");
//
//   
//    return new Pair(objective, derivatives);
//  }
//  
//  public static List[][][] loadData(String fileName) {
//  	List[][][] data = null;
//    try {
//      FileInputStream fis = new FileInputStream(fileName); // Load from file
//      GZIPInputStream gzis = new GZIPInputStream(fis); // Compressed
//      ObjectInputStream in = new ObjectInputStream(gzis); // Load objects
//      data = (List[][][])in.readObject(); // Read the mix of grammars
//      in.close(); // And close the stream.
//    } catch (IOException e) {
//      System.out.println("IOException\n"+e);
//      return null;
//    } catch (ClassNotFoundException e) {
//      System.out.println("Class not found!");
//      return null;
//    }
//    return data;
//  }
//	
//}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy