All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.berkeley.nlp.discPCFG.CascadingLinearizer Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
package edu.berkeley.nlp.discPCFG;
///**
// * 
// */
//package edu.berkeley.nlp.classify;
//
//import java.io.Serializable;
//
//import edu.berkeley.nlp.PCFGLA.BinaryRule;
//import edu.berkeley.nlp.PCFGLA.Grammar;
//import edu.berkeley.nlp.PCFGLA.Rule;
//import edu.berkeley.nlp.PCFGLA.SimpleLexicon;
//import edu.berkeley.nlp.PCFGLA.UnaryRule;
//import edu.berkeley.nlp.syntax.StateSet;
//import edu.berkeley.nlp.math.ArrayMath;
//import edu.berkeley.nlp.math.DoubleArrays;
//import edu.berkeley.nlp.math.SloppyMath;
//import edu.berkeley.nlp.util.ArrayUtil;
//
///**
// * @author petrov
// *
// */
//public class CascadingLinearizer implements Linearizer, Serializable{
//	private static final long serialVersionUID = 1L;
//	Grammar grammar, oldGrammar;
//	SimpleLexicon lexicon, oldLexicon;
//	DefaultLinearizer linearizer, oldLinearizer;
//	int dimension, lexiconOffset;
//	int nSubstates;
//	
//	/**
//	 * @param grammar
//	 * @param oldGrammar
//	 * @param lexicon
//	 * @param simpleLexicon
//	 */
//	public CascadingLinearizer(Grammar grammar, Grammar oldGrammar, SimpleLexicon lexicon, SimpleLexicon oldLexicon) {
//		this.grammar = grammar;
//		this.oldGrammar = oldGrammar;
//		this.lexicon = lexicon;
//		this.oldLexicon = oldLexicon;
//		this.linearizer = new DefaultLinearizer(grammar, lexicon);
//		this.oldLinearizer = new DefaultLinearizer(oldGrammar, oldLexicon);
//		this.dimension = -1;
//		this.lexiconOffset = -1;
//		this.nSubstates = DoubleArrays.max(grammar.numSubStates);
//	}
//	
//	public int dimension() {
//		if (dimension==-1){
//			lexiconOffset = getLinearizedGrammar().length;
//			dimension = lexiconOffset + getLinearizedLexicon().length;
//		}
//		return dimension;
//	}
//  
//	public int getLexiconOffset(){
//		if (lexiconOffset==-1)
//			lexiconOffset = getLinearizedGrammar().length;
//		return lexiconOffset;
//	}
//
//
//	public Grammar delinearizeGrammar(double[] probs) {
//		int nDangerous = 0;
//		for (BinaryRule bRule : grammar.binaryRuleMap.keySet()){
//			int ind = linearizer.startIndex[linearizer.ruleIndexer.indexOf(bRule)];
//			double[][][] scores = bRule.getScores2();
//			double[][][] oldScores = oldGrammar.getBinaryScore(bRule);
//			for (int j=0; j1000){//Double.POSITIVE_INFINITY) {
////			  					System.out.println("POS INF");
////			  					val = 1000;//Double.MAX_VALUE; // prevent overflow
////			  				}
//		  				else if (SloppyMath.isVeryDangerous(val)) {
////		  					System.out.println("dangerous value when delinearizing grammar, binary "+ val);
////			  					val=Double.MAX_VALUE; // shouldn't happen but just in case
////		  					val = 0;
//		  					nDangerous++;
//	  					continue;
//		  				}
//							scores[j][k][l] = val;
//						}
//					}
//				}
//			}
//		}
//		if (nDangerous>0) System.out.println("Left "+nDangerous+" binary rule weights unchanged since the proposed weight was dangerous.");
//
////			UnaryRule[] unaries = this.getClosedSumUnaryRulesByParent(state);
////			for (int r = 0; r < unaries.length; r++) {
////				UnaryRule uRule = unaries[r];
//		nDangerous = 0;
//		for (UnaryRule uRule : grammar.unaryRuleMap.keySet()){
//			int ind = linearizer.startIndex[linearizer.ruleIndexer.indexOf(uRule)];
//			if (uRule.childState==uRule.parentState) continue;
//			double[][] scores = uRule.getScores2();
//			double[][] oldScores = oldGrammar.getUnaryScore(uRule);
//			for (int j=0; j1000){//==Double.POSITIVE_INFINITY){
////		  					System.out.println("POS INF");
////		  					val = 1000;//Double.MAX_VALUE; // prevent overflow
////		  				}
//	  				else if (SloppyMath.isVeryDangerous(val)) {
////	  					System.out.println("dangerous value when delinearizing grammar, unary "+val);
////		  					val=Double.MAX_VALUE; // shouldn't happen but just in case
////	  					val = 0;
//	  					nDangerous++;
//	  					continue;
//	  				}
//						scores[j][k] = val;
//					}
//				}
//			}
//		}
//		if (nDangerous>0) System.out.println("Left "+nDangerous+" unary rule weights unchanged since the proposed weight was dangerous.");
//
//		
//		grammar.closedSumRulesWithParent = grammar.closedViterbiRulesWithParent = grammar.unaryRulesWithParent;
//		grammar.closedSumRulesWithChild = grammar.closedViterbiRulesWithChild = grammar.unaryRulesWithC;
////		computePairsOfUnaries();
//		grammar.makeCRArrays();
//		return grammar;
//	}
//	
//
//	public SimpleLexicon delinearizeLexicon(double[] logProbs) {
//		int nDangerous = 0;
//		for (short tag=0; tag1000){//==Double.POSITIVE_INFINITY) {
////  					val = 1000;//Double.MAX_VALUE; // prevent overflow
////  				}
//  				else if (SloppyMath.isVeryDangerous(val)) {
////					System.out.println("dangerous value when delinearizing lexicon "+val);
////					System.out.println("Word "+tagWordIndexer[tag].get(0)+" tag "+tag);
////  					val=Double.MAX_VALUE; // shouldn't happen but just in case
////						val = 0;
//  					nDangerous++;
//						continue;
//  				}
//  				lexicon.scores[tag][substate][word] = val;
//  			}
//  		}
//  	}  	
////		System.out.println(lexicon);
//
//		if (nDangerous>0) System.out.println("Left "+nDangerous+" lexicon weights unchanged since the proposed weight was dangerous.");
//		return lexicon;
//
//	}
//
//	public int getLinearIndex(Rule rule) {
//		return linearizer.getLinearIndex(rule);
//	}
//
//
//	public int getLinearIndex(String word, int tag){
//		return getLinearIndex(lexicon.wordIndexer.indexOf(word), tag);
//	}
//
//	public int getLinearIndex(int globalWordIndex, int tag) {
//		return linearizer.getLinearIndex(globalWordIndex, tag);
//	}
//
//	public double[] getLinearizedGrammar() {
//		double[] logProbs = linearizer.getLinearizedGrammar();
//		
//		for (BinaryRule bRule : grammar.binaryRuleMap.keySet()){
//			int ind = linearizer.startIndex[linearizer.ruleIndexer.indexOf(bRule)];
//			double[][][] scores = bRule.getScores2();
//			double[][][] oldScores = oldGrammar.getBinaryScore(bRule);
//			for (int j=0; j




© 2015 - 2025 Weber Informatics LLC | Privacy Policy