All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.berkeley.nlp.discPCFG.CascadingLinearizer Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
package edu.berkeley.nlp.discPCFG;
///**
// * 
// */
//package edu.berkeley.nlp.classify;
//
//import java.io.Serializable;
//
//import edu.berkeley.nlp.PCFGLA.BinaryRule;
//import edu.berkeley.nlp.PCFGLA.Grammar;
//import edu.berkeley.nlp.PCFGLA.Rule;
//import edu.berkeley.nlp.PCFGLA.SimpleLexicon;
//import edu.berkeley.nlp.PCFGLA.UnaryRule;
//import edu.berkeley.nlp.syntax.StateSet;
//import edu.berkeley.nlp.math.ArrayMath;
//import edu.berkeley.nlp.math.DoubleArrays;
//import edu.berkeley.nlp.math.SloppyMath;
//import edu.berkeley.nlp.util.ArrayUtil;
//
///**
// * @author petrov
// *
// */
//public class CascadingLinearizer implements Linearizer, Serializable{
//	private static final long serialVersionUID = 1L;
//	Grammar grammar, oldGrammar;
//	SimpleLexicon lexicon, oldLexicon;
//	DefaultLinearizer linearizer, oldLinearizer;
//	int dimension, lexiconOffset;
//	int nSubstates;
//	
//	/**
//	 * @param grammar
//	 * @param oldGrammar
//	 * @param lexicon
//	 * @param simpleLexicon
//	 */
//	public CascadingLinearizer(Grammar grammar, Grammar oldGrammar, SimpleLexicon lexicon, SimpleLexicon oldLexicon) {
//		this.grammar = grammar;
//		this.oldGrammar = oldGrammar;
//		this.lexicon = lexicon;
//		this.oldLexicon = oldLexicon;
//		this.linearizer = new DefaultLinearizer(grammar, lexicon);
//		this.oldLinearizer = new DefaultLinearizer(oldGrammar, oldLexicon);
//		this.dimension = -1;
//		this.lexiconOffset = -1;
//		this.nSubstates = DoubleArrays.max(grammar.numSubStates);
//	}
//	
//	public int dimension() {
//		if (dimension==-1){
//			lexiconOffset = getLinearizedGrammar().length;
//			dimension = lexiconOffset + getLinearizedLexicon().length;
//		}
//		return dimension;
//	}
//  
//	public int getLexiconOffset(){
//		if (lexiconOffset==-1)
//			lexiconOffset = getLinearizedGrammar().length;
//		return lexiconOffset;
//	}
//
//
//	public Grammar delinearizeGrammar(double[] probs) {
//		int nDangerous = 0;
//		for (BinaryRule bRule : grammar.binaryRuleMap.keySet()){
//			int ind = linearizer.startIndex[linearizer.ruleIndexer.indexOf(bRule)];
//			double[][][] scores = bRule.getScores2();
//			double[][][] oldScores = oldGrammar.getBinaryScore(bRule);
//			for (int j=0; j1000){//Double.POSITIVE_INFINITY) {
////			  					System.out.println("POS INF");
////			  					val = 1000;//Double.MAX_VALUE; // prevent overflow
////			  				}
//		  				else if (SloppyMath.isVeryDangerous(val)) {
////		  					System.out.println("dangerous value when delinearizing grammar, binary "+ val);
////			  					val=Double.MAX_VALUE; // shouldn't happen but just in case
////		  					val = 0;
//		  					nDangerous++;
//	  					continue;
//		  				}
//							scores[j][k][l] = val;
//						}
//					}
//				}
//			}
//		}
//		if (nDangerous>0) System.out.println("Left "+nDangerous+" binary rule weights unchanged since the proposed weight was dangerous.");
//
////			UnaryRule[] unaries = this.getClosedSumUnaryRulesByParent(state);
////			for (int r = 0; r < unaries.length; r++) {
////				UnaryRule uRule = unaries[r];
//		nDangerous = 0;
//		for (UnaryRule uRule : grammar.unaryRuleMap.keySet()){
//			int ind = linearizer.startIndex[linearizer.ruleIndexer.indexOf(uRule)];
//			if (uRule.childState==uRule.parentState) continue;
//			double[][] scores = uRule.getScores2();
//			double[][] oldScores = oldGrammar.getUnaryScore(uRule);
//			for (int j=0; j1000){//==Double.POSITIVE_INFINITY){
////		  					System.out.println("POS INF");
////		  					val = 1000;//Double.MAX_VALUE; // prevent overflow
////		  				}
//	  				else if (SloppyMath.isVeryDangerous(val)) {
////	  					System.out.println("dangerous value when delinearizing grammar, unary "+val);
////		  					val=Double.MAX_VALUE; // shouldn't happen but just in case
////	  					val = 0;
//	  					nDangerous++;
//	  					continue;
//	  				}
//						scores[j][k] = val;
//					}
//				}
//			}
//		}
//		if (nDangerous>0) System.out.println("Left "+nDangerous+" unary rule weights unchanged since the proposed weight was dangerous.");
//
//		
//		grammar.closedSumRulesWithParent = grammar.closedViterbiRulesWithParent = grammar.unaryRulesWithParent;
//		grammar.closedSumRulesWithChild = grammar.closedViterbiRulesWithChild = grammar.unaryRulesWithC;
////		computePairsOfUnaries();
//		grammar.makeCRArrays();
//		return grammar;
//	}
//	
//
//	public SimpleLexicon delinearizeLexicon(double[] logProbs) {
//		int nDangerous = 0;
//		for (short tag=0; tag1000){//==Double.POSITIVE_INFINITY) {
////  					val = 1000;//Double.MAX_VALUE; // prevent overflow
////  				}
//  				else if (SloppyMath.isVeryDangerous(val)) {
////					System.out.println("dangerous value when delinearizing lexicon "+val);
////					System.out.println("Word "+tagWordIndexer[tag].get(0)+" tag "+tag);
////  					val=Double.MAX_VALUE; // shouldn't happen but just in case
////						val = 0;
//  					nDangerous++;
//						continue;
//  				}
//  				lexicon.scores[tag][substate][word] = val;
//  			}
//  		}
//  	}  	
////		System.out.println(lexicon);
//
//		if (nDangerous>0) System.out.println("Left "+nDangerous+" lexicon weights unchanged since the proposed weight was dangerous.");
//		return lexicon;
//
//	}
//
//	public int getLinearIndex(Rule rule) {
//		return linearizer.getLinearIndex(rule);
//	}
//
//
//	public int getLinearIndex(String word, int tag){
//		return getLinearIndex(lexicon.wordIndexer.indexOf(word), tag);
//	}
//
//	public int getLinearIndex(int globalWordIndex, int tag) {
//		return linearizer.getLinearIndex(globalWordIndex, tag);
//	}
//
//	public double[] getLinearizedGrammar() {
//		double[] logProbs = linearizer.getLinearizedGrammar();
//		
//		for (BinaryRule bRule : grammar.binaryRuleMap.keySet()){
//			int ind = linearizer.startIndex[linearizer.ruleIndexer.indexOf(bRule)];
//			double[][][] scores = bRule.getScores2();
//			double[][][] oldScores = oldGrammar.getBinaryScore(bRule);
//			for (int j=0; j




© 2015 - 2025 Weber Informatics LLC | Privacy Policy