edu.berkeley.nlp.discPCFG.HiearchicalAdaptiveLinearizer Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of berkeleyparser Show documentation
The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
The newest version!
/**
 * 
 */
package edu.berkeley.nlp.discPCFG;

import java.util.List;

import edu.berkeley.nlp.PCFGLA.BinaryRule;
import edu.berkeley.nlp.PCFGLA.ConditionalTrainer;
import edu.berkeley.nlp.PCFGLA.Grammar;
import edu.berkeley.nlp.PCFGLA.HierarchicalAdaptiveBinaryRule;
import edu.berkeley.nlp.PCFGLA.HierarchicalAdaptiveGrammar;
import edu.berkeley.nlp.PCFGLA.HierarchicalAdaptiveLexicalRule;
import edu.berkeley.nlp.PCFGLA.HierarchicalAdaptiveUnaryRule;
import edu.berkeley.nlp.PCFGLA.HierarchicalBinaryRule;
import edu.berkeley.nlp.PCFGLA.HierarchicalFullyConnectedAdaptiveLexicon;
import edu.berkeley.nlp.PCFGLA.HierarchicalGrammar;
import edu.berkeley.nlp.PCFGLA.HierarchicalUnaryRule;
import edu.berkeley.nlp.PCFGLA.SimpleLexicon;
import edu.berkeley.nlp.PCFGLA.SpanPredictor;
import edu.berkeley.nlp.PCFGLA.UnaryRule;
import edu.berkeley.nlp.syntax.StateSet;
import edu.berkeley.nlp.syntax.StateSetWithFeatures;
import edu.berkeley.nlp.util.ArrayUtil;
import edu.berkeley.nlp.math.DoubleArrays;
import edu.berkeley.nlp.math.SloppyMath;

/**
 * @author petrov
 *
 */
public class HiearchicalAdaptiveLinearizer extends HierarchicalLinearizer {
	private static final long serialVersionUID = 1L;
	
	HierarchicalAdaptiveGrammar grammar;
	HierarchicalFullyConnectedAdaptiveLexicon lexicon;

	public HiearchicalAdaptiveLinearizer(Grammar grammar, SimpleLexicon lexicon, SpanPredictor sp, int fLevel) {
		this.grammar = (HierarchicalAdaptiveGrammar)grammar;
		lexicon.explicitlyComputeScores(fLevel);
		grammar.closedSumRulesWithParent = grammar.closedViterbiRulesWithParent = grammar.unaryRulesWithParent;
		grammar.closedSumRulesWithChild = grammar.closedViterbiRulesWithChild = grammar.unaryRulesWithC;
		grammar.clearUnaryIntermediates();
		grammar.makeCRArrays();

		this.lexicon = (HierarchicalFullyConnectedAdaptiveLexicon)lexicon;
		this.spanPredictor = sp;
		this.finalLevel = fLevel;
		this.nSubstates = (int)ArrayUtil.max(grammar.numSubStates);
		init();
		computeMappings();

	}
	
	/**
	 * Returns the lexicon held by this linearizer.
	 *
	 * @return the {@code lexicon} field, typed as {@link SimpleLexicon}
	 */
	public SimpleLexicon getLexicon() {
		return this.lexicon;
	}
	

	/**
	 * Returns the grammar held by this linearizer.
	 *
	 * @return the {@code grammar} field, typed as {@link Grammar}
	 */
	public Grammar getGrammar() {
		return this.grammar;
	}


	
	/**
	 * Returns the lexicon parameters as a flat array ({@code logProbs}).
	 *
	 * <p>From what is visible: when {@code update} is true, {@code nLexiconWeights} is reset to 0;
	 * values obtained from {@code lexicon.rules[tag][word].getFinalLevel()} are written to
	 * consecutive positions of {@code logProbs}; and a warning is printed if the final write
	 * position differs from {@code logProbs.length}.
	 *
	 * <p>NOTE(review): this copy of the method is incomplete. The first {@code for} line below is
	 * cut off mid-header, and the declarations of {@code logProbs}, {@code index} and {@code word}
	 * are not visible. The text looks like it was lost wherever a '&lt;' was followed by a later
	 * '&gt;' -- TODO restore this method from the original source before relying on it.
	 *
	 * @param update when true, {@code nLexiconWeights} is reset before the parameters are collected
	 * @return the {@code logProbs} array
	 */
	public double[] getLinearizedLexicon(boolean update) {
  	if(update){
  		nLexiconWeights = 0;
	  	// NOTE(review): truncated line -- the loop condition and everything up to the declaration of 'vals' is missing.
	  	for (short tag=0; tag vals = lexicon.rules[tag][word].getFinalLevel();
  			for (Double val : vals){
  				logProbs[index++] = val;
  			}
  		}
  	}
		// Sanity check: the number of values written should equal the array length.
		if (index!=logProbs.length)
			System.out.println("unequal length in lexicon");

  	return logProbs;
	}

	/**
	 * Entry point for writing a flat lexicon parameter array back into the lexicon.
	 *
	 * <p>NOTE(review): this span is damaged in this copy of the file. Only the start of the method
	 * is recognisable: it loops over tags and reads {@code identifier} and {@code mapping} from
	 * {@code lexicon.rules[tag][tagSpecificWordIndex]}. After the second truncated line, the code
	 * refers to {@code weights}, {@code counts}, {@code isGold}, {@code hr} and
	 * {@code thisStartIndex}, none of which are declared here. It is presumably the tail of a
	 * different method (likely an {@code increment} overload for binary rules, given the
	 * three-dimensional {@code hr.mapping}) -- TODO restore both methods from the original source.
	 *
	 * @param logProbs           flat parameter array (its use is not visible in the surviving text)
	 * @param usingOnlyLastLevel not referenced in the surviving text
	 */
	public void delinearizeLexicon(double[] logProbs, boolean usingOnlyLastLevel) {
		// NOTE(review): truncated line -- the loop condition and the following statements are missing.
		for (short tag=0; tag=0){
					HierarchicalAdaptiveLexicalRule rule = lexicon.rules[tag][tagSpecificWordIndex];
					int startIndexWord = rule.identifier;
					short[] mapping = rule.mapping;
					// NOTE(review): truncated line -- everything from here to the closing brace of this
					// span appears to belong to another method whose header was lost.
					for (int i=0; i0){
					weights[curInd]=0;
					if (isGold) counts[thisStartIndex + curInd] += val;
					else counts[thisStartIndex + curInd] -= val;
				}
	//			System.out.println(counts[thisStartIndex + curInd]);
			}
		} else {
			int curInd=0;
			for (int lp = 0; lp < nSubstates; lp++) {
				for (int rp = 0; rp < nSubstates; rp++) {
	//				if (scores[cp]==null) continue; 
					for (int np = 0; np < nSubstates; np++) {
						double val = weights[curInd];
						short mapping[][][] = hr.mapping;
						// Unlike the branch above, this branch always adds and never consults isGold.
						if (val>0){
							counts[thisStartIndex + mapping[lp][rp][np]] += val;
							weights[curInd]=0;
						}
						curInd++;
					}
				}
			}
		}
	}
	
	/**
	 * Accumulates the positive entries of {@code weights} for a unary rule into {@code counts}
	 * and clears them in {@code weights}.
	 *
	 * <p>The rule is cast to {@link HierarchicalAdaptiveUnaryRule} and its {@code identifier} is
	 * used as the rule's start offset into {@code counts}. In the branch guarded by
	 * {@code if (true)}, each positive weight is zeroed and then added to {@code counts} at
	 * {@code identifier + curInd} when {@code isGold} is true, or subtracted otherwise.
	 * The {@code else} branch pairs with the literal {@code true} condition and so never runs; it
	 * indexes {@code counts} through {@code hr.mapping} instead of directly.
	 *
	 * <p>NOTE(review): the {@code for} header in the first branch is truncated in this copy of
	 * the file. The loop bound and the declaration of {@code val} are missing, apparently lost
	 * between a '&lt;' and a later '&gt;' -- TODO restore from the original source.
	 *
	 * @param counts  accumulator that is updated in place
	 * @param rule    unary rule; must be a {@link HierarchicalAdaptiveUnaryRule}
	 * @param weights per-parameter values; positive entries are reset to 0 once consumed
	 * @param isGold  true to add to {@code counts}, false to subtract
	 */
	public void increment(double[] counts, UnaryRule rule, double[] weights, boolean isGold) {
		HierarchicalAdaptiveUnaryRule hr = (HierarchicalAdaptiveUnaryRule)rule;
		int thisStartIndex = hr.identifier;
		if (true){
//			if (hr.parentState==0)
//				System.out.println("letss ee");
			// NOTE(review): truncated line -- loop condition, increment and the 'val' declaration are missing.
			for (int curInd=0; curInd0){
					weights[curInd]=0;
					if (isGold) counts[thisStartIndex + curInd] += val;
					else counts[thisStartIndex + curInd] -= val;
				}
	//			System.out.println(counts[thisStartIndex + curInd]);
			}
		} else {
			// Never executed (see the 'if (true)' above).
			int curInd = 0;
			// Special case for parentState == -1: only mapping[cp][0] is used.
			if (rule.parentState==-1){
				for (int cp = 0; cp < nSubstates; cp++) {
					double val = weights[curInd];
					short[][] mapping = hr.mapping;
					if (val>0){
						if (isGold) counts[thisStartIndex + mapping[cp][0]] += val;
						else counts[thisStartIndex + mapping[cp][0]] -= val;
						weights[curInd]=0;
					}
					curInd++;
				}
				return;
			}
			
			// General case: walk all (cp, np) pairs, with curInd advancing once per pair.
			for (int cp = 0; cp < nSubstates; cp++) {
	//			if (scores[cp]==null) continue; 
				for (int np = 0; np < nSubstates; np++) {
					double val = weights[curInd];
					short[][] mapping = hr.mapping;
					if (val>0){
						if (isGold) counts[thisStartIndex + mapping[cp][np]] += val;
						else counts[thisStartIndex + mapping[cp][np]] -= val;
						weights[curInd]=0;
					}
					curInd++;
				}
			}
		}
	}


	

	/**
	 * Pushes a flat parameter array back into the grammar and rebuilds its derived tables.
	 *
	 * <p>Every binary and unary rule is handed the full {@code probs} array through its own
	 * {@code updateScores} method. Afterwards the grammar's scores are recomputed for
	 * {@code finalLevel}, the closed sum/Viterbi unary rule tables are pointed at the plain
	 * unary rule tables, unary intermediates are cleared and the CR arrays are rebuilt.
	 *
	 * <p>The earlier version kept an {@code nDangerous} counter that was never incremented, so
	 * its two "dangerous weight" messages could never print; that dead code has been removed.
	 *
	 * @param probs flat parameter array passed unchanged to each rule's {@code updateScores}
	 */
	public void delinearizeGrammar(double[] probs) {
		for (BinaryRule bRule : grammar.binaryRuleMap.keySet()) {
			HierarchicalAdaptiveBinaryRule hRule = (HierarchicalAdaptiveBinaryRule) bRule;
			hRule.updateScores(probs);
		}

		for (UnaryRule uRule : grammar.unaryRuleMap.keySet()) {
			HierarchicalAdaptiveUnaryRule hRule = (HierarchicalAdaptiveUnaryRule) uRule;
			hRule.updateScores(probs);
		}

		// Rebuild everything derived from the rule scores, as the constructor does.
		grammar.explicitlyComputeScores(finalLevel);
		grammar.closedSumRulesWithParent = grammar.closedViterbiRulesWithParent = grammar.unaryRulesWithParent;
		grammar.closedSumRulesWithChild = grammar.closedViterbiRulesWithChild = grammar.unaryRulesWithC;
		grammar.clearUnaryIntermediates();
		grammar.makeCRArrays();
	}

	/**
	 * Flattens the final-level parameters of all binary and unary rules into one array.
	 *
	 * <p>When {@code update} is true, each rule is first given a fresh {@code identifier}, which
	 * is its start offset in the returned array, and {@code nGrammarWeights} is recomputed as the
	 * sum of the rules' {@code nParam} values. Binary rules receive offsets before unary rules.
	 * A message is printed for any binary rule whose parent state is not marked in
	 * {@code grammar.isGrammarTag}.
	 *
	 * <p>Unary rules whose child and parent state are the same still get an offset, but their
	 * values are not copied, so their slots stay at 0.
	 *
	 * @param update whether to recompute the rule offsets and the total weight count
	 * @return a newly allocated array of length {@code nGrammarWeights}
	 */
	public double[] getLinearizedGrammar(boolean update) {
		if (update) {
			nGrammarWeights = 0;
			for (BinaryRule bRule : grammar.binaryRuleMap.keySet()) {
				HierarchicalAdaptiveBinaryRule hRule = (HierarchicalAdaptiveBinaryRule) bRule;
				if (!grammar.isGrammarTag[bRule.parentState]) {
					System.out.println("Incorrect grammar tag");
				}
				bRule.identifier = nGrammarWeights;
				nGrammarWeights += hRule.nParam;
			}
			for (UnaryRule uRule : grammar.unaryRuleMap.keySet()) {
				HierarchicalAdaptiveUnaryRule hRule = (HierarchicalAdaptiveUnaryRule) uRule;
				uRule.identifier = nGrammarWeights;
				nGrammarWeights += hRule.nParam;
			}
		}

		double[] logProbs = new double[nGrammarWeights];

		for (BinaryRule bRule : grammar.binaryRuleMap.keySet()) {
			HierarchicalAdaptiveBinaryRule hRule = (HierarchicalAdaptiveBinaryRule) bRule;
			int ind = hRule.identifier;
			// Typed list: iterating a raw List as Double does not compile.
			List<Double> vals = hRule.getFinalLevel();
			for (Double val : vals) {
				logProbs[ind++] = val;
			}
		}

		for (UnaryRule uRule : grammar.unaryRuleMap.keySet()) {
			HierarchicalAdaptiveUnaryRule hRule = (HierarchicalAdaptiveUnaryRule) uRule;
			// Self-loops keep their reserved slots but contribute no values.
			if (uRule.childState == uRule.parentState) {
				continue;
			}
			int ind = hRule.identifier;
			List<Double> vals = hRule.getFinalLevel();
			for (Double val : vals) {
				logProbs[ind++] = val;
			}
		}
		return logProbs;
	}

	
	public void delinearizeLexiconWeights(double[] logWeights) {
		int nGrZ=0, nLexZ=0, nSpZ=0;

		int tmpI = 0;
	    for (int i=0; i