edu.berkeley.nlp.discPCFG.HierarchicalLinearizer Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of berkeleyparser Show documentation
The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
The newest version!
/**
 * 
 */
package edu.berkeley.nlp.discPCFG;

import java.io.Serializable;

import edu.berkeley.nlp.PCFGLA.BinaryRule;
import edu.berkeley.nlp.PCFGLA.Grammar;
import edu.berkeley.nlp.PCFGLA.HierarchicalBinaryRule;
import edu.berkeley.nlp.PCFGLA.HierarchicalGrammar;
import edu.berkeley.nlp.PCFGLA.HierarchicalLexicon;
import edu.berkeley.nlp.PCFGLA.HierarchicalUnaryRule;
import edu.berkeley.nlp.PCFGLA.Rule;
import edu.berkeley.nlp.PCFGLA.SimpleLexicon;
import edu.berkeley.nlp.PCFGLA.SpanPredictor;
import edu.berkeley.nlp.PCFGLA.UnaryRule;
import edu.berkeley.nlp.syntax.StateSet;
import edu.berkeley.nlp.math.DoubleArrays;
import edu.berkeley.nlp.math.SloppyMath;
import edu.berkeley.nlp.util.ArrayUtil;

/**
 * Similar to the cascading linearizer, but doesn't compute the grammars explicitly;
 * instead it uses hierarchical rules and merges back unused splits.
 * @author petrov
 *
 */
public class HierarchicalLinearizer extends DefaultLinearizer {

	private static final long serialVersionUID = 1L;
	
	// Grammar handled by this linearizer; the four-argument constructor stores its
	// Grammar argument here after casting it to HierarchicalGrammar.
	HierarchicalGrammar grammar;
	// Lexicon handled by this linearizer; the four-argument constructor stores its
	// SimpleLexicon argument here after casting it to HierarchicalLexicon.
  HierarchicalLexicon lexicon;

	// Level taken from the constructor's fLevel argument; it sizes the first dimension
	// of the mapping tables below (finalLevel+1 entries) and is the level passed to
	// explicitlyComputeScores on both the grammar and the lexicon.
	int finalLevel;
	// Allocated in computeMappings() as [finalLevel+1][nSubstates]. Read as
	// lexiconMapping[level][substate]; the value is added to a start index into a counts array.
	int[][] lexiconMapping;
	// Allocated in computeMappings() as [finalLevel+1][nSubstates][nSubstates]. Read as
	// unaryMapping[level][cp][np]; the value is added to a start index into a counts array.
	int[][][] unaryMapping;
	// Allocated in computeMappings() as [finalLevel+1][nSubstates][nSubstates][nSubstates].
	// Read as binaryMapping[level][lp][rp][np]; the value is added to a binary rule's
	// identifier to address a counts array.
	int[][][][] binaryMapping;

	/**
	 * Creates a linearizer without assigning any of the fields declared in this class.
	 */
	public HierarchicalLinearizer() {
	}
	
	/**
	 * @param grammar
	 * @param lexicon
	 */
	public HierarchicalLinearizer(Grammar grammar, SimpleLexicon lexicon, SpanPredictor sp,  int fLevel) {
		this.grammar = (HierarchicalGrammar)grammar;
		this.lexicon = (HierarchicalLexicon)lexicon;
		this.spanPredictor = sp;
		this.finalLevel = fLevel;
		this.nSubstates = (int)ArrayUtil.max(grammar.numSubStates);
		init();
		computeMappings();
	}
	
	/**
	 * Allocates {@code lexiconMapping}, {@code unaryMapping} and {@code binaryMapping}
	 * (first dimension {@code finalLevel+1}, remaining dimensions {@code nSubstates})
	 * and starts filling them level by level.
	 * <p>
	 * NOTE(review): the text of this method is truncated part-way through the per-level
	 * loop (see the marker below); the statements after the truncation, down to the
	 * closing brace, appear to belong to a different method. Restore the missing text
	 * from the upstream source before relying on this span.
	 */
	protected void computeMappings(){
		lexiconMapping = new int[finalLevel+1][nSubstates];
		unaryMapping = new int[finalLevel+1][nSubstates][nSubstates];
		binaryMapping = new int[finalLevel+1][nSubstates][nSubstates][nSubstates];
		
		// divisors[i] = 2^(finalLevel-i)
		int[] divisors = new int[finalLevel+1];
		for (int i=0; i<=finalLevel; i++){
			divisors[i] = (int)Math.pow(2, finalLevel-i);
		}
		
		for (int level=1; level<=finalLevel; level++){
			int div = divisors[level];
			// l = 2^level; tmpU and tmpB are square/cubic scratch tables of that side length
			int l = (int)Math.pow(2,level);
			int[][] tmpU = new int[l][l];
			int[][][] tmpB = new int[l][l][l];
			int indU=0, indB=0;
			// NOTE(review): the next line is truncated -- everything between "i" and "0)" was lost
			// (presumably a "<" ... ">" span stripped during extraction). The rest of computeMappings
			// and the beginning of the following method are missing.
			for (int i=0; i0) System.out.println("Left "+nDangerous+" binary rule weights unchanged since the proposed weight was dangerous.");

		// From here on the statements operate on unary rules; nDangerous is not declared in the
		// visible text, so its declaration is presumably in the lost part -- TODO confirm.
		nDangerous = 0;
		for (UnaryRule uRule : grammar.unaryRuleMap.keySet()){
			HierarchicalUnaryRule hRule = (HierarchicalUnaryRule)uRule; 
			int ind = hRule.identifier;//startIndex[ruleIndexer.indexOf(hRule)];
			// unary rules whose child and parent state coincide are skipped
			if (uRule.childState==uRule.parentState) continue;
			double[][] scores = hRule.getLastLevel();
			// NOTE(review): the next line is truncated in the same way; the loop body and the
			// condition guarding the println were lost.
			for (int j=0; j0) System.out.println("Left "+nDangerous+" unary rule weights unchanged since the proposed weight was dangerous.");

		grammar.explicitlyComputeScores(finalLevel);
		// the closed sum/Viterbi unary tables are pointed at the plain unary rule tables
		grammar.closedSumRulesWithParent = grammar.closedViterbiRulesWithParent = grammar.unaryRulesWithParent;
		grammar.closedSumRulesWithChild = grammar.closedViterbiRulesWithChild = grammar.unaryRulesWithC;
//		computePairsOfUnaries();
		grammar.clearUnaryIntermediates();
		grammar.makeCRArrays();
//		return grammar;
	}

	/**
	 * Processes the lexicon against {@code logProbs} and then calls
	 * {@code lexicon.explicitlyComputeScores(finalLevel)}.
	 * <p>
	 * NOTE(review): the body of the per-tag loop is missing from this text (see the
	 * marker below), so how {@code logProbs} is consumed cannot be read from here. The
	 * surviving log message indicates that some weights can be left unchanged when the
	 * proposed weight is considered "dangerous".
	 *
	 * @param logProbs array of values handed to this method; its layout is not visible
	 *                 in the surviving text
	 */
	public void delinearizeLexicon(double[] logProbs) {
		int nDangerous = 0;
		// NOTE(review): the next line is truncated -- everything between "tag" and "0)" was lost
		// (presumably a "<" ... ">" span stripped during extraction).
		for (short tag=0; tag0) System.out.println("Left "+nDangerous+" lexicon weights unchanged since the proposed weight was dangerous.");
		lexicon.explicitlyComputeScores(finalLevel);
//		System.out.println(lexicon);
//		return lexicon;
	}

	/**
	 * When {@code update} is true, resets {@code nGrammarWeights} to 0 and walks
	 * {@code grammar.binaryRuleMap}, storing the current value of {@code nGrammarWeights}
	 * as each binary rule's {@code identifier}. A rule whose parent state is not flagged
	 * in {@code grammar.isGrammarTag} causes "Incorrect grammar tag" to be printed.
	 * <p>
	 * NOTE(review): the text of this method is truncated inside the binary-rule loop
	 * (see the markers below). The remainder of this method, including its return
	 * statement, is missing, and the statements that follow down to the closing brace
	 * appear to be fragments of other methods. Restore the missing text from the
	 * upstream source before relying on this span.
	 *
	 * @param update whether rule identifiers and the weight count are recomputed
	 * @return not visible in the surviving text
	 */
	public double[] getLinearizedGrammar(boolean update) {
		if (update){
//			int nRules = grammar.binaryRuleMap.size() + grammar.unaryRuleMap.size();
//			startIndex = new int[nRules];
			
			nGrammarWeights = 0;
			for (BinaryRule bRule : grammar.binaryRuleMap.keySet()){
				HierarchicalBinaryRule hRule = (HierarchicalBinaryRule)bRule; 
//				ruleIndexer.add(hRule);
				if (!grammar.isGrammarTag[bRule.parentState]){ System.out.println("Incorrect grammar tag"); }
				// the rule's identifier is the running weight count at the time the rule is visited
				bRule.identifier = nGrammarWeights; 
				double[][][] scores = hRule.getLastLevel();
				// NOTE(review): the next line is truncated -- text between "j" and "=0){" was lost
				// (presumably a "<" ... ">" span stripped during extraction).
				for (int j=0; j=0){
				// NOTE(review): globalSigIndex and tag are not declared in the visible text; this
				// fragment presumably comes from a lexicon-related method -- TODO confirm.
				int finalLevel = lexicon.getFinalLevel(globalSigIndex, tag);
				// NOTE(review): truncated line, same cause as above.
				for (int i=0; i=0) {
			int finalLevel = lexicon.getFinalLevel(globalWordIndex, tag);
			// NOTE(review): truncated line, same cause as above. counts, weights, isGold,
			// thisStartIndex, curInd, cp and val used below are not declared in the visible text.
			for (int i=0; i0){
					// positive weight: add to (gold) or subtract from (non-gold) the mapped count slot,
					// then zero the weight
					if (isGold) counts[thisStartIndex + lexiconMapping[finalLevel][cp]] += val;
					else counts[thisStartIndex + lexiconMapping[finalLevel][cp]] -= val;
					weights[curInd]=0;
				}
				curInd++;
			}
			return;
		}
		
		// Walks weights as a flat nSubstates x nSubstates table; each positive entry is added to
		// (gold) or subtracted from (non-gold) the slot selected through unaryMapping, then zeroed.
		for (int cp = 0; cp < nSubstates; cp++) {
//			if (scores[cp]==null) continue; 
			for (int np = 0; np < nSubstates; np++) {
				double val = weights[curInd];
				if (val>0){
					if (isGold) counts[thisStartIndex + unaryMapping[finalLevel][cp][np]] += val;
					else counts[thisStartIndex + unaryMapping[finalLevel][cp][np]] -= val;
					weights[curInd]=0;
				}
				curInd++;
			}
		}
	}


	/**
	 * Folds the positive entries of {@code weights} into {@code counts} for one binary rule
	 * and zeroes every entry that was consumed.
	 * <p>
	 * {@code weights} is read as a flat {@code nSubstates x nSubstates x nSubstates} table
	 * with the last index varying fastest. For each entry greater than zero, the target
	 * slot in {@code counts} is the rule's {@code identifier} plus
	 * {@code binaryMapping[lastLevel][i][j][k]}; the value is added when {@code isGold}
	 * is true and subtracted otherwise. Entries that are not greater than zero are left
	 * untouched.
	 *
	 * @param counts  accumulator that is updated in place
	 * @param rule    binary rule; must be a {@code HierarchicalBinaryRule}
	 * @param weights flat weight table for the rule; consumed entries are reset to 0
	 * @param isGold  {@code true} to add the weights, {@code false} to subtract them
	 */
	public void increment(double[] counts, BinaryRule rule, double[] weights, boolean isGold) {
		final HierarchicalBinaryRule hierRule = (HierarchicalBinaryRule) rule;
		final int offset = hierRule.identifier;
		final int level = hierRule.lastLevel;

		int flat = 0;
		for (int first = 0; first < nSubstates; first++) {
			for (int second = 0; second < nSubstates; second++) {
				for (int third = 0; third < nSubstates; third++, flat++) {
					final double mass = weights[flat];
					if (!(mass > 0)) {
						continue;
					}
					// The mapping is only consulted for positive weights, as before.
					final int slot = offset + binaryMapping[level][first][second][third];
					if (isGold) {
						counts[slot] += mass;
					} else {
						counts[slot] -= mass;
					}
					weights[flat] = 0;
				}
			}
		}
	}
	
	
	/**
	 * Returns the grammar held by this linearizer.
	 *
	 * @return the current value of the {@code grammar} field
	 */
	public Grammar getGrammar() {
		return this.grammar;
	}

	/**
	 * Returns the lexicon held by this linearizer.
	 *
	 * @return the current value of the {@code lexicon} field
	 */
	public SimpleLexicon getLexicon() {
		return this.lexicon;
	}

	/**
	 * Returns the span predictor held by this linearizer.
	 *
	 * @return the current value of the {@code spanPredictor} field
	 */
	public SpanPredictor getSpanPredictor() {
		return this.spanPredictor;
	}

	
}