All downloads are free. Search and download functionality uses the official Maven repository.

edu.berkeley.nlp.discPCFG.HierarchicalLinearizer Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
/**
 * 
 */
package edu.berkeley.nlp.discPCFG;

import java.io.Serializable;

import edu.berkeley.nlp.PCFGLA.BinaryRule;
import edu.berkeley.nlp.PCFGLA.Grammar;
import edu.berkeley.nlp.PCFGLA.HierarchicalBinaryRule;
import edu.berkeley.nlp.PCFGLA.HierarchicalGrammar;
import edu.berkeley.nlp.PCFGLA.HierarchicalLexicon;
import edu.berkeley.nlp.PCFGLA.HierarchicalUnaryRule;
import edu.berkeley.nlp.PCFGLA.Rule;
import edu.berkeley.nlp.PCFGLA.SimpleLexicon;
import edu.berkeley.nlp.PCFGLA.SpanPredictor;
import edu.berkeley.nlp.PCFGLA.UnaryRule;
import edu.berkeley.nlp.syntax.StateSet;
import edu.berkeley.nlp.math.DoubleArrays;
import edu.berkeley.nlp.math.SloppyMath;
import edu.berkeley.nlp.util.ArrayUtil;

/**
 * Similar to the cascading linearizer, but doesn't compute the grammars explicitly;
 * instead it uses hierarchical rules and merges back unused splits.
 * @author petrov
 *
 */
public class HierarchicalLinearizer extends DefaultLinearizer {

	private static final long serialVersionUID = 1L;
	
	// Grammar handled by this linearizer; assigned in the four-argument
	// constructor by casting the supplied Grammar to HierarchicalGrammar.
	HierarchicalGrammar grammar;
	// Lexicon handled by this linearizer; assigned in the four-argument
	// constructor by casting the supplied SimpleLexicon to HierarchicalLexicon.
  HierarchicalLexicon lexicon;

	// Value of the constructor's fLevel argument. The three mapping tables
	// below are allocated with finalLevel+1 entries in their first dimension.
	int finalLevel;
	// Allocated as [finalLevel+1][nSubstates]. An entry is added to a start
	// index to locate the slot in the counts array that receives a lexicon weight.
	int[][] lexiconMapping;
	// Allocated as [finalLevel+1][nSubstates][nSubstates]. Read as
	// [level][cp][np] and added to a rule's start index when accumulating
	// unary-rule counts.
	int[][][] unaryMapping;
	// Allocated as [finalLevel+1][nSubstates][nSubstates][nSubstates]. Read as
	// [level][lp][rp][np] and added to a rule's start index when accumulating
	// binary-rule counts.
	int[][][][] binaryMapping;

	/**
	 * No-argument constructor. It assigns none of the fields declared in this
	 * class and does not build the mapping tables; only the superclass
	 * constructor runs.
	 */
	public HierarchicalLinearizer() {
		super();
	}
	
	/**
	 * Creates a linearizer for the given grammar, lexicon and span predictor,
	 * then runs {@code init()} followed by {@link #computeMappings()}.
	 *
	 * @param grammar grammar to linearize; it is cast to
	 *                {@link HierarchicalGrammar}, and the maximum of its
	 *                {@code numSubStates} becomes {@code nSubstates}
	 * @param lexicon lexicon to linearize; it is cast to
	 *                {@link HierarchicalLexicon}
	 * @param sp      span predictor stored in {@code spanPredictor}
	 * @param fLevel  value stored in {@code finalLevel}
	 * @throws ClassCastException if {@code grammar} or {@code lexicon} is not
	 *                            of the hierarchical type it is cast to
	 */
	public HierarchicalLinearizer(Grammar grammar, SimpleLexicon lexicon, SpanPredictor sp, int fLevel) {
		// The casts run in the same order as before: grammar first, then lexicon.
		this.grammar = (HierarchicalGrammar) grammar;
		this.lexicon = (HierarchicalLexicon) lexicon;

		// No parameter shadows these names, so no "this." qualifier is needed.
		spanPredictor = sp;
		finalLevel = fLevel;
		nSubstates = (int) ArrayUtil.max(grammar.numSubStates);

		// Both calls rely on the fields assigned above.
		init();
		computeMappings();
	}
	
	/**
	 * Allocates the lexicon, unary and binary mapping tables and begins filling
	 * them level by level.
	 *
	 * NOTE(review): this method is cut off in the middle of a loop header below.
	 * The rest of its body and the header of the method that follows it are
	 * missing from this text, so the code after the cut belongs to a different
	 * method. Restore the missing part from the upstream source before
	 * compiling or changing this code.
	 */
	protected void computeMappings(){
		// One table per level 0..finalLevel, each dimensioned by nSubstates.
		lexiconMapping = new int[finalLevel+1][nSubstates];
		unaryMapping = new int[finalLevel+1][nSubstates][nSubstates];
		binaryMapping = new int[finalLevel+1][nSubstates][nSubstates][nSubstates];
		
		// divisors[i] = 2^(finalLevel-i)
		int[] divisors = new int[finalLevel+1];
		for (int i=0; i<=finalLevel; i++){
			divisors[i] = (int)Math.pow(2, finalLevel-i);
		}
		
		// Level 0 is not visited; its table entries keep the default value 0.
		for (int level=1; level<=finalLevel; level++){
			int div = divisors[level];
			// l = 2^level, the side length of the temporary tables for this level
			int l = (int)Math.pow(2,level);
			int[][] tmpU = new int[l][l];
			int[][][] tmpB = new int[l][l][l];
			int indU=0, indB=0;
			// NOTE(review): the line below is truncated; the loop condition and
			// everything up to the println are missing. The println appears to
			// be the tail of a pass over binary rule weights in another method
			// (presumably the grammar counterpart of delinearizeLexicon) - TODO confirm.
			for (int i=0; i0) System.out.println("Left "+nDangerous+" binary rule weights unchanged since the proposed weight was dangerous.");

		// Reset the counter before the unary rules are processed.
		nDangerous = 0;
		for (UnaryRule uRule : grammar.unaryRuleMap.keySet()){
			HierarchicalUnaryRule hRule = (HierarchicalUnaryRule)uRule; 
			// The rule's identifier is used as its start index.
			int ind = hRule.identifier;//startIndex[ruleIndexer.indexOf(hRule)];
			// Unary rules whose child state equals their parent state are skipped.
			if (uRule.childState==uRule.parentState) continue;
			double[][] scores = hRule.getLastLevel();
			// NOTE(review): the line below is truncated as well; the loop that
			// walks over scores is missing up to the println.
			for (int j=0; j0) System.out.println("Left "+nDangerous+" unary rule weights unchanged since the proposed weight was dangerous.");

		// Recompute the grammar's scores for finalLevel, point the closed
		// sum/Viterbi unary rule arrays at the plain unary rule arrays, then
		// clear the unary intermediates and call makeCRArrays().
		grammar.explicitlyComputeScores(finalLevel);
		grammar.closedSumRulesWithParent = grammar.closedViterbiRulesWithParent = grammar.unaryRulesWithParent;
		grammar.closedSumRulesWithChild = grammar.closedViterbiRulesWithChild = grammar.unaryRulesWithC;
//		computePairsOfUnaries();
		grammar.clearUnaryIntermediates();
		grammar.makeCRArrays();
//		return grammar;
	}

	/**
	 * Processes the linearized lexicon weights and then asks the lexicon to
	 * recompute its scores for {@code finalLevel}.
	 *
	 * NOTE(review): the loop over tags is truncated in this text, so how
	 * {@code logProbs} is consumed and when a weight counts as "dangerous"
	 * cannot be seen here. Restore the loop from the upstream source.
	 *
	 * @param logProbs linearized weights; its use lies in the missing part of the loop
	 */
	public void delinearizeLexicon(double[] logProbs) {
		// Counter reported by the message below.
		int nDangerous = 0;
		// NOTE(review): truncated line; the loop condition and body are missing
		// up to the println.
		for (short tag=0; tag0) System.out.println("Left "+nDangerous+" lexicon weights unchanged since the proposed weight was dangerous.");
		lexicon.explicitlyComputeScores(finalLevel);
//		System.out.println(lexicon);
//		return lexicon;
	}

	/**
	 * Starts building the linearized grammar weights. When {@code update} is
	 * true, {@code nGrammarWeights} is reset to 0 and each binary rule's
	 * identifier is set to the current value of {@code nGrammarWeights}.
	 *
	 * NOTE(review): this method is truncated inside its binary-rule loop. The
	 * rest of its body, including the return statement, is missing from this
	 * text. The lines after the cut are fragments of other methods whose
	 * headers are also missing (they use thisStartIndex, curInd, weights,
	 * counts and isGold, none of which is declared in the visible text).
	 * Restore the missing parts from the upstream source before changing this code.
	 *
	 * @param update whether to reassign rule identifiers (see above)
	 */
	public double[] getLinearizedGrammar(boolean update) {
		if (update){
//			int nRules = grammar.binaryRuleMap.size() + grammar.unaryRuleMap.size();
//			startIndex = new int[nRules];
			
			nGrammarWeights = 0;
			for (BinaryRule bRule : grammar.binaryRuleMap.keySet()){
				HierarchicalBinaryRule hRule = (HierarchicalBinaryRule)bRule; 
//				ruleIndexer.add(hRule);
				// A parent state that is not a grammar tag is only reported, not rejected.
				if (!grammar.isGrammarTag[bRule.parentState]){ System.out.println("Incorrect grammar tag"); }
				// The rule's identifier records nGrammarWeights at the time the rule is visited.
				bRule.identifier = nGrammarWeights; 
				double[][][] scores = hRule.getLastLevel();
				// NOTE(review): truncated line; the rest of this loop and of the
				// method is missing.
				for (int j=0; j=0){
				// Fragment of another method: the final level is looked up in the
				// lexicon by (globalSigIndex, tag).
				int finalLevel = lexicon.getFinalLevel(globalSigIndex, tag);
				// NOTE(review): truncated line.
				for (int i=0; i=0) {
			// Fragment: the final level is looked up in the lexicon by
			// (globalWordIndex, tag).
			int finalLevel = lexicon.getFinalLevel(globalWordIndex, tag);
			// NOTE(review): truncated line; the loop that declares cp and val is missing.
			for (int i=0; i0){
					// val is added to counts when isGold and subtracted otherwise,
					// at the slot thisStartIndex + lexiconMapping[finalLevel][cp];
					// the weight is then zeroed.
					if (isGold) counts[thisStartIndex + lexiconMapping[finalLevel][cp]] += val;
					else counts[thisStartIndex + lexiconMapping[finalLevel][cp]] -= val;
					weights[curInd]=0;
				}
				curInd++;
			}
			return;
		}
		
		// Tail of a method whose header is missing (presumably the unary-rule
		// increment - TODO confirm). weights is read sequentially through curInd
		// over all (cp, np) pairs.
		for (int cp = 0; cp < nSubstates; cp++) {
//			if (scores[cp]==null) continue; 
			for (int np = 0; np < nSubstates; np++) {
				double val = weights[curInd];
				// Only strictly positive weights are transferred: added to counts
				// when isGold, subtracted otherwise, then zeroed.
				if (val>0){
					if (isGold) counts[thisStartIndex + unaryMapping[finalLevel][cp][np]] += val;
					else counts[thisStartIndex + unaryMapping[finalLevel][cp][np]] -= val;
					weights[curInd]=0;
				}
				curInd++;
			}
		}
	}


	/**
	 * Transfers the positive entries of {@code weights} for a binary rule into
	 * {@code counts} and zeroes each entry it transfers.
	 *
	 * <p>{@code weights} is read sequentially while iterating over all
	 * {@code (lp, rp, np)} triples with each index in
	 * {@code [0, nSubstates)}, {@code np} varying fastest. A positive entry goes
	 * to {@code counts[rule.identifier + binaryMapping[rule.lastLevel][lp][rp][np]]}.
	 *
	 * @param counts  accumulator that is updated in place
	 * @param rule    binary rule; it is cast to {@link HierarchicalBinaryRule}
	 * @param weights per-substate weights; transferred entries are set to 0
	 * @param isGold  {@code true} to add the weight to {@code counts},
	 *                {@code false} to subtract it
	 */
	public void increment(double[] counts, BinaryRule rule, double[] weights, boolean isGold) {
		final HierarchicalBinaryRule hierRule = (HierarchicalBinaryRule) rule;
		final int offset = hierRule.identifier;
		final int level = hierRule.lastLevel;

		int pos = 0;
		for (int lp = 0; lp < nSubstates; lp++) {
			for (int rp = 0; rp < nSubstates; rp++) {
				for (int np = 0; np < nSubstates; np++, pos++) {
					final double mass = weights[pos];
					// Written as !(mass > 0) so that NaN is skipped exactly as before.
					if (!(mass > 0)) {
						continue;
					}
					// The table lookup happens only for positive weights.
					final int target = offset + binaryMapping[level][lp][rp][np];
					if (isGold) {
						counts[target] += mass;
					} else {
						counts[target] -= mass;
					}
					weights[pos] = 0;
				}
			}
		}
	}
	
	
	/**
	 * Returns the hierarchical grammar held in the {@code grammar} field.
	 *
	 * @return the grammar field, typed as {@link Grammar}
	 */
	public Grammar getGrammar() {
		return this.grammar;
	}

	/**
	 * Returns the hierarchical lexicon held in the {@code lexicon} field.
	 *
	 * @return the lexicon field, typed as {@link SimpleLexicon}
	 */
	public SimpleLexicon getLexicon() {
		return this.lexicon;
	}

	/**
	 * Returns the span predictor held in the {@code spanPredictor} field.
	 *
	 * @return the span predictor field
	 */
	public SpanPredictor getSpanPredictor() {
		return this.spanPredictor;
	}

	
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy