edu.berkeley.nlp.PCFGLA.HierarchicalLexicon Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of berkeleyparser Show documentation
The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
The newest version!
/**
 * 
 */
package edu.berkeley.nlp.PCFGLA;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;

import edu.berkeley.nlp.util.ArrayUtil;
import edu.berkeley.nlp.util.Numberer;

/**
 * @author petrov
 *
 */
public class HierarchicalLexicon extends SimpleLexicon {

	private static final long serialVersionUID = 1L;

	/**
	 * Hierarchical score features, indexed as {@code [tag][word]}. Each list
	 * holds one {@code double[]} per level and is read with
	 * {@code get(level)}; the single-element array added by the
	 * {@code SimpleLexicon}-based constructor stores a log score.
	 * The element type is {@code double[]} because every use in this class
	 * adds, iterates over, or reads {@code double[]} values.
	 */
	public List<double[]>[][] hierarchicalScores;

	/**
	 * Indexed as {@code [tag][word]}: the level used to look up the current
	 * score array in {@code hierarchicalScores[tag][word]}. Levels above this
	 * value are skipped when scores are computed explicitly.
	 */
	public int[][] finalLevels;
	
	/**
	 * Creates a hierarchical lexicon whose score table has one (still
	 * unallocated) row per state.
	 *
	 * @param numSubStates forwarded unchanged to the {@code SimpleLexicon} constructor
	 * @param threshold forwarded unchanged to the {@code SimpleLexicon} constructor
	 */
	public HierarchicalLexicon(short[] numSubStates, double threshold) {
		super(numSubStates, threshold);
		// Only the outer, per-state dimension is allocated here; the per-tag
		// arrays stay null until they are filled in elsewhere.
		this.hierarchicalScores = new List[numStates][];
	}

	/**
	 * Builds a hierarchical lexicon from an existing {@code SimpleLexicon}.
	 * The sub-state counts and threshold are taken from {@code lex}, fresh
	 * {@code expectedCounts} and {@code tagWordIndexer} arrays are allocated
	 * with one slot per state, and {@code wordIndexer} and {@code wordCounter}
	 * are shared with {@code lex} by reference (not copied).
	 * For each tag/word pair a one-element array holding
	 * {@code Math.log(scores[tag][0][word])} is added to
	 * {@code hierarchicalScores[tag][word]}.
	 *
	 * @param lex the lexicon to take the settings and word indexing from
	 */
	public HierarchicalLexicon(SimpleLexicon lex){
  	super(lex.numSubStates,lex.threshold);
  	this.expectedCounts = new double[numStates][][];
  	this.tagWordIndexer = new IntegerIndexer[numStates];
  	// Shared with lex by reference, not copied.
  	this.wordIndexer = lex.wordIndexer;
  	this.wordCounter = lex.wordCounter;
//  	this.wordIsAmbiguous = lex.wordIsAmbiguous;
  	// NOTE(review): the loop header below looks truncated in this copy of the
  	// file (everything between a '<' and the next '>' appears to be missing,
  	// including the loop bounds and the per-word list allocation) -- restore
  	// it from the upstream source before compiling.
  	for (int tag=0; tag();
  			// Seed the hierarchy with a single-element array holding the log of
  			// the substate-0 score.
  			double[] score = {Math.log(scores[tag][0][word])};
  			hierarchicalScores[tag][word].add(score);
  			//finalLevels[tag][word]=0; // already initialized to 0
  		}
  	}
	}

	public void explicitlyComputeScores(int finalLevel){
		this.scores = new double[numStates][][];
		int nSubstates = (int)Math.pow(2, finalLevel);
//		int[] divisors = new int[nSubstates];//finalLevel+1];
//		for (int i=0; i<=finalLevel; i++){
//			int div = (int)Math.pow(2, finalLevel-i);
//			divisors[div] = div;
//		}

		for (int tag=0; tag scoreHierarchy = hierarchicalScores[tag][word];
				for (int level=0; level<=finalLevel; level++){
					if (level>finalLevels[tag][word]) 
						continue;
					double[] scoresThisLevel = scoreHierarchy.get(level); 
					int divisor = nSubstates/scoresThisLevel.length; // divisors[level];
					for (int substate=0; substate=counts[i]) { 
				newNumSubStates[i]=numSubStates[i];
			} 
			else{
				newNumSubStates[i] = (short)(numSubStates[i] * 2);
			}
		}
		HierarchicalLexicon newLex = newInstance();
		newLex.numSubStates = newNumSubStates;
		Random random = GrammarTrainer.RANDOM;
		newLex.expectedCounts = new double[numStates][][];
		newLex.tagWordIndexer = new IntegerIndexer[numStates];
		newLex.wordIndexer = this.wordIndexer;
  	for (int tag=0; tag[][] hS = new List[numStates][];
		newLex.finalLevels = new int[numStates][];
//		int[] nSubstates = new int[finalLevel+1];
//		for (int i=0; i<=finalLevel; i++){
//			nSubstates[i] = (int)Math.pow(2, i);
//		}
  	for (int tag=0; tag();
  			for (double[] scores : hierarchicalScores[tag][word]){
  				hS[tag][word].add(scores.clone());
  			}
  			int fLevel = this.finalLevels[tag][word]+1;
  			int nSub = (int)Math.pow(2, fLevel);
  			if (nSub > newNumSubStates[tag]) continue;
  			double[] newScores = new double[nSub];
  			for (int i=0; i scoreHierarchy = hierarchicalScores[tag][word];
				int level = finalLevels[tag][word];
				double[] scoresThisLevel = scoreHierarchy.get(level); 
				if (scoresThisLevel == null) continue;
				boolean allZero = true;
				for (int substate=0; substate