All Downloads are FREE. Search and download functionality uses the official Maven repository.

edu.berkeley.nlp.PCFGLA.HierarchicalLexicon Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
/**
 * 
 */
package edu.berkeley.nlp.PCFGLA;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;

import edu.berkeley.nlp.util.ArrayUtil;
import edu.berkeley.nlp.util.Numberer;

/**
 * @author petrov
 *
 */
public class HierarchicalLexicon extends SimpleLexicon {

	private static final long serialVersionUID = 1L;
	// For each tag and word (indexed [tag][word]), a list of hierarchical features.
	// The visible code adds and reads double[] elements, one array per level of the hierarchy.
	public List[][] hierarchicalScores;
	// Indexed [tag][word]; the value is used as an index into the matching
	// hierarchicalScores list, and levels above it are skipped when scores are computed.
	// NOTE(review): presumably the deepest refinement level stored for that pair -- confirm.
	public int[][] finalLevels;
	
	/**
	 * @param numSubStates
	 * @param threshold
	 */
	public HierarchicalLexicon(short[] numSubStates, double threshold) {
		super(numSubStates, threshold);
		hierarchicalScores = new List[numStates][];
	}

	public HierarchicalLexicon(SimpleLexicon lex){
  	super(lex.numSubStates,lex.threshold);
  	this.expectedCounts = new double[numStates][][];
  	this.tagWordIndexer = new IntegerIndexer[numStates];
  	this.wordIndexer = lex.wordIndexer;
  	this.wordCounter = lex.wordCounter;
//  	this.wordIsAmbiguous = lex.wordIsAmbiguous;
  	for (int tag=0; tag();
  			double[] score = {Math.log(scores[tag][0][word])};
  			hierarchicalScores[tag][word].add(score);
  			//finalLevels[tag][word]=0; // already initialized to 0
  		}
  	}
	}

	public void explicitlyComputeScores(int finalLevel){
		this.scores = new double[numStates][][];
		int nSubstates = (int)Math.pow(2, finalLevel);
//		int[] divisors = new int[nSubstates];//finalLevel+1];
//		for (int i=0; i<=finalLevel; i++){
//			int div = (int)Math.pow(2, finalLevel-i);
//			divisors[div] = div;
//		}

		for (int tag=0; tag scoreHierarchy = hierarchicalScores[tag][word];
				for (int level=0; level<=finalLevel; level++){
					if (level>finalLevels[tag][word]) 
						continue;
					double[] scoresThisLevel = scoreHierarchy.get(level); 
					int divisor = nSubstates/scoresThisLevel.length; // divisors[level];
					for (int substate=0; substate=counts[i]) { 
				newNumSubStates[i]=numSubStates[i];
			} 
			else{
				newNumSubStates[i] = (short)(numSubStates[i] * 2);
			}
		}
		HierarchicalLexicon newLex = newInstance();
		newLex.numSubStates = newNumSubStates;
		Random random = GrammarTrainer.RANDOM;
		newLex.expectedCounts = new double[numStates][][];
		newLex.tagWordIndexer = new IntegerIndexer[numStates];
		newLex.wordIndexer = this.wordIndexer;
  	for (int tag=0; tag[][] hS = new List[numStates][];
		newLex.finalLevels = new int[numStates][];
//		int[] nSubstates = new int[finalLevel+1];
//		for (int i=0; i<=finalLevel; i++){
//			nSubstates[i] = (int)Math.pow(2, i);
//		}
  	for (int tag=0; tag();
  			for (double[] scores : hierarchicalScores[tag][word]){
  				hS[tag][word].add(scores.clone());
  			}
  			int fLevel = this.finalLevels[tag][word]+1;
  			int nSub = (int)Math.pow(2, fLevel);
  			if (nSub > newNumSubStates[tag]) continue;
  			double[] newScores = new double[nSub];
  			for (int i=0; i scoreHierarchy = hierarchicalScores[tag][word];
				int level = finalLevels[tag][word];
				double[] scoresThisLevel = scoreHierarchy.get(level); 
				if (scoresThisLevel == null) continue;
				boolean allZero = true;
				for (int substate=0; substate




© 2015 - 2025 Weber Informatics LLC | Privacy Policy