All downloads are free. Search and download functionality uses the official Maven repository.

edu.berkeley.nlp.PCFGLA.HierarchicalAdaptiveLexicalRule Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
/**
 * 
 */
package edu.berkeley.nlp.PCFGLA;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.syntax.Trees.PennTreeRenderer;
import edu.berkeley.nlp.util.Pair;

/**
 * @author petrov
 *
 */
public class HierarchicalAdaptiveLexicalRule implements Serializable{
	private static final long serialVersionUID = 1L;

	double[] scores;
	public short[] mapping;
	Tree hierarchy;
	public int nParam;
	public int identifier;
	
//	HierarchicalAdaptiveLexicalRule(short t, int w){
//		this.tag = t;
//		this.wordIndex = w;
//	}

	HierarchicalAdaptiveLexicalRule(){
		hierarchy = new Tree(0.0);
		scores = new double[1];
		mapping = new short[1];
		nParam = 1;
	}
	
	public Pair countParameters(){
		// first one is the max_depth, second one is the number of parameters
		int maxDepth = hierarchy.getDepth();
		nParam = hierarchy.getYield().size();
		return new Pair(maxDepth, nParam);
	}
	
	public void splitRule(int nSubstates){
		splitRuleHelper(hierarchy, 2);
		mapping = new short[nSubstates];
		int finalLevel = (int)(Math.log(mapping.length)/Math.log(2));
		updateMapping((short)0, 0, 0, finalLevel, hierarchy);
//		mapping[0] = (short)0; 
//		mapping[1] = (short)1;
	}
	

	private Pair updateMapping(short myID, int nextSubstate, int myDepth, int finalDepth, Tree tree) {
		if (tree.isLeaf()){
			if (myDepth==finalDepth){
				mapping[nextSubstate++] = myID;
			} else {
				int substatesToCover = (int)Math.pow(2,finalDepth-myDepth);
				for (int i=0; i child : tree.getChildren()){
				Pair tmp = updateMapping(myID, nextSubstate, myDepth+1, finalDepth, child);
				myID = tmp.getFirst();
				nextSubstate = tmp.getSecond();
			}
		}
		return new Pair(myID, nextSubstate);
	}

	private void splitRuleHelper(Tree tree, int splitFactor) {
		if (tree.isLeaf()){
			if (tree.getLabel()!=0||nParam==1){ // split it
				ArrayList> children = new ArrayList>(splitFactor);
				for (int i=0; i child = new Tree((GrammarTrainer.RANDOM.nextDouble()-.5)/100.0);
					children.add(child);
				}
				tree.setChildren(children);
				nParam += splitFactor-1;
//			} else { //perturb it
//				tree.setLabel(GrammarTrainer.RANDOM.nextDouble()/100.0);
			}
		} else {
			for (Tree child : tree.getChildren()){
				splitRuleHelper(child, splitFactor);
			}
		}
	}

	public void explicitlyComputeScores(int finalLevel, final boolean usingOnlyLastLevel){
		int nSubstates = (int)Math.pow(2, finalLevel);
		scores = new double[nSubstates];
		int nextSubstate = fillScores(0, 0, 0, finalLevel, hierarchy, usingOnlyLastLevel);
		if (nextSubstate != nSubstates) 
			System.out.println("Didn't fill all lexical scores!");
		mapping = new short[nSubstates];
		updateMapping((short)0, 0, 0, finalLevel, hierarchy);
	}
	
	private int fillScores(double previousScore, int nextSubstate, int myDepth, int finalDepth, Tree tree, final boolean usingOnlyLastLevel){
		if (tree.isLeaf()){
			double myScore = (usingOnlyLastLevel) ?  Math.exp(tree.getLabel()) : Math.exp(previousScore + tree.getLabel());
			if (myDepth==finalDepth){
				scores[nextSubstate++] = myScore;
			} else {
				int substatesToCover = (int)Math.pow(2,finalDepth-myDepth);
				for (int i=0; i child : tree.getChildren()){
				nextSubstate = fillScores(myScore, nextSubstate, myDepth+1, finalDepth, child, usingOnlyLastLevel);
			}
		}
		return nextSubstate;
	}
	
	public void updateScores(double[] scores){
		int nSubstates = updateHierarchy(hierarchy, 0, scores);
		if (nSubstates != nParam) System.out.println("Didn't update all parameters");
	}

	
	private int updateHierarchy(Tree tree, int nextSubstate, double[] scores) {
		if (tree.isLeaf()){
			double val = scores[identifier + nextSubstate++];
			if (val>200) {
				System.out.println("Ignored proposed lexical value since it was danegrous");
				val = 0;
			} else 
				tree.setLabel(val);
		} else {
			for (Tree child : tree.getChildren()){
				nextSubstate = updateHierarchy(child, nextSubstate, scores);
			}
		}
		return nextSubstate;
	}

	/**
	 * @return
	 */
	public List getFinalLevel() {
		return hierarchy.getYield();
	}
	
	private void compactifyHierarchy(Tree tree){
		if (tree.getDepth()==2){
			boolean allZero = true;
			for (Tree child : tree.getChildren()){
				allZero = allZero && (child.getLabel()==0.0);
			}
			if (allZero) {
				nParam -= tree.getChildren().size()-1;
				tree.setChildren(Collections.EMPTY_LIST);
			}
		} else {
			for (Tree child : tree.getChildren()){
				compactifyHierarchy(child);
			}
		}
	}
	
	
	public String toString(){
		StringBuilder sb = new StringBuilder();
		compactifyHierarchy(hierarchy);
		sb.append(Arrays.toString(scores));
		sb.append("\n");
		sb.append(PennTreeRenderer.render(hierarchy));
		sb.append("\n");
		return sb.toString();
	}

	public int mergeRule() { 
		int paramBefore = nParam;
		compactifyHierarchy(hierarchy); 
		scores = null;
		mapping = null;
		return paramBefore - nParam; 
	}

	
	public int countNonZeroFeatures() {
		int total = 0;
		for (Tree d : hierarchy.getPreOrderTraversal()) { if (d.getLabel()!=0) total++; }
		return total;
	}
	
	public int countNonZeroFringeFeatures() {
		int total = 0;
		for (Tree d : hierarchy.getTerminals()) { if (d.getLabel()!=0) total++; }
		return total;
	}


}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy