edu.berkeley.nlp.PCFGLA.smoothing.SmoothAcrossParentBits Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of berkeleyparser Show documentation
The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
The newest version!
/**
 * 
 */
package edu.berkeley.nlp.PCFGLA.smoothing;

import java.io.Serializable;
import java.util.List;


import edu.berkeley.nlp.PCFGLA.BinaryCounterTable;
import edu.berkeley.nlp.PCFGLA.BinaryRule;
import edu.berkeley.nlp.PCFGLA.UnaryCounterTable;
import edu.berkeley.nlp.PCFGLA.UnaryRule;

import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.util.ArrayUtil;
import edu.berkeley.nlp.util.Numberer;

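/**
 * Smoother whose per-state substate weight matrices ({@code diffWeights}) are
 * built from the states' split trees. As built by the main constructor, weights
 * are not shared across a state's top-level split and are uniform within each
 * top-level branch.
 *
 * @author leon
 */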
public class SmoothAcrossParentBits implements Smoother, Serializable  {
	
	/**
	 * Serialization version identifier.
	 */
	private static final long serialVersionUID = 1L;
	/** Weight a substate keeps for itself; the main constructor sets it to {@code 1 - smooth}. */
	double same;
	/** Smoothing weights, indexed as {@code [state][substate][otherSubstate]}. */
	double[][][] diffWeights;
	/**
	 * Damping factor used by {@code fillWeightsArray}: the weight is multiplied by
	 * {@code weightBasis / 2.0} when descending into a branch that does not contain the substate.
	 */
	double weightBasis = 0.5;
	/** Running sum of the off-diagonal weights assigned by {@code fillWeightsArray}. */
	double totalWeight;
	
	/**
	 * Creates a new smoother carrying this instance's current field values.
	 * The {@code diffWeights} array is handed over by reference, so the returned
	 * object shares it with this instance.
	 *
	 * @return a new {@code SmoothAcrossParentBits} with the same settings
	 */
	public SmoothAcrossParentBits copy() {
		final SmoothAcrossParentBits duplicate =
				new SmoothAcrossParentBits(this.same, this.diffWeights, this.weightBasis, this.totalWeight);
		return duplicate;
	}
	
	/**
	 * Builds the per-state weight matrices from the split trees.
	 * Sets {@code same} to {@code 1 - smooth}. A state with a single substate gets
	 * weight 1.0 on itself. Otherwise, within each top-level branch of the state's
	 * split tree, a substate keeps {@code same} for itself and gives
	 * {@code smooth / (branchSize - 1)} to every other substate of that branch.
	 * Entries linking substates of different top-level branches are never assigned
	 * and therefore stay at 0.
	 *
	 * NOTE(review): several lines below (loop headers and part of the commented-out
	 * code) look truncated, as if text between '&lt;' and '&gt;' was lost. Restore
	 * them from the upstream source before compiling.
	 *
	 * @param smooth amount of weight taken away from a substate's own entry
	 * @param splitTrees one split tree per state; the values returned by
	 *        {@code getYield()} are used as substate indices
	 */
	public SmoothAcrossParentBits(double smooth, Tree[] splitTrees) {
		// does not smooth across top-level split, otherwise smooths uniformly
		
		same = 1-smooth;
		//int maxNBits = (int)Math.round(Math.log(maxSubstates)/Math.log(2));
		
		int nStates = splitTrees.length;
		diffWeights = new double [nStates][][];
		// NOTE(review): loop header appears truncated -- presumably iterates state over
		// [0, nStates) and declares splitTree inside the loop body; confirm against upstream.
		for (short state=0; state splitTree = splitTrees[state];
			List allSubstates = splitTree.getYield();
			int nSubstates = 1;
			// NOTE(review): loop header and condition appear truncated -- presumably scans
			// allSubstates so that nSubstates ends up as (largest substate index) + 1; confirm.
			for (int i=0; i=nSubstates) 
					nSubstates = allSubstates.get(i)+1;
			}
			// square matrix over this state's substates; entries start at 0
			diffWeights[state] = new double[nSubstates][nSubstates];
			if (nSubstates==1){
				// state has only one substate -> no smoothing
				diffWeights[state][0][0] = 1.0;
			}
			else {
				// smooth only with ones in the same top-level branch
				// TODO: weighted smoothing

				// descend down to first split first
				while (splitTree.getChildren().size()==1) { splitTree = splitTree.getChildren().get(0); }
//				for (short substate=0; substate substatesInBranch = splitTree.getChildren().get(branch).getYield();
//						if (substatesInBranch.contains(substate)){
//							totalWeight = 0;
//							fillWeightsArray(state,substate,1.0,splitTree.getChildren().get(branch));
//							// normalize the weights
//							if (totalWeight==0) continue;
//							for (short substate2 = 0; substate2 substatesInBranch = splitTree.getChildren().get(branch).getYield();
					// NOTE(review): the loop over the top-level branches and the declaration of
					// substatesInBranch appear to have been swallowed by the truncated comment
					// line above; confirm against upstream.
					int total = substatesInBranch.size();
					// When the branch holds a single substate, total-1 is 0 and this value is
					// not finite; it is unused in that case because only the i==j entry is written.
					double normalizedSmooth = smooth/(total-1);

					// uniform weights inside the branch: `same` on the diagonal,
					// normalizedSmooth everywhere else
					for (short i : substatesInBranch) {
						for (short j : substatesInBranch) {
							if (i==j) { diffWeights[state][i][j] = same; }
							else { diffWeights[state][i][j] = normalizedSmooth; }
						}
					}
				}
				
				
				
			}
		}
/*		diffWeights = new double[maxNBits+1];
		for (int i=0; i<=maxNBits; i++) {
			diffWeights[i] = Math.pow(2,-i+1)*smooth/maxNBits;
		}*/
	}

	/**
	 * Field-wise constructor, used by {@link #copy()}. The arguments are stored as
	 * given; {@code diffWeights2} is kept by reference, not cloned.
	 *
	 * @param same2 value for {@code same}
	 * @param diffWeights2 value for {@code diffWeights}
	 * @param weightBasis2 value for {@code weightBasis}
	 * @param totalWeight2 value for {@code totalWeight}
	 */
	public SmoothAcrossParentBits(double same2, double[][][] diffWeights2, double weightBasis2, double totalWeight2) {
		diffWeights = diffWeights2;
		same = same2;
		totalWeight = totalWeight2;
		weightBasis = weightBasis2;
	}

	/* (non-Javadoc)
	 * @see edu.berkeley.nlp.PCFGLA.smoothing.Smoother#smooth(edu.berkeley.nlp.util.UnaryCounterTable, edu.berkeley.nlp.util.BinaryCounterTable)
	 */
	// NOTE(review): only the opening of this method is present. The line that starts
	// the loop over j is cut off and runs straight into the parameter list of what
	// looks like fillWeightsArray(state, substate, weight, subTree) -- presumably text
	// between '<' and '>' was lost. Restore the missing code from the upstream source.
	public void smooth(UnaryCounterTable unaryCounter, BinaryCounterTable binaryCounter) {
		for (UnaryRule r : unaryCounter.keySet()) {
			// current counts for the rule, plus a fresh outer array of the same length
			double[][] scores = unaryCounter.getCount(r);
			double[][] scopy = new double[scores.length][];
			short pState = r.parentState;
			for (int j=0; j subTree){
		// --- from here on: body of the recursive weight-filling helper ---
		// Leaf: the leaf label is used as a substate index. The substate's own entry
		// gets `same`; any other leaf gets `weight`, which is also added to totalWeight.
		if (subTree.isLeaf()){
			if (subTree.getLabel()==substate) diffWeights[state][substate][substate] = same;
			else { diffWeights[state][substate][subTree.getLabel()] = weight; totalWeight+=weight;}
			return;
		}
		// Single child: pass through with the weight unchanged.
		if (subTree.getChildren().size()==1) { 
			fillWeightsArray(state,substate,weight,subTree.getChildren().get(0));
			return;
		}
		// Only the first two children are visited. The branch whose yield contains the
		// substate keeps the current weight; the other branch is damped by weightBasis/2.
		for (int branch=0; branch<2; branch++) {	
			Tree branchTree = subTree.getChildren().get(branch);
			List substatesInBranch = branchTree.getYield();
			//int nSubstatesInBranch = substatesInBranch.size();
			if (substatesInBranch.contains(substate)) fillWeightsArray(state,substate,weight,branchTree);
			else fillWeightsArray(state,substate,weight*weightBasis/2.0,branchTree);
		}
	}

	/* (non-Javadoc)
	 * @see edu.berkeley.nlp.PCFGLA.smoothing.Smoother#smooth(short, double[])
	 */
	// NOTE(review): only the opening of this method is present. The line that starts
	// the loop over i is cut off and runs into the tail of a different method, one
	// that fills and returns `remappedSmoother` -- presumably text between '<' and '>'
	// was lost. Restore the missing code from the upstream source.
	public void smooth(short tag, double[] scores) {
		double[] scopy = new double[scores.length];
		for (int i=0; i= 0) {
				// state s has a counterpart: reuse (by reference) that state's weight matrix
				remappedSmoother.diffWeights[s] = diffWeights[translatedState];
			} else {
				// no counterpart: give the state a 1x1 matrix, whose single entry is 0
				remappedSmoother.diffWeights[s] = new double[1][1];
			}
		}
		return remappedSmoother;
	}

	/**
	 * Maps a state index from one numbering to another by way of the object the
	 * index stands for.
	 *
	 * @param state index to look up in {@code baseNumberer}
	 * @param baseNumberer numberer that {@code state} belongs to
	 * @param translationNumberer numberer to translate into
	 * @return the object's number in {@code translationNumberer} cast to {@code short},
	 *         or -1 when {@code translationNumberer} has not seen the object
	 */
	private short translateState(int state, Numberer baseNumberer, Numberer translationNumberer) {
		final Object label = baseNumberer.object(state);
		// guard: an object unknown to the target numbering has no translation
		if (!translationNumberer.hasSeen(label)) {
			return (short) -1;
		}
		return (short) translationNumberer.number(label);
	}

}