All downloads are free. Search and download functionality uses the official Maven repository.

edu.berkeley.nlp.PCFGLA.smoothing.SmoothAcrossParentBits Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
/**
 * 
 */
package edu.berkeley.nlp.PCFGLA.smoothing;

import java.io.Serializable;
import java.util.List;


import edu.berkeley.nlp.PCFGLA.BinaryCounterTable;
import edu.berkeley.nlp.PCFGLA.BinaryRule;
import edu.berkeley.nlp.PCFGLA.UnaryCounterTable;
import edu.berkeley.nlp.PCFGLA.UnaryRule;

import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.util.ArrayUtil;
import edu.berkeley.nlp.util.Numberer;

/**
 * @author leon
 *
 */
public class SmoothAcrossParentBits implements Smoother, Serializable  {
	
	/**
	 * 
	 */
	private static final long serialVersionUID = 1L;
	double same;
	double[][][] diffWeights;
	double weightBasis = 0.5;
	double totalWeight;
	
	/**
	 * Creates another smoother carrying this instance's current settings.
	 * <p>
	 * The {@code diffWeights} array is handed over by reference rather than
	 * cloned, so the returned smoother shares the weight table with this one.
	 *
	 * @return a new {@code SmoothAcrossParentBits} built from this instance's
	 *         {@code same}, {@code diffWeights}, {@code weightBasis} and
	 *         {@code totalWeight} fields
	 */
	public SmoothAcrossParentBits copy() {
		final SmoothAcrossParentBits duplicate =
				new SmoothAcrossParentBits(this.same, this.diffWeights, this.weightBasis, this.totalWeight);
		return duplicate;
	}
	
	public SmoothAcrossParentBits(double smooth, Tree[] splitTrees) {
		// does not smooth across top-level split, otherwise smooths uniformly
		
		same = 1-smooth;
		//int maxNBits = (int)Math.round(Math.log(maxSubstates)/Math.log(2));
		
		int nStates = splitTrees.length;
		diffWeights = new double [nStates][][];
		for (short state=0; state splitTree = splitTrees[state];
			List allSubstates = splitTree.getYield();
			int nSubstates = 1;
			for (int i=0; i=nSubstates) 
					nSubstates = allSubstates.get(i)+1;
			}
			diffWeights[state] = new double[nSubstates][nSubstates];
			if (nSubstates==1){
				// state has only one substate -> no smoothing
				diffWeights[state][0][0] = 1.0;
			}
			else {
				// smooth only with ones in the same top-level branch
				// TODO: weighted smoothing

				// descend down to first split first
				while (splitTree.getChildren().size()==1) { splitTree = splitTree.getChildren().get(0); }
//				for (short substate=0; substate substatesInBranch = splitTree.getChildren().get(branch).getYield();
//						if (substatesInBranch.contains(substate)){
//							totalWeight = 0;
//							fillWeightsArray(state,substate,1.0,splitTree.getChildren().get(branch));
//							// normalize the weights
//							if (totalWeight==0) continue;
//							for (short substate2 = 0; substate2 substatesInBranch = splitTree.getChildren().get(branch).getYield();
					int total = substatesInBranch.size();
					double normalizedSmooth = smooth/(total-1);

					for (short i : substatesInBranch) {
						for (short j : substatesInBranch) {
							if (i==j) { diffWeights[state][i][j] = same; }
							else { diffWeights[state][i][j] = normalizedSmooth; }
						}
					}
				}
				
				
				
			}
		}
/*		diffWeights = new double[maxNBits+1];
		for (int i=0; i<=maxNBits; i++) {
			diffWeights[i] = Math.pow(2,-i+1)*smooth/maxNBits;
		}*/
	}

	/**
	 * Builds a smoother directly from already-computed field values.
	 * <p>
	 * Every argument is stored exactly as given; the weight array is kept by
	 * reference (no copy is made).
	 *
	 * @param same2 value stored in the {@code same} field
	 * @param diffWeights2 value stored in the {@code diffWeights} field, which
	 *        this class indexes as {@code [state][substate][substate]}
	 * @param weightBasis2 value stored in the {@code weightBasis} field
	 * @param totalWeight2 value stored in the {@code totalWeight} field
	 */
	public SmoothAcrossParentBits(double same2, double[][][] diffWeights2, double weightBasis2, double totalWeight2) {
		// The four assignments are independent of one another.
		totalWeight = totalWeight2;
		weightBasis = weightBasis2;
		diffWeights = diffWeights2;
		same = same2;
	}

	/* (non-Javadoc)
	 * @see edu.berkeley.nlp.PCFGLA.smoothing.Smoother#smooth(edu.berkeley.nlp.util.UnaryCounterTable, edu.berkeley.nlp.util.BinaryCounterTable)
	 */
	public void smooth(UnaryCounterTable unaryCounter, BinaryCounterTable binaryCounter) {
		for (UnaryRule r : unaryCounter.keySet()) {
			double[][] scores = unaryCounter.getCount(r);
			double[][] scopy = new double[scores.length][];
			short pState = r.parentState;
			for (int j=0; j subTree){
		if (subTree.isLeaf()){
			if (subTree.getLabel()==substate) diffWeights[state][substate][substate] = same;
			else { diffWeights[state][substate][subTree.getLabel()] = weight; totalWeight+=weight;}
			return;
		}
		if (subTree.getChildren().size()==1) { 
			fillWeightsArray(state,substate,weight,subTree.getChildren().get(0));
			return;
		}
		for (int branch=0; branch<2; branch++) {	
			Tree branchTree = subTree.getChildren().get(branch);
			List substatesInBranch = branchTree.getYield();
			//int nSubstatesInBranch = substatesInBranch.size();
			if (substatesInBranch.contains(substate)) fillWeightsArray(state,substate,weight,branchTree);
			else fillWeightsArray(state,substate,weight*weightBasis/2.0,branchTree);
		}
	}

	/* (non-Javadoc)
	 * @see edu.berkeley.nlp.PCFGLA.smoothing.Smoother#smooth(short, float[])
	 */
	public void smooth(short tag, double[] scores) {
		double[] scopy = new double[scores.length];
		for (int i=0; i= 0) {
				remappedSmoother.diffWeights[s] = diffWeights[translatedState];
			} else {
				remappedSmoother.diffWeights[s] = new double[1][1];
			}
		}
		return remappedSmoother;
	}

	/**
	 * Translates a state index from one numbering into another.
	 * <p>
	 * The object registered under {@code state} in {@code baseNumberer} is
	 * looked up in {@code translationNumberer}.
	 *
	 * @param state index to resolve through {@code baseNumberer}
	 * @param baseNumberer numberer that {@code state} belongs to
	 * @param translationNumberer numberer to translate into
	 * @return the number {@code translationNumberer} reports for the object,
	 *         narrowed to {@code short}, or {@code -1} when
	 *         {@code translationNumberer.hasSeen} returns {@code false} for it
	 */
	private short translateState(int state, Numberer baseNumberer, Numberer translationNumberer) {
		final Object label = baseNumberer.object(state);
		if (!translationNumberer.hasSeen(label)) {
			// No counterpart in the target numbering.
			return (short) -1;
		}
		return (short) translationNumberer.number(label);
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy