edu.berkeley.nlp.crf.Counts Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of berkeleyparser Show documentation

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!

package edu.berkeley.nlp.crf;

import java.util.ArrayList;
import java.util.List;

import edu.berkeley.nlp.classify.Encoding;
import edu.berkeley.nlp.classify.FeatureExtractor;
import edu.berkeley.nlp.util.Counter;
import edu.berkeley.nlp.util.Logger;
import edu.berkeley.nlp.util.Pair;

public class Counts { 
	// Label encoding; the methods below use it for the number of labels and for label <-> index lookups.
	private final Encoding encoding;
	// Extracts features from the per-position vertex instances of a sequence.
	private final FeatureExtractor vertexExtractor;
	// Extracts features from edge instances (a position paired with a previous label).
	private final FeatureExtractor edgeExtractor;
	// Inference helper created in the constructor from the three fields above; the expected-count
	// computation asks it for alphas, betas, the normalization constant and the posteriors.
	private final Inference inf;

	/**
	 * Creates a count collector over the given label encoding and feature extractors.
	 * The same three collaborators are also handed to a freshly created
	 * {@code Inference} instance that this object keeps for later use.
	 *
	 * @param encoding        label encoding used for label/index lookups
	 * @param vertexExtractor feature extractor applied to vertex instances
	 * @param edgeExtractor   feature extractor applied to edge instances
	 */
	public Counts(Encoding encoding, FeatureExtractor vertexExtractor, FeatureExtractor edgeExtractor) {
		// The inference helper depends only on the constructor arguments, so it can be built up front.
		this.inf = new Inference(encoding, vertexExtractor, edgeExtractor);
		this.edgeExtractor = edgeExtractor;
		this.vertexExtractor = vertexExtractor;
		this.encoding = encoding;
	}
	
	/**
	 * Accumulates empirical feature counts from gold-labeled sequences.
	 *
	 * For each position of each sequence, the vertex features of that position are
	 * added to the counter selected by the index of the position's gold label. For
	 * every position after the first, the edge features extracted for the pair
	 * (position, gold label of the previous position) are added to that same counter.
	 *
	 * NOTE(review): this listing appears to have lost its generic type arguments and
	 * parts of both loop headers (the text that stood between angle brackets is
	 * missing), so the method does not compile as shown. Restore it from the
	 * original source before building.
	 *
	 * @param sequences the labeled sequences to count over
	 * @return the list of counters, indexed by label index (presumably one counter
	 *         per label; the loop that fills the list is truncated here, so confirm)
	 */
	public List> getEmpiricalCounts(List> sequences) {
		int numLabels = encoding.getNumLabels();
		// The list is created with numLabels as its initial capacity.
		List> counts = new ArrayList>(numLabels);
		// NOTE(review): truncated loop header; presumably adds one empty counter per label. Confirm.
		for (int l=0; l());
		}
		for (LabeledInstanceSequence s: sequences) {
			// NOTE(review): the loop bound over positions i is missing from this line.
			for (int i=0; i vertexFeatures = vertexExtractor.extractFeatures(s.getVertexInstance(i));
				int goldLabelIndex = encoding.getLabelIndex(s.getGoldLabel(i));
				// Vertex features are credited to the gold label of the current position.
				counts.get(goldLabelIndex).incrementAll(vertexFeatures);
				// The first position has no predecessor, so edge features start at i = 1.
				if (i>0) {
					Counter edgeFeatures = edgeExtractor.extractFeatures(s.getEdgeInstance(i, s.getGoldLabel(i-1)));
					// Edge features are credited to the current gold label as well.
					counts.get(goldLabelIndex).incrementAll(edgeFeatures);
				}
			}
		}
		return counts;
	}
	
	public Pair>> getLogNormalizationAndExpectedCounts(List> sequences, double[] w) {
		int numLabels = encoding.getNumLabels();
		List> counts = new ArrayList>(numLabels);
		for (int l=0; l());
		}
		double totalLogZ = 0.0;
		Logger.startTrack("Computing expected counts");
		int index = 0;
		for (InstanceSequence s : sequences) {
			double[][] alpha = inf.getAlphas(s, w);
			double[][] beta = inf.getBetas(s, w);
			totalLogZ += Math.log(inf.getNormalizationConstant(alpha, beta));
			double[][] vertexPosteriors = inf.getVertexPosteriors(alpha, beta);
			double[][][] edgePosteriors = inf.getEdgePosteriors(s, w, alpha, beta);
			for (int i=0; i vertexFeatures = vertexExtractor.extractFeatures(s.getVertexInstance(i));
				for (int l=0; l0) {
					for (int pl=0; pl edgeFeatures = edgeExtractor.extractFeatures(s.getEdgeInstance(i, encoding.getLabel(pl)));
						for (int cl=0; cl