All downloads are FREE. The search and download functionality uses the official Maven repository.

edu.berkeley.nlp.crf.Counts Maven / Gradle / Ivy

Go to download

The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).

The newest version!
package edu.berkeley.nlp.crf;

import java.util.ArrayList;
import java.util.List;

import edu.berkeley.nlp.classify.Encoding;
import edu.berkeley.nlp.classify.FeatureExtractor;
import edu.berkeley.nlp.util.Counter;
import edu.berkeley.nlp.util.Logger;
import edu.berkeley.nlp.util.Pair;

public class Counts { 
	/** Label encoding; this class queries it for the number of labels, a label's index, and the label at an index. */
	private final Encoding encoding;
	/** Feature extractor applied to the vertex instance at each sequence position. */
	private final FeatureExtractor vertexExtractor;
	/** Feature extractor applied to edge instances, which are obtained from a sequence position and a label. */
	private final FeatureExtractor edgeExtractor;
	/** Inference helper created in the constructor from the same encoding and extractors; supplies alphas, betas, the normalization constant and posteriors. */
	private final Inference inf;

	/**
	 * Creates a counts helper around the given encoding and feature extractors.
	 * The three arguments are stored on this instance and are also handed to a
	 * newly created {@code Inference} object.
	 *
	 * @param encoding        label encoding to store and pass to the inference object
	 * @param vertexExtractor feature extractor used for vertex instances
	 * @param edgeExtractor   feature extractor used for edge instances
	 */
	public Counts(Encoding encoding, FeatureExtractor vertexExtractor, FeatureExtractor edgeExtractor) {
		// The inference object is built from the constructor arguments, not from
		// the fields, so the order of these assignments does not matter.
		this.inf = new Inference(encoding, vertexExtractor, edgeExtractor);
		this.edgeExtractor = edgeExtractor;
		this.vertexExtractor = vertexExtractor;
		this.encoding = encoding;
	}
	
	/**
	 * Accumulates feature counts per gold label over the given labeled sequences.
	 *
	 * For every visited position i of every sequence, the vertex features at i are
	 * added to the counter stored at the index of the gold label of i. For
	 * positions with i greater than 0, the edge features extracted from the edge
	 * instance for (i, gold label of i-1) are added to that same counter.
	 *
	 * NOTE(review): the text of this method is damaged. Generic type arguments and
	 * parts of the two index-loop headers (everything between a less-than sign and
	 * the following greater-than sign) are missing, so it does not compile as
	 * shown. Restore it from the original source before relying on it.
	 *
	 * @param sequences labeled sequences to count over
	 * @return the list of counters, indexed by label index
	 */
	public List> getEmpiricalCounts(List> sequences) {
		int numLabels = encoding.getNumLabels();
		// numLabels is passed as the initial capacity of the list.
		List> counts = new ArrayList>(numLabels);
		// NOTE(review): damaged loop header/body; presumably adds one empty Counter
		// per label index so counts.get(...) below is valid — confirm against the original.
		for (int l=0; l());
		}
		for (LabeledInstanceSequence s: sequences) {
			// NOTE(review): damaged loop header; presumably iterates i over the
			// positions of s and declares vertexFeatures as a Counter — confirm.
			for (int i=0; i vertexFeatures = vertexExtractor.extractFeatures(s.getVertexInstance(i));
				int goldLabelIndex = encoding.getLabelIndex(s.getGoldLabel(i));
				counts.get(goldLabelIndex).incrementAll(vertexFeatures);
				// The first position has no predecessor, so edge features start at i = 1.
				if (i>0) {
					// The edge instance is built from the position and the previous gold label;
					// its features are credited to the current gold label's counter.
					Counter edgeFeatures = edgeExtractor.extractFeatures(s.getEdgeInstance(i, s.getGoldLabel(i-1)));
					counts.get(goldLabelIndex).incrementAll(edgeFeatures);
				}
			}
		}
		return counts;
	}
	
	public Pair>> getLogNormalizationAndExpectedCounts(List> sequences, double[] w) {
		int numLabels = encoding.getNumLabels();
		List> counts = new ArrayList>(numLabels);
		for (int l=0; l());
		}
		double totalLogZ = 0.0;
		Logger.startTrack("Computing expected counts");
		int index = 0;
		for (InstanceSequence s : sequences) {
			double[][] alpha = inf.getAlphas(s, w);
			double[][] beta = inf.getBetas(s, w);
			totalLogZ += Math.log(inf.getNormalizationConstant(alpha, beta));
			double[][] vertexPosteriors = inf.getVertexPosteriors(alpha, beta);
			double[][][] edgePosteriors = inf.getEdgePosteriors(s, w, alpha, beta);
			for (int i=0; i vertexFeatures = vertexExtractor.extractFeatures(s.getVertexInstance(i));
				for (int l=0; l0) {
					for (int pl=0; pl edgeFeatures = edgeExtractor.extractFeatures(s.getEdgeInstance(i, encoding.getLabel(pl)));
						for (int cl=0; cl




© 2015 - 2025 Weber Informatics LLC | Privacy Policy