edu.berkeley.nlp.crf.Counts Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of berkeleyparser Show documentation
Show all versions of berkeleyparser Show documentation
The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
The newest version!
package edu.berkeley.nlp.crf;
import java.util.ArrayList;
import java.util.List;
import edu.berkeley.nlp.classify.Encoding;
import edu.berkeley.nlp.classify.FeatureExtractor;
import edu.berkeley.nlp.util.Counter;
import edu.berkeley.nlp.util.Logger;
import edu.berkeley.nlp.util.Pair;
public class Counts {
/** Label encoding; queried below for the number of labels and for label/index conversion. */
private final Encoding encoding;
/** Produces the features of a vertex instance (one per sequence position). */
private final FeatureExtractor vertexExtractor;
/** Produces the features of an edge instance (a position paired with a previous label). */
private final FeatureExtractor edgeExtractor;
/** Inference helper created in the constructor from the encoding and both extractors; supplies alphas, betas, the normalization constant and posteriors. */
private final Inference inf;
/**
 * Creates a count collector over the given label encoding and feature extractors.
 *
 * An {@code Inference} instance is built from the same three arguments and kept
 * for the expected-count computation.
 *
 * @param encoding        label encoding used to index the per-label counters
 * @param vertexExtractor extractor applied to vertex instances
 * @param edgeExtractor   extractor applied to edge instances
 */
public Counts(Encoding encoding, FeatureExtractor vertexExtractor, FeatureExtractor edgeExtractor) {
    // The inference helper depends only on the arguments, so it can be wired up first.
    this.inf = new Inference(encoding, vertexExtractor, edgeExtractor);
    this.edgeExtractor = edgeExtractor;
    this.vertexExtractor = vertexExtractor;
    this.encoding = encoding;
}
/**
 * Accumulates empirical feature counts from the gold labels of the given sequences.
 *
 * For each position, the vertex features are added to the counter of that
 * position's gold label. For each position after the first, the edge features
 * extracted for (position, previous gold label) are added to that same counter.
 *
 * NOTE(review): this listing appears to have lost the text between angle
 * brackets (generic type arguments and parts of two loop headers), so the
 * method does not compile as shown. Restore it from the original source
 * before editing the logic.
 *
 * @param sequences labeled sequences supplying vertex/edge instances and gold labels
 * @return the list of counters, addressed by {@code encoding.getLabelIndex(label)}
 */
public List> getEmpiricalCounts(List extends LabeledInstanceSequence> sequences) {
int numLabels = encoding.getNumLabels();
// numLabels is only the initial capacity here; the list still has to be filled.
List> counts = new ArrayList>(numLabels);
// NOTE(review): loop header and body are truncated. Presumably this iterates l over
// [0, numLabels) and appends one empty Counter per label; confirm against the original.
for (int l=0; l());
}
for (LabeledInstanceSequence s: sequences) {
// NOTE(review): the upper bound on i and the declared type of vertexFeatures are
// missing from this line; presumably i ranges over the positions of s. TODO confirm.
for (int i=0; i vertexFeatures = vertexExtractor.extractFeatures(s.getVertexInstance(i));
int goldLabelIndex = encoding.getLabelIndex(s.getGoldLabel(i));
counts.get(goldLabelIndex).incrementAll(vertexFeatures);
// Edge features are only collected from the second position on. They are extracted
// given the previous gold label but credited to the current gold label's counter.
if (i>0) {
Counter edgeFeatures = edgeExtractor.extractFeatures(s.getEdgeInstance(i, s.getGoldLabel(i-1)));
counts.get(goldLabelIndex).incrementAll(edgeFeatures);
}
}
}
return counts;
}
public Pair>> getLogNormalizationAndExpectedCounts(List extends InstanceSequence> sequences, double[] w) {
int numLabels = encoding.getNumLabels();
List> counts = new ArrayList>(numLabels);
for (int l=0; l());
}
double totalLogZ = 0.0;
Logger.startTrack("Computing expected counts");
int index = 0;
for (InstanceSequence s : sequences) {
double[][] alpha = inf.getAlphas(s, w);
double[][] beta = inf.getBetas(s, w);
totalLogZ += Math.log(inf.getNormalizationConstant(alpha, beta));
double[][] vertexPosteriors = inf.getVertexPosteriors(alpha, beta);
double[][][] edgePosteriors = inf.getEdgePosteriors(s, w, alpha, beta);
for (int i=0; i vertexFeatures = vertexExtractor.extractFeatures(s.getVertexInstance(i));
for (int l=0; l0) {
for (int pl=0; pl edgeFeatures = edgeExtractor.extractFeatures(s.getEdgeInstance(i, encoding.getLabel(pl)));
for (int cl=0; cl
© 2015 - 2025 Weber Informatics LLC | Privacy Policy