package cmu.arktweetnlp.impl;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import cmu.arktweetnlp.util.BasicFileIO;
import edu.berkeley.nlp.util.ArrayUtil;
import edu.berkeley.nlp.util.Triple;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.util.Pair;
/**
 * This contains
 *
 * (1) Feature and label vocabularies (and therefore knowledge of numberization)
 * (2) Model coefficients (and knowledge of how to flatten them for LBFGS's sake)
 * (3) Decoding/posterior and gradient computation
 */
public class Model {
public Vocabulary labelVocab;
public Vocabulary featureVocab;
/**
* dim: N_labels
**/
public double[] biasCoefs;
/**
* dim: (N_labels+1 x N_labels)
**/
public double[][] edgeCoefs;
/**
* dim: (N_base_features x N_labels)
**/
public double[][] observationFeatureCoefs;
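// How the three coefficient blocks above combine (a summary, matching the
// compute*Scores methods further down): the unnormalized log-space score of
// label k at position t, given previous label 'prev', is
//
//   score(t, k) = biasCoefs[k] + edgeCoefs[prev][k]
//                 + sum over (f, v) in observationFeatures(t) of observationFeatureCoefs[f][k] * v
//
// where (f, v) are (feature ID, feature value) pairs for the token at t.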
public Model() {
labelVocab = new Vocabulary();
featureVocab = new Vocabulary();
}
public int numLabels; //initialized in loadModelFromText
public int startMarker() {
assert labelVocab.isLocked();
int lastLabel = labelVocab.size() - 1;
return lastLabel+1;
}
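// Worked example (hypothetical label names): with label vocabulary {N, V, A},
// numLabels == 3 and startMarker() == 3. The extra row edgeCoefs[3][k] then
// holds the start-of-sentence -> k transition weights, which is why edgeCoefs
// is allocated with numLabels+1 rows below.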
public void lockdownAfterFeatureExtraction() {
labelVocab.lock();
featureVocab.lock();
allocateCoefs(labelVocab.size(), featureVocab.size());
}
public void allocateCoefs(int numLabels, int numObsFeats) {
observationFeatureCoefs = new double[numObsFeats][numLabels];
edgeCoefs = new double[numLabels+1][numLabels];
biasCoefs = new double[numLabels];
}
/**
 * "given labels" i.e. at training time the labels are observed.
 * You hide the current one and predict it given that you know the previous one.
 * So you get the funny incremental posteriors per position that an MEMM uses at training time.
 * (They don't have a proper full-model posterior marginal
 * interpretation like a CRF forward-backward-computed posterior does. no?)
 *
 * @param sentence - must have its .labels set
 * @returns posterior marginals, dim (T x N_label)
 */
public double[][] inferPosteriorGivenLabels(ModelSentence sentence) {
double[][] posterior = new double[sentence.T][labelVocab.size()];
double[] labelScores = new double[numLabels];
for (int t=0; t < sentence.T; t++) {
    computeLabelScores(t, sentence, labelScores);
    // exp-normalize to get a proper distribution over labels at position t
    ArrayMath.logNormalize(labelScores);
    for (int k=0; k < numLabels; k++) {
        posterior[t][k] = Math.exp(labelScores[k]);
    }
}
return posterior;
}
/**
 * Viterbi decoding: fills in sentence.labels with the highest-scoring label
 * sequence. (The predecessor of the first position is the dummy start state,
 * startMarker().)
 */
public void viterbiDecode(ModelSentence sentence) {
int T = sentence.T;
sentence.labels = new int[T];
int[][] bptr = new int[T][numLabels];
double[][] vit = new double[T][numLabels];
double[] labelScores = new double[numLabels];
computeVitLabelScores(0, startMarker(), sentence, labelScores);
ArrayUtil.logNormalize(labelScores);
//initialization
vit[0]=labelScores;
for (int k=0; k < numLabels; k++){
bptr[0][k]=startMarker();
}
for (int t=1; t < T; t++){
double[][] prevcurr = new double[numLabels][numLabels];
for (int s=0; s < numLabels; s++){
computeVitLabelScores(t, s, sentence, prevcurr[s]);
ArrayUtil.logNormalize(prevcurr[s]);
prevcurr[s] = ArrayUtil.add(prevcurr[s], labelScores[s]);
}
for (int s=0; s < numLabels; s++){
double[] sprobs = getColumn(prevcurr, s);
bptr[t][s] = ArrayUtil.argmax(sprobs);
vit[t][s] = sprobs[bptr[t][s]];
}
labelScores=vit[t];
}
sentence.labels[T-1] = ArrayUtil.argmax(vit[T-1]);
//System.out.print(labelVocab.name(sentence.labels[T-1]));
//System.out.println(" with prob: "+Math.exp(vit[T-1][sentence.labels[T-1]]));
int backtrace = bptr[T-1][sentence.labels[T-1]];
for (int i=T-2; (i>=0)&&(backtrace != startMarker()); i--){ //termination
sentence.labels[i] = backtrace;
//System.err.println(labelVocab.name(backtrace)
//+" with prob: "+Math.exp(vit[i][backtrace]));
backtrace = bptr[i][backtrace];
}
assert (backtrace == startMarker());
}
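// Minimal decoding sketch (illustrative only; assumes 'sentence' was already
// populated with .T and .observationFeatures by the feature extractor, and
// that "model.txt" is a model file in the text format read by loadModelFromText):
//
//   Model m = Model.loadModelFromText("model.txt");
//   m.viterbiDecode(sentence);
//   for (int t=0; t < sentence.T; t++)
//       System.out.println(m.labelVocab.name(sentence.labels[t]));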
private double[] getColumn(double[][] matrix, int col) {
    double[] column = new double[matrix.length];
    for (int i=0; i < matrix.length; i++) {
        column[i] = matrix[i][col];
    }
    return column;
}
/** Scores for position t with the previous label fixed to 'prior'. Overwrites labelScores. */
public void computeVitLabelScores(int t, int prior, ModelSentence sentence, double[] labelScores) {
    Arrays.fill(labelScores, 0);
    computeBiasScores(labelScores);
    computeEdgeScores(prior, labelScores);
    computeObservedFeatureScores(t, sentence, labelScores);
}
/** Scores for position t, taking the previous label from the sentence (or the start marker at t=0). */
public void computeLabelScores(int t, ModelSentence sentence, double[] labelScores) {
    int prev = (t==0) ? startMarker() : sentence.labels[t-1];
    computeVitLabelScores(t, prev, sentence, labelScores);
}
/** Adds into labelScores **/
public void computeBiasScores(double[] labelScores) {
    for (int k=0; k < numLabels; k++) {
        labelScores[k] += biasCoefs[k];
    }
}
/** Adds into labelScores **/
public void computeEdgeScores(int prior, double[] labelScores) {
    for (int k=0; k < numLabels; k++) {
        labelScores[k] += edgeCoefs[prior][k];
    }
}
/** Adds into labelScores **/
public void computeObservedFeatureScores(int t, ModelSentence sentence, double[] labelScores) {
    for (int k=0; k < numLabels; k++) {
        for (Pair<Integer, Double> pair : sentence.observationFeatures.get(t)) {
            // labelScores[k] += observationFeatureCoefs[obsFeat][k];
            labelScores[k] += observationFeatureCoefs[pair.first][k] * pair.second;
        }
    }
}
/** Elementwise product of three equal-length vectors. */
public double[] ThreewiseMultiply(double[] a, double[] b, double[] c) {
    if ((a.length != b.length) || (b.length != c.length)) {
        throw new RuntimeException("ThreewiseMultiply: arrays must have equal length");
    }
    double[] result = new double[a.length];
    for (int i=0; i < result.length; i++) {
        result[i] = a[i] * b[i] * c[i];
    }
    return result;
}
/**
 * Training-only.
 *
 * Adds the log-likelihood gradient (direction of higher likelihood) into grad. **/
public void computeGradient(ModelSentence sentence, double[] grad) {
assert grad.length == flatIDsize();
int T = sentence.T;
double[][] posterior = inferPosteriorGivenLabels(sentence);
for (int t=0; t < T; t++) {
    int prevLabel = (t==0) ? startMarker() : sentence.labels[t-1];
    int y = sentence.labels[t];
    for (int k=0; k < numLabels; k++) {
        double empir = (y==k) ? 1 : 0;
        double p = posterior[t][k];
        grad[biasCoef_to_flatID(k)] += empir - p;
        grad[edgeCoef_to_flatID(prevLabel, k)] += empir - p;
        for (Pair<Integer, Double> fv : sentence.observationFeatures.get(t)) {
            grad[observationFeature_to_flatID(fv.first, k)] += (empir - p) * fv.second;
        }
    }
}
}
public double computeLogLik(ModelSentence s) {
double[][] posterior = inferPosteriorGivenLabels(s);
double loglik = 0;
for (int t=0; t < s.T; t++) {
int y = s.labels[t];
loglik += Math.log(posterior[t][y]);
}
return loglik;
}
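// Sketch of how an LBFGS-style trainer could drive the routines above
// (hypothetical driver; the actual trainer lives outside this class, and
// 'trainingData' is an assumed List<ModelSentence>):
//
//   double[] flat = new double[model.flatIDsize()];
//   model.setCoefsFromFlat(flat);             // install the optimizer's current point
//   double loglik = 0;
//   double[] grad = new double[model.flatIDsize()];
//   for (ModelSentence s : trainingData) {
//       loglik += model.computeLogLik(s);     // objective (to maximize)
//       model.computeGradient(s, grad);       // accumulates d(loglik)/d(coef)
//   }
//   // hand (loglik, grad) to the optimizer, using the *_to_flatID layout below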
/////////////////////////////////////////////////////////
// Flat-version conversion routines
// (If this was C++ we could do something clever with memory layout instead to avoid this.)
// (Or we could do said clever things in Java atop a flat representation, but that would be painful.)
public void setCoefsFromFlat(double[] flatCoefs) {
    for (int k=0; k < numLabels; k++) {
        biasCoefs[k] = flatCoefs[biasCoef_to_flatID(k)];
    }
    for (int prev=0; prev < numLabels+1; prev++) {
        for (int cur=0; cur < numLabels; cur++) {
            edgeCoefs[prev][cur] = flatCoefs[edgeCoef_to_flatID(prev, cur)];
        }
    }
    for (int feat=0; feat < featureVocab.size(); feat++) {
        for (int k=0; k < numLabels; k++) {
            observationFeatureCoefs[feat][k] = flatCoefs[observationFeature_to_flatID(feat, k)];
        }
    }
}
/** Total number of coefficients in the flat representation. */
public int flatIDsize() {
    return numLabels + (numLabels+1)*numLabels + featureVocab.size()*numLabels;
}
// Flat layout: [biases][edges][observation features], in that order.
public int biasCoef_to_flatID(int k) {
    return k;
}
public int edgeCoef_to_flatID(int prev, int cur) {
    return numLabels + prev*numLabels + cur;
}
public int observationFeature_to_flatID(int f, int k) {
    return numLabels + (numLabels+1)*numLabels + f*numLabels + k;
}
public static Model loadModelFromText(String filename) throws IOException {
    Model model = new Model();
    // (Reconstructed; assumes BasicFileIO supplies a UTF-8 BufferedReader opener.)
    BufferedReader reader = BasicFileIO.openFileToReadUTF8(filename);
    String line;
    ArrayList<Double> biasCoefs = new ArrayList<Double>();
    ArrayList< Triple<Integer, Integer, Double> > edgeCoefs =
        new ArrayList< Triple<Integer, Integer, Double> >();
    ArrayList< Triple<Integer, Integer, Double> > obsCoefs =
        new ArrayList< Triple<Integer, Integer, Double> >();
while ( (line = reader.readLine()) != null ) {
String[] parts = line.split("\t");
if ( ! parts[0].equals("***BIAS***")) break;
model.labelVocab.num(parts[1]);
biasCoefs.add(Double.parseDouble(parts[2]));
}
model.labelVocab.lock();
model.numLabels = model.labelVocab.size();
do {
String[] parts = line.split("\t");
if ( ! parts[0].equals("***EDGE***")) break;
String[] edgePair = parts[1].split(" ");
int prev = Integer.parseInt(edgePair[0]);
int cur = Integer.parseInt(edgePair[1]);
edgeCoefs.add(new Triple<Integer, Integer, Double>(prev, cur, Double.parseDouble(parts[2])));
} while ( (line = reader.readLine()) != null );
do {
String[] parts = line.split("\t");
int f = model.featureVocab.num(parts[0]);
int k = model.labelVocab.num(parts[1]);
obsCoefs.add(new Triple<Integer, Integer, Double>(f, k, Double.parseDouble(parts[2])));
} while ( (line = reader.readLine()) != null );
model.featureVocab.lock();
model.allocateCoefs(model.labelVocab.size(), model.featureVocab.size());
for (int k=0; k < model.numLabels; k++) {
    model.biasCoefs[k] = biasCoefs.get(k);
}
for (Triple<Integer, Integer, Double> x : edgeCoefs) {
model.edgeCoefs[x.getFirst()][x.getSecond()] = x.getThird();
}
for (Triple<Integer, Integer, Double> x : obsCoefs) {
model.observationFeatureCoefs[x.getFirst()][x.getSecond()] = x.getThird();
}
reader.close();
return model;
}
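// The text format parsed above is tab-separated, with three sections in order.
// Names and values here are illustrative, not from a real model file:
//
//   ***BIAS***   N           -0.25
//   ***BIAS***   V            0.10
//   ***EDGE***   0 1          1.73     <- "prev cur" label IDs (prev may be the start marker)
//   suffix|ing   V            2.41     <- (feature name, label, coefficient)
//
// Bias rows define the label vocabulary; edge rows give transition coefficients;
// all remaining rows are observation-feature coefficients.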
/**
* Copies coefs from sourceModel into destModel.
* For observation features, only copies features that exist in both.
* (Therefore if a feature exists in destModel but not sourceModel, it's not touched.)
*/
public static void copyCoefsForIntersectingFeatures(Model sourceModel, Model destModel) {
int K = sourceModel.numLabels;
// We could do the name-checking intersection trick for label vocabs, but punt for now
if (K != destModel.numLabels) throw new RuntimeException("label vocabs must be same size for warm-start");
for (int k=0; k < K; k++) {
if ( ! destModel.labelVocab.name(k).equals(sourceModel.labelVocab.name(k))) {
throw new RuntimeException("label vocabs must agree for warm-start");
}
}
destModel.biasCoefs = ArrayUtil.copy(sourceModel.biasCoefs);
destModel.edgeCoefs = ArrayUtil.copy(sourceModel.edgeCoefs);
// observation features need the intersection
for (int sourceFeatID=0; sourceFeatID < sourceModel.featureVocab.size(); sourceFeatID++) {
String featName = sourceModel.featureVocab.name(sourceFeatID);
if (destModel.featureVocab.contains(featName)) {
int destFeatID = destModel.featureVocab.num(featName);
destModel.observationFeatureCoefs[destFeatID] = ArrayUtil.copy(
sourceModel.observationFeatureCoefs[sourceFeatID] );
}
}
}
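// Warm-start sketch (illustrative; assumes 'newModel' already ran
// lockdownAfterFeatureExtraction() so its vocabularies and coef arrays exist):
//
//   Model oldModel = Model.loadModelFromText("old_model.txt");  // path illustrative
//   Model.copyCoefsForIntersectingFeatures(oldModel, newModel);
//   // newModel now starts optimization from oldModel's coefficients
//   // wherever feature names (and the label set) agree.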
}