package cmu.arktweetnlp.impl;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import cmu.arktweetnlp.util.BasicFileIO;
import edu.berkeley.nlp.util.ArrayUtil;
import edu.berkeley.nlp.util.Triple;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.util.Pair;
/**
 * This contains
 *
 * (1) Feature and label vocabularies (and therefore knowledge of numberization)
 * (2) Model coefficients (and knowledge of how to flatten them for LBFGS's sake)
 * (3) Decoding/posterior and gradient computation
 */
public class Model {
public Vocabulary labelVocab;
public Vocabulary featureVocab;
/**
* dim: N_labels
**/
public double[] biasCoefs;
/**
* dim: (N_labels+1 x N_labels)
**/
public double[][] edgeCoefs;
/**
* dim: (N_base_features x N_labels)
**/
public double[][] observationFeatureCoefs;
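// How the three coefficient blocks above combine (a summary, matching the
// compute*Scores methods further down): the unnormalized log-space score of
// label k at position t, given previous label 'prev', is
//
//   score(t, k) = biasCoefs[k] + edgeCoefs[prev][k]
//                 + sum over (f, v) in observationFeatures(t) of observationFeatureCoefs[f][k] * v
//
// where (f, v) are (feature ID, feature value) pairs for the token at t.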
public Model() {
labelVocab = new Vocabulary();
featureVocab = new Vocabulary();
}
public int numLabels; //initialized in loadModelFromText
public int startMarker() {
assert labelVocab.isLocked();
int lastLabel = labelVocab.size() - 1;
return lastLabel+1;
}
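// Worked example (hypothetical label names): with label vocabulary {N, V, A},
// numLabels == 3 and startMarker() == 3. The extra row edgeCoefs[3][k] then
// holds the start-of-sentence -> k transition weights, which is why edgeCoefs
// is allocated with numLabels+1 rows below.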
public void lockdownAfterFeatureExtraction() {
labelVocab.lock();
featureVocab.lock();
allocateCoefs(labelVocab.size(), featureVocab.size());
}
public void allocateCoefs(int numLabels, int numObsFeats) {
observationFeatureCoefs = new double[numObsFeats][numLabels];
edgeCoefs = new double[numLabels+1][numLabels];
biasCoefs = new double[numLabels];
}
/**
 * "given labels" i.e. at training time the labels are observed.
 * You hide the current one and predict it given that you know the previous one.
 * So you get the funny incremental posteriors per position that an MEMM uses at training time.
 * (They don't have a proper full-model posterior marginal
 * interpretation like a CRF forward-backward-computed posterior does. no?)
 *
 * @param sentence - must have its .labels set
 * @returns posterior marginals, dim (T x N_label)
 */
public double[][] inferPosteriorGivenLabels(ModelSentence sentence) {
double[][] posterior = new double[sentence.T][labelVocab.size()];
double[] labelScores = new double[numLabels];
for (int t=0; t < sentence.T; t++) {
    computeLabelScores(t, sentence, labelScores);
    // exp-normalize to get a proper distribution over labels at position t
    ArrayMath.logNormalize(labelScores);
    for (int k=0; k < numLabels; k++) {
        posterior[t][k] = Math.exp(labelScores[k]);
    }
}
return posterior;
}
/**
 * Viterbi decoding: fills in sentence.labels with the highest-scoring label
 * sequence. (The predecessor of the first position is the dummy start state,
 * startMarker().)
 */
public void viterbiDecode(ModelSentence sentence) {
int T = sentence.T;
sentence.labels = new int[T];
int[][] bptr = new int[T][numLabels];
double[][] vit = new double[T][numLabels];
double[] labelScores = new double[numLabels];
computeVitLabelScores(0, startMarker(), sentence, labelScores);
ArrayUtil.logNormalize(labelScores);
//initialization
vit[0]=labelScores;
for (int k=0; k < numLabels; k++){
bptr[0][k]=startMarker();
}
for (int t=1; t < T; t++){
double[][] prevcurr = new double[numLabels][numLabels];
for (int s=0; s < numLabels; s++){
computeVitLabelScores(t, s, sentence, prevcurr[s]);
ArrayUtil.logNormalize(prevcurr[s]);
prevcurr[s] = ArrayUtil.add(prevcurr[s], labelScores[s]);
}
for (int s=0; s < numLabels; s++){
double[] sprobs = getColumn(prevcurr, s);
bptr[t][s] = ArrayUtil.argmax(sprobs);
vit[t][s] = sprobs[bptr[t][s]];
}
labelScores=vit[t];
}
sentence.labels[T-1] = ArrayUtil.argmax(vit[T-1]);
//System.out.print(labelVocab.name(sentence.labels[T-1]));
//System.out.println(" with prob: "+Math.exp(vit[T-1][sentence.labels[T-1]]));
int backtrace = bptr[T-1][sentence.labels[T-1]];
for (int i=T-2; (i>=0)&&(backtrace != startMarker()); i--){ //termination
sentence.labels[i] = backtrace;
//System.err.println(labelVocab.name(backtrace)
//+" with prob: "+Math.exp(vit[i][backtrace]));
backtrace = bptr[i][backtrace];
}
assert (backtrace == startMarker());
}
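// Minimal decoding sketch (illustrative only; assumes 'sentence' was already
// populated with .T and .observationFeatures by the feature extractor, and
// that "model.txt" is a model file in the text format read by loadModelFromText):
//
//   Model m = Model.loadModelFromText("model.txt");
//   m.viterbiDecode(sentence);
//   for (int t=0; t < sentence.T; t++)
//       System.out.println(m.labelVocab.name(sentence.labels[t]));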
private double[] getColumn(double[][] matrix, int col) {
    double[] column = new double[matrix.length];
    for (int i=0; i < matrix.length; i++) {
        column[i] = matrix[i][col];
    }
    return column;
}
/** Scores for position t with the previous label fixed to 'prior'. Overwrites labelScores. */
public void computeVitLabelScores(int t, int prior, ModelSentence sentence, double[] labelScores) {
    Arrays.fill(labelScores, 0);
    computeBiasScores(labelScores);
    computeEdgeScores(prior, labelScores);
    computeObservedFeatureScores(t, sentence, labelScores);
}
/** Scores for position t, taking the previous label from the sentence (or the start marker at t=0). */
public void computeLabelScores(int t, ModelSentence sentence, double[] labelScores) {
    int prev = (t==0) ? startMarker() : sentence.labels[t-1];
    computeVitLabelScores(t, prev, sentence, labelScores);
}
/** Adds into labelScores **/
public void computeBiasScores(double[] labelScores) {
    for (int k=0; k < numLabels; k++) {
        labelScores[k] += biasCoefs[k];
    }
}
/** Adds into labelScores **/
public void computeEdgeScores(int prior, double[] labelScores) {
    for (int k=0; k < numLabels; k++) {
        labelScores[k] += edgeCoefs[prior][k];
    }
}
/** Adds into labelScores **/
public void computeObservedFeatureScores(int t, ModelSentence sentence, double[] labelScores) {
    for (int k=0; k < numLabels; k++) {
        for (Pair<Integer, Double> pair : sentence.observationFeatures.get(t)) {
            // labelScores[k] += observationFeatureCoefs[obsFeat][k];
            labelScores[k] += observationFeatureCoefs[pair.first][k] * pair.second;
        }
    }
}
/** Elementwise product of three equal-length vectors. */
public double[] ThreewiseMultiply(double[] a, double[] b, double[] c) {
    if ((a.length != b.length) || (b.length != c.length)) {
        throw new RuntimeException("ThreewiseMultiply: arrays must have equal length");
    }
    double[] result = new double[a.length];
    for (int i=0; i < result.length; i++) {
        result[i] = a[i] * b[i] * c[i];
    }
    return result;
}
/**
 * Training-only.
 *
 * Adds the log-likelihood gradient (direction of higher likelihood) into grad. **/
public void computeGradient(ModelSentence sentence, double[] grad) {
assert grad.length == flatIDsize();
int T = sentence.T;
double[][] posterior = inferPosteriorGivenLabels(sentence);
for (int t=0; t < T; t++) {
    int prevLabel = (t==0) ? startMarker() : sentence.labels[t-1];
    int y = sentence.labels[t];
    for (int k=0; k < numLabels; k++) {
        double empir = (y==k) ? 1 : 0;
        double p = posterior[t][k];
        grad[biasCoef_to_flatID(k)] += empir - p;
        grad[edgeCoef_to_flatID(prevLabel, k)] += empir - p;
        for (Pair<Integer, Double> fv : sentence.observationFeatures.get(t)) {
            grad[observationFeature_to_flatID(fv.first, k)] += (empir - p) * fv.second;
        }
    }
}
}
public double computeLogLik(ModelSentence s) {
double[][] posterior = inferPosteriorGivenLabels(s);
double loglik = 0;
for (int t=0; t < s.T; t++) {
int y = s.labels[t];
loglik += Math.log(posterior[t][y]);
}
return loglik;
}
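// Sketch of how an LBFGS-style trainer could drive the routines above
// (hypothetical driver; the actual trainer lives outside this class, and
// 'trainingData' is an assumed List<ModelSentence>):
//
//   double[] flat = new double[model.flatIDsize()];
//   model.setCoefsFromFlat(flat);             // install the optimizer's current point
//   double loglik = 0;
//   double[] grad = new double[model.flatIDsize()];
//   for (ModelSentence s : trainingData) {
//       loglik += model.computeLogLik(s);     // objective (to maximize)
//       model.computeGradient(s, grad);       // accumulates d(loglik)/d(coef)
//   }
//   // hand (loglik, grad) to the optimizer, using the *_to_flatID layout below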
/////////////////////////////////////////////////////////
// Flat-version conversion routines
// (If this was C++ we could do something clever with memory layout instead to avoid this.)
// (Or we could do said clever things in Java atop a flat representation, but that would be painful.)
public void setCoefsFromFlat(double[] flatCoefs) {
    for (int k=0; k < numLabels; k++) {
        biasCoefs[k] = flatCoefs[biasCoef_to_flatID(k)];
    }
    for (int prev=0; prev < numLabels+1; prev++) {
        for (int cur=0; cur < numLabels; cur++) {
            edgeCoefs[prev][cur] = flatCoefs[edgeCoef_to_flatID(prev, cur)];
        }
    }
    for (int feat=0; feat < featureVocab.size(); feat++) {
        for (int k=0; k < numLabels; k++) {
            observationFeatureCoefs[feat][k] = flatCoefs[observationFeature_to_flatID(feat, k)];
        }
    }
}
/** Total number of coefficients in the flat representation. */
public int flatIDsize() {
    return numLabels + (numLabels+1)*numLabels + featureVocab.size()*numLabels;
}
// Flat layout: [biases][edges][observation features], in that order.
public int biasCoef_to_flatID(int k) {
    return k;
}
public int edgeCoef_to_flatID(int prev, int cur) {
    return numLabels + prev*numLabels + cur;
}
public int observationFeature_to_flatID(int f, int k) {
    return numLabels + (numLabels+1)*numLabels + f*numLabels + k;
}
public static Model loadModelFromText(String filename) throws IOException {
    Model model = new Model();
    // (Reconstructed; assumes BasicFileIO supplies a UTF-8 BufferedReader opener.)
    BufferedReader reader = BasicFileIO.openFileToReadUTF8(filename);
    String line;
    ArrayList<Double> biasCoefs = new ArrayList<Double>();
    ArrayList< Triple<Integer, Integer, Double> > edgeCoefs =
        new ArrayList< Triple<Integer, Integer, Double> >();
    ArrayList< Triple<Integer, Integer, Double> > obsCoefs =
        new ArrayList< Triple<Integer, Integer, Double> >();
while ( (line = reader.readLine()) != null ) {
String[] parts = line.split("\t");
if ( ! parts[0].equals("***BIAS***")) break;
model.labelVocab.num(parts[1]);
biasCoefs.add(Double.parseDouble(parts[2]));
}
model.labelVocab.lock();
model.numLabels = model.labelVocab.size();
do {
String[] parts = line.split("\t");
if ( ! parts[0].equals("***EDGE***")) break;
String[] edgePair = parts[1].split(" ");
int prev = Integer.parseInt(edgePair[0]);
int cur = Integer.parseInt(edgePair[1]);
edgeCoefs.add(new Triple<Integer, Integer, Double>(prev, cur, Double.parseDouble(parts[2])));
} while ( (line = reader.readLine()) != null );
do {
String[] parts = line.split("\t");
int f = model.featureVocab.num(parts[0]);
int k = model.labelVocab.num(parts[1]);
obsCoefs.add(new Triple<Integer, Integer, Double>(f, k, Double.parseDouble(parts[2])));
} while ( (line = reader.readLine()) != null );
model.featureVocab.lock();
model.allocateCoefs(model.labelVocab.size(), model.featureVocab.size());
for (int k=0; k < model.numLabels; k++) {
    model.biasCoefs[k] = biasCoefs.get(k);
}
for (Triple<Integer, Integer, Double> x : edgeCoefs) {
model.edgeCoefs[x.getFirst()][x.getSecond()] = x.getThird();
}
for (Triple<Integer, Integer, Double> x : obsCoefs) {
model.observationFeatureCoefs[x.getFirst()][x.getSecond()] = x.getThird();
}
reader.close();
return model;
}
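// The text format parsed above is tab-separated, with three sections in order.
// Names and values here are illustrative, not from a real model file:
//
//   ***BIAS***   N           -0.25
//   ***BIAS***   V            0.10
//   ***EDGE***   0 1          1.73     <- "prev cur" label IDs (prev may be the start marker)
//   suffix|ing   V            2.41     <- (feature name, label, coefficient)
//
// Bias rows define the label vocabulary; edge rows give transition coefficients;
// all remaining rows are observation-feature coefficients.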
/**
* Copies coefs from sourceModel into destModel.
* For observation features, only copies features that exist in both.
* (Therefore if a feature exists in destModel but not sourceModel, it's not touched.)
*/
public static void copyCoefsForIntersectingFeatures(Model sourceModel, Model destModel) {
int K = sourceModel.numLabels;
// We could do the name-checking intersection trick for label vocabs, but punt for now
if (K != destModel.numLabels) throw new RuntimeException("label vocabs must be same size for warm-start");
for (int k=0; k < K; k++) {
if ( ! destModel.labelVocab.name(k).equals(sourceModel.labelVocab.name(k))) {
throw new RuntimeException("label vocabs must agree for warm-start");
}
}
destModel.biasCoefs = ArrayUtil.copy(sourceModel.biasCoefs);
destModel.edgeCoefs = ArrayUtil.copy(sourceModel.edgeCoefs);
// observation features need the intersection
for (int sourceFeatID=0; sourceFeatID < sourceModel.featureVocab.size(); sourceFeatID++) {
String featName = sourceModel.featureVocab.name(sourceFeatID);
if (destModel.featureVocab.contains(featName)) {
int destFeatID = destModel.featureVocab.num(featName);
destModel.observationFeatureCoefs[destFeatID] = ArrayUtil.copy(
sourceModel.observationFeatureCoefs[sourceFeatID] );
}
}
}
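// Warm-start sketch (illustrative; assumes 'newModel' already ran
// lockdownAfterFeatureExtraction() so its vocabularies and coef arrays exist):
//
//   Model oldModel = Model.loadModelFromText("old_model.txt");  // path illustrative
//   Model.copyCoefsForIntersectingFeatures(oldModel, newModel);
//   // newModel now starts optimization from oldModel's coefficients
//   // wherever feature names (and the label set) agree.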
}