/*
 * edu.stanford.nlp.ie.crf.CRFLogConditionalObjectiveFunctionForLOP
 * (from the stanford-corenlp Maven artifact)
 *
 * Stanford CoreNLP provides a set of natural language analysis tools which can take raw
 * English language text input and give the base forms of words, their parts of speech,
 * whether they are names of companies, people, etc., normalize dates, times, and numeric
 * quantities, mark up the structure of sentences in terms of phrases and word dependencies,
 * and indicate which noun phrases refer to the same entities. It provides the foundational
 * building blocks for higher level text understanding applications.
 */
package edu.stanford.nlp.ie.crf;
import edu.stanford.nlp.util.logging.Redwood;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.optimization.AbstractCachingDiffFunction;
import edu.stanford.nlp.util.Index;
import java.util.*;
/**
 * Objective function for training a logarithmic opinion pool (LOP) combination of CRF experts:
 * per-expert weights are combined with softmax-normalized scales into a single linear model.
 *
 * TODO(mengqiu): currently only works with disjoint feature sets. For non-disjoint feature
 * sets, we would need to recompute EHat each iteration, and multiply in the scale in the
 * EHat and E calculations for each lopExpert.
 *
 * @author Mengqiu Wang
 */
public class CRFLogConditionalObjectiveFunctionForLOP extends AbstractCachingDiffFunction implements HasCliquePotentialFunction {
/** A logger for this class */
private static Redwood.RedwoodChannels log = Redwood.channels(CRFLogConditionalObjectiveFunctionForLOP.class);
/** label indices - for all possible label sequences - for each feature */
List> labelIndices;
Index classIndex; // didn't have before. Added since that's what is assumed everywhere.
double[][][] Ehat; // empirical counts of all the features [lopIter][feature][class]
double[] sumOfObservedLogPotential; // empirical sum of all log potentials [lopIter]
double[][][][][] sumOfExpectedLogPotential; // sumOfExpectedLogPotential[m][i][j][lopIter][k] m-docNo;i-position;j-cliqueNo;k-label
List> featureIndicesSetArray;
List> featureIndicesListArray;
int window;
int numClasses;
int[] map;
int[][][][] data; // data[docIndex][tokenIndex][][]
double[][] lopExpertWeights; // lopExpertWeights[expertIter][weightIndex]
double[][][] lopExpertWeights2D;
int[][] labels; // labels[docIndex][tokenIndex]
int[][] learnedParamsMapping;
int numLopExpert;
boolean backpropTraining;
int domainDimension = -1;
String crfType = "maxent";
String backgroundSymbol;
public static boolean VERBOSE = false;
CRFLogConditionalObjectiveFunctionForLOP(int[][][][] data, int[][] labels, double[][] lopExpertWeights, int window,
Index classIndex, List> labelIndices, int[] map, String backgroundSymbol, int numLopExpert,
List> featureIndicesSetArray, List> featureIndicesListArray, boolean backpropTraining) {
this.window = window;
this.classIndex = classIndex;
this.numClasses = classIndex.size();
this.labelIndices = labelIndices;
this.map = map;
this.data = data;
this.lopExpertWeights = lopExpertWeights;
this.labels = labels;
this.backgroundSymbol = backgroundSymbol;
this.numLopExpert = numLopExpert;
this.featureIndicesSetArray = featureIndicesSetArray;
this.featureIndicesListArray = featureIndicesListArray;
this.backpropTraining = backpropTraining;
initialize2DWeights();
if (backpropTraining) {
computeEHat();
} else {
logPotential(lopExpertWeights2D);
}
}
@Override
public int domainDimension() {
if (domainDimension < 0) {
domainDimension = numLopExpert;
if (backpropTraining) {
// for (int i = 0; i < map.length; i++) {
// domainDimension += labelIndices[map[i]].size();
// }
for (int i = 0; i < numLopExpert; i++) {
List featureIndicesList = featureIndicesListArray.get(i);
double[][] expertWeights2D = lopExpertWeights2D[i];
for (int fIndex: featureIndicesList) {
int len = expertWeights2D[fIndex].length;
domainDimension += len;
}
}
}
}
return domainDimension;
}
@Override
public double[] initial() {
double[] initial = new double[domainDimension()];
if (backpropTraining) {
learnedParamsMapping = new int[domainDimension()][3];
int index = 0;
for (; index < numLopExpert; index++) {
initial[index] = 1.0;
}
for (int i = 0; i < numLopExpert; i++) {
List featureIndicesList = featureIndicesListArray.get(i);
double[][] expertWeights2D = lopExpertWeights2D[i];
for (int fIndex: featureIndicesList) {
for (int j = 0; j < expertWeights2D[fIndex].length; j++) {
initial[index] = expertWeights2D[fIndex][j];
learnedParamsMapping[index] = new int[]{i, fIndex, j};
index++;
}
}
}
} else {
Arrays.fill(initial, 1.0);
}
return initial;
}
public double[][][] empty2D() {
double[][][] d2 = new double[numLopExpert][][];
for (int lopIter = 0; lopIter < numLopExpert; lopIter++) {
double[][] d = new double[map.length][];
// int index = 0;
for (int i = 0; i < map.length; i++) {
d[i] = new double[labelIndices.get(map[i]).size()];
// cdm july 2005: below array initialization isn't necessary: JLS (3rd ed.) 4.12.5
// Arrays.fill(d[i], 0.0);
// index += labelIndices[map[i]].size();
}
d2[lopIter] = d;
}
return d2;
}
private void initialize2DWeights() {
lopExpertWeights2D = new double[numLopExpert][][];
for (int lopIter = 0; lopIter < numLopExpert; lopIter++) {
lopExpertWeights2D[lopIter] = to2D(lopExpertWeights[lopIter], labelIndices, map);
}
}
public double[][] to2D(double[] weights, List> labelIndices, int[] map) {
double[][] newWeights = new double[map.length][];
int index = 0;
for (int i = 0; i < map.length; i++) {
newWeights[i] = new double[labelIndices.get(map[i]).size()];
System.arraycopy(weights, index, newWeights[i], 0, labelIndices.get(map[i]).size());
index += labelIndices.get(map[i]).size();
}
return newWeights;
}
private void computeEHat() {
Ehat = empty2D();
for (int m = 0; m < data.length; m++) {
int[][][] docData = data[m];
int[] docLabels = labels[m];
int[] windowLabels = new int[window];
Arrays.fill(windowLabels, classIndex.indexOf(backgroundSymbol));
if (docLabels.length>docData.length) { // only true for self-training
// fill the windowLabel array with the extra docLabels
System.arraycopy(docLabels, 0, windowLabels, 0, windowLabels.length);
// shift the docLabels array left
int[] newDocLabels = new int[docData.length];
System.arraycopy(docLabels, docLabels.length-newDocLabels.length, newDocLabels, 0, newDocLabels.length);
docLabels = newDocLabels;
}
for (int i = 0; i < docData.length; i++) {
System.arraycopy(windowLabels, 1, windowLabels, 0, window - 1);
windowLabels[window - 1] = docLabels[i];
int[][] docDataI = docData[i];
for (int j = 0; j < docDataI.length; j++) { // j iterates over cliques
int[] docDataIJ = docDataI[j];
int[] cliqueLabel = new int[j + 1];
System.arraycopy(windowLabels, window - 1 - j, cliqueLabel, 0, j + 1);
CRFLabel crfLabel = new CRFLabel(cliqueLabel);
Index labelIndex = labelIndices.get(j);
int observedLabelIndex = labelIndex.indexOf(crfLabel);
//log.info(crfLabel + " " + observedLabelIndex);
for (int lopIter = 0; lopIter < numLopExpert; lopIter++) {
double[][] ehatOfIter = Ehat[lopIter];
Set indicesSet = featureIndicesSetArray.get(lopIter);
for (int featureIdx : docDataIJ) { // k iterates over features
if (indicesSet.contains(featureIdx)) {
ehatOfIter[featureIdx][observedLabelIndex]++;
}
}
}
}
}
}
}
private void logPotential(double[][][] learnedLopExpertWeights2D) {
sumOfExpectedLogPotential = new double[data.length][][][][];
sumOfObservedLogPotential = new double[numLopExpert];
for (int m = 0; m < data.length; m++) {
int[][][] docData = data[m];
int[] docLabels = labels[m];
int[] windowLabels = new int[window];
Arrays.fill(windowLabels, classIndex.indexOf(backgroundSymbol));
double[][][][] sumOfELPm = new double[docData.length][][][];
if (docLabels.length>docData.length) { // only true for self-training
// fill the windowLabel array with the extra docLabels
System.arraycopy(docLabels, 0, windowLabels, 0, windowLabels.length);
// shift the docLabels array left
int[] newDocLabels = new int[docData.length];
System.arraycopy(docLabels, docLabels.length-newDocLabels.length, newDocLabels, 0, newDocLabels.length);
docLabels = newDocLabels;
}
for (int i = 0; i < docData.length; i++) {
System.arraycopy(windowLabels, 1, windowLabels, 0, window - 1);
windowLabels[window - 1] = docLabels[i];
double[][][] sumOfELPmi = new double[docData[i].length][][];
int[][] docDataI = docData[i];
for (int j = 0; j < docDataI.length; j++) { // j iterates over cliques
int[] docDataIJ = docDataI[j];
int[] cliqueLabel = new int[j + 1];
System.arraycopy(windowLabels, window - 1 - j, cliqueLabel, 0, j + 1);
CRFLabel crfLabel = new CRFLabel(cliqueLabel);
Index labelIndex = labelIndices.get(j);
double[][] sumOfELPmij = new double[numLopExpert][];
int observedLabelIndex = labelIndex.indexOf(crfLabel);
//log.info(crfLabel + " " + observedLabelIndex);
for (int lopIter = 0; lopIter < numLopExpert; lopIter++) {
double[] sumOfELPmijIter = new double[labelIndex.size()];
Set indicesSet = featureIndicesSetArray.get(lopIter);
for (int featureIdx : docDataIJ) { // k iterates over features
if (indicesSet.contains(featureIdx)) {
sumOfObservedLogPotential[lopIter] += learnedLopExpertWeights2D[lopIter][featureIdx][observedLabelIndex];
// sum over potential of this clique over all possible labels, used later in calculating expected counts
for (int l = 0; l < labelIndex.size(); l++) {
sumOfELPmijIter[l] += learnedLopExpertWeights2D[lopIter][featureIdx][l];
}
}
}
sumOfELPmij[lopIter] = sumOfELPmijIter;
}
sumOfELPmi[j] = sumOfELPmij;
}
sumOfELPm[i] = sumOfELPmi;
}
sumOfExpectedLogPotential[m] = sumOfELPm;
}
}
public static double[] combineAndScaleLopWeights(int numLopExpert, double[][] lopExpertWeights, double[] lopScales) {
double[] newWeights = new double[lopExpertWeights[0].length];
for (int i = 0; i < newWeights.length; i++) {
double tempWeight = 0;
for (int lopIter = 0; lopIter < numLopExpert; lopIter++) {
tempWeight += lopExpertWeights[lopIter][i] * lopScales[lopIter];
}
newWeights[i] = tempWeight;
}
return newWeights;
}
public static double[][] combineAndScaleLopWeights2D(int numLopExpert, double[][][] lopExpertWeights2D, double[] lopScales) {
double[][] newWeights = new double[lopExpertWeights2D[0].length][];
for (int i = 0; i < newWeights.length; i++) {
int innerDim = lopExpertWeights2D[0][i].length;
double[] innerWeights = new double[innerDim];
for (int j = 0; j < innerDim; j++) {
double tempWeight = 0;
for (int lopIter = 0; lopIter < numLopExpert; lopIter++) {
tempWeight += lopExpertWeights2D[lopIter][i][j] * lopScales[lopIter];
}
innerWeights[j] = tempWeight;
}
newWeights[i] = innerWeights;
}
return newWeights;
}
public double[][][] separateLopExpertWeights2D(double[] learnedParams) {
double[][][] learnedWeights2D = empty2D();
for (int paramIndex = numLopExpert; paramIndex < learnedParams.length; paramIndex++) {
int[] mapping = learnedParamsMapping[paramIndex];
learnedWeights2D[mapping[0]][mapping[1]][mapping[2]] = learnedParams[paramIndex];
}
return learnedWeights2D;
}
public double[][] separateLopExpertWeights(double[] learnedParams) {
double[][] learnedWeights = new double[numLopExpert][];
double[][][] learnedWeights2D = separateLopExpertWeights2D(learnedParams);
for (int i = 0; i < numLopExpert; i++) {
learnedWeights[i] = CRFLogConditionalObjectiveFunction.to1D(learnedWeights2D[i], lopExpertWeights[i].length);
}
return learnedWeights;
}
public double[] separateLopScales(double[] learnedParams) {
double[] rawScales = new double[numLopExpert];
System.arraycopy(learnedParams, 0, rawScales, 0, numLopExpert);
return rawScales;
}
public CliquePotentialFunction getCliquePotentialFunction(double[] x) {
double[] rawScales = separateLopScales(x);
double[] scales = ArrayMath.softmax(rawScales);
double[][][] learnedLopExpertWeights2D = lopExpertWeights2D;
if (backpropTraining) {
learnedLopExpertWeights2D = separateLopExpertWeights2D(x);
}
double[][] combinedWeights2D = combineAndScaleLopWeights2D(numLopExpert, learnedLopExpertWeights2D, scales);
return new LinearCliquePotentialFunction(combinedWeights2D);
}
// todo [cdm]: Below data[m] --> docData
/**
* Calculates both value and partial derivatives at the point x, and save them internally.
*/
@Override
public void calculate(double[] x) {
double prob = 0.0; // the log prob of the sequence given the model, which is the negation of value at this point
double[][][] E = empty2D();
double[] eScales = new double[numLopExpert];
double[] rawScales = separateLopScales(x);
double[] scales = ArrayMath.softmax(rawScales);
double[][][] learnedLopExpertWeights2D = lopExpertWeights2D;
if (backpropTraining) {
learnedLopExpertWeights2D = separateLopExpertWeights2D(x);
logPotential(learnedLopExpertWeights2D);
}
double[][] combinedWeights2D = combineAndScaleLopWeights2D(numLopExpert, learnedLopExpertWeights2D, scales);
// iterate over all the documents
for (int m = 0; m < data.length; m++) {
int[][][] docData = data[m];
int[] docLabels = labels[m];
double[][][][] sumOfELPm = sumOfExpectedLogPotential[m]; // sumOfExpectedLogPotential[m][i][j][lopIter][k] m-docNo;i-position;j-cliqueNo;k-label
// make a clique tree for this document
CliquePotentialFunction cliquePotentialFunc = new LinearCliquePotentialFunction(combinedWeights2D);
CRFCliqueTree cliqueTree = CRFCliqueTree.getCalibratedCliqueTree(docData, labelIndices, numClasses, classIndex, backgroundSymbol, cliquePotentialFunc, null);
// compute the log probability of the document given the model with the parameters x
int[] given = new int[window - 1];
Arrays.fill(given, classIndex.indexOf(backgroundSymbol));
if (docLabels.length > docData.length) { // only true for self-training
// fill the given array with the extra docLabels
System.arraycopy(docLabels, 0, given, 0, given.length);
// shift the docLabels array left
int[] newDocLabels = new int[docData.length];
System.arraycopy(docLabels, docLabels.length-newDocLabels.length, newDocLabels, 0, newDocLabels.length);
docLabels = newDocLabels;
}
// iterate over the positions in this document
for (int i = 0; i < docData.length; i++) {
int label = docLabels[i];
double p = cliqueTree.condLogProbGivenPrevious(i, label, given);
if (VERBOSE) {
log.info("P(" + label + "|" + ArrayMath.toString(given) + ")=" + p);
}
prob += p;
System.arraycopy(given, 1, given, 0, given.length - 1);
given[given.length - 1] = label;
}
// compute the expected counts for this document, which we will need to compute the derivative
// iterate over the positions in this document
for (int i = 0; i < docData.length; i++) {
// for each possible clique at this position
double[][][] sumOfELPmi = sumOfELPm[i];
for (int j = 0; j < docData[i].length; j++) {
double[][] sumOfELPmij = sumOfELPmi[j];
Index labelIndex = labelIndices.get(j);
// for each possible labeling for that clique
for (int l = 0; l < labelIndex.size(); l++) {
int[] label = labelIndex.get(l).getLabel();
double p = cliqueTree.prob(i, label); // probability of these labels occurring in this clique with these features
for (int lopIter = 0; lopIter < numLopExpert; lopIter++) {
Set indicesSet = featureIndicesSetArray.get(lopIter);
double scale = scales[lopIter];
double expected = sumOfELPmij[lopIter][l];
for (int innerLopIter = 0; innerLopIter < numLopExpert; innerLopIter++) {
expected -= scales[innerLopIter] * sumOfELPmij[innerLopIter][l];
}
expected *= scale;
eScales[lopIter] += (p * expected);
double[][] eOfIter = E[lopIter];
if (backpropTraining) {
for (int k = 0; k < docData[i][j].length; k++) { // k iterates over features
int featureIdx = docData[i][j][k];
if (indicesSet.contains(featureIdx)) {
eOfIter[featureIdx][l] += p;
}
}
}
}
}
}
}
}
if (Double.isNaN(prob)) { // shouldn't be the case
throw new RuntimeException("Got NaN for prob in CRFLogConditionalObjectiveFunctionForLOP.calculate()");
}
value = -prob;
if(VERBOSE){
log.info("value is " + value);
}
// compute the partial derivative for each feature by comparing expected counts to empirical counts
for (int lopIter = 0; lopIter < numLopExpert; lopIter++) {
double scale = scales[lopIter];
double observed = sumOfObservedLogPotential[lopIter];
for (int j = 0; j < numLopExpert; j++) {
observed -= scales[j] * sumOfObservedLogPotential[j];
}
observed *= scale;
double expected = eScales[lopIter];
derivative[lopIter] = (expected - observed);
if (VERBOSE) {
log.info("deriv(" + lopIter + ") = " + expected + " - " + observed + " = " + derivative[lopIter]);
}
}
if (backpropTraining) {
int dIndex = numLopExpert;
for (int lopIter = 0; lopIter < numLopExpert; lopIter++) {
double scale = scales[lopIter];
double[][] eOfExpert = E[lopIter];
double[][] ehatOfExpert = Ehat[lopIter];
List featureIndicesList = featureIndicesListArray.get(lopIter);
for (int fIndex: featureIndicesList) {
for (int j = 0; j < eOfExpert[fIndex].length; j++) {
derivative[dIndex++] = scale * (eOfExpert[fIndex][j] - ehatOfExpert[fIndex][j]);
if (VERBOSE) {
log.info("deriv[" + lopIter+ "](" + fIndex + "," + j + ") = " + scale + " * (" + eOfExpert[fIndex][j] + " - " + ehatOfExpert[fIndex][j] + ") = " + derivative[dIndex - 1]);
}
}
}
}
assert(dIndex == domainDimension());
}
}
}