/*
 * edu.stanford.nlp.ie.crf.CRFLogConditionalObjectiveFunctionForLOP
 * (from the stanford-corenlp Maven artifact)
 *
 * Stanford CoreNLP provides a set of natural language analysis tools which can take raw
 * English language text input and give the base forms of words, their parts of speech,
 * whether they are names of companies, people, etc., normalize dates, times, and numeric
 * quantities, mark up the structure of sentences in terms of phrases and word dependencies,
 * and indicate which noun phrases refer to the same entities. It provides the foundational
 * building blocks for higher level text understanding applications.
 */
package edu.stanford.nlp.ie.crf;
import edu.stanford.nlp.util.logging.Redwood;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.optimization.AbstractCachingDiffFunction;
import edu.stanford.nlp.util.Index;
import java.util.*;
/**
 * Objective function for training a logarithmic opinion pool (LOP) combination of CRF experts:
 * per-expert weights are combined with softmax-normalized scales into a single linear model.
 *
 * TODO(mengqiu): currently only works with disjoint feature sets. For non-disjoint feature
 * sets, we would need to recompute EHat each iteration, and multiply in the scale in the
 * EHat and E calculations for each lopExpert.
 *
 * @author Mengqiu Wang
 */
public class CRFLogConditionalObjectiveFunctionForLOP extends AbstractCachingDiffFunction implements HasCliquePotentialFunction {
/** A logger for this class */
private static Redwood.RedwoodChannels log = Redwood.channels(CRFLogConditionalObjectiveFunctionForLOP.class);
/** label indices - for all possible label sequences - for each feature */
List> labelIndices;
Index classIndex; // didn't have before. Added since that's what is assumed everywhere.
double[][][] Ehat; // empirical counts of all the features [lopIter][feature][class]
double[] sumOfObservedLogPotential; // empirical sum of all log potentials [lopIter]
double[][][][][] sumOfExpectedLogPotential; // sumOfExpectedLogPotential[m][i][j][lopIter][k] m-docNo;i-position;j-cliqueNo;k-label
List> featureIndicesSetArray;
List> featureIndicesListArray;
int window;
int numClasses;
int[] map;
int[][][][] data; // data[docIndex][tokenIndex][][]
double[][] lopExpertWeights; // lopExpertWeights[expertIter][weightIndex]
double[][][] lopExpertWeights2D;
int[][] labels; // labels[docIndex][tokenIndex]
int[][] learnedParamsMapping;
int numLopExpert;
boolean backpropTraining;
int domainDimension = -1;
String crfType = "maxent";
String backgroundSymbol;
public static boolean VERBOSE = false;
CRFLogConditionalObjectiveFunctionForLOP(int[][][][] data, int[][] labels, double[][] lopExpertWeights, int window,
Index classIndex, List> labelIndices, int[] map, String backgroundSymbol, int numLopExpert,
List> featureIndicesSetArray, List> featureIndicesListArray, boolean backpropTraining) {
this.window = window;
this.classIndex = classIndex;
this.numClasses = classIndex.size();
this.labelIndices = labelIndices;
this.map = map;
this.data = data;
this.lopExpertWeights = lopExpertWeights;
this.labels = labels;
this.backgroundSymbol = backgroundSymbol;
this.numLopExpert = numLopExpert;
this.featureIndicesSetArray = featureIndicesSetArray;
this.featureIndicesListArray = featureIndicesListArray;
this.backpropTraining = backpropTraining;
initialize2DWeights();
if (backpropTraining) {
computeEHat();
} else {
logPotential(lopExpertWeights2D);
}
}
@Override
public int domainDimension() {
if (domainDimension < 0) {
domainDimension = numLopExpert;
if (backpropTraining) {
// for (int i = 0; i < map.length; i++) {
// domainDimension += labelIndices[map[i]].size();
// }
for (int i = 0; i < numLopExpert; i++) {
List featureIndicesList = featureIndicesListArray.get(i);
double[][] expertWeights2D = lopExpertWeights2D[i];
for (int fIndex: featureIndicesList) {
int len = expertWeights2D[fIndex].length;
domainDimension += len;
}
}
}
}
return domainDimension;
}
@Override
public double[] initial() {
double[] initial = new double[domainDimension()];
if (backpropTraining) {
learnedParamsMapping = new int[domainDimension()][3];
int index = 0;
for (; index < numLopExpert; index++) {
initial[index] = 1.0;
}
for (int i = 0; i < numLopExpert; i++) {
List featureIndicesList = featureIndicesListArray.get(i);
double[][] expertWeights2D = lopExpertWeights2D[i];
for (int fIndex: featureIndicesList) {
for (int j = 0; j < expertWeights2D[fIndex].length; j++) {
initial[index] = expertWeights2D[fIndex][j];
learnedParamsMapping[index] = new int[]{i, fIndex, j};
index++;
}
}
}
} else {
Arrays.fill(initial, 1.0);
}
return initial;
}
public double[][][] empty2D() {
double[][][] d2 = new double[numLopExpert][][];
for (int lopIter = 0; lopIter < numLopExpert; lopIter++) {
double[][] d = new double[map.length][];
// int index = 0;
for (int i = 0; i < map.length; i++) {
d[i] = new double[labelIndices.get(map[i]).size()];
// cdm july 2005: below array initialization isn't necessary: JLS (3rd ed.) 4.12.5
// Arrays.fill(d[i], 0.0);
// index += labelIndices[map[i]].size();
}
d2[lopIter] = d;
}
return d2;
}
private void initialize2DWeights() {
lopExpertWeights2D = new double[numLopExpert][][];
for (int lopIter = 0; lopIter < numLopExpert; lopIter++) {
lopExpertWeights2D[lopIter] = to2D(lopExpertWeights[lopIter], labelIndices, map);
}
}
public double[][] to2D(double[] weights, List> labelIndices, int[] map) {
double[][] newWeights = new double[map.length][];
int index = 0;
for (int i = 0; i < map.length; i++) {
newWeights[i] = new double[labelIndices.get(map[i]).size()];
System.arraycopy(weights, index, newWeights[i], 0, labelIndices.get(map[i]).size());
index += labelIndices.get(map[i]).size();
}
return newWeights;
}
private void computeEHat() {
Ehat = empty2D();
for (int m = 0; m < data.length; m++) {
int[][][] docData = data[m];
int[] docLabels = labels[m];
int[] windowLabels = new int[window];
Arrays.fill(windowLabels, classIndex.indexOf(backgroundSymbol));
if (docLabels.length>docData.length) { // only true for self-training
// fill the windowLabel array with the extra docLabels
System.arraycopy(docLabels, 0, windowLabels, 0, windowLabels.length);
// shift the docLabels array left
int[] newDocLabels = new int[docData.length];
System.arraycopy(docLabels, docLabels.length-newDocLabels.length, newDocLabels, 0, newDocLabels.length);
docLabels = newDocLabels;
}
for (int i = 0; i < docData.length; i++) {
System.arraycopy(windowLabels, 1, windowLabels, 0, window - 1);
windowLabels[window - 1] = docLabels[i];
int[][] docDataI = docData[i];
for (int j = 0; j < docDataI.length; j++) { // j iterates over cliques
int[] docDataIJ = docDataI[j];
int[] cliqueLabel = new int[j + 1];
System.arraycopy(windowLabels, window - 1 - j, cliqueLabel, 0, j + 1);
CRFLabel crfLabel = new CRFLabel(cliqueLabel);
Index labelIndex = labelIndices.get(j);
int observedLabelIndex = labelIndex.indexOf(crfLabel);
//log.info(crfLabel + " " + observedLabelIndex);
for (int lopIter = 0; lopIter < numLopExpert; lopIter++) {
double[][] ehatOfIter = Ehat[lopIter];
Set indicesSet = featureIndicesSetArray.get(lopIter);
for (int featureIdx : docDataIJ) { // k iterates over features
if (indicesSet.contains(featureIdx)) {
ehatOfIter[featureIdx][observedLabelIndex]++;
}
}
}
}
}
}
}
private void logPotential(double[][][] learnedLopExpertWeights2D) {
sumOfExpectedLogPotential = new double[data.length][][][][];
sumOfObservedLogPotential = new double[numLopExpert];
for (int m = 0; m < data.length; m++) {
int[][][] docData = data[m];
int[] docLabels = labels[m];
int[] windowLabels = new int[window];
Arrays.fill(windowLabels, classIndex.indexOf(backgroundSymbol));
double[][][][] sumOfELPm = new double[docData.length][][][];
if (docLabels.length>docData.length) { // only true for self-training
// fill the windowLabel array with the extra docLabels
System.arraycopy(docLabels, 0, windowLabels, 0, windowLabels.length);
// shift the docLabels array left
int[] newDocLabels = new int[docData.length];
System.arraycopy(docLabels, docLabels.length-newDocLabels.length, newDocLabels, 0, newDocLabels.length);
docLabels = newDocLabels;
}
for (int i = 0; i < docData.length; i++) {
System.arraycopy(windowLabels, 1, windowLabels, 0, window - 1);
windowLabels[window - 1] = docLabels[i];
double[][][] sumOfELPmi = new double[docData[i].length][][];
int[][] docDataI = docData[i];
for (int j = 0; j < docDataI.length; j++) { // j iterates over cliques
int[] docDataIJ = docDataI[j];
int[] cliqueLabel = new int[j + 1];
System.arraycopy(windowLabels, window - 1 - j, cliqueLabel, 0, j + 1);
CRFLabel crfLabel = new CRFLabel(cliqueLabel);
Index labelIndex = labelIndices.get(j);
double[][] sumOfELPmij = new double[numLopExpert][];
int observedLabelIndex = labelIndex.indexOf(crfLabel);
//log.info(crfLabel + " " + observedLabelIndex);
for (int lopIter = 0; lopIter < numLopExpert; lopIter++) {
double[] sumOfELPmijIter = new double[labelIndex.size()];
Set indicesSet = featureIndicesSetArray.get(lopIter);
for (int featureIdx : docDataIJ) { // k iterates over features
if (indicesSet.contains(featureIdx)) {
sumOfObservedLogPotential[lopIter] += learnedLopExpertWeights2D[lopIter][featureIdx][observedLabelIndex];
// sum over potential of this clique over all possible labels, used later in calculating expected counts
for (int l = 0; l < labelIndex.size(); l++) {
sumOfELPmijIter[l] += learnedLopExpertWeights2D[lopIter][featureIdx][l];
}
}
}
sumOfELPmij[lopIter] = sumOfELPmijIter;
}
sumOfELPmi[j] = sumOfELPmij;
}
sumOfELPm[i] = sumOfELPmi;
}
sumOfExpectedLogPotential[m] = sumOfELPm;
}
}
public static double[] combineAndScaleLopWeights(int numLopExpert, double[][] lopExpertWeights, double[] lopScales) {
double[] newWeights = new double[lopExpertWeights[0].length];
for (int i = 0; i < newWeights.length; i++) {
double tempWeight = 0;
for (int lopIter = 0; lopIter < numLopExpert; lopIter++) {
tempWeight += lopExpertWeights[lopIter][i] * lopScales[lopIter];
}
newWeights[i] = tempWeight;
}
return newWeights;
}
public static double[][] combineAndScaleLopWeights2D(int numLopExpert, double[][][] lopExpertWeights2D, double[] lopScales) {
double[][] newWeights = new double[lopExpertWeights2D[0].length][];
for (int i = 0; i < newWeights.length; i++) {
int innerDim = lopExpertWeights2D[0][i].length;
double[] innerWeights = new double[innerDim];
for (int j = 0; j < innerDim; j++) {
double tempWeight = 0;
for (int lopIter = 0; lopIter < numLopExpert; lopIter++) {
tempWeight += lopExpertWeights2D[lopIter][i][j] * lopScales[lopIter];
}
innerWeights[j] = tempWeight;
}
newWeights[i] = innerWeights;
}
return newWeights;
}
public double[][][] separateLopExpertWeights2D(double[] learnedParams) {
double[][][] learnedWeights2D = empty2D();
for (int paramIndex = numLopExpert; paramIndex < learnedParams.length; paramIndex++) {
int[] mapping = learnedParamsMapping[paramIndex];
learnedWeights2D[mapping[0]][mapping[1]][mapping[2]] = learnedParams[paramIndex];
}
return learnedWeights2D;
}
public double[][] separateLopExpertWeights(double[] learnedParams) {
double[][] learnedWeights = new double[numLopExpert][];
double[][][] learnedWeights2D = separateLopExpertWeights2D(learnedParams);
for (int i = 0; i < numLopExpert; i++) {
learnedWeights[i] = CRFLogConditionalObjectiveFunction.to1D(learnedWeights2D[i], lopExpertWeights[i].length);
}
return learnedWeights;
}
public double[] separateLopScales(double[] learnedParams) {
double[] rawScales = new double[numLopExpert];
System.arraycopy(learnedParams, 0, rawScales, 0, numLopExpert);
return rawScales;
}
public CliquePotentialFunction getCliquePotentialFunction(double[] x) {
double[] rawScales = separateLopScales(x);
double[] scales = ArrayMath.softmax(rawScales);
double[][][] learnedLopExpertWeights2D = lopExpertWeights2D;
if (backpropTraining) {
learnedLopExpertWeights2D = separateLopExpertWeights2D(x);
}
double[][] combinedWeights2D = combineAndScaleLopWeights2D(numLopExpert, learnedLopExpertWeights2D, scales);
return new LinearCliquePotentialFunction(combinedWeights2D);
}
// todo [cdm]: Below data[m] --> docData
/**
* Calculates both value and partial derivatives at the point x, and save them internally.
*/
@Override
public void calculate(double[] x) {
double prob = 0.0; // the log prob of the sequence given the model, which is the negation of value at this point
double[][][] E = empty2D();
double[] eScales = new double[numLopExpert];
double[] rawScales = separateLopScales(x);
double[] scales = ArrayMath.softmax(rawScales);
double[][][] learnedLopExpertWeights2D = lopExpertWeights2D;
if (backpropTraining) {
learnedLopExpertWeights2D = separateLopExpertWeights2D(x);
logPotential(learnedLopExpertWeights2D);
}
double[][] combinedWeights2D = combineAndScaleLopWeights2D(numLopExpert, learnedLopExpertWeights2D, scales);
// iterate over all the documents
for (int m = 0; m < data.length; m++) {
int[][][] docData = data[m];
int[] docLabels = labels[m];
double[][][][] sumOfELPm = sumOfExpectedLogPotential[m]; // sumOfExpectedLogPotential[m][i][j][lopIter][k] m-docNo;i-position;j-cliqueNo;k-label
// make a clique tree for this document
CliquePotentialFunction cliquePotentialFunc = new LinearCliquePotentialFunction(combinedWeights2D);
CRFCliqueTree cliqueTree = CRFCliqueTree.getCalibratedCliqueTree(docData, labelIndices, numClasses, classIndex, backgroundSymbol, cliquePotentialFunc, null);
// compute the log probability of the document given the model with the parameters x
int[] given = new int[window - 1];
Arrays.fill(given, classIndex.indexOf(backgroundSymbol));
if (docLabels.length > docData.length) { // only true for self-training
// fill the given array with the extra docLabels
System.arraycopy(docLabels, 0, given, 0, given.length);
// shift the docLabels array left
int[] newDocLabels = new int[docData.length];
System.arraycopy(docLabels, docLabels.length-newDocLabels.length, newDocLabels, 0, newDocLabels.length);
docLabels = newDocLabels;
}
// iterate over the positions in this document
for (int i = 0; i < docData.length; i++) {
int label = docLabels[i];
double p = cliqueTree.condLogProbGivenPrevious(i, label, given);
if (VERBOSE) {
log.info("P(" + label + "|" + ArrayMath.toString(given) + ")=" + p);
}
prob += p;
System.arraycopy(given, 1, given, 0, given.length - 1);
given[given.length - 1] = label;
}
// compute the expected counts for this document, which we will need to compute the derivative
// iterate over the positions in this document
for (int i = 0; i < docData.length; i++) {
// for each possible clique at this position
double[][][] sumOfELPmi = sumOfELPm[i];
for (int j = 0; j < docData[i].length; j++) {
double[][] sumOfELPmij = sumOfELPmi[j];
Index labelIndex = labelIndices.get(j);
// for each possible labeling for that clique
for (int l = 0; l < labelIndex.size(); l++) {
int[] label = labelIndex.get(l).getLabel();
double p = cliqueTree.prob(i, label); // probability of these labels occurring in this clique with these features
for (int lopIter = 0; lopIter < numLopExpert; lopIter++) {
Set indicesSet = featureIndicesSetArray.get(lopIter);
double scale = scales[lopIter];
double expected = sumOfELPmij[lopIter][l];
for (int innerLopIter = 0; innerLopIter < numLopExpert; innerLopIter++) {
expected -= scales[innerLopIter] * sumOfELPmij[innerLopIter][l];
}
expected *= scale;
eScales[lopIter] += (p * expected);
double[][] eOfIter = E[lopIter];
if (backpropTraining) {
for (int k = 0; k < docData[i][j].length; k++) { // k iterates over features
int featureIdx = docData[i][j][k];
if (indicesSet.contains(featureIdx)) {
eOfIter[featureIdx][l] += p;
}
}
}
}
}
}
}
}
if (Double.isNaN(prob)) { // shouldn't be the case
throw new RuntimeException("Got NaN for prob in CRFLogConditionalObjectiveFunctionForLOP.calculate()");
}
value = -prob;
if(VERBOSE){
log.info("value is " + value);
}
// compute the partial derivative for each feature by comparing expected counts to empirical counts
for (int lopIter = 0; lopIter < numLopExpert; lopIter++) {
double scale = scales[lopIter];
double observed = sumOfObservedLogPotential[lopIter];
for (int j = 0; j < numLopExpert; j++) {
observed -= scales[j] * sumOfObservedLogPotential[j];
}
observed *= scale;
double expected = eScales[lopIter];
derivative[lopIter] = (expected - observed);
if (VERBOSE) {
log.info("deriv(" + lopIter + ") = " + expected + " - " + observed + " = " + derivative[lopIter]);
}
}
if (backpropTraining) {
int dIndex = numLopExpert;
for (int lopIter = 0; lopIter < numLopExpert; lopIter++) {
double scale = scales[lopIter];
double[][] eOfExpert = E[lopIter];
double[][] ehatOfExpert = Ehat[lopIter];
List featureIndicesList = featureIndicesListArray.get(lopIter);
for (int fIndex: featureIndicesList) {
for (int j = 0; j < eOfExpert[fIndex].length; j++) {
derivative[dIndex++] = scale * (eOfExpert[fIndex][j] - ehatOfExpert[fIndex][j]);
if (VERBOSE) {
log.info("deriv[" + lopIter+ "](" + fIndex + "," + j + ") = " + scale + " * (" + eOfExpert[fIndex][j] + " - " + ehatOfExpert[fIndex][j] + ") = " + derivative[dIndex - 1]);
}
}
}
}
assert(dIndex == domainDimension());
}
}
}