
edu.stanford.nlp.classify.GeneralizedExpectationObjectiveFunction Maven / Gradle / Ivy


Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

package edu.stanford.nlp.classify;

import edu.stanford.nlp.ling.Datum;
import edu.stanford.nlp.ling.RVFDatum;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.optimization.AbstractCachingDiffFunction;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Triple;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;


/**
 * Implementation of Generalized Expectation Objective function for
 * an I.I.D. log-linear model. See Mann and McCallum, ACL 2008 for GE in CRFs.
 * This code, however, is just for a log-linear model.
 * IMPORTANT: the current implementation is only correct as long as
 * the labeled features passed to GE are binary.
 * However, other features are allowed to be real valued.
 * The original paper also discusses GE only for binary features.
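 *
 * In symbols (a summary of what calculate() below computes): for each GE
 * feature f, let p-hat_f be its empirical label distribution on the labeled
 * data, and let q_f(c) be the average of p_theta(c|d) over the unlabeled
 * datums d in which f is active. The objective value is
 * sum_f sum_c -p-hat_f(c) * log q_f(c),
 * i.e. the cross-entropy, which differs from KL(p-hat_f || q_f) only by the
 * constant entropy of p-hat_f.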
 *
 * @author Ramesh Nallapati ([email protected])
 */

public class GeneralizedExpectationObjectiveFunction<L, F> extends AbstractCachingDiffFunction {

  private final GeneralDataset<L, F> labeledDataset;
  private final List<? extends Datum<L, F>> unlabeledDataList;
  private final List<F> geFeatures;
  private final LinearClassifier<L, F> classifier;
  private double[][] geFeature2EmpiricalDist; // empirical label distribution of each GE feature. Really final, but Java won't let us.
  private List<List<Integer>> geFeature2DatumList; // an inverted index from each GE feature to the unlabeled datums in which it is active. Really final, but Java won't let us.

  private final int numFeatures;
  private final int numClasses;


  @Override
  public int domainDimension() {
    return numFeatures * numClasses;
  }
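
  // The flat parameter vector x is laid out row-major by feature:
  // x[f * numClasses + c] holds the weight for (feature f, class c), so e.g.
  // with numClasses = 3, (feature 2, class 1) lives at index 2 * 3 + 1 = 7.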

  int classOf(int index) {
    return index % numClasses;
  }

  int featureOf(int index) {
    return index / numClasses;
  }

  protected int indexOf(int f, int c) {
    return f * numClasses + c;
  }

  public double[][] to2D(double[] x) {
    double[][] x2 = new double[numFeatures][numClasses];
    for (int i = 0; i < numFeatures; i++) {
      for (int j = 0; j < numClasses; j++) {
        x2[i][j] = x[indexOf(i, j)];
      }
    }
    return x2;
  }

  @Override
  protected void calculate(double[] x) {
    classifier.setWeights(to2D(x));
    if (derivative == null) {
      derivative = new double[x.length];
    } else {
      Arrays.fill(derivative, 0.0);
    }
    Counter<Triple<Integer, Integer, Integer>> feature2classPairDerivatives = new ClassicCounter<>();

    value = 0.0;
    for (int n = 0; n < geFeatures.size(); n++) {
      double[] modelDist = new double[numClasses]; // the model's average label distribution over this feature's active data

      // go over the active unlabeled data to compute expectations
      List<Integer> activeData = geFeature2DatumList.get(n);
      for (Integer activeDatum : activeData) {
        Datum<L, F> datum = unlabeledDataList.get(activeDatum);
        double[] probs = getModelProbs(datum);
        for (int c = 0; c < numClasses; c++) {
          modelDist[c] += probs[c];
        }
        updateDerivative(datum, probs, feature2classPairDerivatives); // accumulates -p_c * (delta_{c,c'} - p_{c'}) * f_d for each labeled feature of this datum
      }

      // now compute the value (the cross-entropy part of the KL divergence) and the rest of the derivative.
      if (activeData.size() > 0) {
        for (int c = 0; c < numClasses; c++) {
          modelDist[c] /= activeData.size();
        }
        smoothDistribution(modelDist);

        for (int c = 0; c < numClasses; c++) {
          value += -geFeature2EmpiricalDist[n][c] * Math.log(modelDist[c]);
        }

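        // Gradient of -sum_c' p-hat(c') log q(c') by the chain rule:
        // updateDerivative() stored, per datum, -p_c * (delta_{c,c'} - p_{c'}) * f_d,
        // so weighting those terms by p-hat(c') / q(c') and averaging over the
        // active data gives -sum_c' (p-hat(c') / q(c')) * dq(c')/dtheta_{f,c}.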
        for (int f = 0; f < labeledDataset.featureIndex().size(); f++) {
          for (int c = 0; c < numClasses; c++) {
            int wtIndex = indexOf(f, c);
            for (int cPrime = 0; cPrime < numClasses; cPrime++) {
              derivative[wtIndex] += feature2classPairDerivatives.getCount(new Triple<>(f, c, cPrime)) * geFeature2EmpiricalDist[n][cPrime] / modelDist[cPrime];
            }
            derivative[wtIndex] /= activeData.size();
          }
        } // loop over each feature for derivative computation
      } // end of if condition
    } // loop over each GE feature
  }


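  // For a single datum d with feature value f_d, the softmax model gives
  // dp(c'|d)/dtheta_{f,c} = p(c|d) * (delta_{c,c'} - p(c'|d)) * f_d.
  // This method accumulates the negation of those terms, keyed by
  // (feature, c, c'), so calculate() can later weight them by the
  // empirical-to-model probability ratio without another pass over the data.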
  private void updateDerivative(Datum<L, F> datum, double[] probs, Counter<Triple<Integer, Integer, Integer>> feature2classPairDerivatives) {
    for (F feature : datum.asFeatures()) {
      int fID = labeledDataset.featureIndex.indexOf(feature);
      if (fID >= 0) {
        for (int c = 0; c < numClasses; c++) {
          for (int cPrime = 0; cPrime < numClasses; cPrime++) {
            if (cPrime == c) {
              feature2classPairDerivatives.incrementCount(new Triple<>(fID, c, cPrime), -probs[c] * (1 - probs[c]) * valueOfFeature(feature, datum));
            } else {
              feature2classPairDerivatives.incrementCount(new Triple<>(fID, c, cPrime), probs[c] * probs[cPrime] * valueOfFeature(feature, datum));
            }
          }
        }
      }
    }
  }

  /*
   * This method assumes the feature already exists in the datum.
   */
  private double valueOfFeature(F feature, Datum<L, F> datum) {
    if (datum instanceof RVFDatum) {
      return ((RVFDatum<L, F>) datum).asFeaturesCounter().getCount(feature);
    } else {
      return 1.0;
    }
  }

  private void computeEmpiricalStatistics(List<F> geFeatures) {
    // allocate memory for the containers and initialize them
    geFeature2EmpiricalDist = new double[geFeatures.size()][labeledDataset.labelIndex.size()];
    geFeature2DatumList = new ArrayList<>(geFeatures.size());
    Map<F, Integer> geFeatureMap = Generics.newHashMap();
    Set<Integer> activeUnlabeledExamples = Generics.newHashSet();
    for (int n = 0; n < geFeatures.size(); n++) {
      F geFeature = geFeatures.get(n);
      geFeature2DatumList.add(new ArrayList<>());
      Arrays.fill(geFeature2EmpiricalDist[n], 0);
      geFeatureMap.put(geFeature, n);
    }

    // compute the empirical label distribution for each GE feature
    for (int i = 0; i < labeledDataset.size(); i++) {
      Datum<L, F> datum = labeledDataset.getDatum(i);
      int labelID = labeledDataset.labelIndex.indexOf(datum.label());
      for (F feature : datum.asFeatures()) {
        if (geFeatureMap.containsKey(feature)) {
          int geFnum = geFeatureMap.get(feature);
          geFeature2EmpiricalDist[geFnum][labelID]++;
        }
      }
    }
    // now normalize and smooth the label distribution of each feature.
    for (int n = 0; n < geFeatures.size(); n++) {
      ArrayMath.normalize(geFeature2EmpiricalDist[n]);
      smoothDistribution(geFeature2EmpiricalDist[n]);
    }
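
    // For example, a GE feature that fires in 8 labeled datums, 6 labeled with
    // class 0 and 2 with class 1, gets the (pre-smoothing) empirical
    // distribution [0.75, 0.25].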

    // now build the inverted index from each GE feature to the unlabeled datums that contain it.
    for (int i = 0; i < unlabeledDataList.size(); i++) {
      Datum<L, F> datum = unlabeledDataList.get(i);
      for (F feature : datum.asFeatures()) {
        if (geFeatureMap.containsKey(feature)) {
          int geFnum = geFeatureMap.get(feature);
          geFeature2DatumList.get(geFnum).add(i);
          activeUnlabeledExamples.add(i);
        }
      }
    }
    System.out.println("Number of active unlabeled examples: " + activeUnlabeledExamples.size());
  }

  private static void smoothDistribution(double[] dist) {
    // perform add-epsilon (Laplace-style) smoothing
    double epsilon = 1e-6;
    for (int i = 0; i < dist.length; i++) {
      dist[i] += epsilon;
    }
    ArrayMath.normalize(dist);
  }

  private double[] getModelProbs(Datum<L, F> datum) {
    double[] condDist = new double[labeledDataset.numClasses()];
    Counter<L> probCounter = classifier.probabilityOf(datum);
    for (L label : probCounter.keySet()) {
      int labelID = labeledDataset.labelIndex.indexOf(label);
      condDist[labelID] = probCounter.getCount(label);
    }
    return condDist;
  }

  public GeneralizedExpectationObjectiveFunction(GeneralDataset<L, F> labeledDataset, List<? extends Datum<L, F>> unlabeledDataList, List<F> geFeatures) {
    System.out.println("Number of labeled examples: " + labeledDataset.size + "\nNumber of unlabeled examples: " + unlabeledDataList.size());
    System.out.println("Number of GE features: " + geFeatures.size());
    this.numFeatures = labeledDataset.numFeatures();
    this.numClasses = labeledDataset.numClasses();
    this.labeledDataset = labeledDataset;
    this.unlabeledDataList = unlabeledDataList;
    this.geFeatures = geFeatures;
    this.classifier = new LinearClassifier<>(null, labeledDataset.featureIndex, labeledDataset.labelIndex);
    computeEmpiricalStatistics(geFeatures);
    //empirical distributions don't change with iterations, so compute them only once.
    //model distributions will have to be recomputed every iteration though.
  }

}
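
For orientation, here is a minimal, hypothetical usage sketch (not part of Stanford CoreNLP: the GeDemo class, the toy data, and the 1e-4 tolerance are illustrative assumptions). It builds the objective from a tiny labeled dataset, a few unlabeled datums, and two binary GE features, minimizes it with QNMinimizer, and wraps the learned weights in a LinearClassifier.

import edu.stanford.nlp.classify.Dataset;
import edu.stanford.nlp.classify.GeneralizedExpectationObjectiveFunction;
import edu.stanford.nlp.classify.LinearClassifier;
import edu.stanford.nlp.ling.BasicDatum;
import edu.stanford.nlp.ling.Datum;
import edu.stanford.nlp.optimization.QNMinimizer;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

// Hypothetical demo class; names and data are illustrative, not from CoreNLP.
public class GeDemo {
  public static void main(String[] args) {
    // Toy labeled dataset with binary (presence/absence) string features.
    Dataset<String, String> labeled = new Dataset<>();
    labeled.add(new BasicDatum<>(Arrays.asList("good", "fun"), "POS"));
    labeled.add(new BasicDatum<>(Arrays.asList("bad", "dull"), "NEG"));

    // Unlabeled datums: only their features are used.
    List<Datum<String, String>> unlabeled = new ArrayList<>();
    unlabeled.add(new BasicDatum<>(Arrays.asList("good", "dull")));
    unlabeled.add(new BasicDatum<>(Arrays.asList("bad", "fun")));

    // GE features must be binary and should occur in the labeled data.
    List<String> geFeatures = Arrays.asList("good", "bad");

    GeneralizedExpectationObjectiveFunction<String, String> objective =
        new GeneralizedExpectationObjectiveFunction<>(labeled, unlabeled, geFeatures);

    // Minimize the objective with a quasi-Newton optimizer, starting from zero weights.
    double[] flatWeights = new QNMinimizer().minimize(objective, 1e-4, new double[objective.domainDimension()]);

    // Reshape the flat weight vector and wrap it in a classifier.
    LinearClassifier<String, String> classifier =
        new LinearClassifier<>(objective.to2D(flatWeights), labeled.featureIndex(), labeled.labelIndex());
    System.out.println(classifier.classOf(new BasicDatum<>(Arrays.asList("good"))));
  }
}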



