edu.stanford.nlp.optimization.InefficientSGDMinimizer Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of stanford-corenlp Show documentation

Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.

There is a newer version: 4.5.7

Show newest version

package edu.stanford.nlp.optimization;

import edu.stanford.nlp.util.Pair;

/**
 * Stochastic Gradient Descent Minimizer.
 *
 * <p>The basic way to use the minimizer is with a null constructor, then
 * the simple minimize method:
 *
 * <pre>
 * Minimizer smd = new InefficientSGDMinimizer();
 * DiffFunction df = new SomeDiffFunction(); // Note that it must be an instance of AbstractStochasticCachingDiffFunction
 * double tol = 1e-4;
 * double[] initial = getInitialGuess();
 * int maxIterations = someSafeNumber;
 * double[] minimum = smd.minimize(df, tol, initial, maxIterations);
 * </pre>
 *
 * <p>Constructing with a null constructor will use the default values of
 *
 * <pre>
 * batchSize = 15;
 * initialGain = 0.1;
 * </pre>
 *
 * <p><b>NOTE:</b> This class was previously called SGDMinimizer. SGDMinimizer is now what was
 * StochasticInPlaceMinimizer. New projects should use that class.
 *
 * @author Alex Kleeman
 * @version 1.0
 * @since 1.0
 */
public class InefficientSGDMinimizer extends StochasticMinimizer {

  // NOTE(review): generic type arguments look like they were stripped from this
  // listing (raw Pair return types, raw superclass). The tune methods presumably
  // return Pair<Integer, Double> -- confirm against StochasticMinimizer before
  // tightening the signatures.

  /**
   * Switches this minimizer into quiet mode by setting the inherited
   * {@code quiet} flag.
   */
  @Override
  public void shutUp() {
    this.quiet = true;
  }

  /**
   * Overrides the batch size used by this minimizer.
   *
   * @param batchSize the value stored into the inherited {@code bSize} field
   */
  public void setBatchSize(int batchSize) {
    bSize = batchSize;
  }

  /**
   * Creates a minimizer that leaves every inherited setting at the value
   * supplied by the superclass.
   */
  public InefficientSGDMinimizer() {
  }

  /**
   * Creates a minimizer with the given gain and batch size, using 50 passes,
   * a time limit of {@code Long.MAX_VALUE} and no output to file.
   *
   * @param SGDGain   the gain that scales each gradient step
   * @param batchSize the batch size
   */
  public InefficientSGDMinimizer(double SGDGain, int batchSize){
    this(SGDGain, batchSize, 50);
  }

  /**
   * Creates a minimizer with the given gain, batch size and number of passes,
   * using a time limit of {@code Long.MAX_VALUE} and no output to file.
   *
   * @param SGDGain   the gain that scales each gradient step
   * @param batchSize the batch size
   * @param passes    the number of passes
   */
  public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes){
    this(SGDGain, batchSize, passes, Long.MAX_VALUE, false);
  }

  /**
   * Creates a minimizer with the given gain, batch size and number of passes,
   * using a time limit of {@code Long.MAX_VALUE}.
   *
   * @param SGDGain      the gain that scales each gradient step
   * @param batchSize    the batch size
   * @param passes       the number of passes
   * @param outputToFile value stored into {@code outputIterationsToFile}
   */
  public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes, boolean outputToFile){
    this(SGDGain, batchSize, passes, Long.MAX_VALUE, outputToFile);
  }

  /**
   * Creates a minimizer with the given gain, batch size, number of passes and
   * time limit, with no output to file.
   *
   * @param SGDGain   the gain that scales each gradient step
   * @param batchSize the batch size
   * @param passes    the number of passes
   * @param maxTime   value stored into the inherited {@code maxTime} field
   */
  public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes, long maxTime){
    this(SGDGain, batchSize, passes, maxTime, false);
  }

  /**
   * Fully specified constructor; every other parameterized constructor
   * delegates here.
   *
   * @param SGDGain      the gain that scales each gradient step
   * @param batchSize    the batch size
   * @param passes       the number of passes
   * @param maxTime      value stored into the inherited {@code maxTime} field
   * @param outputToFile value stored into {@code outputIterationsToFile}
   */
  public InefficientSGDMinimizer(double SGDGain, int batchSize, int passes, long maxTime, boolean outputToFile){
    bSize = batchSize;
    gain = SGDGain;
    this.numPasses = passes;
    this.outputIterationsToFile = outputToFile;
    this.maxTime = maxTime;
  }

  /**
   * Builds a short label of the form {@code SGD<batchSize>_g<gain*1000>}.
   *
   * @return the label for the current batch size and gain
   */
  @Override
  protected String getName(){
    // Parenthesize before casting: "(int) gain*1000" casts gain to int first,
    // which truncates every gain below 1.0 to 0 and always yields "_g0".
    int g = (int) (gain * 1000);
    return "SGD" + bSize + "_g" + g;
  }

  /**
   * Tunes the gain within the given range and then the batch size, storing
   * both results on this minimizer. Also sets the {@code quiet} flag.
   *
   * @param function  the function to tune against
   * @param initial   the starting point
   * @param msPerTest passed through to {@code tuneGain} and {@code tuneBatch}
   * @param gainLow   lower end of the gain range passed to {@code tuneGain}
   * @param gainHigh  upper end of the gain range passed to {@code tuneGain}
   * @return a pair holding the tuned batch size and the tuned gain, in that order
   */
  public Pair tune(Function function, double[] initial, long msPerTest, double gainLow, double gainHigh){
    this.quiet = true;
    // The gain is assigned before the batch size is tuned, so tuneBatch runs
    // with the freshly tuned gain in place.
    gain = tuneGain(function, initial, msPerTest, gainLow, gainHigh);
    bSize = tuneBatch(function, initial, msPerTest, 1);

    return new Pair<>(bSize, gain);
  }

  /**
   * Tunes gain and batch size using the gain range {@code [1e-7, 1.0]}.
   *
   * @param function  the function to tune against
   * @param initial   the starting point
   * @param msPerTest passed through to the five-argument {@code tune}
   * @return a pair holding the tuned batch size and the tuned gain, in that order
   */
  @Override
  public Pair tune(Function function, double[] initial, long msPerTest){
    return this.tune(function, initial, msPerTest, 1e-7, 1.0);
  }

  /**
   * Writes the next iterate into {@code newX}:
   * {@code newX[i] = x[i] - gain * gainSchedule(k, 5 * numBatches) * grad[i]}.
   *
   * @param dfunction not used by this implementation
   */
  @Override
  protected void takeStep(AbstractStochasticCachingDiffFunction dfunction){
    // The scaled gain is the same for every coordinate, so compute it once.
    // Assumes gainSchedule is a pure function of its arguments -- TODO confirm.
    final double stepSize = gain * gainSchedule(k, 5 * numBatches);
    for (int i = 0; i < x.length; i++) {
      newX[i] = x[i] - stepSize * grad[i];
    }
  }

  /**
   * Small driver that minimizes a synthetic test function with a
   * default-constructed minimizer.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    // test function is (0.5 sum(x_i^2 * var_i)) ^ PI
    // where var is a vector of nonnegative weights (here a deterministic ramp
    // up to maxVar); valueAt adds a constant 1.0 to that quantity.
    // dimensionality is variable.
    final int dim = 500000;
    final double maxVar = 5;
    final double[] var = new double[dim];
    double[] init = new double[dim];

    for (int i = 0; i < dim; i++) {
      init[i] = ((i + 1) / (double) dim - 0.5);//init[i] = (Math.random() - 0.5);
      var[i] = maxVar * (i + 1) / dim;
    }

    // Single gradient buffer reused (and returned) by every derivativeAt call.
    final double[] grads = new double[dim];

    final DiffFunction f = new DiffFunction() {
      @Override
      public double[] derivativeAt(double[] x) {
        double val = Math.PI * valuePow(x, Math.PI - 1);
        for (int i = 0; i < dim; i++) {
          grads[i] = x[i] * var[i] * val;
        }
        return grads;
      }

      @Override
      public double valueAt(double[] x) {
        return 1.0 + valuePow(x, Math.PI);
      }

      // Computes (0.5 * sum_i x_i^2 * var_i) ^ pow.
      private double valuePow(double[] x, double pow) {
        double val = 0.0;
        for (int i = 0; i < dim; i++) {
          val += x[i] * x[i] * var[i];
        }
        return Math.pow(val * 0.5, pow);
      }

      @Override
      public int domainDimension() {
        return dim;
      }
    };

    // No diamond here: the class is declared without type parameters, and
    // "new InefficientSGDMinimizer<>()" does not compile for a non-generic class.
    InefficientSGDMinimizer min = new InefficientSGDMinimizer();
    min.minimize(f, 1.0E-4, init);
  }

}