// edu.stanford.nlp.optimization.SMDMinimizer — artifact listing header (Maven / Gradle / Ivy)
package edu.stanford.nlp.optimization;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.util.Pair;
/**
 * Stochastic Meta Descent (SMD) Minimizer, based on:
 *
 * "Accelerated training of conditional random fields with stochastic gradient methods",
 * S. V. N. Vishwanathan, Nicol N. Schraudolph, Mark W. Schmidt, Kevin P. Murphy.
 * Proceedings of the 23rd International Conference on Machine Learning (ICML '06),
 * June 2006. Publisher: ACM Press.
 *
 * The basic way to use the minimizer is with a null constructor, then
 * the simple minimize method:
 *
 * <pre>
 * Minimizer smd = new SMDMinimizer();
 * DiffFunction df = new SomeDiffFunction();
 * double tol = 1e-4;
 * double[] initial = getInitialGuess();
 * int maxIterations = someSafeNumber;
 * double[] minimum = smd.minimize(df, tol, initial, maxIterations);
 * </pre>
 *
 * Constructing with a null constructor will use the default values of:
 *
 * <pre>
 * batchSize = 15;
 * initialGain = 0.1;
 * useAlgorithmicDifferentiation = true;
 * </pre>
 *
 * @author Alex Kleeman
 * @version 1.0
 * @since 1.0
 */
public class SMDMinimizer extends StochasticMinimizer {
public double mu = 0.01;
public double lam = 1.0;
public double cPosDef = 0.00;
public double meta;
//DEBUG ONLY
public boolean printMinMax = false;
private double[] Hv,gains;
StochasticCalculateMethods method = null;
@Override
public void shutUp() {
this.quiet = true;
}
public void setBatchSize(int batchSize) {
bSize = batchSize;
}
private static NumberFormat nf = new DecimalFormat("0.000E0");
public SMDMinimizer() {
}
public SMDMinimizer(double initialSMDGain, int batchSize, StochasticCalculateMethods method, int passes) {
this(initialSMDGain, batchSize, method, passes, false);
}
public SMDMinimizer(double initGain, int batchSize,StochasticCalculateMethods method, int passes, boolean outputToFile){
StochasticMinimizer.bSize = batchSize;
StochasticMinimizer.gain = initGain;
this.method = method;
this.numPasses = passes;
this.outputIterationsToFile = outputToFile;
}
@Override
public double[] minimize(Function function, double functionTolerance, double[] initial) {
return minimize(function, functionTolerance, initial, -1);
}
@Override
protected void init(AbstractStochasticCachingDiffFunction func){
func.method = this.method;
gains = new double[x.length];
v = new double[x.length];
Hv = new double[x.length];
for(int i = 0; i{
SMDMinimizer parent = null;
public setMu(SMDMinimizer smd){parent = smd;}
public void set(Double in){
parent.mu = in ;
}
}
private class setLam implements PropertySetter{
SMDMinimizer parent = null;
public setLam(SMDMinimizer smd){parent = smd;}
public void set(Double in){
parent.lam = in ;
}
}
@Override
public Pair tune( edu.stanford.nlp.optimization.Function function,double[] initial, long msPerTest){
this.quiet = true;
this.lam = 0.9;
this.mu = tuneDouble(function,initial,msPerTest,new setMu(this),1e-8,1e-2);
this.lam = tuneDouble(function,initial,msPerTest,new setLam(this),0.1,1.0);
StochasticMinimizer.gain = tuneGain(function, initial, msPerTest, 1e-8,1.0);
StochasticMinimizer.bSize= tuneBatch(function,initial,msPerTest,1);
System.err.println("Results: gain: " + nf.format(StochasticMinimizer.gain) + " batch " + StochasticMinimizer.bSize + " mu" + nf.format(this.mu) + " lam" + nf.format(this.lam));
return new Pair(StochasticMinimizer.bSize,StochasticMinimizer.gain);
}
@Override
protected void takeStep(AbstractStochasticCachingDiffFunction dfunction){
dfunction.returnPreviousValues = true;
System.arraycopy(dfunction.HdotVAt(x,v,grad,bSize), 0, Hv, 0, Hv.length);
//Update the weights
for(int i = 0; i < x.length; i++){
meta = 1-mu*grad[i]*v[i];
if(0.5 > meta){
gains[i] = gains[i]*0.5;
}else{
gains[i] = gains[i]*meta;
}
//Update gain history
v[i] = lam*(1+cPosDef*gains[i])*v[i] - gains[i]*(grad[i] + lam*Hv[i]);
//Get the next X
newX[i] = x[i] - gains[i]*grad[i];
}
if(printMinMax){
say("vMin = " + ArrayMath.min(v) + " ");
say("vMax = " + ArrayMath.max(v) + " ");
say("gainMin = " + ArrayMath.min(gains) + " ");
say("gainMax = " + ArrayMath.max(gains) + " ");
}
}
@Override
protected String getName(){
int m = (int) (mu*1000);
int l = (int) (lam * 1000);
int g = (int) (gain*10000);
return "SMD" + bSize +"_mu" + m + "_lam" + l + "_g" + g ;
}
public static void main(String[] args) {
// optimizes test function using doubles and floats
// test function is (0.5 sum(x_i^2 * var_i)) ^ PI
// where var is a vector of random nonnegative numbers
// dimensionality is variable.
final int dim = 500000;
final double maxVar = 5;
final double[] var = new double[dim];
double[] init = new double[dim];
for (int i = 0; i < dim; i++) {
init[i] = ((i + 1) / (double) dim - 0.5);//init[i] = (Math.random() - 0.5);
var[i] = maxVar * (i + 1) / dim;
}
final double[] grads = new double[dim];
final DiffFunction f = new DiffFunction() {
public double[] derivativeAt(double[] x) {
double val = Math.PI * valuePow(x, Math.PI - 1);
for (int i = 0; i < dim; i++) {
grads[i] = x[i] * var[i] * val;
}
return grads;
}
public double valueAt(double[] x) {
return 1.0 + valuePow(x, Math.PI);
}
private double valuePow(double[] x, double pow) {
double val = 0.0;
for (int i = 0; i < dim; i++) {
val += x[i] * x[i] * var[i];
}
return Math.pow(val * 0.5, pow);
}
public int domainDimension() {
return dim;
}
};
SMDMinimizer min = new SMDMinimizer();
min.minimize(f, 1.0E-4, init);
}
}