// edu.stanford.nlp.optimization.SMDMinimizer — artifact listing header (Maven / Gradle / Ivy)
package edu.stanford.nlp.optimization;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import edu.stanford.nlp.math.ArrayMath;
import edu.stanford.nlp.util.Pair;
/**
 * Stochastic Meta Descent (SMD) Minimizer, based on:
 *
 * "Accelerated training of conditional random fields with stochastic gradient methods",
 * S. V. N. Vishwanathan, Nicol N. Schraudolph, Mark W. Schmidt, Kevin P. Murphy.
 * Proceedings of the 23rd International Conference on Machine Learning (ICML '06),
 * June 2006. Publisher: ACM Press.
 *
 * The basic way to use the minimizer is with a null constructor, then
 * the simple minimize method:
 *
 * <pre>
 * Minimizer smd = new SMDMinimizer();
 * DiffFunction df = new SomeDiffFunction();
 * double tol = 1e-4;
 * double[] initial = getInitialGuess();
 * int maxIterations = someSafeNumber;
 * double[] minimum = smd.minimize(df, tol, initial, maxIterations);
 * </pre>
 *
 * Constructing with a null constructor will use the default values of:
 *
 * <pre>
 * batchSize = 15;
 * initialGain = 0.1;
 * useAlgorithmicDifferentiation = true;
 * </pre>
 *
 * @author Alex Kleeman
 * @version 1.0
 * @since 1.0
 */
public class SMDMinimizer extends StochasticMinimizer {
public double mu = 0.01;
public double lam = 1.0;
public double cPosDef = 0.00;
public double meta;
//DEBUG ONLY
public boolean printMinMax = false;
private double[] Hv,gains;
StochasticCalculateMethods method = null;
@Override
public void shutUp() {
this.quiet = true;
}
public void setBatchSize(int batchSize) {
bSize = batchSize;
}
private static NumberFormat nf = new DecimalFormat("0.000E0");
public SMDMinimizer() {
}
public SMDMinimizer(double initialSMDGain, int batchSize, StochasticCalculateMethods method, int passes) {
this(initialSMDGain, batchSize, method, passes, false);
}
public SMDMinimizer(double initGain, int batchSize,StochasticCalculateMethods method, int passes, boolean outputToFile){
StochasticMinimizer.bSize = batchSize;
StochasticMinimizer.gain = initGain;
this.method = method;
this.numPasses = passes;
this.outputIterationsToFile = outputToFile;
}
@Override
public double[] minimize(Function function, double functionTolerance, double[] initial) {
return minimize(function, functionTolerance, initial, -1);
}
@Override
protected void init(AbstractStochasticCachingDiffFunction func){
func.method = this.method;
gains = new double[x.length];
v = new double[x.length];
Hv = new double[x.length];
for(int i = 0; i{
SMDMinimizer parent = null;
public setMu(SMDMinimizer smd){parent = smd;}
public void set(Double in){
parent.mu = in ;
}
}
private class setLam implements PropertySetter{
SMDMinimizer parent = null;
public setLam(SMDMinimizer smd){parent = smd;}
public void set(Double in){
parent.lam = in ;
}
}
@Override
public Pair tune( edu.stanford.nlp.optimization.Function function,double[] initial, long msPerTest){
this.quiet = true;
this.lam = 0.9;
this.mu = tuneDouble(function,initial,msPerTest,new setMu(this),1e-8,1e-2);
this.lam = tuneDouble(function,initial,msPerTest,new setLam(this),0.1,1.0);
StochasticMinimizer.gain = tuneGain(function, initial, msPerTest, 1e-8,1.0);
StochasticMinimizer.bSize= tuneBatch(function,initial,msPerTest,1);
System.err.println("Results: gain: " + nf.format(StochasticMinimizer.gain) + " batch " + StochasticMinimizer.bSize + " mu" + nf.format(this.mu) + " lam" + nf.format(this.lam));
return new Pair(StochasticMinimizer.bSize,StochasticMinimizer.gain);
}
@Override
protected void takeStep(AbstractStochasticCachingDiffFunction dfunction){
dfunction.returnPreviousValues = true;
System.arraycopy(dfunction.HdotVAt(x,v,grad,bSize), 0, Hv, 0, Hv.length);
//Update the weights
for(int i = 0; i < x.length; i++){
meta = 1-mu*grad[i]*v[i];
if(0.5 > meta){
gains[i] = gains[i]*0.5;
}else{
gains[i] = gains[i]*meta;
}
//Update gain history
v[i] = lam*(1+cPosDef*gains[i])*v[i] - gains[i]*(grad[i] + lam*Hv[i]);
//Get the next X
newX[i] = x[i] - gains[i]*grad[i];
}
if(printMinMax){
say("vMin = " + ArrayMath.min(v) + " ");
say("vMax = " + ArrayMath.max(v) + " ");
say("gainMin = " + ArrayMath.min(gains) + " ");
say("gainMax = " + ArrayMath.max(gains) + " ");
}
}
@Override
protected String getName(){
int m = (int) (mu*1000);
int l = (int) (lam * 1000);
int g = (int) (gain*10000);
return "SMD" + bSize +"_mu" + m + "_lam" + l + "_g" + g ;
}
public static void main(String[] args) {
// optimizes test function using doubles and floats
// test function is (0.5 sum(x_i^2 * var_i)) ^ PI
// where var is a vector of random nonnegative numbers
// dimensionality is variable.
final int dim = 500000;
final double maxVar = 5;
final double[] var = new double[dim];
double[] init = new double[dim];
for (int i = 0; i < dim; i++) {
init[i] = ((i + 1) / (double) dim - 0.5);//init[i] = (Math.random() - 0.5);
var[i] = maxVar * (i + 1) / dim;
}
final double[] grads = new double[dim];
final DiffFunction f = new DiffFunction() {
public double[] derivativeAt(double[] x) {
double val = Math.PI * valuePow(x, Math.PI - 1);
for (int i = 0; i < dim; i++) {
grads[i] = x[i] * var[i] * val;
}
return grads;
}
public double valueAt(double[] x) {
return 1.0 + valuePow(x, Math.PI);
}
private double valuePow(double[] x, double pow) {
double val = 0.0;
for (int i = 0; i < dim; i++) {
val += x[i] * x[i] * var[i];
}
return Math.pow(val * 0.5, pow);
}
public int domainDimension() {
return dim;
}
};
SMDMinimizer min = new SMDMinimizer();
min.minimize(f, 1.0E-4, init);
}
}