
cc.mallet.optimize.BackTrackLineSearch Maven / Gradle / Ivy
MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/**
@author Aron Culotta [email protected]
*/
/**
Numerical Recipes in C: p.385. lnsrch. A simple backtracking line
search. No attempt at accurately finding the true minimum is
made. The goal is only to ensure that BackTrackLineSearch will
return a position of higher value.
*/
package cc.mallet.optimize;
import java.util.logging.*;
import cc.mallet.optimize.LineOptimizer;
import cc.mallet.optimize.Optimizable;
import cc.mallet.types.MatrixOps;
//"Line Searches and Backtracking", p385, "Numeric Recipes in C"
public class BackTrackLineSearch implements LineOptimizer.ByGradient
{
    private static Logger logger = Logger.getLogger(BackTrackLineSearch.class.getName());

    Optimizable.ByGradientValue function;

    public BackTrackLineSearch (Optimizable.ByGradientValue optimizable) {
        this.function = optimizable;
    }

    final int maxIterations = 100;
    final double stpmax = 100;
    final double EPS = 3.0e-12;

    // termination conditions: either
    //   a) abs(delta x/x) < REL_TOLX for all coordinates
    //   b) abs(delta x) < ABS_TOLX for all coordinates
    //   c) sufficient function increase (uses ALF)
    private double relTolx = 1e-7;
    private double absTolx = 1e-4; // tolerance on absolute value difference
    final double ALF = 1e-4;
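
    // In the sufficient-increase test inside optimize(), a step of size lambda
    // along direction p is accepted when
    //     f(x + lambda*p) >= f(x) + ALF * lambda * (g . p),
    // where g is the gradient at x. A rejected step is shrunk by interpolation,
    // but never below one tenth of the previous lambda.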
    /**
     * Sets the tolerance on the relative change in the parameters.
     * The line search converges when abs(delta x / x) < tolx
     * for all coordinates. */
    public void setRelTolx (double tolx) { relTolx = tolx; }

    /**
     * Sets the tolerance on the absolute change in the parameters.
     * The line search converges when abs(delta x) < tolx
     * for all coordinates. */
    public void setAbsTolx (double tolx) { absTolx = tolx; }
    // initialStep is ignored. This is because, if the initial step is not 1.0,
    // it sometimes confuses the backtracking for reasons I don't
    // understand. (That is, the jump gets LARGER on iteration 1.)
    //
    // returns the fraction of the step size (alam) if a good step was found;
    // returns 0.0 if it could not step in the given direction
    public double optimize (double[] line, double initialStep) {
        double[] g, x, oldParameters;
        double slope, newSlope, temp, test, alamin, alam, alam2, tmplam;
        double rhs1, rhs2, a, b, disc, oldAlam;
        double f, fold, f2;

        g = new double[function.getNumParameters()];  // gradient
        x = new double[function.getNumParameters()];  // parameters
        oldParameters = new double[function.getNumParameters()];

        function.getParameters (x);
        System.arraycopy (x, 0, oldParameters, 0, x.length);
        function.getValueGradient (g);

        alam2 = tmplam = 0.0;
        f2 = fold = function.getValue();
        if (logger.isLoggable(Level.FINE)) {
            logger.fine ("ENTERING BACKTRACK\n");
            logger.fine ("Entering BackTrackLnSrch, value=" + fold + ",\ndirection.oneNorm:"
                         + MatrixOps.oneNorm(line) + " direction.infNorm:" + MatrixOps.infinityNorm(line));
        }
        assert (!MatrixOps.isNaN(g));

        // scale the step direction down if it is longer than stpmax
        double sum = MatrixOps.twoNorm(line);
        if (sum > stpmax) {
            logger.warning ("attempted step too big. scaling: sum=" + sum + ", stpmax=" + stpmax);
            MatrixOps.timesEquals (line, stpmax / sum);
        }

        // the directional derivative along "line" must be positive, since this search maximizes
        newSlope = slope = MatrixOps.dotProduct (g, line);
        logger.fine ("slope=" + slope);
        if (slope < 0) {
            throw new InvalidOptimizableException ("Slope = " + slope + " is negative");
        }
        if (slope == 0) {
            throw new InvalidOptimizableException ("Slope = " + slope + " is zero");
        }

        // find maximum lambda
        // converge when (delta x) / x < REL_TOLX for all coordinates.
        // the largest step size that triggers this threshold is
        // precomputed and saved in alamin
        test = 0.0;
        for (int i = 0; i < oldParameters.length; i++) {
            temp = Math.abs (line[i]) / Math.max (Math.abs (oldParameters[i]), 1.0);
            if (temp > test) { test = temp; }
        }
        alamin = relTolx / test;
        alam = 1.0;
        oldAlam = 0.0;
        int iteration = 0;

        // look for step size in direction given by "line"
        for (iteration = 0; iteration < maxIterations; iteration++) {
            // x = oldParameters + alam*line
            // initially, alam = 1.0, i.e. take full Newton step
            logger.fine ("BackTrack loop iteration " + iteration + ": alam=" +
                         alam + " oldAlam=" + oldAlam);
            logger.fine ("before step, x.1norm: " + MatrixOps.oneNorm(x) +
                         "\nalam: " + alam + "\noldAlam: " + oldAlam);
            assert (alam != oldAlam) : "alam == oldAlam";

            MatrixOps.plusEquals (x, line, alam - oldAlam);  // step
            logger.fine ("after step, x.1norm: " + MatrixOps.oneNorm(x));

            // check for convergence on delta x
            if ((alam < alamin) || smallAbsDiff (oldParameters, x)) {
                function.setParameters (oldParameters);
                f = function.getValue();
                logger.warning ("EXITING BACKTRACK: Jump too small (alamin=" + alamin + "). Exiting and using xold. Value=" + f);
                return 0.0;
            }

            function.setParameters (x);
            oldAlam = alam;
            f = function.getValue();
            logger.fine ("value=" + f);
            // sufficient function increase (Wolfe condition)
            if (f >= fold + ALF * alam * slope) {
                logger.fine ("EXITING BACKTRACK: value=" + f);
                if (f < fold) {
                    throw new IllegalStateException
                        ("Function did not increase: f=" + f + " < " + fold + "=fold");
                }
                return alam;
            }
            // if the value is infinite, we have jumped into unstable territory; scale down the jump
            else if (Double.isInfinite(f) || Double.isInfinite(f2)) {
                logger.warning ("Value is infinite after jump " + alam + ". f=" + f + ", f2=" + f2 + ". Scaling back step size...");
                tmplam = 0.2 * alam;
                if (alam < alamin) {  // convergence on delta x
                    function.setParameters (oldParameters);
                    f = function.getValue();
                    logger.warning ("EXITING BACKTRACK: Jump too small. Exiting and using xold. Value=" + f);
                    return 0.0;
                }
            }
            else {  // backtrack
                if (alam == 1.0) {
                    // first time through: maximize the quadratic model
                    // phi(lam) = fold + slope*lam + (f - fold - slope)*lam^2
                    tmplam = -slope / (2.0 * (f - fold - slope));
                }
                else {
                    // subsequent backtracks: cubic interpolation through the
                    // last two trial points (alam, f) and (alam2, f2)
                    rhs1 = f - fold - alam * slope;
                    rhs2 = f2 - fold - alam2 * slope;
                    a = (rhs1 / (alam * alam) - rhs2 / (alam2 * alam2)) / (alam - alam2);
                    b = (-alam2 * rhs1 / (alam * alam) + alam * rhs2 / (alam2 * alam2)) / (alam - alam2);
                    if (a == 0.0) {
                        tmplam = -slope / (2.0 * b);
                    }
                    else {
                        disc = b * b - 3.0 * a * slope;
                        if (disc < 0.0) {
                            tmplam = 0.5 * alam;
                        }
                        else if (b <= 0.0) {
                            tmplam = (-b + Math.sqrt(disc)) / (3.0 * a);
                        }
                        else {
                            tmplam = -slope / (b + Math.sqrt(disc));
                        }
                    }
                    if (tmplam > 0.5 * alam) {
                        tmplam = 0.5 * alam;  // lambda <= 0.5 lambda_1
                    }
                }
            }
            alam2 = alam;
            f2 = f;
            alam = Math.max (tmplam, 0.1 * alam);  // lambda >= 0.1*Lambda_1
        }
        if (iteration >= maxIterations) {
            throw new IllegalStateException ("Too many iterations.");
        }
        return 0.0;
    }

    // returns true iff we've converged based on absolute x difference
    private boolean smallAbsDiff (double[] x, double[] xold) {
        for (int i = 0; i < x.length; i++) {
            if (Math.abs (x[i] - xold[i]) > absTolx) {
                return false;
            }
        }
        return true;
    }
}
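
Below is a minimal usage sketch (an illustration, not part of the MALLET distribution). It drives BackTrackLineSearch on a hypothetical one-dimensional objective f(x) = -(x - 3)^2, implemented against the Optimizable.ByGradientValue interface used above; the Quadratic class and all names in it are invented for the example, while the BackTrackLineSearch constructor and optimize() come from the listing. Because MALLET optimizers maximize, the search direction must have a positive dot product with the gradient.

import cc.mallet.optimize.BackTrackLineSearch;
import cc.mallet.optimize.Optimizable;

public class BackTrackExample {

    // Hypothetical objective for the sketch: f(x) = -(x - 3)^2, maximized at x = 3.
    static class Quadratic implements Optimizable.ByGradientValue {
        private final double[] params = new double[] { 0.0 };
        public int getNumParameters () { return 1; }
        public void getParameters (double[] buffer) { buffer[0] = params[0]; }
        public double getParameter (int index) { return params[index]; }
        public void setParameters (double[] p) { params[0] = p[0]; }
        public void setParameter (int index, double v) { params[index] = v; }
        public double getValue () { return -(params[0] - 3.0) * (params[0] - 3.0); }
        public void getValueGradient (double[] buffer) { buffer[0] = -2.0 * (params[0] - 3.0); }
    }

    public static void main (String[] args) {
        Quadratic q = new Quadratic ();
        BackTrackLineSearch search = new BackTrackLineSearch (q);
        double[] direction = new double[] { 6.0 };  // the gradient at x = 0; slope = g . line = 36 > 0
        double alam = search.optimize (direction, 1.0);  // initialStep is ignored (see above)
        // The full Newton step overshoots to x = 6 (same value as x = 0), and a
        // single quadratic backtrack lands exactly on the maximizer: alam = 0.5, x = 3.
        System.out.println ("step fraction = " + alam + ", x = " + q.getParameter (0));
    }
}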