gov.sandia.cognition.learning.algorithm.minimization.line.LineMinimizerDerivativeBased
/*
* File: LineMinimizerDerivativeBased.java
* Authors: Kevin R. Dixon
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright Jun 18, 2008, Sandia Corporation.
* Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
* license for use of this work by or on behalf of the U.S. Government.
* Export of this program may require a license from the United States
* Government. See CopyrightHistory.txt for complete details.
*
*/
package gov.sandia.cognition.learning.algorithm.minimization.line;
import gov.sandia.cognition.learning.algorithm.minimization.line.interpolator.LineBracketInterpolator;
import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.learning.algorithm.minimization.line.interpolator.LineBracketInterpolatorHermiteParabola;
import gov.sandia.cognition.math.AbstractDifferentiableUnivariateScalarFunction;
import gov.sandia.cognition.math.DifferentiableUnivariateScalarFunction;
import gov.sandia.cognition.util.ObjectUtil;
/**
* This is an implementation of a line-minimization algorithm proposed by
* Fletcher that makes extensive use of first-order derivative information.
* The algorithm is provably correct and has good empirical performance.
*
* According to my test battery, this algorithm performs best using Hermite
* parabolic interpolators (LineBracketInterpolatorHermiteParabola).
*
* My test battery, LineMinimizerTestHarness, minimizes several different
* functions including cosine and the absolute value of a cubic polynomial.
* Here are the results as (function_evaluations, gradient_evaluations) pairs.
*
* LineBracketInterpolatorHermiteParabola:
* cosine=(4.25,3.36), absolute_cubic=(8.09,5.02).
*
* LineBracketInterpolatorHermiteCubic:
* cosine=(4.27,4.21), absolute_cubic=(7.52,6.55).
*
* LineBracketInterpolatorBrent:
* cosine=(5.22,3.98), absolute_cubic=(9.44,6.27).
*
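* A minimal construction sketch (the no-argument interpolator
* constructor is an assumption here; how the minimizer is subsequently
* driven depends on the surrounding minimization harness):
* <pre>{@code
* // Accept any solution at or below 0.0, e.g., a nonnegative cost.
* LineMinimizerDerivativeBased minimizer =
*     new LineMinimizerDerivativeBased( 0.0 );
*
* // Or supply a different bracket interpolator explicitly:
* LineMinimizerDerivativeBased cubic =
*     new LineMinimizerDerivativeBased(
*         new LineBracketInterpolatorHermiteCubic(), 0.0 );
* }</pre>
*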
*
* @author Kevin R. Dixon
* @since 2.2
*/
@PublicationReference(
author="R. Fletcher",
title="Practical Methods of Optimization, Second Edition",
type=PublicationType.Book,
year=1987,
pages={34, 39},
notes={
"Equation 2.6.2 and Equation 2.6.4",
"Fletcher assumes that the initial slope is negative (WOLOG), and this class automatically adjusts itself to positive-slope guesses."
}
)
public class LineMinimizerDerivativeBased
extends AbstractAnytimeLineMinimizer
{
/**
* Default curvature condition, giving a fairly accurate line search, {@value}.
*/
public final static double DEFAULT_CURVATURE_CONDITION = 0.1;
/**
* Default slope (sufficient-decrease) condition, giving a fairly accurate line search, {@value}.
*/
public final static double DEFAULT_SLOPE_CONDITION = DEFAULT_CURVATURE_CONDITION / 10.0;
/**
* Default interpolator to use to create a new candidate point to evaluate
*/
public final static LineBracketInterpolator<? super DifferentiableUnivariateScalarFunction>
DEFAULT_INTERPOLATOR = new LineBracketInterpolatorHermiteParabola();
/**
* Minimum value of the target function. In other words, the user will
* accept a solution less than or equal to minFunctionValue. For many
* applications 0.0 is a likely candidate (for cost functions, metrics,
* least squares, etc.)
*/
private double minFunctionValue;
/**
* Default minimum function value, {@value}.
*/
public final static double DEFAULT_MIN_FUNCTION_VALUE = 0.0;
/**
* Default constructor
*/
public LineMinimizerDerivativeBased()
{
this( DEFAULT_MIN_FUNCTION_VALUE );
}
/**
* Creates a new instance of LineMinimizerDerivativeBased
* @param minFunctionValue
* Minimum value of the target function. In other words, the user will
* accept a solution less than or equal to minFunctionValue. For many
* applications 0.0 is a likely candidate (for cost functions, metrics,
* least squares, etc.)
*/
public LineMinimizerDerivativeBased(
double minFunctionValue )
{
this( ObjectUtil.cloneSafe( DEFAULT_INTERPOLATOR ), minFunctionValue );
}
/**
* Creates a new instance of LineMinimizerDerivativeBased
* @param interpolator
* Type of algorithm to fit data points and find an interpolated minimum
* to the known points.
* @param minFunctionValue
* Minimum value of the target function. In other words, the user will
* accept a solution less than or equal to minFunctionValue. For many
* applications 0.0 is a likely candidate (for cost functions, metrics,
* least squares, etc.)
*/
public LineMinimizerDerivativeBased(
LineBracketInterpolator<? super DifferentiableUnivariateScalarFunction> interpolator,
double minFunctionValue )
{
super( interpolator );
this.setMinFunctionValue( minFunctionValue );
}
/**
* Direction of the search. Because Fletcher assumes the slope of the
* initialGuess is less than 0.0, we have to flip around the direction
* of search if the initial guess has positive slope. Thus, direction=1.0
* means that the initial slope was negative, while direction=-1.0 means
* that the initial slope was positive.
*/
private double direction;
/**
* Internal function used to map/remap/unmap the search direction.
*/
private InternalFunction internalFunction;
/**
* The Wolfe conditions define approximate line search stopping criteria.
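* Concretely, for a unit-step function phi(alpha) along the search
* direction, these consist of the sufficient-decrease (Goldstein)
* condition phi(alpha) <= phi(0) + rho*alpha*phi'(0) and the strict
* curvature condition |phi'(alpha)| <= sigma*|phi'(0)|, with
* 0 < rho < sigma < 1 (here rho = DEFAULT_SLOPE_CONDITION and
* sigma = DEFAULT_CURVATURE_CONDITION).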
*/
private WolfeConditions wolfe;
/**
* Maximum value of x in the search space. That is, the minimizer will not
* be greater than maxX.
*/
private double maxX;
/**
* Suggested value given in PMOO is 9.0, bottom of p.34; this implementation uses {@value}.
*/
protected static final double TAU1 = 5.0;
/**
* Suggested value given in PMOO is 0.05, top of p.36 (also given on p.69); this implementation uses {@value}.
*/
protected static final double TAU2 = 0.10;
/**
* Suggested value given in PMOO is 0.50, top of p.36, {@value}.
*/
protected static final double TAU3 = 0.5;
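// Note on the safeguard constants above: TAU1 caps how far the
// bracketing phase may extrapolate beyond the current step (see
// bracketingStep), while TAU2 and TAU3 keep the sectioning phase's
// interpolated trial point strictly inside the current bracket
// (see sectioningStep).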
@Override
protected boolean initializeAlgorithm()
{
boolean retval = super.initializeAlgorithm();
// Set up the internal optimization function
this.internalFunction = new InternalFunction();
// I will store the points as the bounds
this.setBracket( new LineBracket() );
Double initialGuessFunctionValue;
if( this.getInitialGuessFunctionValue() != null )
{
initialGuessFunctionValue = this.getInitialGuessFunctionValue();
}
else
{
initialGuessFunctionValue = this.data.evaluate( this.getInitialGuess() );
}
Double initialGuessSlope;
if( this.getInitialGuessSlope() != null )
{
initialGuessSlope = this.getInitialGuessSlope();
}
else
{
initialGuessSlope = this.data.differentiate( this.getInitialGuess() );
}
// The initial point will be considered 0.0
InputOutputSlopeTriplet initialTriplet = new InputOutputSlopeTriplet(
this.internalFunction.convertInputToInternal( this.getInitialGuess() ),
initialGuessFunctionValue, initialGuessSlope );
double initialSlope = initialTriplet.getSlope();
// This is the "standard" downhill optimization, that is, increasing "x"
// will initiall reduce the function
if( initialSlope < 0.0 )
{
this.direction = 1.0;
}
// Fletcher assumes the initial slope is downhill, so reverse directions
// if necessary
else
{
this.direction = -1.0;
initialTriplet.setSlope( initialSlope * this.direction );
}
this.getBracket().setLowerBound( initialTriplet );
// Look for a nearly flat initial slope and stop the search, because
// it's likely to be hopeless
if (Math.abs( initialSlope ) <= this.getTolerance()*1e-3)
{
// Map the internal-coordinate triplet back to a real-world point and stop
this.result = this.internalFunction.convertInputFromInternal( initialTriplet );
this.stop();
return true;
}
this.wolfe = new WolfeConditions(
initialTriplet, DEFAULT_SLOPE_CONDITION, DEFAULT_CURVATURE_CONDITION );
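// The sufficient-decrease line f(0) + slopeCondition*x*f'(0) hits the
// best acceptable value minFunctionValue at
// x = (minFunctionValue - f(0)) / (slopeCondition * f'(0)),
// so no acceptable step can lie beyond that point; use it to cap the
// search.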
double denom = this.wolfe.getSlopeCondition() * initialTriplet.getSlope();
this.maxX = (this.getMinFunctionValue() - initialTriplet.getOutput()) / denom;
// Here's the next point (alpha1)... the initial point becomes alpha0
double nextX = 1.0;
double fnextX = this.internalFunction.evaluate( nextX );
this.getBracket().setUpperBound(
new InputOutputSlopeTriplet( nextX, fnextX, null ) );
return retval;
}
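/**
* Fletcher's bracketing phase: steps (and extrapolates) downhill from
* the initial guess until it either satisfies the stopping conditions
* outright or traps a minimum between the bracket's bounds.
* @return
* true when bracketing is finished (a bracket or a result was found),
* false to keep bracketing
*/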
@Override
public boolean bracketingStep()
{
LineBracket bracket = this.getBracket();
// I'm storing the previous point (alpha_{i-1}) as the lower bound
// and the current point (alpha_i) as the upper bound.
// This is useful for interpolation.
// If we've already set the result, then we're done because we've
// found a satisfying point
InputOutputSlopeTriplet previousPoint = bracket.getLowerBound();
InputOutputSlopeTriplet currentPoint = bracket.getUpperBound();
if( currentPoint.getOutput() < this.getMinFunctionValue() )
{
this.result =
this.internalFunction.convertInputFromInternal( currentPoint );
return true;
}
if (!this.wolfe.evaluateGoldsteinCondition( currentPoint ) ||
currentPoint.getOutput() >= previousPoint.getOutput())
{
// We've found a valid bracket! So we're done bracketing!
bracket.setLowerBound( previousPoint );
bracket.setUpperBound( currentPoint );
return true;
}
// Compute the slope of the current point,
// if it hasn't already been computed
if( currentPoint.getSlope() == null )
{
currentPoint.setSlope(
this.internalFunction.differentiate( currentPoint.getInput() ) );
}
// If we meet the Wolfe conditions, then we're done already!!
if (this.wolfe.evaluateStrictCurvatureCondition( currentPoint.getSlope() ))
{
this.result =
this.internalFunction.convertInputFromInternal( currentPoint );
return true;
}
// We've found a point whose slope is increasing.
// Since the original point is assumed (forced) to have negative slope,
// this implies that somewhere between the original point and the
// current point, there exists a minimum.
// Furthermore, by induction, we can infer that "previousPoint" also
// had negative slope as well. This means that there exists a
// minimum somewhere between previous point and current point.
if( currentPoint.getSlope() >= 0.0 )
{
bracket.setLowerBound( currentPoint );
bracket.setUpperBound( previousPoint );
return true;
}
// We haven't bracketed a minimum, so let's find a promising point
double delta = currentPoint.getInput() - previousPoint.getInput();
double deltaPlusCurrent = currentPoint.getInput() + delta;
double nextX;
if (this.maxX <= deltaPlusCurrent)
{
nextX = this.maxX;
}
else
{
double minx = deltaPlusCurrent;
double maxx = Math.min( this.maxX, currentPoint.getInput() + TAU1 * delta );
if( minx > maxx )
{
double temp = minx;
minx = maxx;
maxx = temp;
}
// Let's interpolate between [minx,maxx] using the points we've
// got available
nextX = this.getInterpolator().findMinimum(
bracket, minx, maxx, this.internalFunction );
}
// We haven't found an appropriate bracket yet, so keep on trucking
bracket.setOtherPoint( previousPoint );
bracket.setLowerBound( currentPoint );
bracket.setUpperBound( new InputOutputSlopeTriplet(
nextX, this.internalFunction.evaluate( nextX ), null ) );
return false;
}
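/**
* Fletcher's sectioning phase: interpolates a trial point inside the
* bracket and shrinks the bracket around it until the Wolfe conditions
* are met or the bracket collapses below the tolerance.
* @return
* true to keep sectioning, false once a result has been found
*/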
@Override
public boolean sectioningStep()
{
LineBracket bracket = this.getBracket();
InputOutputSlopeTriplet a = bracket.getLowerBound();
InputOutputSlopeTriplet b = bracket.getUpperBound();
// See if the bracket has converged... if so, then stop
double bracketDelta = b.getInput() - a.getInput();
if( Math.abs(bracketDelta) < this.getTolerance() )
{
this.result = this.internalFunction.convertInputFromInternal(
(a.getOutput() < b.getOutput()) ? a : b );
return false;
}
double minx = a.getInput() + TAU2 * bracketDelta;
double maxx = b.getInput() - TAU3 * bracketDelta;
if( minx > maxx )
{
double temp = minx;
minx = maxx;
maxx = temp;
}
// Let's interpolate between [minx,maxx] using the configured interpolator
double alphaj = this.getInterpolator().findMinimum(
bracket, minx, maxx, this.internalFunction );
double falphaj = this.internalFunction.evaluate( alphaj );
InputOutputSlopeTriplet currentPoint =
new InputOutputSlopeTriplet( alphaj, falphaj, null );
// Let's check for convergence on the bracket
double midx = 0.5 * (minx + maxx);
double convergenceThreshold =
this.getTolerance()*Math.abs(b.getInput()) - 0.5*(maxx-minx);
// This checks for convergence along the x-axis and "flatness" on the
// y-axis
if( (Math.abs(midx-alphaj) <= convergenceThreshold) ||
(falphaj < this.getMinFunctionValue()) )
{
this.result = this.internalFunction.convertInputFromInternal(
currentPoint );
return false;
}
// Use the interpolated point to update the high-side bound
if (!this.wolfe.evaluateGoldsteinCondition( currentPoint ) ||
falphaj >= a.getOutput())
{
bracket.setOtherPoint( b );
b = currentPoint;
}
else
{
if( currentPoint.getSlope() == null )
{
currentPoint.setSlope(
this.internalFunction.differentiate( alphaj ) );
}
// We've met the Wolfe conditions, so we're done!
if (this.wolfe.evaluateStrictCurvatureCondition(
currentPoint.getSlope() ))
{
this.result = this.internalFunction.convertInputFromInternal(
currentPoint );
return false;
}
// Use the interpolated point to update the low-side bound
InputOutputSlopeTriplet previousA = a;
bracket.setOtherPoint( previousA );
a = currentPoint;
// See if we should update the high-side bound
// using the low-side if the slope has changed directions
if (bracketDelta * currentPoint.getSlope() >= 0.0)
{
bracket.setOtherPoint( b );
b = previousA;
}
}
bracket.setLowerBound( a );
bracket.setUpperBound( b );
return true;
}
/**
* Getter for minFunctionValue
* @return
* Minimum value of the target function. In other words, the user will
* accept a solution less than or equal to minFunctionValue.
*/
public double getMinFunctionValue()
{
return this.minFunctionValue;
}
/**
* Setter for minFunctionValue
* @param minFunctionValue
* Minimum value of the target function. In other words, the user will
* accept a solution less than or equal to minFunctionValue.
*/
public void setMinFunctionValue(
double minFunctionValue )
{
this.minFunctionValue = minFunctionValue;
}
/**
* Internal function used to map/remap/unmap the search direction.
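* The mapping is internal = direction * (input - initialGuess). For
* example, with initialGuess = 2.0 and direction = -1.0, the real-world
* input 1.5 maps to the internal value 0.5, and slopes reported back to
* the caller have their signs flipped accordingly.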
*/
public class InternalFunction
extends AbstractDifferentiableUnivariateScalarFunction
{
/**
* Converts a real-world "x" value to the internal values used inside
* the search algorithm. This compensates for reflecting the search
* space
* @param input
* Input value in the real-world
* @return
* X-axis value to send to the InternalFunction
*/
public double convertInputToInternal(
double input )
{
double x0 = LineMinimizerDerivativeBased.this.getInitialGuess();
double internalInput = LineMinimizerDerivativeBased.this.direction * (input-x0);
return internalInput;
}
/**
* Converts the internal x-axis value to real-world x-axis value
* @param internalInput
* internalInput to convert
* @return
* real-world x-axis value
*/
protected double convertInputFromInternal(
double internalInput )
{
double x0 = LineMinimizerDerivativeBased.this.getInitialGuess();
double input = x0 + LineMinimizerDerivativeBased.this.direction*internalInput;
return input;
}
/**
* Converts an InternalFunction InputOutputSlopeTriplet to a real-world
* InputOutputSlopeTriplet by unreflecting the input and flipping the
* sign of the slope (if the direction of search was backward).
* @param internalPoint
* InternalFunction-based point to manipulate
* @return
* Real-world value
*/
public InputOutputSlopeTriplet convertInputFromInternal(
InputOutputSlopeTriplet internalPoint )
{
InputOutputSlopeTriplet retval;
double input = this.convertInputFromInternal(
internalPoint.getInput() );
if( LineMinimizerDerivativeBased.this.direction > 0.0 )
{
retval = new InputOutputSlopeTriplet(
input, internalPoint.getOutput(), internalPoint.getSlope() );
}
else
{
Double m = (internalPoint.getSlope() != null) ? -internalPoint.getSlope() : null;
retval = new InputOutputSlopeTriplet(
input, internalPoint.getOutput(), m );
}
return retval;
}
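/**
* Evaluates the underlying function at the real-world point
* corresponding to the given internal input.
* @param internalInput internal x-axis value
* @return function value at the corresponding real-world input
*/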
public double evaluate(
double internalInput )
{
return LineMinimizerDerivativeBased.this.data.evaluate(
this.convertInputFromInternal( internalInput ) );
}
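/**
* Differentiates the underlying function at the corresponding
* real-world point, flipping the slope's sign when the search
* direction is reversed.
* @param internalInput internal x-axis value
* @return slope with respect to the internal x-axis
*/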
public double differentiate(
double internalInput )
{
return LineMinimizerDerivativeBased.this.direction *
LineMinimizerDerivativeBased.this.data.differentiate(
this.convertInputFromInternal( internalInput ) );
}
}
}