/*
* File: FunctionMinimizerConjugateGradient.java
* Authors: Kevin R. Dixon
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright November 7, 2007, Sandia Corporation. Under the terms of Contract
* DE-AC04-94AL85000, there is a non-exclusive license for use of this work by
* or on behalf of the U.S. Government. Export of this program may require a
* license from the United States Government. See CopyrightHistory.txt for
* complete details.
*
*
*/
package gov.sandia.cognition.learning.algorithm.minimization;
import gov.sandia.cognition.learning.algorithm.minimization.line.LineMinimizerDerivativeBased;
import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationReferences;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.learning.algorithm.minimization.line.DirectionalVectorToDifferentiableScalarFunction;
import gov.sandia.cognition.learning.algorithm.minimization.line.LineMinimizer;
import gov.sandia.cognition.learning.data.DefaultInputOutputPair;
import gov.sandia.cognition.math.DifferentiableEvaluator;
import gov.sandia.cognition.math.matrix.Vector;
/**
 * The conjugate gradient (CG) method is a family of algorithms for finding
 * the unconstrained local minimum of a nonlinear function. CG algorithms
 * find the local minimum by running a line search along successive
 * "conjugate gradient" directions, computed from first-order (gradient)
 * information. The particular approaches vary only slightly, according to
 * how the search direction is updated. However, in my experience, the
 * Liu-Storey CG variant (FunctionMinimizerLiuStorey) performs the best.
 *
 * All CG variants tend to require more function/gradient evaluations than
 * their Quasi-Newton cousins. However, CG methods require only O(N) storage,
 * whereas Quasi-Newton algorithms (FunctionMinimizerQuasiNewton) require
 * O(N*N) storage, where N is the dimensionality of the input space. So, if
 * your input space is large, then CG algorithms may be the method of choice.
 * In any case, the Liu-Storey CG variant tends to perform fairly well
 * compared to the best Quasi-Newton algorithms, BFGS in particular.
*
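 * A minimal usage sketch (illustrative assumptions: the concrete
 * FunctionMinimizerLiuStorey subclass, an initial guess {@code x0}, and a
 * user-supplied DifferentiableEvaluator {@code f}):
 * <pre>{@code
 * FunctionMinimizerConjugateGradient cg = new FunctionMinimizerLiuStorey();
 * cg.setInitialGuess( x0 );
 * InputOutputPair<Vector, Double> minimum = cg.learn( f );
 * double fmin = minimum.getOutput();
 * }</pre>
 *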
* @see FunctionMinimizerQuasiNewton
*
* @author Kevin R. Dixon
* @since 2.1
*/
@PublicationReferences(
references={
@PublicationReference(
author="R. Fletcher",
title="Practical Methods of Optimization, Second Edition",
type=PublicationType.Book,
year=1987,
pages={80,87},
notes="Section 4.1"
),
@PublicationReference(
author="Wikipedia",
title="Nonlinear conjugate gradient method",
type=PublicationType.WebPage,
url="http://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method",
year=2008
),
@PublicationReference(
author={
"William H. Press",
"Saul A. Teukolsky",
"William T. Vetterling",
"Brian P. Flannery"
},
title="Numerical Recipes in C, Second Edition",
type=PublicationType.Book,
year=1992,
pages={423,424},
notes="Section 10.6",
url="http://www.nrbook.com/a/bookcpdf.php"
)
}
)
public abstract class FunctionMinimizerConjugateGradient
    extends AbstractAnytimeFunctionMinimizer<Vector, Double, DifferentiableEvaluator<? super Vector, Double, Vector>>
{
/**
* Default maximum number of iterations before stopping, {@value}
*/
public static final int DEFAULT_MAX_ITERATIONS = 1000;
/**
* Default tolerance, {@value}
*/
public static final double DEFAULT_TOLERANCE = 1e-5;
/**
 * Test for convergence on change in function value, {@value}
*/
private static final double TOLERANCE_DELTA_Y = 1e-10;
/**
* Default line minimization algorithm, LineMinimizerDerivativeBased
*/
    public static final LineMinimizer<?> DEFAULT_LINE_MINIMIZER =
new LineMinimizerDerivativeBased();
// new LineMinimizerDerivativeFree();
/**
* Work-horse algorithm that minimizes the function along a direction
*/
    private LineMinimizer<?> lineMinimizer;
/**
* Creates a new instance of FunctionMinimizerConjugateGradient
*
 * @param lineMinimizer
 * Work-horse algorithm that minimizes the function along a direction
 * @param initialGuess
 * Initial guess about the minimum of the function
 * @param tolerance
 * Tolerance of the minimization algorithm, must be >= 0.0, typically ~1e-10
 * @param maxIterations
 * Maximum number of iterations, must be > 0, typically ~100
*/
public FunctionMinimizerConjugateGradient(
        LineMinimizer<?> lineMinimizer,
Vector initialGuess,
double tolerance,
int maxIterations )
{
super( initialGuess, tolerance, maxIterations );
this.setLineMinimizer( lineMinimizer );
}
/**
 * Function that maps an Evaluator<Vector,Double> onto an
 * Evaluator<Double,Double> using a set point, direction, and scale factor
*/
protected DirectionalVectorToDifferentiableScalarFunction lineFunction;
/**
* Gradient at the current guess
*/
private Vector gradient;
/**
* Getter for lineMinimizer
* @return
* Work-horse algorithm that minimizes the function along a direction
*/
    public LineMinimizer<?> getLineMinimizer()
{
return this.lineMinimizer;
}
/**
* Setter for lineMinimizer
* @param lineMinimizer
* Work-horse algorithm that minimizes the function along a direction
*/
public void setLineMinimizer(
        LineMinimizer<?> lineMinimizer )
{
this.lineMinimizer = lineMinimizer;
}
@Override
protected boolean initializeAlgorithm()
{
        this.result = new DefaultInputOutputPair<Vector, Double>(
this.initialGuess, this.data.evaluate( this.initialGuess ) );
this.gradient = this.data.differentiate( this.initialGuess );
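        // The first search direction is steepest descent: d0 = -gradient(x0).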
this.lineFunction = new DirectionalVectorToDifferentiableScalarFunction(
this.data, this.initialGuess, this.gradient.scale(-1.0) );
return true;
}
@Override
protected boolean step()
{
// Rename the function "this.data" to "f" to minimize confusion.
        DifferentiableEvaluator<? super Vector, ? extends Double, Vector> f = this.data;
Vector xold = this.result.getInput();
// Find the minimum along this search direction
this.result = this.lineMinimizer.minimizeAlongDirection(
this.lineFunction, this.result.getOutput(), this.gradient );
Vector xnew = this.result.getInput();
double fnew = this.result.getOutput();
// Save off the previous gradient
Vector gradientOld = this.gradient;
// See if I've already computed the gradient information
// NOTE: It's possible that there's still an inefficiency here.
// For example, we could have computed the gradient for "xnew"
// previous to the last evaluation. But this would require a
// tremendous amount of bookkeeping and memory.
if( (this.lineFunction.getLastGradient() != null) &&
(this.lineFunction.getLastGradient().getInput().equals( xnew )) )
{
this.gradient = this.lineFunction.getLastGradient().getOutput();
}
else
{
this.gradient = f.differentiate( xnew );
}
// Test for almost zero-gradient convergence
if( MinimizationStoppingCriterion.convergence(
xnew, fnew, this.gradient, xnew.minus( xold ), this.getTolerance() ) )
{
return false;
}
double beta;
// This is how often we will reset the search direction and
// re-initialize it as the direction of steepest descent
int resetPeriod = this.gradient.getDimensionality()*2;
if( ((this.getIteration()+1) % resetPeriod) == 0 )
{
beta = 0.0;
}
else
{
beta = this.computeScaleFactor( this.gradient, gradientOld );
}
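        // Conjugate-direction update: dNew = beta*dOld - gradientNew.
        // When beta == 0.0, this resets to the steepest-descent direction.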
Vector newDirection =
this.lineFunction.getDirection().scale( beta ).minus( this.gradient );
this.lineFunction.setDirection( newDirection );
this.lineFunction.setVectorOffset( xnew );
return true;
}
@Override
protected void cleanupAlgorithm()
{
}
/**
* Computes the conjugate gradient parameter for the particular update
* scheme.
* @param gradientCurrent
* Gradient at the current evaluation point
* @param gradientPrevious
* Gradient at the previous evaluation point
* @return
* "beta" scale factor
*/
protected abstract double computeScaleFactor(
Vector gradientCurrent,
Vector gradientPrevious );
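    // Illustrative sketch (an assumption, not part of this class): a subclass
    // implementing the classic Fletcher-Reeves update could compute beta as
    //
    //     protected double computeScaleFactor(
    //         Vector gradientCurrent,
    //         Vector gradientPrevious )
    //     {
    //         double num = gradientCurrent.dotProduct( gradientCurrent );
    //         double den = gradientPrevious.dotProduct( gradientPrevious );
    //         return num / den;
    //     }
    //
    // whereas a Polak-Ribiere subclass would instead use
    // gradientCurrent.dotProduct( gradientCurrent.minus( gradientPrevious ) )
    // as the numerator.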
}