/*
* File: FunctionMinimizerBFGS.java
* Authors: Kevin R. Dixon
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright November 7, 2007, Sandia Corporation.
* Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
* license for use of this work by or on behalf of the U.S. Government.
* Export of this program may require a license from the United States
* Government. See CopyrightHistory.txt for complete details.
*
*/
package gov.sandia.cognition.learning.algorithm.minimization;
import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationReferences;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.learning.algorithm.minimization.line.LineMinimizer;
import gov.sandia.cognition.math.matrix.Matrix;
import gov.sandia.cognition.math.matrix.Vector;
import gov.sandia.cognition.util.ObjectUtil;
/**
* Implementation of the Broyden-Fletcher-Goldfarb-Shanno (BFGS) Quasi-Newton
 * nonlinear minimization algorithm. This algorithm is generally considered
 * to be among the most effective and efficient unconstrained nonlinear
 * optimization algorithms. It does, however, require the derivative of the
* function to optimize. Furthermore, it requires the storage of an
* approximation of the Hessian inverse (BFGS does not invert any matrix,
* it merely approximates the inverse explicitly... clever, eh?), which is an
* N-by-N matrix, where N is the size of the input space.
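 * For example, minimizing over a 1000-dimensional input space requires
 * storing on the order of a million doubles (roughly 8 megabytes) just for
 * the Hessian-inverse approximation.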
*
 * In practice, one can use approximated Jacobians with BFGS and still obtain
 * good convergence results, so one can use, for example,
 * {@code GradientDescendableApproximator} (when used for parameter cost
 * minimization) when exact gradients are not available. Using an approximated
 * Jacobian tends to slow down BFGS by a factor of about three, but it still
 * appears to generally outperform derivative-free minimization techniques,
 * such as Powell's direction-set method. Also, in my experience, BFGS appears
 * to outperform Levenberg-Marquardt estimation for parameter estimation.
*
* Again, just to recap: BFGS appears to be the method of choice when
* minimizing against a cost function (using exact or approximated Jacobians).
*
* Note that there is a reduced-memory implementation of BFGS, called L-BFGS,
* that reduces the memory needed to store the Hessian inverse. We have not
* yet implemented this.
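 * <p>
 * To make these recommendations concrete, here is a rough usage sketch (not a
 * complete program). {@code MyCostFunction} stands in for a hypothetical,
 * user-supplied {@code DifferentiableEvaluator} providing the cost function
 * and its gradient, and the generic signatures shown are illustrative:
 * <pre>{@code
 * DifferentiableEvaluator<Vector, Double, Vector> f = new MyCostFunction();
 * FunctionMinimizerBFGS bfgs = new FunctionMinimizerBFGS();
 * bfgs.setInitialGuess( VectorFactory.getDefault().copyValues( -1.2, 1.0 ) );
 * InputOutputPair<Vector, Double> result = bfgs.learn( f );
 * Vector xMin = result.getInput();    // estimated location of the minimum
 * double fMin = result.getOutput();   // function value at that location
 * }</pre>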
*
* @author Kevin R. Dixon
* @since 2.0
*
*/
@PublicationReferences(
references={
@PublicationReference(
author="R. Fletcher",
title="Practical Methods of Optimization, Second Edition",
type=PublicationType.Book,
year=1987,
pages=55,
notes="Section 3.2, Equation 3.2.12"
)
,
@PublicationReference(
author="Wikipedia",
title="BFGS method",
type=PublicationType.WebPage,
year=2008,
url="http://en.wikipedia.org/wiki/BFGS_method"
)
,
@PublicationReference(
author={
"William H. Press",
"Saul A. Teukolsky",
"William T. Vetterling",
"Brian P. Flannery"
},
title="Numerical Recipes in C, Second Edition",
type=PublicationType.Book,
year=1992,
pages={428,429},
notes="Section 10.7",
url="http://www.nrbook.com/a/bookcpdf.php"
)
}
)
public class FunctionMinimizerBFGS
extends FunctionMinimizerQuasiNewton
{
/**
* Creates a new instance of FunctionMinimizerBFGS
*/
public FunctionMinimizerBFGS()
{
this( ObjectUtil.cloneSafe( DEFAULT_LINE_MINIMIZER ) );
}
/**
* Creates a new instance of FunctionMinimizerBFGS
* @param lineMinimizer
* Work-horse algorithm that minimizes the function along a direction
*/
public FunctionMinimizerBFGS(
        LineMinimizer<?> lineMinimizer )
{
super( lineMinimizer, null, DEFAULT_TOLERANCE, DEFAULT_MAX_ITERATIONS );
}
/**
* Creates a new instance of FunctionMinimizerBFGS
*
 * @param lineMinimizer
 * Work-horse algorithm that minimizes the function along a direction
 * @param initialGuess Initial guess about the minimum of the function
 * @param tolerance Tolerance of the minimization algorithm, must be >= 0.0, typically ~1e-10
 * @param maxIterations Maximum number of iterations, must be > 0, typically ~100
*/
public FunctionMinimizerBFGS(
        LineMinimizer<?> lineMinimizer,
Vector initialGuess,
double tolerance,
int maxIterations )
{
super( lineMinimizer, initialGuess, tolerance, maxIterations );
}
    @Override
    public boolean updateHessianInverse(
Matrix hessianInverse,
Vector delta,
Vector gamma )
{
return FunctionMinimizerBFGS.BFGSupdateRule(
hessianInverse, delta, gamma, this.getTolerance() );
}
/**
* BFGS Quasi-Newton update rule
* @param hessianInverse
* Current Hessian inverse estimate, will be modified
     * @param delta
     * Change in the input (x) between iterations
     * @param gamma
     * Change in the gradient between iterations
     * @param tolerance
     * Tolerance of the algorithm
     * @return
     * true if the Hessian inverse was updated, false if the update was
     * skipped because delta-transpose-gamma was too close to zero
*/
@PublicationReference(
author="R. Fletcher",
title="Practical Methods of Optimization, Second Edition",
type=PublicationType.Book,
year=1987,
pages=55,
notes="Section 3.2, Equation 3.2.12"
)
public static boolean BFGSupdateRule(
Matrix hessianInverse,
Vector delta,
Vector gamma,
double tolerance )
{
int M = hessianInverse.getNumRows();
// This is formula 3.2.12 on p.55 in PMOO
// Solving for Fletcher's "H"
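        // In matrix form, with d = delta, g = gamma, and H = hessianInverse,
        // the update is:
        //   H <- H + [ (1 + g'Hg/(d'g)) * d*d' - d*(Hg)' - (Hg)*d' ] / (d'g)
        // The element-wise loops below apply exactly this rank-two update,
        // exploiting the symmetry of H.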
Vector Higamma = hessianInverse.times( gamma );
double deltaTgamma = delta.dotProduct( gamma );
// If we're close to singular, then skip the Hessian update.
// Some people add some clever resetting code here.
// I'll just skip for now.
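        // Concretely, the update is skipped when
        //   |delta'gamma| <= sqrt(tolerance) * ||delta|| * ||gamma||,
        // that is, when the measured curvature delta'gamma is negligible and
        // the division by deltaTgamma below would be numerically unreliable.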
if( Math.sqrt( tolerance * delta.norm2Squared() * gamma.norm2Squared() )
>= Math.abs(deltaTgamma) )
{
return false;
}
double term1 = 1.0 + (gamma.dotProduct( Higamma ) / deltaTgamma);
for( int i = 0; i < M; i++ )
{
double deltai = delta.getElement( i );
double Higammai = Higamma.getElement( i );
// Since the Hessian inverse is symmetric, we can just iterate
// through the bottom half of the matrix and mirror the values.
for( int j = 0; j <= i; j++ )
{
double oldValue = hessianInverse.getElement( i, j );
double change = term1*deltai*delta.getElement( j );
change -= deltai*Higamma.getElement(j) + Higammai*delta.getElement(j);
change /= deltaTgamma;
double newValue = oldValue + change;
                // Mirror the value into the upper half (the Hessian inverse
                // is symmetric), but don't double-update the diagonal.
hessianInverse.setElement( i, j, newValue );
if( i != j )
{
hessianInverse.setElement( j, i, newValue );
}
}
}
return true;
}
}