/*
* File: FletcherXuHybridEstimation.java
* Authors: Kevin R. Dixon
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright Jul 4, 2008, Sandia Corporation.
* Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
* license for use of this work by or on behalf of the U.S. Government.
* Export of this program may require a license from the United States
* Government. See CopyrightHistory.txt for complete details.
*
*/
package gov.sandia.cognition.learning.algorithm.regression;
import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationReferences;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.learning.algorithm.minimization.FunctionMinimizerBFGS;
import gov.sandia.cognition.learning.algorithm.minimization.MinimizationStoppingCriterion;
import gov.sandia.cognition.learning.algorithm.minimization.line.DirectionalVectorToDifferentiableScalarFunction;
import gov.sandia.cognition.learning.algorithm.minimization.line.LineMinimizer;
import gov.sandia.cognition.learning.algorithm.minimization.line.LineMinimizerDerivativeFree;
import gov.sandia.cognition.learning.data.InputOutputPair;
import gov.sandia.cognition.learning.function.cost.SumSquaredErrorCostFunction;
import gov.sandia.cognition.math.matrix.Matrix;
import gov.sandia.cognition.math.matrix.MatrixFactory;
import gov.sandia.cognition.math.matrix.Vector;
import gov.sandia.cognition.util.ObjectUtil;
/**
* The Fletcher-Xu hybrid estimation method for solving nonlinear
* least-squares parameter-estimation problems. The FX method is a hybrid
* between the BFGS Quasi-Newton method and the Gauss-Newton minimization
* procedure. We have slightly modified the original algorithm to choose
* between the BFGS and Levenberg-Marquardt (Tikhonov regularization, or
* ridge regression) update formulae, as the LMA update is more stable than
* Gauss-Newton and produces better results on my test battery.
*
* The motivation behind the FX hybrid is that Gauss-Newton (and
* Levenberg-Marquardt) has quadratic convergence near the optimum, but only
* linear convergence when the parameters are far from optimal. BFGS always
* demonstrates superlinear convergence, even on nonoptimal parameter sets.
* The FX hybrid attempts to use BFGS when the solutions are far from optimal,
* but switches to Gauss-Newton (Levenberg-Marquardt in our implementation)
* when its quadratic convergence properties can be brought to bear.
*
* Generally speaking, FX hybrid is the most efficient and effective
* parameter-estimation procedure I know of. However, FX requires storing an
* estimated inverse Hessian, which takes O(N*N) space, where "N" is the
* number of parameters to estimate.
*
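* A rough usage sketch; the GradientDescendable "model" (initial parameter
* guess) and the Collection of InputOutputPair<Vector,Vector> "data" are
* hypothetical placeholders supplied by the caller:
* <pre>{@code
* // "model" and "data" are placeholders, not part of this class
* FletcherXuHybridEstimation learner = new FletcherXuHybridEstimation();
* learner.setObjectToOptimize( model );
* GradientDescendable result = learner.learn( data );
* }</pre>
*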
* @author Kevin R. Dixon
* @since 2.1
*/
@PublicationReferences(
references={
@PublicationReference(
author="R. Fletcher",
title="Practical Methods of Optimization, Second Edition",
type=PublicationType.Book,
year=1987,
pages={116,117},
notes="Section 6.1 motivates the algorithm w.r.t. Gauss-Newton, BFGS, and Levenberg-Marquardt"
)
,
@PublicationReference(
author={
"R. Fletcher",
"C. Xu"
},
title="Hybrid Methods for Nonlinear Least Squares",
type=PublicationType.Journal,
year=1987,
pages={371,389},
publication="Institute of Mathematics and its Applications Journal of Numerical Analysis"
)
}
)
public class FletcherXuHybridEstimation
extends LeastSquaresEstimator
{
/**
* Reduction test from Equation 6.1.16 in Fletcher's PMOO; the value 0.2 is
* given on page 117. Lower values result in more Gauss-Newton steps, larger
* values (up to 1.0) result in more BFGS steps. Default is {@value}.
* In my test battery, 0.2 also performs the best.
*/
public static final double DEFAULT_REDUCTION_TEST = 0.2;
/**
* Amount by which the damping factor is divided on a cost-reducing
* (Levenberg-Marquardt) iteration and multiplied on a BFGS iteration,
* {@value}
*/
public static final double DEFAULT_DAMPING_DIVISOR = 2.0;
/**
* Default line minimization algorithm, LineMinimizerDerivativeFree
*/
public static final LineMinimizer<?> DEFAULT_LINE_MINIMIZER =
new LineMinimizerDerivativeFree();
/**
* Workhorse algorithm that finds the minimum along a particular direction
*/
private LineMinimizer<?> lineMinimizer;
/**
* Reduction test for switching between BFGS and Levenberg-Marquardt, must
* be in [0,1]. Lower values result in more Levenberg-Marquardt steps,
* larger values result in more BFGS steps.
*/
private double reductionTest;
/**
* Amount to modify the damping factor, typically 2.0 or 10.0
*/
private double dampingFactorDivisor;
/**
* Creates a new instance of FletcherXuHybridEstimation
*/
public FletcherXuHybridEstimation()
{
this( ObjectUtil.cloneSafe( DEFAULT_LINE_MINIMIZER ) );
}
/**
* Creates a new instance of FletcherXuHybridEstimation
* @param lineMinimizer The minimizer
*/
public FletcherXuHybridEstimation(
LineMinimizer<?> lineMinimizer )
{
this( lineMinimizer, DEFAULT_REDUCTION_TEST );
}
/**
* Creates a new instance of FletcherXuHybridEstimation
*
* @param lineMinimizer
* Workhorse algorithm that finds the minimum along a particular direction
* @param reductionTest
* Reduction test for switching between BFGS and Levenberg-Marquardt, must
* be in [0,1]. Lower values result in more Levenberg-Marquardt steps,
* larger values result in more BFGS steps.
*/
public FletcherXuHybridEstimation(
LineMinimizer<?> lineMinimizer,
double reductionTest )
{
this( lineMinimizer, reductionTest, DEFAULT_DAMPING_DIVISOR );
}
/**
* Creates a new instance of FletcherXuHybridEstimation
*
* @param lineMinimizer
* Workhorse algorithm that finds the minimum along a particular direction
* @param reductionTest
* Reduction test for switching between BFGS and Levenberg-Marquardt, must
* be in [0,1]. Lower values result in more Levenberg-Marquardt steps,
* larger values result in more BFGS steps.
* @param dampingFactorDivisor
* Amount to modify the damping factor, typically 2.0 or 10.0
*/
public FletcherXuHybridEstimation(
LineMinimizer<?> lineMinimizer,
double reductionTest,
double dampingFactorDivisor )
{
this( lineMinimizer, reductionTest, dampingFactorDivisor,
DEFAULT_MAX_ITERATIONS, DEFAULT_TOLERANCE );
}
/**
* Creates a new instance of FletcherXuHybridEstimation
*
* @param lineMinimizer
* Workhorse algorithm that finds the minimum along a particular direction
* @param reductionTest
* Reduction test for switching between BFGS and Levenberg-Marquardt, must
* be in [0,1]. Lower values result in more Levenberg-Marquardt steps,
* larger values result in more BFGS steps.
* @param dampingFactorDivisor
* Amount to modify the damping factor, typically 2.0 or 10.0
* @param maxIterations
* Maximum number of iterations before stopping
* @param tolerance
* Tolerance of the algorithm.
*/
public FletcherXuHybridEstimation(
LineMinimizer<?> lineMinimizer,
double reductionTest,
double dampingFactorDivisor,
int maxIterations,
double tolerance )
{
super( maxIterations, tolerance );
this.setLineMinimizer( lineMinimizer );
this.setReductionTest( reductionTest );
this.setDampingFactorDivisor( dampingFactorDivisor );
}
/**
* Last value of the parameter cost
*/
private SumSquaredErrorCostFunction.Cache lastCost;
/**
* Function that maps an Evaluator<Vector,Double> onto an
* Evaluator<Double,Double> using a set point, direction and scale factor
*/
private DirectionalVectorToDifferentiableScalarFunction lineFunction;
/**
* Estimated inverse of the Hessian (second derivative)
*/
private Matrix hessianInverse;
@Override
protected boolean initializeAlgorithm()
{
this.setResult( this.getObjectToOptimize().clone() );
this.getCostFunction().setCostParameters( this.getData() );
this.dampingFactor = 1.0;
this.lastCost = SumSquaredErrorCostFunction.Cache.compute(
this.getResult(), this.getData() );
ParameterDifferentiableCostMinimizer.ParameterCostEvaluatorDerivativeBased f =
new ParameterDifferentiableCostMinimizer.ParameterCostEvaluatorDerivativeBased(
this.getResult(), this.getCostFunction() );
// Load up the line function with the current parameters and the
// search direction, which is the negative gradient, in other words
// the direction of steepest descent
Vector parameters = this.getResult().convertToVector();
int M = parameters.getDimensionality();
this.lineFunction = new DirectionalVectorToDifferentiableScalarFunction(
f, parameters, this.lastCost.Jte );
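// Seed the inverse-Hessian estimate with a scaled identity; the BFGS
// update rule will refine this estimate as the iterations proceed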
this.hessianInverse =
MatrixFactory.getDefault().createIdentity( M, M ).scale( 0.5 );
return true;
}
/**
* Damping factor for the Levenberg-Marquardt ridge regression
*/
private double dampingFactor;
@Override
protected boolean step()
{
InputOutputPair<Vector,Double> result = this.getLineMinimizer().minimizeAlongDirection(
this.lineFunction, this.lastCost.parameterCost, this.lastCost.Jte );
Vector lastParameters = this.lineFunction.getVectorOffset();
Vector delta = result.getInput().minus( lastParameters );
this.lineFunction.setVectorOffset( result.getInput() );
this.getResult().convertFromVector( result.getInput() );
// If the trial parameters reduce the cost, then accept them
SumSquaredErrorCostFunction.Cache cache =
SumSquaredErrorCostFunction.Cache.compute(
this.getResult(), this.getData() );
this.setResultCost( cache.parameterCost );
// Equation 6.1.16 in Fletcher PMOO
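// That is, take a Levenberg-Marquardt step when the relative reduction
// (lastCost - cost) / lastCost is at least reductionTest; otherwise BFGS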
if( this.getReductionTest()*this.lastCost.parameterCost <=
(this.lastCost.parameterCost - cache.parameterCost) )
{
// On my test battery, taking a Levenberg-Marquardt step performs
// better here than a Gauss-Newton step.
// I've also tried pretty much every variant on these parameters,
// and this is what came out the best. I've also tried:
// Gauss-Newton: direction = cache.JtJ.solve(Jte);
// Steepest descent: direction = Jte;
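// The loop below forms the matrix -(J^T*J + dampingFactor*I), whose
// solve against J^T*e yields the damped parameter change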
Matrix JtJpI = cache.JtJ.scale(-1.0);
Vector Jte = cache.Jte;
int M = JtJpI.getNumRows();
for( int i = 0; i < M; i++ )
{
// Again, the damping factor is subtracted to compensate for the
// sign flip between the gradient and the search direction
double v = JtJpI.getElement(i, i);
JtJpI.setElement(i, i, v - this.dampingFactor );
}
// This is the ridge-regression (Tikhonov regularization) step to solve
// for the parameter change
Vector direction = JtJpI.solve(Jte);
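// The cost was reduced, so relax the damping toward a pure
// Gauss-Newton step on the next iteration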
this.dampingFactor /= this.getDampingFactorDivisor();
double directionNorm = direction.norm2();
if( directionNorm > GaussNewtonAlgorithm.STEP_MAX )
{
direction.scaleEquals( GaussNewtonAlgorithm.STEP_MAX / directionNorm );
}
// Take a Levenberg-Marquardt step
this.lineFunction.setDirection( direction );
}
else
{
// Suggested by Fletcher, PMOO p.117, Equation 6.1.15
// I've modified it slightly; the last term should be:
// cache.J.transpose().minus( lastCost.J ).times( cache.error )
// However, I don't cache the errors individually, so this is
// as close as I can get... It seems to be very effective.
Vector gamma = cache.JtJ.times( this.lineFunction.getDirection().scale( 2.0 ) ).plus(
cache.Jte.minus( lastCost.Jte ) );
FunctionMinimizerBFGS.BFGSupdateRule(
this.hessianInverse, delta, gamma, this.getTolerance() );
// Take a BFGS step
Vector direction = this.hessianInverse.times( cache.Jte );
this.lineFunction.setDirection( direction );
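// The reduction test failed, so increase the damping to make any
// subsequent Levenberg-Marquardt step more conservative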
this.dampingFactor *= this.getDampingFactorDivisor();
}
this.lastCost = cache;
return !MinimizationStoppingCriterion.convergence(
result.getInput(), result.getOutput(), delta, cache.Jte, this.getTolerance() );
}
@Override
protected void cleanupAlgorithm()
{
}
/**
* Getter for reduction test.
* @return
* Reduction test for switching between BFGS and Levenberg-Marquardt, must
* be in [0,1]. Lower values result in more Levenberg-Marquardt steps,
* larger values result in more BFGS steps.
*/
public double getReductionTest()
{
return this.reductionTest;
}
/**
* Setter for reduction test.
* @param reductionTest
* Reduction test for switching between BFGS and Levenberg-Marquardt, must
* be in [0,1]. Lower values result in more Levenberg-Marquardt steps,
* larger values result in more BFGS steps.
*/
public void setReductionTest(
double reductionTest )
{
if( (reductionTest < 0.0) ||
(reductionTest > 1.0) )
{
throw new IllegalArgumentException( "reductionTest must be in [0,1]" );
}
this.reductionTest = reductionTest;
}
/**
* Getter for dampingFactorDivisor
* @return
* Amount to modify the damping factor, typically 2.0 or 10.0
*/
public double getDampingFactorDivisor()
{
return this.dampingFactorDivisor;
}
/**
* Setter for dampingFactorDivisor
* @param dampingFactorDivisor
* Amount to modify the damping factor, typically 2.0 or 10.0
*/
public void setDampingFactorDivisor(
double dampingFactorDivisor )
{
this.dampingFactorDivisor = dampingFactorDivisor;
}
/**
* Getter for lineMinimizer
* @return
* Workhorse algorithm that finds the minimum along a particular direction
*/
public LineMinimizer<?> getLineMinimizer()
{
return this.lineMinimizer;
}
/**
* Setter for lineMinimizer
* @param lineMinimizer
* Workhorse algorithm that finds the minimum along a particular direction
*/
public void setLineMinimizer(
LineMinimizer<?> lineMinimizer )
{
this.lineMinimizer = lineMinimizer;
}
}