/*
* File: LevenbergMarquardtEstimation.java
* Authors: Kevin R. Dixon
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright Apr 13, 2008, Sandia Corporation.
* Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
* license for use of this work by or on behalf of the U.S. Government.
* Export of this program may require a license from the United States
* Government. See CopyrightHistory.txt for complete details.
*
*/
package gov.sandia.cognition.learning.algorithm.regression;
import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationReferences;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.learning.function.cost.SumSquaredErrorCostFunction;
import gov.sandia.cognition.math.matrix.Matrix;
import gov.sandia.cognition.math.matrix.Vector;
/**
* Implementation of the nonlinear regression algorithm, known as
* Levenberg-Marquardt Estimation (or LMA). In its pure form, this algorithm
* computes the least-squares solution to the parameters of a functional form
* given a (weighted) set of input-output Vector pairs. While the algorithm
* is specified in terms of parameter gradients, it has been proven that a
* forward difference Jacobian approximation does not hurt the convergence
* properties of LMA. (Thus, you can use GradientDescendableApproximator with
* impunity.) LMA requires the storage of the explicit Jacobian, which may
* not be possible for large problems.
*
* When gradients are available, LMA appears competitive with BFGS for function
* evaluations, but BFGS is ~2 times faster than LMA.
*
* When gradients are not available, LMA slows down by a factor of ~3 and
* requires "M" more function evaluations, where "M" is the number of
* parameters in the function. However, LMA with approximated
* parameter Jacobian is ~4 times FASTER than Powell's minimization (and
* Powell's method requires ~8 times more function evaluations).
*
* Take-home message: use ParameterDifferentiableCostMinimizer with BFGS when
* gradient information is available, or LMA with an approximated Jacobian
* when it is not.
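*
* A rough usage sketch (the function "f" and the data collection "data"
* are placeholders for a user-supplied GradientDescendable and its
* input-output training pairs; the setObjectToOptimize/learn calls are
* inherited from the parent learner classes):
* <pre>
* LevenbergMarquardtEstimation lma = new LevenbergMarquardtEstimation();
* lma.setObjectToOptimize( f );
* GradientDescendable result = lma.learn( data );
* </pre>
* If "f" cannot compute parameter gradients, wrapping it in a
* GradientDescendableApproximator yields a forward-difference Jacobian
* approximation, as noted above.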
*
* Loosely based on Numerical Recipes in C, pp. 685-687
*
* @author Kevin R. Dixon
* @since 2.1
*/
@PublicationReferences(
references={
@PublicationReference(
author={
"William H. Press",
"Saul A. Teukolsky",
"William T. Vetterling",
"Brian P. Flannery"
},
title="Numerical Recipes in C, Second Edition",
type=PublicationType.Book,
year=1992,
pages={685,687},
notes="Section 15.5",
url="http://www.nrbook.com/a/bookcpdf.php"
),
@PublicationReference(
author="Wikipedia",
title="Levenberg-Marquardt algorithm",
type=PublicationType.WebPage,
year=2008,
url="http://en.wikipedia.org/wiki/Levenberg-Marquardt_algorithm"
)
}
)
public class LevenbergMarquardtEstimation
extends LeastSquaresEstimator
{
/**
* Default initial value of the damping factor {@value}
*/
public static final double DEFAULT_DAMPING = 1.0;
/**
* Default divisor of the damping factor: the damping factor is divided by
* this value on a cost-reducing (successful) iteration and multiplied by it
* on an unsuccessful one {@value}
*/
public static final double DEFAULT_DAMPING_DIVISOR = 10.0;
/**
* Default maximum number of iterations without improvement before stopping {@value}
*/
public static final int DEFAULT_MAX_ITERATIONS_WITHOUT_IMPROVEMENT = 4;
/**
* Number of sequential unsuccessful iterations without a cost-reducing step
*/
private int iterationsWithoutImprovement;
/**
* Maximum number of iterations without improvement before stopping
*/
private int maxIterationsWithoutImprovement;
/**
* Current damping factor for the ridge regression
*/
private double dampingFactor;
/**
* Divisor of the damping factor on a successful iteration, must be greater
* than 1.0, typically ~10.0
*/
private double dampingFactorDivisor;
/**
* Creates a new instance of LevenbergMarquardtEstimation
*/
public LevenbergMarquardtEstimation()
{
this( DEFAULT_DAMPING, DEFAULT_DAMPING_DIVISOR,
DEFAULT_MAX_ITERATIONS, DEFAULT_MAX_ITERATIONS_WITHOUT_IMPROVEMENT,
DEFAULT_TOLERANCE );
}
/**
* Creates a new instance of LevenbergMarquardtEstimation
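* For example, explicitly passing values equivalent to the defaults used by
* the no-argument constructor:
* <pre>
* LevenbergMarquardtEstimation lma = new LevenbergMarquardtEstimation(
* DEFAULT_DAMPING, DEFAULT_DAMPING_DIVISOR, DEFAULT_MAX_ITERATIONS,
* DEFAULT_MAX_ITERATIONS_WITHOUT_IMPROVEMENT, DEFAULT_TOLERANCE );
* </pre>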
* @param dampingFactor
* Current damping factor for the ridge regression
* @param dampingFactorDivisor
* Divisor of the damping factor on a successful iteration, must be greater
* than 1.0, typically ~10.0
* @param maxIterations
* Maximum iterations before stopping
* @param maxIterationsWithoutImprovement
* Maximum number of iterations without improvement before stopping
* @param tolerance
* Stopping criterion for the algorithm, typically ~1e-5
*/
public LevenbergMarquardtEstimation(
double dampingFactor,
double dampingFactorDivisor,
int maxIterations,
int maxIterationsWithoutImprovement,
double tolerance )
{
super( maxIterations, tolerance );
this.setDampingFactor(dampingFactor);
this.setDampingFactorDivisor(dampingFactorDivisor);
this.setMaxIterationsWithoutImprovement(maxIterationsWithoutImprovement);
}
/**
* Best (lowest-cost) parameters found so far; used to generate bestParametersCost
*/
private Vector bestParameters;
/**
* Cost cache associated with bestParameters
*/
private SumSquaredErrorCostFunction.Cache bestParametersCost;
@Override
protected boolean initializeAlgorithm()
{
this.setResult( this.getObjectToOptimize().clone() );
this.setIterationsWithoutImprovement(0);
this.bestParameters = this.getResult().convertToVector();
this.bestParametersCost = SumSquaredErrorCostFunction.Cache.compute(
this.getResult(), this.getData() );
this.setResultCost( this.bestParametersCost.parameterCost );
return true;
}
@Override
protected boolean step()
{
// These negatives are to compensate because the derivative is the
// negative of the error. Since we're computing the derivative, but
// the descent direction is the negative of the gradient, we need to
// scale the matrices by -1.0
Matrix JtJpI = this.bestParametersCost.JtJ.scale( -1.0 );
Vector Jte = this.bestParametersCost.Jte;
int M = JtJpI.getNumRows();
for( int i = 0; i < M; i++ )
{
// Again, the damping factor is subtracted to compensate for the
// negative between gradient and direction
double v = JtJpI.getElement(i, i);
JtJpI.setElement(i, i, v - this.getDampingFactor() );
}
// This is the ridge-regression (Tikhonov regularization) step to solve
// for the parameter change
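// (i.e. it computes dp = -(J^T J + lambda*I)^{-1} * Jte, where lambda is
// the current damping factor; the minus sign comes from the negated
// matrix constructed above)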
Vector trialParameters = JtJpI.solve(Jte);
trialParameters.plusEquals(this.bestParameters);
// If the trial parameters reduce the cost, then accept them
this.getResult().convertFromVector( trialParameters );
SumSquaredErrorCostFunction.Cache trialCost =
SumSquaredErrorCostFunction.Cache.compute( this.getResult(), this.getData() );
if( trialCost.parameterCost < this.bestParametersCost.parameterCost )
{
this.iterationsWithoutImprovement = 0;
this.dampingFactor /= this.dampingFactorDivisor;
// Test to see if we're done: the convergence measure scales the cost
// decrease by the norm of the trial parameters
double delta = trialParameters.norm2() *
(this.bestParametersCost.parameterCost - trialCost.parameterCost);
this.bestParameters = trialParameters;
this.bestParametersCost = trialCost;
this.setResultCost( this.bestParametersCost.parameterCost );
if( delta < this.getTolerance() )
{
return false;
}
}
// The trial parameters aren't better than the best parameters,
// so increase the regularization damping factor
else
{
this.dampingFactor *= this.dampingFactorDivisor;
this.iterationsWithoutImprovement++;
}
if( this.iterationsWithoutImprovement > this.getMaxIterationsWithoutImprovement() )
{
return false;
}
return true;
}
@Override
protected void cleanupAlgorithm()
{
// Make sure we've uploaded the best parameters into the function
// We won't necessarily have the best parameters uploaded if
// we didn't finish on the "delta" convergence condition
this.getResult().convertFromVector( this.bestParameters );
}
/**
* Getter for iterationsWithoutImprovement
* @return
* Number of sequential unsuccessful iterations without a cost-reducing step
*/
public int getIterationsWithoutImprovement()
{
return this.iterationsWithoutImprovement;
}
/**
* Setter for iterationsWithoutImprovement
* @param iterationsWithoutImprovement
* Number of sequential unsuccessful iterations without a cost-reducing step
*/
public void setIterationsWithoutImprovement(
int iterationsWithoutImprovement )
{
this.iterationsWithoutImprovement = iterationsWithoutImprovement;
}
/**
* Getter for maxIterationsWithoutImprovement
* @return
* Maximum number of iterations without improvement before stopping
*/
public int getMaxIterationsWithoutImprovement()
{
return this.maxIterationsWithoutImprovement;
}
/**
* Setter for maxIterationsWithoutImprovement
* @param maxIterationsWithoutImprovement
* Maximum number of iterations without improvement before stopping
*/
public void setMaxIterationsWithoutImprovement(
int maxIterationsWithoutImprovement )
{
this.maxIterationsWithoutImprovement = maxIterationsWithoutImprovement;
}
/**
* Getter for dampingFactor
* @return
* Current damping factor for the ridge regression
*/
public double getDampingFactor()
{
return this.dampingFactor;
}
/**
* Setter for dampingFactor
* @param dampingFactor
* Current damping factor for the ridge regression
*/
public void setDampingFactor(
double dampingFactor )
{
this.dampingFactor = dampingFactor;
}
/**
* Getter for dampingFactorDivisor
* @return
* Divisor of the damping factor on a successful iteration, must be greater
* than 1.0, typically ~10.0
*/
public double getDampingFactorDivisor()
{
return this.dampingFactorDivisor;
}
/**
* Setter for dampingFactorDivisor
* @param dampingFactorDivisor
* Divisor of the damping factor on a successful iteration, must be greater
* than 1.0, typically ~10.0
*/
public void setDampingFactorDivisor(
double dampingFactorDivisor )
{
this.dampingFactorDivisor = dampingFactorDivisor;
}
}