package org.vesalainen.math;
import java.io.Serializable;
import org.ejml.data.DenseMatrix64F;
import static org.ejml.ops.CommonOps.*;
import static org.ejml.ops.SpecializedOps.*;
/**
*
* This is a straightforward implementation of the Levenberg-Marquardt (LM) algorithm. LM is used to minimize
* non-linear cost functions:
* <pre>
* S(P) = Sum{ i=1..N , [y_i - f(x_i,P)]^2 }
* </pre>
* where P is the set of parameters being optimized.
* <p>
* In each iteration the parameters are updated using the following equations:
* <pre>
* P_k+1 = P_k - (H + λI)^-1 d
* d = (1/N) Sum{ i=1..N , (f(x_i;P_k) - y_i) * jacobian(:,i) }
* H = (1/N) Sum{ i=1..N , jacobian(:,i) * jacobian(:,i)^T }
* </pre>
* <p>
* Whenever possible the allocation of new memory is avoided. This is accomplished by reshaping matrices.
* A matrix that is reshaped won't grow unless the new shape requires more memory than it has available.
*
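* <p>
* A minimal usage sketch, assuming Java 8 lambdas; the linear model {@code y = p0 + p1*x} and the
* data values below are purely illustrative:
* <pre>{@code
* LevenbergMarquardt.Function line = (param, x, y) -> {
*     double p0 = param.get(0, 0);
*     double p1 = param.get(1, 0);
*     for (int j = 0; j < x.numRows; j++)
*     {
*         y.set(j, 0, p0 + p1 * x.get(j, 0));
*     }
* };
* LevenbergMarquardt lm = new LevenbergMarquardt(line);
* DenseMatrix64F initParam = new DenseMatrix64F(2, 1);          // start from p0 = p1 = 0
* DenseMatrix64F X = new DenseMatrix64F(3, 1, true, 0, 1, 2);   // inputs, one row per point
* DenseMatrix64F Y = new DenseMatrix64F(3, 1, true, 1, 3, 5);   // observed outputs
* if (lm.optimize(initParam, X, Y))
* {
*     DenseMatrix64F fitted = lm.getParameters();               // optimized p0, p1
* }
* }</pre>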
* @author Peter Abeles
*/
public class LevenbergMarquardt implements Serializable
{
private static final long serialVersionUID = 1L;
// How much the numerical Jacobian calculation perturbs the parameters.
// A better implementation would choose this delta adaptively; see Numerical Recipes.
private final static double DELTA = 1e-8;
private int iter1 = 25;
private int iter2 = 5;
private double maxDifference = 1e-8;
private double initialLambda;
// the function that is optimized
private Function func;
private JacobianFactory jacobianFactory;
// the optimized parameters and associated costs
private DenseMatrix64F param;
private double initialCost;
private double finalCost;
// used by matrix operations
private DenseMatrix64F d;
private DenseMatrix64F H;
private DenseMatrix64F negDelta;
private DenseMatrix64F tempParam;
private DenseMatrix64F A;
// variables used by the numerical jacobian algorithm
private DenseMatrix64F temp0;
private DenseMatrix64F temp1;
// used when computing d and H variables
private DenseMatrix64F tempDH;
// Where the numerical Jacobian is stored.
private DenseMatrix64F jacobian;
/**
* Creates a new instance that uses the provided cost function and a numerically computed Jacobian.
*
* @param func Cost function that is being optimized.
*/
public LevenbergMarquardt(Function func)
{
this(func, null);
}
/**
* Creates a new instance that uses the provided cost function and Jacobian factory.
*
* @param funcCost Cost function that is being optimized.
* @param jacobianFactory Computes the Jacobian analytically; if null, a numerical Jacobian is used instead.
*/
public LevenbergMarquardt( Function funcCost, JacobianFactory jacobianFactory )
{
this.initialLambda = 1;
// declare data to some initial small size. It will grow later on as needed.
int maxElements = 1;
int numParam = 1;
this.temp0 = new DenseMatrix64F(maxElements,1);
this.temp1 = new DenseMatrix64F(maxElements,1);
this.tempDH = new DenseMatrix64F(maxElements,1);
this.jacobian = new DenseMatrix64F(numParam,maxElements);
this.func = funcCost;
this.jacobianFactory = jacobianFactory;
this.param = new DenseMatrix64F(numParam,1);
this.d = new DenseMatrix64F(numParam,1);
this.H = new DenseMatrix64F(numParam,numParam);
this.negDelta = new DenseMatrix64F(numParam,1);
this.tempParam = new DenseMatrix64F(numParam,1);
this.A = new DenseMatrix64F(numParam,numParam);
}
public double getInitialCost() {
return initialCost;
}
public double getFinalCost() {
return finalCost;
}
public DenseMatrix64F getParameters() {
return param;
}
/**
* Finds the best fit parameters.
*
* @param initParam The initial set of parameters for the function.
* @param X The inputs to the function.
* @param Y The "observed" output of the function.
* @return true if it succeeded and false if it did not.
*/
public boolean optimize( DenseMatrix64F initParam ,
DenseMatrix64F X ,
DenseMatrix64F Y )
{
if (X.numRows == 0)
{
return false;
}
configure(initParam,X,Y);
// save the cost of the initial parameters so that it knows if it improves or not
initialCost = cost(param,X,Y);
// iterate until the difference between the costs is insignificant
// or it iterates too many times
if( !adjustParam(X, Y, initialCost) ) {
finalCost = Double.NaN;
return false;
}
return true;
}
/**
* Iterates until the decrease in cost falls below the configured maximum difference
* or the iteration limit is reached.
*/
private boolean adjustParam(DenseMatrix64F X, DenseMatrix64F Y,
double prevCost) {
// lambda adjusts how big of a step it takes
double lambda = initialLambda;
// the difference between the current and previous cost
double difference = 1000;
for( int iter = 0; iter < iter1 && difference > maxDifference ; iter++ ) {
// compute some variables based on the gradient
computeDandH(param,X,Y);
// try various step sizes and see if any of them improve the
// results over what has already been done
boolean foundBetter = false;
for( int i = 0; i < iter2; i++ ) {
computeA(A,H,lambda);
if( !solve(A,d,negDelta) ) {
return false;
}
// compute the candidate parameters
subtract(param, negDelta, tempParam);
double cost = cost(tempParam,X,Y);
if( cost < prevCost ) {
// the candidate parameters produced better results so use it
foundBetter = true;
param.set(tempParam);
difference = prevCost - cost;
prevCost = cost;
lambda /= 10.0;
} else {
lambda *= 10.0;
}
}
// it reached a point where it can't improve so exit
if( !foundBetter )
break;
}
finalCost = prevCost;
return true;
}
/**
* Performs sanity checks on the input data and reshapes internal matrices. Reshaping
* a matrix only allocates new memory when needed.
*/
protected void configure( DenseMatrix64F initParam , DenseMatrix64F X , DenseMatrix64F Y )
{
if( Y.getNumRows() != X.getNumRows() ) {
throw new IllegalArgumentException("Different vector lengths");
} else if( Y.getNumCols() != 1 /*|| X.getNumCols() != 1*/ ) {
throw new IllegalArgumentException("Inputs must be a column vector");
}
int numParam = initParam.getNumElements();
int numPoints = Y.getNumRows();
if( param.getNumElements() != initParam.getNumElements() ) {
// reshaping a matrix only allocates new memory when needed
this.param.reshape(numParam,1, false);
this.d.reshape(numParam,1, false);
this.H.reshape(numParam,numParam, false);
this.negDelta.reshape(numParam,1, false);
this.tempParam.reshape(numParam,1, false);
this.A.reshape(numParam,numParam, false);
}
param.set(initParam);
// reshaping a matrix only allocates new memory when needed
temp0.reshape(numPoints,1, false);
temp1.reshape(numPoints,1, false);
tempDH.reshape(numPoints,1, false);
jacobian.reshape(numParam,numPoints, false);
}
/**
* Computes the d and H terms, where d is the average error gradient and
* H is an approximation of the Hessian.
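* <p>
* In the index convention this class uses (row i of the Jacobian holds the partial derivative with
* respect to parameter i, column j corresponds to input point j), the quantities computed below are:
* <pre>
* d(i)   = (1/N) Sum{ j=1..N , (f(x_j;P) - y_j) * jacobian(i,j) }
* H(i,k) = (1/N) Sum{ j=1..N , jacobian(i,j) * jacobian(k,j) }
* </pre>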
*/
private void computeDandH( DenseMatrix64F param , DenseMatrix64F x , DenseMatrix64F y )
{
func.compute(param,x, tempDH);
subtractEquals(tempDH, y);
if (jacobianFactory != null)
{
jacobianFactory.computeJacobian(param, x, jacobian);
}
else
{
computeNumericalJacobian(param,x,jacobian);
}
int numParam = param.getNumElements();
int length = y.getNumElements();
// d = average{ (f(x_i;p) - y_i) * jacobian(:,i) }
for( int i = 0; i < numParam; i++ ) {
double total = 0;
for( int j = 0; j < length; j++ ) {
total += tempDH.get(j,0)*jacobian.get(i,j);
}
d.set(i,0,total/length);
}
// compute the approximation of the hessian
multTransB(jacobian,jacobian,H);
scale(1.0/length,H);
}
/**
* A = H + lambda*I
*
* where I is an identity matrix.
*/
private void computeA( DenseMatrix64F A , DenseMatrix64F H , double lambda )
{
final int numParam = param.getNumElements();
A.set(H);
for( int i = 0; i < numParam; i++ ) {
A.set(i,i, A.get(i,i) + lambda);
}
}
/**
* Computes the "cost" for the parameters given.
*
* <pre>
* cost = (1/N) Sum{ i=1..N , [f(x_i;P) - y_i]^2 }
* </pre>
*/
public double cost( DenseMatrix64F param , DenseMatrix64F X , DenseMatrix64F Y)
{
func.compute(param,X, temp0);
double error = diffNormF(temp0,Y);
return error*error / (double)X.numRows;
}
/**
* Computes a simple numerical Jacobian.
*
* @param param The set of parameters that the Jacobian is to be computed at.
* @param pt The input points at which the function is evaluated.
* @param deriv Where the jacobian will be stored
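* <p>
* A sketch of the forward-difference approximation the loop below evaluates, where e_i is the unit
* vector for parameter i:
* <pre>
* deriv(i,j) = [ f(x_j; P + DELTA*e_i) - f(x_j; P) ] / DELTA
* </pre>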
*/
protected void computeNumericalJacobian( DenseMatrix64F param ,
DenseMatrix64F pt ,
DenseMatrix64F deriv )
{
double invDelta = 1.0/DELTA;
func.compute(param,pt, temp0);
// compute the jacobian by perturbing the parameters slightly
// then seeing how it affects the results.
for( int i = 0; i < param.numRows; i++ ) {
param.data[i] += DELTA;
func.compute(param,pt, temp1);
// compute the difference between the two parameters and divide by the delta
add(invDelta,temp1,-invDelta,temp0,temp1);
// copy the results into the jacobian matrix
System.arraycopy(temp1.data,0,deriv.data,i*pt.numRows,pt.numRows);
param.data[i] -= DELTA;
}
}
/**
* Sets the outer (main) loop iteration count. Initially 25.
* @param iter1 maximum number of outer iterations
*/
public void setIter1(int iter1)
{
this.iter1 = iter1;
}
/**
* Sets the inner loop iteration count. Initially 5.
* @param iter2 maximum number of step-size attempts per outer iteration
*/
public void setIter2(int iter2)
{
this.iter2 = iter2;
}
/**
* Sets the maximum cost difference used as the convergence threshold.
* @param maxDifference iteration stops when the decrease in cost falls below this value
*/
public void setMaxDifference(double maxDifference)
{
this.maxDifference = maxDifference;
}
/**
* The function that is being optimized.
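* <p>
* For example, an exponential model {@code y = p0 * exp(p1 * x)} (purely illustrative, assuming
* Java 8 lambdas) could be supplied as:
* <pre>{@code
* LevenbergMarquardt.Function expModel = (param, x, y) -> {
*     double p0 = param.get(0, 0);
*     double p1 = param.get(1, 0);
*     for (int j = 0; j < x.numRows; j++)
*     {
*         y.set(j, 0, p0 * Math.exp(p1 * x.get(j, 0)));
*     }
* };
* }</pre>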
*/
public interface Function {
/**
* Computes the output for each value in matrix x given the set of parameters.
*
* @param param The parameters of the function.
* @param x the input points.
* @param y the resulting output.
*/
public void compute( DenseMatrix64F param , DenseMatrix64F x , DenseMatrix64F y );
}
/**
* Creates the Jacobian of the function analytically. The supplied matrix has one row per parameter
* and one column per input point.
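* <p>
* A sketch of an analytic Jacobian for the illustrative model {@code y = p0 * exp(p1 * x)} shown
* above; row i holds the partial derivative with respect to parameter i at every input point:
* <pre>{@code
* LevenbergMarquardt.JacobianFactory expJacobian = (param, x, jacobian) -> {
*     double p0 = param.get(0, 0);
*     double p1 = param.get(1, 0);
*     for (int j = 0; j < x.numRows; j++)
*     {
*         double e = Math.exp(p1 * x.get(j, 0));
*         jacobian.set(0, j, e);                     // df/dp0
*         jacobian.set(1, j, p0 * x.get(j, 0) * e);  // df/dp1
*     }
* };
* }</pre>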
*/
public interface JacobianFactory
{
public void computeJacobian( DenseMatrix64F param , DenseMatrix64F x , DenseMatrix64F jacobian );
}
}