/*
* Copyright (c) 2016 Jacob Rachiele
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of this software
* and associated documentation files (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the Software is furnished to
* do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all copies or
* substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
* INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
* PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Contributors:
*
* Jacob Rachiele
*/
package optim;
import linear.doubles.Matrices;
import linear.doubles.Matrix;
import linear.doubles.Vector;
import math.function.AbstractMultivariateFunction;
import static java.lang.Math.abs;
import static java.lang.Math.max;
/**
* An implementation of the Broyden-Fletcher-Goldfarb-Shanno (BFGS) algorithm for unconstrained
* nonlinear optimization. This class is immutable and thread-safe.
*
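* <p>A minimal usage sketch (the tolerance values below are illustrative, and the
* objective may be any concrete {@code AbstractMultivariateFunction}):
* <pre>{@code
* AbstractMultivariateFunction f = ...;      // objective exposing at() and gradientAt()
* Vector startingPoint = ...;                // initial guess of the minimum
* BFGS solver = new BFGS(f, startingPoint, 1E-8, 1E-8);
* Vector minimum = solver.parameters();      // optimized input parameters
* double minValue = solver.functionValue();  // objective value at those parameters
* }</pre>
*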
* @author Jacob Rachiele
*/
public final class BFGS {
private static final double C1 = 1E-4;
private static final double STEP_REDUCTION_FACTOR = 0.2;
//private static final double c2 = 0.9;
private final Matrix identity;
private Vector iterate; // The point at which to evaluate the target function.
private double functionValue; // The latest value of the target function.
private double rho; // Defined as 1 divided by the dot product of y and s.
private Vector s; // The difference between successive iterates.
private Vector y; // The difference between successive gradients.
private Matrix H; // The inverse Hessian approximation.
/**
* Create a new BFGS object and run the algorithm with the supplied information.
*
* @param f the function to be minimized.
* @param startingPoint the initial guess of the minimum.
* @param gradientTolerance the tolerance for the norm of the gradient of the function.
* @param functionChangeTolerance the tolerance for the relative change in function value.
*/
public BFGS(final AbstractMultivariateFunction f, final Vector startingPoint, final double gradientTolerance,
final double functionChangeTolerance) {
this(f, startingPoint, gradientTolerance, functionChangeTolerance, Matrices.identity(startingPoint.size()));
}
/**
* Create a new BFGS object and run the algorithm with the supplied information.
*
* @param f the function to be minimized.
* @param startingPoint the initial guess of the minimum.
* @param gradientNormTolerance the tolerance for the norm of the gradient of the function.
* @param relativeChangeTolerance the tolerance for the relative change in function value.
* @param initialHessian the initial guess for the inverse Hessian approximation.
*/
public BFGS(final AbstractMultivariateFunction f, final Vector startingPoint, final double gradientNormTolerance,
final double relativeChangeTolerance, final Matrix initialHessian) {
this.identity = Matrices.identity(startingPoint.size());
this.H = initialHessian;
this.iterate = startingPoint;
int k = 0;
double priorFunctionValue;
functionValue = f.at(startingPoint);
Vector gradient = f.gradientAt(startingPoint, functionValue);
int maxIterations = 100;
if (gradient.size() > 0) {
double relativeChange;
double relativeChangeDenominator;
double stepSize;
double slopeAt0;
double yDotS;
Vector nextIterate;
Vector nextGradient;
Vector searchDirection;
double gradientNorm = gradient.norm();
boolean stop = gradientNorm < gradientNormTolerance || !Double.isFinite(gradientNorm);
int iterationsSinceIdentity = 0;
while (!stop) {
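// Reset the inverse Hessian approximation to the identity after more than
// 2n iterations without a restart.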
if (iterationsSinceIdentity > 2 * iterate.size()) {
H = identity;
iterationsSinceIdentity = 0;
}
iterationsSinceIdentity++;
searchDirection = (H.times(gradient).scaledBy(-1.0));
slopeAt0 = searchDirection.dotProduct(gradient);
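// A positive slope means the search direction is not a descent direction,
// so fall back to steepest descent by resetting H to the identity.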
if (slopeAt0 > 0) {
H = this.identity;
searchDirection = (H.times(gradient).scaledBy(-1.0));
slopeAt0 = searchDirection.dotProduct(gradient);
}
// try {
// stepSize = updateStepSize(functionValue);
// } catch (NaNStepLengthException | ViolatedTheoremAssumptionsException e) {
// stop = true;
// continue;
// }
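// Always attempt the full quasi-Newton step first; the loop below backtracks
// if it fails to produce sufficient decrease.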
stepSize = 1.0;
s = searchDirection.scaledBy(stepSize);
nextIterate = iterate.plus(s);
priorFunctionValue = functionValue;
functionValue = f.at(nextIterate);
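// Backtracking line search: shrink the step by STEP_REDUCTION_FACTOR until the
// sufficient-decrease (Armijo) condition f(x + a*p) < f(x) + C1*a*slopeAt0 holds,
// giving up once the relative change in the function value drops below tolerance.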
while (!(Double.isFinite(functionValue) &&
functionValue < priorFunctionValue + C1 * stepSize * slopeAt0) && !stop) {
relativeChangeDenominator = max(abs(priorFunctionValue), abs(nextIterate.norm()));
relativeChange = Math.abs((priorFunctionValue - functionValue) / relativeChangeDenominator);
if (relativeChange <= relativeChangeTolerance) {
stop = true;
} else {
stepSize *= STEP_REDUCTION_FACTOR;
s = searchDirection.scaledBy(stepSize);
nextIterate = iterate.plus(s);
functionValue = f.at(nextIterate);
}
}
nextGradient = f.gradientAt(nextIterate, functionValue);
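// Convergence check: stop when the relative change in the function value or
// the norm of the new gradient falls below its tolerance.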
if (!stop) {
relativeChangeDenominator = max(abs(priorFunctionValue), abs(nextIterate.norm()));
//Hamming, Numerical Methods, 2nd edition, pg. 22
relativeChange = Math.abs((priorFunctionValue - functionValue) / relativeChangeDenominator);
if (relativeChange <= relativeChangeTolerance || nextGradient.norm() < gradientNormTolerance) {
stop = true;
}
}
y = nextGradient.minus(gradient);
yDotS = y.dotProduct(s);
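// Update H only when the curvature condition y.s > 0 holds, which keeps the
// inverse Hessian approximation positive definite; otherwise discard it and
// restart from the identity (unless we are stopping anyway).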
if (yDotS > 0) {
rho = 1 / yDotS;
H = updateHessian();
} else if (!stop) {
H = identity;
iterationsSinceIdentity = 0;
}
iterate = nextIterate;
gradient = nextGradient;
k += 1;
if (k > maxIterations) {
stop = true;
}
}
}
}
// private double updateStepSize(double functionValue) {
// int maxAttempts = 10;
// final double slope0 = gradient.dotProduct(searchDirection);
// if (slope0 > 0) {
// System.out.println("The slope at step size 0 is positive");
// }
// double stepSize = 1.0;
// Vector nextIterate = iterate.plus(searchDirection.scaledBy(stepSize));
// s = nextIterate.minus(iterate);
// double priorFunctionValue = functionValue;
// functionValue = f.at(nextIterate);
// int k = 1;
// while (!(Double.isFinite(functionValue) && functionValue < priorFunctionValue + C1 * stepSize * slope0) && k <
// maxAttempts) {
// stepSize *= 0.2;
// nextIterate = iterate.plus(searchDirection.scaledBy(stepSize));
// s = nextIterate.minus(iterate);
// functionValue = f.at(nextIterate);
// k++;
// }
// return stepSize;
//// final QuasiNewtonLineFunction lineFunction = new QuasiNewtonLineFunction(this.f, iterate, searchDirection);
//// StrongWolfeLineSearch lineSearch = StrongWolfeLineSearch.newBuilder(lineFunction, functionValue, slope0).C1(C1)
//// .c2(c2).alphaMax(50).alpha0(1.0).build();
//// return lineSearch.search();
// }
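/**
* Apply the standard BFGS inverse Hessian update,
* H_new = (I - rho * s * y^T) * H * (I - rho * y * s^T) + rho * s * s^T,
* where rho = 1 / (y . s).
*
* @return the updated inverse Hessian approximation.
*/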
private Matrix updateHessian() {
Matrix a = identity.minus(s.outerProduct(y).scaledBy(rho));
Matrix b = identity.minus(y.outerProduct(s).scaledBy(rho));
Matrix c = s.outerProduct(s).scaledBy(rho);
return a.times(H).times(b).plus(c);
}
/**
* Return the final value of the target function.
*
* @return the final value of the target function.
*/
public double functionValue() {
return this.functionValue;
}
/**
* Return the final, optimized input parameters.
*
* @return the final, optimized input parameters.
*/
public Vector parameters() {
return this.iterate;
}
/**
* Return the final approximation to the inverse Hessian.
*
* @return the final approximation to the inverse Hessian.
*/
public Matrix inverseHessian() {
return this.H;
}
}