net.finmath.optimizer.LevenbergMarquardt Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of finmath-lib Show documentation
finmath lib is a Mathematical Finance Library in Java. It provides algorithms and methodologies related to mathematical finance.
There is a newer version: 6.0.19
Show newest version
/*
 * (c) Copyright Christian P. Fries, Germany. All rights reserved. Contact: [email protected].
 *
 * Created on 16.06.2006
 */
package net.finmath.optimizer;

import java.util.Arrays;
import java.util.Vector;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.logging.Level;
import java.util.logging.Logger;

import net.finmath.functions.LinearAlgebra;

/**
 * This class implements a parallel Levenberg Marquardt non-linear least-squares fit
 * algorithm.
 * 
 * The design avoids the need to define the objective function as a
 * separate class. The objective function is defined by overriding a class
 * method, see the sample code below.
 * 
 * 
 * 
 * The Levenberg-Marquardt solver is implemented in using multi-threading.
 * The calculation of the derivatives (in case a specific implementation of
 * {@code setDerivatives(double[] parameters, double[][] derivatives)} is not
 * provided) may be performed in parallel by setting the parameter numberOfThreads.
 * 
 * 
 * 
 * To use the solver inherit from it and implement the objective function as
 * {@code setValues(double[] parameters, double[] values)} where values has
 * to be set to the value of the objective functions for the given parameters.
 * 

 * You may also provide an a derivative for your objective function by
 * additionally overriding the function {@code setDerivatives(double[] parameters, double[][] derivatives)},
 * otherwise the solver will calculate the derivative via finite differences.
 * 
 * 
 * The following simple example finds a solution for the equation 

 * 
 * 
 * 
 * 
 * 
 * Sample linear system of equations.
 * 0.0 * x₁ + 1.0 * x₂ = 5.0
 * 

 * 2.0 * x₁ + 1.0 * x₂ = 10.0
 * 
 * 
 * 
 *  * {@code
 * 	LevenbergMarquardt optimizer = new LevenbergMarquardt() {
 * 		// Override your objective function here
 * 		public void setValues(double[] parameters, double[] values) {
 * 			values[0] = parameters[0] * 0.0 + parameters[1];
 * 			values[1] = parameters[0] * 2.0 + parameters[1];
 * 		}
 * 	};
 * 
 * 	// Set solver parameters
 * 	optimizer.setInitialParameters(new double[] { 0, 0 });
 * 	optimizer.setWeights(new double[] { 1, 1 });
 * 	optimizer.setMaxIteration(100);
 * 	optimizer.setTargetValues(new double[] { 5, 10 });
 * 
 * 	optimizer.run();
 * 
 * 	double[] bestParameters = optimizer.getBestFitParameters();
 * }
 * 
 * 
 * See the example in the main method below.
 * 
 * 
 * The class can be initialized to use a multi-threaded valuation. If initialized
 * this way the implementation of setValues must be thread-safe.
 * The solver will evaluate the gradient of the value vector in parallel, i.e.,
 * use as many threads as the number of parameters.
 * 
 * 
 * Note: Iteration steps will be logged (java.util.logging) with LogLevel.FINE
 * 
 * @author Christian Fries
 * @version 1.4
 */
public abstract class LevenbergMarquardt {

	private double[] initialParameters = null;
	private double[] parameterSteps = null;
	private double[] targetValues = null;
	private double[] weights = null;

	private int		maxIteration = 100;

	private double	lambda				= 0.001;
	private double	lambdaMultiplicator	= 2.0;

	private double	errorMeanSquaredTolerance = 0.0;	// by default we solve upto machine presicion

	private int iteration = 0;

	private double[] parameterTest = null;
	private double[] parameterIncrement = null;
	private double[] valueTest = null;

	private double[] parameterCurrent = null;
	private double[] valueCurrent = null;
	private double[][] derivativeCurrent = null;

	private double errorMeanSquaredCurrent	= Double.POSITIVE_INFINITY;
	private double errorMeanSquaredChange	= Double.POSITIVE_INFINITY;

	private boolean isParameterCurrentDerivativeValid = false;

	// These members will be updated in each iteration. These are members to prevent repeated memory allocation.
	private double[][]	hessianMatrix = null;
	private double[]	beta = null;

	private int				numberOfThreads	= 1;
	private ExecutorService executor		= null;

	private final Logger logger = Logger.getLogger("net.finmath");

	// A simple test
	public static void main(String[] args) throws SolverException {

		LevenbergMarquardt optimizer = new LevenbergMarquardt() {

			// Override your objective function here
			@Override
			public void setValues(double[] parameters, double[] values) {
				values[0] = parameters[0] * 0.0 + parameters[1];
				values[1] = parameters[0] * 2.0 + parameters[1];
			}
		};

		// Set solver parameters
		optimizer.setInitialParameters(new double[] { 0, 0 });
		optimizer.setWeights(new double[] { 1, 1 });
		optimizer.setMaxIteration(100);
		optimizer.setTargetValues(new double[] { 5, 10 });

		optimizer.run();

		double[] bestParameters = optimizer.getBestFitParameters();
		System.out.println("The solver required " + optimizer.getIterations()
				+ " iterations. The best fit parameters are:");
		for (int i = 0; i < bestParameters.length; i++) {
			System.out.println("\tparameter[" + i + "]: " + bestParameters[i]);
		}
	}

	/**
	 * Create a Levenberg-Marquardt solver.
	 * 
	 * @param initialParameters Initial value for the parameters where the solver starts its search.
	 * @param targetValues Target values to achieve.
	 * @param maxIteration Maximum number of iterations.
	 * @param numberOfThreads Maximum number of threads. Warning: If this number is larger than one, the implementation of setValues has to be thread safe!
	 */
	public LevenbergMarquardt(double[] initialParameters, double[] targetValues, int maxIteration, int numberOfThreads) {
		super();
		this.initialParameters	= initialParameters;
		this.targetValues		= targetValues;
		this.maxIteration		= maxIteration;

		this.weights			= new double[targetValues.length];
		java.util.Arrays.fill(weights, 1.0);

		this.numberOfThreads = numberOfThreads;
	}

	/**
	 * Create a Levenberg-Marquardt solver.
	 */
	public LevenbergMarquardt() {
		super();
	}

	/**
	 * Create a Levenberg-Marquardt solver.
	 * 
	 * @param numberOfThreads Maximum number of threads. Warning: If this number is larger than one, the implementation of setValues has to be thread safe!
	 */
	public LevenbergMarquardt(int numberOfThreads) {
		super();
		this.numberOfThreads = numberOfThreads;
	}


	/**
	 * Set the initial parameters for the solver.
	 * 
	 * @param initialParameters
	 *            The initial parameters.
	 */
	public void setInitialParameters(double[] initialParameters) {
		this.initialParameters = initialParameters;
	}

	/**
	 * Set the parameter step for the solver.
	 * The parameter step is used to evaluate the derivatives via
	 * finite differences, if analytic derivatives are not provided.
	 * 
	 * @param parameterSteps The parameter step.
	 */
	public void setParameterSteps(double[] parameterSteps) {
		this.parameterSteps = parameterSteps;
	}

	/**
	 * Set the target values for the solver. The solver will solver the
	 * equation weights * objectiveFunction = targetValues.
	 * 
	 * @param targetValues
	 *            The target values.
	 */
	public void setTargetValues(double[] targetValues) {
		this.targetValues = targetValues;
	}

	/**
	 * Set the maximum number of iterations to be performed until the solver
	 * gives up.
	 * 
	 * @param maxIteration
	 *            The maximum number of iterations.
	 */
	public void setMaxIteration(int maxIteration) {
		this.maxIteration = maxIteration;
	}

	/**
	 * Set the weight for the objective function.
	 * 
	 * @param weights The weights for the objective function.
	 */
	public void setWeights(double[] weights) {
		this.weights = weights;
	}


	/**
	 * Set the error tolerance. The solver considers the solution "found"
	 * if the error is not improving by this given error tolerance.
	 * 
	 * @param errorTolerance The error tolerance.
	 */
	public void setErrorTolerance(double errorTolerance) {
		/*
		 * The solver uses internally a mean squared error.
		 * To avoid calculation of Math.sqrt we convert the tolarance
		 * to its squared.
		 */
		this.errorMeanSquaredTolerance = errorTolerance * errorTolerance;
	}

	/**
	 * Get the best fit parameter vector.
	 * 
	 * @return The best fit parameter.
	 */
	public double[] getBestFitParameters() {
		return parameterCurrent;
	}

	/**
	 * Get the number of iterations.
	 * 
	 * @return The number of iterations required
	 */
	public int getIterations() {
		return iteration;
	}

	/**
	 * The objective function. Override this method to implement your custom
	 * function.
	 * 
	 * @param parameters Input value. The parameter vector.
	 * @param values Output value. The vector of values f(i,parameters), i=1,...,n
	 * @throws SolverException Thrown if the valuation fails, specific cause may be available via the cause() method.
	 */
	public abstract void setValues(double[] parameters, double[] values) throws SolverException;

	/**
	 * The derivative of the objective function. You may override this method
	 * if you like to implement your own derivative.
	 * 
	 * @param parameters Input value. The parameter vector.
	 * @param derivatives Output value, where derivatives[i][j] is d(value(j)) / d(parameters(i)
	 * @throws SolverException Thrown if the valuation fails, specific cause may be available via the cause() method.
	 */
	public void setDerivatives(double[] parameters, double[][] derivatives) throws SolverException {
		// Calculate new derivatives. Note that this method is called only with
		// parameters = parameterCurrent, so we may use valueCurrent.

    	Vector> valueFutures = new Vector>(parameterCurrent.length);
		for (int parameterIndex = 0; parameterIndex < parameterCurrent.length; parameterIndex++) {
			final double[] parametersNew	= parameters.clone();
			final double[] derivative		= derivatives[parameterIndex];

			final int workerParameterIndex = parameterIndex;
			Callable worker = new  Callable() {
				public double[] call() throws SolverException {
					double parameterFiniteDifference;
					if(parameterSteps != null) {
						parameterFiniteDifference = parameterSteps[workerParameterIndex];
					}
					else {
						/*
						 * Try to adaptively set a parameter shift. Note that in some
						 * applications it may be important to set parameterSteps.
						 * appropriately.
						 */
						parameterFiniteDifference = (Math.abs(parametersNew[workerParameterIndex]) + 1) * 1E-8;
					}
		
					// Shift parameter value
					parametersNew[workerParameterIndex] += parameterFiniteDifference;
		
					// Calculate derivative as (valueUpShift - valueCurrent) /
					// parameterFiniteDifference
					try {
						setValues(parametersNew, derivative);
					} catch (Exception e) {
						// We signal an exception to calculate the derivative as NaN
						Arrays.fill(derivative, Double.NaN);
					}
					for (int valueIndex = 0; valueIndex < valueCurrent.length; valueIndex++) {
						derivative[valueIndex] -= valueCurrent[valueIndex];
						derivative[valueIndex] /= parameterFiniteDifference;
					}
					return derivative;
				}
			};
			if(executor != null) {
				Future valueFuture = executor.submit(worker);
				valueFutures.add(parameterIndex, valueFuture);
			}
			else {
				FutureTask valueFutureTask = new FutureTask(worker);
				valueFutureTask.run();
				valueFutures.add(parameterIndex, valueFutureTask);
			}
		}

		for (int parameterIndex = 0; parameterIndex < parameterCurrent.length; parameterIndex++) {
        	try {
        		derivatives[parameterIndex] = valueFutures.get(parameterIndex).get();
        	}
    		catch (InterruptedException e) {
    			throw new SolverException(e);
			} catch (ExecutionException e) {
    			throw new SolverException(e);
			}
		}
	}

	/**
	 * You may override this method to implement a custom stop condition.
	 * 
	 * @return Stop condition.
	 */
	boolean done() {
		// The solver terminates if...
		return 
				// Maximum number of iterations is reached
				(iteration > maxIteration)	
				||
				// Error does not improve by more that the given error tolerance
				(errorMeanSquaredChange <= errorMeanSquaredTolerance)
				||
				/*
				 * Lambda is infinite, i.e., no new point is acceptable.
				 * For example, this may happen if setValue repeatedly give contains invalid (NaN) values.
				 */
				Double.isInfinite(lambda);
	}

	/**
	 * Runs the optimization.
	 * 
	 * @throws SolverException Thrown if the valuation fails, specific cause may be available via the cause() method.
	 */
	public void run() throws SolverException {
		// Create an executor for concurrent evaluation of derivatives
		if(numberOfThreads > 1) if(executor == null) executor = Executors.newFixedThreadPool(numberOfThreads);

		// Allocate memory
		int numberOfParameters	= initialParameters.length;
		int numberOfValues		= targetValues.length;

		parameterTest		= initialParameters.clone();
		parameterIncrement	= new double[numberOfParameters];
		parameterCurrent	= new double[numberOfParameters];

		valueTest			= new double[numberOfValues];
		valueCurrent		= new double[numberOfValues];
		derivativeCurrent	= new double[parameterCurrent.length][valueCurrent.length];

		hessianMatrix = new double[parameterCurrent.length][parameterCurrent.length];
		beta = new double[parameterCurrent.length];

		iteration = 0;

		while(true) {
			// Count iterations
			iteration++;

			// Calculate values for test parameters
			setValues(parameterTest, valueTest);

			// calculate error
			double errorMeanSquaredTest = getMeanSquaredError(valueTest);

			if (errorMeanSquaredTest < errorMeanSquaredCurrent) {
				errorMeanSquaredChange = errorMeanSquaredCurrent - errorMeanSquaredTest;

				// Accept point
				System.arraycopy(parameterTest, 0, parameterCurrent, 0, parameterCurrent.length);
				System.arraycopy(valueTest, 0, valueCurrent, 0, valueCurrent.length);
				errorMeanSquaredCurrent		= errorMeanSquaredTest;

				// Derivative has to be recalculated
				isParameterCurrentDerivativeValid = false;

				// Decrease lambda (move faster)
				lambda			/= 1.3;
				lambdaMultiplicator	= 1.3;
			} else {
				errorMeanSquaredChange = errorMeanSquaredTest - errorMeanSquaredCurrent;

				// Reject point, increase lambda (move slower)
				lambda				*= lambdaMultiplicator;
				lambdaMultiplicator *= 1.3;
			}

			// Update a new parameter trial, if we are not done
			if (!done())
				updateParameterTest();
			else
				break;

			// Log iteration
			if (logger.isLoggable(Level.FINE))
			{
				String logString = "Iteration: " + iteration + "\tLambda="
						+ lambda + "\tError Current:" + errorMeanSquaredCurrent
						+ "\tError Change:" + errorMeanSquaredChange + "\t";
				for (int i = 0; i < parameterCurrent.length; i++) {
					logString += "[" + i + "] = " + parameterCurrent[i] + "\t";
				}
				logger.fine(logString);
			}
		}

		// Shutdown executor if present.
		if(executor != null) {
			executor.shutdown();
			executor = null;
		}
	}

	private double getMeanSquaredError(double[] value) {
		double error = 0.0;

		for (int valueIndex = 0; valueIndex < value.length; valueIndex++) {
			double deviation = value[valueIndex] - targetValues[valueIndex];
			error += weights[valueIndex] * deviation * deviation;
		}

		return error/value.length;
	}

	/**
	 * Calculate a new parameter guess.
	 * 
	 * @throws SolverException Thrown if the valuation fails, specific cause may be available via the cause() method.
	 */
	private void updateParameterTest() throws SolverException {
		if (!isParameterCurrentDerivativeValid) {
			this.setDerivatives(parameterCurrent, derivativeCurrent);
			isParameterCurrentDerivativeValid = true;
		}

		boolean hessianInvalid = true;

		while (hessianInvalid) {
			hessianInvalid = false;
			// Build matrix H (hessian approximation)
			for (int i = 0; i < parameterCurrent.length; i++) {
				for (int j = i; j < parameterCurrent.length; j++) {
					double alphaElement = 0.0;
					for (int valueIndex = 0; valueIndex < valueCurrent.length; valueIndex++) {
						alphaElement += weights[valueIndex] * derivativeCurrent[i][valueIndex] * derivativeCurrent[j][valueIndex];
					}
					if (i == j) {
						if (alphaElement == 0.0)
							alphaElement = 1.0;
						else
							alphaElement *= 1 + lambda;
					}

					hessianMatrix[i][j] = alphaElement;
					hessianMatrix[j][i] = alphaElement;
				}
			}

			// Build beta (Newton step)
			for (int i = 0; i < parameterCurrent.length; i++) {
				double betaElement = 0.0;
				double[] derivativeCurrentSingleParam = derivativeCurrent[i];
				for (int k = 0; k < valueCurrent.length; k++) {
					betaElement += weights[k] * (targetValues[k] - valueCurrent[k]) * derivativeCurrentSingleParam[k];
				}
				beta[i] = betaElement;
			}

			try {
				// Calculate new increment
				parameterIncrement = LinearAlgebra.solveLinearEquationSymmetric(hessianMatrix, beta);
			} catch (Exception e) {
				hessianInvalid	= true;
				lambda			*= 16;
			}
		}

		// Calculate new parameter
		for (int i = 0; i < parameterCurrent.length; i++) {
			parameterTest[i] = parameterCurrent[i] + parameterIncrement[i];
		}
	}
}