net.finmath.optimizer.StochasticPathwiseLevenbergMarquardt Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of finmath-lib Show documentation
finmath lib is a Mathematical Finance Library in Java. It provides algorithms and methodologies related to mathematical finance.
There is a newer version: 6.0.19
Show newest version
/*
 * (c) Copyright Christian P. Fries, Germany. Contact: [email protected].
 *
 * Created on 16.06.2006
 */
package net.finmath.optimizer;

import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.Vector;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.logging.Level;
import java.util.logging.Logger;

import net.finmath.functions.LinearAlgebra;
import net.finmath.montecarlo.RandomVariableFromDoubleArray;
import net.finmath.stochastic.RandomVariable;
import net.finmath.stochastic.Scalar;

/**
 * This class implements a stochastic Levenberg Marquardt non-linear least-squares fit
 * algorithm.
 * 
 * The design avoids the need to define the objective function as a
 * separate class. The objective function is defined by overriding a class
 * method, see the sample code below.
 * 
 *
 * 
 * The Levenberg-Marquardt solver is implemented using multi-threading.
 * The calculation of the derivatives (in case a specific implementation of
 * {@code setDerivatives(RandomVariable[] parameters, RandomVariable[][] derivatives)} is not
 * provided) may be performed in parallel by setting the parameter numberOfThreads.
 * 
 *
 * 
 * To use the solver inherit from it and implement the objective function as
 * {@code setValues(RandomVariable[] parameters, RandomVariable[] values)} where values has
 * to be set to the value of the objective functions for the given parameters.
 * 

 * You may also provide a derivative for your objective function by
 * additionally overriding the function {@code setDerivatives(RandomVariable[] parameters, RandomVariable[][] derivatives)},
 * otherwise the solver will calculate the derivative via finite differences.
 * 
 * 
 * To reject a point, it is allowed to set an element of values to {@link java.lang.Double#NaN}
 * in the implementation of {@code setValues(RandomVariable[] parameters, RandomVariable[] values)}.
 * Put differently: The solver handles NaN values in values as an error larger than
 * the current one (regardless of the current error) and rejects the point.
 * 

 * Note, however, that it is an error if the initial parameter guess results in a NaN value.
 * That is, the solver should be initialized with an initial parameter in an admissible region.
 * 
 *
 * The following simple example finds a solution for the linear system of equations
 * \[
 *   \begin{aligned}
 *     0.0 \cdot x_{1} + 1.0 \cdot x_{2} &amp;= 5.0 \\
 *     2.0 \cdot x_{1} + 1.0 \cdot x_{2} &amp;= 10.0
 *   \end{aligned}
 * \]
 *
 * <pre>{@code
 * 	LevenbergMarquardt optimizer = new LevenbergMarquardt() {
 * 		// Override your objective function here
 * 		public void setValues(RandomVariable[] parameters, RandomVariable[] values) {
 * 			values[0] = parameters[0] * 0.0 + parameters[1];
 * 			values[1] = parameters[0] * 2.0 + parameters[1];
 * 		}
 * 	};
 *
 * 	// Set solver parameters
 * 	optimizer.setInitialParameters(new RandomVariable[] { 0, 0 });
 * 	optimizer.setWeights(new RandomVariable[] { 1, 1 });
 * 	optimizer.setMaxIteration(100);
 * 	optimizer.setTargetValues(new RandomVariable[] { 5, 10 });
 *
 * 	optimizer.run();
 *
 * 	RandomVariable[] bestParameters = optimizer.getBestFitParameters();
 * }</pre>
 * 
 *
 * See the example in the main method below.
 *
 * 
 * The class can be initialized to use a multi-threaded valuation. If initialized
 * this way the implementation of setValues must be thread-safe.
 * The solver will evaluate the gradient of the value vector in parallel, i.e.,
 * use as many threads as the number of parameters.
 * 
 *
 * Note: Iteration steps will be logged (java.util.logging) with LogLevel.FINE
 *
 * @author Christian Fries
 * @version 1.6
 */
public abstract class StochasticPathwiseLevenbergMarquardt implements Serializable, Cloneable, StochasticOptimizer {

	private static final long serialVersionUID = 4560864869394838155L;

	// Problem definition, as passed to the constructor.
	// Parameter vector at which the search starts (cloned into parameterTest and parameterCurrent by run()).
	private RandomVariable[] initialParameters = null;
	// Shift used per parameter for the finite difference derivative. If null, setDerivatives picks a shift adaptively.
	private RandomVariable[] parameterSteps = null;
	// Values the objective function should attain.
	private RandomVariable[] targetValues = null;
	// Weights applied to the error (the constructor allocates a default array if null is passed).
	private RandomVariable[] weights = null;

	// Upper bound on the number of iterations, checked in done().
	private final int		maxIteration;

	// Local state of the solver
	// Levenberg-Marquardt damping parameter; indexed by path in run().
	private double[]	lambda;
	// Starting value for lambda. NOTE(review): presumably used to initialise every entry of lambda in run() - confirm.
	private final double		lambdaInitialValue = 0.001;
	// lambda is divided by this value to decrease it (see setLambdaDivisor, which requires a value > 1).
	private double		lambdaDivisor = 1.3;
	// lambda is multiplied by this value to increase it.
	private double 		lambdaMultiplicator	= 2.0;
	// Number of paths handled pathwise by run(); used there to size the per-path arrays.
	private int			numberOfPaths;

	// Tolerance against which done() compares errorRootMeanSquaredChange (the constructor substitutes 0.0 for null).
	private final RandomVariable	errorTolerance;

	// Iteration counter; reset and incremented by run(), exposed via getIterations().
	private int iteration = 0;

	// Candidate point and the objective values evaluated at it.
	private RandomVariable[] parameterTest = null;
	private RandomVariable[] valueTest = null;

	// Current point (returned by getBestFitParameters()), its objective values and the derivative.
	private RandomVariable[] parameterCurrent = null;
	private RandomVariable[] valueCurrent = null;
	private RandomVariable[][] derivativeCurrent = null;

	// Both start at +infinity, i.e., before any valuation has been performed.
	private RandomVariable errorMeanSquaredCurrent	= new RandomVariableFromDoubleArray(Double.POSITIVE_INFINITY);
	private RandomVariable errorRootMeanSquaredChange	= new RandomVariableFromDoubleArray(Double.POSITIVE_INFINITY);

	// NOTE(review): presumably flags per path whether derivativeCurrent belongs to parameterCurrent - usage not visible here, confirm.
	private boolean[]		isParameterCurrentDerivativeValid;

	/*
	 * Used for multi-threaded calculation of the derivative.
	 * The user may provide their own executor. If not and numberOfThreads > 1
	 * we will temporarily create an executor with the specified number of threads.
	 * Note: If an executor was provided upon construction, it will not receive a shutdown when done.
	 */
	private ExecutorService executor					= null;
	private boolean			executorShutdownWhenDone	= true;

	// Logger for the iteration steps (shared "net.finmath" logger name).
	private final Logger logger = Logger.getLogger("net.finmath");

	/**
	 * A simple test / demonstration of the solver.
	 *
	 * Two parameters are fitted such that the squared linear forms
	 * (0.0 * p0 + p1)^2 and (2.0 * p0 + p1)^2 match the target values 25 and 100.
	 * The number of iterations, the fitted parameters and the root mean squared error
	 * are printed to standard out.
	 *
	 * @param args Command line arguments (not used).
	 * @throws SolverException Thrown if the solver fails.
	 */
	public static void main(final String[] args) throws SolverException {
		// Note: a differentiable implementation (e.g. RandomVariableDifferentiableAAD) is possible here, too.
		final RandomVariable[] startingPoint = { new RandomVariableFromDoubleArray(2), new RandomVariableFromDoubleArray(2) };
		final RandomVariable[] unitWeights = { new RandomVariableFromDoubleArray(1), new RandomVariableFromDoubleArray(1) };
		final RandomVariable[] finiteDifferenceSteps = { new RandomVariableFromDoubleArray(1), new RandomVariableFromDoubleArray(1) };
		final int iterationLimit = 100;
		final RandomVariable[] targets = { new RandomVariableFromDoubleArray(25), new RandomVariableFromDoubleArray(100) };

		// No error tolerance and no executor are passed: the solver falls back to its defaults.
		final StochasticPathwiseLevenbergMarquardt solver = new StochasticPathwiseLevenbergMarquardt(startingPoint, targets, unitWeights, finiteDifferenceSteps, iterationLimit, null, null) {
			private static final long serialVersionUID = -282626938650139518L;

			// The objective function of this example
			@Override
			public void setValues(final RandomVariable[] parameters, final RandomVariable[] values) {
				final RandomVariable first = parameters[0];
				final RandomVariable second = parameters[1];
				values[0] = first.mult(0.0).add(second).squared();
				values[1] = first.mult(2.0).add(second).squared();
			}
		};

		solver.run();

		// Report the result
		final RandomVariable[] fittedParameters = solver.getBestFitParameters();
		System.out.println("The solver for problem 1 required " + solver.getIterations() + " iterations. The best fit parameters are:");
		int position = 0;
		for (final RandomVariable fittedParameter : fittedParameters) {
			System.out.println("\tparameter[" + position + "]: " + fittedParameter);
			position++;
		}
		System.out.println("The solver accuracy is " + solver.getRootMeanSquaredError());

		/*
		 * A possible continuation (not exercised here): create a clone with modified target values
		 * via getCloneWithModifiedTargetValues and run it, without re-defining setValues.
		 */
	}

	/**
	 * Create a Levenberg-Marquardt solver.
	 *
	 * @param initialParameters Initial value for the parameters where the solver starts its search.
	 * @param targetValues Target values to achieve.
	 * @param weights Weights applied to the error.
	 * @param parameterSteps Step used for finite difference approximation.
	 * @param maxIteration Maximum number of iterations.
	 * @param errorTolerance Error tolerance / accuracy.
	 * @param executorService Executor to be used for concurrent valuation of the derivatives. This is only performed if setDerivatives is not overridden. Warning: The implementation of setValues has to be thread safe!
	 */
	public StochasticPathwiseLevenbergMarquardt(final RandomVariable[] initialParameters, final RandomVariable[] targetValues, final RandomVariable[] weights, final RandomVariable[] parameterSteps, final int maxIteration, final RandomVariable errorTolerance, final ExecutorService executorService) {
		super();
		this.initialParameters	= initialParameters;
		this.targetValues		= targetValues;
		this.weights			= weights;
		this.parameterSteps		= parameterSteps;
		this.maxIteration		= maxIteration;
		this.errorTolerance		= errorTolerance != null ? errorTolerance : new RandomVariableFromDoubleArray(0.0);

		if(weights == null) {
			this.weights = new RandomVariable[targetValues.length];
			for(int i=0; iWarning: If this number is larger than one, the implementation of setValues has to be thread safe!
	 */
	public StochasticPathwiseLevenbergMarquardt(final RandomVariable[] initialParameters, final RandomVariable[] targetValues, final int maxIteration, final int numberOfThreads) {
		// Delegate to the main constructor, leaving weights, parameter steps and error tolerance at their defaults.
		this(
				initialParameters,
				targetValues,
				null,	// weights
				null,	// parameterSteps
				maxIteration,
				null,	// errorTolerance
				createExecutorForThreads(numberOfThreads));
	}

	/**
	 * Create the executor used for the given number of threads.
	 *
	 * NOTE(review): the pool created here is handed to the main constructor like a user supplied executor.
	 * The field documentation states that a supplied executor does not receive a shutdown - verify that this pool is released.
	 *
	 * @param numberOfThreads Requested number of threads.
	 * @return A fixed thread pool of the given size if numberOfThreads is larger than one, otherwise null.
	 */
	private static ExecutorService createExecutorForThreads(final int numberOfThreads) {
		if(numberOfThreads > 1) {
			return Executors.newFixedThreadPool(numberOfThreads);
		}
		return null;
	}

	/**
	 * Create a Levenberg-Marquardt solver from lists of initial parameters and target values.
	 *
	 * Both lists are converted to arrays; weights, parameter steps and error tolerance
	 * are left to the defaults of the main constructor.
	 *
	 * @param initialParameters List of initial values for the parameters where the solver starts its search.
	 * @param targetValues List of target values to achieve.
	 * @param maxIteration Maximum number of iterations.
	 * @param executorService Executor to be used for concurrent valuation of the derivatives. This is only performed if setDerivatives is not overridden. Warning: The implementation of setValues has to be thread safe!
	 */
	public StochasticPathwiseLevenbergMarquardt(final List initialParameters, final List targetValues, final int maxIteration, final ExecutorService executorService) {
		this(
				numberListToDoubleArray(initialParameters),
				numberListToDoubleArray(targetValues),
				null,	// weights
				null,	// parameterSteps
				maxIteration,
				null,	// errorTolerance
				executorService);
	}

	/**
	 * Create a Levenberg-Marquardt solver from lists of initial parameters and target values,
	 * specifying the number of threads instead of an executor.
	 *
	 * @param initialParameters List of initial values for the parameters where the solver starts its search.
	 * @param targetValues List of target values to achieve.
	 * @param maxIteration Maximum number of iterations.
	 * @param numberOfThreads Maximum number of threads. Warning: If this number is larger than one, the implementation of setValues has to be thread safe!
	 */
	public StochasticPathwiseLevenbergMarquardt(final List initialParameters, final List targetValues, final int maxIteration, final int numberOfThreads) {
		// Convert the lists and forward to the array based constructor.
		this(
				numberListToDoubleArray(initialParameters),
				numberListToDoubleArray(targetValues),
				maxIteration,
				numberOfThreads);
	}

	/**
	 * Convert a list of numbers to an array of doubles.
	 *
	 * @param listOfNumbers A list of numbers.
	 * @return A corresponding array of doubles executing doubleValue() on each element.
	 */
	private static RandomVariable[] numberListToDoubleArray(final List listOfNumbers) {
		final RandomVariable[] array	= new RandomVariable[listOfNumbers.size()];
		for(int i=0; i 1.");
		}
		this.lambdaMultiplicator = lambdaMultiplicator;
	}

	/**
	 * Get the divisor applied to lambda (for the next iteration) if the inversion of the regularized
	 * Hessian succeeds, that is, if \( H + \lambda \operatorname{diag} H \) is invertible.
	 *
	 * @return The divisor currently used to decrease lambda.
	 */
	public double getLambdaDivisor() {
		return this.lambdaDivisor;
	}

	/**
	 * Set the divisor applied to lambda (for the next iteration) if the inversion of the regularized
	 * Hessian succeeds, that is, if \( H + \lambda \operatorname{diag} H \) is invertible.
	 *
	 * This will make lambda smaller, hence let the stepping move faster.
	 *
	 * @param lambdaDivisor the lambdaDivisor to set. Has to be &gt; 1.
	 * @throws IllegalArgumentException Thrown if lambdaDivisor is not strictly larger than 1 (this includes NaN).
	 */
	public void setLambdaDivisor(final double lambdaDivisor) {
		// Written as a negated "> 1": every comparison with NaN is false, so a test
		// for "<= 1" would silently accept NaN and poison all subsequent lambda updates.
		if(!(lambdaDivisor > 1.0)) {
			throw new IllegalArgumentException("Parameter lambdaDivisor is required to be > 1.");
		}
		this.lambdaDivisor = lambdaDivisor;
	}

	/**
	 * Get the parameter vector of the current (best accepted) point.
	 *
	 * The solver's internal array is returned, not a copy. The value is null
	 * until run() has been called.
	 *
	 * @return The current parameter vector.
	 */
	@Override
	public RandomVariable[] getBestFitParameters() {
		return this.parameterCurrent;
	}

	/**
	 * Get the root mean squared error of the current point as a single number.
	 *
	 * The current mean squared error is averaged, then the square root is taken.
	 * Before any valuation the current mean squared error is initialized with positive infinity.
	 *
	 * @return The square root of the average of the current mean squared error.
	 */
	@Override
	public double getRootMeanSquaredError() {
		final RandomVariable averagedSquaredError = errorMeanSquaredCurrent.average();
		final RandomVariable rootOfAveragedSquaredError = averagedSquaredError.sqrt();
		return rootOfAveragedSquaredError.doubleValue();
	}

	/**
	 * Overwrite the mean squared error associated with the current point.
	 *
	 * This is the value the solver compares the error of a test point against and
	 * from which getRootMeanSquaredError() is derived.
	 *
	 * @param errorMeanSquaredCurrent the errorMeanSquaredCurrent to set
	 */
	public void setErrorMeanSquaredCurrent(final RandomVariable errorMeanSquaredCurrent) {
		this.errorMeanSquaredCurrent = errorMeanSquaredCurrent;
	}

	/**
	 * Get the number of iterations performed so far.
	 *
	 * The counter is reset to zero when run() starts and incremented at the beginning of each iteration.
	 *
	 * @return The iteration counter.
	 */
	@Override
	public int getIterations() {
		return this.iteration;
	}

	/**
	 * Hook through which the solver evaluates the objective function.
	 * The default implementation just forwards to {@code setValues(parameters, values)}.
	 *
	 * @param parameters Input value. The parameter vector.
	 * @param values Output value. The vector of values of the objective function.
	 * @throws SolverException Thrown if the valuation fails.
	 */
	protected void prepareAndSetValues(final RandomVariable[] parameters, final RandomVariable[] values) throws SolverException {
		this.setValues(parameters, values);
	}

	/**
	 * Hook through which the solver evaluates the derivative of the objective function.
	 * The default implementation just forwards to {@code setDerivatives(parameters, derivatives)};
	 * the argument {@code values} is not used by it.
	 *
	 * @param parameters Input value. The parameter vector.
	 * @param values The values of the objective function for the given parameters (ignored by the default implementation).
	 * @param derivatives Output value. The derivatives of the objective function.
	 * @throws SolverException Thrown if the valuation fails.
	 */
	protected void prepareAndSetDerivatives(final RandomVariable[] parameters, final RandomVariable[] values, final RandomVariable[][] derivatives) throws SolverException {
		this.setDerivatives(parameters, derivatives);
	}

	/**
	 * The objective function. Override this method to implement your custom
	 * function.
	 *
	 * The implementation has to assign every element of {@code values}.
	 * If derivatives are calculated using an executor, this method may be called
	 * from several threads and has to be thread safe.
	 *
	 * @param parameters Input value. The parameter vector.
	 * @param values Output value. The vector of values f(i,parameters), i=1,...,n
	 * @throws SolverException Thrown if the valuation fails, specific cause may be available via the getCause() method.
	 */
	public abstract void setValues(RandomVariable[] parameters, RandomVariable[] values) throws SolverException;

	/**
	 * The derivative of the objective function. You may override this method
	 * if you like to implement your own derivative.
	 *
	 * @param parameters Input value. The parameter vector.
	 * @param derivatives Output value, where derivatives[i][j] is d(value(j)) / d(parameters(i)
	 * @throws SolverException Thrown if the valuation fails, specific cause may be available via the cause() method.
	 */
	public void setDerivatives(RandomVariable[] parameters, final RandomVariable[][] derivatives) throws SolverException {
		// Calculate new derivatives. Note that this method is called only with
		// parameters = parameterTest, so we may use valueTest.

		parameters = parameterCurrent;
		final Vector> valueFutures = new Vector<>(parameterCurrent.length);
		for (int parameterIndex = 0; parameterIndex < parameterCurrent.length; parameterIndex++) {
			final RandomVariable[] parametersNew	= parameters.clone();
			final RandomVariable[] derivative		= derivatives[parameterIndex];

			final int workerParameterIndex = parameterIndex;
			final Callable worker = new  Callable() {
				@Override
				public RandomVariable[] call() {
					RandomVariable parameterFiniteDifference;
					if(parameterSteps != null) {
						parameterFiniteDifference = parameterSteps[workerParameterIndex];
					}
					else {
						/*
						 * Try to adaptively set a parameter shift. Note that in some
						 * applications it may be important to set parameterSteps.
						 * appropriately.
						 */
						parameterFiniteDifference = parametersNew[workerParameterIndex].abs().add(1.0).mult(1E-8);
					}

					// Shift parameter value
					parametersNew[workerParameterIndex] = parametersNew[workerParameterIndex].add(parameterFiniteDifference);

					// Calculate derivative as (valueUpShift - valueCurrent) / parameterFiniteDifference
					try {
						prepareAndSetValues(parametersNew, derivative);
					} catch (final Exception e) {
						// We signal an exception to calculate the derivative as NaN
						Arrays.fill(derivative, new RandomVariableFromDoubleArray(Double.NaN));
					}
					for (int valueIndex = 0; valueIndex < valueCurrent.length; valueIndex++) {
						derivative[valueIndex] = derivative[valueIndex].sub(valueCurrent[valueIndex]).div(parameterFiniteDifference);
						derivative[valueIndex] = derivative[valueIndex].isNaN().sub(0.5).mult(-1).choose(derivative[valueIndex], new Scalar(0.0));
					}
					return derivative;
				}
			};
			if(executor != null) {
				final Future valueFuture = executor.submit(worker);
				valueFutures.add(parameterIndex, valueFuture);
			}
			else {
				final FutureTask valueFutureTask = new FutureTask<>(worker);
				valueFutureTask.run();
				valueFutures.add(parameterIndex, valueFutureTask);
			}
		}

		for (int parameterIndex = 0; parameterIndex < parameterCurrent.length; parameterIndex++) {
			try {
				derivatives[parameterIndex] = valueFutures.get(parameterIndex).get();
			}
			catch (final InterruptedException | ExecutionException e) {
				throw new SolverException(e);
			}
		}
	}

	/**
	 * The stop condition of the solver.
	 *
	 * The solver is done if the iteration counter exceeds the maximum number of iterations, or if
	 * the maximum (as given by getMax()) of errorRootMeanSquaredChange minus errorTolerance is not positive.
	 *
	 * Note: The method is package-private, so a custom stop condition can only be
	 * provided by a subclass within the same package.
	 *
	 * @return Stop condition.
	 */
	boolean done() {
		// Maximum number of iterations is reached
		if(iteration > maxIteration) {
			return true;
		}

		// Error does not improve by more than the given error tolerance
		final double largestChangeAboveTolerance = errorRootMeanSquaredChange.sub(errorTolerance).getMax();
		return largestChangeAboveTolerance <= 0;
	}

	@Override
	public void run() throws SolverException {
		try {

			// Allocate memory
			final int numberOfParameters	= initialParameters.length;
			final int numberOfValues		= targetValues.length;

			parameterTest		= initialParameters.clone();
			parameterCurrent	= initialParameters.clone();

			valueTest		= new RandomVariable[numberOfValues];
			valueCurrent		= new RandomVariable[numberOfValues];
			Arrays.fill(valueCurrent, new RandomVariableFromDoubleArray(Double.NaN));
			derivativeCurrent	= new RandomVariable[numberOfParameters][numberOfValues];

			iteration = 0;

			while(true) {
				// Count iterations
				iteration++;

				// Calculate values for test parameters
				prepareAndSetValues(parameterTest, valueTest);

				// Calculate error
				final RandomVariable errorMeanSquaredTest = getMeanSquaredError(valueTest);

				/*
				 * Note: The following test will be false if errorMeanSquaredTest is NaN.
				 * That is: NaN is consider as a rejected point.
				 */
				final RandomVariable isPointAccepted = errorMeanSquaredCurrent.sub(errorMeanSquaredTest);

				for(int parameterIndex = 0; parameterIndex= 0 ? lambda[pathIndex] / lambdaDivisor : lambda[pathIndex] * lambdaMultiplicator;
				}

				/*
				 * Calculate new derivative at parameterTest (where point is accepted).
				 * Note: the first argument should be parameterTest to use shortest operator tree.
				 */
				prepareAndSetDerivatives(parameterTest, valueTest, derivativeCurrent);

				/*
				 * Calculate new parameterTest
				 */
				final double[][]	parameterIncrement = new double[parameterCurrent.length][numberOfPaths];
				for(int pathIndex=0; pathIndex newTargetVaues, final List newWeights, final boolean isUseBestParametersAsInitialParameters) throws CloneNotSupportedException {
		final StochasticPathwiseLevenbergMarquardt clonedOptimizer = clone();
		clonedOptimizer.targetValues = numberListToDoubleArray(newTargetVaues);
		clonedOptimizer.weights = numberListToDoubleArray(newWeights);

		if(isUseBestParametersAsInitialParameters && this.done()) {
			clonedOptimizer.initialParameters = this.getBestFitParameters();
		}

		return clonedOptimizer;
	}
}