All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.commons.math3.optimization.direct.CMAESOptimizer Maven / Gradle / Ivy

Go to download

The Apache Commons Math project is a library of lightweight, self-contained mathematics and statistics components addressing the most common practical problems not immediately available in the Java programming language or commons-lang.

There is a newer version: 3.6.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.math3.optimization.direct;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.math3.analysis.MultivariateFunction;
import org.apache.commons.math3.exception.DimensionMismatchException;
import org.apache.commons.math3.exception.NotPositiveException;
import org.apache.commons.math3.exception.NotStrictlyPositiveException;
import org.apache.commons.math3.exception.OutOfRangeException;
import org.apache.commons.math3.exception.TooManyEvaluationsException;
import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.EigenDecomposition;
import org.apache.commons.math3.linear.MatrixUtils;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.optimization.ConvergenceChecker;
import org.apache.commons.math3.optimization.OptimizationData;
import org.apache.commons.math3.optimization.GoalType;
import org.apache.commons.math3.optimization.MultivariateOptimizer;
import org.apache.commons.math3.optimization.PointValuePair;
import org.apache.commons.math3.optimization.SimpleValueChecker;
import org.apache.commons.math3.random.MersenneTwister;
import org.apache.commons.math3.random.RandomGenerator;
import org.apache.commons.math3.util.FastMath;
import org.apache.commons.math3.util.MathArrays;

/**
 * 

An implementation of the active Covariance Matrix Adaptation Evolution Strategy (CMA-ES) * for non-linear, non-convex, non-smooth, global function minimization. * The CMA-Evolution Strategy (CMA-ES) is a reliable stochastic optimization method * which should be applied if derivative-based methods, e.g. quasi-Newton BFGS or * conjugate gradient, fail due to a rugged search landscape (e.g. noise, local * optima, outlier, etc.) of the objective function. Like a * quasi-Newton method, the CMA-ES learns and applies a variable metric * on the underlying search space. Unlike a quasi-Newton method, the * CMA-ES neither estimates nor uses gradients, making it considerably more * reliable in terms of finding a good, or even close to optimal, solution.

* *

In general, on smooth objective functions the CMA-ES is roughly ten times * slower than BFGS (counting objective function evaluations, no gradients provided). * For up to N=10 variables also the derivative-free simplex * direct search method (Nelder and Mead) can be faster, but it is * far less reliable than CMA-ES.

* *

The CMA-ES is particularly well suited for non-separable * and/or badly conditioned problems. To observe the advantage of CMA compared * to a conventional evolution strategy, it will usually take about * 30 N function evaluations. On difficult problems the complete * optimization (a single run) is expected to take roughly between * 30 N and 300 N2 * function evaluations.

* *

This implementation is translated and adapted from the Matlab version * of the CMA-ES algorithm as implemented in module {@code cmaes.m} version 3.51.

* * For more information, please refer to the following links: * * * @deprecated As of 3.1 (to be removed in 4.0). * @since 3.0 */ @Deprecated public class CMAESOptimizer extends BaseAbstractMultivariateSimpleBoundsOptimizer implements MultivariateOptimizer { /** Default value for {@link #checkFeasableCount}: {@value}. */ public static final int DEFAULT_CHECKFEASABLECOUNT = 0; /** Default value for {@link #stopFitness}: {@value}. */ public static final double DEFAULT_STOPFITNESS = 0; /** Default value for {@link #isActiveCMA}: {@value}. */ public static final boolean DEFAULT_ISACTIVECMA = true; /** Default value for {@link #maxIterations}: {@value}. */ public static final int DEFAULT_MAXITERATIONS = 30000; /** Default value for {@link #diagonalOnly}: {@value}. */ public static final int DEFAULT_DIAGONALONLY = 0; /** Default value for {@link #random}. */ public static final RandomGenerator DEFAULT_RANDOMGENERATOR = new MersenneTwister(); // global search parameters /** * Population size, offspring number. The primary strategy parameter to play * with, which can be increased from its default value. Increasing the * population size improves global search properties in exchange to speed. * Speed decreases, as a rule, at most linearly with increasing population * size. It is advisable to begin with the default small population size. */ private int lambda; // population size /** * Covariance update mechanism, default is active CMA. isActiveCMA = true * turns on "active CMA" with a negative update of the covariance matrix and * checks for positive definiteness. OPTS.CMA.active = 2 does not check for * pos. def. and is numerically faster. Active CMA usually speeds up the * adaptation. */ private boolean isActiveCMA; /** * Determines how often a new random offspring is generated in case it is * not feasible / beyond the defined limits, default is 0. */ private int checkFeasableCount; /** * @see Sigma */ private double[] inputSigma; /** Number of objective variables/problem dimension */ private int dimension; /** * Defines the number of initial iterations, where the covariance matrix * remains diagonal and the algorithm has internally linear time complexity. * diagonalOnly = 1 means keeping the covariance matrix always diagonal and * this setting also exhibits linear space complexity. This can be * particularly useful for dimension > 100. * @see A Simple Modification in CMA-ES */ private int diagonalOnly = 0; /** Number of objective variables/problem dimension */ private boolean isMinimize = true; /** Indicates whether statistic data is collected. */ private boolean generateStatistics = false; // termination criteria /** Maximal number of iterations allowed. */ private int maxIterations; /** Limit for fitness value. */ private double stopFitness; /** Stop if x-changes larger stopTolUpX. */ private double stopTolUpX; /** Stop if x-change smaller stopTolX. */ private double stopTolX; /** Stop if fun-changes smaller stopTolFun. */ private double stopTolFun; /** Stop if back fun-changes smaller stopTolHistFun. */ private double stopTolHistFun; // selection strategy parameters /** Number of parents/points for recombination. */ private int mu; // /** log(mu + 0.5), stored for efficiency. */ private double logMu2; /** Array for weighted recombination. */ private RealMatrix weights; /** Variance-effectiveness of sum w_i x_i. */ private double mueff; // // dynamic strategy parameters and constants /** Overall standard deviation - search volume. */ private double sigma; /** Cumulation constant. */ private double cc; /** Cumulation constant for step-size. */ private double cs; /** Damping for step-size. */ private double damps; /** Learning rate for rank-one update. */ private double ccov1; /** Learning rate for rank-mu update' */ private double ccovmu; /** Expectation of ||N(0,I)|| == norm(randn(N,1)). */ private double chiN; /** Learning rate for rank-one update - diagonalOnly */ private double ccov1Sep; /** Learning rate for rank-mu update - diagonalOnly */ private double ccovmuSep; // CMA internal values - updated each generation /** Objective variables. */ private RealMatrix xmean; /** Evolution path. */ private RealMatrix pc; /** Evolution path for sigma. */ private RealMatrix ps; /** Norm of ps, stored for efficiency. */ private double normps; /** Coordinate system. */ private RealMatrix B; /** Scaling. */ private RealMatrix D; /** B*D, stored for efficiency. */ private RealMatrix BD; /** Diagonal of sqrt(D), stored for efficiency. */ private RealMatrix diagD; /** Covariance matrix. */ private RealMatrix C; /** Diagonal of C, used for diagonalOnly. */ private RealMatrix diagC; /** Number of iterations already performed. */ private int iterations; /** History queue of best values. */ private double[] fitnessHistory; /** Size of history queue of best values. */ private int historySize; /** Random generator. */ private RandomGenerator random; /** History of sigma values. */ private List statisticsSigmaHistory = new ArrayList(); /** History of mean matrix. */ private List statisticsMeanHistory = new ArrayList(); /** History of fitness values. */ private List statisticsFitnessHistory = new ArrayList(); /** History of D matrix. */ private List statisticsDHistory = new ArrayList(); /** * Default constructor, uses default parameters * * @deprecated As of version 3.1: Parameter {@code lambda} must be * passed with the call to {@link #optimize(int,MultivariateFunction,GoalType,OptimizationData[]) * optimize} (whereas in the current code it is set to an undocumented value). */ @Deprecated public CMAESOptimizer() { this(0); } /** * @param lambda Population size. * @deprecated As of version 3.1: Parameter {@code lambda} must be * passed with the call to {@link #optimize(int,MultivariateFunction,GoalType,OptimizationData[]) * optimize} (whereas in the current code it is set to an undocumented value).. */ @Deprecated public CMAESOptimizer(int lambda) { this(lambda, null, DEFAULT_MAXITERATIONS, DEFAULT_STOPFITNESS, DEFAULT_ISACTIVECMA, DEFAULT_DIAGONALONLY, DEFAULT_CHECKFEASABLECOUNT, DEFAULT_RANDOMGENERATOR, false, null); } /** * @param lambda Population size. * @param inputSigma Initial standard deviations to sample new points * around the initial guess. * @deprecated As of version 3.1: Parameters {@code lambda} and {@code inputSigma} must be * passed with the call to {@link #optimize(int,MultivariateFunction,GoalType,OptimizationData[]) * optimize}. */ @Deprecated public CMAESOptimizer(int lambda, double[] inputSigma) { this(lambda, inputSigma, DEFAULT_MAXITERATIONS, DEFAULT_STOPFITNESS, DEFAULT_ISACTIVECMA, DEFAULT_DIAGONALONLY, DEFAULT_CHECKFEASABLECOUNT, DEFAULT_RANDOMGENERATOR, false); } /** * @param lambda Population size. * @param inputSigma Initial standard deviations to sample new points * around the initial guess. * @param maxIterations Maximal number of iterations. * @param stopFitness Whether to stop if objective function value is smaller than * {@code stopFitness}. * @param isActiveCMA Chooses the covariance matrix update method. * @param diagonalOnly Number of initial iterations, where the covariance matrix * remains diagonal. * @param checkFeasableCount Determines how often new random objective variables are * generated in case they are out of bounds. * @param random Random generator. * @param generateStatistics Whether statistic data is collected. * @deprecated See {@link SimpleValueChecker#SimpleValueChecker()} */ @Deprecated public CMAESOptimizer(int lambda, double[] inputSigma, int maxIterations, double stopFitness, boolean isActiveCMA, int diagonalOnly, int checkFeasableCount, RandomGenerator random, boolean generateStatistics) { this(lambda, inputSigma, maxIterations, stopFitness, isActiveCMA, diagonalOnly, checkFeasableCount, random, generateStatistics, new SimpleValueChecker()); } /** * @param lambda Population size. * @param inputSigma Initial standard deviations to sample new points * around the initial guess. * @param maxIterations Maximal number of iterations. * @param stopFitness Whether to stop if objective function value is smaller than * {@code stopFitness}. * @param isActiveCMA Chooses the covariance matrix update method. * @param diagonalOnly Number of initial iterations, where the covariance matrix * remains diagonal. * @param checkFeasableCount Determines how often new random objective variables are * generated in case they are out of bounds. * @param random Random generator. * @param generateStatistics Whether statistic data is collected. * @param checker Convergence checker. * @deprecated As of version 3.1: Parameters {@code lambda} and {@code inputSigma} must be * passed with the call to {@link #optimize(int,MultivariateFunction,GoalType,OptimizationData[]) * optimize}. */ @Deprecated public CMAESOptimizer(int lambda, double[] inputSigma, int maxIterations, double stopFitness, boolean isActiveCMA, int diagonalOnly, int checkFeasableCount, RandomGenerator random, boolean generateStatistics, ConvergenceChecker checker) { super(checker); this.lambda = lambda; this.inputSigma = inputSigma == null ? null : (double[]) inputSigma.clone(); this.maxIterations = maxIterations; this.stopFitness = stopFitness; this.isActiveCMA = isActiveCMA; this.diagonalOnly = diagonalOnly; this.checkFeasableCount = checkFeasableCount; this.random = random; this.generateStatistics = generateStatistics; } /** * @param maxIterations Maximal number of iterations. * @param stopFitness Whether to stop if objective function value is smaller than * {@code stopFitness}. * @param isActiveCMA Chooses the covariance matrix update method. * @param diagonalOnly Number of initial iterations, where the covariance matrix * remains diagonal. * @param checkFeasableCount Determines how often new random objective variables are * generated in case they are out of bounds. * @param random Random generator. * @param generateStatistics Whether statistic data is collected. * @param checker Convergence checker. * * @since 3.1 */ public CMAESOptimizer(int maxIterations, double stopFitness, boolean isActiveCMA, int diagonalOnly, int checkFeasableCount, RandomGenerator random, boolean generateStatistics, ConvergenceChecker checker) { super(checker); this.maxIterations = maxIterations; this.stopFitness = stopFitness; this.isActiveCMA = isActiveCMA; this.diagonalOnly = diagonalOnly; this.checkFeasableCount = checkFeasableCount; this.random = random; this.generateStatistics = generateStatistics; } /** * @return History of sigma values. */ public List getStatisticsSigmaHistory() { return statisticsSigmaHistory; } /** * @return History of mean matrix. */ public List getStatisticsMeanHistory() { return statisticsMeanHistory; } /** * @return History of fitness values. */ public List getStatisticsFitnessHistory() { return statisticsFitnessHistory; } /** * @return History of D matrix. */ public List getStatisticsDHistory() { return statisticsDHistory; } /** * Input sigma values. * They define the initial coordinate-wise standard deviations for * sampling new search points around the initial guess. * It is suggested to set them to the estimated distance from the * initial to the desired optimum. * Small values induce the search to be more local (and very small * values are more likely to find a local optimum close to the initial * guess). * Too small values might however lead to early termination. * @since 3.1 */ public static class Sigma implements OptimizationData { /** Sigma values. */ private final double[] sigma; /** * @param s Sigma values. * @throws NotPositiveException if any of the array entries is smaller * than zero. */ public Sigma(double[] s) throws NotPositiveException { for (int i = 0; i < s.length; i++) { if (s[i] < 0) { throw new NotPositiveException(s[i]); } } sigma = s.clone(); } /** * @return the sigma values. */ public double[] getSigma() { return sigma.clone(); } } /** * Population size. * The number of offspring is the primary strategy parameter. * In the absence of better clues, a good default could be an * integer close to {@code 4 + 3 ln(n)}, where {@code n} is the * number of optimized parameters. * Increasing the population size improves global search properties * at the expense of speed (which in general decreases at most * linearly with increasing population size). * @since 3.1 */ public static class PopulationSize implements OptimizationData { /** Population size. */ private final int lambda; /** * @param size Population size. * @throws NotStrictlyPositiveException if {@code size <= 0}. */ public PopulationSize(int size) throws NotStrictlyPositiveException { if (size <= 0) { throw new NotStrictlyPositiveException(size); } lambda = size; } /** * @return the population size. */ public int getPopulationSize() { return lambda; } } /** * Optimize an objective function. * * @param maxEval Allowed number of evaluations of the objective function. * @param f Objective function. * @param goalType Optimization type. * @param optData Optimization data. The following data will be looked for: *
    *
  • {@link org.apache.commons.math3.optimization.InitialGuess InitialGuess}
  • *
  • {@link Sigma}
  • *
  • {@link PopulationSize}
  • *
* @return the point/value pair giving the optimal value for objective * function. */ @Override protected PointValuePair optimizeInternal(int maxEval, MultivariateFunction f, GoalType goalType, OptimizationData... optData) { // Scan "optData" for the input specific to this optimizer. parseOptimizationData(optData); // The parent's method will retrieve the common parameters from // "optData" and call "doOptimize". return super.optimizeInternal(maxEval, f, goalType, optData); } /** {@inheritDoc} */ @Override protected PointValuePair doOptimize() { checkParameters(); // -------------------- Initialization -------------------------------- isMinimize = getGoalType().equals(GoalType.MINIMIZE); final FitnessFunction fitfun = new FitnessFunction(); final double[] guess = getStartPoint(); // number of objective variables/problem dimension dimension = guess.length; initializeCMA(guess); iterations = 0; double bestValue = fitfun.value(guess); push(fitnessHistory, bestValue); PointValuePair optimum = new PointValuePair(getStartPoint(), isMinimize ? bestValue : -bestValue); PointValuePair lastResult = null; // -------------------- Generation Loop -------------------------------- generationLoop: for (iterations = 1; iterations <= maxIterations; iterations++) { // Generate and evaluate lambda offspring final RealMatrix arz = randn1(dimension, lambda); final RealMatrix arx = zeros(dimension, lambda); final double[] fitness = new double[lambda]; // generate random offspring for (int k = 0; k < lambda; k++) { RealMatrix arxk = null; for (int i = 0; i < checkFeasableCount + 1; i++) { if (diagonalOnly <= 0) { arxk = xmean.add(BD.multiply(arz.getColumnMatrix(k)) .scalarMultiply(sigma)); // m + sig * Normal(0,C) } else { arxk = xmean.add(times(diagD,arz.getColumnMatrix(k)) .scalarMultiply(sigma)); } if (i >= checkFeasableCount || fitfun.isFeasible(arxk.getColumn(0))) { break; } // regenerate random arguments for row arz.setColumn(k, randn(dimension)); } copyColumn(arxk, 0, arx, k); try { fitness[k] = fitfun.value(arx.getColumn(k)); // compute fitness } catch (TooManyEvaluationsException e) { break generationLoop; } } // Sort by fitness and compute weighted mean into xmean final int[] arindex = sortedIndices(fitness); // Calculate new xmean, this is selection and recombination final RealMatrix xold = xmean; // for speed up of Eq. (2) and (3) final RealMatrix bestArx = selectColumns(arx, MathArrays.copyOf(arindex, mu)); xmean = bestArx.multiply(weights); final RealMatrix bestArz = selectColumns(arz, MathArrays.copyOf(arindex, mu)); final RealMatrix zmean = bestArz.multiply(weights); final boolean hsig = updateEvolutionPaths(zmean, xold); if (diagonalOnly <= 0) { updateCovariance(hsig, bestArx, arz, arindex, xold); } else { updateCovarianceDiagonalOnly(hsig, bestArz); } // Adapt step size sigma - Eq. (5) sigma *= FastMath.exp(FastMath.min(1, (normps/chiN - 1) * cs / damps)); final double bestFitness = fitness[arindex[0]]; final double worstFitness = fitness[arindex[arindex.length - 1]]; if (bestValue > bestFitness) { bestValue = bestFitness; lastResult = optimum; optimum = new PointValuePair(fitfun.repair(bestArx.getColumn(0)), isMinimize ? bestFitness : -bestFitness); if (getConvergenceChecker() != null && lastResult != null && getConvergenceChecker().converged(iterations, optimum, lastResult)) { break generationLoop; } } // handle termination criteria // Break, if fitness is good enough if (stopFitness != 0 && bestFitness < (isMinimize ? stopFitness : -stopFitness)) { break generationLoop; } final double[] sqrtDiagC = sqrt(diagC).getColumn(0); final double[] pcCol = pc.getColumn(0); for (int i = 0; i < dimension; i++) { if (sigma * FastMath.max(FastMath.abs(pcCol[i]), sqrtDiagC[i]) > stopTolX) { break; } if (i >= dimension - 1) { break generationLoop; } } for (int i = 0; i < dimension; i++) { if (sigma * sqrtDiagC[i] > stopTolUpX) { break generationLoop; } } final double historyBest = min(fitnessHistory); final double historyWorst = max(fitnessHistory); if (iterations > 2 && FastMath.max(historyWorst, worstFitness) - FastMath.min(historyBest, bestFitness) < stopTolFun) { break generationLoop; } if (iterations > fitnessHistory.length && historyWorst-historyBest < stopTolHistFun) { break generationLoop; } // condition number of the covariance matrix exceeds 1e14 if (max(diagD)/min(diagD) > 1e7) { break generationLoop; } // user defined termination if (getConvergenceChecker() != null) { final PointValuePair current = new PointValuePair(bestArx.getColumn(0), isMinimize ? bestFitness : -bestFitness); if (lastResult != null && getConvergenceChecker().converged(iterations, current, lastResult)) { break generationLoop; } lastResult = current; } // Adjust step size in case of equal function values (flat fitness) if (bestValue == fitness[arindex[(int)(0.1+lambda/4.)]]) { sigma *= FastMath.exp(0.2 + cs / damps); } if (iterations > 2 && FastMath.max(historyWorst, bestFitness) - FastMath.min(historyBest, bestFitness) == 0) { sigma *= FastMath.exp(0.2 + cs / damps); } // store best in history push(fitnessHistory,bestFitness); fitfun.setValueRange(worstFitness-bestFitness); if (generateStatistics) { statisticsSigmaHistory.add(sigma); statisticsFitnessHistory.add(bestFitness); statisticsMeanHistory.add(xmean.transpose()); statisticsDHistory.add(diagD.transpose().scalarMultiply(1E5)); } } return optimum; } /** * Scans the list of (required and optional) optimization data that * characterize the problem. * * @param optData Optimization data. The following data will be looked for: *
    *
  • {@link Sigma}
  • *
  • {@link PopulationSize}
  • *
*/ private void parseOptimizationData(OptimizationData... optData) { // The existing values (as set by the previous call) are reused if // not provided in the argument list. for (OptimizationData data : optData) { if (data instanceof Sigma) { inputSigma = ((Sigma) data).getSigma(); continue; } if (data instanceof PopulationSize) { lambda = ((PopulationSize) data).getPopulationSize(); continue; } } } /** * Checks dimensions and values of boundaries and inputSigma if defined. */ private void checkParameters() { final double[] init = getStartPoint(); final double[] lB = getLowerBound(); final double[] uB = getUpperBound(); if (inputSigma != null) { if (inputSigma.length != init.length) { throw new DimensionMismatchException(inputSigma.length, init.length); } for (int i = 0; i < init.length; i++) { if (inputSigma[i] < 0) { // XXX Remove this block in 4.0 (check performed in "Sigma" class). throw new NotPositiveException(inputSigma[i]); } if (inputSigma[i] > uB[i] - lB[i]) { throw new OutOfRangeException(inputSigma[i], 0, uB[i] - lB[i]); } } } } /** * Initialization of the dynamic search parameters * * @param guess Initial guess for the arguments of the fitness function. */ private void initializeCMA(double[] guess) { if (lambda <= 0) { // XXX Line below to replace the current one in 4.0 (MATH-879). // throw new NotStrictlyPositiveException(lambda); lambda = 4 + (int) (3 * FastMath.log(dimension)); } // initialize sigma final double[][] sigmaArray = new double[guess.length][1]; for (int i = 0; i < guess.length; i++) { // XXX Line below to replace the current one in 4.0 (MATH-868). // sigmaArray[i][0] = inputSigma[i]; sigmaArray[i][0] = inputSigma == null ? 0.3 : inputSigma[i]; } final RealMatrix insigma = new Array2DRowRealMatrix(sigmaArray, false); sigma = max(insigma); // overall standard deviation // initialize termination criteria stopTolUpX = 1e3 * max(insigma); stopTolX = 1e-11 * max(insigma); stopTolFun = 1e-12; stopTolHistFun = 1e-13; // initialize selection strategy parameters mu = lambda / 2; // number of parents/points for recombination logMu2 = FastMath.log(mu + 0.5); weights = log(sequence(1, mu, 1)).scalarMultiply(-1).scalarAdd(logMu2); double sumw = 0; double sumwq = 0; for (int i = 0; i < mu; i++) { double w = weights.getEntry(i, 0); sumw += w; sumwq += w * w; } weights = weights.scalarMultiply(1 / sumw); mueff = sumw * sumw / sumwq; // variance-effectiveness of sum w_i x_i // initialize dynamic strategy parameters and constants cc = (4 + mueff / dimension) / (dimension + 4 + 2 * mueff / dimension); cs = (mueff + 2) / (dimension + mueff + 3.); damps = (1 + 2 * FastMath.max(0, FastMath.sqrt((mueff - 1) / (dimension + 1)) - 1)) * FastMath.max(0.3, 1 - dimension / (1e-6 + maxIterations)) + cs; // minor increment ccov1 = 2 / ((dimension + 1.3) * (dimension + 1.3) + mueff); ccovmu = FastMath.min(1 - ccov1, 2 * (mueff - 2 + 1 / mueff) / ((dimension + 2) * (dimension + 2) + mueff)); ccov1Sep = FastMath.min(1, ccov1 * (dimension + 1.5) / 3); ccovmuSep = FastMath.min(1 - ccov1, ccovmu * (dimension + 1.5) / 3); chiN = FastMath.sqrt(dimension) * (1 - 1 / ((double) 4 * dimension) + 1 / ((double) 21 * dimension * dimension)); // intialize CMA internal values - updated each generation xmean = MatrixUtils.createColumnRealMatrix(guess); // objective variables diagD = insigma.scalarMultiply(1 / sigma); diagC = square(diagD); pc = zeros(dimension, 1); // evolution paths for C and sigma ps = zeros(dimension, 1); // B defines the coordinate system normps = ps.getFrobeniusNorm(); B = eye(dimension, dimension); D = ones(dimension, 1); // diagonal D defines the scaling BD = times(B, repmat(diagD.transpose(), dimension, 1)); C = B.multiply(diag(square(D)).multiply(B.transpose())); // covariance historySize = 10 + (int) (3 * 10 * dimension / (double) lambda); fitnessHistory = new double[historySize]; // history of fitness values for (int i = 0; i < historySize; i++) { fitnessHistory[i] = Double.MAX_VALUE; } } /** * Update of the evolution paths ps and pc. * * @param zmean Weighted row matrix of the gaussian random numbers generating * the current offspring. * @param xold xmean matrix of the previous generation. * @return hsig flag indicating a small correction. */ private boolean updateEvolutionPaths(RealMatrix zmean, RealMatrix xold) { ps = ps.scalarMultiply(1 - cs).add( B.multiply(zmean).scalarMultiply(FastMath.sqrt(cs * (2 - cs) * mueff))); normps = ps.getFrobeniusNorm(); final boolean hsig = normps / FastMath.sqrt(1 - FastMath.pow(1 - cs, 2 * iterations)) / chiN < 1.4 + 2 / ((double) dimension + 1); pc = pc.scalarMultiply(1 - cc); if (hsig) { pc = pc.add(xmean.subtract(xold).scalarMultiply(FastMath.sqrt(cc * (2 - cc) * mueff) / sigma)); } return hsig; } /** * Update of the covariance matrix C for diagonalOnly > 0 * * @param hsig Flag indicating a small correction. * @param bestArz Fitness-sorted matrix of the gaussian random values of the * current offspring. */ private void updateCovarianceDiagonalOnly(boolean hsig, final RealMatrix bestArz) { // minor correction if hsig==false double oldFac = hsig ? 0 : ccov1Sep * cc * (2 - cc); oldFac += 1 - ccov1Sep - ccovmuSep; diagC = diagC.scalarMultiply(oldFac) // regard old matrix .add(square(pc).scalarMultiply(ccov1Sep)) // plus rank one update .add((times(diagC, square(bestArz).multiply(weights))) // plus rank mu update .scalarMultiply(ccovmuSep)); diagD = sqrt(diagC); // replaces eig(C) if (diagonalOnly > 1 && iterations > diagonalOnly) { // full covariance matrix from now on diagonalOnly = 0; B = eye(dimension, dimension); BD = diag(diagD); C = diag(diagC); } } /** * Update of the covariance matrix C. * * @param hsig Flag indicating a small correction. * @param bestArx Fitness-sorted matrix of the argument vectors producing the * current offspring. * @param arz Unsorted matrix containing the gaussian random values of the * current offspring. * @param arindex Indices indicating the fitness-order of the current offspring. * @param xold xmean matrix of the previous generation. */ private void updateCovariance(boolean hsig, final RealMatrix bestArx, final RealMatrix arz, final int[] arindex, final RealMatrix xold) { double negccov = 0; if (ccov1 + ccovmu > 0) { final RealMatrix arpos = bestArx.subtract(repmat(xold, 1, mu)) .scalarMultiply(1 / sigma); // mu difference vectors final RealMatrix roneu = pc.multiply(pc.transpose()) .scalarMultiply(ccov1); // rank one update // minor correction if hsig==false double oldFac = hsig ? 0 : ccov1 * cc * (2 - cc); oldFac += 1 - ccov1 - ccovmu; if (isActiveCMA) { // Adapt covariance matrix C active CMA negccov = (1 - ccovmu) * 0.25 * mueff / (FastMath.pow(dimension + 2, 1.5) + 2 * mueff); // keep at least 0.66 in all directions, small popsize are most // critical final double negminresidualvariance = 0.66; // where to make up for the variance loss final double negalphaold = 0.5; // prepare vectors, compute negative updating matrix Cneg final int[] arReverseIndex = reverse(arindex); RealMatrix arzneg = selectColumns(arz, MathArrays.copyOf(arReverseIndex, mu)); RealMatrix arnorms = sqrt(sumRows(square(arzneg))); final int[] idxnorms = sortedIndices(arnorms.getRow(0)); final RealMatrix arnormsSorted = selectColumns(arnorms, idxnorms); final int[] idxReverse = reverse(idxnorms); final RealMatrix arnormsReverse = selectColumns(arnorms, idxReverse); arnorms = divide(arnormsReverse, arnormsSorted); final int[] idxInv = inverse(idxnorms); final RealMatrix arnormsInv = selectColumns(arnorms, idxInv); // check and set learning rate negccov final double negcovMax = (1 - negminresidualvariance) / square(arnormsInv).multiply(weights).getEntry(0, 0); if (negccov > negcovMax) { negccov = negcovMax; } arzneg = times(arzneg, repmat(arnormsInv, dimension, 1)); final RealMatrix artmp = BD.multiply(arzneg); final RealMatrix Cneg = artmp.multiply(diag(weights)).multiply(artmp.transpose()); oldFac += negalphaold * negccov; C = C.scalarMultiply(oldFac) .add(roneu) // regard old matrix .add(arpos.scalarMultiply( // plus rank one update ccovmu + (1 - negalphaold) * negccov) // plus rank mu update .multiply(times(repmat(weights, 1, dimension), arpos.transpose()))) .subtract(Cneg.scalarMultiply(negccov)); } else { // Adapt covariance matrix C - nonactive C = C.scalarMultiply(oldFac) // regard old matrix .add(roneu) // plus rank one update .add(arpos.scalarMultiply(ccovmu) // plus rank mu update .multiply(times(repmat(weights, 1, dimension), arpos.transpose()))); } } updateBD(negccov); } /** * Update B and D from C. * * @param negccov Negative covariance factor. */ private void updateBD(double negccov) { if (ccov1 + ccovmu + negccov > 0 && (iterations % 1. / (ccov1 + ccovmu + negccov) / dimension / 10.) < 1) { // to achieve O(N^2) C = triu(C, 0).add(triu(C, 1).transpose()); // enforce symmetry to prevent complex numbers final EigenDecomposition eig = new EigenDecomposition(C); B = eig.getV(); // eigen decomposition, B==normalized eigenvectors D = eig.getD(); diagD = diag(D); if (min(diagD) <= 0) { for (int i = 0; i < dimension; i++) { if (diagD.getEntry(i, 0) < 0) { diagD.setEntry(i, 0, 0); } } final double tfac = max(diagD) / 1e14; C = C.add(eye(dimension, dimension).scalarMultiply(tfac)); diagD = diagD.add(ones(dimension, 1).scalarMultiply(tfac)); } if (max(diagD) > 1e14 * min(diagD)) { final double tfac = max(diagD) / 1e14 - min(diagD); C = C.add(eye(dimension, dimension).scalarMultiply(tfac)); diagD = diagD.add(ones(dimension, 1).scalarMultiply(tfac)); } diagC = diag(C); diagD = sqrt(diagD); // D contains standard deviations now BD = times(B, repmat(diagD.transpose(), dimension, 1)); // O(n^2) } } /** * Pushes the current best fitness value in a history queue. * * @param vals History queue. * @param val Current best fitness value. */ private static void push(double[] vals, double val) { for (int i = vals.length-1; i > 0; i--) { vals[i] = vals[i-1]; } vals[0] = val; } /** * Sorts fitness values. * * @param doubles Array of values to be sorted. * @return a sorted array of indices pointing into doubles. */ private int[] sortedIndices(final double[] doubles) { final DoubleIndex[] dis = new DoubleIndex[doubles.length]; for (int i = 0; i < doubles.length; i++) { dis[i] = new DoubleIndex(doubles[i], i); } Arrays.sort(dis); final int[] indices = new int[doubles.length]; for (int i = 0; i < doubles.length; i++) { indices[i] = dis[i].index; } return indices; } /** * Used to sort fitness values. Sorting is always in lower value first * order. */ private static class DoubleIndex implements Comparable { /** Value to compare. */ private final double value; /** Index into sorted array. */ private final int index; /** * @param value Value to compare. * @param index Index into sorted array. */ DoubleIndex(double value, int index) { this.value = value; this.index = index; } /** {@inheritDoc} */ public int compareTo(DoubleIndex o) { return Double.compare(value, o.value); } /** {@inheritDoc} */ @Override public boolean equals(Object other) { if (this == other) { return true; } if (other instanceof DoubleIndex) { return Double.compare(value, ((DoubleIndex) other).value) == 0; } return false; } /** {@inheritDoc} */ @Override public int hashCode() { long bits = Double.doubleToLongBits(value); return (int) ((1438542 ^ (bits >>> 32) ^ bits) & 0xffffffff); } } /** * Normalizes fitness values to the range [0,1]. Adds a penalty to the * fitness value if out of range. The penalty is adjusted by calling * setValueRange(). */ private class FitnessFunction { /** Determines the penalty for boundary violations */ private double valueRange; /** * Flag indicating whether the objective variables are forced into their * bounds if defined */ private final boolean isRepairMode; /** Simple constructor. */ public FitnessFunction() { valueRange = 1; isRepairMode = true; } /** * @param point Normalized objective variables. * @return the objective value + penalty for violated bounds. */ public double value(final double[] point) { double value; if (isRepairMode) { double[] repaired = repair(point); value = CMAESOptimizer.this.computeObjectiveValue(repaired) + penalty(point, repaired); } else { value = CMAESOptimizer.this.computeObjectiveValue(point); } return isMinimize ? value : -value; } /** * @param x Normalized objective variables. * @return {@code true} if in bounds. */ public boolean isFeasible(final double[] x) { final double[] lB = CMAESOptimizer.this.getLowerBound(); final double[] uB = CMAESOptimizer.this.getUpperBound(); for (int i = 0; i < x.length; i++) { if (x[i] < lB[i]) { return false; } if (x[i] > uB[i]) { return false; } } return true; } /** * @param valueRange Adjusts the penalty computation. */ public void setValueRange(double valueRange) { this.valueRange = valueRange; } /** * @param x Normalized objective variables. * @return the repaired (i.e. all in bounds) objective variables. */ private double[] repair(final double[] x) { final double[] lB = CMAESOptimizer.this.getLowerBound(); final double[] uB = CMAESOptimizer.this.getUpperBound(); final double[] repaired = new double[x.length]; for (int i = 0; i < x.length; i++) { if (x[i] < lB[i]) { repaired[i] = lB[i]; } else if (x[i] > uB[i]) { repaired[i] = uB[i]; } else { repaired[i] = x[i]; } } return repaired; } /** * @param x Normalized objective variables. * @param repaired Repaired objective variables. * @return Penalty value according to the violation of the bounds. */ private double penalty(final double[] x, final double[] repaired) { double penalty = 0; for (int i = 0; i < x.length; i++) { double diff = FastMath.abs(x[i] - repaired[i]); penalty += diff * valueRange; } return isMinimize ? penalty : -penalty; } } // -----Matrix utility functions similar to the Matlab build in functions------ /** * @param m Input matrix * @return Matrix representing the element-wise logarithm of m. */ private static RealMatrix log(final RealMatrix m) { final double[][] d = new double[m.getRowDimension()][m.getColumnDimension()]; for (int r = 0; r < m.getRowDimension(); r++) { for (int c = 0; c < m.getColumnDimension(); c++) { d[r][c] = FastMath.log(m.getEntry(r, c)); } } return new Array2DRowRealMatrix(d, false); } /** * @param m Input matrix. * @return Matrix representing the element-wise square root of m. */ private static RealMatrix sqrt(final RealMatrix m) { final double[][] d = new double[m.getRowDimension()][m.getColumnDimension()]; for (int r = 0; r < m.getRowDimension(); r++) { for (int c = 0; c < m.getColumnDimension(); c++) { d[r][c] = FastMath.sqrt(m.getEntry(r, c)); } } return new Array2DRowRealMatrix(d, false); } /** * @param m Input matrix. * @return Matrix representing the element-wise square of m. */ private static RealMatrix square(final RealMatrix m) { final double[][] d = new double[m.getRowDimension()][m.getColumnDimension()]; for (int r = 0; r < m.getRowDimension(); r++) { for (int c = 0; c < m.getColumnDimension(); c++) { double e = m.getEntry(r, c); d[r][c] = e * e; } } return new Array2DRowRealMatrix(d, false); } /** * @param m Input matrix 1. * @param n Input matrix 2. * @return the matrix where the elements of m and n are element-wise multiplied. */ private static RealMatrix times(final RealMatrix m, final RealMatrix n) { final double[][] d = new double[m.getRowDimension()][m.getColumnDimension()]; for (int r = 0; r < m.getRowDimension(); r++) { for (int c = 0; c < m.getColumnDimension(); c++) { d[r][c] = m.getEntry(r, c) * n.getEntry(r, c); } } return new Array2DRowRealMatrix(d, false); } /** * @param m Input matrix 1. * @param n Input matrix 2. * @return Matrix where the elements of m and n are element-wise divided. */ private static RealMatrix divide(final RealMatrix m, final RealMatrix n) { final double[][] d = new double[m.getRowDimension()][m.getColumnDimension()]; for (int r = 0; r < m.getRowDimension(); r++) { for (int c = 0; c < m.getColumnDimension(); c++) { d[r][c] = m.getEntry(r, c) / n.getEntry(r, c); } } return new Array2DRowRealMatrix(d, false); } /** * @param m Input matrix. * @param cols Columns to select. * @return Matrix representing the selected columns. */ private static RealMatrix selectColumns(final RealMatrix m, final int[] cols) { final double[][] d = new double[m.getRowDimension()][cols.length]; for (int r = 0; r < m.getRowDimension(); r++) { for (int c = 0; c < cols.length; c++) { d[r][c] = m.getEntry(r, cols[c]); } } return new Array2DRowRealMatrix(d, false); } /** * @param m Input matrix. * @param k Diagonal position. * @return Upper triangular part of matrix. */ private static RealMatrix triu(final RealMatrix m, int k) { final double[][] d = new double[m.getRowDimension()][m.getColumnDimension()]; for (int r = 0; r < m.getRowDimension(); r++) { for (int c = 0; c < m.getColumnDimension(); c++) { d[r][c] = r <= c - k ? m.getEntry(r, c) : 0; } } return new Array2DRowRealMatrix(d, false); } /** * @param m Input matrix. * @return Row matrix representing the sums of the rows. */ private static RealMatrix sumRows(final RealMatrix m) { final double[][] d = new double[1][m.getColumnDimension()]; for (int c = 0; c < m.getColumnDimension(); c++) { double sum = 0; for (int r = 0; r < m.getRowDimension(); r++) { sum += m.getEntry(r, c); } d[0][c] = sum; } return new Array2DRowRealMatrix(d, false); } /** * @param m Input matrix. * @return the diagonal n-by-n matrix if m is a column matrix or the column * matrix representing the diagonal if m is a n-by-n matrix. */ private static RealMatrix diag(final RealMatrix m) { if (m.getColumnDimension() == 1) { final double[][] d = new double[m.getRowDimension()][m.getRowDimension()]; for (int i = 0; i < m.getRowDimension(); i++) { d[i][i] = m.getEntry(i, 0); } return new Array2DRowRealMatrix(d, false); } else { final double[][] d = new double[m.getRowDimension()][1]; for (int i = 0; i < m.getColumnDimension(); i++) { d[i][0] = m.getEntry(i, i); } return new Array2DRowRealMatrix(d, false); } } /** * Copies a column from m1 to m2. * * @param m1 Source matrix. * @param col1 Source column. * @param m2 Target matrix. * @param col2 Target column. */ private static void copyColumn(final RealMatrix m1, int col1, RealMatrix m2, int col2) { for (int i = 0; i < m1.getRowDimension(); i++) { m2.setEntry(i, col2, m1.getEntry(i, col1)); } } /** * @param n Number of rows. * @param m Number of columns. * @return n-by-m matrix filled with 1. */ private static RealMatrix ones(int n, int m) { final double[][] d = new double[n][m]; for (int r = 0; r < n; r++) { Arrays.fill(d[r], 1); } return new Array2DRowRealMatrix(d, false); } /** * @param n Number of rows. * @param m Number of columns. * @return n-by-m matrix of 0 values out of diagonal, and 1 values on * the diagonal. */ private static RealMatrix eye(int n, int m) { final double[][] d = new double[n][m]; for (int r = 0; r < n; r++) { if (r < m) { d[r][r] = 1; } } return new Array2DRowRealMatrix(d, false); } /** * @param n Number of rows. * @param m Number of columns. * @return n-by-m matrix of zero values. */ private static RealMatrix zeros(int n, int m) { return new Array2DRowRealMatrix(n, m); } /** * @param mat Input matrix. * @param n Number of row replicates. * @param m Number of column replicates. * @return a matrix which replicates the input matrix in both directions. */ private static RealMatrix repmat(final RealMatrix mat, int n, int m) { final int rd = mat.getRowDimension(); final int cd = mat.getColumnDimension(); final double[][] d = new double[n * rd][m * cd]; for (int r = 0; r < n * rd; r++) { for (int c = 0; c < m * cd; c++) { d[r][c] = mat.getEntry(r % rd, c % cd); } } return new Array2DRowRealMatrix(d, false); } /** * @param start Start value. * @param end End value. * @param step Step size. * @return a sequence as column matrix. */ private static RealMatrix sequence(double start, double end, double step) { final int size = (int) ((end - start) / step + 1); final double[][] d = new double[size][1]; double value = start; for (int r = 0; r < size; r++) { d[r][0] = value; value += step; } return new Array2DRowRealMatrix(d, false); } /** * @param m Input matrix. * @return the maximum of the matrix element values. */ private static double max(final RealMatrix m) { double max = -Double.MAX_VALUE; for (int r = 0; r < m.getRowDimension(); r++) { for (int c = 0; c < m.getColumnDimension(); c++) { double e = m.getEntry(r, c); if (max < e) { max = e; } } } return max; } /** * @param m Input matrix. * @return the minimum of the matrix element values. */ private static double min(final RealMatrix m) { double min = Double.MAX_VALUE; for (int r = 0; r < m.getRowDimension(); r++) { for (int c = 0; c < m.getColumnDimension(); c++) { double e = m.getEntry(r, c); if (min > e) { min = e; } } } return min; } /** * @param m Input array. * @return the maximum of the array values. */ private static double max(final double[] m) { double max = -Double.MAX_VALUE; for (int r = 0; r < m.length; r++) { if (max < m[r]) { max = m[r]; } } return max; } /** * @param m Input array. * @return the minimum of the array values. */ private static double min(final double[] m) { double min = Double.MAX_VALUE; for (int r = 0; r < m.length; r++) { if (min > m[r]) { min = m[r]; } } return min; } /** * @param indices Input index array. * @return the inverse of the mapping defined by indices. */ private static int[] inverse(final int[] indices) { final int[] inverse = new int[indices.length]; for (int i = 0; i < indices.length; i++) { inverse[indices[i]] = i; } return inverse; } /** * @param indices Input index array. * @return the indices in inverse order (last is first). */ private static int[] reverse(final int[] indices) { final int[] reverse = new int[indices.length]; for (int i = 0; i < indices.length; i++) { reverse[i] = indices[indices.length - i - 1]; } return reverse; } /** * @param size Length of random array. * @return an array of Gaussian random numbers. */ private double[] randn(int size) { final double[] randn = new double[size]; for (int i = 0; i < size; i++) { randn[i] = random.nextGaussian(); } return randn; } /** * @param size Number of rows. * @param popSize Population size. * @return a 2-dimensional matrix of Gaussian random numbers. */ private RealMatrix randn1(int size, int popSize) { final double[][] d = new double[size][popSize]; for (int r = 0; r < size; r++) { for (int c = 0; c < popSize; c++) { d[r][c] = random.nextGaussian(); } } return new Array2DRowRealMatrix(d, false); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy