All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.openimaj.ml.gmm.GaussianMixtureModelEM Maven / Gradle / Ivy

Go to download

Various clustering algorithm implementations for all primitive types including random, random forest, K-Means (Exact, Hierarchical and Approximate), ...

There is a newer version: 1.3.5
Show newest version
/**
 * Copyright (c) 2011, The University of Southampton and the individual contributors.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * 	Redistributions of source code must retain the above copyright notice,
 * 	this list of conditions and the following disclaimer.
 *
 *   *	Redistributions in binary form must reproduce the above copyright notice,
 * 	this list of conditions and the following disclaimer in the documentation
 * 	and/or other materials provided with the distribution.
 *
 *   *	Neither the name of the University of Southampton nor the names of its
 * 	contributors may be used to endorse or promote products derived from this
 * 	software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.openimaj.ml.gmm;

import gnu.trove.list.array.TDoubleArrayList;

import java.util.Arrays;
import java.util.EnumSet;

import org.apache.commons.math.util.MathUtils;
import org.openimaj.math.matrix.MatrixUtils;
import org.openimaj.math.statistics.MeanAndCovariance;
import org.openimaj.math.statistics.distribution.AbstractMultivariateGaussian;
import org.openimaj.math.statistics.distribution.DiagonalMultivariateGaussian;
import org.openimaj.math.statistics.distribution.FullMultivariateGaussian;
import org.openimaj.math.statistics.distribution.MixtureOfGaussians;
import org.openimaj.math.statistics.distribution.MultivariateGaussian;
import org.openimaj.math.statistics.distribution.SphericalMultivariateGaussian;
import org.openimaj.ml.clustering.DoubleCentroidsResult;
import org.openimaj.ml.clustering.kmeans.DoubleKMeans;
import org.openimaj.util.array.ArrayUtils;
import org.openimaj.util.pair.IndependentPair;

import Jama.Matrix;

/**
 * Gaussian mixture model learning using the EM algorithm. Supports a range of
 * different shapes Gaussian through different covariance matrix forms. An
 * initialisation step is used to learn the initial means using K-Means,
 * although this can be disabled in the constructor.
 * 

* Implementation was originally inspired by the SciPy's "gmm.py". * * @author Jonathon Hare ([email protected]) */ public class GaussianMixtureModelEM { /** * Different forms of covariance matrix supported by the * {@link GaussianMixtureModelEM}. * * @author Jonathon Hare ([email protected]) */ public static enum CovarianceType { /** * Spherical Gaussians: variance is the same along all axes and zero * across-axes. */ Spherical { @Override protected void setCovariances(MultivariateGaussian[] gaussians, Matrix cv) { double mean = 0; for (int i = 0; i < cv.getRowDimension(); i++) for (int j = 0; j < cv.getColumnDimension(); j++) mean += cv.get(i, j); mean /= (cv.getColumnDimension() * cv.getRowDimension()); for (final MultivariateGaussian mg : gaussians) { ((SphericalMultivariateGaussian) mg).variance = mean; } } @Override protected MultivariateGaussian[] createGaussians(int ngauss, int ndims) { final MultivariateGaussian[] arr = new MultivariateGaussian[ngauss]; for (int i = 0; i < ngauss; i++) { arr[i] = new SphericalMultivariateGaussian(ndims); } return arr; } @Override protected void mstep(EMGMM gmm, GaussianMixtureModelEM learner, Matrix X, Matrix responsibilities, Matrix weightedXsum, double[] norm) { final Matrix avgX2uw = responsibilities.transpose().times(X.arrayTimes(X)); for (int i = 0; i < gmm.gaussians.length; i++) { final Matrix weightedXsumi = new Matrix(new double[][] { weightedXsum.getArray()[i] }); final Matrix avgX2uwi = new Matrix(new double[][] { avgX2uw.getArray()[i] }); final Matrix avgX2 = avgX2uwi.times(norm[i]); final Matrix mu = ((AbstractMultivariateGaussian) gmm.gaussians[i]).mean; final Matrix avgMeans2 = MatrixUtils.pow(mu, 2); final Matrix avgXmeans = mu.arrayTimes(weightedXsumi).times(norm[i]); final Matrix covar = MatrixUtils.plus(avgX2.minus(avgXmeans.times(2)).plus(avgMeans2), learner.minCovar); ((SphericalMultivariateGaussian) gmm.gaussians[i]).variance = MatrixUtils.sum(covar) / X.getColumnDimension(); } } }, /** * Gaussians with diagonal covariance matrices. */ Diagonal { @Override protected void setCovariances(MultivariateGaussian[] gaussians, Matrix cv) { for (final MultivariateGaussian mg : gaussians) { ((DiagonalMultivariateGaussian) mg).variance = MatrixUtils.diagVector(cv); } } @Override protected MultivariateGaussian[] createGaussians(int ngauss, int ndims) { final MultivariateGaussian[] arr = new MultivariateGaussian[ngauss]; for (int i = 0; i < ngauss; i++) { arr[i] = new DiagonalMultivariateGaussian(ndims); } return arr; } @Override protected void mstep(EMGMM gmm, GaussianMixtureModelEM learner, Matrix X, Matrix responsibilities, Matrix weightedXsum, double[] norm) { final Matrix avgX2uw = responsibilities.transpose().times(X.arrayTimes(X)); for (int i = 0; i < gmm.gaussians.length; i++) { final Matrix weightedXsumi = new Matrix(new double[][] { weightedXsum.getArray()[i] }); final Matrix avgX2uwi = new Matrix(new double[][] { avgX2uw.getArray()[i] }); final Matrix avgX2 = avgX2uwi.times(norm[i]); final Matrix mu = ((AbstractMultivariateGaussian) gmm.gaussians[i]).mean; final Matrix avgMeans2 = MatrixUtils.pow(mu, 2); final Matrix avgXmeans = mu.arrayTimes(weightedXsumi).times(norm[i]); final Matrix covar = MatrixUtils.plus(avgX2.minus(avgXmeans.times(2)).plus(avgMeans2), learner.minCovar); ((DiagonalMultivariateGaussian) gmm.gaussians[i]).variance = covar.getArray()[0]; } } }, /** * Gaussians with full covariance */ Full { @Override protected MultivariateGaussian[] createGaussians(int ngauss, int ndims) { final MultivariateGaussian[] arr = new MultivariateGaussian[ngauss]; for (int i = 0; i < ngauss; i++) { arr[i] = new FullMultivariateGaussian(ndims); } return arr; } @Override protected void setCovariances(MultivariateGaussian[] gaussians, Matrix cv) { for (final MultivariateGaussian mg : gaussians) { ((FullMultivariateGaussian) mg).covar = cv.copy(); } } @Override protected void mstep(EMGMM gmm, GaussianMixtureModelEM learner, Matrix X, Matrix responsibilities, Matrix weightedXsum, double[] norm) { // Eq. 12 from K. Murphy, // "Fitting a Conditional Linear Gaussian Distribution" final int nfeatures = X.getColumnDimension(); for (int c = 0; c < learner.nComponents; c++) { final Matrix post = responsibilities.getMatrix(0, X.getRowDimension() - 1, c, c).transpose(); final double factor = 1.0 / (ArrayUtils.sumValues(post.getArray()) + 10 * MathUtils.EPSILON); final Matrix pXt = X.transpose(); for (int i = 0; i < pXt.getRowDimension(); i++) for (int j = 0; j < pXt.getColumnDimension(); j++) pXt.set(i, j, pXt.get(i, j) * post.get(0, j)); final Matrix argcv = pXt.times(X).times(factor); final Matrix mu = ((FullMultivariateGaussian) gmm.gaussians[c]).mean; ((FullMultivariateGaussian) gmm.gaussians[c]).covar = argcv.minusEquals(mu.transpose().times(mu)) .plusEquals(Matrix.identity(nfeatures, nfeatures).times(learner.minCovar)); } } }, /** * Gaussians with a tied covariance matrix; the same covariance matrix * is shared by all the gaussians. */ Tied { // @Override // protected double[][] logProbability(double[][] x, // MultivariateGaussian[] gaussians) // { // final int ndim = x[0].length; // final int nmix = gaussians.length; // final int nsamples = x.length; // final Matrix X = new Matrix(x); // // final double[][] logProb = new double[nsamples][nmix]; // final Matrix cv = ((FullMultivariateGaussian) // gaussians[0]).covar; // // final CholeskyDecomposition chol = cv.chol(); // Matrix cvChol; // if (chol.isSPD()) { // cvChol = chol.getL(); // } else { // // covar probably doesn't have enough samples, so // // recondition it // final Matrix m = cv.plus(Matrix.identity(ndim, ndim).timesEquals( // MixtureOfGaussians.MIN_COVAR_RECONDITION)); // cvChol = m.chol().getL(); // } // // double cvLogDet = 0; // final double[][] cvCholD = cvChol.getArray(); // for (int j = 0; j < ndim; j++) { // cvLogDet += Math.log(cvCholD[j][j]); // } // cvLogDet *= 2; // // for (int i = 0; i < nmix; i++) { // final Matrix mu = ((FullMultivariateGaussian) gaussians[i]).mean; // final Matrix cvSol = cvChol.solve(MatrixUtils.minusRow(X, // mu.getArray()[0]).transpose()) // .transpose(); // for (int k = 0; k < nsamples; k++) { // double sum = 0; // for (int j = 0; j < ndim; j++) { // sum += cvSol.get(k, j) * cvSol.get(k, j); // } // // logProb[k][i] = -0.5 * (sum + cvLogDet + ndim * Math.log(2 * // Math.PI)); // } // } // // return logProb; // } @Override protected void setCovariances(MultivariateGaussian[] gaussians, Matrix cv) { for (final MultivariateGaussian mg : gaussians) { ((FullMultivariateGaussian) mg).covar = cv; } } @Override protected MultivariateGaussian[] createGaussians(int ngauss, int ndims) { final MultivariateGaussian[] arr = new MultivariateGaussian[ngauss]; final Matrix covar = new Matrix(ndims, ndims); for (int i = 0; i < ngauss; i++) { arr[i] = new FullMultivariateGaussian(new Matrix(1, ndims), covar); } return arr; } @Override protected void mstep(EMGMM gmm, GaussianMixtureModelEM learner, Matrix X, Matrix responsibilities, Matrix weightedXsum, double[] norm) { // Eq. 15 from K. Murphy, "Fitting a Conditional Linear Gaussian final int nfeatures = X.getColumnDimension(); final Matrix avgX2 = X.transpose().times(X); final double[][] mudata = new double[gmm.gaussians.length][]; for (int i = 0; i < mudata.length; i++) mudata[i] = ((FullMultivariateGaussian) gmm.gaussians[i]).mean.getArray()[0]; final Matrix mu = new Matrix(mudata); final Matrix avgMeans2 = mu.transpose().times(weightedXsum); final Matrix covar = avgX2.minus(avgMeans2) .plus(Matrix.identity(nfeatures, nfeatures).times(learner.minCovar)) .times(1.0 / X.getRowDimension()); for (int i = 0; i < learner.nComponents; i++) ((FullMultivariateGaussian) gmm.gaussians[i]).covar = covar; } }; protected abstract MultivariateGaussian[] createGaussians(int ngauss, int ndims); protected abstract void setCovariances(MultivariateGaussian[] gaussians, Matrix cv); /** * Mode specific maximisation-step. Implementors should use the state to * update the covariance of each of the * {@link GaussianMixtureModelEM#gaussians}. * * @param gmm * the mixture model being learned * @param X * the data * @param responsibilities * matrix with the same number of rows as X where each col is * the amount that the data point belongs to each gaussian * @param weightedXsum * responsibilities.T * X * @param inverseWeights * 1/weights */ protected abstract void mstep(EMGMM gmm, GaussianMixtureModelEM learner, Matrix X, Matrix responsibilities, Matrix weightedXsum, double[] inverseWeights); } /** * Options for controlling what gets updated during the initialisation * and/or iterations. * * @author Jonathon Hare ([email protected]) */ public static enum UpdateOptions { /** * Update the means */ Means, /** * Update the weights */ Weights, /** * Update the covariances */ Covariances } private static class EMGMM extends MixtureOfGaussians { EMGMM(int nComponents) { super(null, null); this.weights = new double[nComponents]; Arrays.fill(this.weights, 1.0 / nComponents); } } private static final double DEFAULT_THRESH = 1e-2; private static final double DEFAULT_MIN_COVAR = 1e-3; private static final int DEFAULT_NITERS = 100; private static final int DEFAULT_NINIT = 1; CovarianceType ctype; int nComponents; private double thresh; private double minCovar; private int nIters; private int nInit; private boolean converged = false; private EnumSet initOpts; private EnumSet iterOpts; /** * Construct with the given arguments. * * @param nComponents * the number of gaussian components * @param ctype * the form of the covariance matrices * @param thresh * the threshold at which to stop iterating * @param minCovar * the minimum value allowed in the diagonal of the estimated * covariance matrices to prevent overfitting * @param nIters * the maximum number of iterations * @param nInit * the number of runs of the algorithm to perform; the best * result will be kept. * @param iterOpts * options controlling what is updated during iteration * @param initOpts * options controlling what is updated during initialisation. * Enabling the {@link UpdateOptions#Means} option will cause * K-Means to be used to generate initial starting points for the * means. */ public GaussianMixtureModelEM(int nComponents, CovarianceType ctype, double thresh, double minCovar, int nIters, int nInit, EnumSet iterOpts, EnumSet initOpts) { this.ctype = ctype; this.nComponents = nComponents; this.thresh = thresh; this.minCovar = minCovar; this.nIters = nIters; this.nInit = nInit; this.iterOpts = iterOpts; this.initOpts = initOpts; if (nInit < 1) { throw new IllegalArgumentException("GMM estimation requires at least one run"); } this.converged = false; } /** * Construct with the given arguments. * * @param nComponents * the number of gaussian components * @param ctype * the form of the covariance matrices */ public GaussianMixtureModelEM(int nComponents, CovarianceType ctype) { this(nComponents, ctype, DEFAULT_THRESH, DEFAULT_MIN_COVAR, DEFAULT_NITERS, DEFAULT_NINIT, EnumSet .allOf(UpdateOptions.class), EnumSet.allOf(UpdateOptions.class)); } /** * Get's the convergence state of the algorithm. Will return false if * {@link #estimate(double[][])} has not been called, or if the last call to * {@link #estimate(double[][])} failed to reach convergence before running * out of iterations. * * @return true if the last call to {@link #estimate(double[][])} reached * convergence; false otherwise */ public boolean hasConverged() { return converged; } /** * Estimate a new {@link MixtureOfGaussians} from the given data. Use * {@link #hasConverged()} to check whether the EM algorithm reached * convergence in the estimation of the returned model. * * @param X * the data matrix. * @return the generated GMM. */ public MixtureOfGaussians estimate(Matrix X) { return estimate(X.getArray()); } /** * Estimate a new {@link MixtureOfGaussians} from the given data. Use * {@link #hasConverged()} to check whether the EM algorithm reached * convergence in the estimation of the returned model. * * @param X * the data array. * @return the generated GMM. */ public MixtureOfGaussians estimate(double[][] X) { final EMGMM gmm = new EMGMM(nComponents); if (X.length < nComponents) throw new IllegalArgumentException(String.format( "GMM estimation with %d components, but got only %d samples", nComponents, X.length)); double max_log_prob = Double.NEGATIVE_INFINITY; for (int j = 0; j < nInit; j++) { gmm.gaussians = ctype.createGaussians(nComponents, X[0].length); if (initOpts.contains(UpdateOptions.Means)) { // initialise using k-means final DoubleKMeans km = DoubleKMeans.createExact(nComponents); final DoubleCentroidsResult means = km.cluster(X); for (int i = 0; i < nComponents; i++) { ((AbstractMultivariateGaussian) gmm.gaussians[i]).mean.getArray()[0] = means.centroids[i]; } } if (initOpts.contains(UpdateOptions.Weights)) { gmm.weights = new double[nComponents]; Arrays.fill(gmm.weights, 1.0 / nComponents); } if (initOpts.contains(UpdateOptions.Covariances)) { // cv = np.cov(X.T) + self.min_covar * np.eye(X.shape[1]) final Matrix cv = MeanAndCovariance.computeCovariance(X); ctype.setCovariances(gmm.gaussians, cv); } // EM algorithm final TDoubleArrayList log_likelihood = new TDoubleArrayList(); // reset converged to false converged = false; double[] bestWeights = null; MultivariateGaussian[] bestMixture = null; for (int i = 0; i < nIters; i++) { // Expectation step final IndependentPair score = gmm.scoreSamples(X); final double[] curr_log_likelihood = score.firstObject(); final double[][] responsibilities = score.secondObject(); log_likelihood.add(ArrayUtils.sumValues(curr_log_likelihood)); // Check for convergence. if (i > 0 && Math.abs(log_likelihood.get(i) - log_likelihood.get(i - 1)) < thresh) { converged = true; break; } // Perform the maximisation step mstep(gmm, X, responsibilities); // if the results are better, keep it if (nIters > 0) { if (log_likelihood.getQuick(i) > max_log_prob) { max_log_prob = log_likelihood.getQuick(i); bestWeights = gmm.weights; bestMixture = gmm.gaussians; } } // check the existence of an init param that was not subject to // likelihood computation issue. if (Double.isInfinite(max_log_prob) && nIters > 0) { throw new RuntimeException( "EM algorithm was never able to compute a valid likelihood given initial " + "parameters. Try different init parameters (or increasing n_init) or " + "check for degenerate data."); } if (nIters > 0) { gmm.gaussians = bestMixture; gmm.weights = bestWeights; } } } return gmm; } private void mstep(EMGMM gmm, double[][] X, double[][] responsibilities) { final double[] weights = ArrayUtils.colSum(responsibilities); final Matrix resMat = new Matrix(responsibilities); final Matrix Xmat = new Matrix(X); final Matrix weighted_X_sum = resMat.transpose().times(Xmat); final double[] inverse_weights = new double[weights.length]; for (int i = 0; i < inverse_weights.length; i++) inverse_weights[i] = 1.0 / (weights[i] + 10 * MathUtils.EPSILON); if (iterOpts.contains(UpdateOptions.Weights)) { final double sum = ArrayUtils.sumValues(weights); for (int i = 0; i < weights.length; i++) { gmm.weights[i] = (weights[i] / (sum + 10 * MathUtils.EPSILON) + MathUtils.EPSILON); } } if (iterOpts.contains(UpdateOptions.Means)) { // self.means_ = weighted_X_sum * inverse_weights final double[][] wx = weighted_X_sum.getArray(); for (int i = 0; i < nComponents; i++) { final double[][] m = ((AbstractMultivariateGaussian) gmm.gaussians[i]).mean.getArray(); for (int j = 0; j < m[0].length; j++) { m[0][j] = wx[i][j] * inverse_weights[i]; } } } if (iterOpts.contains(UpdateOptions.Covariances)) { ctype.mstep(gmm, this, Xmat, resMat, weighted_X_sum, inverse_weights); } } @Override public GaussianMixtureModelEM clone() { try { return (GaussianMixtureModelEM) super.clone(); } catch (final CloneNotSupportedException e) { throw new RuntimeException(e); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy