
smile.manifold.KPCA

/*
 * Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Smile. If not, see <https://www.gnu.org/licenses/>.
 */

package smile.manifold;

import java.io.Serializable;
import java.util.Arrays;
import java.util.function.Function;
import smile.math.MathEx;
import smile.math.blas.UPLO;
import smile.math.kernel.MercerKernel;
import smile.math.matrix.ARPACK;
import smile.math.matrix.Matrix;

/**
 * Kernel principal component analysis. Kernel PCA is an extension of
 * principal component analysis (PCA) using techniques of kernel methods.
 * Using a kernel, the originally linear operations of PCA are done in a
 * reproducing kernel Hilbert space with a non-linear mapping.
 * <p>
 * In practice, a large data set leads to a large Kernel/Gram matrix K, and
 * storing K may become a problem. One way to deal with this is to perform
 * clustering on your large dataset, and populate the kernel with the means
 * of those clusters. Since even this method may yield a relatively large K,
 * it is common to compute only the top P eigenvalues and eigenvectors of K.
 * <p>
 * Kernel PCA with an isotropic kernel function is closely related to metric MDS.
 * Carrying out metric MDS on the kernel matrix K produces an equivalent
 * configuration of points as the distance (2(1 - K(x<sub>i</sub>, x<sub>j</sub>)))<sup>1/2</sup>
 * computed in feature space.
 * <p>
 * Kernel PCA also has close connections with Isomap, LLE, and Laplacian eigenmaps.
 *
 * <h2>References</h2>
 * <ol>
 * <li>Bernhard Scholkopf, Alexander Smola, and Klaus-Robert Muller. Nonlinear Component Analysis as a Kernel Eigenvalue Problem. Neural Computation, 1998.</li>
 * </ol>
 *
 * @see smile.math.kernel.MercerKernel
 * @see smile.feature.extraction.PCA
 * @see smile.manifold.IsoMap
 * @see smile.manifold.LLE
 * @see smile.manifold.LaplacianEigenmap
 * @see smile.manifold.SammonMapping
 *
 * @author Haifeng Li
 */
public class KPCA<T> implements Function<T, double[]>, Serializable {
    private static final long serialVersionUID = 2L;

    /**
     * Training data.
     */
    private final T[] data;
    /**
     * Mercer kernel.
     */
    private final MercerKernel<T> kernel;
    /**
     * The row mean of kernel matrix.
     */
    private final double[] mean;
    /**
     * The mean of kernel matrix.
     */
    private final double mu;
    /**
     * The eigenvalues of kernel principal components.
     */
    private final double[] latent;
    /**
     * The projection matrix.
     */
    private final Matrix projection;
    /**
     * The coordinates of projected training data.
     */
    private final double[][] coordinates;

    /**
     * Constructor.
     * @param data training data.
     * @param kernel Mercer kernel.
     * @param mean the row/column average of kernel matrix.
     * @param mu the average of kernel matrix.
     * @param coordinates the coordinates of projected training data.
     * @param latent the eigenvalues of kernel principal components.
     * @param projection the projection matrix.
     */
    public KPCA(T[] data, MercerKernel<T> kernel, double[] mean, double mu, double[][] coordinates, double[] latent, Matrix projection) {
        this.data = data;
        this.kernel = kernel;
        this.mean = mean;
        this.mu = mu;
        this.coordinates = coordinates;
        this.latent = latent;
        this.projection = projection;
    }

    /**
     * Fits kernel principal component analysis.
     * @param data training data.
     * @param kernel Mercer kernel.
     * @param k choose up to k principal components (with eigenvalues larger than 0.0001) used for projection.
     * @param <T> the data type of samples.
     * @return the model.
     */
    public static <T> KPCA<T> fit(T[] data, MercerKernel<T> kernel, int k) {
        return fit(data, kernel, k, 0.0001);
    }

    /**
     * Fits kernel principal component analysis.
     * @param data training data.
     * @param kernel Mercer kernel.
     * @param k choose top k principal components used for projection.
     * @param threshold only principal components with eigenvalues
     *                  larger than the given threshold will be kept.
     * @param <T> the data type of samples.
     * @return the model.
     */
    public static <T> KPCA<T> fit(T[] data, MercerKernel<T> kernel, int k, double threshold) {
        if (threshold < 0) {
            throw new IllegalArgumentException("Invalid threshold = " + threshold);
        }

        if (k < 1 || k > data.length) {
            throw new IllegalArgumentException("Invalid dimension of feature space: " + k);
        }

        int n = data.length;
        // Compute the symmetric kernel (Gram) matrix.
        Matrix K = new Matrix(n, n);
        for (int i = 0; i < n; i++) {
            for (int j = 0; j <= i; j++) {
                double x = kernel.k(data[i], data[j]);
                K.set(i, j, x);
                K.set(j, i, x);
            }
        }

        // Double-center the kernel matrix so that the implicit features have zero mean.
        double[] mean = K.rowMeans();
        double mu = MathEx.mean(mean);
        for (int i = 0; i < n; i++) {
            for (int j = 0; j <= i; j++) {
                double x = K.get(i, j) - mean[i] - mean[j] + mu;
                K.set(i, j, x);
                K.set(j, i, x);
            }
        }

        K.uplo(UPLO.LOWER);
        // Compute the top k eigenvalues/eigenvectors of the centered kernel matrix with ARPACK.
        Matrix.EVD eigen = ARPACK.syev(K, ARPACK.SymmOption.LA, k);

        double[] eigvalues = eigen.wr;
        Matrix eigvectors = eigen.Vr;
        // Keep only components whose eigenvalues (normalized by n) exceed the threshold.
        int p = (int) Arrays.stream(eigvalues).limit(k).filter(e -> e/n > threshold).count();

        double[] latent = new double[p];
        Matrix projection = new Matrix(p, n);
        for (int j = 0; j < p; j++) {
            latent[j] = eigvalues[j];
            double s = Math.sqrt(latent[j]);
            for (int i = 0; i < n; i++) {
                projection.set(j, i, eigvectors.get(i, j) / s);
            }
        }

        Matrix coord = projection.mm(K);
        double[][] coordinates = new double[n][p];
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < p; j++) {
                coordinates[i][j] = coord.get(j, i);
            }
        }

        return new KPCA<>(data, kernel, mean, mu, coordinates, latent, projection);
    }

    /**
     * Returns the eigenvalues of kernel principal components, ordered from largest to smallest.
     * @return the eigenvalues of kernel principal components, ordered from largest to smallest.
     */
    public double[] variances() {
        return latent;
    }

    /**
     * Returns the projection matrix. The dimension reduced data can be obtained
     * by y = W * K(x, ·).
     * @return the projection matrix.
     */
    public Matrix projection() {
        return projection;
    }

    /**
     * Returns the nonlinear principal component scores, i.e., the representation
     * of learning data in the nonlinear principal component space. Rows
     * correspond to observations, columns to components.
     * @return the nonlinear principal component scores.
     */
    public double[][] coordinates() {
        return coordinates;
    }

    @Override
    public double[] apply(T x) {
        int n = data.length;
        double[] y = new double[n];
        for (int i = 0; i < n; i++) {
            y[i] = kernel.k(x, data[i]);
        }

        // Center the kernel evaluations consistently with the training kernel matrix.
        double my = MathEx.mean(y);
        for (int i = 0; i < n; i++) {
            y[i] = y[i] - my - mean[i] + mu;
        }

        return projection.mv(y);
    }

    /**
     * Projects a set of data to the feature space.
     * @param x the data set.
     * @return the projection in the feature space.
     */
    public double[][] apply(T[] x) {
        int m = x.length;
        double[][] y = new double[m][];
        for (int i = 0; i < m; i++) {
            y[i] = apply(x[i]);
        }

        return y;
    }
}
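For reference, a minimal usage sketch of the API above. It assumes double[] samples and Smile's Gaussian (RBF) kernel from smile.math.kernel; the data, bandwidth, and number of components are arbitrary placeholders, not recommended values.

import smile.manifold.KPCA;
import smile.math.kernel.GaussianKernel;

public class KPCADemo {
    public static void main(String[] args) {
        // Toy data: replace with real samples.
        double[][] x = {
            {0.0, 0.0}, {0.1, 0.2}, {0.9, 1.1}, {1.0, 1.0}, {2.0, 1.9}, {2.1, 2.0}
        };

        // Fit kernel PCA with a Gaussian kernel (bandwidth 1.0), keeping up to 2 components.
        KPCA<double[]> kpca = KPCA.fit(x, new GaussianKernel(1.0), 2);

        // Scores of the training data in the kernel principal component space.
        double[][] scores = kpca.coordinates();

        // Out-of-sample projection of a new point.
        double[] y = kpca.apply(new double[]{0.5, 0.5});

        System.out.println("kept components: " + kpca.variances().length);
        System.out.println("first score vector length: " + scores[0].length);
        System.out.println("projected new point dims: " + y.length);
    }
}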

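The class Javadoc suggests taming a large Gram matrix by clustering the data and populating the kernel with the cluster means. A rough sketch of that idea follows; loadData and clusterMeans are hypothetical helpers (the latter standing in for a real clustering call such as k-means, whose exact API varies across Smile versions), so this is an outline under stated assumptions rather than library code.

import smile.manifold.KPCA;
import smile.math.kernel.GaussianKernel;

public class LargeDataKPCASketch {
    public static void main(String[] args) {
        double[][] data = loadData();               // placeholder for a large dataset
        double[][] means = clusterMeans(data, 500); // hypothetical: centroids of 500 clusters

        // Fit kernel PCA on the (much smaller) set of cluster means,
        // computing only the top 10 eigenpairs of the reduced Gram matrix.
        KPCA<double[]> kpca = KPCA.fit(means, new GaussianKernel(1.0), 10);

        // Project every original point through the fitted model.
        double[][] embedded = kpca.apply(data);
        System.out.println(embedded.length + " points embedded in " + embedded[0].length + " dims");
    }

    // Hypothetical helpers: plug in real data loading and clustering (e.g. k-means centroids).
    static double[][] loadData() {
        throw new UnsupportedOperationException("replace with real data loading");
    }

    static double[][] clusterMeans(double[][] data, int k) {
        throw new UnsupportedOperationException("replace with real clustering");
    }
}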

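On the metric MDS remark in the Javadoc: for any Mercer kernel, the implicit feature-space distance satisfies ||φ(xi) - φ(xj)||^2 = k(xi, xi) + k(xj, xj) - 2 k(xi, xj), and for an isotropic kernel with k(x, x) = 1 (such as the Gaussian kernel) this reduces to 2(1 - k(xi, xj)), the quantity quoted above. A small sketch checking that identity on one pair of points (values are arbitrary):

import smile.math.kernel.GaussianKernel;

public class FeatureSpaceDistance {
    public static void main(String[] args) {
        GaussianKernel kernel = new GaussianKernel(1.0);
        double[] xi = {0.0, 1.0};
        double[] xj = {1.5, -0.5};

        // General feature-space squared distance from kernel evaluations.
        double d2 = kernel.k(xi, xi) + kernel.k(xj, xj) - 2 * kernel.k(xi, xj);

        // Isotropic kernel with k(x, x) = 1: the same distance as 2(1 - k(xi, xj)).
        double d2iso = 2 * (1 - kernel.k(xi, xj));

        System.out.println(Math.sqrt(d2) + " == " + Math.sqrt(d2iso));
    }
}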

