smile.feature.extraction.KernelPCA Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of smile-core Show documentation
smile-core
The newest version!
package smile.feature.extraction;

import smile.data.DataFrame;
import smile.manifold.KPCA;
import smile.math.kernel.MercerKernel;

/**
 * Kernel PCA transform. Kernel PCA is an extension of
 * principal component analysis (PCA) using techniques of kernel methods.
 * Using a kernel, the originally linear operations of PCA are done in a
 * reproducing kernel Hilbert space with a non-linear mapping.
 * 
 * In practice, a large data set leads to a large Kernel/Gram matrix K, and
 * storing K may become a problem. One way to deal with this is to perform
 * clustering on your large dataset, and populate the kernel with the means
 * of those clusters. Since even this method may yield a relatively large K,
 * it is common to compute only the top P eigenvalues and eigenvectors of K.
 * <p>
 * Kernel PCA with an isotropic kernel function is closely related to metric MDS.
 * Carrying out metric MDS on the kernel matrix K produces an equivalent configuration
 * of points as the distance (2(1 - K(x_i, x_j)))^1/2
 * computed in feature space.
 * <p>
 * Kernel PCA also has close connections with Isomap, LLE, and Laplacian eigenmaps.
 *
 * <h2>References</h2>
 * <ol>
 * <li>Bernhard Scholkopf, Alexander Smola, and Klaus-Robert Muller. Nonlinear Component Analysis as a Kernel Eigenvalue Problem. Neural Computation, 1998.</li>
 * </ol>
 *
 * @see smile.feature.extraction.PCA
 * @see smile.math.kernel.MercerKernel
 * @see smile.manifold.KPCA
 *
 * @author Haifeng Li
 */
public class KernelPCA extends Projection {
    private static final long serialVersionUID = 2L;

    /**
     * Eigenvalue threshold used by {@link #fit(DataFrame, MercerKernel, int, String...)}
     * when the caller does not supply one.
     */
    private static final double DEFAULT_THRESHOLD = 0.0001;

    /**
     * The underlying kernel PCA model that this transform delegates to.
     */
    public final KPCA kpca;

    /**
     * Wraps an already fitted kernel PCA model as a projection named "KPCA".
     * The projection handed to the superclass is {@code kpca.projection()}.
     *
     * @param kpca kernel PCA object.
     * @param columns the columns to fit kernel PCA. If empty, all columns
     *                will be used.
     */
    public KernelPCA(KPCA kpca, String... columns) {
        super(kpca.projection(), "KPCA", columns);
        this.kpca = kpca;
    }

    /**
     * Fits kernel principal component analysis with the default eigenvalue
     * threshold of 0.0001. Equivalent to calling
     * {@link #fit(DataFrame, MercerKernel, int, double, String...)} with
     * that threshold.
     *
     * @param data training data.
     * @param kernel Mercer kernel.
     * @param k choose up to k principal components used for projection;
     *          the eigenvalue threshold passed along is 0.0001.
     * @param columns the columns to fit kernel PCA. If empty, all columns
     *                will be used.
     * @return the model.
     */
    public static KernelPCA fit(DataFrame data, MercerKernel kernel, int k, String... columns) {
        return fit(data, kernel, k, DEFAULT_THRESHOLD, columns);
    }

    /**
     * Fits kernel principal component analysis. The selected columns are
     * extracted with {@code data.toArray(columns)} and passed to
     * {@code KPCA.fit}; the resulting model is wrapped in a new
     * {@code KernelPCA} bound to the same columns.
     *
     * @param data training data.
     * @param kernel Mercer kernel.
     * @param k choose top k principal components used for projection.
     * @param threshold only principal components with eigenvalues
     *                  larger than the given threshold will be kept.
     * @param columns the columns to fit kernel PCA. If empty, all columns
     *                will be used.
     * @return the model.
     */
    public static KernelPCA fit(DataFrame data, MercerKernel kernel, int k, double threshold, String... columns) {
        KPCA model = KPCA.fit(data.toArray(columns), kernel, k, threshold);
        return new KernelPCA(model, columns);
    }

    /**
     * Projects a single sample by delegating to the wrapped kernel PCA model.
     *
     * @param x the input vector.
     * @return the result of {@code kpca.apply(x)}.
     */
    @Override
    public double[] apply(double[] x) {
        return kpca.apply(x);
    }
}