// smile.feature.extraction.KernelPCA - Maven / Gradle / Ivy
// The newest version!
package smile.feature.extraction;
import smile.data.DataFrame;
import smile.manifold.KPCA;
import smile.math.kernel.MercerKernel;
/**
* Kernel PCA transform. Kernel PCA is an extension of
* principal component analysis (PCA) using techniques of kernel methods.
* Using a kernel, the originally linear operations of PCA are done in a
* reproducing kernel Hilbert space with a non-linear mapping.
*
* In practice, a large data set leads to a large Kernel/Gram matrix K, and
* storing K may become a problem. One way to deal with this is to perform
* clustering on your large dataset, and populate the kernel with the means
* of those clusters. Since even this method may yield a relatively large K,
* it is common to compute only the top P eigenvalues and eigenvectors of K.
*
* Kernel PCA with an isotropic kernel function is closely related to metric MDS.
* Carrying out metric MDS on the kernel matrix K produces an equivalent configuration
* of points as the distance (2(1 - K(x<sub>i</sub>, x<sub>j</sub>)))<sup>1/2</sup>
* computed in feature space.
*
* Kernel PCA also has close connections with Isomap, LLE, and Laplacian eigenmaps.
*
*
* <h2>References</h2>
* <ol>
* <li>Bernhard Scholkopf, Alexander Smola, and Klaus-Robert Muller. Nonlinear Component Analysis as a Kernel Eigenvalue Problem. Neural Computation, 1998.</li>
* </ol>
*
* @see smile.feature.extraction.PCA
* @see smile.math.kernel.MercerKernel
* @see smile.manifold.KPCA
*
* @author Haifeng Li
*/
public class KernelPCA extends Projection {
    private static final long serialVersionUID = 2L;

    /**
     * Eigenvalue threshold applied by
     * {@link #fit(DataFrame, MercerKernel, int, String...)} when the caller
     * does not supply one.
     */
    private static final double DEFAULT_THRESHOLD = 0.0001;

    /**
     * The underlying kernel PCA model. {@link #apply(double[])} delegates
     * to this object.
     */
    public final KPCA<double[]> kpca;

    /**
     * Constructor. Wraps an already fitted kernel PCA model.
     *
     * @param kpca kernel PCA object; must not be {@code null} because its
     *             projection is read immediately.
     * @param columns the columns to fit kernel PCA. If empty, all columns
     *                will be used.
     */
    public KernelPCA(KPCA<double[]> kpca, String... columns) {
        // The parent receives the model's projection, the fixed "KPCA"
        // label, and the selected column names.
        super(kpca.projection(), "KPCA", columns);
        this.kpca = kpca;
    }

    /**
     * Fits kernel principal component analysis with the default eigenvalue
     * threshold of 0.0001.
     *
     * @param data training data.
     * @param kernel Mercer kernel over {@code double[]} samples.
     * @param k choose up to k principal components (with eigenvalues larger
     *          than 0.0001) used for projection.
     * @param columns the columns to fit kernel PCA. If empty, all columns
     *                will be used.
     * @return the model.
     */
    public static KernelPCA fit(DataFrame data, MercerKernel<double[]> kernel, int k, String... columns) {
        return fit(data, kernel, k, DEFAULT_THRESHOLD, columns);
    }

    /**
     * Fits kernel principal component analysis.
     *
     * @param data training data.
     * @param kernel Mercer kernel over {@code double[]} samples.
     * @param k choose top k principal components used for projection.
     * @param threshold only principal components with eigenvalues
     *                  larger than the given threshold will be kept.
     * @param columns the columns to fit kernel PCA. If empty, all columns
     *                will be used.
     * @return the model.
     */
    public static KernelPCA fit(DataFrame data, MercerKernel<double[]> kernel, int k, double threshold, String... columns) {
        // Extract the selected columns as a numeric matrix, one row per sample.
        double[][] samples = data.toArray(columns);
        KPCA<double[]> model = KPCA.fit(samples, kernel, k, threshold);
        return new KernelPCA(model, columns);
    }

    /**
     * Projects a single sample by delegating to the wrapped kernel PCA model
     * instead of the inherited implementation.
     *
     * @param x the input sample.
     * @return the result of {@code kpca.apply(x)}.
     */
    @Override
    public double[] apply(double[] x) {
        return kpca.apply(x);
    }
}