smile.manifold.LaplacianEigenmap Maven / Gradle / Ivy
The newest version!
/*
* Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
*
* Smile is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Smile is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Smile. If not, see .
*/
package smile.manifold;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import smile.data.SparseDataset;
import smile.graph.AdjacencyList;
import smile.graph.Graph.Edge;
import smile.math.distance.Distance;
import smile.math.distance.EuclideanDistance;
import smile.math.matrix.ARPACK;
import smile.math.matrix.Matrix;
import smile.math.matrix.SparseMatrix;
import smile.util.SparseArray;
/**
* Laplacian Eigenmap. Using the notion of the Laplacian of the nearest
* neighbor adjacency graph, Laplacian Eigenmap computes a low dimensional
* representation of the dataset that optimally preserves local neighborhood
* information in a certain sense. The representation map generated by the
* algorithm may be viewed as a discrete approximation to a continuous map
* that naturally arises from the geometry of the manifold.
*
* The locality preserving character of the Laplacian Eigenmap algorithm makes
* it relatively insensitive to outliers and noise. It is also not prone to
* "short circuiting" as only the local distances are used.
*
* @see IsoMap
* @see LLE
* @see UMAP
*
*
References
*
* - Mikhail Belkin and Partha Niyogi. Laplacian Eigenmaps and Spectral Techniques for Embedding and Clustering. NIPS, 2001.
*
*
* @author Haifeng Li
*/
public class LaplacianEigenmap implements Serializable {
private static final long serialVersionUID = 2L;
/**
* The width of heat kernel.
*/
public final double width;
/**
* The original sample index.
*/
public final int[] index;
/**
* The coordinate matrix in embedding space.
*/
public final double[][] coordinates;
/**
* Nearest neighbor graph.
*/
public final AdjacencyList graph;
/**
* Constructor with discrete weights.
* @param index the original sample index.
* @param coordinates the coordinates.
* @param graph the nearest neighbor graph.
*/
public LaplacianEigenmap(int[] index, double[][] coordinates, AdjacencyList graph) {
this(-1, index, coordinates, graph);
}
/**
* Constructor with Gaussian kernel.
* @param width the width of heat kernel.
* @param index the original sample index.
* @param coordinates the coordinates.
* @param graph the nearest neighbor graph.
*/
public LaplacianEigenmap(double width, int[] index, double[][] coordinates, AdjacencyList graph) {
this.width = width;
this.index = index;
this.coordinates = coordinates;
this.graph = graph;
}
/**
* Laplacian Eigenmaps with discrete weights.
* @param data the input data.
* @param k k-nearest neighbor.
* @return the model.
*/
public static LaplacianEigenmap of(double[][] data, int k) {
return of(data, k, 2, -1);
}
/**
* Laplacian Eigenmap with Gaussian kernel.
* @param data the input data.
* @param d the dimension of the manifold.
* @param k k-nearest neighbor.
* @param t the smooth/width parameter of heat kernel exp(-||x-y||2 / t).
* Non-positive value means discrete weights.
* @return the model.
*/
public static LaplacianEigenmap of(double[][] data, int k, int d, double t) {
return of(data, new EuclideanDistance(), k, d, t);
}
/**
* Laplacian Eigenmaps with discrete weights.
* @param data the input data.
* @param distance the distance function.
* @param k k-nearest neighbor.
* @param the data type of points.
* @return the model.
*/
public static LaplacianEigenmap of(T[] data, Distance distance, int k) {
return of(data, distance, k, 2, -1);
}
/**
* Laplacian Eigenmap with Gaussian kernel.
* @param data the input data.
* @param distance the distance function.
* @param k k-nearest neighbor.
* @param d the dimension of the manifold.
* @param t the smooth/width parameter of heat kernel exp(-||x-y||2 / t).
* Non-positive value means discrete weights.
* @param the data type of points.
* @return the model.
*/
public static LaplacianEigenmap of(T[] data, Distance distance, int k, int d, double t) {
// Use largest connected component of nearest neighbor graph.
AdjacencyList graph = NearestNeighborGraph.of(data, distance, k, false, null);
NearestNeighborGraph nng = NearestNeighborGraph.largest(graph);
int[] index = nng.index;
int n = index.length;
graph = nng.graph;
double[] D = new double[n];
double gamma = -1.0 / t;
ArrayList W = new ArrayList<>(n);
for (int i = 0; i < n; i++) {
SparseArray row = new SparseArray();
Collection edges = graph.getEdges(i);
for (Edge edge : edges) {
int j = edge.v2;
if (i == j) j = edge.v1;
double w = t <= 0 ? 1.0 : Math.exp(gamma * edge.weight * edge.weight);
row.set(j, w);
D[i] += w;
}
D[i] = 1 / Math.sqrt(D[i]);
W.add(i, row);
}
for (int i = 0; i < n; i++) {
SparseArray row = W.get(i);
for (SparseArray.Entry e : row) {
e.update(-D[i] * e.x * D[e.i]);
}
row.set(i, 1.0);
}
// Here L is actually I - D^(-1/2) * W * D^(-1/2)
SparseMatrix L = SparseDataset.of(W, n).toMatrix();
// ARPACK may not find all needed eigenvalues for k = d + 1.
// Hack it with 10 * (d + 1).
Matrix.EVD eigen = ARPACK.syev(L, ARPACK.SymmOption.SM, Math.min(10*(d+1), n-1));
Matrix V = eigen.Vr;
double[][] coordinates = new double[n][d];
for (int j = d; --j >= 0; ) {
double norm = 0.0;
int c = V.ncol() - j - 2;
for (int i = 0; i < n; i++) {
double xi = V.get(i, c) * D[i];
coordinates[i][j] = xi;
norm += xi * xi;
}
norm = Math.sqrt(norm);
for (int i = 0; i < n; i++) {
coordinates[i][j] /= norm;
}
}
return new LaplacianEigenmap(t, index, coordinates, graph);
}
}