
// smile.manifold.LaplacianEigenmap Maven / Gradle / Ivy
/*******************************************************************************
* Copyright (c) 2010 Haifeng Li
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
package smile.manifold;
import java.util.Collection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import smile.data.SparseDataset;
import smile.graph.AdjacencyList;
import smile.graph.Graph;
import smile.graph.Graph.Edge;
import smile.math.Math;
import smile.math.SparseArray;
import smile.math.distance.EuclideanDistance;
import smile.math.matrix.DenseMatrix;
import smile.math.matrix.EVD;
import smile.math.matrix.SparseMatrix;
import smile.neighbor.CoverTree;
import smile.neighbor.KDTree;
import smile.neighbor.KNNSearch;
import smile.neighbor.Neighbor;
/**
* Laplacian Eigenmap. Using the notion of the Laplacian of the nearest
* neighbor adjacency graph, Laplacian Eigenmap computes a low dimensional
* representation of the dataset that optimally preserves local neighborhood
* information in a certain sense. The representation map generated by the
* algorithm may be viewed as a discrete approximation to a continuous map
* that naturally arises from the geometry of the manifold.
*
* The locality preserving character of the Laplacian Eigenmap algorithm makes
* it relatively insensitive to outliers and noise. It is also not prone to
* "short circuiting" as only the local distances are used.
*
* @see IsoMap
* @see LLE
*
* <h2>References</h2>
* <ol>
* <li> Mikhail Belkin and Partha Niyogi. Laplacian Eigenmaps and Spectral Techniques for Embedding and Clustering. NIPS, 2001.</li>
* </ol>
*
* @author Haifeng Li
*/
public class LaplacianEigenmap {
    private static final Logger logger = LoggerFactory.getLogger(LaplacianEigenmap.class);

    /**
     * The width of heat kernel. A non-positive value means that discrete
     * (0/1) edge weights were used instead of the heat kernel.
     */
    private double t;

    /**
     * The original sample index, i.e. for each row of the coordinate matrix
     * the index of the corresponding sample in the input data.
     */
    private int[] index;

    /**
     * Coordinate matrix. One row per embedded sample, one column per
     * manifold dimension.
     */
    private double[][] coordinates;

    /**
     * Nearest neighbor graph, restricted to its largest connected component
     * when the full graph is not connected.
     */
    private Graph graph;

    /**
     * Constructor. Learn Laplacian Eigenmaps with discrete weights.
     * Delegates to the four-argument constructor with <code>t = -1</code>.
     *
     * @param data the dataset.
     * @param d the dimension of the manifold.
     * @param k k-nearest neighbor.
     */
    public LaplacianEigenmap(double[][] data, int d, int k) {
        this(data, d, k, -1);
    }

    /**
     * Constructor. Learn Laplacian Eigenmap with Gaussian kernel.
     *
     * @param data the dataset.
     * @param d the dimension of the manifold.
     * @param k k-nearest neighbor.
     * @param t the smooth/width parameter of heat kernel
     * exp(-||x-y||<sup>2</sup> / t). Non-positive value means discrete weights.
     */
    public LaplacianEigenmap(double[][] data, int d, int k, double t) {
        this.t = t;

        int n = data.length;

        // Choose the search structure by input dimensionality: KD-tree for
        // fewer than 10 features, cover tree otherwise.
        KNNSearch<double[], double[]> knn;
        if (data[0].length < 10) {
            knn = new KDTree<>(data, data);
        } else {
            knn = new CoverTree<>(data, new EuclideanDistance());
        }

        // Build the k-nearest neighbor graph; each edge carries the distance
        // reported by the neighbor search.
        graph = new AdjacencyList(n);
        for (int i = 0; i < n; i++) {
            Neighbor<double[], double[]>[] neighbors = knn.knn(data[i], k);
            for (int j = 0; j < k; j++) {
                graph.setWeight(i, neighbors[j].index, neighbors[j].distance);
            }
        }

        // Use largest connected component.
        int[][] cc = graph.bfs();
        if (cc.length == 1) {
            // Single component: every sample is kept, index is the identity.
            index = new int[n];
            for (int i = 0; i < n; i++) {
                index[i] = i;
            }
        } else {
            // From here on n is the size of the largest component.
            n = 0;
            int component = 0;
            for (int i = 0; i < cc.length; i++) {
                if (cc[i].length > n) {
                    component = i;
                    n = cc[i].length;
                }
            }

            logger.info("Laplacian Eigenmap: {} connected components, largest one has {} samples.", cc.length, n);

            index = cc[component];
            graph = graph.subgraph(index);
        }

        // W holds the edge weights; D[i] first accumulates the sum of the
        // weights of vertex i and is then replaced by 1 / sqrt(sum).
        SparseDataset W = new SparseDataset(n);
        double[] D = new double[n];
        double gamma = -1.0 / t;
        for (int i = 0; i < n; i++) {
            Collection<Edge> edges = graph.getEdges(i);
            for (Edge edge : edges) {
                // The neighbor is whichever end point of the edge is not i.
                int j = edge.v2;
                if (i == j) {
                    j = edge.v1;
                }

                // Discrete weight 1 when t <= 0, heat kernel of the stored
                // edge weight otherwise.
                double w = t <= 0 ? 1.0 : Math.exp(gamma * Math.sqr(edge.weight));
                W.set(i, j, w);
                D[i] += w;
            }

            D[i] = 1 / Math.sqrt(D[i]);
        }

        // Rescale every entry to D[i] * w(i, j) * D[j]; the entries of row i
        // are overwritten in place while the row is being iterated.
        for (int i = 0; i < n; i++) {
            SparseArray edges = W.get(i).x;
            for (SparseArray.Entry edge : edges) {
                int j = edge.i;
                double s = D[i] * edge.x * D[j];
                W.set(i, j, s);
            }
            W.set(i, i, 0.0);
        }

        SparseMatrix L = W.toSparseMatrix();
        L.setSymmetric(true);

        // ARPACK may not find all needed eigen values for k = d + 1.
        // Set it to 10 * (d + 1) as a hack to NCV parameter of DSAUPD.
        // Our Lanczos class has no such issue.
        EVD eigen = L.eigen(Math.min(10*(d + 1), n - 1));

        // Coordinate j is taken from eigenvector column j + 1 (column 0 is
        // skipped -- presumably the trivial eigenvector; confirm against the
        // EVD ordering), scaled by D[i] and normalized to unit Euclidean norm.
        DenseMatrix V = eigen.getEigenVectors();
        coordinates = new double[n][d];
        for (int j = 0; j < d; j++) {
            double norm = 0.0;
            for (int i = 0; i < n; i++) {
                coordinates[i][j] = V.get(i, j + 1) * D[i];
                norm += coordinates[i][j] * coordinates[i][j];
            }

            norm = Math.sqrt(norm);
            for (int i = 0; i < n; i++) {
                coordinates[i][j] /= norm;
            }
        }
    }

    /**
     * Returns the original sample index. Because Laplacian Eigenmap is applied to the largest
     * connected component of k-nearest neighbor graph, we record the original
     * indices of samples in the largest component.
     *
     * @return the original sample indices; the internal array is returned, not a copy.
     */
    public int[] getIndex() {
        return index;
    }

    /**
     * Returns the coordinates of projected data.
     *
     * @return the coordinate matrix; the internal array is returned, not a copy.
     */
    public double[][] getCoordinates() {
        return coordinates;
    }

    /**
     * Returns the nearest neighbor graph.
     *
     * @return the nearest neighbor graph the embedding was computed on.
     */
    public Graph getNearestNeighborGraph() {
        return graph;
    }

    /**
     * Returns the width of heat kernel.
     *
     * @return the value of <code>t</code> passed to the constructor
     * (<code>-1</code> for the discrete-weight constructor).
     */
    public double getHeatKernelWidth() {
        return t;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy