All Downloads are FREE. Search and download functionalities are using the official Maven repository.

smile.manifold.LaplacianEigenmap Maven / Gradle / Ivy

There is a newer version: 4.3.0
Show newest version
/*******************************************************************************
 * Copyright (c) 2010 Haifeng Li
 *   
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *  
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package smile.manifold;

import java.util.Collection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import smile.data.SparseDataset;
import smile.graph.AdjacencyList;
import smile.graph.Graph;
import smile.graph.Graph.Edge;
import smile.math.Math;
import smile.math.SparseArray;
import smile.math.distance.EuclideanDistance;
import smile.math.matrix.DenseMatrix;
import smile.math.matrix.EVD;
import smile.math.matrix.SparseMatrix;
import smile.neighbor.CoverTree;
import smile.neighbor.KDTree;
import smile.neighbor.KNNSearch;
import smile.neighbor.Neighbor;

/**
 * Laplacian Eigenmap. Using the notion of the Laplacian of the nearest
 * neighbor adjacency graph, Laplacian Eigenmap computes a low dimensional
 * representation of the dataset that optimally preserves local neighborhood
 * information in a certain sense. The representation map generated by the
 * algorithm may be viewed as a discrete approximation to a continuous map
 * that naturally arises from the geometry of the manifold.
 * 

* The locality preserving character of the Laplacian Eigenmap algorithm makes * it relatively insensitive to outliers and noise. It is also not prone to * "short circuiting" as only the local distances are used. * * @see IsoMap * @see LLE * *

References

*
    *
  1. Mikhail Belkin and Partha Niyogi. Laplacian Eigenmaps and Spectral Techniques for Embedding and Clustering. NIPS, 2001.
  2. *
* * @author Haifeng Li */ public class LaplacianEigenmap { private static final Logger logger = LoggerFactory.getLogger(LaplacianEigenmap.class); /** * The width of heat kernel. */ private double t; /** * The original sample index. */ private int[] index; /** * Coordinate matrix. */ private double[][] coordinates; /** * Nearest neighbor graph. */ private Graph graph; /** * Constructor. Learn Laplacian Eigenmaps with discrete weights. * @param data the dataset. * @param d the dimension of the manifold. * @param k k-nearest neighbor. */ public LaplacianEigenmap(double[][] data, int d, int k) { this(data, d, k, -1); } /** * Constructor. Learn Laplacian Eigenmap with Gaussian kernel. * @param data the dataset. * @param d the dimension of the manifold. * @param k k-nearest neighbor. * @param t the smooth/width parameter of heat kernel e-||x-y||2 / t. * Non-positive value means discrete weights. */ public LaplacianEigenmap(double[][] data, int d, int k, double t) { this.t = t; int n = data.length; KNNSearch knn = null; if (data[0].length < 10) { knn = new KDTree<>(data, data); } else { knn = new CoverTree<>(data, new EuclideanDistance()); } graph = new AdjacencyList(n); for (int i = 0; i < n; i++) { Neighbor[] neighbors = knn.knn(data[i], k); for (int j = 0; j < k; j++) { graph.setWeight(i, neighbors[j].index, neighbors[j].distance); } } // Use largest connected component. int[][] cc = graph.bfs(); if (cc.length == 1) { index = new int[n]; for (int i = 0; i < n; i++) { index[i] = i; } } else { n = 0; int component = 0; for (int i = 0; i < cc.length; i++) { if (cc[i].length > n) { component = i; n = cc[i].length; } } logger.info("Laplacian Eigenmap: {} connected components, largest one has {} samples.", cc.length, n); index = cc[component]; graph = graph.subgraph(index); } SparseDataset W = new SparseDataset(n); double[] D = new double[n]; double gamma = -1.0 / t; for (int i = 0; i < n; i++) { Collection edges = graph.getEdges(i); for (Edge edge : edges) { int j = edge.v2; if (i == j) { j = edge.v1; } double w = t <= 0 ? 1.0 : Math.exp(gamma * Math.sqr(edge.weight)); W.set(i, j, w); D[i] += w; } D[i] = 1 / Math.sqrt(D[i]); } for (int i = 0; i < n; i++) { SparseArray edges = W.get(i).x; for (SparseArray.Entry edge : edges) { int j = edge.i; double s = D[i] * edge.x * D[j]; W.set(i, j, s); } W.set(i, i, 0.0); } SparseMatrix L = W.toSparseMatrix(); L.setSymmetric(true); // ARPACK may not find all needed eigen values for k = d + 1. // Set it to 10 * (d + 1) as a hack to NCV parameter of DSAUPD. // Our Lanczos class has no such issue. EVD eigen = L.eigen(Math.min(10*(d + 1), n - 1)); DenseMatrix V = eigen.getEigenVectors(); coordinates = new double[n][d]; for (int j = 0; j < d; j++) { double norm = 0.0; for (int i = 0; i < n; i++) { coordinates[i][j] = V.get(i, j + 1) * D[i]; norm += coordinates[i][j] * coordinates[i][j]; } norm = Math.sqrt(norm); for (int i = 0; i < n; i++) { coordinates[i][j] /= norm; } } } /** * Returns the original sample index. Because Laplacian Eigenmap is applied to the largest * connected component of k-nearest neighbor graph, we record the the original * indices of samples in the largest component. */ public int[] getIndex() { return index; } /** * Returns the coordinates of projected data. */ public double[][] getCoordinates() { return coordinates; } /** * Returns the nearest neighbor graph. */ public Graph getNearestNeighborGraph() { return graph; } /** * Returns the width of heat kernel. */ public double getHeatKernelWidth() { return t; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy