All Downloads are FREE. Search and download functionalities are using the official Maven repository.

smile.manifold.IsoMap Maven / Gradle / Ivy

The newest version!
/*******************************************************************************
 * Copyright (c) 2010 Haifeng Li
 *   
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *  
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package smile.manifold;

import smile.graph.AdjacencyList;
import smile.graph.Graph;
import smile.graph.Graph.Edge;
import smile.math.distance.EuclideanDistance;
import smile.math.Math;
import smile.math.matrix.EigenValueDecomposition;
import smile.neighbor.CoverTree;
import smile.neighbor.KDTree;
import smile.neighbor.KNNSearch;
import smile.neighbor.Neighbor;

/**
 * Isometric feature mapping. Isomap is a widely used low-dimensional embedding methods,
 * where geodesic distances on a weighted graph are incorporated with the
 * classical multidimensional scaling. Isomap is used for computing a
 * quasi-isometric, low-dimensional embedding of a set of high-dimensional
 * data points. Isomap is highly efficient and generally applicable to a broad
 * range of data sources and dimensionalities.
 * 

* To be specific, the classical MDS performs low-dimensional embedding based * on the pairwise distance between data points, which is generally measured * using straight-line Euclidean distance. Isomap is distinguished by * its use of the geodesic distance induced by a neighborhood graph * embedded in the classical scaling. This is done to incorporate manifold * structure in the resulting embedding. Isomap defines the geodesic distance * to be the sum of edge weights along the shortest path between two nodes. * The top n eigenvectors of the geodesic distance matrix, represent the * coordinates in the new n-dimensional Euclidean space. *

* The connectivity of each data point in the neighborhood graph is defined * as its nearest k Euclidean neighbors in the high-dimensional space. This * step is vulnerable to "short-circuit errors" if k is too large with * respect to the manifold structure or if noise in the data moves the * points slightly off the manifold. Even a single short-circuit error * can alter many entries in the geodesic distance matrix, which in turn * can lead to a drastically different (and incorrect) low-dimensional * embedding. Conversely, if k is too small, the neighborhood graph may * become too sparse to approximate geodesic paths accurately. *

* This class implements C-Isomap that involves magnifying the regions * of high density and shrink the regions of low density of data points * in the manifold. Edge weights that are maximized in Multi-Dimensional * Scaling(MDS) are modified, with everything else remaining unaffected. * * @see LLE * @see LaplacianEigenmap * *

References

*
    *
  1. J. B. Tenenbaum, V. de Silva and J. C. Langford A Global Geometric Framework for Nonlinear Dimensionality Reduction. Science 290(5500):2319-2323, 2000.
  2. *
* * @author Haifeng Li */ public class IsoMap { /** * The original sample index. */ private int[] index; /** * Coordinate matrix. */ private double[][] coordinates; /** * Nearest neighbor graph. */ private Graph graph; /** * Constructor. C-Isomap algorithm by default. * @param data the dataset. * @param d the dimension of the manifold. * @param k k-nearest neighbor. */ public IsoMap(double[][] data, int d, int k) { this(data, d, k, true); } /** * Constructor. * @param data the dataset. * @param d the dimension of the manifold. * @param k k-nearest neighbor. * @param CIsomap C-Isomap algorithm if true, otherwise standard algorithm. */ public IsoMap(double[][] data, int d, int k, boolean CIsomap) { int n = data.length; KNNSearch knn = null; if (data[0].length < 10) { knn = new KDTree(data, data); } else { knn = new CoverTree(data, new EuclideanDistance()); } graph = new AdjacencyList(n); double[] M = new double[n]; for (int i = 0; i < n; i++) { Neighbor[] neighbors = knn.knn(data[i], k); for (int j = 0; j < neighbors.length; j++) { graph.setWeight(i, neighbors[j].index, neighbors[j].distance); M[i] += neighbors[j].distance; } M[i] = Math.sqrt(M[i] / neighbors.length); } // C-Isomap if (CIsomap) { for (Edge edge : graph.getEdges()) { edge.weight /= (M[edge.v1] * M[edge.v2]); } } // Use largest connected component. int[][] cc = graph.dfs(); if (cc.length == 1) { index = new int[n]; for (int i = 0; i < n; i++) { index[i] = i; } } else { n = 0; int component = 0; for (int i = 0; i < cc.length; i++) { if (cc[i].length > n) { component = i; n = cc[i].length; } } System.out.format("IsoMap: %d connected components, largest one has %d samples.\n", cc.length, n); index = cc[component]; graph = graph.subgraph(index); } double[][] D = graph.dijkstra(); for (int i = 0; i < n; i++) { for (int j = 0; j < i; j++) { D[i][j] = -0.5 * D[i][j] * D[i][j]; D[j][i] = D[i][j]; } } double[] mean = Math.rowMean(D); double mu = Math.mean(mean); double[][] B = new double[n][n]; for (int i = 0; i < n; i++) { for (int j = 0; j <= i; j++) { B[i][j] = D[i][j] - mean[i] - mean[j] + mu; B[j][i] = B[i][j]; } } EigenValueDecomposition eigen = Math.eigen(B, d); coordinates = new double[n][d]; for (int j = 0; j < d; j++) { if (eigen.getEigenValues()[j] < 0) { throw new IllegalArgumentException(String.format("Some of the first %d eigenvalues are < 0.", d)); } double scale = Math.sqrt(eigen.getEigenValues()[j]); for (int i = 0; i < n; i++) { coordinates[i][j] = eigen.getEigenVectors()[i][j] * scale; } } } /** * Returns the original sample index. Because IsoMap is applied to the largest * connected component of k-nearest neighbor graph, we record the the original * indices of samples in the largest component. */ public int[] getIndex() { return index; } /** * Returns the coordinates of projected data. */ public double[][] getCoordinates() { return coordinates; } /** * Returns the nearest neighbor graph. */ public Graph getNearestNeighborGraph() { return graph; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy