All Downloads are FREE. Search and download functionalities are using the official Maven repository.

smile.manifold.NearestNeighborGraph Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Smile.  If not, see .
 */

package smile.manifold;

import smile.graph.AdjacencyList;
import smile.math.distance.Distance;
import smile.math.distance.EuclideanDistance;
import smile.neighbor.LinearSearch;
import smile.neighbor.Neighbor;

/**
 * Nearest neighbor graph builder.
 *
 * @author Haifeng Li
 */
class NearestNeighborGraph {
    private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(NearestNeighborGraph.class);

    /**
     * The original sample index.
     */
    public final int[] index;
    /**
     * Nearest neighbor graph.
     */
    public final AdjacencyList graph;

    /**
     * Constructor.
     * @param index the original sample index.
     * @param graph the nearest neighbor graph.
     */
    public NearestNeighborGraph(int[] index, AdjacencyList graph) {
        this.index = index;
        this.graph = graph;
    }

    /** The nearest neighbor edge consumer. */
    public interface EdgeConsumer {
        /**
         * Performs this operation on the edge.
         * @param v1 the center of neighborhood.
         * @param v2 the neighbor.
         * @param weight the weight/distance.
         * @param j the index of neighbor of knn search.
         */
        void accept(int v1, int v2, double weight, int j);
    }

    /**
     * Creates a nearest neighbor graph with Euclidean distance.
     *
     * @param data the dataset.
     * @param k k-nearest neighbor.
     * @param digraph flag to create a directed graph.
     * @param consumer an optional lambda to perform some side effect operations.
     */
    public static AdjacencyList of(double[][] data, int k, boolean digraph, EdgeConsumer consumer) {
        return of(data, new EuclideanDistance(), k, digraph ,consumer);
    }

    /**
     * Creates a nearest neighbor graph.
     *
     * @param data the dataset.
     * @param k k-nearest neighbor.
     * @param distance the distance function.
     * @param digraph flag to create a directed graph.
     * @param consumer an optional lambda to perform some side effect operations.
     */
    public static  AdjacencyList of(T[] data, Distance distance, int k, boolean digraph, EdgeConsumer consumer) {
        // This is actually faster on many core systems.
        LinearSearch knn = LinearSearch.of(data, distance);

        int n = data.length;
        AdjacencyList graph = new AdjacencyList(n, digraph);

        if (consumer != null) {
            for (int i = 0; i < n; i++) {
                Neighbor[] neighbors = knn.search(data[i], k);

                int v1 = i;
                for (int j = 0; j < neighbors.length; j++) {
                    int v2 = neighbors[j].index;
                    double weight = neighbors[j].distance;
                    graph.setWeight(v1, v2, weight);
                    consumer.accept(v1, v2, weight, j);
                }
            }
        } else {
            for (int i = 0; i < n; i++) {
                for (Neighbor neighbor : knn.search(data[i], k)) {
                    graph.setWeight(i, neighbor.index, neighbor.distance);
                }
            }
        }

        return graph;
    }

    /**
     * Finds the largest connected components of a nearest neighbor graph.
     * If the graph has multiple connected components, keep the
     * largest one.
     *
     * @param graph the nearest neighbor graph.
     */
    public static NearestNeighborGraph largest(AdjacencyList graph) {
        int n = graph.getNumVertices();

        // Use largest connected component.
        int[][] cc = graph.bfs();
        int[] index;
        if (cc.length == 1) {
            index = new int[n];
            for (int i = 0; i < n; i++) {
                index[i] = i;
            }
        } else {
            n = 0;
            int largest = 0;
            for (int i = 0; i < cc.length; i++) {
                if (cc[i].length > n) {
                    largest = i;
                    n = cc[i].length;
                }
            }

            logger.info("{} connected components, largest one has {} samples.", cc.length, n);

            index = cc[largest];
            graph = graph.subgraph(index);
        }

        return new NearestNeighborGraph(index, graph);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy