// org.jgrapht.alg.clustering.LabelPropagationClustering

/*
 * (C) Copyright 2020-2021, by Dimitrios Michail and Contributors.
 *
 * JGraphT : a free Java graph-theory library
 *
 * See the CONTRIBUTORS.md file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License 2.0 which is available at
 * http://www.eclipse.org/legal/epl-2.0, or the
 * GNU Lesser General Public License v2.1 or later
 * which is available at
 * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html.
 *
 * SPDX-License-Identifier: EPL-2.0 OR LGPL-2.1-or-later
 */
package org.jgrapht.alg.clustering;

import org.jgrapht.*;
import org.jgrapht.alg.interfaces.*;
import org.jgrapht.alg.util.*;

import java.util.*;
import java.util.stream.*;

/**
 * A label propagation clustering algorithm.
 * 
 * 
 * The algorithm is a near linear time algorithm capable of discovering communities in large graphs.
 * It is described in detail in the following
 * paper:
 * 

 * Raghavan, U. N., Albert, R., and Kumara, S. (2007). Near linear time algorithm to detect
 * community structures in large-scale networks. Physical review E, 76(3), 036106.
 * 
 * 
 * 
 * As the paper title suggests the running time is close to linear. The algorithm runs in
 * iterations, each of which runs in $O(n + m)$ where $n$ is the number of vertices and $m$ is the
 * number of edges. The authors found experimentally that in most cases, 95% of the nodes or more
 * are classified correctly by the end of iteration 5. See the paper for more details.
 * 
 * 
 * The algorithm is randomized, meaning that two runs on the same graph may return different
 * results. If the user requires deterministic behavior, the random number generator can be provided
 * by the constructor.
 * 
 * @author Dimitrios Michail
 *
 * @param  the graph vertex type
 * @param  the graph edge type
 */
public class LabelPropagationClustering
    implements
    ClusteringAlgorithm
{
    private Graph graph;
    private int maxIterations;
    private Random rng;
    private Clustering result;

    /**
     * Create a new clustering algorithm.
     * 
     * @param graph the graph (needs to be undirected)
     */
    public LabelPropagationClustering(Graph graph)
    {
        this(graph, 0, new Random());
    }

    /**
     * Create a new clustering algorithm.
     * 
     * @param graph the graph (needs to be undirected)
     * @param rng random number generator
     */
    public LabelPropagationClustering(Graph graph, Random rng)
    {
        this(graph, 0, rng);
    }

    /**
     * Create a new clustering algorithm.
     * 
     * @param graph the graph (needs to be undirected)
     * @param maxIterations maximum number of iterations (zero means no limit)
     */
    public LabelPropagationClustering(Graph graph, int maxIterations)
    {
        this(graph, maxIterations, new Random());
    }

    /**
     * Create a new clustering algorithm.
     * 
     * @param graph the graph (needs to be undirected)
     * @param maxIterations maximum number of iterations (zero means no limit)
     * @param rng random number generator
     */
    public LabelPropagationClustering(Graph graph, int maxIterations, Random rng)
    {
        this.graph = GraphTests.requireUndirected(graph);
        this.maxIterations = maxIterations;
        this.rng = Objects.requireNonNull(rng);
        if (maxIterations < 0) {
            throw new IllegalArgumentException("Max iterations cannot be negative");
        }
    }

    @Override
    public Clustering getClustering()
    {
        if (result == null) {
            result =
                new ClusteringImpl<>(new Implementation<>(graph, rng, maxIterations).compute());
        }
        return result;
    }

    /**
     * The actual implementation
     *
     * @param  the graph vertex type
     * @param  the graph edge type
     */
    private static class Implementation
    {
        private Graph graph;
        private Random rng;
        private int maxIterations;
        private Map labels;

        /**
         * Initialize the computation
         * 
         * @param graph the graph
         * @param rng the random number generator
         * @param maxIterations maximum iterations
         */
        public Implementation(Graph graph, Random rng, int maxIterations)
        {
            this.graph = graph;
            this.rng = rng;
            this.maxIterations = maxIterations;
            this.labels = new HashMap<>();

            int i = 0;
            for (V v : graph.vertexSet()) {
                labels.put(v, String.valueOf(i++));
            }
        }

        /**
         * Main loop of the algorithm
         * 
         * @return the clusters
         */
        public List> compute()
        {
            int currentIteration = 0;
            while (true) {
                // is there a limit on the number of iterations?
                if (maxIterations > 0 && currentIteration > maxIterations) {
                    break;
                }

                // perform synchronous label update (to avoid oscillations)
                boolean anyChange = false;
                List allVertices = new ArrayList<>(graph.vertexSet());
                Collections.shuffle(allVertices, rng);
                for (V v : allVertices) {
                    if (updateLabel(v)) {
                        anyChange = true;
                    }
                }

                // stopping criterion
                if (anyChange == false || shouldStop()) {
                    break;
                }

                currentIteration++;
            }

            return computeCommunities();
        }

        /**
         * Stopping criterion. Perform the iterative process until every node in the network has a
         * label equal to a label that the maximum number of its neighbors belong to.
         * 
         * @return true whether we should stop, false otherwise
         */
        private boolean shouldStop()
        {
            for (V v : graph.vertexSet()) {
                Pair, Integer> labelCountsAndMaximum =
                    getNeighborLabelCountsAndMaximum(v);
                Map counts = labelCountsAndMaximum.getFirst();

                String vLabel = labels.get(v);
                int vLabelCount = counts.getOrDefault(vLabel, 0);
                int maxCount = labelCountsAndMaximum.getSecond();
                if (maxCount > vLabelCount) {
                    return false;
                }
            }
            return true;
        }

        /**
         * Compute the frequency of the labels of all neighbors of a vertex and the maximum
         * frequency of the vertices, which have a label not equal to the input vertex label.
         * 
         * @param v the input vertex
         * @return the frequency of the labels of all neighbors of a vertex and the maximum label
         *         frequency of the vertices with a label not equal to the input vertex label
         */
        private Pair, Integer> getNeighborLabelCountsAndMaximum(V v)
        {
            Map counts = new HashMap<>();

            String vLabel = labels.get(v);
            int maxCount = 0;
            for (E e : graph.edgesOf(v)) {
                V u = Graphs.getOppositeVertex(graph, e, v);
                String uLabel = labels.get(u);
                int newCount = counts.getOrDefault(uLabel, 0) + 1;
                counts.put(uLabel, newCount);
                if (newCount > maxCount && !uLabel.equals(vLabel)) {
                    maxCount = newCount;
                }
            }

            return Pair.of(counts, maxCount);
        }

        /**
         * Update the label of a vertex.
         * 
         * @param v the vertex
         * @return true if a label change occurred
         */
        private boolean updateLabel(V v)
        {
            if (graph.degreeOf(v) == 0) {
                return false;
            }

            Pair, Integer> labelCountsAndMaximum =
                getNeighborLabelCountsAndMaximum(v);
            Map counts = labelCountsAndMaximum.getFirst();

            String oldLabel = labels.get(v);
            int vLabelCount = counts.getOrDefault(oldLabel, 0);
            final int maxCount = Math.max(labelCountsAndMaximum.getSecond(), vLabelCount);

            ArrayList maxLabels = counts
                .entrySet().stream().filter(e -> e.getValue() == maxCount).map(Map.Entry::getKey)
                .collect(Collectors.toCollection(ArrayList::new));
            String newLabel = maxLabels.get(rng.nextInt(maxLabels.size()));

            if (oldLabel.equals(newLabel)) {
                return false;
            } else {
                labels.put(v, newLabel);
                return true;
            }
        }

        /**
         * Compute the final communities from the labels. We need to do some extra work due to the
         * way the algorithm works, as described in the following paragraph from the original paper.
         * 
         * "When the algorithm terminates it is possible that two or more disconnected groups of
         * nodes have the same label (the groups are connected in the network via other nodes of
         * different labels). This happens when two or more neighbors of a node receive its label
         * and pass the labels in different directions, which ultimately leads to different
         * communities adopting the same label. In such cases, after the algorithm terminates one
         * can run a simple breadth-first search on the sub-networks of each individual groups to
         * separate the disconnected communities."
         * 
         * @return the clustering
         */
        private List> computeCommunities()
        {
            Map finalLabels = new HashMap<>();
            int nextLabel = 0;

            for (V v : graph.vertexSet()) {
                if (finalLabels.containsKey(v)) {
                    continue;
                }

                // start a BFS
                Deque frontier = new ArrayDeque<>();
                String currentLabel = String.valueOf(nextLabel++);
                finalLabels.put(v, currentLabel);
                frontier.addLast(v);

                while (!frontier.isEmpty()) {
                    V u = frontier.removeFirst();
                    String uLabel = labels.get(u);

                    for (E e : graph.edgesOf(u)) {
                        V w = Graphs.getOppositeVertex(graph, e, u);
                        String wLabel = labels.get(w);
                        if (!wLabel.equals(uLabel) || finalLabels.containsKey(w)) {
                            continue;
                        }
                        finalLabels.put(w, currentLabel);
                        frontier.addLast(w);
                    }
                }
            }

            return convert(graph, finalLabels);
        }

        /**
         * Convert from a map representation to a list of sets.
         * 
         * @param graph the graph
         * @param labels the map representation
         * @return the list of sets
         */
        private List> convert(Graph graph, Map labels)
        {
            Map> clusterMap = new LinkedHashMap<>();
            for (V v : graph.vertexSet()) {
                String rv = labels.get(v);
                if (rv == null) {
                    throw new IllegalArgumentException("Not all vertices have labels.");
                }
                Set cluster = clusterMap.get(rv);
                if (cluster == null) {
                    cluster = new LinkedHashSet<>();
                    clusterMap.put(rv, cluster);
                }
                cluster.add(v);
            }
            return new ArrayList<>(clusterMap.values());
        }

    }

}