// org.jgrapht.alg.clustering.LabelPropagationClustering Maven / Gradle / Ivy
/*
* (C) Copyright 2020-2021, by Dimitrios Michail and Contributors.
*
* JGraphT : a free Java graph-theory library
*
* See the CONTRIBUTORS.md file distributed with this work for additional
* information regarding copyright ownership.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License 2.0 which is available at
* http://www.eclipse.org/legal/epl-2.0, or the
* GNU Lesser General Public License v2.1 or later
* which is available at
* http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html.
*
* SPDX-License-Identifier: EPL-2.0 OR LGPL-2.1-or-later
*/
package org.jgrapht.alg.clustering;
import org.jgrapht.*;
import org.jgrapht.alg.interfaces.*;
import org.jgrapht.alg.util.*;
import java.util.*;
import java.util.stream.*;
/**
* A label propagation clustering algorithm.
*
* <p>
* The algorithm is a near linear time algorithm capable of discovering communities in large graphs.
* It is described in detail in the following paper:
* <ul>
* <li>Raghavan, U. N., Albert, R., and Kumara, S. (2007). Near linear time algorithm to detect
* community structures in large-scale networks. Physical review E, 76(3), 036106.</li>
* </ul>
*
* <p>
* As the paper title suggests the running time is close to linear. The algorithm runs in
* iterations, each of which runs in $O(n + m)$ where $n$ is the number of vertices and $m$ is the
* number of edges. The authors found experimentally that in most cases, 95% of the nodes or more
* are classified correctly by the end of iteration 5. See the paper for more details.
*
* <p>
* The algorithm is randomized, meaning that two runs on the same graph may return different
* results. If the user requires deterministic behavior, the random number generator can be provided
* via the constructor.
*
* @author Dimitrios Michail
*
* @param <V> the graph vertex type
* @param <E> the graph edge type
*/
public class LabelPropagationClustering
implements
ClusteringAlgorithm
{
// the input graph, stored as returned by GraphTests.requireUndirected in the constructor
private Graph graph;
// upper bound on the number of label propagation passes; zero means no limit
private int maxIterations;
// random number generator handed to the implementation when the clustering is computed
private Random rng;
// cached clustering; stays null until getClustering() computes it
private Clustering result;
/**
* Create a new clustering algorithm with no limit on the number of iterations and a newly
* created {@link Random} as the source of randomness.
*
* Delegates to the three-argument constructor, passing zero for the maximum number of
* iterations (zero means no limit).
*
* @param graph the graph (needs to be undirected)
*/
public LabelPropagationClustering(Graph graph)
{
this(graph, 0, new Random());
}
/**
* Create a new clustering algorithm with no limit on the number of iterations, using the
* supplied random number generator.
*
* Delegates to the three-argument constructor, passing zero for the maximum number of
* iterations (zero means no limit).
*
* @param graph the graph (needs to be undirected)
* @param rng random number generator
*/
public LabelPropagationClustering(Graph graph, Random rng)
{
this(graph, 0, rng);
}
/**
* Create a new clustering algorithm with a limit on the number of iterations and a newly
* created {@link Random} as the source of randomness.
*
* Delegates to the three-argument constructor.
*
* @param graph the graph (needs to be undirected)
* @param maxIterations maximum number of iterations (zero means no limit)
*/
public LabelPropagationClustering(Graph graph, int maxIterations)
{
this(graph, maxIterations, new Random());
}
/**
 * Create a new clustering algorithm.
 *
 * The arguments are checked in this order: the graph (through
 * {@code GraphTests.requireUndirected}), then the random number generator, then the iteration
 * limit.
 *
 * @param graph the graph (needs to be undirected)
 * @param maxIterations maximum number of iterations (zero means no limit)
 * @param rng random number generator
 * @throws NullPointerException if {@code rng} is {@code null}
 * @throws IllegalArgumentException if {@code maxIterations} is negative
 */
public LabelPropagationClustering(Graph graph, int maxIterations, Random rng)
{
    this.graph = GraphTests.requireUndirected(graph);
    this.rng = Objects.requireNonNull(rng);

    // reject an invalid limit before storing it
    if (maxIterations < 0) {
        throw new IllegalArgumentException("Max iterations cannot be negative");
    }
    this.maxIterations = maxIterations;
}
@Override
public Clustering getClustering()
{
    // the clustering is computed on the first call only; later calls return the cached object
    if (result != null) {
        return result;
    }

    result = new ClusteringImpl<>(new Implementation<>(graph, rng, maxIterations).compute());
    return result;
}
/**
* The actual implementation
*
* @param <V> the graph vertex type
* @param <E> the graph edge type
*/
private static class Implementation
{
// the graph being clustered
private Graph graph;
// random number generator; compute() uses it to shuffle the vertex visiting order
private Random rng;
// limit on the number of passes; compute() only applies it when it is positive
private int maxIterations;
// current label of each vertex; the constructor gives every vertex its own distinct label
private Map labels;
/**
 * Initialize the computation.
 *
 * Stores the arguments and assigns every vertex of the graph an initial label. The labels are
 * the decimal strings "0", "1", "2", ... handed out in the iteration order of the vertex set,
 * so no two vertices share a label at the start.
 *
 * @param graph the graph
 * @param rng the random number generator
 * @param maxIterations maximum iterations
 */
public Implementation(Graph graph, Random rng, int maxIterations)
{
    this.graph = graph;
    this.rng = rng;
    this.maxIterations = maxIterations;
    this.labels = new HashMap<>();

    int nextLabel = 0;
    for (V vertex : graph.vertexSet()) {
        labels.put(vertex, Integer.toString(nextLabel));
        nextLabel++;
    }
}
/**
* Main loop of the algorithm
*
* @return the clusters
*/
public List> compute()
{
int currentIteration = 0;
while (true) {
// is there a limit on the number of iterations?
if (maxIterations > 0 && currentIteration > maxIterations) {
break;
}
// perform synchronous label update (to avoid oscillations)
boolean anyChange = false;
List allVertices = new ArrayList<>(graph.vertexSet());
Collections.shuffle(allVertices, rng);
for (V v : allVertices) {
if (updateLabel(v)) {
anyChange = true;
}
}
// stopping criterion
if (anyChange == false || shouldStop()) {
break;
}
currentIteration++;
}
return computeCommunities();
}
/**
* Stopping criterion. Perform the iterative process until every node in the network has a
* label equal to a label that the maximum number of its neighbors belong to.
*
* @return true if we should stop, false otherwise
*/
private boolean shouldStop()
{
for (V v : graph.vertexSet()) {
Pair