// src.it.unimi.dsi.webgraph.algo.FourSweepIterativeFringeDiameter Maven / Gradle / Ivy
package it.unimi.dsi.webgraph.algo;
/*
* Copyright (C) 2011 Sebastiano Vigna
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
import it.unimi.dsi.Util;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.lang.ObjectParser;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.util.XorShiftStarRandom;
import it.unimi.dsi.webgraph.GraphClassParser;
import it.unimi.dsi.webgraph.ImmutableGraph;
import it.unimi.dsi.webgraph.ImmutableGraph.LoadMethod;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.concurrent.atomic.AtomicIntegerArray;
import org.apache.log4j.Logger;
import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
/** Computes the diameter of a symmetric (a.k.a. undirected) graph.
*
* This class implements a variant of the heuristic algorithm proposed by Pierluigi Crescenzi, Roberto Grossi, Michel Habib,
* Leonardo Lanzi and Andrea Marino in “On computing the diameter of real-world undirected graphs”, presented
* at the Workshop on Graph Algorithms and Applications (Zurich, July 3, 2011), which extends
* the double-sweep heuristic for bounding the diameter suggested by Clémence Magnien,
* Matthieu Latapy and Michel Habib in “Fast computation of empirically tight bounds for the diameter of massive graphs”,
* J. Exp. Algorithmics, 13:1.10:1−1.10:9, ACM, 2009.
*
*
* <p>To understand why the following algorithm works, recall that the eccentricity of a node x is the
* maximum distance d(x, y). The minimum eccentricity over all nodes is called the radius of the graph, and
* a node with minimum eccentricity is called a center. The diameter is just the maximum eccentricity, so
* the diameter is bounded by twice the radius (but it might not be equal: a line with an even number of nodes is a counterexample).
* The following two observations are obvious:
*
* - the eccentricity of a node is a lower bound for the diameter;
*
* - given a node x and an integer h, 2h maximised with the
* eccentricities of all nodes at distance greater than h from x is an
* upper bound for the diameter.
*
*
* The double-sweep algorithm is the standard algorithm to compute the diameter of a tree:
* we take a random node and locate using a breadth-first visit a
* farthest node x. Then, we perform a second breadth-first visit, computing the
* eccentricity of x, which turns out to be the diameter of the tree.
* When applied to a general graph, the double-sweep algorithm provides a good lower bound (in general, whenever we perform
* a breadth-first visit we use the resulting eccentricity to improve the current lower bound).
* With some (usually few) additional visits, the iterative
* fringe algorithm often makes it possible to make the bounds match.
*
*
* <p>More precisely, after the second visit we find a node c that is
* halfway between x and a node farthest from x. The
* node c is a tentative center of the graph,
* and it certainly is if the graph is a tree.
*
*
* <p>We then perform a breadth-first visit from c and compute its eccentricity h, obtaining an upper bound
* 2h for the diameter.
*
*
* <p>In case our upper bound does not match the lower bound, we compute the eccentricities of the fringe, that is, the set
* of nodes at distance h from c, by performing a breadth-first visit from each node in the fringe. At each
* eccentricity computed, we update our lower bound, and stop if it matches our current upper bound. Finally, when the fringe is exhausted,
* assuming M is the maximum of the eccentricities computed, max(2(h − 1), M)
* is an improved upper bound for the diameter. We iterate the procedure with the new fringe
* (nodes at distance h − 1), and so on, until the lower and upper bounds do match.
*
*
* <p>The description above is a bit simplified: after finding c, we actually
* do a double sweep again starting from c and update c accordingly. This
* four-sweep procedure often improves the quality (e.g., reduces the eccentricity) of c.
*
*
* <h2>Performance issues</h2>
*
* This class uses an instance of {@link ParallelBreadthFirstVisit} to ensure a high degree of parallelism (see its
* documentation for memory requirements).
*/
public class FourSweepIterativeFringeDiameter {
private static final Logger LOGGER = Util.getLogger( FourSweepIterativeFringeDiameter.class );
/** Checks that we are always visiting the same component of the same size and possibly logs a warning or throws an exception.
*
* @param visit the current visit.
* @param componentSize the size of the visited component, or 0 if unknown.
* @return the size of the visited component.
*/
private static int componentSize( final ParallelBreadthFirstVisit visit, int componentSize ) {
if ( visit.queue.size() != visit.graph.numNodes() ) {
if ( componentSize == -1 ) {
componentSize = visit.queue.size();
LOGGER.warn( "The graph is not connected: computing the diameter of a component of " + componentSize + " < " + visit.graph.numNodes() + " nodes" );
}
else if ( componentSize != visit.queue.size() ) throw new IllegalStateException( "Queue size (" + visit.queue.size() + ") is different from component size (" + componentSize + "): maybe the graph is not symmetric." );
}
return componentSize;
}
/** Computes the diameter of a symmetric graph.
*
* @param symGraph a symmetric graph.
* @param threads the requested number of threads (0 for {@link Runtime#availableProcessors()}).
* @param pl a progress logger, or null
.
* @param seed a seed for generating random starting points.
* @return the diameter.
*/
public static int run( final ImmutableGraph symGraph, final int threads, final ProgressLogger pl, final long seed ) {
ParallelBreadthFirstVisit visit = new ParallelBreadthFirstVisit( symGraph, threads, true, pl );
final AtomicIntegerArray parent = visit.marker;
XorShiftStarRandom random = new XorShiftStarRandom( seed );
final int n = symGraph.numNodes();
int lowerBound = 0, upperBound = n - 1, componentSize = -1;
while( lowerBound < upperBound ) {
if ( pl != null ) pl.logger.info( "New round of bound refinement... [" + lowerBound + ".." + upperBound + "]" );
// After the first iteration, we pick a node from the visit queue
visit.clear();
visit.visit( visit.queue.isEmpty() ? random.nextInt( n ) : visit.queue.getInt( random.nextInt( visit.queue.size() ) ), componentSize );
int border = visit.nodeAtMaxDistance();
componentSize = componentSize( visit, componentSize );
lowerBound = Math.max( visit.maxDistance(), lowerBound );
upperBound = Math.min( upperBound, 2 * visit.maxDistance() );
if ( pl != null ) pl.logger.info( "After visit from random node: [" + lowerBound + ".." + upperBound + "]" );
if ( lowerBound == upperBound ) break;
visit.clear();
visit.visit( border, componentSize );
border = visit.nodeAtMaxDistance();
componentSize = componentSize( visit, componentSize );
lowerBound = Math.max( visit.maxDistance(), lowerBound );
upperBound = Math.min( upperBound, 2 * visit.maxDistance() );
if ( pl != null ) pl.logger.info( "After first double sweep: [" + lowerBound + ".." + upperBound + "]" );
if ( lowerBound == upperBound ) break;
// Find first tentative center of the graph (certainly the center if it is a tree).
int center = border;
for( int i = visit.maxDistance() / 2; i-- != 0; ) center = parent.get( center );
// We now visit from the tentative center.
visit.clear();
visit.visit( center, componentSize );
border = visit.nodeAtMaxDistance();
componentSize = componentSize( visit, componentSize );
lowerBound = Math.max( visit.maxDistance(), lowerBound );
upperBound = Math.min( upperBound, 2 * visit.maxDistance() );
if ( pl != null ) pl.logger.info( "After visit from first tentative center (node " + center + "): [" + lowerBound + ".." + upperBound + "]" );
if ( lowerBound == upperBound ) break;
// Last sweep
visit.clear();
visit.visit( border );
border = visit.nodeAtMaxDistance();
componentSize = componentSize( visit, componentSize );
lowerBound = Math.max( visit.maxDistance(), lowerBound );
upperBound = Math.min( upperBound, 2 * visit.maxDistance() );
if ( pl != null ) pl.logger.info( "After second double sweep: [" + lowerBound + ".." + upperBound + "]" );
if ( lowerBound == upperBound ) break;
// Find new (and hopefully improved) center.
center = border;
for( int i = visit.maxDistance() / 2; i-- != 0; ) center = parent.get( center );
// We now visit from the new center.
visit.clear();
visit.visit( center, componentSize );
componentSize = componentSize( visit, componentSize );
lowerBound = Math.max( visit.maxDistance(), lowerBound );
upperBound = Math.min( upperBound, 2 * visit.maxDistance() );
if ( pl != null ) pl.logger.info( "After visit from new center (node " + center + "): [" + lowerBound + ".." + upperBound + "]" );
if ( lowerBound == upperBound ) break;
// Copy cutpoints and queue as they are needed to visit incrementally the fringe (this stuff could go on disk, actually).
final IntArrayList cutPoints = visit.cutPoints.clone();
final IntArrayList queue = visit.queue.clone();
final ProgressLogger globalProgressLogger = pl == null ? null : new ProgressLogger( pl.logger, pl.logInterval, "visits" );
if ( pl != null ) {
pl.logger.debug( "Cutpoints: " + cutPoints );
globalProgressLogger.start( "Starting visits..." );
}
/* We now incrementally remove nodes at decreasing distance d from the center,
* keeping track of the maximum eccentricity maxEcc of the removed nodes.
* max( maxEcc, 2(d - 1) ) is obviously an upper bound for the diameter. */
int maxEcc = 0;
for( int d = visit.maxDistance(); d > 0 && lowerBound < upperBound; d-- ) {
if ( pl != null ) {
globalProgressLogger.expectedUpdates = pl.count + cutPoints.getInt( d + 1 ) - cutPoints.getInt( lowerBound / 2 + 1 );
pl.logger.info( "Examining " + ( cutPoints.getInt( d + 1 ) - cutPoints.getInt( d ) ) + " nodes at distance " + d + " (at most " + globalProgressLogger.expectedUpdates + " visits to go)..." );
}
for( int pos = cutPoints.getInt( d ); pos < cutPoints.getInt( d + 1 ); pos++ ) {
final int x = queue.getInt( pos );
visit.clear();
visit.visit( x );
componentSize = componentSize( visit, componentSize );
maxEcc = Math.max( maxEcc, visit.maxDistance() );
lowerBound = Math.max( lowerBound, maxEcc );
if ( lowerBound == upperBound ) return lowerBound;
}
upperBound = Math.max( maxEcc, 2 * ( d - 1 ) );
if ( pl != null ) {
globalProgressLogger.updateAndDisplay( cutPoints.getInt( d + 1 ) - cutPoints.getInt( d ) );
pl.logger.info( "After enlarging fringe: [" + lowerBound + ".." + upperBound + "]" );
}
}
if ( globalProgressLogger != null ) globalProgressLogger.done();
}
return lowerBound;
}
static public void main( String arg[] ) throws IllegalArgumentException, SecurityException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, JSAPException, IOException, ClassNotFoundException, InstantiationException {
SimpleJSAP jsap = new SimpleJSAP( FourSweepIterativeFringeDiameter.class.getName(), "Computes the diamater of a symmetric graph using Magnien-Latay-Habib's technique.",
new Parameter[] {
new FlaggedOption( "graphClass", GraphClassParser.getParser(), null, JSAP.NOT_REQUIRED, 'g', "graph-class", "Forces a Java class for the source graph." ),
new Switch( "spec", 's', "spec", "The basename is rather a specification of the form (arg,arg,...)." ),
new Switch( "mapped", 'm', "mapped", "Do not load the graph in main memory, but rather memory-map it." ),
new FlaggedOption( "logInterval", JSAP.LONG_PARSER, Long.toString( ProgressLogger.DEFAULT_LOG_INTERVAL ), JSAP.NOT_REQUIRED, 'l', "log-interval", "The minimum time interval between activity logs in milliseconds." ),
new FlaggedOption( "threads", JSAP.INTSIZE_PARSER, "0", JSAP.NOT_REQUIRED, 'T', "threads", "The number of threads to be used. If 0, the number will be estimated automatically." ),
new UnflaggedOption( "basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the graph." ),
}
);
JSAPResult jsapResult = jsap.parse( arg );
if ( jsap.messagePrinted() ) System.exit( 1 );
final ProgressLogger pl = new ProgressLogger( LOGGER, jsapResult.getLong( "logInterval" ) );
final String basename = jsapResult.getString( "basename" );
final Class> graphClass = jsapResult.getClass( "graphClass" );
final boolean spec = jsapResult.getBoolean( "spec" );
final boolean mapped = jsapResult.getBoolean( "mapped" );
final int threads = jsapResult.getInt( "threads" );
final ImmutableGraph graph;
if ( graphClass != null ) {
if ( spec ) {
System.err.println( "Options --graph-class and --spec are incompatible" );
System.exit( 1 );
return; // Just to avoid spurious errors about graph not being initialised.
}
else graph = (ImmutableGraph)graphClass.getMethod( mapped ? LoadMethod.MAPPED.toMethod() : LoadMethod.STANDARD.toMethod(), CharSequence.class ).invoke( null, basename );
}
else {
if ( !spec ) graph = mapped ? ImmutableGraph.loadMapped( basename, pl ) : ImmutableGraph.load( basename, pl );
else graph = ObjectParser.fromSpec( basename, ImmutableGraph.class, GraphClassParser.PACKAGE );
}
System.out.println( run( graph, threads, new ProgressLogger( LOGGER ), Util.randomSeed() ) );
}
}