
// org.neo4j.gds.msbfs.MultiSourceBFSAccessMethods Maven / Gradle / Ivy
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.gds.msbfs;
import org.jetbrains.annotations.Nullable;
import org.neo4j.gds.api.Graph;
import org.neo4j.gds.api.properties.relationships.RelationshipIterator;
import org.neo4j.gds.collections.ha.HugeLongArray;
import org.neo4j.gds.core.concurrency.Concurrency;
import org.neo4j.gds.core.concurrency.ParallelUtil;
import org.neo4j.gds.core.concurrency.RunWithConcurrency;
import org.neo4j.gds.termination.TerminationFlag;
import org.neo4j.gds.utils.CloseableThreadLocal;
import java.util.Arrays;
import java.util.Collection;
import java.util.Optional;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
/**
* Multi Source Breadth First Search implemented as described in [1].
*
* The benefit of running this MS-BFS instead of multiple execution of a regular
* BFS for every source is that the MS-BFS algorithm can collapse traversals that are
* the same for multiple nodes. If any two or more given BFSs would traverse the same nodes
* at the same iteration depth, the MS-BFS will traverse only once and group all sources
* for this traversal.
*
* The consumer of this algorithm provides a callback function that gets called
* with:
*
* - the node id where the BFS traversal is at
* - the depth or BFS iteration at which this node is traversed
* - a lazily evaluated list of all source nodes that have arrived at this node at the same depth/iteration
*
* The sources iterator is only valid during the execution of the callback and
* should not be stored.
*
* We use a fixed {@code ω} (OMEGA) of 64, which allows us to implement the
* seen/visitNext bit sets as a packed long which improves memory locality
* as suggested in 4.1. of the paper.
* If the number of sources exceeds 64, multiple instances of MS-BFS are run
* in parallel.
*
* If the MS-BFS runs in parallel, the callback may be executed from multiple threads
* at the same time. The implementation should therefore be thread-safe.
*
* [1]: The More the Merrier: Efficient Multi-Source Graph Traversal
*/
public final class MultiSourceBFSAccessMethods {
// Thread-local holders for the traversal state. createMultiSourceBFS creates each of them as a
// LocalHugeLongArray sized by nodeCount, and allSourceBfss hands the same holders to every
// MultiSourceBFSRunnable it creates.
// NOTE(review): these are raw CloseableThreadLocal types; the otherwise unused HugeLongArray
// import suggests a type argument was lost here - confirm.
private final CloseableThreadLocal visits;
private final CloseableThreadLocal visitsNext;
private final CloseableThreadLocal seens;
// Null unless the initialization spec asks for seenNext; in this class only
// predecessorProcessing does so.
private final @Nullable CloseableThreadLocal seensNext;
// Number of nodes; also used as the number of sources when no explicit sources are given.
private final long nodeCount;
// A concurrentCopy() of this iterator is taken for every runnable created in allSourceBfss.
private final RelationshipIterator relationships;
// Either an ANPStrategy or a PredecessorStrategy, depending on the factory method used.
private final ExecutionStrategy strategy;
// Taken from the initialization spec and forwarded unchanged to every runnable.
private final boolean allowStartNodeTraversal;
// Explicit source nodes, or null when none were supplied. Sorted in place by
// createMultiSourceBFS when the spec requests sorting.
private final long @Nullable [] sourceNodes;
// hypothesis: you supply actual source nodes, or you provide a count - if so that should be rationalised
// The only factory in this class passes 0; sourceLength() treats 0 as "use nodeCount".
private final int sourceNodeCount;
// The only factory in this class passes 0; within this class it is read only by toString().
private final long nodeOffset;
// Handed to RunWithConcurrency in run().
private final TerminationFlag terminationFlag;
/**
 * Creates an MS-BFS that runs with an {@code ANPStrategy} wrapping the given per-node callback.
 *
 * @param nodeCount       number of nodes; sizes the traversal state and, when no source nodes
 *                        are given, is used as the number of sources
 * @param relationships   relationship access used for the traversal
 * @param perNodeAction   callback handed to the {@code ANPStrategy}
 * @param sourceNodes     explicit source nodes; when empty, no sources are set on the
 *                        initialization spec
 * @param terminationFlag forwarded to the created instance
 * @return a configured MS-BFS; nothing is traversed until {@code run} is called
 */
public static MultiSourceBFSAccessMethods aggregatedNeighborProcessing(
    long nodeCount,
    RelationshipIterator relationships,
    BfsConsumer perNodeAction,
    Optional<long[]> sourceNodes,
    TerminationFlag terminationFlag
) {
    var builder = new MultiSourceBFSInitializationSpecBuilder();
    // The parameter is Optional<long[]> rather than the raw type so that the method
    // reference receives a long[] instead of an Object.
    sourceNodes.ifPresent(builder::sourceNodes);
    return createMultiSourceBFS(
        nodeCount,
        relationships,
        new ANPStrategy(perNodeAction),
        builder.build(),
        terminationFlag
    );
}
/**
 * Creates an MS-BFS that runs with a {@code PredecessorStrategy} built from the two callbacks.
 * The initialization spec always enables {@code seenNext}; when source nodes are supplied it
 * additionally requests that they be sorted.
 *
 * @param graph             supplies both the node count and the relationship access
 * @param perNodeAction     per-node callback handed to the {@code PredecessorStrategy}
 * @param perNeighborAction per-neighbor callback handed to the {@code PredecessorStrategy}
 * @param sourceNodes       explicit source nodes; when present the array is sorted in place
 *                          during creation
 * @param terminationFlag   forwarded to the created instance
 * @return a configured MS-BFS; nothing is traversed until {@code run} is called
 * @throws IllegalArgumentException if source nodes are present but the array is empty
 */
public static MultiSourceBFSAccessMethods predecessorProcessing(
    Graph graph,
    BfsConsumer perNodeAction,
    BfsWithPredecessorConsumer perNeighborAction,
    Optional<long[]> sourceNodes,
    TerminationFlag terminationFlag
) {
    MultiSourceBFSInitializationSpecBuilder builder = new MultiSourceBFSInitializationSpecBuilder()
        .seenNext(true);
    // The parameter is Optional<long[]> rather than the raw type so that the lambda
    // parameter is a long[] instead of an Object.
    sourceNodes.ifPresent(sources -> builder.sourceNodes(sources).sortSourceNodes(true));
    return createMultiSourceBFS(
        graph.nodeCount(),
        graph,
        new PredecessorStrategy(perNodeAction, perNeighborAction),
        builder.build(),
        terminationFlag
    );
}
/**
 * Shared factory behind the public creation methods: allocates the thread-local traversal
 * state, optionally validates and sorts the source nodes, and assembles the instance.
 * The source node count and node offset are always passed as 0.
 *
 * @throws IllegalArgumentException if the spec requests sorted source nodes but supplies
 *                                  none (null or an empty array)
 */
private static MultiSourceBFSAccessMethods createMultiSourceBFS(
    long nodeCount,
    RelationshipIterator relationships,
    ExecutionStrategy strategy,
    MultiSourceBFSInitializationSpec spec,
    TerminationFlag terminationFlag
) {
    var currentVisits = new LocalHugeLongArray(nodeCount);
    var upcomingVisits = new LocalHugeLongArray(nodeCount);
    var seenSoFar = new LocalHugeLongArray(nodeCount);

    // The "seen next" state is only allocated when the spec asks for it.
    LocalHugeLongArray seenUpcoming = null;
    if (spec.seenNext()) {
        seenUpcoming = new LocalHugeLongArray(nodeCount);
    }

    var sources = spec.sourceNodes();
    if (spec.sortSourceNodes()) {
        if (sources != null && sources.length > 0) {
            // Sorts the array obtained from the spec in place.
            Arrays.sort(sources);
        } else {
            throw new IllegalArgumentException("You must provide source nodes");
        }
    }

    return new MultiSourceBFSAccessMethods(
        currentVisits,
        upcomingVisits,
        seenSoFar,
        seenUpcoming,
        nodeCount,
        relationships,
        strategy,
        spec.allowStartNodeTraversal(),
        sources,
        0,
        0,
        terminationFlag
    );
}
/**
 * Sole constructor. It stores its arguments in the corresponding fields and performs
 * no validation or other work.
 */
private MultiSourceBFSAccessMethods(
    CloseableThreadLocal visits,
    CloseableThreadLocal visitsNext,
    CloseableThreadLocal seens,
    @Nullable CloseableThreadLocal seensNext,
    long nodeCount,
    RelationshipIterator relationships,
    ExecutionStrategy strategy,
    boolean allowStartNodeTraversal,
    long @Nullable [] sourceNodes,
    int sourceNodeCount,
    long nodeOffset,
    TerminationFlag terminationFlag
) {
    // graph access and execution
    this.nodeCount = nodeCount;
    this.relationships = relationships;
    this.strategy = strategy;
    this.terminationFlag = terminationFlag;

    // source selection
    this.allowStartNodeTraversal = allowStartNodeTraversal;
    this.sourceNodes = sourceNodes;
    this.sourceNodeCount = sourceNodeCount;
    this.nodeOffset = nodeOffset;

    // thread-local traversal state
    this.visits = visits;
    this.visitsNext = visitsNext;
    this.seens = seens;
    this.seensNext = seensNext;
}
/**
 * Executes the MS-BFS tasks through {@code RunWithConcurrency} on the given executor.
 * One task is planned per thread reported by {@code numberOfThreads()}; the retry budget
 * is four times that number, with a wait time of 100 microseconds.
 *
 * @param concurrency concurrency handed to {@code RunWithConcurrency}
 * @param executor    executor on which the tasks are run
 */
public void run(Concurrency concurrency, ExecutorService executor) {
    final int threadCount = numberOfThreads();
    var tasks = allSourceBfss(threadCount);
    final long retryBudget = threadCount * 4L;

    var runner = RunWithConcurrency.builder()
        .concurrency(concurrency)
        .tasks(tasks)
        .terminationFlag(terminationFlag)
        .maxWaitRetries(retryBudget)
        .waitTime(100L, TimeUnit.MICROSECONDS)
        .executor(executor);
    runner.run();
}
/**
 * Returns the number of sources: the length of the explicit source array if there is one,
 * otherwise the configured source node count, where a count of 0 stands for all nodes.
 */
private long sourceLength() {
    if (sourceNodes == null) {
        return sourceNodeCount == 0 ? nodeCount : sourceNodeCount;
    }
    return sourceNodes.length;
}
/**
 * Asks {@code ParallelUtil.threadCount} how many threads are needed for the current number
 * of sources with a batch size of {@code MSBFSConstants.OMEGA}.
 *
 * @throws IllegalArgumentException if the reported thread count does not fit into an int
 */
private int numberOfThreads() {
    final long sources = sourceLength();
    final long required = ParallelUtil.threadCount(MSBFSConstants.OMEGA, sources);
    final int narrowed = (int) required;
    // The narrowing cast round-trips only when the value fits into an int.
    if (narrowed == required) {
        return narrowed;
    }
    throw new IllegalArgumentException("Unable run MS-BFS on " + sources + " sources.");
}
// Creates the MS-BFS runnables through ParallelMultiSources, which calls next(from, length)
// for each chunk of sources. With explicit source nodes every runnable receives a copy of its
// own slice of the array; without them it receives the chunk's offset and length instead.
private Collection allSourceBfss(int threads) {
    final long[] explicitSources = this.sourceNodes;

    if (explicitSources != null) {
        return new ParallelMultiSources(threads, explicitSources.length) {
            @Override
            MultiSourceBFSRunnable next(final long from, final int length) {
                final int start = (int) from;
                final int end = (int) (from + length);
                return new MultiSourceBFSRunnable(
                    visits,
                    visitsNext,
                    seens,
                    seensNext,
                    nodeCount,
                    relationships.concurrentCopy(),
                    strategy,
                    allowStartNodeTraversal,
                    Arrays.copyOfRange(explicitSources, start, end),
                    0,
                    0
                );
            }
        };
    }

    // No explicit sources: every node is a source.
    final long totalNodes = nodeCount;
    return new ParallelMultiSources(threads, totalNodes) {
        @Override
        MultiSourceBFSRunnable next(final long from, final int length) {
            return new MultiSourceBFSRunnable(
                visits,
                visitsNext,
                seens,
                seensNext,
                totalNodes,
                relationships.concurrentCopy(),
                strategy,
                allowStartNodeTraversal,
                null,
                length,
                from
            );
        }
    };
}
/**
 * Renders the instance as {@code MSBFS{first .. upper (count)}}. With a non-empty source
 * array, {@code first} is its first element, {@code upper} its last element plus one, and
 * {@code count} its length; otherwise the values come from the node offset and the source
 * node count.
 */
@Override
public String toString() {
    final long first;
    final long upper;
    final int count;

    if (sourceNodes == null || sourceNodes.length == 0) {
        first = nodeOffset;
        upper = nodeOffset + sourceNodeCount;
        count = sourceNodeCount;
    } else {
        final int lastIndex = sourceNodes.length - 1;
        first = sourceNodes[0];
        upper = sourceNodes[lastIndex] + 1;
        count = sourceNodes.length;
    }

    return "MSBFS{" + first + " .. " + upper + " (" + count + ")}";
}
}