
// com.hazelcast.jet.impl.TopologicalSorter Maven / Gradle / Ivy
// The newest version!
/*
* Copyright (c) 2008-2024, Hazelcast, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.jet.impl;
import javax.annotation.Nonnull;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.function.Function;
import static com.hazelcast.jet.Util.entry;
import static com.hazelcast.jet.impl.util.Util.toList;
import static java.lang.Math.min;
import static java.util.stream.Collectors.joining;
import static java.util.stream.Collectors.toMap;
/**
* Computes a topological ordering of the vertices in a graph.
* Validates against cycles.
*/
public final class TopologicalSorter {
// Consulted, but not updated, by the algorithm:
private final Map, List>> adjacencyMap;
private final Function vertexNameFn;
// Updated by the algorithm:
private final ArrayDeque topologicallySorted = new ArrayDeque<>();
private final Deque> tarjanStack = new ArrayDeque<>();
private int nextIndex;
private TopologicalSorter(
@Nonnull Map, List>> adjacencyMap,
@Nonnull Function vertexNameFn
) {
this.adjacencyMap = adjacencyMap;
this.vertexNameFn = vertexNameFn;
}
/**
* Returns an iterable that will encounter the vertices of a graph in
* a topological order (the order is not unique). If the graph cannot
* be topologically ordered due to the presence of a cycle, it will
* throw an exception.
*
* @param adjacencyMap the description of the graph: for each vertex,
* a list of its adjacent vertices
* @param vertexNameFn a function that returns a vertex's name, used to generate
* diagnostic information in the case of a cycle in the graph
* @param type used to represent the vertices
*/
public static Iterable topologicalSort(
@Nonnull Map> adjacencyMap, @Nonnull Function vertexNameFn
) {
// decorate all the vertices with Tarjan vertices, which hold the
// metadata needed by the algorithm
Map> tarjanVertices =
adjacencyMap.keySet().stream()
.map(v -> entry(v, new TarjanVertex<>(v)))
.collect(toMap(Entry::getKey, Entry::getValue));
Map, List>> tarjanAdjacencyMap =
adjacencyMap.entrySet().stream()
.collect(toMap(e -> tarjanVertices.get(e.getKey()),
e -> toList(e.getValue(), tarjanVertices::get)));
return new TopologicalSorter<>(tarjanAdjacencyMap, vertexNameFn).go();
}
/**
* Checks that a collection we assume is topologically sorted actually is
* sorted.
*
* @throws RuntimeException if it's not sorted
*/
public static void checkTopologicalSort(Iterable>> adjacencyMap) {
Set seen = new HashSet<>();
for (Entry> parentAndChildren : adjacencyMap) {
for (V child : parentAndChildren.getValue()) {
if (seen.contains(child)) {
throw new RuntimeException("A child seen before its parent");
}
}
seen.add(parentAndChildren.getKey());
}
}
// Partial implementation of Tarjan's algorithm:
// http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
// https://rjlipton.files.wordpress.com/2009/10/dfs1971.pdf
//
// The full algorithm outputs all strongly-connected (SC) components of a
// graph; this code just finds any SC component involving more than a
// single vertex.
private Iterable go() {
for (TarjanVertex tv : adjacencyMap.keySet()) {
if (tv.index != -1) {
continue;
}
// The stack invariant:
// Vertices are placed on a stack in the order in which they are visited.
// When the depth-first search recursively visits a vertex v and its
// descendants, those vertices are not all necessarily popped from the
// stack when this recursive call returns. The invariant is that a vertex
// remains on the stack after it has been visited if and only if there is
// a path from it to some vertex earlier on the stack.
assert tarjanStack.isEmpty() : "Broken stack invariant";
strongConnect(tv);
}
return topologicallySorted;
}
// method name identical to the one used in the Wikipedia article
private void strongConnect(TarjanVertex currTv) {
currTv.visitedAtIndex(nextIndex++);
push(currTv);
for (TarjanVertex outTv : adjacencyMap.get(currTv)) {
if (outTv == currTv) {
throw new IllegalArgumentException(
"Vertex " + vertexNameFn.apply(currTv.v) + " is connected to itself");
}
if (outTv.index == -1) {
// outTv not discovered yet, visit it...
strongConnect(outTv);
// ... and propagate lowlink computed for it to currTv
currTv.lowlink = min(currTv.lowlink, outTv.lowlink);
} else if (outTv.onStack) {
// outTv is already on the stack => there is a cycle in the graph.
// Proceed with the algorithm until the full extent of the cycle
// is known.
currTv.lowlink = min(currTv.lowlink, outTv.index);
}
}
if (currTv.lowlink < currTv.index) {
// currTv has a path to some vertex that is already on the stack.
// Leave currTv on the stack and return.
return;
}
assert currTv.lowlink == currTv.index : "Broken lowlink invariant";
// currTv is the root of an SC component. Find out if the component has
// more than one member.
TarjanVertex popped = pop();
if (popped == currTv) {
// currTv was on the top of the stack => it is the sole member of its SC
// component => it is not involved in any cycles. Add it to the output
// list and return.
topologicallySorted.addFirst(currTv.v);
return;
}
// There are vertices on the stack beyond currTv => it is not the sole
// member of its SC component => it is involved in a cycle. Report an
// error with a list of all the members of the SC component.
//
// At this point the algorithm is over. The following stack operations
// are not a part of it, their sole purpose is generating the desired
// error message.
while (tarjanStack.peekFirst() != currTv) {
tarjanStack.removeFirst();
}
tarjanStack.addLast(popped);
tarjanStack.addLast(currTv);
throw new IllegalArgumentException("DAG contains a cycle: "
+ tarjanStack.stream()
.map(av -> vertexNameFn.apply(av.v))
.collect(joining(" -> ")));
}
private void push(TarjanVertex thisTv) {
thisTv.onStack = true;
tarjanStack.addLast(thisTv);
}
private TarjanVertex pop() {
TarjanVertex popped = tarjanStack.removeLast();
popped.onStack = false;
return popped;
}
private static final class TarjanVertex {
V v;
// Field names identical to those used in the Wikipedia article:
int index = -1;
int lowlink = -1;
boolean onStack; // tells whether the vertex is currently on the Tarjan stack
TarjanVertex(@Nonnull V v) {
this.v = v;
}
void visitedAtIndex(int index) {
this.index = index;
this.lowlink = index;
}
@Override
public String toString() {
return v.toString();
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy