// com.google.common.graph.Traverser Maven / Gradle / Ivy
//
// Show more of this group Show more artifacts with this name
// Show all versions of guava Show documentation
// Guava is a suite of core and expanded libraries that include utility classes, Google's collections, I/O classes, and much more.
// There is a newer version: 33.1.0-jre
// Show newest version
/*
 * Copyright (C) 2017 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.common.graph;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static java.util.Objects.requireNonNull;

import com.google.common.annotations.Beta;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableSet;
import com.google.errorprone.annotations.DoNotMock;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import javax.annotation.CheckForNull;

/**
 * An object that can traverse the nodes that are reachable from a specified (set of) start node(s)
 * using a specified {@link SuccessorsFunction}.
 *
 * <p>There are two entry points for creating a {@code Traverser}: {@link
 * #forTree(SuccessorsFunction)} and {@link #forGraph(SuccessorsFunction)}. You should choose one
 * based on your answers to the following questions:
 *
 * <ol>
 *   <li>Is there only one path to any node that's reachable from any start node? (If so, the graph
 *       to be traversed is a tree or forest even if it is a subgraph of a graph which is neither.)
 *   <li>Are the node objects' implementations of {@code equals()}/{@code hashCode()} <a
 *       href="https://github.com/google/guava/wiki/GraphsExplained#non-recursiveness">recursive</a>?
 * </ol>
 *
 * <p>If your answers are:
 *
 * <ul>
 *   <li>(1) "no" and (2) "no", use {@link #forGraph(SuccessorsFunction)}.
 *   <li>(1) "yes" and (2) "yes", use {@link #forTree(SuccessorsFunction)}.
 *   <li>(1) "yes" and (2) "no", you can use either, but {@code forTree()} will be more efficient.
 *   <li>(1) "no" and (2) "yes", <b>neither will work</b>, but if you transform your node
 *       objects into a non-recursive form, you can use {@code forGraph()}.
 * </ul>
 *
 * @author Jens Nyman
 * @param <N> Node parameter type
 * @since 23.1
 */
@Beta
@DoNotMock(
    "Call forGraph or forTree, passing a lambda or a Graph with the desired edges (built with"
        + " GraphBuilder)")
@ElementTypesAreNonnullByDefault
public abstract class Traverser {
  private final SuccessorsFunction successorFunction;

  private Traverser(SuccessorsFunction successorFunction) {
    this.successorFunction = checkNotNull(successorFunction);
  }

  /**
   * Creates a new traverser for the given general {@code graph}.
   *
   * Traversers created using this method are guaranteed to visit each node reachable from the
   * start node(s) at most once.
   *
   * 
If you know that no node in {@code graph} is reachable by more than one path from the start
   * node(s), consider using {@link #forTree(SuccessorsFunction)} instead.
   *
   * 
Performance notes
   *
   * 

   *   Traversals require O(n) time (where n is the number of nodes reachable from
   *       the start node), assuming that the node objects have O(1) {@code equals()} and
   *       {@code hashCode()} implementations. (See the 
   *       notes on element objects for more information.)
   *   
While traversing, the traverser will use O(n) space (where n is the number
   *       of nodes that have thus far been visited), plus O(H) space (where H is the
   *       number of nodes that have been seen but not yet visited, that is, the "horizon").
   * 
   *
   * @param graph {@link SuccessorsFunction} representing a general graph that may have cycles.
   */
  public static  Traverser forGraph(final SuccessorsFunction graph) {
    return new Traverser(graph) {
      @Override
      Traversal newTraversal() {
        return Traversal.inGraph(graph);
      }
    };
  }

  /**
   * Creates a new traverser for a directed acyclic graph that has at most one path from the start
   * node(s) to any node reachable from the start node(s), and has no paths from any start node to
   * any other start node, such as a tree or forest.
   *
   * {@code forTree()} is especially useful (versus {@code forGraph()}) in cases where the data
   * structure being traversed is, in addition to being a tree/forest, also defined recursively.
   * This is because the {@code forTree()}-based implementations don't keep track of visited nodes,
   * and therefore don't need to call `equals()` or `hashCode()` on the node objects; this saves
   * both time and space versus traversing the same graph using {@code forGraph()}.
   *
   * 
Providing a graph to be traversed for which there is more than one path from the start
   * node(s) to any node may lead to:
   *
   * 

   *   Traversal not terminating (if the graph has cycles)
   *   
Nodes being visited multiple times (if multiple paths exist from any start node to any
   *       node reachable from any start node)
   * 
   *
   * Performance notes
   *
   * 

   *   Traversals require O(n) time (where n is the number of nodes reachable from
   *       the start node).
   *   
While traversing, the traverser will use O(H) space (where H is the number
   *       of nodes that have been seen but not yet visited, that is, the "horizon").
   * 
   *
   * Examples (all edges are directed facing downwards)
   *
   * 
The graph below would be valid input with start nodes of {@code a, f, c}. However, if {@code
   * b} were also a start node, then there would be multiple paths to reach {@code e} and
   * {@code h}.
   *
   * 
{@code
   *    a     b      c
   *   / \   / \     |
   *  /   \ /   \    |
   * d     e     f   g
   *       |
   *       |
   *       h
   * }
   *
   * .
   *
   * 
The graph below would be a valid input with start nodes of {@code a, f}. However, if {@code
   * b} were a start node, there would be multiple paths to {@code f}.
   *
   * 
{@code
   *    a     b
   *   / \   / \
   *  /   \ /   \
   * c     d     e
   *        \   /
   *         \ /
   *          f
   * }
   *
   * Note on binary trees
   *
   * 
This method can be used to traverse over a binary tree. Given methods {@code
   * leftChild(node)} and {@code rightChild(node)}, this method can be called as
   *
   * 
{@code
   * Traverser.forTree(node -> ImmutableList.of(leftChild(node), rightChild(node)));
   * }
   *
   * @param tree {@link SuccessorsFunction} representing a directed acyclic graph that has at most
   *     one path between any two nodes
   */
  public static  Traverser forTree(final SuccessorsFunction tree) {
    if (tree instanceof BaseGraph) {
      checkArgument(((BaseGraph) tree).isDirected(), "Undirected graphs can never be trees.");
    }
    if (tree instanceof Network) {
      checkArgument(((Network) tree).isDirected(), "Undirected networks can never be trees.");
    }
    return new Traverser(tree) {
      @Override
      Traversal newTraversal() {
        return Traversal.inTree(tree);
      }
    };
  }

  /**
   * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in
   * the order of a breadth-first traversal. That is, all the nodes of depth 0 are returned, then
   * depth 1, then 2, and so on.
   *
   * Example: The following graph with {@code startNode} {@code a} would return nodes in
   * the order {@code abcdef} (assuming successors are returned in alphabetical order).
   *
   * 
{@code
   * b ---- a ---- d
   * |      |
   * |      |
   * e ---- c ---- f
   * }
   *
   * The behavior of this method is undefined if the nodes, or the topology of the graph, change
   * while iteration is in progress.
   *
   * 
The returned {@code Iterable} can be iterated over multiple times. Every iterator will
   * compute its next element on the fly. It is thus possible to limit the traversal to a certain
   * number of nodes as follows:
   *
   * 
{@code
   * Iterables.limit(Traverser.forGraph(graph).breadthFirst(node), maxNumberOfNodes);
   * }
   *
   * See Wikipedia for more
   * info.
   *
   * @throws IllegalArgumentException if {@code startNode} is not an element of the graph
   */
  public final Iterable breadthFirst(N startNode) {
    return breadthFirst(ImmutableSet.of(startNode));
  }

  /**
   * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code
   * startNodes}, in the order of a breadth-first traversal. This is equivalent to a breadth-first
   * traversal of a graph with an additional root node whose successors are the listed {@code
   * startNodes}.
   *
   * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph
   * @see #breadthFirst(Object)
   * @since 24.1
   */
  public final Iterable breadthFirst(Iterable startNodes) {
    final ImmutableSet validated = validate(startNodes);
    return new Iterable() {
      @Override
      public Iterator iterator() {
        return newTraversal().breadthFirst(validated.iterator());
      }
    };
  }

  /**
   * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in
   * the order of a depth-first pre-order traversal. "Pre-order" implies that nodes appear in the
   * {@code Iterable} in the order in which they are first visited.
   *
   * 
Example: The following graph with {@code startNode} {@code a} would return nodes in
   * the order {@code abecfd} (assuming successors are returned in alphabetical order).
   *
   * 
{@code
   * b ---- a ---- d
   * |      |
   * |      |
   * e ---- c ---- f
   * }
   *
   * The behavior of this method is undefined if the nodes, or the topology of the graph, change
   * while iteration is in progress.
   *
   * 
The returned {@code Iterable} can be iterated over multiple times. Every iterator will
   * compute its next element on the fly. It is thus possible to limit the traversal to a certain
   * number of nodes as follows:
   *
   * 
{@code
   * Iterables.limit(
   *     Traverser.forGraph(graph).depthFirstPreOrder(node), maxNumberOfNodes);
   * }
   *
   * See Wikipedia for more info.
   *
   * @throws IllegalArgumentException if {@code startNode} is not an element of the graph
   */
  public final Iterable depthFirstPreOrder(N startNode) {
    return depthFirstPreOrder(ImmutableSet.of(startNode));
  }

  /**
   * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code
   * startNodes}, in the order of a depth-first pre-order traversal. This is equivalent to a
   * depth-first pre-order traversal of a graph with an additional root node whose successors are
   * the listed {@code startNodes}.
   *
   * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph
   * @see #depthFirstPreOrder(Object)
   * @since 24.1
   */
  public final Iterable depthFirstPreOrder(Iterable startNodes) {
    final ImmutableSet validated = validate(startNodes);
    return new Iterable() {
      @Override
      public Iterator iterator() {
        return newTraversal().preOrder(validated.iterator());
      }
    };
  }

  /**
   * Returns an unmodifiable {@code Iterable} over the nodes reachable from {@code startNode}, in
   * the order of a depth-first post-order traversal. "Post-order" implies that nodes appear in the
   * {@code Iterable} in the order in which they are visited for the last time.
   *
   * 
Example: The following graph with {@code startNode} {@code a} would return nodes in
   * the order {@code fcebda} (assuming successors are returned in alphabetical order).
   *
   * 
{@code
   * b ---- a ---- d
   * |      |
   * |      |
   * e ---- c ---- f
   * }
   *
   * The behavior of this method is undefined if the nodes, or the topology of the graph, change
   * while iteration is in progress.
   *
   * 
The returned {@code Iterable} can be iterated over multiple times. Every iterator will
   * compute its next element on the fly. It is thus possible to limit the traversal to a certain
   * number of nodes as follows:
   *
   * 
{@code
   * Iterables.limit(
   *     Traverser.forGraph(graph).depthFirstPostOrder(node), maxNumberOfNodes);
   * }
   *
   * See Wikipedia for more info.
   *
   * @throws IllegalArgumentException if {@code startNode} is not an element of the graph
   */
  public final Iterable depthFirstPostOrder(N startNode) {
    return depthFirstPostOrder(ImmutableSet.of(startNode));
  }

  /**
   * Returns an unmodifiable {@code Iterable} over the nodes reachable from any of the {@code
   * startNodes}, in the order of a depth-first post-order traversal. This is equivalent to a
   * depth-first post-order traversal of a graph with an additional root node whose successors are
   * the listed {@code startNodes}.
   *
   * @throws IllegalArgumentException if any of {@code startNodes} is not an element of the graph
   * @see #depthFirstPostOrder(Object)
   * @since 24.1
   */
  public final Iterable depthFirstPostOrder(Iterable startNodes) {
    final ImmutableSet validated = validate(startNodes);
    return new Iterable() {
      @Override
      public Iterator iterator() {
        return newTraversal().postOrder(validated.iterator());
      }
    };
  }

  abstract Traversal newTraversal();

  @SuppressWarnings("CheckReturnValue")
  private ImmutableSet validate(Iterable startNodes) {
    ImmutableSet copy = ImmutableSet.copyOf(startNodes);
    for (N node : copy) {
      successorFunction.successors(node); // Will throw if node doesn't exist
    }
    return copy;
  }

  /**
   * Abstracts away the difference between traversing a graph vs. a tree. For a tree, we just take
   * the next element from the next non-empty iterator; for graph, we need to loop through the next
   * non-empty iterator to find first unvisited node.
   */
  private abstract static class Traversal {
    final SuccessorsFunction successorFunction;

    Traversal(SuccessorsFunction successorFunction) {
      this.successorFunction = successorFunction;
    }

    static  Traversal inGraph(SuccessorsFunction graph) {
      final Set visited = new HashSet<>();
      return new Traversal(graph) {
        @Override
        @CheckForNull
        N visitNext(Deque> horizon) {
          Iterator top = horizon.getFirst();
          while (top.hasNext()) {
            N element = top.next();
            // requireNonNull is safe because horizon contains only graph nodes.
            /*
             * TODO(cpovirk): Replace these two statements with one (`N element =
             * requireNonNull(top.next())`) once our checker supports it.
             *
             * (The problem is likely
             * https://github.com/jspecify/nullness-checker-for-checker-framework/blob/61aafa4ae52594830cfc2d61c8b113009dbdb045/src/main/java/com/google/jspecify/nullness/NullSpecAnnotatedTypeFactory.java#L896)
             */
            requireNonNull(element);
            if (visited.add(element)) {
              return element;
            }
          }
          horizon.removeFirst();
          return null;
        }
      };
    }

    static  Traversal inTree(SuccessorsFunction tree) {
      return new Traversal(tree) {
        @CheckForNull
        @Override
        N visitNext(Deque> horizon) {
          Iterator top = horizon.getFirst();
          if (top.hasNext()) {
            return checkNotNull(top.next());
          }
          horizon.removeFirst();
          return null;
        }
      };
    }

    final Iterator breadthFirst(Iterator startNodes) {
      return topDown(startNodes, InsertionOrder.BACK);
    }

    final Iterator preOrder(Iterator startNodes) {
      return topDown(startNodes, InsertionOrder.FRONT);
    }

    /**
     * In top-down traversal, an ancestor node is always traversed before any of its descendant
     * nodes. The traversal order among descendant nodes (particularly aunts and nieces) are
     * determined by the {@code InsertionOrder} parameter: nieces are placed at the FRONT before
     * aunts for pre-order; while in BFS they are placed at the BACK after aunts.
     */
    private Iterator topDown(Iterator startNodes, final InsertionOrder order) {
      final Deque> horizon = new ArrayDeque<>();
      horizon.add(startNodes);
      return new AbstractIterator() {
        @Override
        @CheckForNull
        protected N computeNext() {
          do {
            N next = visitNext(horizon);
            if (next != null) {
              Iterator successors = successorFunction.successors(next).iterator();
              if (successors.hasNext()) {
                // BFS: horizon.addLast(successors)
                // Pre-order: horizon.addFirst(successors)
                order.insertInto(horizon, successors);
              }
              return next;
            }
          } while (!horizon.isEmpty());
          return endOfData();
        }
      };
    }

    final Iterator postOrder(Iterator startNodes) {
      final Deque ancestorStack = new ArrayDeque<>();
      final Deque> horizon = new ArrayDeque<>();
      horizon.add(startNodes);
      return new AbstractIterator() {
        @Override
        @CheckForNull
        protected N computeNext() {
          for (N next = visitNext(horizon); next != null; next = visitNext(horizon)) {
            Iterator successors = successorFunction.successors(next).iterator();
            if (!successors.hasNext()) {
              return next;
            }
            horizon.addFirst(successors);
            ancestorStack.push(next);
          }
          // TODO(b/192579700): Use a ternary once it no longer confuses our nullness checker.
          if (!ancestorStack.isEmpty()) {
            return ancestorStack.pop();
          }
          return endOfData();
        }
      };
    }

    /**
     * Visits the next node from the top iterator of {@code horizon} and returns the visited node.
     * Null is returned to indicate reaching the end of the top iterator.
     *
     * For example, if horizon is {@code [[a, b], [c, d], [e]]}, {@code visitNext()} will return
     * {@code [a, b, null, c, d, null, e, null]} sequentially, encoding the topological structure.
     * (Note, however, that the callers of {@code visitNext()} often insert additional iterators
     * into {@code horizon} between calls to {@code visitNext()}. This causes them to receive
     * additional values interleaved with those shown above.)
     */
    @CheckForNull
    abstract N visitNext(Deque> horizon);
  }

  /** Poor man's method reference for {@code Deque::addFirst} and {@code Deque::addLast}. */
  private enum InsertionOrder {
    FRONT {
      @Override
       void insertInto(Deque deque, T value) {
        deque.addFirst(value);
      }
    },
    BACK {
      @Override
       void insertInto(Deque deque, T value) {
        deque.addLast(value);
      }
    };

    abstract  void insertInto(Deque deque, T value);
  }
}