All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twitter.cassovary.graph.Traverser.scala Maven / Gradle / Ivy

There is a newer version: 7.1.0
Show newest version
/*
 * Copyright 2014 Twitter, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
 * file except in compliance with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */
package com.twitter.cassovary.graph

import com.twitter.cassovary.graph.GraphDir._
import com.twitter.cassovary.graph.GraphUtils.RandomWalkParams
import com.twitter.cassovary.graph.tourist.{BoolInfoKeeper, IntInfoKeeper, PrevNbrCounter}
import com.twitter.cassovary.util.collections.CQueue
import com.twitter.logging.Logger

import scala.annotation.tailrec
import scala.util.Random

/**
 * A Traverser traverses the graph in a certain order of nodes.
 */
trait Traverser[+V <: Node] extends Iterator[V] { self =>
  /**
   * Fetches the node with id `id` from `graph`, treating its absence as an error.
   *
   * A traverser is expected to be handed only ids of nodes that are present in the
   * graph (for example ids listed in some node's edges), so the lookup is assumed
   * to succeed.
   *
   * @param graph the graph to look the node up in
   * @param id id of the node to fetch
   * @return the node that `graph.getNodeById(id)` holds
   * @throws NoSuchElementException if the graph has no node with this id; the message
   *                                names the offending id
   */
  protected def getExistingNodeById[W >: V <: Node](graph: Graph[W], id: Int): W =
    graph.getNodeById(id).getOrElse(
      // Same exception type as the bare Option.get, but with a message that says which id failed.
      throw new NoSuchElementException(s"No node with id $id exists in the graph")
    )
}

/**
 * Bounds an iterator to go no more than a specified maximum number of steps
 */
trait BoundedIterator[+T] extends Iterator[T] {
  /**
   * @return the maximal number of elements this iterator returns when defined;
   *         `None` means the iterator is not bounded.
   */
  def maxSteps: Option[Long]

  // Number of times `next()` has been invoked on this iterator so far.
  private var stepsSoFar = 0L

  /**
   * Counts the step and then delegates to the underlying iterator.
   */
  abstract override def next(): T = {
    stepsSoFar += 1
    super.next()
  }

  /**
   * False once the step budget (if any) is used up; otherwise whatever the
   * underlying iterator reports.
   */
  abstract override def hasNext: Boolean = {
    val budgetExhausted = maxSteps.exists(stepsSoFar >= _)
    !budgetExhausted && super.hasNext
  }
}

/**
 * Randomly traverse the graph, going from one node to a random neighbor in direction `dir`.
 * @param graph the graph to traverse on
 * @param dir direction in which to traverse
 * @param homeNodeIds the ids of nodes that the traverser will go to next if the node has no
 * neighbors, or we reset the traversal with probability `resetProbability`,
 * or if the number of out-going edges at the current node exceeds `maxNumEdgesThresh`
 * @param resetProbability the probability of teleporting back to home node set at each step
 * @param maxNumEdgesThresh if set, do not traverse from nodes with more than maxNumEdgesThresh edges
 * @param onlyOnce specifies whether the same node should only be allowed to be visited once
 * in any path.
 * @param randNumGen a random number generator (for stable walk, a seeded random number
 * generator is used).
 * @param maxDepth if set, max depth of path
 * @param filterHomeNodeByNumEdges filter home node by number of edges
 */
class RandomTraverser[+V <: Node](graph: Graph[V], dir: GraphDir, homeNodeIds: Seq[Int],
                      resetProbability: Double, maxNumEdgesThresh: Option[Int], onlyOnce: Boolean,
                      randNumGen: Random, maxDepth: Option[Int], filterHomeNodeByNumEdges: Boolean)
  extends Traverser[V] {

  // Node handed out by the latest `next()` call; still null before the first call.
  private[this] var currNode: V = _

  // Home ids as a set, for membership checks in `takeRandomStep`.
  private val homeNodeIdSet: Set[Int] = homeNodeIds.toSet

  // Remembers the ids handed out by `next()`; emptied when a repeat is hit with `onlyOnce` set.
  private val seenNodesTracker = new BoolInfoKeeper(onlyOnce)

  /**
   * @return true if the tracker currently holds an entry for `id`
   */
  protected def seenBefore(id: Int): Boolean = seenNodesTracker.infoOfNode(id).nonEmpty

  // Number of nodes returned since the walk last went home (the home node included).
  private var pathLength = 0

  /**
   * Starts a new path: zeroes the path length and picks one of the home node ids
   * with the random number generator.
   */
  private def goHome(): Int = {
    pathLength = 0
    NodeUtils.pickRandNodeId(homeNodeIds, randNumGen)
  }

  /**
   * Picks the id to move to from `currNode`: home on a reset draw, home when the
   * edge-count filter applies and trips, otherwise a random neighbor in `dir`
   * (or home if the node offers none).
   */
  private def takeRandomStep(): Int = {
    // The reset draw always happens first so the sequence of random numbers is stable.
    val roll = randNumGen.nextDouble()
    // Home nodes are exempt from the edge-count filter unless filterHomeNodeByNumEdges is set.
    val degreeCheckApplies = filterHomeNodeByNumEdges || !homeNodeIdSet.contains(currNode.id)
    def overDegreeLimit: Boolean =
      degreeCheckApplies && NodeUtils.hasTooManyEdges(dir, maxNumEdgesThresh)(currNode)

    val teleport = roll < resetProbability || overDegreeLimit
    if (teleport) {
      goHome()
    } else {
      currNode.randomNeighbor(dir, randNumGen) match {
        case Some(neighborId) => neighborId
        case None => goHome()
      }
    }
  }

  /**
   * Advances the walk by one node and returns it.
   *
   * The walk goes home on the very first call and whenever the current path has
   * reached `maxDepth`. Otherwise it takes a random step; if `onlyOnce` is set and
   * that step lands on an id already in the tracker, the tracker is cleared and the
   * walk goes home instead.
   */
  def next(): V = {
    val mustStartFromHome = currNode == null || maxDepth.exists(pathLength >= _)

    val nextNodeId =
      if (mustStartFromHome) {
        goHome()
      } else {
        val candidateId = takeRandomStep()
        val revisiting = onlyOnce && seenBefore(candidateId)
        if (revisiting) {
          seenNodesTracker.clear()
          goHome()
        } else {
          candidateId
        }
      }

    seenNodesTracker.recordInfo(nextNodeId, false)
    pathLength += 1
    currNode = getExistingNodeById(graph, nextNodeId)

    currNode
  }

  /** A random walk never runs out of nodes by itself. */
  def hasNext: Boolean = true
}

/**
 * Same as RandomTraverser except that the number of steps taken is bounded by `maxSteps`
 */
class RandomBoundedTraverser[+V <: Node](graph: Graph[V], dir: GraphDir, homeNodeIds: Seq[Int], maxStepss: Long,
    walkParams: RandomWalkParams)
  extends RandomTraverser[V](
    graph,
    dir,
    homeNodeIds,
    resetProbability = walkParams.resetProbability,
    maxNumEdgesThresh = walkParams.maxNumEdgesThresh,
    onlyOnce = walkParams.visitSameNodeOnce,
    randNumGen = walkParams.randNumGen,
    maxDepth = walkParams.maxDepth,
    filterHomeNodeByNumEdges = walkParams.filterHomeNodeByNumEdges)
  with BoundedIterator[V] {

  /** The step bound is always defined here: the `maxStepss` constructor argument. */
  val maxSteps: Option[Long] = Some(maxStepss)
}

object Walk {
  /**
   * An optional threshold, used to cap the depth or the degree of nodes whose
   * neighbors get added to the queue.
   */
  class Limit(limit: Option[Int]) {
    /**
     * @return true when a threshold is set and `value` is at least that threshold;
     *         always false when no threshold is set
     */
    def isLimitReached(value: Int): Boolean = limit.exists(value >= _)
  }

  /**
   * The three colors a node can carry in the QueueBasedTraverser.
   */
  object NodeColor extends Enumeration {
    type Color = Value
    val Unenqueued, Enqueued, Visited = Value
  }

  /**
   * Keeps one of the three `NodeColor` values per node id (Int).
   *
   * A node without an entry is `Unenqueued`. It turns `Enqueued` the first time it
   * is put on the queue and `Visited` the first time it is visited; no other
   * transitions happen.
   *
   * Backed by a BoolInfoKeeper: no entry = `Unenqueued`, `false` = `Enqueued`,
   * `true` = `Visited`.
   */
  trait NodeColoring {

    import NodeColor._

    val map = new BoolInfoKeeper(false)

    /**
     * Requests that node `kv._1` gets color `kv._2`. Only the transitions
     * `Unenqueued` to `Enqueued` and non-`Visited` to `Visited` are recorded;
     * every other request leaves the stored color as it is.
     */
    def +=(kv: (Int, Color)): Unit = {
      val nodeId = kv._1
      kv._2 match {
        case Enqueued if get(nodeId) == Unenqueued => map.recordInfo(nodeId, false)
        case Visited if get(nodeId) != Visited => map.recordInfo(nodeId, true)
        case Unenqueued | Enqueued | Visited => ()
      }
    }

    /**
     * @return the color currently associated with node id `number`
     */
    def get(number: Int): Color =
      map.infoOfNode(number).fold[Color](Unenqueued) { visited =>
        if (visited) Visited else Enqueued
      }
  }

  /**
   * Parameters used to limit Traversers defined below.
   *
   * @param maxDepth the maximum depth of nodes to visit
   * @param maxNumEdgesThreshold threshold of the number of neighbors a node can have, if a node has more
   *                             than the threshold value, we skip its children
   * @param maxSteps number of steps the traverser makes
   */
  case class Limits(maxDepth: Option[Int] = None, maxNumEdgesThreshold: Option[Int] = None,
                        maxSteps: Option[Long] = None) {
    /** Convenience constructor for the case where all three limits are set. */
    def this(depth: Int, numEdges: Int, steps: Long) = this(Some(depth), Some(numEdges), Some(steps))
  }

}

/**
 * General schema for some Traversers (like BFS, DFS). 
 * `QueueBasedTraverser` keeps nodes to visit next in a queue. 
 * It iteratively visits nodes from the front of the queue in order based on the type of traversal
 * and optionally adds new nodes to the queue.
 */
trait QueueBasedTraverser[+V <: Node] extends Traverser[V] {
  import Walk._

  /**
   * Graph to be traversed.
   */
  def graph: Graph[V]

  /**
   * Nodes to start the traversal from.
   */
  def homeNodeIds: Seq[Int]

  /**
   * Direction of the walk.
   */
  def dir: GraphDir

  /**
   * Where newly chosen nodes go in the queue: `FIFO` appends them at the end
   * (as BFS does), `LIFO` puts them at the front (as DFS does).
   */
  object GraphTraverserNodePriority extends Enumeration {
    type GraphTraverserNodePriority = Value

    val FIFO, LIFO = Value
  }

  import GraphTraverserNodePriority._

  /**
   * The priority with which nodes are added to the queue.
   */
  def nodePriority: GraphTraverserNodePriority

  /**
   * Action performed when visiting node `node` (before the iterator returns `node`).
   * Does nothing by default; subclasses override it to add behavior.
   */
  def visitNode(node: Node): Unit = {}

  /**
   * Queue that stores nodes to be visited next.
   */
  protected val queue = CQueue.fifo[Int]()

  /**
   * Number of nodes ever enqueued in the `queue`.
   */
  protected var numEnqueuedEver = 0L

  /**
   * Optional counter of previous nodes to a given node.
   */
  lazy val prevNbrCounter: Option[PrevNbrCounter] = None

  /**
   * Depth of visit of a given node.
   */
  def depth(node: Int): Option[Int]

  /**
   * Nodes are marked with 3 'colors': `Unenqueued`, `Enqueued`, `Visited`.
   * Every node starts out implicitly `Unenqueued`.
   *
   * A node becomes `Enqueued` when it is added to the queue and `Visited`
   * while it is processed by the iterator's `next` function.
   */
  lazy val coloring = new Walk.NodeColoring {}

  /**
   * Checks if the node of a given color should be enqueued.
   */
  def shouldBeEnqueued(color: Walk.NodeColor.Color): Boolean

  /**
   * Set to true to deque a node from the queue before processing.
   */
  val shouldBeDequeuedBeforeProcessing: Boolean = true

  /**
   * Options to limit traverser walk.
   */
  lazy val limits: Walk.Limits = Walk.Limits()

  lazy val depthLimit: Limit = new Limit(limits.maxDepth)
  lazy val degreeLimit: Limit = new Limit(limits.maxNumEdgesThreshold)
  lazy val maxSteps: Option[Long] = limits.maxSteps

  /**
   * During initialization we enqueue `homeNodeIds`.
   * This statement relies on `queue` and `numEnqueuedEver` above being initialized already.
   */
  enqueue(homeNodeIds, None)

  /**
   * Trims `nodes` so that the total number of nodes ever enqueued does not go
   * past the `maxSteps` bound; without a bound `nodes` is returned untouched.
   *
   * Assumes that all nodes in the queue are being visited.
   */
  def limitAddedToQueue(nodes: Seq[Int]): Seq[Int] =
    maxSteps.fold(nodes) { max =>
      val remaining = max - numEnqueuedEver
      // A remainder beyond Int range cannot be expressed as a `take` count; nothing to cut then.
      if (remaining > Int.MaxValue) nodes else nodes.take(remaining.toInt)
    }

  /**
   * Returns the node ids that should be added to the queue after visiting `node`:
   * none when the depth or degree limit is reached, otherwise the neighbors in
   * `dir` whose color passes `shouldBeEnqueued`, trimmed by `limitAddedToQueue`.
   */
  def chooseNodesToEnqueue(node: Node): Seq[Int] = {
    val currDepth = depth(node.id).get

    val expansionBlocked =
      depthLimit.isLimitReached(currDepth) || degreeLimit.isLimitReached(node.neighborCount(dir))

    if (expansionBlocked) {
      Seq.empty
    } else {
      val candidates = node.neighborIds(dir).filter(id => shouldBeEnqueued(coloring.get(id)))
      limitAddedToQueue(candidates)
    }
  }

  /**
   * Enqueues `nodes` that were chosen after visiting the `from` node (`from` is
   * None when enqueueing the initial nodes).
   */
  protected def enqueue(nodes: Seq[Int], from: Option[Int]): Unit = {
    numEnqueuedEver += nodes.size

    for (id <- nodes) {
      coloring += ((id, NodeColor.Enqueued))
    }

    // Previous-neighbor bookkeeping needs both a counter and a source node.
    (prevNbrCounter, from) match {
      case (Some(counter), Some(parentId)) =>
        nodes.foreach(id => counter.recordPreviousNeighbor(id, parentId))
      case _ => ()
    }

    nodePriority match {
      case GraphTraverserNodePriority.LIFO =>
        // Pushing to the front in reverse keeps `nodes` in their given order at the head.
        nodes.reverseIterator.foreach(id => queue.enqueueFirst(id))
      case GraphTraverserNodePriority.FIFO =>
        nodes.foreach(id => queue += id)
    }
  }

  /**
   * Performs the actions needed when visiting a node: runs `visitNode`, marks
   * the node `Visited`, then enqueues the nodes chosen by `chooseNodesToEnqueue`.
   */
  protected def processNode(node : Node): Unit = {
    visitNode(node)
    coloring += ((node.id, NodeColor.Visited))
    val successors = chooseNodesToEnqueue(node)
    enqueue(successors, Some(node.id))
  }

  /**
   * Finds in the queue the node that will be visited next, without removing it.
   */
  protected def findNextNodeToVisit(): Option[Int] =
    if (queue.isEmpty) None else Some(queue.first())

  /**
   * Visits the next node from the queue and returns it; when there is none,
   * fails the way calling `next()` on an empty iterator does.
   */
  def next(): V = findNextNodeToVisit() match {
    case None =>
      Iterator.empty.next()
    case Some(nextId) =>
      if (shouldBeDequeuedBeforeProcessing) queue.deque()
      val node = getExistingNodeById(graph, nextId)
      processNode(node)
      node
  }

  def hasNext: Boolean = findNextNodeToVisit().nonEmpty
}

/**
 * A traverser that keeps track of first depth of visiting a given node.
 */
trait DepthTracker extends QueueBasedTraverser[Node] {

  // Depth per node id. NOTE(review): the `true` flag presumably makes the keeper retain
  // only the first value recorded per node - confirm against IntInfoKeeper.
  private lazy val depthTracker = new IntInfoKeeper(true)

  /**
   * Records depth (depth of `from`) + 1 for every id in `nodes` before handing
   * them on to the underlying `enqueue`. Nodes enqueued without a known source
   * depth get depth 0.
   */
  abstract override protected def enqueue(nodes: Seq[Int], from: Option[Int]): Unit = {
    val parentDepth = from match {
      case Some(parentId) => depthTracker.infoOfNode(parentId).getOrElse(-1)
      case None => -1
    }
    val childDepth = parentDepth + 1
    for (id <- nodes) {
      depthTracker.recordInfo(id, childDepth)
    }
    super.enqueue(nodes, from)
  }

  /**
   * @return the depth stored for node `id`, if any
   */
  def depth(id: Int): Option[Int] = depthTracker.infoOfNode(id)
}

/**
 * Traverses in BFS order. This implies that first the `homeNodeIds` are visited,
 * then all their neighbors, then the neighbors of those, etc.
 * @param graph the graph to traverse on
 * @param dir direction in which to traverse
 * @param homeNodeIds the ids of nodes that the walk starts from
 * @param walkLimits limiting parameters
 * @param bfsPrevNbrCounter if set, tracks previous neighbors by occurrence
 */
class BreadthFirstTraverser[+V <: Node](val graph: Graph[V], val dir: GraphDir, val homeNodeIds: Seq[Int],
                            walkLimits: Walk.Limits = Walk.Limits(),
                            bfsPrevNbrCounter: Option[PrevNbrCounter] = None)
  extends DepthTracker
  with BoundedIterator[V]
{
  private val log = Logger.get()

  // Limits and previous-neighbor counter come straight from the constructor arguments.
  override lazy val limits: Walk.Limits = walkLimits

  override lazy val prevNbrCounter: Option[PrevNbrCounter] = bfsPrevNbrCounter

  // New nodes go to the back of the queue.
  override def nodePriority = GraphTraverserNodePriority.FIFO

  /**
   * Emits a trace-level log line describing the node being visited, then
   * delegates to the inherited `visitNode`.
   */
  override def visitNode(node: Node): Unit = {
    // Looked up outside the logging block, so a missing depth fails regardless of log level.
    val currDepth = depth(node.id).get
    log.ifTrace {
      val nbrCount = node.neighborCount(dir)
      val edgeCap = limits.maxNumEdgesThreshold.getOrElse(-1)
      "visiting %d, nbrCount=%d, maxNumEdges=%d, depth=%d".format(node.id, nbrCount, edgeCap, currDepth)
    }
    super.visitNode(node)
  }

  /**
   * Only nodes that were never put on the queue are enqueued.
   */
  def shouldBeEnqueued(color: Walk.NodeColor.Color): Boolean =
    color == Walk.NodeColor.Unenqueued
}

/**
 * Traverses in FIFO (breadth first) order but without any limit on the
 * number of times every node can be visited.
 * Every time the walk visits a node, it adds all its neighbors (along direction `dir`)
 * to the queue.
 * @param graph the graph to traverse on
 * @param dir direction in which to traverse
 * @param homeNodeIds the ids of nodes that the walk starts from
 * @param walkLimits limiting parameters
 * @param apwPrevNbrCounter if set, tracks previous neighbors by occurrence
 */
class AllPathsWalk[+V <: Node](val graph: Graph[V], val dir: GraphDir, val homeNodeIds: Seq[Int],
                   walkLimits: Walk.Limits = Walk.Limits(),
                   apwPrevNbrCounter: Option[PrevNbrCounter] = None)
  extends DepthTracker
  with BoundedIterator[V]
{
  // Not read anywhere inside this class.
  val onlyOnce: Boolean = false

  // New nodes go to the back of the queue.
  override def nodePriority = GraphTraverserNodePriority.FIFO

  // Limits and previous-neighbor counter come straight from the constructor arguments.
  override lazy val limits: Walk.Limits = walkLimits

  override lazy val prevNbrCounter: Option[PrevNbrCounter] = apwPrevNbrCounter

  /**
   * Every neighbor is enqueued, whatever its color, so nodes can be visited repeatedly.
   */
  override def shouldBeEnqueued(color: Walk.NodeColor.Color): Boolean = true
}

/**
 * Traverses in DFS order (every node can be visited only once).
 * @param graph the graph to traverse on
 * @param dir direction in which to traverse
 * @param homeNodeIds the ids of nodes that the walk starts from
 * @param walkLimits limiting parameters
 */
class DepthFirstTraverser[+V <: Node](val graph: Graph[V], val dir: GraphDir, val homeNodeIds: Seq[Int],
                                walkLimits: Walk.Limits = Walk.Limits())
    extends DepthTracker
    with BoundedIterator[V]
{
  private val log = Logger.get()

  // New nodes go to the front of the queue.
  override def nodePriority = GraphTraverserNodePriority.LIFO

  override lazy val limits: Walk.Limits = walkLimits

  /**
   * Emits a trace-level log line describing the node being visited, then
   * delegates to the inherited `visitNode`.
   */
  override def visitNode(node: Node): Unit = {
    log.ifTrace {
      val nbrCount = node.neighborCount(dir)
      "visiting %d, nbrCount=%d, depth=%d".format(node.id, nbrCount, depth(node.id).get)
    }
    super.visitNode(node)
  }

  /**
   * Seeks the next node that should be visited. Because a node may sit in the
   * queue more than once, entries at the head that are already `Visited` are
   * passed to `handleVisitedInQueue` and skipped.
   *
   * The node that is returned stays in the queue (so that `hasNext` can call this too).
   */
  @tailrec
  override final def findNextNodeToVisit(): Option[Int] = {
    if (queue.isEmpty) {
      None
    } else {
      val candidate = queue.first()
      if (coloring.get(candidate) == Walk.NodeColor.Visited) {
        handleVisitedInQueue(candidate)
        findNextNodeToVisit()
      } else {
        Some(candidate)
      }
    }
  }

  /**
   * Called for an already visited node `next` found at the head of the queue;
   * removes the head of the queue.
   */
  def handleVisitedInQueue(next: Int): Unit = {
    queue.deque()
  }

  // can't limit number of nodes added to the queue, because we skip some nodes
  override def limitAddedToQueue(nodes: Seq[Int]): Seq[Int] = nodes

  /**
   * Anything not yet visited may be enqueued, including nodes that are already on the queue.
   */
  override def shouldBeEnqueued(color: Walk.NodeColor.Color): Boolean =
    color != Walk.NodeColor.Visited
}


/**
 * A trait to be mixed in to the DepthFirstTraverser that keeps track of visiting distance
 * from `homeNodeIds`.
 *
 * Note that DepthTracker for the DFS case returns the depth of first seeing a particular
 * node. It does not have to be the same as the depth at which the node is visited.
 */
trait PathLengthTracker extends DepthFirstTraverser[Node] {
  // Distance a node would be visited at, as of the latest time it was enqueued.
  // NOTE(review): the boolean flags presumably select overwrite (`false`) versus
  // keep-first (`true`) recording - confirm against IntInfoKeeper.
  private lazy val nextVisitDistanceTracker = new IntInfoKeeper(false)
  // Distance at which a node was visited.
  private lazy val visitDistanceTracker = new IntInfoKeeper(true)

  /**
   * Notes (visit distance of `from`) + 1 as the pending distance of every id in
   * `nodes`, then passes them on to the underlying `enqueue`. Without a known
   * source distance the pending distance is 0.
   */
  abstract override protected def enqueue(nodes: Seq[Int], from: Option[Int]): Unit = {
    val parentDistance = from match {
      case Some(parentId) => visitDistanceTracker.infoOfNode(parentId).getOrElse(-1)
      case None => -1
    }
    val pendingDistance = parentDistance + 1
    for (id <- nodes) {
      nextVisitDistanceTracker.recordInfo(id, pendingDistance)
    }
    super.enqueue(nodes, from)
  }

  /**
   * Turns the pending distance of `node` into its visit distance, then
   * continues with the inherited `visitNode`.
   */
  override def visitNode(node: Node): Unit = {
    val pending = nextVisitDistanceTracker.infoOfNode(node).get
    visitDistanceTracker.recordInfo(node.id, pending)
    super.visitNode(node)
  }

  /**
   * @return the visit distance stored for node `id`, if any
   */
  def distance(id: Int) = visitDistanceTracker.infoOfNode(id)
}

/**
 * Trait to be mixed in to the DepthFirstTraverser to add discovery and finishing
 * times of a node.
 *
 * Discovery time of a node is the time when the node is added to the queue for the first time.
 *
 * Finishing time of a node is time just after all `dir` neighbors of the node became finished
 * (or when it is visited if it has no `dir` neighbors that should be processed later).
 */
trait DiscoveryAndFinishTimeTracker extends DepthFirstTraverser[Node] {
  // Shared clock for discovery and finishing events. Deliberately declared with `= _`
  // (automatically initialized to 0 before first enqueue): the home nodes are enqueued
  // while the parent is still initializing, and the value reached there has to be kept.
  private[this] var time: Int = _

  private lazy val discoveryTime = new IntInfoKeeper(false)

  private lazy val finishingTime = new IntInfoKeeper(false)

  // Visited nodes stay in the queue until `handleVisitedInQueue` removes them.
  override val shouldBeDequeuedBeforeProcessing = false

  /**
   * Stamps every id in `nodes` that has no discovery time yet with the current
   * clock value (advancing the clock per stamp), then passes the nodes on to the
   * underlying `enqueue`.
   */
  abstract override protected def enqueue(nodes: Seq[Int], from: Option[Int]): Unit = {
    nodes.foreach { id =>
      val undiscovered = discoveryTime(id).isEmpty
      if (undiscovered) {
        discoveryTime.recordInfo(id, time)
        time += 1
      }
    }
    super.enqueue(nodes, from)
  }

  /**
   * Stamps `node` with a finishing time unless it already has one, then lets the
   * inherited handler deal with the queue entry.
   */
  override def handleVisitedInQueue(node: Int): Unit = {
    val unfinished = finishingTime(node).isEmpty
    if (unfinished) {
      finishingTime.recordInfo(node, time)
      time += 1
    }
    super.handleVisitedInQueue(node)
  }

  /**
   * @return Number of nodes discovered or finished before discovering node with `id`.
   */
  def discoveryTime(id: Int): Option[Int] = discoveryTime.infoOfNode(id)

  /**
   * @return Number of nodes discovered or finished before finishing node with `id`.
   */
  def finishingTime(id: Int): Option[Int] = finishingTime.infoOfNode(id)
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy