All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twitter.cassovary.graph.GraphUtils.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2014 Twitter, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
 * file except in compliance with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */
package com.twitter.cassovary.graph

import com.twitter.cassovary.graph.GraphDir._
import com.twitter.cassovary.graph.tourist._
import com.twitter.finagle.stats.{DefaultStatsReceiver, Stat}
import it.unimi.dsi.fastutil.ints.Int2IntMap
import it.unimi.dsi.fastutil.objects.Object2IntMap
import scala.util.Random


/**
 * This class contains some common graph utilities and convenience functions.
 */
class GraphUtils[+V <: Node](val graph: Graph[V]) {

  private val statsReceiver = DefaultStatsReceiver

  import GraphUtils._

  /**
   * This is a breadth-first all paths walk with possible multiple visits to a node
   * along the direction specified by `dir`.
   *
   * All paths walk is a queue based walk, where each node adds all her neighbors to the queue
   * and each node can be visited multiple times.
   * @param startNodeId(s) node(s) to start the random walk from. These must exist in the graph.
   * @param numTopPathsPerNode number of paths per node to keep track of
   * @param walkLimits walk limits
   * @return a pair of elements
   *         1. Counter tracking a visited node's id V and the number of visits to that node.
   *         2. Counter tracking a visited node's id V and a set of neighbors. The neighbors
   *         are sorted in decreasing order by occurrence.*/
  def allPathsWalk(dir: GraphDir, startNodeId: Int, numTopPathsPerNode: Int,
                   walkLimits: Walk.Limits):
      (VisitsCounter, PrevNbrCounter) = {

    val visitsCounter = new VisitsCounter
    val prevNbrCounter = new PrevNbrCounter(Some(numTopPathsPerNode), false)

    if (graph.existsNodeId(startNodeId)) {
      val traversedNodes = new BreadthFirstTraverser(graph, dir, Seq(startNodeId),
        walkLimits, Some(prevNbrCounter))

      statsReceiver.counter("bfs_walk_request").incr()
      Stat.time(statsReceiver.stat("bfs_walk_traverse")) {
        traversedNodes.foreach { node =>
          // prevNbrCounter is mutated within BreadthFirstTraverser
          visitsCounter.visit(node)
        }
      }
    }

    (visitsCounter, prevNbrCounter)
  }

  /**
   * This is a breadth-first walk along the direction specified by `dir`.
   * @param startNodeId node to start the random walk from. These must exist in the graph.
   * @param walkLimits walk limits
   * @return a sequence of visited nodes and depths of the visit (or empty sequence if
   *         startNodeId was not found in the graph)
   */
  def bfsWalk(startNodeId: Int, dir: GraphDir, walkLimits: Walk.Limits):
    Seq[(Int, Int)] = {

    val prevNbrCounter = new PrevNbrCounter(Some(1), false)

    if (graph.existsNodeId(startNodeId)) {
      val traversedNodes = new BreadthFirstTraverser(graph, dir, Seq(startNodeId),
        walkLimits, Some(prevNbrCounter))

      statsReceiver.counter("bfs_walk_request").incr()
      Stat.time(statsReceiver.stat ("bfs_walk_traverse")) {
        traversedNodes.map { node =>
          (node.id, traversedNodes.depth(node.id).get)
        }.toSeq
      }
    } else Seq()
  }

  /**
   * Do a random walk starting from the set of nodes with ids `startNodeIds`.
   * The walk maintains a count of the number of times that nodes have been visited
   * during the walk.
   * @param startNodeIds nodes to start the random walk from
   * @param walkParams the `RandomWalkParams` random walk parameters
   * @return a tuple of two elements.
   *         The first is a counter tracking a visited node's id to the number of visits to that node.
   *         The second is a counter tracking a visited node's id to the paths visited while hitting
   *         that node. The paths are sorted in decreasing order by occurrence
   *         Each path is kept as a `DirectedPath`.
   */
  def randomWalk(dir: GraphDir, startNodeIds: Seq[Int], walkParams: RandomWalkParams)():
      (VisitsCounter, Option[PathsCounter]) = {
    val startNodesExist = (startNodeIds.length > 0) && startNodeIds.forall(graph.existsNodeId)

    val visitsCounter = new VisitsCounter
    val pathsCounterOption = walkParams.numTopPathsPerNode match {
      case Some(k) if k > 0 => Some(new PathsCounter(k, startNodeIds))
      case _ => None
    }

    if (startNodesExist) {
      val traversedNodes = new RandomBoundedTraverser(graph, dir, startNodeIds,
        walkParams.numSteps, walkParams)

      Stat.time(statsReceiver.stat("random_walk_traverse")) {
        traversedNodes.foreach { node =>
          visitsCounter.visit(node)
          if (pathsCounterOption.isDefined) {
            pathsCounterOption.get.visit(node)
          }
        }
      }
    }
    (visitsCounter, pathsCounterOption)
  }

  /**
   * Calculates the reputation of graph nodes personalized to a given node based on a random walk.
   * @param startNodeIds the ids of the node to get personalized reputations for
   * @param walkParams the `RandomWalkParams` random walk parameters
   * @return a 2-tuple:
   *         1. List of (node's id, the number of visits made to the node) sorted in decreasing
   *            order of the number of visits, and
   *         2. A mapping, for a visited node with id V to the top paths leading to V
   *            in the form of (P as a `DirectedPath`, frequency of walking P).
   */
  def calculatePersonalizedReputation(startNodeIds: Seq[Int], walkParams: RandomWalkParams):
      (collection.Map[Int, Int], Option[collection.Map[Int, Object2IntMap[DirectedPath]]]) = {
    Stat.time(statsReceiver.stat("%s_total".format("PTC"))) {
      val (visitsCounter, pathsCounterOption) = randomWalk(walkParams.dir, startNodeIds, walkParams)
      val topPathsOption = pathsCounterOption flatMap { counter => Some(counter.infoAllNodes) }
      (visitsCounter.infoAllNodes, topPathsOption)
    }
  }

  def calculatePersonalizedReputation(startNodeId: Int, walkParams: RandomWalkParams):
      (collection.Map[Int, Int], Option[collection.Map[Int, Object2IntMap[DirectedPath]]]) = {
    calculatePersonalizedReputation(Seq(startNodeId), walkParams)
  }

  /**
   * Does a breadth-first-walk starting from {@code startNodeId} using the walk
   * parameters specified in {@code walkParams}. Returns a 2-tuple:
   * 1. List of (node's id, the number of visits made to the node) sorted in decreasing
   *    order of the number of visits, and
   * 2. A mapping for a visited node with id V to the top paths leading to V
   *    in the form of (P as a {@link DirectedPath}, frequency of walking P).
   */
  def calculateAllPathsWalk(startNodeId: Int, dir: GraphDir, numTopPathsPerNode: Int,
                            walkLimits: Walk.Limits):
      (collection.Map[Int, Int], collection.Map[Int, Int2IntMap]) = {
    Stat.time(statsReceiver.stat("%s_total".format("AllPathsWalk"))) {
      val (visitsCounter, prevNbrCounter) = allPathsWalk(dir, startNodeId,
        numTopPathsPerNode, walkLimits)
      (visitsCounter.infoAllNodes, prevNbrCounter.infoAllNodes)
    }
  }

  /**
   * @param id the id of the node to count neighbors of
   * @param dir the direction of interest
   * @return number of neighbors (i.e., number of nodes 1 hop away) in `dir`.
   *         Warning: Lossy: Returns 0 if id is not found.
   */
  def neighborCount(id: Int, dir: GraphDir) = graph.getNodeById(id).map(_.neighborCount((dir))).getOrElse(0)


  /**
   * Assuming that the node stores both directions of edges, calculate the number of mutual edges
   * incident on this node
   * @return number of neighbors of node that have an edge to and from this node
   */
  def getNumMutualEdgesBothDirs(node: Node): Long = {
    val (dirSmall, dirLarge) = if (node.outboundCount < node.inboundCount)
      (node.outboundNodes(), node.inboundNodes())
    else
      (node.inboundNodes(), node.outboundNodes())
    val setSmall = dirSmall.toSeq.toSet
    val numEdges = dirLarge.toSeq.foldLeft(0L) { (num, curr) =>
      if (setSmall contains curr) num+1
      else num
    }
    numEdges
  }
}

object GraphUtils {

  /**
   * Parameters of a walk (such as a random walk, or a breadth first walk).
   * @param numSteps number of steps to take in the walk
   * @param resetProbability the probability with which to reset back to `startNodeId`.
   *        Must lie between 0.0 and 1.0, both inclusive. Ignored for non-random walks
   * @param maxNumEdgesThresh Max number of edges allowed for a node
   *        beyond which the next random step is `startNodeId` regardless of anything else
   * @param numTopPathsPerNode the number of top paths to `node` to maintain, None if we don't
   *        want to maintain them at all
   * @param maxDepth the maximum depth to visit in depth-related search (e.g. startNodeId has
   *        depth 0, its immediate neighbors have depth 1, etc.). None if we don't want to maintain
   *        them at all
   * @param visitSameNodeOnce if true, the walk would only visit the same node once
   *        and will likely restart to a start node when visiting the same node twice
   * @param dir traverse out-direction or in-direction
   * @param stable if true, use a fixed random number generator in the random walk
   * @param filterHomeNodeByNumEdges if true, home node will be checked for maxNumEdgesThresh
   * when visited during random walk
  */
  case class RandomWalkParams(numSteps: Long,
      resetProbability: Double,
      maxNumEdgesThresh: Option[Int] = None,
      numTopPathsPerNode: Option[Int] = None,
      maxDepth: Option[Int] = None,
      visitSameNodeOnce: Boolean = false,
      dir: GraphDir = GraphDir.OutDir,
      stable: Boolean = false,
      filterHomeNodeByNumEdges: Boolean = false) {
    /**
     * The pruning function that guides the pruning of a random walk. If the random walk
     * reached a node, it resets to the starting node if pruneFn returns true for that node.
     * Can be overridden by subclasses.
     */
    val pruneFn: Node => Boolean = NodeUtils.hasTooManyEdges(OutDir, maxNumEdgesThresh)

    private val seed = 268430371266199L
    lazy val randNumGen = if (stable) new Random(seed) else new Random
    require(resetProbability >= 0 && resetProbability <= 1.0,
      "reset probability must be between 0.0 and 1.0")
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy