All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twitter.hashing.diagnostics.scala Maven / Gradle / Ivy

package com.twitter.hashing


import scala.collection.mutable

/**
 * Measures how evenly a [[Distributor]] spreads hash keys across its nodes.
 *
 * @param distributor the distributor under test; it is queried for the node
 *                    owning each hash and for its total node count
 * @tparam A the node type produced by the distributor
 */
class DistributionTester[A](distributor: Distributor[A]) {

  /**
   * Returns a normalized standard deviation indicating how well the keys
   * are distributed between the nodes. The closer to 0 the better.
   *
   * The value is the sample standard deviation (n - 1 denominator) of the
   * per-node key counts divided by the mean key count per node. Nodes that
   * received no keys are included with a count of 0.
   *
   * @param keys the hash values to route through the distributor
   * @return the normalized deviation, or 0.0 when it is not defined
   *         (no keys were supplied, or there are fewer than two nodes)
   */
  def distributionDeviation(keys: Seq[Long]): Double = {
    // Tally how many keys land on each node.
    val keysPerNode = mutable.Map.empty[A, Int]
    keys.foreach { hash =>
      val node = distributor.nodeForHash(hash)
      keysPerNode(node) = keysPerNode.getOrElse(node, 0) + 1
    }

    // Nodes that never received a key are absent from the map but must still
    // count as zero-frequency entries, otherwise idle nodes would be ignored.
    val occupied = keysPerNode.values.toList
    val idleNodes = math.max(0, distributor.nodeCount - occupied.size)
    val frequencies = occupied ++ List.fill(idleNodes)(0)

    val totalKeys = frequencies.sum
    if (frequencies.size < 2 || totalKeys == 0) {
      // With fewer than two nodes the sample variance divides by zero, and
      // with no keys the mean is zero; both would yield NaN. Nothing is
      // unevenly distributed in either case, so report a perfect score.
      0.0
    } else {
      val average = totalKeys.toDouble / frequencies.size
      val squaredDiffs = frequencies.map { count => math.pow(count - average, 2) }
      val sd = math.sqrt(squaredDiffs.sum / (frequencies.size - 1))
      sd / average
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy