All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twitter.algebird.benchmark.CMSHashingBenchmark.scala Maven / Gradle / Ivy

There is a newer version: 0.12.2
Show newest version
package com.twitter.algebird.benchmark

import java.util.concurrent.TimeUnit
import org.openjdk.jmh.annotations._

/**
 * Benchmarks the hashing algorithms used by Count-Min sketch for CMS[BigInt].
 *
 * The input values are generated ahead of time to ensure that each trial uses the same input (and that the RNG is not
 * influencing the runtime of the trials).
 *
 * More details available at https://github.com/twitter/algebird/issues/392.
 */
// Historical note (predates the JMH annotations this file now uses; neither `SimpleBenchmark` nor `timePlus`
// appears in this file any more):
// Once we can convince cappi (https://github.com/softprops/cappi) -- the sbt plugin we used to run
// caliper benchmarks -- to work with the latest caliper 1.0-beta-1, we would:
//     - Let `CMSHashingBenchmark` extend `Benchmark` (instead of `SimpleBenchmark`)
//     - Annotate `timePlus` with `@MacroBenchmark`.
object CMSHashingBenchmark {

  /**
   * Benchmark-scoped state consumed by the methods of the `CMSHashingBenchmark` class.
   *
   * The `@Param` fields are populated by JMH (the `0` initializers are placeholders); `setup()` then derives
   * `random` and `inputs` from them once per trial.
   */
  @State(Scope.Benchmark)
  class CMSState {
    /**
     * The `a` parameter for CMS' default ("legacy") hashing algorithm: `h_i(x) = a_i * x + b_i (mod p)`.
     */
    @Param(Array("5123456"))
    var a: Int = 0

    /**
     * The `b` parameter for CMS' default ("legacy") hashing algorithm: `h_i(x) = a_i * x + b_i (mod p)`.
     *
     * Algebird's CMS implementation hard-codes `b` to `0`.
     */
    @Param(Array("0"))
    var b: Int = 0

    /**
     * Width of the counting table.
     */
    @Param(Array("11" /* eps = 0.271 */ , "544" /* eps = 0.005 */ , "2719" /* eps = 1E-3 */ , "271829" /* eps = 1E-5 */ ))
    var width: Int = 0

    /**
     * Number of operations per benchmark repetition.
     */
    @Param(Array("100000"))
    var operations: Int = 0

    /**
     * Maximum number of bits for randomly generated BigInt instances.
     */
    @Param(Array("128", "1024", "2048"))
    var maxBits: Int = 0

    /** Source of randomness for `inputs`; re-created by every `setup()` call. */
    var random: scala.util.Random = _

    /** The pre-generated input values, `operations` of them, each drawn from `[0, 2^maxBits)`. */
    var inputs: Seq[BigInt] = _

    /**
     * Generates the benchmark inputs once per trial.
     *
     * The inputs are materialized into a strict `Vector`.  A lazy view must not be used here: it would re-run
     * the generator on every traversal, so each benchmark invocation would see different values and would also
     * pay for the RNG and the `BigInt` construction inside the measured code.
     */
    @Setup(Level.Trial)
    def setup(): Unit = {
      random = new scala.util.Random
      // We draw numbers randomly from a 2^maxBits address space.  `Vector.fill` evaluates its by-name element
      // argument once per slot, i.e. exactly `operations` times in total.
      inputs = Vector.fill(operations)(scala.math.BigInt(maxBits, random))
    }

  }
}
class CMSHashingBenchmark {
  import CMSHashingBenchmark._

  /**
   * Hashes `x` into `[0, width)` using Scala's `MurmurHash3.arrayHash` over the bytes of `x`, seeded with `a`.
   *
   * @param a     seed passed to `MurmurHash3.arrayHash`
   * @param b     unused by this algorithm; kept so that the signature mirrors `brokenCurrentHash`
   * @param width modulus applied to the non-negative hash
   * @param x     the value to hash
   * @return the bucket index
   */
  private def murmurHashScala(a: Int, b: Int, width: Int)(x: BigInt) = {
    val hash: Int = scala.util.hashing.MurmurHash3.arrayHash(x.toByteArray, a)
    val h = {
      // We only want positive integers for the subsequent modulo.  This method mimics Java's Hashtable
      // implementation.  The Java code uses `0x7FFFFFFF` for the bit-wise AND, which is equal to Int.MaxValue.
      val positiveHash = hash & Int.MaxValue
      positiveHash % width
    }
    assert(h >= 0, "hash must not be negative")
    h
  }

  /** `2^31 - 1`, used as a bit mask by `brokenCurrentHash`. */
  private val PRIME_MODULUS = (1L << 31) - 1

  /**
   * Hashes `x` into `[0, width)` by computing `a * x + b`, folding it with its own value shifted right by 32
   * bits, masking with `PRIME_MODULUS` and finally reducing modulo `width`.
   *
   * NOTE(review): the method name calls this algorithm "broken"; presumably that refers to the discussion
   * linked from this file's header (algebird issue 392).  It is left exactly as is because this benchmark
   * measures it in that form.
   *
   * @param a     multiplier of the linear hash
   * @param b     offset of the linear hash
   * @param width modulus applied to the masked value
   * @param x     the value to hash
   * @return the bucket index
   */
  private def brokenCurrentHash(a: Int, b: Int, width: Int)(x: BigInt) = {
    val unModded: BigInt = (x * a) + b
    // The mask keeps only the low 31 bits, so `modded` fits into a non-negative Int.
    val modded: BigInt = (unModded + (unModded >> 32)) & PRIME_MODULUS
    val h = modded.toInt % width
    assert(h >= 0, "hash must not be negative")
    h
  }

  /**
   * Applies `brokenCurrentHash` to every value in `state.inputs`.
   *
   * Annotated with `@Benchmark` because JMH only runs methods that carry this annotation; the `time` name
   * prefix by itself has no meaning to JMH.
   */
  @Benchmark
  def timeBrokenCurrentHashWithRandomMaxBitsNumbers(state: CMSState): Unit =
    state.inputs.foreach { input => brokenCurrentHash(state.a, state.b, state.width)(input) }

  /**
   * Applies `murmurHashScala` to every value in `state.inputs`.
   *
   * Annotated with `@Benchmark` because JMH only runs methods that carry this annotation; the `time` name
   * prefix by itself has no meaning to JMH.
   */
  @Benchmark
  def timeMurmurHashScalaWithRandomMaxBitsNumbers(state: CMSState): Unit =
    state.inputs.foreach { input => murmurHashScala(state.a, state.b, state.width)(input) }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy