All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twitter.algebird.benchmark.CMSBenchmark.scala Maven / Gradle / Ivy

There is a newer version: 0.12.2
Show newest version
package com.twitter.algebird
package benchmark

import java.util.concurrent.TimeUnit
import org.openjdk.jmh.annotations._
import scala.util.Random.nextString

import CMSFunctions.generateHashes

/**
 * Benchmarks the Count-Min sketch (CMS) implementation in Algebird.
 *
 * We benchmark different `K` types as well as different input data streams.
 */
object CMSBenchmark {

  import CMSHasherImplicits.CMSHasherBigInt

  @State(Scope.Benchmark)
  class CMSState {

    val Seed: Int = 1
    val MaxBits: Int = 2048

    @Param(Array("0.1", "0.005"))
    var eps: Double = 0.0

    @Param(Array("0.0000001")) // 1e-8
    var delta: Double = 0.0

    // number of data values to combine into a CMS
    @Param(Array("1000"))
    var size: Int = 0

    // need to initialize later because we don't have `size` yet.
    var smallLongs: Vector[Long] = _
    var smallBigInts: Vector[BigInt] = _
    var largeBigInts: Vector[BigInt] = _
    var largeStrings: Vector[String] = _

    // need to initialize later because we don't have `eps` and `delta` yet.
    var longMonoid: CMSMonoid[Long] = _
    var bigIntMonoid: CMSMonoid[BigInt] = _
    var stringMonoid: CMSMonoid[String] = _

    @Setup(Level.Trial)
    def setup(): Unit = {
      longMonoid = CMS.monoid[Long](eps, delta, Seed)
      bigIntMonoid = CMS.monoid[BigInt](eps, delta, Seed)
      stringMonoid = CMS.monoid[String](eps, delta, Seed)

      val bitsPerChar = 16
      largeStrings = (1 to size).map(i => nextString(MaxBits / bitsPerChar)).toVector
      largeBigInts = largeStrings.map(s => BigInt(s.getBytes)).toVector
      smallLongs = (1 to size).map(_.toLong).toVector
      smallBigInts = (1 to size).map(BigInt(_)).toVector
    }

  }

  def sumCmsVector[A](as: Vector[A], m: CMSMonoid[A]): CMS[A] =
    m.sum(as.iterator.map(CMSItem(_, 1L, m.params)))
}

class CMSBenchmark {
  import CMSBenchmark._

  @Benchmark
  def sumSmallLongCms(st: CMSState): CMS[Long] =
    sumCmsVector(st.smallLongs, st.longMonoid)

  @Benchmark
  def sumSmallBigIntCms(st: CMSState): CMS[BigInt] =
    sumCmsVector(st.smallBigInts, st.bigIntMonoid)

  @Benchmark
  def sumLargeBigIntCms(st: CMSState): CMS[BigInt] =
    sumCmsVector(st.largeBigInts, st.bigIntMonoid)

  @Benchmark
  def sumLargeStringCms(st: CMSState): CMS[String] =
    sumCmsVector(st.largeStrings, st.stringMonoid)
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy