All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.pekko.util.FrequencySketch.scala Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * license agreements; and to You under the Apache License, version 2.0:
 *
 *   https://www.apache.org/licenses/LICENSE-2.0
 *
 * This file is part of the Apache Pekko project, which was derived from Akka.
 */

/*
 * Copyright (C) 2021-2022 Lightbend Inc. 
 */

/*
 * Copyright 2015 Ben Manes. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.pekko.util

import org.apache.pekko.annotation.InternalApi

import scala.util.hashing.MurmurHash3

/**
 * INTERNAL API
 *
 * Factory and helpers for the frequency sketch used to estimate the popularity of items when implementing the
 * TinyLFU cache admission policy. The frequency sketch includes the TinyLFU reset operation, which periodically
 * halves all counters.
 */
@InternalApi
private[pekko] object FrequencySketch {

  /**
   * Create a new FrequencySketch based on the cache capacity (which will be increased to the nearest power of two).
   *
   * The sketch width is `widthMultiplier * Bits.ceilingPowerOfTwo(capacity)` and the reset size is
   * `resetMultiplier * capacity` truncated to an Int. The sketch constructor requires the width to be a power
   * of two, so a width that is zero or negative (for example after Int overflow) is rejected there.
   *
   * @param capacity the cache capacity (maximum items that will be cached)
   * @param widthMultiplier a multiplier for the width of the sketch
   * @param resetMultiplier the multiplier on the capacity for the reset size
   * @param depth the depth of count-min sketch (number of hash functions)
   * @param counterBits the size of the counters in bits: 2, 4, 8, 16, 32, or 64 bits
   * @param hasher the hash function for the element type
   * @return a configured FrequencySketch
   */
  def apply[A](
      capacity: Int,
      widthMultiplier: Int = 4,
      resetMultiplier: Double = 10,
      depth: Int = 4,
      counterBits: Int = 4)(implicit hasher: Hasher[A]): FrequencySketch[A] = {
    val width = widthMultiplier * Bits.ceilingPowerOfTwo(capacity)
    val resetSize = (resetMultiplier * capacity).toInt
    new FrequencySketch(depth, width, counterBits, resetSize, hasher)
  }

  /**
   * Hash function for elements of type `A`, producing a 32-bit hash.
   * Sealed, so implementations can only be declared in this source file.
   */
  sealed trait Hasher[A] {
    def hash(value: A): Int
  }

  object Hasher {

    /** Seed used by the implicitly available hashers. */
    final val DefaultSeed = 135283237

    /** Default hasher for strings, seeded with [[DefaultSeed]]. */
    implicit val StringHasher: StringHasher = new StringHasher(DefaultSeed)

    /** Hashes strings with `MurmurHash3.stringHash` and the given seed. */
    final class StringHasher(seed: Int) extends Hasher[String] {
      override def hash(value: String): Int = MurmurHash3.stringHash(value, seed)
    }
  }

  /** Bit manipulation helpers for power-of-two sizing. */
  object Bits {

    /**
     * Whether `i` is a positive power of two.
     *
     * The `i > 0` guard matters: the bit trick `(i & (i - 1)) == 0` alone also holds for `0` and for
     * `Int.MinValue`, neither of which is a usable power-of-two size.
     */
    def isPowerOfTwo(i: Int): Boolean = i > 0 && (i & (i - 1)) == 0

    /**
     * The exponent `e` of the smallest power of two `2^e` that is greater than or equal to `i` (for `i >= 1`).
     * For an exact power of two this is its base-2 logarithm.
     */
    def powerOfTwoExponent(i: Int): Int = 32 - Integer.numberOfLeadingZeros(i - 1)

    /**
     * The smallest power of two greater than or equal to `i`. Values of `i` below 1 give 1.
     * Relies on Int shift distances being taken modulo 32, so the negated leading-zero count acts as
     * `32 - numberOfLeadingZeros`. For `i` above 2^30 the result overflows to `Int.MinValue`.
     */
    def ceilingPowerOfTwo(i: Int): Int = 1 << -Integer.numberOfLeadingZeros(i - 1)
  }
}

/**
 * INTERNAL API
 *
 * A frequency sketch for estimating the popularity of items. For implementing the TinyLFU cache admission policy.
 *
 * This is the generalised variant: both the depth (number of hash functions, one row of counters each) and the
 * counter size are configurable.
 *
 * Counters are packed into longs: every row of the matrix is an array of longs and each long holds
 * `64 / counterBits` counters. Sizes are powers of two so that positions can be derived with masks and shifts.
 *
 * The sketch applies the TinyLFU reset operation, halving all counters once the number of recorded increments
 * reaches `resetSize`, to allow smaller counters to be used while retaining reasonable accuracy of relative
 * frequencies.
 *
 * To get pairwise independent hash functions for the given depth, this implementation combines two hash functions
 * using the "Building a Better Bloom Filter" approach, where gi(x) = h1(x) + i * h2(x) mod p.
 *
 * References:
 *
 * "TinyLFU: A Highly Efficient Cache Admission Policy"
 * Gil Einziger, Roy Friedman, Ben Manes
 *
 * "An Improved Data Stream Summary: The Count-Min Sketch and its Applications"
 * Graham Cormode, S. Muthukrishnan
 *
 * "Less Hashing, Same Performance: Building a Better Bloom Filter"
 * Adam Kirsch, Michael Mitzenmacher
 *
 * @param depth depth of the count-min sketch (number of hash functions)
 * @param width width of the count-min sketch (number of counters)
 * @param counterBits the size of the counters in bits: 2, 4, 8, 16, 32, or 64 bits
 * @param resetSize the size (number of counter increments) to apply the reset operation
 * @param hasher the hash function for the element type
 */
@InternalApi
private[pekko] final class FrequencySketch[A](
    depth: Int,
    width: Int,
    counterBits: Int,
    resetSize: Int,
    hasher: FrequencySketch.Hasher[A]) {

  require(FrequencySketch.Bits.isPowerOfTwo(width), "width must be a power of two")
  require(Set(2, 4, 8, 16, 32, 64).contains(counterBits), "counterBits must be 2, 4, 8, 16, 32, or 64 bits")

  private final val BitsPerLong = 64

  // layout of the packed counters
  private[this] val bitsPerCounter = counterBits
  private[this] val countersPerLong = BitsPerLong / bitsPerCounter
  private[this] val longsPerRow = math.max(1, width / countersPerLong)

  // masks and shifts that turn a hash into a (column, bit offset) pair
  private[this] val indexMask = width - 1
  private[this] val columnShift = FrequencySketch.Bits.powerOfTwoExponent(countersPerLong)
  private[this] val positionMask = countersPerLong - 1
  private[this] val offsetShift = FrequencySketch.Bits.powerOfTwoExponent(bitsPerCounter)

  // all bits of one counter; 64-bit counters are limited to the non-negative range of a long
  private[this] val counterMask = if (counterBits == 64) Long.MaxValue else (1L << bitsPerCounter) - 1

  // lowest bit of every counter in a long: selects the counters holding an odd value
  private[this] val oddMask =
    (0 until countersPerLong).foldLeft(0L) { (bits, position) =>
      bits | (1L << (position * bitsPerCounter))
    }

  // every counter's bits except its highest one: clears what leaks in from the neighbour when halving
  private[this] val resetMask = {
    val halvedCounterMask = counterMask >> 1
    (0 until countersPerLong).foldLeft(0L) { (bits, position) =>
      bits | (halvedCounterMask << (position * bitsPerCounter))
    }
  }

  private[this] val matrix = Array.fill[Array[Long]](depth)(new Array[Long](longsPerRow))
  private[this] val rowSizes = new Array[Int](depth)
  private[this] var updatedSize = 0

  /**
   * Get the current size of the sketch (the number of incremented counters).
   */
  def size: Int = updatedSize

  /**
   * Get the estimated frequency for a value: the minimum of its counters over all rows.
   * Limited by the maximum size of the counters.
   * Note that frequencies are also periodically halved as an aging mechanism.
   */
  def frequency(value: A): Int = {
    val firstHash = hasher.hash(value)
    val stepHash = rehash(firstHash)
    var lowest = Int.MaxValue
    // equals firstHash + row * stepHash (Int arithmetic wraps identically either way)
    var rowHash = firstHash
    var row = 0
    while (row < depth) {
      val count = getCounter(row, rowHash)
      if (count < lowest) lowest = count
      rowHash += stepHash
      row += 1
    }
    lowest
  }

  /**
   * Increment the estimated frequency of a value. Limited by the maximum size of the counters.
   * Note that frequencies are also periodically halved as an aging mechanism.
   */
  def increment(value: A): Unit = {
    val firstHash = hasher.hash(value)
    val stepHash = rehash(firstHash)
    var anyUpdated = false
    // equals firstHash + row * stepHash (Int arithmetic wraps identically either way)
    var rowHash = firstHash
    var row = 0
    while (row < depth) {
      // every row is attempted, even after an earlier row was already updated
      if (incrementCounter(row, rowHash)) anyUpdated = true
      rowHash += stepHash
      row += 1
    }
    if (anyUpdated) {
      updatedSize += 1
      if (updatedSize == resetSize) reset()
    }
  }

  /** Derive the second hash from the first one using the MurmurHash3 mixing and finalization steps. */
  private def rehash(hash: Int): Int = {
    val mixed = MurmurHash3.mixLast(hash, hash)
    MurmurHash3.finalizeHash(mixed, 2)
  }

  /** Index of the long, within a row, that holds the counter selected by the hash. */
  private def columnOf(hash: Int): Int = (hash & indexMask) >>> columnShift

  /** Bit offset, within its long, of the counter selected by the hash. */
  private def offsetOf(hash: Int): Int = (hash & positionMask) << offsetShift

  private def getCounter(row: Int, hash: Int): Int = {
    val packed = matrix(row)(columnOf(hash))
    ((packed >>> offsetOf(hash)) & counterMask).toInt
  }

  /**
   * Increment the counter selected by the hash in the given row, unless it has already reached its maximum.
   *
   * @return whether the counter was incremented
   */
  private def incrementCounter(row: Int, hash: Int): Boolean = {
    val column = columnOf(hash)
    val offset = offsetOf(hash)
    val saturated = counterMask << offset
    val cells = matrix(row)
    if ((cells(column) & saturated) == saturated) false
    else {
      cells(column) += (1L << offset)
      rowSizes(row) += 1
      true
    }
  }

  /**
   * The TinyLFU reset operation (periodically halving all counters).
   * Adjusts for truncation from integer division (bit shift for efficiency)
   * by adjusting for the number of odd counters per row (each off by 0.5).
   */
  private def reset(): Unit = {
    var row = 0
    while (row < depth) {
      val cells = matrix(row)
      var oddCounters = 0
      var column = 0
      while (column < longsPerRow) {
        val packed = cells(column)
        oddCounters += java.lang.Long.bitCount(packed & oddMask)
        cells(column) = (packed >>> 1) & resetMask
        column += 1
      }
      rowSizes(row) = (rowSizes(row) - oddCounters) >>> 1
      row += 1
    }
    updatedSize = rowSizes.max
  }

  /** Render all counters as a table, for debugging (smaller) sketches. */
  def toDebugString: String =
    FrequencySketchUtil.debugString(
      matrix,
      rowWidth = longsPerRow,
      slots = countersPerLong,
      counterWidth = bitsPerCounter,
      counterMax = counterMask)
}

/**
 * INTERNAL API
 *
 * Factory for the fixed-layout FastFrequencySketch.
 */
@InternalApi
private[pekko] object FastFrequencySketch {

  /**
   * Create a new FastFrequencySketch based on the cache capacity (which will be increased to the nearest power of two).
   *
   * The sketch width is the rounded-up capacity times `widthMultiplier`, and the reset size is
   * `resetMultiplier * capacity` truncated to an Int.
   *
   * @param capacity the cache capacity (maximum items that will be cached)
   * @param widthMultiplier a multiplier for the width of the sketch
   * @param resetMultiplier the multiplier on the capacity for the reset size
   * @return a configured FastFrequencySketch
   */
  def apply[A](capacity: Int, widthMultiplier: Int = 4, resetMultiplier: Double = 10): FastFrequencySketch[A] =
    new FastFrequencySketch[A](
      width = widthMultiplier * FrequencySketch.Bits.ceilingPowerOfTwo(capacity),
      resetSize = (resetMultiplier * capacity).toInt)
}

/**
 * INTERNAL API
 *
 * A faster implementation of the frequency sketch (around twice as fast).
 *
 * The depth (number of hash functions) is fixed at 4 and counters are fixed at 4 bits (0-15), so every
 * long holds 16 counters and all masks and shifts are constants. Items are hashed via their `hashCode`,
 * which is rehashed by this class before use.
 *
 * The implementation is inspired by the approach used in the Caffeine caching library:
 * https://github.com/ben-manes/caffeine/blob/a6be555f0f2a44d33a9d7e52ea923622e373ac7f/caffeine/src/main/java/com/github/benmanes/caffeine/cache/FrequencySketch.java
 *
 * @param width width of the count-min sketch (number of counters)
 * @param resetSize the size (number of counter increments) to apply the reset operation
 */
@InternalApi
private[pekko] final class FastFrequencySketch[A](width: Int, resetSize: Int) {
  require(FrequencySketch.Bits.isPowerOfTwo(width), "width must be a power of two")

  private final val Depth = 4
  private final val SlotShift = 4 // index -> column: 16 counters per long
  private final val SlotMask = 0xF // index -> position of the counter within its long
  private final val CounterShift = 2 // position -> bit offset: 4 bits per counter
  private final val CounterMask = 0xFL // all bits of one counter
  private final val OddMask = 0x1111111111111111L // lowest bit of every counter
  private final val ResetMask = 0x7777777777777777L // every counter's bits except its highest one

  // seeds are large primes between 2^63 and 2^64
  private final val Seed0 = 0xC3A5C85C97CB3127L
  private final val Seed1 = 0xB492B66FBE98F273L
  private final val Seed2 = 0x9AE16A3B2F90404FL
  private final val Seed3 = 0xCBF29CE484222325L

  private[this] val rowWidth = math.max(1, width >>> SlotShift)
  private[this] val indexMask = width - 1

  private[this] val matrix = Array.fill[Array[Long]](Depth)(new Array[Long](rowWidth))
  private[this] val rowSizes = new Array[Int](Depth)
  private[this] var updatedSize = 0

  /**
   * Get the current size of the sketch (the number of incremented counters).
   */
  def size: Int = updatedSize

  /**
   * Get the estimated frequency for a value: the minimum of its four counters (one per row).
   */
  def frequency(value: A): Int = {
    val hash = rehash(value.hashCode)
    val count0 = getCounter(row = 0, index(hash, Seed0))
    val count1 = getCounter(row = 1, index(hash, Seed1))
    val count2 = getCounter(row = 2, index(hash, Seed2))
    val count3 = getCounter(row = 3, index(hash, Seed3))
    Math.min(Math.min(count0, count1), Math.min(count2, count3))
  }

  /**
   * Increment the estimated frequency of a value. All four rows are always attempted; the reset
   * operation runs once the number of recorded increments reaches `resetSize`.
   */
  def increment(value: A): Unit = {
    val hash = rehash(value.hashCode)
    val updated0 = incrementCounter(row = 0, index(hash, Seed0))
    val updated1 = incrementCounter(row = 1, index(hash, Seed1))
    val updated2 = incrementCounter(row = 2, index(hash, Seed2))
    val updated3 = incrementCounter(row = 3, index(hash, Seed3))
    if (updated0 || updated1 || updated2 || updated3) {
      updatedSize += 1
      if (updatedSize == resetSize) reset()
    }
  }

  // A low-bias hash function found by Hash Function Prospector
  // https://github.com/skeeto/hash-prospector
  // hash-prospector has been placed in public domain - https://github.com/skeeto/hash-prospector/blob/master/UNLICENSE
  private def rehash(hash: Int): Int = {
    val round1 = ((hash >>> 15) ^ hash) * 0xD168AAAD
    val round2 = ((round1 >>> 15) ^ round1) * 0xAF723597
    (round2 >>> 15) ^ round2
  }

  /** Counter index for one row: mixes the hash with the row's seed and masks it to the sketch width. */
  private def index(hash: Int, seed: Long): Int = {
    val product = (hash + seed) * seed
    val folded = product + (product >>> 32)
    folded.toInt & indexMask
  }

  /** Index of the long, within a row, that holds the counter. */
  private def columnOf(counterIndex: Int): Int = counterIndex >>> SlotShift

  /** Bit offset of the counter within its long. */
  private def offsetOf(counterIndex: Int): Int = (counterIndex & SlotMask) << CounterShift

  private def getCounter(row: Int, counterIndex: Int): Int = {
    val packed = matrix(row)(columnOf(counterIndex))
    ((packed >>> offsetOf(counterIndex)) & CounterMask).toInt
  }

  /**
   * Increment the counter at the given index of the given row, unless it has already reached its maximum.
   *
   * @return whether the counter was incremented
   */
  private def incrementCounter(row: Int, counterIndex: Int): Boolean = {
    val column = columnOf(counterIndex)
    val offset = offsetOf(counterIndex)
    val saturated = CounterMask << offset
    val cells = matrix(row)
    if ((cells(column) & saturated) == saturated) false
    else {
      cells(column) += (1L << offset)
      rowSizes(row) += 1
      true
    }
  }

  /**
   * Halve all counters. Each row's size is corrected for the truncation of odd counters before being
   * halved, and the sketch size becomes the largest row size.
   */
  private def reset(): Unit = {
    var row = 0
    while (row < Depth) {
      val cells = matrix(row)
      var oddCounters = 0
      var column = 0
      while (column < rowWidth) {
        val packed = cells(column)
        oddCounters += java.lang.Long.bitCount(packed & OddMask)
        cells(column) = (packed >>> 1) & ResetMask
        column += 1
      }
      rowSizes(row) = (rowSizes(row) - oddCounters) >>> 1
      row += 1
    }
    updatedSize = rowSizes.max
  }

  /** Render all counters as a table, for debugging (smaller) sketches. */
  def toDebugString: String =
    FrequencySketchUtil.debugString(matrix, rowWidth, slots = 16, counterWidth = 4, counterMax = CounterMask)
}

/**
 * INTERNAL API
 *
 * Debugging support shared by the frequency sketch implementations.
 */
@InternalApi
private[pekko] object FrequencySketchUtil {

  /**
   * Create a pretty table with all the frequency sketch counters for debugging (smaller) sketches.
   *
   * The table has a header line with the slot numbers, followed by one line per long of the matrix:
   * the column index and then the counters unpacked from that long, lowest bits first. The first line
   * of each matrix row is preceded by a heavy divider, the other lines by a light one.
   *
   * @param matrix the packed counters, one array of longs per row
   * @param rowWidth the number of longs per row (only used to size the index column)
   * @param slots the number of counters per long (sizes the header and dividers)
   * @param counterWidth the number of bits per counter
   * @param counterMax mask applied to extract a counter, also used to size the counter columns
   * @return the rendered table, each line terminated by a newline
   */
  def debugString(
      matrix: Array[Array[Long]],
      rowWidth: Int,
      slots: Int,
      counterWidth: Int,
      counterMax: Long): String = {
    def decimalDigits(n: Long): Int = math.floor(math.log10(n.toDouble)).toInt + 1

    val indexWidth = decimalDigits(rowWidth)
    val cellWidth = math.max(2, decimalDigits(counterMax))
    val indexFormat = s"║ %${indexWidth}d "
    val cellFormat = s" %${cellWidth}d "

    // one horizontal rule of the table, built from the given box-drawing pieces
    def rule(left: String, fill: String, major: String, minor: String, right: String): String = {
      val indexPart = fill * (indexWidth + 2)
      val cellPart = fill * (cellWidth + 2)
      left + indexPart + major + cellPart + ((minor + cellPart) * (slots - 1)) + right + "\n"
    }

    // the first cell of a line follows a double border, the remaining ones a single border
    def border(first: Boolean): String = if (first) "║" else "│"

    val table = new StringBuilder
    table ++= rule("╔", "═", "╦", "╤", "╗")

    // header: blank index cell followed by the slot numbers
    table ++= "║"
    table ++= " " * (indexWidth + 2)
    (0 until slots).foreach { slot =>
      table ++= border(slot == 0)
      table ++= cellFormat.format(slot)
    }
    table ++= "║\n"

    for (row <- matrix.indices; column <- matrix(0).indices) {
      table ++= (if (column == 0) rule("╠", "═", "╬", "╪", "╣") else rule("╟", "─", "╫", "┼", "╢"))
      table ++= indexFormat.format(column)
      // walk the long from the lowest bits upwards, one counter at a time
      Iterator.iterate(0)(_ + counterWidth).takeWhile(_ < 64).foreach { shift =>
        val count = (matrix(row)(column) >>> shift) & counterMax
        table ++= border(shift == 0)
        table ++= cellFormat.format(count)
      }
      table ++= "║\n"
    }

    table ++= rule("╚", "═", "╩", "╧", "╝")
    table.result()
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy