All downloads are free. The search and download features use the official Maven repository.

com.twitter.hashing.Hashable.scala Maven / Gradle / Ivy

package com.twitter.hashing

import java.nio.{ByteBuffer, ByteOrder}
import java.security.MessageDigest

/** Type class describing how to turn a `T` into a hash value of type `R`.
 *
 * A `Hashable` is an ordinary function `T => R`. The two combinators below
 * are overridden so that the combined function is itself a `Hashable`
 * instead of a plain `Function1`.
 */
trait Hashable[-T, +R] extends (T => R) { outer =>

  /** Builds a hasher that hashes with this instance and then applies `fn` to the result. */
  override def andThen[A](fn: R => A): Hashable[T, A] =
    new Hashable[T, A] {
      override def apply(input: T): A = {
        val hashed = outer.apply(input)
        fn(hashed)
      }
    }

  /** Builds a hasher that first converts its input with `fn` and then hashes with this instance. */
  override def compose[A](fn: A => T): Hashable[A, R] =
    new Hashable[A, R] {
      override def apply(input: A): R = {
        val converted = fn(input)
        outer.apply(converted)
      }
    }
}

/** Implicit derivations of [[Hashable]] instances from other instances.
 *
 * `object Hashable` extends this trait, so these definitions are inherited
 * members of the `Hashable` companion object.
 */
trait LowPriorityHashable {
  /** Derives an `Int` hasher from a `Long` hasher.
   *
   * The high 32 bits are XOR-ed into the low 32 bits so that both halves of
   * the `Long` contribute to the resulting `Int`.
   */
  implicit def toInt[T](implicit h: Hashable[T,Long]): Hashable[T,Int] =
    h.andThen { long => (long>>32).toInt ^ long.toInt }

  /** Derives a `String` hasher from a byte-array hasher by hashing the string's UTF-8 bytes.
   *
   * The charset is named explicitly: the no-argument `getBytes` uses the
   * platform default charset, which would let the same string hash
   * differently on differently configured JVMs.
   */
  implicit def fromString[T](implicit h: Hashable[Array[Byte],T]): Hashable[String,T] =
    h.compose { (s: String) => s.getBytes("UTF-8") }
}

object Hashable extends LowPriorityHashable {
  /** Compute the hash of `t` using the implicit Hashable instance in scope for its type.
   *
   * If in your scope, you set:
   * implicit def hasher[T]: Hashable[T,Int] = Hashable.hashCode // Bad choice, just an example
   * you can just call:
   * hash("hey") : Int
   * to get a hash value.
   *
   * @param t the value to hash
   * @param hasher the hashing strategy, resolved implicitly
   * @return the result of applying `hasher` to `t`
   */
  def hash[T,R](t: T)(implicit hasher: Hashable[T,R]): R = hasher(t)

  // Some standard hashing:

  /** A hasher that simply delegates to the value's own `hashCode` method. */
  def hashCode[T]: Hashable[T,Int] = new Hashable[T,Int] { def apply(t: T): Int = t.hashCode }

  // Mask selecting the low 32 bits of a Long.
  private[this] val MaxUnsignedInt: Long = 0xFFFFFFFFL

  /**
   * FNV fast hashing algorithm in 32 bits.
   * @see http://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash
   */
  val FNV1_32: Hashable[Array[Byte],Int] = new Hashable[Array[Byte],Int] {
    def apply(key: Array[Byte]): Int = {
      val PRIME: Int = 16777619
      var i = 0
      val len = key.length
      // Accumulate in a Long; only the low 32 bits are kept when returning.
      var rv: Long = 0x811c9dc5L
      while (i < len) {
        // FNV-1 order: multiply first, then XOR in the byte (as an unsigned value).
        rv = (rv * PRIME) ^ (key(i) & 0xff)
        i += 1
      }
      (rv & MaxUnsignedInt).toInt
    }

    override def toString() = "FNV1_32"
  }

  /**
   * FNV fast hashing algorithm in 32 bits, variant with operations reversed.
   * @see http://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash
   */
  val FNV1A_32: Hashable[Array[Byte],Int] = new Hashable[Array[Byte],Int] {
    def apply(key: Array[Byte]): Int = {
      val PRIME: Int = 16777619
      var i = 0
      val len = key.length
      // Accumulate in a Long; only the low 32 bits are kept when returning.
      var rv: Long = 0x811c9dc5L
      while (i < len) {
        // FNV-1a order: XOR in the byte (as an unsigned value) first, then multiply.
        rv = (rv ^ (key(i) & 0xff)) * PRIME
        i += 1
      }
      (rv & MaxUnsignedInt).toInt
    }

    override def toString() = "FNV1A_32"
  }

  /**
   * FNV fast hashing algorithm in 64 bits.
   * @see http://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash
   */
  val FNV1_64: Hashable[Array[Byte],Long] = new Hashable[Array[Byte],Long] {
    def apply(key: Array[Byte]): Long = {
      val PRIME: Long = 1099511628211L
      var i = 0
      val len = key.length
      var rv: Long = 0xcbf29ce484222325L
      while (i < len) {
        // FNV-1 order: multiply first, then XOR in the byte (as an unsigned value).
        rv = (rv * PRIME) ^ (key(i) & 0xff)
        i += 1
      }
      rv
    }

    override def toString() = "FNV1_64"
  }

  /**
   * FNV fast hashing algorithm in 64 bits, variant with operations reversed.
   * @see http://en.wikipedia.org/wiki/Fowler_Noll_Vo_hash
   */
  val FNV1A_64: Hashable[Array[Byte],Long] = new Hashable[Array[Byte],Long] {
    def apply(key: Array[Byte]): Long = {
      val PRIME: Long = 1099511628211L
      var i = 0
      val len = key.length
      var rv: Long = 0xcbf29ce484222325L
      while (i < len) {
        // FNV-1a order: XOR in the byte (as an unsigned value) first, then multiply.
        rv = (rv ^ (key(i) & 0xff)) * PRIME
        i += 1
      }
      rv
    }

    override def toString() = "FNV1A_64"
  }

  /**
   * Ketama's default hash algorithm: the first 4 bytes of the MD5 as a little-endian int.
   * Wow, really? Who thought that was a good way to do it? :(
   */
  val MD5_LEInt: Hashable[Array[Byte],Int] = new Hashable[Array[Byte],Int] {
    def apply(key: Array[Byte]): Int = {
      // A fresh digest instance is obtained on every call.
      val hasher = MessageDigest.getInstance("MD5")
      hasher.update(key)
      val buffer = ByteBuffer.wrap(hasher.digest)
      buffer.order(ByteOrder.LITTLE_ENDIAN)
      // Reads the first four digest bytes in the byte order set above.
      buffer.getInt
    }

    override def toString() = "Ketama"
  }

  /**
   * The default memcache hash algorithm is the ITU-T variant of CRC-32.
   */
  val CRC32_ITU: Hashable[Array[Byte],Int] = new Hashable[Array[Byte],Int] {
    def apply(key: Array[Byte]): Int = {
      var i = 0
      val len = key.length
      var rv: Long = MaxUnsignedInt
      while (i < len) {
        rv = rv ^ (key(i) & 0xff)
        // Bit-by-bit (table-less) update: eight shift/XOR steps per input byte.
        var j = 0
        while (j < 8) {
          if ((rv & 1) != 0) {
            rv = (rv >> 1) ^ 0xedb88320L
          } else {
            rv >>= 1
          }
          j += 1
        }
        i += 1
      }
      // Final inversion, truncated to the low 32 bits.
      ((rv ^ MaxUnsignedInt) & MaxUnsignedInt).toInt
    }

    override def toString() = "CRC32_ITU"
  }

  /**
   * Paul Hsieh's hash function.
   * http://www.azillionmonkeys.com/qed/hash.html
   *
   * NOTE(review): bytes are combined as signed values (no `& 0xff`), so a key
   * byte >= 0x80 sign-extends into the upper bits of the 16-bit chunks. This
   * presumably differs from the reference implementation for non-ASCII keys;
   * it is left untouched because changing it would change existing hash values.
   */
  val HSIEH: Hashable[Array[Byte],Int] = new Hashable[Array[Byte],Int] {
    override def apply(key: Array[Byte]): Int = {
      var hash: Int = 0

      if (key.isEmpty)
        return 0

      // Main loop: consume the key four bytes at a time as two 16-bit chunks.
      for (i <- 0 until key.length / 4) {
        val b0 = key(i*4)
        val b1 = key(i*4 + 1)
        val b2 = key(i*4 + 2)
        val b3 = key(i*4 + 3)
        val s0 = (b1 << 8) | b0
        val s1 = (b3 << 8) | b2

        hash += s0
        val tmp = (s1 << 11) ^ hash
        hash = (hash << 16) ^ tmp
        hash += hash >>> 11
      }

      // Fold in the 0-3 bytes left over after the last full 4-byte group.
      val rem = key.length % 4
      val offset = key.length - rem
      rem match {
        case 3 =>
          val b0 = key(offset)
          val b1 = key(offset + 1)
          val b2 = key(offset + 2)
          val s0 = b1 << 8 | b0
          hash += s0
          hash ^= hash << 16
          hash ^= b2 << 18
          hash += hash >>> 11
        case 2 =>
          val b0 = key(offset)
          val b1 = key(offset + 1)
          val s0 = b1 << 8 | b0
          hash += s0
          hash ^= hash << 11
          hash += hash >>> 17
        case 1 =>
          val b0 = key(offset)
          hash += b0
          hash ^= hash << 10
          hash += hash >>> 1
        case 0 => ()
      }

      // Final mixing of the accumulated state.
      hash ^= hash << 3
      hash += hash >>> 5
      hash ^= hash << 4
      hash += hash >>> 17
      hash ^= hash << 25
      hash += hash >>> 6

      hash
    }

    override def toString() = "Hsieh"
  }

  /**
   * Jenkins Hash Function
   * http://en.wikipedia.org/wiki/Jenkins_hash_function
   *
   * The 64-bit result is built from the two 32-bit state words `b` (high half)
   * and `c` (added as a sign-extended Long).
   *
   * NOTE(review): `rot` uses the arithmetic shift `>>` and key bytes are added
   * as signed values, so this presumably does not match a reference
   * implementation that works on unsigned words. It is left untouched because
   * changing it would change existing hash values.
   */
  val JENKINS: Hashable[Array[Byte],Long] = new Hashable[Array[Byte],Long] {
    override def apply(key: Array[Byte]): Long = {
      var a, b, c = 0xdeadbeef + key.length

      def rot(x: Int, k: Int): Int = (((x) << (k)) | ((x) >> (32 - (k))))

      // Mixes the three state words after each full 12-byte block.
      def mix(): Unit = {
        a -= c; a ^= rot(c, 4); c += b
        b -= a; b ^= rot(a, 6); a += c
        c -= b; c ^= rot(b, 8); b += a
        a -= c; a ^= rot(c, 16); c += b
        b -= a; b ^= rot(a, 19); a += c
        c -= b; c ^= rot(b, 4); b += a
      }

      // Final mixing of the three state words.
      def fin(): Unit = {
        c ^= b; c -= rot(b, 14); a ^= c; a -= rot(c, 11)
        b ^= a; b -= rot(a, 25); c ^= b; c -= rot(b, 16)
        a ^= c; a -= rot(c, 4); b ^= a; b -= rot(a, 14)
        c ^= b; c -= rot(b, 24)
      }

      var block = 0
      // (length - 1) / 12 leaves the last 1-12 bytes of a non-empty key for
      // the tail handling below instead of the block loop.
      val numBlocks = (key.length - 1) / 12
      while (block < numBlocks) {
        val offset = block * 12
        a += key(offset)
        a += key(offset + 1) << 8
        a += key(offset + 2) << 16
        a += key(offset + 3) << 24

        b += key(offset + 4)
        b += key(offset + 5) << 8
        b += key(offset + 6) << 16
        b += key(offset + 7) << 24

        c += key(offset + 8)
        c += key(offset + 9) << 8
        c += key(offset + 10) << 16
        c += key(offset + 11) << 24

        mix()
        block += 1
      }

      val remaining = key.length - (numBlocks * 12)
      val offset = numBlocks * 12

      // Tail: add whichever of the last (up to 12) bytes are present.
      if (remaining > 0) a += key(offset)
      if (remaining > 1) a += key(offset + 1) << 8
      if (remaining > 2) a += key(offset + 2) << 16
      if (remaining > 3) a += key(offset + 3) << 24

      if (remaining > 4) b += key(offset + 4)
      if (remaining > 5) b += key(offset + 5) << 8
      if (remaining > 6) b += key(offset + 6) << 16
      if (remaining > 7) b += key(offset + 7) << 24

      if (remaining > 8) c += key(offset + 8)
      if (remaining > 9) c += key(offset + 9) << 8
      if (remaining > 10) c += key(offset + 10) << 16
      if (remaining > 11) c += key(offset + 11) << 24

      // The final mix is skipped for an empty key.
      if (key.size > 0) fin()

      (b.toLong << 32) + c.toLong
    }

    override def toString() = "Jenkins"
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy