All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twitter.scalding.serialization.MurmurHashUtils.scala Maven / Gradle / Ivy

/*
Copyright 2015 Twitter, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package com.twitter.scalding.serialization

// Taking a few functions from:
// https://guava-libraries.googlecode.com/git/guava/src/com/google/common/hash/Murmur3_32HashFunction.java
/**
 * 32-bit Murmur3-style hashing helpers, adapted from Guava's
 * `Murmur3_32HashFunction`. Every hash in this object starts from the fixed
 * [[MurmurHashUtils.seed]].
 */
object MurmurHashUtils {
  /** Seed used as the initial hash state by every hash function in this object. */
  final val seed = 0xf7ca7fd2
  // Multiplicative constants applied to each input block by `mixK1`.
  private final val C1: Int = 0xcc9e2d51
  private final val C2: Int = 0x1b873593

  /**
   * Hashes a single `Int` as one 4-byte block.
   *
   * @param input the value to hash
   * @return the 32-bit hash of `input`
   */
  final def hashInt(input: Int): Int = {
    val h1 = mixH1(seed, mixK1(input))

    fmix(h1, 4) // length of int is 4 bytes
  }

  /**
   * Hashes a `Long` as two 4-byte blocks: the low 32 bits first, then the
   * high 32 bits.
   *
   * @param input the value to hash
   * @return the 32-bit hash of `input`
   */
  final def hashLong(input: Long): Int = {
    val low = input.toInt
    val high = (input >>> 32).toInt

    val afterLow = mixH1(seed, mixK1(low))
    val afterHigh = mixH1(afterLow, mixK1(high))

    fmix(afterHigh, 8) // 8 bytes
  }

  /**
   * Hashes the raw UTF-16 code units of a `CharSequence`, without encoding
   * them to bytes first. Chars are consumed in pairs (first char in the low
   * 16 bits, second char in the high 16 bits); a trailing unpaired char is
   * mixed in separately.
   *
   * @param input the character sequence to hash; may be empty
   * @return the 32-bit hash of `input`
   */
  final def hashUnencodedChars(input: CharSequence): Int = {
    val len = input.length()
    var h1 = seed

    // Step through the CharSequence 2 chars at a time. `i` indexes the SECOND
    // char of each pair, so it has to start at 1: starting at 0 would read
    // charAt(-1) and throw on every non-empty input.
    var i = 1
    while (i < len) {
      val k1 = input.charAt(i - 1) | (input.charAt(i) << 16)
      h1 = mixH1(h1, mixK1(k1))
      i += 2
    }

    // Deal with the remaining character when the length is odd. The tail is
    // only xor-ed into the state (no mixH1 round).
    if ((len & 1) == 1) {
      h1 ^= mixK1(input.charAt(len - 1).toInt)
    }

    // The length fed to the finalizer is in bytes: 2 bytes per char.
    fmix(h1, (Character.SIZE / java.lang.Byte.SIZE) * len)
  }

  /**
   * Scrambles one 32-bit input block before it is combined into the hash state.
   *
   * @param k1Input the raw block
   * @return the scrambled block
   */
  final def mixK1(k1Input: Int): Int = {
    val scaled = k1Input * C1
    Integer.rotateLeft(scaled, 15) * C2
  }

  /**
   * Combines an (already scrambled) block into the running hash state.
   *
   * @param h1Input the current hash state
   * @param k1Input the block to fold in
   * @return the updated hash state
   */
  final def mixH1(h1Input: Int, k1Input: Int): Int = {
    val rotated = Integer.rotateLeft(h1Input ^ k1Input, 13)
    rotated * 5 + 0xe6546b64
  }

  /**
   * Finalization mix - force all bits of a hash block to avalanche.
   *
   * @param h1Input the hash state after all blocks have been combined
   * @param length  the length value to xor into the state before the final mix
   * @return the finished 32-bit hash
   */
  final def fmix(h1Input: Int, length: Int): Int = {
    var h1 = h1Input ^ length
    h1 ^= h1 >>> 16
    h1 *= 0x85ebca6b
    h1 ^= h1 >>> 13
    h1 *= 0xc2b2ae35
    h1 ^= h1 >>> 16
    h1
  }

  /**
   * Hashes every element produced by an iterator and folds the per-element
   * hashes, in iteration order, into a single hash. The iterator is fully
   * consumed. Element hashes are combined with `mixH1` directly (they are not
   * passed through `mixK1`), and the number of elements is used as the length
   * given to `fmix`.
   *
   * @param a      the elements to hash
   * @param hashFn hash function applied to each element
   * @tparam T the element type
   * @return the combined 32-bit hash; `fmix(seed, 0)` for an empty iterator
   */
  def iteratorHash[T](a: Iterator[T])(hashFn: T => Int): Int = {
    var h1 = seed
    var count = 0
    while (a.hasNext) {
      h1 = mixH1(h1, hashFn(a.next()))
      count += 1
    }
    fmix(h1, count)
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy