com.flagsense.util.MurmurHash3 Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sdk Show documentation
Show all versions of sdk Show documentation
Java SDK for consuming flagsense services
package com.flagsense.util;
public final class MurmurHash3 {
/** 128 bits of state */
public static final class LongPair {
public long val1;
public long val2;
}
public static final int fmix32(int h) {
h ^= h >>> 16;
h *= 0x85ebca6b;
h ^= h >>> 13;
h *= 0xc2b2ae35;
h ^= h >>> 16;
return h;
}
public static final long fmix64(long k) {
k ^= k >>> 33;
k *= 0xff51afd7ed558ccdL;
k ^= k >>> 33;
k *= 0xc4ceb9fe1a85ec53L;
k ^= k >>> 33;
return k;
}
/** Gets a long from a byte buffer in little endian byte order. */
public static final long getLongLittleEndian(byte[] buf, int offset) {
return ((long)buf[offset+7] << 56) // no mask needed
| ((buf[offset+6] & 0xffL) << 48)
| ((buf[offset+5] & 0xffL) << 40)
| ((buf[offset+4] & 0xffL) << 32)
| ((buf[offset+3] & 0xffL) << 24)
| ((buf[offset+2] & 0xffL) << 16)
| ((buf[offset+1] & 0xffL) << 8)
| ((buf[offset ] & 0xffL)); // no shift needed
}
/** Returns the MurmurHash3_x86_32 hash. */
@SuppressWarnings("fallthrough")
public static int murmurhash3_x86_32(byte[] data, int offset, int len, int seed) {
final int c1 = 0xcc9e2d51;
final int c2 = 0x1b873593;
int h1 = seed;
int roundedEnd = offset + (len & 0xfffffffc); // round down to 4 byte block
for (int i=offset; i>> 17); // ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
h1 = (h1 << 13) | (h1 >>> 19); // ROTL32(h1,13);
h1 = h1*5+0xe6546b64;
}
// tail
int k1 = 0;
switch(len & 0x03) {
case 3:
k1 = (data[roundedEnd + 2] & 0xff) << 16;
// fallthrough
case 2:
k1 |= (data[roundedEnd + 1] & 0xff) << 8;
// fallthrough
case 1:
k1 |= (data[roundedEnd] & 0xff);
k1 *= c1;
k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
}
// finalization
h1 ^= len;
// fmix(h1);
h1 ^= h1 >>> 16;
h1 *= 0x85ebca6b;
h1 ^= h1 >>> 13;
h1 *= 0xc2b2ae35;
h1 ^= h1 >>> 16;
return h1;
}
/** Returns the MurmurHash3_x86_32 hash of the UTF-8 bytes of the String without actually encoding
* the string to a temporary buffer. This is more than 2x faster than hashing the result
* of String.getBytes().
*/
public static int murmurhash3_x86_32(CharSequence data, int offset, int len, int seed) {
final int c1 = 0xcc9e2d51;
final int c2 = 0x1b873593;
int h1 = seed;
int pos = offset;
int end = offset + len;
int k1 = 0;
int k2 = 0;
int shift = 0;
int bits = 0;
int nBytes = 0; // length in UTF8 bytes
while (pos < end) {
int code = data.charAt(pos++);
if (code < 0x80) {
k2 = code;
bits = 8;
}
else if (code < 0x800) {
k2 = (0xC0 | (code >> 6))
| ((0x80 | (code & 0x3F)) << 8);
bits = 16;
}
else if (code < 0xD800 || code > 0xDFFF || pos>=end) {
// we check for pos>=end to encode an unpaired surrogate as 3 bytes.
k2 = (0xE0 | (code >> 12))
| ((0x80 | ((code >> 6) & 0x3F)) << 8)
| ((0x80 | (code & 0x3F)) << 16);
bits = 24;
} else {
// surrogate pair
// int utf32 = pos < end ? (int) data.charAt(pos++) : 0;
int utf32 = (int) data.charAt(pos++);
utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF);
k2 = (0xff & (0xF0 | (utf32 >> 18)))
| ((0x80 | ((utf32 >> 12) & 0x3F))) << 8
| ((0x80 | ((utf32 >> 6) & 0x3F))) << 16
| (0x80 | (utf32 & 0x3F)) << 24;
bits = 32;
}
k1 |= k2 << shift;
// int used_bits = 32 - shift; // how many bits of k2 were used in k1.
// int unused_bits = bits - used_bits; // (bits-(32-shift)) == bits+shift-32 == bits-newshift
shift += bits;
if (shift >= 32) {
// mix after we have a complete word
k1 *= c1;
k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
h1 = (h1 << 13) | (h1 >>> 19); // ROTL32(h1,13);
h1 = h1*5+0xe6546b64;
shift -= 32;
// unfortunately, java won't let you shift 32 bits off, so we need to check for 0
if (shift != 0) {
k1 = k2 >>> (bits-shift); // bits used == bits - newshift
} else {
k1 = 0;
}
nBytes += 4;
}
} // inner
// handle tail
if (shift > 0) {
nBytes += shift >> 3;
k1 *= c1;
k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
}
// finalization
h1 ^= nBytes;
// fmix(h1);
h1 ^= h1 >>> 16;
h1 *= 0x85ebca6b;
h1 ^= h1 >>> 13;
h1 *= 0xc2b2ae35;
h1 ^= h1 >>> 16;
return h1;
}
/** Returns the MurmurHash3_x64_128 hash, placing the result in "out". */
@SuppressWarnings("fallthrough")
public static void murmurhash3_x64_128(byte[] key, int offset, int len, int seed, LongPair out) {
// The original algorithm does have a 32 bit unsigned seed.
// We have to mask to match the behavior of the unsigned types and prevent sign extension.
long h1 = seed & 0x00000000FFFFFFFFL;
long h2 = seed & 0x00000000FFFFFFFFL;
final long c1 = 0x87c37b91114253d5L;
final long c2 = 0x4cf5ad432745937fL;
int roundedEnd = offset + (len & 0xFFFFFFF0); // round down to 16 byte block
for (int i=offset; i