// io.polaris.core.hash.CityHash
package io.polaris.core.hash;
import io.polaris.core.lang.primitive.Bytes;
import java.util.Arrays;
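/**
 * Hash algorithms published by Google: CityHash64 and CityHash128.
 * They compute 64-bit and 128-bit hash values from a string of bytes. These algorithms are
 * not suitable for cryptography, but work well for hash tables and similar uses.
 *
 * <p>Code taken from: https://github.com/rolandhe/string-tools
 * <p>Original algorithm: https://github.com/google/cityhash
 *
 * @author hexiufeng
 * @author Qt
 * @since Aug 01, 2023
 */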
public class CityHash {
  // Some primes between 2^63 and 2^64 for various uses.
  private static final long k0 = 0xc3a5c85c97cb3127L;
  private static final long k1 = 0xb492b66fbe98f273L;
  private static final long k2 = 0x9ae16a3b2f90404fL;
  // Multiplier used by the 128-bit to 64-bit reduction in hash128to64.
  private static final long kMul = 0x9ddfea08eb382d69L;
  // Magic numbers for 32-bit hashing. Copied from Murmur3.
  private static final int c1 = 0xcc9e2d51;
  private static final int c2 = 0x1b873593;

  /**
   * Computes the 32-bit CityHash of the given bytes.
   *
   * @param data the bytes to hash; must not be {@code null}
   * @return the 32-bit hash value
   */
  public static int hash32(byte[] data) {
    int len = data.length;
    if (len <= 24) {
      return len <= 12 ?
        (len <= 4 ? hash32Len0to4(data) : hash32Len5to12(data)) :
        hash32Len13to24(data);
    }
    // len > 24: initialise the three lanes h, g, f from the last 20 bytes.
    int h = len, g = c1 * len, f = g;
    int a0 = rotate32(fetch32(data, len - 4) * c1, 17) * c2;
    int a1 = rotate32(fetch32(data, len - 8) * c1, 17) * c2;
    int a2 = rotate32(fetch32(data, len - 16) * c1, 17) * c2;
    int a3 = rotate32(fetch32(data, len - 12) * c1, 17) * c2;
    int a4 = rotate32(fetch32(data, len - 20) * c1, 17) * c2;
    h ^= a0;
    h = rotate32(h, 19);
    h = h * 5 + 0xe6546b64;
    h ^= a2;
    h = rotate32(h, 19);
    h = h * 5 + 0xe6546b64;
    g ^= a1;
    g = rotate32(g, 19);
    g = g * 5 + 0xe6546b64;
    g ^= a3;
    g = rotate32(g, 19);
    g = g * 5 + 0xe6546b64;
    f += a4;
    f = rotate32(f, 19);
    f = f * 5 + 0xe6546b64;
    // Then walk the input from the front in 20-byte chunks.
    int iters = (len - 1) / 20;
    int pos = 0;
    do {
      a0 = rotate32(fetch32(data, pos) * c1, 17) * c2;
      a1 = fetch32(data, pos + 4);
      a2 = rotate32(fetch32(data, pos + 8) * c1, 17) * c2;
      a3 = rotate32(fetch32(data, pos + 12) * c1, 17) * c2;
      a4 = fetch32(data, pos + 16);
      h ^= a0;
      h = rotate32(h, 18);
      h = h * 5 + 0xe6546b64;
      f += a1;
      f = rotate32(f, 19);
      f = f * c1;
      g += a2;
      g = rotate32(g, 18);
      g = g * 5 + 0xe6546b64;
      h ^= a3 + a1;
      h = rotate32(h, 19);
      h = h * 5 + 0xe6546b64;
      g ^= a4;
      g = Integer.reverseBytes(g) * 5;
      h += a4 * 5;
      h = Integer.reverseBytes(h);
      f += a0;
      // Rotate the three lanes: f <- g, g <- h, h <- f.
      int swapValue = f;
      f = g;
      g = h;
      h = swapValue;
      pos += 20;
    } while (--iters != 0);
    // Final mixing of the three lanes into h.
    g = rotate32(g, 11) * c1;
    g = rotate32(g, 17) * c1;
    f = rotate32(f, 11) * c1;
    f = rotate32(f, 17) * c1;
    h = rotate32(h + g, 19);
    h = h * 5 + 0xe6546b64;
    h = rotate32(h, 17) * c1;
    h = rotate32(h + f, 19);
    h = h * 5 + 0xe6546b64;
    h = rotate32(h, 17) * c1;
    return h;
  }

  /**
   * Computes the 64-bit CityHash of the given bytes.
   *
   * @param data the bytes to hash; must not be {@code null}
   * @return the 64-bit hash value
   */
  public static long hash64(byte[] data) {
    int len = data.length;
    if (len <= 32) {
      if (len <= 16) {
        return hashLen0to16(data);
      } else {
        return hashLen17to32(data);
      }
    } else if (len <= 64) {
      return hashLen33to64(data);
    }
    // For strings over 64 bytes we hash the end first, and then as we
    // loop we keep 56 bytes of state: v, w, x, y, and z.
    long x = fetch64(data, len - 40);
    long y = fetch64(data, len - 16) + fetch64(data, len - 56);
    long z = hashLen16(fetch64(data, len - 48) + len, fetch64(data, len - 24));
    Number128 v = weakHashLen32WithSeeds(data, len - 64, len, z);
    Number128 w = weakHashLen32WithSeeds(data, len - 32, y + k1, x);
    x = x * k1 + fetch64(data, 0);
    // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
    len = (len - 1) & ~63;
    int pos = 0;
    do {
      x = rotate64(x + y + v.getLowValue() + fetch64(data, pos + 8), 37) * k1;
      y = rotate64(y + v.getHighValue() + fetch64(data, pos + 48), 42) * k1;
      x ^= w.getHighValue();
      y += v.getLowValue() + fetch64(data, pos + 40);
      z = rotate64(z + w.getLowValue(), 33) * k1;
      v = weakHashLen32WithSeeds(data, pos, v.getHighValue() * k1, x + w.getLowValue());
      w = weakHashLen32WithSeeds(data, pos + 32, z + w.getHighValue(), y + fetch64(data, pos + 16));
      // Swap z and x.
      long swapValue = x;
      x = z;
      z = swapValue;
      pos += 64;
      len -= 64;
    } while (len != 0);
    return hashLen16(hashLen16(v.getLowValue(), w.getLowValue()) + shiftMix(y) * k1 + z,
      hashLen16(v.getHighValue(), w.getHighValue()) + x);
  }

  /**
   * Computes the 64-bit CityHash of the given bytes, mixed with two seeds.
   *
   * @param data  the bytes to hash; must not be {@code null}
   * @param seed0 first seed (subtracted from the unseeded hash before the final mix)
   * @param seed1 second seed
   * @return the 64-bit hash value
   */
  public static long hash64(byte[] data, long seed0, long seed1) {
    return hashLen16(hash64(data) - seed0, seed1);
  }

  /**
   * Computes the 64-bit CityHash of the given bytes with a single seed; the first seed
   * defaults to the internal constant {@code k2}.
   *
   * @param data the bytes to hash; must not be {@code null}
   * @param seed the second seed
   * @return the 64-bit hash value
   */
  public static long hash64(byte[] data, long seed) {
    return hash64(data, k2, seed);
  }

  /**
   * Computes the 128-bit CityHash of the given bytes.
   *
   * <p>When at least 16 bytes are available, the first 16 bytes are turned into the seed and
   * the remaining bytes are hashed; shorter inputs are hashed whole with the seed (k0, k1).
   *
   * @param data the bytes to hash; must not be {@code null}
   * @return the 128-bit hash value
   */
  public static Number128 hash128(byte[] data) {
    int len = data.length;
    return len >= 16 ?
      hash128(data, 16,
        new Number128(fetch64(data, 0), fetch64(data, 8) + k0)) :
      hash128(data, 0, new Number128(k0, k1));
  }

  /**
   * Computes the 128-bit CityHash of the given bytes using the supplied seed.
   *
   * @param data the bytes to hash; must not be {@code null}
   * @param seed the 128-bit seed; must not be {@code null}
   * @return the 128-bit hash value
   */
  public static Number128 hash128(byte[] data, Number128 seed) {
    return hash128(data, 0, seed);
  }

  //------------------------------------------------------------------------------------------------------- Private method start

  /**
   * Seeded 128-bit hash of {@code byteArray[start..]}.
   *
   * @param byteArray the input bytes
   * @param start     offset of the first byte to hash
   * @param seed      the 128-bit seed
   * @return the 128-bit hash value
   */
  private static Number128 hash128(final byte[] byteArray, int start, final Number128 seed) {
    int len = byteArray.length - start;
    if (len < 128) {
      // cityMurmur hashes a whole array and only reads it, so the copy is needed
      // only when a prefix has to be dropped.
      final byte[] tail = start == 0
        ? byteArray
        : Arrays.copyOfRange(byteArray, start, byteArray.length);
      return cityMurmur(tail, seed);
    }
    // We expect len >= 128 to be the common case. Keep 56 bytes of state:
    // v, w, x, y, and z.
    Number128 v = new Number128(0L, 0L);
    Number128 w = new Number128(0L, 0L);
    long x = seed.getLowValue();
    long y = seed.getHighValue();
    long z = len * k1;
    v.setLowValue(rotate64(y ^ k1, 49) * k1 + fetch64(byteArray, start));
    v.setHighValue(rotate64(v.getLowValue(), 42) * k1 + fetch64(byteArray, start + 8));
    w.setLowValue(rotate64(y + z, 35) * k1 + x);
    w.setHighValue(rotate64(x + fetch64(byteArray, start + 88), 53) * k1);
    // This is the same inner loop as CityHash64(), manually unrolled.
    int pos = start;
    do {
      x = rotate64(x + y + v.getLowValue() + fetch64(byteArray, pos + 8), 37) * k1;
      y = rotate64(y + v.getHighValue() + fetch64(byteArray, pos + 48), 42) * k1;
      x ^= w.getHighValue();
      y += v.getLowValue() + fetch64(byteArray, pos + 40);
      z = rotate64(z + w.getLowValue(), 33) * k1;
      v = weakHashLen32WithSeeds(byteArray, pos, v.getHighValue() * k1, x + w.getLowValue());
      w = weakHashLen32WithSeeds(byteArray, pos + 32, z + w.getHighValue(), y + fetch64(byteArray, pos + 16));
      long swapValue = x;
      x = z;
      z = swapValue;
      pos += 64;
      x = rotate64(x + y + v.getLowValue() + fetch64(byteArray, pos + 8), 37) * k1;
      y = rotate64(y + v.getHighValue() + fetch64(byteArray, pos + 48), 42) * k1;
      x ^= w.getHighValue();
      y += v.getLowValue() + fetch64(byteArray, pos + 40);
      z = rotate64(z + w.getLowValue(), 33) * k1;
      v = weakHashLen32WithSeeds(byteArray, pos, v.getHighValue() * k1, x + w.getLowValue());
      w = weakHashLen32WithSeeds(byteArray, pos + 32, z + w.getHighValue(), y + fetch64(byteArray, pos + 16));
      swapValue = x;
      x = z;
      z = swapValue;
      pos += 64;
      len -= 128;
    } while (len >= 128);
    x += rotate64(v.getLowValue() + z, 49) * k0;
    y = y * k0 + rotate64(w.getHighValue(), 37);
    z = z * k0 + rotate64(w.getLowValue(), 27);
    w.setLowValue(w.getLowValue() * 9);
    v.setLowValue(v.getLowValue() * k0);
    // If 0 < len < 128, hash up to 4 chunks of 32 bytes each from the end of s.
    // A chunk may start before pos; those bytes belong to the 128-byte blocks already consumed.
    for (int tailDone = 0; tailDone < len; ) {
      tailDone += 32;
      y = rotate64(x + y, 42) * k0 + v.getHighValue();
      w.setLowValue(w.getLowValue() + fetch64(byteArray, pos + len - tailDone + 16));
      x = x * k0 + w.getLowValue();
      z += w.getHighValue() + fetch64(byteArray, pos + len - tailDone);
      w.setHighValue(w.getHighValue() + v.getLowValue());
      v = weakHashLen32WithSeeds(byteArray, pos + len - tailDone, v.getLowValue() + z, v.getHighValue());
      v.setLowValue(v.getLowValue() * k0);
    }
    // At this point our 56 bytes of state should contain more than
    // enough information for a strong 128-bit hash. We use two
    // different 56-byte-to-8-byte hashes to get a 16-byte final result.
    x = hashLen16(x, v.getLowValue());
    y = hashLen16(y + z, w.getLowValue());
    return new Number128(hashLen16(x + v.getHighValue(), w.getHighValue()) + y,
      hashLen16(x + w.getHighValue(), y + v.getHighValue()));
  }

  /** 32-bit hash for inputs of 0 to 4 bytes; each byte is folded in as a signed value. */
  private static int hash32Len0to4(final byte[] byteArray) {
    int b = 0;
    int c = 9;
    int len = byteArray.length;
    for (int v : byteArray) {
      b = b * c1 + v;
      c ^= b;
    }
    return fmix(mur(b, mur(len, c)));
  }

  /** 32-bit hash for inputs of 5 to 12 bytes. */
  private static int hash32Len5to12(final byte[] byteArray) {
    int len = byteArray.length;
    int a = len, b = len * 5, c = 9, d = b;
    a += fetch32(byteArray, 0);
    b += fetch32(byteArray, len - 4);
    // Offset is 0 for len 5..7 and 4 for len 8..12.
    c += fetch32(byteArray, ((len >>> 1) & 4));
    return fmix(mur(c, mur(b, mur(a, d))));
  }

  /** 32-bit hash for inputs of 13 to 24 bytes. */
  private static int hash32Len13to24(byte[] byteArray) {
    int len = byteArray.length;
    int a = fetch32(byteArray, (len >>> 1) - 4);
    int b = fetch32(byteArray, 4);
    int c = fetch32(byteArray, len - 8);
    int d = fetch32(byteArray, (len >>> 1));
    int e = fetch32(byteArray, 0);
    int f = fetch32(byteArray, len - 4);
    return fmix(mur(f, mur(e, mur(d, mur(c, mur(b, mur(a, len)))))));
  }

  /** 64-bit hash for inputs of 0 to 16 bytes; an empty input hashes to {@code k2}. */
  private static long hashLen0to16(byte[] byteArray) {
    int len = byteArray.length;
    if (len >= 8) {
      long mul = k2 + len * 2L;
      long a = fetch64(byteArray, 0) + k2;
      long b = fetch64(byteArray, len - 8);
      long c = rotate64(b, 37) * mul + a;
      long d = (rotate64(a, 25) + b) * mul;
      return hashLen16(c, d, mul);
    }
    if (len >= 4) {
      long mul = k2 + len * 2L;
      // Mask so the 32-bit words are treated as unsigned.
      long a = fetch32(byteArray, 0) & 0xffffffffL;
      return hashLen16(len + (a << 3), fetch32(byteArray, len - 4) & 0xffffffffL, mul);
    }
    if (len > 0) {
      // First, middle and last byte, each as an unsigned value.
      int a = byteArray[0] & 0xff;
      int b = byteArray[len >>> 1] & 0xff;
      int c = byteArray[len - 1] & 0xff;
      int y = a + (b << 8);
      int z = len + (c << 2);
      return shiftMix((y * k2) ^ (z * k0)) * k2;
    }
    return k2;
  }

  // This probably works well for 16-byte strings as well, but it may be overkill in that case.
  /** 64-bit hash for inputs of 17 to 32 bytes. */
  private static long hashLen17to32(byte[] byteArray) {
    int len = byteArray.length;
    long mul = k2 + len * 2L;
    long a = fetch64(byteArray, 0) * k1;
    long b = fetch64(byteArray, 8);
    long c = fetch64(byteArray, len - 8) * mul;
    long d = fetch64(byteArray, len - 16) * k2;
    return hashLen16(rotate64(a + b, 43) + rotate64(c, 30) + d,
      a + rotate64(b + k2, 18) + c, mul);
  }

  /** 64-bit hash for inputs of 33 to 64 bytes. */
  private static long hashLen33to64(byte[] byteArray) {
    int len = byteArray.length;
    long mul = k2 + len * 2L;
    long a = fetch64(byteArray, 0) * k2;
    long b = fetch64(byteArray, 8);
    long c = fetch64(byteArray, len - 24);
    long d = fetch64(byteArray, len - 32);
    long e = fetch64(byteArray, 16) * k2;
    long f = fetch64(byteArray, 24) * 9;
    long g = fetch64(byteArray, len - 8);
    long h = fetch64(byteArray, len - 16) * mul;
    long u = rotate64(a + g, 43) + (rotate64(b, 30) + c) * 9;
    long v = ((a + g) ^ d) + f + 1;
    long w = Long.reverseBytes((u + v) * mul) + h;
    long x = rotate64(e + f, 42) + c;
    long y = (Long.reverseBytes((v + w) * mul) + g) * mul;
    long z = e + f + c;
    a = Long.reverseBytes((x + z) * mul + y) + b;
    b = shiftMix((z + a) * mul + d + h) * mul;
    return b + x;
  }

  /**
   * Reads the 8 bytes starting at {@code start} as a little-endian 64-bit word.
   *
   * <p>The byte order is fixed rather than taken from the host CPU so that the same input
   * yields the same hash on every platform; the reference CityHash implementation likewise
   * converts every word it loads to little-endian order.
   */
  private static long fetch64(byte[] byteArray, int start) {
    return (byteArray[start] & 0xffL)
      | (byteArray[start + 1] & 0xffL) << 8
      | (byteArray[start + 2] & 0xffL) << 16
      | (byteArray[start + 3] & 0xffL) << 24
      | (byteArray[start + 4] & 0xffL) << 32
      | (byteArray[start + 5] & 0xffL) << 40
      | (byteArray[start + 6] & 0xffL) << 48
      | (byteArray[start + 7] & 0xffL) << 56;
  }

  /**
   * Reads the 4 bytes starting at {@code start} as a little-endian 32-bit word.
   * The byte order is fixed for the same reason as in {@code fetch64}.
   */
  private static int fetch32(byte[] byteArray, final int start) {
    return (byteArray[start] & 0xff)
      | (byteArray[start + 1] & 0xff) << 8
      | (byteArray[start + 2] & 0xff) << 16
      | (byteArray[start + 3] & 0xff) << 24;
  }

  /** Rotates {@code val} right by {@code shift} bits (a shift of 0 returns {@code val}). */
  private static long rotate64(long val, int shift) {
    return Long.rotateRight(val, shift);
  }

  /** Rotates {@code val} right by {@code shift} bits (a shift of 0 returns {@code val}). */
  private static int rotate32(int val, int shift) {
    return Integer.rotateRight(val, shift);
  }

  /** Mixes two 64-bit values into one using the multiplier {@code mul}. */
  private static long hashLen16(long u, long v, long mul) {
    // Murmur-inspired hashing.
    long a = (u ^ v) * mul;
    a ^= (a >>> 47);
    long b = (v ^ a) * mul;
    b ^= (b >>> 47);
    b *= mul;
    return b;
  }

  /** Mixes two 64-bit values into one by reducing the pair (u, v) with {@code hash128to64}. */
  private static long hashLen16(long u, long v) {
    return hash128to64(new Number128(u, v));
  }

  /** Reduces a 128-bit value to 64 bits using the multiplier {@code kMul}. */
  private static long hash128to64(final Number128 number128) {
    // Murmur-inspired hashing.
    long a = (number128.getLowValue() ^ number128.getHighValue()) * kMul;
    a ^= (a >>> 47);
    long b = (number128.getHighValue() ^ a) * kMul;
    b ^= (b >>> 47);
    b *= kMul;
    return b;
  }

  /** XORs {@code val} with itself shifted right (unsigned) by 47 bits. */
  private static long shiftMix(long val) {
    return val ^ (val >>> 47);
  }

  /** Final 32-bit avalanche step: alternating xor-shifts and multiplications. */
  private static int fmix(int h) {
    h ^= h >>> 16;
    h *= 0x85ebca6b;
    h ^= h >>> 13;
    h *= 0xc2b2ae35;
    h ^= h >>> 16;
    return h;
  }

  /** Combines the 32-bit value {@code a} into the running hash {@code h}. */
  private static int mur(int a, int h) {
    // Helper from Murmur3 for combining two 32-bit values.
    a *= c1;
    a = rotate32(a, 17);
    a *= c2;
    h ^= a;
    h = rotate32(h, 19);
    return h * 5 + 0xe6546b64;
  }

  /** Core of the weak 32-byte hash: mixes four 64-bit words w, x, y, z with seeds a and b. */
  private static Number128 weakHashLen32WithSeeds(
    long w, long x, long y, long z, long a, long b) {
    a += w;
    b = rotate64(b + a + z, 21);
    long c = a;
    a += x;
    a += y;
    b += rotate64(a, 44);
    return new Number128(a + z, b + c);
  }

  // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
  private static Number128 weakHashLen32WithSeeds(
    byte[] byteArray, int start, long a, long b) {
    return weakHashLen32WithSeeds(fetch64(byteArray, start),
      fetch64(byteArray, start + 8),
      fetch64(byteArray, start + 16),
      fetch64(byteArray, start + 24),
      a,
      b);
  }

  /**
   * Seeded 128-bit hash of a whole array; the private {@code hash128} uses it for
   * inputs shorter than 128 bytes.
   *
   * @param byteArray the bytes to hash (the whole array is used; it is only read)
   * @param seed      the 128-bit seed
   * @return the 128-bit hash value
   */
  private static Number128 cityMurmur(final byte[] byteArray, Number128 seed) {
    int len = byteArray.length;
    long a = seed.getLowValue();
    long b = seed.getHighValue();
    long c;
    long d;
    int l = len - 16;
    if (l <= 0) { // len <= 16
      a = shiftMix(a * k1) * k1;
      c = b * k1 + hashLen0to16(byteArray);
      d = shiftMix(a + (len >= 8 ? fetch64(byteArray, 0) : c));
    } else { // len > 16
      c = hashLen16(fetch64(byteArray, len - 8) + k1, a);
      d = hashLen16(b + len, c + fetch64(byteArray, len - 16));
      a += d;
      // Consume 16 bytes per iteration from the front.
      int pos = 0;
      do {
        a ^= shiftMix(fetch64(byteArray, pos) * k1) * k1;
        a *= k1;
        b ^= a;
        c ^= shiftMix(fetch64(byteArray, pos + 8) * k1) * k1;
        c *= k1;
        d ^= c;
        pos += 16;
        l -= 16;
      } while (l > 0);
    }
    a = hashLen16(a, c);
    b = hashLen16(d, b);
    return new Number128(a ^ b, hashLen16(b, a));
  }
  //------------------------------------------------------------------------------------------------------- Private method end
}