com.clickzetta.platform.catalyst.utils.MurmurHashUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of clickzetta-java Show documentation
Show all versions of clickzetta-java Show documentation
The Java SDK for clickzetta's Lakehouse
package com.clickzetta.platform.catalyst.utils;
import com.clickzetta.platform.catalyst.memory.MemorySegment;
import static com.clickzetta.platform.catalyst.memory.MemorySegment.BYTE_ARRAY_BASE_OFFSET;
import static com.clickzetta.platform.catalyst.memory.MemorySegment.UNSAFE;
/**
 * Murmur Hash. This is inspired by Guava's Murmur3_32HashFunction.
 *
 * <p>All entry points produce a 32-bit hash. Input is consumed four bytes at a time; the
 * non-"ByWords" variants additionally accept lengths that are not a multiple of four and mix
 * each remaining byte as its own block (see {@link #hashUnsafeBytes(Object, long, int)}).
 *
 * <p>Preconditions on lengths are checked with {@code assert} only, so they are enforced only
 * when the JVM runs with assertions enabled.
 */
public final class MurmurHashUtils {

    private static final int C1 = 0xcc9e2d51;
    private static final int C2 = 0x1b873593;

    /** Seed used by every public hashing method of this class. */
    public static final int DEFAULT_SEED = 42;

    private MurmurHashUtils() {
        // do not instantiate
    }

    /**
     * Hash unsafe bytes, length must be aligned to 4 bytes.
     *
     * @param base base unsafe object
     * @param offset offset for unsafe object
     * @param lengthInBytes length in bytes; must be a non-negative multiple of 4
     * @return hash code
     */
    public static int hashUnsafeBytesByWords(Object base, long offset, int lengthInBytes) {
        return hashUnsafeBytesByWords(base, offset, lengthInBytes, DEFAULT_SEED);
    }

    /**
     * Hash a byte array and clear the sign bit of the result.
     *
     * @param bytes array to hash; every byte of it is hashed
     * @return {@link #hashBytes(byte[])} masked with {@code 0x7fffffff}, i.e. a value that is
     *     never negative
     */
    public static int hashBytesPositive(byte[] bytes) {
        return hashBytes(bytes) & 0x7fffffff;
    }

    /**
     * Hash a whole byte array with {@link #DEFAULT_SEED}.
     *
     * @param bytes array to hash; every byte of it is hashed
     * @return hash code (may be negative)
     */
    public static int hashBytes(byte[] bytes) {
        return hashUnsafeBytes(bytes, BYTE_ARRAY_BASE_OFFSET, bytes.length, DEFAULT_SEED);
    }

    /**
     * Hash unsafe bytes.
     *
     * <p>The leading {@code lengthInBytes - lengthInBytes % 4} bytes are mixed as 32-bit words.
     * Each remaining byte is then sign-extended to an int and mixed as a separate block.
     * NOTE(review): this appears to differ from the reference MurmurHash3 x86_32 tail handling,
     * so hashes of unaligned lengths should not be expected to match other Murmur3
     * implementations - confirm before relying on interoperability.
     *
     * @param base base unsafe object
     * @param offset offset for unsafe object
     * @param lengthInBytes length in bytes; must not be negative
     * @return hash code
     */
    public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes) {
        return hashUnsafeBytes(base, offset, lengthInBytes, DEFAULT_SEED);
    }

    /**
     * Hash bytes in MemorySegment, length must be aligned to 4 bytes.
     *
     * @param segment segment.
     * @param offset offset for MemorySegment
     * @param lengthInBytes length in MemorySegment; must be a non-negative multiple of 4
     * @return hash code
     */
    public static int hashBytesByWords(MemorySegment segment, int offset, int lengthInBytes) {
        return hashBytesByWords(segment, offset, lengthInBytes, DEFAULT_SEED);
    }

    /**
     * Hash bytes in MemorySegment.
     *
     * <p>Uses the same scheme as {@link #hashUnsafeBytes(Object, long, int)}: whole 32-bit words
     * first, then every leftover byte mixed as its own block.
     *
     * @param segment segment.
     * @param offset offset for MemorySegment
     * @param lengthInBytes length in MemorySegment; must not be negative
     * @return hash code
     */
    public static int hashBytes(MemorySegment segment, int offset, int lengthInBytes) {
        return hashBytes(segment, offset, lengthInBytes, DEFAULT_SEED);
    }

    /** Seeded word-aligned hash over raw memory: mix all words, then finalize with the length. */
    private static int hashUnsafeBytesByWords(
            Object base, long offset, int lengthInBytes, int seed) {
        int h1 = hashUnsafeBytesByInt(base, offset, lengthInBytes, seed);
        return fmix(h1, lengthInBytes);
    }

    /** Seeded word-aligned hash over a segment: mix all words, then finalize with the length. */
    private static int hashBytesByWords(
            MemorySegment segment, int offset, int lengthInBytes, int seed) {
        int h1 = hashBytesByInt(segment, offset, lengthInBytes, seed);
        return fmix(h1, lengthInBytes);
    }

    /** Seeded hash over a segment that also accepts lengths that are not a multiple of 4. */
    private static int hashBytes(MemorySegment segment, int offset, int lengthInBytes, int seed) {
        // Same precondition as hashUnsafeBytes; without it a negative length would silently
        // hash nothing instead of failing.
        assert (lengthInBytes >= 0) : "lengthInBytes cannot be negative";
        int lengthAligned = lengthInBytes - lengthInBytes % 4;
        int h1 = hashBytesByInt(segment, offset, lengthAligned, seed);
        // Tail: each leftover byte (sign-extended) goes through the full k1/h1 mixing.
        for (int i = lengthAligned; i < lengthInBytes; i++) {
            int tailByte = segment.get(offset + i);
            h1 = mixH1(h1, mixK1(tailByte));
        }
        return fmix(h1, lengthInBytes);
    }

    /** Seeded hash over raw memory that also accepts lengths that are not a multiple of 4. */
    private static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) {
        assert (lengthInBytes >= 0) : "lengthInBytes cannot be negative";
        int lengthAligned = lengthInBytes - lengthInBytes % 4;
        int h1 = hashUnsafeBytesByInt(base, offset, lengthAligned, seed);
        // Tail: each leftover byte (sign-extended) goes through the full k1/h1 mixing.
        for (int i = lengthAligned; i < lengthInBytes; i++) {
            int tailByte = UNSAFE.getByte(base, offset + i);
            h1 = mixH1(h1, mixK1(tailByte));
        }
        return fmix(h1, lengthInBytes);
    }

    /**
     * Mixes {@code lengthInBytes / 4} consecutive 32-bit words read from raw memory into the
     * seed. Does not apply the finalization step.
     */
    private static int hashUnsafeBytesByInt(Object base, long offset, int lengthInBytes, int seed) {
        // A negative multiple of 4 would pass the alignment check and skip the loop entirely,
        // so the sign is checked explicitly.
        assert (lengthInBytes >= 0) : "lengthInBytes cannot be negative";
        assert (lengthInBytes % 4 == 0);
        int h1 = seed;
        for (int i = 0; i < lengthInBytes; i += 4) {
            int word = UNSAFE.getInt(base, offset + i);
            h1 = mixH1(h1, mixK1(word));
        }
        return h1;
    }

    /**
     * Mixes {@code lengthInBytes / 4} consecutive 32-bit words read from the segment into the
     * seed. Does not apply the finalization step.
     */
    private static int hashBytesByInt(
            MemorySegment segment, int offset, int lengthInBytes, int seed) {
        // A negative multiple of 4 would pass the alignment check and skip the loop entirely,
        // so the sign is checked explicitly.
        assert (lengthInBytes >= 0) : "lengthInBytes cannot be negative";
        assert (lengthInBytes % 4 == 0);
        int h1 = seed;
        for (int i = 0; i < lengthInBytes; i += 4) {
            int word = segment.getInt(offset + i);
            h1 = mixH1(h1, mixK1(word));
        }
        return h1;
    }

    /** Scrambles one input block: multiply by C1, rotate left by 15, multiply by C2. */
    private static int mixK1(int k1) {
        k1 *= C1;
        k1 = Integer.rotateLeft(k1, 15);
        k1 *= C2;
        return k1;
    }

    /** Folds a scrambled block into the running hash state. */
    private static int mixH1(int h1, int k1) {
        h1 ^= k1;
        h1 = Integer.rotateLeft(h1, 13);
        h1 = h1 * 5 + 0xe6546b64;
        return h1;
    }

    // Finalization mix - force all bits of a hash block to avalanche
    private static int fmix(int h1, int length) {
        // The input length is folded in before the final avalanche.
        h1 ^= length;
        return fmix(h1);
    }

    /**
     * 32-bit finalization mix: alternating xor-shift and multiply steps.
     *
     * @param h value to mix
     * @return mixed value
     */
    public static int fmix(int h) {
        h ^= h >>> 16;
        h *= 0x85ebca6b;
        h ^= h >>> 13;
        h *= 0xc2b2ae35;
        h ^= h >>> 16;
        return h;
    }

    /**
     * 64-bit counterpart of {@link #fmix(int)}: alternating xor-shift and multiply steps.
     *
     * @param h value to mix
     * @return mixed value
     */
    public static long fmix(long h) {
        h ^= (h >>> 33);
        h *= 0xff51afd7ed558ccdL;
        h ^= (h >>> 33);
        h *= 0xc4ceb9fe1a85ec53L;
        h ^= (h >>> 33);
        return h;
    }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy