// org.apache.datasketches.memory.XxHash64 (Maven / Gradle / Ivy)
// Show all versions of datasketches-memory. Show documentation.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.datasketches.memory;
import static org.apache.datasketches.memory.UnsafeUtil.ARRAY_BOOLEAN_BASE_OFFSET;
import static org.apache.datasketches.memory.UnsafeUtil.ARRAY_BYTE_BASE_OFFSET;
import static org.apache.datasketches.memory.UnsafeUtil.ARRAY_CHAR_BASE_OFFSET;
import static org.apache.datasketches.memory.UnsafeUtil.ARRAY_DOUBLE_BASE_OFFSET;
import static org.apache.datasketches.memory.UnsafeUtil.ARRAY_FLOAT_BASE_OFFSET;
import static org.apache.datasketches.memory.UnsafeUtil.ARRAY_INT_BASE_OFFSET;
import static org.apache.datasketches.memory.UnsafeUtil.ARRAY_LONG_BASE_OFFSET;
import static org.apache.datasketches.memory.UnsafeUtil.ARRAY_SHORT_BASE_OFFSET;
import static org.apache.datasketches.memory.UnsafeUtil.CHAR_SHIFT;
import static org.apache.datasketches.memory.UnsafeUtil.DOUBLE_SHIFT;
import static org.apache.datasketches.memory.UnsafeUtil.FLOAT_SHIFT;
import static org.apache.datasketches.memory.UnsafeUtil.INT_SHIFT;
import static org.apache.datasketches.memory.UnsafeUtil.LONG_SHIFT;
import static org.apache.datasketches.memory.UnsafeUtil.SHORT_SHIFT;
import static org.apache.datasketches.memory.UnsafeUtil.unsafe;
/**
 * The XxHash is a fast, non-cryptographic, 64-bit hash function that has
 * excellent avalanche and 2-way bit independence properties.
 * This Java version was adapted from the C++ version and from the
 * OpenHFT/Zero-Allocation-Hashing implementation, both referenced below.
 *
 * <p>The C++ source repository:
 * <a href="https://github.com/Cyan4973/xxHash">
 * https://github.com/Cyan4973/xxHash</a>. It has a BSD 2-Clause License:
 * <a href="http://www.opensource.org/licenses/bsd-license.php">
 * http://www.opensource.org/licenses/bsd-license.php</a>. See LICENSE.
 *
 * <p>Portions of this code were adapted from
 * <a href="https://github.com/OpenHFT/Zero-Allocation-Hashing">
 * OpenHFT/Zero-Allocation-Hashing</a>,
 * which has an Apache 2 license as does this site. See LICENSE.
 *
 * @author Lee Rhodes
 */
public class XxHash64 {
  // The five unsigned 64-bit primes of the xxHash64 algorithm, written as signed Java longs.
  private static final long P1 = -7046029288634856825L;
  private static final long P2 = -4417276706812531889L;
  private static final long P3 = 1609587929392839161L;
  private static final long P4 = -8796714831421723037L;
  private static final long P5 = 2870177450012600261L;

  /**
   * Returns the 64-bit hash of the sequence of bytes in the unsafeObject specified by
   * cumOffsetBytes, lengthBytes and a seed.
   *
   * <p>This method cannot validate <i>cumOffsetBytes</i> against the backing resource; the caller
   * is responsible for ensuring that the range
   * <i>[cumOffsetBytes, cumOffsetBytes + lengthBytes)</i> is readable.
   *
   * <p>NOTE(review): multi-byte values are read with <i>unsafe.getLong</i> and
   * <i>unsafe.getInt</i>, which presumably use the native byte order of the platform, so hash
   * values may differ between little-endian and big-endian machines. Confirm before relying on
   * cross-platform stability.
   *
   * @param unsafeObj A reference to the object parameter required by unsafe. It may be null.
   * @param cumOffsetBytes cumulative offset in bytes of this object from the backing resource
   * including any user given offsetBytes. This offset may also include other offset components
   * such as the native off-heap memory address, DirectByteBuffer split offsets, region offsets,
   * and unsafe arrayBaseOffsets.
   * @param lengthBytes the length in bytes of the sequence to be hashed. It must not be negative.
   * @param seed a given seed
   * @return the 64-bit hash of the sequence of bytes in the unsafeObject specified by
   * cumOffsetBytes, lengthBytes and a seed.
   * @throws IllegalArgumentException if lengthBytes is negative
   */
  static long hash(final Object unsafeObj, long cumOffsetBytes, final long lengthBytes,
      final long seed) {
    // A negative length would never satisfy the "remaining == 0" exit of the trailing byte loop
    // and would walk through raw memory, so reject it up front.
    if (lengthBytes < 0) {
      throw new IllegalArgumentException("lengthBytes must not be negative: " + lengthBytes);
    }

    long hash;
    long remaining = lengthBytes;

    if (remaining >= 32) {
      // Four independent accumulators, each consuming one 8-byte lane of every 32-byte stripe.
      long v1 = seed + P1 + P2;
      long v2 = seed + P2;
      long v3 = seed;
      long v4 = seed - P1;

      do {
        v1 = round(v1, unsafe.getLong(unsafeObj, cumOffsetBytes));
        v2 = round(v2, unsafe.getLong(unsafeObj, cumOffsetBytes + 8L));
        v3 = round(v3, unsafe.getLong(unsafeObj, cumOffsetBytes + 16L));
        v4 = round(v4, unsafe.getLong(unsafeObj, cumOffsetBytes + 24L));
        cumOffsetBytes += 32;
        remaining -= 32;
      } while (remaining >= 32);

      // Converge the four accumulators into a single 64-bit value.
      hash = Long.rotateLeft(v1, 1)
          + Long.rotateLeft(v2, 7)
          + Long.rotateLeft(v3, 12)
          + Long.rotateLeft(v4, 18);
      hash = mergeRound(hash, v1);
      hash = mergeRound(hash, v2);
      hash = mergeRound(hash, v3);
      hash = mergeRound(hash, v4);
    } else {
      // Fewer than 32 bytes: no stripes to process, start directly from the seed.
      hash = seed + P5;
    }

    hash += lengthBytes;

    // Consume the remaining input: 8 bytes at a time, then at most one 4-byte word,
    // then at most three single bytes.
    while (remaining >= 8) {
      hash ^= round(0L, unsafe.getLong(unsafeObj, cumOffsetBytes));
      hash = (Long.rotateLeft(hash, 27) * P1) + P4;
      cumOffsetBytes += 8;
      remaining -= 8;
    }

    if (remaining >= 4) { //treat as unsigned ints
      hash ^= (unsafe.getInt(unsafeObj, cumOffsetBytes) & 0XFFFF_FFFFL) * P1;
      hash = (Long.rotateLeft(hash, 23) * P2) + P3;
      cumOffsetBytes += 4;
      remaining -= 4;
    }

    while (remaining != 0) { //treat as unsigned bytes
      hash ^= (unsafe.getByte(unsafeObj, cumOffsetBytes) & 0XFFL) * P5;
      hash = Long.rotateLeft(hash, 11) * P1;
      --remaining;
      ++cumOffsetBytes;
    }

    return avalanche(hash);
  }

  /**
   * Mixes one 64-bit input lane into an accumulator.
   *
   * @param acc the current accumulator value
   * @param input the 64-bit input lane
   * @return the updated accumulator
   */
  private static long round(final long acc, final long input) {
    return Long.rotateLeft(acc + (input * P2), 31) * P1;
  }

  /**
   * Folds one stripe accumulator into the converged hash value.
   *
   * @param hash the converged hash so far
   * @param acc the stripe accumulator to fold in
   * @return the updated hash
   */
  private static long mergeRound(final long hash, final long acc) {
    return ((hash ^ round(0L, acc)) * P1) + P4;
  }

  /**
   * Final bit-mixing step applied to the hash before it is returned.
   *
   * @param hash the hash value after all input has been consumed
   * @return the final hash
   */
  private static long avalanche(long hash) {
    hash ^= hash >>> 33;
    hash *= P2;
    hash ^= hash >>> 29;
    hash *= P3;
    hash ^= hash >>> 32;
    return hash;
  }

  /**
   * Verifies that the range <i>[offset, offset + length)</i> lies within <i>[0, capacity)</i>.
   * All three values are expressed in the same units (array elements).
   *
   * @param offset the requested starting element
   * @param length the requested number of elements
   * @param capacity the number of elements available
   * @throws IllegalArgumentException if offset or length is negative, or if the range extends
   * beyond capacity
   */
  private static void checkBounds(final long offset, final long length, final long capacity) {
    final long end = offset + length;
    // Any negative term sets the sign bit of the OR. If offset and length are both non-negative
    // but their sum overflows, "end" itself is negative, so overflow is caught as well.
    if ((offset | length | end | (capacity - end)) < 0) {
      throw new IllegalArgumentException(
          "Requested range is out of bounds: offset: " + offset + ", length: " + length
          + ", capacity: " + capacity);
    }
  }

  /**
   * Hash the given arr starting at the given offset and continuing for the given length using the
   * given seed. Each boolean is hashed as one byte.
   * @param arr the given array
   * @param offsetBooleans starting at this offset
   * @param lengthBooleans continuing for this length
   * @param seed the given seed
   * @return the hash
   * @throws NullPointerException if arr is null
   * @throws IllegalArgumentException if the requested range is not within the array
   */
  public static long hashBooleans(final boolean[] arr, final long offsetBooleans,
      final long lengthBooleans, final long seed) {
    checkBounds(offsetBooleans, lengthBooleans, arr.length);
    return hash(arr, ARRAY_BOOLEAN_BASE_OFFSET + offsetBooleans, lengthBooleans, seed);
  }

  /**
   * Hash the given arr starting at the given offset and continuing for the given length using the
   * given seed.
   * @param arr the given array
   * @param offsetBytes starting at this offset
   * @param lengthBytes continuing for this length
   * @param seed the given seed
   * @return the hash
   * @throws NullPointerException if arr is null
   * @throws IllegalArgumentException if the requested range is not within the array
   */
  public static long hashBytes(final byte[] arr, final long offsetBytes,
      final long lengthBytes, final long seed) {
    checkBounds(offsetBytes, lengthBytes, arr.length);
    return hash(arr, ARRAY_BYTE_BASE_OFFSET + offsetBytes, lengthBytes, seed);
  }

  /**
   * Hash the given arr starting at the given offset and continuing for the given length using the
   * given seed.
   * @param arr the given array
   * @param offsetShorts starting at this offset
   * @param lengthShorts continuing for this length
   * @param seed the given seed
   * @return the hash
   * @throws NullPointerException if arr is null
   * @throws IllegalArgumentException if the requested range is not within the array
   */
  public static long hashShorts(final short[] arr, final long offsetShorts,
      final long lengthShorts, final long seed) {
    checkBounds(offsetShorts, lengthShorts, arr.length);
    return hash(arr, ARRAY_SHORT_BASE_OFFSET + (offsetShorts << SHORT_SHIFT),
        lengthShorts << SHORT_SHIFT, seed);
  }

  /**
   * Hash the given arr starting at the given offset and continuing for the given length using the
   * given seed.
   * @param arr the given array
   * @param offsetChars starting at this offset
   * @param lengthChars continuing for this length
   * @param seed the given seed
   * @return the hash
   * @throws NullPointerException if arr is null
   * @throws IllegalArgumentException if the requested range is not within the array
   */
  public static long hashChars(final char[] arr, final long offsetChars,
      final long lengthChars, final long seed) {
    checkBounds(offsetChars, lengthChars, arr.length);
    return hash(arr, ARRAY_CHAR_BASE_OFFSET + (offsetChars << CHAR_SHIFT),
        lengthChars << CHAR_SHIFT, seed);
  }

  /**
   * Hash the given arr starting at the given offset and continuing for the given length using the
   * given seed.
   * @param arr the given array
   * @param offsetInts starting at this offset
   * @param lengthInts continuing for this length
   * @param seed the given seed
   * @return the hash
   * @throws NullPointerException if arr is null
   * @throws IllegalArgumentException if the requested range is not within the array
   */
  public static long hashInts(final int[] arr, final long offsetInts,
      final long lengthInts, final long seed) {
    checkBounds(offsetInts, lengthInts, arr.length);
    return hash(arr, ARRAY_INT_BASE_OFFSET + (offsetInts << INT_SHIFT),
        lengthInts << INT_SHIFT, seed);
  }

  /**
   * Hash the given arr starting at the given offset and continuing for the given length using the
   * given seed.
   * @param arr the given array
   * @param offsetLongs starting at this offset
   * @param lengthLongs continuing for this length
   * @param seed the given seed
   * @return the hash
   * @throws NullPointerException if arr is null
   * @throws IllegalArgumentException if the requested range is not within the array
   */
  public static long hashLongs(final long[] arr, final long offsetLongs,
      final long lengthLongs, final long seed) {
    checkBounds(offsetLongs, lengthLongs, arr.length);
    return hash(arr, ARRAY_LONG_BASE_OFFSET + (offsetLongs << LONG_SHIFT),
        lengthLongs << LONG_SHIFT, seed);
  }

  /**
   * Hash the given arr starting at the given offset and continuing for the given length using the
   * given seed.
   * @param arr the given array
   * @param offsetFloats starting at this offset
   * @param lengthFloats continuing for this length
   * @param seed the given seed
   * @return the hash
   * @throws NullPointerException if arr is null
   * @throws IllegalArgumentException if the requested range is not within the array
   */
  public static long hashFloats(final float[] arr, final long offsetFloats,
      final long lengthFloats, final long seed) {
    checkBounds(offsetFloats, lengthFloats, arr.length);
    return hash(arr, ARRAY_FLOAT_BASE_OFFSET + (offsetFloats << FLOAT_SHIFT),
        lengthFloats << FLOAT_SHIFT, seed);
  }

  /**
   * Hash the given arr starting at the given offset and continuing for the given length using the
   * given seed.
   * @param arr the given array
   * @param offsetDoubles starting at this offset
   * @param lengthDoubles continuing for this length
   * @param seed the given seed
   * @return the hash
   * @throws NullPointerException if arr is null
   * @throws IllegalArgumentException if the requested range is not within the array
   */
  public static long hashDoubles(final double[] arr, final long offsetDoubles,
      final long lengthDoubles, final long seed) {
    checkBounds(offsetDoubles, lengthDoubles, arr.length);
    return hash(arr, ARRAY_DOUBLE_BASE_OFFSET + (offsetDoubles << DOUBLE_SHIFT),
        lengthDoubles << DOUBLE_SHIFT, seed);
  }

  /**
   * Hash the characters of the given string starting at the given offset and continuing for the
   * given length using the given seed. The result equals that of hashing the same range of the
   * string's char array.
   * @param str the given string
   * @param offsetChars starting at this offset
   * @param lengthChars continuing for this length
   * @param seed the given seed
   * @return the hash
   * @throws NullPointerException if str is null
   * @throws IllegalArgumentException if the requested range is not within the string
   */
  public static long hashString(final String str, final long offsetChars,
      final long lengthChars, final long seed) {
    checkBounds(offsetChars, lengthChars, str.length());
    // The bounds check guarantees both values fit in an int, so the narrowing casts are safe.
    final int start = (int) offsetChars;
    final int count = (int) lengthChars;
    // Copy only the requested range rather than the entire string.
    final char[] chars = new char[count];
    str.getChars(start, start + count, chars, 0);
    return hashChars(chars, 0L, lengthChars, seed);
  }
}