All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.datasketches.hash.MurmurHash3v2 Maven / Gradle / Ivy

Go to download

Core sketch algorithms used alone and by other Java repositories in the DataSketches library.

There is a newer version: 7.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.hash;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.datasketches.memory.UnsafeUtil.unsafe;

import org.apache.datasketches.memory.Memory;
import org.apache.datasketches.memory.WritableMemory;

/**
 * 

The MurmurHash3 is a fast, non-cryptographic, 128-bit hash function that has * excellent avalanche and 2-way bit independence properties.

* *

Austin Appleby's C++ * * MurmurHash3_x64_128(...), final revision 150, * which is in the Public Domain, was the inspiration for this implementation in Java.

* *

This implementation of the MurmurHash3 allows hashing of a block of Memory defined by an offset * and length. The calling API also allows the user to supply the small output array of two longs, * so that the entire hash function is static and free of object allocations.

* *

This implementation produces exactly the same hash result as the * {@link MurmurHash3#hash} function given compatible inputs.

* * @author Lee Rhodes */ public final class MurmurHash3v2 { private static final long C1 = 0x87c37b91114253d5L; private static final long C2 = 0x4cf5ad432745937fL; //Provided for backward compatibility /** * Returns a 128-bit hash of the input. * Provided for compatibility with older version of MurmurHash3, * but empty or null input now returns a hash. * @param in long array * @param seed A long valued seed. * @return the hash */ public static long[] hash(final long[] in, final long seed) { if ((in == null) || (in.length == 0)) { return emptyOrNull(seed, new long[2]); } return hash(Memory.wrap(in), 0L, in.length << 3, seed, new long[2]); } /** * Returns a 128-bit hash of the input. * Provided for compatibility with older version of MurmurHash3, * but empty or null input now returns a hash. * @param in int array * @param seed A long valued seed. * @return the hash */ public static long[] hash(final int[] in, final long seed) { if ((in == null) || (in.length == 0)) { return emptyOrNull(seed, new long[2]); } return hash(Memory.wrap(in), 0L, in.length << 2, seed, new long[2]); } /** * Returns a 128-bit hash of the input. * Provided for compatibility with older version of MurmurHash3, * but empty or null input now returns a hash. * @param in char array * @param seed A long valued seed. * @return the hash */ public static long[] hash(final char[] in, final long seed) { if ((in == null) || (in.length == 0)) { return emptyOrNull(seed, new long[2]); } return hash(Memory.wrap(in), 0L, in.length << 1, seed, new long[2]); } /** * Returns a 128-bit hash of the input. * Provided for compatibility with older version of MurmurHash3, * but empty or null input now returns a hash. * @param in byte array * @param seed A long valued seed. * @return the hash */ public static long[] hash(final byte[] in, final long seed) { if ((in == null) || (in.length == 0)) { return emptyOrNull(seed, new long[2]); } return hash(Memory.wrap(in), 0L, in.length, seed, new long[2]); } //Single primitive inputs /** * Returns a 128-bit hash of the input. * Note the entropy of the resulting hash cannot be more than 64 bits. * @param in a long * @param seed A long valued seed. * @param hashOut A long array of size 2 * @return the hash */ public static long[] hash(final long in, final long seed, final long[] hashOut) { final long h1 = seed ^ mixK1(in); final long h2 = seed; return finalMix128(h1, h2, 8, hashOut); } /** * Returns a 128-bit hash of the input. * Note the entropy of the resulting hash cannot be more than 64 bits. * @param in a double * @param seed A long valued seed. * @param hashOut A long array of size 2 * @return the hash */ public static long[] hash(final double in, final long seed, final long[] hashOut) { final double d = (in == 0.0) ? 0.0 : in; // canonicalize -0.0, 0.0 final long k1 = Double.doubleToLongBits(d); // canonicalize all NaN forms final long h1 = seed ^ mixK1(k1); final long h2 = seed; return finalMix128(h1, h2, 8, hashOut); } /** * Returns a 128-bit hash of the input. * @param in a String * @param seed A long valued seed. * @param hashOut A long array of size 2 * @return the hash */ public static long[] hash(final String in, final long seed, final long[] hashOut) { if ((in == null) || (in.length() == 0)) { return emptyOrNull(seed, hashOut); } final byte[] byteArr = in.getBytes(UTF_8); return hash(Memory.wrap(byteArr), 0L, byteArr.length, seed, hashOut); } //The main API call /** * Returns a 128-bit hash of the input as a long array of size 2. * * @param mem The input Memory. Must be non-null and non-empty. * @param offsetBytes the starting point within Memory. * @param lengthBytes the total number of bytes to be hashed. * @param seed A long valued seed. * @param hashOut the size 2 long array for the resulting 128-bit hash * @return the hash. */ public static long[] hash(final Memory mem, final long offsetBytes, final long lengthBytes, final long seed, final long[] hashOut) { if ((mem == null) || (mem.getCapacity() == 0L)) { return emptyOrNull(seed, hashOut); } final Object uObj = ((WritableMemory) mem).getArray(); //may be null long cumOff = mem.getCumulativeOffset() + offsetBytes; long h1 = seed; long h2 = seed; long rem = lengthBytes; // Process the 128-bit blocks (the body) into the hash while (rem >= 16L) { final long k1 = unsafe.getLong(uObj, cumOff); //0, 16, 32, ... final long k2 = unsafe.getLong(uObj, cumOff + 8); //8, 24, 40, ... cumOff += 16L; rem -= 16L; h1 ^= mixK1(k1); h1 = Long.rotateLeft(h1, 27); h1 += h2; h1 = (h1 * 5) + 0x52dce729L; h2 ^= mixK2(k2); h2 = Long.rotateLeft(h2, 31); h2 += h1; h2 = (h2 * 5) + 0x38495ab5L; } // Get the tail (if any): 1 to 15 bytes if (rem > 0L) { long k1 = 0; long k2 = 0; switch ((int) rem) { case 15: { k2 ^= (unsafe.getByte(uObj, cumOff + 14) & 0xFFL) << 48; } //$FALL-THROUGH$ case 14: { k2 ^= (unsafe.getShort(uObj, cumOff + 12) & 0xFFFFL) << 32; k2 ^= (unsafe.getInt(uObj, cumOff + 8) & 0xFFFFFFFFL); k1 = unsafe.getLong(uObj, cumOff); break; } case 13: { k2 ^= (unsafe.getByte(uObj, cumOff + 12) & 0xFFL) << 32; } //$FALL-THROUGH$ case 12: { k2 ^= (unsafe.getInt(uObj, cumOff + 8) & 0xFFFFFFFFL); k1 = unsafe.getLong(uObj, cumOff); break; } case 11: { k2 ^= (unsafe.getByte(uObj, cumOff + 10) & 0xFFL) << 16; } //$FALL-THROUGH$ case 10: { k2 ^= (unsafe.getShort(uObj, cumOff + 8) & 0xFFFFL); k1 = unsafe.getLong(uObj, cumOff); break; } case 9: { k2 ^= (unsafe.getByte(uObj, cumOff + 8) & 0xFFL); } //$FALL-THROUGH$ case 8: { k1 = unsafe.getLong(uObj, cumOff); break; } case 7: { k1 ^= (unsafe.getByte(uObj, cumOff + 6) & 0xFFL) << 48; } //$FALL-THROUGH$ case 6: { k1 ^= (unsafe.getShort(uObj, cumOff + 4) & 0xFFFFL) << 32; k1 ^= (unsafe.getInt(uObj, cumOff) & 0xFFFFFFFFL); break; } case 5: { k1 ^= (unsafe.getByte(uObj, cumOff + 4) & 0xFFL) << 32; } //$FALL-THROUGH$ case 4: { k1 ^= (unsafe.getInt(uObj, cumOff) & 0xFFFFFFFFL); break; } case 3: { k1 ^= (unsafe.getByte(uObj, cumOff + 2) & 0xFFL) << 16; } //$FALL-THROUGH$ case 2: { k1 ^= (unsafe.getShort(uObj, cumOff) & 0xFFFFL); break; } case 1: { k1 ^= (unsafe.getByte(uObj, cumOff) & 0xFFL); break; } //default: break; //can't happen } h1 ^= mixK1(k1); h2 ^= mixK2(k2); } return finalMix128(h1, h2, lengthBytes, hashOut); } //--Helper methods---------------------------------------------------- /** * Self mix of k1 * * @param k1 input argument * @return mix */ private static long mixK1(long k1) { k1 *= C1; k1 = Long.rotateLeft(k1, 31); k1 *= C2; return k1; } /** * Self mix of k2 * * @param k2 input argument * @return mix */ private static long mixK2(long k2) { k2 *= C2; k2 = Long.rotateLeft(k2, 33); k2 *= C1; return k2; } /** * Final self mix of h*. * * @param h input to final mix * @return mix */ private static long finalMix64(long h) { h ^= h >>> 33; h *= 0xff51afd7ed558ccdL; h ^= h >>> 33; h *= 0xc4ceb9fe1a85ec53L; h ^= h >>> 33; return h; } /** * Finalization: Add the length into the hash and mix * @param h1 intermediate hash * @param h2 intermediate hash * @param lengthBytes the length in bytes * @param hashOut the output array of 2 longs * @return hashOut */ private static long[] finalMix128(long h1, long h2, final long lengthBytes, final long[] hashOut) { h1 ^= lengthBytes; h2 ^= lengthBytes; h1 += h2; h2 += h1; h1 = finalMix64(h1); h2 = finalMix64(h2); h1 += h2; h2 += h1; hashOut[0] = h1; hashOut[1] = h2; return hashOut; } private static long[] emptyOrNull(final long seed, final long[] hashOut) { return finalMix128(seed, seed, 0, hashOut); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy