smile.hash.MurmurHash3 Maven / Gradle / Ivy

Go to download
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package smile.hash;

import java.nio.ByteBuffer;

/**
 * MurmurHash is a very fast, non-cryptographic hash suitable for general hash-based
 * lookup. The name comes from two basic operations, multiply (MU) and rotate (R),
 * used in its inner loop. See http://murmurhash.googlepages.com/ for more details.
 * 
 * The current version is MurmurHash3, which yields a 32-bit or 128-bit hash value.
 * When using 128-bits, the x86 and x64 versions do not produce the same values,
 * as the algorithms are optimized for their respective platforms.
 * 
 * This class is adapted from Apache Cassandra.
 *
 * @author Haifeng Li
 */
public class MurmurHash3 {

    private static long getblock(ByteBuffer key, int offset, int index) {
        int i_8 = index << 3;
        int blockOffset = offset + i_8;
        return ((long) key.get(blockOffset + 0) & 0xff)
                + (((long) key.get(blockOffset + 1) & 0xff) << 8)
                + (((long) key.get(blockOffset + 2) & 0xff) << 16)
                + (((long) key.get(blockOffset + 3) & 0xff) << 24)
                + (((long) key.get(blockOffset + 4) & 0xff) << 32)
                + (((long) key.get(blockOffset + 5) & 0xff) << 40)
                + (((long) key.get(blockOffset + 6) & 0xff) << 48)
                + (((long) key.get(blockOffset + 7) & 0xff) << 56);
    }

    private static long rotl64(long v, int n) {
        return ((v << n) | (v >>> (64 - n)));
    }

    private static long fmix(long k) {
        k ^= k >>> 33;
        k *= 0xff51afd7ed558ccdL;
        k ^= k >>> 33;
        k *= 0xc4ceb9fe1a85ec53L;
        k ^= k >>> 33;

        return k;
    }

    /**
     * 32-bit MurmurHash3.
     */
    public static int hash32(byte[] data, int offset, int len, int seed) {
        int c1 = 0xcc9e2d51;
        int c2 = 0x1b873593;
        int h1 = seed;
        // round down to 4 byte block
        int roundedEnd = offset + (len & 0xfffffffc);
        for (int i = offset; i < roundedEnd; i += 4) {
            // little endian load order
            int k1 = (data[i] & 0xff) | ((data[i + 1] & 0xff) << 8) | ((data[i + 2] & 0xff) << 16)
                    | (data[i + 3] << 24);
            k1 *= c1;
            // ROTL32(k1,15);
            k1 = (k1 << 15) | (k1 >>> 17);
            k1 *= c2;
            h1 ^= k1;
            // ROTL32(h1,13);
            h1 = (h1 << 13) | (h1 >>> 19);
            h1 = h1 * 5 + 0xe6546b64;
        }
        // tail
        int k1 = 0;
        switch (len & 0x03) {
            case 3:
                k1 = (data[roundedEnd + 2] & 0xff) << 16;
                // fallthrough
            case 2:
                k1 |= (data[roundedEnd + 1] & 0xff) << 8;
                // fallthrough
            case 1:
                k1 |= (data[roundedEnd] & 0xff);
                k1 *= c1;
                // ROTL32(k1,15);
                k1 = (k1 << 15) | (k1 >>> 17);
                k1 *= c2;
                h1 ^= k1;
        }
        // finalization
        h1 ^= len;
        // fmix(h1);
        h1 ^= h1 >>> 16;
        h1 *= 0x85ebca6b;
        h1 ^= h1 >>> 13;
        h1 *= 0xc2b2ae35;
        h1 ^= h1 >>> 16;
        return h1;
    }

    /**
     * 128-bit MurmurHash3 for x64.
     * When using 128-bits, the x86 and x64 versions do not produce
     * the same values, as the algorithms are optimized for their
     * respective platforms.
     */
    public static void hash128(ByteBuffer key, int offset, int length, long seed, long[] result) {
        final int nblocks = length >> 4; // Process as 128-bit blocks.

        long h1 = seed;
        long h2 = seed;

        long c1 = 0x87c37b91114253d5L;
        long c2 = 0x4cf5ad432745937fL;

        // ----------
        // body

        for (int i = 0; i < nblocks; i++) {
            long k1 = getblock(key, offset, i * 2 + 0);
            long k2 = getblock(key, offset, i * 2 + 1);

            k1 *= c1;
            k1 = rotl64(k1, 31);
            k1 *= c2;
            h1 ^= k1;

            h1 = rotl64(h1, 27);
            h1 += h2;
            h1 = h1 * 5 + 0x52dce729;

            k2 *= c2;
            k2 = rotl64(k2, 33);
            k2 *= c1;
            h2 ^= k2;

            h2 = rotl64(h2, 31);
            h2 += h1;
            h2 = h2 * 5 + 0x38495ab5;
        }

        // ----------
        // tail

        // Advance offset to the unprocessed tail of the data.
        offset += nblocks * 16;

        long k1 = 0;
        long k2 = 0;

        switch (length & 15) {
            case 15:
                k2 ^= ((long) key.get(offset + 14)) << 48;
            case 14:
                k2 ^= ((long) key.get(offset + 13)) << 40;
            case 13:
                k2 ^= ((long) key.get(offset + 12)) << 32;
            case 12:
                k2 ^= ((long) key.get(offset + 11)) << 24;
            case 11:
                k2 ^= ((long) key.get(offset + 10)) << 16;
            case 10:
                k2 ^= ((long) key.get(offset + 9)) << 8;
            case 9:
                k2 ^= ((long) key.get(offset + 8)) << 0;
                k2 *= c2;
                k2 = rotl64(k2, 33);
                k2 *= c1;
                h2 ^= k2;

            case 8:
                k1 ^= ((long) key.get(offset + 7)) << 56;
            case 7:
                k1 ^= ((long) key.get(offset + 6)) << 48;
            case 6:
                k1 ^= ((long) key.get(offset + 5)) << 40;
            case 5:
                k1 ^= ((long) key.get(offset + 4)) << 32;
            case 4:
                k1 ^= ((long) key.get(offset + 3)) << 24;
            case 3:
                k1 ^= ((long) key.get(offset + 2)) << 16;
            case 2:
                k1 ^= ((long) key.get(offset + 1)) << 8;
            case 1:
                k1 ^= ((long) key.get(offset));
                k1 *= c1;
                k1 = rotl64(k1, 31);
                k1 *= c2;
                h1 ^= k1;
        }

        // ----------
        // finalization

        h1 ^= length;
        h2 ^= length;

        h1 += h2;
        h2 += h1;

        h1 = fmix(h1);
        h2 = fmix(h2);

        h1 += h2;
        h2 += h1;

        result[0] = h1;
        result[1] = h2;
    }
}