All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.mayabot.nlp.common.hash.MurmurHash3 Maven / Gradle / Ivy

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.mayabot.nlp.common.hash;


import java.util.Objects;


/**
 * MurmurHash3 hashing functions.
 */
public enum MurmurHash3 {
    ;

    /**
     * A 128-bits hash.
     */
    public static class Hash128 {
        /**
         * lower 64 bits part
         **/
        public long h1;
        /**
         * higher 64 bits part
         **/
        public long h2;

        @Override
        public boolean equals(Object other) {
            if (this == other) {
                return true;
            }
            if (other == null || getClass() != other.getClass()) {
                return false;
            }
            Hash128 that = (Hash128) other;
            return Objects.equals(this.h1, that.h1)
                    && Objects.equals(this.h2, that.h2);
        }

        @Override
        public int hashCode() {
            return Objects.hash(h1, h2);
        }
    }

    private static long C1 = 0x87c37b91114253d5L;
    private static long C2 = 0x4cf5ad432745937fL;

    protected static long getblock(byte[] key, int offset, int index) {
        int i_8 = index << 3;
        int blockOffset = offset + i_8;
        return ByteUtils.readLongLE(key, blockOffset);
    }

    protected static long fmix(long k) {
        k ^= k >>> 33;
        k *= 0xff51afd7ed558ccdL;
        k ^= k >>> 33;
        k *= 0xc4ceb9fe1a85ec53L;
        k ^= k >>> 33;
        return k;
    }


    /**
     * Compute the hash of the MurmurHash3_x64_128 hashing function.
     */
    @SuppressWarnings("fallthrough") // Intentionally uses fallthrough to implement a well known hashing algorithm
    public static Hash128 hash128(byte[] key, int offset, int length, long seed, Hash128 hash) {
        long h1 = seed;
        long h2 = seed;

        if (length >= 16) {

            final int len16 = length & 0xFFFFFFF0; // higher multiple of 16 that is lower than or equal to length
            final int end = offset + len16;
            for (int i = offset; i < end; i += 16) {
                long k1 = ByteUtils.readLongLE(key, i);
                long k2 = ByteUtils.readLongLE(key, i + 8);

                k1 *= C1;
                k1 = Long.rotateLeft(k1, 31);
                k1 *= C2;
                h1 ^= k1;

                h1 = Long.rotateLeft(h1, 27);
                h1 += h2;
                h1 = h1 * 5 + 0x52dce729;

                k2 *= C2;
                k2 = Long.rotateLeft(k2, 33);
                k2 *= C1;
                h2 ^= k2;

                h2 = Long.rotateLeft(h2, 31);
                h2 += h1;
                h2 = h2 * 5 + 0x38495ab5;
            }

            // Advance offset to the unprocessed tail of the data.
            offset = end;
        }

        long k1 = 0;
        long k2 = 0;

        switch (length & 15) {
            case 15:
                k2 ^= (key[offset + 14] & 0xFFL) << 48;
            case 14:
                k2 ^= (key[offset + 13] & 0xFFL) << 40;
            case 13:
                k2 ^= (key[offset + 12] & 0xFFL) << 32;
            case 12:
                k2 ^= (key[offset + 11] & 0xFFL) << 24;
            case 11:
                k2 ^= (key[offset + 10] & 0xFFL) << 16;
            case 10:
                k2 ^= (key[offset + 9] & 0xFFL) << 8;
            case 9:
                k2 ^= (key[offset + 8] & 0xFFL) << 0;
                k2 *= C2;
                k2 = Long.rotateLeft(k2, 33);
                k2 *= C1;
                h2 ^= k2;

            case 8:
                k1 ^= (key[offset + 7] & 0xFFL) << 56;
            case 7:
                k1 ^= (key[offset + 6] & 0xFFL) << 48;
            case 6:
                k1 ^= (key[offset + 5] & 0xFFL) << 40;
            case 5:
                k1 ^= (key[offset + 4] & 0xFFL) << 32;
            case 4:
                k1 ^= (key[offset + 3] & 0xFFL) << 24;
            case 3:
                k1 ^= (key[offset + 2] & 0xFFL) << 16;
            case 2:
                k1 ^= (key[offset + 1] & 0xFFL) << 8;
            case 1:
                k1 ^= (key[offset] & 0xFFL);
                k1 *= C1;
                k1 = Long.rotateLeft(k1, 31);
                k1 *= C2;
                h1 ^= k1;
        }

        h1 ^= length;
        h2 ^= length;

        h1 += h2;
        h2 += h1;

        h1 = fmix(h1);
        h2 = fmix(h2);

        h1 += h2;
        h2 += h1;

        hash.h1 = h1;
        hash.h2 = h2;
        return hash;
    }

    /**
     * A 64-bit variant which accepts a long to hash, and returns the 64bit long hash.
     * This is useful if the input is already in long (or smaller) format and you don't
     * need the full 128b width and flexibility of
     * {@link MurmurHash3#hash128(byte[], int, int, long, Hash128)}
     * 

* Given the limited nature of this variant, it should be faster than the 128b version * when you only need 128b (many fewer instructions) */ public static long murmur64(long h) { h ^= h >>> 33; h *= 0xff51afd7ed558ccdL; h ^= h >>> 33; h *= 0xc4ceb9fe1a85ec53L; h ^= h >>> 33; return h; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy