All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.virtdata.core.murmur.Murmur3F Maven / Gradle / Ivy

Go to download

Statistical sampling library for use in virtualdataset libraries, based on apache commons math 4

There is a newer version: 2.12.15
Show newest version
/*
 * Copyright (C) 2014-2016 Markus Junginger, greenrobot (http://greenrobot.org)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.virtdata.core.murmur;


import java.math.BigInteger;

/**
 * Incremental implementation of Murmur3F (MurmurHash3_x64_128).
 *
 * <p>Input is fed through the {@code update*} methods. The 128-bit result is
 * computed lazily by the first getter that is called and then cached until more
 * input arrives or {@link #reset()} is invoked, so a value can be read in the
 * middle of a stream without disturbing the running state.
 *
 * <p>Instances carry mutable state and perform no synchronization. The number of
 * hashed bytes is tracked in an {@code int}.
 */
public class Murmur3F implements Checksum128 {

    private static final long C1 = 0x87c37b91114253d5L;
    private static final long C2 = 0x4cf5ad432745937fL;

    /** Number of input bytes consumed per mixing round (two 64-bit lanes). */
    private static final int BLOCK_SIZE = 16;

    /** Seed, stored as an unsigned 32-bit value widened to {@code long}. */
    private final long seed;

    // Running state over all complete 16-byte blocks seen so far.
    private long h1;
    private long h2;
    private int length;

    // Bytes of the current, not yet complete block (0..15 of them).
    private int partialPos;
    private long partialK1;
    private long partialK2;

    // Cached result; only valid while 'finished' is true.
    private boolean finished;
    private long finishedH1;
    private long finishedH2;

    /**
     * Creates a hash initialized with the given seed.
     *
     * @param seed   A seed to initialize this hash with, or the input when using it
     *               as a function. It is interpreted as an unsigned 32-bit value.
     * @param unsafe Accepted for API compatibility only. This implementation never
     *               reads the flag, so both values select the same pure-Java code
     *               path.
     */
    public Murmur3F(int seed, boolean unsafe) {
        this.seed = seed & 0xffffffffL;
        h1 = h2 = this.seed;
    }

    /** Creates a hash with seed 0. */
    public Murmur3F() {
        this(0, false);
    }

    /**
     * Creates a hash with the given seed.
     *
     * @param seed the seed, interpreted as an unsigned 32-bit value
     */
    public Murmur3F(int seed) {
        this(seed, false);
    }

    /**
     * Adds a single byte to the hash.
     *
     * @param b the byte to add; only the low 8 bits are used
     */
    @Override
    public void update(int b) {
        finished = false;

        long bits = b & 0xffL;
        if (partialPos < 8) {
            // The first byte of a lane overwrites it, discarding bits left over from the previous block.
            partialK1 = (partialPos == 0) ? bits : partialK1 | (bits << (partialPos * 8));
        } else {
            int lanePos = partialPos - 8;
            partialK2 = (lanePos == 0) ? bits : partialK2 | (bits << (lanePos * 8));
        }

        partialPos++;
        if (partialPos == BLOCK_SIZE) {
            applyKs(partialK1, partialK2);
            partialPos = 0;
        }
        length++;
    }

    /**
     * Special update method to hash long values very efficiently. The value is consumed as if its eight bytes
     * had been supplied in little endian (LE) order, i.e. least significant byte first.
     * Note, that you cannot mix this with other (previous) hash updates unless they left the hash on an
     * 8-byte boundary, because this method only supports 8-bytes alignment.
     *
     * @param value the long to add to the hash register
     * @throws IllegalStateException if the bytes hashed so far do not end on an 8-byte boundary
     */
    public void updateLongLE(long value) {
        finished = false;
        switch (partialPos) {
            case 0:
                partialK1 = value;
                break;
            case 8:
                partialK2 = value;
                break;
            default:
                throw new IllegalStateException("Cannot mix long with other alignments than 8: " + partialPos);
        }

        partialPos += 8;
        if (partialPos == BLOCK_SIZE) {
            applyKs(partialK1, partialK2);
            partialPos = 0;
        }
        length += 8;
    }

    /**
     * Consider {@link #updateLongLE(long)} for better performance if you do not rely on big endian (BE) byte order.
     *
     * @param value the long to add to the hash register
     * @throws IllegalStateException if the bytes hashed so far do not end on an 8-byte boundary
     */
    public void updateLongBE(long value) {
        updateLongLE(Long.reverseBytes(value));
    }

    /**
     * Adds all bytes of the given array to the hash.
     *
     * @param b the bytes to add
     */
    public void update(byte[] b) {
        update(b, 0, b.length);
    }

    /**
     * Adds {@code len} bytes of {@code b}, starting at index {@code off}, to the hash.
     *
     * @param b   the array holding the bytes to add
     * @param off index of the first byte to add
     * @param len number of bytes to add
     * @throws ArrayIndexOutOfBoundsException if {@code off} or {@code len} is negative, or if
     *                                        {@code off + len} exceeds {@code b.length}; the hash state is
     *                                        left untouched in that case
     */
    @Override
    public void update(byte[] b, int off, int len) {
        // Validate before touching any state so that a bad call cannot corrupt a running hash.
        if (off < 0 || len < 0 || off > b.length - len) {
            throw new ArrayIndexOutOfBoundsException(
                    "off=" + off + ", len=" + len + ", array length=" + b.length);
        }

        finished = false;

        // Top up a pending partial block byte by byte until the input is block aligned again.
        while (partialPos != 0 && len > 0) {
            update(b[off]);
            off++;
            len--;
        }

        // Bulk path: mix all complete 16-byte blocks straight from the array.
        int remainder = len & 0xF;
        int stop = off + len - remainder;
        for (int i = off; i < stop; i += BLOCK_SIZE) {
            applyKs(getLongLE(b, i), getLongLE(b, i + 8));
        }
        length += stop - off;

        // Buffer the trailing bytes as the new partial block.
        for (int i = stop; i < stop + remainder; i++) {
            update(b[i]);
        }
    }

    /** Mixes one complete 16-byte block, given as two little endian longs, into the running state. */
    private void applyKs(long k1, long k2) {
        k1 *= C1;
        k1 = Long.rotateLeft(k1, 31);
        k1 *= C2;
        h1 ^= k1;

        h1 = Long.rotateLeft(h1, 27);
        h1 += h2;
        h1 = h1 * 5 + 0x52dce729;

        k2 *= C2;
        k2 = Long.rotateLeft(k2, 33);
        k2 *= C1;
        h2 ^= k2;

        h2 = Long.rotateLeft(h2, 31);
        h2 += h1;
        h2 = h2 * 5 + 0x38495ab5;
    }

    /**
     * Computes the final hash into {@code finishedH1}/{@code finishedH2} unless a cached result is still valid.
     * Works on copies of {@code h1}/{@code h2}, so hashing can continue afterwards.
     */
    private void checkFinished() {
        if (finished) {
            return;
        }
        finished = true;
        finishedH1 = h1;
        finishedH2 = h2;

        // Tail: fold in the bytes of an incomplete block, if any.
        if (partialPos > 0) {
            if (partialPos > 8) {
                long k2 = partialK2 * C2;
                k2 = Long.rotateLeft(k2, 33);
                k2 *= C1;
                finishedH2 ^= k2;
            }
            long k1 = partialK1 * C1;
            k1 = Long.rotateLeft(k1, 31);
            k1 *= C2;
            finishedH1 ^= k1;
        }

        // Finalization.
        finishedH1 ^= length;
        finishedH2 ^= length;

        finishedH1 += finishedH2;
        finishedH2 += finishedH1;

        finishedH1 = fmix64(finishedH1);
        finishedH2 = fmix64(finishedH2);

        finishedH1 += finishedH2;
        finishedH2 += finishedH1;
    }

    /** Final 64-bit mixing step applied to each half of the hash. */
    private static long fmix64(long k) {
        k ^= k >>> 33;
        k *= 0xff51afd7ed558ccdL;
        k ^= k >>> 33;
        k *= 0xc4ceb9fe1a85ec53L;
        k ^= k >>> 33;
        return k;
    }

    /** Returns the lower 64 bits of the 128 bit hash (you can use just this value as a 64 bit hash). */
    @Override
    public long getValue() {
        checkFinished();
        return finishedH1;
    }

    /** Returns the higher 64 bits of the 128 bit hash. */
    public long getValueHigh() {
        checkFinished();
        return finishedH2;
    }

    /** Returns the 128 bit hash as a non-negative value. */
    public BigInteger getValueBigInteger() {
        byte[] bytes = getValueBytesBigEndian();
        return new BigInteger(1, bytes);
    }

    /** Returns the 128 bit hash in hex, higher 64 bits first, padded with leading 0s to ensure length of 32. */
    public String getValueHexString() {
        checkFinished();
        return getPaddedHexString(finishedH2) + getPaddedHexString(finishedH1);
    }

    /** Formats the value as exactly 16 lower-case hex digits (unsigned). */
    private String getPaddedHexString(long value) {
        String hex = Long.toHexString(value);
        StringBuilder padded = new StringBuilder(16);
        for (int i = hex.length(); i < 16; i++) {
            padded.append('0');
        }
        return padded.append(hex).toString();
    }

    /** Returns the 16 hash bytes, most significant byte first (higher 64 bits, then lower 64 bits). */
    public byte[] getValueBytesBigEndian() {
        checkFinished();
        byte[] bytes = new byte[16];
        for (int i = 0; i < 8; i++) {
            bytes[i] = (byte) ((finishedH2 >>> (56 - i * 8)) & 0xff);
        }
        for (int i = 0; i < 8; i++) {
            bytes[8 + i] = (byte) ((finishedH1 >>> (56 - i * 8)) & 0xff);
        }
        return bytes;
    }

    /** Returns the 16 hash bytes, least significant byte first (lower 64 bits, then higher 64 bits). */
    public byte[] getValueBytesLittleEndian() {
        checkFinished();
        byte[] bytes = new byte[16];
        for (int i = 0; i < 8; i++) {
            bytes[i] = (byte) ((finishedH1 >>> (i * 8)) & 0xff);
        }
        for (int i = 0; i < 8; i++) {
            bytes[8 + i] = (byte) ((finishedH2 >>> (i * 8)) & 0xff);
        }
        return bytes;
    }

    /** Discards all input seen so far and returns the hash to its seeded initial state. */
    @Override
    public void reset() {
        h1 = h2 = seed;
        length = 0;
        partialPos = 0;
        finished = false;

        // The remainder is not really necessary, but looks nicer when debugging
        partialK1 = partialK2 = 0;
        finishedH1 = finishedH2 = 0;
    }

    /** Reads eight bytes starting at {@code index} as a little endian long. */
    private static long getLongLE(byte[] bytes, int index) {
        return (bytes[index] & 0xff) | ((bytes[index + 1] & 0xff) << 8) |
                ((bytes[index + 2] & 0xff) << 16) | ((bytes[index + 3] & 0xffL) << 24) |
                ((bytes[index + 4] & 0xffL) << 32) | ((bytes[index + 5] & 0xffL) << 40) |
                ((bytes[index + 6] & 0xffL) << 48) | (((long) bytes[index + 7]) << 56);
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy