All downloads are free. Search and download functionality uses the official Maven repository.

org.dinky.shaded.paimon.utils.MurmurHashUtils Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.utils;

import org.dinky.shaded.paimon.memory.MemorySegment;

import static org.dinky.shaded.paimon.memory.MemorySegment.BYTE_ARRAY_BASE_OFFSET;
import static org.dinky.shaded.paimon.memory.MemorySegment.UNSAFE;

/**
 * Murmur hash helpers for raw (unsafe) memory, byte arrays and {@link MemorySegment}s. This is
 * inspired by Guava's Murmur3_32HashFunction.
 *
 * <p>Input is consumed four bytes at a time. In the non-"ByWords" variants, every byte left over
 * after the last complete 4-byte word is mixed into the hash as a block of its own. Unless a seed
 * is passed explicitly, {@link #DEFAULT_SEED} is used.
 */
public final class MurmurHashUtils {

    /** Multiplier applied to a block before it is rotated. */
    private static final int C1 = 0xcc9e2d51;

    /** Multiplier applied to a block after it is rotated. */
    private static final int C2 = 0x1b873593;

    /** Seed used by every public entry point of this class. */
    public static final int DEFAULT_SEED = 42;

    private MurmurHashUtils() {
        // do not instantiate
    }

    /**
     * Hashes a region of unsafe memory whose length is a multiple of 4 bytes.
     *
     * @param base base unsafe object
     * @param offset offset for unsafe object
     * @param lengthInBytes length in bytes, must be aligned to 4 bytes
     * @return hash code
     */
    public static int hashUnsafeBytesByWords(Object base, long offset, int lengthInBytes) {
        return hashUnsafeBytesByWords(base, offset, lengthInBytes, DEFAULT_SEED);
    }

    /**
     * Hashes a whole byte array and clears the sign bit of the result.
     *
     * @param bytes the bytes to hash
     * @return hash code with the sign bit masked off, hence never negative
     */
    public static int hashBytesPositive(byte[] bytes) {
        final int signedHash = hashBytes(bytes);
        return signedHash & 0x7fffffff;
    }

    /**
     * Hashes a whole byte array.
     *
     * @param bytes the bytes to hash
     * @return hash code
     */
    public static int hashBytes(byte[] bytes) {
        return hashUnsafeBytes(bytes, BYTE_ARRAY_BASE_OFFSET, bytes.length, DEFAULT_SEED);
    }

    /**
     * Hashes a region of unsafe memory of arbitrary length.
     *
     * @param base base unsafe object
     * @param offset offset for unsafe object
     * @param lengthInBytes length in bytes
     * @return hash code
     */
    public static int hashUnsafeBytes(Object base, long offset, int lengthInBytes) {
        return hashUnsafeBytes(base, offset, lengthInBytes, DEFAULT_SEED);
    }

    /**
     * Hashes a region of a MemorySegment whose length is a multiple of 4 bytes.
     *
     * @param segment segment.
     * @param offset offset for MemorySegment
     * @param lengthInBytes length in MemorySegment, must be aligned to 4 bytes
     * @return hash code
     */
    public static int hashBytesByWords(MemorySegment segment, int offset, int lengthInBytes) {
        return hashBytesByWords(segment, offset, lengthInBytes, DEFAULT_SEED);
    }

    /**
     * Hashes a region of a MemorySegment of arbitrary length.
     *
     * @param segment segment.
     * @param offset offset for MemorySegment
     * @param lengthInBytes length in MemorySegment
     * @return hash code
     */
    public static int hashBytes(MemorySegment segment, int offset, int lengthInBytes) {
        return hashBytes(segment, offset, lengthInBytes, DEFAULT_SEED);
    }

    /** Seeded word-aligned hash over unsafe memory: mixes all words, then finalizes. */
    private static int hashUnsafeBytesByWords(
            Object base, long offset, int lengthInBytes, int seed) {
        return fmix(hashUnsafeBytesByInt(base, offset, lengthInBytes, seed), lengthInBytes);
    }

    /** Seeded word-aligned hash over a MemorySegment: mixes all words, then finalizes. */
    private static int hashBytesByWords(
            MemorySegment segment, int offset, int lengthInBytes, int seed) {
        return fmix(hashBytesByInt(segment, offset, lengthInBytes, seed), lengthInBytes);
    }

    /**
     * Seeded hash over a MemorySegment region of any length: the 4-byte aligned prefix is mixed
     * word by word, then each remaining byte is mixed as a block of its own.
     */
    private static int hashBytes(MemorySegment segment, int offset, int lengthInBytes, int seed) {
        final int tailLength = lengthInBytes % 4;
        final int wordBytes = lengthInBytes - tailLength;

        int hash = hashBytesByInt(segment, offset, wordBytes, seed);

        int position = wordBytes;
        while (position < lengthInBytes) {
            hash = mixH1(hash, mixK1(segment.get(offset + position)));
            position++;
        }
        return fmix(hash, lengthInBytes);
    }

    /**
     * Seeded hash over an unsafe memory region of any length: the 4-byte aligned prefix is mixed
     * word by word, then each remaining byte is mixed as a block of its own.
     */
    private static int hashUnsafeBytes(Object base, long offset, int lengthInBytes, int seed) {
        assert (lengthInBytes >= 0) : "lengthInBytes cannot be negative";
        final int tailLength = lengthInBytes % 4;
        final int wordBytes = lengthInBytes - tailLength;

        int hash = hashUnsafeBytesByInt(base, offset, wordBytes, seed);

        int position = wordBytes;
        while (position < lengthInBytes) {
            // A single trailing byte, widened to int before mixing.
            final int tailByte = UNSAFE.getByte(base, offset + position);
            hash = mixH1(hash, mixK1(tailByte));
            position++;
        }
        return fmix(hash, lengthInBytes);
    }

    /**
     * Mixes a word-aligned unsafe memory region into the seed, one 4-byte int at a time. The
     * result is not finalized.
     */
    private static int hashUnsafeBytesByInt(Object base, long offset, int lengthInBytes, int seed) {
        assert (lengthInBytes % 4 == 0);
        int hash = seed;
        int position = 0;
        while (position < lengthInBytes) {
            final int word = UNSAFE.getInt(base, offset + position);
            hash = mixH1(hash, mixK1(word));
            position += 4;
        }
        return hash;
    }

    /**
     * Mixes a word-aligned MemorySegment region into the seed, one 4-byte int at a time. The
     * result is not finalized.
     */
    private static int hashBytesByInt(
            MemorySegment segment, int offset, int lengthInBytes, int seed) {
        assert (lengthInBytes % 4 == 0);
        int hash = seed;
        int position = 0;
        while (position < lengthInBytes) {
            final int word = segment.getInt(offset + position);
            hash = mixH1(hash, mixK1(word));
            position += 4;
        }
        return hash;
    }

    /** Scrambles one input block: multiply by C1, rotate left by 15, multiply by C2. */
    private static int mixK1(int k1) {
        return Integer.rotateLeft(k1 * C1, 15) * C2;
    }

    /** Folds a scrambled block into the running hash state. */
    private static int mixH1(int h1, int k1) {
        return Integer.rotateLeft(h1 ^ k1, 13) * 5 + 0xe6546b64;
    }

    // Finalization mix - force all bits of a hash block to avalanche
    private static int fmix(int h1, int length) {
        return fmix(h1 ^ length);
    }

    /**
     * Applies the 32-bit finalization mix (xor-shift and multiply rounds) to a value.
     *
     * @param h value to finalize
     * @return mixed value
     */
    public static int fmix(int h) {
        int mixed = (h ^ (h >>> 16)) * 0x85ebca6b;
        mixed = (mixed ^ (mixed >>> 13)) * 0xc2b2ae35;
        return mixed ^ (mixed >>> 16);
    }

    /**
     * Applies the 64-bit finalization mix (xor-shift and multiply rounds) to a value.
     *
     * @param h value to finalize
     * @return mixed value
     */
    public static long fmix(long h) {
        long mixed = (h ^ (h >>> 33)) * 0xff51afd7ed558ccdL;
        mixed = (mixed ^ (mixed >>> 33)) * 0xc4ceb9fe1a85ec53L;
        return mixed ^ (mixed >>> 33);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy