All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.questdb.std.Hash Maven / Gradle / Ivy

/*******************************************************************************
 *     ___                  _   ____  ____
 *    / _ \ _   _  ___  ___| |_|  _ \| __ )
 *   | | | | | | |/ _ \/ __| __| | | |  _ \
 *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
 *    \__\_\\__,_|\___||___/\__|____/|____/
 *
 *  Copyright (c) 2014-2019 Appsicle
 *  Copyright (c) 2019-2023 QuestDB
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 ******************************************************************************/

package io.questdb.std;

import io.questdb.std.str.DirectUtf8Sequence;
import io.questdb.std.str.Utf8String;

/**
 * Static hashing helpers: a bounded hash for char sequences, polynomial hashes
 * for 64-bit and 128-bit keys and for byte sequences (on-heap and off-heap),
 * and a bit-spreading function.
 */
public final class Hash {

    // Multiplier used by the polynomial hash functions in this class.
    private static final long M2 = 0x7a646e4d;
    // Mask applied by spread(int) to clear the sign bit of the result.
    private static final int SPREAD_HASH_BITS = 0x7fffffff;

    private Hash() {
    }

    /**
     * Computes the hash code of the char sequence and masks it with {@code max}.
     * <p>
     * The hash is first reduced to its lower 28 bits ({@code 0xFFFFFFF}) and then
     * bitwise AND-ed with {@code max}; for a non-negative {@code max} the result
     * is therefore in the range {@code [0, max]}.
     * NOTE(review): {@code max} acts as a bit mask, so callers presumably pass
     * {@code 2^n - 1} - confirm against call sites.
     *
     * @param seq char sequence, may be null
     * @param max bit mask that bounds the returned value
     * @return {@code -1} when {@code seq} is null, otherwise the masked hash code
     */
    public static int boundedHash(CharSequence seq, int max) {
        return seq == null ? -1 : (Chars.hashCode(seq) & 0xFFFFFFF) & max;
    }

    /**
     * Hashes a 64-bit key by multiplying it by {@code M2} and XOR-folding the
     * upper 32 bits of the product into the lower 32 bits.
     *
     * @param k key
     * @return hash code, may be negative
     */
    public static int hashLong(long k) {
        long h = k * M2;
        return (int) (h ^ (h >>> 32));
    }

    /**
     * Hashes a 128-bit key given as two 64-bit halves using the same polynomial
     * scheme as {@link #hashLong(long)}.
     *
     * @param key1 first half of the key
     * @param key2 second half of the key
     * @return hash code, may be negative
     */
    public static int hashLong128(long key1, long key2) {
        long h = key1 * M2 + key2;
        h *= M2;
        return (int) (h ^ (h >>> 32));
    }

    /**
     * Same as {@link #hashMem32(long, long)}, but reads the bytes from an
     * on-heap UTF-8 string instead of direct unsafe access.
     *
     * @param us UTF-8 string to hash
     * @return hash code, may be negative
     */
    public static int hashMem32(Utf8String us) {
        final int len = us.size();
        long h = 0;
        int i = 0;
        // consume 8 bytes at a time while at least 8 bytes remain
        for (; i + 7 < len; i += 8) {
            h = h * M2 + us.longAt(i);
        }
        // at most one 4-byte chunk can remain after the loop above
        if (i + 3 < len) {
            h = h * M2 + us.intAt(i);
            i += 4;
        }
        // up to 3 trailing bytes
        for (; i < len; i++) {
            h = h * M2 + us.byteAt(i);
        }
        h *= M2;
        return (int) (h ^ (h >>> 32));
    }

    /**
     * Calculates a 32-bit integer hash of the memory region starting at the given
     * pointer using a polynomial hash function. The result is the truncated
     * XOR-fold of a 64-bit accumulator, so it may be negative.
     * <p>
     * The function is a modified version of the function from an article
     * by Peter Lawrey.
     *
     * @param p   memory pointer
     * @param len memory length in bytes
     * @return hash code
     */
    public static int hashMem32(long p, long len) {
        long h = 0;
        // The offset has to be a long: len is a long, and with an int offset
        // "i + 7" overflows to a negative value for lengths near or above
        // Integer.MAX_VALUE, which would keep the loop running on bogus addresses.
        long i = 0;
        // consume 8 bytes at a time while at least 8 bytes remain
        for (; i + 7 < len; i += 8) {
            h = h * M2 + Unsafe.getUnsafe().getLong(p + i);
        }
        // at most one 4-byte chunk can remain after the loop above
        if (i + 3 < len) {
            h = h * M2 + Unsafe.getUnsafe().getInt(p + i);
            i += 4;
        }
        // up to 3 trailing bytes
        for (; i < len; i++) {
            h = h * M2 + Unsafe.getUnsafe().getByte(p + i);
        }
        h *= M2;
        return (int) (h ^ (h >>> 32));
    }

    /**
     * Same as {@link #hashMem32(long, long)}, but takes the pointer and the
     * length from a direct UTF-8 sequence.
     *
     * @param seq direct UTF-8 sequence to hash
     * @return hash code, may be negative
     */
    public static int hashMem32(DirectUtf8Sequence seq) {
        return hashMem32(seq.lo(), seq.size());
    }

    /**
     * (copied from ConcurrentHashMap)
     * Spreads (XORs) higher bits of hash to lower and also forces top
     * bit to 0. Because the table uses power-of-two masking, sets of
     * hashes that vary only in bits above the current mask will
     * always collide. (Among known examples are sets of Float keys
     * holding consecutive whole numbers in small tables.) So we
     * apply a transform that spreads the impact of higher bits
     * downward. There is a trade-off between speed, utility, and
     * quality of bit-spreading. Because many common sets of hashes
     * are already reasonably distributed (so don't benefit from
     * spreading), and because we use trees to handle large sets of
     * collisions in bins, we just XOR some shifted bits in the
     * cheapest possible way to reduce systematic lossage, as well as
     * to incorporate impact of the highest bits that would otherwise
     * never be used in index calculations because of table bounds.
     *
     * @param h hash code
     * @return adjusted hash code, always non-negative
     */
    public static int spread(int h) {
        return (h ^ (h >>> 16)) & SPREAD_HASH_BITS;
    }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy