org.neo4j.hashing.HashFunction Maven / Gradle / Ivy

Go to download
/*
 * Copyright (c) 2018-2020 "Graph Foundation,"
 * Graph Foundation, Inc. [https://graphfoundation.org]
 *
 * This file is part of ONgDB.
 *
 * ONgDB is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
/*
 * Copyright (c) 2002-2020 "Neo4j,"
 * Neo4j Sweden AB [http://neo4j.com]
 *
 * This file is part of Neo4j.
 *
 * Neo4j is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.neo4j.hashing;

/**
 * Contract for hash functions that deterministically derive a value from their input.
 * <p>
 * A hash computation starts by {@link #initialise(long) initialising} the function with a seed (zero is allowed),
 * after which a series of values is {@link #update(long, long) mixed} into the hash state, one after the other.
 * <p>
 * Implementations are free to keep internal state, but they may equally well be stateless when the 64-bit
 * intermediate hash state is enough to represent everything they need to track.
 *
 * @see #incrementalXXH64()
 * @see #javaUtilHashing()
 * @see #xorShift32()
 */
public interface HashFunction
{
    /**
     * Start a new hash computation from the given seed.
     * <p>
     * Computations started from different seeds should end in different final hash values.
     *
     * @param seed The seed to initialise the hash function with.
     * @return The initial intermediate hash state.
     */
    long initialise( long seed );

    /**
     * Mix one more value into an intermediate hash state.
     *
     * @param intermediateHash An intermediate hash state, obtained from {@link #initialise(long)} or from an earlier
     * call to this method.
     * @param value The value to mix into the hash state.
     * @return The new intermediate hash state, which includes the given value.
     */
    long update( long intermediateHash, long value );

    /**
     * Turn an intermediate hash state into the final hash value.
     *
     * @param intermediateHash The intermediate hash state to finish.
     * @return The final hash value.
     */
    long finalise( long intermediateHash );

    /**
     * Fold a 64-bit hash value down to 32 bits.
     *
     * @param hash The 64-bit hash value to fold.
     * @return The 32-bit form of the given hash value.
     */
    default int toInt( long hash )
    {
        // Long.hashCode XORs the upper and lower 32-bit halves of its argument. After truncation to an int this is
        // the same value as folding with a signed shift, because sign extension only affects the discarded bits.
        return Long.hashCode( hash );
    }

    /**
     * Hash one long value into a 64-bit hash, using a seed of zero.
     *
     * @param value The one value to hash.
     * @return The 64-bit hash of the given value.
     */
    default long hashSingleValue( long value )
    {
        long state = initialise( 0 );
        state = update( state, value );
        return finalise( state );
    }

    /**
     * Hash one long value into a 32-bit hash.
     *
     * @param value The one value to hash.
     * @return The 32-bit hash of the given value.
     */
    default int hashSingleValueToInt( long value )
    {
        long fullHash = hashSingleValue( value );
        return toInt( fullHash );
    }

    /**
     * The incremental hash function derived from XXH64.
     * <p>
     * It is built on xxHash (the XXH64 variant), changed so that it consumes its input incrementally in 8-byte blocks
     * rather than in 32-byte blocks. In essence the 32-byte block hash loop is gone, and the 8-byte block tail-loop
     * handles the whole input.
     * <p>
     * Compared to the hash function used for index entries since 2.2.0 it is roughly twice as fast, and it is about
     * 30% faster than optimised murmurhash3 implementations, although the smaller block size keeps it from matching
     * optimised xxHash implementations. Unlike its predecessor it does not allocate. Most of the excellent statistical
     * properties of xxHash are retained: it passes 12 out of 14 tests in SMHasher, failing only the "TwoBytes" and
     * "Zeroes" keyset tests. According to Yann Collet on twitter, the modification is expected to mostly cause
     * degraded performance, and it worsens some of the avalanche statistics.
     * <p>
     * The hash function is stateless, so the returned instance can be cached freely and used by multiple threads
     * concurrently.
     * <p>
     * The xxHash algorithm is originally by Yann Collet, and this implementation takes inspiration from Vsevolod
     * Tolstopyatovs implementation in the Zero Allocation Hashing library.
     * Credit for SMHasher goes to Austin Appleby.
     */
    static HashFunction incrementalXXH64()
    {
        return IncrementalXXH64.INSTANCE;
    }

    /**
     * The hash function that the standard library hash collections use. It produces a hash by cutting the input value
     * into segments and then re-distributing those segments, so that the result is effectively a striped and then
     * jumbled version of the input data. With randomly distributed keys this has a good chance of spreading the hashes
     * evenly over the full hash space.
     * <p>
     * For sequences of numbers it performs exceptionally poorly: the sequence increments all land in the same stripe,
     * producing hash values that end up in the same buckets in collections.
     * <p>
     * The hash function is stateless, so the returned instance can be cached freely and used by multiple threads
     * concurrently.
     */
    static HashFunction javaUtilHashing()
    {
        return JavaUtilHashFunction.INSTANCE;
    }

    /**
     * The default hash function, which is built on a pseudo-random number generator that takes the input value as
     * its seed. It is very fast and does well on most input data, but it only promises a "decent" distribution,
     * not a superb one.
     * <p>
     * The hash function is stateless, so the returned instance can be cached freely and used by multiple threads
     * concurrently.
     */
    static HashFunction xorShift32()
    {
        return XorShift32HashFunction.INSTANCE;
    }
}