All Downloads are FREE. Search and download functionalities are using the official Maven repository.

craterdog.utils.UniversalHashFunction Maven / Gradle / Ivy

/************************************************************************
 * Copyright (c) Crater Dog Technologies(TM).  All Rights Reserved.     *
 ************************************************************************
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.        *
 *                                                                      *
 * This code is free software; you can redistribute it and/or modify it *
 * under the terms of The MIT License (MIT), as published by the Open   *
 * Source Initiative. (See http://opensource.org/licenses/MIT)          *
 ************************************************************************/
package craterdog.utils;

import java.util.Arrays;
import java.util.Date;


/**
 * This class implements the universal hash function algorithm described by
 * Dietzfelbinger, et al, (1997). "A Reliable Randomized Algorithm for the Closest-Pair Problem".
 * The algorithm provides a very efficient (mod free) way to create and use a universal hash
 * function that creates an evenly distributed hash value for hash tables that have a number of
 * buckets that are a power of two.  This function works even for data types that have poor builtin
 * hash functions like are often seen for strings.
 *
 * @author Derk Norton
 */
public final class UniversalHashFunction {

    static private final int WORD_WIDTH = Integer.SIZE;

    private final int d;  // difference between word width and hash width
    private final int a;  // random positive even integer < 2^w
    private final int b;  // random non-negative integer < 2^d


    /**
     * This constructor creates a new hash function with a hash width equal to the word width.
     * The universal hash function will be chosen at random so this will not generate consistent
     * hash functions across different JVM invocations.
     */
    public UniversalHashFunction() {
        this(WORD_WIDTH);
    }


    /**
     * This constructor creates a new hash function with the specified hash width.  The universal
     * hash function will be chosen at random so this will not generate consistent hash functions
     * across different JVM invocations.
     *
     * @param hashWidth The number of bits required of the hash for the index size.  In general,
     * this will be the log base 2 of the number of buckets in the hash table (must be a power of
     * 2).
     */
    public UniversalHashFunction(int hashWidth) {
        this.d = WORD_WIDTH - hashWidth;
        this.a = (RandomUtils.pickRandomInt() & 0x7FFFFFFF) | 0x00000001;  // make sure a is positive and odd
        this.b = RandomUtils.pickRandomIndex(1 << d);  // in range [0..2^d)
    }


    /**
     * This constructor creates a new hash function with the specified hash width, and random
     * numbers a and b.  It should be used when the hash values must be consistent across different
     * JVM invocations.
     *
     * @param hashWidth The number of bits required of the hash for the index size.  In general,
     * this will be the log base 2 of the number of buckets in the hash table (must be a power of
     * 2).
     * @param a A positive odd random integer.
     * @param b A random integer in the range [0..2^(wordWidth - hashWidth)).
     */
    public UniversalHashFunction(int hashWidth, int a, int b) {
        this.d = WORD_WIDTH - hashWidth;
        this.a = a;
        this.b = b;
    }


    /**
     * This method generates a hash value for the specified object using the universal hash
     * function parameters specified in the constructor.  The result will be a hash value
     * containing the hash width number of bits.
     *
     * @param object The object to be hashed.
     * @return A universal hash of the object.
     */
    public int hashValue(Object object) {
        int x;
        // make sure the hash value is consistent across JVM invocations
        if (object instanceof Boolean || object instanceof Character || object instanceof String || object instanceof Number || object instanceof Date) {
            x = object.hashCode();
        } else if (object.getClass().isArray()) {
            Object[] array = (Object[]) object;
            x = Arrays.toString(array).hashCode();  // array.hashCode() returns the memory address of the array!
        } else {
            x = object.toString().hashCode();  // make sure it isn't calling Object.hashCode()!
        }
        int hash = (a * x + b) >>> d;  // unsigned shift right
        return hash;
    }

}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy