All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.common.hash.Fingerprint2011 Maven / Gradle / Ivy

There is a newer version: 3.0.0-alpha-3
Show newest version
// Copyright 2011 Google Inc. All Rights Reserved.

package com.google.common.hash;

import static com.google.common.base.Preconditions.checkPositionIndexes;
import static com.google.common.hash.LittleEndianByteArray.load64;
import static com.google.common.hash.LittleEndianByteArray.load64Safely;
import static java.lang.Long.rotateRight;

import com.google.common.annotations.VisibleForTesting;

/**
 * Implementation of Geoff Pike's fingerprint2011 hash function. See {@link Hashing#fingerprint2011}
 * for information on the behaviour of the algorithm.
 *
 * 

On Intel Core2 2.66, on 1000 bytes, fingerprint2011 takes 0.9 microseconds compared to * fingerprint at 4.0 microseconds and md5 at 4.5 microseconds. * *

Note to maintainers: This implementation relies on signed arithmetic being bit-wise equivalent * to unsigned arithmetic in all cases except: * *

    *
  • comparisons (signed values can be negative) *
  • division (avoided here) *
  • shifting (right shift must be unsigned) *
* * @author [email protected] (Kyle Maddison) * @author [email protected] (Geoff Pike) */ @ElementTypesAreNonnullByDefault final class Fingerprint2011 extends AbstractNonStreamingHashFunction { static final HashFunction FINGERPRINT_2011 = new Fingerprint2011(); // Some primes between 2^63 and 2^64 for various uses. private static final long K0 = 0xa5b85c5e198ed849L; private static final long K1 = 0x8d58ac26afe12e47L; private static final long K2 = 0xc47b6e9e3a970ed3L; private static final long K3 = 0xc6a4a7935bd1e995L; @Override public HashCode hashBytes(byte[] input, int off, int len) { checkPositionIndexes(off, off + len, input.length); return HashCode.fromLong(fingerprint(input, off, len)); } @Override public int bits() { return 64; } @Override public String toString() { return "Hashing.fingerprint2011()"; } // End of public functions. @VisibleForTesting static long fingerprint(byte[] bytes, int offset, int length) { long result; if (length <= 32) { result = murmurHash64WithSeed(bytes, offset, length, K0 ^ K1 ^ K2); } else if (length <= 64) { result = hashLength33To64(bytes, offset, length); } else { result = fullFingerprint(bytes, offset, length); } long u = length >= 8 ? load64(bytes, offset) : K0; long v = length >= 9 ? load64(bytes, offset + length - 8) : K0; result = hash128to64(result + v, u); return result == 0 || result == 1 ? result + ~1 : result; } private static long shiftMix(long val) { return val ^ (val >>> 47); } /** Implementation of Hash128to64 from util/hash/hash128to64.h */ @VisibleForTesting static long hash128to64(long high, long low) { long a = (low ^ high) * K3; a ^= (a >>> 47); long b = (high ^ a) * K3; b ^= (b >>> 47); b *= K3; return b; } /** * Computes intermediate hash of 32 bytes of byte array from the given offset. Results are * returned in the output array - this is 12% faster than allocating new arrays every time. */ private static void weakHashLength32WithSeeds( byte[] bytes, int offset, long seedA, long seedB, long[] output) { long part1 = load64(bytes, offset); long part2 = load64(bytes, offset + 8); long part3 = load64(bytes, offset + 16); long part4 = load64(bytes, offset + 24); seedA += part1; seedB = rotateRight(seedB + seedA + part4, 51); long c = seedA; seedA += part2; seedA += part3; seedB += rotateRight(seedA, 23); output[0] = seedA + part4; output[1] = seedB + c; } /* * Compute an 8-byte hash of a byte array of length greater than 64 bytes. */ private static long fullFingerprint(byte[] bytes, int offset, int length) { // For lengths over 64 bytes we hash the end first, and then as we // loop we keep 56 bytes of state: v, w, x, y, and z. long x = load64(bytes, offset); long y = load64(bytes, offset + length - 16) ^ K1; long z = load64(bytes, offset + length - 56) ^ K0; long[] v = new long[2]; long[] w = new long[2]; weakHashLength32WithSeeds(bytes, offset + length - 64, length, y, v); weakHashLength32WithSeeds(bytes, offset + length - 32, length * K1, K0, w); z += shiftMix(v[1]) * K1; x = rotateRight(z + x, 39) * K1; y = rotateRight(y, 33) * K1; // Decrease length to the nearest multiple of 64, and operate on 64-byte chunks. length = (length - 1) & ~63; do { x = rotateRight(x + y + v[0] + load64(bytes, offset + 16), 37) * K1; y = rotateRight(y + v[1] + load64(bytes, offset + 48), 42) * K1; x ^= w[1]; y ^= v[0]; z = rotateRight(z ^ w[0], 33); weakHashLength32WithSeeds(bytes, offset, v[1] * K1, x + w[0], v); weakHashLength32WithSeeds(bytes, offset + 32, z + w[1], y, w); long tmp = z; z = x; x = tmp; offset += 64; length -= 64; } while (length != 0); return hash128to64(hash128to64(v[0], w[0]) + shiftMix(y) * K1 + z, hash128to64(v[1], w[1]) + x); } private static long hashLength33To64(byte[] bytes, int offset, int length) { long z = load64(bytes, offset + 24); long a = load64(bytes, offset) + (length + load64(bytes, offset + length - 16)) * K0; long b = rotateRight(a + z, 52); long c = rotateRight(a, 37); a += load64(bytes, offset + 8); c += rotateRight(a, 7); a += load64(bytes, offset + 16); long vf = a + z; long vs = b + rotateRight(a, 31) + c; a = load64(bytes, offset + 16) + load64(bytes, offset + length - 32); z = load64(bytes, offset + length - 8); b = rotateRight(a + z, 52); c = rotateRight(a, 37); a += load64(bytes, offset + length - 24); c += rotateRight(a, 7); a += load64(bytes, offset + length - 16); long wf = a + z; long ws = b + rotateRight(a, 31) + c; long r = shiftMix((vf + ws) * K2 + (wf + vs) * K0); return shiftMix(r * K0 + vs) * K2; } @VisibleForTesting static long murmurHash64WithSeed(byte[] bytes, int offset, int length, long seed) { long mul = K3; int topBit = 0x7; int lengthAligned = length & ~topBit; int lengthRemainder = length & topBit; long hash = seed ^ (length * mul); for (int i = 0; i < lengthAligned; i += 8) { long loaded = load64(bytes, offset + i); long data = shiftMix(loaded * mul) * mul; hash ^= data; hash *= mul; } if (lengthRemainder != 0) { long data = load64Safely(bytes, offset + lengthAligned, lengthRemainder); hash ^= data; hash *= mul; } hash = shiftMix(hash) * mul; hash = shiftMix(hash); return hash; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy