All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.landawn.abacus.guava.hash.Hashing Maven / Gradle / Ivy

Go to download

A general programming library in Java/Android. It's easy to learn and simple to use with concise and powerful APIs.

There is a newer version: 5.2.4
Show newest version
/*
 * Copyright (C) 2011 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.landawn.abacus.guava.hash;

import java.security.Key;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.zip.Adler32;
import java.util.zip.CRC32;
import java.util.zip.Checksum;

import com.google.common.hash.HashCode;
import com.landawn.abacus.util.N;

/**
 * Note: It's copied from Google Guava under Apache License 2.0 and modified.
 *
 * Static methods to obtain {@link HashFunction} instances, and other static hashing-related
 * utilities.
 *
 * 

A comparison of the various hash functions can be found * here. * * @author Kevin Bourrillion * @author Dimitris Andreou * @author Kurt Alfred Kluever * @since 11.0 */ public final class Hashing { private Hashing() { // singleton for utility class. } /** * Returns a general-purpose, temporary-use, non-cryptographic hash function. The algorithm * the returned function implements is unspecified and subject to change without notice. * *

Warning: a new random seed for these functions is chosen each time the {@code * Hashing} class is loaded. Do not use this method if hash codes may escape the current * process in any way, for example being sent over RPC, or saved to disk. * *

Repeated calls to this method on the same loaded {@code Hashing} class, using the same value * for {@code minimumBits}, will return identically-behaving {@link HashFunction} instances. * * @param minimumBits a positive integer (can be arbitrarily large) * @return a hash function, described above, that produces hash codes of length {@code * minimumBits} or greater */ public static HashFunction goodFastHash(int minimumBits) { return GuavaHashFunction.from(com.google.common.hash.Hashing.goodFastHash(minimumBits)); } /** * Returns a hash function implementing the 32-bit murmur3 * algorithm, x86 variant (little-endian variant), using the given seed value. * *

The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). * *

This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code * HashFunction} returned by the original {@code murmur3_32} method. * * @since 31.0 */ public static HashFunction murmur3_32(int seed) { //NOSONAR return GuavaHashFunction.from(com.google.common.hash.Hashing.murmur3_32_fixed(seed)); } /** * Returns a hash function implementing the 32-bit murmur3 * algorithm, x86 variant (little-endian variant), using a seed value of zero. * *

The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). * *

This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code * HashFunction} returned by the original {@code murmur3_32} method. * * @since 31.0 */ public static HashFunction murmur3_32() { //NOSONAR return Hash_Holder.MURMUR3_32_FIXED; } /** * Returns a hash function implementing the * 128-bit murmur3 algorithm, * x64 variant (little-endian variant), using the given seed value. * *

The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). * * @param seed * @return */ public static HashFunction murmur3_128(int seed) { //NOSONAR return GuavaHashFunction.from(com.google.common.hash.Hashing.murmur3_128(seed)); } /** * Returns a hash function implementing the * 128-bit murmur3 algorithm, * x64 variant (little-endian variant), using a seed value of zero. * *

The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). * * @return */ public static HashFunction murmur3_128() { //NOSONAR return Hash_Holder.MURMUR3_128; } /** * Returns a hash function implementing the 64-bit * SipHash-2-4 algorithm using a seed value of {@code k = 00 01 02 ...}. * * @return * @since 15.0 */ public static HashFunction sipHash24() { return Hash_Holder.SIP_HASH_24; } /** * Returns a hash function implementing the 64-bit * SipHash-2-4 algorithm using the given seed. * * @param k0 * @param k1 * @return * @since 15.0 */ public static HashFunction sipHash24(long k0, long k1) { return GuavaHashFunction.from(com.google.common.hash.Hashing.sipHash24(k0, k1)); } /** * Returns a hash function implementing the MD5 hash algorithm (128 hash bits). * * @deprecated If you must interoperate with a system that requires MD5, then use this method, * despite its deprecation. But if you can choose your hash function, avoid MD5, which is * neither fast nor secure. As of January 2017, we suggest: *

    *
  • For security: * {@link Hashing#sha256} or a higher-level API. *
  • For speed: {@link Hashing#goodFastHash}, though see its docs for caveats. *
*/ @Deprecated public static HashFunction md5() { return Hash_Holder.MD5; } /** * Returns a hash function implementing the SHA-1 algorithm (160 hash bits). * * @deprecated If you must interoperate with a system that requires SHA-1, then use this method, * despite its deprecation. But if you can choose your hash function, avoid SHA-1, which is * neither fast nor secure. As of January 2017, we suggest: *
    *
  • For security: * {@link Hashing#sha256} or a higher-level API. *
  • For speed: {@link Hashing#goodFastHash}, though see its docs for caveats. *
*/ @Deprecated public static HashFunction sha1() { return Hash_Holder.SHA_1; } /** * Returns a hash function implementing the SHA-256 algorithm (256 hash bits) by delegating to the * SHA-256 {@link MessageDigest}. * * @return */ public static HashFunction sha256() { return Hash_Holder.SHA_256; } /** * Returns a hash function implementing the SHA-384 algorithm (384 hash bits) by delegating to the * SHA-384 {@link MessageDigest}. * * @return * @since 19.0 */ public static HashFunction sha384() { return Hash_Holder.SHA_384; } /** * Returns a hash function implementing the SHA-512 algorithm (512 hash bits) by delegating to the * SHA-512 {@link MessageDigest}. * * @return */ public static HashFunction sha512() { return Hash_Holder.SHA_512; } /** * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the * MD5 (128 hash bits) hash function and the given secret key. * * @param key the secret key * @return * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC * @since 20.0 */ public static HashFunction hmacMd5(Key key) { return GuavaHashFunction.from(com.google.common.hash.Hashing.hmacMd5(key)); } /** * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the * MD5 (128 hash bits) hash function and a {@link SecretSpecKey} created from the given byte array * and the MD5 algorithm. * * @param key the key material of the secret key * @return * @since 20.0 */ public static HashFunction hmacMd5(byte[] key) { return GuavaHashFunction.from(com.google.common.hash.Hashing.hmacMd5(key)); } /** * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the * SHA-1 (160 hash bits) hash function and the given secret key. * * @param key the secret key * @return * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC * @since 20.0 */ public static HashFunction hmacSha1(Key key) { return GuavaHashFunction.from(com.google.common.hash.Hashing.hmacSha1(key)); } /** * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the * SHA-1 (160 hash bits) hash function and a {@link SecretSpecKey} created from the given byte * array and the SHA-1 algorithm. * * @param key the key material of the secret key * @return * @since 20.0 */ public static HashFunction hmacSha1(byte[] key) { return GuavaHashFunction.from(com.google.common.hash.Hashing.hmacSha1(key)); } /** * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the * SHA-256 (256 hash bits) hash function and the given secret key. * * @param key the secret key * @return * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC * @since 20.0 */ public static HashFunction hmacSha256(Key key) { return GuavaHashFunction.from(com.google.common.hash.Hashing.hmacSha256(key)); } /** * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the * SHA-256 (256 hash bits) hash function and a {@link SecretSpecKey} created from the given byte * array and the SHA-256 algorithm. * * @param key the key material of the secret key * @return * @since 20.0 */ public static HashFunction hmacSha256(byte[] key) { return GuavaHashFunction.from(com.google.common.hash.Hashing.hmacSha256(key)); } /** * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the * SHA-512 (512 hash bits) hash function and the given secret key. * * @param key the secret key * @return * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC * @since 20.0 */ public static HashFunction hmacSha512(Key key) { return GuavaHashFunction.from(com.google.common.hash.Hashing.hmacSha512(key)); } /** * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the * SHA-512 (512 hash bits) hash function and a {@link SecretSpecKey} created from the given byte * array and the SHA-512 algorithm. * * @param key the key material of the secret key * @return * @since 20.0 */ public static HashFunction hmacSha512(byte[] key) { return GuavaHashFunction.from(com.google.common.hash.Hashing.hmacSha512(key)); } /** * Returns a hash function implementing the CRC32C checksum algorithm (32 hash bits) as described * by RFC 3720, Section 12.1. * * @return * @since 18.0 */ public static HashFunction crc32c() { return Hash_Holder.CRC_32_C; } /** * Returns a hash function implementing the CRC-32 checksum algorithm (32 hash bits) by delegating * to the {@link CRC32} {@link Checksum}. * *

To get the {@code long} value equivalent to {@link Checksum#getValue()} for a * {@code HashCode} produced by this function, use {@link HashCode#padToLong()}. * * @return * @since 14.0 */ public static HashFunction crc32() { return Hash_Holder.CRC_32; } /** * Returns a hash function implementing the Adler-32 checksum algorithm (32 hash bits) by * delegating to the {@link Adler32} {@link Checksum}. * *

To get the {@code long} value equivalent to {@link Checksum#getValue()} for a * {@code HashCode} produced by this function, use {@link HashCode#padToLong()}. * * @return * @since 14.0 */ public static HashFunction adler32() { return Hash_Holder.ADLER_32; } /** * Returns a hash function implementing FarmHash's Fingerprint64, an open-source algorithm. * *

This is designed for generating persistent fingerprints of strings. It isn't * cryptographically secure, but it produces a high-quality hash with fewer collisions than some * alternatives we've used in the past. FarmHashFingerprints generated using this are byte-wise * identical to those created using the C++ version, but note that this uses unsigned integers * (see {@link com.google.common.primitives.UnsignedInts}). Comparisons between the two should * take this into account. * * @return * @since 20.0 */ public static HashFunction farmHashFingerprint64() { return Hash_Holder.FARMHASH_FINGERPRINT_64; } /** * * @param first * @param second * @return */ public static HashFunction concatenating(final HashFunction first, final HashFunction second) { return concatenating(N.asList(first, second)); } /** * * @param first * @param second * @param third * @return */ public static HashFunction concatenating(final HashFunction first, final HashFunction second, final HashFunction third) { return concatenating(N.asList(first, second, third)); } /** * Returns a hash function which computes its hash code by concatenating the hash codes of the * underlying hash functions together. This can be useful if you need to generate hash codes of a * specific length. * *

For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}. * * @param hashFunctions * @return * @since 19.0 */ public static HashFunction concatenating(final Iterable hashFunctions) { final Iterator iter = hashFunctions.iterator(); final List gHashFunctionList = new ArrayList<>(); while (iter.hasNext()) { gHashFunctionList.add(((GuavaHashFunction) iter.next()).gHashFunction); } return GuavaHashFunction.from(com.google.common.hash.Hashing.concatenating(gHashFunctionList)); } /** * * @param first * @param second * @return */ public static HashCode combineOrdered(final HashCode first, final HashCode second) { return combineOrdered(Arrays.asList(first, second)); } /** * * @param first * @param second * @param third * @return */ public static HashCode combineOrdered(final HashCode first, final HashCode second, final HashCode third) { return combineOrdered(Arrays.asList(first, second, third)); } /** * Returns a hash code, having the same bit length as each of the input hash codes, that combines * the information of these hash codes in an ordered fashion. That is, whenever two equal hash * codes are produced by two calls to this method, it is as likely as possible that each * was computed from the same input hash codes in the same order. * * @param hashCodes * @return * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all * have the same bit length */ public static HashCode combineOrdered(final Iterable hashCodes) { return com.google.common.hash.Hashing.combineOrdered(hashCodes); } /** * * @param first * @param second * @return */ public static HashCode combineUnordered(final HashCode first, final HashCode second) { return combineUnordered(Arrays.asList(first, second)); } /** * * @param first * @param second * @param third * @return */ public static HashCode combineUnordered(final HashCode first, final HashCode second, final HashCode third) { return combineUnordered(Arrays.asList(first, second, third)); } /** * Returns a hash code, having the same bit length as each of the input hash codes, that combines * the information of these hash codes in an unordered fashion. That is, whenever two equal hash * codes are produced by two calls to this method, it is as likely as possible that each * was computed from the same input hash codes in some order. * * @param hashCodes * @return * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all * have the same bit length */ public static HashCode combineUnordered(final Iterable hashCodes) { return com.google.common.hash.Hashing.combineUnordered(hashCodes); } /** * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform manner * that minimizes the need for remapping as {@code buckets} grows. That is, {@code * consistentHash(h, n)} equals: * *

    *
  • {@code n - 1}, with approximate probability {@code 1/n} *
  • {@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) *
* *

This method is suitable for the common use case of dividing work among buckets that meet the * following conditions: * *

    *
  • You want to assign the same fraction of inputs to each bucket. *
  • When you reduce the number of buckets, you can accept that the most recently added buckets * will be removed first. More concretely, if you are dividing traffic among tasks, you can * decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and {@code * consistentHash} will handle it. If, however, you are dividing traffic among servers {@code * alpha}, {@code bravo}, and {@code charlie} and you occasionally need to take each of the * servers offline, {@code consistentHash} will be a poor fit: It provides no way for you to * specify which of the three buckets is disappearing. Thus, if your buckets change from {@code * [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will assign all the old {@code alpha} * traffic to {@code bravo} and all the old {@code bravo} traffic to {@code charlie}, rather than * letting {@code bravo} keep its traffic. *
* * *

See the Wikipedia article on * consistent hashing for more information. * * @param hashCode * @param buckets * @return */ public static int consistentHash(HashCode hashCode, int buckets) { return com.google.common.hash.Hashing.consistentHash(hashCode, buckets); } /** * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform manner that * minimizes the need for remapping as {@code buckets} grows. That is, {@code consistentHash(h, * n)} equals: * *

    *
  • {@code n - 1}, with approximate probability {@code 1/n} *
  • {@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) *
* *

This method is suitable for the common use case of dividing work among buckets that meet the * following conditions: * *

    *
  • You want to assign the same fraction of inputs to each bucket. *
  • When you reduce the number of buckets, you can accept that the most recently added buckets * will be removed first. More concretely, if you are dividing traffic among tasks, you can * decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and {@code * consistentHash} will handle it. If, however, you are dividing traffic among servers {@code * alpha}, {@code bravo}, and {@code charlie} and you occasionally need to take each of the * servers offline, {@code consistentHash} will be a poor fit: It provides no way for you to * specify which of the three buckets is disappearing. Thus, if your buckets change from {@code * [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will assign all the old {@code alpha} * traffic to {@code bravo} and all the old {@code bravo} traffic to {@code charlie}, rather than * letting {@code bravo} keep its traffic. *
* * *

See the Wikipedia article on * consistent hashing for more information. * * @param input * @param buckets * @return */ public static int consistentHash(long input, int buckets) { return com.google.common.hash.Hashing.consistentHash(input, buckets); } private static final class Hash_Holder { //NOSONAR static final HashFunction MURMUR3_32_FIXED = GuavaHashFunction.from(com.google.common.hash.Hashing.murmur3_32_fixed()); static final HashFunction MURMUR3_128 = GuavaHashFunction.from(com.google.common.hash.Hashing.murmur3_128()); static final HashFunction SIP_HASH_24 = GuavaHashFunction.from(com.google.common.hash.Hashing.sipHash24()); @SuppressWarnings("deprecation") static final HashFunction MD5 = GuavaHashFunction.from(com.google.common.hash.Hashing.md5()); @SuppressWarnings("deprecation") static final HashFunction SHA_1 = GuavaHashFunction.from(com.google.common.hash.Hashing.sha1()); static final HashFunction SHA_256 = GuavaHashFunction.from(com.google.common.hash.Hashing.sha256()); static final HashFunction SHA_384 = GuavaHashFunction.from(com.google.common.hash.Hashing.sha384()); static final HashFunction SHA_512 = GuavaHashFunction.from(com.google.common.hash.Hashing.sha512()); static final HashFunction CRC_32_C = GuavaHashFunction.from(com.google.common.hash.Hashing.crc32c()); static final HashFunction CRC_32 = GuavaHashFunction.from(com.google.common.hash.Hashing.crc32()); static final HashFunction ADLER_32 = GuavaHashFunction.from(com.google.common.hash.Hashing.adler32()); static final HashFunction FARMHASH_FINGERPRINT_64 = GuavaHashFunction.from(com.google.common.hash.Hashing.farmHashFingerprint64()); private Hash_Holder() { // singleton for utility class. } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy