// Source: com.landawn.abacus.guava.hash.Hashing, from the abacus-common artifact (Maven / Gradle / Ivy).
/*
* Copyright (C) 2011 The Guava Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.landawn.abacus.guava.hash;
import java.security.Key;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.zip.Adler32;
import java.util.zip.CRC32;
import java.util.zip.Checksum;
import com.google.common.hash.HashCode;
import com.landawn.abacus.util.N;
/**
 * Note: It's copied from Google Guava under Apache License 2.0 and modified.
 *
 * <p>Static methods to obtain {@link HashFunction} instances, and other static hashing-related
 * utilities.
 *
 * <p>A comparison of the various hash functions can be found
 * <a href="http://goo.gl/jS7HH">here</a>.
 *
 * @author Kevin Bourrillion
 * @author Dimitris Andreou
 * @author Kurt Alfred Kluever
 * @since 11.0
 */
public final class Hashing {

    private Hashing() {
        // Utility class: exposes static members only and is never instantiated.
    }

    /**
     * Returns a general-purpose, <i>temporary-use</i>, non-cryptographic hash function. The algorithm
     * the returned function implements is unspecified and subject to change without notice.
     *
     * <p><b>Warning:</b> a new random seed for these functions is chosen each time the {@code
     * Hashing} class is loaded. <b>Do not use this method</b> if hash codes may escape the current
     * process in any way, for example being sent over RPC, or saved to disk.
     *
     * <p>Repeated calls to this method on the same loaded {@code Hashing} class, using the same value
     * for {@code minimumBits}, will return identically-behaving {@link HashFunction} instances.
     *
     * @param minimumBits a positive integer (can be arbitrarily large)
     * @return a hash function, described above, that produces hash codes of length {@code
     *     minimumBits} or greater
     */
    public static HashFunction goodFastHash(int minimumBits) {
        return GuavaHashFunction.from(com.google.common.hash.Hashing.goodFastHash(minimumBits));
    }

    /**
     * Returns a hash function implementing the 32-bit murmur3 algorithm, x86 variant (little-endian
     * variant), using the given seed value.
     *
     * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A).
     *
     * <p>This method delegates to Guava's {@code murmur3_32_fixed(int)}, which fixes a bug in the
     * {@code HashFunction} returned by Guava's original {@code murmur3_32} method.
     *
     * @param seed the seed value
     * @return a 32-bit murmur3 hash function using {@code seed}
     * @since 31.0
     */
    public static HashFunction murmur3_32(int seed) {
        return GuavaHashFunction.from(com.google.common.hash.Hashing.murmur3_32_fixed(seed));
    }

    /**
     * Returns a hash function implementing the 32-bit murmur3 algorithm, x86 variant (little-endian
     * variant), using a seed value of zero.
     *
     * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A).
     *
     * <p>The returned function wraps Guava's {@code murmur3_32_fixed()}, which fixes a bug in the
     * {@code HashFunction} returned by Guava's original {@code murmur3_32} method.
     *
     * @return the shared 32-bit murmur3 hash function with a zero seed
     * @since 31.0
     */
    public static HashFunction murmur3_32() {
        return Hash_Holder.MURMUR3_32_FIXED;
    }

    /**
     * Returns a hash function implementing the 128-bit murmur3 algorithm, x64 variant (little-endian
     * variant), using the given seed value.
     *
     * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F).
     *
     * @param seed the seed value
     * @return a 128-bit murmur3 hash function using {@code seed}
     */
    public static HashFunction murmur3_128(int seed) {
        return GuavaHashFunction.from(com.google.common.hash.Hashing.murmur3_128(seed));
    }

    /**
     * Returns a hash function implementing the 128-bit murmur3 algorithm, x64 variant (little-endian
     * variant), using a seed value of zero.
     *
     * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F).
     *
     * @return the shared 128-bit murmur3 hash function with a zero seed
     */
    public static HashFunction murmur3_128() {
        return Hash_Holder.MURMUR3_128;
    }

    /**
     * Returns a hash function implementing the 64-bit SipHash-2-4 algorithm using a seed value of
     * {@code k = 00 01 02 ...}.
     *
     * @return the shared SipHash-2-4 hash function with the default key
     * @since 15.0
     */
    public static HashFunction sipHash24() {
        return Hash_Holder.SIP_HASH_24;
    }

    /**
     * Returns a hash function implementing the 64-bit SipHash-2-4 algorithm using the given seed.
     *
     * @param k0 the first half of the seed
     * @param k1 the second half of the seed
     * @return a SipHash-2-4 hash function keyed with {@code k0} and {@code k1}
     * @since 15.0
     */
    public static HashFunction sipHash24(long k0, long k1) {
        return GuavaHashFunction.from(com.google.common.hash.Hashing.sipHash24(k0, k1));
    }

    /**
     * Returns a hash function implementing the MD5 hash algorithm (128 hash bits).
     *
     * @return the shared MD5 hash function
     * @deprecated If you must interoperate with a system that requires MD5, then use this method,
     *     despite its deprecation. But if you can choose your hash function, avoid MD5, which is
     *     neither fast nor secure. As of January 2017, we suggest:
     *     <ul>
     *       <li>For security: {@link Hashing#sha256} or a higher-level API.
     *       <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats.
     *     </ul>
     */
    @Deprecated
    public static HashFunction md5() {
        return Hash_Holder.MD5;
    }

    /**
     * Returns a hash function implementing the SHA-1 algorithm (160 hash bits).
     *
     * @return the shared SHA-1 hash function
     * @deprecated If you must interoperate with a system that requires SHA-1, then use this method,
     *     despite its deprecation. But if you can choose your hash function, avoid SHA-1, which is
     *     neither fast nor secure. As of January 2017, we suggest:
     *     <ul>
     *       <li>For security: {@link Hashing#sha256} or a higher-level API.
     *       <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats.
     *     </ul>
     */
    @Deprecated
    public static HashFunction sha1() {
        return Hash_Holder.SHA_1;
    }

    /**
     * Returns a hash function implementing the SHA-256 algorithm (256 hash bits) by delegating to the
     * SHA-256 {@link MessageDigest}.
     *
     * @return the shared SHA-256 hash function
     */
    public static HashFunction sha256() {
        return Hash_Holder.SHA_256;
    }

    /**
     * Returns a hash function implementing the SHA-384 algorithm (384 hash bits) by delegating to the
     * SHA-384 {@link MessageDigest}.
     *
     * @return the shared SHA-384 hash function
     * @since 19.0
     */
    public static HashFunction sha384() {
        return Hash_Holder.SHA_384;
    }

    /**
     * Returns a hash function implementing the SHA-512 algorithm (512 hash bits) by delegating to the
     * SHA-512 {@link MessageDigest}.
     *
     * @return the shared SHA-512 hash function
     */
    public static HashFunction sha512() {
        return Hash_Holder.SHA_512;
    }

    /**
     * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
     * MD5 (128 hash bits) hash function and the given secret key.
     *
     * @param key the secret key
     * @return an HMAC-MD5 hash function keyed with {@code key}
     * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC
     * @since 20.0
     */
    public static HashFunction hmacMd5(Key key) {
        return GuavaHashFunction.from(com.google.common.hash.Hashing.hmacMd5(key));
    }

    /**
     * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
     * MD5 (128 hash bits) hash function and a {@link javax.crypto.spec.SecretKeySpec} created from
     * the given byte array and the MD5 algorithm.
     *
     * @param key the key material of the secret key
     * @return an HMAC-MD5 hash function keyed with {@code key}
     * @since 20.0
     */
    public static HashFunction hmacMd5(byte[] key) {
        return GuavaHashFunction.from(com.google.common.hash.Hashing.hmacMd5(key));
    }

    /**
     * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
     * SHA-1 (160 hash bits) hash function and the given secret key.
     *
     * @param key the secret key
     * @return an HMAC-SHA-1 hash function keyed with {@code key}
     * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC
     * @since 20.0
     */
    public static HashFunction hmacSha1(Key key) {
        return GuavaHashFunction.from(com.google.common.hash.Hashing.hmacSha1(key));
    }

    /**
     * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
     * SHA-1 (160 hash bits) hash function and a {@link javax.crypto.spec.SecretKeySpec} created from
     * the given byte array and the SHA-1 algorithm.
     *
     * @param key the key material of the secret key
     * @return an HMAC-SHA-1 hash function keyed with {@code key}
     * @since 20.0
     */
    public static HashFunction hmacSha1(byte[] key) {
        return GuavaHashFunction.from(com.google.common.hash.Hashing.hmacSha1(key));
    }

    /**
     * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
     * SHA-256 (256 hash bits) hash function and the given secret key.
     *
     * @param key the secret key
     * @return an HMAC-SHA-256 hash function keyed with {@code key}
     * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC
     * @since 20.0
     */
    public static HashFunction hmacSha256(Key key) {
        return GuavaHashFunction.from(com.google.common.hash.Hashing.hmacSha256(key));
    }

    /**
     * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
     * SHA-256 (256 hash bits) hash function and a {@link javax.crypto.spec.SecretKeySpec} created
     * from the given byte array and the SHA-256 algorithm.
     *
     * @param key the key material of the secret key
     * @return an HMAC-SHA-256 hash function keyed with {@code key}
     * @since 20.0
     */
    public static HashFunction hmacSha256(byte[] key) {
        return GuavaHashFunction.from(com.google.common.hash.Hashing.hmacSha256(key));
    }

    /**
     * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
     * SHA-512 (512 hash bits) hash function and the given secret key.
     *
     * @param key the secret key
     * @return an HMAC-SHA-512 hash function keyed with {@code key}
     * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC
     * @since 20.0
     */
    public static HashFunction hmacSha512(Key key) {
        return GuavaHashFunction.from(com.google.common.hash.Hashing.hmacSha512(key));
    }

    /**
     * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
     * SHA-512 (512 hash bits) hash function and a {@link javax.crypto.spec.SecretKeySpec} created
     * from the given byte array and the SHA-512 algorithm.
     *
     * @param key the key material of the secret key
     * @return an HMAC-SHA-512 hash function keyed with {@code key}
     * @since 20.0
     */
    public static HashFunction hmacSha512(byte[] key) {
        return GuavaHashFunction.from(com.google.common.hash.Hashing.hmacSha512(key));
    }

    /**
     * Returns a hash function implementing the CRC32C checksum algorithm (32 hash bits) as described
     * by RFC 3720, Section 12.1.
     *
     * @return the shared CRC32C hash function
     * @since 18.0
     */
    public static HashFunction crc32c() {
        return Hash_Holder.CRC_32_C;
    }

    /**
     * Returns a hash function implementing the CRC-32 checksum algorithm (32 hash bits) by delegating
     * to the {@link CRC32} {@link Checksum}.
     *
     * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a
     * {@code HashCode} produced by this function, use {@link HashCode#padToLong()}.
     *
     * @return the shared CRC-32 hash function
     * @since 14.0
     */
    public static HashFunction crc32() {
        return Hash_Holder.CRC_32;
    }

    /**
     * Returns a hash function implementing the Adler-32 checksum algorithm (32 hash bits) by
     * delegating to the {@link Adler32} {@link Checksum}.
     *
     * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a
     * {@code HashCode} produced by this function, use {@link HashCode#padToLong()}.
     *
     * @return the shared Adler-32 hash function
     * @since 14.0
     */
    public static HashFunction adler32() {
        return Hash_Holder.ADLER_32;
    }

    /**
     * Returns a hash function implementing FarmHash's Fingerprint64, an open-source algorithm.
     *
     * <p>This is designed for generating persistent fingerprints of strings. It isn't
     * cryptographically secure, but it produces a high-quality hash with fewer collisions than some
     * alternatives we've used in the past. FarmHashFingerprints generated using this are byte-wise
     * identical to those created using the C++ version, but note that this uses unsigned integers
     * (see {@link com.google.common.primitives.UnsignedInts}). Comparisons between the two should
     * take this into account.
     *
     * @return the shared FarmHash Fingerprint64 hash function
     * @since 20.0
     */
    public static HashFunction farmHashFingerprint64() {
        return Hash_Holder.FARMHASH_FINGERPRINT_64;
    }

    /**
     * Returns a hash function which computes its hash code by concatenating the hash codes of the
     * two given hash functions. Convenience overload of {@link #concatenating(Iterable)}; the
     * functions are handed over in the order {@code first}, {@code second}.
     *
     * @param first the first underlying hash function; must have been obtained from this class
     * @param second the second underlying hash function; must have been obtained from this class
     * @return the concatenating hash function
     * @throws ClassCastException if an argument is not a {@code GuavaHashFunction}
     */
    public static HashFunction concatenating(final HashFunction first, final HashFunction second) {
        return concatenating(N.asList(first, second));
    }

    /**
     * Returns a hash function which computes its hash code by concatenating the hash codes of the
     * three given hash functions. Convenience overload of {@link #concatenating(Iterable)}; the
     * functions are handed over in the order {@code first}, {@code second}, {@code third}.
     *
     * @param first the first underlying hash function; must have been obtained from this class
     * @param second the second underlying hash function; must have been obtained from this class
     * @param third the third underlying hash function; must have been obtained from this class
     * @return the concatenating hash function
     * @throws ClassCastException if an argument is not a {@code GuavaHashFunction}
     */
    public static HashFunction concatenating(final HashFunction first, final HashFunction second, final HashFunction third) {
        return concatenating(N.asList(first, second, third));
    }

    /**
     * Returns a hash function which computes its hash code by concatenating the hash codes of the
     * underlying hash functions together. This can be useful if you need to generate hash codes of a
     * specific length.
     *
     * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash
     * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}.
     *
     * <p>Each element is cast to {@code GuavaHashFunction} so that the wrapped Guava function can be
     * extracted; only hash functions obtained from this class are therefore supported.
     *
     * @param hashFunctions the underlying hash functions, in concatenation order
     * @return the concatenating hash function
     * @throws ClassCastException if an element is not a {@code GuavaHashFunction}
     * @since 19.0
     */
    public static HashFunction concatenating(final Iterable<? extends HashFunction> hashFunctions) {
        final Iterator<? extends HashFunction> iter = hashFunctions.iterator();
        final List<com.google.common.hash.HashFunction> gHashFunctionList = new ArrayList<>();

        // Unwrap every adapter so Guava receives its own HashFunction type.
        while (iter.hasNext()) {
            gHashFunctionList.add(((GuavaHashFunction) iter.next()).gHashFunction);
        }

        return GuavaHashFunction.from(com.google.common.hash.Hashing.concatenating(gHashFunctionList));
    }

    /**
     * Combines two hash codes in an ordered fashion. Convenience overload of
     * {@link #combineOrdered(Iterable)}.
     *
     * @param first the first hash code
     * @param second the second hash code
     * @return the combined hash code
     * @throws IllegalArgumentException if the hash codes do not have the same bit length
     */
    public static HashCode combineOrdered(final HashCode first, final HashCode second) {
        return combineOrdered(Arrays.asList(first, second));
    }

    /**
     * Combines three hash codes in an ordered fashion. Convenience overload of
     * {@link #combineOrdered(Iterable)}.
     *
     * @param first the first hash code
     * @param second the second hash code
     * @param third the third hash code
     * @return the combined hash code
     * @throws IllegalArgumentException if the hash codes do not all have the same bit length
     */
    public static HashCode combineOrdered(final HashCode first, final HashCode second, final HashCode third) {
        return combineOrdered(Arrays.asList(first, second, third));
    }

    /**
     * Returns a hash code, having the same bit length as each of the input hash codes, that combines
     * the information of these hash codes in an ordered fashion. That is, whenever two equal hash
     * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each
     * was computed from the <i>same</i> input hash codes in the <i>same</i> order.
     *
     * @param hashCodes the hash codes to combine
     * @return the combined hash code
     * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all
     *     have the same bit length
     */
    public static HashCode combineOrdered(final Iterable<HashCode> hashCodes) {
        return com.google.common.hash.Hashing.combineOrdered(hashCodes);
    }

    /**
     * Combines two hash codes in an unordered fashion. Convenience overload of
     * {@link #combineUnordered(Iterable)}.
     *
     * @param first the first hash code
     * @param second the second hash code
     * @return the combined hash code
     * @throws IllegalArgumentException if the hash codes do not have the same bit length
     */
    public static HashCode combineUnordered(final HashCode first, final HashCode second) {
        return combineUnordered(Arrays.asList(first, second));
    }

    /**
     * Combines three hash codes in an unordered fashion. Convenience overload of
     * {@link #combineUnordered(Iterable)}.
     *
     * @param first the first hash code
     * @param second the second hash code
     * @param third the third hash code
     * @return the combined hash code
     * @throws IllegalArgumentException if the hash codes do not all have the same bit length
     */
    public static HashCode combineUnordered(final HashCode first, final HashCode second, final HashCode third) {
        return combineUnordered(Arrays.asList(first, second, third));
    }

    /**
     * Returns a hash code, having the same bit length as each of the input hash codes, that combines
     * the information of these hash codes in an unordered fashion. That is, whenever two equal hash
     * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each
     * was computed from the <i>same</i> input hash codes in <i>some</i> order.
     *
     * @param hashCodes the hash codes to combine
     * @return the combined hash code
     * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all
     *     have the same bit length
     */
    public static HashCode combineUnordered(final Iterable<HashCode> hashCodes) {
        return com.google.common.hash.Hashing.combineUnordered(hashCodes);
    }

    /**
     * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform manner
     * that minimizes the need for remapping as {@code buckets} grows. That is, {@code
     * consistentHash(h, n)} equals:
     *
     * <ul>
     *   <li>{@code n - 1}, with approximate probability {@code 1/n}
     *   <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
     * </ul>
     *
     * <p>This method is suitable for the common use case of dividing work among buckets that meet the
     * following conditions:
     *
     * <ul>
     *   <li>You want to assign the same fraction of inputs to each bucket.
     *   <li>When you reduce the number of buckets, you can accept that the most recently added buckets
     *       will be removed first. More concretely, if you are dividing traffic among tasks, you can
     *       decrease the number of tasks from 15 to 10, killing off the final 5 tasks, and {@code
     *       consistentHash} will handle it. If, however, you are dividing traffic among servers {@code
     *       alpha}, {@code bravo}, and {@code charlie} and you occasionally need to take each of the
     *       servers offline, {@code consistentHash} will be a poor fit: It provides no way for you to
     *       specify which of the three buckets is disappearing. Thus, if your buckets change from {@code
     *       [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will assign all the old {@code alpha}
     *       traffic to {@code bravo} and all the old {@code bravo} traffic to {@code charlie}, rather than
     *       letting {@code bravo} keep its traffic.
     * </ul>
     *
     * <p>See the <a href="https://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on
     * consistent hashing</a> for more information.
     *
     * @param hashCode the hash code to assign to a bucket
     * @param buckets the number of buckets; the result lies in {@code [0, buckets)}
     * @return the index of the bucket assigned to {@code hashCode}
     */
    public static int consistentHash(HashCode hashCode, int buckets) {
        return com.google.common.hash.Hashing.consistentHash(hashCode, buckets);
    }

    /**
     * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform manner that
     * minimizes the need for remapping as {@code buckets} grows. That is, {@code consistentHash(h,
     * n)} equals:
     *
     * <ul>
     *   <li>{@code n - 1}, with approximate probability {@code 1/n}
     *   <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
     * </ul>
     *
     * <p>This method is suitable for the common use case of dividing work among buckets that meet the
     * following conditions:
     *
     * <ul>
     *   <li>You want to assign the same fraction of inputs to each bucket.
     *   <li>When you reduce the number of buckets, you can accept that the most recently added buckets
     *       will be removed first. More concretely, if you are dividing traffic among tasks, you can
     *       decrease the number of tasks from 15 to 10, killing off the final 5 tasks, and {@code
     *       consistentHash} will handle it. If, however, you are dividing traffic among servers {@code
     *       alpha}, {@code bravo}, and {@code charlie} and you occasionally need to take each of the
     *       servers offline, {@code consistentHash} will be a poor fit: It provides no way for you to
     *       specify which of the three buckets is disappearing. Thus, if your buckets change from {@code
     *       [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will assign all the old {@code alpha}
     *       traffic to {@code bravo} and all the old {@code bravo} traffic to {@code charlie}, rather than
     *       letting {@code bravo} keep its traffic.
     * </ul>
     *
     * <p>See the <a href="https://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on
     * consistent hashing</a> for more information.
     *
     * @param input the value to assign to a bucket
     * @param buckets the number of buckets; the result lies in {@code [0, buckets)}
     * @return the index of the bucket assigned to {@code input}
     */
    public static int consistentHash(long input, int buckets) {
        return com.google.common.hash.Hashing.consistentHash(input, buckets);
    }

    /**
     * Holds the shared, parameterless hash functions returned by the no-argument factory methods of
     * {@link Hashing}. Because the constants live in a nested class, they are created when this
     * holder is first initialized (first access to any one of its fields) rather than when
     * {@code Hashing} itself is initialized; all of them are created together at that point.
     */
    private static final class Hash_Holder {

        static final HashFunction MURMUR3_32_FIXED = GuavaHashFunction.from(com.google.common.hash.Hashing.murmur3_32_fixed());

        static final HashFunction MURMUR3_128 = GuavaHashFunction.from(com.google.common.hash.Hashing.murmur3_128());

        static final HashFunction SIP_HASH_24 = GuavaHashFunction.from(com.google.common.hash.Hashing.sipHash24());

        // Guava deprecates md5(); it is still exposed here for interoperability.
        @SuppressWarnings("deprecation")
        static final HashFunction MD5 = GuavaHashFunction.from(com.google.common.hash.Hashing.md5());

        // Guava deprecates sha1(); it is still exposed here for interoperability.
        @SuppressWarnings("deprecation")
        static final HashFunction SHA_1 = GuavaHashFunction.from(com.google.common.hash.Hashing.sha1());

        static final HashFunction SHA_256 = GuavaHashFunction.from(com.google.common.hash.Hashing.sha256());

        static final HashFunction SHA_384 = GuavaHashFunction.from(com.google.common.hash.Hashing.sha384());

        static final HashFunction SHA_512 = GuavaHashFunction.from(com.google.common.hash.Hashing.sha512());

        static final HashFunction CRC_32_C = GuavaHashFunction.from(com.google.common.hash.Hashing.crc32c());

        static final HashFunction CRC_32 = GuavaHashFunction.from(com.google.common.hash.Hashing.crc32());

        static final HashFunction ADLER_32 = GuavaHashFunction.from(com.google.common.hash.Hashing.adler32());

        static final HashFunction FARMHASH_FINGERPRINT_64 = GuavaHashFunction.from(com.google.common.hash.Hashing.farmHashFingerprint64());

        private Hash_Holder() {
            // Constant holder: never instantiated.
        }
    }
}