
javadoc.src-html.com.google.common.hash.Hashing.html Maven / Gradle / Ivy
The newest version!
001 /*
002 * Copyright (C) 2011 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015 package com.google.common.hash;
016
017 import static com.google.common.base.Preconditions.checkArgument;
018
019 import com.google.common.annotations.Beta;
020 import com.google.common.primitives.UnsignedInts;
021
022 import java.nio.ByteBuffer;
023 import java.security.MessageDigest;
024 import java.util.Iterator;
025
026 /**
027 * Static methods to obtain {@link HashFunction} instances, and other static
028 * hashing-related utilities.
029 *
030 * @author Kevin Bourrillion
031 * @author Dimitris Andreou
032 * @author Kurt Alfred Kluever
033 * @since 11.0
034 */
035 @Beta
036 public final class Hashing {
037 private Hashing() {}
038
039 /**
040 * Returns a general-purpose, <b>non-cryptographic-strength</b>, streaming hash function that
041 * produces hash codes of length at least {@code minimumBits}. Users without specific
042 * compatibility requirements and who do not persist the hash codes are encouraged to
043 * choose this hash function.
044 *
045 * <p><b>Warning: the implementation is unspecified and is subject to change.</b>
046 *
047 * @throws IllegalArgumentException if {@code minimumBits} is not positive
048 */
049 public static HashFunction goodFastHash(int minimumBits) {
050 int bits = checkPositiveAndMakeMultipleOf32(minimumBits);
051
052 if (bits == 32) {
053 return murmur3_32();
054 } else if (bits <= 128) {
055 return murmur3_128();
056 } else {
057 // Join some 128-bit murmur3s
058 int hashFunctionsNeeded = (bits + 127) / 128;
059 HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded];
060 for (int i = 0; i < hashFunctionsNeeded; i++) {
061 hashFunctions[i] = murmur3_128(i * 1500450271 /* a prime; shouldn't matter */);
062 }
063 return new ConcatenatedHashFunction(hashFunctions);
064 }
065 }
066
067 /**
068 * Returns a hash function implementing the
069 * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">32-bit murmur3
070 * algorithm</a> (little-endian variant), using the given seed value.
071 */
072 public static HashFunction murmur3_32(int seed) {
073 return new Murmur3_32HashFunction(seed);
074 }
075
076 /**
077 * Returns a hash function implementing the
078 * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">32-bit murmur3
079 * algorithm</a> (little-endian variant), using a seed value of zero.
080 */
081 public static HashFunction murmur3_32() {
082 return MURMUR3_32;
083 }
084
085 private static final Murmur3_32HashFunction MURMUR3_32 = new Murmur3_32HashFunction(0);
086
087 /**
088 * Returns a hash function implementing the
089 * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">
090 * 128-bit murmur3 algorithm, x64 variant</a> (little-endian variant), using the given seed
091 * value.
092 */
093 public static HashFunction murmur3_128(int seed) {
094 return new Murmur3_128HashFunction(seed);
095 }
096
097 /**
098 * Returns a hash function implementing the
099 * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">
100 * 128-bit murmur3 algorithm, x64 variant</a> (little-endian variant), using a seed value
101 * of zero.
102 */
103 public static HashFunction murmur3_128() {
104 return MURMUR3_128;
105 }
106
107 private static final Murmur3_128HashFunction MURMUR3_128 = new Murmur3_128HashFunction(0);
108
109 /**
110 * Returns a hash function implementing the MD5 hash algorithm by delegating to the MD5
111 * {@link MessageDigest}.
112 */
113 public static HashFunction md5() {
114 return MD5;
115 }
116
117 private static final HashFunction MD5 = new MessageDigestHashFunction("MD5");
118
119 /**
120 * Returns a hash function implementing the SHA-1 algorithm by delegating to the SHA-1
121 * {@link MessageDigest}.
122 */
123 public static HashFunction sha1() {
124 return SHA_1;
125 }
126
127 private static final HashFunction SHA_1 = new MessageDigestHashFunction("SHA-1");
128
129 /**
130 * Returns a hash function implementing the SHA-256 algorithm by delegating to the SHA-256
131 * {@link MessageDigest}.
132 */
133 public static HashFunction sha256() {
134 return SHA_256;
135 }
136
137 private static final HashFunction SHA_256 = new MessageDigestHashFunction("SHA-256");
138
139 /**
140 * Returns a hash function implementing the SHA-512 algorithm by delegating to the SHA-512
141 * {@link MessageDigest}.
142 */
143 public static HashFunction sha512() {
144 return SHA_512;
145 }
146
147 private static final HashFunction SHA_512 = new MessageDigestHashFunction("SHA-512");
148
149 /**
150 * If {@code hashCode} has enough bits, returns {@code hashCode.asLong()}, otherwise
151 * returns a {@code long} value with {@code hashCode.asInt()} as the least-significant
152 * four bytes and {@code 0x00} as each of the most-significant four bytes.
153 */
154 public static long padToLong(HashCode hashCode) {
155 return (hashCode.bits() < 64) ? UnsignedInts.toLong(hashCode.asInt()) : hashCode.asLong();
156 }
157
158 /**
159 * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform
160 * manner that minimizes the need for remapping as {@code buckets} grows. That is,
161 * {@code consistentHash(h, n)} equals:
162 *
163 * <ul>
164 * <li>{@code n - 1}, with approximate probability {@code 1/n}
165 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
166 * </ul>
167 *
168 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia
169 * article on consistent hashing</a> for more information.
170 */
171 public static int consistentHash(HashCode hashCode, int buckets) {
172 return consistentHash(padToLong(hashCode), buckets);
173 }
174
175 /**
176 * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform
177 * manner that minimizes the need for remapping as {@code buckets} grows. That is,
178 * {@code consistentHash(h, n)} equals:
179 *
180 * <ul>
181 * <li>{@code n - 1}, with approximate probability {@code 1/n}
182 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
183 * </ul>
184 *
185 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia
186 * article on consistent hashing</a> for more information.
187 */
188 public static int consistentHash(long input, int buckets) {
189 checkArgument(buckets > 0, "buckets must be positive: %s", buckets);
190 long h = input;
191 int candidate = 0;
192 int next;
193
194 // Jump from bucket to bucket until we go out of range
195 while (true) {
196 // See http://en.wikipedia.org/wiki/Linear_congruential_generator
197 // These values for a and m come from the C++ version of this function.
198 h = 2862933555777941757L * h + 1;
199 double inv = 0x1.0p31 / ((int) (h >>> 33) + 1);
200 next = (int) ((candidate + 1) * inv);
201
202 if (next >= 0 && next < buckets) {
203 candidate = next;
204 } else {
205 return candidate;
206 }
207 }
208 }
209
210 /**
211 * Returns a hash code, having the same bit length as each of the input hash codes,
212 * that combines the information of these hash codes in an ordered fashion. That
213 * is, whenever two equal hash codes are produced by two calls to this method, it
214 * is <i>as likely as possible</i> that each was computed from the <i>same</i>
215 * input hash codes in the <i>same</i> order.
216 *
217 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes
218 * do not all have the same bit length
219 */
220 public static HashCode combineOrdered(Iterable<HashCode> hashCodes) {
221 Iterator<HashCode> iterator = hashCodes.iterator();
222 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine.");
223 int bits = iterator.next().bits();
224 byte[] resultBytes = new byte[bits / 8];
225 for (HashCode hashCode : hashCodes) {
226 byte[] nextBytes = hashCode.asBytes();
227 checkArgument(nextBytes.length == resultBytes.length,
228 "All hashcodes must have the same bit length.");
229 for (int i = 0; i < nextBytes.length; i++) {
230 resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]);
231 }
232 }
233 return HashCodes.fromBytes(resultBytes);
234 }
235
236 /**
237 * Returns a hash code, having the same bit length as each of the input hash codes,
238 * that combines the information of these hash codes in an unordered fashion. That
239 * is, whenever two equal hash codes are produced by two calls to this method, it
240 * is <i>as likely as possible</i> that each was computed from the <i>same</i>
241 * input hash codes in <i>some</i> order.
242 *
243 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes
244 * do not all have the same bit length
245 */
246 public static HashCode combineUnordered(Iterable<HashCode> hashCodes) {
247 Iterator<HashCode> iterator = hashCodes.iterator();
248 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine.");
249 byte[] resultBytes = new byte[iterator.next().bits() / 8];
250 for (HashCode hashCode : hashCodes) {
251 byte[] nextBytes = hashCode.asBytes();
252 checkArgument(nextBytes.length == resultBytes.length,
253 "All hashcodes must have the same bit length.");
254 for (int i = 0; i < nextBytes.length; i++) {
255 resultBytes[i] += nextBytes[i];
256 }
257 }
258 return HashCodes.fromBytes(resultBytes);
259 }
260
261 /**
262 * Checks that the passed argument is positive, and ceils it to a multiple of 32.
263 */
264 static int checkPositiveAndMakeMultipleOf32(int bits) {
265 checkArgument(bits > 0, "Number of bits must be positive");
266 return (bits + 31) & ~31;
267 }
268
269 // TODO(kevinb): probably expose this via a Hashing method at some point?
270 private static class ConcatenatedHashFunction extends AbstractCompositeHashFunction {
271 final int bits;
272
273 ConcatenatedHashFunction(HashFunction[] functions) {
274 super(functions);
275 int bitSum = 0;
276 for (HashFunction f : this.functions) {
277 bitSum += f.bits();
278 }
279 this.bits = bitSum;
280 }
281
282 @Override
283 HashCode makeHash(Hasher[] hashers) {
284 byte[] bytes = new byte[bits / 8];
285 ByteBuffer buffer = ByteBuffer.wrap(bytes);
286 for (Hasher hasher : hashers) {
287 buffer.put(hasher.hash().asBytes());
288 }
289 return HashCodes.fromBytes(bytes);
290 }
291
292 @Override
293 public int bits() {
294 return bits;
295 }
296 }
297 }
© 2015 - 2025 Weber Informatics LLC | Privacy Policy