All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.common.hash.BloomFilterStrategies Maven / Gradle / Ivy

Go to download

Guava is a suite of core and expanded libraries that include utility classes, google's collections, io classes, and much much more.

There is a newer version: 33.1.0-jre
Show newest version
/*
 * Copyright (C) 2011 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.hash;

import static com.google.common.base.Preconditions.checkArgument;

import com.google.common.math.LongMath;
import com.google.common.primitives.Ints;
import com.google.common.primitives.Longs;
import java.math.RoundingMode;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicLongArray;
import org.checkerframework.checker.nullness.qual.Nullable;

/**
 * Collections of strategies of generating the k * log(M) bits required for an element to be mapped
 * to a BloomFilter of M bits and k hash functions. These strategies are part of the serialized form
 * of the Bloom filters that use them, thus they must be preserved as is (no updates allowed, only
 * introduction of new versions).
 *
 * 

Important: the order of the constants cannot change, and they cannot be deleted - we depend on * their ordinal for BloomFilter serialization. * * @author Dimitris Andreou * @author Kurt Alfred Kluever */ enum BloomFilterStrategies implements BloomFilter.Strategy { /** * See "Less Hashing, Same Performance: Building a Better Bloom Filter" by Adam Kirsch and Michael * Mitzenmacher. The paper argues that this trick doesn't significantly deteriorate the * performance of a Bloom filter (yet only needs two 32bit hash functions). */ MURMUR128_MITZ_32() { @Override public boolean put( T object, Funnel funnel, int numHashFunctions, LockFreeBitArray bits) { long bitSize = bits.bitSize(); long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong(); int hash1 = (int) hash64; int hash2 = (int) (hash64 >>> 32); boolean bitsChanged = false; for (int i = 1; i <= numHashFunctions; i++) { int combinedHash = hash1 + (i * hash2); // Flip all the bits if it's negative (guaranteed positive number) if (combinedHash < 0) { combinedHash = ~combinedHash; } bitsChanged |= bits.set(combinedHash % bitSize); } return bitsChanged; } @Override public boolean mightContain( T object, Funnel funnel, int numHashFunctions, LockFreeBitArray bits) { long bitSize = bits.bitSize(); long hash64 = Hashing.murmur3_128().hashObject(object, funnel).asLong(); int hash1 = (int) hash64; int hash2 = (int) (hash64 >>> 32); for (int i = 1; i <= numHashFunctions; i++) { int combinedHash = hash1 + (i * hash2); // Flip all the bits if it's negative (guaranteed positive number) if (combinedHash < 0) { combinedHash = ~combinedHash; } if (!bits.get(combinedHash % bitSize)) { return false; } } return true; } }, /** * This strategy uses all 128 bits of {@link Hashing#murmur3_128} when hashing. It looks different * than the implementation in MURMUR128_MITZ_32 because we're avoiding the multiplication in the * loop and doing a (much simpler) += hash2. We're also changing the index to a positive number by * AND'ing with Long.MAX_VALUE instead of flipping the bits. */ MURMUR128_MITZ_64() { @Override public boolean put( T object, Funnel funnel, int numHashFunctions, LockFreeBitArray bits) { long bitSize = bits.bitSize(); byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal(); long hash1 = lowerEight(bytes); long hash2 = upperEight(bytes); boolean bitsChanged = false; long combinedHash = hash1; for (int i = 0; i < numHashFunctions; i++) { // Make the combined hash positive and indexable bitsChanged |= bits.set((combinedHash & Long.MAX_VALUE) % bitSize); combinedHash += hash2; } return bitsChanged; } @Override public boolean mightContain( T object, Funnel funnel, int numHashFunctions, LockFreeBitArray bits) { long bitSize = bits.bitSize(); byte[] bytes = Hashing.murmur3_128().hashObject(object, funnel).getBytesInternal(); long hash1 = lowerEight(bytes); long hash2 = upperEight(bytes); long combinedHash = hash1; for (int i = 0; i < numHashFunctions; i++) { // Make the combined hash positive and indexable if (!bits.get((combinedHash & Long.MAX_VALUE) % bitSize)) { return false; } combinedHash += hash2; } return true; } private /* static */ long lowerEight(byte[] bytes) { return Longs.fromBytes( bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]); } private /* static */ long upperEight(byte[] bytes) { return Longs.fromBytes( bytes[15], bytes[14], bytes[13], bytes[12], bytes[11], bytes[10], bytes[9], bytes[8]); } }; /** * Models a lock-free array of bits. * *

We use this instead of java.util.BitSet because we need access to the array of longs and we * need compare-and-swap. */ static final class LockFreeBitArray { private static final int LONG_ADDRESSABLE_BITS = 6; final AtomicLongArray data; private final LongAddable bitCount; LockFreeBitArray(long bits) { checkArgument(bits > 0, "data length is zero!"); // Avoid delegating to this(long[]), since AtomicLongArray(long[]) will clone its input and // thus double memory usage. this.data = new AtomicLongArray(Ints.checkedCast(LongMath.divide(bits, 64, RoundingMode.CEILING))); this.bitCount = LongAddables.create(); } // Used by serialization LockFreeBitArray(long[] data) { checkArgument(data.length > 0, "data length is zero!"); this.data = new AtomicLongArray(data); this.bitCount = LongAddables.create(); long bitCount = 0; for (long value : data) { bitCount += Long.bitCount(value); } this.bitCount.add(bitCount); } /** Returns true if the bit changed value. */ boolean set(long bitIndex) { if (get(bitIndex)) { return false; } int longIndex = (int) (bitIndex >>> LONG_ADDRESSABLE_BITS); long mask = 1L << bitIndex; // only cares about low 6 bits of bitIndex long oldValue; long newValue; do { oldValue = data.get(longIndex); newValue = oldValue | mask; if (oldValue == newValue) { return false; } } while (!data.compareAndSet(longIndex, oldValue, newValue)); // We turned the bit on, so increment bitCount. bitCount.increment(); return true; } boolean get(long bitIndex) { return (data.get((int) (bitIndex >>> LONG_ADDRESSABLE_BITS)) & (1L << bitIndex)) != 0; } /** * Careful here: if threads are mutating the atomicLongArray while this method is executing, the * final long[] will be a "rolling snapshot" of the state of the bit array. This is usually good * enough, but should be kept in mind. */ public static long[] toPlainArray(AtomicLongArray atomicLongArray) { long[] array = new long[atomicLongArray.length()]; for (int i = 0; i < array.length; ++i) { array[i] = atomicLongArray.get(i); } return array; } /** Number of bits */ long bitSize() { return (long) data.length() * Long.SIZE; } /** * Number of set bits (1s). * *

Note that because of concurrent set calls and uses of atomics, this bitCount is a (very) * close *estimate* of the actual number of bits set. It's not possible to do better than an * estimate without locking. Note that the number, if not exactly accurate, is *always* * underestimating, never overestimating. */ long bitCount() { return bitCount.sum(); } LockFreeBitArray copy() { return new LockFreeBitArray(toPlainArray(data)); } /** * Combines the two BitArrays using bitwise OR. * *

NOTE: Because of the use of atomics, if the other LockFreeBitArray is being mutated while * this operation is executing, not all of those new 1's may be set in the final state of this * LockFreeBitArray. The ONLY guarantee provided is that all the bits that were set in the other * LockFreeBitArray at the start of this method will be set in this LockFreeBitArray at the end * of this method. */ void putAll(LockFreeBitArray other) { checkArgument( data.length() == other.data.length(), "BitArrays must be of equal length (%s != %s)", data.length(), other.data.length()); for (int i = 0; i < data.length(); i++) { long otherLong = other.data.get(i); long ourLongOld; long ourLongNew; boolean changedAnyBits = true; do { ourLongOld = data.get(i); ourLongNew = ourLongOld | otherLong; if (ourLongOld == ourLongNew) { changedAnyBits = false; break; } } while (!data.compareAndSet(i, ourLongOld, ourLongNew)); if (changedAnyBits) { int bitsAdded = Long.bitCount(ourLongNew) - Long.bitCount(ourLongOld); bitCount.add(bitsAdded); } } } @Override public boolean equals(@Nullable Object o) { if (o instanceof LockFreeBitArray) { LockFreeBitArray lockFreeBitArray = (LockFreeBitArray) o; // TODO(lowasser): avoid allocation here return Arrays.equals(toPlainArray(data), toPlainArray(lockFreeBitArray.data)); } return false; } @Override public int hashCode() { // TODO(lowasser): avoid allocation here return Arrays.hashCode(toPlainArray(data)); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy