All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.state.gemini.engine.page.bloomfilter.PageBloomFilter Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.gemini.engine.page.bloomfilter;

import org.apache.flink.runtime.state.gemini.engine.page.DataPage;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.GBinaryHashMap;

import java.util.Arrays;

import static org.apache.flink.util.Preconditions.checkArgument;

/**
 * Copy From table.runtime.util.
 * BloomFilter based on a long array of Java heap, and serialization and merge based on Unsafe.
 *
 * 

Part of this class refers to the implementation from Apache Hive project * https://github.com/apache/hive/blob/master/common/src/java/org/apache/hive/common/util/BloomFilter.java. */ public class PageBloomFilter { /** * Default false positive probability for BloomFilter. */ public static final double DEFAULT_FPP = 0.03f; private final int numBits; private final int numHashFunctions; private final BitSet bitSet; public static PageBloomFilter createByPage(DataPage dataPage) { GBinaryHashMap gBinaryHashMap = dataPage.getGBinaryHashMap(); if (gBinaryHashMap == null) { return null; } int indexLen = gBinaryHashMap.indexCount(); int keyCount = gBinaryHashMap.keyCount(); if (keyCount < 16) { return null; } PageBloomFilter pageBloomFilter = new PageBloomFilter(keyCount); for (int i = 0; i < keyCount; i++) { pageBloomFilter.addHash(gBinaryHashMap.getHashCode(indexLen, i)); } return pageBloomFilter; } public PageBloomFilter(long maxNumEntries) { this(maxNumEntries, DEFAULT_FPP); } /** * Constructor. MaxNumEntries and fpp together determine the size of bloomFilter. * * @param maxNumEntries max number entries in this bloomFilter. * @param fpp false positive probability. */ public PageBloomFilter(long maxNumEntries, double fpp) { checkArgument(maxNumEntries > 0, "expectedEntries should be > 0"); int nb = optimalNumOfBits(maxNumEntries, fpp); this.numBits = nb + (Long.SIZE - (nb % Long.SIZE)); this.numHashFunctions = optimalNumOfHashFunctions(maxNumEntries, numBits); this.bitSet = new BitSet(this.numBits); } public int byteSize() { return 2 * Integer.BYTES + 16 + bitSet.getData().length * Long.BYTES; } public void addHash(int hash32) { int hash1 = hash32; int hash2 = hash32 >>> 16; for (int i = 1; i <= numHashFunctions; i++) { int combinedHash = hash1 + ((i + 1) * hash2); // hashcode should be positive, flip all the bits if it's negative if (combinedHash < 0) { combinedHash = ~combinedHash; } int pos = combinedHash % numBits; bitSet.set(pos); } } public boolean mightContain(int hash32) { int hash1 = hash32; int hash2 = hash32 >>> 16; for (int i = 1; i <= numHashFunctions; i++) { int combinedHash = hash1 + ((i + 1) * hash2); // hashcode should be positive, flip all the bits if it's negative if (combinedHash < 0) { combinedHash = ~combinedHash; } int pos = combinedHash % numBits; if (!bitSet.get(pos)) { return false; } } return true; } public long[] getBitSet() { return bitSet.getData(); } @Override public String toString() { return "numBits: " + numBits + " numHashFunctions: " + numHashFunctions; } private static int optimalNumOfHashFunctions(long n, long m) { return Math.max(1, (int) Math.round((double) m / n * Math.log(2))); } private static int optimalNumOfBits(long maxNumEntries, double fpp) { return (int) (-maxNumEntries * Math.log(fpp) / (Math.log(2) * Math.log(2))); } /** * Bare metal bit set implementation. For performance reasons, this implementation does not * check for index bounds nor expand the bit set size if the specified index is greater than * the size. */ public static class BitSet { private final long[] data; BitSet(long bits) { this(new long[(int) Math.ceil((double) bits / (double) Long.SIZE)]); } /** * Deserialize long array as bit set. * * @param data - bit array */ BitSet(long[] data) { assert data.length > 0 : "data length is zero!"; this.data = data; } /** * Sets the bit at specified index. * * @param index - position */ public void set(int index) { data[index >>> 6] |= (1L << index); } /** * Returns true if the bit is set in the specified index. * * @param index - position * @return - value at the bit position */ public boolean get(int index) { return (data[index >>> 6] & (1L << index)) != 0; } public long[] getData() { return data; } /** * Combines the two BitArrays using bitwise OR. */ public void putAll(BitSet array) { assert data.length == array.data.length : "BitArrays must be of equal length (" + data.length + "!= " + array.data.length + ")"; for (int i = 0; i < data.length; i++) { data[i] |= array.data[i]; } } /** * Clear the bit set. */ public void clear() { Arrays.fill(data, 0); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy