org.apache.hadoop.util.bloom.DynamicBloomFilter Maven / Gradle / Ivy
/**
*
* Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
* All rights reserved.
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the distribution.
* - Neither the name of the University Catholique de Louvain - UCL
* nor the names of its contributors may be used to endorse or
* promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.util.bloom;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* Implements a dynamic Bloom filter, as defined in the INFOCOM 2006 paper.
*
* A dynamic Bloom filter (DBF) makes use of a s * m
bit matrix but
* each of the s
rows is a standard Bloom filter. The creation
* process of a DBF is iterative. At the start, the DBF is a 1 * m
* bit matrix, i.e., it is composed of a single standard Bloom filter.
* It assumes that nr
elements are recorded in the
* initial bit vector, where nr <= n
(n
is
* the cardinality of the set A
to record in the filter).
*
* As the size of A
grows during the execution of the application,
* several keys must be inserted in the DBF. When inserting a key into the DBF,
* one must first get an active Bloom filter in the matrix. A Bloom filter is
* active when the number of recorded keys, nr
, is
* strictly less than the current cardinality of A
, n
.
* If an active Bloom filter is found, the key is inserted and
* nr
is incremented by one. On the other hand, if there
* is no active Bloom filter, a new one is created (i.e., a new row is added to
* the matrix) according to the current size of A
and the element
* is added in this new Bloom filter and the nr
value of
* this new Bloom filter is set to one. A given key is said to belong to the
* DBF if the k
positions are set to one in one of the matrix rows.
*
* Originally created by
* European Commission One-Lab Project 034819.
*
* @see Filter The general behavior of a filter
* @see BloomFilter A Bloom filter
*
* @see Theory and Network Applications of Dynamic Bloom Filters
*/
@InterfaceAudience.Public
@InterfaceStability.Stable
public class DynamicBloomFilter extends Filter {
/**
* Threshold for the maximum number of key to record in a dynamic Bloom filter row.
*/
private int nr;
/**
* The number of keys recorded in the current standard active Bloom filter.
*/
private int currentNbRecord;
/**
* The matrix of Bloom filter.
*/
private BloomFilter[] matrix;
/**
* Zero-args constructor for the serialization.
*/
public DynamicBloomFilter() { }
/**
* Constructor.
*
* Builds an empty Dynamic Bloom filter.
* @param vectorSize The number of bits in the vector.
* @param nbHash The number of hash function to consider.
* @param hashType type of the hashing function (see
* {@link org.apache.hadoop.util.hash.Hash}).
* @param nr The threshold for the maximum number of keys to record in a
* dynamic Bloom filter row.
*/
public DynamicBloomFilter(int vectorSize, int nbHash, int hashType, int nr) {
super(vectorSize, nbHash, hashType);
this.nr = nr;
this.currentNbRecord = 0;
matrix = new BloomFilter[1];
matrix[0] = new BloomFilter(this.vectorSize, this.nbHash, this.hashType);
}
@Override
public void add(Key key) {
if (key == null) {
throw new NullPointerException("Key can not be null");
}
BloomFilter bf = getActiveStandardBF();
if (bf == null) {
addRow();
bf = matrix[matrix.length - 1];
currentNbRecord = 0;
}
bf.add(key);
currentNbRecord++;
}
@Override
public void and(Filter filter) {
if (filter == null
|| !(filter instanceof DynamicBloomFilter)
|| filter.vectorSize != this.vectorSize
|| filter.nbHash != this.nbHash) {
throw new IllegalArgumentException("filters cannot be and-ed");
}
DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
throw new IllegalArgumentException("filters cannot be and-ed");
}
for (int i = 0; i < matrix.length; i++) {
matrix[i].and(dbf.matrix[i]);
}
}
@Override
public boolean membershipTest(Key key) {
if (key == null) {
return true;
}
for (int i = 0; i < matrix.length; i++) {
if (matrix[i].membershipTest(key)) {
return true;
}
}
return false;
}
@Override
public void not() {
for (int i = 0; i < matrix.length; i++) {
matrix[i].not();
}
}
@Override
public void or(Filter filter) {
if (filter == null
|| !(filter instanceof DynamicBloomFilter)
|| filter.vectorSize != this.vectorSize
|| filter.nbHash != this.nbHash) {
throw new IllegalArgumentException("filters cannot be or-ed");
}
DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
throw new IllegalArgumentException("filters cannot be or-ed");
}
for (int i = 0; i < matrix.length; i++) {
matrix[i].or(dbf.matrix[i]);
}
}
@Override
public void xor(Filter filter) {
if (filter == null
|| !(filter instanceof DynamicBloomFilter)
|| filter.vectorSize != this.vectorSize
|| filter.nbHash != this.nbHash) {
throw new IllegalArgumentException("filters cannot be xor-ed");
}
DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
throw new IllegalArgumentException("filters cannot be xor-ed");
}
for(int i = 0; ithis dynamic Bloom filter.
*/
private void addRow() {
BloomFilter[] tmp = new BloomFilter[matrix.length + 1];
for (int i = 0; i < matrix.length; i++) {
tmp[i] = matrix[i];
}
tmp[tmp.length-1] = new BloomFilter(vectorSize, nbHash, hashType);
matrix = tmp;
}
/**
* Returns the active standard Bloom filter in this dynamic Bloom filter.
* @return BloomFilter The active standard Bloom filter.
* Null
otherwise.
*/
private BloomFilter getActiveStandardBF() {
if (currentNbRecord >= nr) {
return null;
}
return matrix[matrix.length - 1];
}
}