com.googlecode.javaewah.symmetric.ThresholdFuncBitmap Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of JavaEWAH Show documentation
Show all versions of JavaEWAH Show documentation
The bit array data structure is implemented in Java as the BitSet class. Unfortunately, this fails to scale without compression.
JavaEWAH is a word-aligned compressed variant of the Java BitSet class. It uses a 64-bit run-length encoding (RLE) compression scheme.
The goal of word-aligned compression is not to achieve the best compression, but rather to improve query processing time. Hence, we try to save CPU cycles, maybe at the expense of storage. However, the EWAH scheme we implemented is always more efficient storage-wise than an uncompressed bitmap (implemented in Java as the BitSet class). Unlike some alternatives, javaewah does not rely on a patented scheme.
package com.googlecode.javaewah.symmetric;
import com.googlecode.javaewah.BitmapStorage;
import java.util.Arrays;
/**
 * A threshold Boolean function returns true when the number of true inputs is
 * at least a given threshold. It is a symmetric Boolean function.
 *
 * This class implements an algorithm described in the following paper:
 *
 * Owen Kaser and Daniel Lemire, Compressed bitmap indexes: beyond unions and intersections
 * http://arxiv.org/abs/1402.4466
 *
 * It is not thread safe: you should use one object per thread.
 *
 * @author Daniel Lemire
 * @see <a href="http://en.wikipedia.org/wiki/Symmetric_Boolean_function">Symmetric Boolean function</a>
 * @since 0.8.0
 */
public final class ThresholdFuncBitmap extends UpdateableBitmapFunction {
    /** Minimum number of true inputs required for an output bit to be set. */
    private final int min;
    /** Reusable scratch array holding one literal word per input; grown on demand. */
    private long[] buffers;
    /** Number of leading entries of {@link #buffers} that hold valid words. */
    private int bufferUsed;
    /** Reusable per-bit-position counters for the counting strategy (one per bit of a word). */
    private final int[] bufCounters = new int[64];

    /**
     * Constructs a threshold function with a given threshold.
     *
     * @param min threshold: an output bit is set when at least this many inputs
     *            have the corresponding bit set
     */
    public ThresholdFuncBitmap(final int min) {
        super();
        this.min = min;
        this.buffers = new long[16];
        this.bufferUsed = 0;
    }

    /**
     * Writes the output words for the range {@code [runBegin, runEnd)}.
     *
     * NOTE(review): {@code hammingWeight} and {@code litWeight} are inherited;
     * presumably they count the inputs currently inside a run of ones and the
     * inputs currently providing literal words, respectively -- confirm against
     * the superclass.
     *
     * @param out      where the resulting words are appended
     * @param runBegin first word index of the range
     * @param runEnd   word index one past the end of the range
     */
    @Override
    public void dispatch(BitmapStorage out, int runBegin, int runEnd) {
        final int runLength = runEnd - runBegin;
        if (this.hammingWeight >= this.min) {
            // The threshold is already met without looking at any literal word.
            out.addStreamOfEmptyWords(true, runLength);
        } else if (this.litWeight + this.hammingWeight < this.min) {
            // Even if every literal contributed, the threshold could not be met.
            out.addStreamOfEmptyWords(false, runLength);
        } else {
            // Number of additional true inputs the literal words must supply.
            final int deficit = this.min - this.hammingWeight;
            if (deficit == 1) {
                // One more true input suffices: plain OR of the literals.
                orLiterals(out, runBegin, runLength);
                return;
            }
            this.bufferUsed = this.getNumberOfLiterals();
            if (this.bufferUsed == deficit) {
                // Every literal must contribute: plain AND of the literals.
                andLiterals(out, runBegin, runLength);
            } else {
                generalLiterals(deficit, out, runBegin, runLength);
            }
        }
    }

    /**
     * Counting strategy: tallies, for each of the 64 bit positions, how many of
     * the first {@code bufUsed} words have that bit set, then sets the output
     * bit wherever the tally reaches {@code t}. The work done is proportional
     * to the total number of set bits.
     *
     * @param t       threshold
     * @param buf     input words
     * @param bufUsed number of valid words at the start of {@code buf}
     * @return word whose bits are set where at least {@code t} inputs are set
     */
    private long threshold2buf(final int t, final long[] buf, final int bufUsed) {
        final int[] counters = this.bufCounters;
        Arrays.fill(counters, 0);
        for (int k = 0; k < bufUsed; ++k) {
            long bitset = buf[k];
            while (bitset != 0) {
                // Index of the lowest set bit, then clear that bit.
                counters[Long.numberOfTrailingZeros(bitset)]++;
                bitset &= bitset - 1;
            }
        }
        long result = 0L;
        for (int pos = 0; pos < 64; ++pos) {
            if (counters[pos] >= t) {
                result |= 1L << pos;
            }
        }
        return result;
    }

    /**
     * Looped strategy: maintains {@code t} words where, after processing some
     * inputs, bit {@code b} of {@code v[j]} is set when at least {@code j + 1}
     * of those inputs have bit {@code b} set. The work done is proportional to
     * {@code bufUsed * t}.
     *
     * @param t       threshold
     * @param buffers input words
     * @param bufUsed number of valid words at the start of {@code buffers}
     * @return word whose bits are set where at least {@code t} inputs are set
     */
    private static long threshold3(final int t, final long[] buffers, final int bufUsed) {
        if (t <= 0) {
            // A non-positive threshold is always met; matches threshold2buf.
            return ~0L;
        }
        if (bufUsed <= 0) {
            // No valid inputs: check the used count, not the array capacity,
            // so stale scratch content is never read.
            return 0L;
        }
        final long[] v = new long[t];
        v[0] = buffers[0];
        for (int k = 1; k < bufUsed; ++k) {
            final long c = buffers[k];
            // After k inputs only v[0..k-1] can be non-zero, so v[k] is the
            // highest level that this input can newly reach.
            final int m = Math.min(t - 1, k);
            // Go from high to low so each level reads the previous level's
            // value from before this input was applied.
            for (int j = m; j >= 1; --j) {
                v[j] |= (c & v[j - 1]);
            }
            v[0] |= c;
        }
        return v[t - 1];
    }

    /**
     * Picks between the looped and the counting strategies.
     *
     * @param t       threshold
     * @param buf     input words
     * @param bufUsed number of valid words at the start of {@code buf}
     * @return word whose bits are set where at least {@code t} inputs are set
     */
    private long threshold4(final int t, final long[] buf, final int bufUsed) {
        if (t >= 128) {
            // Large thresholds always use counting (the looped strategy
            // allocates and updates t words per call).
            return threshold2buf(t, buf, bufUsed);
        }
        long setBits = 0;
        for (int k = 0; k < bufUsed; ++k) {
            setBits += Long.bitCount(buf[k]);
        }
        // Counting costs about setBits steps, looping about bufUsed * t; use
        // the looped strategy when the inputs are dense enough. Long
        // arithmetic keeps the comparison free of int overflow.
        if (2L * setBits >= (long) bufUsed * t) {
            return threshold3(t, buf, bufUsed); // looped
        }
        return threshold2buf(t, buf, bufUsed); // scancount
    }

    /**
     * Emits, for each word of the run, the bitwise OR of all literal inputs.
     *
     * @param out       where the resulting words are appended
     * @param runBegin  first word index of the run
     * @param runLength number of words to emit
     */
    private void orLiterals(final BitmapStorage out, final int runBegin, final int runLength) {
        for (int i = 0; i < runLength; ++i) {
            long w = 0L;
            for (EWAHPointer R : this.getLiterals()) {
                w |= R.iterator.getLiteralWordAt(i + runBegin - R.beginOfRun());
            }
            out.addWord(w);
        }
    }

    /**
     * Emits, for each word of the run, the bitwise AND of all literal inputs.
     *
     * @param out       where the resulting words are appended
     * @param runBegin  first word index of the run
     * @param runLength number of words to emit
     */
    private void andLiterals(final BitmapStorage out, final int runBegin, final int runLength) {
        for (int i = 0; i < runLength; ++i) {
            long w = ~0L;
            for (EWAHPointer R : this.getLiterals()) {
                w &= R.iterator.getLiteralWordAt(i + runBegin - R.beginOfRun());
            }
            out.addWord(w);
        }
    }

    /**
     * Emits, for each word of the run, the threshold of the literal inputs for
     * a threshold that is neither 1 (OR) nor the number of literals (AND).
     * Expects {@code bufferUsed} to have been set by the caller to the number
     * of literal inputs.
     *
     * @param deficit   number of literal inputs that must have a bit set
     * @param out       where the resulting words are appended
     * @param runBegin  first word index of the run
     * @param runLength number of words to emit
     */
    private void generalLiterals(final int deficit,
                                 final BitmapStorage out, final int runBegin, final int runLength) {
        if (this.bufferUsed > this.buffers.length) {
            // Grow with slack so repeated small increases do not reallocate each time.
            this.buffers = Arrays.copyOf(this.buffers, 2 * this.bufferUsed);
        }
        for (int i = 0; i < runLength; ++i) {
            int p = 0;
            for (EWAHPointer R : this.getLiterals()) {
                this.buffers[p++] = R.iterator.getLiteralWordAt(i + runBegin - R.beginOfRun());
            }
            out.addWord(threshold4(deficit, this.buffers, this.bufferUsed));
        }
    }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy