com.googlecode.javaewah32.IteratorAggregation32 Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of JavaEWAH Show documentation
Show all versions of JavaEWAH Show documentation
The bit array data structure is implemented in Java as the BitSet class. Unfortunately, this fails to scale without compression.
JavaEWAH is a word-aligned compressed variant of the Java bitset class. It uses a 64-bit run-length encoding (RLE) compression scheme.
The goal of word-aligned compression is not to achieve the best compression, but rather to improve query processing time. Hence, we try to save CPU cycles, maybe at the expense of storage. However, the EWAH scheme we implemented is always more efficient storage-wise than an uncompressed bitmap (implemented in Java as the BitSet class). Unlike some alternatives, JavaEWAH does not rely on a patented scheme.
package com.googlecode.javaewah32;
import com.googlecode.javaewah.CloneableIterator;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
/*
* Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves
* Licensed under the Apache License, Version 2.0.
*/
/**
* Set of helper functions to aggregate bitmaps.
*/
public final class IteratorAggregation32 {
/** Private constructor to prevent instantiation; every helper in this class is static. */
private IteratorAggregation32() {}
/**
 * Builds a view of an iterator that reports the complement of every word.
 *
 * The returned object keeps no state of its own: positional calls
 * ({@code next}, {@code discardFirstWords}, {@code discardRunningWords})
 * are forwarded to {@code x}, so advancing the view advances {@code x} too.
 * Only the running bit and the literal words are inverted; lengths and
 * sizes are passed through untouched. The view cannot be cloned.
 *
 * @param x iterator to negate
 * @return negated version of the iterator
 */
public static IteratingRLW32 not(final IteratingRLW32 x) {
    return new IteratingRLW32() {
        // ----- inverted content -----

        @Override
        public boolean getRunningBit() {
            return !x.getRunningBit();
        }

        @Override
        public int getLiteralWordAt(int index) {
            return ~x.getLiteralWordAt(index);
        }

        // ----- pass-through geometry -----

        @Override
        public int getRunningLength() {
            return x.getRunningLength();
        }

        @Override
        public int getNumberOfLiteralWords() {
            return x.getNumberOfLiteralWords();
        }

        @Override
        public int size() {
            return x.size();
        }

        // ----- pass-through navigation -----

        @Override
        public boolean next() {
            return x.next();
        }

        @Override
        public void discardFirstWords(int wordCount) {
            x.discardFirstWords(wordCount);
        }

        @Override
        public void discardRunningWords() {
            x.discardRunningWords();
        }

        @Override
        public IteratingRLW32 clone()
                throws CloneNotSupportedException {
            // Cloning the negated view is deliberately unsupported.
            throw new CloneNotSupportedException();
        }
    };
}
/**
 * Computes the AND aggregate of the iterators through a bitmap buffer,
 * using {@code DEFAULT_MAX_BUF_SIZE} as the buffer size.
 *
 * @param al iterators to aggregate
 * @return and aggregate
 */
public static IteratingRLW32 bufferedand(final IteratingRLW32... al) {
    final int defaultBufSize = DEFAULT_MAX_BUF_SIZE;
    return bufferedand(defaultBufSize, al);
}
/**
 * Aggregate the iterators using a bitmap buffer.
 *
 * When exactly one iterator is supplied it is returned as is, without any
 * buffering. Otherwise the iterators are collected into a list that is
 * handed to an {@code AndIt}, itself wrapped in a {@code BufferedIterator32}.
 *
 * @param bufSize size of the internal buffer used by the iterator, in
 *                32-bit words (this is the {@code int}-based variant)
 * @param al      iterators to aggregate
 * @return and aggregate
 * @throws IllegalArgumentException if no iterator is supplied
 */
public static IteratingRLW32 bufferedand(final int bufSize,
        final IteratingRLW32... al) {
    if (al.length == 0) {
        throw new IllegalArgumentException(
                "Need at least one iterator");
    }
    if (al.length == 1) {
        return al[0];
    }
    // Typed list instead of a raw LinkedList, so no unchecked insertions.
    final LinkedList<IteratingRLW32> basell = new LinkedList<IteratingRLW32>();
    Collections.addAll(basell, al);
    return new BufferedIterator32(new AndIt(basell, bufSize));
}
/**
 * Computes the OR aggregate of the iterators through a bitmap buffer,
 * using {@code DEFAULT_MAX_BUF_SIZE} as the buffer size.
 *
 * @param al iterators to aggregate
 * @return or aggregate
 */
public static IteratingRLW32 bufferedor(final IteratingRLW32... al) {
    final int defaultBufSize = DEFAULT_MAX_BUF_SIZE;
    return bufferedor(defaultBufSize, al);
}
/**
 * Aggregate the iterators using a bitmap buffer.
 *
 * When exactly one iterator is supplied it is returned as is, without any
 * buffering. Otherwise the iterators are collected into a list that is
 * handed to an {@code ORIt}, itself wrapped in a {@code BufferedIterator32}.
 *
 * @param bufSize size of the internal buffer used by the iterator, in
 *                32-bit words (the buffer is an {@code int} array)
 * @param al      iterators to aggregate
 * @return or aggregate
 * @throws IllegalArgumentException if no iterator is supplied
 */
public static IteratingRLW32 bufferedor(final int bufSize,
        final IteratingRLW32... al) {
    if (al.length == 0) {
        throw new IllegalArgumentException(
                "Need at least one iterator");
    }
    if (al.length == 1) {
        return al[0];
    }
    // Typed list instead of a raw LinkedList, so no unchecked insertions.
    final LinkedList<IteratingRLW32> basell = new LinkedList<IteratingRLW32>();
    Collections.addAll(basell, al);
    return new BufferedIterator32(new ORIt(basell, bufSize));
}
/**
 * Computes the XOR aggregate of the iterators through a bitmap buffer,
 * using {@code DEFAULT_MAX_BUF_SIZE} as the buffer size.
 *
 * @param al iterators to aggregate
 * @return xor aggregate
 */
public static IteratingRLW32 bufferedxor(final IteratingRLW32... al) {
    final int defaultBufSize = DEFAULT_MAX_BUF_SIZE;
    return bufferedxor(defaultBufSize, al);
}
/**
 * Aggregate the iterators using a bitmap buffer.
 *
 * When exactly one iterator is supplied it is returned as is, without any
 * buffering. Otherwise the iterators are collected into a list that is
 * handed to an {@code XORIt}, itself wrapped in a {@code BufferedIterator32}.
 *
 * @param bufSize size of the internal buffer used by the iterator, in
 *                32-bit words (the buffer is an {@code int} array)
 * @param al      iterators to aggregate
 * @return xor aggregate
 * @throws IllegalArgumentException if no iterator is supplied
 */
public static IteratingRLW32 bufferedxor(final int bufSize,
        final IteratingRLW32... al) {
    if (al.length == 0) {
        throw new IllegalArgumentException(
                "Need at least one iterator");
    }
    if (al.length == 1) {
        return al[0];
    }
    // Typed list instead of a raw LinkedList, so no unchecked insertions.
    final LinkedList<IteratingRLW32> basell = new LinkedList<IteratingRLW32>();
    Collections.addAll(basell, al);
    return new BufferedIterator32(new XORIt(basell, bufSize));
}
/**
 * Drains the iterator into the container while ignoring its content:
 * for each position of the iterator, a run of zero words as long as the
 * iterator's current {@code size()} is appended.
 *
 * @param container where we write
 * @param i         the iterator
 */
protected static void dischargeAsEmpty(final BitmapStorage32 container,
        final IteratingRLW32 i) {
    // The loop ends once the iterator reports no remaining words.
    for (; i.size() > 0; i.next()) {
        container.addStreamOfEmptyWords(false, i.size());
    }
}
/**
 * Copies words from the iterator into the container, stopping once
 * {@code max} words have been written or the iterator has nothing left.
 * Every word that is written is also discarded from the iterator.
 *
 * @param container target for writes
 * @param i         source of data
 * @param max       maximal number of writes
 * @return how many written
 */
protected static int discharge(final BitmapStorage32 container,
        IteratingRLW32 i, int max) {
    int written = 0;
    while (i.size() > 0 && written < max) {
        // Running (fill) words first, clipped to the remaining budget.
        int runWords = i.getRunningLength();
        if (runWords > 0) {
            if (runWords + written > max) {
                runWords = max - written;
            }
            container.addStreamOfEmptyWords(
                    i.getRunningBit(), runWords);
            written += runWords;
        }
        // Then the literal words, clipped the same way.
        int literalWords = i.getNumberOfLiteralWords();
        if (literalWords + written > max) {
            literalWords = max - written;
        }
        int idx = 0;
        while (idx < literalWords) {
            container.addWord(i.getLiteralWordAt(idx));
            ++idx;
        }
        written += literalWords;
        // Consume exactly what was emitted.
        i.discardFirstWords(literalWords + runWords);
    }
    return written;
}
/**
 * Write out up to max negated words, returns how many were written.
 *
 * This mirrors {@code discharge}, except that the content is complemented
 * on the way out: runs are emitted with the opposite running bit and each
 * literal word is bitwise inverted. Every word that is written is also
 * discarded from the iterator.
 *
 * @param container target for writes
 * @param i         source of data
 * @param max       maximal number of writes
 * @return how many written
 */
protected static int dischargeNegated(final BitmapStorage32 container,
        IteratingRLW32 i, int max) {
    int counter = 0;
    while (i.size() > 0 && counter < max) {
        int l1 = i.getRunningLength();
        if (l1 > 0) {
            if (l1 + counter > max) {
                l1 = max - counter;
            }
            // Negation of a run: same length, opposite fill bit.
            container.addStreamOfEmptyWords(!i.getRunningBit(), l1);
            counter += l1;
        }
        int l = i.getNumberOfLiteralWords();
        if (l + counter > max) {
            l = max - counter;
        }
        for (int k = 0; k < l; ++k) {
            // Negation of a literal word: bitwise complement.
            container.addWord(~i.getLiteralWordAt(k));
        }
        counter += l;
        i.discardFirstWords(l + l1);
    }
    return counter;
}
/**
 * Computes the first few words of the AND aggregate between two
 * iterators and appends them to the container.
 *
 * The loop stops when either iterator is exhausted or when the word
 * budget runs out. The budget is decremented once per outer pass and by
 * the number of literal words combined in that pass.
 *
 * @param container       where to write
 * @param desiredrlwcount budget limiting how much is written
 * @param rlwi            first iterator to aggregate
 * @param rlwj            second iterator to aggregate
 */
static void andToContainer(final BitmapStorage32 container,
        int desiredrlwcount, final IteratingRLW32 rlwi,
        IteratingRLW32 rlwj) {
    while ((rlwi.size() > 0) && (rlwj.size() > 0)
            && (desiredrlwcount-- > 0)) {
        // Consume running words until both sides sit on literal words.
        while ((rlwi.getRunningLength() > 0)
                || (rlwj.getRunningLength() > 0)) {
            // "predator" is the side with the longer (or equal) run,
            // "prey" is the other side.
            final IteratingRLW32 prey;
            final IteratingRLW32 predator;
            if (rlwi.getRunningLength() < rlwj.getRunningLength()) {
                prey = rlwi;
                predator = rlwj;
            } else {
                prey = rlwj;
                predator = rlwi;
            }
            if (predator.getRunningBit()) {
                // Run of ones: the result is whatever the prey holds;
                // any shortfall is padded with zero words.
                final int copied = discharge(container, prey,
                        predator.getRunningLength());
                container.addStreamOfEmptyWords(false,
                        predator.getRunningLength() - copied);
            } else {
                // Run of zeros: the result is zero, prey words are skipped.
                container.addStreamOfEmptyWords(false,
                        predator.getRunningLength());
                prey.discardFirstWords(predator.getRunningLength());
            }
            predator.discardFirstWords(predator.getRunningLength());
        }
        final int sharedLiterals = Math.min(
                rlwi.getNumberOfLiteralWords(),
                rlwj.getNumberOfLiteralWords());
        if (sharedLiterals <= 0) {
            continue;
        }
        desiredrlwcount -= sharedLiterals;
        int idx = 0;
        while (idx < sharedLiterals) {
            container.addWord(rlwi.getLiteralWordAt(idx)
                    & rlwj.getLiteralWordAt(idx));
            ++idx;
        }
        rlwi.discardFirstWords(sharedLiterals);
        rlwj.discardFirstWords(sharedLiterals);
    }
}
/**
 * Computes the AND aggregate between two iterators and appends it to the
 * container, continuing until either iterator is exhausted.
 *
 * @param container where to write
 * @param rlwi      first iterator to aggregate
 * @param rlwj      second iterator to aggregate
 */
static void andToContainer(final BitmapStorage32 container,
        final IteratingRLW32 rlwi, IteratingRLW32 rlwj) {
    while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
        // Consume running words until both sides sit on literal words.
        while ((rlwi.getRunningLength() > 0)
                || (rlwj.getRunningLength() > 0)) {
            // "predator" is the side with the longer (or equal) run,
            // "prey" is the other side.
            final IteratingRLW32 prey;
            final IteratingRLW32 predator;
            if (rlwi.getRunningLength() < rlwj.getRunningLength()) {
                prey = rlwi;
                predator = rlwj;
            } else {
                prey = rlwj;
                predator = rlwi;
            }
            if (predator.getRunningBit()) {
                // Run of ones: the result is whatever the prey holds;
                // any shortfall is padded with zero words.
                final int copied = discharge(container, prey,
                        predator.getRunningLength());
                container.addStreamOfEmptyWords(false,
                        predator.getRunningLength() - copied);
            } else {
                // Run of zeros: the result is zero, prey words are skipped.
                container.addStreamOfEmptyWords(false,
                        predator.getRunningLength());
                prey.discardFirstWords(predator.getRunningLength());
            }
            predator.discardFirstWords(predator.getRunningLength());
        }
        final int sharedLiterals = Math.min(
                rlwi.getNumberOfLiteralWords(),
                rlwj.getNumberOfLiteralWords());
        if (sharedLiterals <= 0) {
            continue;
        }
        int idx = 0;
        while (idx < sharedLiterals) {
            container.addWord(rlwi.getLiteralWordAt(idx)
                    & rlwj.getLiteralWordAt(idx));
            ++idx;
        }
        rlwi.discardFirstWords(sharedLiterals);
        rlwj.discardFirstWords(sharedLiterals);
    }
}
/**
 * Compute the first few words of the XOR aggregate between two
 * iterators.
 *
 * The loop stops when either iterator is exhausted or when the word
 * budget runs out. The budget is decremented once per outer pass and by
 * the number of literal words combined in that pass.
 *
 * @param container       where to write
 * @param desiredrlwcount number of words to be written (max)
 * @param rlwi            first iterator to aggregate
 * @param rlwj            second iterator to aggregate
 */
public static void xorToContainer(final BitmapStorage32 container,
        int desiredrlwcount, final IteratingRLW32 rlwi,
        IteratingRLW32 rlwj) {
    while ((rlwi.size() > 0) && (rlwj.size() > 0)
            && (desiredrlwcount-- > 0)) {
        // Consume running words until both sides sit on literal words.
        while ((rlwi.getRunningLength() > 0)
                || (rlwj.getRunningLength() > 0)) {
            // "predator" is the side with the longer (or equal) run,
            // "prey" is the other side.
            final IteratingRLW32 prey;
            final IteratingRLW32 predator;
            if (rlwi.getRunningLength() < rlwj.getRunningLength()) {
                prey = rlwi;
                predator = rlwj;
            } else {
                prey = rlwj;
                predator = rlwi;
            }
            // Against a run of ones the prey goes through
            // dischargeNegated, against a run of zeros through discharge.
            // Any shortfall is padded with a run carrying the predator's bit.
            final boolean predatorBit = predator.getRunningBit();
            final int copied;
            if (predatorBit) {
                copied = dischargeNegated(container, prey,
                        predator.getRunningLength());
            } else {
                copied = discharge(container, prey,
                        predator.getRunningLength());
            }
            container.addStreamOfEmptyWords(predatorBit,
                    predator.getRunningLength() - copied);
            predator.discardFirstWords(predator.getRunningLength());
        }
        final int sharedLiterals = Math.min(
                rlwi.getNumberOfLiteralWords(),
                rlwj.getNumberOfLiteralWords());
        if (sharedLiterals <= 0) {
            continue;
        }
        desiredrlwcount -= sharedLiterals;
        int idx = 0;
        while (idx < sharedLiterals) {
            container.addWord(rlwi.getLiteralWordAt(idx)
                    ^ rlwj.getLiteralWordAt(idx));
            ++idx;
        }
        rlwi.discardFirstWords(sharedLiterals);
        rlwj.discardFirstWords(sharedLiterals);
    }
}
/**
 * ORs the words of the iterator into the array, starting at index 0,
 * until the iterator is exhausted or the array is full.
 *
 * Words that are merged are consumed from the iterator; if the array
 * fills up first, only the words that fit are discarded.
 *
 * @param bitmap array that is updated in place
 * @param i      source of data
 * @return index just past the last array word covered
 */
protected static int inplaceor(int[] bitmap, IteratingRLW32 i) {
    int cursor = 0;
    int pending = i.size();
    while (pending > 0) {
        if (cursor + pending >= bitmap.length) {
            // The current block may not fit: merge what fits, then stop.
            final int room = bitmap.length - cursor;
            final int run = i.getRunningLength();
            if (cursor + run > bitmap.length) {
                // The run alone reaches past the end of the array.
                if (i.getRunningBit()) {
                    Arrays.fill(bitmap, cursor, bitmap.length, ~0);
                }
                i.discardFirstWords(room);
                return bitmap.length;
            }
            if (i.getRunningBit()) {
                Arrays.fill(bitmap, cursor, cursor + run, ~0);
            }
            cursor += run;
            int lit = 0;
            while (cursor < bitmap.length) {
                bitmap[cursor++] |= i.getLiteralWordAt(lit++);
            }
            i.discardFirstWords(room);
            return cursor;
        }
        // The whole block fits strictly inside the array.
        final int run = i.getRunningLength();
        if (i.getRunningBit()) {
            Arrays.fill(bitmap, cursor, cursor + run, ~0);
        }
        cursor += run;
        final int literals = i.getNumberOfLiteralWords();
        int lit = 0;
        while (lit < literals) {
            bitmap[cursor++] |= i.getLiteralWordAt(lit);
            ++lit;
        }
        if (!i.next()) {
            return cursor;
        }
        pending = i.size();
    }
    return cursor;
}
/**
 * XORs the words of the iterator into the array, starting at index 0,
 * until the iterator is exhausted or the array is full.
 *
 * Words that are merged are consumed from the iterator; if the array
 * fills up first, only the words that fit are discarded.
 *
 * @param bitmap array that is updated in place
 * @param i      source of data
 * @return index just past the last array word covered
 */
protected static int inplacexor(int[] bitmap, IteratingRLW32 i) {
    int cursor = 0;
    int pending = i.size();
    while (pending > 0) {
        if (cursor + pending >= bitmap.length) {
            // The current block may not fit: merge what fits, then stop.
            final int room = bitmap.length - cursor;
            final int run = i.getRunningLength();
            if (cursor + run > bitmap.length) {
                // The run alone reaches past the end of the array.
                if (i.getRunningBit()) {
                    int idx = cursor;
                    while (idx < bitmap.length) {
                        bitmap[idx] ^= ~0;
                        ++idx;
                    }
                }
                i.discardFirstWords(room);
                return bitmap.length;
            }
            if (i.getRunningBit()) {
                int idx = cursor;
                while (idx < cursor + run) {
                    bitmap[idx] ^= ~0;
                    ++idx;
                }
            }
            cursor += run;
            int lit = 0;
            while (cursor < bitmap.length) {
                bitmap[cursor++] ^= i.getLiteralWordAt(lit++);
            }
            i.discardFirstWords(room);
            return cursor;
        }
        // The whole block fits strictly inside the array.
        final int run = i.getRunningLength();
        if (i.getRunningBit()) {
            // XOR with a run of ones flips every bit of the covered words.
            int idx = cursor;
            while (idx < cursor + run) {
                bitmap[idx] ^= ~0;
                ++idx;
            }
        }
        cursor += run;
        final int literals = i.getNumberOfLiteralWords();
        int lit = 0;
        while (lit < literals) {
            bitmap[cursor++] ^= i.getLiteralWordAt(lit);
            ++lit;
        }
        if (!i.next()) {
            return cursor;
        }
        pending = i.size();
    }
    return cursor;
}
/**
 * ANDs the words of the iterator into the array, starting at index 0,
 * until the iterator is exhausted or the array is full.
 *
 * Words that are merged are consumed from the iterator; if the array
 * fills up first, only the words that fit are discarded.
 *
 * @param bitmap array that is updated in place
 * @param i      source of data
 * @return index just past the last array word covered
 */
protected static int inplaceand(int[] bitmap, IteratingRLW32 i) {
    int cursor = 0;
    int pending = i.size();
    while (pending > 0) {
        if (cursor + pending >= bitmap.length) {
            // The current block may not fit: merge what fits, then stop.
            final int room = bitmap.length - cursor;
            final int run = i.getRunningLength();
            if (cursor + run > bitmap.length) {
                // The run alone reaches past the end of the array.
                if (!i.getRunningBit()) {
                    int idx = cursor;
                    while (idx < bitmap.length) {
                        bitmap[idx] = 0;
                        ++idx;
                    }
                }
                i.discardFirstWords(room);
                return bitmap.length;
            }
            if (!i.getRunningBit()) {
                int idx = cursor;
                while (idx < cursor + run) {
                    bitmap[idx] = 0;
                    ++idx;
                }
            }
            cursor += run;
            int lit = 0;
            while (cursor < bitmap.length) {
                bitmap[cursor++] &= i.getLiteralWordAt(lit++);
            }
            i.discardFirstWords(room);
            return cursor;
        }
        // The whole block fits strictly inside the array.
        final int run = i.getRunningLength();
        if (!i.getRunningBit()) {
            // AND with a run of zeros clears the covered words.
            int idx = cursor;
            while (idx < cursor + run) {
                bitmap[idx] = 0;
                ++idx;
            }
        }
        cursor += run;
        final int literals = i.getNumberOfLiteralWords();
        int lit = 0;
        while (lit < literals) {
            bitmap[cursor++] &= i.getLiteralWordAt(lit);
            ++lit;
        }
        if (!i.next()) {
            return cursor;
        }
        pending = i.size();
    }
    return cursor;
}
/**
 * Buffer size used by the {@code bufferedand}, {@code bufferedor} and
 * {@code bufferedxor} overloads that do not take an explicit
 * {@code bufSize}.
 *
 * An optimization option. Larger values may improve speed, but at the
 * expense of memory.
 */
public static final int DEFAULT_MAX_BUF_SIZE = 65536;
}
/**
 * Iterator producing the OR aggregate of a list of iterators, one buffer
 * at a time. Each call to {@link #next()} ORs the next stretch of every
 * input into a fixed-size {@code int} array and returns it re-encoded as a
 * compressed bitmap iterator.
 */
class ORIt implements CloneableIterator<EWAHIterator32> {
    // Fields are not final so that clone() can give the copy its own
    // buffer, scratch array and list (same layout as XORIt and AndIt).
    EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32();
    int[] hardBitmap;
    LinkedList<IteratingRLW32> ll;

    /**
     * @param basell  iterators to aggregate; the list is used directly and
     *                exhausted iterators are removed from it
     * @param bufSize number of 32-bit words in the scratch array
     */
    ORIt(LinkedList<IteratingRLW32> basell, final int bufSize) {
        this.ll = basell;
        this.hardBitmap = new int[bufSize];
    }

    /**
     * Copies this iterator. The copy gets its own buffer, scratch array and
     * list, but the list is a shallow copy: both lists reference the same
     * input iterators.
     *
     * @return the copy
     * @throws CloneNotSupportedException if cloning fails
     */
    @Override
    public ORIt clone() throws CloneNotSupportedException {
        // This used to cast to XORIt, which can never succeed for an ORIt.
        ORIt answer = (ORIt) super.clone();
        answer.buffer = this.buffer.clone();
        answer.hardBitmap = this.hardBitmap.clone();
        answer.ll = new LinkedList<IteratingRLW32>(this.ll);
        return answer;
    }

    /**
     * @return true while at least one input iterator remains in the list
     */
    @Override
    public boolean hasNext() {
        return !this.ll.isEmpty();
    }

    /**
     * Computes the next stretch of the OR aggregate.
     *
     * @return iterator over the freshly filled buffer
     */
    @Override
    public EWAHIterator32 next() {
        this.buffer.clear();
        int effective = 0; // largest number of words any input covered
        Iterator<IteratingRLW32> i = this.ll.iterator();
        while (i.hasNext()) {
            IteratingRLW32 rlw = i.next();
            if (rlw.size() > 0) {
                int eff = IteratorAggregation32.inplaceor(
                        this.hardBitmap, rlw);
                if (eff > effective) {
                    effective = eff;
                }
            } else {
                // Exhausted input: drop it so hasNext() can turn false.
                i.remove();
            }
        }
        for (int k = 0; k < effective; ++k) {
            this.buffer.addWord(this.hardBitmap[k]);
        }
        // Reset the scratch array for the next call.
        Arrays.fill(this.hardBitmap, 0);
        return this.buffer.getEWAHIterator();
    }
}
/**
 * Iterator producing the XOR aggregate of a list of iterators, one buffer
 * at a time. Each call to {@link #next()} XORs the next stretch of every
 * input into a fixed-size {@code int} array and returns it re-encoded as a
 * compressed bitmap iterator.
 */
class XORIt implements CloneableIterator<EWAHIterator32> {
    EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32();
    int[] hardbitmap;
    LinkedList<IteratingRLW32> ll;

    /**
     * @param basell  iterators to aggregate; the list is used directly and
     *                exhausted iterators are removed from it
     * @param bufSize number of 32-bit words in the scratch array
     */
    XORIt(LinkedList<IteratingRLW32> basell, final int bufSize) {
        this.ll = basell;
        this.hardbitmap = new int[bufSize];
    }

    /**
     * Copies this iterator. The copy gets its own buffer, scratch array and
     * list, but the list is a shallow copy: both lists reference the same
     * input iterators.
     *
     * @return the copy
     * @throws CloneNotSupportedException if cloning fails
     */
    @Override
    public XORIt clone() throws CloneNotSupportedException {
        XORIt answer = (XORIt) super.clone();
        answer.buffer = this.buffer.clone();
        answer.hardbitmap = this.hardbitmap.clone();
        // Copy constructor: same shallow copy as LinkedList.clone(),
        // without the unchecked cast.
        answer.ll = new LinkedList<IteratingRLW32>(this.ll);
        return answer;
    }

    /**
     * @return true while at least one input iterator remains in the list
     */
    @Override
    public boolean hasNext() {
        return !this.ll.isEmpty();
    }

    /**
     * Computes the next stretch of the XOR aggregate.
     *
     * @return iterator over the freshly filled buffer
     */
    @Override
    public EWAHIterator32 next() {
        this.buffer.clear();
        int effective = 0; // largest number of words any input covered
        Iterator<IteratingRLW32> i = this.ll.iterator();
        while (i.hasNext()) {
            IteratingRLW32 rlw = i.next();
            if (rlw.size() > 0) {
                int eff = IteratorAggregation32.inplacexor(
                        this.hardbitmap, rlw);
                if (eff > effective) {
                    effective = eff;
                }
            } else {
                // Exhausted input: drop it so hasNext() can turn false.
                i.remove();
            }
        }
        for (int k = 0; k < effective; ++k) {
            this.buffer.addWord(this.hardbitmap[k]);
        }
        // Reset the scratch array for the next call.
        Arrays.fill(this.hardbitmap, 0);
        return this.buffer.getEWAHIterator();
    }
}
/**
 * Iterator producing the AND aggregate of a list of iterators, one buffer
 * at a time. {@link #next()} reads the first two entries of the list
 * unconditionally, so the list must hold at least two iterators.
 */
class AndIt implements CloneableIterator<EWAHIterator32> {
    EWAHCompressedBitmap32 buffer = new EWAHCompressedBitmap32();
    LinkedList<IteratingRLW32> ll;
    final int bufferSize;

    /**
     * @param basell  iterators to aggregate; the list is used directly and
     *                is cleared once any of its iterators is exhausted
     * @param bufSize per-iterator word budget; {@link #next()} multiplies
     *                it by the number of iterators
     */
    public AndIt(LinkedList<IteratingRLW32> basell, final int bufSize) {
        this.ll = basell;
        this.bufferSize = bufSize;
    }

    /**
     * @return true while the list of input iterators is non-empty
     */
    @Override
    public boolean hasNext() {
        return !this.ll.isEmpty();
    }

    /**
     * Copies this iterator. The copy gets its own buffer and list, but the
     * list is a shallow copy: both lists reference the same input iterators.
     *
     * @return the copy
     * @throws CloneNotSupportedException if cloning fails
     */
    @Override
    public AndIt clone() throws CloneNotSupportedException {
        AndIt answer = (AndIt) super.clone();
        answer.buffer = this.buffer.clone();
        // Copy constructor: same shallow copy as LinkedList.clone(),
        // without the unchecked cast.
        answer.ll = new LinkedList<IteratingRLW32>(this.ll);
        return answer;
    }

    /**
     * Computes the next stretch of the AND aggregate.
     *
     * @return iterator over the freshly filled buffer
     */
    @Override
    public EWAHIterator32 next() {
        this.buffer.clear();
        // Start with a bounded AND of the first two inputs.
        IteratorAggregation32.andToContainer(this.buffer,
                this.bufferSize * this.ll.size(), this.ll.get(0),
                this.ll.get(1));
        if (this.ll.size() > 2) {
            Iterator<IteratingRLW32> i = this.ll.iterator();
            // Skip the two inputs already combined above.
            i.next();
            i.next();
            EWAHCompressedBitmap32 tmpbuffer = new EWAHCompressedBitmap32();
            // Fold each remaining input into the partial result, stopping
            // early once the partial result reports a size of zero bytes.
            while (i.hasNext() && this.buffer.sizeInBytes() > 0) {
                IteratorAggregation32
                        .andToContainer(tmpbuffer,
                                this.buffer.getIteratingRLW(),
                                i.next());
                this.buffer.swap(tmpbuffer);
                tmpbuffer.clear();
            }
        }
        // Once any input is exhausted, empty the list so that hasNext()
        // returns false. The break right after clear() leaves the
        // for-each loop before its iterator is used again.
        for (IteratingRLW32 aLl : this.ll) {
            if (aLl.size() == 0) {
                this.ll.clear();
                break;
            }
        }
        return this.buffer.getEWAHIterator();
    }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy