All downloads are free. Search and download functionality uses the official Maven repository.

com.googlecode.javaewah.IteratorAggregation Maven / Gradle / Ivy

Go to download

The bit array data structure is implemented in Java as the BitSet class. Unfortunately, this fails to scale without compression. JavaEWAH is a word-aligned compressed variant of the Java bitset class. It uses a 64-bit run-length encoding (RLE) compression scheme. The goal of word-aligned compression is not to achieve the best compression, but rather to improve query processing time. Hence, we try to save CPU cycles, maybe at the expense of storage. However, the EWAH scheme we implemented is always more efficient storage-wise than an uncompressed bitmap (implemented in Java as the BitSet class). Unlike some alternatives, javaewah does not rely on a patented scheme.

There is a newer version: 1.2.3
Show newest version
package com.googlecode.javaewah;

import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;

/*
 * Copyright 2009-2016, Daniel Lemire, Cliff Moon, David McIntosh, Robert Becho, Google Inc., Veronika Zenz, Owen Kaser, Gregory Ssi-Yan-Kai, Rory Graves
 * Licensed under the Apache License, Version 2.0.
 */

/**
 * Set of helper functions to aggregate bitmaps.
 */
public final class IteratorAggregation {

    /** Private constructor to prevent instantiation */
    private IteratorAggregation() {}

    /**
     * Wraps an iterator so that it exposes the bitwise complement of the
     * wrapped data: the running bit is flipped and every literal word is
     * inverted. All positional queries and all advancing operations are
     * forwarded to {@code x}, so consuming the wrapper consumes {@code x}.
     *
     * @param x iterator to negate
     * @return negated version of the iterator; its {@code clone()} always
     *         throws {@link CloneNotSupportedException}
     */
    public static IteratingRLW not(final IteratingRLW x) {
        return new IteratingRLW() {

            // ---- values that are complemented ----

            @Override
            public boolean getRunningBit() {
                final boolean underlying = x.getRunningBit();
                return !underlying;
            }

            @Override
            public long getLiteralWordAt(int position) {
                final long underlying = x.getLiteralWordAt(position);
                return ~underlying;
            }

            // ---- values that are passed through unchanged ----

            @Override
            public long getRunningLength() {
                return x.getRunningLength();
            }

            @Override
            public int getNumberOfLiteralWords() {
                return x.getNumberOfLiteralWords();
            }

            @Override
            public long size() {
                return x.size();
            }

            // ---- operations that advance the wrapped iterator ----

            @Override
            public boolean next() {
                return x.next();
            }

            @Override
            public void discardFirstWords(long wordCount) {
                x.discardFirstWords(wordCount);
            }

            @Override
            public void discardRunningWords() {
                x.discardRunningWords();
            }

            // ---- cloning is deliberately unsupported ----

            @Override
            public IteratingRLW clone()
                    throws CloneNotSupportedException {
                throw new CloneNotSupportedException();
            }
        };
    }

    /**
     * Aggregate the iterators (logical AND) using a bitmap buffer of
     * {@link #DEFAULT_MAX_BUF_SIZE} words.
     *
     * @param al set of iterators to aggregate
     * @return iterator over the AND aggregate
     * @throws IllegalArgumentException if no iterator is provided
     */
    public static IteratingRLW bufferedand(final IteratingRLW... al) {
        return bufferedand(DEFAULT_MAX_BUF_SIZE, al);
    }

    /**
     * Aggregate the iterators (logical AND) using a bitmap buffer.
     *
     * @param bufSize size of the internal buffer used by the iterator in
     *                64-bit words (per input iterator)
     * @param al      set of iterators to aggregate
     * @return iterator over the AND aggregate; when exactly one iterator is
     *         given, that same iterator is returned as-is
     * @throws IllegalArgumentException if no iterator is provided
     */
    public static IteratingRLW bufferedand(final int bufSize, final IteratingRLW... al) {
        if (al.length == 0) {
            throw new IllegalArgumentException("Need at least one iterator");
        }
        if (al.length == 1) {
            // Nothing to combine: hand back the sole input unchanged.
            return al[0];
        }
        // Typed list (instead of a raw LinkedList) so the element type is checked.
        final LinkedList<IteratingRLW> basell = new LinkedList<IteratingRLW>();
        Collections.addAll(basell, al);
        return new BufferedIterator(new BufferedAndIterator(basell, bufSize));
    }

    /**
     * Aggregate the iterators (logical OR) using a bitmap buffer of
     * {@link #DEFAULT_MAX_BUF_SIZE} words.
     *
     * @param al set of iterators to aggregate
     * @return iterator over the OR aggregate
     * @throws IllegalArgumentException if no iterator is provided
     */
    public static IteratingRLW bufferedor(final IteratingRLW... al) {
        return bufferedor(DEFAULT_MAX_BUF_SIZE, al);
    }

    /**
     * Aggregate the iterators (logical OR) using a bitmap buffer.
     *
     * @param bufSize size of the internal buffer used by the iterator in
     *                64-bit words
     * @param al      iterators to aggregate
     * @return iterator over the OR aggregate; when exactly one iterator is
     *         given, that same iterator is returned as-is
     * @throws IllegalArgumentException if no iterator is provided
     */
    public static IteratingRLW bufferedor(final int bufSize,
                                          final IteratingRLW... al) {
        if (al.length == 0) {
            throw new IllegalArgumentException("Need at least one iterator");
        }
        if (al.length == 1) {
            // Nothing to combine: hand back the sole input unchanged.
            return al[0];
        }
        // Typed list (instead of a raw LinkedList) so the element type is checked.
        final LinkedList<IteratingRLW> basell = new LinkedList<IteratingRLW>();
        Collections.addAll(basell, al);
        return new BufferedIterator(new BufferedORIterator(basell, bufSize));
    }

    /**
     * Aggregate the iterators (logical XOR) using a bitmap buffer of
     * {@link #DEFAULT_MAX_BUF_SIZE} words.
     *
     * @param al set of iterators to aggregate
     * @return iterator over the XOR aggregate
     * @throws IllegalArgumentException if no iterator is provided
     */
    public static IteratingRLW bufferedxor(final IteratingRLW... al) {
        return bufferedxor(DEFAULT_MAX_BUF_SIZE, al);
    }

    /**
     * Aggregate the iterators (logical XOR) using a bitmap buffer.
     *
     * @param bufSize size of the internal buffer used by the iterator in
     *                64-bit words
     * @param al      iterators to aggregate
     * @return iterator over the XOR aggregate; when exactly one iterator is
     *         given, that same iterator is returned as-is
     * @throws IllegalArgumentException if no iterator is provided
     */
    public static IteratingRLW bufferedxor(final int bufSize, final IteratingRLW... al) {
        if (al.length == 0) {
            throw new IllegalArgumentException("Need at least one iterator");
        }
        if (al.length == 1) {
            // Nothing to combine: hand back the sole input unchanged.
            return al[0];
        }
        // Typed list (instead of a raw LinkedList) so the element type is checked.
        final LinkedList<IteratingRLW> basell = new LinkedList<IteratingRLW>();
        Collections.addAll(basell, al);
        return new BufferedIterator(new BufferedXORIterator(basell, bufSize));
    }

    /**
     * Drains the iterator, emitting for each step a stream of zero words as
     * long as the step's reported size instead of its actual content.
     *
     * @param container where we write
     * @param i         the iterator (consumed by this call)
     */
    protected static void dischargeAsEmpty(final BitmapStorage container,
                                           final IteratingRLW i) {
        while (true) {
            if (i.size() <= 0) {
                return;
            }
            // The content is ignored: only the word count is reproduced, as zeros.
            container.addStreamOfEmptyWords(false, i.size());
            i.next();
        }
    }

    /**
     * Copies words from the iterator into the container, stopping once
     * {@code max} words have been written or the iterator reports size zero.
     * The words copied are discarded from the iterator.
     *
     * @param container target for writes
     * @param i         source of data
     * @param max       maximal number of writes
     * @return how many written
     */
    protected static long discharge(final BitmapStorage container, IteratingRLW i, long max) {
        long written = 0;
        while (i.size() > 0 && written < max) {
            // First the run of identical words, truncated to the remaining budget.
            long runWords = i.getRunningLength();
            if (runWords > 0) {
                final boolean overshoots = runWords + written > max;
                if (overshoots) {
                    runWords = max - written;
                }
                container.addStreamOfEmptyWords(i.getRunningBit(), runWords);
                written += runWords;
            }
            // Then the literal words, again truncated to the remaining budget.
            long literalWords = i.getNumberOfLiteralWords();
            if (literalWords + written > max) {
                literalWords = max - written;
            }
            int k = 0;
            while (k < literalWords) {
                container.addWord(i.getLiteralWordAt(k));
                k++;
            }
            written += literalWords;
            // Drop exactly what was emitted in this pass.
            i.discardFirstWords(literalWords + runWords);
        }
        return written;
    }

    /**
     * Copies the bitwise complement of the iterator's words into the
     * container, stopping once {@code max} words have been written or the
     * iterator reports size zero. The words copied are discarded from the
     * iterator.
     *
     * @param container target for writes
     * @param i         source of data
     * @param max       maximal number of writes
     * @return how many written
     */
    protected static long dischargeNegated(final BitmapStorage container, IteratingRLW i, long max) {
        long written = 0;
        while (i.size() > 0 && written < max) {
            // First the run, emitted with the opposite bit and truncated to the budget.
            long runWords = i.getRunningLength();
            if (runWords > 0) {
                final boolean overshoots = runWords + written > max;
                if (overshoots) {
                    runWords = max - written;
                }
                final boolean flippedBit = !i.getRunningBit();
                container.addStreamOfEmptyWords(flippedBit, runWords);
                written += runWords;
            }
            // Then the literal words, inverted and truncated to the budget.
            long literalWords = i.getNumberOfLiteralWords();
            if (literalWords + written > max) {
                literalWords = max - written;
            }
            int k = 0;
            while (k < literalWords) {
                container.addWord(~i.getLiteralWordAt(k));
                k++;
            }
            written += literalWords;
            // Drop exactly what was emitted in this pass.
            i.discardFirstWords(literalWords + runWords);
        }
        return written;
    }

    /**
     * Writes the beginning of the AND aggregate of two iterators into the
     * container, consuming both iterators as it goes.
     *
     * The budget {@code desiredrlwcount} is decremented once per outer pass
     * and by the number of literal words emitted; words produced while
     * resolving runs are not charged against it, so it is a soft limit.
     *
     * @param container       where to write
     * @param desiredrlwcount budget controlling when to stop (see above)
     * @param rlwi            first iterator to aggregate
     * @param rlwj            second iterator to aggregate
     */
    static void andToContainer(final BitmapStorage container,
                               int desiredrlwcount, final IteratingRLW rlwi, IteratingRLW rlwj) {
        while ((rlwi.size() > 0) && (rlwj.size() > 0)
                && (desiredrlwcount-- > 0)) {
            // Resolve runs until both iterators are positioned on literal words.
            while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
                // The "predator" holds the longer (or equal) run and dictates the output.
                final IteratingRLW prey;
                final IteratingRLW predator;
                if (rlwi.getRunningLength() < rlwj.getRunningLength()) {
                    prey = rlwi;
                    predator = rlwj;
                } else {
                    prey = rlwj;
                    predator = rlwi;
                }
                if (predator.getRunningBit()) {
                    // Run of ones: the result is whatever the prey holds, zero-padded.
                    final long copied = discharge(container, prey, predator.getRunningLength());
                    container.addStreamOfEmptyWords(false, predator.getRunningLength() - copied);
                    predator.discardFirstWords(predator.getRunningLength());
                } else {
                    // Run of zeros: the result is zeros, the prey's words are skipped.
                    container.addStreamOfEmptyWords(false, predator.getRunningLength());
                    prey.discardFirstWords(predator.getRunningLength());
                    predator.discardFirstWords(predator.getRunningLength());
                }
            }
            final int sharedLiterals = Math.min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords());
            if (sharedLiterals <= 0) {
                continue;
            }
            desiredrlwcount -= sharedLiterals;
            for (int w = 0; w < sharedLiterals; w++) {
                container.addWord(rlwi.getLiteralWordAt(w) & rlwj.getLiteralWordAt(w));
            }
            rlwi.discardFirstWords(sharedLiterals);
            rlwj.discardFirstWords(sharedLiterals);
        }
    }

    /**
     * Writes the AND aggregate of two iterators into the container,
     * consuming both iterators and stopping as soon as either one reports
     * size zero.
     *
     * @param container where to write
     * @param rlwi      first iterator to aggregate
     * @param rlwj      second iterator to aggregate
     */
    static void andToContainer(final BitmapStorage container,
                               final IteratingRLW rlwi, IteratingRLW rlwj) {
        while ((rlwi.size() > 0) && (rlwj.size() > 0)) {
            // Resolve runs until both iterators are positioned on literal words.
            while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
                // The "predator" holds the longer (or equal) run and dictates the output.
                final IteratingRLW prey;
                final IteratingRLW predator;
                if (rlwi.getRunningLength() < rlwj.getRunningLength()) {
                    prey = rlwi;
                    predator = rlwj;
                } else {
                    prey = rlwj;
                    predator = rlwi;
                }
                if (predator.getRunningBit()) {
                    // Run of ones: the result is whatever the prey holds, zero-padded.
                    final long copied = discharge(container, prey, predator.getRunningLength());
                    container.addStreamOfEmptyWords(false, predator.getRunningLength() - copied);
                    predator.discardFirstWords(predator.getRunningLength());
                } else {
                    // Run of zeros: the result is zeros, the prey's words are skipped.
                    container.addStreamOfEmptyWords(false, predator.getRunningLength());
                    prey.discardFirstWords(predator.getRunningLength());
                    predator.discardFirstWords(predator.getRunningLength());
                }
            }
            final int sharedLiterals = Math.min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords());
            if (sharedLiterals <= 0) {
                continue;
            }
            for (int w = 0; w < sharedLiterals; w++) {
                container.addWord(rlwi.getLiteralWordAt(w) & rlwj.getLiteralWordAt(w));
            }
            rlwi.discardFirstWords(sharedLiterals);
            rlwj.discardFirstWords(sharedLiterals);
        }
    }

    /**
     * Writes the beginning of the XOR aggregate of two iterators into the
     * container, consuming both iterators as it goes.
     *
     * The budget {@code desiredrlwcount} is decremented once per outer pass
     * and by the number of literal words emitted; words produced while
     * resolving runs are not charged against it, so it is a soft limit.
     *
     * @param container       where to write
     * @param desiredrlwcount number of words to be written (soft maximum)
     * @param rlwi            first iterator to aggregate
     * @param rlwj            second iterator to aggregate
     */
    public static void xorToContainer(final BitmapStorage container,
                                      int desiredrlwcount, final IteratingRLW rlwi,
                                      final IteratingRLW rlwj) {
        while ((rlwi.size() > 0) && (rlwj.size() > 0)
                && (desiredrlwcount-- > 0)) {
            // Resolve runs until both iterators are positioned on literal words.
            while ((rlwi.getRunningLength() > 0) || (rlwj.getRunningLength() > 0)) {
                // The "predator" holds the longer (or equal) run and dictates the output.
                final IteratingRLW prey;
                final IteratingRLW predator;
                if (rlwi.getRunningLength() < rlwj.getRunningLength()) {
                    prey = rlwi;
                    predator = rlwj;
                } else {
                    prey = rlwj;
                    predator = rlwi;
                }
                if (predator.getRunningBit()) {
                    // XOR with ones: the prey's words inverted, padded with ones.
                    final long copied = dischargeNegated(container, prey, predator.getRunningLength());
                    container.addStreamOfEmptyWords(true, predator.getRunningLength() - copied);
                    predator.discardFirstWords(predator.getRunningLength());
                } else {
                    // XOR with zeros: the prey's words unchanged, padded with zeros.
                    final long copied = discharge(container, prey, predator.getRunningLength());
                    container.addStreamOfEmptyWords(false, predator.getRunningLength() - copied);
                    predator.discardFirstWords(predator.getRunningLength());
                }
            }
            final int sharedLiterals = Math.min(rlwi.getNumberOfLiteralWords(), rlwj.getNumberOfLiteralWords());
            if (sharedLiterals <= 0) {
                continue;
            }
            desiredrlwcount -= sharedLiterals;
            for (int w = 0; w < sharedLiterals; w++) {
                container.addWord(rlwi.getLiteralWordAt(w) ^ rlwj.getLiteralWordAt(w));
            }
            rlwi.discardFirstWords(sharedLiterals);
            rlwj.discardFirstWords(sharedLiterals);
        }
    }

    /**
     * ORs the words produced by the iterator into {@code bitmap}, starting at
     * index 0, until the iterator is exhausted or the array is full. The
     * words consumed are removed from the iterator.
     *
     * @param bitmap uncompressed buffer that is updated in place
     * @param i      source of words
     * @return the index just past the last word of {@code bitmap} covered by
     *         the iterator's data (at most {@code bitmap.length})
     */
    protected static int inplaceor(long[] bitmap, IteratingRLW i) {
        int pos = 0;
        long s;
        while ((s = i.size()) > 0) {
            if (pos + s < bitmap.length) {
                // The whole step fits strictly inside the buffer. Assuming size()
                // counts the run words plus the literal words, the run length is
                // below bitmap.length and the int cast is lossless.
                final int runLength = (int) i.getRunningLength();
                if (i.getRunningBit()) {
                    Arrays.fill(bitmap, pos, pos + runLength, ~0L);
                }
                pos += runLength;
                final int literalCount = i.getNumberOfLiteralWords();
                for (int k = 0; k < literalCount; ++k) {
                    bitmap[pos++] |= i.getLiteralWordAt(k);
                }
                if (!i.next()) {
                    return pos;
                }
            } else {
                // The step reaches (or passes) the end of the buffer.
                final int remaining = bitmap.length - pos;
                // Compare as long: narrowing the run length to int first could
                // truncate it (even to a negative value) and "pos + length"
                // could overflow, sending a long run down the wrong path.
                final long pendingRun = i.getRunningLength();
                if (pendingRun > remaining) {
                    if (i.getRunningBit()) {
                        Arrays.fill(bitmap, pos, bitmap.length, ~0L);
                    }
                    i.discardFirstWords(remaining);
                    return bitmap.length;
                }
                final int runLength = (int) pendingRun; // safe: pendingRun <= remaining
                if (i.getRunningBit()) {
                    Arrays.fill(bitmap, pos, pos + runLength, ~0L);
                }
                pos += runLength;
                // Fill the rest of the buffer from the literal words.
                for (int k = 0; pos < bitmap.length; ++k) {
                    bitmap[pos++] |= i.getLiteralWordAt(k);
                }
                i.discardFirstWords(remaining);
                return pos;
            }
        }
        return pos;
    }

    /**
     * XORs the words produced by the iterator into {@code bitmap}, starting
     * at index 0, until the iterator is exhausted or the array is full. The
     * words consumed are removed from the iterator.
     *
     * @param bitmap uncompressed buffer that is updated in place
     * @param i      source of words
     * @return the index just past the last word of {@code bitmap} covered by
     *         the iterator's data (at most {@code bitmap.length})
     */
    protected static int inplacexor(long[] bitmap, IteratingRLW i) {
        int pos = 0;
        long s;
        while ((s = i.size()) > 0) {
            if (pos + s < bitmap.length) {
                // The whole step fits strictly inside the buffer. Assuming size()
                // counts the run words plus the literal words, the run length is
                // below bitmap.length and the int cast is lossless.
                final int runLength = (int) i.getRunningLength();
                if (i.getRunningBit()) {
                    // XOR with a run of ones flips every word in the range.
                    for (int k = pos; k < pos + runLength; ++k) {
                        bitmap[k] = ~bitmap[k];
                    }
                }
                pos += runLength;
                final int literalCount = i.getNumberOfLiteralWords();
                for (int k = 0; k < literalCount; ++k) {
                    bitmap[pos++] ^= i.getLiteralWordAt(k);
                }
                if (!i.next()) {
                    return pos;
                }
            } else {
                // The step reaches (or passes) the end of the buffer.
                final int remaining = bitmap.length - pos;
                // Compare as long: narrowing the run length to int first could
                // truncate it (even to a negative value) and "pos + length"
                // could overflow, sending a long run down the wrong path.
                final long pendingRun = i.getRunningLength();
                if (pendingRun > remaining) {
                    if (i.getRunningBit()) {
                        for (int k = pos; k < bitmap.length; ++k) {
                            bitmap[k] = ~bitmap[k];
                        }
                    }
                    i.discardFirstWords(remaining);
                    return bitmap.length;
                }
                final int runLength = (int) pendingRun; // safe: pendingRun <= remaining
                if (i.getRunningBit()) {
                    for (int k = pos; k < pos + runLength; ++k) {
                        bitmap[k] = ~bitmap[k];
                    }
                }
                pos += runLength;
                // Fill the rest of the buffer from the literal words.
                for (int k = 0; pos < bitmap.length; ++k) {
                    bitmap[pos++] ^= i.getLiteralWordAt(k);
                }
                i.discardFirstWords(remaining);
                return pos;
            }
        }
        return pos;
    }

    /**
     * ANDs the words produced by the iterator into {@code bitmap}, starting
     * at index 0, until the iterator is exhausted or the array is full. The
     * words consumed are removed from the iterator. Words of {@code bitmap}
     * at or beyond the returned index are left untouched.
     *
     * @param bitmap uncompressed buffer that is updated in place
     * @param i      source of words
     * @return the index just past the last word of {@code bitmap} covered by
     *         the iterator's data (at most {@code bitmap.length})
     */
    protected static int inplaceand(long[] bitmap, IteratingRLW i) {
        int pos = 0;
        long s;
        while ((s = i.size()) > 0) {
            if (pos + s < bitmap.length) {
                // The whole step fits strictly inside the buffer. Assuming size()
                // counts the run words plus the literal words, the run length is
                // below bitmap.length and the int cast is lossless.
                final int runLength = (int) i.getRunningLength();
                if (!i.getRunningBit()) {
                    // AND with a run of zeros clears the range.
                    Arrays.fill(bitmap, pos, pos + runLength, 0L);
                }
                pos += runLength;
                final int literalCount = i.getNumberOfLiteralWords();
                for (int k = 0; k < literalCount; ++k) {
                    bitmap[pos++] &= i.getLiteralWordAt(k);
                }
                if (!i.next()) {
                    return pos;
                }
            } else {
                // The step reaches (or passes) the end of the buffer.
                final int remaining = bitmap.length - pos;
                // Compare as long: narrowing the run length to int first could
                // truncate it (even to a negative value) and "pos + length"
                // could overflow, sending a long run down the wrong path.
                final long pendingRun = i.getRunningLength();
                if (pendingRun > remaining) {
                    if (!i.getRunningBit()) {
                        Arrays.fill(bitmap, pos, bitmap.length, 0L);
                    }
                    i.discardFirstWords(remaining);
                    return bitmap.length;
                }
                final int runLength = (int) pendingRun; // safe: pendingRun <= remaining
                if (!i.getRunningBit()) {
                    Arrays.fill(bitmap, pos, pos + runLength, 0L);
                }
                pos += runLength;
                // Fill the rest of the buffer from the literal words.
                for (int k = 0; pos < bitmap.length; ++k) {
                    bitmap[pos++] &= i.getLiteralWordAt(k);
                }
                i.discardFirstWords(remaining);
                return pos;
            }
        }
        return pos;
    }

    /**
     * An optimization option. Larger values may improve speed, but at the
     * expense of memory.
     */
    public static final int DEFAULT_MAX_BUF_SIZE = 65536;
}

/**
 * Produces the OR aggregate of a list of iterators one buffer-load at a
 * time: each call to {@link #next()} ORs up to {@code bufSize} words from
 * every input into an uncompressed array and returns them as an
 * {@link EWAHIterator} over an internal compressed buffer.
 */
class BufferedORIterator implements CloneableIterator<EWAHIterator> {
    // Not final so that clone() can give the copy its own instances.
    EWAHCompressedBitmap buffer = new EWAHCompressedBitmap();
    long[] hardBitmap;
    LinkedList<IteratingRLW> ll;

    /**
     * @param basell  iterators to aggregate; the list is used directly (not
     *                copied) and exhausted iterators are removed from it
     * @param bufSize size of the uncompressed work buffer in 64-bit words
     */
    BufferedORIterator(LinkedList<IteratingRLW> basell, int bufSize) {
        this.ll = basell;
        this.hardBitmap = new long[bufSize];
    }

    /**
     * Returns a copy with its own buffer, work array and list. The list copy
     * is shallow: the input iterators themselves are shared with the
     * original.
     */
    @Override
    public BufferedORIterator clone() throws CloneNotSupportedException {
        // Cast to this class: casting to BufferedXORIterator (as before)
        // could only ever fail with a ClassCastException.
        BufferedORIterator answer = (BufferedORIterator) super.clone();
        answer.buffer = this.buffer.clone();
        answer.hardBitmap = this.hardBitmap.clone();
        answer.ll = new LinkedList<IteratingRLW>(this.ll);
        return answer;
    }

    /** @return true while at least one input iterator remains in the list */
    @Override
    public boolean hasNext() {
        return !this.ll.isEmpty();
    }

    /**
     * Aggregates the next chunk of every input and drops inputs that report
     * size zero.
     *
     * @return iterator over the freshly filled internal buffer; the buffer
     *         is cleared and reused by the following call
     */
    @Override
    public EWAHIterator next() {
        this.buffer.clear();
        int effective = 0;
        Iterator<IteratingRLW> i = this.ll.iterator();
        while (i.hasNext()) {
            IteratingRLW rlw = i.next();
            if (rlw.size() > 0) {
                // Track the longest prefix of the work array any input touched.
                int eff = IteratorAggregation.inplaceor(this.hardBitmap, rlw);
                if (eff > effective) {
                    effective = eff;
                }
            } else {
                i.remove();
            }
        }
        for (int k = 0; k < effective; ++k) {
            this.buffer.addWord(this.hardBitmap[k]);
        }

        // Reset the work array so the next chunk starts from all zeros.
        Arrays.fill(this.hardBitmap, 0);
        return this.buffer.getEWAHIterator();
    }
}

/**
 * Produces the XOR aggregate of a list of iterators one buffer-load at a
 * time: each call to {@link #next()} XORs up to {@code bufSize} words from
 * every input into an uncompressed array and returns them as an
 * {@link EWAHIterator} over an internal compressed buffer.
 */
class BufferedXORIterator implements CloneableIterator<EWAHIterator> {
    EWAHCompressedBitmap buffer = new EWAHCompressedBitmap();
    long[] hardbitmap;
    LinkedList<IteratingRLW> ll;

    /**
     * @param basell  iterators to aggregate; the list is used directly (not
     *                copied) and exhausted iterators are removed from it
     * @param bufSize size of the uncompressed work buffer in 64-bit words
     */
    BufferedXORIterator(LinkedList<IteratingRLW> basell, int bufSize) {
        this.ll = basell;
        this.hardbitmap = new long[bufSize];
    }

    /**
     * Returns a copy with its own buffer, work array and list. The list copy
     * is shallow: the input iterators themselves are shared with the
     * original.
     */
    @Override
    public BufferedXORIterator clone() throws CloneNotSupportedException {
        BufferedXORIterator answer = (BufferedXORIterator) super.clone();
        answer.buffer = this.buffer.clone();
        answer.hardbitmap = this.hardbitmap.clone();
        answer.ll = new LinkedList<IteratingRLW>(this.ll);
        return answer;
    }

    /** @return true while at least one input iterator remains in the list */
    @Override
    public boolean hasNext() {
        return !this.ll.isEmpty();
    }

    /**
     * Aggregates the next chunk of every input and drops inputs that report
     * size zero.
     *
     * @return iterator over the freshly filled internal buffer; the buffer
     *         is cleared and reused by the following call
     */
    @Override
    public EWAHIterator next() {
        this.buffer.clear();
        int effective = 0;
        Iterator<IteratingRLW> i = this.ll.iterator();
        while (i.hasNext()) {
            IteratingRLW rlw = i.next();
            if (rlw.size() > 0) {
                // Track the longest prefix of the work array any input touched.
                int eff = IteratorAggregation.inplacexor(this.hardbitmap, rlw);
                if (eff > effective) {
                    effective = eff;
                }
            } else {
                i.remove();
            }
        }
        for (int k = 0; k < effective; ++k) {
            this.buffer.addWord(this.hardbitmap[k]);
        }
        // Reset the work array so the next chunk starts from all zeros.
        Arrays.fill(this.hardbitmap, 0);
        return this.buffer.getEWAHIterator();
    }
}

/**
 * Produces the AND aggregate of a list of iterators chunk by chunk: each
 * call to {@link #next()} ANDs a bounded prefix of the first two inputs into
 * an internal compressed buffer, then folds every further input into that
 * buffer.
 */
class BufferedAndIterator implements CloneableIterator<EWAHIterator> {
    EWAHCompressedBitmap buffer = new EWAHCompressedBitmap();
    LinkedList<IteratingRLW> ll;
    final int bufferSize;

    /**
     * @param basell  iterators to aggregate; the list is used directly (not
     *                copied) and is cleared once any input is exhausted.
     *                {@link #next()} reads its first two elements.
     * @param bufSize per-input word budget for one chunk
     */
    public BufferedAndIterator(LinkedList<IteratingRLW> basell, int bufSize) {
        this.ll = basell;
        this.bufferSize = bufSize;
    }

    /** @return true while the input list has not been cleared */
    @Override
    public boolean hasNext() {
        return !this.ll.isEmpty();
    }

    /**
     * Returns a copy with its own buffer and list. The list copy is shallow:
     * the input iterators themselves are shared with the original.
     */
    @Override
    public BufferedAndIterator clone() throws CloneNotSupportedException {
        BufferedAndIterator answer = (BufferedAndIterator) super.clone();
        answer.buffer = this.buffer.clone();
        answer.ll = new LinkedList<IteratingRLW>(this.ll);
        return answer;
    }

    /**
     * Computes the next chunk of the aggregate.
     *
     * @return iterator over the freshly filled internal buffer; the buffer
     *         is cleared and reused by the following call
     */
    @Override
    public EWAHIterator next() {
        this.buffer.clear();
        // Compute the budget in long and clamp it: bufferSize * ll.size()
        // could overflow int for large buffers or many inputs.
        final int budget = (int) Math.min((long) this.bufferSize * this.ll.size(),
                (long) Integer.MAX_VALUE);
        IteratorAggregation.andToContainer(this.buffer, budget,
                this.ll.get(0), this.ll.get(1));
        if (this.ll.size() > 2) {
            Iterator<IteratingRLW> i = this.ll.iterator();
            // Skip the two inputs that were already combined above.
            i.next();
            i.next();
            EWAHCompressedBitmap tmpbuffer = new EWAHCompressedBitmap();
            while (i.hasNext() && this.buffer.sizeInBytes() > 0) {
                // AND the partial result with the next input, then make the
                // outcome the new partial result.
                IteratorAggregation.andToContainer(tmpbuffer, this.buffer.getIteratingRLW(), i.next());
                this.buffer.swap(tmpbuffer);
                tmpbuffer.clear();
            }
        }
        // Once any input is exhausted, clearing the list makes hasNext() false.
        for (IteratingRLW aLl : this.ll) {
            if (aLl.size() == 0) {
                this.ll.clear();
                break;
            }
        }
        return this.buffer.getEWAHIterator();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy