org.apache.druid.extendedset.intset.ConciseSet Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of druid-processing Show documentation
Show all versions of druid-processing Show documentation
A module that contains everything required to understand Druid Segments
/*
* (c) 2010 Alessandro Colantonio
*
*
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.druid.extendedset.intset;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Collection;
import java.util.ConcurrentModificationException;
import java.util.NoSuchElementException;
/**
* This is CONCISE: COmpressed 'N' Composable Integer SEt.
*
* This class is an instance of {@link IntSet} internally represented by
* compressed bitmaps through an RLE (Run-Length Encoding) compression algorithm.
* See
* http://ricerca.mat.uniroma3.it/users/dipietro/publications/0020-0190.pdf
* for more details.
*
* Notice that the iterator by {@link #iterator()} is fail-fast,
* similar to most {@link Collection}-derived classes. If the set is
* structurally modified at any time after the iterator is created, the iterator
* will throw a {@link ConcurrentModificationException}. Thus, in the face of
* concurrent modification, the iterator fails quickly and cleanly, rather than
* risking arbitrary, non-deterministic behavior at an undetermined time in the
* future. The iterator throws a {@link ConcurrentModificationException} on a
* best-effort basis. Therefore, it would be wrong to write a program that
* depended on this exception for its correctness: the fail-fast behavior of
* iterators should be used only to detect bugs.
*
* @version $Id$
*/
public class ConciseSet extends AbstractIntSet implements Serializable
{
/**
* Generated serial version ID.
*/
private static final long serialVersionUID = 560068054685367266L;
/**
* {@code true} if the class must simulate the behavior of WAH.
*/
private final boolean simulateWAH;
/**
* This is the compressed bitmap, that is a collection of words. For each
* word:
* <ul>
* <li>{@code 1*} (0x80000000) means that it is a 31-bit literal.</li>
* <li>{@code 00*} (0x00000000) indicates a sequence made up of at
* most one set bit in the first 31 bits, followed by blocks of 31 0's.
* The following 5 bits ({@code 00xxxxx*}) indicate which is the set bit
* ({@code 00000} = no set bit, {@code 00001} = LSB, {@code 11111} = MSB),
* while the remaining 25 bits indicate the number of following 0's blocks.</li>
* <li>{@code 01*} (0x40000000) indicates a sequence made up of at
* most one unset bit in the first 31 bits, followed by blocks of
* 31 1's (see the {@code 00*} case above).</li>
* </ul>
* Note that literal words 0xFFFFFFFF and 0x80000000 are allowed, thus
* zero-length sequences (i.e., such that getSequenceCount() == 0) cannot
* exist.
*/
private int[] words;
/**
* Most significant set bit within the uncompressed bit string
* (-1 when the set is empty, see {@link #reset()}).
*/
private transient int last;
/**
* Cached cardinality of the bit-set. Defined for efficient {@link #size()}
* calls. When -1, the cache is invalid and is recomputed lazily by {@link #size()}.
*/
private transient int size;
/**
* Index of the last used word in {@link #words} (-1 when the set is empty).
* The array may be longer than {@code lastWordIndex + 1}.
*/
private transient int lastWordIndex;
/**
* Creates an empty integer set
*/
public ConciseSet()
{
// Delegates to the boolean constructor with WAH simulation switched off.
this(false);
}
/**
* Creates an empty integer set
*
* @param simulateWAH {@code true} if the class must simulate the behavior of
* WAH
*/
public ConciseSet(boolean simulateWAH)
{
this.simulateWAH = simulateWAH;
// Put the instance in the canonical empty state (no words, last = -1, size = 0, lastWordIndex = -1).
reset();
}
public ConciseSet(int[] words, boolean simulateWAH)
{
  // Wraps an already-compressed word array. The array is stored as-is (no defensive copy),
  // so the caller must not modify it afterwards.
  //
  // words:       compressed words; null or a zero-length array denotes the empty set
  // simulateWAH: true if the instance must simulate the behavior of WAH
  this.simulateWAH = simulateWAH;

  // A zero-length array (which is what getWords() returns for an empty set) used to leave
  // "words" non-null with lastWordIndex == -1, making updateLast() read words[-1].
  // Treat it exactly like null: the canonical empty state.
  if (words == null || words.length == 0) {
    reset();
    return;
  }

  this.words = words;
  this.lastWordIndex = words.length - 1;
  // cardinality is unknown: it is computed lazily by size()
  this.size = -1;
  // derive the greatest element from the words
  updateLast();
}
/**
* Calculates the modulus division by 31 in a faster way than using n % 31
*
* This method of finding modulus division by an integer that is one less
* than a power of 2 takes at most O(lg(32)) time. The number of operations
* is at most 12 + 9 * ceil(lg(32)).
*
* See
* http://graphics.stanford.edu/~seander/bithacks.html
*
* @param n number to divide
*
* @return n % 31
*/
private static int maxLiteralLengthModulus(int n)
{
  // First reduction: add up 5-bit groups in parallel, then merge the two halves.
  int folded = (n & 0xC1F07C1F) + ((n >>> 5) & 0xC1F07C1F);
  folded = (folded >>> 15) + (folded & 0x00007FFF);

  // Keep folding the upper part onto the lowest 5 bits until the value fits in
  // [0, 31]; at most five folding rounds are performed.
  for (int round = 0; round < 5 && folded > 31; round++) {
    folded = (folded >>> 5) + (folded & 0x0000001F);
  }

  // 31 is congruent to 0
  return folded == 31 ? 0 : folded;
}
/**
* Calculates the multiplication by 31 in a faster way than using n * 31
*
* @param n number to multiply
*
* @return n * 31
*/
private static int maxLiteralLengthMultiplication(int n)
{
  // n * 31 == n * 32 - n
  final int timesThirtyTwo = n << 5;
  return timesThirtyTwo - n;
}
/**
* Calculates the division by 31
*
* @param n number to divide
*
* @return n / 31
*/
private static int maxLiteralLengthDivision(int n)
{
  // plain integer division (truncates toward zero)
  final int divisor = 31;
  return n / divisor;
}
/**
* Checks whether a word is a literal one
*
* @param word word to check
*
* @return true
if the given word is a literal word
*/
private static boolean isLiteral(int word)
{
  // A literal is "1*": its most significant bit is set, which for a
  // two's-complement int is the same as being negative.
  return word < 0;
}
/**
* Checks whether a word contains a sequence of 1's
*
* @param word word to check
*
* @return true
if the given word is a sequence of 1's
*/
private static boolean isOneSequence(int word)
{
  // keep only the two most significant bits: a sequence of 1's is "01*"
  final int header = word & 0xC0000000;
  return header == ConciseSetUtils.SEQUENCE_BIT;
}
/**
* Checks whether a word contains a sequence of 0's
*
* @param word word to check
*
* @return true
if the given word is a sequence of 0's
*/
private static boolean isZeroSequence(int word)
{
  // a sequence of 0's is "00*": both of the two most significant bits are clear
  final int header = word >>> 30;
  return header == 0;
}
/**
* Checks whether a word contains a sequence of 0's with no set bit, or 1's
* with no unset bit.
*
* NOTE: when {@link #simulateWAH} is {@code true}, it is
* equivalent to (and as fast as) {@code !}{@link #isLiteral(int)}
*
* @param word word to check
*
* @return {@code true} if the given word is a sequence of 0's or 1's
* but with no (un)set bit
*/
private static boolean isSequenceWithNoBits(int word)
{
  // Pattern "0?00000*": the literal flag (bit 31) and the 5-bit position
  // field (bits 29..25) must all be zero; bit 30 (sequence type) is ignored.
  final int literalFlagAndPosition = 0xBE000000;
  return (word & literalFlagAndPosition) == 0x00000000;
}
/**
* Gets the number of blocks of 1's or 0's stored in a sequence word
*
* @param word word to check
*
* @return the number of blocks that follow the first block of 31 bits
*/
private static int getSequenceCount(int word)
{
  // the counter lives in the 25 least significant bits
  final int counterMask = 0x01FFFFFF;
  return counterMask & word;
}
/**
* Clears the (un)set bit in a sequence
*
* @param word word to check
*
* @return the sequence corresponding to the given sequence and with no
* (un)set bits
*/
private static int getSequenceWithNoBits(int word)
{
  // keep the two header bits and the 25-bit counter, dropping the
  // 5-bit position field (bits 29..25)
  final int headerAndCounter = 0xC1FFFFFF;
  return headerAndCounter & word;
}
/**
* Gets the number of set bits within the literal word
*
* @param word literal word
*
* @return the number of set bits within the literal word
*/
private static int getLiteralBitCount(int word)
{
  // strip the literal flag first so that it is not counted
  final int payload = getLiteralBits(word);
  return Integer.bitCount(payload);
}
/**
* Gets the bits contained within the literal word
*
* @param word literal word
*
* @return the literal word with the most significant bit cleared
*/
private static int getLiteralBits(int word)
{
  // mask the word with the "all ones except MSB" constant
  final int payloadMask = ConciseSetUtils.ALL_ONES_WITHOUT_MSB;
  return word & payloadMask;
}
/**
* Returns {@code true} when the given 31-bit literal string (namely,
* with MSB unset) contains only one set bit. Note that a value with no
* set bits at all (0) also yields {@code true}.
*
* @param literal literal word (namely, with MSB unset)
*
* @return {@code true} when the given literal contains only one set
* bit
*/
private static boolean containsOnlyOneBit(int literal)
{
  // true when at most one bit is set; note that 0 (no bits at all) qualifies too
  return Integer.bitCount(literal) <= 1;
}
/**
* Resets to an empty set
*
* @see #ConciseSet()
* {@link #clear()}
*/
private void reset()
{
  // no greatest element and no used word
  lastWordIndex = -1;
  last = -1;
  // cardinality of the empty set is known
  size = 0;
  // a null array is what isEmpty() looks at
  words = null;
}
/**
* {@inheritDoc}
*/
@Override
public ConciseSet clone()
{
  // NOTE: super.clone() is deliberately avoided since it is 10 times slower!
  final ConciseSet copy = empty();
  if (!isEmpty()) {
    // copy only the used portion of the word array
    copy.words = Arrays.copyOf(words, lastWordIndex + 1);
    copy.lastWordIndex = lastWordIndex;
    copy.last = last;
    copy.size = size;
  }
  return copy;
}
/**
* Gets the literal word that represents the first 31 bits of the given the
* word (i.e. the first block of a sequence word, or the bits of a literal word).
*
* If the word is a literal, it returns the unmodified word. In case of a
* sequence, it returns a literal that represents the first 31 bits of the
* given sequence word.
*
* @param word word to check
*
* @return the literal contained within the given word, with the most
* significant bit set to 1.
*/
private /*static*/ int getLiteral(int word)
{
  // a literal word is already what we are looking for
  if (isLiteral(word)) {
    return word;
  }

  final boolean zeros = isZeroSequence(word);

  // in WAH mode sequences never carry a "dirty" bit: the first block is a pure fill
  if (simulateWAH) {
    if (zeros) {
      return ConciseSetUtils.ALL_ZEROS_LITERAL;
    }
    return ConciseSetUtils.ALL_ONES_LITERAL;
  }

  // Bits 30..26 hold the position of the (un)set bit, where 00000 means "none"
  // and 00001 means the LSB, hence the final ">>> 1".
  // NOTE: "1 << (word >>> 25)" and "1 << ((word >>> 25) & 0x0000001F)" are equivalent
  final int flipped = (1 << (word >>> 25)) >>> 1;

  if (zeros) {
    // all zeros plus the single set bit
    return ConciseSetUtils.ALL_ZEROS_LITERAL | flipped;
  }
  // all ones minus the single unset bit
  return ConciseSetUtils.ALL_ONES_LITERAL & ~flipped;
}
/**
* Assures that the length of {@link #words} is sufficient to contain
* the given index.
*/
private void ensureCapacity(int index)
{
  final int current = (words == null) ? 0 : words.length;
  if (index < current) {
    // the array is already large enough
    return;
  }

  // grow to at least double the current length, or just enough to hold "index"
  final int grown = Math.max(current << 1, index + 1);

  // with no previous array there is nothing to copy
  words = (words == null) ? new int[grown] : Arrays.copyOf(words, grown);
}
/**
* Removes unused allocated words at the end of {@link #words} only when they
* are more than twice of the needed space
*/
private void compact()
{
  if (words == null) {
    return;
  }
  final int used = lastWordIndex + 1;
  // shrink only when more than half of the array is unused
  if ((used << 1) < words.length) {
    words = Arrays.copyOf(words, used);
  }
}
/**
* Sets the bit at the given absolute position within the uncompressed bit
* string. The bit must be appendable, that is it must represent an
* integer that is strictly greater than the maximum integer in the set.
* Note that the parameter range check is performed by the public method
* {@link #add(int)} and not in this method.
*
* NOTE: This method assumes that the last element of {@link #words}
* (i.e. {@code getLastWord()}) must be one of the
* following:
* <ul>
* <li>a literal word with at least one set bit;</li>
* <li>a sequence of ones.</li>
* </ul>
* Hence, the last word in {@link #words} cannot be:
* <ul>
* <li>a literal word containing only zeros;</li>
* <li>a sequence of zeros.</li>
* </ul>
*
* @param i the absolute position of the bit to set (i.e., the integer to add)
*/
private void append(int i)
{
// Appends the integer i as the new greatest element, updating words, last and size.
// special case of empty set
if (isEmpty()) {
// number of whole 31-bit blocks of zeros that precede the block containing i
int zeroBlocks = maxLiteralLengthDivision(i);
if (zeroBlocks == 0) {
// i falls in the very first block: a single literal is enough
words = new int[1];
lastWordIndex = 0;
} else if (zeroBlocks == 1) {
// exactly one empty block before i: store it as an all-zeros literal
words = new int[2];
lastWordIndex = 1;
words[0] = ConciseSetUtils.ALL_ZEROS_LITERAL;
} else {
// several empty blocks: store them as a zero-sequence word with no set bit,
// whose counter (blocks following the first one) is zeroBlocks - 1
words = new int[2];
lastWordIndex = 1;
words[0] = zeroBlocks - 1;
}
last = i;
size = 1;
// the last word is a literal with only the bit of i set
words[lastWordIndex] = ConciseSetUtils.ALL_ZEROS_LITERAL | (1 << maxLiteralLengthModulus(i));
return;
}
// position of the next bit to set within the current literal
int bit = maxLiteralLengthModulus(last) + i - last;
// if we are outside the current literal, add zeros in
// between the current word and the new 1-bit literal word
if (bit >= ConciseSetUtils.MAX_LITERAL_LENGTH) {
// whole empty blocks between the block of "last" and the block of i
int zeroBlocks = maxLiteralLengthDivision(bit) - 1;
// bit position of i inside its own block
bit = maxLiteralLengthModulus(bit);
if (zeroBlocks == 0) {
// room for the new literal only
ensureCapacity(lastWordIndex + 1);
} else {
// room for the zero fill plus the new literal
ensureCapacity(lastWordIndex + 2);
appendFill(zeroBlocks, 0);
}
appendLiteral(ConciseSetUtils.ALL_ZEROS_LITERAL | 1 << bit);
} else {
// i belongs to the block of the current last word: just set its bit
words[lastWordIndex] |= 1 << bit;
if (words[lastWordIndex] == ConciseSetUtils.ALL_ONES_LITERAL) {
// the literal became all ones: step back and re-append it through
// appendLiteral() so that it can be merged with the preceding word
lastWordIndex--;
appendLiteral(ConciseSetUtils.ALL_ONES_LITERAL);
}
}
// update other info
last = i;
// keep the cached cardinality in sync only when it is valid
if (size >= 0) {
size++;
}
}
/**
* Append a literal word after the last word
*
* @param word the new literal word. Note that the leftmost bit must
* be set to 1.
*/
private void appendLiteral(int word)
{
// Appends a literal word, merging it into the previous word whenever the pair can be
// represented as a single sequence word. The caller must have ensured enough capacity.
// when we have a zero sequence of the maximum length (that is,
// 00.00000.1111111111111111111111111 = 0x01FFFFFF), it could happen
// that we try to append a zero literal because the result of the given operation must be an
// empty set. Without the following test, we would have increased the
// counter of the zero sequence, thus obtaining 0x02000000 that
// represents a sequence with the first bit set!
if (lastWordIndex == 0 && word == ConciseSetUtils.ALL_ZEROS_LITERAL && words[0] == 0x01FFFFFF) {
return;
}
// first addition
if (lastWordIndex < 0) {
words[lastWordIndex = 0] = word;
return;
}
final int lastWord = words[lastWordIndex];
if (word == ConciseSetUtils.ALL_ZEROS_LITERAL) {
if (lastWord == ConciseSetUtils.ALL_ZEROS_LITERAL) {
// two all-zeros literals collapse into a zero sequence with counter 1
words[lastWordIndex] = 1;
} else if (isZeroSequence(lastWord)) {
// extend the existing zero sequence by one block
words[lastWordIndex]++;
} else if (!simulateWAH && containsOnlyOneBit(getLiteralBits(lastWord))) {
// single-bit literal followed by zeros: zero sequence with counter 1 whose
// position field (bits 29..25) is the bit index plus one
words[lastWordIndex] = 1 | ((1 + Integer.numberOfTrailingZeros(lastWord)) << 25);
} else {
// no merge possible
words[++lastWordIndex] = word;
}
} else if (word == ConciseSetUtils.ALL_ONES_LITERAL) {
if (lastWord == ConciseSetUtils.ALL_ONES_LITERAL) {
// two all-ones literals collapse into a one sequence with counter 1
words[lastWordIndex] = ConciseSetUtils.SEQUENCE_BIT | 1;
} else if (isOneSequence(lastWord)) {
// extend the existing one sequence by one block
words[lastWordIndex]++;
} else if (!simulateWAH && containsOnlyOneBit(~lastWord)) {
// literal with a single unset bit followed by ones: one sequence with counter 1
// whose position field is the index of the unset bit plus one
words[lastWordIndex] = ConciseSetUtils.SEQUENCE_BIT | 1 | ((1 + Integer.numberOfTrailingZeros(~lastWord))
<< 25);
} else {
// no merge possible
words[++lastWordIndex] = word;
}
} else {
// a mixed literal can never be merged with the previous word
words[++lastWordIndex] = word;
}
}
/**
* Append a sequence word after the last word
*
* @param length sequence length
* @param fillType sequence word with a count that equals 0
*/
private void appendFill(int length, int fillType)
{
// Appends "length" blocks of all zeros (fillType without SEQUENCE_BIT) or all ones
// (fillType with SEQUENCE_BIT), merging with the previous word when possible.
// The caller must have ensured enough capacity.
assert length > 0;
assert lastWordIndex >= -1;
// only the sequence-type bit of fillType is relevant
fillType &= ConciseSetUtils.SEQUENCE_BIT;
// it is actually a literal...
if (length == 1) {
appendLiteral(fillType == 0 ? ConciseSetUtils.ALL_ZEROS_LITERAL : ConciseSetUtils.ALL_ONES_LITERAL);
return;
}
// empty set
if (lastWordIndex < 0) {
// the counter stores the number of blocks after the first one
words[lastWordIndex = 0] = fillType | (length - 1);
return;
}
final int lastWord = words[lastWordIndex];
if (isLiteral(lastWord)) {
if (fillType == 0 && lastWord == ConciseSetUtils.ALL_ZEROS_LITERAL) {
// the all-zeros literal becomes the first block of the sequence, so the
// counter of following blocks is exactly "length"
words[lastWordIndex] = length;
} else if (fillType == ConciseSetUtils.SEQUENCE_BIT && lastWord == ConciseSetUtils.ALL_ONES_LITERAL) {
// same as above, for ones
words[lastWordIndex] = ConciseSetUtils.SEQUENCE_BIT | length;
} else if (!simulateWAH) {
if (fillType == 0 && containsOnlyOneBit(getLiteralBits(lastWord))) {
// single-bit literal absorbed as the "dirty" first block of a zero sequence
words[lastWordIndex] = length | ((1 + Integer.numberOfTrailingZeros(lastWord)) << 25);
} else if (fillType == ConciseSetUtils.SEQUENCE_BIT && containsOnlyOneBit(~lastWord)) {
// literal with a single unset bit absorbed as the first block of a one sequence
words[lastWordIndex] = ConciseSetUtils.SEQUENCE_BIT | length | ((1 + Integer.numberOfTrailingZeros(~lastWord))
<< 25);
} else {
// no merge possible: new sequence word
words[++lastWordIndex] = fillType | (length - 1);
}
} else {
// WAH mode has no "dirty" first block: new sequence word
words[++lastWordIndex] = fillType | (length - 1);
}
} else {
if ((lastWord & 0xC0000000) == fillType) {
// same kind of sequence: just enlarge its counter
words[lastWordIndex] += length;
} else {
// different kind of sequence: new sequence word
words[++lastWordIndex] = fillType | (length - 1);
}
}
}
/**
* Recalculate a fresh value for {@link ConciseSet#last}
*/
private void updateLast()
{
  if (isEmpty()) {
    last = -1;
    return;
  }

  // total number of bits covered by all the used words
  last = 0;
  for (int idx = 0; idx <= lastWordIndex; idx++) {
    final int current = words[idx];
    last += isLiteral(current)
            ? ConciseSetUtils.MAX_LITERAL_LENGTH
            : maxLiteralLengthMultiplication(getSequenceCount(current) + 1);
  }

  // step back from the end of the covered range to the actual greatest element
  final int tail = words[lastWordIndex];
  if (!isLiteral(tail)) {
    // a trailing sequence: the greatest element is the last covered bit
    last--;
  } else {
    // a trailing literal: skip the unset bits above its highest set bit
    last -= Integer.numberOfLeadingZeros(getLiteralBits(tail));
  }
}
/**
* Performs the given operation over the bit-sets
*
* @param other {@link ConciseSet} instance that represents the right
* operand
* @param operator operator
*
* @return the result of the operation
*/
private ConciseSet performOperation(ConciseSet other, Operator operator)
{
// Combines this set with "other" word by word according to "operator" and returns
// the result as a new set; neither operand is modified by the code visible here.
// non-empty arguments
if (this.isEmpty() || other.isEmpty()) {
return operator.combineEmptySets(this, other);
}
// if the two operands are disjoint, the operation is faster
ConciseSet res = operator.combineDisjointSets(this, other);
if (res != null) {
return res;
}
// Allocate a sufficient number of words to contain all possible results.
// NOTE: since lastWordIndex is the index of the last used word in "words",
// we require "+2" to have the actual maximum required space.
// In any case, we do not allocate more than the maximum space required
// for the uncompressed representation.
// Another "+1" is required to allows for the addition of the last word
// before compacting.
res = empty();
res.words = new int[1 + Math.min(
this.lastWordIndex + other.lastWordIndex + 2,
maxLiteralLengthDivision(Math.max(this.last, other.last)) << (simulateWAH ? 1 : 0)
)];
// scan "this" and "other"
WordIterator thisItr = new WordIterator();
WordIterator otherItr = other.new WordIterator();
while (true) {
if (!thisItr.isLiteral) {
if (!otherItr.isLiteral) {
// both iterators are on sequences: consume the common number of blocks at once
int minCount = Math.min(thisItr.count, otherItr.count);
res.appendFill(minCount, operator.combineLiterals(thisItr.word, otherItr.word));
// both prepareNext() calls must run, hence the non-short-circuit "|"
//noinspection NonShortCircuitBooleanExpression
if (!thisItr.prepareNext(minCount) | /* NOT || */ !otherItr.prepareNext(minCount)) {
break;
}
} else {
// sequence vs literal: combine one block of the sequence with the literal
res.appendLiteral(operator.combineLiterals(thisItr.toLiteral(), otherItr.word));
// NOTE(review): presumably consumes one block from the sequence counter — WordIterator is not visible here
thisItr.word--;
//noinspection NonShortCircuitBooleanExpression
if (!thisItr.prepareNext(1) | /* do NOT use "||" */ !otherItr.prepareNext()) {
break;
}
}
} else if (!otherItr.isLiteral) {
// literal vs sequence: mirror of the case above
res.appendLiteral(operator.combineLiterals(thisItr.word, otherItr.toLiteral()));
otherItr.word--;
//noinspection NonShortCircuitBooleanExpression
if (!thisItr.prepareNext() | /* do NOT use "||" */ !otherItr.prepareNext(1)) {
break;
}
} else {
// literal vs literal
res.appendLiteral(operator.combineLiterals(thisItr.word, otherItr.word));
//noinspection NonShortCircuitBooleanExpression
if (!thisItr.prepareNext() | /* do NOT use "||" */ !otherItr.prepareNext()) {
break;
}
}
}
// invalidate the size
res.size = -1;
// when true, res.last must be recomputed from the words at the end
boolean invalidLast = true;
// if one bit string is greater than the other one, we add the remaining
// bits depending on the given operation.
switch (operator) {
case AND:
// nothing beyond the shorter operand can survive an AND
break;
case OR:
res.last = Math.max(this.last, other.last);
// NOTE(review): flush() is defined on WordIterator (not visible here); its boolean result
// is used as "last must be recomputed" — confirm against WordIterator
invalidLast = thisItr.flush(res);
invalidLast |= otherItr.flush(res);
break;
case XOR:
if (this.last != other.last) {
res.last = Math.max(this.last, other.last);
invalidLast = false;
}
invalidLast |= thisItr.flush(res);
invalidLast |= otherItr.flush(res);
break;
case ANDNOT:
if (this.last > other.last) {
res.last = this.last;
invalidLast = false;
}
// only the remainder of "this" can contribute to an ANDNOT
invalidLast |= thisItr.flush(res);
break;
}
// remove trailing zeros
res.trimZeros();
if (res.isEmpty()) {
return res;
}
// compute the greatest element
if (invalidLast) {
res.updateLast();
}
// compact the memory
res.compact();
return res;
}
/**
* {@inheritDoc}
*/
public int[] getWords()
{
  // An empty set yields a fresh zero-length array; otherwise a copy of the used words
  // is returned so that callers cannot alter the internal state.
  return words == null ? new int[]{} : Arrays.copyOf(words, lastWordIndex + 1);
}
/**
* {@inheritDoc}
*/
public ConciseSet intersection(IntSet other)
{
  // intersecting with nothing gives nothing
  final boolean triviallyEmpty = isEmpty() || other == null || other.isEmpty();
  if (triviallyEmpty) {
    return empty();
  }
  // a set intersected with itself is a copy of itself; anything else goes through AND
  return other == this ? clone() : performOperation(convert(other), Operator.AND);
}
/**
* Removes trailing zeros
*/
private void trimZeros()
{
  // walk backwards over the trailing words for as long as they carry no set bit
  while (true) {
    final int tail = words[lastWordIndex];
    if (tail != ConciseSetUtils.ALL_ZEROS_LITERAL) {
      if (!isZeroSequence(tail)) {
        // one sequence or literal with set bits: nothing more to trim
        return;
      }
      if (!simulateWAH && !isSequenceWithNoBits(tail)) {
        // the zero sequence starts with a set bit: keep just that
        // first block, converted into a 1-bit literal word
        words[lastWordIndex] = getLiteral(tail);
        return;
      }
    }

    // the word holds only zeros: drop it
    lastWordIndex--;
    if (lastWordIndex < 0) {
      // nothing left, the set is empty
      reset();
      return;
    }
  }
}
/**
* {@inheritDoc}
*/
@Override
public IntIterator iterator()
{
  // only a non-empty set needs a real bit iterator
  if (!isEmpty()) {
    return new BitIterator();
  }
  return EmptyIntIterator.instance();
}
/**
* {@inheritDoc}
*/
@Override
public IntIterator descendingIterator()
{
  // only a non-empty set needs a real reverse bit iterator
  if (!isEmpty()) {
    return new ReverseBitIterator();
  }
  return EmptyIntIterator.instance();
}
/**
* {@inheritDoc}
*/
public void clear()
{
// Removes every element by returning to the canonical empty state
// (words = null, last = -1, size = 0, lastWordIndex = -1).
reset();
}
/**
* Convert a given collection to a {@link ConciseSet} instance
*/
private ConciseSet convert(IntSet c)
{
  // a ConciseSet working in the same mode (WAH or not) can be used directly
  if (c instanceof ConciseSet) {
    final ConciseSet candidate = (ConciseSet) c;
    if (candidate.simulateWAH == simulateWAH) {
      return candidate;
    }
  }

  // otherwise rebuild the content element by element; null maps to the empty set
  final ConciseSet res = empty();
  if (c != null) {
    for (IntIterator itr = c.iterator(); itr.hasNext(); ) {
      res.add(itr.next());
    }
  }
  return res;
}
/**
* {@inheritDoc}
*/
public ConciseSet convert(int... a)
{
  final ConciseSet res = empty();
  if (a == null) {
    return res;
  }

  // work on a sorted private copy so that the caller's array is left untouched
  // and every element can be appended in increasing order
  final int[] sorted = Arrays.copyOf(a, a.length);
  Arrays.sort(sorted);
  for (int idx = 0; idx < sorted.length; idx++) {
    final int value = sorted[idx];
    // duplicates are adjacent after sorting: skip a value equal to the current maximum
    if (value != res.last) {
      res.add(value);
    }
  }
  return res;
}
/**
* Replace the current instance with another {@link ConciseSet} instance. It
* also returns {@code true} if the given set is actually different
* from the current one
*
* @param other {@link ConciseSet} instance to use to replace the current one
*
* @return {@code true} if the given set is different from the current
* set
*/
private boolean replaceWith(ConciseSet other)
{
  if (other == this) {
    return false;
  }

  // same number of used words, same greatest element and same words => same content
  boolean identical = lastWordIndex == other.lastWordIndex && last == other.last;
  int idx = 0;
  while (identical && idx <= lastWordIndex) {
    identical = words[idx] == other.words[idx];
    idx++;
  }

  if (!identical) {
    // take over the whole state of the other set (the word array is shared, not copied)
    this.lastWordIndex = other.lastWordIndex;
    this.last = other.last;
    this.size = other.size;
    this.words = other.words;
    return true;
  }

  // nothing changed; still profit from an already computed cardinality
  if (other.size >= 0) {
    this.size = other.size;
  }
  return false;
}
/**
* {@inheritDoc}
*/
public boolean add(int e)
{
// Adds e to the set; returns true when the set changed, false when e was already present.
// Throws IndexOutOfBoundsException when e is outside the allowed range.
// range check
if (e < ConciseSetUtils.MIN_ALLOWED_SET_BIT || e > ConciseSetUtils.MAX_ALLOWED_INTEGER) {
throw new IndexOutOfBoundsException(String.valueOf(e));
}
// the element can be simply appended
if (e > last) {
append(e);
return true;
}
if (e == last) {
return false;
}
// check if the element can be put in a literal word
// blockIndex counts the 31-bit blocks still to skip; bitPosition is the bit inside the target block
int blockIndex = maxLiteralLengthDivision(e);
int bitPosition = maxLiteralLengthModulus(e);
for (int i = 0; i <= lastWordIndex && blockIndex >= 0; i++) {
int w = words[i];
if (isLiteral(w)) {
// check if the current literal word is the "right" one
if (blockIndex == 0) {
// bit already set
if ((w & (1 << bitPosition)) != 0) {
return false;
}
// By adding the bit we potentially create a sequence:
// -- If the literal is made up of all zeros, it definitely
// cannot be part of a sequence (otherwise it would not have
// been created). Thus, we can create a 1-bit literal word
// -- If there are MAX_LITERAL_LENGTH - 2 set bits, by adding
// the new one we potentially allow for a 1's sequence
// together with the successive word
// -- If there are MAX_LITERAL_LENGTH - 1 set bits, by adding
// the new one we potentially allow for a 1's sequence
// together with the successive and/or the preceding words
if (!simulateWAH) {
int bitCount = getLiteralBitCount(w);
if (bitCount >= ConciseSetUtils.MAX_LITERAL_LENGTH - 2) {
// leave the loop and fall back to the generic OR below
break;
}
} else {
if (containsOnlyOneBit(~w) || w == ConciseSetUtils.ALL_ONES_LITERAL) {
// leave the loop and fall back to the generic OR below
break;
}
}
// set the bit
words[i] |= 1 << bitPosition;
if (size >= 0) {
size++;
}
return true;
}
// a literal covers exactly one block
blockIndex--;
} else {
if (simulateWAH) {
if (isOneSequence(w) && blockIndex <= getSequenceCount(w)) {
return false;
}
} else {
// if we are at the beginning of a sequence, and it is
// a set bit, the bit already exists
if (blockIndex == 0
&& (getLiteral(w) & (1 << bitPosition)) != 0) {
return false;
}
// if we are in the middle of a sequence of 1's, the bit already exist
if (blockIndex > 0
&& blockIndex <= getSequenceCount(w)
&& isOneSequence(w)) {
return false;
}
}
// next word
// a sequence covers its first block plus getSequenceCount(w) following blocks
blockIndex -= getSequenceCount(w) + 1;
}
}
// the bit is in the middle of a sequence or it may cause a literal to
// become a sequence, thus the "easiest" way to add it is by ORing
return replaceWith(performOperation(convert(e), Operator.OR));
}
/**
* {@inheritDoc}
*/
public boolean remove(int o)
{
  // Removes o from the set; returns true when the set changed, false when o was not present.

  if (isEmpty()) {
    return false;
  }

  // The element cannot exist: it is either below the smallest value that add() accepts
  // or above the current maximum. The lower-bound check is essential: integer division
  // truncates toward zero, so for -30 <= o <= -1 the block index below would be 0 and the
  // modulus helper would yield an unrelated bit position, clearing the wrong element.
  if (o < ConciseSetUtils.MIN_ALLOWED_SET_BIT || o > last) {
    return false;
  }

  // check if the element can be removed from a literal word
  // blockIndex counts the 31-bit blocks still to skip; bitPosition is the bit inside the target block
  int blockIndex = maxLiteralLengthDivision(o);
  int bitPosition = maxLiteralLengthModulus(o);
  for (int i = 0; i <= lastWordIndex && blockIndex >= 0; i++) {
    final int w = words[i];
    if (isLiteral(w)) {
      // check if the current literal word is the "right" one
      if (blockIndex == 0) {
        // the bit is already unset
        if ((w & (1 << bitPosition)) == 0) {
          return false;
        }

        // By removing the bit we potentially create a sequence:
        // -- If the literal is made up of all ones, it definitely
        //    cannot be part of a sequence (otherwise it would not have
        //    been created). Thus, we can create a 30-bit literal word
        // -- If there are 2 set bits, by removing the specified
        //    one we potentially allow for a 1's sequence together with
        //    the successive word
        // -- If there is 1 set bit, by removing the new one we
        //    potentially allow for a 0's sequence
        //    together with the successive and/or the preceding words
        if (!simulateWAH) {
          int bitCount = getLiteralBitCount(w);
          if (bitCount <= 2) {
            // leave the loop and fall back to the generic ANDNOT below
            break;
          }
        } else {
          final int l = getLiteralBits(w);
          if (l == 0 || containsOnlyOneBit(l)) {
            // leave the loop and fall back to the generic ANDNOT below
            break;
          }
        }

        // unset the bit
        words[i] &= ~(1 << bitPosition);
        if (size >= 0) {
          size--;
        }

        // if the bit is the maximal element, update it
        if (o == last) {
          last -= maxLiteralLengthModulus(last) - (ConciseSetUtils.MAX_LITERAL_LENGTH
                                                   - Integer.numberOfLeadingZeros(getLiteralBits(words[i])));
        }
        return true;
      }
      // a literal covers exactly one block
      blockIndex--;
    } else {
      if (simulateWAH) {
        if (isZeroSequence(w) && blockIndex <= getSequenceCount(w)) {
          return false;
        }
      } else {
        // if we are at the beginning of a sequence, and it is
        // an unset bit, the bit does not exist
        if (blockIndex == 0
            && (getLiteral(w) & (1 << bitPosition)) == 0) {
          return false;
        }

        // if we are in the middle of a sequence of 0's, the bit does not exist
        if (blockIndex > 0
            && blockIndex <= getSequenceCount(w)
            && isZeroSequence(w)) {
          return false;
        }
      }
      // next word
      // a sequence covers its first block plus getSequenceCount(w) following blocks
      blockIndex -= getSequenceCount(w) + 1;
    }
  }

  // the bit is in the middle of a sequence or it may cause a literal to
  // become a sequence, thus the "easiest" way to remove it by ANDNOTing
  return replaceWith(performOperation(convert(o), Operator.ANDNOT));
}
/**
* {@inheritDoc}
*/
public boolean contains(int o)
{
// Returns true when o is an element of the set.
// Quick rejections: empty set, value above the maximum, or negative value.
if (isEmpty() || o > last || o < 0) {
return false;
}
// check if the element is within a literal word
// "block" counts the 31-bit blocks still to skip; "bit" is the position inside the target block
int block = maxLiteralLengthDivision(o);
int bit = maxLiteralLengthModulus(o);
for (int i = 0; i <= lastWordIndex; i++) {
final int w = words[i];
final int t = w & 0xC0000000; // the first two bits...
switch (t) {
case 0x80000000: // LITERAL
case 0xC0000000: // LITERAL
// check if the current literal word is the "right" one
if (block == 0) {
return (w & (1 << bit)) != 0;
}
block--;
break;
case 0x00000000: // ZERO SEQUENCE
if (!simulateWAH) {
// the first block of a zero sequence may carry one set bit, whose
// index plus one is stored in bits 29..25
if (block == 0 && ((w >> 25) - 1) == bit) {
return true;
}
}
// skip the first block plus the following ones
block -= getSequenceCount(w) + 1;
if (block < 0) {
// the element falls inside the run of zeros
return false;
}
break;
case 0x40000000: // ONE SEQUENCE
if (!simulateWAH) {
// the first block of a one sequence may carry one unset bit; the 0x1F mask
// drops the sequence-type bit that survives the shift
if (block == 0 && (0x0000001F & (w >> 25) - 1) == bit) {
return false;
}
}
// skip the first block plus the following ones
block -= getSequenceCount(w) + 1;
if (block < 0) {
// the element falls inside the run of ones
return true;
}
break;
}
}
// no more words
return false;
}
/**
* {@inheritDoc}
*/
@Override
public boolean isEmpty()
{
// The empty set is represented by a null word array (see reset()).
return words == null;
}
/**
* {@inheritDoc}
*/
public boolean addAll(IntSet c)
{
  // Adds every element of c to this set; returns true when this set changed.

  // nothing to add, or adding the set to itself
  if (c == null || c.isEmpty() || this == c) {
    return false;
  }

  // Convert once and reuse the result: for an IntSet that is not a ConciseSet in the
  // same mode, convert() rebuilds the whole set element by element.
  final ConciseSet other = convert(c);

  // single-element shortcut (taken only when the cardinality of "other" is already cached)
  if (other.size == 1) {
    return add(other.last);
  }

  return replaceWith(performOperation(other, Operator.OR));
}
/**
* {@inheritDoc}
*/
@Override
public int size()
{
  // use the cached cardinality when it is valid
  if (size >= 0) {
    return size;
  }

  int count = 0;
  for (int idx = 0; idx <= lastWordIndex; idx++) {
    final int current = words[idx];
    if (isLiteral(current)) {
      count += getLiteralBitCount(current);
      continue;
    }

    // a sequence may carry one "flipped" bit in its first block
    final boolean hasFlippedBit = !isSequenceWithNoBits(current);
    if (isZeroSequence(current)) {
      // zeros everywhere, except possibly one set bit
      if (hasFlippedBit) {
        count++;
      }
    } else {
      // ones everywhere, except possibly one unset bit
      count += maxLiteralLengthMultiplication(getSequenceCount(current) + 1);
      if (hasFlippedBit) {
        count--;
      }
    }
  }

  size = count;
  return size;
}
/**
* {@inheritDoc}
*/
public ConciseSet empty()
{
// Creates a new empty set working in the same mode (WAH simulation or not) as this one.
return new ConciseSet(simulateWAH);
}
/**
* {@inheritDoc}
*/
@Override
public int hashCode()
{
  // polynomial hash (multiplier 31) over the used words only
  int hash = 1;
  int idx = 0;
  while (idx <= lastWordIndex) {
    hash = 31 * hash + words[idx];
    idx++;
  }
  return hash;
}
/**
* {@inheritDoc}
*/
@Override
public boolean equals(Object obj)
{
  if (obj == this) {
    return true;
  }

  // anything that is not a ConciseSet working in the same mode is compared by the superclass
  if (!(obj instanceof ConciseSet) || ((ConciseSet) obj).simulateWAH != simulateWAH) {
    return super.equals(obj);
  }

  final ConciseSet that = (ConciseSet) obj;

  // cheap checks first: cardinality, emptiness, greatest element
  if (size() != that.size()) {
    return false;
  }
  if (isEmpty()) {
    return true;
  }
  if (last != that.last) {
    return false;
  }

  // finally compare the used words one by one, stopping at the first mismatch
  int idx = 0;
  while (idx <= lastWordIndex && words[idx] == that.words[idx]) {
    idx++;
  }
  return idx > lastWordIndex;
}
/**
* {@inheritDoc}
*/
@SuppressWarnings("CompareToUsesNonFinalVariable")
@Override
public int compareTo(IntSet o)
{
// Orders sets by their greatest element first and then by comparing the bit strings
// block by block from the most significant end; an empty set precedes any non-empty set.
// empty set cases
if (this.isEmpty() && o.isEmpty()) {
return 0;
}
if (this.isEmpty()) {
return -1;
}
if (o.isEmpty()) {
return 1;
}
final ConciseSet other = convert(o);
// the word at the end must be the same
int res = Integer.compare(this.last, other.last);
if (res != 0) {
return res;
}
// scan words from MSB to LSB
// thisWord/otherWord are working copies: sequence words are shortened block by block
// (or turned into their first-block literal) as the comparison proceeds
int thisIndex = this.lastWordIndex;
int otherIndex = other.lastWordIndex;
int thisWord = this.words[thisIndex];
int otherWord = other.words[otherIndex];
while (thisIndex >= 0 && otherIndex >= 0) {
if (!isLiteral(thisWord)) {
if (!isLiteral(otherWord)) {
// compare two sequences
// note that they are made up of at least two blocks, and we
// start comparing from the end, that is at blocks with no
// (un)set bits
if (isZeroSequence(thisWord)) {
if (isOneSequence(otherWord)) {
// zeros < ones
return -1;
}
// compare two sequences of zeros
// (operands are swapped: the longer run of zeros is the smaller set)
res = Integer.compare(getSequenceCount(otherWord), getSequenceCount(thisWord));
if (res != 0) {
return res;
}
} else {
if (isZeroSequence(otherWord)) {
// ones > zeros
return 1;
}
// compare two sequences of ones
res = Integer.compare(getSequenceCount(thisWord), getSequenceCount(otherWord));
if (res != 0) {
return res;
}
}
// if the sequences are the same (both zeros or both ones)
// and have the same length, compare the first blocks in the
// next loop since such blocks might contain (un)set bits
thisWord = getLiteral(thisWord);
otherWord = getLiteral(otherWord);
} else {
// zeros < literal --> -1
// ones > literal --> +1
// note that the sequence is made up of at least two blocks,
// and we start comparing from the end, that is at a block
// with no (un)set bits
if (isZeroSequence(thisWord)) {
if (otherWord != ConciseSetUtils.ALL_ZEROS_LITERAL) {
return -1;
}
} else {
if (otherWord != ConciseSetUtils.ALL_ONES_LITERAL) {
return 1;
}
}
// one block of the sequence has been matched: when only the first block is
// left after it, switch to its literal form, otherwise decrease the counter
if (getSequenceCount(thisWord) == 1) {
thisWord = getLiteral(thisWord);
} else {
thisWord--;
}
// the literal of "other" has been consumed: move to its previous word
if (--otherIndex >= 0) {
otherWord = other.words[otherIndex];
}
}
} else if (!isLiteral(otherWord)) {
// literal > zeros --> +1
// literal < ones --> -1
// note that the sequence is made up of at least two blocks,
// and we start comparing from the end, that is at a block
// with no (un)set bits
if (isZeroSequence(otherWord)) {
if (thisWord != ConciseSetUtils.ALL_ZEROS_LITERAL) {
return 1;
}
} else {
if (thisWord != ConciseSetUtils.ALL_ONES_LITERAL) {
return -1;
}
}
// the literal of "this" has been consumed: move to its previous word
if (--thisIndex >= 0) {
thisWord = this.words[thisIndex];
}
// one block of the other sequence has been matched (see the mirror case above)
if (getSequenceCount(otherWord) == 1) {
otherWord = getLiteral(otherWord);
} else {
otherWord--;
}
} else {
// equals compare(getLiteralBits(thisWord), getLiteralBits(otherWord))
res = Integer.compare(thisWord, otherWord);
if (res != 0) {
return res;
}
if (--thisIndex >= 0) {
thisWord = this.words[thisIndex];
}
if (--otherIndex >= 0) {
otherWord = other.words[otherIndex];
}
}
}
// all compared blocks are equal: the operand that still has words left is the greater one
return thisIndex >= 0 ? 1 : (otherIndex >= 0 ? -1 : 0);
}
/**
 * Serialization hook: writes the state of this instance to a stream.
 * <p>
 * When the backing {@code words} array has unused slots after
 * {@code lastWordIndex}, it is replaced by a trimmed copy first, so only the
 * words actually in use are handed to the default serialization.
 *
 * @param s stream that receives the serialized form
 *
 * @throws IOException propagated from {@link ObjectOutputStream#defaultWriteObject()}
 */
private void writeObject(ObjectOutputStream s) throws IOException
{
  final int usedWords = lastWordIndex + 1;
  final boolean hasSlack = words != null && usedWords < words.length;
  if (hasSlack) {
    // drop the unused tail before it gets written out
    words = Arrays.copyOf(words, usedWords);
  }
  s.defaultWriteObject();
}
/**
 * Serialization hook: rebuilds the state of this instance from a stream.
 * <p>
 * After the default fields are read, a missing {@code words} array leads to a
 * plain {@code reset()}; otherwise the bookkeeping that depends on the array
 * ({@code lastWordIndex}, the data maintained by {@code updateLast()}, and
 * {@code size}) is re-derived.
 *
 * @param s stream to read the serialized form from
 *
 * @throws IOException            propagated from {@link ObjectInputStream#defaultReadObject()}
 * @throws ClassNotFoundException propagated from {@link ObjectInputStream#defaultReadObject()}
 */
private void readObject(ObjectInputStream s) throws IOException, ClassNotFoundException
{
  s.defaultReadObject();
  if (words != null) {
    // every slot of the array read from the stream is treated as in use
    lastWordIndex = words.length - 1;
    updateLast();
    // NOTE(review): a negative size presumably means "not computed yet" - confirm against size()
    size = -1;
  } else {
    reset();
  }
}
/**
 * Binary operations that can be applied to a pair of {@link ConciseSet}s.
 * <p>
 * Every constant supplies the word-level combination of two literals plus two
 * shortcuts: one for empty operands and one for "disjoint" operands (see
 * {@link #combineDisjointSets(ConciseSet, ConciseSet)}).
 */
private enum Operator
{
  /**
   * Intersection.
   *
   * @uml.property name="aND"
   * @uml.associationEnd
   */
  AND {
    @Override
    public int combineLiterals(int literal1, int literal2)
    {
      return literal1 & literal2;
    }

    @Override
    public ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2)
    {
      // with at least one empty operand the intersection is empty
      return op1.empty();
    }

    /** One direction of {@link #combineDisjointSets(ConciseSet, ConciseSet)}: op1 is the covering operand */
    private ConciseSet oneWayCombineDisjointSets(ConciseSet op1, ConciseSet op2)
    {
      if (!leadingSequenceCovers(op1, op2)) {
        return null;
      }
      // covering zeros hide op2 completely; covering ones leave op2 unchanged
      // while the rest of op1 is hidden
      return isZeroSequence(op1.words[0]) ? op1.empty() : op2.clone();
    }

    @Override
    public ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2)
    {
      final ConciseSet forward = oneWayCombineDisjointSets(op1, op2);
      return forward != null ? forward : oneWayCombineDisjointSets(op2, op1);
    }
  },

  /**
   * Union.
   *
   * @uml.property name="oR"
   * @uml.associationEnd
   */
  OR {
    @Override
    public int combineLiterals(int literal1, int literal2)
    {
      return literal1 | literal2;
    }

    @Override
    public ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2)
    {
      if (op1.isEmpty()) {
        return op2.isEmpty() ? op1.empty() : op2.clone();
      }
      return op1.clone();
    }

    /** One direction of {@link #combineDisjointSets(ConciseSet, ConciseSet)}: op1 is the covering operand */
    private ConciseSet oneWayCombineDisjointSets(ConciseSet op1, ConciseSet op2)
    {
      if (!leadingSequenceCovers(op1, op2)) {
        return null;
      }
      if (isOneSequence(op1.words[0])) {
        // op2 is completely hidden by the leading ones of op1
        return op1.clone();
      }

      // leading zeros: the result is op2 followed by what op1 holds after it.
      // First, allocate enough room for both operands ...
      final ConciseSet merged = op1.empty();
      merged.words = new int[op1.lastWordIndex + op2.lastWordIndex + 3];
      merged.lastWordIndex = op2.lastWordIndex;
      // ... then copy the words of op2 ...
      System.arraycopy(op2.words, 0, merged.words, 0, op2.lastWordIndex + 1);
      // ... and finally append op1, skipping the blocks already taken by op2
      final WordIterator tail = op1.new WordIterator();
      tail.prepareNext(maxLiteralLengthDivision(op2.last) + 1);
      tail.flush(merged);

      // the sizes add up only when both of them are known (non-negative)
      merged.size = (op1.size < 0 || op2.size < 0) ? -1 : op1.size + op2.size;
      merged.last = op1.last;
      merged.compact();
      return merged;
    }

    @Override
    public ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2)
    {
      final ConciseSet forward = oneWayCombineDisjointSets(op1, op2);
      return forward != null ? forward : oneWayCombineDisjointSets(op2, op1);
    }
  },

  /**
   * Symmetric difference.
   *
   * @uml.property name="xOR"
   * @uml.associationEnd
   */
  XOR {
    @Override
    public int combineLiterals(int literal1, int literal2)
    {
      return ConciseSetUtils.ALL_ZEROS_LITERAL | (literal1 ^ literal2);
    }

    @Override
    public ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2)
    {
      if (op1.isEmpty()) {
        return op2.isEmpty() ? op1.empty() : op2.clone();
      }
      return op1.clone();
    }

    /** One direction of {@link #combineDisjointSets(ConciseSet, ConciseSet)}: op1 is the covering operand */
    private ConciseSet oneWayCombineDisjointSets(ConciseSet op1, ConciseSet op2)
    {
      // Covering zeros leave op2 unchanged, which is exactly the OR case.
      // With covering ones op2 would have to be complemented and op1
      // appended: that is left to the regular (non-shortcut) algorithm.
      if (leadingSequenceCovers(op1, op2) && isZeroSequence(op1.words[0])) {
        return OR.combineDisjointSets(op1, op2);
      }
      return null;
    }

    @Override
    public ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2)
    {
      final ConciseSet forward = oneWayCombineDisjointSets(op1, op2);
      return forward != null ? forward : oneWayCombineDisjointSets(op2, op1);
    }
  },

  /**
   * Difference (elements of the left operand that are not in the right one).
   *
   * @uml.property name="aNDNOT"
   * @uml.associationEnd
   */
  ANDNOT {
    @Override
    public int combineLiterals(int literal1, int literal2)
    {
      return ConciseSetUtils.ALL_ZEROS_LITERAL | (literal1 & (~literal2));
    }

    @Override
    public ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2)
    {
      return op1.isEmpty() ? op1.empty() : op1.clone();
    }

    @Override
    public ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2)
    {
      // does op1 start with a sequence that completely "covers" op2?
      if (leadingSequenceCovers(op1, op2)) {
        // Covering zeros: op1 is left unchanged by op2. Covering ones: op2
        // would have to be complemented and op1 appended, which is left to
        // the regular algorithm (the second direction is not even tried).
        return isZeroSequence(op1.words[0]) ? op1.clone() : null;
      }
      // does op2 start with a sequence that completely "covers" op1?
      if (leadingSequenceCovers(op2, op1)) {
        // covering zeros leave op1 unchanged, covering ones clear it
        return isZeroSequence(op2.words[0]) ? op1.clone() : op1.empty();
      }
      return null;
    }
  };

  /**
   * Checks whether the first word of {@code covering} is a sequence without
   * any (un)set bit whose blocks span more bits than {@code covered.last}.
   *
   * @param covering operand whose first word is inspected
   * @param covered  operand whose {@code last} field is compared against the sequence length
   *
   * @return {@code true} if the leading sequence of {@code covering} extends past {@code covered.last}
   */
  private static boolean leadingSequenceCovers(ConciseSet covering, ConciseSet covered)
  {
    final int firstWord = covering.words[0];
    return isSequenceWithNoBits(firstWord)
           && maxLiteralLengthMultiplication(getSequenceCount(firstWord) + 1) > covered.last;
  }

  /**
   * Performs the operation on the given literals
   *
   * @param literal1 left operand
   * @param literal2 right operand
   *
   * @return literal representing the result of the specified operation
   */
  public abstract int combineLiterals(int literal1, int literal2);

  /**
   * Performs the operation when one or both operands are the empty set.
   * <p>
   * NOTE: the caller MUST ensure that at least one of the operands is
   * empty; the returned value is meaningful only under that precondition.
   *
   * @param op1 left operand
   * @param op2 right operand
   *
   * @return the result of the operation for the empty-operand case
   */
  public abstract ConciseSet combineEmptySets(ConciseSet op1, ConciseSet op2);

  /**
   * Performs the operation in the special case of "disjoint" sets, namely
   * when the first (or the second) operand starts with a sequence (it
   * does not matter if 0's or 1's) that completely covers all the bits of
   * the second (or the first) operand.
   *
   * @param op1 left operand
   * @param op2 right operand
   *
   * @return the result of the operation, or {@code null} when this shortcut
   * does not apply and the regular algorithm has to be used
   */
  public abstract ConciseSet combineDisjointSets(ConciseSet op1, ConciseSet op2);
}
/**
 * Cursor over the integers encoded by a single literal/fill word. The cursor
 * can be moved forward ({@code next}) as well as backward ({@code previous}).
 */
private interface WordExpander
{
  /**
   * Loads a new word into the expander.
   *
   * @param offset        integer represented by the first bit of the word
   * @param word          literal or fill word to expand
   * @param fromBeginning {@code true} to place the cursor before the first
   *                      integer, {@code false} to place it after the last one
   */
  void reset(int offset, int word, boolean fromBeginning);

  /**
   * @return {@code true} if {@link #next()} can be called
   */
  boolean hasNext();

  /**
   * @return the integer following the cursor, moving the cursor forward
   */
  int next();

  /**
   * Moves the cursor forward so that integers smaller than {@code i} are
   * not returned by {@link #next()}.
   *
   * @param i lower bound for the integers still to be returned
   */
  void skipAllBefore(int i);

  /**
   * @return {@code true} if {@link #previous()} can be called
   */
  boolean hasPrevious();

  /**
   * @return the integer preceding the cursor, moving the cursor backward
   */
  int previous();

  /**
   * Moves the cursor backward so that integers greater than {@code i} are
   * not returned by {@link #previous()}.
   *
   * @param i upper bound for the integers still to be returned
   */
  void skipAllAfter(int i);
}
/**
 * Iterates over words, from the rightmost (LSB) to the leftmost (MSB).
 * <p>
 * When {@link ConciseSet#simulateWAH} is {@code false}, mixed sequences are
 * "broken" in two steps: the first block is presented as a literal in
 * {@link #word}, then the remaining blocks are presented as a "pure"
 * sequence, i.e. a sequence with no (un)set bit, again in {@link #word}.
 */
private class WordIterator
{
  /**
   * copy of the current word
   */
  int word;

  /**
   * current word index
   */
  int index;

  /**
   * {@code true} if {@link #word} is a literal
   */
  boolean isLiteral;

  /**
   * number of blocks still covered by the current word: 1 for plain
   * literals, &gt; 1 for sequences (including the literal that stands for
   * the first block of a broken mixed sequence)
   */
  int count;

  /**
   * Initialize data and load the first word, if any
   */
  WordIterator()
  {
    index = -1;
    isLiteral = false;
    prepareNext();
  }

  /**
   * @return {@code true} if there is no current word
   */
  boolean exhausted()
  {
    return lastWordIndex < index;
  }

  /**
   * Prepare the next value for {@link #word} after skipping a given
   * number of 31-bit blocks in the current sequence.
   * <p>
   * NOTE: it works only when the current word is within a sequence, namely
   * a literal cannot be skipped. Moreover, the number of blocks to skip
   * must not exceed the remaining blocks in the current sequence.
   *
   * @param c number of 31-bit "blocks" to skip
   *
   * @return {@code false} if the next word does not exist
   */
  boolean prepareNext(int c)
  {
    assert c <= count;
    count -= c;
    // move on to the following word only when this one is fully consumed
    return count != 0 || prepareNext();
  }

  /**
   * Prepare the next value for {@link #word}
   *
   * @return {@code false} if the next word does not exist
   */
  boolean prepareNext()
  {
    final boolean pendingSequenceTail = !simulateWAH && isLiteral && count > 1;
    if (pendingSequenceTail) {
      // the first block of a mixed sequence was already served as a
      // literal: serve the remaining blocks as a pure sequence
      isLiteral = false;
      count--;
      word = getSequenceWithNoBits(words[index]) - 1;
      return true;
    }

    index++;
    if (index > lastWordIndex) {
      return false;
    }

    final int raw = words[index];
    word = raw;
    isLiteral = isLiteral(raw);
    if (isLiteral) {
      count = 1;
      return true;
    }

    count = getSequenceCount(raw) + 1;
    if (!simulateWAH && !isSequenceWithNoBits(raw)) {
      // mixed sequence: turn its first block into a literal with one
      // flipped bit, whose position is stored in the bits above the 25th
      isLiteral = true;
      final int bit = (1 << (raw >>> 25)) >>> 1;
      word = isZeroSequence(raw)
             ? (ConciseSetUtils.ALL_ZEROS_LITERAL | bit)
             : (ConciseSetUtils.ALL_ONES_LITERAL & ~bit);
    }
    return true;
  }

  /**
   * @return the literal word corresponding to each block contained in the
   * current sequence word. Not to be used with literal words!
   */
  int toLiteral()
  {
    assert !isLiteral;
    // shifting left by one and then arithmetically right smears the bit
    // below the MSB over the whole word
    final int smeared = (word << 1) >> ConciseSetUtils.MAX_LITERAL_LENGTH;
    return ConciseSetUtils.ALL_ZEROS_LITERAL | smeared;
  }

  /**
   * Copies all the remaining words in the given set
   *
   * @param s set where the words must be copied
   *
   * @return {@code false} if there are no words to copy
   */
  private boolean flush(ConciseSet s)
  {
    // nothing to flush
    if (exhausted()) {
      return false;
    }

    // try to "compress" the first few words: keep appending through the
    // set until the word stored last equals the current one
    boolean keepAppending;
    do {
      if (isLiteral) {
        s.appendLiteral(word);
      } else {
        s.appendFill(count, word);
      }
      keepAppending = prepareNext() && s.words[s.lastWordIndex] != word;
    } while (keepAppending);

    // copy remaining words "as-is"
    final int remaining = lastWordIndex - index + 1;
    System.arraycopy(words, index, s.words, s.lastWordIndex + 1, remaining);
    s.lastWordIndex += remaining;
    s.last = last;
    return true;
  }
}
/*
 * WORD EXPANDERS AND BIT ITERATORS
 */
/**
 * Iterator over the bits of literal and zero-fill words.
 * <p>
 * The integers of the word are materialized in {@link #buffer} by
 * {@link #reset(int, int, boolean)}; {@link #current} is a cursor inside it.
 */
private class LiteralAndZeroFillExpander implements WordExpander
{
  /** integers of the current word, in ascending order; only the first {@link #len} slots are valid */
  final int[] buffer = new int[ConciseSetUtils.MAX_LITERAL_LENGTH];
  /** number of valid entries in {@link #buffer} */
  int len = 0;
  /** index of the entry returned by the next call to {@link #next()} */
  int current = 0;

  @Override
  public boolean hasNext()
  {
    return len > current;
  }

  @Override
  public boolean hasPrevious()
  {
    return 0 < current;
  }

  @Override
  public int next()
  {
    if (current >= len) {
      throw new NoSuchElementException();
    }
    final int value = buffer[current];
    current++;
    return value;
  }

  @Override
  public int previous()
  {
    if (current <= 0) {
      throw new NoSuchElementException();
    }
    current--;
    return buffer[current];
  }

  @Override
  public void skipAllAfter(int i)
  {
    // walk backward over every buffered integer greater than i
    int pos = current;
    while (pos > 0 && buffer[pos - 1] > i) {
      pos--;
    }
    current = pos;
  }

  @Override
  public void skipAllBefore(int i)
  {
    // walk forward over every buffered integer smaller than i
    int pos = current;
    while (pos < len && buffer[pos] < i) {
      pos++;
    }
    current = pos;
  }

  /**
   * Loads a literal or a zero-fill word.
   *
   * @param offset        integer represented by the first bit of the word
   * @param word          literal or zero-fill word
   * @param fromBeginning {@code true} to start before the first integer,
   *                      {@code false} to start after the last one
   *
   * @throws RuntimeException if the word is a sequence of ones
   */
  @Override
  public void reset(int offset, int word, boolean fromBeginning)
  {
    if (isLiteral(word)) {
      // collect the position of every set bit of the literal
      int found = 0;
      for (int bit = 0; bit < ConciseSetUtils.MAX_LITERAL_LENGTH; bit++) {
        if (((word >>> bit) & 1) != 0) {
          buffer[found++] = offset + bit;
        }
      }
      len = found;
      current = fromBeginning ? 0 : found;
      return;
    }

    if (!isZeroSequence(word)) {
      throw new RuntimeException("sequence of ones!");
    }

    if (simulateWAH || isSequenceWithNoBits(word)) {
      // pure zero-fill: nothing to iterate over
      len = 0;
      current = 0;
      return;
    }

    // zero-fill with one set bit, whose 1-based position is stored in the
    // bits above the 25th
    len = 1;
    buffer[0] = offset + ((0x3FFFFFFF & word) >>> 25) - 1;
    current = fromBeginning ? 0 : 1;
  }
}
/**
 * Iterator over the bits of one-fill words.
 * <p>
 * No buffer is needed: the integers are the range {@link #firstInt} ..
 * {@link #lastInt}, possibly with one missing value ({@link #exception}).
 */
private class OneFillExpander implements WordExpander
{
  /** smallest integer returned; the initial values describe an empty range */
  int firstInt = 1;
  /** greatest integer returned */
  int lastInt = -1;
  /** cursor: {@link #next()} looks above it, {@link #previous()} below it */
  int current = 0;
  /** integer to skip (the unset bit of a mixed sequence), honored only when simulateWAH is off */
  int exception = -1;

  @Override
  public boolean hasNext()
  {
    return lastInt > current;
  }

  @Override
  public boolean hasPrevious()
  {
    return firstInt < current;
  }

  @Override
  public int next()
  {
    if (!hasNext()) {
      throw new NoSuchElementException();
    }
    int candidate = current + 1;
    if (candidate == exception && !simulateWAH) {
      // jump over the unset bit
      candidate++;
    }
    current = candidate;
    return candidate;
  }

  @Override
  public int previous()
  {
    if (!hasPrevious()) {
      throw new NoSuchElementException();
    }
    int candidate = current - 1;
    if (candidate == exception && !simulateWAH) {
      // jump over the unset bit
      candidate--;
    }
    current = candidate;
    return candidate;
  }

  @Override
  public void skipAllAfter(int i)
  {
    if (i < current) {
      // the following previous() starts looking at i
      current = i + 1;
    }
  }

  @Override
  public void skipAllBefore(int i)
  {
    if (i > current) {
      // the following next() starts looking at i
      current = i - 1;
    }
  }

  /**
   * Loads a one-fill word.
   *
   * @param offset        integer represented by the first bit of the word
   * @param word          one-fill word
   * @param fromBeginning {@code true} to start before the first integer,
   *                      {@code false} to start after the last one
   *
   * @throws RuntimeException if the word is not a sequence of ones
   */
  @Override
  public void reset(int offset, int word, boolean fromBeginning)
  {
    if (!isOneSequence(word)) {
      throw new RuntimeException("NOT a sequence of ones!");
    }

    int lo = offset;
    int hi = offset + maxLiteralLengthMultiplication(getSequenceCount(word) + 1) - 1;
    if (!simulateWAH) {
      // 1-based position of the unset bit, stored above the 25th bit
      final int hole = offset + ((0x3FFFFFFF & word) >>> 25) - 1;
      exception = hole;
      // a hole on a boundary just shrinks the range
      if (hole == lo) {
        lo++;
      }
      if (hole == hi) {
        hi--;
      }
    }
    firstInt = lo;
    lastInt = hi;
    current = fromBeginning ? (lo - 1) : (hi + 1);
  }
}
/**
 * Iterator for all the integers of a {@link ConciseSet} instance, in
 * ascending order.
 * <p>
 * Words are expanded one at a time: one-fill words through {@link #oneExp},
 * everything else through {@link #litExp}. The constructor reads the first
 * word right away, so the words array must hold at least one word.
 */
private class BitIterator implements IntIterator
{
  /**
   * expander used for literals and zero-fill words
   *
   * @uml.property name="litExp"
   * @uml.associationEnd
   */
  final LiteralAndZeroFillExpander litExp = new LiteralAndZeroFillExpander();
  /**
   * expander used for one-fill words
   *
   * @uml.property name="oneExp"
   * @uml.associationEnd
   */
  final OneFillExpander oneExp = new OneFillExpander();
  /**
   * expander of the current word: either {@link #litExp} or {@link #oneExp}
   *
   * @uml.property name="exp"
   * @uml.associationEnd
   */
  WordExpander exp;
  /** index of the next word to expand */
  int nextIndex = 0;
  /** integer represented by the first bit of the next word to expand */
  int nextOffset = 0;

  private BitIterator()
  {
    nextWord();
  }

  /**
   * Loads the word at {@link #nextIndex} into the matching expander and
   * advances {@link #nextIndex} and {@link #nextOffset} past it.
   */
  private void nextWord()
  {
    final int word = words[nextIndex++];
    exp = isOneSequence(word) ? oneExp : litExp;
    exp.reset(nextOffset, word, true);
    // prepare next offset
    if (isLiteral(word)) {
      nextOffset += ConciseSetUtils.MAX_LITERAL_LENGTH;
    } else {
      nextOffset += maxLiteralLengthMultiplication(getSequenceCount(word) + 1);
    }
  }

  @Override
  public boolean hasNext()
  {
    return nextIndex <= lastWordIndex || exp.hasNext();
  }

  @Override
  public int next()
  {
    // move on through the words until one of them still has integers
    while (!exp.hasNext()) {
      if (nextIndex > lastWordIndex) {
        throw new NoSuchElementException();
      }
      nextWord();
    }
    return exp.next();
  }

  @Override
  public void skipAllBefore(int element)
  {
    while (true) {
      exp.skipAllBefore(element);
      if (exp.hasNext() || nextIndex > lastWordIndex) {
        return;
      }
      nextWord();
    }
  }

  /**
   * Creates an iterator positioned where this one is.
   * <p>
   * The state of the active expander is copied into the clone's own
   * expander. Sharing the expander instance would make the two iterators
   * consume each other's elements of the current word.
   *
   * @return an independent copy of this iterator
   */
  @Override
  public IntIterator clone()
  {
    final BitIterator copy = new BitIterator();
    if (exp == oneExp) {
      copy.oneExp.firstInt = oneExp.firstInt;
      copy.oneExp.lastInt = oneExp.lastInt;
      copy.oneExp.current = oneExp.current;
      copy.oneExp.exception = oneExp.exception;
      copy.exp = copy.oneExp;
    } else {
      System.arraycopy(litExp.buffer, 0, copy.litExp.buffer, 0, litExp.len);
      copy.litExp.len = litExp.len;
      copy.litExp.current = litExp.current;
      copy.exp = copy.litExp;
    }
    copy.nextIndex = nextIndex;
    copy.nextOffset = nextOffset;
    return copy;
  }
}
/**
 * Iterator for all the integers of a {@link ConciseSet} instance, in
 * descending order.
 * <p>
 * Words are expanded one at a time starting from the last one: one-fill
 * words through {@link #oneExp}, everything else through {@link #litExp}.
 * The constructor reads the last word right away, so the words array must
 * hold at least one word.
 */
private class ReverseBitIterator implements IntIterator
{
  /**
   * expander used for literals and zero-fill words
   *
   * @uml.property name="litExp"
   * @uml.associationEnd
   */
  final LiteralAndZeroFillExpander litExp = new LiteralAndZeroFillExpander();
  /**
   * expander used for one-fill words
   *
   * @uml.property name="oneExp"
   * @uml.associationEnd
   */
  final OneFillExpander oneExp = new OneFillExpander();
  /**
   * expander of the current word: either {@link #litExp} or {@link #oneExp}
   *
   * @uml.property name="exp"
   * @uml.associationEnd
   */
  WordExpander exp;
  /** index of the next word to expand (moving toward index 0) */
  int nextIndex = lastWordIndex;
  /** offset just past the last block of the set; lowered to the start of each word as it is loaded */
  int nextOffset = maxLiteralLengthMultiplication(maxLiteralLengthDivision(last) + 1);
  int firstIndex; // first non-zero block

  ReverseBitIterator()
  {
    // identify the first non-zero block: the word at index 0 is skipped when
    // it is a pure zero-fill or an all-zeros literal
    if ((isSequenceWithNoBits(words[0]) && isZeroSequence(words[0])) || (isLiteral(words[0])
                                                                          && words[0]
                                                                             == ConciseSetUtils.ALL_ZEROS_LITERAL)) {
      firstIndex = 1;
    } else {
      firstIndex = 0;
    }
    previousWord();
  }

  /**
   * Loads the word at {@link #nextIndex} into the matching expander, with
   * the cursor after its last integer, and moves {@link #nextIndex} and
   * {@link #nextOffset} back past it.
   */
  void previousWord()
  {
    final int word = words[nextIndex--];
    exp = isOneSequence(word) ? oneExp : litExp;
    if (isLiteral(word)) {
      nextOffset -= ConciseSetUtils.MAX_LITERAL_LENGTH;
    } else {
      nextOffset -= maxLiteralLengthMultiplication(getSequenceCount(word) + 1);
    }
    exp.reset(nextOffset, word, false);
  }

  @Override
  public boolean hasNext()
  {
    return nextIndex >= firstIndex || exp.hasPrevious();
  }

  @Override
  public int next()
  {
    // move back through the words until one of them still has integers
    while (!exp.hasPrevious()) {
      if (nextIndex < firstIndex) {
        throw new NoSuchElementException();
      }
      previousWord();
    }
    return exp.previous();
  }

  /**
   * Skips, in iteration (descending) order, the integers that come before
   * the given element, namely the ones greater than it.
   */
  @Override
  public void skipAllBefore(int element)
  {
    while (true) {
      exp.skipAllAfter(element);
      if (exp.hasPrevious() || nextIndex < firstIndex) {
        return;
      }
      previousWord();
    }
  }

  /**
   * Creates an iterator positioned where this one is.
   * <p>
   * The state of the active expander is copied into the clone's own
   * expander. Sharing the expander instance would make the two iterators
   * consume each other's elements of the current word.
   *
   * @return an independent copy of this iterator
   */
  @Override
  public IntIterator clone()
  {
    final ReverseBitIterator copy = new ReverseBitIterator();
    if (exp == oneExp) {
      copy.oneExp.firstInt = oneExp.firstInt;
      copy.oneExp.lastInt = oneExp.lastInt;
      copy.oneExp.current = oneExp.current;
      copy.oneExp.exception = oneExp.exception;
      copy.exp = copy.oneExp;
    } else {
      System.arraycopy(litExp.buffer, 0, copy.litExp.buffer, 0, litExp.len);
      copy.litExp.len = litExp.len;
      copy.litExp.current = litExp.current;
      copy.exp = copy.litExp;
    }
    copy.nextIndex = nextIndex;
    copy.nextOffset = nextOffset;
    copy.firstIndex = firstIndex;
    return copy;
  }
}
}