src.it.unimi.dsi.bits.LongBigArrayBitVector Maven / Gradle / Ivy
Show all versions of dsiutils Show documentation
/*
* DSI utilities
*
* Copyright (C) 2007-2020 Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*
*/
package it.unimi.dsi.bits;
import static it.unimi.dsi.bits.LongArrayBitVector.bit;
import static it.unimi.dsi.bits.LongArrayBitVector.mask;
import static it.unimi.dsi.fastutil.BigArrays.SEGMENT_SIZE;
import static it.unimi.dsi.fastutil.BigArrays.displacement;
import static it.unimi.dsi.fastutil.BigArrays.grow;
import static it.unimi.dsi.fastutil.BigArrays.segment;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import it.unimi.dsi.fastutil.BigArrays;
import it.unimi.dsi.fastutil.booleans.BooleanBigList;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import it.unimi.dsi.fastutil.longs.LongBigList;
/**
* A bit vector implementation based on a {@linkplain LongBigArrays big array of longs}.
*
*
* The main goal of this class is to be able to accommodate very large bit vectors. With respect to
* {@link LongArrayBitVector}, many optimized methods are missing and rely on the generic
* implementations in {@link AbstractBitVector}. Instances of this class represent a bit vector
* using a {@linkplain LongBigArrays big array of longs} that is enlarged as needed when new entries
* are created (using {@link LongBigArrays#grow(long[][], long, long)}), but is never made
* smaller (even on a {@link #clear()}). Use {@link #trim()} for that purpose.
*
*
* Besides usual methods for setting and getting bits, this class provides views that make
* it possible to access comfortably the bit vector in different ways: for instance,
* {@link #asLongBigList(int)} provides access as a list of longs, whereas {@link #asLongSet()}
* provides access in setwise form.
*
*
* When enlarging the underlying array (e.g., for {@link #append(long, int)} operations or add
* operations on the {@linkplain #asLongBigList(int) big list view}), or when invoking
* {@link #ensureCapacity(long)}, this class calls {@link LongBigArrays#grow(long[][], long, long)},
* which could enlarge the array more than expected. On the contrary, {@link #length(long)} (and the
* corresponding method in the {@linkplain #asLongBigList(int) big list view}) sizes the underlying
* array in an exact manner.
*
*
* Bit numbering follows the right-to-left convention: bit k (counted from the right) of
* word w is bit 64w + k of the overall bit vector.
*
*
* If {@link #CHECKS} is true at compile time, boundary checks for all bit operations will be
* compiled in. For maximum speed, you may want to recompile this class with {@link #CHECKS} set to
* false. {@link #CHECKS} is public, so you can check from your code whether you're being provided a
* version with checks or not.
*
*
* Warning: Several optional methods have still to be implemented (e.g., adding an
* element at an arbitrary position using the {@link BooleanBigList} methods).
*
*
* Warning: The {@link #bits()} method uses the {@link AbstractBitVector}
* implementation, which will fail for bit vectors that cannot be stored in a single long array
* (i.e., more than 2<sup>37</sup> bits).
*/
public class LongBigArrayBitVector extends AbstractBitVector implements Cloneable, Serializable {
/** Serialization version identifier. */
private static final long serialVersionUID = 1L;
/** The base-2 logarithm of {@link #BITS_PER_WORD}. */
public final static int LOG2_BITS_PER_WORD = 6;
/** The number of bits in a word (a {@code long}), derived from {@link #LOG2_BITS_PER_WORD}. */
public final static int BITS_PER_WORD = 1 << LOG2_BITS_PER_WORD;
/** {@link #BITS_PER_WORD} minus one; added to a bit count in {@link #numWords(long)} to round up to whole words. */
public final static int WORD_MASK = BITS_PER_WORD - 1;
/** The index of the most significant bit of a word ({@link #BITS_PER_WORD} minus one). */
public final static int LAST_BIT = BITS_PER_WORD - 1;
/** A word with all bits set. */
public final static long ALL_ONES = 0xFFFFFFFFFFFFFFFFL;
/** A word in which only bit {@link #LAST_BIT} is set. */
public final static long LAST_BIT_MASK = 1L << LAST_BIT;
/** Whether this class has been compiled with index checks or not. */
public final static boolean CHECKS = false;
/** Whether the internal consistency assertions of this class are compiled in. */
private static final boolean ASSERTS = false;
/** The number of bits in this vector. */
protected long length;
/**
* The backing big array of this vector. Bit 0 of the first element of the first array contains bit
* 0 of the bit vector, bit 0 of the second element contains bit {@link #BITS_PER_WORD} of the bit
* vector and so on.
*/
protected transient long[][] bits;
/**
 * Computes how many 64-bit words are required to store a given number of bits.
 *
 * @param size a number of bits.
 * @return the smallest number of words whose overall capacity is at least {@code size} bits.
 */
protected final static long numWords(final long size) {
	// Adding BITS_PER_WORD - 1 before shifting rounds the division up.
	final long roundedUp = size + WORD_MASK;
	return roundedUp >>> LOG2_BITS_PER_WORD;
}
/**
 * Returns the index of the word that holds a bit of specified index.
 *
 * <p>
 * An arithmetic (sign-propagating) shift is used so that the sentinel index -1 is mapped to the
 * sentinel word -1, as documented. For nonnegative indices the result is the same as that of a
 * logical shift.
 *
 * @param index the index of a bit, or -1.
 * @return the index of the word that holds the bit of given index, or -1 if {@code index} is -1.
 */
protected final static long word(final long index) {
	// A logical shift (>>>) would turn -1 into 2^58 - 1, breaking the documented contract.
	return index >> LOG2_BITS_PER_WORD;
}
/**
 * Creates a bit vector whose backing big array can hold {@code capacity} bits. The length of
 * the vector is left untouched (i.e., zero).
 *
 * <p>
 * When {@code capacity} is not positive no allocation is performed and the shared
 * {@link LongBigArrays#EMPTY_BIG_ARRAY} is used as backing array.
 *
 * @param capacity the capacity (in bits) of the new bit vector.
 */
protected LongBigArrayBitVector(final long capacity) {
	if (capacity > 0) {
		this.bits = LongBigArrays.newBigArray(numWords(capacity));
	} else {
		this.bits = LongBigArrays.EMPTY_BIG_ARRAY;
	}
}
/**
 * Returns a new, empty bit vector whose backing array is preallocated for {@code capacity} bits,
 * so that up to {@code capacity} bits can be added without reallocating the backing array.
 *
 * <p>
 * The returned vector has length zero. To obtain a cleared bit vector of a given
 * <em>length</em>, use the {@link #ofLength(long)} factory method instead.
 *
 * @param capacity the capacity (in bits) of the new bit vector.
 * @return a new bit vector of given capacity.
 */
public static LongBigArrayBitVector getInstance(final long capacity) {
	final LongBigArrayBitVector emptyVector = new LongBigArrayBitVector(capacity);
	return emptyVector;
}
/**
 * Returns a new, empty bit vector with zero capacity; no backing storage is allocated.
 *
 * @return a new bit vector with no capacity.
 */
public static LongBigArrayBitVector getInstance() {
	final LongBigArrayBitVector emptyVector = new LongBigArrayBitVector(0);
	return emptyVector;
}
/**
 * Returns a new bit vector of given length whose backing array has just been allocated for
 * that many bits.
 *
 * @param length the size (in bits) of the new bit vector.
 * @return a new bit vector of length {@code length}.
 */
public static LongBigArrayBitVector ofLength(final long length) {
	final LongBigArrayBitVector cleared = new LongBigArrayBitVector(length);
	cleared.length = length;
	return cleared;
}
/**
 * Builds a bit vector from an explicit sequence of bits, appended in the order given.
 *
 * @param bit the bits of the new vector; each element must be 0 or 1.
 * @return a new bit vector containing the given bits.
 * @throws IllegalArgumentException if an element of {@code bit} is neither 0 nor 1.
 */
public static LongBigArrayBitVector of(final int... bit) {
	final LongBigArrayBitVector result = new LongBigArrayBitVector(bit.length);
	for (int i = 0; i < bit.length; i++) {
		final int value = bit[i];
		if (!(value == 0 || value == 1)) {
			throw new IllegalArgumentException("Illegal bit value: " + value);
		}
		result.add(value);
	}
	return result;
}
/**
 * Returns the number of bits currently stored in this vector.
 *
 * @return the length, in bits, of this vector.
 */
@Override
public long length() {
	return this.length;
}
/**
 * Makes sure that the backing array is large enough to hold the given number of bits.
 *
 * <p>
 * The backing array is resized through {@link LongBigArrays#grow(long[][], long, long)},
 * preserving the words currently in use; as a consequence, the array actually allocated might be
 * larger than strictly necessary.
 *
 * @param numBits the number of bits that this vector must be able to contain.
 * @return this bit vector.
 */
public LongBigArrayBitVector ensureCapacity(final long numBits) {
	final long wordsNeeded = numWords(numBits);
	final long wordsInUse = numWords(length);
	bits = grow(bits, wordsNeeded, wordsInUse);
	return this;
}
/**
 * Sets the length of this bit vector.
 *
 * <p>
 * The backing array is enlarged, if necessary, using {@code BigArrays.ensureCapacity()}. When
 * the vector is shortened, the bits from {@code newLength} up to the previous length are
 * cleared before the length is updated.
 *
 * @param newLength the new length in bits.
 * @return this bit vector.
 */
@Override
public LongBigArrayBitVector length(final long newLength) {
	final long previousLength = length;
	bits = BigArrays.ensureCapacity(bits, numWords(newLength), numWords(previousLength));
	// The dropped range must be zeroed while the old length is still in effect.
	if (previousLength > newLength) {
		fill(newLength, previousLength, false);
	}
	length = newLength;
	return this;
}
/**
 * Sets every bit of this vector to the given value.
 *
 * <p>
 * Words entirely covered by the vector are filled in bulk; a trailing, partially used word (if
 * any) receives only its lowest {@code length % 64} bits, so that bits beyond the length stay
 * unset.
 *
 * @param value the value to assign to all bits.
 */
@Override
public void fill(final boolean value) {
	final long wholeWords = length / Long.SIZE;
	final int residual = (int)(length % Long.SIZE);
	BigArrays.fill(bits, 0, wholeWords, value ? ALL_ONES : 0L);
	if (residual != 0) {
		BigArrays.set(bits, wholeWords, value ? (1L << residual) - 1 : 0L);
	}
}
/**
 * Resizes the backing array so that it contains exactly the number of words needed for the
 * current length.
 *
 * @return true if the backing array was actually resized.
 */
public boolean trim() {
	final long requiredWords = numWords(length);
	if (BigArrays.length(bits) != requiredWords) {
		bits = BigArrays.setLength(bits, requiredWords);
		return true;
	}
	return false;
}
/**
 * Empties this bit vector, zeroing all words that were in use and setting the length to 0.
 *
 * <p>
 * The backing array is not reallocated. Call {@link #trim()} afterwards to release it.
 */
@Override
public void clear() {
	if (length == 0) {
		return;
	}
	// Zero every word up to and including the one holding the last bit.
	BigArrays.fill(bits, 0, word(length - 1) + 1, 0);
	length = 0;
}
/**
 * Returns an independent copy of this bit vector whose backing array is allocated for exactly
 * the current length.
 *
 * @return a copy of this bit vector.
 */
@Override
public LongBigArrayBitVector copy() {
	final long bitCount = length;
	final LongBigArrayBitVector duplicate = new LongBigArrayBitVector(bitCount);
	BigArrays.copy(bits, 0, duplicate.bits, 0, numWords(bitCount));
	duplicate.length = bitCount;
	return duplicate;
}
/**
 * Returns this very instance: no conversion is performed.
 *
 * @return this bit vector.
 */
@Override
public LongBigArrayBitVector fast() {
	return this;
}
/**
 * Returns an instance of this class containing a copy of the given bit vector.
 *
 * <p>
 * The source is read one word at a time through {@link BitVector#getLong(long, long)} on
 * {@link Long#SIZE} boundaries; a final, shorter read fetches the bits (if any) that do not fill
 * a whole word.
 *
 * @param bv a bit vector.
 * @return an instance of this class containing a copy of the given vector.
 */
public static LongBigArrayBitVector copy(final BitVector bv) {
	final long bitCount = bv.length();
	final LongBigArrayBitVector result = new LongBigArrayBitVector(bitCount);
	final long wholeWords = bitCount / Long.SIZE;
	for (long w = 0; w < wholeWords; w++) {
		final long start = w * Long.SIZE;
		BigArrays.set(result.bits, w, bv.getLong(start, start + Long.SIZE));
	}
	final long tailStart = wholeWords * Long.SIZE;
	if (tailStart != bitCount) {
		BigArrays.set(result.bits, wholeWords, bv.getLong(tailStart, bitCount));
	}
	result.length = bitCount;
	return result;
}
/**
 * Returns the bit of given index.
 *
 * @param index the index of a bit; it is validated only if {@link #CHECKS} is true.
 * @return true if the bit is set.
 */
@Override
public boolean getBoolean(final long index) {
	if (CHECKS) ensureRestrictedIndex(index);
	final long containingWord = BigArrays.get(bits, word(index));
	return (containingWord & mask(index)) != 0;
}
/**
 * Sets the bit of given index to the given value.
 *
 * @param index the index of a bit; it is validated only if {@link #CHECKS} is true.
 * @param value the new value of the bit.
 * @return the previous value of the bit.
 */
@Override
public boolean set(final long index, final boolean value) {
	if (CHECKS) ensureRestrictedIndex(index);
	final long wordIndex = word(index);
	final long[] chunk = bits[BigArrays.segment(wordIndex)];
	final int offset = BigArrays.displacement(wordIndex);
	final long bitMask = mask(index);
	final boolean previous = (chunk[offset] & bitMask) != 0;
	// Flip the bit only when the stored value differs from the requested one.
	if (previous != value) {
		chunk[offset] ^= bitMask;
	}
	return previous;
}
/**
 * Sets (to one) the bit of given index.
 *
 * @param index the index of a bit; it is validated only if {@link #CHECKS} is true.
 */
@Override
public void set(final long index) {
	if (CHECKS) ensureRestrictedIndex(index);
	final long wordIndex = word(index);
	final long[] chunk = bits[BigArrays.segment(wordIndex)];
	chunk[BigArrays.displacement(wordIndex)] |= mask(index);
}
/**
 * Clears (sets to zero) the bit of given index.
 *
 * @param index the index of a bit; it is validated only if {@link #CHECKS} is true.
 */
@Override
public void clear(final long index) {
	if (CHECKS) ensureRestrictedIndex(index);
	final long wordIndex = word(index);
	final long[] chunk = bits[BigArrays.segment(wordIndex)];
	chunk[BigArrays.displacement(wordIndex)] &= ~mask(index);
}
/**
 * Appends {@code width} bits, taken from {@code value}, at the end of this bit vector.
 *
 * <p>
 * The backing array is enlarged as needed with {@link #ensureCapacity(long)}. The value is
 * OR-ed into the word containing the current end of the vector; if it straddles a word
 * boundary, its upper part is stored in the following word (possibly in the next segment).
 *
 * @param value the value to append; it is checked against {@code width} only if
 *            {@link #CHECKS} is true.
 * @param width the number of bits to append; if zero, nothing happens.
 * @return this bit vector.
 * @throws IllegalArgumentException if {@link #CHECKS} is true and {@code value} has bits set
 *             beyond the first {@code width}.
 */
@Override
public LongBigArrayBitVector append(final long value, final int width) {
	if (width == 0) return this;
	if (CHECKS) if (width < Long.SIZE && (value & -1L << width) != 0) throw new IllegalArgumentException("The specified value (" + value + ") is larger than the maximum value for the given width (" + width + ")");
	final long oldLength = this.length;
	final int startBit = bit(oldLength);
	final long startWord = word(oldLength);
	ensureCapacity(oldLength + width);
	int seg = segment(startWord);
	int disp = displacement(startWord);
	// The lower part always lands in the word holding the current end of the vector.
	bits[seg][disp] |= value << startBit;
	if (startBit + width > Long.SIZE) {
		// The value spills over into the next word, which may be in the next segment.
		if (++disp == SEGMENT_SIZE) {
			disp = 0;
			seg++;
		}
		bits[seg][disp] = value >>> BITS_PER_WORD - startBit;
	}
	this.length += width;
	return this;
}
/**
 * Appends a single bit at the end of this bit vector, enlarging the backing array as needed.
 *
 * @param value the bit to append.
 * @return true.
 */
@Override
public boolean add(final boolean value) {
	final long position = this.length;
	ensureCapacity(position + 1);
	if (value) {
		final long wordIndex = word(position);
		bits[segment(wordIndex)][displacement(wordIndex)] |= mask(bit(position));
	}
	this.length = position + 1;
	return true;
}
/**
 * Returns the bits of index from {@code from} (inclusive) to {@code to} (exclusive) packed in a
 * long, with the bit of index {@code from} in position 0.
 *
 * @param from the starting bit (inclusive).
 * @param to the ending bit (exclusive); the interval is validated only if {@link #CHECKS} is
 *            true.
 * @return the requested bits, or 0 if the interval is empty.
 */
@Override
public long getLong(final long from, final long to) {
	if (CHECKS) BitVectors.ensureFromTo(length, from, to);
	// Number of high bits of the result that must be zero.
	final long unused = Long.SIZE - (to - from);
	if (unused == Long.SIZE) return 0;
	final int startBit = bit(from);
	final long firstWord = word(from);
	final long low = BigArrays.get(bits, firstWord);
	// The whole interval lies inside a single word.
	if (startBit <= unused) return low << (unused - startBit) >>> unused;
	// The interval straddles two consecutive words.
	final long high = BigArrays.get(bits, firstWord + 1);
	return (low >>> startBit) | (high << (Long.SIZE + unused - startBit) >>> unused);
}
/**
 * Returns a bit vector of given size that uses the given big array of longs as backing array
 * (the array is not copied).
 *
 * <p>
 * Every bit of {@code array} of index {@code size} or greater must be unset, or an exception
 * will be thrown.
 *
 * @param array a big array of longs.
 * @param size the number of bits of the newly created bit vector.
 * @return a bit vector of size {@code size} using {@code array} as backing big array.
 * @throws IllegalArgumentException if {@code array} is too short for {@code size} bits, or if
 *             it contains set bits beyond {@code size}.
 */
public static LongBigArrayBitVector wrap(final long[][] array, final long size) {
	final long arrayLength = BigArrays.length(array);
	if (size > arrayLength << LOG2_BITS_PER_WORD) {
		throw new IllegalArgumentException("The provided array is too short (" + arrayLength + " elements) for the given size (" + size + ")");
	}
	final long lastWord = size / Long.SIZE;
	if (lastWord < arrayLength) {
		// Bits of the last word that belong to the vector.
		final long validBits = (1L << size % Long.SIZE) - 1;
		if ((BigArrays.get(array, lastWord) & ~validBits) != 0) {
			throw new IllegalArgumentException("Garbage beyond size in bit array");
		}
	}
	for (long i = lastWord + 1; i < arrayLength; i++) {
		if (BigArrays.get(array, i) != 0) {
			throw new IllegalArgumentException("Garbage beyond size in bit array");
		}
	}
	final LongBigArrayBitVector wrapper = new LongBigArrayBitVector(0);
	wrapper.bits = array;
	wrapper.length = size;
	return wrapper;
}
/**
 * Returns a bit vector that uses the whole given big array of longs as backing array.
 *
 * @param array a big array of longs.
 * @return a bit vector of size {@code Long.SIZE} times the length of {@code array} using
 *         {@code array} as backing big array.
 */
public static LongBigArrayBitVector wrap(final long[][] array) {
	final long totalBits = BigArrays.length(array) * Long.SIZE;
	return wrap(array, totalBits);
}
/**
 * Returns a cloned copy of this bit vector.
 *
 * <p>
 * This method is functionally equivalent to {@link #copy()}, except that {@link #copy()} trims the
 * backing array.
 *
 * <p>
 * The backing big array is duplicated segment by segment, so the clone and this vector never
 * share storage: modifying one does not affect the other.
 *
 * @return a copy of this bit vector.
 * @throws CloneNotSupportedException if the superclass refuses to clone.
 */
@Override
public LongBigArrayBitVector clone() throws CloneNotSupportedException {
	final LongBigArrayBitVector copy = (LongBigArrayBitVector)super.clone();
	// Cloning a long[][] copies only the outer array of references: each segment must be
	// cloned as well, or the two vectors would write into the same memory.
	final long[][] clonedBits = bits.clone();
	for (int i = clonedBits.length; i-- != 0;) {
		clonedBits[i] = clonedBits[i].clone();
	}
	copy.bits = clonedBits;
	return copy;
}
/**
 * Returns a hash code for this bit vector, computed by mixing the length with every word in use
 * and folding the resulting 64-bit value into an {@code int}.
 *
 * @return a hash code for this bit vector.
 */
@Override
public int hashCode() {
	final long wordCount = numWords(length);
	long h = 0x9e3779b97f4a7c13L ^ length;
	long i = 0;
	while (i < wordCount) {
		h ^= (h << 5) + BigArrays.get(bits, i) + (h >>> 2);
		i++;
	}
	final int folded = (int)((h >>> 32) ^ h);
	// When enabled, verifies agreement with the generic implementation of the superclass.
	if (ASSERTS) assert folded == super.hashCode();
	return folded;
}
/**
 * Compares this bit vector with another object, using the word-by-word comparison when the
 * argument is an instance of this class and the superclass implementation otherwise.
 *
 * @param o an object.
 * @return true if the argument is considered equal to this bit vector.
 */
@Override
public boolean equals(final Object o) {
	return o instanceof LongBigArrayBitVector ? equals((LongBigArrayBitVector)o) : super.equals(o);
}
/**
 * Compares this bit vector with another instance of this class, word by word.
 *
 * @param v a bit vector of this class.
 * @return true if the two vectors have the same length and the same words in use.
 */
public boolean equals(final LongBigArrayBitVector v) {
	if (length != v.length()) {
		return false;
	}
	// Scan from the last word in use down to the first one.
	for (long i = numWords(length) - 1; i >= 0; i--) {
		if (BigArrays.get(bits, i) != BigArrays.get(v.bits, i)) {
			return false;
		}
	}
	return true;
}
/**
 * A list-of-integers view of a bit vector.
 *
 * <p>
 * This class implements in the obvious way a view of a bit vector as a list of integers of given
 * width. The vector is enlarged as needed (i.e., when adding new elements), but it is never shrunk.
 */
protected static class LongBigListView extends AbstractBitVector.LongBigListView {
	private static final long serialVersionUID = 1L;
	/** The underlying bit vector, typed so that its backing array can be accessed directly. */
	@SuppressWarnings("hiding")
	private final LongBigArrayBitVector bitVector;

	/**
	 * Creates a view of the given bit vector as a list of integers of given width.
	 *
	 * @param bitVector the underlying bit vector.
	 * @param width the width, in bits, of each element of the list.
	 */
	public LongBigListView(final LongBigArrayBitVector bitVector, final int width) {
		super(bitVector, width);
		this.bitVector = bitVector;
	}

	/**
	 * Appends a value to the list by appending {@code width} bits to the underlying vector.
	 *
	 * @param value the value to append.
	 * @return true.
	 */
	@Override
	public boolean add(final long value) {
		bitVector.append(value, width);
		return true;
	}

	/**
	 * Returns the element of given index, that is, the {@code width} bits of the underlying
	 * vector starting at bit {@code index * width}.
	 *
	 * @param index the index of an element.
	 * @return the element of given index.
	 */
	@Override
	public long getLong(final long index) {
		final long start = index * width;
		return bitVector.getLong(start, start + width);
	}

	/** Clears the underlying bit vector. */
	@Override
	public void clear() {
		bitVector.clear();
	}

	/**
	 * Replaces the element of given index.
	 *
	 * @param index the index of an element.
	 * @param value the new value; unless the width is {@link Long#SIZE}, it must be nonnegative
	 *            and not larger than {@code fullMask}.
	 * @return the previous value of the element (always 0 if the width is zero).
	 * @throws IllegalArgumentException if {@code value} does not fit the width of this list.
	 */
	@Override
	public long set(final long index, final long value) {
		if (width == 0) return 0;
		// The comparison with fullMask is signed: a negative value would pass it, and its high
		// bits would then be OR-ed into the neighbouring elements. With fewer than 64 bits
		// available no negative value can be represented, so reject it explicitly.
		if (width != Long.SIZE && (value < 0 || value > fullMask)) throw new IllegalArgumentException("Value too large: " + value);
		ensureRestrictedIndex(index);
		final long[][] bits = bitVector.bits;
		final long start = index * width;
		final long startWord = word(start);
		final long endWord = word(start + width - 1);
		final int startBit = bit(start);
		final long oldValue;
		if (startWord == endWord) {
			// The element is contained in a single word.
			final int segment = segment(startWord);
			final int displacement = displacement(startWord);
			oldValue = bits[segment][displacement] >>> startBit & fullMask;
			bits[segment][displacement] &= ~(fullMask << startBit);
			bits[segment][displacement] |= value << startBit;
			if (ASSERTS) assert value == (bits[segment][displacement] >>> startBit & fullMask);
		} else {
			// The element straddles two words. Here startBit > 0.
			final int startSegment = segment(startWord);
			final int startDisplacement = displacement(startWord);
			final int endSegment = segment(endWord);
			final int endDisplacement = displacement(endWord);
			oldValue = bits[startSegment][startDisplacement] >>> startBit | bits[endSegment][endDisplacement] << (BITS_PER_WORD - startBit) & fullMask;
			// Keep the bits below startBit in the first word, then store the lower part of the value.
			bits[startSegment][startDisplacement] &= (1L << startBit) - 1;
			bits[startSegment][startDisplacement] |= value << startBit;
			// Clear the low bits of the second word taken by the element, then store the upper part.
			bits[endSegment][endDisplacement] &= -(1L << width - BITS_PER_WORD + startBit);
			bits[endSegment][endDisplacement] |= value >>> BITS_PER_WORD - startBit;
			if (ASSERTS) assert value == (bits[startSegment][startDisplacement] >>> startBit | bits[endSegment][endDisplacement] << (BITS_PER_WORD - startBit) & fullMask);
		}
		return oldValue;
	}
}
/**
 * Returns a view of this bit vector as a big list of integers of given width.
 *
 * @param width the width, in bits, of each element of the list.
 * @return a {@link LongBigListView} backed by this bit vector.
 */
@Override
public LongBigList asLongBigList(final int width) {
	final LongBigListView view = new LongBigListView(this, width);
	return view;
}
/**
 * Serializes this bit vector: after the default fields, the words in use are written one by
 * one (the backing array is transient).
 *
 * @param s the stream to write to.
 * @throws IOException if the underlying stream fails.
 */
private void writeObject(final ObjectOutputStream s) throws IOException {
	s.defaultWriteObject();
	final long wordCount = numWords(length);
	long i = 0;
	while (i < wordCount) {
		s.writeLong(BigArrays.get(bits, i));
		i++;
	}
}
/**
 * Deserializes this bit vector: after the default fields, a backing array of exactly the
 * required number of words is allocated and filled from the stream.
 *
 * @param s the stream to read from.
 * @throws IOException if the underlying stream fails.
 * @throws ClassNotFoundException if default deserialization cannot resolve a class.
 */
private void readObject(final ObjectInputStream s) throws IOException, ClassNotFoundException {
	s.defaultReadObject();
	final long wordCount = numWords(length);
	bits = LongBigArrays.newBigArray(wordCount);
	long i = 0;
	while (i < wordCount) {
		BigArrays.set(bits, i, s.readLong());
		i++;
	}
}
}