// org.apache.hudi.org.roaringbitmap.RoaringBitmapWriter (Maven / Gradle / Ivy) - the newest version.
package org.roaringbitmap;
import org.roaringbitmap.buffer.*;
import java.util.function.Supplier;
public interface RoaringBitmapWriter extends Supplier {
static Wizard writer() {
return new RoaringBitmapWizard();
}
static Wizard bufferWriter() {
return new BufferWizard();
}
abstract class Wizard,
T extends BitmapDataProvider & AppendableStorage>
implements Supplier> {
protected int initialCapacity = RoaringArray.INITIAL_CAPACITY;
protected boolean constantMemory;
protected boolean partiallySortValues = false;
protected boolean runCompress = true;
protected Supplier containerSupplier;
protected int expectedContainerSize = 16;
Wizard() {
containerSupplier = arraySupplier();
}
/**
* Choose this option if it is known that most containers will be sparse.
* @return this
*/
public Wizard optimiseForArrays() {
containerSupplier = arraySupplier();
return this;
}
/**
* Choose this option if the bitmap is expected to be RLE compressible.
* Will buffer additions into a RunContainer.
* @return this
*/
public Wizard optimiseForRuns() {
containerSupplier = runSupplier();
return this;
}
/**
* By default the bitmap will be run-compressed on the fly,
* but it can be disabled (and run compressed at the end).
* @param runCompress whether to apply run compression on the fly.
* @return this
*/
public Wizard runCompress(boolean runCompress) {
this.runCompress = runCompress;
return this;
}
/**
*
* @param count how many values are expected to fall within any 65536 bit range.
* @return this
*/
public Wizard expectedValuesPerContainer(int count) {
sanityCheck(count);
this.expectedContainerSize = count;
if (count < ArrayContainer.DEFAULT_MAX_SIZE) {
return optimiseForArrays();
} else if (count < 1 << 14) {
return constantMemory();
} else {
return optimiseForRuns();
}
}
public Wizard fastRank() {
throw new IllegalStateException("Fast rank not yet implemented for byte buffers");
}
/**
* All writes are buffered into the same buffer of 8kB, before converting to
* the best container representation and appending to the bitmap.
* This option overrides any optimiseForArrays, optimiseForRuns and optimiseForBitmaps settings.
* @return this
*/
public Wizard constantMemory() {
constantMemory = true;
return this;
}
/**
* Influence default container choice by how dense the bitmap is expected to be.
* @param density value in [0.0, 1.0], density of the bitmap
* @return this
*/
public Wizard expectedDensity(double density) {
return expectedValuesPerContainer((int) (0xFFFF * density));
}
/**
* Guesses the number of prefices required based on an estimate of the range the bitmap
* will contain, assumes that all prefices in the range will be required. This is a good
* heuristic for a contiguous bitmap, and, for instance, a very bad heuristic for a bitmap
* with just two values far apart.
* @param min the inclusive min value
* @param max the exclusive max value
* @return this
*/
public Wizard expectedRange(long min, long max) {
return initialCapacity((int) ((max - min) >>> 16) + 1);
}
/**
* Takes control of the size of the prefix array, in case it can be precalculated
* or estimated. This can potentially save many array allocations during building
* the bitmap.
* @param count an estimate of the number of prefix keys required.
* @return this
*/
public Wizard initialCapacity(int count) {
sanityCheck(count);
initialCapacity = count;
return this;
}
/**
* Will partially sort values, which can allocate O(n) temporary
* memory but can significantly speed up adding unsorted values
* to a bitmap.
* @return this
*/
public Wizard doPartialRadixSort() {
partiallySortValues = true;
return this;
}
protected abstract Supplier arraySupplier();
protected abstract Supplier runSupplier();
protected abstract T createUnderlying(int initialCapacity);
/**
* Builds a bitmap writer based on the supplied options.
* A call to this method is repeatable, and will not fail because the wizard
* should already be in a valid state.
* @return a new RoaringBitmapWriter
*/
@Override
public RoaringBitmapWriter get() {
int capacity = initialCapacity;
return new ContainerAppender<>(partiallySortValues, runCompress,
() -> createUnderlying(capacity), containerSupplier);
}
private static void sanityCheck(int count) {
if (count >= 0xFFFF) {
throw new IllegalArgumentException(count + " > 65536");
}
if (count < 0) {
throw new IllegalArgumentException(count + " < 0");
}
}
}
class BufferWizard extends Wizard {
@Override
protected Supplier arraySupplier() {
int size = expectedContainerSize;
return () -> new MappeableArrayContainer(size);
}
@Override
protected Supplier runSupplier() {
return MappeableRunContainer::new;
}
@Override
protected MutableRoaringBitmap createUnderlying(int initialCapacity) {
return new MutableRoaringBitmap(new MutableRoaringArray(initialCapacity));
}
}
abstract class RoaringWizard extends Wizard {
@Override
protected Supplier arraySupplier() {
int size = expectedContainerSize;
return () -> new ArrayContainer(size);
}
@Override
protected Supplier runSupplier() {
return RunContainer::new;
}
@Override
public Wizard fastRank() {
return new FastRankRoaringBitmapWizard(this);
}
@Override
public RoaringBitmapWriter get() {
if (constantMemory) {
int capacity = initialCapacity;
return new ConstantMemoryContainerAppender<>(
partiallySortValues, runCompress, () -> createUnderlying(capacity));
}
return super.get();
}
}
class FastRankRoaringBitmapWizard extends RoaringWizard {
FastRankRoaringBitmapWizard(Wizard wizard) {
this.constantMemory = wizard.constantMemory;
this.initialCapacity = wizard.initialCapacity;
this.containerSupplier = wizard.containerSupplier;
this.partiallySortValues = wizard.partiallySortValues;
}
@Override
protected FastRankRoaringBitmap createUnderlying(int initialCapacity) {
return new FastRankRoaringBitmap(new RoaringArray(initialCapacity));
}
}
class RoaringBitmapWizard extends RoaringWizard {
@Override
protected RoaringBitmap createUnderlying(int initialCapacity) {
return new RoaringBitmap(new RoaringArray(initialCapacity));
}
}
/**
* Gets the bitmap being written to.
* @return the bitmap
*/
T getUnderlying();
/**
* buffers a value to be added to the bitmap.
* @param value the value
*/
void add(int value);
/**
* Add a range to the bitmap
* @param min the inclusive min value
* @param max the exclusive max value
*/
void add(long min, long max);
/**
* Adds many values to the bitmap.
* @param values the values to add
*
*/
void addMany(int... values);
/**
* Flushes all pending changes to the bitmap.
*/
void flush();
/**
* flushes any pending changes to the bitmap and returns the bitmap
* @return the underlying bitmap
*/
default T get() {
flush();
return getUnderlying();
}
/**
* Resets the writer so it can be reused, must release the reference to the underlying bitmap
*/
void reset();
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy