org.roaringbitmap.buffer.BufferParallelAggregation Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of spark-core Show documentation
Show all versions of spark-core Show documentation
Shaded version of Apache Spark 2.x.x for Presto
The newest version!
package org.roaringbitmap.buffer;
import java.nio.LongBuffer;
import java.util.*;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinTask;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collector;
import java.util.stream.IntStream;
import static org.roaringbitmap.Util.compareUnsigned;
/**
*
* These utility methods provide parallel implementations of
* logical aggregation operators. AND is not implemented
* because it is unlikely to be profitable.
*
* There is a temporary memory overhead in using these methods,
* since a materialisation of the rotated containers grouped by key
* is created in each case.
*
* By default each method executes on the common fork join pool.
* If this is undesirable (it usually is), wrap the call inside
* a task submitted to your own thread pool.
*
*
* {@code
*
* //...
*
* ExecutorService executor = ...
* ImmutableRoaringBitmap[] bitmaps = ...
* // executes on the executor's threads
* MutableRoaringBitmap result = executor.submit(
* () -> BufferParallelAggregation.or(bitmaps)).get();
* }
*
*/
public class BufferParallelAggregation {
private static final Collector<
Map.Entry>, MutableRoaringArray, MutableRoaringBitmap> XOR
= new ContainerCollector(BufferParallelAggregation::xor);
private static final OrCollector OR = new OrCollector();
/**
* Collects containers grouped by their key into a RoaringBitmap, applying the
* supplied aggregation function to each group.
*/
public static class ContainerCollector implements Collector<
Map.Entry>, MutableRoaringArray, MutableRoaringBitmap> {
private final Function, MappeableContainer> reducer;
/**
* Creates a collector with the reducer function.
* @param reducer a function to apply to containers with the same key.
*/
public ContainerCollector(Function, MappeableContainer> reducer) {
this.reducer = reducer;
}
@Override
public Supplier supplier() {
return MutableRoaringArray::new;
}
@Override
public BiConsumer<
MutableRoaringArray, Map.Entry>> accumulator() {
return (l, r) -> {
assert l.size == 0 || compareUnsigned(l.keys[l.size - 1], r.getKey()) < 0;
MappeableContainer container = reducer.apply(r.getValue());
if (!container.isEmpty()) {
l.append(r.getKey(), container);
}
};
}
@Override
public BinaryOperator combiner() {
return (l, r) -> {
assert l.size == 0 || r.size == 0 || compareUnsigned(l.keys[l.size - 1], r.keys[0]) < 0;
l.append(r);
return l;
};
}
@Override
public Function finisher() {
return MutableRoaringBitmap::new;
}
@Override
public Set characteristics() {
return EnumSet.noneOf(Characteristics.class);
}
}
/**
* Collects a list of containers into a single container.
*/
public static class OrCollector
implements Collector, MappeableContainer, MappeableContainer> {
@Override
public Supplier supplier() {
return () -> new MappeableBitmapContainer(LongBuffer.allocate(1 << 10), -1);
}
@Override
public BiConsumer> accumulator() {
return (l, r) -> l.lazyIOR(or(r));
}
@Override
public BinaryOperator combiner() {
return MappeableContainer::lazyIOR;
}
@Override
public Function finisher() {
return MappeableContainer::repairAfterLazy;
}
@Override
public Set characteristics() {
return EnumSet.of(Characteristics.UNORDERED);
}
}
/**
* Groups the containers by their keys
* @param bitmaps input bitmaps
* @return The containers from the bitmaps grouped by key
*/
public static SortedMap> groupByKey(
ImmutableRoaringBitmap... bitmaps) {
Map> grouped = new HashMap<>();
for (ImmutableRoaringBitmap bitmap : bitmaps) {
MappeableContainerPointer it = bitmap.highLowContainer.getContainerPointer();
while (null != it.getContainer()) {
MappeableContainer container = it.getContainer();
Short key = it.key();
List slice = grouped.get(key);
if (null == slice) {
slice = new ArrayList<>();
grouped.put(key, slice);
}
slice.add(container);
it.advance();
}
}
SortedMap> sorted = new TreeMap<>(BufferUtil::compareUnsigned);
sorted.putAll(grouped);
return sorted;
}
/**
* Computes the bitwise union of the input bitmaps
* @param bitmaps the input bitmaps
* @return the union of the bitmaps
*/
public static MutableRoaringBitmap or(ImmutableRoaringBitmap... bitmaps) {
SortedMap> grouped = groupByKey(bitmaps);
short[] keys = new short[grouped.size()];
MappeableContainer[] values = new MappeableContainer[grouped.size()];
List> slices = new ArrayList<>(grouped.size());
int i = 0;
for (Map.Entry> slice : grouped.entrySet()) {
keys[i++] = slice.getKey();
slices.add(slice.getValue());
}
IntStream.range(0, i)
.parallel()
.forEach(position -> values[position] = or(slices.get(position)));
return new MutableRoaringBitmap(new MutableRoaringArray(keys, values, i));
}
/**
 * Computes the bitwise symmetric difference of the input bitmaps.
 *
 * The containers are grouped by key and the entries are collected on a
 * parallel stream with the XOR collector.
 *
 * @param bitmaps the input bitmaps
 * @return the symmetric difference of the bitmaps
 */
public static MutableRoaringBitmap xor(ImmutableRoaringBitmap... bitmaps) {
  SortedMap<Short, List<MappeableContainer>> containersByKey = groupByKey(bitmaps);
  return containersByKey.entrySet().parallelStream().collect(XOR);
}
/**
 * Computes the symmetric difference of a list of containers.
 *
 * @param containers the containers to combine; must contain at least one element
 * @return the symmetric difference of the containers
 */
private static MappeableContainer xor(List<MappeableContainer> containers) {
  // start from a copy of the first container
  // NOTE(review): presumably so that ixor cannot modify an input container - confirm
  MappeableContainer result = containers.get(0).clone();
  for (int i = 1; i < containers.size(); ++i) {
    result = result.ixor(containers.get(i));
  }
  return result;
}
/**
 * Computes the union of a list of containers.
 *
 * Fewer than 16 containers are merged lazily into a copy of the first one.
 * Fewer than 512 containers, or any number when only one thread is available,
 * are merged into a newly allocated bitmap container. Larger lists are split
 * into contiguous sub-lists which are reduced on a parallel stream.
 *
 * @param containers the containers to combine; must contain at least one element
 * @return the union of the containers
 */
private static MappeableContainer or(List<MappeableContainer> containers) {
  // if there are few enough containers it's possible no bitmaps will be materialised
  if (containers.size() < 16) {
    MappeableContainer result = containers.get(0).clone();
    for (int i = 1; i < containers.size(); ++i) {
      result = result.lazyIOR(containers.get(i));
    }
    return result.repairAfterLazy();
  }
  // Only consult the pool for slices big enough to be worth splitting. Never use more
  // partitions than containers: an empty sub-list would fail on get(0) in the branch above.
  int parallelism = containers.size() < 512
      ? 1
      : Math.min(availableParallelism(), containers.size());
  // heuristic to save memory if the union is large and likely to end up as a bitmap
  if (parallelism == 1) {
    MappeableContainer result = new MappeableBitmapContainer(LongBuffer.allocate(1 << 10), -1);
    for (MappeableContainer container : containers) {
      result = result.lazyIOR(container);
    }
    return result.repairAfterLazy();
  }
  // we have an enormous slice (probably skewed), parallelise it
  // the first `mod` sub-lists get one extra element so that the whole list is covered
  int step = Math.floorDiv(containers.size(), parallelism);
  int mod = Math.floorMod(containers.size(), parallelism);
  return IntStream.range(0, parallelism)
      .parallel()
      .mapToObj(i -> containers.subList(i * step + Math.min(i, mod),
          (i + 1) * step + Math.min(i + 1, mod)))
      .collect(OR);
}
/**
 * Returns the parallelism of the pool the current task runs in, or that of the
 * common pool when the caller is not executing inside a fork join pool.
 *
 * @return the parallelism level used to split large slices
 */
private static int availableParallelism() {
  if (!ForkJoinTask.inForkJoinPool()) {
    return ForkJoinPool.getCommonPoolParallelism();
  }
  return ForkJoinTask.getPool().getParallelism();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy