org.roaringbitmap.ParallelAggregation Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of RoaringBitmap Show documentation
Show all versions of RoaringBitmap Show documentation
Roaring bitmaps are compressed bitmaps (also called bitsets) which tend to outperform
conventional compressed bitmaps such as WAH or Concise.
package org.roaringbitmap;
import java.util.*;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinTask;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collector;
import java.util.stream.IntStream;
/**
 * These utility methods provide parallel implementations of
 * logical aggregation operators. AND is not implemented
 * because it is unlikely to be profitable.
 *
 * <p>There is a temporary memory overhead in using these methods,
 * since a materialisation of the rotated containers grouped by key
 * is created in each case.
 *
 * <p>Each method executes on the default fork join pool by default.
 * If this is undesirable (it usually is), wrap the call inside
 * a task submitted to your own thread pool.
 *
 * <pre>
 * {@code
 *
 *       //...
 *
 *       ExecutorService executor = ...
 *       RoaringBitmap[] bitmaps = ...
 *       // executes on the executor's threads
 *       RoaringBitmap result = executor.submit(() -> ParallelAggregation.or(bitmaps)).get();
 * }
 * </pre>
 */
public class ParallelAggregation {
private static final Collector>,
RoaringArray, RoaringBitmap>
XOR = new ContainerCollector(ParallelAggregation::xor);
private static final OrCollector OR = new OrCollector();
/**
* Collects containers grouped by their key into a RoaringBitmap, applying the
* supplied aggregation function to each group.
*/
public static class ContainerCollector implements
Collector>, RoaringArray, RoaringBitmap> {
private final Function, Container> reducer;
/**
* Creates a collector with the reducer function.
* @param reducer a function to apply to containers with the same key.
*/
ContainerCollector(Function, Container> reducer) {
this.reducer = reducer;
}
@Override
public Supplier supplier() {
return RoaringArray::new;
}
@Override
public BiConsumer>> accumulator() {
return (l, r) -> {
assert l.size == 0 || l.keys[l.size - 1] < r.getKey();
Container container = reducer.apply(r.getValue());
if (!container.isEmpty()) {
l.append(r.getKey(), container);
}
};
}
@Override
public BinaryOperator combiner() {
return (l, r) -> {
assert l.size == 0 || r.size == 0 || l.keys[l.size - 1] - r.keys[0] < 0;
l.append(r);
return l;
};
}
@Override
public Function finisher() {
return RoaringBitmap::new;
}
@Override
public Set characteristics() {
return EnumSet.noneOf(Characteristics.class);
}
}
/**
* Collects a list of containers into a single container.
*/
public static class OrCollector
implements Collector, Container, Container> {
@Override
public Supplier supplier() {
return () -> new BitmapContainer(new long[1 << 10], -1);
}
@Override
public BiConsumer> accumulator() {
return (l, r) -> l.lazyIOR(or(r));
}
@Override
public BinaryOperator combiner() {
return Container::lazyIOR;
}
@Override
public Function finisher() {
return Container::repairAfterLazy;
}
@Override
public Set characteristics() {
return EnumSet.of(Characteristics.UNORDERED);
}
}
/**
* Groups the containers by their keys
* @param bitmaps input bitmaps
* @return The containers from the bitmaps grouped by key
*/
public static SortedMap> groupByKey(RoaringBitmap... bitmaps) {
Map> grouped = new HashMap<>();
for (RoaringBitmap bitmap : bitmaps) {
RoaringArray ra = bitmap.highLowContainer;
for (int i = 0; i < ra.size; ++i) {
Container container = ra.values[i];
Character key = ra.keys[i];
List slice = grouped.get(key);
if (null == slice) {
slice = new ArrayList<>();
grouped.put(key, slice);
}
slice.add(container);
}
}
return new TreeMap<>(grouped);
}
/**
* Computes the bitwise union of the input bitmaps
* @param bitmaps the input bitmaps
* @return the union of the bitmaps
*/
public static RoaringBitmap or(RoaringBitmap... bitmaps) {
SortedMap> grouped = groupByKey(bitmaps);
char[] keys = new char[grouped.size()];
Container[] values = new Container[grouped.size()];
List> slices = new ArrayList<>(grouped.size());
int i = 0;
for (Map.Entry> slice : grouped.entrySet()) {
keys[i++] = slice.getKey();
slices.add(slice.getValue());
}
IntStream.range(0, i)
.parallel()
.forEach(position -> values[position] = or(slices.get(position)));
return new RoaringBitmap(new RoaringArray(keys, values, i));
}
/**
* Computes the bitwise symmetric difference of the input bitmaps
* @param bitmaps the input bitmaps
* @return the symmetric difference of the bitmaps
*/
public static RoaringBitmap xor(RoaringBitmap... bitmaps) {
return groupByKey(bitmaps)
.entrySet()
.parallelStream()
.collect(XOR);
}
private static Container xor(List containers) {
Container result = containers.get(0).clone();
for (int i = 1; i < containers.size(); ++i) {
result = result.ixor(containers.get(i));
}
return result;
}
private static Container or(List containers) {
int parallelism;
// if there are few enough containers it's possible no bitmaps will be materialised
if (containers.size() < 16) {
Container result = containers.get(0).clone();
for (int i = 1; i < containers.size(); ++i) {
result = result.lazyIOR(containers.get(i));
}
return result.repairAfterLazy();
}
// heuristic to save memory if the union is large and likely to end up as a bitmap
if (containers.size() < 512 || (parallelism = availableParallelism()) == 1) {
Container result = new BitmapContainer(new long[1 << 10], -1);
for (Container container : containers) {
result = result.lazyIOR(container);
}
return result.repairAfterLazy();
}
int step = Math.floorDiv(containers.size(), parallelism);
int mod = Math.floorMod(containers.size(), parallelism);
// we have an enormous slice (probably skewed), parallelise it
return IntStream.range(0, parallelism)
.parallel()
.mapToObj(i -> containers.subList(i * step + Math.min(i, mod),
(i + 1) * step + Math.min(i + 1, mod)))
.collect(OR);
}
private static int availableParallelism() {
return ForkJoinTask.inForkJoinPool()
? ForkJoinTask.getPool().getParallelism()
: ForkJoinPool.getCommonPoolParallelism();
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy