org.roaringbitmap.buffer.BufferParallelAggregation Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of RoaringBitmap Show documentation
Roaring bitmaps are compressed bitmaps (also called bitsets) which tend to outperform conventional compressed bitmaps such as WAH or Concise.
There is a newer version: 1.3.0
Show newest version
package org.roaringbitmap.buffer;

import java.nio.LongBuffer;
import java.util.*;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinTask;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collector;
import java.util.stream.IntStream;

/**
 * <p>
 * These utility methods provide parallel implementations of
 * logical aggregation operators. AND is not implemented
 * because it is unlikely to be profitable.
 * </p>
 * <p>
 * There is a temporary memory overhead in using these methods,
 * since a materialisation of the rotated containers grouped by key
 * is created in each case.
 * </p>
 * <p>
 * Each method executes on the common fork join pool by default.
 * If this is undesirable (it usually is) wrap the call inside
 * a submission of a task to your own fork join pool; the pool the
 * calling thread belongs to is only taken into account when the caller
 * is itself running inside a {@link ForkJoinPool}.
 * </p>
 * <pre>
 * {@code
 *
 *       //...
 *
 *       ForkJoinPool executor = ...
 *       ImmutableRoaringBitmap[] bitmaps = ...
 *       // executes on the executor's threads
 *       MutableRoaringBitmap result = executor.submit(
 *            () -> BufferParallelAggregation.or(bitmaps)).get();
 * }
 * </pre>
 */
public class BufferParallelAggregation {

  private static final Collector>,
          MutableRoaringArray, MutableRoaringBitmap>
          XOR = new ContainerCollector(BufferParallelAggregation::xor);

  private static final OrCollector OR = new OrCollector();

  /**
   * Collects containers grouped by their key into a RoaringBitmap, applying the
   * supplied aggregation function to each group.
   */
  public static class ContainerCollector implements
          Collector>,
                  MutableRoaringArray, MutableRoaringBitmap> {

    private final Function, MappeableContainer> reducer;

    /**
     * Creates a collector with the reducer function.
     * @param reducer a function to apply to containers with the same key.
     */
    ContainerCollector(Function, MappeableContainer> reducer) {
      this.reducer = reducer;
    }

    @Override
    public Supplier supplier() {
      return MutableRoaringArray::new;
    }

    @Override
    public BiConsumer<
            MutableRoaringArray, Map.Entry>> accumulator() {
      return (l, r) -> {
        assert l.size == 0 || l.keys[l.size - 1] < r.getKey();
        MappeableContainer container = reducer.apply(r.getValue());
        if (!container.isEmpty()) {
          l.append(r.getKey(), container);
        }
      };
    }

    @Override
    public BinaryOperator combiner() {
      return (l, r) -> {
        assert l.size == 0 || r.size == 0 || l.keys[l.size - 1] - r.keys[0] < 0;
        l.append(r);
        return l;
      };
    }

    @Override
    public Function finisher() {
      return MutableRoaringBitmap::new;
    }

    @Override
    public Set characteristics() {
      return EnumSet.noneOf(Characteristics.class);
    }
  }

  /**
   * Collects a list of containers into a single container.
   */
  public static class OrCollector
          implements Collector, MappeableContainer, MappeableContainer> {

    @Override
    public Supplier supplier() {
      return () -> new MappeableBitmapContainer(LongBuffer.allocate(1 << 10), -1);
    }

    @Override
    public BiConsumer> accumulator() {
      return (l, r) -> l.lazyIOR(or(r));
    }

    @Override
    public BinaryOperator combiner() {
      return MappeableContainer::lazyIOR;
    }

    @Override
    public Function finisher() {
      return MappeableContainer::repairAfterLazy;
    }

    @Override
    public Set characteristics() {
      return EnumSet.of(Characteristics.UNORDERED);
    }
  }

  /**
   * Groups the containers by their keys
   * @param bitmaps input bitmaps
   * @return The containers from the bitmaps grouped by key
   */
  public static SortedMap> groupByKey(
          ImmutableRoaringBitmap... bitmaps) {
    Map> grouped = new HashMap<>();
    for (ImmutableRoaringBitmap bitmap : bitmaps) {
      MappeableContainerPointer it = bitmap.highLowContainer.getContainerPointer();
      while (null != it.getContainer()) {
        MappeableContainer container = it.getContainer();
        Character key = it.key();
        List slice = grouped.get(key);
        if (null == slice) {
          slice = new ArrayList<>();
          grouped.put(key, slice);
        }
        slice.add(container);
        it.advance();
      }
    }
    return new TreeMap<>(grouped);
  }

  /**
   * Computes the bitwise union of the input bitmaps
   * @param bitmaps the input bitmaps
   * @return the union of the bitmaps
   */
  public static MutableRoaringBitmap or(ImmutableRoaringBitmap... bitmaps) {
    SortedMap> grouped = groupByKey(bitmaps);
    char[] keys = new char[grouped.size()];
    MappeableContainer[] values = new MappeableContainer[grouped.size()];
    List> slices = new ArrayList<>(grouped.size());
    int i = 0;
    for (Map.Entry> slice : grouped.entrySet()) {
      keys[i++] = slice.getKey();
      slices.add(slice.getValue());
    }
    IntStream.range(0, i)
            .parallel()
            .forEach(position -> values[position] = or(slices.get(position)));
    return new MutableRoaringBitmap(new MutableRoaringArray(keys, values, i));
  }

  /**
   * Computes the bitwise symmetric difference of the input bitmaps
   *
   * <p>The containers are grouped by key and the groups are reduced through a
   * parallel stream over the grouped entries.
   *
   * @param bitmaps the input bitmaps
   * @return the symmetric difference of the bitmaps
   */
  public static MutableRoaringBitmap xor(ImmutableRoaringBitmap... bitmaps) {
    SortedMap<Character, List<MappeableContainer>> containersByKey = groupByKey(bitmaps);
    return containersByKey.entrySet().parallelStream().collect(XOR);
  }



  /**
   * Computes the symmetric difference of containers which share a key.
   *
   * <p>The first container is cloned so that none of the inputs is modified by the
   * in-place {@code ixor} calls.
   *
   * @param containers the containers to combine; must contain at least one element
   * @return the symmetric difference of all the containers
   */
  private static MappeableContainer xor(List<MappeableContainer> containers) {
    MappeableContainer result = containers.get(0).clone();
    for (int i = 1; i < containers.size(); ++i) {
      result = result.ixor(containers.get(i));
    }
    return result;
  }

  /**
   * Computes the union of containers which share a key.
   *
   * <p>Three strategies are used depending on the number of containers: fewer than 16 are
   * folded into a clone of the first container; fewer than 512 (or when only one thread is
   * available) are folded into a freshly allocated bitmap container; anything larger is
   * split into contiguous sub-lists which are unioned in parallel.
   *
   * @param containers the containers to combine; must contain at least one element
   * @return the union of all the containers
   */
  private static MappeableContainer or(List<MappeableContainer> containers) {
    int size = containers.size();
    // if there are few enough containers it's possible no bitmaps will be materialised
    if (size < 16) {
      MappeableContainer result = containers.get(0).clone();
      for (int i = 1; i < size; ++i) {
        result = result.lazyIOR(containers.get(i));
      }
      return result.repairAfterLazy();
    }
    // the parallelism is only looked up for slices big enough to be worth splitting
    int parallelism = size < 512 ? 1 : availableParallelism();
    // heuristic to save memory if the union is large and likely to end up as a bitmap
    if (parallelism <= 1) {
      MappeableContainer result = new MappeableBitmapContainer(LongBuffer.allocate(1 << 10), -1);
      for (MappeableContainer container : containers) {
        result = result.lazyIOR(container);
      }
      return result.repairAfterLazy();
    }
    // we have an enormous slice (probably skewed), parallelise it.
    // Never create more partitions than there are containers: an empty partition would be
    // passed back into this method by the OR collector and fail on containers.get(0).
    int partitions = Math.min(parallelism, size);
    int step = Math.floorDiv(size, partitions);
    int mod = Math.floorMod(size, partitions);
    // the first mod partitions take one extra container so the whole list is covered
    return IntStream.range(0, partitions)
            .parallel()
            .mapToObj(i -> containers.subList(i * step + Math.min(i, mod),
                    (i + 1) * step + Math.min(i + 1, mod)))
            .collect(OR);
  }

  /**
   * Determines how many threads the current computation may use.
   *
   * @return the parallelism of the pool hosting the current task when called from
   *         inside a fork join pool, otherwise the parallelism of the common pool
   */
  private static int availableParallelism() {
    if (!ForkJoinTask.inForkJoinPool()) {
      return ForkJoinPool.getCommonPoolParallelism();
    }
    return ForkJoinTask.getPool().getParallelism();
  }

}