All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.pivovarit.collectors.ParallelStreamCollector Maven / Gradle / Ivy

The newest version!
package com.pivovarit.collectors;

import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collector;
import java.util.stream.Stream;

import static com.pivovarit.collectors.AsyncParallelCollector.requireValidParallelism;
import static com.pivovarit.collectors.BatchingSpliterator.batching;
import static com.pivovarit.collectors.BatchingSpliterator.partitioned;
import static com.pivovarit.collectors.CompletionStrategy.ordered;
import static com.pivovarit.collectors.CompletionStrategy.unordered;
import static java.util.Collections.emptySet;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.collectingAndThen;
import static java.util.stream.Collectors.toList;

/**
 * A {@link Collector} that hands every stream element to a {@code Dispatcher} for processing
 * by the supplied mapping function and finishes by turning the collected per-element handles
 * into a {@link Stream} through a {@code CompletionStrategy}.
 *
 * <p>Instances are created only through the static {@code streaming(...)} and
 * {@code streamingOrdered(...)} factories (the constructor is private). The factories differ in
 * the completion strategy ({@code unordered()} vs {@code ordered()}), the reported
 * characteristics ({@code UNORDERED} vs an empty set) and in how the dispatcher is obtained
 * ({@code Dispatcher.virtual()}, {@code Dispatcher.virtual(parallelism)} or
 * {@code Dispatcher.from(executor, parallelism)}).
 *
 * <p>The combiner always throws, so this collector cannot be used where the stream
 * implementation needs to merge partial accumulators.
 *
 * <p>NOTE(review): the generic type-argument lists in this listing look truncated (for example
 * the {@code implements Collector>, Stream>} clause has unbalanced angle brackets), so the
 * declarations below do not parse as written. Presumably they were lost during extraction -
 * restore them from the original source before compiling.
 *
 * @author Grzegorz Piwowarek
 */
class ParallelStreamCollector implements Collector>, Stream> {

    // Characteristics reported by the unordered factories.
    private static final EnumSet UNORDERED = EnumSet.of(Characteristics.UNORDERED);

    // Mapping function applied to each element inside the task given to the dispatcher.
    private final Function function;

    // Converts the accumulated list into the resulting stream (see finisher()).
    private final CompletionStrategy completionStrategy;

    // Returned verbatim from characteristics().
    private final Set characteristics;

    // Receives one task per element; started in the accumulator and stopped in the finisher.
    private final Dispatcher dispatcher;

    /**
     * Stores the collaborators as-is; no validation happens here, the static factories
     * perform the null and parallelism checks before calling this constructor.
     *
     * @param function           mapping function applied to every element
     * @param completionStrategy strategy applied to the accumulated list in the finisher
     * @param characteristics    collector characteristics to report
     * @param dispatcher         dispatcher the per-element tasks are enqueued on
     */
    private ParallelStreamCollector(
      Function function,
      CompletionStrategy completionStrategy,
      Set characteristics,
      Dispatcher dispatcher) {
        this.completionStrategy = completionStrategy;
        this.characteristics = characteristics;
        this.dispatcher = dispatcher;
        this.function = function;
    }

    /**
     * @return a supplier creating a fresh {@link ArrayList} used as the accumulation container
     */
    @Override
    public Supplier>> supplier() {
        return ArrayList::new;
    }

    /**
     * @return an accumulator that, for each element, calls {@code dispatcher.start()}, enqueues
     *         a task applying the mapping function to that element and appends whatever
     *         {@code dispatcher.enqueue(...)} returns to the accumulation list
     */
    @Override
    public BiConsumer>, T> accumulator() {
        return (acc, e) -> {
            // start() is invoked once per element - presumably a no-op after the first call;
            // verify against Dispatcher.
            dispatcher.start();
            acc.add(dispatcher.enqueue(() -> function.apply(e)));
        };
    }

    /**
     * @return a combiner that unconditionally throws {@link UnsupportedOperationException};
     *         merging partial results (as a parallel stream would require) is rejected
     */
    @Override
    public BinaryOperator>> combiner() {
        return (left, right) -> {
            throw new UnsupportedOperationException(
              "Using parallel stream with parallel collectors is a bad idea");
        };
    }

    /**
     * @return a finisher that first calls {@code dispatcher.stop()} and then returns the result
     *         of applying the completion strategy to the accumulated list
     */
    @Override
    public Function>, Stream> finisher() {
        return acc -> {
            dispatcher.stop();
            return completionStrategy.apply(acc);
        };
    }

    /**
     * @return the characteristics set passed to the constructor
     */
    @Override
    public Set characteristics() {
        return characteristics;
    }

    /**
     * Creates a collector using the {@code unordered()} completion strategy, the
     * {@code UNORDERED} characteristic and {@code Dispatcher.virtual()}.
     *
     * @param mapper function applied to every element
     * @return the configured collector
     * @throws NullPointerException if {@code mapper} is null
     */
    static  Collector> streaming(Function mapper) {
        requireNonNull(mapper, "mapper can't be null");

        return new ParallelStreamCollector<>(mapper, unordered(), UNORDERED, Dispatcher.virtual());
    }

    /**
     * Creates a collector using the {@code unordered()} completion strategy, the
     * {@code UNORDERED} characteristic and {@code Dispatcher.virtual(parallelism)}.
     *
     * @param mapper      function applied to every element
     * @param parallelism value passed to {@code requireValidParallelism} and to the dispatcher
     *                    (the accepted range is defined in {@code AsyncParallelCollector},
     *                    not visible here)
     * @return the configured collector
     * @throws NullPointerException if {@code mapper} is null
     */
    static  Collector> streaming(Function mapper, int parallelism) {
        requireNonNull(mapper, "mapper can't be null");
        requireValidParallelism(parallelism);

        return new ParallelStreamCollector<>(mapper, unordered(), UNORDERED, Dispatcher.virtual(parallelism));
    }

    /**
     * Creates a collector using the {@code unordered()} completion strategy, the
     * {@code UNORDERED} characteristic and {@code Dispatcher.from(executor, parallelism)}.
     *
     * @param mapper      function applied to every element
     * @param executor    executor handed to the dispatcher
     * @param parallelism value passed to {@code requireValidParallelism} and to the dispatcher
     * @return the configured collector
     * @throws NullPointerException if {@code executor} or {@code mapper} is null
     */
    static  Collector> streaming(Function mapper, Executor executor, int parallelism) {
        requireNonNull(executor, "executor can't be null");
        requireNonNull(mapper, "mapper can't be null");
        requireValidParallelism(parallelism);

        return new ParallelStreamCollector<>(mapper, unordered(), UNORDERED, Dispatcher.from(executor, parallelism));
    }

    /**
     * Creates a collector using the {@code ordered()} completion strategy, no characteristics
     * and {@code Dispatcher.virtual()}.
     *
     * @param mapper function applied to every element
     * @return the configured collector
     * @throws NullPointerException if {@code mapper} is null
     */
    static  Collector> streamingOrdered(Function mapper) {
        requireNonNull(mapper, "mapper can't be null");

        return new ParallelStreamCollector<>(mapper, ordered(), emptySet(), Dispatcher.virtual());
    }

    /**
     * Creates a collector using the {@code ordered()} completion strategy, no characteristics
     * and {@code Dispatcher.virtual(parallelism)}.
     *
     * @param mapper      function applied to every element
     * @param parallelism value passed to {@code requireValidParallelism} and to the dispatcher
     * @return the configured collector
     * @throws NullPointerException if {@code mapper} is null
     */
    static  Collector> streamingOrdered(Function mapper, int parallelism) {
        requireNonNull(mapper, "mapper can't be null");
        requireValidParallelism(parallelism);

        return new ParallelStreamCollector<>(mapper, ordered(), emptySet(), Dispatcher.virtual(parallelism));
    }

    /**
     * Creates a collector using the {@code ordered()} completion strategy, no characteristics
     * and {@code Dispatcher.from(executor, parallelism)}.
     *
     * @param mapper      function applied to every element
     * @param executor    executor handed to the dispatcher
     * @param parallelism value passed to {@code requireValidParallelism} and to the dispatcher
     * @return the configured collector
     * @throws NullPointerException if {@code executor} or {@code mapper} is null
     */
    static  Collector> streamingOrdered(Function mapper, Executor executor,
                                                              int parallelism) {
        requireNonNull(executor, "executor can't be null");
        requireNonNull(mapper, "mapper can't be null");
        requireValidParallelism(parallelism);

        return new ParallelStreamCollector<>(mapper, ordered(), emptySet(), Dispatcher.from(executor, parallelism));
    }

    /**
     * Factory holder for the batching variants: instead of one task per element, the input is
     * first gathered into a list and (unless the fast path applies) dispatched as batches.
     * Not instantiable.
     */
    static final class BatchingCollectors {

        // Static-only holder; prevent instantiation.
        private BatchingCollectors() {
        }

        /**
         * Returns {@code syncCollector(mapper)} when {@code parallelism == 1}, otherwise
         * {@code batchingCollector(mapper, executor, parallelism)}.
         *
         * <p>NOTE(review): the body is identical to {@link #streamingOrdered}; both end up in
         * {@code batchingCollector}, which hard-codes {@code ordered()} and {@code emptySet()}.
         * Confirm that the non-"Ordered" variant is intentionally not using
         * {@code unordered()} / {@code UNORDERED} like the non-batching factory does.
         *
         * @param mapper      function applied to every element
         * @param executor    executor handed to the dispatcher; null-checked even though the
         *                    {@code parallelism == 1} branch does not use it
         * @param parallelism value passed to {@code requireValidParallelism}; also selects the branch
         * @return the configured collector
         * @throws NullPointerException if {@code executor} or {@code mapper} is null
         */
        static  Collector> streaming(Function mapper, Executor executor,
                                                           int parallelism) {
            requireNonNull(executor, "executor can't be null");
            requireNonNull(mapper, "mapper can't be null");
            requireValidParallelism(parallelism);

            return parallelism == 1
              ? syncCollector(mapper)
              : batchingCollector(mapper, executor, parallelism);
        }

        /**
         * Returns {@code syncCollector(mapper)} when {@code parallelism == 1}, otherwise
         * {@code batchingCollector(mapper, executor, parallelism)}.
         *
         * @param mapper      function applied to every element
         * @param executor    executor handed to the dispatcher; null-checked even though the
         *                    {@code parallelism == 1} branch does not use it
         * @param parallelism value passed to {@code requireValidParallelism}; also selects the branch
         * @return the configured collector
         * @throws NullPointerException if {@code executor} or {@code mapper} is null
         */
        static  Collector> streamingOrdered(Function mapper, Executor executor,
                                                                  int parallelism) {
            requireNonNull(executor, "executor can't be null");
            requireNonNull(mapper, "mapper can't be null");
            requireValidParallelism(parallelism);

            return parallelism == 1
              ? syncCollector(mapper)
              : batchingCollector(mapper, executor, parallelism);
        }

        /**
         * Builds a collector that first gathers all elements into a list ({@code toList()}) and
         * then re-streams that list into a {@code ParallelStreamCollector} configured with
         * {@code ordered()}, no characteristics and {@code Dispatcher.from(executor, parallelism)}.
         *
         * <p>When the list size equals {@code parallelism}, every element is dispatched
         * individually with {@code mapper}. Otherwise the list is split with
         * {@code partitioned(list, parallelism)}, each partition is processed with
         * {@code batching(mapper)}, and the per-batch results are flattened back into a single
         * stream. (How {@code partitioned} and {@code batching} split and map is defined in
         * {@code BatchingSpliterator}, not visible here.)
         */
        private static  Collector> batchingCollector(Function mapper,
                                                                           Executor executor, int parallelism) {
            return collectingAndThen(
              toList(),
              list -> {
                  // Fast path: with exactly `parallelism` elements each batch would hold a
                  // single element, so repacking into batches is pointless.
                  // NOTE(review): lists shorter than `parallelism` still take the batching
                  // branch - presumably harmless, confirm against partitioned().
                  if (list.size() == parallelism) {
                      return list.stream()
                        .collect(new ParallelStreamCollector<>(
                          mapper,
                          ordered(),
                          emptySet(),
                          Dispatcher.from(executor, parallelism)));
                  } else {
                      // One task per partition; each task yields a collection of results that
                      // is flattened into the final stream.
                      return partitioned(list, parallelism)
                        .collect(collectingAndThen(new ParallelStreamCollector<>(
                            batching(mapper),
                            ordered(),
                            emptySet(),
                            Dispatcher.from(executor, parallelism)),
                          s -> s.flatMap(Collection::stream)));
                  }
              });
        }

        /**
         * Builds a collector that applies {@code mapper} directly inside the accumulator (no
         * dispatcher or executor involved), adds each result to a {@link Stream.Builder} and
         * builds the stream in the finisher. Like the enclosing collector, its combiner always
         * throws {@link UnsupportedOperationException}.
         */
        private static  Collector, Stream> syncCollector(Function mapper) {
            return Collector.of(Stream::builder, (rs, t) -> rs.add(mapper.apply(t)), (rs, rs2) -> {
                throw new UnsupportedOperationException(
                  "Using parallel stream with parallel collectors is a bad idea");
            }, Stream.Builder::build);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy