// parallel-collectors: parallel collection processing with customizable thread pools
package com.pivovarit.collectors;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collector;
import java.util.stream.Stream;
import static com.pivovarit.collectors.AsyncParallelCollector.requireValidParallelism;
import static com.pivovarit.collectors.BatchingSpliterator.batching;
import static com.pivovarit.collectors.BatchingSpliterator.partitioned;
import static com.pivovarit.collectors.CompletionStrategy.ordered;
import static com.pivovarit.collectors.CompletionStrategy.unordered;
import static java.util.Collections.emptySet;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.collectingAndThen;
import static java.util.stream.Collectors.toList;
/**
 * @author Grzegorz Piwowarek
 */
class ParallelStreamCollector<T, R> implements Collector<T, List<CompletableFuture<R>>, Stream<R>> {

    private static final EnumSet<Characteristics> UNORDERED = EnumSet.of(Characteristics.UNORDERED);

    private final Function<T, R> function;
    private final CompletionStrategy<R> completionStrategy;
    private final Set<Characteristics> characteristics;
    private final Dispatcher<R> dispatcher;

    private ParallelStreamCollector(
      Function<T, R> function,
      CompletionStrategy<R> completionStrategy,
      Set<Characteristics> characteristics,
      Dispatcher<R> dispatcher) {
        this.completionStrategy = completionStrategy;
        this.characteristics = characteristics;
        this.dispatcher = dispatcher;
        this.function = function;
    }

    @Override
    public Supplier<List<CompletableFuture<R>>> supplier() {
        return ArrayList::new;
    }
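
    // Each accumulated element ensures the dispatcher is running and enqueues its mapping
    // operation; the resulting CompletableFuture is kept so the finisher can stream completions.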
    @Override
    public BiConsumer<List<CompletableFuture<R>>, T> accumulator() {
        return (acc, e) -> {
            dispatcher.start();
            acc.add(dispatcher.enqueue(() -> function.apply(e)));
        };
    }
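
    // A parallel stream would split the pipeline and hit this combiner; the collector
    // deliberately rejects that usage and is meant for sequential streams only.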
    @Override
    public BinaryOperator<List<CompletableFuture<R>>> combiner() {
        return (left, right) -> {
            throw new UnsupportedOperationException(
              "Using parallel stream with parallel collectors is a bad idea");
        };
    }
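
    // Once all elements are enqueued, the dispatcher is stopped and the completion strategy
    // turns the accumulated futures into a Stream of results (encounter order for ordered(),
    // completion order for unordered()).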
    @Override
    public Function<List<CompletableFuture<R>>, Stream<R>> finisher() {
        return acc -> {
            dispatcher.stop();
            return completionStrategy.apply(acc);
        };
    }

    @Override
    public Set<Characteristics> characteristics() {
        return characteristics;
    }
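
    // Factory methods: the "streaming" variants emit results in completion order (hence the
    // UNORDERED characteristic), while the "streamingOrdered" variants preserve encounter order.
    // Overloads select either the virtual-thread dispatcher or a caller-supplied Executor
    // capped at the given parallelism.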
    static <T, R> Collector<T, ?, Stream<R>> streaming(Function<T, R> mapper) {
        requireNonNull(mapper, "mapper can't be null");
        return new ParallelStreamCollector<>(mapper, unordered(), UNORDERED, Dispatcher.virtual());
    }

    static <T, R> Collector<T, ?, Stream<R>> streaming(Function<T, R> mapper, int parallelism) {
        requireNonNull(mapper, "mapper can't be null");
        requireValidParallelism(parallelism);
        return new ParallelStreamCollector<>(mapper, unordered(), UNORDERED, Dispatcher.virtual(parallelism));
    }

    static <T, R> Collector<T, ?, Stream<R>> streaming(Function<T, R> mapper, Executor executor, int parallelism) {
        requireNonNull(executor, "executor can't be null");
        requireNonNull(mapper, "mapper can't be null");
        requireValidParallelism(parallelism);
        return new ParallelStreamCollector<>(mapper, unordered(), UNORDERED, Dispatcher.from(executor, parallelism));
    }

    static <T, R> Collector<T, ?, Stream<R>> streamingOrdered(Function<T, R> mapper) {
        requireNonNull(mapper, "mapper can't be null");
        return new ParallelStreamCollector<>(mapper, ordered(), emptySet(), Dispatcher.virtual());
    }

    static <T, R> Collector<T, ?, Stream<R>> streamingOrdered(Function<T, R> mapper, int parallelism) {
        requireNonNull(mapper, "mapper can't be null");
        requireValidParallelism(parallelism);
        return new ParallelStreamCollector<>(mapper, ordered(), emptySet(), Dispatcher.virtual(parallelism));
    }

    static <T, R> Collector<T, ?, Stream<R>> streamingOrdered(Function<T, R> mapper, Executor executor,
                                                              int parallelism) {
        requireNonNull(executor, "executor can't be null");
        requireNonNull(mapper, "mapper can't be null");
        requireValidParallelism(parallelism);
        return new ParallelStreamCollector<>(mapper, ordered(), emptySet(), Dispatcher.from(executor, parallelism));
    }
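
    // Batching mode: instead of submitting one task per element, the input is first materialized
    // and split into at most `parallelism` chunks, and each chunk runs as a single task on the
    // provided executor, trading per-element scheduling overhead for coarser-grained work units.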
    static final class BatchingCollectors {

        private BatchingCollectors() {
        }

        static <T, R> Collector<T, ?, Stream<R>> streaming(Function<T, R> mapper, Executor executor,
                                                           int parallelism) {
            requireNonNull(executor, "executor can't be null");
            requireNonNull(mapper, "mapper can't be null");
            requireValidParallelism(parallelism);

            return parallelism == 1
              ? syncCollector(mapper)
              : batchingCollector(mapper, executor, parallelism);
        }

        static <T, R> Collector<T, ?, Stream<R>> streamingOrdered(Function<T, R> mapper, Executor executor,
                                                                  int parallelism) {
            requireNonNull(executor, "executor can't be null");
            requireNonNull(mapper, "mapper can't be null");
            requireValidParallelism(parallelism);

            return parallelism == 1
              ? syncCollector(mapper)
              : batchingCollector(mapper, executor, parallelism);
        }

        private static <T, R> Collector<T, ?, Stream<R>> batchingCollector(Function<T, R> mapper,
                                                                           Executor executor, int parallelism) {
            return collectingAndThen(
              toList(),
              list -> {
                  // no sense to repack into batches of size 1
                  if (list.size() == parallelism) {
                      return list.stream()
                        .collect(new ParallelStreamCollector<>(
                          mapper,
                          ordered(),
                          emptySet(),
                          Dispatcher.from(executor, parallelism)));
                  } else {
                      return partitioned(list, parallelism)
                        .collect(collectingAndThen(new ParallelStreamCollector<>(
                            batching(mapper),
                            ordered(),
                            emptySet(),
                            Dispatcher.from(executor, parallelism)),
                          s -> s.flatMap(Collection::stream)));
                  }
              });
        }
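
        // With parallelism == 1 there is nothing to run concurrently, so elements are mapped
        // synchronously on the calling thread into a Stream.Builder.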
        private static <T, R> Collector<T, Stream.Builder<R>, Stream<R>> syncCollector(Function<T, R> mapper) {
            return Collector.of(Stream::builder, (rs, t) -> rs.add(mapper.apply(t)), (rs, rs2) -> {
                throw new UnsupportedOperationException(
                  "Using parallel stream with parallel collectors is a bad idea");
            }, Stream.Builder::build);
        }
    }
}
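
The factory methods above are package-private; applications reach them through the library's public ParallelCollectors facade. A minimal usage sketch follows, assuming ParallelCollectors.parallelToStream delegates to the streaming factory shown here (the facade name and signature come from the library's public API, not from this file; verify against the current README):

import com.pivovarit.collectors.ParallelCollectors;

import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Stream;

class StreamingUsageSketch {
    public static void main(String[] args) {
        ExecutorService executor = Executors.newFixedThreadPool(4);
        try {
            List<Integer> ids = List.of(1, 2, 3, 4, 5);
            // up to 4 mappings run concurrently on the supplied executor;
            // results are emitted as they complete, not in encounter order
            Stream<String> results = ids.stream()
                .collect(ParallelCollectors.parallelToStream(id -> "user-" + id, executor, 4));
            results.forEach(System.out::println);
        } finally {
            executor.shutdown();
        }
    }
}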