org.eclipse.collections.impl.parallel.ParallelIterate Maven / Gradle / Ivy
Show all versions of eclipse-collections Show documentation
/*
* Copyright (c) 2022 Goldman Sachs and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* and Eclipse Distribution License v. 1.0 which accompany this distribution.
* The Eclipse Public License is available at http://www.eclipse.org/legal/epl-v10.html
* and the Eclipse Distribution License is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*/
package org.eclipse.collections.impl.parallel;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Collection;
import java.util.List;
import java.util.RandomAccess;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.eclipse.collections.api.block.function.Function;
import org.eclipse.collections.api.block.function.Function0;
import org.eclipse.collections.api.block.function.Function2;
import org.eclipse.collections.api.block.function.primitive.DoubleFunction;
import org.eclipse.collections.api.block.function.primitive.FloatFunction;
import org.eclipse.collections.api.block.function.primitive.IntFunction;
import org.eclipse.collections.api.block.function.primitive.LongFunction;
import org.eclipse.collections.api.block.predicate.Predicate;
import org.eclipse.collections.api.block.procedure.Procedure;
import org.eclipse.collections.api.block.procedure.Procedure2;
import org.eclipse.collections.api.block.procedure.primitive.ObjectIntProcedure;
import org.eclipse.collections.api.factory.Maps;
import org.eclipse.collections.api.factory.primitive.ObjectDoubleMaps;
import org.eclipse.collections.api.factory.primitive.ObjectLongMaps;
import org.eclipse.collections.api.list.ListIterable;
import org.eclipse.collections.api.map.MutableMap;
import org.eclipse.collections.api.map.MutableMapIterable;
import org.eclipse.collections.api.map.primitive.MutableObjectDoubleMap;
import org.eclipse.collections.api.map.primitive.MutableObjectLongMap;
import org.eclipse.collections.api.map.primitive.ObjectDoubleMap;
import org.eclipse.collections.api.map.primitive.ObjectLongMap;
import org.eclipse.collections.api.multimap.MutableMultimap;
import org.eclipse.collections.api.tuple.primitive.DoubleDoublePair;
import org.eclipse.collections.impl.block.factory.Functions0;
import org.eclipse.collections.impl.block.procedure.MultimapPutProcedure;
import org.eclipse.collections.impl.block.procedure.MutatingAggregationProcedure;
import org.eclipse.collections.impl.block.procedure.NonMutatingAggregationProcedure;
import org.eclipse.collections.impl.list.fixed.ArrayAdapter;
import org.eclipse.collections.impl.map.mutable.ConcurrentHashMap;
import org.eclipse.collections.impl.multimap.list.SynchronizedPutFastListMultimap;
import org.eclipse.collections.impl.tuple.primitive.PrimitiveTuples;
import org.eclipse.collections.impl.utility.Iterate;
import static org.eclipse.collections.impl.factory.Iterables.iList;
/**
* The ParallelIterate class contains several parallel algorithms that work with Collections. All the higher
* level parallel algorithms depend on the basic parallel algorithm named {@code forEach}. The forEach algorithm employs
* a batching fork and join approach.
*
* All Collections that are not either a {@link RandomAccess} or {@link List} are first converted to a Java array
* using {@link Iterate#toArray(Iterable)}, and then run with one of the {@code ParallelArrayIterate.forEach} methods.
*
* @see ParallelArrayIterate
*/
public final class ParallelIterate
{
static final int DEFAULT_MIN_FORK_SIZE = 10000;
static final int AVAILABLE_PROCESSORS = Runtime.getRuntime().availableProcessors();
static final int TASK_RATIO = 2;
static final int DEFAULT_PARALLEL_TASK_COUNT = ParallelIterate.getDefaultTaskCount();
static final ExecutorService EXECUTOR_SERVICE = ParallelIterate.newPooledExecutor(ParallelIterate.class.getSimpleName(), true);
private ParallelIterate()
{
throw new AssertionError("Suppress default constructor for noninstantiability");
}
static boolean isExecutorShutdown()
{
return ParallelIterate.EXECUTOR_SERVICE.isShutdown();
}
static void shutdownExecutor()
{
ParallelIterate.EXECUTOR_SERVICE.shutdown();
}
/**
* Iterate over the collection specified, in parallel batches using default runtime parameter values. The
* {@code ObjectIntProcedure} used must be stateless, or use concurrent aware objects if they are to be shared.
*
* e.g.
*
* Map<Integer, Object> chm = new ConcurrentHashMap<Integer, Object>();
* ParallelIterate.forEachWithIndex(collection, (each, index) -> chm.put(index, each));
*
*/
public static void forEachWithIndex(Iterable iterable, ObjectIntProcedure super T> objectIntProcedure)
{
ParallelIterate.forEachWithIndex(iterable, objectIntProcedure, ParallelIterate.EXECUTOR_SERVICE);
}
/**
* Iterate over the collection specified in parallel batches using the default runtime parameters. The
* ObjectIntProcedure used must be stateless, or use concurrent aware objects if they are to be shared. The code
* is executed against the specified executor.
*
* e.g.
* Map<Integer, Object> chm = new ConcurrentHashMap<Integer, Object>();
* ParallelIterate.forEachWithIndex(collection, (each, index) -> chm.put(index, each), executor);
*
*
* @param executor Use this executor for all execution.
*/
public static > void forEachWithIndex(
Iterable iterable,
BT procedure,
Executor executor)
{
ParallelIterate.forEachWithIndex(
iterable,
new PassThruObjectIntProcedureFactory<>(procedure),
new PassThruCombiner<>(), executor);
}
/**
* Iterate over the collection specified in parallel batches. The
* ObjectIntProcedure used must be stateless, or use concurrent aware objects if they are to be shared. The
* specified minimum fork size and task count are used instead of the default values.
*
* @param minForkSize Only run in parallel if input collection is longer than this.
* @param taskCount How many parallel tasks to submit to the executor.
* @see #forEachWithIndex(Iterable, ObjectIntProcedure)
*/
public static > void forEachWithIndex(
Iterable iterable,
BT procedure,
int minForkSize,
int taskCount)
{
ParallelIterate.forEachWithIndex(
iterable,
new PassThruObjectIntProcedureFactory<>(procedure),
new PassThruCombiner<>(),
minForkSize,
taskCount);
}
public static > void forEachWithIndex(
Iterable iterable,
ObjectIntProcedureFactory procedureFactory,
Combiner combiner,
Executor executor)
{
int taskCount = Math.max(
ParallelIterate.DEFAULT_PARALLEL_TASK_COUNT,
Iterate.sizeOf(iterable) / ParallelIterate.DEFAULT_MIN_FORK_SIZE);
ParallelIterate.forEachWithIndex(
iterable,
procedureFactory,
combiner,
ParallelIterate.DEFAULT_MIN_FORK_SIZE,
taskCount,
executor);
}
public static > void forEachWithIndex(
Iterable iterable,
ObjectIntProcedureFactory procedureFactory,
Combiner combiner,
int minForkSize,
int taskCount)
{
ParallelIterate.forEachWithIndex(iterable, procedureFactory, combiner, minForkSize, taskCount, ParallelIterate.EXECUTOR_SERVICE);
}
public static > void forEachWithIndex(
Iterable iterable,
ObjectIntProcedureFactory procedureFactory,
Combiner combiner,
int minForkSize,
int taskCount,
Executor executor)
{
if (Iterate.notEmpty(iterable))
{
if (iterable instanceof RandomAccess || iterable instanceof ListIterable
&& iterable instanceof List)
{
ParallelIterate.forEachWithIndexInListOnExecutor(
(List) iterable,
procedureFactory,
combiner,
minForkSize,
taskCount,
executor);
}
else
{
ParallelIterate.forEachWithIndexInListOnExecutor(
ArrayAdapter.adapt((T[]) Iterate.toArray(iterable)),
procedureFactory,
combiner,
minForkSize,
taskCount,
executor);
}
}
}
public static > void forEachWithIndexInListOnExecutor(
List list,
ObjectIntProcedureFactory procedureFactory,
Combiner combiner,
int minForkSize,
int taskCount,
Executor executor)
{
int size = list.size();
if (size < minForkSize)
{
BT procedure = procedureFactory.create();
Iterate.forEachWithIndex(list, procedure);
if (combiner.useCombineOne())
{
combiner.combineOne(procedure);
}
else
{
combiner.combineAll(iList(procedure));
}
}
else
{
int threadCount = Math.min(size, taskCount);
ObjectIntProcedureFJTaskRunner runner =
new ObjectIntProcedureFJTaskRunner<>(combiner, threadCount);
runner.executeAndCombine(executor, procedureFactory, list);
}
}
/**
* Iterate over the collection specified in parallel batches using default runtime parameter values. The
* {@code Procedure} used must be stateless, or use concurrent aware objects if they are to be shared.
*
* e.g.
*
* Map<Object, Boolean> chm = new ConcurrentHashMap<Object, Boolean>();
* ParallelIterate.forEach(collection, each -> chm.put(each, Boolean.TRUE));
*
*/
public static void forEach(Iterable iterable, Procedure super T> procedure)
{
ParallelIterate.forEach(iterable, procedure, ParallelIterate.EXECUTOR_SERVICE);
}
/**
* Iterate over the collection specified in parallel batches using default runtime parameter values. The
* {@code Procedure} used must be stateless, or use concurrent aware objects if they are to be shared.
*
* e.g.
*
* Map<Object, Boolean> chm = new ConcurrentHashMap<Object, Boolean>();
* ParallelIterate.forEachBatchSize(collection, each -> chm.put(each, Boolean.TRUE), 100);
*
*/
public static void forEach(Iterable iterable, Procedure super T> procedure, int batchSize)
{
ParallelIterate.forEach(iterable, procedure, batchSize, ParallelIterate.EXECUTOR_SERVICE);
}
public static void forEach(Iterable iterable, Procedure super T> procedure, int batchSize, Executor executor)
{
ParallelIterate.forEach(iterable, procedure, batchSize, ParallelIterate.calculateTaskCount(iterable, batchSize), executor);
}
/**
* Iterate over the collection specified in parallel batches using default runtime parameter values
* and the specified executor.
* The {@code Procedure} used must be stateless, or use concurrent aware objects if they are to be shared.
*
* @param executor Use this executor for all execution.
* @see #forEach(Iterable, Procedure)
*/
public static > void forEach(
Iterable iterable,
BT procedure,
Executor executor)
{
ParallelIterate.forEach(
iterable,
new PassThruProcedureFactory<>(procedure),
new PassThruCombiner<>(),
executor);
}
/**
* Iterate over the collection specified in parallel batches using the specified minimum fork and task count sizes.
* The {@code Procedure} used must be stateless, or use concurrent aware objects if they are to be shared.
*
* @param minForkSize Only run in parallel if input collection is longer than this.
* @param taskCount How many parallel tasks to submit to the executor.
* TODO: How does the taskCount relate to the number of threads in the executor?
* @see #forEach(Iterable, Procedure)
*/
public static > void forEach(
Iterable iterable,
BT procedure,
int minForkSize,
int taskCount)
{
ParallelIterate.forEach(iterable, procedure, minForkSize, taskCount, ParallelIterate.EXECUTOR_SERVICE);
}
public static > void forEach(
Iterable iterable,
BT procedure,
int minForkSize,
int taskCount,
Executor executor)
{
ParallelIterate.forEach(
iterable,
new PassThruProcedureFactory<>(procedure),
new PassThruCombiner<>(),
minForkSize,
taskCount,
executor);
}
public static > void forEach(
Iterable iterable,
ProcedureFactory procedureFactory,
Combiner combiner,
Executor executor)
{
ParallelIterate.forEach(iterable, procedureFactory, combiner, ParallelIterate.DEFAULT_MIN_FORK_SIZE, executor);
}
public static > void forEach(
Iterable iterable,
ProcedureFactory procedureFactory,
Combiner combiner)
{
ParallelIterate.forEach(iterable, procedureFactory, combiner, ParallelIterate.EXECUTOR_SERVICE);
}
/**
* Iterate over the collection specified in parallel batches using the default values for the task size. The
* ProcedureFactory can create stateful closures that will be collected and combined using the specified Combiner.
*
* e.g. The ParallelIterate.select() implementation
*
* CollectionCombiner<T, SelectProcedure<T>> combiner = CollectionCombiner.forSelect(collection);
* ParallelIterate.forEach(collection, new SelectProcedureFactory<T>(predicate, taskSize), combiner, 1000);
*
*/
@SuppressWarnings("JavaDoc")
public static > void forEach(
Iterable iterable,
ProcedureFactory procedureFactory,
Combiner combiner,
int batchSize)
{
ParallelIterate.forEach(iterable, procedureFactory, combiner, batchSize, ParallelIterate.EXECUTOR_SERVICE);
}
public static > void forEach(
Iterable iterable,
ProcedureFactory procedureFactory,
Combiner combiner,
int batchSize,
Executor executor)
{
ParallelIterate.forEach(iterable, procedureFactory, combiner, batchSize, ParallelIterate.calculateTaskCount(iterable, batchSize), executor);
}
/**
* Iterate over the collection specified in parallel batches using the default values for the task size. The
* ProcedureFactory can create stateful closures that will be collected and combined using the specified Combiner.
*
* e.g. The ParallelIterate.select() implementation
*
* int taskCount = Math.max(DEFAULT_PARALLEL_TASK_COUNT, collection.size() / DEFAULT_MIN_FORK_SIZE);
* final int taskSize = collection.size() / taskCount / 2;
* CollectionCombiner<T, SelectProcedure<T>> combiner = CollectionCombiner.forSelect(collection);
* ParallelIterate.forEach(collection, new SelectProcedureFactory<T>(predicate, taskSize), combiner, DEFAULT_MIN_FORK_SIZE, taskCount);
*
*/
@SuppressWarnings("JavaDoc")
public static > void forEach(
Iterable iterable,
ProcedureFactory procedureFactory,
Combiner combiner,
int minForkSize,
int taskCount)
{
ParallelIterate.forEach(iterable, procedureFactory, combiner, minForkSize, taskCount, ParallelIterate.EXECUTOR_SERVICE);
}
public static > void forEach(
Iterable iterable,
ProcedureFactory procedureFactory,
Combiner combiner,
int minForkSize,
int taskCount,
Executor executor)
{
if (Iterate.notEmpty(iterable))
{
if (iterable instanceof BatchIterable)
{
ParallelIterate.forEachInBatchWithExecutor(
(BatchIterable) iterable,
procedureFactory,
combiner,
minForkSize,
taskCount,
executor);
}
else if ((iterable instanceof RandomAccess || iterable instanceof ListIterable)
&& iterable instanceof List)
{
ParallelIterate.forEachInListOnExecutor(
(List) iterable,
procedureFactory,
combiner,
minForkSize,
taskCount,
executor);
}
else
{
ParallelArrayIterate.forEachOn(
(T[]) Iterate.toArray(iterable),
procedureFactory,
combiner,
minForkSize,
taskCount,
executor);
}
}
}
public static > void forEachInListOnExecutor(
List list,
ProcedureFactory procedureFactory,
Combiner combiner,
int minForkSize,
int taskCount,
Executor executor)
{
int size = list.size();
if (size < minForkSize)
{
BT procedure = procedureFactory.create();
Iterate.forEach(list, procedure);
if (combiner.useCombineOne())
{
combiner.combineOne(procedure);
}
else
{
combiner.combineAll(iList(procedure));
}
}
else
{
int threadCount = Math.min(size, taskCount);
ProcedureFJTaskRunner runner =
new ProcedureFJTaskRunner<>(combiner, threadCount);
runner.executeAndCombine(executor, procedureFactory, list);
}
}
public static > void forEachInBatchWithExecutor(
BatchIterable set,
ProcedureFactory procedureFactory,
Combiner combiner,
int minForkSize,
int taskCount,
Executor executor)
{
int size = set.size();
if (size < minForkSize)
{
BT procedure = procedureFactory.create();
set.forEach(procedure);
if (combiner.useCombineOne())
{
combiner.combineOne(procedure);
}
else
{
combiner.combineAll(iList(procedure));
}
}
else
{
int threadCount = Math.min(size, Math.min(taskCount, set.getBatchCount((int) Math.ceil((double) size / (double) taskCount))));
BatchIterableProcedureFJTaskRunner runner =
new BatchIterableProcedureFJTaskRunner<>(combiner, threadCount);
runner.executeAndCombine(executor, procedureFactory, set);
}
}
/**
* Same effect as {@link Iterate#select(Iterable, Predicate)}, but executed in parallel batches.
*
* @return The selected elements. The Collection will be of the same type as the input (List or Set)
* and will be in the same order as the input (if it is an ordered collection).
* @see ParallelIterate#select(Iterable, Predicate, boolean)
*/
public static Collection select(
Iterable iterable,
Predicate super T> predicate)
{
return ParallelIterate.select(iterable, predicate, false);
}
/**
* Same effect as {@link Iterate#select(Iterable, Predicate)}, but executed in parallel batches,
* and with a potentially reordered result.
*
* @param allowReorderedResult If the result can be in a different order.
* Allowing reordering may yield faster execution.
* @return The selected elements. The Collection will be of the same type (List or Set) as the input.
*/
public static Collection select(
Iterable iterable,
Predicate super T> predicate,
boolean allowReorderedResult)
{
return ParallelIterate.select(iterable, predicate, null, allowReorderedResult);
}
/**
* Same effect as {@link Iterate#select(Iterable, Predicate)}, but executed in parallel batches,
* and writing output into the specified collection.
*
* @param target Where to write the output.
* @param allowReorderedResult If the result can be in a different order.
* Allowing reordering may yield faster execution.
* @return The 'target' collection, with the selected elements added.
*/
public static > R select(
Iterable iterable,
Predicate super T> predicate,
R target,
boolean allowReorderedResult)
{
return ParallelIterate.select(
iterable,
predicate,
target,
ParallelIterate.DEFAULT_MIN_FORK_SIZE,
ParallelIterate.EXECUTOR_SERVICE,
allowReorderedResult);
}
/**
* Same effect as {@link Iterate#select(Iterable, Predicate)}, but executed in parallel batches,
* and writing output into the specified collection.
*
* @param target Where to write the output.
* @param allowReorderedResult If the result can be in a different order.
* Allowing reordering may yield faster execution.
* @return The 'target' collection, with the selected elements added.
*/
public static > R select(
Iterable iterable,
Predicate super T> predicate,
R target,
int batchSize,
Executor executor,
boolean allowReorderedResult)
{
FastListSelectProcedureCombiner combiner = new FastListSelectProcedureCombiner<>(iterable, target, 10, allowReorderedResult);
FastListSelectProcedureFactory procedureFactory = new FastListSelectProcedureFactory<>(predicate, batchSize);
ParallelIterate.forEach(
iterable,
procedureFactory,
combiner,
batchSize,
ParallelIterate.calculateTaskCount(iterable, batchSize),
executor);
return (R) combiner.getResult();
}
private static int calculateTaskCount(Iterable iterable, int batchSize)
{
if (iterable instanceof BatchIterable>)
{
return ParallelIterate.calculateTaskCount((BatchIterable>) iterable, batchSize);
}
return ParallelIterate.calculateTaskCount(Iterate.sizeOf(iterable), batchSize);
}
private static int calculateTaskCount(BatchIterable batchIterable, int batchSize)
{
return Math.max(2, batchIterable.getBatchCount(batchSize));
}
private static int calculateTaskCount(int size, int batchSize)
{
return Math.max(2, size / batchSize);
}
/**
* Same effect as {@link Iterate#reject(Iterable, Predicate)}, but executed in parallel batches.
*
* @return The rejected elements. The Collection will be of the same type as the input (List or Set)
* and will be in the same order as the input (if it is an ordered collection).
* @see ParallelIterate#reject(Iterable, Predicate, boolean)
*/
public static Collection reject(
Iterable iterable,
Predicate super T> predicate)
{
return ParallelIterate.reject(iterable, predicate, false);
}
/**
* Same effect as {@link Iterate#reject(Iterable, Predicate)}, but executed in parallel batches,
* and with a potentially reordered result.
*
* @param allowReorderedResult If the result can be in a different order.
* Allowing reordering may yield faster execution.
* @return The rejected elements. The Collection will be of the same type (List or Set) as the input.
*/
public static Collection reject(
Iterable iterable,
Predicate super T> predicate,
boolean allowReorderedResult)
{
return ParallelIterate.reject(iterable, predicate, null, allowReorderedResult);
}
/**
* Same effect as {@link Iterate#reject(Iterable, Predicate)}, but executed in parallel batches,
* and writing output into the specified collection.
*
* @param target Where to write the output.
* @param allowReorderedResult If the result can be in a different order.
* Allowing reordering may yield faster execution.
* @return The 'target' collection, with the rejected elements added.
*/
public static > R reject(
Iterable iterable,
Predicate super T> predicate,
R target,
boolean allowReorderedResult)
{
return ParallelIterate.reject(
iterable,
predicate,
target,
ParallelIterate.DEFAULT_MIN_FORK_SIZE,
ParallelIterate.EXECUTOR_SERVICE,
allowReorderedResult);
}
public static > R reject(
Iterable iterable,
Predicate super T> predicate,
R target,
int batchSize,
Executor executor,
boolean allowReorderedResult)
{
FastListRejectProcedureCombiner combiner = new FastListRejectProcedureCombiner<>(iterable, target, 10, allowReorderedResult);
FastListRejectProcedureFactory procedureFactory = new FastListRejectProcedureFactory<>(predicate, batchSize);
ParallelIterate.forEach(
iterable,
procedureFactory,
combiner,
batchSize,
ParallelIterate.calculateTaskCount(iterable, batchSize),
executor);
return (R) combiner.getResult();
}
/**
* Same effect as {@link Iterate#count(Iterable, Predicate)}, but executed in parallel batches.
*
* @return The number of elements which satisfy the predicate.
*/
public static int count(Iterable iterable, Predicate super T> predicate)
{
return ParallelIterate.count(iterable, predicate, ParallelIterate.DEFAULT_MIN_FORK_SIZE, ParallelIterate.EXECUTOR_SERVICE);
}
/**
* Same effect as {@link Iterate#count(Iterable, Predicate)}, but executed in parallel batches.
*
* @return The number of elements which satisfy the predicate.
*/
public static int count(Iterable iterable, Predicate super T> predicate, int batchSize, Executor executor)
{
CountCombiner combiner = new CountCombiner<>();
CountProcedureFactory procedureFactory = new CountProcedureFactory<>(predicate);
ParallelIterate.forEach(
iterable,
procedureFactory,
combiner,
batchSize,
executor);
return combiner.getCount();
}
/**
* Same effect as {@link Iterate#collect(Iterable, Function)},
* but executed in parallel batches.
*
* @return The collected elements. The Collection will be of the same type as the input (List or Set)
* and will be in the same order as the input (if it is an ordered collection).
* @see ParallelIterate#collect(Iterable, Function, boolean)
*/
public static Collection collect(
Iterable iterable,
Function super T, V> function)
{
return ParallelIterate.collect(iterable, function, false);
}
/**
* Same effect as {@link Iterate#collect(Iterable, Function)}, but executed in parallel batches,
* and with potentially reordered result.
*
* @param allowReorderedResult If the result can be in a different order.
* Allowing reordering may yield faster execution.
* @return The collected elements. The Collection will be of the same type
* (List or Set) as the input.
*/
public static Collection collect(
Iterable iterable,
Function super T, V> function,
boolean allowReorderedResult)
{
return ParallelIterate.collect(iterable, function, null, allowReorderedResult);
}
/**
* Same effect as {@link Iterate#collect(Iterable, Function)}, but executed in parallel batches,
* and writing output into the specified collection.
*
* @param target Where to write the output.
* @param allowReorderedResult If the result can be in a different order.
* Allowing reordering may yield faster execution.
* @return The 'target' collection, with the collected elements added.
*/
public static > R collect(
Iterable iterable,
Function super T, V> function,
R target,
boolean allowReorderedResult)
{
return ParallelIterate.collect(
iterable,
function,
target,
ParallelIterate.DEFAULT_MIN_FORK_SIZE,
ParallelIterate.EXECUTOR_SERVICE,
allowReorderedResult);
}
public static > R collect(
Iterable iterable,
Function super T, V> function,
R target,
int batchSize,
Executor executor,
boolean allowReorderedResult)
{
int size = Iterate.sizeOf(iterable);
FastListCollectProcedureCombiner combiner = new FastListCollectProcedureCombiner<>(iterable, target, size, allowReorderedResult);
int taskCount = ParallelIterate.calculateTaskCount(iterable, batchSize);
FastListCollectProcedureFactory procedureFactory = new FastListCollectProcedureFactory<>(function, size / taskCount);
ParallelIterate.forEach(
iterable,
procedureFactory,
combiner,
batchSize,
taskCount,
executor);
return (R) combiner.getResult();
}
public static Collection flatCollect(
Iterable iterable,
Function super T, Collection> function)
{
return ParallelIterate.flatCollect(iterable, function, false);
}
public static Collection flatCollect(
Iterable iterable,
Function super T, Collection> function,
boolean allowReorderedResult)
{
return ParallelIterate.flatCollect(iterable, function, null, allowReorderedResult);
}
public static > R flatCollect(
Iterable iterable,
Function super T, Collection> function,
R target,
boolean allowReorderedResult)
{
return ParallelIterate.flatCollect(
iterable,
function,
target,
ParallelIterate.DEFAULT_MIN_FORK_SIZE,
ParallelIterate.EXECUTOR_SERVICE,
allowReorderedResult);
}
public static > R flatCollect(
Iterable iterable,
Function super T, Collection> function,
R target,
int batchSize,
Executor executor,
boolean allowReorderedResult)
{
int size = Iterate.sizeOf(iterable);
int taskCount = ParallelIterate.calculateTaskCount(iterable, batchSize);
int taskSize = size / taskCount;
FlatCollectProcedureCombiner combiner =
new FlatCollectProcedureCombiner<>(iterable, target, size, allowReorderedResult);
FlatCollectProcedureFactory procedureFactory = new FlatCollectProcedureFactory<>(function, taskSize);
ParallelIterate.forEach(
iterable,
procedureFactory,
combiner,
batchSize,
taskCount,
executor);
return (R) combiner.getResult();
}
/**
* Same effect as {@link Iterate#collectIf(Iterable, Predicate, Function)},
* but executed in parallel batches.
*
* @return The collected elements. The Collection will be of the same type as the input (List or Set)
* and will be in the same order as the input (if it is an ordered collection).
* @see ParallelIterate#collectIf(Iterable, Predicate, Function, boolean)
*/
public static Collection collectIf(
Iterable iterable,
Predicate super T> predicate,
Function super T, V> function)
{
return ParallelIterate.collectIf(iterable, predicate, function, false);
}
/**
* Same effect as {@link Iterate#collectIf(Iterable, Predicate, Function)},
* but executed in parallel batches, and with potentially reordered results.
*
* @param allowReorderedResult If the result can be in a different order.
* Allowing reordering may yield faster execution.
* @return The collected elements. The Collection will be of the same type
* as the input (List or Set)
*/
public static Collection collectIf(
Iterable iterable,
Predicate super T> predicate,
Function super T, V> function,
boolean allowReorderedResult)
{
return ParallelIterate.collectIf(iterable, predicate, function, null, allowReorderedResult);
}
/**
* Same effect as {@link Iterate#collectIf(Iterable, Predicate, Function)},
* but executed in parallel batches, and writing output into the specified collection.
*
* @param target Where to write the output.
* @param allowReorderedResult If the result can be in a different order.
* Allowing reordering may yield faster execution.
* @return The 'target' collection, with the collected elements added.
*/
public static > R collectIf(
Iterable iterable,
Predicate super T> predicate,
Function super T, V> function,
R target,
boolean allowReorderedResult)
{
return ParallelIterate.collectIf(
iterable,
predicate,
function,
target,
ParallelIterate.DEFAULT_MIN_FORK_SIZE,
ParallelIterate.EXECUTOR_SERVICE,
allowReorderedResult);
}
public static > R collectIf(
Iterable iterable,
Predicate super T> predicate,
Function super T, V> function,
R target,
int batchSize,
Executor executor,
boolean allowReorderedResult)
{
FastListCollectIfProcedureCombiner combiner = new FastListCollectIfProcedureCombiner<>(iterable, target, 10, allowReorderedResult);
FastListCollectIfProcedureFactory procedureFactory = new FastListCollectIfProcedureFactory<>(function, predicate, batchSize);
ParallelIterate.forEach(
iterable,
procedureFactory,
combiner,
batchSize,
ParallelIterate.calculateTaskCount(iterable, batchSize),
executor);
return (R) combiner.getResult();
}
/**
* Same effect as {@link Iterate#groupBy(Iterable, Function)},
* but executed in parallel batches, and writing output into a SynchronizedPutFastListMultimap.
*/
public static MutableMultimap groupBy(
Iterable iterable,
Function super V, ? extends K> function)
{
return ParallelIterate.groupBy(iterable, function, ParallelIterate.DEFAULT_MIN_FORK_SIZE, ParallelIterate.EXECUTOR_SERVICE);
}
public static MutableMap aggregateBy(
Iterable iterable,
Function super T, ? extends K> groupBy,
Function0 extends V> zeroValueFactory,
Function2 super V, ? super T, ? extends V> nonMutatingAggregator)
{
return ParallelIterate.aggregateBy(
iterable,
groupBy,
zeroValueFactory,
nonMutatingAggregator,
ParallelIterate.DEFAULT_MIN_FORK_SIZE);
}
public static > R aggregateBy(
Iterable iterable,
Function super T, ? extends K> groupBy,
Function0 extends V> zeroValueFactory,
Function2 super V, ? super T, ? extends V> nonMutatingAggregator,
R mutableMap)
{
return ParallelIterate.aggregateBy(
iterable,
groupBy,
zeroValueFactory,
nonMutatingAggregator,
mutableMap,
ParallelIterate.DEFAULT_MIN_FORK_SIZE);
}
public static MutableMap aggregateBy(
Iterable iterable,
Function super T, ? extends K> groupBy,
Function0 extends V> zeroValueFactory,
Function2 super V, ? super T, ? extends V> nonMutatingAggregator,
int batchSize)
{
return ParallelIterate.aggregateBy(
iterable,
groupBy,
zeroValueFactory,
nonMutatingAggregator,
batchSize,
ParallelIterate.EXECUTOR_SERVICE);
}
public static > R aggregateBy(
Iterable iterable,
Function super T, ? extends K> groupBy,
Function0 extends V> zeroValueFactory,
Function2 super V, ? super T, ? extends V> nonMutatingAggregator,
R mutableMap,
int batchSize)
{
return ParallelIterate.aggregateBy(
iterable,
groupBy,
zeroValueFactory,
nonMutatingAggregator,
mutableMap,
batchSize,
ParallelIterate.EXECUTOR_SERVICE);
}
public static MutableMap aggregateBy(
Iterable iterable,
Function super T, ? extends K> groupBy,
Function0 extends V> zeroValueFactory,
Function2 super V, ? super T, ? extends V> nonMutatingAggregator,
int batchSize,
Executor executor)
{
return ParallelIterate.aggregateBy(
iterable,
groupBy,
zeroValueFactory,
nonMutatingAggregator,
ConcurrentHashMap.newMap(),
batchSize,
executor);
}
public static > R aggregateBy(
Iterable iterable,
Function super T, ? extends K> groupBy,
Function0 extends V> zeroValueFactory,
Function2 super V, ? super T, ? extends V> nonMutatingAggregator,
R mutableMap,
int batchSize,
Executor executor)
{
NonMutatingAggregationProcedure nonMutatingAggregationProcedure =
new NonMutatingAggregationProcedure<>(mutableMap, groupBy, zeroValueFactory, nonMutatingAggregator);
ParallelIterate.forEach(
iterable,
new PassThruProcedureFactory<>(nonMutatingAggregationProcedure),
Combiners.>passThru(),
batchSize,
executor);
return mutableMap;
}
public static MutableMap aggregateInPlaceBy(
Iterable iterable,
Function super T, ? extends K> groupBy,
Function0 extends V> zeroValueFactory,
Procedure2 super V, ? super T> mutatingAggregator)
{
return ParallelIterate.aggregateInPlaceBy(
iterable,
groupBy,
zeroValueFactory,
mutatingAggregator,
ParallelIterate.DEFAULT_MIN_FORK_SIZE);
}
public static > R aggregateInPlaceBy(
Iterable iterable,
Function super T, ? extends K> groupBy,
Function0 extends V> zeroValueFactory,
Procedure2 super V, ? super T> mutatingAggregator,
R mutableMap)
{
return ParallelIterate.aggregateInPlaceBy(
iterable,
groupBy,
zeroValueFactory,
mutatingAggregator,
mutableMap,
ParallelIterate.DEFAULT_MIN_FORK_SIZE);
}
public static MutableMap aggregateInPlaceBy(
Iterable iterable,
Function super T, ? extends K> groupBy,
Function0 extends V> zeroValueFactory,
Procedure2 super V, ? super T> mutatingAggregator,
int batchSize)
{
return ParallelIterate.aggregateInPlaceBy(
iterable,
groupBy,
zeroValueFactory,
mutatingAggregator,
batchSize,
ParallelIterate.EXECUTOR_SERVICE);
}
public static > R aggregateInPlaceBy(
Iterable iterable,
Function super T, ? extends K> groupBy,
Function0 extends V> zeroValueFactory,
Procedure2 super V, ? super T> mutatingAggregator,
R mutableMap,
int batchSize)
{
return ParallelIterate.aggregateInPlaceBy(
iterable,
groupBy,
zeroValueFactory,
mutatingAggregator,
mutableMap,
batchSize,
ParallelIterate.EXECUTOR_SERVICE);
}
public static MutableMap aggregateInPlaceBy(
Iterable iterable,
Function super T, ? extends K> groupBy,
Function0 extends V> zeroValueFactory,
Procedure2 super V, ? super T> mutatingAggregator,
int batchSize,
Executor executor)
{
MutableMap map = ConcurrentHashMap.newMap();
MutatingAggregationProcedure mutatingAggregationProcedure =
new MutatingAggregationProcedure<>(map, groupBy, zeroValueFactory, mutatingAggregator);
ParallelIterate.forEach(
iterable,
new PassThruProcedureFactory<>(mutatingAggregationProcedure),
Combiners.>passThru(),
batchSize,
executor);
return map;
}
public static > R aggregateInPlaceBy(
Iterable iterable,
Function super T, ? extends K> groupBy,
Function0 extends V> zeroValueFactory,
Procedure2 super V, ? super T> mutatingAggregator,
R mutableMap,
int batchSize,
Executor executor)
{
MutatingAggregationProcedure mutatingAggregationProcedure =
new MutatingAggregationProcedure<>(mutableMap, groupBy, zeroValueFactory, mutatingAggregator);
ParallelIterate.forEach(
iterable,
new PassThruProcedureFactory<>(mutatingAggregationProcedure),
Combiners.>passThru(),
batchSize,
executor);
return mutableMap;
}
/**
* Same effect as {@link Iterate#groupBy(Iterable, Function)},
* but executed in parallel batches, and writing output into a SynchronizedPutFastListMultimap.
*/
public static > MutableMultimap groupBy(
Iterable iterable,
Function super V, ? extends K> function,
R concurrentMultimap)
{
return ParallelIterate.groupBy(iterable, function, concurrentMultimap, ParallelIterate.DEFAULT_MIN_FORK_SIZE);
}
/**
* Same effect as {@link Iterate#groupBy(Iterable, Function)},
* but executed in parallel batches, and writing output into a SynchronizedPutFastListMultimap.
*/
public static > MutableMultimap groupBy(
Iterable iterable,
Function super V, ? extends K> function,
R concurrentMultimap,
int batchSize)
{
return ParallelIterate.groupBy(iterable, function, concurrentMultimap, batchSize, ParallelIterate.EXECUTOR_SERVICE);
}
/**
* Same effect as {@link Iterate#groupBy(Iterable, Function)},
* but executed in parallel batches, and writing output into a SynchronizedPutFastListMultimap.
*/
public static MutableMultimap groupBy(
Iterable iterable,
Function super V, ? extends K> function,
int batchSize)
{
return ParallelIterate.groupBy(iterable, function, batchSize, ParallelIterate.EXECUTOR_SERVICE);
}
/**
* Same effect as {@link Iterate#groupBy(Iterable, Function)},
* but executed in parallel batches, and writing output into a SynchronizedPutFastListMultimap.
*/
public static MutableMultimap groupBy(
Iterable iterable,
Function super V, ? extends K> function,
int batchSize,
Executor executor)
{
return ParallelIterate.groupBy(iterable, function, SynchronizedPutFastListMultimap.newMultimap(), batchSize, executor);
}
/**
* Same effect as {@link Iterate#groupBy(Iterable, Function)},
* but executed in parallel batches, and writing output into a SynchronizedPutFastListMultimap.
*/
public static > MutableMultimap groupBy(
Iterable iterable,
Function super V, ? extends K> function,
R concurrentMultimap,
int batchSize,
Executor executor)
{
ParallelIterate.forEach(
iterable,
new PassThruProcedureFactory<>(new MultimapPutProcedure<>(concurrentMultimap, function)),
Combiners.>passThru(),
batchSize,
executor);
return concurrentMultimap;
}
public static ObjectDoubleMap sumByDouble(
Iterable iterable,
Function super T, ? extends V> groupBy,
DoubleFunction super T> function)
{
MutableObjectDoubleMap result = ObjectDoubleMaps.mutable.empty();
ParallelIterate.forEach(
iterable,
new SumByDoubleProcedure<>(groupBy, function),
new SumByDoubleCombiner<>(result),
ParallelIterate.DEFAULT_MIN_FORK_SIZE,
ParallelIterate.EXECUTOR_SERVICE);
return result;
}
public static ObjectDoubleMap sumByFloat(
Iterable iterable,
Function super T, ? extends V> groupBy,
FloatFunction super T> function)
{
MutableObjectDoubleMap result = ObjectDoubleMaps.mutable.empty();
ParallelIterate.forEach(
iterable,
new SumByFloatProcedure<>(groupBy, function),
new SumByFloatCombiner<>(result),
ParallelIterate.DEFAULT_MIN_FORK_SIZE,
ParallelIterate.EXECUTOR_SERVICE);
return result;
}
public static ObjectLongMap sumByLong(
Iterable iterable,
Function super T, ? extends V> groupBy,
LongFunction super T> function)
{
MutableObjectLongMap result = ObjectLongMaps.mutable.empty();
ParallelIterate.forEach(
iterable,
new SumByLongProcedure<>(groupBy, function),
new SumByLongCombiner<>(result),
ParallelIterate.DEFAULT_MIN_FORK_SIZE,
ParallelIterate.EXECUTOR_SERVICE);
return result;
}
public static ObjectLongMap sumByInt(
Iterable iterable,
Function super T, ? extends V> groupBy,
IntFunction super T> function)
{
MutableObjectLongMap result = ObjectLongMaps.mutable.empty();
ParallelIterate.forEach(
iterable,
new SumByIntProcedure<>(groupBy, function),
new SumByIntCombiner<>(result),
ParallelIterate.DEFAULT_MIN_FORK_SIZE,
ParallelIterate.EXECUTOR_SERVICE);
return result;
}
/**
* @since 6.0
*/
public static MutableMap sumByBigDecimal(
Iterable iterable,
Function super T, ? extends V> groupBy,
Function super T, BigDecimal> function)
{
MutableMap result = Maps.mutable.empty();
ParallelIterate.forEach(
iterable,
new SumByBigDecimalProcedure<>(groupBy, function),
new SumByBigDecimalCombiner<>(result),
ParallelIterate.DEFAULT_MIN_FORK_SIZE,
ParallelIterate.EXECUTOR_SERVICE);
return result;
}
/**
* @since 6.0
*/
public static MutableMap sumByBigInteger(
Iterable iterable,
Function super T, ? extends V> groupBy,
Function super T, BigInteger> function)
{
MutableMap result = Maps.mutable.empty();
ParallelIterate.forEach(
iterable,
new SumByBigIntegerProcedure<>(groupBy, function),
new SumByBigIntegerCombiner<>(result),
ParallelIterate.DEFAULT_MIN_FORK_SIZE,
ParallelIterate.EXECUTOR_SERVICE);
return result;
}
/**
* Returns a brand new ExecutorService using the specified poolName with the specified maximum thread pool size. The
* same poolName may be used more than once resulting in multiple pools with the same name.
*
* The pool will be initialised with newPoolSize threads. If that number of threads are in use and another thread
* is requested, the pool will reject execution and the submitting thread will execute the task.
*/
public static ExecutorService newPooledExecutor(int newPoolSize, String poolName, boolean useDaemonThreads)
{
return new ThreadPoolExecutor(
newPoolSize,
newPoolSize,
0L,
TimeUnit.MILLISECONDS,
new SynchronousQueue<>(),
new CollectionsThreadFactory(poolName, useDaemonThreads),
new ThreadPoolExecutor.CallerRunsPolicy());
}
/**
* Returns a brand new ExecutorService using the specified poolName and uses the optional property named
* to set the maximum thread pool size. The same poolName may be used more than
* once resulting in multiple pools with the same name.
*/
public static ExecutorService newPooledExecutor(String poolName, boolean useDaemonThreads)
{
return ParallelIterate.newPooledExecutor(ParallelIterate.getDefaultMaxThreadPoolSize(), poolName, useDaemonThreads);
}
public static int getDefaultTaskCount()
{
return ParallelIterate.getDefaultMaxThreadPoolSize() * ParallelIterate.getTaskRatio();
}
public static int getDefaultMaxThreadPoolSize()
{
return Math.min(AVAILABLE_PROCESSORS + 1, 100);
}
public static int getTaskRatio()
{
return TASK_RATIO;
}
private static final class SumByDoubleProcedure implements Procedure, ProcedureFactory>
{
private final MutableMap map = Maps.mutable.of();
private final Function super T, ? extends V> groupBy;
private final DoubleFunction super T> function;
private SumByDoubleProcedure(Function super T, ? extends V> groupBy, DoubleFunction super T> function)
{
this.groupBy = groupBy;
this.function = function;
}
@Override
public void value(T each)
{
V groupKey = this.groupBy.valueOf(each);
DoubleDoublePair sumCompensation = this.map.getIfAbsentPut(groupKey, () -> PrimitiveTuples.pair(0.0d, 0.0d));
double sum = sumCompensation.getOne();
double compensation = sumCompensation.getTwo();
double adjustedValue = this.function.doubleValueOf(each) - compensation;
double nextSum = sum + adjustedValue;
compensation = nextSum - sum - adjustedValue;
sum = nextSum;
this.map.put(groupKey, PrimitiveTuples.pair(sum, compensation));
}
public MutableMap getResult()
{
return this.map;
}
@Override
public SumByDoubleProcedure create()
{
return new SumByDoubleProcedure<>(this.groupBy, this.function);
}
}
private static final class SumByDoubleCombiner extends AbstractProcedureCombiner>
{
private final MutableObjectDoubleMap result;
private final MutableObjectDoubleMap compensation = ObjectDoubleMaps.mutable.empty();
private SumByDoubleCombiner(MutableObjectDoubleMap result)
{
super(true);
this.result = result;
}
@Override
public void combineOne(SumByDoubleProcedure thingToCombine)
{
if (this.result.isEmpty())
{
thingToCombine.getResult().forEachKeyValue((each, sumCompensation) -> {
this.result.put(each, sumCompensation.getOne());
this.compensation.put(each, sumCompensation.getTwo());
});
}
else
{
thingToCombine.getResult().forEachKeyValue((each, sumCompensation) -> {
double sum = this.result.get(each);
double currentCompensation = this.compensation.getIfAbsentPut(each, () -> 0.0d) + sumCompensation.getTwo();
double adjustedValue = sumCompensation.getOne() - currentCompensation;
double nextSum = sum + adjustedValue;
this.compensation.put(each, nextSum - sum - adjustedValue);
this.result.put(each, nextSum);
});
}
}
}
private static final class SumByFloatProcedure implements Procedure, ProcedureFactory>
{
private final MutableMap map = Maps.mutable.of();
private final Function super T, ? extends V> groupBy;
private final FloatFunction super T> function;
private SumByFloatProcedure(Function super T, ? extends V> groupBy, FloatFunction super T> function)
{
this.groupBy = groupBy;
this.function = function;
}
@Override
public void value(T each)
{
V groupKey = this.groupBy.valueOf(each);
DoubleDoublePair sumCompensation = this.map.getIfAbsentPut(groupKey, () -> PrimitiveTuples.pair(0.0d, 0.0d));
double sum = sumCompensation.getOne();
double compensation = sumCompensation.getTwo();
double adjustedValue = this.function.floatValueOf(each) - compensation;
double nextSum = sum + adjustedValue;
compensation = nextSum - sum - adjustedValue;
sum = nextSum;
this.map.put(groupKey, PrimitiveTuples.pair(sum, compensation));
}
public MutableMap getResult()
{
return this.map;
}
@Override
public SumByFloatProcedure create()
{
return new SumByFloatProcedure<>(this.groupBy, this.function);
}
}
private static final class SumByFloatCombiner extends AbstractProcedureCombiner>
{
private final MutableObjectDoubleMap result;
private final MutableObjectDoubleMap compensation = ObjectDoubleMaps.mutable.empty();
private SumByFloatCombiner(MutableObjectDoubleMap result)
{
super(true);
this.result = result;
}
@Override
public void combineOne(SumByFloatProcedure thingToCombine)
{
if (this.result.isEmpty())
{
thingToCombine.getResult().forEachKeyValue((each, sumCompensation) -> {
this.result.put(each, sumCompensation.getOne());
this.compensation.put(each, sumCompensation.getTwo());
});
}
else
{
thingToCombine.getResult().forEachKeyValue((each, sumCompensation) -> {
double sum = this.result.get(each);
double currentCompensation = this.compensation.getIfAbsentPut(each, () -> 0.0d) + sumCompensation.getTwo();
double adjustedValue = sumCompensation.getOne() - currentCompensation;
double nextSum = sum + adjustedValue;
this.compensation.put(each, nextSum - sum - adjustedValue);
this.result.put(each, nextSum);
});
}
}
}
private static final class SumByLongProcedure