// Source: org.infinispan.stream.impl.DistributedCacheStream
package org.infinispan.stream.impl;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Executor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.BinaryOperator;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.IntFunction;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.function.ToDoubleFunction;
import java.util.function.ToIntFunction;
import java.util.function.ToLongFunction;
import java.util.stream.Collector;
import java.util.stream.Collectors;
import java.util.stream.DoubleStream;
import java.util.stream.IntStream;
import java.util.stream.LongStream;
import java.util.stream.Stream;
import org.infinispan.Cache;
import org.infinispan.CacheStream;
import org.infinispan.DoubleCacheStream;
import org.infinispan.IntCacheStream;
import org.infinispan.LongCacheStream;
import org.infinispan.commons.CacheException;
import org.infinispan.commons.marshall.Externalizer;
import org.infinispan.commons.marshall.SerializeWith;
import org.infinispan.commons.util.CloseableIterator;
import org.infinispan.container.entries.CacheEntry;
import org.infinispan.distribution.DistributionManager;
import org.infinispan.distribution.LocalizedCacheTopology;
import org.infinispan.distribution.ch.ConsistentHash;
import org.infinispan.factories.ComponentRegistry;
import org.infinispan.remoting.transport.Address;
import org.infinispan.stream.impl.intops.object.DistinctOperation;
import org.infinispan.stream.impl.intops.object.FilterOperation;
import org.infinispan.stream.impl.intops.object.FlatMapOperation;
import org.infinispan.stream.impl.intops.object.FlatMapToDoubleOperation;
import org.infinispan.stream.impl.intops.object.FlatMapToIntOperation;
import org.infinispan.stream.impl.intops.object.FlatMapToLongOperation;
import org.infinispan.stream.impl.intops.object.LimitOperation;
import org.infinispan.stream.impl.intops.object.MapOperation;
import org.infinispan.stream.impl.intops.object.MapToDoubleOperation;
import org.infinispan.stream.impl.intops.object.MapToIntOperation;
import org.infinispan.stream.impl.intops.object.MapToLongOperation;
import org.infinispan.stream.impl.intops.object.PeekOperation;
import org.infinispan.stream.impl.termop.object.ForEachBiOperation;
import org.infinispan.stream.impl.termop.object.ForEachOperation;
import org.infinispan.stream.impl.termop.object.NoMapIteratorOperation;
import org.infinispan.util.CloseableSuppliedIterator;
import org.infinispan.util.RangeSet;
import org.infinispan.util.concurrent.TimeoutException;
import org.infinispan.util.function.CloseableSupplier;
/**
 * Implementation of {@link CacheStream} that provides support for lazily distributing stream methods to appropriate
 * nodes
 * @param <R> the type of the stream
 */
public class DistributedCacheStream extends AbstractCacheStream, CacheStream>
implements CacheStream {
// This is a hack to allow for cast to work properly, since Java doesn't work as well with nested generics
// NOTE(review): the generic parameters of this signature were lost in extraction; upstream this narrows a
// Supplier of a concrete stream type to a Supplier of the raw Stream element type — confirm against the repo.
protected static Supplier> supplierStreamCast(Supplier supplier) {
// No conversion happens at runtime; the method exists purely to satisfy the compiler.
return supplier;
}
/**
* Standard constructor requiring all pertinent information to properly utilize a distributed cache stream
* @param localAddress the local address for this node
* @param parallel whether or not this stream is parallel
* @param dm the distribution manager to find out what keys map where
* @param supplier a supplier of local cache stream instances.
* @param csm manager that handles sending out messages to other nodes
* @param includeLoader whether or not a cache loader should be utilized for these operations
* @param distributedBatchSize default size of distributed batches
* @param executor executor to be used for certain operations that require async processing (ie. iterator)
*/
// NOTE(review): generic parameters were stripped in extraction; upstream the supplier is presumably
// Supplier<CacheStream<CacheEntry<K, V>>> and csm is ClusterStreamManager<K> — confirm against the repo.
public DistributedCacheStream(Address localAddress, boolean parallel, DistributionManager dm,
Supplier>> supplier, ClusterStreamManager csm, boolean includeLoader,
int distributedBatchSize, Executor executor, ComponentRegistry registry) {
// All state is captured by AbstractCacheStream; supplierStreamCast only adjusts the generic type.
super(localAddress, parallel, dm, supplierStreamCast(supplier), csm, includeLoader, distributedBatchSize,
executor, registry);
}
/**
* Constructor that also allows a simple map method to be inserted first to change to another type. This is
* important because the {@link CacheStream#map(Function)} currently doesn't return a {@link CacheStream}. If this
* is changed we can remove this constructor and update references accordingly.
* @param localAddress the local address for this node
* @param parallel whether or not this stream is parallel
* @param dm the distribution manager to find out what keys map where
* @param supplier a supplier of local cache stream instances.
* @param csm manager that handles sending out messages to other nodes
* @param includeLoader whether or not a cache loader should be utilized for these operations
* @param distributedBatchSize default size of distributed batches
* @param executor executor to be used for certain operations that require async processing (ie. iterator)
* @param function initial function to apply to the stream to change the type
*/
// NOTE(review): generics stripped in extraction; the trailing function is presumably
// Function<? super CacheEntry<K, V>, ? extends R> — confirm against the repo.
public DistributedCacheStream(Address localAddress, boolean parallel, DistributionManager dm,
Supplier>> supplier, ClusterStreamManager csm, boolean includeLoader,
int distributedBatchSize, Executor executor, ComponentRegistry registry,
Function super CacheEntry, R> function) {
super(localAddress, parallel, dm, supplierStreamCast(supplier), csm, includeLoader, distributedBatchSize, executor,
registry);
// Immediately register a map operation so the stream's element type is R from the caller's
// perspective, and record that the iterator must track keys through a map.
intermediateOperations.add(new MapOperation(function));
iteratorOperation = IteratorOperation.MAP;
}
/**
* This constructor is to be used only when a user calls a map or flat map method changing back to a regular
* Stream from an IntStream, DoubleStream etc.
* @param other other instance of {@link AbstractCacheStream} to copy details from
*/
// Copy constructor: shares all distribution state with {@code other}; used when a primitive
// stream (IntStream/DoubleStream/...) maps back to an object stream.
protected DistributedCacheStream(AbstractCacheStream other) {
super(other);
}
/**
 * Returns this instance viewed as the concrete {@link CacheStream}; used by the parent class
 * when an operation needs the fully-typed stream back.
 */
@Override
protected CacheStream<R> unwrap() {
   return this;
}
// Intermediate operations that are stored for lazy evalulation
@Override
public CacheStream filter(Predicate super R> predicate) {
return addIntermediateOperation(new FilterOperation<>(predicate));
}
@Override
public CacheStream map(Function super R, ? extends R1> mapper) {
if (iteratorOperation != IteratorOperation.FLAT_MAP) {
iteratorOperation = IteratorOperation.MAP;
}
addIntermediateOperationMap(new MapOperation<>(mapper));
return (CacheStream) this;
}
@Override
public IntCacheStream mapToInt(ToIntFunction super R> mapper) {
if (iteratorOperation != IteratorOperation.FLAT_MAP) {
iteratorOperation = IteratorOperation.MAP;
}
addIntermediateOperationMap(new MapToIntOperation<>(mapper));
return intCacheStream();
}
@Override
public LongCacheStream mapToLong(ToLongFunction super R> mapper) {
if (iteratorOperation != IteratorOperation.FLAT_MAP) {
iteratorOperation = IteratorOperation.MAP;
}
addIntermediateOperationMap(new MapToLongOperation<>(mapper));
return longCacheStream();
}
@Override
public DoubleCacheStream mapToDouble(ToDoubleFunction super R> mapper) {
if (iteratorOperation != IteratorOperation.FLAT_MAP) {
iteratorOperation = IteratorOperation.MAP;
}
addIntermediateOperationMap(new MapToDoubleOperation<>(mapper));
return doubleCacheStream();
}
@Override
public CacheStream flatMap(Function super R, ? extends Stream extends R1>> mapper) {
iteratorOperation = IteratorOperation.FLAT_MAP;
addIntermediateOperationMap(new FlatMapOperation(mapper));
return (CacheStream) this;
}
@Override
public IntCacheStream flatMapToInt(Function super R, ? extends IntStream> mapper) {
iteratorOperation = IteratorOperation.FLAT_MAP;
addIntermediateOperationMap(new FlatMapToIntOperation<>(mapper));
return intCacheStream();
}
@Override
public LongCacheStream flatMapToLong(Function super R, ? extends LongStream> mapper) {
iteratorOperation = IteratorOperation.FLAT_MAP;
addIntermediateOperationMap(new FlatMapToLongOperation<>(mapper));
return longCacheStream();
}
@Override
public DoubleCacheStream flatMapToDouble(Function super R, ? extends DoubleStream> mapper) {
iteratorOperation = IteratorOperation.FLAT_MAP;
addIntermediateOperationMap(new FlatMapToDoubleOperation<>(mapper));
return doubleCacheStream();
}
/**
 * Applies distinct remotely (reducing traffic per node) and then again locally via an
 * intermediate stream, since per-node distinct cannot see duplicates across nodes.
 */
@Override
public CacheStream<R> distinct() {
   // Distinct is applied remotely as well
   addIntermediateOperation(DistinctOperation.getInstance());
   return new IntermediateCacheStream<>(this).distinct();
}
/**
 * Sorting requires all elements on the originator, so it is delegated entirely to a local
 * intermediate stream.
 */
@Override
public CacheStream<R> sorted() {
   return new IntermediateCacheStream<>(this).sorted();
}
@Override
public CacheStream sorted(Comparator super R> comparator) {
return new IntermediateCacheStream<>(this).sorted(comparator);
}
@Override
public CacheStream peek(Consumer super R> action) {
return addIntermediateOperation(new PeekOperation<>(action));
}
/**
 * Applies the limit remotely (each node returns at most maxSize elements) and then again
 * locally, since the per-node limits can together exceed maxSize.
 */
@Override
public CacheStream<R> limit(long maxSize) {
   // Limit is applied remotely as well
   addIntermediateOperation(new LimitOperation<>(maxSize));
   return new IntermediateCacheStream<>(this).limit(maxSize);
}
/**
 * Skip can only be applied once all elements are gathered (a remote skip would drop the
 * wrong elements), so it is delegated entirely to a local intermediate stream.
 */
@Override
public CacheStream<R> skip(long n) {
   return new IntermediateCacheStream<>(this).skip(n);
}
// Now we have terminal operators
/**
 * Distributed reduce with an identity: each node reduces locally and the partial results are
 * folded together with the same accumulator.
 */
@Override
public R reduce(R identity, BinaryOperator<R> accumulator) {
   return performOperation(TerminalFunctions.reduceFunction(identity, accumulator), true, accumulator, null);
}
/**
 * Distributed reduce with no identity: a node that saw no elements contributes {@code null},
 * so partial results are combined null-safely before the accumulator is applied.
 */
@Override
public Optional<R> reduce(BinaryOperator<R> accumulator) {
   R value = performOperation(TerminalFunctions.reduceFunction(accumulator), true,
         (e1, e2) -> {
            if (e1 != null) {
               if (e2 != null) {
                  return accumulator.apply(e1, e2);
               }
               return e1;
            }
            return e2;
         }, null);
   return Optional.ofNullable(value);
}
/**
 * Distributed three-argument reduce: each node folds its elements with the accumulator and
 * the per-node partial results are merged with the combiner.
 */
@Override
public <U> U reduce(U identity, BiFunction<U, ? super R, U> accumulator, BinaryOperator<U> combiner) {
   return performOperation(TerminalFunctions.reduceFunction(identity, accumulator, combiner), true, combiner, null);
}
/**
 * {@inheritDoc}
 * Note: this method doesn't pay attention to ordering constraints and any sorting performed on the stream will
 * be ignored by this terminal operator. If you wish to have an ordered collector use the
 * {@link DistributedCacheStream#collect(Collector)} method making sure the
 * {@link java.util.stream.Collector.Characteristics#UNORDERED} property is not set.
 * @param supplier factory for the mutable result container, invoked on each node
 * @param accumulator folds a stream element into a container
 * @param combiner merges the second container into the first
 * @param <R1> the type of the mutable result container
 * @return the fully combined result container
 */
@Override
public <R1> R1 collect(Supplier<R1> supplier, BiConsumer<R1, ? super R> accumulator, BiConsumer<R1, R1> combiner) {
   return performOperation(TerminalFunctions.collectFunction(supplier, accumulator, combiner), true,
         (e1, e2) -> {
            // Adapt the BiConsumer combiner to the BinaryOperator shape performOperation expects.
            combiner.accept(e1, e2);
            return e1;
         }, null);
}
/**
 * Wrapper that forces {@link Collector.Characteristics#IDENTITY_FINISH} onto a delegate collector so
 * the finisher is NOT applied remotely; the caller applies the real finisher once, locally, after all
 * remote intermediate results have been combined.
 * @param <T> element type consumed by the collector
 * @param <A> the delegate's intermediate accumulation type, which is also this collector's result type
 * @param <R> the delegate's final result type (never produced by this wrapper)
 */
@SerializeWith(value = IdentifyFinishCollector.IdentityFinishCollectorExternalizer.class)
private static final class IdentifyFinishCollector<T, A, R> implements Collector<T, A, A> {
   private final Collector<T, A, R> realCollector;

   IdentifyFinishCollector(Collector<T, A, R> realCollector) {
      this.realCollector = realCollector;
   }

   @Override
   public Supplier<A> supplier() {
      return realCollector.supplier();
   }

   @Override
   public BiConsumer<A, T> accumulator() {
      return realCollector.accumulator();
   }

   @Override
   public BinaryOperator<A> combiner() {
      return realCollector.combiner();
   }

   @Override
   public Function<A, A> finisher() {
      // With IDENTITY_FINISH advertised the stream implementation may skip the finisher, but the
      // Collector contract requires it to be equivalent to identity if invoked — returning
      // Function.identity() (instead of null) avoids an NPE on implementations that call it anyway.
      return Function.identity();
   }

   @Override
   public Set<Characteristics> characteristics() {
      Set<Characteristics> characteristics = realCollector.characteristics();
      if (characteristics.isEmpty()) {
         return EnumSet.of(Characteristics.IDENTITY_FINISH);
      } else {
         // Preserve the delegate's characteristics and add IDENTITY_FINISH on top.
         Set<Characteristics> tweaked = EnumSet.copyOf(characteristics);
         tweaked.add(Characteristics.IDENTITY_FINISH);
         return tweaked;
      }
   }

   /** Externalizer that serializes only the wrapped collector; the wrapper is rebuilt on read. */
   public static final class IdentityFinishCollectorExternalizer implements Externalizer<IdentifyFinishCollector> {
      @Override
      public void writeObject(ObjectOutput output, IdentifyFinishCollector object) throws IOException {
         output.writeObject(object.realCollector);
      }

      @Override
      public IdentifyFinishCollector readObject(ObjectInput input) throws IOException, ClassNotFoundException {
         return new IdentifyFinishCollector<>((Collector) input.readObject());
      }
   }
}
@Override
public R1 collect(Collector super R, A, R1> collector) {
// If it is not an identify finish we have to prevent the remote finisher, and apply locally only after
// everything is combined.
if (collector.characteristics().contains(Collector.Characteristics.IDENTITY_FINISH)) {
return performOperation(TerminalFunctions.collectorFunction(collector), true,
(BinaryOperator) collector.combiner(), null);
} else {
// Need to wrap collector to force identity finish
A intermediateResult = performOperation(TerminalFunctions.collectorFunction(
new IdentifyFinishCollector<>(collector)), true, collector.combiner(), null);
return collector.finisher().apply(intermediateResult);
}
}
@Override
public Optional min(Comparator super R> comparator) {
R value = performOperation(TerminalFunctions.minFunction(comparator), false,
(e1, e2) -> {
if (e1 != null) {
if (e2 != null) {
return comparator.compare(e1, e2) > 0 ? e2 : e1;
} else {
return e1;
}
}
return e2;
}, null);
return Optional.ofNullable(value);
}
@Override
public Optional max(Comparator super R> comparator) {
R value = performOperation(TerminalFunctions.maxFunction(comparator), false,
(e1, e2) -> {
if (e1 != null) {
if (e2 != null) {
return comparator.compare(e1, e2) > 0 ? e1 : e2;
} else {
return e1;
}
}
return e2;
}, null);
return Optional.ofNullable(value);
}
@Override
public boolean anyMatch(Predicate super R> predicate) {
return performOperation(TerminalFunctions.anyMatchFunction(predicate), false, Boolean::logicalOr, b -> b);
}
@Override
public boolean allMatch(Predicate super R> predicate) {
return performOperation(TerminalFunctions.allMatchFunction(predicate), false, Boolean::logicalAnd, b -> !b);
}
@Override
public boolean noneMatch(Predicate super R> predicate) {
return performOperation(TerminalFunctions.noneMatchFunction(predicate), false, Boolean::logicalAnd, b -> !b);
}
/**
 * The distributed stream maintains no encounter order, so findFirst degenerates to findAny.
 */
@Override
public Optional<R> findFirst() {
   // We aren't sorted, so just do findAny
   return findAny();
}
/**
 * Distributed findAny: the first non-null per-node result wins, and the
 * {@link Objects#nonNull} early-terminate predicate cancels outstanding remote work once a
 * value has been found.
 */
@Override
public Optional<R> findAny() {
   R value = performOperation(TerminalFunctions.findAnyFunction(), false, (r1, r2) -> r1 == null ? r2 : r1,
         Objects::nonNull);
   return Optional.ofNullable(value);
}
/**
 * Distributed count: every node counts its own elements and the partial counts are summed on
 * the originator.
 */
@Override
public long count() {
   return performOperation(TerminalFunctions.countFunction(), true, Long::sum, null);
}
// The next ones are key tracking terminal operators
/**
 * Returns a lazily-populated iterator that pulls batches of results from the cluster; see
 * {@code remoteIterator()} for the hand-off mechanics.
 */
@Override
public Iterator<R> iterator() {
   return remoteIterator();
}
// Builds the producer/consumer machinery behind iterator(): remote responses are handed off
// through a bounded queue (back-pressure at distributedBatchSize), a lock+condition pair wakes
// the consumer, and an AtomicBoolean signals that all responses have arrived.
Iterator remoteIterator() {
BlockingQueue queue = new ArrayBlockingQueue<>(distributedBatchSize);
final AtomicBoolean complete = new AtomicBoolean();
Lock nextLock = new ReentrantLock();
Condition nextCondition = nextLock.newCondition();
// Producer side: remote results are offered to the queue and the consumer is signalled.
Consumer consumer = new HandOffConsumer<>(queue, complete, nextLock, nextCondition);
// Consumer side: blocks on the condition until data or completion arrives.
IteratorSupplier supplier = new IteratorSupplier<>(queue, complete, nextLock, nextCondition, csm);
boolean iteratorParallelDistribute = parallelDistribution == null ? false : parallelDistribution;
// Rehash-aware iteration retries segments on topology changes; the ignore variant streams once.
if (rehashAware) {
rehashAwareIteration(complete, consumer, supplier, iteratorParallelDistribute);
} else {
ignoreRehashIteration(consumer, supplier, iteratorParallelDistribute);
}
CloseableIterator closeableIterator = new CloseableSuppliedIterator<>(supplier);
// Closing the stream closes the supplier, which cancels any in-flight remote requests.
onClose(supplier::close);
return closeableIterator;
}
private void ignoreRehashIteration(Consumer consumer, IteratorSupplier supplier, boolean iteratorParallelDistribute) {
CollectionConsumer remoteResults = new CollectionConsumer<>(consumer);
ConsistentHash ch = dm.getWriteConsistentHash();
boolean runLocal = ch.getMembers().contains(localAddress);
boolean stayLocal = runLocal && segmentsToFilter != null
&& ch.getSegmentsForOwner(localAddress).containsAll(segmentsToFilter);
NoMapIteratorOperation, R> op = new NoMapIteratorOperation<>(intermediateOperations, supplierForSegments(ch,
segmentsToFilter, null, !stayLocal), distributedBatchSize);
Thread thread = Thread.currentThread();
executor.execute(() -> {
try {
log.tracef("Thread %s submitted iterator request for stream", thread);
if (!stayLocal) {
Object id = csm.remoteStreamOperation(iteratorParallelDistribute, parallel, ch, segmentsToFilter,
keysToFilter, Collections.
© 2015 - 2025 Weber Informatics LLC | Privacy Policy