All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.landawn.abacus.util.stream.BaseStream Maven / Gradle / Ivy

Go to download

A general programming library in Java/Android. It's easy to learn and simple to use with concise and powerful APIs.

There is a newer version: 5.2.4
Show newest version
/*
 * Copyright (C) 2016, 2017, 2018, 2019 HaiYang Li
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.landawn.abacus.util.stream;

import java.io.Closeable;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.Executor;
import java.util.function.Function;
import java.util.function.Supplier;

import com.landawn.abacus.annotation.Beta;
import com.landawn.abacus.annotation.IntermediateOp;
import com.landawn.abacus.annotation.LazyEvaluation;
import com.landawn.abacus.annotation.ParallelSupported;
import com.landawn.abacus.annotation.SequentialOnly;
import com.landawn.abacus.annotation.TerminalOp;
import com.landawn.abacus.annotation.TerminalOpTriggered;
import com.landawn.abacus.exception.TooManyElementsException;
import com.landawn.abacus.util.If.OrElse;
import com.landawn.abacus.util.Immutable;
import com.landawn.abacus.util.ImmutableList;
import com.landawn.abacus.util.ImmutableSet;
import com.landawn.abacus.util.IntList;
import com.landawn.abacus.util.LongMultiset;
import com.landawn.abacus.util.MergeResult;
import com.landawn.abacus.util.Multiset;
import com.landawn.abacus.util.Percentage;
import com.landawn.abacus.util.RateLimiter;
import com.landawn.abacus.util.Throwables;
import com.landawn.abacus.util.u.Optional;

import lombok.Data;
import lombok.EqualsAndHashCode;
import lombok.experimental.Accessors;

/**
 * The Stream will be automatically closed after execution(A terminal method is executed/triggered).
 *
 * @param <T> the type of the stream elements
 * @param <A> the type of array
 * @param <P>

the type of predicate * @param the type of consumer * @param the type of PrimitiveList/List * @param the type of Optional * @param the type of Indexed * @param the type of Iterator * @param the type of of the stream implementing {@code BaseStream} * @see Stream * @see EntryStream * @see IntStream * @see LongStream * @see DoubleStream * @see com.landawn.abacus.util.ExceptionalStream * @see Collectors * @see com.landawn.abacus.util.Fn * @see com.landawn.abacus.util.Comparators */ @com.landawn.abacus.annotation.Immutable @LazyEvaluation public interface BaseStream, S extends BaseStream> extends Closeable, Immutable { /** * Returns a stream consisting of the elements of this stream that match the given predicate. * * @param predicate * @return */ @ParallelSupported @IntermediateOp S filter(P predicate); /** * Returns a stream consisting of the elements of this stream that match the given predicate. * * @param predicate * @param actionOnDroppedItem * @return */ @Beta @ParallelSupported @IntermediateOp S filter(P predicate, C actionOnDroppedItem); /** * Keep the elements until the given predicate returns false. * The stream should be sorted, which means if x is the first element: predicate.test(x) returns false, any element y behind x: predicate.test(y) should returns false. * * In parallel Streams, the elements after the first element which predicate returns false may be tested by predicate too. * *
* For example: *

     * // For sequential stream:
     * Stream.of(1, 2, 3, 4, 5, 6).takeWhile(it -> it < 5).toList() ===> [1, 2, 3, 4]
     * Stream.of(1, 2, 5, 6, 3, 4).takeWhile(it -> it < 5).toList() ===> [1, 2]
     * Stream.of(5, 6, 1, 2, 3, 4).takeWhile(it -> it < 5).toList() ===> []
     *
     *
     * // For parallel stream:
     * Stream.of(1, 2, 3, 4, 5, 6).parallel().takeWhile(it -> it < 5).toList() ===> [1, 2, 3, 4] // Order could be different since it's in parallel stream.
     * Stream.of(1, 2, 5, 6, 3, 4).parallel().takeWhile(it -> it < 5).toList() ===> [1, 2] // or [1, 2, 3] or [1, 2, 3, 4] // Order could be different since it's in parallel stream.
     * Stream.of(5, 6, 1, 2, 3, 4).parallel().takeWhile(it -> it < 5).toList() ===> any sub set of [1, 2, 3, 4], including [] // Order could be different since it's in parallel stream.
     * 
* * @param predicate * @return */ @ParallelSupported @IntermediateOp S takeWhile(P predicate); /** * Remove the elements until the given predicate returns false. * The stream should be sorted, which means if x is the first element: predicate.test(x) returns true, any element y behind x: predicate.test(y) should returns true. * * In parallel Streams, the elements after the first element which predicate returns false may be tested by predicate too. * *
* For example: *
     * // For sequential stream:
     * Stream.of(1, 2, 3, 4, 5, 6).dropWhile(it -> it < 4).toList() ===> [4, 5, 6]
     * Stream.of(1, 2, 5, 6, 3, 4).dropWhile(it -> it < 4).toList() ===> [5, 6, 3, 4]
     * Stream.of(5, 6, 1, 2, 3, 4).dropWhile(it -> it < 4).toList() ===> [5, 6, 1, 2, 3, 4]
     *
     *
     * // For parallel stream:
     * Stream.of(1, 2, 3, 4, 5, 6).parallel().dropWhile(it -> it < 4).toList() ===> [4, 5, 6] // Order could be different since it's in parallel stream.
     * Stream.of(1, 2, 5, 6, 3, 4).parallel().dropWhile(it -> it < 4).toList() ===> [5, 6, 4] // or [5, 6, 3, 4] // Order could be different since it's in parallel stream.
     * Stream.of(5, 6, 1, 2, 3, 4).parallel().dropWhile(it -> it < 4).toList() ===> [5, 6] + any sub set of [1, 2, 3, 4] // Order could be different since it's in parallel stream.
     * 
* * @param predicate * @return */ @ParallelSupported @IntermediateOp S dropWhile(P predicate); /** * Remove the elements until the given predicate returns false. The stream should be sorted, which means if x is the first element: predicate.test(x) returns true, any element y behind x: predicate.test(y) should returns true. * * In parallel Streams, the elements after the first element which predicate returns false may be tested by predicate too. * * * @param predicate * @param actionOnDroppedItem * @return {@link #dropWhile(Object)} */ @Beta @ParallelSupported @IntermediateOp S dropWhile(P predicate, C actionOnDroppedItem); /** * * @param predicate * @return * @see #dropWhile(Object) */ @Beta @ParallelSupported @IntermediateOp S skipUntil(P predicate); // /** // * // * @param predicate // * @return // * @deprecated to be removed in future version. // */ // @ParallelSupported // @IntermediateOp // @Deprecated // S removeIf(P predicate); // // /** // * // * @param predicate // * @param actionOnDroppedItem // * @return // * @deprecated to be removed in future version. // */ // @ParallelSupported // @IntermediateOp // @Deprecated // S removeIf(P predicate, C actionOnDroppedItem); /** * Returns Stream of {@code S} with consecutive sub sequences of the elements, each of the same size (the final sequence may be smaller). * * @param chunkSize the desired size of each sub sequence (the last may be smaller). * @return */ @SequentialOnly @IntermediateOp Stream split(int chunkSize); /** * Returns Stream of {@code PL} with consecutive sub sequences of the elements, each of the same size (the final sequence may be smaller). * *
* This method only runs sequentially, even in parallel stream. * * @param chunkSize the desired size of each sub sequence (the last may be smaller). * @return */ @SequentialOnly @IntermediateOp Stream splitToList(int chunkSize); /** * Split the stream by the specified predicate. * * * This stream should be sorted by value which is used to verify the border. *
* This method only runs sequentially, even in parallel stream. * * @param predicate * @return */ @SequentialOnly @IntermediateOp Stream split(final P predicate); /** * Split the stream by the specified predicate. * * This method only runs sequentially, even in parallel stream. * * @param predicate * @return */ @SequentialOnly @IntermediateOp Stream splitToList(final P predicate); /** * Split the stream into two pieces at where. * The first piece will be loaded into memory. * * @param where * @return */ @SequentialOnly @IntermediateOp Stream splitAt(int where); /** * Split the stream into two pieces at where turns to {@code true}. * The first piece will be loaded into memory. * *
     * 
     * Stream.of(1, 3, 2, 4, 2, 5).splitAt(it -> it >= 4).forEach(s -> s.println()); // [1, 3, 2], [4, 2, 5]
     * 
     * 
* * @param where * @return */ @SequentialOnly @IntermediateOp Stream splitAt(P where); /** * * @param windowSize * @return * @see #sliding(int, int) */ @SequentialOnly @IntermediateOp Stream sliding(int windowSize); /** * * @param windowSize * @return * @see #sliding(int, int) */ @SequentialOnly @IntermediateOp Stream slidingToList(int windowSize); /** * Stream.of(1, 2, 3, 4, 5, 6, 7, 8).sliding(3, 1).forEach(Stream::println) *
output:
* [1, 2, 3]
* [2, 3, 4]
* [3, 4, 5]
* [4, 5, 6]
* [5, 6, 7]
* [6, 7, 8]
* *
============================================================================
* Stream.of(1, 2, 3, 4, 5, 6, 7, 8).sliding(3, 3).forEach(Stream::println) *
output:
* [1, 2, 3]
* [4, 5, 6]
* [7, 8]
* *
============================================================================
* Stream.of(1, 2, 3, 4, 5, 6, 7, 8).sliding(3, 5).forEach(Stream::println) *
output:
* [1, 2, 3]
* [6, 7, 8]
* *
* This method only runs sequentially, even in parallel stream. * * @param windowSize * @param increment * @return */ @SequentialOnly @IntermediateOp Stream sliding(int windowSize, int increment); /** * * @param windowSize * @param increment * @return * @see #sliding(int, int) */ @SequentialOnly @IntermediateOp Stream slidingToList(int windowSize, int increment); /** *
* This method only runs sequentially, even in parallel stream. * * @param c * @return * @see IntList#intersection(IntList) */ @SequentialOnly @IntermediateOp S intersection(Collection c); /** *
* This method only runs sequentially, even in parallel stream. * * @param c * @return * @see IntList#difference(IntList) */ @SequentialOnly @IntermediateOp S difference(Collection c); /** *
* This method only runs sequentially, even in parallel stream. * * @param c * @return * @see IntList#symmetricDifference(IntList) */ @SequentialOnly @IntermediateOp S symmetricDifference(Collection c); /** * *
* This method only runs sequentially, even in parallel stream and all elements will be loaded to memory. * * @return */ @SequentialOnly @IntermediateOp @TerminalOpTriggered S reversed(); /** * *
* This method only runs sequentially, even in parallel stream and all elements will be loaded to memory. * * @return */ @SequentialOnly @IntermediateOp @TerminalOpTriggered S rotated(int distance); /** * *
* This method only runs sequentially, even in parallel stream and all elements will be loaded to memory. * * @return */ @SequentialOnly @IntermediateOp @TerminalOpTriggered S shuffled(); /** * *
* This method only runs sequentially, even in parallel stream and all elements will be loaded to memory. * * @return */ @SequentialOnly @IntermediateOp @TerminalOpTriggered S shuffled(Random rnd); /** * Returns a stream consisting of the distinct elements of this stream. * * @return */ @SequentialOnly @IntermediateOp S distinct(); /** * Returns a stream consisting of the elements of this stream in sorted order. * *
* All elements will be loaded to memory. * * @return */ @ParallelSupported @IntermediateOp @TerminalOpTriggered S sorted(); // /** // *
// * This method only runs sequentially, even in parallel stream and all elements will be loaded to memory. // * // * @return // */ // @SequentialOnly // S cached(); @ParallelSupported @IntermediateOp @TerminalOpTriggered S reverseSorted(); /** * *
* This method only runs sequentially, even in parallel stream and retrieved elements will be saved in memory for next cycle. * * @return */ @SequentialOnly @IntermediateOp S cycled(); /** * *
* This method only runs sequentially, even in parallel stream and retrieved elements will be saved in memory for next cycle. * * @param rounds * * @return */ @SequentialOnly @IntermediateOp S cycled(long rounds); /** *
* This method only runs sequentially, even in parallel stream. * * @return */ @SequentialOnly @IntermediateOp Stream indexed(); /** * * @param n * @return */ @SequentialOnly @IntermediateOp S skip(long n); /** * * @param n * @param consumer * @return */ @Beta @ParallelSupported @IntermediateOp S skip(long n, C consumer); /** * * @param maxSize * @return */ @SequentialOnly @IntermediateOp S limit(long maxSize); // /** // * Same as: {@code stream.skip(from).limit(to - from)}. // * // * @param from // * @param to // * @return // * @deprecated // */ // @Deprecated // @SequentialOnly // S slice(long from, long to); @SequentialOnly @IntermediateOp S step(long step); /** * * @param permitsPerSecond * @return * @see RateLimiter#create(double) */ @SequentialOnly @IntermediateOp S rateLimited(double permitsPerSecond); /** * * @param rateLimiter * @return * @see RateLimiter#acquire() */ @SequentialOnly @IntermediateOp S rateLimited(RateLimiter rateLimiter); /** * Same as {@code peek} * * @param action * @return * @see #peek(Object) */ @ParallelSupported @IntermediateOp S onEach(C action); @ParallelSupported @IntermediateOp S peek(C action); @SequentialOnly @IntermediateOp S prepend(S stream); @SequentialOnly @IntermediateOp S prepend(OT op); @SequentialOnly @IntermediateOp S append(S stream); @SequentialOnly @IntermediateOp S append(OT op); @SequentialOnly @IntermediateOp S appendIfEmpty(Supplier supplier); @SequentialOnly @IntermediateOp S throwIfEmpty(Supplier exceptionSupplier); /** * This is a terminal operation. That's to say this stream will be closed after this operation. * * @param * @param * @param func * @return * @throws E */ @TerminalOp Optional applyIfNotEmpty(Throwables.Function func) throws E; /** * This is a terminal operation. That's to say this stream will be closed after this operation. * * @param * @param action * @throws E * @return */ @TerminalOp OrElse acceptIfNotEmpty(Throwables.Consumer action) throws E; // /** // *
// * This method only runs sequentially, even in parallel stream and all elements will be loaded to memory. // * // * @return // */ // @SequentialOnly // S cached(); @SequentialOnly @TerminalOp String join(CharSequence delimiter); @SequentialOnly @TerminalOp String join(final CharSequence delimiter, final CharSequence prefix, final CharSequence suffix); // /** // * Same as: {@code stream.skip(from).limit(to - from)}. // * // * @param from // * @param to // * @return // * @deprecated // */ // @Deprecated // @SequentialOnly // S slice(long from, long to); /** *
* All elements will be loaded to memory and sorted if not yet. * * @return */ @SequentialOnly @IntermediateOp Optional> percentiles(); @SequentialOnly @TerminalOp long count(); @SequentialOnly @TerminalOp OT first(); @SequentialOnly @TerminalOp OT last(); /** * @param position in current stream(not upstream or origin source). It starts from 0. * @return */ @Beta @SequentialOnly @TerminalOp OT elementAt(long position); /** * * @return * @throws TooManyElementsException if there are more than one element in this stream. */ @SequentialOnly @TerminalOp OT onlyOne() throws TooManyElementsException; @SequentialOnly @TerminalOp A toArray(); @SequentialOnly @TerminalOp List toList(); @SequentialOnly @TerminalOp Set toSet(); @SequentialOnly @TerminalOp ImmutableList toImmutableList(); @SequentialOnly @TerminalOp ImmutableSet toImmutableSet(); @SequentialOnly @TerminalOp > CC toCollection(Supplier supplier); @SequentialOnly @TerminalOp Multiset toMultiset(); @SequentialOnly @TerminalOp Multiset toMultiset(Supplier> supplier); //NOSONAR @SequentialOnly @TerminalOp LongMultiset toLongMultiset(); @SequentialOnly @TerminalOp LongMultiset toLongMultiset(Supplier> supplier); //NOSONAR @Beta @SequentialOnly @TerminalOp void println(); /** * Returns an iterator for the elements of this stream. * *
* Remember to close this Stream after the iteration is done, if needed. * * @return * @deprecated ? may cause memory/resource leak if forget to close this {@code Stream} */ @SequentialOnly @Deprecated ITER iterator(); @Beta @SequentialOnly @IntermediateOp @SuppressWarnings("rawtypes") SS __(Function transfer); //NOSONAR // @SequentialOnly // Try tried(); /** * * @param closeHandler * @return */ @SequentialOnly @IntermediateOp S onClose(Runnable closeHandler); /** * It will be called by terminal operations in final. */ @Override void close(); boolean isParallel(); @SequentialOnly @IntermediateOp S sequential(); /** * Consider using {@code sps(Function)} if only next operation need to be parallelized. For example: *
     * stream.parallel().map(f).filter(p)...;
     *
     * // Replace above line of code with "sps" if only "f" need to be parallelized. And "p" is fast enough to be executed in sequential Stream.
     * stream.sps(s -> s.map(f)).filter(p)...;
     * // Or switch the stream back sequential stream if don't use "sps".
     * stream.parallel().map(f).sequential().filter(p)...;
     *
     * 
* * In most scenarios, there could be only one operation need be parallelized in the stream. So {@code sps(Function)} is recommended in most of scenarios. * * @return * @see #parallel(Executor) * @see #parallel(int, Executor) * @see #parallel(int, Splitor, Executor) * @see #sps(Function) * @see #sps(int, Function) */ @SequentialOnly @IntermediateOp S parallel(); /** * Consider using {@code sps(int, Function)} if only next operation need to be parallelized. For example: *
     * stream.parallel(maxThreadNum).map(f).filter(p)...;
     *
     * // Replace above line of code with "sps" if only "f" need to be parallelized. And "p" is fast enough to be executed in sequential Stream.
     * stream.sps(maxThreadNum, s -> s.map(f)).filter(p)...;
     * // Or switch the stream back sequential stream if don't use "sps".
     * stream.parallel(maxThreadNum).map(f).sequential().filter(p)...;
     *
     * 
* * In most scenarios, there could be only one operation need be parallelized in the stream. So {@code sps(int, Function)} is recommended in most of scenarios. * * @param maxThreadNum if the specified value is bigger than the maximum allowed thread number per operation ({@code min(64, cpu_cores * 8)}), maximum allowed thread number per operation will be used. * To parallelize this Stream with thread number bigger than {@code min(64, cpu_cores * 8)}, please specified {@code executor} by calling {@link #parallel(int, Executor)} or {@link #parallel(int, Splitor, Executor)} or {@link #parallel(ParallelSettings)} * @return * @see #parallel(Executor) * @see #parallel(int, Executor) * @see #parallel(int, Splitor, Executor) * @see #sps(Function) * @see #sps(int, Function) */ @SequentialOnly @IntermediateOp S parallel(int maxThreadNum); /** * * * @param splitor * @return * @see #parallel(Executor) * @see #parallel(int, Executor) * @see #parallel(int, Splitor, Executor) * @see #sps(Function) * @see #sps(int, Function) */ @SequentialOnly @IntermediateOp S parallel(Splitor splitor); /** * Returns an equivalent stream that is parallel. May return itself if the stream was already parallel with the same maxThreadNum and splitor as the specified ones. * *

* When to use parallel Streams? *
*
     * 
     * Profiler.run(1, 1, 3, "sequential", () -> Stream.of(list).operation(F)...).printResult();
     * Profiler.run(1, 1, 3, "parallel", () -> Stream.of(list).parallel().operation(F)...).printResult();
     * 
     * 
* * Here is a sample performance test with computer: CPU Intel i7-3520M 4-cores 2.9 GHz, JDK 1.8.0_101, Windows 7: * *
     * 
     *
     public void test_perf() {
        final String[] strs = new String[10_000];
        N.fill(strs, N.uuid());
    
        final int m = 10;
        final Function<String, Long> mapper = str -> {
            long result = 0;
            for (int i = 0; i < m; i++) {
                result += N.sum(str.toCharArray()) + 1;
            }
            return result;
        };
    
        final MutableLong sum = MutableLong.of(0);
    
        for (int i = 0, len = strs.length; i < len; i++) {
            sum.add(mapper.apply(strs[i]));
        }
    
        final int threadNum = 1, loopNum = 100, roundNum = 3;
    
        Profiler.run(threadNum, loopNum, roundNum, "For Loop", () -> {
            long result = 0;
            for (int i = 0, len = strs.length; i < len; i++) {
                result += mapper.apply(strs[i]);
            }
            assertEquals(sum.longValue(), result);
        }).printResult();
    
        Profiler.run(threadNum, loopNum, roundNum, "JDK Sequential",
                () -> assertEquals(sum.longValue(), java.util.stream.Stream.of(strs).map(mapper).mapToLong(e -> e).sum())).printResult();
    
        Profiler.run(threadNum, loopNum, roundNum, "JDK Parallel",
                () -> assertEquals(sum.longValue(), java.util.stream.Stream.of(strs).parallel().map(mapper).mapToLong(e -> e).sum())).printResult();
    
        Profiler.run(threadNum, loopNum, roundNum, "Abcus Sequential", () -> assertEquals(sum.longValue(), Stream.of(strs).map(mapper).mapToLong(e -> e).sum()))
                .printResult();
    
        Profiler.run(threadNum, loopNum, roundNum, "Abcus Parallel",
                () -> assertEquals(sum.longValue(), Stream.of(strs).parallel().map(mapper).mapToLong(e -> e).sum())).printResult();
    
        Profiler.run(threadNum, loopNum, roundNum, "Abcus Parallel by chunck", () -> assertEquals(sum.longValue(),
                Stream.of(strs).splitToList(100).parallel().map(it -> N.sumLong(it, e -> mapper.apply(e))).mapToLong(e -> e).sum())).printResult();
     }
    
     * 
     * 
* And test result: Unit is milliseconds. N(the number of elements) is 10_000, Q(cost per element of F, the per-element function (usually a lambda), here is mapper) is calculated by: value of 'For loop' / N(10_000). * * * * * * * * *
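 * |                  | m = 1   | m = 10 | m = 50 | m = 100 | m = 500 | m = 1000 |
 * | Q                | 0.00002 | 0.0002 | 0.001  | 0.002   | 0.01    | 0.02     |
 * | For Loop         | 0.23    | 2.3    | 11     | 22      | 110     | 219      |
 * | JDK Sequential   | 0.28    | 2.3    | 11     | 22      | 114     | 212      |
 * | JDK Parallel     | 0.22    | 1.3    | 6      | 12      | 66      | 122      |
 * | Abcus Sequential | 0.3     | 2      | 11     | 22      | 112     | 212      |
 * | Abcus Parallel   | 11      | 11     | 11     | 16      | 77      | 128      |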
* * Comparison: *
    *
  • Again, do NOT and should NOT use parallel Streams if you don't have any performance problem with sequential Streams, because using parallel Streams has extra cost.
  • *
  • Again, consider using parallel Streams only when N(the number of elements) * Q(cost per element of F, the per-element function (usually a lambda)) is big enough.
  • *
  • The implementation of parallel Streams in Abacus is more than 10 times slower than parallel Streams in JDK when Q is tiny (here, less than 0.0002 milliseconds by the test):
  • *
      *
    • The implementation of parallel Streams in JDK 8 still can beat the sequential/for loop when Q is tiny(Here is 0.00002 milliseconds by the test). * That's amazing, considering the extra cost brought by parallel computation. It's well done.
    • *
    • The implementation of parallel Streams in Abacus is pretty simple and straightforward. * The extra cost (starting threads/synchronization/queue...) brought by parallel Streams in Abacus is too big for a tiny Q (here, less than 0.001 milliseconds by the test). * But it starts to be faster than sequential Streams when Q is big enough (here, 0.001 milliseconds by the test) and starts to catch up with the parallel Streams in JDK when Q is bigger (here, 0.01 milliseconds by the test).
    • *
    • Consider using the parallel Streams in Abacus when Q is big enough, especially when IO is involved in F. * Because one IO operation (e.g. DB/web service request..., Reading/Writing file...) usually takes 1 to 1000 milliseconds, or even longer. * With the parallel Streams APIs in Abacus, it's very simple to specify the max thread number. Sometimes, it's much faster to execute IO/Network requests with a few more threads. * It's fair to say that the parallel Streams in Abacus are highly efficient, and may be as fast as or faster than the parallel Streams in JDK when Q is big enough, except when F is a CPU-heavy operation. * Most of the time, Q is big enough to consider using parallel Streams because IO/Network is involved in F.
    • *
    *
  • JDK 7 is supported by the Streams in Abacus. It's perfect to work with retrolambda on Android
  • *
  • All primitive types are supported by Stream APIs in Abacus except boolean
  • *
* *

* A bit more about Lambdas/Stream APIs: you may have heard that Lambdas/Stream APIs are 5 times slower than imperative programming. * It's true when Q and F are VERY, VERY tiny, like f = (int a, int b) -> a + b;. * But if we look into the samples in the article and think about it: it takes less than 1 millisecond to get the max value in 100K numbers. * There is a potential performance issue only if the "get the max value in 100K numbers" operation is called many, many times in your API or in a single request. * Otherwise, the difference between 0.1 milliseconds and 0.5 milliseconds can be totally ignored. * Usually we meet performance issues only if Q and F are big enough. However, the performance of Lambdas/Streams APIs is close to that of a for loop when Q and F are big enough. * No matter the scenario, we don't need to, and should not, be concerned about the performance of Lambdas/Stream APIs. * *

* Although it is a parallel Stream, it doesn't mean all the methods are executed in parallel. * For some methods the sequential way is as fast as, or even faster than, the parallel way, or it is pretty difficult, if not impossible, to implement the method with a parallel approach. * Here are the methods which are executed sequentially even in parallel Streams. *

* splitXXX/splitAt/splitBy/slidingXXX/collapse, distinct, reverse, rotate, shuffle, indexed, cached, top, kthLargest, count, toArray, toList, toList, toSet, toMultiset, toLongMultiset, * intersection(Collection c), difference(Collection c), symmetricDifference(Collection c), forEach(identity, accumulator, predicate), findFirstOrLast, findFirstAndLast * * @param maxThreadNum Default value is the number of cpu-cores. Steps/operations will be executed sequentially if maxThreadNum is 1. * @param splitor The target array is split by ranges for multiple threads if splitor is splitor.ARRAY and target stream composed by array. It looks like: * *

     * for (int i = 0; i < maxThreadNum; i++) {
     *     final int sliceIndex = i;
     *
     *     futureList.add(asyncExecutor.execute(new Runnable() {
     *         public void run() {
     *             int cursor = fromIndex + sliceIndex * sliceSize;
     *             final int to = toIndex - cursor > sliceSize ? cursor + sliceSize : toIndex;
     *             while (cursor < to) {
     *                 action.accept(elements[cursor++]);
     *             }
     *        }
     *    }));
     * }
     * 
* Otherwise, each thread will get the elements from the target array/iterator in the stream one by one with the target array/iterator synchronized. It looks like: *

     * for (int i = 0; i < maxThreadNum; i++) {
     *     futureList.add(asyncExecutor.execute(new Runnable() {
     *         public void run() {
     *             T next = null;
     *
     *             while (true) {
     *                 synchronized (elements) {
     *                     if (cursor.intValue() < toIndex) {
     *                         next = elements[cursor.getAndIncrement()];
     *                     } else {
     *                         break;
     *                     }
     *                 }
     *
     *                 action.accept(next);
     *             }
     *         }
     *     }));
     * }
     * 
* Use splitor.ARRAY only when F (the per-element function (usually a lambda)) is very tiny and the cost of synchronization on the target array/iterator is too big relative to it. * For an F involving IO or taking 'long' to complete, choose splitor.ITERATOR. Default value is splitor.ITERATOR. * *
* *
     * stream.parallel(maxThreadNum, splitor).map(f).filter(p)...;
     *
     * // Replace above line of code with "sps" if only "f" need to be parallelized. And "p" is fast enough to be executed in sequential Stream.
     * stream.sps(SP.create(maxThreadNum, splitor), s -> s.map(f)).filter(p)...;
     * // Or switch the stream back sequential stream if don't use "sps".
     * stream.parallel(maxThreadNum, splitor).map(f).sequential().filter(p)...;
     *
     * 
* * @param maxThreadNum if the specified value is bigger than the maximum allowed thread number per operation ({@code min(64, cpu_cores * 8)}), maximum allowed thread number per operation will be used. * To parallelize this Stream with thread number bigger than {@code min(64, cpu_cores * 8)}, please specified {@code executor} by calling {@link #parallel(int, Executor)} or {@link #parallel(int, Splitor, Executor)} or {@link #parallel(ParallelSettings)} * @param splitor * @return * @see #parallel(Executor) * @see #parallel(int, Executor) * @see #parallel(int, Splitor, Executor) * @see #sps(Function) * @see #sps(int, Function) * @see MergeResult * @see com.landawn.abacus.util.Profiler#run(int, int, int, String, Runnable) * @see Understanding Parallel Stream Performance in Java SE 8 * @see When to use parallel Streams */ @SequentialOnly @IntermediateOp S parallel(int maxThreadNum, Splitor splitor); /** *
     * stream.parallel(maxThreadNum, splitor, executor).map(f).filter(p)...;
     *
     * // Replace above line of code with "sps" if only "f" need to be parallelized. And "p" is fast enough to be executed in sequential Stream.
     * stream.sps(SP.create(maxThreadNum, splitor, executor), s -> s.map(f)).filter(p)...;
     * // Or switch the stream back sequential stream if don't use "sps".
     * stream.parallel(maxThreadNum, splitor, executor).map(f).sequential().filter(p)...;
     *
     * 
* * * @param maxThreadNum if the specified value is bigger than the maximum allowed thread number per operation ({@code min(64, cpu_cores * 8)}) and {@code executor} is not specified with a non-null value, maximum allowed thread number per operation will be used. * To parallelize this Stream with thread number bigger than {@code min(64, cpu_cores * 8)}, please specify {@code executor} with a non-null value. * @param splitor * @param executor * @return * @see #sps(Function) * @see #sps(int, Function) */ @SequentialOnly @IntermediateOp S parallel(int maxThreadNum, Splitor splitor, Executor executor); /** * * // Replace above line of code with "sps" if only "f" need to be parallelized. And "p" is fast enough to be executed in sequential Stream. * stream.sps(SP.create(maxThreadNum, executor), s -> s.map(f)).filter(p)...; * // Or switch the stream back sequential stream if don't use "sps". * stream.parallel(maxThreadNum, executor).map(f).sequential().filter(p)...; * * * @param maxThreadNum if the specified value is bigger than the maximum allowed thread number per operation ({@code min(64, cpu_cores * 8)}) and {@code executor} is not specified with a non-null value, maximum allowed thread number per operation will be used. * To parallelize this Stream with thread number bigger than {@code min(64, cpu_cores * 8)}, please specify {@code executor} with a non-null value. * @param executor * @return * @see #sps(Function) * @see #sps(int, Function) */ @SequentialOnly @IntermediateOp S parallel(int maxThreadNum, Executor executor); /** *
     * <pre>
     * stream.parallel(executor).map(f).filter(p)...;
     *
     * // Replace the line above with "sps" if only "f" needs to be parallelized and "p" is fast enough to be executed in a sequential Stream.
     * stream.sps(PS.create(executor), s -> s.map(f)).filter(p)...;
     * // Or switch the stream back to a sequential stream if "sps" is not used.
     * stream.parallel(executor).map(f).sequential().filter(p)...;
     *
     * </pre>
     *
     *
     * @param executor the executor to use for the parallel stream
     * @return the resulting stream of type {@code S}
     * @see #sps(Function)
     * @see #sps(int, Function)
     */
    @SequentialOnly
    @IntermediateOp
    S parallel(Executor executor);

    //    /**
    //     * Java 19 or above is required.
    //     *
    //     * @param maxThreadNum
    //     * @param withVirtualThread
    //     * @return
    //     * @see #parallel(int, int)
    //     * @see Executors#newVirtualThreadPerTaskExecutor()
    //     * @see JEP 425: Virtual Threads
    //     * @see Java Virtual Threads: Millions of Threads Within Grasp
    //     */
    //    @Beta
    //    @SequentialOnly
    //    @IntermediateOp
    //    S parallel(int maxThreadNum, boolean withVirtualThread);
    //
    //    /**
    //     * Virtual threads executed by each instance of {@code ExecutorService} created by {@code Executors.newVirtualThreadPerTaskExecutor()} will be {@code maxThreadNum / executorNumForVirtualThread}.
    //     * <br />
    //     * Probably {@code parallel(maxThreadNum, true)} will be enough for a lot of long delay and light requests(e.g. Web/DB requests).
    //     *
    //     * <br />
    //     * Java 19 or above is required.
    //     *
    //     * @param maxThreadNum
    //     * @param executorNumForVirtualThread
    //     * @return
    //     * @see #parallel(int, boolean)
    //     * @see Executors#newVirtualThreadPerTaskExecutor()
    //     * @see JEP 425: Virtual Threads
    //     * @see Java Virtual Threads: Millions of Threads Within Grasp
    //     */
    //    @Beta
    //    @SequentialOnly
    //    @IntermediateOp
    //    S parallel(int maxThreadNum, int executorNumForVirtualThread);

    /**
     * Parallel variant configured by a {@code ParallelSettings} instance. Usage example:
     * <pre>
     * stream.parallel(parallelSettings).map(f).filter(p)...;
     *
     * // Replace the line above with "sps" if only "f" needs to be parallelized and "p" is fast enough to be executed in a sequential Stream.
     * // NOTE(review): "SP" is not declared in the visible part of this file (the visible factory class is ParallelSettings.PS) - confirm this example.
     * stream.sps(SP.create(parallelSettings), s -> s.map(f)).filter(p)...;
     * // Or switch the stream back to a sequential stream if "sps" is not used.
     * stream.parallel(parallelSettings).map(f).sequential().filter(p)...;
     *
     * </pre>
     *
     * @param ps the parallel settings to apply (see {@code ParallelSettings})
     * @return the resulting stream of type {@code S}
     * @see #sps(Function)
     * @see #sps(int, Function)
     */
    @Beta
    @SequentialOnly
    @IntermediateOp
    S parallel(ParallelSettings ps);

    /**
     * Temporarily switches the stream to a parallel stream for the operation {@code ops} and then switches back to a sequential stream.
     * <br />
* {@code stream().parallel().ops(map/filter/...).sequential()}
     *
     * @param <SS> the return type
     * @param ops the operation(s) to apply while the stream is parallel
     * @return the value produced through {@code ops}, after switching back to sequential as described above
     */
    @Beta
    @SequentialOnly
    @IntermediateOp
    @SuppressWarnings("rawtypes")
    // NOTE(review): "SS" is not declared anywhere in the visible source and Function is raw here;
    // a method-level type-parameter declaration was presumably stripped from this listing - confirm against upstream.
    SS sps(Function ops);

    /**
     * Temporarily switches the stream to a parallel stream for the operation {@code ops} and then switches back to a sequential stream.
     * <br />
* {@code stream().parallel(maxThreadNum).ops(map/filter/...).sequential()}
     *
     * @param <SS> the return type
     * @param maxThreadNum the thread number passed to {@code parallel(maxThreadNum)} for the parallel section
     * @param ops the operation(s) to apply while the stream is parallel
     * @return the value produced through {@code ops}, after switching back to sequential as described above
     */
    @Beta
    @SequentialOnly
    @IntermediateOp
    @SuppressWarnings("rawtypes")
    // NOTE(review): "SS" is not declared anywhere in the visible source and Function is raw here;
    // a method-level type-parameter declaration was presumably stripped from this listing - confirm against upstream.
    SS sps(int maxThreadNum, Function ops);

    //    @Beta
    //    @SequentialOnly
    //    @IntermediateOp
    //    @SuppressWarnings("rawtypes")
    //    SS sps(int maxThreadNum, boolean withVirtualThread, Function ops);
    //
    //    @Beta
    //    @SequentialOnly
    //    @IntermediateOp
    //    @SuppressWarnings("rawtypes")
    //    SS sps(int maxThreadNum, int executorNumForVirtualThread, Function ops);

    //    /**
    //     * Temporarily switch the stream to parallel stream for operation {@code ops} and then switch back to sequence stream.
    //     * <br />
    //     * {@code stream().parallel(ps).ops(map/filter/...).sequence()}
    //     *
    //     * @param
    //     * @param ps
    //     * @param ops
    //     * @return
    //     */
    //    @Beta
    //    @SequentialOnly
    //    @IntermediateOp
    //    @SuppressWarnings("rawtypes")
    //    SS sps(ParallelSettings ps, Function ops);

    //    /**
    //     * Returns a new sequential {@code SS} by apply {@code thisStream.parallel()} to the specified {@code func}.
    //     * It's equal to:
    //     * <pre>
    //     * <code>
    //     * thisStream.parallel().(action by func).sequential();
    //     * </code>
    //     * </pre>
    //     *
    //     * @param func
    //     * @return
    //     * @deprecated
    //     */
    //    @Deprecated
    //    > SS parallelOnly(Function func);
    //
    //    /**
    //     * Returns a new sequential {@code SS} by apply {@code thisStream.parallel(maxThreadNum)} to the specified {@code func}.
    //     * It's equal to:
    //     * <pre>
    //     * <code>
    //     * thisStream.parallel(maxThreadNum).(action by func).sequential();
    //     * </code>
    //     * </pre>
    //     *
    //     * @param maxThreadNum
    //     * @param func
    //     * @return
    //     * @deprecated
    //     */
    //    @Deprecated
    //    > SS parallelOnly(int maxThreadNum, Function func);
    //
    //    /**
    //     * Returns a new sequential {@code S} by apply {@code thisStream.parallel(maxThreadNum, executor)} to the specified {@code func}.
    //     *
    //     * <pre>
    //     * <code>
    //     * thisStream.parallel(maxThreadNum, executor).(action by func).sequential();
    //     * </code>
    //     * </pre>
    //     *
    //     * @param maxThreadNum
    //     * @param executor should be able to execute {@code maxThreadNum} * {@code following up operations} in parallel.
    //     * @return
    //     * @deprecated
    //     */
    //    @Deprecated
    //    > SS parallelOnly(int maxThreadNum, Executor executor, Function func);

    //    /**
    //     * Return the underlying maxThreadNum if the stream is parallel, otherwise 1 is returned.
    //     *
    //     * @return
    //     */
    //    int maxThreadNum();

    //    /**
    //     * Returns a parallel stream with the specified maxThreadNum . Or return
    //     * itself, either because the stream was already parallel with same maxThreadNum, or because
    //     * it's a sequential stream.
    //     *
    //     * @param maxThreadNum
    //     * @return
    //     * @deprecated
    //     */
    //    @Deprecated
    //    S maxThreadNum(int maxThreadNum);

    //    /**
    //     * Return the underlying splitor if the stream is parallel, otherwise the default value splitor.ITERATOR is returned.
    //     *
    //     * @return
    //     */
    //    Splitor splitor();

    //    /**
    //     * Returns a parallel stream with the specified splitor . Or return
    //     * itself, either because the stream was already parallel with same splitor, or because
    //     * it's a sequential stream.
    //     *
    //     * @param splitor
    //     * @return
    //     * @deprecated
    //     */
    //    @Deprecated
    //    S splitor(Splitor splitor);

    /**
     * Temporarily switches the stream to a sequential stream for the operation {@code ops} and then switches back to a parallel stream with the same {@code maxThreadNum/splitor/asyncExecutor}.
     * <br />
* {@code stream().sequential().ops(map/filter/...).parallel(sameMaxThreadNum, sameSplitor, sameAsyncExecutor)}
     *
     * @param <SS> the return type
     * @param ops the operation(s) to apply while the stream is sequential
     * @return the value produced through {@code ops}, after switching back to parallel as described above
     */
    @Beta
    @SequentialOnly
    @IntermediateOp
    @SuppressWarnings("rawtypes")
    // NOTE(review): "SS" is not declared anywhere in the visible source and Function is raw here;
    // a method-level type-parameter declaration was presumably stripped from this listing - confirm against upstream.
    SS psp(Function ops);

    /**
     * Generally, or most of the time, elements will be fetched and processed by parallel threads one by one.
     *
     * <pre>
     * synchronized (elements) {
     *    if (elements.hasNext()) {
     *       next = elements.next();
     *    }
     * }
     *
     * // Do something with next...
     * </pre>
     *
     * {@code Splitor.ARRAY} only works when stream elements are stored in an array.
     * The array will be equally split into slices based on thread number for each thread:
     *
     * <pre>
     * int sliceSize = (toIndex - fromIndex) / threadNum + ((toIndex - fromIndex) % threadNum == 0 ? 0 : 1);
     * </pre>
     *
     * If stream elements are not stored in an array, {@code Splitor.ARRAY} works the same way as {@code Splitor.ITERATOR}.
     *
     * <br />
     * <br />
     *
     * If you want to customize the buffer size for each thread, try {@code stream.split(bufferSize)} or {@code stream.splitToList(bufferSize)}:
     * <pre>
     * stream.split(bufferSize).parallel(...).map/filter/flatMap...(sliceStream -> // do whatever you want to do);
     * stream.splitToList(bufferSize).parallel(...).map/filter/flatMap...(sliceList -> // do whatever you want to do);
     * // Or
     * stream.sps(maxThreadNum, bufferSize, op)
     * </pre>
     *
     * @see Stream#sps(Function)
     * @see Stream#sps(int, Function)
     * @see Stream#sps(int, int, Function)
     */
    public enum Splitor {

        /**
         * {@code Splitor.ARRAY} only works when stream elements are stored in an array.
         * The array will be equally split into slices based on thread number for each thread:
         *
         * <pre>
         * int sliceSize = (toIndex - fromIndex) / threadNum + ((toIndex - fromIndex) % threadNum == 0 ? 0 : 1);
         * </pre>
         *
         * If stream elements are not stored in an array, {@code Splitor.ARRAY} works the same way as {@code Splitor.ITERATOR}.
         *
         * @see Stream#sps(Function)
         * @see Stream#sps(int, Function)
         * @see Stream#sps(int, int, Function)
         */
        ARRAY,

        /**
         * Elements will be fetched and processed by parallel threads one by one.
         *
         * <pre>
         * synchronized (elements) {
         *    if (elements.hasNext()) {
         *       next = elements.next();
         *    }
         * }
         *
         * // Do something with next...
         * </pre>
         *
         * @see Stream#sps(Function)
         * @see Stream#sps(int, Function)
         * @see Stream#sps(int, int, Function)
         */
        ITERATOR;
    }

    /**
     * Bundle of the options accepted by {@code parallel(ParallelSettings)}: thread limit, splitor and executor.
     * Accessors are generated by the annotations below; the fluent, chainable setter form
     * (e.g. {@code new ParallelSettings().maxThreadNum(n).splitor(s)}) is the one used by {@link PS}.
     */
    @Data
    @Accessors(fluent = true)
    @EqualsAndHashCode(callSuper = false)
    public static final class ParallelSettings {
        /** Maximum thread number for the parallel operations. */
        private int maxThreadNum;

        /** NOTE(review): only referenced by commented-out virtual-thread overloads in the visible source - confirm current use. */
        private int executorNumForVirtualThread;

        /** How elements are distributed to threads; see {@link Splitor}. */
        private Splitor splitor;

        /** Executor supplied for the parallel operations. */
        private Executor executor;

        // private boolean cancelUncompletedThreads = false;

        /**
         * Static factories for {@link ParallelSettings}; each one sets only the options it is given
         * and leaves the remaining fields at their Java defaults.
         */
        @Beta
        public static class PS {
            private PS() {
                // utility class;
            }

            /**
             * Creates settings with only {@code maxThreadNum} set.
             *
             * @param maxThreadNum the thread number to store
             * @return a new {@code ParallelSettings}
             */
            public static ParallelSettings create(final int maxThreadNum) {
                return new ParallelSettings().maxThreadNum(maxThreadNum);
            }

            /**
             * Creates settings with only {@code splitor} set.
             *
             * @param splitor the splitor to store
             * @return a new {@code ParallelSettings}
             */
            public static ParallelSettings create(final Splitor splitor) {
                return new ParallelSettings().splitor(splitor);
            }

            /**
             * Creates settings with only {@code executor} set.
             *
             * @param executor the executor to store
             * @return a new {@code ParallelSettings}
             */
            public static ParallelSettings create(final Executor executor) {
                return new ParallelSettings().executor(executor);
            }

            /**
             * Creates settings with {@code maxThreadNum} and {@code splitor} set.
             *
             * @param maxThreadNum the thread number to store
             * @param splitor the splitor to store
             * @return a new {@code ParallelSettings}
             */
            public static ParallelSettings create(final int maxThreadNum, final Splitor splitor) {
                return new ParallelSettings().maxThreadNum(maxThreadNum).splitor(splitor);
            }

            /**
             * Creates settings with {@code maxThreadNum} and {@code executor} set.
             *
             * @param maxThreadNum the thread number to store
             * @param executor the executor to store
             * @return a new {@code ParallelSettings}
             */
            public static ParallelSettings create(final int maxThreadNum, final Executor executor) {
                return new ParallelSettings().maxThreadNum(maxThreadNum).executor(executor);
            }

            /**
             * Creates settings with {@code splitor} and {@code executor} set.
             *
             * @param splitor the splitor to store
             * @param executor the executor to store
             * @return a new {@code ParallelSettings}
             */
            public static ParallelSettings create(final Splitor splitor, final Executor executor) {
                return new ParallelSettings().splitor(splitor).executor(executor);
            }

            /**
             * Creates settings with {@code maxThreadNum}, {@code splitor} and {@code executor} set.
             *
             * @param maxThreadNum the thread number to store
             * @param splitor the splitor to store
             * @param executor the executor to store
             * @return a new {@code ParallelSettings}
             */
            public static ParallelSettings create(final int maxThreadNum, final Splitor splitor, final Executor executor) {
                return new ParallelSettings().maxThreadNum(maxThreadNum).splitor(splitor).executor(executor);
            }
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy