All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.landawn.abacus.util.stream.BaseStream Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (C) 2016, 2017, 2018, 2019 HaiYang Li
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.landawn.abacus.util.stream;

import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.Executor;

import com.landawn.abacus.annotation.Beta;
import com.landawn.abacus.annotation.ParallelSupported;
import com.landawn.abacus.annotation.SequentialOnly;
import com.landawn.abacus.exception.DuplicatedResultException;
import com.landawn.abacus.util.ImmutableList;
import com.landawn.abacus.util.ImmutableSet;
import com.landawn.abacus.util.IntList;
import com.landawn.abacus.util.LongMultiset;
import com.landawn.abacus.util.Multiset;
import com.landawn.abacus.util.Nth;
import com.landawn.abacus.util.Percentage;
import com.landawn.abacus.util.u.Optional;
import com.landawn.abacus.util.function.Function;
import com.landawn.abacus.util.function.Supplier;

/** 
 * The Stream will be automatically closed after execution(A terminal method is executed/triggered).
 *
 * @param  the type of the stream elements
 * @param  the type of array
 * @param 

the type of predicate * @param the type of consumer * @param the type of PrimitiveList/List * @param the type of Optional * @param the type of Indexed * @param the type of Iterator * @param the type of of the stream implementing {@code BaseStream} * @see Stream * @see IntStream * @see LongStream * @see DoubleStream */ public interface BaseStream> extends AutoCloseable { /** * Returns a stream consisting of the elements of this stream that match the given predicate. * * @param predicate * @return */ @ParallelSupported S filter(P predicate); /** * Keep the elements until the given predicate returns false. The stream should be sorted, which means if x is the first element: predicate.text(x) returns false, any element y behind x: predicate.text(y) should returns false. * * In parallel Streams, the elements after the first element which predicate returns false may be tested by predicate too. * * @param predicate * @return */ @ParallelSupported S takeWhile(P predicate); /** * Remove the elements until the given predicate returns false. The stream should be sorted, which means if x is the first element: predicate.text(x) returns true, any element y behind x: predicate.text(y) should returns true. * * In parallel Streams, the elements after the first element which predicate returns false may be tested by predicate too. * * @param predicate * @return */ @ParallelSupported S dropWhile(P predicate); /** * Remove the elements until the given predicate returns false. The stream should be sorted, which means if x is the first element: predicate.text(x) returns true, any element y behind x: predicate.text(y) should returns true. * * In parallel Streams, the elements after the first element which predicate returns false may be tested by predicate too. * * * @param predicate * @param consumer * @return */ @ParallelSupported S dropWhile(P predicate, C consumer); @ParallelSupported S removeIf(P predicate); @ParallelSupported S removeIf(P predicate, C consumer); /** * Returns Stream of {@code S} with consecutive sub sequences of the elements, each of the same size (the final sequence may be smaller). * * @param chunkSize the desired size of each sub sequence (the last may be smaller). * @return */ @SequentialOnly Stream split(int chunkSize); /** * Returns Stream of {@code PL} with consecutive sub sequences of the elements, each of the same size (the final sequence may be smaller). * *
* This method only run sequentially, even in parallel stream. * * @param chunkSize the desired size of each sub sequence (the last may be smaller). * @return */ @SequentialOnly public abstract Stream splitToList(int chunkSize); /** * Split the stream by the specified predicate. * * * This stream should be sorted by value which is used to verify the border. *
* This method only run sequentially, even in parallel stream. * * @param predicate * @return */ @SequentialOnly Stream split(final P predicate); /** * Split the stream by the specified predicate. * * This method only run sequentially, even in parallel stream. * * @param predicate * @return */ @SequentialOnly Stream splitToList(final P predicate); /** * Split the stream into two pieces at where. * The first piece will be loaded into memory. * * @param where * @return */ @SequentialOnly Stream splitAt(int where); /** * Split the stream into two pieces at where turns to {@code false} * The first piece will be loaded into memory. * *

     * 
     * Stream.of(1, 3, 2, 4, 2, 5).splitBy(i -> i <= 3).forEach(s -> s.println()); // [1, 3, 2], [4, 2, 5]
     * 
     * 
* * @param where * @return */ @SequentialOnly Stream splitBy(P where); /** * * @param windowSize * @return * @see #sliding(int, int) */ @SequentialOnly Stream sliding(int windowSize); /** * * @param windowSize * @return * @see #sliding(int, int) */ @SequentialOnly Stream slidingToList(int windowSize); /** * Stream.of(1, 2, 3, 4, 5, 6, 7, 8).sliding(3, 1).forEach(Stream::println) *
output:
* [1, 2, 3]
* [2, 3, 4]
* [3, 4, 5]
* [4, 5, 6]
* [5, 6, 7]
* [6, 7, 8]
* *
============================================================================
* Stream.of(1, 2, 3, 4, 5, 6, 7, 8).sliding(3, 3).forEach(Stream::println) *
output:
* [1, 2, 3]
* [4, 5, 6]
* [7, 8]
* *
============================================================================
* Stream.of(1, 2, 3, 4, 5, 6, 7, 5).sliding(3, 5).forEach(Stream::println) *
output:
* [1, 2, 3]
* [6, 7, 8]
* *
* This method only run sequentially, even in parallel stream. * * @param windowSize * @param increment * @return */ @SequentialOnly Stream sliding(int windowSize, int increment); /** * * @param windowSize * @param increment * @return * @see #sliding(int, int) */ @SequentialOnly Stream slidingToList(int windowSize, int increment); /** *
* This method only run sequentially, even in parallel stream. * * @param c * @return * @see IntList#intersection(IntList) */ @SequentialOnly S intersection(Collection c); /** *
* This method only run sequentially, even in parallel stream. * * @param c * @return * @see IntList#difference(IntList) */ @SequentialOnly S difference(Collection c); /** *
* This method only run sequentially, even in parallel stream. * * @param c * @return * @see IntList#symmetricDifference(IntList) */ @SequentialOnly S symmetricDifference(Collection c); /** *
* All elements will be loaded to memory and sorted if not yet. * * @return */ @SequentialOnly Optional> percentiles(); /** * *
* This method only run sequentially, even in parallel stream and all elements will be loaded to memory. * * @return */ @SequentialOnly S reversed(); /** * *
* This method only run sequentially, even in parallel stream and all elements will be loaded to memory. * * @return */ @SequentialOnly S shuffled(); /** * *
* This method only run sequentially, even in parallel stream and all elements will be loaded to memory. * * @return */ @SequentialOnly S shuffled(Random rnd); /** * *
* This method only run sequentially, even in parallel stream and all elements will be loaded to memory. * * @return */ @SequentialOnly S rotated(int distance); /** * Returns a stream consisting of the distinct elements of this stream. * * @return */ @SequentialOnly S distinct(); /** * Returns a stream consisting of the elements of this stream in sorted order. * *
* All elements will be loaded to memory. * * @return the new stream */ @ParallelSupported S sorted(); @ParallelSupported S reverseSorted(); @SequentialOnly S prepend(S stream); @SequentialOnly S append(S s); @SequentialOnly S appendIfEmpty(Supplier suppliers); // /** // *
// * This method only run sequentially, even in parallel stream and all elements will be loaded to memory. // * // * @return // */ // @SequentialOnly // S cached(); /** *
* This method only run sequentially, even in parallel stream. * * @return */ @SequentialOnly Stream indexed(); @SequentialOnly String join(CharSequence delimiter); @SequentialOnly String join(final CharSequence delimiter, final CharSequence prefix, final CharSequence suffix); /** * * @param n * @return */ @SequentialOnly S skip(long n); /** * * @param n * @param consumer * @return */ @ParallelSupported S skip(long n, C consumer); /** * * @param maxSize * @return */ @SequentialOnly S limit(long maxSize); /** * Same as: {@code stream.skip(from).limit(to - from)}. * * @param from * @param to * @return */ @SequentialOnly S slice(long from, long to); @SequentialOnly S step(long step); /** * * @return */ @SequentialOnly long count(); /** * * @param action * @return */ @ParallelSupported S peek(C action); /** * Same as {@code peek} * * @param action * @return * @see #peek(Object) */ @ParallelSupported S carry(C action); @SequentialOnly OT first(); @SequentialOnly OT last(); /** * * @return * @throws DuplicatedResultException if there are more than one element in this stream. */ @SequentialOnly OT onlyOne() throws DuplicatedResultException; /** * * @return */ @SequentialOnly A toArray(); @SequentialOnly List toList(); @SequentialOnly Set toSet(); @SequentialOnly ImmutableList toImmutableList(); @SequentialOnly ImmutableSet toImmutableSet(); @SequentialOnly > CC toCollection(Supplier supplier); @SequentialOnly Multiset toMultiset(); @SequentialOnly Multiset toMultiset(Supplier> supplier); @SequentialOnly LongMultiset toLongMultiset(); @SequentialOnly LongMultiset toLongMultiset(Supplier> supplier); /** * Returns an iterator for the elements of this stream. * *
* Remember to close this Stream after the iteration is done, if required. * * @return the element iterator for this stream */ @SequentialOnly ITER iterator(); @Beta @SequentialOnly void println(); @SequentialOnly R __(Function transfer); // @SequentialOnly // Try tried(); /** * * @param closeHandler * @return */ @SequentialOnly S onClose(Runnable closeHandler); /** * */ @Override @SequentialOnly void close(); /** * * @return */ boolean isParallel(); /** * * @return */ S sequential(); /** * * @return */ S parallel(); /** * * @param maxThreadNum * @return */ S parallel(int maxThreadNum); /** * * @param splitor * @return */ S parallel(Splitor splitor); /** * Returns an equivalent stream that is parallel. May return itself if the stream was already parallel with the same maxThreadNum and splitor as the specified ones. * *

* When to use parallel Streams? *
*
     * 
     * Profiler.run(1, 1, 3, "sequential", () -> Stream.of(list).operation(F)...).printResult();
     * Profiler.run(1, 1, 3, "parallel", () -> Stream.of(list).parallel().operation(F)...).printResult();
     * 
     * 
* * Here is a sample performance test with computer: CPU Intel i7-3520M 4-cores 2.9 GHz, JDK 1.8.0_101, Windows 7: * *
     * 
     * public void test_perf() {
     *     final String[] strs = new String[10_000];
     *     N.fill(strs, N.uuid());
     * 
     *     final int m = 1;
     *     final Function mapper = str -> {
     *         long result = 0;
     *         for (int i = 0; i < m; i++) {
     *             result += sum(str.toCharArray()) + 1;
     *         }
     *         return result;
     *     };
     * 
     *     final MutableLong sum = MutableLong.of(0);
     * 
     *     for (int i = 0, len = strs.length; i < len; i++) {
     *         sum.add(mapper.apply(strs[i]));
     *     }
     * 
     *     final int threadNum = 1, loopNum = 100, roundNum = 3;
     * 
     *     Profiler.run(threadNum, loopNum, roundNum, "For Loop", () -> {
     *         long result = 0;
     *         for (int i = 0, len = strs.length; i < len; i++) {
     *             result += mapper.apply(strs[i]);
     *         }
     *         assertEquals(sum.longValue(), result);
     *     }).printResult();
     * 
     *     Profiler.run(threadNum, loopNum, roundNum, "JDK Sequential",
     *             () -> assertEquals(sum.longValue(), java.util.stream.Stream.of(strs).map(mapper).mapToLong(e -> e).sum())).printResult();
     * 
     *     Profiler.run(threadNum, loopNum, roundNum, "JDK Parallel",
     *             () -> assertEquals(sum.longValue(), java.util.stream.Stream.of(strs).parallel().map(mapper).mapToLong(e -> e).sum())).printResult();
     * 
     *     Profiler.run(threadNum, loopNum, roundNum, "Abcus Sequential",
     *             () -> assertEquals(sum.longValue(), Stream.of(strs).map(mapper).mapToLong(e -> e).sum().longValue())).printResult();
     * 
     *     Profiler.run(threadNum, loopNum, roundNum, "Abcus Parallel",
     *             () -> assertEquals(sum.longValue(), Stream.of(strs).parallel().map(mapper).mapToLong(e -> e).sum().longValue())).printResult();
     * }
     * 
     * 
* And test result: Unit is milliseconds. N(the number of elements) is 10_000, Q(cost per element of F, the per-element function (usually a lambda), here is mapper) is calculated by: value of 'For loop' / N(10_000). * * * * * * * * *
m = 1 m = 10m = 50m = 100m = 500m = 1000
Q 0.000020.00020.0010.0020.010.02
For Loop0.232.31122110219
JDK Sequential0.282.31122114212
JDK Parallel0.221.361266122
Abcus Sequential0.321122112212
Abcus Parallel1111111677128
* * Comparison: *
    *
  • Again, do NOT and should NOT use parallel Streams if you don't have any performance problem with sequential Streams, because using parallel Streams has extra cost.
  • *
  • Again, consider using parallel Streams only when N(the number of elements) * Q(cost per element of F, the per-element function (usually a lambda)) is big enough.
  • *
  • The implementation of parallel Streams in Abacus is more than 10 times, slower than parallel Streams in JDK when Q is tiny(here is less than 0.0002 milliseconds by the test):
  • *
      *
    • The implementation of parallel Streams in JDK 8 still can beat the sequential/for loop when Q is tiny(Here is 0.00002 milliseconds by the test). * That's amazing, considering the extra cost brought by parallel computation. It's well done.
    • *
    • The implementation of parallel Streams in Abacus is pretty simple and straight forward. * The extra cost(starting threads/synchronization/queue...) brought by parallel Streams in Abacus is too bigger to tiny Q(Here is less than 0.001 milliseconds by the test). * But it starts to be faster than sequential Streams when Q is big enough(Here is 0.001 milliseconds by the test) and starts to catch the parallel Streams in JDK when Q is bigger(Here is 0.01 milliseconds by the test).
    • *
    • Consider using the parallel Streams in Abacus when Q is big enough, specially when IO involved in F. * Because one IO operation(e.g. DB/web service request..., Reading/Writing file...) usually takes 1 to 1000 milliseconds, or even longer. * By the parallel Streams APIs in Abacus, it's very simple to specify max thread numbers. Sometimes, it's much faster to execute IO/Network requests with a bit more threads. * It's fair to say that the parallel Streams in Abacus is high efficient, may same as or faster than the parallel Streams in JDK when Q is big enough, except F is heavy cpu-used operation. * Most of the times, the Q is big enough to consider using parallel Stream is because IO/Network is involved in F.
    • *
    *
  • JDK 7 is supported by the Streams in Abacus. It's perfect to work with retrolambda on Android
  • *
  • All primitive types are supported by Stream APIs in Abacus except boolean
  • *
* *

* A bit more about Lambdas/Stream APIs, you may heard that Lambdas/Stream APIs is 5 time slower than imperative programming. * It's true when Q and F is VERY, VERY tiny, like f = (int a, int b) -> a + b;. * But if we look into the samples in the article and think about it: it just takes less than 1 milliseconds to get the max value in 100k numbers. * There is potential performance issue only if the "get the max value in 100K numbers" call many, many times in your API or single request. * Otherwise, the difference between 0.1 milliseconds to 0.5 milliseconds can be totally ignored. * Usually we meet performance issue only if Q and F is big enough. However, the performance of Lambdas/Streams APIs is closed to for loop when Q and F is big enough. * No matter in which scenario, We don't need and should not concern the performance of Lambdas/Stream APIs. * *

* Although it's is parallel Streams, it doesn't means all the methods are executed in parallel. * Because the sequential way is as fast, or even faster than the parallel way for some methods, or is pretty difficult, if not possible, to implement the method by parallel approach. * Here are the methods which are executed sequentially even in parallel Streams. *

* splitXXX/splitAt/splitBy/slidingXXX/collapse, distinct, reverse, rotate, shuffle, indexed, cached, top, kthLargest, count, toArray, toList, toList, toSet, toMultiset, toLongMultiset, * intersection(Collection c), difference(Collection c), symmetricDifference(Collection c), forEach(identity, accumulator, predicate), findFirstOrLast, findFirstAndLast * * @param maxThreadNum Default value is the number of cpu-cores. Steps/operations will be executed sequentially if maxThreadNum is 1. * @param splitor The target array is split by ranges for multiple threads if splitor is splitor.ARRAY and target stream composed by array. It looks like: * *

     * for (int i = 0; i < maxThreadNum; i++) {
     *     final int sliceIndex = i;
     * 
     *     futureList.add(asyncExecutor.execute(new Runnable() {
     *         public void run() {
     *             int cursor = fromIndex + sliceIndex * sliceSize;
     *             final int to = toIndex - cursor > sliceSize ? cursor + sliceSize : toIndex;
     *             while (cursor < to) {
     *                 action.accept(elements[cursor++]);
     *             }
     *        }
     *    }));
     * }
     * 
* Otherwise, each thread will get the elements from the target array/iterator in the stream one by one with the target array/iterator synchronized. It looks like: *

     * for (int i = 0; i < maxThreadNum; i++) {
     *     futureList.add(asyncExecutor.execute(new Runnable() {
     *         public void run() {
     *             T next = null;
     * 
     *             while (true) {
     *                 synchronized (elements) {
     *                     if (cursor.intValue() < toIndex) {
     *                         next = elements[cursor.getAndIncrement()];
     *                     } else {
     *                         break;
     *                     }
     *                 }
     * 
     *                 action.accept(next);
     *             }
     *         }
     *     }));
     * }
     * 
* Using splitor.ARRAY only when F (the per-element function (usually a lambda)) is very tiny and the cost of synchronization on the target array/iterator is too big to it. * For the F involving IO or taking 'long' to complete, choose splitor.ITERATOR. Default value is splitor.ITERATOR. * @return * @see Nth * @see com.landawn.abacus.util.Profiler#run(int, int, int, String, Runnable) * @see Understanding Parallel Stream Performance in Java SE 8 * @see When to use parallel Streams */ S parallel(int maxThreadNum, Splitor splitor); /** * * @param maxThreadNum * @param executor should be able to execute sum of {@code maxThreadNum} operations in parallel. * @return */ S parallel(int maxThreadNum, Executor executor); /** * * @param executor should be able to execute sum of {@code maxThreadNum} operations in parallel. * @return */ S parallel(Executor executor); // /** // * Returns a new sequential {@code SS} by apply {@code thisStream.parallel()} to the specified {@code func}. // * It's equal to: // *
    //     * 
    //     * thisStream.parallel().(action by func).sequential();
    //     * 
    //     * 
// * // * @param func // * @return // * @deprecated // */ // @Deprecated // > SS parallelOnly(Function func); // // /** // * Returns a new sequential {@code SS} by apply {@code thisStream.parallel(maxThreadNum)} to the specified {@code func}. // * It's equal to: // *
    //     * 
    //     * thisStream.parallel(maxThreadNum).(action by func).sequential();
    //     * 
    //     * 
// * // * @param maxThreadNum // * @param func // * @return // * @deprecated // */ // @Deprecated // > SS parallelOnly(int maxThreadNum, Function func); // // /** // * Returns a new sequential {@code S} by apply {@code thisStream.parallel(maxThreadNum, executor)} to the specified {@code func}. // * // *
    //     * 
    //     * thisStream.parallel(maxThreadNum, executor).(action by func).sequential();
    //     * 
    //     * 
// * // * @param maxThreadNum // * @param executor should be able to execute sum of {@code maxThreadNum} operations in parallel. // * @return // * @deprecated // */ // @Deprecated // > SS parallelOnly(int maxThreadNum, Executor executor, Function func); // /** // * Return the underlying maxThreadNum if the stream is parallel, otherwise 1 is returned. // * // * @return // */ // int maxThreadNum(); // /** // * Returns a parallel stream with the specified maxThreadNum . Or return // * itself, either because the stream was already parallel with same maxThreadNum, or because // * it's a sequential stream. // * // * @param maxThreadNum // * @return // * @deprecated // */ // @Deprecated // S maxThreadNum(int maxThreadNum); // /** // * Return the underlying splitor if the stream is parallel, otherwise the default value splitor.ITERATOR is returned. // * // * @return // */ // Splitor splitor(); // /** // * Returns a parallel stream with the specified splitor . Or return // * itself, either because the stream was already parallel with same splitor, or because // * it's a sequential stream. // * // * @param splitor // * @return // * @deprecated // */ // @Deprecated // S splitor(Splitor splitor); public static enum Splitor { ARRAY, ITERATOR; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy