All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.pekko.stream.scaladsl.StreamConverters.scala Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * license agreements; and to You under the Apache License, version 2.0:
 *
 *   https://www.apache.org/licenses/LICENSE-2.0
 *
 * This file is part of the Apache Pekko project, which was derived from Akka.
 */

/*
 * Copyright (C) 2009-2022 Lightbend Inc. <https://www.lightbend.com>
 */

package org.apache.pekko.stream.scaladsl

import java.io.{ InputStream, OutputStream }
import java.util.Spliterators
import java.util.stream.{ Collector, StreamSupport }

import scala.concurrent.{ Await, Future }
import scala.concurrent.duration._
import scala.concurrent.duration.Duration._

import org.apache.pekko
import pekko.NotUsed
import pekko.stream.{ Attributes, IOResult, SinkShape }
import pekko.stream.impl._
import pekko.stream.impl.Stages.DefaultAttributes
import pekko.stream.impl.io.{ InputStreamSinkStage, InputStreamSource, OutputStreamGraphStage, OutputStreamSourceStage }
import pekko.util.ByteString

/**
 * Converters for interacting with the blocking `java.io` streams APIs and Java 8 Streams
 */
object StreamConverters {

  /**
   * Creates a Source from an [[InputStream]] created by the given function.
   * Emitted elements are up to `chunkSize` sized [[pekko.util.ByteString]] elements.
   * The actual size of the emitted elements depends on how much data the underlying
   * [[java.io.InputStream]] returns on each read invocation. Such chunks will
   * never be larger than chunkSize though.
   *
   * You can configure the default dispatcher for this Source by changing the `pekko.stream.materializer.blocking-io-dispatcher` or
   * set it for a given Source by using [[pekko.stream.ActorAttributes]].
   *
   * It materializes a [[Future]] of [[IOResult]] containing the number of bytes read from the source file upon completion,
   * and a possible exception if IO operation was not completed successfully. Note that bytes having been read by the source does
   * not give any guarantee that the bytes were seen by downstream stages.
   *
   * The created [[InputStream]] will be closed when the [[Source]] is cancelled.
   *
   * @param in a function which creates the InputStream to read from
   * @param chunkSize the size of each read operation, defaults to 8192
   */
  def fromInputStream(in: () => InputStream, chunkSize: Int = 8192): Source[ByteString, Future[IOResult]] = {
    // Delegates to the blocking-IO stage; `in` is invoked once per materialization.
    Source.fromGraph(new InputStreamSource(in, chunkSize))
  }

  /**
   * Creates a Source which when materialized will return an [[OutputStream]] which it is possible
   * to write the ByteStrings to the stream this Source is attached to.
   *
   * This Source is intended for inter-operation with legacy APIs since it is inherently blocking.
   *
   * You can configure the internal buffer size by using [[pekko.stream.ActorAttributes]].
   *
   * The created [[OutputStream]] will be closed when the [[Source]] is cancelled, and closing the [[OutputStream]]
   * will complete this [[Source]].
   *
   * @param writeTimeout the max time the write operation on the materialized OutputStream should block, defaults to 5 seconds
   */
  def asOutputStream(writeTimeout: FiniteDuration = 5.seconds): Source[ByteString, OutputStream] =
    Source.fromGraph(new OutputStreamSourceStage(writeTimeout))

  /**
   * Creates a Sink which writes incoming [[pekko.util.ByteString]]s to an [[OutputStream]] created by the given function.
   *
   * Materializes a [[Future]] of [[IOResult]] that will be completed with the size of the file (in bytes) at the streams completion,
   * and a possible exception if IO operation was not completed successfully.
   *
   * You can configure the default dispatcher for this Source by changing the `pekko.stream.materializer.blocking-io-dispatcher` or
   * set it for a given Source by using [[pekko.stream.ActorAttributes]].
   * If `autoFlush` is true the OutputStream will be flushed whenever a byte array is written, defaults to false.
   *
   * The [[OutputStream]] will be closed when the stream flowing into this [[Sink]] is completed. The [[Sink]]
   * will cancel the stream when the [[OutputStream]] is no longer writable.
   *
   * @param out a function which creates the OutputStream to write to; invoked once per materialization
   * @param autoFlush whether to flush the OutputStream after every written element, defaults to false
   */
  def fromOutputStream(out: () => OutputStream, autoFlush: Boolean = false): Sink[ByteString, Future[IOResult]] =
    Sink.fromGraph(new OutputStreamGraphStage(out, autoFlush))

  /**
   * Creates a Sink which when materialized will return an [[InputStream]] which it is possible
   * to read the values produced by the stream this Sink is attached to.
   *
   * This Sink is intended for inter-operation with legacy APIs since it is inherently blocking.
   *
   * You can configure the internal buffer size by using [[pekko.stream.ActorAttributes]].
   *
   * The [[InputStream]] will be closed when the stream flowing into this [[Sink]] completes, and
   * closing the [[InputStream]] will cancel this [[Sink]].
   *
   * @param readTimeout the max time the read operation on the materialized InputStream should block, defaults to 5 seconds
   */
  def asInputStream(readTimeout: FiniteDuration = 5.seconds): Sink[ByteString, InputStream] =
    Sink.fromGraph(new InputStreamSinkStage(readTimeout))

  /**
   * Creates a sink which materializes into a ``Future`` which will be completed with result of the Java 8 ``Collector`` transformation
   * and reduction operations. This allows usage of Java 8 streams transformations for reactive streams. The ``Collector`` will trigger
   * demand downstream. Elements emitted through the stream will be accumulated into a mutable result container, optionally transformed
   * into a final representation after all input elements have been processed. The ``Collector`` can also do reduction
   * at the end. Reduction processing is performed sequentially.
   *
   * Note that a flow can be materialized multiple times, so the function producing the ``Collector`` must be able
   * to handle multiple invocations.
   *
   * @param collectorFactory produces a fresh ``Collector`` per materialization
   */
  def javaCollector[T, R](collectorFactory: () => java.util.stream.Collector[T, _ <: Any, R]): Sink[T, Future[R]] =
    // Fold all elements into a CollectorState (FirstCollectorState lazily creates the
    // Collector's accumulator container on first use), then finish() applies the
    // Collector's finisher to produce the final R. The intermediate accumulator type is
    // existential, hence the cast to `Collector[T, Any, R]`.
    Flow[T]
      .fold {
        new FirstCollectorState[T,
          R](collectorFactory.asInstanceOf[() => java.util.stream.Collector[T, Any, R]]): CollectorState[
          T, R]
      } { (state, elem) =>
        state.update(elem)
      }
      .map(state => state.finish())
      .toMat(Sink.head)(Keep.right)
      .withAttributes(DefaultAttributes.javaCollector)

  /**
   * Creates a sink which materializes into a ``Future`` which will be completed with result of the Java 8 ``Collector`` transformation
   * and reduction operations. This allows usage of Java 8 streams transformations for reactive streams. The ``Collector`` will trigger demand
   * downstream. Elements emitted through the stream will be accumulated into a mutable result container, optionally transformed
   * into a final representation after all input elements have been processed. The ``Collector`` can also do reduction
   * at the end. Reduction processing is performed in parallel based on graph ``Balance``.
   *
   * Note that a flow can be materialized multiple times, so the function producing the ``Collector`` must be able
   * to handle multiple invocations.
   *
   * @param parallelism number of parallel accumulation workers; `1` falls back to the sequential [[javaCollector]]
   * @param collectorFactory produces a fresh ``Collector`` per materialization
   */
  def javaCollectorParallelUnordered[T, R](parallelism: Int)(
      collectorFactory: () => java.util.stream.Collector[T, _ <: Any, R]): Sink[T, Future[R]] = {
    if (parallelism == 1) javaCollector[T, R](collectorFactory)
    else {
      // Fan-out/fan-in graph: Balance distributes elements across `parallelism` async
      // workers, each folding its share into its own CollectorState; Merge collects the
      // partial accumulators, and a final sequential fold combines them via the
      // Collector's combiner (ReducerState) before applying the finisher.
      Sink
        .fromGraph(GraphDSL.createGraph(Sink.head[R]) { implicit b => sink =>
          import GraphDSL.Implicits._
          val factory = collectorFactory.asInstanceOf[() => Collector[T, Any, R]]
          val balance = b.add(Balance[T](parallelism))
          val merge = b.add(Merge[CollectorState[T, R]](parallelism))

          for (i <- 0 until parallelism) {
            // `.async` puts each worker on its own asynchronous island so the
            // accumulation actually runs in parallel.
            val worker = Flow[T]
              .fold(new FirstCollectorState(factory): CollectorState[T, R]) { (state, elem) =>
                state.update(elem)
              }
              .async

            balance.out(i) ~> worker ~> merge.in(i)
          }

          merge.out
            .fold(new FirstReducerState(factory): ReducerState[T, R]) { (state, elem) =>
              state.update(elem.accumulated())
            }
            .map(state => state.finish()) ~> sink.in

          SinkShape(balance.in)
        })
        .withAttributes(DefaultAttributes.javaCollectorParallelUnordered)
    }
  }

  /**
   * Creates a sink which materializes into Java 8 ``Stream`` that can be run to trigger demand through the sink.
   * Elements emitted through the stream will be available for reading through the Java 8 ``Stream``.
   *
   * The Java 8 ``Stream`` will be ended when the stream flowing into this ``Sink`` completes, and closing the Java
   * ``Stream`` will cancel the inflow of this ``Sink``.
   *
   * If the Java 8 ``Stream`` throws exception the Pekko stream is cancelled.
   *
   * Be aware that Java ``Stream`` blocks current thread while waiting on next element from downstream.
   * As it is interacting with a blocking API the implementation runs on a separate dispatcher
   * configured through the ``pekko.stream.blocking-io-dispatcher``.
   * NOTE(review): other methods in this file cite `pekko.stream.materializer.blocking-io-dispatcher`
   * for the same setting — confirm which config path is the current one.
   */
  def asJavaStream[T](): Sink[T, java.util.stream.Stream[T]] = {
    // TODO removing the QueueSink name, see issue #22523
    // Buffer size 1: the iterator below pulls one element ahead, so no extra buffering
    // is needed in the sink itself.
    Sink
      .fromGraph(new QueueSink[T](1).withAttributes(Attributes.none))
      .mapMaterializedValue(queue =>
        StreamSupport
          .stream(
            Spliterators.spliteratorUnknownSize(
              new java.util.Iterator[T] {
                // Pull-ahead protocol: a pull is always in flight; hasNext blocks
                // (unbounded Await) until it resolves, next() hands out the element
                // and immediately starts the next pull.
                var nextElementFuture: Future[Option[T]] = queue.pull()
                // `= _` default-initializes to null; hasNext must be called before next().
                var nextElement: Option[T] = _

                override def hasNext: Boolean = {
                  // None signals the upstream completed, i.e. end of the Java Stream.
                  nextElement = Await.result(nextElementFuture, Inf)
                  nextElement.isDefined
                }

                override def next(): T = {
                  val next = nextElement.get
                  nextElementFuture = queue.pull()
                  next
                }
              },
              0),
            false)
          // Closing the Java Stream cancels the Pekko stream's inflow.
          .onClose(new Runnable { def run = queue.cancel() }))
      .withAttributes(DefaultAttributes.asJavaStream)
  }

  /**
   * Creates a source that wraps a Java 8 ``Stream``. ``Source`` uses a stream iterator to get all its
   * elements and send them downstream on demand.
   *
   * Example usage: `StreamConverters.fromJavaStream(() => IntStream.rangeClosed(1, 10))`
   *
   * You can use [[Source.async]] to create asynchronous boundaries between synchronous Java ``Stream``
   * and the rest of flow.
   *
   * @param stream produces a fresh ``BaseStream`` per materialization (a Java stream can only be consumed once)
   */
  def fromJavaStream[T, S <: java.util.stream.BaseStream[T, S]](
      stream: () => java.util.stream.BaseStream[T, S]): Source[T, NotUsed] =
    Source.fromGraph(new JavaStreamSource[T, S](stream)).withAttributes(DefaultAttributes.fromJavaStream)
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy