All Downloads are FREE. Search and download functionalities are using the official Maven repository.

zio.stream.ZTransducer.scala Maven / Gradle / Ivy

The newest version!
package zio.stream

import zio._

import java.nio.charset.{Charset, StandardCharsets}
import scala.collection.mutable

// Contract notes for transducers:
// - When a None is received, the transducer must flush all of its internal state
//   and remain empty until subsequent Some(Chunk) values.
//
//   Stated differently, after a first push(None), all subsequent push(None) must
//   result in Chunk.empty.
abstract class ZTransducer[-R, +E, -I, +O](val push: ZManaged[R, Nothing, Option[Chunk[I]] => ZIO[R, E, Chunk[O]]]) {
  self =>

  /**
   * Compose this transducer with another transducer, resulting in a composite transducer.
   */
  def >>>[R1 <: R, E1 >: E, O2 >: O, O3](that: ZTransducer[R1, E1, O2, O3]): ZTransducer[R1, E1, I, O3] =
    ZTransducer {
      self.push.zipWith(that.push) { (pushLeft, pushRight) =>
        {
          case None =>
            pushLeft(None).flatMap(cl =>
              if (cl.isEmpty) pushRight(None)
              else pushRight(Some(cl)).zipWith(pushRight(None))(_ ++ _)
            )
          case inputs @ Some(_) =>
            pushLeft(inputs).flatMap(cl => pushRight(Some(cl)))
        }
      }
    }

  /**
   * Compose this transducer with a sink, resulting in a sink that processes elements by piping
   * them through this transducer and piping the results into the sink.
   */
  def >>>[R1 <: R, E1 >: E, O2 >: O, I1 <: I, L, Z](that: ZSink[R1, E1, O2, L, Z]): ZSink[R1, E1, I1, L, Z] =
    ZSink[R1, E1, I1, L, Z] {
      self.push.zipWith(that.push) { (pushSelf, pushThat) =>
        {
          case None =>
            pushSelf(None)
              .mapError(e => (Left(e), Chunk.empty))
              .flatMap(chunk => pushThat(Some(chunk)) *> pushThat(None))
          case inputs @ Some(_) =>
            pushSelf(inputs)
              .mapError(e => (Left(e), Chunk.empty))
              .flatMap(chunk => pushThat(Some(chunk)))
        }
      }
    }

  /**
   * Transforms the inputs of this transducer.
   */
  final def contramap[J](f: J => I): ZTransducer[R, E, J, O] =
    ZTransducer(self.push.map(push => is => push(is.map(_.map(f)))))

  /**
   * Effectually transforms the inputs of this transducer
   */
  final def contramapM[R1 <: R, E1 >: E, J](f: J => ZIO[R1, E1, I]): ZTransducer[R1, E1, J, O] =
    ZTransducer[R1, E1, J, O](self.push.map(push => is => ZIO.foreach(is)(_.mapM(f)).flatMap(push)))

  /**
   * Filters the outputs of this transducer.
   */
  final def filter(p: O => Boolean): ZTransducer[R, E, I, O] =
    ZTransducer(self.push.map(push => i => push(i).map(_.filter(p))))

  /**
   * Filters the inputs of this transducer.
   */
  final def filterInput[I1 <: I](p: I1 => Boolean): ZTransducer[R, E, I1, O] =
    ZTransducer(self.push.map(push => is => push(is.map(_.filter(p)))))

  /**
   * Effectually filters the inputs of this transducer.
   */
  final def filterInputM[R1 <: R, E1 >: E, I1 <: I](p: I1 => ZIO[R1, E1, Boolean]): ZTransducer[R1, E1, I1, O] =
    ZTransducer[R1, E1, I1, O](self.push.map(push => is => ZIO.foreach(is)(_.filterM(p)).flatMap(push)))

  /**
   * Transforms the outputs of this transducer.
   */
  final def map[P](f: O => P): ZTransducer[R, E, I, P] =
    ZTransducer(self.push.map(push => i => push(i).map(_.map(f))))

  /**
   * Transforms the chunks emitted by this transducer.
   */
  final def mapChunks[O2](f: Chunk[O] => Chunk[O2]): ZTransducer[R, E, I, O2] =
    ZTransducer {
      self.push.map(push => (input: Option[Chunk[I]]) => push(input).map(f))
    }

  /**
   * Effectfully transforms the chunks emitted by this transducer.
   */
  final def mapChunksM[R1 <: R, E1 >: E, O2](
    f: Chunk[O] => ZIO[R1, E1, Chunk[O2]]
  ): ZTransducer[R1, E1, I, O2] =
    ZTransducer {
      self.push.map(push => (input: Option[Chunk[I]]) => push(input).flatMap(f))
    }

  /**
   * Transforms the outputs of this transducer.
   */
  final def mapError[E1](f: E => E1): ZTransducer[R, E1, I, O] =
    ZTransducer(self.push.map(push => i => push(i).mapError(f)))

  /**
   * Effectually transforms the outputs of this transducer
   */
  final def mapM[R1 <: R, E1 >: E, P](f: O => ZIO[R1, E1, P]): ZTransducer[R1, E1, I, P] =
    ZTransducer[R1, E1, I, P](self.push.map(push => i => push(i).flatMap(_.mapM(f))))
}

object ZTransducer extends ZTransducerPlatformSpecificConstructors {
  def apply[R, E, I, O](
    push: ZManaged[R, Nothing, Option[Chunk[I]] => ZIO[R, E, Chunk[O]]]
  ): ZTransducer[R, E, I, O] =
    new ZTransducer(push) {}

  /**
   * Shorthand form for [[ZTransducer.identity]]. Use as:
   * {{{
   * ZTransducer[Int].filter(_ % 2 != 0)
   * }}}
   */
  def apply[I]: ZTransducer[Any, Nothing, I, I] = identity[I]

  /**
   * Reads the first n values from the stream and uses them to choose the transducer that will be used for the remainder of the stream.
   * If the stream ends before it has collected n values the partial chunk will be provided to f.
   */
  def branchAfter[R, E, I, O](n: Int)(f: Chunk[I] => ZTransducer[R, E, I, O]): ZTransducer[R, E, I, O] =
    ZTransducer {
      sealed trait State
      object State {
        final case class Collecting(data: Chunk[I]) extends State
        final case class Emitting(finalizer: ZManaged.Finalizer, push: Option[Chunk[I]] => ZIO[R, E, Chunk[O]])
            extends State
        val initial: State = Collecting(Chunk.empty)
      }

      val toCollect = Math.max(0, n)

      ZManaged.scope.flatMap { scope =>
        ZRefM.makeManaged(State.initial).map { stateRef =>
          {
            case None =>
              stateRef.getAndSet(State.initial).flatMap {
                case State.Emitting(finalizer, push) =>
                  push(None) <* finalizer(Exit.unit)
                case State.Collecting(data) =>
                  f(data).push.use(_(None))
              }
            case Some(data) =>
              stateRef.modify {
                case s @ State.Emitting(_, push) =>
                  push(Some(data)).map((_, s))
                case s @ State.Collecting(collected) =>
                  if (data.isEmpty) ZIO.succeedNow((Chunk.empty, s))
                  else {
                    val remaining = toCollect - collected.length
                    if (remaining <= data.length) {
                      val (newCollected, remainder) = data.splitAt(remaining)
                      scope(f(collected ++ newCollected).push).flatMap { case (finalizer, push) =>
                        push(Some(remainder)).map((_, State.Emitting(finalizer, push)))
                      }
                    } else {
                      ZIO.succeedNow((Chunk.empty, State.Collecting(collected ++ data)))
                    }
                  }
              }
          }
        }
      }
    }

  /**
   * Creates a transducer accumulating incoming values into chunks of maximum size `n`.
   */
  def collectAllN[I](n: Int): ZTransducer[Any, Nothing, I, Chunk[I]] =
    ZTransducer {

      def go(in: Chunk[I], leftover: Chunk[I], outBuilder: ChunkBuilder[Chunk[I]]): (Chunk[Chunk[I]], Chunk[I]) = {
        val (left, nextIn) = in.splitAt(n - leftover.size)

        if (leftover.size + left.size < n) outBuilder.result() -> (leftover ++ left)
        else {
          val nextOutBuilder =
            if (leftover.nonEmpty) outBuilder += (leftover ++ left)
            else outBuilder += left
          go(nextIn, Chunk.empty, nextOutBuilder)
        }
      }

      ZRef.makeManaged[Chunk[I]](Chunk.empty).map { stateRef =>
        {
          case None =>
            stateRef
              .getAndSet(Chunk.empty)
              .map { leftover =>
                if (leftover.nonEmpty) Chunk(leftover)
                else Chunk.empty
              }

          case Some(in) =>
            stateRef.modify { leftover =>
              val (out, nextLeftover) = go(in, leftover, ChunkBuilder.make())
              out -> nextLeftover
            }
        }
      }
    }

  /**
   * Creates a transducer accumulating incoming values into maps of up to `n` keys. Elements
   * are mapped to keys using the function `key`; elements mapped to the same key will
   * be merged with the function `f`.
   */
  def collectAllToMapN[K, I](n: Long)(key: I => K)(f: (I, I) => I): ZTransducer[Any, Nothing, I, Map[K, I]] =
    foldWeighted(Map[K, I]())((acc, i: I) => if (acc contains key(i)) 0 else 1, n) { (acc, i) =>
      val k = key(i)

      if (acc contains k) acc.updated(k, f(acc(k), i))
      else acc.updated(k, i)
    }.filter(_.nonEmpty)

  /**
   * Creates a transducer accumulating incoming values into sets of maximum size `n`.
   */
  def collectAllToSetN[I](n: Long): ZTransducer[Any, Nothing, I, Set[I]] =
    foldWeighted(Set[I]())((acc, i: I) => if (acc(i)) 0 else 1, n)(_ + _).filter(_.nonEmpty)

  /**
   * Accumulates incoming elements into a chunk as long as they verify predicate `p`.
   */
  def collectAllWhile[I](p: I => Boolean): ZTransducer[Any, Nothing, I, List[I]] =
    fold[I, (List[I], Boolean)]((Nil, true))(_._2) { case ((as, _), a) =>
      if (p(a)) (a :: as, true) else (as, false)
    }.map(_._1.reverse).filter(_.nonEmpty)

  /**
   * Accumulates incoming elements into a chunk as long as they verify effectful predicate `p`.
   */
  def collectAllWhileM[R, E, I](p: I => ZIO[R, E, Boolean]): ZTransducer[R, E, I, List[I]] =
    foldM[R, E, I, (List[I], Boolean)]((Nil, true))(_._2) { case ((as, _), a) =>
      p(a).map(if (_) (a :: as, true) else (as, false))
    }.map(_._1.reverse).filter(_.nonEmpty)

  /**
   * Creates a transducer that always dies with the specified exception.
   */
  def die(e: => Throwable): ZTransducer[Any, Nothing, Any, Nothing] =
    ZTransducer(Managed.succeed((_: Any) => IO.die(e)))

  /**
   * Creates a transducer that starts consuming values as soon as one fails
   * the predicate `p`.
   */
  def dropWhile[I](p: I => Boolean): ZTransducer[Any, Nothing, I, I] =
    ZTransducer {
      for {
        dropping <- ZRef.makeManaged(true)
        push = { (is: Option[Chunk[I]]) =>
          is match {
            case None => UIO(Chunk.empty)
            case Some(is) =>
              dropping.modify {
                case false => is -> false
                case true =>
                  val is1 = is.dropWhile(p)
                  is1 -> (is1.length == 0)
              }
          }
        }
      } yield push
    }

  /**
   * Creates a transducer that starts consuming values as soon as one fails
   * the effectful predicate `p`.
   */
  def dropWhileM[R, E, I](p: I => ZIO[R, E, Boolean]): ZTransducer[R, E, I, I] =
    ZTransducer {
      for {
        dropping <- ZRef.makeManaged(true)
        push = { (is: Option[Chunk[I]]) =>
          is match {
            case None => UIO(Chunk.empty)
            case Some(is) =>
              dropping.get.flatMap {
                case false => UIO(is -> false)
                case true  => is.dropWhileM(p).map(is1 => is1 -> is1.isEmpty)
              }.flatMap { case (is, pt) => dropping.set(pt) as is }
          }
        }
      } yield push
    }

  /**
   * Creates a transducer that always fails with the specified failure.
   */
  def fail[E](e: => E): ZTransducer[Any, E, Any, Nothing] =
    ZTransducer(ZManaged.succeed((_: Option[Any]) => ZIO.fail(e)))

  /**
   * Creates a transducer by folding over a structure of type `O` for as long as
   * `contFn` results in `true`. The transducer will emit a value when `contFn`
   * evaluates to `false` and then restart the folding.
   */
  def fold[I, O](z: O)(contFn: O => Boolean)(f: (O, I) => O): ZTransducer[Any, Nothing, I, O] =
    ZTransducer {
      def go(in: Chunk[I], state: O, progress: Boolean): (Chunk[O], O, Boolean) =
        in.foldLeft[(Chunk[O], O, Boolean)]((Chunk.empty, state, progress)) { case ((os0, state, _), i) =>
          val o = f(state, i)
          if (contFn(o))
            (os0, o, true)
          else
            (os0 :+ o, z, false)
        }

      ZRef.makeManaged[Option[O]](Some(z)).map { state =>
        {
          case Some(in) =>
            state.modify { s =>
              val (o, s2, progress) = go(in, s.getOrElse(z), s.nonEmpty)
              if (progress)
                o -> Some(s2)
              else
                o -> None
            }
          case None => state.getAndSet(None).map(_.fold[Chunk[O]](Chunk.empty)(Chunk.single(_)))
        }
      }
    }

  /**
   * Creates a transducer by folding over a structure of type `O`. The transducer will
   * fold the inputs until the stream ends, resulting in a stream with one element.
   */
  def foldLeft[I, O](z: O)(f: (O, I) => O): ZTransducer[Any, Nothing, I, O] =
    fold(z)(_ => true)(f)

  /**
   * Creates a transducer by effectfully folding over a structure of type `O`. The transducer will
   * fold the inputs until the stream ends, resulting in a stream with one element.
   */
  def foldLeftM[R, E, I, O](z: O)(f: (O, I) => ZIO[R, E, O]): ZTransducer[R, E, I, O] =
    foldM(z)(_ => true)(f)

  /**
   * Creates a sink by effectfully folding over a structure of type `S`.
   */
  def foldM[R, E, I, O](z: O)(contFn: O => Boolean)(f: (O, I) => ZIO[R, E, O]): ZTransducer[R, E, I, O] =
    ZTransducer {
      val initial = Some(z)

      def go(in: Chunk[I], state: O, progress: Boolean): ZIO[R, E, (Chunk[O], O, Boolean)] =
        in.foldM[R, E, (Chunk[O], O, Boolean)]((Chunk.empty, state, progress)) { case ((os0, state, _), i) =>
          f(state, i).map { o =>
            if (contFn(o))
              (os0, o, true)
            else
              (os0 :+ o, z, false)
          }
        }

      ZRef.makeManaged[Option[O]](initial).map { state =>
        {
          case Some(in) =>
            state.get.flatMap(s => go(in, s.getOrElse(z), s.nonEmpty)).flatMap { case (os, s, progress) =>
              if (progress)
                state.set(Some(s)) *> Push.emit(os)
              else
                state.set(None) *> Push.emit(os)
            }
          case None =>
            state.getAndSet(None).map(_.fold[Chunk[O]](Chunk.empty)(Chunk.single(_)))
        }
      }
    }

  /**
   * Creates a transducer that folds elements of type `I` into a structure
   * of type `O` until `max` elements have been folded.
   *
   * Like [[foldWeighted]], but with a constant cost function of 1.
   */
  def foldUntil[I, O](z: O, max: Long)(f: (O, I) => O): ZTransducer[Any, Nothing, I, O] =
    fold[I, (O, Long)]((z, 0))(_._2 < max) { case ((o, count), i) =>
      (f(o, i), count + 1)
    }.map(_._1)

  /**
   * Creates a transducer that effectfully folds elements of type `I` into a structure
   * of type `O` until `max` elements have been folded.
   *
   * Like [[foldWeightedM]], but with a constant cost function of 1.
   */
  def foldUntilM[R, E, I, O](z: O, max: Long)(f: (O, I) => ZIO[R, E, O]): ZTransducer[R, E, I, O] =
    foldM[R, E, I, (O, Long)]((z, 0))(_._2 < max) { case ((o, count), i) =>
      f(o, i).map((_, count + 1))
    }.map(_._1)

  /**
   * Creates a transducer that folds elements of type `I` into a structure
   * of type `O`, until `max` worth of elements (determined by the `costFn`)
   * have been folded.
   *
   * @note Elements that have an individual cost larger than `max` will
   * force the transducer to cross the `max` cost. See [[foldWeightedDecompose]]
   * for a variant that can handle these cases.
   */
  def foldWeighted[I, O](z: O)(costFn: (O, I) => Long, max: Long)(f: (O, I) => O): ZTransducer[Any, Nothing, I, O] =
    foldWeightedDecompose[I, O](z)(costFn, max, Chunk.single(_))(f)

  /**
   * Creates a transducer that folds elements of type `I` into a structure
   * of type `O`, until `max` worth of elements (determined by the `costFn`)
   * have been folded.
   *
   * The `decompose` function will be used for decomposing elements that
   * cause an `O` aggregate to cross `max` into smaller elements. For
   * example:
   * {{{
   * Stream(1, 5, 1)
   *  .aggregate(
   *    ZTransducer
   *      .foldWeightedDecompose(List[Int]())((i: Int) => i.toLong, 4,
   *        (i: Int) => Chunk(i - 1, 1)) { (acc, el) =>
   *        el :: acc
   *      }
   *      .map(_.reverse)
   *  )
   *  .runCollect
   * }}}
   *
   * The stream would emit the elements `List(1), List(4), List(1, 1)`.
   *
   * Be vigilant with this function, it has to generate "simpler" values
   * or the fold may never end. A value is considered indivisible if
   * `decompose` yields the empty chunk or a single-valued chunk. In
   * these cases, there is no other choice than to yield a value that
   * will cross the threshold.
   *
   * The [[foldWeightedDecomposeM]] allows the decompose function
   * to return a `ZIO` value, and consequently it allows the transducer
   * to fail.
   */
  def foldWeightedDecompose[I, O](
    z: O
  )(costFn: (O, I) => Long, max: Long, decompose: I => Chunk[I])(f: (O, I) => O): ZTransducer[Any, Nothing, I, O] =
    ZTransducer {
      case class FoldWeightedState(result: O, cost: Long)

      val initial = FoldWeightedState(z, 0)

      def go(
        in: Chunk[I],
        os0: Chunk[O],
        state: FoldWeightedState,
        dirty: Boolean
      ): (Chunk[O], FoldWeightedState, Boolean) =
        in.foldLeft[(Chunk[O], FoldWeightedState, Boolean)]((os0, state, dirty)) { case ((os0, state, _), i) =>
          val total = state.cost + costFn(state.result, i)

          if (total > max) {
            val is = decompose(i)

            if (is.length <= 1 && !dirty)
              // If `i` cannot be decomposed, we need to cross the `max` threshold. To
              // minimize "injury", we only allow this when we haven't added anything else
              // to the aggregate (dirty = false).
              (os0 :+ f(state.result, if (is.nonEmpty) is(0) else i), initial, false)
            else if (is.length <= 1 && dirty) {
              // If the state is dirty and `i` cannot be decomposed, we close the current
              // aggregate and a create new one from `is`. We're not adding `f(initial, i)` to
              // the results immediately because it could be that `i` by itself does not
              // cross the threshold, so we can attempt to aggregate it with subsequent elements.
              val elem = if (is.nonEmpty) is(0) else i
              (os0 :+ state.result, FoldWeightedState(f(initial.result, elem), costFn(initial.result, elem)), true)
            } else
              // `i` got decomposed, so we will recurse and see whether the decomposition
              // can be aggregated without crossing `max`.
              go(is, os0, state, dirty)
          } else (os0, FoldWeightedState(f(state.result, i), total), true)
        }

      ZRef.makeManaged[Option[FoldWeightedState]](Some(initial)).map { state =>
        {
          case Some(in) =>
            state.modify { s =>
              val (o, s2, dirty) = go(in, Chunk.empty, s.getOrElse(initial), s.nonEmpty)
              if (dirty)
                o -> Some(s2)
              else
                o -> None
            }
          case None => state.getAndSet(None).map(_.fold[Chunk[O]](Chunk.empty)(s => Chunk.single(s.result)))
        }
      }
    }

  /**
   * Creates a transducer that effectfully folds elements of type `I` into a structure
   * of type `S`, until `max` worth of elements (determined by the `costFn`) have
   * been folded.
   *
   * @note Elements that have an individual cost larger than `max` will
   * force the transducer to cross the `max` cost. See [[foldWeightedDecomposeM]]
   * for a variant that can handle these cases.
   */
  def foldWeightedM[R, E, I, O](
    z: O
  )(costFn: (O, I) => ZIO[R, E, Long], max: Long)(f: (O, I) => ZIO[R, E, O]): ZTransducer[R, E, I, O] =
    foldWeightedDecomposeM(z)(costFn, max, (i: I) => UIO.succeedNow(Chunk.single(i)))(f)

  /**
   * Creates a transducer that effectfully folds elements of type `I` into a structure
   * of type `S`, until `max` worth of elements (determined by the `costFn`) have
   * been folded.
   *
   * The `decompose` function will be used for decomposing elements that
   * cause an `S` aggregate to cross `max` into smaller elements. Be vigilant with
   * this function, it has to generate "simpler" values or the fold may never end.
   * A value is considered indivisible if `decompose` yields the empty chunk or a
   * single-valued chunk. In these cases, there is no other choice than to yield
   * a value that will cross the threshold.
   *
   * See [[foldWeightedDecompose]] for an example.
   */
  def foldWeightedDecomposeM[R, E, I, O](z: O)(
    costFn: (O, I) => ZIO[R, E, Long],
    max: Long,
    decompose: I => ZIO[R, E, Chunk[I]]
  )(f: (O, I) => ZIO[R, E, O]): ZTransducer[R, E, I, O] =
    ZTransducer {
      final case class FoldWeightedState(result: O, cost: Long)

      val initial = FoldWeightedState(z, 0)

      def go(
        in: Chunk[I],
        os: Chunk[O],
        state: FoldWeightedState,
        dirty: Boolean
      ): ZIO[R, E, (Chunk[O], FoldWeightedState, Boolean)] =
        in.foldM[R, E, (Chunk[O], FoldWeightedState, Boolean)]((os, state, dirty)) { case ((os, state, _), i) =>
          costFn(state.result, i).flatMap { cost =>
            val total = cost + state.cost
            if (total > max)
              decompose(i).flatMap(is =>
                // See comments on `foldWeightedDecompose` for details on every case here.
                if (is.length <= 1 && !dirty)
                  f(state.result, if (is.nonEmpty) is(0) else i).map(o => ((os :+ o), initial, false))
                else if (is.length <= 1 && dirty) {
                  val elem = if (is.nonEmpty) is(0) else i

                  f(initial.result, elem).zipWith(costFn(initial.result, elem)) { (s, cost) =>
                    (os :+ state.result, FoldWeightedState(s, cost), true)
                  }
                } else go(is, os, state, dirty)
              )
            else
              f(state.result, i).map(o => (os, FoldWeightedState(o, total), true))
          }
        }

      ZRef.makeManaged[Option[FoldWeightedState]](Some(initial)).map { state =>
        {
          case Some(in) =>
            state.get.flatMap(s => go(in, Chunk.empty, s.getOrElse(initial), s.nonEmpty)).flatMap {
              case (os, s, dirty) =>
                if (dirty)
                  state.set(Some(s)) *> Push.emit(os)
                else
                  state.set(None) *> Push.emit(os)
            }
          case None =>
            state.getAndSet(None).map(_.fold[Chunk[O]](Chunk.empty)(s => Chunk.single(s.result)))
        }
      }
    }

  /**
   * Creates a transducer that always evaluates the specified effect.
   */
  def fromEffect[R, E, A](zio: ZIO[R, E, A]): ZTransducer[R, E, Any, A] =
    ZTransducer(Managed.succeed((_: Any) => zio.map(Chunk.single(_))))

  /**
   * Creates a transducer that purely transforms incoming values.
   */
  def fromFunction[I, O](f: I => O): ZTransducer[Any, Nothing, I, O] =
    identity.map(f)

  /**
   * Creates a transducer that effectfully transforms incoming values.
   */
  def fromFunctionM[R, E, I, O](f: I => ZIO[R, E, O]): ZTransducer[R, E, I, O] =
    identity.mapM(f(_))

  /**
   * Creates a transducer from a chunk processing function.
   */
  def fromPush[R, E, I, O](push: Option[Chunk[I]] => ZIO[R, E, Chunk[O]]): ZTransducer[R, E, I, O] =
    ZTransducer(Managed.succeed(push))

  /**
   * Creates a transducer that returns the first element of the stream, if it exists.
   */
  def head[O]: ZTransducer[Any, Nothing, O, Option[O]] =
    foldLeft[O, Option[O]](Option.empty[O]) { case (acc, a) =>
      acc match {
        case Some(_) => acc
        case None    => Some(a)
      }
    }

  /**
   * The identity transducer. Passes elements through.
   */
  def identity[I]: ZTransducer[Any, Nothing, I, I] =
    ZTransducer.fromPush {
      case Some(is) => ZIO.succeedNow(is)
      case None     => ZIO.succeedNow(Chunk.empty)
    }

  /**
   * Decodes chunks of ISO/IEC 8859-1 bytes into strings.
   *
   * This transducer uses the String constructor's behavior when handling malformed byte
   * sequences.
   */
  val iso_8859_1Decode: ZTransducer[Any, Nothing, Byte, String] =
    ZTransducer.fromPush {
      case Some(is) => ZIO.succeedNow(Chunk.single(new String(is.toArray, StandardCharsets.ISO_8859_1)))
      case None     => ZIO.succeedNow(Chunk.empty)
    }

  /**
   * Creates a transducer that returns the last element of the stream, if it exists.
   */
  def last[O]: ZTransducer[Any, Nothing, O, Option[O]] =
    foldLeft[O, Option[O]](Option.empty[O])((_, a) => Some(a))

  /**
   * Emits the provided chunk before emitting any other value.
   */
  def prepend[A](values: Chunk[A]): ZTransducer[Any, Nothing, A, A] =
    ZTransducer {
      ZRef.makeManaged(values).map { stateRef =>
        {
          case None =>
            stateRef.getAndSet(Chunk.empty)
          case Some(xs) =>
            stateRef.getAndSet(Chunk.empty).map(c => if (c.isEmpty) xs else c ++ xs)
        }
      }
    }

  /**
   * Splits strings on newlines. Handles both Windows newlines (`\r\n`) and UNIX newlines (`\n`).
   */
  val splitLines: ZTransducer[Any, Nothing, String, String] =
    ZTransducer {
      ZRef.makeManaged[(Option[String], Boolean)]((None, false)).map { stateRef =>
        {
          case None =>
            stateRef.getAndSet((None, false)).flatMap {
              case (None, _)      => ZIO.succeedNow(Chunk.empty)
              case (Some(str), _) => ZIO.succeedNow(Chunk(str))
            }

          case Some(strings) =>
            stateRef.modify { case (leftover, wasSplitCRLF) =>
              val buf    = mutable.ArrayBuffer[String]()
              var inCRLF = wasSplitCRLF
              var carry  = leftover getOrElse ""

              strings.foreach { string =>
                val concat = carry + string

                if (concat.length() > 0) {
                  var i =
                    // If we had a split CRLF, we start reading
                    // from the last character of the leftover (which was the '\r')
                    if (inCRLF && carry.length > 0) carry.length - 1
                    // Otherwise we just skip over the entire previous leftover as
                    // it doesn't contain a newline.
                    else carry.length
                  var sliceStart = 0

                  while (i < concat.length()) {
                    if (concat(i) == '\n') {
                      buf += concat.substring(sliceStart, i)
                      i += 1
                      sliceStart = i
                    } else if (concat(i) == '\r' && (i + 1) < concat.length && concat(i + 1) == '\n') {
                      buf += concat.substring(sliceStart, i)
                      i += 2
                      sliceStart = i
                    } else if (concat(i) == '\r' && i == concat.length - 1) {
                      inCRLF = true
                      i += 1
                    } else {
                      i += 1
                    }
                  }

                  carry = concat.substring(sliceStart, concat.length)
                }
              }

              (Chunk.fromArray(buf.toArray), (if (carry.length() > 0) Some(carry) else None, inCRLF))
            }
        }
      }
    }

  /**
   * Splits strings on a delimiter.
   */
  def splitOn(delimiter: String): ZTransducer[Any, Nothing, String, String] = {
    val chars = ZTransducer.fromFunction[String, Chunk[Char]](s => Chunk.fromArray(s.toArray)).mapChunks(_.flatten)
    val split = splitOnChunk(Chunk.fromArray(delimiter.toArray)).map(_.mkString(""))
    chars >>> split
  }

  /**
   * Splits elements on a delimiter and transforms the splits into desired output.
   */
  def splitOnChunk[A](delimiter: Chunk[A]): ZTransducer[Any, Nothing, A, Chunk[A]] =
    ZTransducer {
      ZRef.makeManaged[(Option[Chunk[A]], Int)](None -> 0).map { state =>
        {
          case None =>
            state.modify {
              case s @ (None, _)    => Chunk.empty         -> s
              case (Some(chunk), _) => Chunk.single(chunk) -> (None -> 0)
            }
          case Some(inputChunk: Chunk[A]) =>
            state.modify { s0 =>
              var out: mutable.ArrayBuffer[Chunk[A]] = null
              var chunkIndex                         = 0
              var buffer: Chunk[A]                   = s0._1.getOrElse(Chunk.empty)
              var delimIndex                         = s0._2
              while (chunkIndex < inputChunk.length) {
                val in    = buffer :+ inputChunk(chunkIndex)
                var index = buffer.length
                var start = 0
                buffer = Chunk.empty
                while (index < in.length) {
                  while (delimIndex < delimiter.length && index < in.length && in(index) == delimiter(delimIndex)) {
                    delimIndex += 1
                    index += 1
                  }
                  if (delimIndex == delimiter.length || in.isEmpty) {
                    if (out eq null) out = mutable.ArrayBuffer[Chunk[A]]()
                    val slice = in.slice(start, index - delimiter.length)
                    out += slice
                    delimIndex = 0
                    start = index
                  }
                  if (index < in.length) {
                    delimIndex = 0
                    while (index < in.length && in(index) != delimiter(0)) index += 1
                  }
                }

                if (start < in.length) {
                  buffer = in.drop(start)
                }

                chunkIndex += 1
              }

              val chunk = if (out eq null) Chunk.empty else Chunk.fromArray(out.toArray)
              val buf   = if (buffer.isEmpty) None else Some(buffer)
              chunk -> (buf -> delimIndex)
            }
        }
      }
    }

  /**
   * Decodes chunks of Unicode bytes into strings.
   *
   * Detects byte order marks for UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, UTF-32LE or defaults
   * to UTF-8 if no BOM is detected.
   */
  val utfDecode: ZTransducer[Any, Nothing, Byte, String] =
    branchAfter(4) { bytes =>
      bytes.toList match {
        case 0 :: 0 :: -2 :: -1 :: Nil if Charset.isSupported("UTF-32BE") => utf32BEDecode
        case -2 :: -1 :: 0 :: 0 :: Nil if Charset.isSupported("UTF-32LE") => utf32LEDecode
        case -17 :: -69 :: -65 :: x1 :: Nil                               => prepend(Chunk(x1)) >>> utf8Decode
        case -2 :: -1 :: x1 :: x2 :: Nil                                  => prepend(Chunk(x1, x2)) >>> utf16BEDecode
        case -1 :: -2 :: x1 :: x2 :: Nil                                  => prepend(Chunk(x1, x2)) >>> utf16LEDecode
        case _                                                            => prepend(bytes) >>> utf8Decode
      }
    }

  /**
   * Decodes chunks of UTF-8 bytes into strings.
   *
   * This transducer uses the String constructor's behavior when handling malformed byte
   * sequences.
   */
  val utf8Decode: ZTransducer[Any, Nothing, Byte, String] = {
    val transducer = ZTransducer[Any, Nothing, Byte, String] {
      def is2ByteSequenceStart(b: Byte) = (b & 0xe0) == 0xc0
      def is3ByteSequenceStart(b: Byte) = (b & 0xf0) == 0xe0
      def is4ByteSequenceStart(b: Byte) = (b & 0xf8) == 0xf0
      def computeSplit(chunk: Chunk[Byte]) = {
        // There are 3 bad patterns we need to check to detect an incomplete chunk:
        // - 2/3/4 byte sequences that start on the last byte
        // - 3/4 byte sequences that start on the second-to-last byte
        // - 4 byte sequences that start on the third-to-last byte
        //
        // Otherwise, we can convert the entire concatenated chunk to a string.
        val len = chunk.length

        if (
          len >= 1 &&
          (is2ByteSequenceStart(chunk(len - 1)) ||
            is3ByteSequenceStart(chunk(len - 1)) ||
            is4ByteSequenceStart(chunk(len - 1)))
        )
          len - 1
        else if (
          len >= 2 &&
          (is3ByteSequenceStart(chunk(len - 2)) ||
            is4ByteSequenceStart(chunk(len - 2)))
        )
          len - 2
        else if (len >= 3 && is4ByteSequenceStart(chunk(len - 3)))
          len - 3
        else len
      }

      ZRef.makeManaged[Chunk[Byte]](Chunk.empty).map { stateRef =>
        {
          case None =>
            stateRef.getAndSet(Chunk.empty).flatMap { leftovers =>
              if (leftovers.isEmpty) ZIO.succeedNow(Chunk.empty)
              else ZIO.succeedNow(Chunk.single(new String(leftovers.toArray[Byte], StandardCharsets.UTF_8)))
            }

          case Some(bytes) =>
            stateRef.modify { leftovers =>
              val concat = leftovers ++ bytes

              val (toConvert, newLeftovers) = concat.splitAt(computeSplit(concat))

              if (toConvert.isEmpty) (Chunk.empty, newLeftovers.materialize)
              else (Chunk.single(new String(toConvert.toArray[Byte], "UTF-8")), newLeftovers.materialize)
            }
        }
      }
    }

    // handle optional byte order mark
    branchAfter(3) { bytes =>
      bytes.toList match {
        case -17 :: -69 :: -65 :: Nil =>
          transducer
        case _ =>
          prepend(bytes) >>> transducer
      }
    }
  }

  /**
   * Decodes chunks of UTF-16 bytes into strings.
   * If no byte order mark is found big-endianness is assumed.
   *
   * This transducer uses the endisn-specific String constructor's behavior when handling
   * malformed byte sequences.
   */
  val utf16Decode: ZTransducer[Any, Nothing, Byte, String] =
    branchAfter(2) { bytes =>
      bytes.toList match {
        case -2 :: -1 :: Nil =>
          utf16BEDecode
        case -1 :: -2 :: Nil =>
          utf16LEDecode
        case _ =>
          prepend(bytes) >>> utf16BEDecode
      }
    }

  /**
   * Decodes chunks of UTF-16BE bytes into strings.
   *
   * This transducer uses the String constructor's behavior when handling malformed byte
   * sequences.
   */
  val utf16BEDecode: ZTransducer[Any, Nothing, Byte, String] =
    utfFixedLengthDecode(StandardCharsets.UTF_16BE, 2)

  /**
   * Decodes chunks of UTF-16LE bytes into strings.
   *
   * This transducer uses the String constructor's behavior when handling malformed byte
   * sequences.
   */
  val utf16LEDecode: ZTransducer[Any, Nothing, Byte, String] =
    utfFixedLengthDecode(StandardCharsets.UTF_16LE, 2)

  /**
   * Decodes chunks of UTF-32 bytes into strings.
   * If no byte order mark is found big-endianness is assumed.
   */
  lazy val utf32Decode: ZTransducer[Any, Nothing, Byte, String] =
    branchAfter(4) { bytes =>
      bytes.toList match {
        case 0 :: 0 :: -2 :: -1 :: Nil =>
          utf32BEDecode
        case -1 :: -2 :: 0 :: 0 :: Nil =>
          utf32LEDecode
        case _ =>
          prepend(bytes) >>> utf32BEDecode
      }
    }

  /**
   * Decodes chunks of UTF-32BE bytes into strings.
   *
   * This transducer uses the String constructor's behavior when handling malformed byte
   * sequences.
   */
  lazy val utf32BEDecode: ZTransducer[Any, Nothing, Byte, String] =
    utfFixedLengthDecode(Charset.forName("UTF-32BE"), 4)

  /**
   * Decodes chunks of UTF-32LE bytes into strings.
   *
   * This transducer uses the String constructor's behavior when handling malformed byte
   * sequences.
   */
  lazy val utf32LEDecode: ZTransducer[Any, Nothing, Byte, String] =
    utfFixedLengthDecode(Charset.forName("UTF-32LE"), 4)

  private def utfFixedLengthDecode(charset: Charset, width: Int): ZTransducer[Any, Nothing, Byte, String] =
    ZTransducer {
      ZRef.makeManaged[Chunk[Byte]](Chunk.empty).map { stateRef =>
        {
          case None =>
            stateRef.getAndSet(Chunk.empty).flatMap { leftovers =>
              if (leftovers.isEmpty) ZIO.succeedNow(Chunk.empty)
              else ZIO.succeedNow(Chunk.single(new String(leftovers.toArray[Byte], charset)))
            }
          case Some(bytes) =>
            stateRef.modify { old =>
              val data      = old ++ bytes
              val remainder = data.length % width
              if (remainder == 0) {
                val decoded = new String(data.toArray, charset)
                (Chunk.single(decoded), Chunk.empty)
              } else {
                val (fullChunk, rest) = data.splitAt(data.length - remainder)
                val decoded           = new String(fullChunk.toArray, charset)
                (Chunk.single(decoded), rest)
              }
            }
        }
      }
    }

  /**
   * Decodes chunks of US-ASCII bytes into strings.
   *
   * This transducer uses the String constructor's behavior when handling malformed byte
   * sequences.
   */
  val usASCIIDecode: ZTransducer[Any, Nothing, Byte, String] =
    ZTransducer.fromPush {
      case Some(chunk) => ZIO.succeedNow(Chunk.single(new String(chunk.toArray, StandardCharsets.US_ASCII)))
      case None        => ZIO.succeedNow(Chunk.empty)
    }

  object Push {
    def emit[A](a: A): UIO[Chunk[A]]         = IO.succeedNow(Chunk.single(a))
    def emit[A](as: Chunk[A]): UIO[Chunk[A]] = IO.succeedNow(as)
    val next: UIO[Chunk[Nothing]]            = IO.succeedNow(Chunk.empty)
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy