/*
 * Source listing: com.twitter.algebird.Preparer.scala
 * Artifact: algebird-core (algebird-core_2.10).
 * Note: there is a newer version of this artifact: 0.12.2.
 */
package com.twitter.algebird

/**
 * A Preparer builds an Aggregator by composition, working forwards from the input type:
 * you describe the transformations applied to each input and then name the aggregation
 * to run on the result, rather than starting from the aggregation and composing
 * "outwards" in both directions.
 *
 * A chain always starts with a call to Preparer[A] and ends with monoidAggregate
 * (or one of the methods defined in terms of it), which yields the finished aggregator.
 *
 * @tparam A the input type of the aggregators produced from this Preparer
 * @tparam T the type of the values handed to the aggregation once the chain has run
 */
sealed trait Preparer[A, T] extends java.io.Serializable {

  /**
   * Build a MonoidAggregator over A from one over T, with this Preparer's
   * transformation chain included in its prepare stage.
   */
  def monoidAggregate[B, C](aggregator: MonoidAggregator[T, B, C]): MonoidAggregator[A, B, C]

  /**
   * Append a one-to-many transformation to the chain.
   * Since "many" may be "none", everything downstream of a flatMap is restricted
   * to aggregations done with monoids.
   */
  def flatMap[U](fn: T => TraversableOnce[U]): FlatMapPreparer[A, U]

  /**
   * flatMap using the identity; available when T is itself a TraversableOnce[U].
   */
  def flatten[U](implicit ev: <:<[T, TraversableOnce[U]]) = flatMap[U](ev)

  /**
   * Keep only the values for which fn returns true.
   * Implemented with flatMap, so it also limits later aggregations to MonoidAggregator.
   */
  def filter(fn: T => Boolean) = flatMap { value =>
    if (fn(value)) Some(value) else None
  }

  /**
   * Keep only the values on which p is defined, replacing each with p's result.
   * Implemented with flatMap, so it also limits later aggregations to MonoidAggregator.
   */
  def collect[U](p: PartialFunction[T, U]): FlatMapPreparer[A, U] =
    flatMap { value =>
      if (p.isDefinedAt(value)) Some(p(value)) else None
    }

  // The methods from here down to uniqueCount are shorthands: each one passes a
  // standard aggregator from the Aggregator object to monoidAggregate.
  // See the Aggregator object for the behaviour of each aggregator.

  /** Shorthand for monoidAggregate(Aggregator.count(pred)). */
  def count(pred: T => Boolean) = monoidAggregate(Aggregator.count(pred))

  /** Shorthand for monoidAggregate(Aggregator.exists(pred)). */
  def exists(pred: T => Boolean) = monoidAggregate(Aggregator.exists(pred))

  /** Shorthand for monoidAggregate(Aggregator.forall(pred)). */
  def forall(pred: T => Boolean) = monoidAggregate(Aggregator.forall(pred))

  /** Shorthand for monoidAggregate(Aggregator.size). */
  def size = monoidAggregate(Aggregator.size)

  /** Shorthand for monoidAggregate(Aggregator.sortedTake(count)), using the implicit Ordering[T]. */
  def sortedTake(count: Int)(implicit ord: Ordering[T]) =
    monoidAggregate(Aggregator.sortedTake(count))

  /** Shorthand for monoidAggregate(Aggregator.sortedReverseTake(count)), using the implicit Ordering[T]. */
  def sortedReverseTake(count: Int)(implicit ord: Ordering[T]) =
    monoidAggregate(Aggregator.sortedReverseTake(count))

  /** Shorthand for monoidAggregate(Aggregator.toList). */
  def toList = monoidAggregate(Aggregator.toList)

  /** Shorthand for monoidAggregate(Aggregator.toSet). */
  def toSet = monoidAggregate(Aggregator.toSet)

  /** Shorthand for monoidAggregate(Aggregator.uniqueCount). */
  def uniqueCount = monoidAggregate(Aggregator.uniqueCount)

  /**
   * Turn any Aggregator over T into a MonoidAggregator over A by calling its lift method,
   * which moves the reduce and present stages into Option space, and then passing the
   * result to monoidAggregate.
   */
  def lift[B, C](aggregator: Aggregator[T, B, C]): MonoidAggregator[A, Option[B], Option[C]] =
    monoidAggregate(aggregator.lift)

  // The remaining methods are shorthands that call lift with a standard aggregator
  // from the Aggregator object. See the Aggregator object for more docs.

  /** Shorthand for lift(Aggregator.head). */
  def headOption = lift(Aggregator.head)

  /** Shorthand for lift(Aggregator.last). */
  def lastOption = lift(Aggregator.last)

  /** Shorthand for lift(Aggregator.max), using the implicit Ordering[T]. */
  def maxOption(implicit ord: Ordering[T]) = lift(Aggregator.max)

  /** Like maxOption, but values are ordered by the key that fn extracts from them. */
  def maxOptionBy[U: Ordering](fn: T => U) = {
    // Local implicit: the Ordering[T] that Aggregator.max picks up below.
    implicit val orderingByKey: Ordering[T] = Ordering.by(fn)
    lift(Aggregator.max[T])
  }

  /** Shorthand for lift(Aggregator.min), using the implicit Ordering[T]. */
  def minOption(implicit ord: Ordering[T]) = lift(Aggregator.min)

  /** Like minOption, but values are ordered by the key that fn extracts from them. */
  def minOptionBy[U: Ordering](fn: T => U) = {
    // Local implicit: the Ordering[T] that Aggregator.min picks up below.
    implicit val orderingByKey: Ordering[T] = Ordering.by(fn)
    lift(Aggregator.min[T])
  }

  /** Shorthand for lift(Aggregator.fromSemigroup(sg)). */
  def sumOption(implicit sg: Semigroup[T]) = lift(Aggregator.fromSemigroup(sg))

  /** Shorthand for lift(Aggregator.fromReduce(fn)). */
  def reduceOption(fn: (T, T) => T) = lift(Aggregator.fromReduce(fn))
}

object Preparer {
  /**
   * This is the expected entry point for creating a new Preparer.
   *
   * The result type is declared explicitly so that the public signature is
   * MapPreparer[A, A] rather than whatever type is inferred for
   * MapPreparer.identity.
   *
   * @tparam A the input type of the aggregators that will eventually be built
   * @return the identity MapPreparer for A, to which transformations can be chained
   */
  def apply[A]: MapPreparer[A, A] = MapPreparer.identity[A]
}

/**
 * A Preparer whose chain so far consists of zero or more map steps and no flatMap.
 * Unlike a FlatMapPreparer it is not limited to MonoidAggregators:
 * it can be combined with any type of Aggregator.
 */
trait MapPreparer[A, T] extends Preparer[A, T] {

  /** The function applied to each input before it is passed to the aggregator. */
  def prepareFn: A => T

  /** Append a one-to-one transformation to the chain. */
  def map[U](fn: T => U): MapPreparer[A, U] = {
    val chained: A => U = fn.compose(prepareFn)
    MapPreparer[A, U](chained)
  }

  /**
   * Append a one-to-many transformation to the chain; the result is a
   * FlatMapPreparer, which only accepts MonoidAggregators.
   */
  def flatMap[U](fn: T => TraversableOnce[U]) = {
    val chained: A => TraversableOnce[U] = fn.compose(prepareFn)
    FlatMapPreparer[A, U](chained)
  }

  /**
   * Build a MonoidAggregator over A by composing prepareFn in front of the
   * given aggregator's prepare stage.
   */
  def monoidAggregate[B, C](aggregator: MonoidAggregator[T, B, C]): MonoidAggregator[A, B, C] =
    aggregator.composePrepare(prepareFn)

  /**
   * Build an Aggregator over A by composing prepareFn in front of the
   * given aggregator's prepare stage.
   */
  def aggregate[B, C](aggregator: Aggregator[T, B, C]): Aggregator[A, B, C] =
    aggregator.composePrepare(prepareFn)

  /**
   * Run two aggregations in parallel over the same prepared values.
   * fn receives an identity MapPreparer over T and returns the two aggregators to run;
   * they are combined with Aggregator's join method and the result is passed to aggregate.
   *
   * We really need to generate N versions of this for 3-way, 4-way etc splits.
   */
  def split[B1, B2, C1, C2](fn: MapPreparer[T, T] => (Aggregator[T, B1, C1], Aggregator[T, B2, C2])): Aggregator[A, (B1, B2), (C1, C2)] =
    fn(MapPreparer.identity[T]) match {
      case (first, second) => aggregate(first.join(second))
    }

  // The remaining methods are shorthands that call aggregate with a standard
  // aggregator from the Aggregator object. See the Aggregator object for more docs.

  /** Shorthand for aggregate(Aggregator.head). */
  def head = aggregate(Aggregator.head)

  /** Shorthand for aggregate(Aggregator.last). */
  def last = aggregate(Aggregator.last)

  /** Shorthand for aggregate(Aggregator.max), using the implicit Ordering[T]. */
  def max(implicit ord: Ordering[T]) = aggregate(Aggregator.max)

  /** Like max, but values are ordered by the key that fn extracts from them. */
  def maxBy[U: Ordering](fn: T => U) = {
    // Local implicit: the Ordering[T] that Aggregator.max picks up below.
    implicit val orderingByKey: Ordering[T] = Ordering.by(fn)
    aggregate(Aggregator.max[T])
  }

  /** Shorthand for aggregate(Aggregator.min), using the implicit Ordering[T]. */
  def min(implicit ord: Ordering[T]) = aggregate(Aggregator.min)

  /** Like min, but values are ordered by the key that fn extracts from them. */
  def minBy[U: Ordering](fn: T => U) = {
    // Local implicit: the Ordering[T] that Aggregator.min picks up below.
    implicit val orderingByKey: Ordering[T] = Ordering.by(fn)
    aggregate(Aggregator.min[T])
  }

  /** Shorthand for aggregate(Aggregator.fromSemigroup(sg)). */
  def sum(implicit sg: Semigroup[T]) = aggregate(Aggregator.fromSemigroup(sg))

  /** Shorthand for aggregate(Aggregator.fromReduce(fn)). */
  def reduce(fn: (T, T) => T) = aggregate(Aggregator.fromReduce(fn))
}

object MapPreparer {
  /**
   * Create a concrete MapPreparer that applies fn to every input.
   *
   * The result type is declared explicitly so the public signature is
   * MapPreparer[A, T] instead of a type inferred from the anonymous class.
   *
   * @param fn the function used as the new preparer's prepareFn
   */
  def apply[A, T](fn: A => T): MapPreparer[A, T] =
    new MapPreparer[A, T] { val prepareFn: A => T = fn }

  /**
   * This is purely an optimization for the case of mapping by identity.
   * It overrides the key methods to not actually use the identity function:
   * map and flatMap start a new chain directly from the given function, and
   * monoidAggregate and aggregate return the given aggregator unchanged.
   */
  def identity[A]: MapPreparer[A, A] = new MapPreparer[A, A] {
    val prepareFn: A => A = (a: A) => a

    override def map[U](fn: A => U): MapPreparer[A, U] = MapPreparer(fn)

    // Result type deliberately left inferred: the inherited MapPreparer.flatMap
    // has an inferred result type as well, and this override has to conform to it.
    override def flatMap[U](fn: A => TraversableOnce[U]) = FlatMapPreparer(fn)

    override def monoidAggregate[B, C](aggregator: MonoidAggregator[A, B, C]): MonoidAggregator[A, B, C] =
      aggregator

    override def aggregate[B, C](aggregator: Aggregator[A, B, C]): Aggregator[A, B, C] =
      aggregator
  }
}

/**
 * A Preparer whose chain contains one or more flatMap steps.
 * It can only be combined with MonoidAggregators.
 */
trait FlatMapPreparer[A, T] extends Preparer[A, T] {

  /** The function applied to each input; it yields the values passed on to the aggregator. */
  def prepareFn: A => TraversableOnce[T]

  /** Append a one-to-one transformation, applied to every value prepareFn yields. */
  def map[U](fn: T => U): FlatMapPreparer[A, U] =
    FlatMapPreparer[A, U]((input: A) => prepareFn(input).map(fn))

  /** Append a one-to-many transformation, applied to every value prepareFn yields. */
  def flatMap[U](fn: T => TraversableOnce[U]) =
    FlatMapPreparer[A, U]((input: A) => prepareFn(input).flatMap(fn))

  /**
   * Build a MonoidAggregator over A: the aggregator's sumBefore form is composed
   * with prepareFn, so the values produced for one input are consumed together.
   */
  def monoidAggregate[B, C](aggregator: MonoidAggregator[T, B, C]): MonoidAggregator[A, B, C] =
    aggregator.sumBefore.composePrepare(prepareFn)

  /**
   * Convenience alias of monoidAggregate.
   * Unlike MapPreparer's aggregate, this only accepts a MonoidAggregator.
   */
  def aggregate[B, C](aggregator: MonoidAggregator[T, B, C]) = monoidAggregate(aggregator)

  /**
   * Like monoidAggregate, but the aggregator is built from the implicit Monoid
   * via Aggregator.fromMonoid.
   */
  def sum(implicit monoid: Monoid[T]) = monoidAggregate(Aggregator.fromMonoid(monoid))

  /**
   * Run two aggregations in parallel over the same prepared values.
   * fn receives an identity FlatMapPreparer over T and returns the two aggregators to run;
   * they are combined with Aggregator's join method and prepareFn is composed in front.
   *
   * We really need to generate N versions of this for 3-way, 4-way etc splits.
   */
  def split[B1, B2, C1, C2](fn: FlatMapPreparer[TraversableOnce[T], T] => (MonoidAggregator[TraversableOnce[T], B1, C1], MonoidAggregator[TraversableOnce[T], B2, C2])): Aggregator[A, (B1, B2), (C1, C2)] =
    fn(FlatMapPreparer.identity[T]) match {
      case (first, second) => first.join(second).composePrepare(prepareFn)
    }
}

object FlatMapPreparer {
  /**
   * Create a concrete FlatMapPreparer that applies fn to every input.
   *
   * The result type is declared explicitly so the public signature is
   * FlatMapPreparer[A, T] instead of a type inferred from the anonymous class.
   *
   * @param fn the function used as the new preparer's prepareFn
   */
  def apply[A, T](fn: A => TraversableOnce[T]): FlatMapPreparer[A, T] =
    new FlatMapPreparer[A, T] { val prepareFn: A => TraversableOnce[T] = fn }

  /**
   * This is purely an optimization for the case of flatMapping by identity.
   * It overrides the key methods to not actually use the identity function:
   * map and flatMap operate on the incoming TraversableOnce directly, and
   * monoidAggregate returns the aggregator's sumBefore form without composing anything.
   */
  def identity[A]: FlatMapPreparer[TraversableOnce[A], A] = new FlatMapPreparer[TraversableOnce[A], A] {
    val prepareFn: TraversableOnce[A] => TraversableOnce[A] = (a: TraversableOnce[A]) => a

    override def map[U](fn: A => U): FlatMapPreparer[TraversableOnce[A], U] =
      FlatMapPreparer { a: TraversableOnce[A] => a.map(fn) }

    // Result type deliberately left inferred: the inherited FlatMapPreparer.flatMap
    // has an inferred result type as well, and this override has to conform to it.
    override def flatMap[U](fn: A => TraversableOnce[U]) =
      FlatMapPreparer { a: TraversableOnce[A] => a.flatMap(fn) }

    override def monoidAggregate[B, C](aggregator: MonoidAggregator[A, B, C]): MonoidAggregator[TraversableOnce[A], B, C] =
      aggregator.sumBefore
  }
}