// banm.ignite-scala_2.10.0.0.1.source-code.IgnitePipe.scala Maven / Gradle / Ivy
//
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of ignite-scala_2.10 Show documentation
// scala api for distributed closures on apache ignite
// The newest version!
package ignite.scala

import com.twitter.algebird.Semigroup
import org.apache.ignite.IgniteCache
import java.io.Serializable

object IgnitePipe {

  /** The pipe that holds no elements. */
  def empty: IgnitePipe[Nothing] = EmptyPipe

  /**
   * Wraps an existing collection in a pipe.
   *
   * @param iter the elements the pipe starts from
   * @param c    the runner attached to the resulting pipe
   */
  def from[T](iter: Iterable[T])(implicit c: ComputeRunner): IgnitePipe[T] =
    IterablePipe[T](iter)(c)

  /**
   * Builds a pipe whose elements are produced by calling `iterGen`.
   *
   * The generator is not called here: it is installed as the flatMap
   * function over a single-element seed pipe, so it is invoked once,
   * whenever that flatMap transform is evaluated.
   *
   * @param iterGen produces the elements of the pipe
   * @param c       the runner attached to the resulting pipe
   */
  def from[T](iterGen: () => Iterable[T])(implicit c: ComputeRunner): IgnitePipe[T] = {
    // One Unit element: exists only to trigger a single call of iterGen.
    val seed: IgnitePipe[Unit] = from(List(()))
    seed.flatMap(_ => iterGen())
  }

  /**
   * Builds an affinity pipe that applies `f` to the cache and each key.
   *
   * @param cache the cache the keys belong to; only its name is placed in the
   *              source elements, the cache itself is captured by the transform
   * @param keys  the keys to compute against
   * @param f     the function evaluated for every key
   * @param c     the runner attached to the resulting pipe
   */
  def collocated[K, V, T](cache: IgniteCache[K, V], keys: Set[K])(f: (IgniteCache[K, V], K) => T)(implicit c: ComputeRunner): CacheAffinityPipe[K, V, T] =
    new CacheAffinityPipe[K, V, T] {
      override def compute = c
      // TODO: this can be inefficient. keyset enrichment should happen in ComputeRunner
      override def source = keys.map { key => CacheAffinity[K, V](cache.getName, key) }
      override def transform = (affinity: CacheAffinity[K, V]) => f(cache, affinity.key)
    }
}

/**
 * Provides composable distributed closures that can run on Apache Ignite.
 *
 * Allows chaining functions to be executed on the cluster. Reduction is done
 * on the client. Note that pipe operations like flattening, filtering are also
 * performed on the client after gathering results from the nodes.
 *
 * Best practice is to push computations to the cluster as much as possible
 * and flatten, filter on the client only if the scatter-gather overhead is
 * acceptable and results can fit on the client.
 */
sealed trait IgnitePipe[T] extends Serializable {
  // TODO: make this covariant

  /**
   * Transform each element using the function f.
   *
   * This is executed on the cluster nodes. Chained map transforms
   * are composed and executed once on the cluster nodes. Use .fork
   * to manually split the chain if tuning is required.
   */
  def map[U](f: T => U): IgnitePipe[U]

  /**
   * Transform each value using the function f and flatten the result.
   *
   * Note: This is not a monadic composition.
   *
   * Flatten step is performed on the client. If you have a chain of flatMaps,
   * all functions in the chain are composed and flattening is performed once
   * on the client.
   *
   * To manually split the flatMap chain, use .fork. Forking is useful when
   * dealing with long, lazy chains, or when adding a barrier is desired.
   */
  def flatMap[U](f: T => TraversableOnce[U]): IgnitePipe[U]

  /**
   * Filter elements using the function f.
   *
   * Implemented as a flatMap that yields either the element or nothing.
   */
  def filter(f: T => Boolean): IgnitePipe[T] =
    flatMap { t => if (f(t)) Iterator(t) else Iterator.empty }

  /**
   * Prepare a Reduction based on the provided Semigroup.
   *
   * Note that results can arrive from cluster nodes in any order,
   * so the operation has to be associative and commutative.
   */
  def reduce(implicit sg: Semigroup[T]): Reduction[T]

  /**
   * Merge two pipes of the same type.
   *
   * If either operand is an empty [[IterablePipe]], the other operand is
   * returned unchanged instead of building a [[MergedPipe]]. This matters
   * for more than efficiency: [[IterablePipe]] reduces with `iter.reduce`,
   * which throws on an empty collection, so an empty operand kept inside a
   * [[MergedPipe]] would make reducing the merged pipe fail.
   *
   * @param p the pipe to append to this one
   * @return a pipe yielding the elements of both operands
   */
  def ++(p: IgnitePipe[T]): IgnitePipe[T] = {
    def isEmptyIterable(pipe: IgnitePipe[T]): Boolean = pipe match {
      case IterablePipe(iter) => iter.isEmpty
      case _ => false
    }

    if (isEmptyIterable(p)) this
    else if (isEmptyIterable(this)) p
    else MergedPipe(this, p)
  }

  /**
   * Manually add a fork in the execution chain.
   * This creates a barrier, which means the subsequent transforms
   * are planned on a fresh Ignite closure.
   */
  def fork: IgnitePipe[T]

  /** Execute the chain and return the computed values. */
  def execute: Iterable[T]
}

final case object EmptyPipe extends IgnitePipe[Nothing] {

  /** There is nothing to compute, so the result is an empty collection. */
  override def execute = Iterable.empty[Nothing]

  /** There are no pending transforms to split, so the pipe itself is returned. */
  override def fork = this

  /** Reducing the empty pipe yields `EmptyReduction`. */
  override def reduce(implicit sg: Semigroup[Nothing]) = EmptyReduction

  /** Not supported: always throws a `RuntimeException`. */
  override def map[U](f: Nothing => U) =
    throw new RuntimeException("map called on EmptyPipe")

  /** Not supported: always throws a `RuntimeException`. */
  override def flatMap[U](f: Nothing => TraversableOnce[U]) =
    throw new RuntimeException("flatMap called on EmptyPipe")
}

/**
 * Trait for pipes that hold information about
 * the cluster along with the source and transform
 * for underlying computation.
 */
trait HasComputeConfig[S, T] {
  /** The runner that the pipes in this file hand `source` and `transform` to when executed. */
  def compute: ComputeRunner

  /** The input elements of the computation. */
  def source: Iterable[S]

  /** The function turning a source element of type `S` into a result of type `T`. */
  def transform: S => T
}

/**
 * Represents a transforming computation on the cluster.
 */
sealed abstract class TransformValuePipe[S, T] extends IgnitePipe[T]
  with HasComputeConfig[S, T] {

  /** Returns a pipe over the same source whose transform is this transform followed by `f`. */
  override def map[U](f: T => U): TransformValuePipe[S, U] =
    PipeHelper.toTransformValuePipe[S, T, U](this)(f)

  /**
   * Returns a flat-map pipe over the same source whose transform is this
   * transform followed by `f`.
   */
  override def flatMap[U](f: T => TraversableOnce[U]): FlatMapValuePipe[S, U] =
    PipeHelper.toFlatMapValuePipe[S, T, U](this)(f)

  /** Prepares a reduction of this pipe's results using `sg`. */
  override def reduce(implicit sg: Semigroup[T]) =
    TransformValueReduction.from(this)(sg)

  /** Wraps this pipe's execution in a new pipe so later transforms start a fresh chain. */
  override def fork: IgnitePipe[T] = PipeHelper.forkPipe(this)

  /** Hands `source` and `transform` to the compute runner and returns what it produces. */
  override def execute = compute(source)(transform)
}

/**
 * Represents a transforming computation on the cluster
 * followed by flattening of results done at the client.
 */
sealed abstract class FlatMapValuePipe[S, T] extends IgnitePipe[T]
  with HasComputeConfig[S, TraversableOnce[T]] {

  /**
   * Applies `f` to the flattened elements.
   *
   * This introduces a barrier: the helper executes this pipe first and then
   * maps `f` over a new pipe built from the gathered results.
   */
  override def map[U](f: T => U): TransformValuePipe[T, U] =
    PipeHelper.toTransformValuePipe[S, T, U](this)(f)

  /**
   * Returns a flat-map pipe over the same source that applies `f` to every
   * element produced by this pipe's transform and flattens the result.
   */
  override def flatMap[U](f: T => TraversableOnce[U]): FlatMapValuePipe[S, U] =
    PipeHelper.toFlatMapValuePipe[S, T, U](this)(f)

  /** Prepares a reduction of this pipe's results using `sg`. */
  override def reduce(implicit sg: Semigroup[T]) =
    FlatMapValueReduction.from(this)(sg)

  /** Wraps this pipe's execution in a new pipe so later transforms start a fresh chain. */
  override def fork: IgnitePipe[T] = PipeHelper.forkPipe(this)

  /** Hands `source` and `transform` to the runner's flat-map entry point. */
  override def execute = compute.flatMapApply[S, T](source)(transform)
}

/**
 * Represents a transforming affinity (cache-collocation)
 * computation on the cluster.
 */
sealed abstract class CacheAffinityPipe[K, V, T] extends IgnitePipe[T]
  with HasComputeConfig[CacheAffinity[K, V], T] {

  /** Returns an affinity pipe over the same source whose transform is this transform followed by `f`. */
  override def map[U](f: T => U): CacheAffinityPipe[K, V, U] =
    PipeHelper.toCacheAffinityPipe[K, V, T, U](this)(f)

  /**
   * Returns a flat-map affinity pipe over the same source whose transform is
   * this transform followed by `f`.
   */
  override def flatMap[U](f: T => TraversableOnce[U]): FlatMapCacheAffinityPipe[K, V, U] =
    PipeHelper.toFlatMapCacheAffinityPipe[K, V, T, U](this)(f)

  /** Prepares a reduction of this pipe's results using `sg`. */
  override def reduce(implicit sg: Semigroup[T]) =
    CacheAffinityValueReduction.from(this)(sg)

  /** Wraps this pipe's execution in a new pipe so later transforms start a fresh chain. */
  override def fork: IgnitePipe[T] = PipeHelper.forkPipe(this)

  /** Hands `source` and `transform` to the runner's affinity entry point. */
  override def execute = compute.affinityApply(source)(transform)
}

/**
 * Represents a transforming affinity (cache-collocated)
 * computation on the cluster followed by flattening of results
 * done at the client.
 */
sealed abstract class FlatMapCacheAffinityPipe[K, V, T] extends IgnitePipe[T]
  with HasComputeConfig[CacheAffinity[K, V], TraversableOnce[T]] {

  /**
   * Applies `f` to the flattened elements.
   *
   * This introduces a barrier: the helper executes this pipe first and then
   * maps `f` over a new pipe built from the gathered results.
   */
  override def map[U](f: T => U): TransformValuePipe[T, U] =
    PipeHelper.toCacheAffinityPipe[K, V, T, U](this)(f)

  /**
   * Returns a flat-map affinity pipe over the same source that applies `f` to
   * every element produced by this pipe's transform and flattens the result.
   */
  override def flatMap[U](f: T => TraversableOnce[U]): FlatMapCacheAffinityPipe[K, V, U] =
    PipeHelper.toFlatMapCacheAffinityPipe[K, V, T, U](this)(f)

  /** Prepares a reduction of this pipe's results using `sg`. */
  override def reduce(implicit sg: Semigroup[T]) =
    FlatMapCacheAffinityReduction.from(this)(sg)

  /** Wraps this pipe's execution in a new pipe so later transforms start a fresh chain. */
  override def fork: IgnitePipe[T] = PipeHelper.forkPipe(this)

  /** Hands `source` and `transform` to the runner's flat-map affinity entry point. */
  override def execute = compute.flatMapAffinityApply(source)(transform)
}

/**
 * The union of two pipes of the same element type.
 *
 * Every operation is delegated to both sides and the two outcomes are
 * combined.
 *
 * @param left  the pipe whose elements come first in `execute`
 * @param right the pipe whose elements come second in `execute`
 */
final case class MergedPipe[T](left: IgnitePipe[T], right: IgnitePipe[T])
  extends IgnitePipe[T] {

  /** Maps `f` over both sides and merges the resulting pipes. */
  override def map[U](f: T => U) =
    MergedPipe(left.map(f), right.map(f))

  /** Flat-maps `f` over both sides and merges the resulting pipes. */
  override def flatMap[U](f: T => TraversableOnce[U]) =
    MergedPipe(left.flatMap(f), right.flatMap(f))

  /** Reduces each side independently and merges the two reductions. */
  override def reduce(implicit sg: Semigroup[T]) =
    MergedReduction(left.reduce, right.reduce)

  /**
   * Forks both sides and merges the forked pipes.
   *
   * map/flatMap on a merged pipe are delegated to `left` and `right`, so
   * returning `this` would let later transforms keep composing onto the
   * children's existing chains and the requested barrier would be lost.
   * Forking each child honours the barrier on both sides.
   */
  override def fork = MergedPipe(left.fork, right.fork)

  /** Executes the left side, then the right side, and concatenates the results. */
  override def execute = left.execute ++ right.execute
}

/**
 * A pipe containing a sequence of values.
 *
 * Can be generally used as the starting point in the execution chain. The sequence is
 * partitioned and load balanced across the cluster nodes.
 */
final case class IterablePipe[T](iter: Iterable[T])(implicit val compute: ComputeRunner)
  extends IgnitePipe[T] {

  /** Starts a transform chain: `iter` becomes the source and `f` the transform. */
  override def map[U](f: T => U): TransformValuePipe[T, U] =
    PipeHelper.toTransformValuePipe[T, U](this)(f)

  /** Starts a flat-map chain: `iter` becomes the source and `f` the transform. */
  override def flatMap[U](f: T => TraversableOnce[U]): FlatMapValuePipe[T, U] =
    PipeHelper.toFlatMapValuePipe[T, U](this)(f)

  /**
   * Combines the held values with the semigroup's `plus` and wraps the
   * outcome in a `ValueReduction`.
   *
   * NOTE(review): `iter.reduce` throws `UnsupportedOperationException` when
   * `iter` is empty, so this cannot reduce an empty pipe.
   */
  override def reduce(implicit sg: Semigroup[T]) =
    ValueReduction(iter.reduce((l, r) => sg.plus(l, r)))(compute)

  /** No transforms are pending on a plain sequence, so the pipe itself is returned. */
  override def fork = this

  /** The values are already available, so they are returned as they are. */
  override def execute = iter
}

/**
 * Helpers for switching between IgnitePipe types.
 */
private object PipeHelper {

  // ---------------------------------------------------------------------
  // Value pipes
  // ---------------------------------------------------------------------

  /** Turns a plain sequence pipe into a transform pipe applying `f`. */
  def toTransformValuePipe[T, U](ip: IterablePipe[T])(f: T => U): TransformValuePipe[T, U] =
    new TransformValuePipe[T, U] {
      override val compute = ip.compute
      override val source = ip.iter
      override def transform = f
    }

  /** Appends `f` to the transform of an existing transform pipe. */
  def toTransformValuePipe[S, T, U](tvp: TransformValuePipe[S, T])(f: T => U): TransformValuePipe[S, U] =
    new TransformValuePipe[S, U] {
      override val compute = tvp.compute
      override val source = tvp.source
      override def transform = f.compose(tvp.transform)
    }

  /** Appends the flattening function `f` to the transform of a transform pipe. */
  def toFlatMapValuePipe[S, T, U](tvp: TransformValuePipe[S, T])(f: T => TraversableOnce[U]): FlatMapValuePipe[S, U] =
    new FlatMapValuePipe[S, U] {
      override val compute = tvp.compute
      override val source = tvp.source
      override def transform = f.compose(tvp.transform)
    }

  /**
   * Chains a second flat-map onto a flat-map pipe: `f` is applied to every
   * element the existing transform yields and the nested results are flattened.
   */
  def toFlatMapValuePipe[S, T, U](fvp: FlatMapValuePipe[S, T])(f: T => TraversableOnce[U]): FlatMapValuePipe[S, U] =
    new FlatMapValuePipe[S, U] {
      override val compute = fvp.compute
      override val source = fvp.source
      override def transform = fvp.transform.andThen(ts => ts.map(f).flatten)
    }

  /** Turns a plain sequence pipe into a flat-map pipe applying `f`. */
  def toFlatMapValuePipe[T, U](ip: IterablePipe[T])(f: T => TraversableOnce[U]): FlatMapValuePipe[T, U] =
    new FlatMapValuePipe[T, U] {
      override val compute = ip.compute
      override val source = ip.iter
      override def transform = f
    }

  /**
   * Maps `f` over a flat-map pipe.
   *
   * This adds a barrier: the input pipe is executed (including its flatten
   * step) right here, and `f` is then mapped over a new pipe built from the
   * gathered results.
   */
  def toTransformValuePipe[S, T, U](fvp: FlatMapValuePipe[S, T])(f: T => U): TransformValuePipe[T, U] = {
    val gathered = IterablePipe(fvp.execute)(fvp.compute)
    gathered.map(f)
  }

  // ---------------------------------------------------------------------
  // Cache-affinity pipes
  // ---------------------------------------------------------------------

  /** Appends `f` to the transform of an affinity pipe. */
  def toCacheAffinityPipe[K, V, T, U](cap: CacheAffinityPipe[K, V, T])(f: T => U): CacheAffinityPipe[K, V, U] =
    new CacheAffinityPipe[K, V, U] {
      override val compute = cap.compute
      override val source = cap.source
      override def transform = f.compose(cap.transform)
    }

  /**
   * Maps `f` over a flat-map affinity pipe.
   *
   * This adds a barrier in the same way as the non-affinity version: the
   * input pipe is executed here and `f` is mapped over the gathered results.
   */
  def toCacheAffinityPipe[K, V, T, U](fcap: FlatMapCacheAffinityPipe[K, V, T])(f: T => U): TransformValuePipe[T, U] = {
    val gathered = IterablePipe(fcap.execute)(fcap.compute)
    gathered.map(f)
  }

  /** Appends the flattening function `f` to the transform of an affinity pipe. */
  def toFlatMapCacheAffinityPipe[K, V, T, U](cap: CacheAffinityPipe[K, V, T])(f: T => TraversableOnce[U]): FlatMapCacheAffinityPipe[K, V, U] =
    new FlatMapCacheAffinityPipe[K, V, U] {
      override val compute = cap.compute
      override val source = cap.source
      override def transform = f.compose(cap.transform)
    }

  /**
   * Chains a second flat-map onto a flat-map affinity pipe: `f` is applied to
   * every element the existing transform yields and the nested results are
   * flattened.
   */
  def toFlatMapCacheAffinityPipe[K, V, T, U](fcap: FlatMapCacheAffinityPipe[K, V, T])(f: T => TraversableOnce[U]): FlatMapCacheAffinityPipe[K, V, U] =
    new FlatMapCacheAffinityPipe[K, V, U] {
      override val compute = fcap.compute
      override val source = fcap.source
      override def transform = fcap.transform.andThen(ts => ts.map(f).flatten)
    }

  // ---------------------------------------------------------------------
  // Forking: each variant defers the pipe's execution behind a generator and
  // builds a new pipe from it, reusing the original pipe's compute runner.
  // ---------------------------------------------------------------------

  /** Forks a transform pipe. */
  def forkPipe[S, T](tvp: TransformValuePipe[S, T]): IgnitePipe[T] =
    IgnitePipe.from { () => tvp.execute }(tvp.compute)

  /** Forks a flat-map pipe. */
  def forkPipe[S, T](fvp: FlatMapValuePipe[S, T]): IgnitePipe[T] =
    IgnitePipe.from { () => fvp.execute }(fvp.compute)

  /** Forks an affinity pipe. */
  def forkPipe[K, V, T](cap: CacheAffinityPipe[K, V, T]): IgnitePipe[T] =
    IgnitePipe.from { () => cap.execute }(cap.compute)

  /** Forks a flat-map affinity pipe. */
  def forkPipe[K, V, T](fcap: FlatMapCacheAffinityPipe[K, V, T]): IgnitePipe[T] =
    IgnitePipe.from { () => fcap.execute }(fcap.compute)
}

object ReduceHelper {

  /**
   * Creates a pipe representing the result of the reduction.
   *
   * The pipe reuses the reduction's compute runner and passes values through
   * unchanged. `source` is a method, so the reduction is executed each time
   * the source is read, not when this pipe is created.
   *
   * @param r the reduction whose outcome feeds the pipe
   */
  def toPipe[S, T](r: Reduction[T] with HasComputeConfig[S, _]): IgnitePipe[T] =
    new TransformValuePipe[T, T] {
      override val compute = r.compute
      override def source = r.execute.toIterable
      override def transform = (value: T) => value
    }
}