All downloads are free. The search and download features use the official Maven repository.

com.twitter.summingbird.TestGraphs.scala Maven / Gradle / Ivy

The newest version!
/*
 Copyright 2013 Twitter, Inc.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

 http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 */

package com.twitter.summingbird

import com.twitter.algebird.{ MapAlgebra, Monoid, Semigroup }
import com.twitter.summingbird.option.JobId
import org.scalacheck.Arbitrary

/**
 * Helpful functions and test graphs designed to flex Summingbird
 * planners.
 */

object TestGraphs {
  /**
   * Implicit ordering over `Either` that looks only at the side: every
   * `Left` sorts before every `Right`, and two values on the same side
   * compare as equal regardless of their contents.
   */
  implicit def eitherOrd[T, U]: Ordering[Either[T, U]] =
    new Ordering[Either[T, U]] {
      // Left ranks 0, Right ranks 1; the payload is never inspected.
      private def rank(e: Either[T, U]): Int = if (e.isLeft) 0 else 1

      def compare(l: Either[T, U], r: Either[T, U]) = rank(l) - rank(r)
    }

  // Helpers

  /** Adds `v` to the optional prior value; with no prior value the result is `v` itself. */
  private def sum[V: Semigroup](opt: Option[V], v: V): V =
    opt match {
      case Some(prior) => Semigroup.plus(prior, v)
      case None => v
    }

  /**
   * Pairs every element of `it` with the running sum of all elements that
   * came before it (`None` for the first element). The returned iterator is
   * as lazy as `it`; the running sum advances as it is consumed.
   */
  private def scanSum[V: Semigroup](it: Iterator[V]): Iterator[(Option[V], V)] = {
    var running = Option.empty[V]
    it.map { delta =>
      val before = running
      running = Some(sum(before, delta))
      (before, delta)
    }
  }

  /**
   * In-memory simulation of the loop join in ScaldingPlatform loopJoin
   * (summingbird-scalding/src/main/scala/com/twitter/summingbird/scalding/ScaldingPlatform.scala),
   * used when joining against a store whose contents depend on the result of that same join.
   *
   * Events are grouped by key, sorted per key with the implicitly resolved ordering on
   * `(Long, Either[U, V])`, and folded with `scanLeft`. The scan state is a pair:
   *  - first component: the join output produced by the event, if it was a lookup (`Left`);
   *  - second component: the store update produced by the event, as
   *    `(time, (value before the update, delta))`.
   *
   * Because `scanLeft` is used, each key's list starts with the seed `(None, None)`.
   *
   * @param leftAndRight keyed, timestamped events: `Left(u)` is a lookup against the store,
   *                     `Right(v)` is a delta merged directly into the store
   * @param valuesFn     turns a joined lookup `(time, (u, current store value))` into the
   *                     deltas that are summed back into the store
   * @return for each key, the list of `(join output, store update)` pairs, one per scan step
   */
  private def loopJoinInScala[K: Ordering, U, V: Monoid](leftAndRight: Iterable[(K, (Long, Either[U, V]))], valuesFn: ((Long, (U, Option[V]))) => TraversableOnce[(Long, V)]): List[(K, List[(Option[(Long, (U, Option[V]))], Option[(Long, (Option[V], V))])])] = {
    leftAndRight
      .groupBy(_._1)
      .mapValues {
        _.map(_._2).toList.sortBy(identity)
          .scanLeft((Option.empty[(Long, (U, Option[V]))], Option.empty[(Long, (Option[V], V))])) {
            case ((_, None), (time, Left(u))) =>
              /*
               * Lookup while the previous step carried no store update: the join
               * sees no value for this key.
               * NOTE(review): the previous step's update is also None when valuesFn
               * produced nothing for an earlier lookup, so an existing store value
               * would be forgotten here -- confirm this is intended.
               */
              val joinResult = Some((time, (u, None)))
              // Sum everything valuesFn emits; only the V part (v._2) is kept as the delta.
              val sumResult = Semigroup.sumOption(valuesFn(time, (u, None))).map(v => (time, (None, v._2)))
              (joinResult, sumResult)
            case ((_, Some((_, (optv, v)))), (time, Left(u))) =>
              /*
               * Lookup with an existing value: the store currently holds the
               * previous update's prior value plus its delta.
               */
              val currentV = Some(sum(optv, v)) // open question from the original author: isn't u already a sum and optu prev value?
              val joinResult = Some((time, (u, currentV)))
              val sumResult = Semigroup.sumOption(valuesFn(time, (u, currentV))).map(v => (time, (currentV, v._2)))
              (joinResult, sumResult)
            case ((_, None), (time, Right(v))) =>
              /*
               * New data is merged into the store without coming from the service
               * (either from the store history or from a merge after the leftJoin),
               * and there was previously no data for this key.
               */
              val joinResult = None
              val sumResult = Some((time, (None, v)))
              (joinResult, sumResult)
            case ((_, Some((_, (optv, oldv)))), (time, Right(v))) =>
              /*
               * Updating a non-empty key. This should only be triggered by a
               * data stream merged in after the join, since store initialization.
               */
              val joinResult = None
              // Value held by the store before this delta is applied.
              val currentV = Some(sum(optv, oldv))
              val sumResult = Some((time, (currentV, v)))
              (joinResult, sumResult)
          }
      }.toList
  }

  // Test graphs

  /**
   * In-memory reference for [[diamondJob]]: applies `fnA` and `fnB` to every
   * source item and sums all resulting pairs by key.
   */
  def diamondJobInScala[T, K, V: Monoid](source: TraversableOnce[T])(fnA: T => TraversableOnce[(K, V)])(fnB: T => TraversableOnce[(K, V)]): Map[K, V] = {
    // Materialize as a Stream because the input is walked once per branch.
    val items = source.toStream
    MapAlgebra.sumByKey(items.flatMap(fnA) ++ items.flatMap(fnB))
  }

  /**
   * Diamond-shaped graph: the source is written to `sink`, fanned out through
   * `fnA` and `fnB`, and the two branches are merged and summed into `store`.
   */
  def diamondJob[P <: Platform[P], T, K, V: Monoid](source: Producer[P, T], sink: P#Sink[T], store: P#Store[K, V])(fnA: T => TraversableOnce[(K, V)])(fnB: T => TraversableOnce[(K, V)]): TailProducer[P, (K, (Option[V], V))] = {
    val tapped = source.write(sink)
    val branchA = tapped.flatMap(fnA)
    val branchB = tapped.flatMap(fnB)
    val merged = branchA.merge(branchB)
    merged.sumByKey(store)
  }

  /** In-memory reference for [[singleStepJob]]: flatMaps the source with `fn` and sums by key. */
  def singleStepInScala[T, K, V: Monoid](source: TraversableOnce[T])(fn: T => TraversableOnce[(K, V)]): Map[K, V] = {
    val pairs = source.flatMap(fn)
    MapAlgebra.sumByKey(pairs)
  }

  /** Single flatMap (named "FM") feeding a sum into `store`. */
  def singleStepJob[P <: Platform[P], T, K, V: Monoid](source: Producer[P, T], store: P#Store[K, V])(fn: T => TraversableOnce[(K, V)]): TailProducer[P, (K, (Option[V], V))] = {
    val flatMapped = source.flatMap(fn).name("FM")
    flatMapped.sumByKey(store)
  }

  /**
   * In-memory reference for [[twinStepOptionMapFlatMapJob]]: keeps the items
   * for which `fnA` is defined, expands them with `fnB` and sums by key.
   */
  def twinStepOptionMapFlatMapScala[T1, T2, K, V: Monoid](source: TraversableOnce[T1])(fnA: T1 => Option[T2], fnB: T2 => TraversableOnce[(K, V)]): Map[K, V] = {
    val defined = source.flatMap { item => fnA(item).iterator }
    MapAlgebra.sumByKey(defined.flatMap(fnB))
  }

  /** An `optionMap` step followed by a `flatMap` step, summed into `store`. */
  def twinStepOptionMapFlatMapJob[P <: Platform[P], T1, T2, K, V: Monoid](source: Producer[P, T1], store: P#Store[K, V])(fnA: T1 => Option[T2], fnB: T2 => TraversableOnce[(K, V)]): TailProducer[P, (K, (Option[V], V))] = {
    val defined = source.optionMap(fnA)
    defined.flatMap(fnB).sumByKey(store)
  }

  /**
   * In-memory reference for [[singleStepMapKeysJob]]: expands each item with
   * `fnA`, fans every key out through `fnB` (keeping the value) and sums by
   * the new key.
   */
  def singleStepMapKeysInScala[T, K1, K2, V: Monoid](source: TraversableOnce[T])(fnA: T => TraversableOnce[(K1, V)], fnB: K1 => TraversableOnce[K2]): Map[K2, V] = {
    val rekeyed = source.flatMap(fnA).flatMap {
      case (k1, v) => fnB(k1).map(k2 => (k2, v))
    }
    MapAlgebra.sumByKey(rekeyed)
  }

  /** flatMap into `(K1, V)` pairs, re-key with `flatMapKeys`, then sum into `store`. */
  def singleStepMapKeysJob[P <: Platform[P], T, K1, K2, V: Monoid](source: Producer[P, T], store: P#Store[K2, V])(fnA: T => TraversableOnce[(K1, V)], fnB: K1 => TraversableOnce[K2]): TailProducer[P, (K2, (Option[V], V))] = {
    val keyedByK1 = source.flatMap(fnA)
    keyedByK1.flatMapKeys(fnB).sumByKey(store)
  }

  /**
   * In-memory reference for [[repeatedTupleLeftJoinJob]]: every pre-join pair
   * is emitted twice, joined against `service`, passed through `postJoinFn`
   * and summed by key.
   */
  def repeatedTupleLeftJoinInScala[T, U, JoinedU, K, V: Monoid](source: TraversableOnce[T])(service: K => Option[JoinedU])(preJoinFn: T => TraversableOnce[(K, U)])(postJoinFn: ((K, (U, Option[JoinedU]))) => TraversableOnce[(K, V)]): Map[K, V] = {
    val doubled = source.flatMap(preJoinFn).flatMap { pair => List(pair, pair) }
    val joined = doubled.map { case (k, u) => (k, (u, service(k))) }
    MapAlgebra.sumByKey(joined.flatMap(postJoinFn))
  }

  /**
   * Like [[leftJoinJob]], except that every pre-join pair is duplicated
   * before it is joined against `service`.
   */
  def repeatedTupleLeftJoinJob[P <: Platform[P], T, U, JoinedU, K, V: Monoid](
    source: Producer[P, T],
    service: P#Service[K, JoinedU],
    store: P#Store[K, V])(preJoinFn: T => TraversableOnce[(K, U)])(postJoinFn: ((K, (U, Option[JoinedU]))) => TraversableOnce[(K, V)]): TailProducer[P, (K, (Option[V], V))] = {
    val keyed = source.name("My named source").flatMap(preJoinFn)
    val doubled = keyed.flatMap { pair => List(pair, pair) }
    val joined = doubled.leftJoin(service).name("My named flatmap")
    joined.flatMap(postJoinFn).sumByKey(store)
  }

  /**
   * In-memory reference for [[leftJoinJob]]: pre-join pairs are looked up in
   * `service`, passed through `postJoinFn` and summed by key.
   */
  def leftJoinInScala[T, U, JoinedU, K, V: Monoid](source: TraversableOnce[T])(service: K => Option[JoinedU])(preJoinFn: T => TraversableOnce[(K, U)])(postJoinFn: ((K, (U, Option[JoinedU]))) => TraversableOnce[(K, V)]): Map[K, V] = {
    val keyed = source.flatMap(preJoinFn)
    val joined = keyed.map { case (k, u) => (k, (u, service(k))) }
    MapAlgebra.sumByKey(joined.flatMap(postJoinFn))
  }

  /**
   * flatMap into keyed pairs, left-join against `service`, flatMap the joined
   * pairs with `postJoinFn` and sum into `store`. The source and the join are
   * given explicit names.
   */
  def leftJoinJob[P <: Platform[P], T, U, JoinedU, K, V: Monoid](
    source: Producer[P, T],
    service: P#Service[K, JoinedU],
    store: P#Store[K, V])(preJoinFn: T => TraversableOnce[(K, U)])(postJoinFn: ((K, (U, Option[JoinedU]))) => TraversableOnce[(K, V)]): TailProducer[P, (K, (Option[V], V))] = {
    val keyed = source.name("My named source").flatMap(preJoinFn)
    val joined = keyed.leftJoin(service).name("My named flatmap")
    joined.flatMap(postJoinFn).sumByKey(store)
  }

  /**
   * In-memory reference for [[leftJoinJobWithFlatMapValues]]: after the join,
   * `postJoinFn` only sees the joined value and the key is carried through
   * unchanged.
   */
  def leftJoinWithFlatMapValuesInScala[T, U, JoinedU, K, V: Monoid](source: TraversableOnce[T])(service: K => Option[JoinedU])(preJoinFn: T => TraversableOnce[(K, U)])(postJoinFn: ((U, Option[JoinedU])) => TraversableOnce[V]): Map[K, V] = {
    val joined = source.flatMap(preJoinFn).map { case (k, u) => (k, (u, service(k))) }
    val expanded = joined.flatMap {
      case (k, joinedValue) => postJoinFn(joinedValue).map(out => (k, out))
    }
    MapAlgebra.sumByKey(expanded)
  }

  /**
   * Same shape as [[leftJoinJob]], but the post-join step is a
   * `flatMapValues`, so keys pass through the join untouched.
   */
  def leftJoinJobWithFlatMapValues[P <: Platform[P], T, U, JoinedU, K, V: Monoid](
    source: Producer[P, T],
    service: P#Service[K, JoinedU],
    store: P#Store[K, V])(preJoinFn: T => TraversableOnce[(K, U)])(postJoinFn: ((U, Option[JoinedU])) => TraversableOnce[V]): TailProducer[P, (K, (Option[V], V))] = {
    val keyed = source.name("My named source").flatMap(preJoinFn)
    val joined = keyed.leftJoin(service).name("My named flatmap")
    joined.flatMapValues(postJoinFn).sumByKey(store)
  }

  /**
   * In-memory reference for [[leftJoinWithStoreJob]]: `source1` is summed into
   * a first store, and `source2` is left-joined against the time-ordered
   * contents of that store; the joined values are passed through `postJoinFn`
   * and summed into a second store.
   *
   * @param source1    events feeding the first store (flattened by `simpleFM1`)
   * @param source2    events that look up the first store (flattened by `simpleFM2`)
   * @param simpleFM1  produces timestamped `(K, JoinedU)` deltas for the first store
   * @param simpleFM2  produces timestamped `(K, U)` lookups
   * @param postJoinFn maps each timestamped join result to deltas for the final store
   * @return `(first store, final store)`
   */
  def leftJoinWithStoreInScala[T1, T2, U, JoinedU: Monoid, K: Ordering, V: Monoid](source1: TraversableOnce[T1], source2: TraversableOnce[T2])(simpleFM1: T1 => TraversableOnce[(Long, (K, JoinedU))])(simpleFM2: T2 => TraversableOnce[(Long, (K, U))])(postJoinFn: ((Long, (K, (U, Option[JoinedU])))) => TraversableOnce[(Long, (K, V))]): (Map[K, JoinedU], Map[K, V]) = {

    // source1 is a TraversableOnce, so it may be a one-shot Iterator. It feeds
    // both the first store and the delta stream below, so materialize it once
    // instead of traversing it twice (the second pass would otherwise be empty).
    val storeUpdates: List[(Long, (K, JoinedU))] = source1.flatMap(simpleFM1).toList

    val firstStore = MapAlgebra.sumByKey(
      storeUpdates.map { case (_, kju) => kju } // drop the time from the key for the store
    )

    // create the delta stream: per time and key, each delta paired with the
    // running sum of the deltas that preceded it within that group
    val sumStream: Iterable[(Long, (K, (Option[JoinedU], JoinedU)))] =
      storeUpdates
        .groupBy(_._1)
        .mapValues {
          _.map { case (_, (k, joinedu)) => (k, joinedu) }
            .groupBy(_._1)
            .mapValues { l => scanSum(l.iterator.map(_._2)).toList }
            .toIterable
            .flatMap { case (k, lv) => lv.map { case (optju, ju) => (k, (optju, ju)) } }
        }
        .toIterable
        .flatMap { case (time, lv) => lv.map { case (k, (optju, ju)) => (time, (k, (optju, ju))) } }

    // zip the left and right streams
    val leftAndRight: Iterable[(K, (Long, Either[U, JoinedU]))] =
      source2
        .flatMap(simpleFM2)
        .toList
        .map { case (time, (k, u)) => (k, (time, Left(u))) }
        .++(sumStream.map { case (time, (k, (_, ju))) => (k, (time, Right(ju))) })

    // scan left to join the left values and the right summing result stream.
    // State: (latest store value with its time, join result emitted by this step).
    val resultStream: List[(Long, (K, (U, Option[JoinedU])))] =
      leftAndRight
        .groupBy(_._1)
        .mapValues {
          _.map(_._2).toList.sortBy(identity)
            .scanLeft((Option.empty[(Long, JoinedU)], Option.empty[(Long, U, Option[JoinedU])])) {
              case ((None, _), (time, Left(u))) =>
                // There was no value previously
                (None, Some((time, u, None)))
              case ((prev @ Some((_, ju)), _), (time, Left(u))) =>
                // gate the time for window join?
                (prev, Some((time, u, Some(ju))))
              case ((None, _), (time, Right(joined))) =>
                // first delta for this key: it becomes the store value
                (Some((time, joined)), None)
              case ((Some((_, oldJ)), _), (time, Right(joined))) =>
                // later delta: fold it into the existing store value
                val nextJoined = Semigroup.plus(oldJ, joined)
                (Some((time, nextJoined)), None)
            }
        }.toList.flatMap { case (k, lv) => lv.map { case (_, optuju) => (k, optuju) } }
        .flatMap {
          case (k, opt) => opt.map { case (time, u, optju) => (time, (k, (u, optju))) }
        }

    // compute the final store result after join
    val finalStore = MapAlgebra.sumByKey(
      resultStream
        .flatMap(postJoinFn)
        .map { case (_, (k, v)) => (k, v) } // drop the time
    )
    (firstStore, finalStore)
  }

  /**
   * Two summers sharing one store: `source1` is summed into
   * `storeAndService`, while `source2` is left-joined against that same
   * store (used as a service) and the join output is summed into `store`.
   */
  def leftJoinWithStoreJob[P <: Platform[P], T1, T2, U, K, JoinedU: Monoid, V: Monoid](
    source1: Producer[P, T1],
    source2: Producer[P, T2],
    storeAndService: P#Store[K, JoinedU] with P#Service[K, JoinedU],
    store: P#Store[K, V])(simpleFM1: T1 => TraversableOnce[(K, JoinedU)])(simpleFM2: T2 => TraversableOnce[(K, U)])(postJoinFn: ((K, (U, Option[JoinedU]))) => TraversableOnce[(K, V)]): TailProducer[P, (K, (Option[V], V))] = {

    // Summer that populates the store which also backs the join.
    val feedStore: Summer[P, K, JoinedU] =
      source1.flatMap(simpleFM1).sumByKey(storeAndService)

    // Summer that joins the second source against that store.
    val joinAndSum: Summer[P, K, V] = {
      val joined = source2.flatMap(simpleFM2).leftJoin(storeAndService)
      joined.flatMap(postJoinFn).sumByKey(store)
    }

    feedStore.also(joinAndSum)
  }

  /**
   * In-memory reference for a join against a store that is itself fed by the
   * join's output. Every source event is a lookup; the store deltas produced
   * by `loopJoinInScala` are summed per key to give the final store.
   */
  def leftJoinWithDependentStoreInScala[T, U, K: Ordering, V: Monoid](source: TraversableOnce[T])(simpleFM: T => TraversableOnce[(Long, (K, U))])(flatMapValuesFn: ((Long, (U, Option[V]))) => TraversableOnce[(Long, V)]): Map[K, V] = {

    // All events are lookups (Left); nothing is merged into the store directly.
    val lookups: Iterable[(K, (Long, Either[U, V]))] =
      source
        .flatMap(simpleFM)
        .toList
        .map { case (time, (k, u)) => (k, (time, Left(u))) }

    // Run the simulated loop join over the lookups.
    val joined = loopJoinInScala(lookups, flatMapValuesFn)

    // Keep only the store deltas, dropping the time and the prior value.
    val storeDeltas = joined.flatMap {
      case (k, steps) => steps.collect { case (_, Some((_, (_, v)))) => (k, v) }
    }

    // Sum the deltas into the final store.
    MapAlgebra.sumByKey(storeDeltas)
  }

  /**
   * Joins the source against `storeAndService` and, after two
   * `flatMapValues` steps, sums the result back into that same store.
   */
  def leftJoinWithDependentStoreJob[P <: Platform[P], T, V1, U, K, V: Monoid](
    source1: Producer[P, T],
    storeAndService: P#Store[K, V] with P#Service[K, V])(simpleFM1: T => TraversableOnce[(K, U)])(valuesFlatMap1: ((U, Option[V])) => TraversableOnce[V1])(valuesFlatMap2: (V1) => TraversableOnce[V]): TailProducer[P, (K, (Option[V], V))] = {

    val joined = source1.flatMap(simpleFM1).leftJoin(storeAndService)
    val deltas = joined.flatMapValues(valuesFlatMap1).flatMapValues(valuesFlatMap2)
    deltas.sumByKey(storeAndService)
  }

  /**
   * In-memory reference for [[leftJoinWithDependentStoreJoinFanoutJob]]. The
   * simulated loop join yields a join stream and a store-delta stream: the
   * join stream is passed through `flatMapFn` and summed into one map, the
   * store deltas are summed into the other.
   *
   * @return `(store fed by the join loop, store fed by flatMapFn)`
   */
  def leftJoinWithDependentStoreJoinFanoutInScala[T, U, K: Ordering, V: Monoid, V1: Monoid](source: TraversableOnce[T])(simpleFM: T => TraversableOnce[(Long, (K, U))])(flatMapValuesFn: ((Long, (U, Option[V]))) => TraversableOnce[(Long, V)])(flatMapFn: ((Long, (K, (U, Option[V])))) => TraversableOnce[(Long, (K, V1))]): (Map[K, V], Map[K, V1]) = {

    // All events are lookups (Left); nothing is merged into the store directly.
    val lookups: Iterable[(K, (Long, Either[U, V]))] =
      source
        .flatMap(simpleFM)
        .toList
        .map { case (time, (k, u)) => (k, (time, Left(u))) }

    // Run the simulated loop join over the lookups.
    val joined = loopJoinInScala(lookups, flatMapValuesFn)

    // Join outputs, re-keyed by time for flatMapFn.
    val joinEvents = joined.flatMap {
      case (k, steps) =>
        steps.collect { case (Some((time, (u, optv))), _) => (time, (k, (u, optv))) }
    }

    // First store: fed by the join stream through flatMapFn (time dropped).
    val storeAfterFlatMap = MapAlgebra.sumByKey(
      joinEvents
        .flatMap(flatMapFn(_))
        .map { case (_, (k, v1)) => (k, v1) }
    )

    // Second store: the deltas the loop join wrote back (time and prior value dropped).
    val storeAfterJoin = MapAlgebra.sumByKey(
      joined.flatMap {
        case (k, steps) => steps.collect { case (_, Some((_, (_, v)))) => (k, v) }
      }
    )

    (storeAfterJoin, storeAfterFlatMap)
  }

  /**
   * The join against `storeAndService` fans out into two summers: one feeds
   * the joined store itself, the other feeds the independent `store`.
   */
  def leftJoinWithDependentStoreJoinFanoutJob[P <: Platform[P], T1, V1: Monoid, U, K, V: Monoid](
    source1: Producer[P, T1],
    storeAndService: P#Store[K, V] with P#Service[K, V],
    store: P#Store[K, V1])(simpleFM1: T1 => TraversableOnce[(K, U)])(valuesFlatMap1: ((U, Option[V])) => TraversableOnce[V])(flatMapFn: ((K, (U, Option[V]))) => TraversableOnce[(K, V1)]): TailProducer[P, (K, (Option[V], V))] = {

    val joined: KeyedProducer[P, K, (U, Option[V])] =
      source1.flatMap(simpleFM1).leftJoin(storeAndService)

    // Branch that writes back into the store being joined against.
    val loopSummer: Summer[P, K, V] =
      joined.flatMapValues(valuesFlatMap1).sumByKey(storeAndService)

    // Branch that writes into the independent store.
    val fanoutSummer: Summer[P, K, V1] =
      joined.flatMap(flatMapFn).sumByKey(store)

    fanoutSummer.also(loopSummer)
  }

  /**
   * Three flatMapped sources and one source that is mapped, left-joined
   * against `service` and flatMapped are merged and summed into `store`.
   */
  def realJoinTestJob[P <: Platform[P], T1, T2, T3, T4, K1, K2, U, JoinedU, V: Monoid](
    source1: Producer[P, T1],
    source2: Producer[P, T2],
    source3: Producer[P, T3],
    source4: Producer[P, T4],
    service: P#Service[K1, JoinedU],
    store: P#Store[K2, V],
    simpleFM1: T1 => TraversableOnce[(K2, V)],
    simpleFM2: T2 => TraversableOnce[(K2, V)],
    simpleFM3: T3 => TraversableOnce[(K2, V)],
    preJoin: T4 => (K1, U),
    postJoin: ((K1, (U, Option[JoinedU]))) => TraversableOnce[(K2, V)]): TailProducer[P, (K2, (Option[V], V))] = {
    val first = source1.flatMap(simpleFM1)
    val second = source2.flatMap(simpleFM2)
    val third = source3.flatMap(simpleFM3)
    val joinedBranch = source4.map(preJoin).leftJoin(service).flatMap(postJoin)
    val merged = first.merge(second).merge(third).merge(joinedBranch)
    merged.sumByKey(store).name("Customer Supplied Job")
  }

  /** flatMap, sum into `store`, then write the summer's output to `sink`. */
  def writtenPostSum[P <: Platform[P], T, K, V: Monoid](source: Producer[P, T], sink: P#Sink[(K, (Option[V], V))], store: P#Store[K, V])(fnA: T => TraversableOnce[(K, V)]): TailProducer[P, (K, (Option[V], V))] = {
    val summed = source.flatMap(fnA).sumByKey(store)
    summed.write(sink)
  }

  /**
   * In-memory reference for [[realJoinTestJob]]: concatenates the three
   * flatMapped sources with the joined fourth source and sums by key.
   */
  def realJoinTestJobInScala[P <: Platform[P], T1, T2, T3, T4, K1, K2, U, JoinedU, V: Monoid](
    source1: List[T1],
    source2: List[T2],
    source3: List[T3],
    source4: List[T4],
    service: K1 => Option[JoinedU],
    simpleFM1: T1 => TraversableOnce[(K2, V)],
    simpleFM2: T2 => TraversableOnce[(K2, V)],
    simpleFM3: T3 => TraversableOnce[(K2, V)],
    preJoin: T4 => (K1, U),
    postJoin: ((K1, (U, Option[JoinedU]))) => TraversableOnce[(K2, V)]): Map[K2, V] = {
    val first = source1.flatMap(simpleFM1)
    val second = source2.flatMap(simpleFM2)
    val third = source3.flatMap(simpleFM3)
    val joinedBranch = source4
      .map(preJoin)
      .map { keyed => (keyed._1, (keyed._2, service(keyed._1))) }
      .flatMap(postJoin)
    val everything = first ::: second ::: third ::: joinedBranch
    MapAlgebra.sumByKey(everything)
  }

  /**
   * In-memory reference for [[multipleSummerJob]]: the source is expanded
   * with `fnR` once, and that shared intermediate is summed two ways.
   *
   * @param source input items
   * @param fnR    shared expansion step applied to every item
   * @param fnA    produces the pairs summed into the first map
   * @param fnB    produces the pairs summed into the second map
   * @return `(sums of fnA output, sums of fnB output)`
   */
  def multipleSummerJobInScala[T1, T2, K1, V1: Monoid, K2, V2: Monoid](source: List[T1])(fnR: T1 => TraversableOnce[T2], fnA: T2 => TraversableOnce[(K1, V1)], fnB: T2 => TraversableOnce[(K2, V2)]): (Map[K1, V1], Map[K2, V2]) = {
    // Expand once and share the result, mirroring the single `combined`
    // producer in multipleSummerJob, rather than re-running fnR per summer.
    val expanded = source.flatMap(fnR)
    val mapA = MapAlgebra.sumByKey(expanded.flatMap(fnA))
    val mapB = MapAlgebra.sumByKey(expanded.flatMap(fnB))
    (mapA, mapB)
  }

  /**
   * One shared `flatMap(fnR)` feeding two summers: `fnA` output goes to
   * `store1`, `fnB` output goes to `store2`. The `store2` summer is the
   * tail that is returned.
   */
  def multipleSummerJob[P <: Platform[P], T1, T2, K1, V1: Monoid, K2, V2: Monoid](source: Producer[P, T1], store1: P#Store[K1, V1], store2: P#Store[K2, V2])(fnR: T1 => TraversableOnce[T2], fnA: T2 => TraversableOnce[(K1, V1)],
    fnB: T2 => TraversableOnce[(K2, V2)]): TailProducer[P, (K2, (Option[V2], V2))] = {
    val expanded = source.flatMap(fnR)
    val summerB = expanded.flatMap(fnB).sumByKey(store2)
    val summerA = expanded.flatMap(fnA).sumByKey(store1)
    summerA.also(summerB)
  }

  /** A graph with no summer: flatMap the source and write the result to `sink`. */
  def mapOnlyJob[P <: Platform[P], T, U](
    source: Producer[P, T],
    sink: P#Sink[U])(mapOp: T => TraversableOnce[U]): TailProducer[P, U] = {
    val mapped = source.flatMap(mapOp)
    mapped.write(sink)
  }

  /**
   * Looks every source item up in `srv`, keeps the items for which a value
   * was found, and writes the `(item, value)` pairs to `sink`.
   */
  def lookupJob[P <: Platform[P], T, U](
    source: Producer[P, T], srv: P#Service[T, U], sink: P#Sink[(T, U)]): TailProducer[P, (T, U)] = {
    val lookedUp = source.lookup(srv)
    val found = lookedUp.collectValues { case Some(u) => u }
    found.write(sink)
  }

  /**
   * In-memory reference for [[lookupJob]]: pairs each item with the value
   * `srv` returns for it and drops the items for which `srv` returns `None`.
   */
  def lookupJobInScala[T, U](in: List[T], srv: (T) => Option[U]): List[(T, U)] =
    for {
      item <- in
      value <- srv(item)
    } yield (item, value)

  /**
   * Two chained summers: pairs are summed into `store`, then each emitted
   * delta (the second element of the summer's output value) is re-keyed
   * through `fn` and summed into `store2`.
   */
  def twoSumByKey[P <: Platform[P], K, V: Monoid, K2](
    source: Producer[P, (K, V)],
    store: P#Store[K, V],
    fn: K => List[K2],
    store2: P#Store[K2, V]): TailProducer[P, (K2, (Option[V], V))] = {
    val firstSum = source.sumByKey(store)
    val deltas = firstSum.mapValues(_._2)
    deltas.flatMapKeys(fn).sumByKey(store2)
  }

  /**
   * In-memory reference for [[twoSumByKey]].
   *
   * @return `(input summed by original key, per-key deltas re-keyed through fn and summed)`
   */
  def twoSumByKeyInScala[K1, V: Semigroup, K2](in: List[(K1, V)], fn: K1 => List[K2]): (Map[K1, V], Map[K2, V]) = {
    val totals = MapAlgebra.sumByKey(in)

    // Per key: each delta paired with the running sum that preceded it.
    val perKey = in.groupBy(_._1)
      .mapValues { kvs => scanSum(kvs.iterator.map(_._2)).toList }
      .toIterable

    // Only the delta is forwarded to the second sum, under every key fn produces.
    val rekeyed = perKey.flatMap {
      case (k, steps) =>
        steps.flatMap { case (_, delta) => fn(k).map(k2 => (k2, delta)) }
    }

    (totals, MapAlgebra.sumByKey(rekeyed))
  }

  /**
   * [[singleStepJob]] with counters attached: one incremented per source item
   * in the flatMap, one incremented by 2 per pair in a pass-through filter,
   * and one incremented per pair in an identity map.
   */
  def jobWithStats[P <: Platform[P], T, K, V: Monoid](id: JobId, source: Producer[P, T], store: P#Store[K, V])(fn: T => TraversableOnce[(K, V)]): TailProducer[P, (K, (Option[V], V))] = {
    implicit val jobID: JobId = id

    // All three counters live in the same group.
    def testCounter(name: String) = Counter(Group("counter.test"), Name(name))

    val origCounter = testCounter("orig_counter")
    val fmCounter = testCounter("fm_counter")
    val fltrCounter = testCounter("fltr_counter")

    val flatMapped = source.flatMap { item =>
      origCounter.incr
      fn(item)
    }.name("FM")

    val counted = flatMapped
      .filter { _ =>
        fmCounter.incrBy(2)
        true // never drops anything; the filter exists only to bump the counter
      }
      .map { pair =>
        fltrCounter.incr
        pair
      }

    counted.sumByKey(store)
  }
}

/**
 * Checkers that run graphs from the `TestGraphs` companion object on a
 * concrete platform and compare the store contents with the in-memory
 * reference implementation.
 *
 * @param platform      platform used to plan the jobs
 * @param store         supplies a store for each check
 * @param sink          supplies a sink for each check that needs one
 * @param sourceMaker   turns the input items into a producer on the platform
 * @param toLookupFn    exposes a store as a key lookup function
 * @param toSinkChecker decides whether a sink holds the expected items
 * @param run           executes a plan on the platform
 */
class TestGraphs[P <: Platform[P], T: Manifest: Arbitrary, K: Arbitrary, V: Arbitrary: Equiv: Monoid](platform: P)(
    store: () => P#Store[K, V])(sink: () => P#Sink[T])(
        sourceMaker: TraversableOnce[T] => Producer[P, T])(
            toLookupFn: P#Store[K, V] => (K => Option[V]))(
                toSinkChecker: (P#Sink[T], List[T]) => Boolean)(
                    run: (P, P#Plan[_]) => Unit) {

  /**
   * Runs `TestGraphs.diamondJob` over `items` and checks both the store and
   * the sink.
   *
   * Every key of the in-memory result must map to an equivalent value in the
   * store (a key missing from the store is treated as `Monoid.zero`);
   * mismatches are printed. Keys present only in the store are not checked.
   * The sink is checked with `toSinkChecker` only when the store comparison
   * succeeded.
   */
  def diamondChecker(items: List[T], fnA: T => List[(K, V)], fnB: T => List[(K, V)]): Boolean = {
    val currentStore = store()
    val currentSink = sink()
    // Use the supplied platform to execute the source into the
    // supplied store.
    val plan = platform.plan {
      TestGraphs.diamondJob(sourceMaker(items), currentSink, currentStore)(fnA)(fnB)
    }
    run(platform, plan)
    val lookupFn = toLookupFn(currentStore)
    TestGraphs.diamondJobInScala(items)(fnA)(fnB).forall {
      case (k, v) =>
        val lv = lookupFn(k).getOrElse(Monoid.zero)
        val eqv = Equiv[V].equiv(v, lv)
        if (!eqv) {
          println(s"in diamondChecker: $k, $v is scala result, but platform gave $lv")
        }
        eqv
    } && toSinkChecker(currentSink, items)
  }

  /**
   * Wires `fn` into `TestGraphs.singleStepJob`, runs it over `items` on the
   * supplied platform with a store obtained from `store()`, and compares the
   * outcome with `TestGraphs.singleStepInScala`.
   *
   * Results are read back through `toLookupFn`; a key missing from the store
   * is treated as `Monoid.zero`. Only keys present in the in-memory result
   * are compared.
   *
   * @return true when every key of the in-memory result has an equivalent
   *         value in the store
   */
  def singleStepChecker(items: List[T], fn: T => List[(K, V)]): Boolean = {
    val currentStore = store()
    // Use the supplied platform to execute the source into the
    // supplied store.
    val plan = platform.plan {
      TestGraphs.singleStepJob(sourceMaker(items), currentStore)(fn)
    }
    run(platform, plan)
    val lookupFn = toLookupFn(currentStore)
    TestGraphs.singleStepInScala(items)(fn).forall {
      case (k, v) =>
        val lv = lookupFn(k).getOrElse(Monoid.zero)
        Equiv[V].equiv(v, lv)
    }
  }

  /**
   * Wires `preJoinFn` and `postJoinFn` into `TestGraphs.leftJoinJob` with the
   * given `service`, runs it over `items` on the supplied platform with a
   * store obtained from `store()`, and compares the outcome with
   * `TestGraphs.leftJoinInScala`.
   *
   * The in-memory side queries the service through `serviceToFn`. Results are
   * read back through `toLookupFn`; a key missing from the store is treated
   * as `Monoid.zero`. Only keys present in the in-memory result are compared.
   *
   * @return true when every key of the in-memory result has an equivalent
   *         value in the store
   */
  def leftJoinChecker[U: Arbitrary, JoinedU: Arbitrary](service: P#Service[K, JoinedU], serviceToFn: P#Service[K, JoinedU] => (K => Option[JoinedU]),
    items: List[T], preJoinFn: T => List[(K, U)],
    postJoinFn: ((K, (U, Option[JoinedU]))) => List[(K, V)]): Boolean = {
    val currentStore = store()

    val plan = platform.plan {
      TestGraphs.leftJoinJob(sourceMaker(items), service, currentStore)(preJoinFn)(postJoinFn)
    }
    run(platform, plan)
    val serviceFn = serviceToFn(service)
    val lookupFn = toLookupFn(currentStore)

    // Use the shared in-memory reference, as the other checkers do, instead
    // of re-implementing the join inline.
    TestGraphs.leftJoinInScala(items)(serviceFn)(preJoinFn)(postJoinFn).forall {
      case (k, v) =>
        val lv = lookupFn(k).getOrElse(Monoid.zero)
        Equiv[V].equiv(v, lv)
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy