All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twitter.summingbird.online.executor.FinalFlatMap.scala Maven / Gradle / Ivy

The newest version!
/*
Copyright 2013 Twitter, Inc.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package com.twitter.summingbird.online.executor

import com.twitter.algebird.Semigroup
import com.twitter.bijection.Injection
import com.twitter.util.Future

import com.twitter.summingbird.online.Externalizer

import com.twitter.summingbird.online.FlatMapOperation

import com.twitter.summingbird.online.option.{
  SummerBuilder,
  MaxWaitingFutures,
  MaxFutureWaitTime,
  MaxEmitPerExecute
}
import scala.collection.mutable.{ Map => MMap, ListBuffer }
// The CMaps we generate in the FFM are used as an immutable wrapper around
// a mutable map.
import scala.collection.{ Map => CMap }
import scala.util.control.NonFatal

/**
 * @author Oscar Boykin
 * @author Sam Ritchie
 * @author Ashu Singhal
 * @author Ian O Connell
 */

// This is not a user-settable variable.
// It's usually supplied by the planning system to ensure it's large enough to cover the space
// used by the summers times some delta.
/**
 * Holds a shard count and maps keys onto shard ids.
 *
 * @param get the shard count, used as the modulus when assigning a key to a shard
 */
private[summingbird] case class KeyValueShards(get: Int) {
  /**
   * Computes the shard id for a key.
   *
   * @param k the key to place; only its `hashCode` is consulted
   * @return the absolute value of `k.hashCode % get`
   */
  def summerIdFor[K](k: K): Int = {
    val remainder = k.hashCode % get
    // The remainder keeps the sign of the hash code; fold negatives onto the positive side.
    if (remainder < 0) -remainder else remainder
  }
}

/**
 * Flat-map stage that runs its output through a summer before emitting it.
 *
 * Each incoming event is passed to `flatMapOp`; the resulting key/value pairs are added to
 * the summer obtained from `summerBuilder`. Whatever the summer returns (from `addAll` or
 * `tick`) is regrouped by the shard id that `summerShards` assigns to each key and emitted
 * as `(shardId, Map[Key, Value])` elements.
 *
 * @tparam Event the input element type
 * @tparam Key   the key type produced by the flat map
 * @tparam Value the value type produced by the flat map; a Semigroup is required for it
 * @tparam S     the input state type carried alongside every value
 * @tparam D     the target type of both injections
 * @tparam RC    passed through to AsyncBase unchanged
 * @param flatMapOp         operation turning an Event into (Key, Value) pairs; held via an
 *                          Externalizer in `lockedOp`
 * @param summerBuilder     supplies the summer used as the cache
 * @param maxWaitingFutures forwarded to AsyncBase
 * @param maxWaitingTime    forwarded to AsyncBase
 * @param maxEmitPerExec    forwarded to AsyncBase
 * @param summerShards      assigns each key the shard id its output is grouped under
 * @param pDecoder          exposed as `decoder`
 * @param pEncoder          exposed as `encoder`
 */
class FinalFlatMap[Event, Key, Value: Semigroup, S <: InputState[_], D, RC](
  @transient flatMapOp: FlatMapOperation[Event, (Key, Value)],
  summerBuilder: SummerBuilder,
  maxWaitingFutures: MaxWaitingFutures,
  maxWaitingTime: MaxFutureWaitTime,
  maxEmitPerExec: MaxEmitPerExecute,
  summerShards: KeyValueShards,
  pDecoder: Injection[Event, D],
  pEncoder: Injection[(Int, CMap[Key, Value]), D])
    extends AsyncBase[Event, (Int, CMap[Key, Value]), S, D, RC](maxWaitingFutures,
      maxWaitingTime,
      maxEmitPerExec) {

  type InS = S
  // One emitted element: a shard id paired with the key/value pairs assigned to that shard.
  type OutputElement = (Int, CMap[Key, Value])

  val encoder = pEncoder
  val decoder = pDecoder

  // The flat map operation is marked @transient above and is only reached through this wrapper.
  val lockedOp = Externalizer(flatMapOp)

  type SummerK = Key
  // Each cached value carries the input states that contributed to it.
  type SummerV = (Seq[S], Value)
  lazy val sCache = summerBuilder.getSummer[SummerK, SummerV](implicitly[Semigroup[(Seq[S], Value)]])

  // Lazy transient as const futures are not serializable
  @transient private[this] lazy val noData = List(
    (List(), Future.value(Nil))
  )

  /**
   * Regroups summer output by shard id.
   *
   * @param outData key -> (contributing states, value) as returned by the summer
   * @return `noData` when `outData` is empty; otherwise one entry per shard id, pairing all
   *         states of the keys in that shard with a completed future holding the single
   *         `(shardId, keyValueMap)` element
   */
  private def formatResult(outData: Map[Key, (Seq[S], Value)]): TraversableOnce[(Seq[S], Future[TraversableOnce[OutputElement]])] = {
    if (outData.isEmpty) {
      noData
    } else {
      // The binding is never reassigned; only the map's contents are mutated below.
      val byShard = MMap[Int, (ListBuffer[S], MMap[Key, Value])]()

      outData.foreach {
        case (k, (listS, v)) =>
          val shardId = summerShards.summerIdFor(k)
          val (states, values) = byShard.getOrElseUpdate(shardId, (ListBuffer[S](), MMap[Key, Value]()))
          states ++= listS
          values += k -> v
      }

      byShard.iterator.map {
        case (shardId, (states, values)) =>
          (states, Future.value(List((shardId, values))))
      }
    }
  }

  /** Ticks the summer and formats whatever it returns. */
  override def tick: Future[TraversableOnce[(Seq[S], Future[TraversableOnce[OutputElement]])]] =
    sCache.tick.map(formatResult(_))

  /**
   * Adds the flat-mapped items for one input state to the summer.
   *
   * @param state the input state the items were produced from
   * @param items the key/value pairs produced by the flat map for that state
   * @return the formatted summer output; when `items` is empty, a single entry holding
   *         `state` and no output elements; a failed future if a non-fatal exception is
   *         thrown while caching
   */
  def cache(state: S,
    items: TraversableOnce[(Key, Value)]): Future[TraversableOnce[(Seq[S], Future[TraversableOnce[OutputElement]])]] =
    try {
      val itemL = items.toList
      // nonEmpty is constant time on a List, so the list is only walked once, for size.
      if (itemL.nonEmpty) {
        state.fanOut(itemL.size)
        sCache.addAll(itemL.map {
          case (k, v) =>
            k -> (List(state), v)
        }).map(formatResult(_))
      } else { // Here we handle mapping to nothing, e.g. an option map that yields None
        Future.value(
          List(
            (List(state), Future.value(Nil))
          )
        )
      }
    } catch {
      case NonFatal(e) => Future.exception(e)
    }

  /** Applies the flat map operation to `tup` and caches its output under `state`. */
  override def apply(state: S,
    tup: Event): Future[TraversableOnce[(Seq[S], Future[TraversableOnce[OutputElement]])]] =
    lockedOp.get.apply(tup).flatMap { cache(state, _) }

  /** Closes the flat map operation, then cleans up the summer. */
  override def cleanup: Unit = {
    lockedOp.get.close
    sCache.cleanup
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy