
com.gilt.gfc.aws.kinesis.client.KCLWorkerRunner.scala

package com.gilt.gfc.aws.kinesis.client

import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer
import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorFactory
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.{KinesisClientLibConfiguration, Worker}
import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason
import com.amazonaws.services.kinesis.metrics.interfaces.IMetricsFactory
import com.amazonaws.services.kinesis.model.Record
import com.gilt.gfc.logging.Loggable

import scala.concurrent.duration._
import scala.concurrent.{Await, ExecutionContext, Future}
import scala.language.postfixOps
import scala.util.control.NonFatal
import scala.util.{Failure, Success, Try}

/**
 * A helper class that merges required/optional/default parameters and runs a KCL Worker.
 * A usage sketch follows the class definition below.
 *
 * @param config             KCL config, @see KCLConfiguration for some useful defaults
 * @param checkpointInterval how often to save a checkpoint to DynamoDB
 * @param numRetries         how many times to retry an operation on exception before giving up
 * @param initialize         (ShardId) => Unit : additional code to execute when a handler is initialized
 * @param shutdown           (ShardId, Checkpointer, ShutdownReason) => Unit : additional code to execute on shutdown
 * @param metricsFactory     optional IMetricsFactory to pass to the KCL Worker; when absent the Worker's default is used
 */
case class KCLWorkerRunner (
  config: KinesisClientLibConfiguration
, checkpointInterval: FiniteDuration = 5 minutes
, numRetries: Int = 3
, initialize: (String) => Unit = (_) => ()
, shutdown: (String, IRecordProcessorCheckpointer, ShutdownReason) => Unit = (_,_,_) => ()
, metricsFactory: Option[IMetricsFactory] = None
) extends Loggable {

  /** Override default checkpointInterval. */
  def withCheckpointInterval( cpi: FiniteDuration
                            ): KCLWorkerRunner = {

    this.copy(checkpointInterval = cpi)
  }


  /** Override default num retries. */
  def withNumRetries( n: Int
                    ): KCLWorkerRunner = {

    this.copy(numRetries = n)
  }


  /** Override default (NOOP) init function. */
  def withInitialize( init: (String) => Unit
                    ): KCLWorkerRunner = {

    this.copy(initialize = init)
  }


  /** Override default (NOOP) shutdown function. */
  def withShutdown( sd: (String, IRecordProcessorCheckpointer, ShutdownReason) => Unit
                  ): KCLWorkerRunner = {

    this.copy(shutdown = sd)
  }

  def withMetricsFactory(factory: IMetricsFactory): KCLWorkerRunner = {
    this.copy(metricsFactory = Some(factory))
  }
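
  // A hypothetical sketch of chaining the builder-style overrides above (the values are
  // illustrative, not library defaults, and `cloudWatchMetricsFactory` is assumed to exist):
  //
  //   KCLWorkerRunner(config)
  //     .withCheckpointInterval(1 minute)
  //     .withNumRetries(5)
  //     .withInitialize(shardId => println(s"Initialized handler for shard ${shardId}"))
  //     .withMetricsFactory(cloudWatchMetricsFactory)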

  /**
   * Run KCL worker with the given callback.
   *
   * @param processRecords     (ShardId, Records, Checkpointer) => Unit : Kinesis record handler
   * @param evReader           evidence that an implementation of KinesisRecordReader[A] is implicitly available in scope
   */
  def runBatchProcessor[A]( processRecords: (String, Seq[A], IRecordProcessorCheckpointer) => Unit
                         )( implicit evReader: KinesisRecordReader[A]
                          ): Unit = {
    try {

      val recordProcessorFactory = KCLRecordProcessorFactory(
        checkpointInterval
      , numRetries
      , initialize
      , shutdown
      ) { (shardId, records, checkpointer) =>

        val (as,errs) = records.map(tryToConvertRecord[A] _).partition(_.isSuccess)

        // process what we could parse; if this call throws an exception, the whole batch will be retried
        processRecords(shardId, as.map(_.get), checkpointer)

        // log records we could not parse; it is pointless to retry them
        errs.map(_.failed.get).foreach { e => error(e.getMessage, e) }
      }

      val worker = metricsFactory.fold(
        new Worker(recordProcessorFactory, config))(
        mf => new Worker(recordProcessorFactory, config, mf))

      worker.run()

    } catch {
      case NonFatal(e) =>
        error(e.getMessage, e)
    }
  }
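
  // A hypothetical batch-processing sketch (not part of the original library): it reuses
  // Implicits.IdentityKinesisRecordReader, so the callback receives raw Kinesis Records,
  // and `config` is assumed to be built elsewhere.
  //
  //   implicit val reader: KinesisRecordReader[Record] = Implicits.IdentityKinesisRecordReader
  //
  //   KCLWorkerRunner(config).runBatchProcessor[Record] { (shardId, records, _) =>
  //     records.foreach(r => println(s"[${shardId}] ${r.getPartitionKey}"))
  //   }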

  /**
   * Run KCL worker with the given callback.
   * Simple single-threaded execution, with access to the shard ID and checkpointer; see the usage sketch after this class.
   *
   * @param processRecord     (ShardId, Record, Checkpointer) => Unit : Kinesis record handler
   * @param evReader          evidence that an implementation of KinesisRecordReader[A] is implicitly available in scope
   */
  def runSingleRecordProcessor[A]( processRecord: (String, A, IRecordProcessorCheckpointer) => Unit
                                )( implicit evReader: KinesisRecordReader[A]
                                 ): Unit = {

    runBatchProcessor[A] { (shardId, as, checkpointer) => as.foreach(a => processRecord(shardId, a, checkpointer)) }
  }


  /**
   * Run KCL worker with the given asynchronous callback.
   * Records in a batch will be processed in parallel.
   * You can control the level of parallelism by configuring the provided execution context.
   *
   * @param callbackTimeout   how long to wait for async call results
   * @param processRecord     (Record) => Future[Unit] : async record handler
   * @param executor          where to execute record processing functions
   * @param evReader          evidence that an implementation of KinesisRecordReader[A] is implicitly available in scope
   */
  def runAsyncSingleRecordProcessor[A]( callbackTimeout: FiniteDuration
                                     )( processRecord: (A) => Future[Unit]
                                     )( implicit executor: ExecutionContext
                                      ,          evReader: KinesisRecordReader[A]
                                      ): Unit = {

    runBatchProcessor[Record]({ (shardId, records, checkpointer) =>

      val resFutures: Future[Seq[(Record, Try[Unit])]] = Future.traverse(records) { r =>

        debug(s"Got ${r.toString} from kinesis shard ${shardId}")

        tryToConvertRecord[A](r) match {
          case Failure(e) =>
            Future.successful(r -> Failure(e))

          case Success(a) =>
            val res : Future[(Record, Try[Unit])] = processRecord(a) map (_ => r -> Success(()))

            res recover {
              case NonFatal(e) => r -> Failure(e)
            }
        }
      }

      val results = Await.result(resFutures, callbackTimeout)

      // Log each result individually
      results.foreach { case (r, res) =>
        res match {
          case Success(_) =>
            debug(s"Successfully processed ${r.toString} from kinesis shard ${shardId}.")

          case Failure(e) =>
            e match {
              case ce: KCLWorkerRunnerRecordConversionException =>
                error(s"Skipped a record from kinesis shard ${shardId}", ce)

              // this runs in a context where same record batch will be retried a few times
              case NonFatal(e) =>
                throw KCLWorkerRunnerRecordProcessingException(r, shardId, e)
            }
        }
      }
    })(Implicits.IdentityKinesisRecordReader) // we'll convert on the worker thread
  }
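
  // A hypothetical asynchronous sketch (not part of the original library): `config`, `MyEvent`,
  // `handle` and an implicit KinesisRecordReader[MyEvent] are assumed to exist; parallelism is
  // bounded by the execution context supplied here.
  //
  //   implicit val ec: ExecutionContext = ExecutionContext.global
  //
  //   KCLWorkerRunner(config).runAsyncSingleRecordProcessor[MyEvent](30 seconds) { event =>
  //     Future { handle(event) }
  //   }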



  /** Adds a bit more context to failed attempts to convert kinesis records. */
  private[this]
  def tryToConvertRecord[A](r: Record
                          )( implicit evReader: KinesisRecordReader[A]
                           ): Try[A] = {

    val copyOfTheData = r.getData.duplicate // the underlying ByteBuffer is mutable and can only be consumed once; keep a copy for hexData in the error message

    try {
      Success(evReader(r))
    } catch {
      case NonFatal(e) =>
        Failure(KCLWorkerRunnerRecordConversionException(r, ByteBufferUtil.toHexString(copyOfTheData), e))
    }
  }
}
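

// A minimal, hypothetical end-to-end sketch (not part of the original library). It assumes a
// KinesisClientLibConfiguration built elsewhere (e.g. via the KCLConfiguration helpers mentioned
// above) and reuses Implicits.IdentityKinesisRecordReader, so the handler receives raw Records.
object KCLWorkerRunnerUsageSketch {

  def runExample(config: KinesisClientLibConfiguration): Unit = {
    implicit val identityReader: KinesisRecordReader[Record] = Implicits.IdentityKinesisRecordReader

    KCLWorkerRunner(config)
      .withCheckpointInterval(1 minute)
      .withNumRetries(5)
      .runSingleRecordProcessor[Record] { (shardId, record, _) =>
        // checkpointing happens automatically at checkpointInterval; just consume the record here
        println(s"[${shardId}] partition key = ${record.getPartitionKey}")
      }
  }
}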


case class KCLWorkerRunnerRecordConversionException(
  record: Record
, hexData: String
, cause: Throwable
) extends RuntimeException(s"Failed to convert ${record} to required type: ${cause.getMessage} :: DATA: ${hexData}", cause)


case class KCLWorkerRunnerRecordProcessingException(
  record: Record
, shardId: String
, cause: Throwable
) extends RuntimeException(s"Failed to process ${record} from shard ${shardId}: ${cause.getMessage}", cause)



