All downloads are free. Search and download functionality uses the official Maven repository.

com.snowplowanalytics.snowplow.sources.kinesis.KinesisSource.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2023-present Snowplow Analytics Ltd. All rights reserved.
 *
 * This program is licensed to you under the Snowplow Community License Version 1.0,
 * and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
 * You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
 */
package com.snowplowanalytics.snowplow.sources.kinesis

import cats.effect.{Async, Ref, Sync}
import cats.implicits._
import com.snowplowanalytics.snowplow.sources.SourceAndAck
import com.snowplowanalytics.snowplow.sources.internal.{LowLevelEvents, LowLevelSource}
import fs2.{Chunk, Pull, Stream}
import org.typelevel.log4cats.slf4j.Slf4jLogger
import org.typelevel.log4cats.{Logger, SelfAwareStructuredLogger}
import software.amazon.kinesis.lifecycle.events.{ProcessRecordsInput, ShardEndedInput}
import software.amazon.kinesis.retrieval.kpl.ExtendedSequenceNumber

import java.util.concurrent.{CountDownLatch, SynchronousQueue}
import scala.concurrent.duration.FiniteDuration
import scala.jdk.CollectionConverters._

/**
 * Builds a [[SourceAndAck]] whose events are read from Kinesis.
 *
 * Actions are handed over through a `SynchronousQueue[KCLAction]` which is filled by
 * `KCLScheduler.populateQueue` and drained by the fs2 `Pull` defined in this object.
 */
object KinesisSource {

  /** Logger shared by every effectful helper in this object. */
  private implicit def logger[F[_]: Sync]: SelfAwareStructuredLogger[F] = Slf4jLogger.getLogger[F]

  /**
   * Creates the Kinesis-backed source.
   *
   * A `Ref` holding a timestamp is initialised with the current real time and then shared with the
   * low level source, which exposes it via `lastLiveness` and refreshes it whenever an action is
   * taken from the queue.
   *
   * @param config
   *   configuration of the Kinesis source
   * @return
   *   the source wrapped by `LowLevelSource.toSourceAndAck`
   */
  def build[F[_]: Async](config: KinesisSourceConfig): F[SourceAndAck[F]] =
    Ref
      .ofEffect(Sync[F].realTime)
      .flatMap(liveness => LowLevelSource.toSourceAndAck(lowLevelSource(config, liveness)))

  /** The `LowLevelSource` implementation handed to `LowLevelSource.toSourceAndAck` by [[build]]. */
  private def lowLevelSource[F[_]: Async](
    config: KinesisSourceConfig,
    liveness: Ref[F, FiniteDuration]
  ): LowLevelSource[F, Map[String, Checkpointable]] =
    new LowLevelSource[F, Map[String, Checkpointable]] {
      // All three members are deliberately `def`s: each call builds a fresh value.
      def stream: Stream[F, Stream[F, LowLevelEvents[Map[String, Checkpointable]]]] =
        kinesisStream(config, liveness)

      def checkpointer: KinesisCheckpointer[F] =
        new KinesisCheckpointer[F]()

      def lastLiveness: F[FiniteDuration] =
        liveness.get
    }

  /**
   * A stream of streams of events.
   *
   * The outer stream first acquires the `KCLScheduler.populateQueue` resource and then repeats
   * forever; every inner stream is one run of [[pullFromQueue]], which finishes when a shard end is
   * received.
   */
  private def kinesisStream[F[_]: Async](
    config: KinesisSourceConfig,
    liveness: Ref[F, FiniteDuration]
  ): Stream[F, Stream[F, LowLevelEvents[Map[String, Checkpointable]]]] = {
    val queue = new SynchronousQueue[KCLAction]()
    Stream
      .resource(KCLScheduler.populateQueue[F](config, queue))
      .flatMap(_ => Stream.emit(pullFromQueue(queue, liveness).stream).repeat)
  }

  /**
   * Drains the queue one action at a time and translates each action into `Pull` steps:
   *
   *   - a `ProcessRecords` holding no records is ignored and the next action is awaited
   *   - a `ProcessRecords` holding records is emitted as one `LowLevelEvents`, then the pull carries on
   *   - a `ShardEnd` emits a final, empty `LowLevelEvents` and terminates the pull
   *   - a `KCLError` is logged and then raised as the error of the pull
   */
  private def pullFromQueue[F[_]: Sync](
    queue: SynchronousQueue[KCLAction],
    liveness: Ref[F, FiniteDuration]
  ): Pull[F, LowLevelEvents[Map[String, Checkpointable]], Unit] = {
    type Events = LowLevelEvents[Map[String, Checkpointable]]

    def react(action: KCLAction): Pull[F, Events, Unit] =
      action match {
        case KCLAction.ProcessRecords(shardId, input) =>
          if (input.records.isEmpty) pullFromQueue[F](queue, liveness)
          else Pull.output1(provideNextChunk(shardId, input)) >> pullFromQueue[F](queue, liveness)
        case KCLAction.ShardEnd(shardId, await, shardEndedInput) =>
          handleShardEnd[F](shardId, await, shardEndedInput) >> Pull.done
        case KCLAction.KCLError(t) =>
          Pull.eval(Logger[F].error(t)("Exception from Kinesis source")) >> Pull.raiseError[F](t)
      }

    Pull.eval(resolveNextAction(queue, liveness)).flatMap(react)
  }

  /**
   * Obtains the next action from the queue and afterwards refreshes the liveness timestamp.
   *
   * A non-blocking `poll` is attempted first; only when it yields nothing does the effect fall back
   * to a blocking `take`, which is wrapped in `Sync.interruptible`.
   */
  private def resolveNextAction[F[_]: Sync](queue: SynchronousQueue[KCLAction], liveness: Ref[F, FiniteDuration]): F[KCLAction] =
    for {
      polled <- Sync[F].delay(Option(queue.poll()))
      action <- polled.fold(Sync[F].interruptible(queue.take()))(Sync[F].pure(_))
      _ <- updateLiveness(liveness)
    } yield action

  /** Overwrites the liveness ref with the current real time. */
  private def updateLiveness[F[_]: Sync](liveness: Ref[F, FiniteDuration]): F[Unit] =
    for {
      now <- Sync[F].realTime
      _ <- liveness.set(now)
    } yield ()

  /**
   * Converts a non-empty batch of records into a `LowLevelEvents`.
   *
   * The payload is the `data()` of every record; the ack token maps the shard id to a
   * `Checkpointable.Record` built from the sequence number of the last record; the timestamp is
   * the approximate arrival timestamp of the first record.
   */
  private def provideNextChunk(shardId: String, input: ProcessRecordsInput): LowLevelEvents[Map[String, Checkpointable]] = {
    val payloads = Chunk.javaList(input.records()).map(_.data())
    // `last` and `head` cannot fail here: pullFromQueue never passes on an empty batch
    val newest = input.records.asScala.last
    val oldest = input.records.asScala.head
    val position = new ExtendedSequenceNumber(newest.sequenceNumber, newest.subSequenceNumber)
    LowLevelEvents(
      payloads,
      Map[String, Checkpointable](shardId -> Checkpointable.Record(position, input.checkpointer)),
      Some(oldest.approximateArrivalTimestamp)
    )
  }

  /**
   * Logs that the shard has ended and emits one last `LowLevelEvents` which carries no records,
   * only a `Checkpointable.ShardEnd` ack token for the shard.
   */
  private def handleShardEnd[F[_]: Sync](
    shardId: String,
    await: CountDownLatch,
    shardEndedInput: ShardEndedInput
  ): Pull[F, LowLevelEvents[Map[String, Checkpointable]], Unit] = {
    val ack = Map[String, Checkpointable](shardId -> Checkpointable.ShardEnd(shardEndedInput.checkpointer, await))
    val announce = Logger[F].info(s"Ending this window of events early because reached the end of Kinesis shard $shardId")
    Pull.eval(announce) >> Pull.output1(LowLevelEvents(Chunk.empty, ack, None))
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy