All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.snowplowanalytics.snowplow.sources.SourceAndAck.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2023-present Snowplow Analytics Ltd. All rights reserved.
 *
 * This program is licensed to you under the Snowplow Community License Version 1.0,
 * and you may not use this file except in compliance with the Snowplow Community License Version 1.0.
 * You may obtain a copy of the Snowplow Community License Version 1.0 at https://docs.snowplow.io/community-license-1.0
 */
package com.snowplowanalytics.snowplow.sources

import cats.Show
import cats.implicits._
import fs2.Stream
import scala.concurrent.duration.FiniteDuration

/**
 * The machinery for sourcing events from an external stream and then acking/checkpointing them.
 *
 * Implementations of this trait are provided by the sources library (e.g. kinesis, kafka, pubsub)
 * whereas implementations of [[EventProcessor]] are provided by the specific application (e.g.
 * enrich, transformer, loaders)
 */
/**
 * @tparam F
 *   The effect type in which the event stream runs and in which the health check is evaluated
 */
trait SourceAndAck[F[_]] {

  /**
   * Wraps the [[EventProcessor]] to create a Stream which, when compiled and drained, causes
   * events to flow through the processor.
   *
   * The element type of the returned stream is `Nothing`, so it never emits a value to the caller;
   * it is run purely for its effects.
   *
   * @param config
   *   Configures how events are fed into the processor, e.g. whether to use timed windows
   * @param processor
   *   The EventProcessor, which is implemented by the specific application, e.g. enrich or a loader
   * @return
   *   A stream which should be compiled and drained
   */
  def stream(config: EventProcessingConfig, processor: EventProcessor[F]): Stream[F, Nothing]

  /**
   * Reports on whether the source of events is healthy
   *
   * Note, unlike our statsd metrics, the latency measurement used here does not consider min/max
   * values over a period of time. It is a snapshot measurement for a single point in time.
   *
   * If events are getting processed quickly then latency is low and the probe should report
   * healthy. If any event is "stuck" then latency is high and the probe should report unhealthy.
   *
   * @param maxAllowedProcessingLatency
   *   A maximum allowed value for how long the `EventProcessor` may spend processing any pending
   *   un-acked events. If this cutoff is exceeded then `isHealthy` returns an unhealthy status.
   * @return
   *   The current [[SourceAndAck.HealthStatus]] of the source, evaluated in `F`
   */
  def isHealthy(maxAllowedProcessingLatency: FiniteDuration): F[SourceAndAck.HealthStatus]
}

/**
 * Companion holding the health-status ADT reported by [[SourceAndAck.isHealthy]], together with
 * the `Show` instance used to render unhealthy states as human-readable text.
 */
object SourceAndAck {

  /** Outcome of a health check: either [[Healthy]] or one of the [[Unhealthy]] variants. */
  sealed trait HealthStatus {

    /**
     * Renders this status as a message only when it is an unhealthy one.
     *
     * @return
     *   `Some(description)` for any [[Unhealthy]] status, `None` for [[Healthy]]
     */
    final def showIfUnhealthy: Option[String] =
      this match {
        case issue: Unhealthy => Some(Show[Unhealthy].show(issue))
        case Healthy          => None
      }
  }

  /** The status reported when nothing is wrong with the source. */
  case object Healthy extends HealthStatus

  /** Parent of every status that describes a problem with the source. */
  sealed trait Unhealthy extends HealthStatus

  /**
   * The health status expected if the source is at a stage of its lifecycle where it cannot
   * provide events
   *
   * For Pubsub this could be because the Subscriber is not yet running. For Kafka this could be due
   * to re-balancing.
   */
  case object Disconnected extends Unhealthy

  /**
   * The health status expected if an event is "stuck" in the EventProcessor
   *
   * @param latency
   *   How long the EventProcessor has spent trying to process the stuck event
   */
  case class LaggingEventProcessor(latency: FiniteDuration) extends Unhealthy

  /**
   * The health status expected if the source of events has been inactive for some time
   *
   * @param duration
   *   How long the source of events has been inactive
   */
  case class InactiveSource(duration: FiniteDuration) extends Unhealthy

  /** Human-readable description of each [[Unhealthy]] variant. */
  implicit def showUnhealthy: Show[Unhealthy] =
    Show.show[Unhealthy] {
      case InactiveSource(duration) =>
        show"Source of events has been inactive for $duration"
      case LaggingEventProcessor(latency) =>
        show"Processing latency is $latency"
      case Disconnected =>
        "No connection to a source of events"
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy