All Downloads are FREE. Search and download functionalities are using the official Maven repository.

kinesis4cats.producer.ShardMapCache.scala Maven / Gradle / Ivy

There is a newer version: 0.0.32
Show newest version
/*
 * Copyright 2023-2023 etspaceman
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package kinesis4cats.producer

import scala.concurrent.duration._

import java.nio.charset.StandardCharsets
import java.time.Instant

import cats.effect.syntax.all._
import cats.effect.{Async, Ref}
import cats.syntax.all._
import org.typelevel.log4cats.StructuredLogger

import kinesis4cats.Utils
import kinesis4cats.logging.{LogContext, LogEncoder}
import kinesis4cats.models._

/** A cache of the shards of a stream, used to predict the shard ID that a
  * record will land on given its partition key.
  *
  * Instances are created through the companion's `apply`, which also runs
  * the private `start()` routine: one immediate refresh followed by a
  * background loop that refreshes every `config.refreshInterval`.
  *
  * @param config
  *   [[kinesis4cats.producer.ShardMapCache.Config ShardMapCache.Config]]
  * @param logger
  *   [[org.typelevel.log4cats.StructuredLogger StructuredLogger]]
  * @param shardMapRef
  *   [[cats.effect.Ref Ref]] holding the current
  *   [[kinesis4cats.producer.ShardMap ShardMap]]
  * @param shardMapF
  *   F that supplies a new [[kinesis4cats.producer.ShardMap ShardMap]], or a
  *   [[kinesis4cats.producer.ShardMapCache.Error ShardMapCache.Error]]
  * @param F
  *   [[cats.effect.Async Async]]
  * @param LE
  *   [[kinesis4cats.producer.ShardMapCache.LogEncoders ShardMapCache.LogEncoders]]
  */
private[kinesis4cats] class ShardMapCache[F[_]] private (
    config: ShardMapCache.Config,
    logger: StructuredLogger[F],
    shardMapRef: Ref[F, ShardMap],
    shardMapF: F[Either[ShardMapCache.Error, ShardMap]]
)(implicit
    F: Async[F],
    LE: ShardMapCache.LogEncoders
) {
  import LE._

  /** Predicts a shard that a record will land on given its partition key,
    * using the shard map currently held in the cache.
    *
    * @param partitionKey
    *   The partition key for the record
    * @return
    *   Either a
    *   [[kinesis4cats.producer.ShardMapCache.Error ShardMapCache.Error]] or
    *   [[kinesis4cats.models.ShardId ShardId]]
    */
  def shardForPartitionKey(
      partitionKey: String
  ): F[Either[ShardMapCache.Error, ShardId]] =
    shardMapRef.get.map(_.shardForPartitionKey(partitionKey))

  /** Refresh the shard cache by running shardMapF.
    *
    * On success the new map replaces the cached one. On a `Left` the error
    * is logged and returned, and the previously cached map is kept.
    *
    * @return
    *   Either the
    *   [[kinesis4cats.producer.ShardMapCache.Error ShardMapCache.Error]]
    *   produced by shardMapF, or Unit once the cache has been updated
    */
  def refresh(): F[Either[ShardMapCache.Error, Unit]] = {
    val ctx = LogContext()
    for {
      newMap <- shardMapF
      res <- newMap.bitraverse(
        e =>
          logger
            .error(ctx.context, e)("Error retrieving newest shard map")
            .as(e),
        x =>
          for {
            _ <- logger.debug(ctx.context)(
              "Successfully retrieved new shard map"
            )
            _ <- logger.trace(ctx.addEncoded("shardMap", x).context)(
              "Logging shard map"
            )
            _ <- shardMapRef.set(x)
          } yield ()
      )
    } yield res
  }

  /** Start the cache: refresh once immediately, then keep refreshing every
    * `config.refreshInterval` on a background fiber that is cancelled when
    * the returned resource is released.
    */
  private def start() = {
    // A refresh that fails in F (as opposed to returning a Left, which
    // refresh() already logs) would otherwise terminate the background
    // fiber without anyone observing it, and the cache would never be
    // refreshed again. Log the failure and let the loop continue.
    val scheduledRefresh: F[Unit] =
      F.handleErrorWith(refresh().void) { e =>
        logger.error(LogContext().context, e)(
          "Unexpected error during scheduled shard map refresh"
        )
      }

    for {
      _ <- refresh().toResource
      _ <- F
        .sleep(config.refreshInterval)
        .flatMap(_ => scheduledRefresh)
        .foreverM
        .background
        .void
    } yield ()
  }

}

object ShardMapCache {

  /** Construct a ShardMapCache.
    *
    * The cache is created with an empty shard map and started before it is
    * handed to the caller.
    *
    * @param config
    *   [[kinesis4cats.producer.ShardMapCache.Config ShardMapCache.Config]]
    * @param shardMapF
    *   F that supplies a new [[kinesis4cats.producer.ShardMap ShardMap]]
    * @param loggerF
    *   F of [[org.typelevel.log4cats.StructuredLogger StructuredLogger]]
    * @param F
    *   [[cats.effect.Async Async]]
    * @param LE
    *   [[kinesis4cats.producer.ShardMapCache.LogEncoders ShardMapCache.LogEncoders]]
    * @return
    *   [[cats.effect.Resource Resource]] containing the started
    *   [[kinesis4cats.producer.ShardMapCache ShardMapCache]]
    */
  def apply[F[_]](
      config: Config,
      shardMapF: F[Either[Error, ShardMap]],
      loggerF: F[StructuredLogger[F]]
  )(implicit
      F: Async[F],
      LE: ShardMapCache.LogEncoders
  ): cats.effect.Resource[F, ShardMapCache[F]] = for {
    logger <- loggerF.toResource
    ref <- Ref.of[F, ShardMap](ShardMap.empty).toResource
    service = new ShardMapCache[F](config, logger, ref, shardMapF)
    _ <- service.start()
  } yield service

  /** [[kinesis4cats.logging.LogEncoder LogEncoder]] instances for the
    * ShardMapCache
    *
    * @param shardMapLogEncoder
    *   [[kinesis4cats.logging.LogEncoder LogEncoder]] instance for
    *   [[kinesis4cats.producer.ShardMap]]
    */
  final class LogEncoders(implicit val shardMapLogEncoder: LogEncoder[ShardMap])

  /** Configuration for the ShardMapCache
    *
    * @param refreshInterval
    *   How often to refresh the shard cache
    */
  final case class Config(refreshInterval: FiniteDuration)

  object Config {

    /** Default configuration for the ShardMapCache: refresh once per hour
      */
    val default: Config = Config(1.hour)
  }

  /** Errors that can be received in the ShardMapCache
    *
    * @param msg
    *   Error message
    * @param cause
    *   Underlying error, if any. It is chained as the exception's cause so
    *   that its stack trace is preserved when this error is logged.
    */
  sealed abstract class Error(msg: String, cause: Option[Throwable] = None)
      extends Exception(msg, cause.orNull)

  /** Error for when the partition key cannot be matched to a shard
    *
    * @param partitionKey
    *   partition key that was not matched
    */
  final case class ShardForPartitionKeyNotFound(partitionKey: String)
      extends Error(s"Could not find shard for partition key ${partitionKey}")

  /** Error for when the cache could not list the shards
    *
    * @param e
    *   Underlying error, also exposed through `getCause`
    */
  final case class ListShardsError(e: Throwable)
      extends Error(e.getMessage(), Some(e))

}

/** A snapshot of the shards of a stream.
  *
  * @param shards
  *   The shards, each paired with its hash key range
  * @param lastUpdated
  *   The [[java.time.Instant Instant]] recorded for this snapshot
  */
final case class ShardMap(shards: List[ShardMapRecord], lastUpdated: Instant) {

  /** Predicts the shard for a partition key.
    *
    * The UTF-8 bytes of the key are hashed with `Utils.md5`, the digest is
    * read as a non-negative [[scala.math.BigInt BigInt]], and that value is
    * looked up in `shards` via `ShardMap.findShard`.
    *
    * @param partitionKey
    *   The partition key for the record
    * @return
    *   Either a
    *   [[kinesis4cats.producer.ShardMapCache.Error ShardMapCache.Error]] or
    *   the matching [[kinesis4cats.models.ShardId ShardId]]
    */
  def shardForPartitionKey(
      partitionKey: String
  ): Either[ShardMapCache.Error, ShardId] = {
    val keyBytes = partitionKey.getBytes(StandardCharsets.UTF_8)
    val digest = Utils.md5(keyBytes)
    // signum = 1 interprets the digest as an unsigned magnitude
    ShardMap.findShard(partitionKey, BigInt(1, digest), shards)
  }
}

object ShardMap {

  /** Finds the first shard, in list order, whose hash key range contains the
    * given hash key.
    *
    * @param partitionKey
    *   The partition key, used only to build the error when nothing matches
    * @param hashKey
    *   The hash key to look up
    * @param shards
    *   The candidate shards
    * @return
    *   The [[kinesis4cats.models.ShardId ShardId]] of the first matching
    *   shard, or a
    *   [[kinesis4cats.producer.ShardMapCache.ShardForPartitionKeyNotFound ShardForPartitionKeyNotFound]]
    *   if no shard matches
    */
  def findShard(
      partitionKey: String,
      hashKey: BigInt,
      shards: List[ShardMapRecord]
  ): Either[ShardMapCache.Error, ShardId] =
    shards
      .find(_.hashKeyRange.isBetween(hashKey))
      .map(_.shardId)
      .toRight(ShardMapCache.ShardForPartitionKeyNotFound(partitionKey))

  /** A shard map with no shards, stamped with the current time on each call.
    */
  def empty: ShardMap = ShardMap(Nil, Instant.now())

}

/** A single entry of a [[kinesis4cats.producer.ShardMap ShardMap]]: a shard
  * paired with its hash key range.
  *
  * @param shardId
  *   The [[kinesis4cats.models.ShardId ShardId]] returned when a hash key
  *   matches this record
  * @param hashKeyRange
  *   The range that `ShardMap.findShard` tests a hash key against via
  *   `isBetween`
  */
final case class ShardMapRecord(shardId: ShardId, hashKeyRange: HashKeyRange)




© 2015 - 2025 Weber Informatics LLC | Privacy Policy