All Downloads are FREE. Search and download functionalities are using the official Maven repository.

nl.vroste.zio.kinesis.client.zionative.ShardAssignmentStrategy.scala Maven / Gradle / Ivy

The newest version!
package nl.vroste.zio.kinesis.client.zionative

import java.time.Instant
import nl.vroste.zio.kinesis.client.zionative.LeaseRepository.Lease
import zio.ZIO

import zio._
import zio.Random.shuffle

/**
 * Decides which shards this worker would like to have
 */
trait ShardAssignmentStrategy {

  /**
   * Given the current lease assignment, pick which shards this worker desires
   *
   * Leases for picked shards which don't yet exist will be created, leases owned by other workers will be stolen. Note
   * that all of this may fail when other workers are active in the pool, so it is not guaranteed that this worker
   * actually gets its desired shards. However, on the next lease assignment interval, the strategy can indicate its
   * desired shards again based.
   *
   * Leases for shards that are not desired (anymore) will be released.
   *
   * @param leases
   *   Current lease assignments + time of last update
   * @param shards
   *   IDs of all shards of the stream
   * @param workerId
   *   ID of current worker, reflected in the 'owner' property of leases
   * @return
   */
  def desiredShards(
    leases: Set[(Lease, Instant)],
    shards: Set[String],
    workerId: String
  ): ZIO[Any, Nothing, Set[String]]
}

object ShardAssignmentStrategy {

  /**
   * Manual shard assignment for a fixed set of shards
   *
   * Note that users must make sure that other workers in the pool also use a manual shard assignment strategy,
   * otherwise stealing back of leases by other workers may occur.
   *
   * You also need to make sure that all shards are covered by the shard assignment of all workers. After resharding,
   * the list of shards must be reconfigured.
   *
   * @param shardAssignment
   *   IDs of shards to assign to this worker
   */
  def manual(shardAssignment: Set[String]) =
    new ShardAssignmentStrategy {
      override def desiredShards(
        leases: Set[(Lease, Instant)],
        shards: Set[String],
        workerId: String
      ): ZIO[Any, Nothing, Set[String]] =
        ZIO.succeed(shardAssignment intersect shards)
    }

  // TODO possible future strategy: manual + fallback
  // When no other worker has active leases for other shards, take them over

  /**
   * Balances shard assignments between a set of workers by taking / stealing leases so that each has its fair share
   *
   * Shards which do not yet have a lease will be claimed by this worker. To get a fair share of leases, the desired
   * shards may contain leases to steal from other workers. The most shards will be selected from the busiest worker. A
   * random selection is made within each worker's shards so that the chance of contention with other stealing workers
   * is reduced.
   *
   * @param expirationTime
   *   Time after which a lease is considered expired if not updated in the meantime This should be set to several times
   *   the lease renewal interval so that leases are not considered expired until several renew intervals have been
   *   missed, in case of transient connection issues.
   */
  def balanced(expirationTime: Duration = 10.seconds) =
    new ShardAssignmentStrategy {
      override def desiredShards(
        leases: Set[(Lease, Instant)],
        shards: Set[String],
        workerId: String
      ): ZIO[Any, Nothing, Set[String]] =
        for {
          now          <- zio.Clock.currentDateTime.map(_.toInstant())
          expiredLeases = leases.collect {
                            case (lease, lastUpdated)
                                if lastUpdated.isBefore(now.minusMillis(expirationTime.toMillis)) &&
                                  !lease.checkpoint.contains(Left(SpecialCheckpoint.ShardEnd)) =>
                              lease
                          }
          _            <-
            ZIO.logInfo(s"Found expired leases: ${expiredLeases.map(_.key).mkString(",")}").when(expiredLeases.nonEmpty)

          shardsWithoutLease = shards.filterNot(shard => leases.map(_._1.key).toList.contains(shard))

          toTake             <- leasesToTake(leases.map(_._1).toList, workerId, expiredLeases.toList)
          shardsWeAlreadyHave = leases.collect { case (lease, _) if lease.owner.contains(workerId) => lease.key }
        } yield shardsWeAlreadyHave ++ shardsWithoutLease ++ toTake

    }

  /**
   * Compute which leases to take, either without owner or from other workers
   *
   * We take expired and unowned leases first. We only steal if necessary and do it from the busiest worker first. We
   * will not steal more than the other worker's target (nr leases / nr workers).
   *
   * @param allLeases
   *   Latest known state of the all leases
   * @param workerId
   *   ID of this worker
   * @param expiredLeases
   *   Leases that have expired
   * @return
   *   List of leases that should be taken by this worker
   */
  def leasesToTake(
    allLeases: List[Lease],
    workerId: String,
    expiredLeases: List[Lease] = List.empty
  ): ZIO[Any, Nothing, Set[String]] = {
    val allWorkers    = allLeases.map(_.owner).collect { case Some(owner) => owner }.toSet + workerId
    val activeWorkers =
      (allLeases.toSet -- expiredLeases).map(_.owner).collect { case Some(owner) => owner } + workerId
    val zombieWorkers = allWorkers -- activeWorkers

    val minTarget = Math.floor(allLeases.size * 1.0 / (activeWorkers.size * 1.0)).toInt

    // If the nr of workers does not evenly divide the shards, there's some leases that at least one worker should take
    // These we will not steal, only take
    val optional = allLeases.size % activeWorkers.size

    val target = minTarget

    val ourLeases     = allLeases.filter(_.owner.contains(workerId))
    val unownedLeases = allLeases.filter(_.owner.isEmpty)

    val minNrLeasesToTake = Math.max(0, target - ourLeases.size)
    val maxNrLeasesToTake = Math.max(0, target + optional - ourLeases.size)

    // We may already own some leases
    ZIO.logInfo(
      s"We have ${ourLeases.size}, we would like to have at least ${target}/${allLeases.size} leases (${activeWorkers.size} active workers, " +
        s"${zombieWorkers.size} zombie workers), we need ${minNrLeasesToTake} more with an optional ${optional}"
    ) *> (if (minNrLeasesToTake > 0 || unownedLeases.nonEmpty)
            for {
              leasesWithoutOwner         <- shuffle(allLeases.filter(_.owner.isEmpty))
              leasesExpired              <- shuffle(expiredLeases)
              leasesWithoutOwnerOrExpired = (leasesWithoutOwner ++ leasesExpired).take(maxNrLeasesToTake + optional)

              // We can only steal from our target budget, not the optional ones
              remaining = Math.min(maxNrLeasesToTake, Math.max(0, minNrLeasesToTake - leasesWithoutOwnerOrExpired.size))
              toSteal  <- leasesToSteal(allLeases, workerId, target, nrLeasesToSteal = remaining)
            } yield (leasesWithoutOwnerOrExpired ++ toSteal).map(_.key).toSet
          else ZIO.succeed(Set.empty[String]))
  }

  /**
   * Computes leases to steal from other workers
   *
   * @param allLeases
   *   Latest known state of the all leases
   * @param workerId
   *   ID of this worker
   * @param target
   *   Target number of leases for this worker
   * @param nrLeasesToSteal
   *   How many leases to steal to get to the target
   * @return
   *   List of leases that should be stolen
   */
  def leasesToSteal(
    allLeases: List[Lease],
    workerId: String,
    target: Int,
    nrLeasesToSteal: Int
  ): ZIO[Any, Nothing, List[Lease]] = {
    val leasesByWorker =
      allLeases.groupBy(_.owner).collect { case (Some(owner), leases) => owner -> leases }
    val allWorkers     = allLeases.map(_.owner).collect { case Some(owner) => owner }.toSet ++ Set(workerId)
    // println(s"Planning to steal ${nrLeasesToSteal} leases")

    // From busiest to least busy
    val nrLeasesByWorker  = allWorkers
      .map(worker => worker -> allLeases.count(_.owner.contains(worker)))
      .toMap
      .view
      .filterKeys(_ != workerId)
      .toList
      .sortBy { case (worker, nrLeases) =>
        (nrLeases * -1, worker)
      } // Sort desc by nr of leases and then asc by worker ID for deterministic sort order
    // println(s"Nr Leases by worker: ${nrLeasesByWorker}")
    val spilloverByWorker = nrLeasesByWorker.map { case (worker, nrLeases) =>
      worker -> Math.max(0, nrLeases - target)
    }
    // println(s"Spillover: ${spilloverByWorker}")

    // Determine how many leases to take from each worker
    val nrLeasesToStealByWorker = spilloverByWorker
      .foldLeft((nrLeasesToSteal, Map.empty[String, Int])) {
        case ((leasesToStealLeft, nrByWorker), (worker, spillover)) =>
          val toTake = Math.min(spillover, leasesToStealLeft)

          (leasesToStealLeft - toTake, nrByWorker + (worker -> toTake))
      }
      ._2
      .filter(_._2 > 0)
    // println(s"Going to steal from workers ${nrLeasesToStealByWorker}")

    // From each worker that we want to take some leases, randomize the leases to reduce contention
    for {
      leasesToStealByWorker <- ZIO.foreach(nrLeasesToStealByWorker.toList) { case (worker, nrLeasesToTake) =>
                                 shuffle(leasesByWorker.getOrElse(worker, List.empty))
                                   .map(_.take(nrLeasesToTake))
                               }
      leasesToSteal          = leasesToStealByWorker.flatten
    } yield leasesToSteal
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy