zio.kafka.consumer.fetch.ManyPartitionsQueueSizeBasedFetchStrategy.scala
package zio.kafka.consumer.fetch

import org.apache.kafka.common.TopicPartition
import zio.{ Chunk, ZIO }
import zio.kafka.consumer.internal.PartitionStream

import scala.collection.mutable
/**
 * A fetch strategy that allows a stream to fetch data when its queue size is at or below `maxPartitionQueueSize`, as
 * long as the total queue size is at or below `maxTotalQueueSize`. This strategy is suitable when
 * [[QueueSizeBasedFetchStrategy]] requires too much heap space, particularly when a lot of partitions are being
 * consumed.
 *
 * @param maxPartitionQueueSize
 *   Maximum number of records to be buffered per partition. This buffer improves throughput and supports varying
 *   downstream message processing time, while maintaining some backpressure. Low values effectively disable
 *   prefetching in favour of low memory consumption. Large values leave backpressure entirely to the
 *   `maxTotalQueueSize` parameter, which only limits the buffers of all partitions together.
 *
 *   The number of records that are fetched on every poll is controlled by the `max.poll.records` setting; the number
 *   of records fetched for each partition is somewhere between 0 and `max.poll.records`.
 *
 *   The default value for this parameter is 2 * the default `max.poll.records` of 500, rounded to the nearest power
 *   of 2.
 *
 * @param maxTotalQueueSize
 *   Maximum number of records to be buffered over all partitions together. This can be used to limit memory usage
 *   when consuming a large number of partitions.
 *
 *   When multiple streams are eligible for pre-fetching (because their queue size is below `maxPartitionQueueSize`)
 *   but together they would exceed `maxTotalQueueSize`, a random subset of the eligible streams is selected on every
 *   call such that their combined queue size stays below `maxTotalQueueSize`. The randomization ensures fairness and
 *   prevents read-starvation for streams at the end of the list.
 *
 *   The default value is 20 * the default for `maxPartitionQueueSize`, allowing approximately 20 partitions to do
 *   pre-fetching in each poll.
 */
final case class ManyPartitionsQueueSizeBasedFetchStrategy(
  maxPartitionQueueSize: Int = 1024,
  maxTotalQueueSize: Int = 20480
) extends FetchStrategy {
  override def selectPartitionsToFetch(
    streams: Chunk[PartitionStream]
  ): ZIO[Any, Nothing, Set[TopicPartition]] =
    for {
      random <- ZIO.random
      // Shuffle the streams so that no partition is structurally favoured once the total budget runs out.
      shuffledStreams <- random.shuffle(streams)
      tps <- ZIO
               // The accumulator holds the partitions selected so far and the remaining total queue budget.
               .foldLeft(shuffledStreams)((mutable.ArrayBuilder.make[TopicPartition], maxTotalQueueSize)) {
                 case (acc @ (partitions, queueBudget), stream) =>
                   stream.queueSize.map { queueSize =>
                     // Select the partition only if its own queue is small enough and it fits the remaining budget.
                     if (queueSize <= maxPartitionQueueSize && queueSize <= queueBudget) {
                       (partitions += stream.tp, queueBudget - queueSize)
                     } else acc
                   }
               }
               .map { case (tps, _) => tps.result().toSet }
    } yield tps
}
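
A minimal configuration sketch, assuming zio-kafka's `ConsumerSettings#withFetchStrategy` and `withGroupId` builders; the broker address and group id are placeholders, and the parameter values shown are simply the defaults of the case class above.

import zio.kafka.consumer.ConsumerSettings
import zio.kafka.consumer.fetch.ManyPartitionsQueueSizeBasedFetchStrategy

val settings: ConsumerSettings =
  ConsumerSettings(List("localhost:9092"))
    .withGroupId("example-group")
    .withFetchStrategy(
      ManyPartitionsQueueSizeBasedFetchStrategy(
        maxPartitionQueueSize = 1024, // per-partition prefetch budget
        maxTotalQueueSize = 20480     // budget over all partitions together
      )
    )

Raising `maxTotalQueueSize` allows more partitions to pre-fetch per poll at the cost of more heap; lowering it tightens the overall memory bound while the per-partition limit stays the same.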