/*
 * Copyright 2018-2024 OVO Energy Limited
 *
 * SPDX-License-Identifier: Apache-2.0
 */

package fs2.kafka

import scala.annotation.nowarn
import scala.concurrent.Promise

import cats.{Apply, Functor}
import cats.effect.*
import cats.syntax.all.*
import fs2.*
import fs2.kafka.internal.*
import fs2.kafka.producer.MkProducer

import org.apache.kafka.clients.producer.RecordMetadata
import org.apache.kafka.common.{Metric, MetricName, PartitionInfo}

/**
  * [[KafkaProducer]] represents a producer of Kafka records, with the ability to produce
  * `ProducerRecord`s using [[produce]].
  */
abstract class KafkaProducer[F[_], K, V] {

  /**
    * Produces the specified [[ProducerRecords]] in two steps: the first effect puts the records in
    * the buffer of the producer, and the second effect waits for the records to send.
    *
    * It's possible to `flatten` the result from this function to have an effect which both sends
    * the records and waits for them to finish sending.
    *
    * Waiting for individual records to send can substantially limit performance. In some cases
    * this is necessary, but otherwise we might want to consider the following alternatives.
    *
    *   - Wait for the produced records in batches, improving the rate at which records are
    *     produced, but losing the guarantee where `produce >> otherAction` means `otherAction`
    *     executes after the record has been sent.
    *   - Run several `produce.flatten >> otherAction` concurrently, improving the rate at which
    *     records are produced, and still have `otherAction` execute after records have been sent,
    *     but losing the order of produced records.
    */
  def produce(
    records: ProducerRecords[K, V]
  ): F[F[ProducerResult[K, V]]]

}
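/*
 * A minimal usage sketch (illustrative, not part of the library): given some
 * `producer: KafkaProducer[F, K, V]`, a batch `records: ProducerRecords[K, V]`, and an effect
 * `otherAction: F[Unit]` in scope, the two-step semantics of `produce` can be used as follows.
 *
 * {{{
 * // Enqueue the records, run another action while they are in flight,
 * // then wait for the broker acknowledgements:
 * producer.produce(records).flatMap(ack => otherAction >> ack)
 *
 * // Enqueue and wait in a single effect:
 * producer.produce(records).flatten
 * }}}
 */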
object KafkaProducer {

  implicit class ProducerOps[F[_], K, V](private val producer: KafkaProducer[F, K, V])
      extends AnyVal {

    /**
      * Produce a single [[ProducerRecord]], see [[KafkaProducer.produce]] for general semantics.
      */
    def produceOne_(record: ProducerRecord[K, V])(implicit F: Functor[F]): F[F[RecordMetadata]] =
      produceOne(record).map(_.map { res =>
        res.head.get._2 // Should always be present so get is ok
      })

    /**
      * Produce a single record to the specified topic using the provided key and value, see
      * [[KafkaProducer.produce]] for general semantics.
      */
    def produceOne_(topic: String, key: K, value: V)(implicit
      F: Functor[F]
    ): F[F[RecordMetadata]] =
      produceOne_(ProducerRecord(topic, key, value))

    /**
      * Produce a single record to the specified topic using the provided key and value, see
      * [[KafkaProducer.produce]] for general semantics.
      */
    def produceOne(
      topic: String,
      key: K,
      value: V
    ): F[F[ProducerResult[K, V]]] =
      produceOne(ProducerRecord(topic, key, value))

    /**
      * Produce a single [[ProducerRecord]], see [[KafkaProducer.produce]] for general semantics.
      */
    def produceOne(record: ProducerRecord[K, V]): F[F[ProducerResult[K, V]]] =
      producer.produce(ProducerRecords.one(record))

  }

  /**
    * [[KafkaProducer.Metrics]] extends [[KafkaProducer]] to provide access to the underlying
    * producer metrics.
    */
  abstract class Metrics[F[_], K, V] extends KafkaProducer[F, K, V] {

    /**
      * Returns producer metrics.
      *
      * @see
      *   org.apache.kafka.clients.producer.KafkaProducer#metrics
      */
    def metrics: F[Map[MetricName, Metric]]

  }

  /**
    * [[KafkaProducer.PartitionsFor]] extends [[KafkaProducer.Metrics]] to provide access to the
    * underlying producer partitions.
    */
  abstract class PartitionsFor[F[_], K, V] extends KafkaProducer.Metrics[F, K, V] {

    /**
      * Returns partition metadata for the given topic.
      *
      * @see
      *   org.apache.kafka.clients.producer.KafkaProducer#partitionsFor
      */
    def partitionsFor(topic: String): F[List[PartitionInfo]]

  }

  /**
    * Creates a new [[KafkaProducer]] in the `Resource` context, using the specified
    * [[ProducerSettings]]. Note that there is another version where `F[_]` is specified
    * explicitly and the key and value type can be inferred, which allows you to use the
    * following syntax.
    *
    * {{{
    * KafkaProducer.resource[F].using(settings)
    * }}}
    */
  def resource[F[_], K, V](
    settings: ProducerSettings[F, K, V]
  )(implicit F: Async[F], mk: MkProducer[F]): Resource[F, KafkaProducer.PartitionsFor[F, K, V]] =
    KafkaProducerConnection.resource(settings)(F, mk).flatMap(_.withSerializersFrom(settings))

  private[kafka] def from[F[_], K, V](
    connection: KafkaProducerConnection[F],
    keySerializer: KeySerializer[F, K],
    valueSerializer: ValueSerializer[F, V]
  ): KafkaProducer.PartitionsFor[F, K, V] =
    new KafkaProducer.PartitionsFor[F, K, V] {

      override def produce(
        records: ProducerRecords[K, V]
      ): F[F[ProducerResult[K, V]]] =
        connection.produce(records)(keySerializer, valueSerializer)

      override def metrics: F[Map[MetricName, Metric]] =
        connection.metrics

      override def toString: String =
        "KafkaProducer$" + System.identityHashCode(this)

      override def partitionsFor(topic: String): F[List[PartitionInfo]] =
        connection.partitionsFor(topic)

    }

  /**
    * Creates a new [[KafkaProducer]] in the `Stream` context, using the specified
    * [[ProducerSettings]]. Note that there is another version where `F[_]` is specified
    * explicitly and the key and value type can be inferred, which allows you to use the
    * following syntax.
    *
    * {{{
    * KafkaProducer.stream[F].using(settings)
    * }}}
    */
  def stream[F[_], K, V](
    settings: ProducerSettings[F, K, V]
  )(implicit F: Async[F], mk: MkProducer[F]): Stream[F, KafkaProducer.PartitionsFor[F, K, V]] =
    Stream.resource(KafkaProducer.resource(settings)(F, mk))

  private[kafka] def produce[F[_]: Async, K, V](
    withProducer: WithProducer[F],
    keySerializer: KeySerializer[F, K],
    valueSerializer: ValueSerializer[F, V],
    records: ProducerRecords[K, V]
  ): F[F[ProducerResult[K, V]]] =
    withProducer { (producer, blocking) =>
      records
        .traverse(produceRecord(keySerializer, valueSerializer, producer, blocking))
        .map(_.sequence)
    }

  // Bridges the producer's callback-based `send` into `F`: the completion callback fulfils a
  // `Promise`, whose future is lifted into a cancelable effect via `fromFutureCancelable`.
  private[kafka] def produceRecord[F[_], K, V](
    keySerializer: KeySerializer[F, K],
    valueSerializer: ValueSerializer[F, V],
    producer: KafkaByteProducer,
    blocking: Blocking[F]
  )(implicit
    F: Async[F]
  ): ProducerRecord[K, V] => F[F[(ProducerRecord[K, V], RecordMetadata)]] =
    record =>
      asJavaRecord(keySerializer, valueSerializer, record).flatMap { javaRecord =>
        F.delay(Promise[(ProducerRecord[K, V], RecordMetadata)]())
          .flatMap { promise =>
            blocking {
              producer.send(
                javaRecord,
                { (metadata, exception) =>
                  if (exception == null)
                    promise.success((record, metadata))
                  else promise.failure(exception)
                }
              )
            }.map(javaFuture =>
              F.fromFutureCancelable(
                F.delay((promise.future, F.delay(javaFuture.cancel(true)).void))
              )
            )
          }
      }

  /**
    * Creates a [[KafkaProducer]] using the provided settings and produces records in batches.
    */
  def pipe[F[_], K, V](
    settings: ProducerSettings[F, K, V]
  )(implicit
    F: Async[F],
    mk: MkProducer[F]
  ): Pipe[F, ProducerRecords[K, V], ProducerResult[K, V]] =
    records => stream(settings)(F, mk).flatMap(pipe(_).apply(records))

  /**
    * Produces records in batches using the provided [[KafkaProducer]].
    */
  def pipe[F[_]: Concurrent, K, V](
    producer: KafkaProducer[F, K, V]
  ): Pipe[F, ProducerRecords[K, V], ProducerResult[K, V]] =
    _.evalMap(producer.produce).parEvalMap(Int.MaxValue)(identity)
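  /*
   * A usage sketch for `pipe` (illustrative; `settings` and an upstream
   * `records: Stream[F, ProducerRecords[K, V]]` are assumed to be in scope): batches flow
   * through the producer and results are emitted as the broker acknowledges them, with sends
   * from separate batches overlapping.
   *
   * {{{
   * records.through(KafkaProducer.pipe(settings))
   * }}}
   */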
  private[this] def serializeToBytes[F[_], K, V](
    keySerializer: KeySerializer[F, K],
    valueSerializer: ValueSerializer[F, V],
    record: ProducerRecord[K, V]
  )(implicit F: Apply[F]): F[(Array[Byte], Array[Byte])] = {
    val keyBytes   = keySerializer.serialize(record.topic, record.headers, record.key)
    val valueBytes = valueSerializer.serialize(record.topic, record.headers, record.value)
    keyBytes.product(valueBytes)
  }

  private[this] def asJavaRecord[F[_], K, V](
    keySerializer: KeySerializer[F, K],
    valueSerializer: ValueSerializer[F, V],
    record: ProducerRecord[K, V]
  )(implicit F: Apply[F]): F[KafkaByteProducerRecord] =
    serializeToBytes(keySerializer, valueSerializer, record).map { case (keyBytes, valueBytes) =>
      new KafkaByteProducerRecord(
        record.topic,
        record.partition.fold[java.lang.Integer](null)(identity),
        record.timestamp.fold[java.lang.Long](null)(identity),
        keyBytes,
        valueBytes,
        record.headers.asJava
      )
    }

  def apply[F[_]]: ProducerPartiallyApplied[F] =
    new ProducerPartiallyApplied

  final private[kafka] class ProducerPartiallyApplied[F[_]](val dummy: Boolean = true)
      extends AnyVal {

    /**
      * Alternative version of `resource` where the `F[_]` is specified explicitly, and where the
      * key and value type can be inferred from the [[ProducerSettings]]. This allows you to use
      * the following syntax.
      *
      * {{{
      * KafkaProducer[F].resource(settings)
      * }}}
      */
    def resource[K, V](settings: ProducerSettings[F, K, V])(implicit
      F: Async[F],
      mk: MkProducer[F]
    ): Resource[F, KafkaProducer[F, K, V]] =
      KafkaProducer.resource(settings)(F, mk)

    /**
      * Alternative version of `stream` where the `F[_]` is specified explicitly, and where the
      * key and value type can be inferred from the [[ProducerSettings]]. This allows you to use
      * the following syntax.
      *
      * {{{
      * KafkaProducer[F].stream(settings)
      * }}}
      */
    def stream[K, V](settings: ProducerSettings[F, K, V])(implicit
      F: Async[F],
      mk: MkProducer[F]
    ): Stream[F, KafkaProducer[F, K, V]] =
      KafkaProducer.stream(settings)(F, mk)

    override def toString: String =
      "ProducerPartiallyApplied$" + System.identityHashCode(this)

  }

  /*
   * Prevents the default `MkProducer` instance from being implicitly available
   * to code defined in this object, ensuring factory methods require an instance
   * to be provided at the call site.
   */
  @nowarn("msg=never used")
  implicit private def mkAmbig1[F[_]]: MkProducer[F] =
    throw new AssertionError("should not be used")

  @nowarn("msg=never used")
  implicit private def mkAmbig2[F[_]]: MkProducer[F] =
    throw new AssertionError("should not be used")

}
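/*
 * An end-to-end sketch (illustrative only; the bootstrap servers, topic name, key, and values
 * are placeholders): acquire a producer with `stream`, produce one record per element, and wait
 * for each acknowledgement before continuing.
 *
 * {{{
 * val settings = ProducerSettings[IO, String, String].withBootstrapServers("localhost:9092")
 *
 * KafkaProducer.stream(settings).flatMap { producer =>
 *   Stream("a", "b", "c").evalMap { value =>
 *     producer.produceOne("topic", "key", value).flatten
 *   }
 * }
 * }}}
 */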



