
org.apache.spark.sql.kafka010.ShadowedCachedKafkaProducer.scala

package org.apache.spark.sql.kafka010

import java.time.Duration
import java.util.Properties
import java.util.concurrent.{ConcurrentMap, ExecutionException, Executors, TimeUnit}
import java.{util => ju}

import com.google.common.cache._
import com.google.common.util.concurrent.{ExecutionError, UncheckedExecutionException}
import com.landoop.lenses.topology.client.kafka.metrics.{KafkaMetricsBuilder, KafkaPublisher}
import com.landoop.lenses.topology.client.metrics.MetricsPublishTask
import org.apache.kafka.clients.producer.KafkaProducer
import org.apache.spark.SparkEnv
import org.apache.spark.internal.Logging

import scala.collection.JavaConverters._
import scala.util.control.NonFatal

private[kafka010] object ShadowedCachedKafkaProducer extends Logging {

  private type Producer = KafkaProducer[Array[Byte], Array[Byte]]

  private lazy val cacheExpireTimeout: Long =
    SparkEnv.get.conf.getTimeAsMs("spark.kafka.producer.cache.timeout", "10m")

  private val cacheLoader = new CacheLoader[Seq[(String, Object)], Producer] {
    override def load(config: Seq[(String, Object)]): Producer = {
      // Build a producer from the sorted param pairs that form the cache key.
      createKafkaProducer(config.toMap.asJava)
    }
  }

  private val removalListener = new RemovalListener[Seq[(String, Object)], Producer]() {
    override def onRemoval(
        notification: RemovalNotification[Seq[(String, Object)], Producer]): Unit = {
      val paramsSeq: Seq[(String, Object)] = notification.getKey
      val producer: Producer = notification.getValue
      logDebug(
        s"Evicting kafka producer $producer with params $paramsSeq due to ${notification.getCause}")
      close(paramsSeq, producer)
    }
  }

  private lazy val guavaCache: LoadingCache[Seq[(String, Object)], Producer] =
    CacheBuilder.newBuilder().expireAfterAccess(cacheExpireTimeout, TimeUnit.MILLISECONDS)
      .removalListener(removalListener)
      .build[Seq[(String, Object)], Producer](cacheLoader)
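
  // Note: producers left idle for longer than `cacheExpireTimeout` (10 minutes by
  // default, configurable via spark.kafka.producer.cache.timeout) are evicted from
  // the cache, and the removal listener above then closes them.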

  private def createKafkaProducer(producerConfiguration: ju.Map[String, Object]): Producer = {
    // Validate the topology description before allocating a producer, so that a
    // misconfiguration cannot leak an open KafkaProducer instance.
    val key = "lenses.topology.description"
    if (!producerConfiguration.containsKey(key)) {
      throw new RuntimeException(
        "Must specify option 'kafka.lenses.topology.description' as appName:topic1,topic2,...,topicn")
    }

    val description = producerConfiguration.get(key).toString
    val appName = description.takeWhile(_ != ':')
    val topics = description.dropWhile(_ != ':').drop(1).split(',')

    val props = new Properties()
    props.putAll(producerConfiguration)
    val kafkaProducer: Producer = new Producer(producerConfiguration)

    val publisher = new KafkaPublisher(props)

    // Register each topic we're interested in; metrics are published every five seconds.
    val task = new MetricsPublishTask(publisher, Duration.ofSeconds(5))
    topics.foreach { topic =>
      task.register(appName, topic, new KafkaMetricsBuilder(kafkaProducer))
    }

    // Run the metrics task on its own thread. Note that this executor is never shut
    // down, so it outlives the producer even after the cache evicts it.
    val executor = Executors.newSingleThreadExecutor()
    executor.submit(task)

    logDebug(s"Created a new instance of KafkaProducer for $producerConfiguration.")
    kafkaProducer
  }
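
  // Illustrative example of the expected description format (values are hypothetical):
  //   kafka.lenses.topology.description = "my-app:orders,payments"
  // parses to appName "my-app" and topics Array("orders", "payments"), so one
  // KafkaMetricsBuilder is registered for each of the two topics.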

  /**
    * Get a cached KafkaProducer for a given configuration. If no matching KafkaProducer
    * exists, a new one is created. KafkaProducer is thread-safe, so it is best to keep
    * a single instance per distinct set of kafkaParams.
    */
  private[kafka010] def getOrCreate(kafkaParams: ju.Map[String, Object]): Producer = {
    val paramsSeq: Seq[(String, Object)] = paramsToSeq(kafkaParams)
    try {
      guavaCache.get(paramsSeq)
    } catch {
      case e@(_: ExecutionException | _: UncheckedExecutionException | _: ExecutionError)
        if e.getCause != null =>
        throw e.getCause
    }
  }
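
  // Usage sketch (illustrative, not part of this file; imports and values assumed).
  // Callers inside this package would pass fully resolved producer params, e.g.:
  //   val params = new ju.HashMap[String, Object]()
  //   params.put("bootstrap.servers", "localhost:9092") // assumed broker address
  //   params.put("key.serializer", classOf[ByteArraySerializer].getName)
  //   params.put("value.serializer", classOf[ByteArraySerializer].getName)
  //   params.put("lenses.topology.description", "my-app:orders") // required above
  //   val producer = ShadowedCachedKafkaProducer.getOrCreate(params)
  // Maps with identical entries normalize to the same sorted key and share one producer.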

  private def paramsToSeq(kafkaParams: ju.Map[String, Object]): Seq[(String, Object)] = {
    // Sort by key so that equivalent maps always produce the same cache key.
    kafkaParams.asScala.toSeq.sortBy(_._1)
  }

  /** For explicitly closing kafka producer */
  private[kafka010] def close(kafkaParams: ju.Map[String, Object]): Unit = {
    val paramsSeq = paramsToSeq(kafkaParams)
    guavaCache.invalidate(paramsSeq)
  }
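
  // Invalidation fires the removal listener, which calls the two-argument close
  // below to release the underlying KafkaProducer.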

  /** Auto close on cache evict */
  private def close(paramsSeq: Seq[(String, Object)], producer: Producer): Unit = {
    try {
      logInfo(s"Closing the KafkaProducer with params: ${paramsSeq.mkString("\n")}.")
      producer.close()
    } catch {
      case NonFatal(e) => logWarning("Error while closing kafka producer.", e)
    }
  }

  private def clear(): Unit = {
    logInfo("Cleaning up guava cache.")
    guavaCache.invalidateAll()
  }

  // Intended for testing purpose only.
  private def getAsMap: ConcurrentMap[Seq[(String, Object)], Producer] = guavaCache.asMap()
}
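
For reference, here is a minimal, self-contained sketch of the caching pattern this class relies on: a Guava LoadingCache keyed by a sorted parameter list, with expire-after-access eviction and a removal listener that releases the evicted value. This is illustrative only and not part of the original file; CachePatternSketch, Resource, and all values are made-up names.

import java.util.concurrent.TimeUnit

import com.google.common.cache._

// Illustrative only: caches one Resource per sorted parameter list, closes it on eviction.
object CachePatternSketch {
  final class Resource(val params: Seq[(String, String)]) {
    def close(): Unit = println(s"closing resource for $params")
  }

  private val loader = new CacheLoader[Seq[(String, String)], Resource] {
    override def load(key: Seq[(String, String)]): Resource = new Resource(key)
  }

  private val onRemoval = new RemovalListener[Seq[(String, String)], Resource] {
    // Mirrors close(paramsSeq, producer) above: release the evicted value.
    override def onRemoval(n: RemovalNotification[Seq[(String, String)], Resource]): Unit =
      n.getValue.close()
  }

  private val cache: LoadingCache[Seq[(String, String)], Resource] =
    CacheBuilder.newBuilder()
      .expireAfterAccess(10, TimeUnit.MINUTES)
      .removalListener(onRemoval)
      .build[Seq[(String, String)], Resource](loader)

  def main(args: Array[String]): Unit = {
    val key = Seq("b" -> "2", "a" -> "1").sortBy(_._1) // canonical, order-independent key
    val r1 = cache.get(key)
    val r2 = cache.get(key)
    assert(r1 eq r2)      // the same instance is served while cached
    cache.invalidate(key) // triggers onRemoval, which closes the resource
  }
}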



