/*
 * Copyright (c) Microsoft. All rights reserved.
 * Licensed under the MIT license. See LICENSE file in the project root for full license information.
 */

package org.apache.spark.streaming.kafka

import java.util.Properties

import kafka.api._
import kafka.common.{ErrorMapping, OffsetAndMetadata, OffsetMetadataAndError, TopicAndPartition}
import kafka.consumer.{ConsumerConfig, SimpleConsumer}
import org.apache.spark.SparkException

import scala.collection.mutable.ArrayBuffer
import scala.util.Random
import scala.util.control.NonFatal

/**
 * Convenience methods for interacting with a Kafka cluster.
 * @param kafkaParams Kafka configuration parameters.
 *   Requires "metadata.broker.list" or "bootstrap.servers" to be set with Kafka broker(s),
 *   NOT zookeeper servers, specified in host1:port1,host2:port2 form.
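 *
 * A minimal usage sketch; the broker addresses and topic name below are
 * hypothetical placeholders:
 * {{{
 *   val kc = new DynamicPartitionKafkaCluster(
 *     Map("metadata.broker.list" -> "broker1:9092,broker2:9092"))
 *   val (partErr, partitions) = kc.getPartitions2(Set("my-topic"))
 *   val (offsetErr, latest) = kc.getLatestLeaderOffsets2(partitions)
 * }}}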
 */
private[spark]
class DynamicPartitionKafkaCluster(override val kafkaParams: Map[String, String]) extends KafkaCluster(kafkaParams) {
  import KafkaCluster.{Err, LeaderOffset, SimpleConsumerConfig}

  /**
   * Find the leader broker (host, port) for each of the given topic partitions,
   * returning any errors alongside whatever leaders were found.
   */
  def findLeaders2(
      topicAndPartitions: Set[TopicAndPartition]
    ): (Option[Err], Map[TopicAndPartition, (String, Int)]) = {
    val topics = topicAndPartitions.map(_.topic)
    val (getPartitionMetaError, topicMetaSet) = getPartitionMetadata2(topics)
    val answer = {
      val leaderMap = topicMetaSet.flatMap { tm: TopicMetadata =>
        tm.partitionsMetadata.flatMap { pm: PartitionMetadata =>
          val tp = TopicAndPartition(tm.topic, pm.partitionId)
          if (topicAndPartitions(tp)) {
            pm.leader.map { l =>
              tp -> (l.host -> l.port)
            }
          } else {
            None
          }
        }
      }.toMap

      if (leaderMap.keys.size == topicAndPartitions.size) {
        (getPartitionMetaError, leaderMap)
      } else {
        val missing = topicAndPartitions.diff(leaderMap.keySet)
        val err = new Err
        err.append(new SparkException(s"Couldn't find leaders for ${missing}"))
        (Some(err), leaderMap)
      }
    }
    answer
  }

  /** Get all partitions for the given topics, as an (error, partitions) pair. */
  def getPartitions2(topics: Set[String]): (Option[Err], Set[TopicAndPartition]) = {
    val (errs, topicMetaSet) = getPartitionMetadata2(topics)
    val topicAndPartitions = topicMetaSet.flatMap { tm: TopicMetadata =>
      tm.partitionsMetadata.map { pm: PartitionMetadata =>
        TopicAndPartition(tm.topic, pm.partitionId)
      }
    }
    (errs, topicAndPartitions)
  }

  /**
   * Fetch topic metadata for the given topics from the seed brokers,
   * accumulating an error for each topic whose metadata could not be read.
   */
  def getPartitionMetadata2(topics: Set[String]): (Option[Err], Set[TopicMetadata]) = {
    val req = TopicMetadataRequest(
      TopicMetadataRequest.CurrentVersion, 0, config.clientId, topics.toSeq)
    var topicMetaSet: Set[TopicMetadata] = Set()
    val errs = new Err
    withBrokers(Random.shuffle(config.seedBrokers), errs) { consumer =>
      val resp: TopicMetadataResponse = consumer.send(req)
      val respErrs = resp.topicsMetadata.filter(m => m.errorCode != ErrorMapping.NoError)
      if (respErrs.isEmpty) {
        return (None, resp.topicsMetadata.toSet)
      } else {
        respErrs.foreach { m =>
          val cause = ErrorMapping.exceptionFor(m.errorCode)
          val msg = s"Error getting partition metadata for '${m.topic}'. Does the topic exist?"
          errs.append(new SparkException(msg, cause))
        }

        val tms = resp.topicsMetadata.filter(m => m.errorCode == ErrorMapping.NoError).toSet
        if (tms.size > topicMetaSet.size) {
          topicMetaSet = tms
        }
      }
    }
    (Some(errs), topicMetaSet)
  }

  /** Get the latest (highest) available offset from the leader of each partition. */
  def getLatestLeaderOffsets2(
      topicAndPartitions: Set[TopicAndPartition]
    ): (Option[Err], Map[TopicAndPartition, LeaderOffset]) =
    getLeaderOffsets2(topicAndPartitions, OffsetRequest.LatestTime)

  /** Get the earliest (lowest) available offset from the leader of each partition. */
  def getEarliestLeaderOffsets2(
      topicAndPartitions: Set[TopicAndPartition]
    ): (Option[Err], Map[TopicAndPartition, LeaderOffset]) =
    getLeaderOffsets2(topicAndPartitions, OffsetRequest.EarliestTime)

  /** Get, for each partition, the single leader offset closest to the given time. */
  def getLeaderOffsets2(
      topicAndPartitions: Set[TopicAndPartition],
      before: Long
    ): (Option[Err], Map[TopicAndPartition, LeaderOffset]) = {
    val (err, leaderOffsets) = getLeaderOffsets2(topicAndPartitions, before, 1)
    (err,
     leaderOffsets.map { kv =>
       // mapValues isn't serializable, see SI-7005
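       // (Map#mapValues returns a lazy, non-Serializable view; the explicit
       // map below materializes a plain serializable Map instead.)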
       kv._1 -> kv._2.head
     })
  }

  /**
   * Get, for each partition, up to maxNumOffsets leader offsets closest to
   * the given time (a Kafka timestamp, or OffsetRequest.LatestTime/EarliestTime).
   */
  def getLeaderOffsets2(
      topicAndPartitions: Set[TopicAndPartition],
      before: Long,
      maxNumOffsets: Int
    ): (Option[Err], Map[TopicAndPartition, Seq[LeaderOffset]]) = {
    val (leaderErr, tpToLeaderMap) = findLeaders2(topicAndPartitions)
    val leaderToTp: Map[(String, Int), Seq[TopicAndPartition]] = flip(tpToLeaderMap)
    val leaders = leaderToTp.keys
    var result = Map[TopicAndPartition, Seq[LeaderOffset]]()
    val errs = new Err
    withBrokers(leaders, errs) { consumer =>
      val partitionsToGetOffsets: Seq[TopicAndPartition] =
        leaderToTp((consumer.host, consumer.port))
      val reqMap = partitionsToGetOffsets.map { tp: TopicAndPartition =>
        tp -> PartitionOffsetRequestInfo(before, maxNumOffsets)
      }.toMap
      val req = OffsetRequest(reqMap)
      val resp = consumer.getOffsetsBefore(req)
      val respMap = resp.partitionErrorAndOffsets
      partitionsToGetOffsets.foreach { tp: TopicAndPartition =>
        respMap.get(tp).foreach { por: PartitionOffsetsResponse =>
          if (por.error == ErrorMapping.NoError) {
            if (por.offsets.nonEmpty) {
              result += tp -> por.offsets.map { off =>
                LeaderOffset(consumer.host, consumer.port, off)
              }
            } else {
              errs.append(new SparkException(
                s"Empty offsets for ${tp}, is ${before} before log beginning?"))
            }
          } else {
            errs.append(ErrorMapping.exceptionFor(por.error))
          }
        }
      }
      if (result.keys.size == topicAndPartitions.size) {
        return (None, result)
      }
    }
    val missing = topicAndPartitions.diff(result.keySet)
    errs.append(new SparkException(s"Couldn't find leader offsets for ${missing}"))
    (Some(errs), result)
  }

  // Invert a map, grouping keys by their shared values,
  // e.g. Map(tp1 -> leaderA, tp2 -> leaderA) becomes Map(leaderA -> Seq(tp1, tp2)).
  private def flip[K, V](m: Map[K, V]): Map[V, Seq[K]] =
    m.groupBy(_._2).map { kv =>
      kv._1 -> kv._2.keys.toSeq
    }

  // Try a call against potentially multiple brokers, accumulating errors
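  // Callers stop early on success via `return` inside fn: the resulting
  // NonLocalReturnControl is a ControlThrowable, which NonFatal does not catch,
  // so it bypasses the error accumulation while `finally` still closes the consumer.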
  private def withBrokers(brokers: Iterable[(String, Int)], errs: Err)
                         (fn: SimpleConsumer => Any): Unit = {
    brokers.foreach { hp =>
      var consumer: SimpleConsumer = null
      try {
        consumer = connect(hp._1, hp._2)
        fn(consumer)
      } catch {
        case NonFatal(e) =>
          errs.append(e)
      } finally {
        if (consumer != null) {
          consumer.close()
        }
      }
    }
  }
}