
// kafka.controller.ControllerChannelManager.scala
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package kafka.controller
import java.net.SocketTimeoutException
import java.util.concurrent.{BlockingQueue, LinkedBlockingQueue, TimeUnit}
import com.yammer.metrics.core.{Gauge, Timer}
import kafka.api._
import kafka.cluster.Broker
import kafka.metrics.KafkaMetricsGroup
import kafka.server.KafkaConfig
import kafka.utils._
import org.apache.kafka.clients._
import org.apache.kafka.common.metrics.Metrics
import org.apache.kafka.common.network._
import org.apache.kafka.common.protocol.ApiKeys
import org.apache.kafka.common.requests.UpdateMetadataRequest.EndPoint
import org.apache.kafka.common.requests._
import org.apache.kafka.common.security.JaasContext
import org.apache.kafka.common.security.auth.SecurityProtocol
import org.apache.kafka.common.utils.{LogContext, Time}
import org.apache.kafka.common.{KafkaException, Node, TopicPartition}
import scala.collection.JavaConverters._
import scala.collection.mutable.HashMap
import scala.collection.{Set, mutable}
/** Metric name constants shared by the channel manager and the tests/metric cleanup code. */
object ControllerChannelManager {
  // Per-broker gauge: number of controller requests currently queued for one broker.
  val QueueSizeMetricName = "QueueSize"
  // Per-broker timer: request rate plus time each request spent waiting in the queue (ms).
  val RequestRateAndQueueTimeMetricName = "RequestRateAndQueueTimeMs"
}
/**
 * Maintains a dedicated communication channel from the controller to every live broker.
 * For each broker there is a [[ControllerBrokerStateInfo]] bundling a NetworkClient, a
 * blocking request queue and a [[RequestSendThread]] that drains the queue and performs the
 * actual (blocking) sends.
 *
 * All access to `brokerStateInfo` is guarded by `brokerLock`.
 */
class ControllerChannelManager(controllerContext: ControllerContext, config: KafkaConfig, time: Time, metrics: Metrics,
                               stateChangeLogger: StateChangeLogger, threadNamePrefix: Option[String] = None) extends Logging with KafkaMetricsGroup {
  import ControllerChannelManager._

  // Channel state per broker id; guarded by brokerLock.
  protected val brokerStateInfo = new HashMap[Int, ControllerBrokerStateInfo]
  private val brokerLock = new Object

  this.logIdent = "[Channel manager on controller " + config.brokerId + "]: "

  // Gauge over the total number of requests queued across all broker channels.
  newGauge(
    "TotalQueueSize",
    new Gauge[Int] {
      def value: Int = brokerLock synchronized {
        brokerStateInfo.values.iterator.map(_.messageQueue.size).sum
      }
    }
  )

  // Create channel state for every broker alive at construction time; the send threads are
  // only started later, from startup().
  controllerContext.liveBrokers.foreach(addNewBroker)

  /** Starts the request send thread of every registered broker channel. */
  def startup() = {
    brokerLock synchronized {
      brokerStateInfo.foreach(brokerState => startRequestSendThread(brokerState._1))
    }
  }

  /** Tears down every broker channel: stops its thread, closes its client, clears its queue. */
  def shutdown() = {
    brokerLock synchronized {
      // FIX: snapshot the values before iterating. removeExistingBroker removes entries from
      // brokerStateInfo, and removing from a scala.collection.mutable.HashMap while iterating
      // over it is undefined behavior (entries may be skipped).
      brokerStateInfo.values.toList.foreach(removeExistingBroker)
    }
  }

  /**
   * Queues a request for asynchronous delivery to the given broker. If the broker has no
   * registered channel (i.e. it is offline), the request is dropped with a warning.
   *
   * @param callback invoked by the send thread with the broker's response; may be null when
   *                 no response handling is required
   */
  def sendRequest(brokerId: Int, apiKey: ApiKeys, request: AbstractRequest.Builder[_ <: AbstractRequest],
                  callback: AbstractResponse => Unit = null) {
    brokerLock synchronized {
      val stateInfoOpt = brokerStateInfo.get(brokerId)
      stateInfoOpt match {
        case Some(stateInfo) =>
          stateInfo.messageQueue.put(QueueItem(apiKey, request, callback, time.milliseconds()))
        case None =>
          warn(s"Not sending request $request to broker $brokerId, since it is offline.")
      }
    }
  }

  /** Registers a channel for a newly-alive broker and starts its send thread (idempotent). */
  def addBroker(broker: Broker) {
    // be careful here. Maybe the startup() API has already started the request send thread
    brokerLock synchronized {
      if (!brokerStateInfo.contains(broker.id)) {
        addNewBroker(broker)
        startRequestSendThread(broker.id)
      }
    }
  }

  /**
   * Tears down the channel of a broker that left the cluster. Assumes the broker is currently
   * registered; the map apply throws NoSuchElementException otherwise.
   */
  def removeBroker(brokerId: Int) {
    brokerLock synchronized {
      removeExistingBroker(brokerStateInfo(brokerId))
    }
  }

  /** Builds the network client, message queue, send thread and metrics for one broker. */
  private def addNewBroker(broker: Broker) {
    val messageQueue = new LinkedBlockingQueue[QueueItem]
    debug(s"Controller ${config.brokerId} trying to connect to broker ${broker.id}")
    val brokerNode = broker.node(config.interBrokerListenerName)
    val logContext = new LogContext(s"[Controller id=${config.brokerId}, targetBrokerId=${brokerNode.idString}] ")
    val networkClient = {
      val channelBuilder = ChannelBuilders.clientChannelBuilder(
        config.interBrokerSecurityProtocol,
        JaasContext.Type.SERVER,
        config,
        config.interBrokerListenerName,
        config.saslMechanismInterBrokerProtocol,
        config.saslInterBrokerHandshakeRequestEnable
      )
      val selector = new Selector(
        NetworkReceive.UNLIMITED,
        Selector.NO_IDLE_TIMEOUT_MS,
        metrics,
        time,
        "controller-channel",
        Map("broker-id" -> brokerNode.idString).asJava,
        false,
        channelBuilder,
        logContext
      )
      new NetworkClient(
        selector,
        new ManualMetadataUpdater(Seq(brokerNode).asJava),
        config.brokerId.toString,
        1,
        0,
        0,
        Selectable.USE_DEFAULT_BUFFER_SIZE,
        Selectable.USE_DEFAULT_BUFFER_SIZE,
        config.requestTimeoutMs,
        ClientDnsLookup.DEFAULT,
        time,
        false,
        new ApiVersions,
        logContext
      )
    }
    val threadName = threadNamePrefix match {
      case None => s"Controller-${config.brokerId}-to-broker-${broker.id}-send-thread"
      case Some(name) => s"$name:Controller-${config.brokerId}-to-broker-${broker.id}-send-thread"
    }

    val requestRateAndQueueTimeMetrics = newTimer(
      RequestRateAndQueueTimeMetricName, TimeUnit.MILLISECONDS, TimeUnit.SECONDS, brokerMetricTags(broker.id)
    )

    val requestThread = new RequestSendThread(config.brokerId, controllerContext, messageQueue, networkClient,
      brokerNode, config, time, requestRateAndQueueTimeMetrics, stateChangeLogger, threadName)
    // Non-daemon so an in-flight send can complete during an orderly JVM shutdown.
    requestThread.setDaemon(false)

    val queueSizeGauge = newGauge(
      QueueSizeMetricName,
      new Gauge[Int] {
        def value: Int = messageQueue.size
      },
      brokerMetricTags(broker.id)
    )

    brokerStateInfo.put(broker.id, ControllerBrokerStateInfo(networkClient, brokerNode, messageQueue,
      requestThread, queueSizeGauge, requestRateAndQueueTimeMetrics))
  }

  // Metric tag identifying the target broker of the per-broker metrics.
  private def brokerMetricTags(brokerId: Int) = Map("broker-id" -> brokerId.toString)

  /** Stops the thread, closes the client, clears the queue and unregisters the metrics. */
  private def removeExistingBroker(brokerState: ControllerBrokerStateInfo) {
    try {
      // Shutdown the RequestSendThread before closing the NetworkClient to avoid the concurrent use of the
      // non-threadsafe classes as described in KAFKA-4959.
      // The call to shutdownLatch.await() in ShutdownableThread.shutdown() serves as a synchronization barrier that
      // hands off the NetworkClient from the RequestSendThread to the ZkEventThread.
      brokerState.requestSendThread.shutdown()
      brokerState.networkClient.close()
      brokerState.messageQueue.clear()
      removeMetric(QueueSizeMetricName, brokerMetricTags(brokerState.brokerNode.id))
      removeMetric(RequestRateAndQueueTimeMetricName, brokerMetricTags(brokerState.brokerNode.id))
      brokerStateInfo.remove(brokerState.brokerNode.id)
    } catch {
      // Deliberately best-effort: a failure tearing down one broker's channel must not stop
      // the controller from making progress on the others.
      case e: Throwable => error("Error while removing broker by the controller", e)
    }
  }

  /** Starts the broker's send thread if it has not been started yet. */
  protected def startRequestSendThread(brokerId: Int) {
    val requestThread = brokerStateInfo(brokerId).requestSendThread
    if (requestThread.getState == Thread.State.NEW)
      requestThread.start()
  }
}
/**
 * A single controller request waiting in a broker's send queue, together with the callback to
 * invoke on the response (may be null) and the enqueue timestamp used to compute queue-time
 * metrics in the send thread.
 */
case class QueueItem(apiKey: ApiKeys, request: AbstractRequest.Builder[_ <: AbstractRequest],
                     callback: AbstractResponse => Unit, enqueueTimeMs: Long)
/**
 * Dedicated thread that drains one broker's request queue and sends each request over the
 * supplied NetworkClient, blocking until the response arrives. Failed sends are retried with
 * a short backoff until the send succeeds or the thread is shut down. Only LeaderAndIsr,
 * StopReplica and UpdateMetadata responses are expected; anything else is a fatal error.
 */
class RequestSendThread(val controllerId: Int,
                        val controllerContext: ControllerContext,
                        val queue: BlockingQueue[QueueItem],
                        val networkClient: NetworkClient,
                        val brokerNode: Node,
                        val config: KafkaConfig,
                        val time: Time,
                        val requestRateAndQueueTimeMetrics: Timer,
                        val stateChangeLogger: StateChangeLogger,
                        name: String)
  extends ShutdownableThread(name = name) {

  logIdent = s"[RequestSendThread controllerId=$controllerId] "

  private val socketTimeoutMs = config.controllerSocketTimeoutMs

  /** Processes one queued request per invocation; called in a loop by ShutdownableThread. */
  override def doWork(): Unit = {

    def backoff(): Unit = pause(100, TimeUnit.MILLISECONDS)

    // Blocks until a request is available.
    val QueueItem(apiKey, requestBuilder, callback, enqueueTimeMs) = queue.take()
    // Record how long the request sat in the queue before being picked up.
    requestRateAndQueueTimeMetrics.update(time.milliseconds() - enqueueTimeMs, TimeUnit.MILLISECONDS)

    var clientResponse: ClientResponse = null
    try {
      var isSendSuccessful = false
      while (isRunning && !isSendSuccessful) {
        // if a broker goes down for a long time, then at some point the controller's zookeeper listener will trigger a
        // removeBroker which will invoke shutdown() on this thread. At that point, we will stop retrying.
        try {
          if (!brokerReady()) {
            isSendSuccessful = false
            // Avoid spinning while the connection cannot be established.
            backoff()
          }
          else {
            val clientRequest = networkClient.newClientRequest(brokerNode.idString, requestBuilder,
              time.milliseconds(), true)
            // Blocking send: waits for the response (or a disconnect) before continuing.
            clientResponse = NetworkClientUtils.sendAndReceive(networkClient, clientRequest, time)
            isSendSuccessful = true
          }
        } catch {
          case e: Throwable => // if the send was not successful, reconnect to broker and resend the message
            warn(s"Controller $controllerId epoch ${controllerContext.epoch} fails to send request $requestBuilder " +
              s"to broker $brokerNode. Reconnecting to broker.", e)
            networkClient.close(brokerNode.idString)
            isSendSuccessful = false
            backoff()
        }
      }
      if (clientResponse != null) {
        val requestHeader = clientResponse.requestHeader
        val api = requestHeader.apiKey
        // The controller only ever sends these three request types; anything else is a bug.
        // NOTE(review): the message interpolates $apiKey (the queued item's key) rather than
        // $api (the response header's key) — they should normally agree; confirm.
        if (api != ApiKeys.LEADER_AND_ISR && api != ApiKeys.STOP_REPLICA && api != ApiKeys.UPDATE_METADATA)
          throw new KafkaException(s"Unexpected apiKey received: $apiKey")

        val response = clientResponse.responseBody

        stateChangeLogger.withControllerEpoch(controllerContext.epoch).trace(s"Received response " +
          s"${response.toString(requestHeader.apiVersion)} for request $api with correlation id " +
          s"${requestHeader.correlationId} sent to broker $brokerNode")

        if (callback != null) {
          callback(response)
        }
      }
    } catch {
      case e: Throwable =>
        // Deliberately broad catch: the thread must survive so it keeps serving the queue.
        error(s"Controller $controllerId fails to send a request to broker $brokerNode", e)
        // If there is any socket error (eg, socket timeout), the connection is no longer usable and needs to be recreated.
        networkClient.close(brokerNode.idString)
    }
  }

  /**
   * Returns true when the connection to the broker is ready for sending, attempting to
   * establish it (bounded by the controller socket timeout) if needed. On any failure the
   * connection is closed and false is returned.
   */
  private def brokerReady(): Boolean = {
    try {
      if (!NetworkClientUtils.isReady(networkClient, brokerNode, time.milliseconds())) {
        if (!NetworkClientUtils.awaitReady(networkClient, brokerNode, time, socketTimeoutMs))
          throw new SocketTimeoutException(s"Failed to connect within $socketTimeoutMs ms")

        info(s"Controller $controllerId connected to $brokerNode for sending state change requests")
      }
      true
    } catch {
      case e: Throwable =>
        warn(s"Controller $controllerId's connection to broker $brokerNode was unsuccessful", e)
        networkClient.close(brokerNode.idString)
        false
    }
  }

  // Also ask the network client to start closing so a blocked sendAndReceive can observe
  // the shutdown instead of waiting for the full request timeout.
  override def initiateShutdown(): Boolean = {
    if (super.initiateShutdown()) {
      networkClient.initiateClose()
      true
    } else
      false
  }
}
/**
 * Accumulates controller-to-broker state change requests (LeaderAndIsr, StopReplica and
 * UpdateMetadata) into per-broker batches and dispatches them through the controller's
 * channel manager.
 *
 * Usage protocol: newBatch() -> add*RequestForBrokers(...) -> sendRequestsToBrokers(epoch).
 * Not thread-safe; presumably driven only from the controller event thread — confirm.
 */
class ControllerBrokerRequestBatch(controller: KafkaController, stateChangeLogger: StateChangeLogger) extends Logging {
  val controllerContext = controller.controllerContext
  val controllerId: Int = controller.config.brokerId
  // broker id -> (partition -> pending LeaderAndIsr partition state)
  val leaderAndIsrRequestMap = mutable.Map.empty[Int, mutable.Map[TopicPartition, LeaderAndIsrRequest.PartitionState]]
  // broker id -> pending StopReplica entries (each with its own delete flag and callback)
  val stopReplicaRequestMap = mutable.Map.empty[Int, Seq[StopReplicaRequestInfo]]
  // brokers that should receive the next UpdateMetadata request
  val updateMetadataRequestBrokerSet = mutable.Set.empty[Int]
  // partition states to carry in the next UpdateMetadata request
  val updateMetadataRequestPartitionInfoMap = mutable.Map.empty[TopicPartition, UpdateMetadataRequest.PartitionState]

  /** Fails fast if the previous batch was not fully sent (its requests would be lost). */
  def newBatch() {
    // raise error if the previous batch is not empty
    if (leaderAndIsrRequestMap.nonEmpty)
      throw new IllegalStateException("Controller to broker state change requests batch is not empty while creating " +
        s"a new one. Some LeaderAndIsr state changes $leaderAndIsrRequestMap might be lost ")
    if (stopReplicaRequestMap.nonEmpty)
      throw new IllegalStateException("Controller to broker state change requests batch is not empty while creating a " +
        s"new one. Some StopReplica state changes $stopReplicaRequestMap might be lost ")
    if (updateMetadataRequestBrokerSet.nonEmpty)
      throw new IllegalStateException("Controller to broker state change requests batch is not empty while creating a " +
        s"new one. Some UpdateMetadata state changes to brokers $updateMetadataRequestBrokerSet with partition info " +
        s"$updateMetadataRequestPartitionInfoMap might be lost ")
  }

  /** Discards all accumulated (unsent) requests. */
  def clear() {
    leaderAndIsrRequestMap.clear()
    stopReplicaRequestMap.clear()
    updateMetadataRequestBrokerSet.clear()
    updateMetadataRequestPartitionInfoMap.clear()
  }

  /**
   * Queues a LeaderAndIsr partition state for each given (non-negative) broker id, and also
   * queues an UpdateMetadata request for this partition to all live/shutting-down brokers.
   */
  def addLeaderAndIsrRequestForBrokers(brokerIds: Seq[Int], topicPartition: TopicPartition,
                                       leaderIsrAndControllerEpoch: LeaderIsrAndControllerEpoch,
                                       replicas: Seq[Int], isNew: Boolean) {
    brokerIds.filter(_ >= 0).foreach { brokerId =>
      val result = leaderAndIsrRequestMap.getOrElseUpdate(brokerId, mutable.Map.empty)
      // Keep the isNew flag sticky: if a pending entry for the same partition was already
      // marked new, overwriting it must not clear the flag.
      val alreadyNew = result.get(topicPartition).exists(_.isNew)
      result.put(topicPartition, new LeaderAndIsrRequest.PartitionState(leaderIsrAndControllerEpoch.controllerEpoch,
        leaderIsrAndControllerEpoch.leaderAndIsr.leader,
        leaderIsrAndControllerEpoch.leaderAndIsr.leaderEpoch,
        leaderIsrAndControllerEpoch.leaderAndIsr.isr.map(Integer.valueOf).asJava,
        leaderIsrAndControllerEpoch.leaderAndIsr.zkVersion,
        replicas.map(Integer.valueOf).asJava,
        isNew || alreadyNew))
    }
    addUpdateMetadataRequestForBrokers(controllerContext.liveOrShuttingDownBrokerIds.toSeq, Set(topicPartition))
  }

  /**
   * Queues a StopReplica entry for each given (non-negative) broker id. The callback is
   * curried with the broker id so the response handler knows which broker responded.
   */
  def addStopReplicaRequestForBrokers(brokerIds: Seq[Int], topicPartition: TopicPartition, deletePartition: Boolean,
                                      callback: (AbstractResponse, Int) => Unit) {
    brokerIds.filter(b => b >= 0).foreach { brokerId =>
      stopReplicaRequestMap.getOrElseUpdate(brokerId, Seq.empty[StopReplicaRequestInfo])
      val v = stopReplicaRequestMap(brokerId)
      stopReplicaRequestMap(brokerId) = v :+ StopReplicaRequestInfo(PartitionAndReplica(topicPartition, brokerId),
        deletePartition, (r: AbstractResponse) => callback(r, brokerId))
    }
  }

  /** Send UpdateMetadataRequest to the given brokers for the given partitions and partitions that are being deleted */
  def addUpdateMetadataRequestForBrokers(brokerIds: Seq[Int],
                                         partitions: collection.Set[TopicPartition]) {

    // Records the partition state for one partition; partitions of topics being deleted are
    // advertised with the special "during delete" leader marker instead of the real leader.
    def updateMetadataRequestPartitionInfo(partition: TopicPartition, beingDeleted: Boolean) {
      val leaderIsrAndControllerEpochOpt = controllerContext.partitionLeadershipInfo.get(partition)
      leaderIsrAndControllerEpochOpt match {
        case Some(l @ LeaderIsrAndControllerEpoch(leaderAndIsr, controllerEpoch)) =>
          val replicas = controllerContext.partitionReplicaAssignment(partition)
          val offlineReplicas = replicas.filter(!controllerContext.isReplicaOnline(_, partition))
          val leaderIsrAndControllerEpoch = if (beingDeleted) {
            val leaderDuringDelete = LeaderAndIsr.duringDelete(leaderAndIsr.isr)
            LeaderIsrAndControllerEpoch(leaderDuringDelete, controllerEpoch)
          } else {
            l
          }
          val partitionStateInfo = new UpdateMetadataRequest.PartitionState(leaderIsrAndControllerEpoch.controllerEpoch,
            leaderIsrAndControllerEpoch.leaderAndIsr.leader,
            leaderIsrAndControllerEpoch.leaderAndIsr.leaderEpoch,
            leaderIsrAndControllerEpoch.leaderAndIsr.isr.map(Integer.valueOf).asJava,
            leaderIsrAndControllerEpoch.leaderAndIsr.zkVersion,
            replicas.map(Integer.valueOf).asJava,
            offlineReplicas.map(Integer.valueOf).asJava)
          updateMetadataRequestPartitionInfoMap.put(partition, partitionStateInfo)
        case None =>
          info(s"Leader not yet assigned for partition $partition. Skip sending UpdateMetadataRequest.")
      }
    }

    // An empty partition set means "all partitions with known leadership info".
    val givenPartitions = if (partitions.isEmpty)
      controllerContext.partitionLeadershipInfo.keySet
    else
      partitions
    updateMetadataRequestBrokerSet ++= brokerIds.filter(_ >= 0)
    givenPartitions.foreach(partition => updateMetadataRequestPartitionInfo(partition,
      beingDeleted = controller.topicDeletionManager.topicsToBeDeleted.contains(partition.topic)))
  }

  /**
   * Sends all accumulated LeaderAndIsr, UpdateMetadata and StopReplica requests (in that
   * order) and clears the batch as it goes. On any failure the still-pending state is logged
   * and an IllegalStateException wrapping the cause is thrown.
   */
  def sendRequestsToBrokers(controllerEpoch: Int) {
    try {
      val stateChangeLog = stateChangeLogger.withControllerEpoch(controllerEpoch)
      // Request versions are gated on the inter-broker protocol version so that older
      // brokers are never sent a wire version they cannot parse.
      val leaderAndIsrRequestVersion: Short =
        if (controller.config.interBrokerProtocolVersion >= KAFKA_1_0_IV0) 1
        else 0
      leaderAndIsrRequestMap.foreach { case (broker, leaderAndIsrPartitionStates) =>
        leaderAndIsrPartitionStates.foreach { case (topicPartition, state) =>
          val typeOfRequest =
            if (broker == state.basePartitionState.leader) "become-leader"
            else "become-follower"
          stateChangeLog.trace(s"Sending $typeOfRequest LeaderAndIsr request $state to broker $broker for partition $topicPartition")
        }
        // Resolve the leader nodes referenced by the partition states so the receiving
        // broker knows how to reach them.
        val leaderIds = leaderAndIsrPartitionStates.map(_._2.basePartitionState.leader).toSet
        val leaders = controllerContext.liveOrShuttingDownBrokers.filter(b => leaderIds.contains(b.id)).map {
          _.node(controller.config.interBrokerListenerName)
        }
        val leaderAndIsrRequestBuilder = new LeaderAndIsrRequest.Builder(leaderAndIsrRequestVersion, controllerId,
          controllerEpoch, leaderAndIsrPartitionStates.asJava, leaders.asJava)
        // The response is fed back into the controller event loop.
        controller.sendRequest(broker, ApiKeys.LEADER_AND_ISR, leaderAndIsrRequestBuilder,
          (r: AbstractResponse) => controller.eventManager.put(controller.LeaderAndIsrResponseReceived(r, broker)))
      }
      leaderAndIsrRequestMap.clear()

      updateMetadataRequestPartitionInfoMap.foreach { case (tp, partitionState) =>
        stateChangeLog.trace(s"Sending UpdateMetadata request $partitionState to brokers $updateMetadataRequestBrokerSet " +
          s"for partition $tp")
      }
      // Copy into an immutable map before handing it to the shared request builder.
      val partitionStates = Map.empty ++ updateMetadataRequestPartitionInfoMap
      val updateMetadataRequestVersion: Short =
        if (controller.config.interBrokerProtocolVersion >= KAFKA_1_0_IV0) 4
        else if (controller.config.interBrokerProtocolVersion >= KAFKA_0_10_2_IV0) 3
        else if (controller.config.interBrokerProtocolVersion >= KAFKA_0_10_0_IV1) 2
        else if (controller.config.interBrokerProtocolVersion >= KAFKA_0_9_0) 1
        else 0
      // One builder is shared by all target brokers.
      val updateMetadataRequest = {
        val liveBrokers = if (updateMetadataRequestVersion == 0) {
          // Version 0 of UpdateMetadataRequest only supports PLAINTEXT.
          controllerContext.liveOrShuttingDownBrokers.map { broker =>
            val securityProtocol = SecurityProtocol.PLAINTEXT
            val listenerName = ListenerName.forSecurityProtocol(securityProtocol)
            val node = broker.node(listenerName)
            val endPoints = Seq(new EndPoint(node.host, node.port, securityProtocol, listenerName))
            new UpdateMetadataRequest.Broker(broker.id, endPoints.asJava, broker.rack.orNull)
          }
        } else {
          controllerContext.liveOrShuttingDownBrokers.map { broker =>
            val endPoints = broker.endPoints.map { endPoint =>
              new UpdateMetadataRequest.EndPoint(endPoint.host, endPoint.port, endPoint.securityProtocol, endPoint.listenerName)
            }
            new UpdateMetadataRequest.Broker(broker.id, endPoints.asJava, broker.rack.orNull)
          }
        }
        new UpdateMetadataRequest.Builder(updateMetadataRequestVersion, controllerId, controllerEpoch, partitionStates.asJava,
          liveBrokers.asJava)
      }
      updateMetadataRequestBrokerSet.foreach { broker =>
        controller.sendRequest(broker, ApiKeys.UPDATE_METADATA, updateMetadataRequest, null)
      }
      updateMetadataRequestBrokerSet.clear()
      updateMetadataRequestPartitionInfoMap.clear()

      stopReplicaRequestMap.foreach { case (broker, replicaInfoList) =>
        val stopReplicaWithDelete = replicaInfoList.filter(_.deletePartition).map(_.replica).toSet
        val stopReplicaWithoutDelete = replicaInfoList.filterNot(_.deletePartition).map(_.replica).toSet
        debug(s"The stop replica request (delete = true) sent to broker $broker is ${stopReplicaWithDelete.mkString(",")}")
        debug(s"The stop replica request (delete = false) sent to broker $broker is ${stopReplicaWithoutDelete.mkString(",")}")
        val (replicasToGroup, replicasToNotGroup) = replicaInfoList.partition(r => !r.deletePartition && r.callback == null)

        // Send one StopReplicaRequest for all partitions that require neither delete nor callback. This potentially
        // changes the order in which the requests are sent for the same partitions, but that's OK.
        val stopReplicaRequest = new StopReplicaRequest.Builder(controllerId, controllerEpoch, false,
          replicasToGroup.map(_.replica.topicPartition).toSet.asJava)
        controller.sendRequest(broker, ApiKeys.STOP_REPLICA, stopReplicaRequest)

        // Entries with delete=true or with a callback go out as individual requests.
        replicasToNotGroup.foreach { r =>
          val stopReplicaRequest = new StopReplicaRequest.Builder(
            controllerId, controllerEpoch, r.deletePartition,
            Set(r.replica.topicPartition).asJava)
          controller.sendRequest(broker, ApiKeys.STOP_REPLICA, stopReplicaRequest, r.callback)
        }
      }
      stopReplicaRequestMap.clear()
    } catch {
      case e: Throwable =>
        // Log whatever is still pending so operators can see what may have been lost, then
        // surface the failure to the caller.
        if (leaderAndIsrRequestMap.nonEmpty) {
          error("Haven't been able to send leader and isr requests, current state of " +
            s"the map is $leaderAndIsrRequestMap. Exception message: $e")
        }
        if (updateMetadataRequestBrokerSet.nonEmpty) {
          error(s"Haven't been able to send metadata update requests to brokers $updateMetadataRequestBrokerSet, " +
            s"current state of the partition info is $updateMetadataRequestPartitionInfoMap. Exception message: $e")
        }
        if (stopReplicaRequestMap.nonEmpty) {
          error("Haven't been able to send stop replica requests, current state of " +
            s"the map is $stopReplicaRequestMap. Exception message: $e")
        }
        throw new IllegalStateException(e)
    }
  }
}
/**
 * Per-broker channel state held by ControllerChannelManager: the network client, the target
 * node, the pending request queue, the sender thread, and the metric objects registered for
 * this broker (kept so they can be unregistered on removal).
 */
case class ControllerBrokerStateInfo(networkClient: NetworkClient,
                                     brokerNode: Node,
                                     messageQueue: BlockingQueue[QueueItem],
                                     requestSendThread: RequestSendThread,
                                     queueSizeGauge: Gauge[Int],
                                     requestRateAndTimeMetrics: Timer)
/** One pending StopReplica entry: the target replica, whether to delete its data, and the response callback. */
case class StopReplicaRequestInfo(replica: PartitionAndReplica, deletePartition: Boolean, callback: AbstractResponse => Unit)
/** Holder for controller response callbacks; the StopReplica callback defaults to a no-op. */
class Callbacks(val stopReplicaResponseCallback: (AbstractResponse, Int) => Unit = (_, _ ) => ())
// (hosting-site footer removed: "© 2015 - 2025 Weber Informatics LLC | Privacy Policy" — not part of the Apache-licensed source)