org.apache.spark.sql.kafka010.CachedKafkaConsumer.scala Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.kafka010
import java.{util => ju}
import java.util.concurrent.TimeoutException
import scala.collection.JavaConverters._
import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord, KafkaConsumer, OffsetOutOfRangeException}
import org.apache.kafka.common.TopicPartition
import org.apache.spark.{SparkEnv, SparkException, TaskContext}
import org.apache.spark.internal.Logging
import org.apache.spark.sql.kafka010.KafkaSource._
/**
* Consumer of single topicpartition, intended for cached reuse.
* Underlying consumer is not threadsafe, so neither is this,
* but processing the same topicpartition and group id in multiple threads is usually bad anyway.
*/
private[kafka010] case class CachedKafkaConsumer private(
topicPartition: TopicPartition,
kafkaParams: ju.Map[String, Object]) extends Logging {
import CachedKafkaConsumer._
private val groupId = kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG).asInstanceOf[String]
private var consumer = createConsumer
/** indicates whether this consumer is in use or not */
private var inuse = true
/** Iterator to the already fetch data */
private var fetchedData = ju.Collections.emptyIterator[ConsumerRecord[Array[Byte], Array[Byte]]]
private var nextOffsetInFetchedData = UNKNOWN_OFFSET
/** Create a KafkaConsumer to fetch records for `topicPartition` */
private def createConsumer: KafkaConsumer[Array[Byte], Array[Byte]] = {
val c = new KafkaConsumer[Array[Byte], Array[Byte]](kafkaParams)
val tps = new ju.ArrayList[TopicPartition]()
tps.add(topicPartition)
c.assign(tps)
c
}
case class AvailableOffsetRange(earliest: Long, latest: Long)
/**
* Return the available offset range of the current partition. It's a pair of the earliest offset
* and the latest offset.
*/
def getAvailableOffsetRange(): AvailableOffsetRange = {
consumer.seekToBeginning(Set(topicPartition).asJava)
val earliestOffset = consumer.position(topicPartition)
consumer.seekToEnd(Set(topicPartition).asJava)
val latestOffset = consumer.position(topicPartition)
AvailableOffsetRange(earliestOffset, latestOffset)
}
/**
* Get the record for the given offset if available. Otherwise it will either throw error
* (if failOnDataLoss = true), or return the next available offset within [offset, untilOffset),
* or null.
*
* @param offset the offset to fetch.
* @param untilOffset the max offset to fetch. Exclusive.
* @param pollTimeoutMs timeout in milliseconds to poll data from Kafka.
* @param failOnDataLoss When `failOnDataLoss` is `true`, this method will either return record at
* offset if available, or throw exception.when `failOnDataLoss` is `false`,
* this method will either return record at offset if available, or return
* the next earliest available record less than untilOffset, or null. It
* will not throw any exception.
*/
def get(
offset: Long,
untilOffset: Long,
pollTimeoutMs: Long,
failOnDataLoss: Boolean): ConsumerRecord[Array[Byte], Array[Byte]] = {
require(offset < untilOffset,
s"offset must always be less than untilOffset [offset: $offset, untilOffset: $untilOffset]")
logDebug(s"Get $groupId $topicPartition nextOffset $nextOffsetInFetchedData requested $offset")
// The following loop is basically for `failOnDataLoss = false`. When `failOnDataLoss` is
// `false`, first, we will try to fetch the record at `offset`. If no such record exists, then
// we will move to the next available offset within `[offset, untilOffset)` and retry.
// If `failOnDataLoss` is `true`, the loop body will be executed only once.
var toFetchOffset = offset
while (toFetchOffset != UNKNOWN_OFFSET) {
try {
return fetchData(toFetchOffset, untilOffset, pollTimeoutMs, failOnDataLoss)
} catch {
case e: OffsetOutOfRangeException =>
// When there is some error thrown, it's better to use a new consumer to drop all cached
// states in the old consumer. We don't need to worry about the performance because this
// is not a common path.
resetConsumer()
reportDataLoss(failOnDataLoss, s"Cannot fetch offset $toFetchOffset", e)
toFetchOffset = getEarliestAvailableOffsetBetween(toFetchOffset, untilOffset)
}
}
resetFetchedData()
null
}
/**
* Return the next earliest available offset in [offset, untilOffset). If all offsets in
* [offset, untilOffset) are invalid (e.g., the topic is deleted and recreated), it will return
* `UNKNOWN_OFFSET`.
*/
private def getEarliestAvailableOffsetBetween(offset: Long, untilOffset: Long): Long = {
val range = getAvailableOffsetRange()
logWarning(s"Some data may be lost. Recovering from the earliest offset: ${range.earliest}")
if (offset >= range.latest || range.earliest >= untilOffset) {
// [offset, untilOffset) and [earliestOffset, latestOffset) have no overlap,
// either
// --------------------------------------------------------
// ^ ^ ^ ^
// | | | |
// earliestOffset latestOffset offset untilOffset
//
// or
// --------------------------------------------------------
// ^ ^ ^ ^
// | | | |
// offset untilOffset earliestOffset latestOffset
val warningMessage =
s"""
|The current available offset range is $range.
| Offset ${offset} is out of range, and records in [$offset, $untilOffset) will be
| skipped ${additionalMessage(failOnDataLoss = false)}
""".stripMargin
logWarning(warningMessage)
UNKNOWN_OFFSET
} else if (offset >= range.earliest) {
// -----------------------------------------------------------------------------
// ^ ^ ^ ^
// | | | |
// earliestOffset offset min(untilOffset,latestOffset) max(untilOffset, latestOffset)
//
// This will happen when a topic is deleted and recreated, and new data are pushed very fast,
// then we will see `offset` disappears first then appears again. Although the parameters
// are same, the state in Kafka cluster is changed, so the outer loop won't be endless.
logWarning(s"Found a disappeared offset $offset. " +
s"Some data may be lost ${additionalMessage(failOnDataLoss = false)}")
offset
} else {
// ------------------------------------------------------------------------------
// ^ ^ ^ ^
// | | | |
// offset earliestOffset min(untilOffset,latestOffset) max(untilOffset, latestOffset)
val warningMessage =
s"""
|The current available offset range is $range.
| Offset ${offset} is out of range, and records in [$offset, ${range.earliest}) will be
| skipped ${additionalMessage(failOnDataLoss = false)}
""".stripMargin
logWarning(warningMessage)
range.earliest
}
}
/**
* Get the record for the given offset if available. Otherwise it will either throw error
* (if failOnDataLoss = true), or return the next available offset within [offset, untilOffset),
* or null.
*
* @throws OffsetOutOfRangeException if `offset` is out of range
* @throws TimeoutException if cannot fetch the record in `pollTimeoutMs` milliseconds.
*/
private def fetchData(
offset: Long,
untilOffset: Long,
pollTimeoutMs: Long,
failOnDataLoss: Boolean): ConsumerRecord[Array[Byte], Array[Byte]] = {
if (offset != nextOffsetInFetchedData || !fetchedData.hasNext()) {
// This is the first fetch, or the last pre-fetched data has been drained.
// Seek to the offset because we may call seekToBeginning or seekToEnd before this.
seek(offset)
poll(pollTimeoutMs)
}
if (!fetchedData.hasNext()) {
// We cannot fetch anything after `poll`. Two possible cases:
// - `offset` is out of range so that Kafka returns nothing. Just throw
// `OffsetOutOfRangeException` to let the caller handle it.
// - Cannot fetch any data before timeout. TimeoutException will be thrown.
val range = getAvailableOffsetRange()
if (offset < range.earliest || offset >= range.latest) {
throw new OffsetOutOfRangeException(
Map(topicPartition -> java.lang.Long.valueOf(offset)).asJava)
} else {
throw new TimeoutException(
s"Cannot fetch record for offset $offset in $pollTimeoutMs milliseconds")
}
} else {
val record = fetchedData.next()
nextOffsetInFetchedData = record.offset + 1
// In general, Kafka uses the specified offset as the start point, and tries to fetch the next
// available offset. Hence we need to handle offset mismatch.
if (record.offset > offset) {
// This may happen when some records aged out but their offsets already got verified
if (failOnDataLoss) {
reportDataLoss(true, s"Cannot fetch records in [$offset, ${record.offset})")
// Never happen as "reportDataLoss" will throw an exception
null
} else {
if (record.offset >= untilOffset) {
reportDataLoss(false, s"Skip missing records in [$offset, $untilOffset)")
null
} else {
reportDataLoss(false, s"Skip missing records in [$offset, ${record.offset})")
record
}
}
} else if (record.offset < offset) {
// This should not happen. If it does happen, then we probably misunderstand Kafka internal
// mechanism.
throw new IllegalStateException(
s"Tried to fetch $offset but the returned record offset was ${record.offset}")
} else {
record
}
}
}
/** Create a new consumer and reset cached states */
private def resetConsumer(): Unit = {
consumer.close()
consumer = createConsumer
resetFetchedData()
}
/** Reset the internal pre-fetched data. */
private def resetFetchedData(): Unit = {
nextOffsetInFetchedData = UNKNOWN_OFFSET
fetchedData = ju.Collections.emptyIterator[ConsumerRecord[Array[Byte], Array[Byte]]]
}
/**
* Return an addition message including useful message and instruction.
*/
private def additionalMessage(failOnDataLoss: Boolean): String = {
if (failOnDataLoss) {
s"(GroupId: $groupId, TopicPartition: $topicPartition). " +
s"$INSTRUCTION_FOR_FAIL_ON_DATA_LOSS_TRUE"
} else {
s"(GroupId: $groupId, TopicPartition: $topicPartition). " +
s"$INSTRUCTION_FOR_FAIL_ON_DATA_LOSS_FALSE"
}
}
/**
* Throw an exception or log a warning as per `failOnDataLoss`.
*/
private def reportDataLoss(
failOnDataLoss: Boolean,
message: String,
cause: Throwable = null): Unit = {
val finalMessage = s"$message ${additionalMessage(failOnDataLoss)}"
reportDataLoss0(failOnDataLoss, finalMessage, cause)
}
private def close(): Unit = consumer.close()
private def seek(offset: Long): Unit = {
logDebug(s"Seeking to $groupId $topicPartition $offset")
consumer.seek(topicPartition, offset)
}
private def poll(pollTimeoutMs: Long): Unit = {
val p = consumer.poll(pollTimeoutMs)
val r = p.records(topicPartition)
logDebug(s"Polled $groupId ${p.partitions()} ${r.size}")
fetchedData = r.iterator
}
}
private[kafka010] object CachedKafkaConsumer extends Logging {
private val UNKNOWN_OFFSET = -2L
private case class CacheKey(groupId: String, topicPartition: TopicPartition)
private lazy val cache = {
val conf = SparkEnv.get.conf
val capacity = conf.getInt("spark.sql.kafkaConsumerCache.capacity", 64)
new ju.LinkedHashMap[CacheKey, CachedKafkaConsumer](capacity, 0.75f, true) {
override def removeEldestEntry(
entry: ju.Map.Entry[CacheKey, CachedKafkaConsumer]): Boolean = {
if (entry.getValue.inuse == false && this.size > capacity) {
logWarning(s"KafkaConsumer cache hitting max capacity of $capacity, " +
s"removing consumer for ${entry.getKey}")
try {
entry.getValue.close()
} catch {
case e: SparkException =>
logError(s"Error closing earliest Kafka consumer for ${entry.getKey}", e)
}
true
} else {
false
}
}
}
}
def releaseKafkaConsumer(
topic: String,
partition: Int,
kafkaParams: ju.Map[String, Object]): Unit = {
val groupId = kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG).asInstanceOf[String]
val topicPartition = new TopicPartition(topic, partition)
val key = CacheKey(groupId, topicPartition)
synchronized {
val consumer = cache.get(key)
if (consumer != null) {
consumer.inuse = false
} else {
logWarning(s"Attempting to release consumer that does not exist")
}
}
}
/**
* Removes (and closes) the Kafka Consumer for the given topic, partition and group id.
*/
def removeKafkaConsumer(
topic: String,
partition: Int,
kafkaParams: ju.Map[String, Object]): Unit = {
val groupId = kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG).asInstanceOf[String]
val topicPartition = new TopicPartition(topic, partition)
val key = CacheKey(groupId, topicPartition)
synchronized {
val removedConsumer = cache.remove(key)
if (removedConsumer != null) {
removedConsumer.close()
}
}
}
/**
* Get a cached consumer for groupId, assigned to topic and partition.
* If matching consumer doesn't already exist, will be created using kafkaParams.
*/
def getOrCreate(
topic: String,
partition: Int,
kafkaParams: ju.Map[String, Object]): CachedKafkaConsumer = synchronized {
val groupId = kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG).asInstanceOf[String]
val topicPartition = new TopicPartition(topic, partition)
val key = CacheKey(groupId, topicPartition)
// If this is reattempt at running the task, then invalidate cache and start with
// a new consumer
if (TaskContext.get != null && TaskContext.get.attemptNumber > 1) {
removeKafkaConsumer(topic, partition, kafkaParams)
val consumer = new CachedKafkaConsumer(topicPartition, kafkaParams)
consumer.inuse = true
cache.put(key, consumer)
consumer
} else {
if (!cache.containsKey(key)) {
cache.put(key, new CachedKafkaConsumer(topicPartition, kafkaParams))
}
val consumer = cache.get(key)
consumer.inuse = true
consumer
}
}
private def reportDataLoss0(
failOnDataLoss: Boolean,
finalMessage: String,
cause: Throwable = null): Unit = {
if (failOnDataLoss) {
if (cause != null) {
throw new IllegalStateException(finalMessage, cause)
} else {
throw new IllegalStateException(finalMessage)
}
} else {
if (cause != null) {
logWarning(finalMessage, cause)
} else {
logWarning(finalMessage)
}
}
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy