
kafka.log.LogManager.scala
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package kafka.log
import java.io._
import java.nio.file.{Files, NoSuchFileException}
import java.util.concurrent._
import java.util.concurrent.atomic.AtomicInteger
import kafka.server.checkpoints.OffsetCheckpointFile
import kafka.server.metadata.ConfigRepository
import kafka.server._
import kafka.server.metadata.BrokerMetadataPublisher.info
import kafka.utils._
import org.apache.kafka.common.{DirectoryId, KafkaException, TopicPartition, Uuid}
import org.apache.kafka.common.utils.{KafkaThread, Time, Utils}
import org.apache.kafka.common.errors.{InconsistentTopicIdException, KafkaStorageException, LogDirNotFoundException}
import scala.jdk.CollectionConverters._
import scala.collection._
import scala.collection.mutable.ArrayBuffer
import scala.util.{Failure, Success, Try}
import kafka.utils.Implicits._
import org.apache.kafka.common.config.TopicConfig
import org.apache.kafka.common.requests.{AbstractControlRequest, LeaderAndIsrRequest}
import org.apache.kafka.image.TopicsImage
import org.apache.kafka.metadata.properties.{MetaProperties, MetaPropertiesEnsemble, PropertiesUtils}
import java.util.{OptionalLong, Properties}
import org.apache.kafka.server.common.MetadataVersion
import org.apache.kafka.storage.internals.log.LogConfig.MessageFormatVersion
import org.apache.kafka.server.metrics.KafkaMetricsGroup
import org.apache.kafka.server.util.{FileLock, Scheduler}
import org.apache.kafka.storage.internals.log.{CleanerConfig, LogConfig, LogDirFailureChannel, ProducerStateManagerConfig, RemoteIndexCache}
import org.apache.kafka.storage.internals.checkpoint.CleanShutdownFileHandler
import java.util
import scala.annotation.nowarn
/**
* The entry point to the Kafka log management subsystem. The log manager is responsible for log creation, retrieval, and cleaning.
* All read and write operations are delegated to the individual log instances.
*
* The log manager maintains logs in one or more directories. New logs are created in the data directory
* with the fewest logs. No attempt is made to move partitions after the fact or balance based on
* size or I/O rate.
*
* A background thread handles log retention by periodically truncating excess log segments.
*/
@threadsafe
class LogManager(logDirs: Seq[File],
initialOfflineDirs: Seq[File],
configRepository: ConfigRepository,
val initialDefaultConfig: LogConfig,
val cleanerConfig: CleanerConfig,
recoveryThreadsPerDataDir: Int,
val flushCheckMs: Long,
val flushRecoveryOffsetCheckpointMs: Long,
val flushStartOffsetCheckpointMs: Long,
val retentionCheckMs: Long,
val maxTransactionTimeoutMs: Int,
val producerStateManagerConfig: ProducerStateManagerConfig,
val producerIdExpirationCheckIntervalMs: Int,
interBrokerProtocolVersion: MetadataVersion,
scheduler: Scheduler,
brokerTopicStats: BrokerTopicStats,
logDirFailureChannel: LogDirFailureChannel,
time: Time,
val keepPartitionMetadataFile: Boolean,
remoteStorageSystemEnable: Boolean,
val initialTaskDelayMs: Long) extends Logging {
import LogManager._
private val metricsGroup = new KafkaMetricsGroup(this.getClass)
private val logCreationOrDeletionLock = new Object
private val currentLogs = new Pool[TopicPartition, UnifiedLog]()
// Future logs are put in a directory with the "-future" suffix. A future log is created when a user wants to move a replica
// from one log directory to another on the same broker. The directory of the future log will be renamed
// to replace the current log of the partition after the future log catches up with the current log.
private val futureLogs = new Pool[TopicPartition, UnifiedLog]()
// Each element in the queue contains the log object to be deleted and the time it is scheduled for deletion.
private val logsToBeDeleted = new LinkedBlockingQueue[(UnifiedLog, Long)]()
// Map of stray partition to stray log. This holds all stray logs detected on the broker.
// Visible for testing
private val strayLogs = new Pool[TopicPartition, UnifiedLog]()
private val _liveLogDirs: ConcurrentLinkedQueue[File] = createAndValidateLogDirs(logDirs, initialOfflineDirs)
@volatile private var _currentDefaultConfig = initialDefaultConfig
@volatile private var numRecoveryThreadsPerDataDir = recoveryThreadsPerDataDir
// This map contains all partitions whose logs are getting loaded and initialized. If the log configuration
// of one of these partitions gets updated at the same time, the corresponding entry in this map is set to "true",
// which triggers a config reload after initialization is finished (to get the latest config value).
// See KAFKA-8813 for more detail on the race condition.
// Visible for testing
private[log] val partitionsInitializing = new ConcurrentHashMap[TopicPartition, Boolean]().asScala
def reconfigureDefaultLogConfig(logConfig: LogConfig): Unit = {
this._currentDefaultConfig = logConfig
}
def currentDefaultConfig: LogConfig = _currentDefaultConfig
def liveLogDirs: Seq[File] = {
if (_liveLogDirs.size == logDirs.size)
logDirs
else
_liveLogDirs.asScala.toBuffer
}
private val dirLocks = lockLogDirs(liveLogDirs)
private val directoryIds: mutable.Map[String, Uuid] = loadDirectoryIds(liveLogDirs)
def directoryIdsSet: Predef.Set[Uuid] = directoryIds.values.toSet
@volatile private var recoveryPointCheckpoints = liveLogDirs.map(dir =>
(dir, new OffsetCheckpointFile(new File(dir, RecoveryPointCheckpointFile), logDirFailureChannel))).toMap
@volatile private var logStartOffsetCheckpoints = liveLogDirs.map(dir =>
(dir, new OffsetCheckpointFile(new File(dir, LogStartOffsetCheckpointFile), logDirFailureChannel))).toMap
private val preferredLogDirs = new ConcurrentHashMap[TopicPartition, String]()
def hasOfflineLogDirs(): Boolean = offlineLogDirs.nonEmpty
def onlineLogDirId(uuid: Uuid): Boolean = directoryIds.exists(_._2 == uuid)
private def offlineLogDirs: Iterable[File] = {
val logDirsSet = mutable.Set[File]() ++= logDirs
_liveLogDirs.forEach(dir => logDirsSet -= dir)
logDirsSet
}
// A map that stores the hadCleanShutdown flag for each log dir.
private val hadCleanShutdownFlags = new ConcurrentHashMap[String, Boolean]()
// A map that tells whether all logs in a log dir have been loaded at startup time.
private val loadLogsCompletedFlags = new ConcurrentHashMap[String, Boolean]()
@volatile private var _cleaner: LogCleaner = _
private[kafka] def cleaner: LogCleaner = _cleaner
metricsGroup.newGauge("OfflineLogDirectoryCount", () => offlineLogDirs.size)
for (dir <- logDirs) {
metricsGroup.newGauge("LogDirectoryOffline",
() => if (_liveLogDirs.contains(dir)) 0 else 1,
Map("logDirectory" -> dir.getAbsolutePath).asJava)
}
/**
 * Create and check validity of the given directories that are not in the given offline directories, specifically:
 *
 * - Ensure that there are no duplicates in the directory list
 * - Create each directory if it doesn't exist
 * - Check that each path is a readable directory
 */
private def createAndValidateLogDirs(dirs: Seq[File], initialOfflineDirs: Seq[File]): ConcurrentLinkedQueue[File] = {
val liveLogDirs = new ConcurrentLinkedQueue[File]()
val canonicalPaths = mutable.HashSet.empty[String]
for (dir <- dirs) {
try {
if (initialOfflineDirs.contains(dir))
throw new IOException(s"Failed to load ${dir.getAbsolutePath} during broker startup")
if (!dir.exists) {
info(s"Log directory ${dir.getAbsolutePath} not found, creating it.")
val created = dir.mkdirs()
if (!created)
throw new IOException(s"Failed to create data directory ${dir.getAbsolutePath}")
Utils.flushDir(dir.toPath.toAbsolutePath.normalize.getParent)
}
if (!dir.isDirectory || !dir.canRead)
throw new IOException(s"${dir.getAbsolutePath} is not a readable log directory.")
// getCanonicalPath() throws IOException if a file system query fails or if the path is invalid (e.g. contains
// the Nul character). Since there's no easy way to distinguish between the two cases, we treat them the same
// and mark the log directory as offline.
if (!canonicalPaths.add(dir.getCanonicalPath))
throw new KafkaException(s"Duplicate log directory found: ${dirs.mkString(", ")}")
liveLogDirs.add(dir)
} catch {
case e: IOException =>
logDirFailureChannel.maybeAddOfflineLogDir(dir.getAbsolutePath, s"Failed to create or validate data directory ${dir.getAbsolutePath}", e)
}
}
if (liveLogDirs.isEmpty) {
fatal(s"Shutdown broker because none of the specified log dirs from ${dirs.mkString(", ")} can be created or validated")
Exit.halt(1)
}
liveLogDirs
}
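// Illustrative sketch (not part of the original source): the canonical-path check above is what
// catches duplicates that differ only textually, e.g. a relative path or symlink resolving to an
// already-registered directory. Assuming hypothetical paths:
//
//   new File("/data/kafka-logs").getCanonicalPath          // "/data/kafka-logs"
//   new File("/data/../data/kafka-logs").getCanonicalPath  // also "/data/kafka-logs"
//
// so the second add() to `canonicalPaths` returns false and the duplicate is rejected.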
def resizeRecoveryThreadPool(newSize: Int): Unit = {
info(s"Resizing recovery thread pool size for each data dir from $numRecoveryThreadsPerDataDir to $newSize")
numRecoveryThreadsPerDataDir = newSize
}
/**
* The log directory failure handler. It will stop log cleaning in that directory.
*
* @param dir the absolute path of the log directory
*/
def handleLogDirFailure(dir: String): Unit = {
warn(s"Stopping serving logs in dir $dir")
logCreationOrDeletionLock synchronized {
_liveLogDirs.remove(new File(dir))
directoryIds.remove(dir)
if (_liveLogDirs.isEmpty) {
fatal(s"Shutdown broker because all log dirs in ${logDirs.mkString(", ")} have failed")
Exit.halt(1)
}
recoveryPointCheckpoints = recoveryPointCheckpoints.filter { case (file, _) => file.getAbsolutePath != dir }
logStartOffsetCheckpoints = logStartOffsetCheckpoints.filter { case (file, _) => file.getAbsolutePath != dir }
if (cleaner != null)
cleaner.handleLogDirFailure(dir)
def removeOfflineLogs(logs: Pool[TopicPartition, UnifiedLog]): Iterable[TopicPartition] = {
val offlineTopicPartitions: Iterable[TopicPartition] = logs.collect {
case (tp, log) if log.parentDir == dir => tp
}
offlineTopicPartitions.foreach { topicPartition => {
val removedLog = removeLogAndMetrics(logs, topicPartition)
removedLog.foreach {
log => log.closeHandlers()
}
}}
offlineTopicPartitions
}
val offlineCurrentTopicPartitions = removeOfflineLogs(currentLogs)
val offlineFutureTopicPartitions = removeOfflineLogs(futureLogs)
warn(s"Logs for partitions ${offlineCurrentTopicPartitions.mkString(",")} are offline and " +
s"logs for future partitions ${offlineFutureTopicPartitions.mkString(",")} are offline due to failure on log directory $dir")
dirLocks.filter(_.file.getParent == dir).foreach(lock => CoreUtils.swallow(lock.destroy(), this))
}
}
/**
* Lock all the given directories
*/
private def lockLogDirs(dirs: Seq[File]): Seq[FileLock] = {
dirs.flatMap { dir =>
try {
val lock = new FileLock(new File(dir, LockFileName))
if (!lock.tryLock())
throw new KafkaException("Failed to acquire lock on file .lock in " + lock.file.getParent +
". A Kafka instance in another process or thread is using this directory.")
Some(lock)
} catch {
case e: IOException =>
logDirFailureChannel.maybeAddOfflineLogDir(dir.getAbsolutePath, s"Disk error while locking directory $dir", e)
None
}
}
}
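// Illustrative sketch (not part of the original source): each live dir gets a `.lock` file, and
// FileLock.tryLock() returns false if another process already holds the lock, so a second broker
// pointed at the same dir fails fast. A standalone equivalent (hypothetical path):
//
//   val lock = new FileLock(new File("/data/kafka-logs", LogManager.LockFileName))
//   if (!lock.tryLock()) sys.error("directory already in use by another Kafka instance")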
/**
* Retrieves the Uuid for the directory, given its absolute path.
*/
def directoryId(dir: String): Option[Uuid] = directoryIds.get(dir)
def directoryPath(uuid: Uuid): Option[String] = directoryIds.find(_._2 == uuid).map(_._1)
/**
* Determine directory ID for each directory with a meta.properties.
* If meta.properties does not include a directory ID, one is generated and persisted back to meta.properties.
* Directories without a meta.properties don't get a directory ID assigned.
*/
private def loadDirectoryIds(logDirs: Seq[File]): mutable.Map[String, Uuid] = {
val result = mutable.HashMap[String, Uuid]()
logDirs.foreach(logDir => {
try {
val props = PropertiesUtils.readPropertiesFile(
new File(logDir, MetaPropertiesEnsemble.META_PROPERTIES_NAME).getAbsolutePath)
val metaProps = new MetaProperties.Builder(props).build()
metaProps.directoryId().ifPresent(directoryId => {
result += (logDir.getAbsolutePath -> directoryId)
})
} catch {
case _: NoSuchFileException =>
info(s"No meta.properties file found in $logDir.")
case e: IOException =>
logDirFailureChannel.maybeAddOfflineLogDir(logDir.getAbsolutePath, s"Disk error while loading ID $logDir", e)
}
})
result
}
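// Illustrative sketch (not part of the original source): a meta.properties file carrying a
// directory ID might look like the following (field names per MetaProperties; values hypothetical):
//
//   version=1
//   cluster.id=4L6g3nShT-eMCtK--X86sw
//   node.id=1
//   directory.id=b8tRS7h4TJ2Vt43Dp85v2A
//
// Only the directory ID entry is consumed here; directories whose file lacks it are skipped.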
private def addLogToBeDeleted(log: UnifiedLog): Unit = {
this.logsToBeDeleted.add((log, time.milliseconds()))
}
def addStrayLog(strayPartition: TopicPartition, strayLog: UnifiedLog): Unit = {
this.strayLogs.put(strayPartition, strayLog)
}
// Only for testing
private[log] def hasLogsToBeDeleted: Boolean = !logsToBeDeleted.isEmpty
private[log] def loadLog(logDir: File,
hadCleanShutdown: Boolean,
recoveryPoints: Map[TopicPartition, Long],
logStartOffsets: Map[TopicPartition, Long],
defaultConfig: LogConfig,
topicConfigOverrides: Map[String, LogConfig],
numRemainingSegments: ConcurrentMap[String, Int],
isStray: UnifiedLog => Boolean): UnifiedLog = {
val topicPartition = UnifiedLog.parseTopicPartitionName(logDir)
val config = topicConfigOverrides.getOrElse(topicPartition.topic, defaultConfig)
val logRecoveryPoint = recoveryPoints.getOrElse(topicPartition, 0L)
val logStartOffset = logStartOffsets.getOrElse(topicPartition, 0L)
val log = UnifiedLog(
dir = logDir,
config = config,
logStartOffset = logStartOffset,
recoveryPoint = logRecoveryPoint,
maxTransactionTimeoutMs = maxTransactionTimeoutMs,
producerStateManagerConfig = producerStateManagerConfig,
producerIdExpirationCheckIntervalMs = producerIdExpirationCheckIntervalMs,
scheduler = scheduler,
time = time,
brokerTopicStats = brokerTopicStats,
logDirFailureChannel = logDirFailureChannel,
lastShutdownClean = hadCleanShutdown,
topicId = None,
keepPartitionMetadataFile = keepPartitionMetadataFile,
numRemainingSegments = numRemainingSegments,
remoteStorageSystemEnable = remoteStorageSystemEnable)
if (logDir.getName.endsWith(UnifiedLog.DeleteDirSuffix)) {
addLogToBeDeleted(log)
} else if (logDir.getName.endsWith(UnifiedLog.StrayDirSuffix)) {
addStrayLog(topicPartition, log)
warn(s"Loaded stray log: $logDir")
} else if (isStray(log)) {
// Unlike Zookeeper mode, which tracks pending topic deletions under a ZNode, KRaft is unable to prevent a topic from being recreated before every replica has been deleted.
// A KRaft broker with an offline directory may be unable to detect it still holds a to-be-deleted replica,
// and can create a conflicting topic partition for a new incarnation of the topic in one of the remaining online directories.
// So upon a restart in which the offline directory is back online we need to clean up the old replica directory.
log.renameDir(UnifiedLog.logStrayDirName(log.topicPartition), shouldReinitialize = false)
addStrayLog(log.topicPartition, log)
warn(s"Log in ${logDir.getAbsolutePath} marked stray and renamed to ${log.dir.getAbsolutePath}")
} else {
val previous = {
if (log.isFuture)
this.futureLogs.put(topicPartition, log)
else
this.currentLogs.put(topicPartition, log)
}
if (previous != null) {
if (log.isFuture)
throw new IllegalStateException(s"Duplicate log directories found: ${log.dir.getAbsolutePath}, ${previous.dir.getAbsolutePath}")
else
throw new IllegalStateException(s"Duplicate log directories for $topicPartition are found in both ${log.dir.getAbsolutePath} " +
s"and ${previous.dir.getAbsolutePath}. It is likely because log directory failure happened while broker was " +
s"replacing current replica with future replica. Recover broker from this failure by manually deleting one of the two directories " +
s"for this partition. It is recommended to delete the partition in the log directory that is known to have failed recently.")
}
}
log
}
// factory class for naming the log recovery threads used in metrics
private class LogRecoveryThreadFactory(val dirPath: String) extends ThreadFactory {
val threadNum = new AtomicInteger(0)
override def newThread(runnable: Runnable): Thread = {
KafkaThread.nonDaemon(logRecoveryThreadName(dirPath, threadNum.getAndIncrement()), runnable)
}
}
// create a unique log recovery thread name for each log dir, in the format prefix-dirPath-threadNum, e.g. "log-recovery-/tmp/kafkaLogs-0"
private def logRecoveryThreadName(dirPath: String, threadNum: Int, prefix: String = "log-recovery"): String = s"$prefix-$dirPath-$threadNum"
/*
 * decrement the number of remaining logs
 * @return the number of remaining logs after decrementing by 1
 */
private[log] def decNumRemainingLogs(numRemainingLogs: ConcurrentMap[String, Int], path: String): Int = {
require(path != null, "path cannot be null to update remaining logs metric.")
numRemainingLogs.compute(path, (_, oldVal) => oldVal - 1)
}
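// Illustrative sketch (not part of the original source): ConcurrentMap.compute applies the
// remapping function atomically, so concurrent recovery threads can safely decrement the same
// counter. For example (hypothetical values):
//
//   numRemainingLogs.put("/data/kafka-logs", 3)
//   decNumRemainingLogs(numRemainingLogs, "/data/kafka-logs")  // returns 2
//   decNumRemainingLogs(numRemainingLogs, "/data/kafka-logs")  // returns 1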
/**
* Recover and load all logs in the given data directories
*/
private[log] def loadLogs(defaultConfig: LogConfig, topicConfigOverrides: Map[String, LogConfig], isStray: UnifiedLog => Boolean): Unit = {
info(s"Loading logs from log dirs $liveLogDirs")
val startMs = time.hiResClockMs()
val threadPools = ArrayBuffer.empty[ExecutorService]
val offlineDirs = mutable.Set.empty[(String, IOException)]
val jobs = ArrayBuffer.empty[Seq[Future[_]]]
var numTotalLogs = 0
// log dir path -> number of Remaining logs map for remainingLogsToRecover metric
val numRemainingLogs: ConcurrentMap[String, Int] = new ConcurrentHashMap[String, Int]
// log recovery thread name -> number of remaining segments map for remainingSegmentsToRecover metric
val numRemainingSegments: ConcurrentMap[String, Int] = new ConcurrentHashMap[String, Int]
def handleIOException(logDirAbsolutePath: String, e: IOException): Unit = {
offlineDirs.add((logDirAbsolutePath, e))
error(s"Error while loading log dir $logDirAbsolutePath", e)
}
val uncleanLogDirs = mutable.Buffer.empty[String]
for (dir <- liveLogDirs) {
val logDirAbsolutePath = dir.getAbsolutePath
var hadCleanShutdown: Boolean = false
try {
val pool = Executors.newFixedThreadPool(numRecoveryThreadsPerDataDir,
new LogRecoveryThreadFactory(logDirAbsolutePath))
threadPools.append(pool)
val cleanShutdownFileHandler = new CleanShutdownFileHandler(dir.getPath)
if (cleanShutdownFileHandler.exists()) {
// Cache the clean shutdown status and use it for the rest of the log loading workflow. Delete the CleanShutdownFile
// so that if the broker crashes while loading the logs, it is considered a hard shutdown during the next boot up. KAFKA-10471
cleanShutdownFileHandler.delete()
hadCleanShutdown = true
}
hadCleanShutdownFlags.put(logDirAbsolutePath, hadCleanShutdown)
var recoveryPoints = Map[TopicPartition, Long]()
try {
recoveryPoints = this.recoveryPointCheckpoints(dir).read()
} catch {
case e: Exception =>
warn(s"Error occurred while reading recovery-point-offset-checkpoint file of directory " +
s"$logDirAbsolutePath, resetting the recovery checkpoint to 0", e)
}
var logStartOffsets = Map[TopicPartition, Long]()
try {
logStartOffsets = this.logStartOffsetCheckpoints(dir).read()
} catch {
case e: Exception =>
warn(s"Error occurred while reading log-start-offset-checkpoint file of directory " +
s"$logDirAbsolutePath, resetting to the base offset of the first segment", e)
}
val logsToLoad = Option(dir.listFiles).getOrElse(Array.empty).filter(logDir =>
logDir.isDirectory &&
// Ignore the remote-log-index-cache directory as it is the index cache maintained by the tiered storage subsystem,
// not a topic-partition dir.
!logDir.getName.equals(RemoteIndexCache.DIR_NAME) &&
UnifiedLog.parseTopicPartitionName(logDir).topic != KafkaRaftServer.MetadataTopic)
numTotalLogs += logsToLoad.length
numRemainingLogs.put(logDirAbsolutePath, logsToLoad.length)
loadLogsCompletedFlags.put(logDirAbsolutePath, logsToLoad.isEmpty)
if (logsToLoad.isEmpty) {
info(s"No logs found to be loaded in $logDirAbsolutePath")
} else if (hadCleanShutdown) {
info(s"Skipping recovery of ${logsToLoad.length} logs from $logDirAbsolutePath since " +
"clean shutdown file was found")
} else {
info(s"Recovering ${logsToLoad.length} logs from $logDirAbsolutePath since no " +
"clean shutdown file was found")
uncleanLogDirs.append(logDirAbsolutePath)
}
val jobsForDir = logsToLoad.map { logDir =>
val runnable: Runnable = () => {
debug(s"Loading log $logDir")
var log = None: Option[UnifiedLog]
val logLoadStartMs = time.hiResClockMs()
try {
log = Some(loadLog(logDir, hadCleanShutdown, recoveryPoints, logStartOffsets,
defaultConfig, topicConfigOverrides, numRemainingSegments, isStray))
} catch {
case e: IOException =>
handleIOException(logDirAbsolutePath, e)
case e: KafkaStorageException if e.getCause.isInstanceOf[IOException] =>
// KafkaStorageException might be thrown, e.g. while writing the LeaderEpochFileCache.
// Since the IOException was already handled when it was converted to a KafkaStorageException, we can ignore it here.
} finally {
val logLoadDurationMs = time.hiResClockMs() - logLoadStartMs
val remainingLogs = decNumRemainingLogs(numRemainingLogs, logDirAbsolutePath)
val currentNumLoaded = logsToLoad.length - remainingLogs
log match {
case Some(loadedLog) => info(s"Completed load of $loadedLog with ${loadedLog.numberOfSegments} segments, " +
s"local-log-start-offset ${loadedLog.localLogStartOffset()} and log-end-offset ${loadedLog.logEndOffset} in ${logLoadDurationMs}ms " +
s"($currentNumLoaded/${logsToLoad.length} completed in $logDirAbsolutePath)")
case None => info(s"Error while loading logs in $logDir in ${logLoadDurationMs}ms ($currentNumLoaded/${logsToLoad.length} completed in $logDirAbsolutePath)")
}
if (remainingLogs == 0) {
// loadLog is completed for all logs under the logDir, mark it.
loadLogsCompletedFlags.put(logDirAbsolutePath, true)
}
}
}
runnable
}
jobs += jobsForDir.map(pool.submit)
} catch {
case e: IOException =>
handleIOException(logDirAbsolutePath, e)
}
}
try {
addLogRecoveryMetrics(numRemainingLogs, numRemainingSegments)
for (dirJobs <- jobs) {
dirJobs.foreach(_.get)
}
offlineDirs.foreach { case (dir, e) =>
logDirFailureChannel.maybeAddOfflineLogDir(dir, s"Error while loading log dir $dir", e)
}
} catch {
case e: ExecutionException =>
error(s"There was an error in one of the threads during logs loading: ${e.getCause}")
throw e.getCause
} finally {
removeLogRecoveryMetrics()
threadPools.foreach(_.shutdown())
}
val elapsedMs = time.hiResClockMs() - startMs
val printedUncleanLogDirs = if (uncleanLogDirs.isEmpty) "" else s" (unclean log dirs = $uncleanLogDirs)"
info(s"Loaded $numTotalLogs logs in ${elapsedMs}ms$printedUncleanLogDirs")
}
private[log] def addLogRecoveryMetrics(numRemainingLogs: ConcurrentMap[String, Int],
numRemainingSegments: ConcurrentMap[String, Int]): Unit = {
debug("Adding log recovery metrics")
for (dir <- logDirs) {
metricsGroup.newGauge("remainingLogsToRecover", () => numRemainingLogs.get(dir.getAbsolutePath),
Map("dir" -> dir.getAbsolutePath).asJava)
for (i <- 0 until numRecoveryThreadsPerDataDir) {
val threadName = logRecoveryThreadName(dir.getAbsolutePath, i)
metricsGroup.newGauge("remainingSegmentsToRecover", () => numRemainingSegments.get(threadName),
Map("dir" -> dir.getAbsolutePath, "threadNum" -> i.toString).asJava)
}
}
}
private[log] def removeLogRecoveryMetrics(): Unit = {
debug("Removing log recovery metrics")
for (dir <- logDirs) {
metricsGroup.removeMetric("remainingLogsToRecover", Map("dir" -> dir.getAbsolutePath).asJava)
for (i <- 0 until numRecoveryThreadsPerDataDir) {
metricsGroup.removeMetric("remainingSegmentsToRecover", Map("dir" -> dir.getAbsolutePath, "threadNum" -> i.toString).asJava)
}
}
}
/**
* Start the background threads to flush logs and do log cleanup
*/
def startup(topicNames: Set[String], isStray: UnifiedLog => Boolean = _ => false): Unit = {
// ensure consistency between default config and overrides
val defaultConfig = currentDefaultConfig
startupWithConfigOverrides(defaultConfig, fetchTopicConfigOverrides(defaultConfig, topicNames), isStray)
}
// visible for testing
@nowarn("cat=deprecation")
private[log] def fetchTopicConfigOverrides(defaultConfig: LogConfig, topicNames: Set[String]): Map[String, LogConfig] = {
val topicConfigOverrides = mutable.Map[String, LogConfig]()
val defaultProps = defaultConfig.originals()
topicNames.foreach { topicName =>
var overrides = configRepository.topicConfig(topicName)
// save memory by only including configs for topics with overrides
if (!overrides.isEmpty) {
Option(overrides.getProperty(TopicConfig.MESSAGE_FORMAT_VERSION_CONFIG)).foreach { versionString =>
val messageFormatVersion = new MessageFormatVersion(versionString, interBrokerProtocolVersion.version)
if (messageFormatVersion.shouldIgnore) {
val copy = new Properties()
copy.putAll(overrides)
copy.remove(TopicConfig.MESSAGE_FORMAT_VERSION_CONFIG)
overrides = copy
if (messageFormatVersion.shouldWarn)
warn(messageFormatVersion.topicWarningMessage(topicName))
}
}
val logConfig = LogConfig.fromProps(defaultProps, overrides)
topicConfigOverrides(topicName) = logConfig
}
}
topicConfigOverrides
}
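// Illustrative sketch (not part of the original source): LogConfig.fromProps layers the per-topic
// overrides on top of the broker defaults, so only differing keys need to be stored. Hypothetical:
//
//   val overrides = new Properties()
//   overrides.put(TopicConfig.RETENTION_MS_CONFIG, "86400000")
//   val merged = LogConfig.fromProps(defaultConfig.originals(), overrides)
//   // merged retention.ms == 86400000; all other values fall back to the broker defaults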
private def fetchLogConfig(topicName: String): LogConfig = {
// ensure consistency between default config and overrides
val defaultConfig = currentDefaultConfig
fetchTopicConfigOverrides(defaultConfig, Set(topicName)).values.headOption.getOrElse(defaultConfig)
}
// visible for testing
private[log] def startupWithConfigOverrides(
defaultConfig: LogConfig,
topicConfigOverrides: Map[String, LogConfig],
isStray: UnifiedLog => Boolean): Unit = {
loadLogs(defaultConfig, topicConfigOverrides, isStray) // this could take a while if shutdown was not clean
/* Schedule the cleanup task to delete old logs */
if (scheduler != null) {
info("Starting log cleanup with a period of %d ms.".format(retentionCheckMs))
scheduler.schedule("kafka-log-retention",
() => cleanupLogs(),
initialTaskDelayMs,
retentionCheckMs)
info("Starting log flusher with a default period of %d ms.".format(flushCheckMs))
scheduler.schedule("kafka-log-flusher",
() => flushDirtyLogs(),
initialTaskDelayMs,
flushCheckMs)
scheduler.schedule("kafka-recovery-point-checkpoint",
() => checkpointLogRecoveryOffsets(),
initialTaskDelayMs,
flushRecoveryOffsetCheckpointMs)
scheduler.schedule("kafka-log-start-offset-checkpoint",
() => checkpointLogStartOffsets(),
initialTaskDelayMs,
flushStartOffsetCheckpointMs)
scheduler.scheduleOnce("kafka-delete-logs", // will be rescheduled after each delete logs with a dynamic period
() => deleteLogs(),
initialTaskDelayMs)
}
if (cleanerConfig.enableCleaner) {
_cleaner = new LogCleaner(cleanerConfig, liveLogDirs, currentLogs, logDirFailureChannel, time = time)
_cleaner.startup()
}
}
/**
* Close all the logs
*/
def shutdown(brokerEpoch: Long = -1): Unit = {
info("Shutting down.")
metricsGroup.removeMetric("OfflineLogDirectoryCount")
for (dir <- logDirs) {
metricsGroup.removeMetric("LogDirectoryOffline", Map("logDirectory" -> dir.getAbsolutePath).asJava)
}
val threadPools = ArrayBuffer.empty[ExecutorService]
val jobs = mutable.Map.empty[File, Seq[Future[_]]]
// stop the cleaner first
if (cleaner != null) {
CoreUtils.swallow(cleaner.shutdown(), this)
}
val localLogsByDir = logsByDir
// close logs in each dir
for (dir <- liveLogDirs) {
debug(s"Flushing and closing logs at $dir")
val pool = Executors.newFixedThreadPool(numRecoveryThreadsPerDataDir,
KafkaThread.nonDaemon(s"log-closing-${dir.getAbsolutePath}", _))
threadPools.append(pool)
val logs = logsInDir(localLogsByDir, dir).values
val jobsForDir = logs.map { log =>
val runnable: Runnable = () => {
// flush the log to ensure latest possible recovery point
log.flush(true)
log.close()
}
runnable
}
jobs(dir) = jobsForDir.map(pool.submit).toSeq
}
try {
jobs.forKeyValue { (dir, dirJobs) =>
if (waitForAllToComplete(dirJobs,
e => warn(s"There was an error in one of the threads during LogManager shutdown: ${e.getCause}"))) {
val logs = logsInDir(localLogsByDir, dir)
// update the last flush point
debug(s"Updating recovery points at $dir")
checkpointRecoveryOffsetsInDir(dir, logs)
debug(s"Updating log start offsets at $dir")
checkpointLogStartOffsetsInDir(dir, logs)
// Mark that the shutdown was clean by creating a marker file for log dirs that either:
// 1. had a clean shutdown marker file; or
// 2. had no clean shutdown marker file, but all logs under them were loaded at startup time
val logDirAbsolutePath = dir.getAbsolutePath
if (hadCleanShutdownFlags.getOrDefault(logDirAbsolutePath, false) ||
loadLogsCompletedFlags.getOrDefault(logDirAbsolutePath, false)) {
val cleanShutdownFileHandler = new CleanShutdownFileHandler(dir.getPath)
debug(s"Writing clean shutdown marker at $dir with broker epoch=$brokerEpoch")
CoreUtils.swallow(cleanShutdownFileHandler.write(brokerEpoch), this)
}
}
}
} finally {
threadPools.foreach(_.shutdown())
// regardless of whether the close succeeded, we need to unlock the data directories
dirLocks.foreach(_.destroy())
}
info("Shutdown complete.")
}
/**
* Truncate the partition logs to the specified offsets and checkpoint the recovery point to this offset
*
* @param partitionOffsets Partition logs that need to be truncated
* @param isFuture True iff the truncation should be performed on the future log of the specified partitions
*/
def truncateTo(partitionOffsets: Map[TopicPartition, Long], isFuture: Boolean): Unit = {
val affectedLogs = ArrayBuffer.empty[UnifiedLog]
for ((topicPartition, truncateOffset) <- partitionOffsets) {
val log = {
if (isFuture)
futureLogs.get(topicPartition)
else
currentLogs.get(topicPartition)
}
// If the log does not exist, skip it
if (log != null) {
// May need to abort and pause the cleaning of the log, and resume after truncation is done.
val needToStopCleaner = truncateOffset < log.activeSegment.baseOffset
if (needToStopCleaner && !isFuture)
abortAndPauseCleaning(topicPartition)
try {
if (log.truncateTo(truncateOffset))
affectedLogs += log
if (needToStopCleaner && !isFuture)
maybeTruncateCleanerCheckpointToActiveSegmentBaseOffset(log, topicPartition)
} finally {
if (needToStopCleaner && !isFuture)
resumeCleaning(topicPartition)
}
}
}
for (dir <- affectedLogs.map(_.parentDirFile).distinct) {
checkpointRecoveryOffsetsInDir(dir)
}
}
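// Illustrative usage sketch (not part of the original source): a caller that detects log
// divergence at offset 42 for a partition could truncate via (hypothetical values):
//
//   logManager.truncateTo(Map(new TopicPartition("payments", 0) -> 42L), isFuture = false)
//
// Cleaning is paused only when the truncation point falls below the active segment's base offset,
// since only then can previously-cleaned data be affected.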
/**
* Delete all data in a partition and start the log at the new offset
*
* @param topicPartition The partition whose log needs to be truncated
* @param newOffset The new offset to start the log with
* @param isFuture True iff the truncation should be performed on the future log of the specified partition
* @param logStartOffsetOpt The log start offset to set for the log. If None, the new offset will be used.
*/
def truncateFullyAndStartAt(topicPartition: TopicPartition,
newOffset: Long,
isFuture: Boolean,
logStartOffsetOpt: Option[Long] = None): Unit = {
val log = {
if (isFuture)
futureLogs.get(topicPartition)
else
currentLogs.get(topicPartition)
}
// If the log does not exist, skip it
if (log != null) {
// Abort and pause the cleaning of the log, and resume after truncation is done.
if (!isFuture)
abortAndPauseCleaning(topicPartition)
try {
log.truncateFullyAndStartAt(newOffset, logStartOffsetOpt)
if (!isFuture)
maybeTruncateCleanerCheckpointToActiveSegmentBaseOffset(log, topicPartition)
} finally {
if (!isFuture)
resumeCleaning(topicPartition)
}
checkpointRecoveryOffsetsInDir(log.parentDirFile)
}
}
/**
* Write out the current recovery point for all logs to a text file in the log directory
* to avoid recovering the whole log on startup.
*/
def checkpointLogRecoveryOffsets(): Unit = {
val logsByDirCached = logsByDir
liveLogDirs.foreach { logDir =>
val logsToCheckpoint = logsInDir(logsByDirCached, logDir)
checkpointRecoveryOffsetsInDir(logDir, logsToCheckpoint)
}
}
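// Illustrative sketch (not part of the original source): the recovery-point-offset-checkpoint
// written here is a small text file of the form (version, entry count, then one line per
// partition; values hypothetical):
//
//   0
//   2
//   payments 0 5000
//   payments 1 4800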
/**
* Write out the current log start offset for all logs to a text file in the log directory
* to avoid exposing data that have been deleted by DeleteRecordsRequest
*/
def checkpointLogStartOffsets(): Unit = {
val logsByDirCached = logsByDir
liveLogDirs.foreach { logDir =>
checkpointLogStartOffsetsInDir(logDir, logsInDir(logsByDirCached, logDir))
}
}
/**
* Checkpoint recovery offsets for all the logs in logDir.
*
* @param logDir the directory in which the logs to be checkpointed are
*/
// Only for testing
private[log] def checkpointRecoveryOffsetsInDir(logDir: File): Unit = {
checkpointRecoveryOffsetsInDir(logDir, logsInDir(logDir))
}
/**
* Checkpoint recovery offsets for all the provided logs.
*
* @param logDir the directory in which the logs are
* @param logsToCheckpoint the logs to be checkpointed
*/
private def checkpointRecoveryOffsetsInDir(logDir: File, logsToCheckpoint: Map[TopicPartition, UnifiedLog]): Unit = {
try {
recoveryPointCheckpoints.get(logDir).foreach { checkpoint =>
val recoveryOffsets = logsToCheckpoint.map { case (tp, log) => tp -> log.recoveryPoint }
// checkpoint.write calls Utils.atomicMoveWithFallback, which flushes the parent
// directory and guarantees crash consistency.
checkpoint.write(recoveryOffsets)
}
} catch {
case e: KafkaStorageException =>
error(s"Disk error while writing recovery offsets checkpoint in directory $logDir: ${e.getMessage}")
case e: IOException =>
logDirFailureChannel.maybeAddOfflineLogDir(logDir.getAbsolutePath,
s"Disk error while writing recovery offsets checkpoint in directory $logDir: ${e.getMessage}", e)
}
}
/**
* Checkpoint log start offsets for all the provided logs in the provided directory.
*
* @param logDir the directory in which logs are checkpointed
* @param logsToCheckpoint the logs to be checkpointed
*/
private def checkpointLogStartOffsetsInDir(logDir: File, logsToCheckpoint: Map[TopicPartition, UnifiedLog]): Unit = {
try {
logStartOffsetCheckpoints.get(logDir).foreach { checkpoint =>
val logStartOffsets = logsToCheckpoint.collect {
case (tp, log) if log.remoteLogEnabled() || log.logStartOffset > log.logSegments.asScala.head.baseOffset =>
tp -> log.logStartOffset
}
checkpoint.write(logStartOffsets)
}
} catch {
case e: KafkaStorageException =>
error(s"Disk error while writing log start offsets checkpoint in directory $logDir: ${e.getMessage}")
}
}
// The logDir should be an absolute path
def maybeUpdatePreferredLogDir(topicPartition: TopicPartition, logDir: String): Unit = {
// Do not cache the preferred log directory if either the current log or the future log for this partition exists in the specified logDir
if (!getLog(topicPartition).exists(_.parentDir == logDir) &&
!getLog(topicPartition, isFuture = true).exists(_.parentDir == logDir))
preferredLogDirs.put(topicPartition, logDir)
}
/**
* Abort and pause cleaning of the provided partition and log a message about it.
*/
def abortAndPauseCleaning(topicPartition: TopicPartition): Unit = {
if (cleaner != null) {
cleaner.abortAndPauseCleaning(topicPartition)
info(s"The cleaning for partition $topicPartition is aborted and paused")
}
}
/**
* Abort cleaning of the provided partition and log a message about it.
*/
def abortCleaning(topicPartition: TopicPartition): Unit = {
if (cleaner != null) {
cleaner.abortCleaning(topicPartition)
info(s"The cleaning for partition $topicPartition is aborted")
}
}
/**
* Resume cleaning of the provided partition and log a message about it.
*/
private def resumeCleaning(topicPartition: TopicPartition): Unit = {
if (cleaner != null) {
cleaner.resumeCleaning(Seq(topicPartition))
info(s"Cleaning for partition $topicPartition is resumed")
}
}
/**
 * Truncate the cleaner's checkpoint to the base offset of the active segment of
 * the provided log.
 */
private def maybeTruncateCleanerCheckpointToActiveSegmentBaseOffset(log: UnifiedLog, topicPartition: TopicPartition): Unit = {
if (cleaner != null) {
cleaner.maybeTruncateCheckpoint(log.parentDirFile, topicPartition, log.activeSegment.baseOffset)
}
}
/**
* Get the log if it exists, otherwise return None
*
* @param topicPartition the partition of the log
* @param isFuture True iff the future log of the specified partition should be returned
*/
def getLog(topicPartition: TopicPartition, isFuture: Boolean = false): Option[UnifiedLog] = {
if (isFuture)
Option(futureLogs.get(topicPartition))
else
Option(currentLogs.get(topicPartition))
}
/**
* Method to indicate that logs are getting initialized for the partition passed in as argument.
* This method should always be followed by [[kafka.log.LogManager#finishedInitializingLog]] to indicate that log
* initialization is done.
*/
def initializingLog(topicPartition: TopicPartition): Unit = {
partitionsInitializing(topicPartition) = false
}
/**
* Mark the partition configuration for all partitions that are getting initialized for topic
* as dirty. That will result in reloading of configuration once initialization is done.
*/
def topicConfigUpdated(topic: String): Unit = {
partitionsInitializing.keys.filter(_.topic() == topic).foreach {
topicPartition => partitionsInitializing.replace(topicPartition, false, true)
}
}
/**
* Update the configuration of the provided topic.
*/
def updateTopicConfig(topic: String,
newTopicConfig: Properties,
isRemoteLogStorageSystemEnabled: Boolean,
wasRemoteLogEnabled: Boolean,
fromZK: Boolean): Unit = {
topicConfigUpdated(topic)
val logs = logsByTopic(topic)
// Combine the default properties with the overrides in zk to create the new LogConfig
val newLogConfig = LogConfig.fromProps(currentDefaultConfig.originals, newTopicConfig)
val isRemoteLogStorageEnabled = newLogConfig.remoteStorageEnable()
// We would like to validate the configuration no matter whether the logs have materialised on disk or not.
// Otherwise we risk someone creating a tiered-topic, disabling Tiered Storage cluster-wide and the check
// failing since the logs for the topic are non-existent.
LogConfig.validateRemoteStorageOnlyIfSystemEnabled(newLogConfig.values(), isRemoteLogStorageSystemEnabled, true)
// `remote.log.delete.on.disable` and `remote.log.copy.disable` are unsupported in ZK mode
if (fromZK) {
LogConfig.validateNoInvalidRemoteStorageConfigsInZK(newLogConfig.values())
}
LogConfig.validateTurningOffRemoteStorageWithDelete(newLogConfig.values(), wasRemoteLogEnabled, isRemoteLogStorageEnabled)
LogConfig.validateRetentionConfigsWhenRemoteCopyDisabled(newLogConfig.values(), isRemoteLogStorageEnabled)
if (logs.nonEmpty) {
logs.foreach { log =>
val oldLogConfig = log.updateConfig(newLogConfig)
if (oldLogConfig.compact && !newLogConfig.compact) {
abortCleaning(log.topicPartition)
}
}
}
}
/**
* Mark all in progress partitions having dirty configuration if broker configuration is updated.
*/
def brokerConfigUpdated(): Unit = {
partitionsInitializing.keys.foreach {
topicPartition => partitionsInitializing.replace(topicPartition, false, true)
}
}
/**
* Method to indicate that the log initialization for the partition passed in as argument is
* finished. This method should follow a call to [[kafka.log.LogManager#initializingLog]].
*
* It will retrieve the topic configs a second time if they were updated while the
* relevant log was being loaded.
*/
def finishedInitializingLog(topicPartition: TopicPartition,
maybeLog: Option[UnifiedLog]): Unit = {
val removedValue = partitionsInitializing.remove(topicPartition)
if (removedValue.contains(true))
maybeLog.foreach(_.updateConfig(fetchLogConfig(topicPartition.topic)))
}
/**
* If the log already exists, just return a copy of the existing log
* Otherwise if isNew=true or if there is no offline log directory, create a log for the given topic and the given partition
* Otherwise throw KafkaStorageException
*
* @param topicPartition The partition whose log needs to be returned or created
* @param isNew Whether the replica should have existed on the broker or not
* @param isFuture True if the future log of the specified partition should be returned or created
* @param topicId The topic ID of the partition's topic
* @param targetLogDirectoryId The directory ID that should host the partition's topic.
* The next selected directory will be picked if it is None or equals {@link DirectoryId.UNASSIGNED}.
* The method assumes the provided ID belongs to an online directory.
* @throws KafkaStorageException if isNew=false, the log is not found in the cache, and there is an offline log directory on the broker
* @throws InconsistentTopicIdException if the topic ID in the log does not match the topic ID provided
*/
def getOrCreateLog(topicPartition: TopicPartition, isNew: Boolean = false, isFuture: Boolean = false,
topicId: Option[Uuid], targetLogDirectoryId: Option[Uuid] = Option.empty): UnifiedLog = {
logCreationOrDeletionLock synchronized {
val log = getLog(topicPartition, isFuture).getOrElse {
// create the log if it has not already been created in another thread
if (!isNew && offlineLogDirs.nonEmpty)
throw new KafkaStorageException(s"Can not create log for $topicPartition because log directories ${offlineLogDirs.mkString(",")} are offline")
val logDirs: List[File] = {
val preferredLogDir = targetLogDirectoryId.filterNot(Seq(DirectoryId.UNASSIGNED,DirectoryId.LOST).contains) match {
case Some(targetId) if !preferredLogDirs.containsKey(topicPartition) =>
// If the partition has both a targetLogDirectoryId and an entry in preferredLogDirs,
// preferredLogDirs takes precedence; otherwise targetLogDirectoryId is used
directoryIds.find(_._2 == targetId).map(_._1).orNull
case _ =>
preferredLogDirs.get(topicPartition)
}
if (isFuture) {
if (preferredLogDir == null)
throw new IllegalStateException(s"Can not create the future log for $topicPartition without having a preferred log directory")
else if (getLog(topicPartition).get.parentDir == preferredLogDir)
throw new IllegalStateException(s"Can not create the future log for $topicPartition in the current log directory of this partition")
}
if (preferredLogDir != null)
List(new File(preferredLogDir))
else
nextLogDirs()
}
val logDirName = {
if (isFuture)
UnifiedLog.logFutureDirName(topicPartition)
else
UnifiedLog.logDirName(topicPartition)
}
val logDir = logDirs
.iterator // to prevent actually mapping the whole list, lazy map
.map(createLogDirectory(_, logDirName))
.find(_.isSuccess)
.getOrElse(Failure(new KafkaStorageException("No log directories available. Tried " + logDirs.map(_.getAbsolutePath).mkString(", "))))
.get // If Failure, will throw
val config = fetchLogConfig(topicPartition.topic)
val log = UnifiedLog(
dir = logDir,
config = config,
logStartOffset = 0L,
recoveryPoint = 0L,
maxTransactionTimeoutMs = maxTransactionTimeoutMs,
producerStateManagerConfig = producerStateManagerConfig,
producerIdExpirationCheckIntervalMs = producerIdExpirationCheckIntervalMs,
scheduler = scheduler,
time = time,
brokerTopicStats = brokerTopicStats,
logDirFailureChannel = logDirFailureChannel,
topicId = topicId,
keepPartitionMetadataFile = keepPartitionMetadataFile,
remoteStorageSystemEnable = remoteStorageSystemEnable)
if (isFuture)
futureLogs.put(topicPartition, log)
else
currentLogs.put(topicPartition, log)
info(s"Created log for partition $topicPartition in $logDir with properties ${config.overriddenConfigsAsLoggableString}")
// Remove the preferred log dir since it has already been satisfied
preferredLogDirs.remove(topicPartition)
log
}
// When running a ZK controller, we may get a log that does not have a topic ID. Assign it here.
if (log.topicId.isEmpty) {
topicId.foreach(log.assignTopicId)
}
// Ensure topic IDs are consistent
topicId.foreach { topicId =>
log.topicId.foreach { logTopicId =>
if (topicId != logTopicId)
throw new InconsistentTopicIdException(s"Tried to assign topic ID $topicId to log for topic partition $topicPartition," +
s"but log already contained topic ID $logTopicId")
}
}
log
}
}
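// Illustrative sketch (not part of the original source): the `.iterator ... .find(_.isSuccess)`
// chain above is a lazy fallback over candidate directories; creation is attempted one dir at a
// time and stops at the first success. The same pattern in isolation (hypothetical paths):
//
//   val attempts = List("/bad/dir", "/good/dir").iterator
//     .map(p => Try(Files.createDirectories(new File(p).toPath)))
//   attempts.find(_.isSuccess)
//     .getOrElse(Failure(new KafkaStorageException("no directory available")))
//     .get  // throws only if every candidate failed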
private[log] def createLogDirectory(logDir: File, logDirName: String): Try[File] = {
val logDirPath = logDir.getAbsolutePath
if (isLogDirOnline(logDirPath)) {
val dir = new File(logDirPath, logDirName)
try {
Files.createDirectories(dir.toPath)
Success(dir)
} catch {
case e: IOException =>
val msg = s"Error while creating log for $logDirName in dir $logDirPath"
logDirFailureChannel.maybeAddOfflineLogDir(logDirPath, msg, e)
warn(msg, e)
Failure(new KafkaStorageException(msg, e))
}
} else {
Failure(new KafkaStorageException(s"Can not create log $logDirName because log directory $logDirPath is offline"))
}
}
/**
* Delete logs marked for deletion. Delete all logs for which `currentDefaultConfig.fileDeleteDelayMs`
* has elapsed after the delete was scheduled. Logs for which this interval has not yet elapsed will be
* considered for deletion in the next iteration of `deleteLogs`. The next iteration will be executed
* after the remaining time for the first log that is not deleted. If there are no more `logsToBeDeleted`,
* `deleteLogs` will be executed after `max(currentDefaultConfig.fileDeleteDelayMs, 1)`.
*/
private def deleteLogs(): Unit = {
var nextDelayMs = 0L
val fileDeleteDelayMs = currentDefaultConfig.fileDeleteDelayMs
try {
def nextDeleteDelayMs: Long = {
if (!logsToBeDeleted.isEmpty) {
val (_, scheduleTimeMs) = logsToBeDeleted.peek()
scheduleTimeMs + fileDeleteDelayMs - time.milliseconds()
} else {
// avoid the case where fileDeleteDelayMs is 0 and logsToBeDeleted is empty;
// in that case, logsToBeDeleted.take() would block forever
Math.max(fileDeleteDelayMs, 1)
}
}
while ({nextDelayMs = nextDeleteDelayMs; nextDelayMs <= 0}) {
val (removedLog, _) = logsToBeDeleted.take()
if (removedLog != null) {
try {
removedLog.delete()
info(s"Deleted log for partition ${removedLog.topicPartition} in ${removedLog.dir.getAbsolutePath}.")
} catch {
case e: KafkaStorageException =>
error(s"Exception while deleting $removedLog in dir ${removedLog.parentDir}.", e)
}
}
}
} catch {
case e: Throwable =>
error(s"Exception in kafka-delete-logs thread.", e)
} finally {
try {
scheduler.scheduleOnce("kafka-delete-logs",
() => deleteLogs(),
nextDelayMs)
} catch {
case e: Throwable =>
// No errors should occur unless scheduler has been shutdown
error(s"Failed to schedule next delete in kafka-delete-logs thread", e)
}
}
}
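// Illustrative worked example (not part of the original source): if a log was enqueued at
// t=1000ms and fileDeleteDelayMs=60000, then at t=20000 nextDeleteDelayMs = 1000 + 60000 - 20000
// = 41000, so the next "kafka-delete-logs" run is scheduled 41 seconds out; once the head entry's
// delay has fully elapsed, the computed value drops to <= 0 and the while loop deletes the log.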
def recoverAbandonedFutureLogs(brokerId: Int, newTopicsImage: TopicsImage): Unit = {
val abandonedFutureLogs = findAbandonedFutureLogs(brokerId, newTopicsImage)
abandonedFutureLogs.foreach { case (futureLog, currentLog) =>
val tp = futureLog.topicPartition
// We invoke abortAndPauseCleaning here because log cleaner runs asynchronously and replaceCurrentWithFutureLog
// invokes resumeCleaning which requires log cleaner's internal state to have a key for the given topic partition.
abortAndPauseCleaning(tp)
if (currentLog.isDefined)
info(s"Attempting to recover abandoned future log for $tp at $futureLog and removing ${currentLog.get}")
else
info(s"Attempting to recover abandoned future log for $tp at $futureLog")
replaceCurrentWithFutureLog(currentLog, futureLog)
info(s"Successfully recovered abandoned future log for $tp")
}
}
private def findAbandonedFutureLogs(brokerId: Int, newTopicsImage: TopicsImage): Iterable[(UnifiedLog, Option[UnifiedLog])] = {
futureLogs.values.flatMap { futureLog =>
val topicId = futureLog.topicId.getOrElse {
throw new RuntimeException(s"The log dir $futureLog does not have a topic ID, " +
"which is not allowed when running in KRaft mode.")
}
val partitionId = futureLog.topicPartition.partition()
Option(newTopicsImage.getPartition(topicId, partitionId))
.filter(pr => directoryId(futureLog.parentDir).contains(pr.directory(brokerId)))
.map(_ => (futureLog, Option(currentLogs.get(futureLog.topicPartition)).filter(currentLog => currentLog.topicId.contains(topicId))))
}
}
/**
* Mark the partition directory in the source log directory for deletion and
* rename the future log of this partition in the destination log directory to be the current log
*
* @param topicPartition TopicPartition that needs to be swapped
*/
def replaceCurrentWithFutureLog(topicPartition: TopicPartition): Unit = {
logCreationOrDeletionLock synchronized {
val sourceLog = currentLogs.get(topicPartition)
val destLog = futureLogs.get(topicPartition)
if (sourceLog == null)
throw new KafkaStorageException(s"The current replica for $topicPartition is offline")
if (destLog == null)
throw new KafkaStorageException(s"The future replica for $topicPartition is offline")
info(s"Attempting to replace current log $sourceLog with $destLog for $topicPartition")
replaceCurrentWithFutureLog(Option(sourceLog), destLog, updateHighWatermark = true)
info(s"The current replica is successfully replaced with the future replica for $topicPartition")
}
}
def replaceCurrentWithFutureLog(sourceLog: Option[UnifiedLog], destLog: UnifiedLog, updateHighWatermark: Boolean = false): Unit = {
val topicPartition = destLog.topicPartition
destLog.renameDir(UnifiedLog.logDirName(topicPartition), shouldReinitialize = true)
// The metrics tags still contain "future", so we have to remove them.
// We will add the metrics back after sourceLog removes its metrics.
destLog.removeLogMetrics()
if (updateHighWatermark && sourceLog.isDefined) {
destLog.updateHighWatermark(sourceLog.get.highWatermark)
}
// Now that future replica has been successfully renamed to be the current replica
// Update the cached map and log cleaner as appropriate.
futureLogs.remove(topicPartition)
currentLogs.put(topicPartition, destLog)
if (cleaner != null) {
sourceLog.foreach { srcLog =>
cleaner.alterCheckpointDir(topicPartition, srcLog.parentDirFile, destLog.parentDirFile)
}
resumeCleaning(topicPartition)
}
try {
sourceLog.foreach { srcLog =>
srcLog.renameDir(UnifiedLog.logDeleteDirName(topicPartition), shouldReinitialize = true)
// Now that the replica in the source log directory has been successfully renamed for deletion,
// close the log, update checkpoint files, and enqueue this log to be deleted.
srcLog.close()
val logDir = srcLog.parentDirFile
val logsToCheckpoint = logsInDir(logDir)
checkpointRecoveryOffsetsInDir(logDir, logsToCheckpoint)
checkpointLogStartOffsetsInDir(logDir, logsToCheckpoint)
srcLog.removeLogMetrics()
addLogToBeDeleted(srcLog)
}
destLog.newMetrics()
} catch {
case e: KafkaStorageException =>
// If sourceLog's log directory is offline, we need to close its handlers here.
// handleLogDirFailure() will not close the handlers of sourceLog because it has been removed from the currentLogs map.
sourceLog.foreach { srcLog =>
srcLog.closeHandlers()
srcLog.removeLogMetrics()
}
throw e
}
}
/**
* Rename the directory of the given topic-partition "logdir" as "logdir.uuid.delete" and
* add it in the queue for deletion.
*
* @param topicPartition TopicPartition that needs to be deleted
* @param isFuture True iff the future log of the specified partition should be deleted
* @param checkpoint True if checkpoints must be written
* @return the removed log
*/
def asyncDelete(topicPartition: TopicPartition,
isFuture: Boolean = false,
checkpoint: Boolean = true,
isStray: Boolean = false): Option[UnifiedLog] = {
val removedLog: Option[UnifiedLog] = logCreationOrDeletionLock synchronized {
removeLogAndMetrics(if (isFuture) futureLogs else currentLogs, topicPartition)
}
removedLog match {
case Some(removedLog) =>
// We need to wait until there is no more cleaning task on the log to be deleted before actually deleting it.
if (cleaner != null && !isFuture) {
cleaner.abortCleaning(topicPartition)
if (checkpoint) {
cleaner.updateCheckpoints(removedLog.parentDirFile, partitionToRemove = Option(topicPartition))
}
}
if (isStray) {
// Move aside stray partitions, don't delete them
removedLog.renameDir(UnifiedLog.logStrayDirName(topicPartition), shouldReinitialize = false)
warn(s"Log for partition ${removedLog.topicPartition} is marked as stray and renamed to ${removedLog.dir.getAbsolutePath}")
} else {
removedLog.renameDir(UnifiedLog.logDeleteDirName(topicPartition), shouldReinitialize = false)
addLogToBeDeleted(removedLog)
info(s"Log for partition ${removedLog.topicPartition} is renamed to ${removedLog.dir.getAbsolutePath} and is scheduled for deletion")
}
if (checkpoint) {
val logDir = removedLog.parentDirFile
val logsToCheckpoint = logsInDir(logDir)
checkpointRecoveryOffsetsInDir(logDir, logsToCheckpoint)
checkpointLogStartOffsetsInDir(logDir, logsToCheckpoint)
}
case None =>
if (offlineLogDirs.nonEmpty) {
throw new KafkaStorageException(s"Failed to delete log for ${if (isFuture) "future" else ""} $topicPartition because it may be in one of the offline directories ${offlineLogDirs.mkString(",")}")
}
}
removedLog
}
/**
* Rename the directories of the given topic-partitions and add them in the queue for
* deletion. Checkpoints are updated once all the directories have been renamed.
*
* @param topicPartitions The set of topic-partitions to delete asynchronously
* @param errorHandler The error handler that will be called when an exception for a particular
* topic-partition is raised
*/
def asyncDelete(topicPartitions: Iterable[TopicPartition],
isStray: Boolean,
errorHandler: (TopicPartition, Throwable) => Unit): Unit = {
val logDirs = mutable.Set.empty[File]
topicPartitions.foreach { topicPartition =>
try {
getLog(topicPartition).foreach { log =>
logDirs += log.parentDirFile
asyncDelete(topicPartition, checkpoint = false, isStray = isStray)
}
getLog(topicPartition, isFuture = true).foreach { log =>
logDirs += log.parentDirFile
asyncDelete(topicPartition, isFuture = true, checkpoint = false, isStray = isStray)
}
} catch {
case e: Throwable => errorHandler(topicPartition, e)
}
}
val logsByDirCached = logsByDir
logDirs.foreach { logDir =>
if (cleaner != null) cleaner.updateCheckpoints(logDir)
val logsToCheckpoint = logsInDir(logsByDirCached, logDir)
checkpointRecoveryOffsetsInDir(logDir, logsToCheckpoint)
checkpointLogStartOffsetsInDir(logDir, logsToCheckpoint)
}
}
/**
* Provides the full ordered list of suggested directories for the next partition.
* Currently this is done by calculating the number of partitions in each directory and then sorting the
* data directories by fewest partitions.
*/
private def nextLogDirs(): List[File] = {
if (_liveLogDirs.size == 1) {
List(_liveLogDirs.peek())
} else {
// count the number of logs in each parent directory (including 0 for empty directories)
val logCounts = allLogs.groupBy(_.parentDir).map { case (parent, logs) => parent -> logs.size }
val zeros = _liveLogDirs.asScala.map(dir => (dir.getPath, 0)).toMap
val dirCounts = (zeros ++ logCounts).toBuffer
// choose the directory with the least logs in it
dirCounts.sortBy(_._2).map {
case (path: String, _: Int) => new File(path)
}.toList
}
}
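// Illustrative sketch (not part of the original source): with hypothetical counts
// {"/d1" -> 10, "/d2" -> 3} and an empty "/d3", the `zeros ++ logCounts` merge ensures /d3
// appears at all, and sorting by count yields List(/d3, /d2, /d1), so new partitions land on the
// emptiest disk first while later entries serve as fallbacks if creation fails.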
/**
 * Delete any eligible log segments, considering only logs that are not compacted.
 * The total number of deleted segments is logged once cleanup completes.
 */
private def cleanupLogs(): Unit = {
debug("Beginning log cleanup...")
var total = 0
val startMs = time.milliseconds
// clean current logs.
val deletableLogs = {
if (cleaner != null) {
// prevent cleaner from working on same partitions when changing cleanup policy
cleaner.pauseCleaningForNonCompactedPartitions()
} else {
currentLogs.filter {
case (_, log) => !log.config.compact
}
}
}
try {
deletableLogs.foreach {
case (topicPartition, log) =>
debug(s"Garbage collecting '${log.name}'")
total += log.deleteOldSegments()
val futureLog = futureLogs.get(topicPartition)
if (futureLog != null) {
// clean future logs
debug(s"Garbage collecting future log '${futureLog.name}'")
total += futureLog.deleteOldSegments()
}
}
} finally {
if (cleaner != null) {
cleaner.resumeCleaning(deletableLogs.map(_._1))
}
}
debug(s"Log cleanup completed. $total files deleted in " +
(time.milliseconds - startMs) / 1000 + " seconds")
}
/**
* Get all the partition logs
*/
def allLogs: Iterable[UnifiedLog] = currentLogs.values ++ futureLogs.values
def logsByTopic(topic: String): Seq[UnifiedLog] = {
(currentLogs.toList ++ futureLogs.toList).collect {
case (topicPartition, log) if topicPartition.topic == topic => log
}
}
/**
* Map of log dir to logs by topic and partitions in that dir
*/
private def logsByDir: Map[String, Map[TopicPartition, UnifiedLog]] = {
// This code is called often by checkpoint processes and is written in a way that reduces
// allocations and CPU with many topic partitions.
// When changing this code please measure the changes with org.apache.kafka.jmh.server.CheckpointBench
val byDir = new mutable.AnyRefMap[String, mutable.AnyRefMap[TopicPartition, UnifiedLog]]()
def addToDir(tp: TopicPartition, log: UnifiedLog): Unit = {
byDir.getOrElseUpdate(log.parentDir, new mutable.AnyRefMap[TopicPartition, UnifiedLog]()).put(tp, log)
}
currentLogs.foreachEntry(addToDir)
futureLogs.foreachEntry(addToDir)
byDir
}
private def logsInDir(dir: File): Map[TopicPartition, UnifiedLog] = {
logsByDir.getOrElse(dir.getAbsolutePath, Map.empty)
}
private def logsInDir(cachedLogsByDir: Map[String, Map[TopicPartition, UnifiedLog]],
dir: File): Map[TopicPartition, UnifiedLog] = {
cachedLogsByDir.getOrElse(dir.getAbsolutePath, Map.empty)
}
// The logDir should be an absolute path
def isLogDirOnline(logDir: String): Boolean = {
if (!logDirs.exists(_.getAbsolutePath == logDir))
throw new LogDirNotFoundException(s"Log dir $logDir is not found in the config.")
_liveLogDirs.contains(new File(logDir))
}
/**
* Flush any log which has exceeded its flush interval and has unwritten messages.
*/
private def flushDirtyLogs(): Unit = {
debug("Checking for dirty logs to flush...")
for ((topicPartition, log) <- currentLogs.toList ++ futureLogs.toList) {
try {
val timeSinceLastFlush = time.milliseconds - log.lastFlushTime
debug(s"Checking if flush is needed on ${topicPartition.topic} flush interval ${log.config.flushMs}" +
s" last flushed ${log.lastFlushTime} time since last flush: $timeSinceLastFlush")
if (timeSinceLastFlush >= log.config.flushMs)
log.flush(false)
} catch {
case e: Throwable =>
error(s"Error flushing topic ${topicPartition.topic}", e)
}
}
}
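// Illustrative sketch, not part of the original source: the flush predicate above in
// isolation. With flushIntervalMs = 1000 and lastFlushMs = T, any check at T + 1000
// or later triggers a flush. The method name is hypothetical.
private def flushIsDue(nowMs: Long, lastFlushMs: Long, flushIntervalMs: Long): Boolean =
  nowMs - lastFlushMs >= flushIntervalMs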
private def removeLogAndMetrics(logs: Pool[TopicPartition, UnifiedLog], tp: TopicPartition): Option[UnifiedLog] = {
val removedLog = logs.remove(tp)
if (removedLog != null) {
removedLog.removeLogMetrics()
Some(removedLog)
} else {
None
}
}
def readBrokerEpochFromCleanShutdownFiles(): OptionalLong = {
// Verify that all log dirs record the same broker epoch in their clean shutdown files.
// If any dir is not live, fail the broker epoch check.
if (liveLogDirs.size < logDirs.size) {
return OptionalLong.empty()
}
var brokerEpoch = -1L
for (dir <- liveLogDirs) {
val cleanShutdownFileHandler = new CleanShutdownFileHandler(dir.getPath)
val currentBrokerEpoch = cleanShutdownFileHandler.read
if (!currentBrokerEpoch.isPresent) {
info(s"Unable to read the broker epoch in ${dir.toString}.")
return OptionalLong.empty()
}
if (brokerEpoch != -1 && currentBrokerEpoch.getAsLong != brokerEpoch) {
info(s"Found different broker epochs in ${dir.toString}. Other=$brokerEpoch vs current=$currentBrokerEpoch.")
return OptionalLong.empty()
}
brokerEpoch = currentBrokerEpoch.getAsLong
}
OptionalLong.of(brokerEpoch)
}
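// Illustrative sketch, not part of the original source: an empty result from the
// method above means some dir was offline, unreadable, or disagreed on the epoch,
// i.e. the broker cannot claim a consistent clean shutdown. The name is hypothetical.
private def hadConsistentCleanShutdown: Boolean = readBrokerEpochFromCleanShutdownFiles().isPresent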
}
object LogManager {
val LockFileName = ".lock"
/**
* Wait for all jobs to complete.
* @param jobs the futures to wait on
* @param callback invoked with the exception thrown by each Future#get that fails
* @return true if all jobs complete successfully; false otherwise
*/
private[log] def waitForAllToComplete(jobs: Seq[Future[_]], callback: Throwable => Unit): Boolean = {
jobs.count(future => Try(future.get) match {
case Success(_) => false
case Failure(e) =>
callback(e)
true
}) == 0
}
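// Illustrative sketch, not part of the original source: a wrapper around
// waitForAllToComplete that collects failures instead of handling them in the
// callback. The method name is hypothetical.
private[log] def waitAndCollectFailures(jobs: Seq[Future[_]]): Seq[Throwable] = {
  val failures = ArrayBuffer.empty[Throwable]
  waitForAllToComplete(jobs, e => failures += e)
  failures.toSeq
}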
val RecoveryPointCheckpointFile = "recovery-point-offset-checkpoint"
val LogStartOffsetCheckpointFile = "log-start-offset-checkpoint"
def apply(config: KafkaConfig,
initialOfflineDirs: Seq[String],
configRepository: ConfigRepository,
kafkaScheduler: Scheduler,
time: Time,
brokerTopicStats: BrokerTopicStats,
logDirFailureChannel: LogDirFailureChannel,
keepPartitionMetadataFile: Boolean): LogManager = {
val defaultProps = config.extractLogConfigMap
LogConfig.validateBrokerLogConfigValues(defaultProps, config.remoteLogManagerConfig.isRemoteStorageSystemEnabled())
val defaultLogConfig = new LogConfig(defaultProps)
val cleanerConfig = LogCleaner.cleanerConfig(config)
new LogManager(logDirs = config.logDirs.map(new File(_).getAbsoluteFile),
initialOfflineDirs = initialOfflineDirs.map(new File(_).getAbsoluteFile),
configRepository = configRepository,
initialDefaultConfig = defaultLogConfig,
cleanerConfig = cleanerConfig,
recoveryThreadsPerDataDir = config.numRecoveryThreadsPerDataDir,
flushCheckMs = config.logFlushSchedulerIntervalMs,
flushRecoveryOffsetCheckpointMs = config.logFlushOffsetCheckpointIntervalMs,
flushStartOffsetCheckpointMs = config.logFlushStartOffsetCheckpointIntervalMs,
retentionCheckMs = config.logCleanupIntervalMs,
maxTransactionTimeoutMs = config.transactionMaxTimeoutMs,
producerStateManagerConfig = new ProducerStateManagerConfig(config.producerIdExpirationMs, config.transactionPartitionVerificationEnable),
producerIdExpirationCheckIntervalMs = config.producerIdExpirationCheckIntervalMs,
scheduler = kafkaScheduler,
brokerTopicStats = brokerTopicStats,
logDirFailureChannel = logDirFailureChannel,
time = time,
keepPartitionMetadataFile = keepPartitionMetadataFile,
interBrokerProtocolVersion = config.interBrokerProtocolVersion,
remoteStorageSystemEnable = config.remoteLogManagerConfig.isRemoteStorageSystemEnabled(),
initialTaskDelayMs = config.logInitialTaskDelayMs)
}
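// Illustrative usage sketch, not part of the original source: wiring the factory
// above during broker startup. Every collaborator name here is a hypothetical
// placeholder for an object the broker already owns.
//
//   val logManager = LogManager(
//     config = kafkaConfig,
//     initialOfflineDirs = Seq.empty,
//     configRepository = configRepository,
//     kafkaScheduler = scheduler,
//     time = Time.SYSTEM,
//     brokerTopicStats = brokerTopicStats,
//     logDirFailureChannel = new LogDirFailureChannel(kafkaConfig.logDirs.size),
//     keepPartitionMetadataFile = true)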
/**
* Returns true if the given log should not be on the current broker
* according to the metadata image.
*
* @param brokerId The ID of the current broker.
* @param newTopicsImage The new topics image after the broker's metadata has been reloaded
* @param log The log object to check
* @return true if the log should not exist on the broker, false otherwise.
*/
def isStrayKraftReplica(
brokerId: Int,
newTopicsImage: TopicsImage,
log: UnifiedLog
): Boolean = {
if (log.topicId.isEmpty) {
// A missing topic ID can result from a storage failure, or from an unclean shutdown after the topic was
// created but before the `partition.metadata` file was flushed. Since `partition.metadata` is always flushed
// to disk before any data is appended to the log, a missing topic ID most likely means no data was ever
// appended, so the log can safely be treated as stray.
info(s"The topicId does not exist in $log, treating it as a stray log.")
return true
}
val topicId = log.topicId.get
val partitionId = log.topicPartition.partition()
Option(newTopicsImage.getPartition(topicId, partitionId)) match {
case Some(partition) =>
if (!partition.replicas.contains(brokerId)) {
info(s"Found stray log dir $log: the current replica assignment ${partition.replicas.mkString("[", ", ", "]")} " +
s"does not contain the local brokerId $brokerId.")
true
} else {
false
}
case None =>
info(s"Found stray log dir $log: the topicId $topicId does not exist in the metadata image")
true
}
}
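// Illustrative usage sketch, not part of the original source: after loading a new
// metadata image, a broker could collect the partitions to discard (`image` and
// `logManager` are hypothetical references):
//
//   val strayPartitions = logManager.allLogs
//     .filter(log => LogManager.isStrayKraftReplica(brokerId, image.topics(), log))
//     .map(_.topicPartition)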
/**
* Find logs which should not be on the current broker, according to the full LeaderAndIsrRequest.
*
* @param brokerId The ID of the current broker.
* @param request The full LeaderAndIsrRequest, containing all partitions owned by the broker.
* @param logs A collection of Log objects.
*
* @return The topic partitions which are no longer needed on this broker.
*/
def findStrayReplicas(
brokerId: Int,
request: LeaderAndIsrRequest,
logs: Iterable[UnifiedLog]
): Iterable[TopicPartition] = {
if (request.requestType() != AbstractControlRequest.Type.FULL) {
throw new RuntimeException("Cannot use incremental LeaderAndIsrRequest to find strays.")
}
val partitions = new util.HashMap[TopicPartition, Uuid]()
request.data().topicStates().forEach(topicState => {
topicState.partitionStates().forEach(partition => {
partitions.put(new TopicPartition(topicState.topicName(), partition.partitionIndex()),
topicState.topicId())
})
})
logs.flatMap { log =>
val topicId = log.topicId.getOrElse {
throw new RuntimeException(s"The log dir $log does not have a topic ID, " +
"which is not allowed when running in KRaft mode.")
}
Option(partitions.get(log.topicPartition)) match {
case Some(id) =>
if (id.equals(topicId)) {
None
} else {
info(s"Found stray log dir $log: this partition now exists with topic ID $id not $topicId.")
Some(log.topicPartition)
}
case None =>
info(s"Found stray log dir $log: this partition does not exist in the new full LeaderAndIsrRequest.")
Some(log.topicPartition)
}
}
}
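// Illustrative usage sketch, not part of the original source: on receiving a full
// LeaderAndIsrRequest, a ZK-mode broker can delete whatever this method reports
// (`logManager` and `request` are hypothetical references):
//
//   LogManager.findStrayReplicas(brokerId, request, logManager.allLogs)
//     .foreach(tp => logManager.asyncDelete(tp, isStray = true))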
}