/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package kafka.log

import java.io._
import kafka.utils._
import scala.collection._
import kafka.log.Log._
import kafka.common.{TopicAndPartition, KafkaException}
import kafka.server.KafkaConfig


/**
 * The entry point to the kafka log management subsystem. The log manager is responsible for log creation, retrieval, and cleaning.
 * All read and write operations are delegated to the individual log instances.
 * 
 * The log manager maintains logs in one or more directories. New logs are created in the data directory
 * with the fewest logs. No attempt is made to move partitions after the fact or balance based on
 * size or I/O rate.
 * 
 * A background thread handles log retention by periodically truncating excess log segments.
 */
@threadsafe
private[kafka] class LogManager(val config: KafkaConfig,
                                scheduler: KafkaScheduler,
                                private val time: Time) extends Logging {

  val CleanShutdownFile = ".kafka_cleanshutdown"
  val LockFile = ".lock"
  val logDirs: Array[File] = config.logDirs.map(new File(_)).toArray
  private val logFileSizeMap = config.logSegmentBytesPerTopicMap
  private val logFlushInterval = config.logFlushIntervalMessages
  private val logFlushIntervals = config.logFlushIntervalMsPerTopicMap
  private val logCreationLock = new Object
  private val logRetentionSizeMap = config.logRetentionBytesPerTopicMap
  private val logRetentionMsMap = config.logRetentionHoursPerTopicMap.map(e => (e._1, e._2 * 60 * 60 * 1000L)) // convert hours to ms
  private val logRollMsMap = config.logRollHoursPerTopicMap.map(e => (e._1, e._2 * 60 * 60 * 1000L))
  private val logRollDefaultIntervalMs = 1000L * 60 * 60 * config.logRollHours
  private val logCleanupIntervalMs = 1000L * 60 * config.logCleanupIntervalMins
  private val logCleanupDefaultAgeMs = 1000L * 60 * 60 * config.logRetentionHours

  this.logIdent = "[Log Manager on Broker " + config.brokerId + "] "
  private val logs = new Pool[TopicAndPartition, Log]()
  
  createAndValidateLogDirs(logDirs)
  private var dirLocks = lockLogDirs(logDirs)
  loadLogs(logDirs)
  
  /**
   * 1. Ensure that there are no duplicates in the directory list
   * 2. Create each directory if it doesn't exist
   * 3. Check that each path is a readable directory 
   */
  private def createAndValidateLogDirs(dirs: Seq[File]) {
    if(dirs.map(_.getCanonicalPath).toSet.size < dirs.size)
      throw new KafkaException("Duplicate log directory found: " + logDirs.mkString(", "))
    for(dir <- dirs) {
      if(!dir.exists) {
        info("Log directory '" + dir.getAbsolutePath + "' not found, creating it.")
        val created = dir.mkdirs()
        if(!created)
          throw new KafkaException("Failed to create data directory " + dir.getAbsolutePath)
      }
      if(!dir.isDirectory || !dir.canRead)
        throw new KafkaException(dir.getAbsolutePath + " is not a readable log directory.")
    }
  }
  
  /**
   * Lock all the given directories
   */
  private def lockLogDirs(dirs: Seq[File]): Seq[FileLock] = {
    dirs.map { dir =>
      val lock = new FileLock(new File(dir, LockFile))
      if(!lock.tryLock())
        throw new KafkaException("Failed to acquire lock on file .lock in " + lock.file.getParentFile.getAbsolutePath + 
                               ". A Kafka instance in another process or thread is using this directory.")
      lock
    }
  }
  
  /**
   * Recover and load all logs in the given data directories
   */
  private def loadLogs(dirs: Seq[File]) {
    for(dir <- dirs) {
      /* check if this set of logs was shut down cleanly */
      val cleanShutDownFile = new File(dir, CleanShutdownFile)
      val needsRecovery = !cleanShutDownFile.exists
      cleanShutDownFile.delete
      /* load the logs */
      val subDirs = dir.listFiles()
      if(subDirs != null) {
        for(dir <- subDirs) {
          if(dir.isDirectory){
            info("Loading log '" + dir.getName + "'")
            val topicPartition = parseTopicPartitionName(dir.getName)
            val rollIntervalMs = logRollMsMap.get(topicPartition.topic).getOrElse(this.logRollDefaultIntervalMs)
            val maxLogFileSize = logFileSizeMap.get(topicPartition.topic).getOrElse(config.logSegmentBytes)
            val log = new Log(dir, 
                              maxLogFileSize, 
                              config.messageMaxBytes,
                              logFlushInterval, 
                              rollIntervalMs, 
                              needsRecovery, 
                              config.logIndexSizeMaxBytes,
                              config.logIndexIntervalBytes,
                              time, 
                              config.brokerId)
            val previous = this.logs.put(topicPartition, log)
            if(previous != null)
              throw new IllegalArgumentException("Duplicate log directories found: %s, %s!".format(log.dir.getAbsolutePath, previous.dir.getAbsolutePath))
          }
        }
      }
    }
  }
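
  /*
   * Expected on-disk layout (illustrative example): each data directory holds one subdirectory
   * per partition, named "<topic>-<partition>", e.g. /tmp/kafka-logs/my-topic-0. Plain files in
   * a data directory are ignored by loadLogs(); only subdirectories are opened as logs.
   */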

  /**
   *  Start the background tasks that clean up and flush the logs
   */
  def startup() {
    /* Schedule the cleanup task to delete old logs */
    if(scheduler != null) {
      info("Starting log cleaner every " + logCleanupIntervalMs + " ms")
      scheduler.scheduleWithRate(cleanupLogs, "kafka-logcleaner-", 60 * 1000, logCleanupIntervalMs, false)
      info("Starting log flusher every " + config.logFlushSchedulerIntervalMs +
                   " ms with the following overrides " + logFlushIntervals)
      scheduler.scheduleWithRate(flushDirtyLogs, "kafka-logflusher-",
                                 config.logFlushSchedulerIntervalMs, config.logFlushSchedulerIntervalMs, false)
    }
  }
  
  /**
   * Get the log if it exists
   */
  def getLog(topic: String, partition: Int): Option[Log] = {
    val topicAndPartition = TopicAndPartition(topic, partition)
    val log = logs.get(topicAndPartition)
    if (log == null)
      None
    else
      Some(log)
  }

  /**
   * Create the log if it does not exist; if it exists, just return it
   */
  def getOrCreateLog(topic: String, partition: Int): Log = {
    val topicAndPartition = TopicAndPartition(topic, partition)
    logs.get(topicAndPartition) match {
      case null => createLogIfNotExists(topicAndPartition)
      case log: Log => log
    }
  }
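
  /*
   * Usage sketch (illustrative; "my-topic" is a hypothetical topic):
   *
   *   logManager.getLog("my-topic", 0)          // None until the partition's log has been created
   *   logManager.getOrCreateLog("my-topic", 0)  // creates "<dataDir>/my-topic-0" on first call, then returns it
   */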

  /**
   * Create a log for the given topic and the given partition.
   * If the log already exists, just return the existing log.
   */
  private def createLogIfNotExists(topicAndPartition: TopicAndPartition): Log = {
    logCreationLock synchronized {
      var log = logs.get(topicAndPartition)
      
      // check if the log has already been created in another thread
      if(log != null)
        return log
      
      // if not, create it
      val dataDir = nextLogDir()
      val dir = new File(dataDir, topicAndPartition.topic + "-" + topicAndPartition.partition)
      dir.mkdirs()
      val rollIntervalMs = logRollMsMap.get(topicAndPartition.topic).getOrElse(this.logRollDefaultIntervalMs)
      val maxLogFileSize = logFileSizeMap.get(topicAndPartition.topic).getOrElse(config.logSegmentBytes)
      log = new Log(dir, 
                    maxLogFileSize, 
                    config.messageMaxBytes,
                    logFlushInterval, 
                    rollIntervalMs, 
                    needsRecovery = false, 
                    config.logIndexSizeMaxBytes,
                    config.logIndexIntervalBytes, 
                    time, 
                    config.brokerId)
      info("Created log for partition [%s,%d] in %s.".format(topicAndPartition.topic, topicAndPartition.partition, dataDir.getAbsolutePath))
      logs.put(topicAndPartition, log)
      log
    }
  }
  
  /**
   * Choose the next directory in which to create a log. Currently this is done
   * by calculating the number of partitions in each directory and then choosing the
   * data directory with the fewest partitions.
   */
  private def nextLogDir(): File = {
    if(logDirs.size == 1) {
      logDirs(0)
    } else {
      // count the number of logs in each parent directory (including 0 for empty directories)
      val logCounts = allLogs.groupBy(_.dir.getParent).mapValues(_.size)
      val zeros = logDirs.map(dir => (dir.getPath, 0)).toMap
      val dirCounts = (zeros ++ logCounts).toBuffer
    
      // choose the directory with the least logs in it
      val leastLoaded = dirCounts.sortBy(_._2).head
      new File(leastLoaded._1)
    }
  }
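
  /*
   * Worked example (hypothetical directories): with logDirs = [/data/kafka-1, /data/kafka-2],
   * where /data/kafka-1 holds three partition directories and /data/kafka-2 holds one, the next
   * log is created under /data/kafka-2. The `zeros` map counts empty directories as 0, so a
   * newly added data directory is preferred until it catches up.
   */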

  def getOffsets(topicAndPartition: TopicAndPartition, timestamp: Long, maxNumOffsets: Int): Seq[Long] = {
    val log = getLog(topicAndPartition.topic, topicAndPartition.partition)
    log match {
      case Some(l) => l.getOffsetsBefore(timestamp, maxNumOffsets)
      case None => getEmptyOffsets(timestamp)
    }
  }

  /**
   * Runs through the log removing segments older than a certain age
   */
  private def cleanupExpiredSegments(log: Log): Int = {
    val startMs = time.milliseconds
    val topic = parseTopicPartitionName(log.name).topic
    val logCleanupThresholdMs = logRetentionMsMap.get(topic).getOrElse(this.logCleanupDefaultAgeMs)
    val toBeDeleted = log.markDeletedWhile(startMs - _.messageSet.file.lastModified > logCleanupThresholdMs)
    val total = log.deleteSegments(toBeDeleted)
    total
  }
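
  /*
   * Example (illustrative numbers): with config.logRetentionHours = 168 and no per-topic
   * override, logCleanupThresholdMs is 604800000 ms (7 days), so any segment whose file was
   * last modified more than 7 days before startMs is marked for deletion.
   */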

  /**
   *  Runs through the log removing segments until the size of the log
   *  is at most the configured retention size in bytes
   */
  private def cleanupSegmentsToMaintainSize(log: Log): Int = {
    val topic = parseTopicPartitionName(log.dir.getName).topic
    val maxLogRetentionSize = logRetentionSizeMap.get(topic).getOrElse(config.logRetentionBytes)
    if(maxLogRetentionSize < 0 || log.size < maxLogRetentionSize) return 0
    var diff = log.size - maxLogRetentionSize
    def shouldDelete(segment: LogSegment) = {
      if(diff - segment.size >= 0) {
        diff -= segment.size
        true
      } else {
        false
      }
    }
    val toBeDeleted = log.markDeletedWhile(shouldDelete)
    val total = log.deleteSegments(toBeDeleted)
    total
  }
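
  /*
   * Worked example (illustrative numbers): with a 1000 MB retention limit and a 1500 MB log made
   * of 100 MB segments, diff starts at 500 MB; shouldDelete accepts the five oldest segments
   * (bringing diff to 0) and rejects the sixth, leaving the log at roughly the retention limit.
   */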

  /**
   * Delete any eligible log segments from every log; the total number deleted is logged at debug level.
   */
  def cleanupLogs() {
    debug("Beginning log cleanup...")
    var total = 0
    val startMs = time.milliseconds
    for(log <- allLogs) {
      debug("Garbage collecting '" + log.name + "'")
      total += cleanupExpiredSegments(log) + cleanupSegmentsToMaintainSize(log)
    }
    debug("Log cleanup completed. " + total + " files deleted in " +
                  (time.milliseconds - startMs) / 1000 + " seconds")
  }

  /**
   * Close all the logs
   */
  def shutdown() {
    debug("Shutting down.")
    try {
      // close the logs
      allLogs.foreach(_.close())
      // mark that the shutdown was clean by creating the clean shutdown marker file
      logDirs.foreach(dir => Utils.swallow(new File(dir, CleanShutdownFile).createNewFile()))
    } finally {
      // regardless of whether the close succeeded, we need to unlock the data directories
      dirLocks.foreach(_.destroy())
    }
    debug("Shutdown complete.")
  }
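
  /*
   * Note: the CleanShutdownFile written here is the marker loadLogs() checks (and deletes) on the
   * next startup; if it is missing, every log in that directory is opened with needsRecovery = true.
   */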

  /**
   * Get all the partition logs
   */
  def allLogs(): Iterable[Log] = logs.values

  /**
   * Flush any log which has exceeded its flush interval and has unwritten messages.
   */
  private def flushDirtyLogs() = {
    debug("Checking for dirty logs to flush...")
    for (log <- allLogs) {
      try {
        val timeSinceLastFlush = System.currentTimeMillis - log.getLastFlushedTime
        val logFlushInterval = logFlushIntervals.getOrElse(log.topicName, config.logFlushIntervalMs)
        debug(log.topicName + " flush interval  " + logFlushInterval +
                      " last flushed " + log.getLastFlushedTime + " time since last flush: " + timeSinceLastFlush)
        if(timeSinceLastFlush >= logFlushInterval)
          log.flush()
      } catch {
        case e: Exception =>
          error("Error flushing topic " + log.topicName, e)
          e match {
            case _: IOException =>
              fatal("Halting due to unrecoverable I/O error while flushing logs: " + e.getMessage, e)
              System.exit(1)
            case _ =>
          }
      }
    }
  }

  private def parseTopicPartitionName(name: String): TopicAndPartition = {
    val index = name.lastIndexOf('-')
    TopicAndPartition(name.substring(0,index), name.substring(index+1).toInt)
  }
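
  /*
   * Example: a directory named "my-topic-3" parses to TopicAndPartition("my-topic", 3).
   * lastIndexOf('-') is used so that topic names containing dashes are parsed correctly.
   */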

  def topics(): Iterable[String] = logs.keys.map(_.topic)

}
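
/*
 * Lifecycle sketch (illustrative only; `kafkaConfig` and `kafkaScheduler` are assumed to be
 * constructed elsewhere, e.g. by KafkaServer, and SystemTime is the kafka.utils clock):
 *
 *   val logManager = new LogManager(kafkaConfig, kafkaScheduler, SystemTime)
 *   logManager.startup()                                // schedule the cleanup and flush tasks
 *   val log = logManager.getOrCreateLog("my-topic", 0)  // reads and writes go directly to the Log
 *   logManager.shutdown()                               // close logs, write clean-shutdown markers, release locks
 */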
