/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package kafka.log
import kafka.api.OffsetRequest
import java.io.{IOException, File}
import java.util.{Comparator, Collections, ArrayList}
import java.util.concurrent.atomic._
import kafka.utils._
import scala.math._
import java.text.NumberFormat
import kafka.server.BrokerTopicStats
import kafka.message._
import kafka.common._
import kafka.metrics.KafkaMetricsGroup
import com.yammer.metrics.core.Gauge
object Log {
val LogFileSuffix = ".log"
val IndexFileSuffix = ".index"
/**
* Search for the greatest range with start <= the target value.
*/
def findRange[T <: Range](ranges: Array[T], value: Long, arraySize: Int): Option[T] = {
if(ranges.size < 1)
return None
// check out of bounds
if(value < ranges(0).start)
return None
var low = 0
var high = arraySize - 1
while(low < high) {
val mid = ceil((high + low) / 2.0).toInt
val found = ranges(mid)
if(found.start == value)
return Some(found)
else if (value < found.start)
high = mid - 1
else
low = mid
}
Some(ranges(low))
}
def findRange[T <: Range](ranges: Array[T], value: Long): Option[T] =
findRange(ranges, value, ranges.length)
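/*
* Illustrative sketch (not part of the original source): findRange is a binary search over segment
* base offsets. Assuming a hypothetical array ranges of three segments starting at offsets 0, 10 and 20:
*
*   findRange(ranges, 15)   // Some(segment starting at 10), the greatest start <= 15
*   findRange(ranges, 20)   // Some(segment starting at 20)
*   findRange(ranges, -1)   // None, since -1 is below the first segment's start
*/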
/**
* Make log segment file name from the given offset. All this does is pad out the offset number with zeros
* so that ls sorts the files numerically
*/
def filenamePrefixFromOffset(offset: Long): String = {
val nf = NumberFormat.getInstance()
nf.setMinimumIntegerDigits(20)
nf.setMaximumFractionDigits(0)
nf.setGroupingUsed(false)
nf.format(offset)
}
def logFilename(dir: File, offset: Long) =
new File(dir, filenamePrefixFromOffset(offset) + LogFileSuffix)
def indexFilename(dir: File, offset: Long) =
new File(dir, filenamePrefixFromOffset(offset) + IndexFileSuffix)
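/*
* Illustrative sketch (not part of the original source): segment file naming for a hypothetical log
* directory and a base offset of 42. The 20-digit zero padding keeps files numerically sorted by ls:
*
*   filenamePrefixFromOffset(42)                               // "00000000000000000042"
*   logFilename(new File("/tmp/kafka-logs/my-topic-0"), 42)    // .../00000000000000000042.log
*   indexFilename(new File("/tmp/kafka-logs/my-topic-0"), 42)  // .../00000000000000000042.index
*/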
def getEmptyOffsets(timestamp: Long): Seq[Long] =
if (timestamp == OffsetRequest.LatestTime || timestamp == OffsetRequest.EarliestTime)
Seq(0L)
else Nil
}
/**
* An append-only log for storing messages.
*
* The log is a sequence of LogSegments, each with a base offset denoting the first message in the segment.
*
* New log segments are created according to a configurable policy that controls the size in bytes or time interval
* for a given segment.
*
*/
@threadsafe
private[kafka] class Log(val dir: File,
val maxLogFileSize: Int,
val maxMessageSize: Int,
val flushInterval: Int = Int.MaxValue,
val rollIntervalMs: Long = Long.MaxValue,
val needsRecovery: Boolean,
val maxIndexSize: Int = (10*1024*1024),
val indexIntervalBytes: Int = 4096,
time: Time = SystemTime,
brokerId: Int = 0) extends Logging with KafkaMetricsGroup {
this.logIdent = "[Kafka Log on Broker " + brokerId + "], "
import kafka.log.Log._
/* A lock that guards all modifications to the log */
private val lock = new Object
/* The current number of unflushed messages appended to the log */
private val unflushed = new AtomicInteger(0)
/* the last time the log was flushed */
private val lastflushedTime = new AtomicLong(System.currentTimeMillis)
/* the actual segments of the log */
private[log] val segments: SegmentList[LogSegment] = loadSegments()
/* Calculate the offset of the next message */
private var nextOffset: AtomicLong = new AtomicLong(segments.view.last.nextOffset())
info("Completed load of log %s with log end offset %d".format(name, logEndOffset))
newGauge(name + "-" + "NumLogSegments",
new Gauge[Int] { def value = numberOfSegments })
newGauge(name + "-" + "LogEndOffset",
new Gauge[Long] { def value = logEndOffset })
/* The name of this log */
def name = dir.getName()
/* Load the log segments from the log files on disk */
private def loadSegments(): SegmentList[LogSegment] = {
// open all the segments read-only
val logSegments = new ArrayList[LogSegment]
val ls = dir.listFiles()
if(ls != null) {
for(file <- ls if file.isFile) {
val filename = file.getName()
if(!file.canRead) {
throw new IOException("Could not read file " + file)
} else if(filename.endsWith(IndexFileSuffix)) {
// ensure that we have a corresponding log file for this index file
val log = new File(file.getAbsolutePath.replace(IndexFileSuffix, LogFileSuffix))
if(!log.exists) {
warn("Found an orphaned index file, %s, with no corresponding log file.".format(file.getAbsolutePath))
file.delete()
}
} else if(filename.endsWith(LogFileSuffix)) {
val offset = filename.substring(0, filename.length - LogFileSuffix.length).toLong
// TODO: we should ideally rebuild any missing index files, instead of erroring out
if(!Log.indexFilename(dir, offset).exists)
throw new IllegalStateException("Found log file with no corresponding index file.")
logSegments.add(new LogSegment(dir = dir,
startOffset = offset,
indexIntervalBytes = indexIntervalBytes,
maxIndexSize = maxIndexSize))
}
}
}
if(logSegments.size == 0) {
// no existing segments, create a new mutable segment
logSegments.add(new LogSegment(dir = dir,
startOffset = 0,
indexIntervalBytes = indexIntervalBytes,
maxIndexSize = maxIndexSize))
} else {
// there is at least one existing segment, validate and recover them/it
// sort segments into ascending order for fast searching
Collections.sort(logSegments, new Comparator[LogSegment] {
def compare(s1: LogSegment, s2: LogSegment): Int = {
if(s1.start == s2.start) 0
else if(s1.start < s2.start) -1
else 1
}
})
// reset the index size of the last (current active) log segment to its maximum value
logSegments.get(logSegments.size() - 1).index.resize(maxIndexSize)
// run recovery on the last segment if necessary
if(needsRecovery) {
val activeSegment = logSegments.get(logSegments.size - 1)
try {
recoverSegment(activeSegment)
} catch {
case e: InvalidOffsetException =>
val startOffset = activeSegment.start
warn("Found invalid offset during recovery of the active segment for topic partition " + dir.getName +". Deleting the segment and " +
"creating an empty one with starting offset " + startOffset)
// truncate the active segment to its starting offset
activeSegment.truncateTo(startOffset)
}
}
}
val segmentList = logSegments.toArray(new Array[LogSegment](logSegments.size))
// Sanity check the index of every segment: it must either be empty or have a last offset greater than its base offset.
for (s <- segmentList) {
require(s.index.entries == 0 || s.index.lastOffset > s.index.baseOffset,
"Corrupt index found, index file (%s) has non-zero size but the last offset is %d and the base offset is %d"
.format(s.index.file.getAbsolutePath, s.index.lastOffset, s.index.baseOffset))
}
new SegmentList(segmentList)
}
/**
* Run recovery on the given segment. This will rebuild the index from the log file and lop off any invalid bytes from the end of the log.
*/
private def recoverSegment(segment: LogSegment) {
info("Recovering log segment %s".format(segment.messageSet.file.getAbsolutePath))
segment.index.truncate()
var validBytes = 0
var lastIndexEntry = 0
val iter = segment.messageSet.iterator
try {
while(iter.hasNext) {
val entry = iter.next
entry.message.ensureValid()
if(validBytes - lastIndexEntry > indexIntervalBytes) {
// we need to decompress the message, if required, to get the offset of the first uncompressed message
val startOffset =
entry.message.compressionCodec match {
case NoCompressionCodec =>
entry.offset
case _ =>
ByteBufferMessageSet.decompress(entry.message).head.offset
}
segment.index.append(startOffset, validBytes)
lastIndexEntry = validBytes
}
validBytes += MessageSet.entrySize(entry.message)
}
} catch {
case e: InvalidMessageException =>
logger.warn("Found invalid messages in log " + name)
}
val truncated = segment.messageSet.sizeInBytes - validBytes
if(truncated > 0)
warn("Truncated " + truncated + " invalid bytes from the log " + name + ".")
segment.messageSet.truncateTo(validBytes)
}
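/*
* Illustrative note (not part of the original source): recovery walks the segment entry by entry,
* verifying each CRC, and counts only complete, valid entries in validBytes; anything after that point
* (e.g. a partially written message from an unclean shutdown) is truncated away. Index entries are
* re-appended roughly every indexIntervalBytes of valid data. For example, assuming
* indexIntervalBytes = 4096 and entries of 1000 bytes each, the first rebuilt index entry would be
* written while processing the sixth message, once 5000 valid bytes have accumulated.
*/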
/**
* The number of segments in the log
*/
def numberOfSegments: Int = segments.view.length
/**
* Close this log
*/
def close() {
debug("Closing log " + name)
lock synchronized {
for(seg <- segments.view)
seg.close()
}
}
/**
* Append this message set to the active segment of the log, rolling over to a fresh segment if necessary.
*
* This method is generally responsible for assigning offsets to the messages;
* however, if the assignOffsets = false flag is passed, we only check that the existing offsets are valid.
*
* Returns a tuple containing (first_offset, last_offset) for the newly appended message set,
* or (-1,-1) if the message set is empty
*/
def append(messages: ByteBufferMessageSet, assignOffsets: Boolean = true): (Long, Long) = {
val messageSetInfo = analyzeAndValidateMessageSet(messages)
// if we have any valid messages, append them to the log
if(messageSetInfo.count == 0) {
(-1L, -1L)
} else {
// trim any invalid bytes or partial messages before appending it to the on-disk log
var validMessages = trimInvalidBytes(messages)
try {
// they are valid, insert them in the log
val offsets = lock synchronized {
val firstOffset = nextOffset.get
// maybe roll the log if this segment is full
val segment = maybeRoll(segments.view.last)
// assign offsets to the messageset
val lastOffset =
if(assignOffsets) {
val offsetCounter = new AtomicLong(nextOffset.get)
try {
validMessages = validMessages.assignOffsets(offsetCounter, messageSetInfo.codec)
} catch {
case e: IOException => throw new KafkaException("Error in validating messages while appending to log '%s'".format(name), e)
}
val assignedLastOffset = offsetCounter.get - 1
val numMessages = assignedLastOffset - firstOffset + 1
BrokerTopicStats.getBrokerTopicStats(topicName).messagesInRate.mark(numMessages)
BrokerTopicStats.getBrokerAllTopicsStats.messagesInRate.mark(numMessages)
assignedLastOffset
} else {
require(messageSetInfo.offsetsMonotonic, "Out of order offsets found in " + messages)
require(messageSetInfo.firstOffset >= nextOffset.get,
"Attempt to append a message set beginning with offset %d to a log with log end offset %d."
.format(messageSetInfo.firstOffset, nextOffset.get))
messageSetInfo.lastOffset
}
// Check if the message sizes are valid. This check is done after assigning offsets to ensure the comparison
// happens with the new message size (after re-compression, if any)
for(messageAndOffset <- validMessages.shallowIterator) {
if(MessageSet.entrySize(messageAndOffset.message) > maxMessageSize)
throw new MessageSizeTooLargeException("Message size is %d bytes which exceeds the maximum configured message size of %d."
.format(MessageSet.entrySize(messageAndOffset.message), maxMessageSize))
}
// now append to the log
segment.append(firstOffset, validMessages)
// advance the log end offset
nextOffset.set(lastOffset + 1)
trace("Appended message set to log %s with first offset: %d, next offset: %d, and messages: %s"
.format(this.name, firstOffset, nextOffset.get(), validMessages))
// return the offset at which the messages were appended
(firstOffset, lastOffset)
}
// maybe flush the log and index
val numAppendedMessages = (offsets._2 - offsets._1 + 1).toInt
maybeFlush(numAppendedMessages)
// return the first and last offset
offsets
} catch {
case e: IOException => throw new KafkaStorageException("I/O exception in append to log '%s'".format(name), e)
}
}
}
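/*
* Illustrative sketch (not part of the original source): a typical append with broker-assigned offsets.
* Assuming a log whose end offset is currently 100 and a three-message uncompressed set:
*
*   val (first, last) = log.append(messages)   // assignOffsets defaults to true
*   // first == 100, last == 102, and logEndOffset advances to 103
*
* With assignOffsets = false (e.g. when appending messages whose offsets were already assigned elsewhere),
* the existing offsets are only validated for monotonicity and position, not re-assigned.
*/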
/* struct to hold various quantities we compute about each message set before appending to the log */
case class MessageSetAppendInfo(firstOffset: Long, lastOffset: Long, codec: CompressionCodec, count: Int, offsetsMonotonic: Boolean)
/**
* Validate the following:
* 1. each message matches its CRC
*
* Also compute the following quantities:
* 1. First offset in the message set
* 2. Last offset in the message set
* 3. Number of messages
* 4. Whether the offsets are monotonically increasing
* 5. Whether any compression codec is used (if many are used, then the last one is given)
*/
private def analyzeAndValidateMessageSet(messages: ByteBufferMessageSet): MessageSetAppendInfo = {
var messageCount = 0
var firstOffset, lastOffset = -1L
var codec: CompressionCodec = NoCompressionCodec
var monotonic = true
for(messageAndOffset <- messages.shallowIterator) {
// update the first offset if on the first message
if(firstOffset < 0)
firstOffset = messageAndOffset.offset
// check that offsets are monotonically increasing
if(lastOffset >= messageAndOffset.offset)
monotonic = false
// update the last offset seen
lastOffset = messageAndOffset.offset
// check the validity of the message by checking CRC
val m = messageAndOffset.message
m.ensureValid()
messageCount += 1
val messageCodec = m.compressionCodec
if(messageCodec != NoCompressionCodec)
codec = messageCodec
}
MessageSetAppendInfo(firstOffset, lastOffset, codec, messageCount, monotonic)
}
/**
* Trim any invalid bytes from the end of this message set (if there are any)
*/
private def trimInvalidBytes(messages: ByteBufferMessageSet): ByteBufferMessageSet = {
val messageSetValidBytes = messages.validBytes
if(messageSetValidBytes < 0)
throw new InvalidMessageSizeException("Illegal length of message set " + messageSetValidBytes + " Message set cannot be appended to log. Possible causes are corrupted produce requests")
if(messageSetValidBytes == messages.sizeInBytes) {
messages
} else {
// trim invalid bytes
val validByteBuffer = messages.buffer.duplicate()
validByteBuffer.limit(messageSetValidBytes)
new ByteBufferMessageSet(validByteBuffer)
}
}
/**
* Read a message set from the log.
* startOffset - The logical offset to begin reading at
* maxLength - The maximum number of bytes to read
* maxOffset - The first offset not included in the read
*/
def read(startOffset: Long, maxLength: Int, maxOffset: Option[Long] = None): MessageSet = {
trace("Reading %d bytes from offset %d in log %s of length %d bytes".format(maxLength, startOffset, name, size))
val view = segments.view
// check if the offset is valid and in range
val first = view.head.start
val next = nextOffset.get
if(startOffset == next)
return MessageSet.Empty
else if(startOffset > next || startOffset < first)
throw new OffsetOutOfRangeException("Request for offset %d but we only have log segments in the range %d to %d.".format(startOffset, first, next))
// Do the read on the segment with a base offset less than the target offset
// TODO: to handle sparse offsets, we need to skip to the next segment if this read doesn't find anything
Log.findRange(view, startOffset, view.length) match {
case None => throw new OffsetOutOfRangeException("Offset is earlier than the earliest offset")
case Some(segment) => segment.read(startOffset, maxLength, maxOffset)
}
}
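/*
* Illustrative sketch (not part of the original source): read() boundary behaviour. Assuming a log with
* segments starting at offsets 0 and 50 and a log end offset of 75:
*
*   log.read(75, 4096)   // MessageSet.Empty -- reading exactly at the log end offset
*   log.read(60, 4096)   // served from the segment with base offset 50
*   log.read(80, 4096)   // throws OffsetOutOfRangeException (beyond the log end offset)
*/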
/**
* Delete any log segments matching the given predicate function
*/
def markDeletedWhile(predicate: LogSegment => Boolean): Seq[LogSegment] = {
lock synchronized {
debug("Garbage collecting log..")
debug("Segments of log %s : %s ".format(this.name, segments.view.mkString(",")))
debug("Index files for log %s: %s".format(this.name, segments.view.map(_.index.file.exists()).mkString(",")))
debug("Data files for log %s: %s".format(this.name, segments.view.map(_.messageSet.file.exists()).mkString(",")))
val view = segments.view
val deletable = view.takeWhile(predicate)
for(seg <- deletable)
seg.deleted = true
var numToDelete = deletable.size
// if we are deleting everything, create a new empty segment
if(numToDelete == view.size) {
if (view(numToDelete - 1).size > 0)
roll()
else {
// If the last segment to be deleted is empty and we roll the log, the new segment will have the same
// file name. So simply reuse the last segment and reset the modified time.
view(numToDelete - 1).messageSet.file.setLastModified(time.milliseconds)
numToDelete -= 1
}
}
segments.trunc(numToDelete)
}
}
/**
* Get the size of the log in bytes
*/
def size: Long = segments.view.foldLeft(0L)(_ + _.size)
/**
* Get the offset of the next message that will be appended
*/
def logEndOffset: Long = nextOffset.get
/**
* Roll the log over if necessary
*/
private def maybeRoll(segment: LogSegment): LogSegment = {
if(segment.messageSet.sizeInBytes > maxLogFileSize) {
info("Rolling %s due to full data log".format(name))
roll()
} else if((segment.firstAppendTime.isDefined) && (time.milliseconds - segment.firstAppendTime.get > rollIntervalMs)) {
info("Rolling %s due to time based rolling".format(name))
roll()
} else if(segment.index.isFull) {
info("Rolling %s due to full index maxIndexSize = %d, entries = %d, maxEntries = %d"
.format(name, segment.index.maxIndexSize, segment.index.entries(), segment.index.maxEntries))
roll()
} else
segment
}
/**
* Create a new segment and make it active, and return it
*/
def roll(): LogSegment = {
lock synchronized {
flush()
rollToOffset(logEndOffset)
}
}
/**
* Roll the log over to the given new offset value
*/
private def rollToOffset(newOffset: Long): LogSegment = {
val logFile = logFilename(dir, newOffset)
val indexFile = indexFilename(dir, newOffset)
for(file <- List(logFile, indexFile); if file.exists) {
warn("Newly rolled segment file " + file.getAbsolutePath + " already exists; deleting it first")
file.delete()
}
info("Rolling log '" + name + "' to " + logFile.getAbsolutePath + " and " + indexFile.getAbsolutePath)
segments.view.lastOption match {
case Some(segment) => segment.index.trimToValidSize()
case None =>
}
val segmentsView = segments.view
if(segmentsView.size > 0 && segmentsView.last.start == newOffset)
throw new KafkaException("Trying to roll a new log segment for topic partition %s with start offset %d while it already exists".format(dir.getName, newOffset))
val segment = new LogSegment(dir,
startOffset = newOffset,
indexIntervalBytes = indexIntervalBytes,
maxIndexSize = maxIndexSize)
segments.append(segment)
segment
}
/**
* Flush the log if necessary
*/
private def maybeFlush(numberOfMessages : Int) {
if(unflushed.addAndGet(numberOfMessages) >= flushInterval)
flush()
}
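/*
* Illustrative note (not part of the original source): with a hypothetical flushInterval of 500, appends
* accumulate in the unflushed counter and flush() runs on the append that brings the count to 500 or
* more; flush() then syncs the active segment and resets the counter to zero.
*/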
/**
* Flush this log file to the physical disk
*/
def flush() : Unit = {
if (unflushed.get == 0)
return
lock synchronized {
debug("Flushing log '" + name + "' last flushed: " + getLastFlushedTime + " current time: " +
time.milliseconds)
segments.view.last.flush()
unflushed.set(0)
lastflushedTime.set(time.milliseconds)
}
}
def getOffsetsBefore(timestamp: Long, maxNumOffsets: Int): Seq[Long] = {
val segsArray = segments.view
val offsetTimeArray: Array[(Long, Long)] =
if(segsArray.last.size > 0)
new Array[(Long, Long)](segsArray.length + 1)
else
new Array[(Long, Long)](segsArray.length)
for(i <- 0 until segsArray.length)
offsetTimeArray(i) = (segsArray(i).start, segsArray(i).messageSet.file.lastModified)
if(segsArray.last.size > 0)
offsetTimeArray(segsArray.length) = (logEndOffset, time.milliseconds)
var startIndex = -1
timestamp match {
case OffsetRequest.LatestTime =>
startIndex = offsetTimeArray.length - 1
case OffsetRequest.EarliestTime =>
startIndex = 0
case _ =>
var isFound = false
debug("Offset time array = " + offsetTimeArray.foreach(o => "%d, %d".format(o._1, o._2)))
startIndex = offsetTimeArray.length - 1
while (startIndex >= 0 && !isFound) {
if (offsetTimeArray(startIndex)._2 <= timestamp)
isFound = true
else
startIndex -= 1
}
}
val retSize = maxNumOffsets.min(startIndex + 1)
val ret = new Array[Long](retSize)
for(j <- 0 until retSize) {
ret(j) = offsetTimeArray(startIndex)._1
startIndex -= 1
}
// ensure that the returned seq is in descending order of offsets
ret.toSeq.sortBy(- _)
}
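/*
* Illustrative sketch (not part of the original source): assuming three segments with base offsets 0, 50
* and 100, last-modified times t0 < t1 < t2, and a non-empty active segment, the candidate array is
* [(0, t0), (50, t1), (100, t2), (logEndOffset, now)]. A request with timestamp t1 and maxNumOffsets = 2
* would then return Seq(50, 0), i.e. qualifying offsets in descending order.
*/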
def delete(): Unit = {
deleteSegments(segments.contents.get())
Utils.rm(dir)
}
/* Attempts to delete all provided segments from a log and returns how many it was able to */
def deleteSegments(segments: Seq[LogSegment]): Int = {
var total = 0
for(segment <- segments) {
info("Deleting log segment " + segment.start + " from " + name)
val deletedLog = segment.messageSet.delete()
val deletedIndex = segment.index.delete()
if(!deletedIndex || !deletedLog) {
throw new KafkaStorageException("Deleting log segment " + segment.start + " failed.")
} else {
total += 1
}
if(segment.messageSet.file.exists())
error("Data log file %s still exists".format(segment.messageSet.file.getAbsolutePath))
if(segment.index.file.exists())
error("Index file %s still exists".format(segment.index.file.getAbsolutePath))
}
total
}
def truncateTo(targetOffset: Long) {
if(targetOffset < 0)
throw new IllegalArgumentException("Cannot truncate to a negative offset (%d).".format(targetOffset))
lock synchronized {
val segmentsToBeDeleted = segments.view.filter(segment => segment.start > targetOffset)
val viewSize = segments.view.size
val numSegmentsDeleted = deleteSegments(segmentsToBeDeleted)
/* We should not hit this error because segments.view is locked in markDeletedWhile() */
if(numSegmentsDeleted != segmentsToBeDeleted.size)
error("Failed to delete some segments when attempting to truncate to offset " + targetOffset +")")
if (numSegmentsDeleted == viewSize) {
segments.trunc(segments.view.size)
rollToOffset(targetOffset)
this.nextOffset.set(targetOffset)
} else {
if(targetOffset > logEndOffset) {
error("Target offset %d cannot be greater than the last message offset %d in the log %s".
format(targetOffset, logEndOffset, segments.view.last.messageSet.file.getAbsolutePath))
} else {
// find the log segment that has this hw
val segmentToBeTruncated = findRange(segments.view, targetOffset)
segmentToBeTruncated match {
case Some(segment) =>
val truncatedSegmentIndex = segments.view.indexOf(segment)
segments.truncLast(truncatedSegmentIndex)
segment.truncateTo(targetOffset)
this.nextOffset.set(targetOffset)
info("Truncated log segment %s to target offset %d".format(segments.view.last.messageSet.file.getAbsolutePath, targetOffset))
case None => // nothing to do
}
}
}
}
}
/**
* Truncate all segments in the log and start a new segment on a new offset
*/
def truncateAndStartWithNewOffset(newOffset: Long) {
lock synchronized {
val deletedSegments = segments.trunc(segments.view.size)
info("Truncate and start log '" + name + "' to " + newOffset)
deleteSegments(deletedSegments)
segments.append(new LogSegment(dir,
newOffset,
indexIntervalBytes = indexIntervalBytes,
maxIndexSize = maxIndexSize))
this.nextOffset.set(newOffset)
}
}
def topicName(): String = name.substring(0, name.lastIndexOf("-"))
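// Illustrative note (not part of the original source): the log directory is named "<topic>-<partition>",
// so for a directory named "my-topic-0" topicName() returns "my-topic".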
def getLastFlushedTime(): Long = lastflushedTime.get
override def toString() = "Log(" + this.dir + ")"
}