/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package kafka.server.epoch
import java.util.concurrent.locks.ReentrantReadWriteLock
import kafka.server.checkpoints.LeaderEpochCheckpoint
import org.apache.kafka.common.requests.EpochEndOffset._
import kafka.utils.CoreUtils._
import kafka.utils.Logging
import org.apache.kafka.common.TopicPartition
import scala.collection.Seq
import scala.collection.mutable.ArrayBuffer
/**
* Represents a cache of (LeaderEpoch => Offset) mappings for a particular replica.
*
* Leader Epoch = epoch assigned to each leader by the controller.
* Offset = offset of the first message in each epoch.
*
* @param topicPartition the associated topic partition
* @param logEndOffset function to fetch the current log end offset
* @param checkpoint the checkpoint file
*/
class LeaderEpochFileCache(topicPartition: TopicPartition,
logEndOffset: () => Long,
checkpoint: LeaderEpochCheckpoint) extends Logging {
this.logIdent = s"[LeaderEpochCache $topicPartition] "
private val lock = new ReentrantReadWriteLock()
private var epochs: ArrayBuffer[EpochEntry] = inWriteLock(lock) {
val read = checkpoint.read()
new ArrayBuffer(read.size) ++= read
}
/**
* Assigns the supplied Leader Epoch to the supplied Offset.
* Once the epoch is assigned it cannot be reassigned.
*/
def assign(epoch: Int, startOffset: Long): Unit = {
inWriteLock(lock) {
val updateNeeded = if (epochs.isEmpty) {
true
} else {
val lastEntry = epochs.last
lastEntry.epoch != epoch || startOffset < lastEntry.startOffset
}
if (updateNeeded) {
truncateAndAppend(EpochEntry(epoch, startOffset))
flush()
}
}
}
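// Illustrative sketch (not part of the original source): a newly elected leader would
// typically record its epoch against its current log end offset, e.g.
//   cache.assign(epoch = 5, startOffset = currentLogEndOffset)
// where `cache` and `currentLogEndOffset` are hypothetical caller-side values. Repeating
// the call with the same epoch and a start offset that is not smaller is a no-op.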
/**
* Removes any entries which would violate monotonicity once the newly assigned epoch entry is appended,
* then appends that entry.
*/
private def truncateAndAppend(entryToAppend: EpochEntry): Unit = {
validateAndMaybeWarn(entryToAppend)
val (retainedEpochs, removedEpochs) = epochs.partition { entry =>
entry.epoch < entryToAppend.epoch && entry.startOffset < entryToAppend.startOffset
}
epochs = retainedEpochs :+ entryToAppend
if (removedEpochs.isEmpty) {
debug(s"Appended new epoch entry $entryToAppend. Cache now contains ${epochs.size} entries.")
} else if (removedEpochs.size > 1 || removedEpochs.head.startOffset != entryToAppend.startOffset) {
// Only log a warning if there were non-trivial removals. If the start offset of the new entry
// matches the start offset of the removed epoch, then no data has been written and the truncation
// is expected.
warn(s"New epoch entry $entryToAppend caused truncation of conflicting entries $removedEpochs. " +
s"Cache now contains ${epochs.size} entries.")
}
}
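// Worked example (illustrative only): with cached entries EpochEntry(3, 100) and
// EpochEntry(4, 200), appending EpochEntry(4, 150) retains only EpochEntry(3, 100),
// because the existing epoch-4 entry fails the "strictly smaller epoch and start offset"
// test. The cache becomes [EpochEntry(3, 100), EpochEntry(4, 150)] and a warning is
// logged since the removed entry's start offset differs from the new one.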
def nonEmpty: Boolean = inReadLock(lock) {
epochs.nonEmpty
}
/**
* Returns the current Leader Epoch if one exists. This is the latest epoch
* which has messages assigned to it.
*/
def latestEpoch: Option[Int] = {
inReadLock(lock) {
epochs.lastOption.map(_.epoch)
}
}
/**
* Get the earliest cached entry if one exists.
*/
def earliestEntry: Option[EpochEntry] = {
inReadLock(lock) {
epochs.headOption
}
}
/**
* Returns the Leader Epoch and the End Offset for a requested Leader Epoch.
*
* The Leader Epoch returned is the largest epoch less than or equal to the requested Leader
* Epoch. The End Offset is the end offset of this epoch, which is defined as the start offset
* of the first Leader Epoch larger than the Leader Epoch requested, or else the Log End
* Offset if the latest epoch was requested.
*
* During the upgrade phase, where existing messages may not have a leader epoch, a request
* for UNDEFINED_EPOCH returns UNDEFINED_EPOCH_OFFSET so that the follower falls back to the
* High Watermark. If the requested epoch is smaller than any cached epoch, the start offset
* of the first cached epoch is returned so that the follower can replicate from the leader's
* log start offset.
*
* @param requestedEpoch requested leader epoch
* @return found leader epoch and end offset
*/
def endOffsetFor(requestedEpoch: Int): (Int, Long) = {
inReadLock(lock) {
val epochAndOffset =
if (requestedEpoch == UNDEFINED_EPOCH) {
// This may happen if a bootstrapping follower sends a request with undefined epoch or
// a follower is on the older message format where leader epochs are not recorded
(UNDEFINED_EPOCH, UNDEFINED_EPOCH_OFFSET)
} else if (latestEpoch.contains(requestedEpoch)) {
// For the leader, the latest epoch is always the current leader epoch that is still being written to.
// Followers should not have any reason to query for the end offset of the current epoch, but a consumer
// might if it is verifying its committed offset following a group rebalance. In this case, we return
// the current log end offset which makes the truncation check work as expected.
(requestedEpoch, logEndOffset())
} else {
val (subsequentEpochs, previousEpochs) = epochs.partition { e => e.epoch > requestedEpoch}
if (subsequentEpochs.isEmpty) {
// The requested epoch is larger than any known epoch. This case should never be hit because
// the latest cached epoch is always the largest.
(UNDEFINED_EPOCH, UNDEFINED_EPOCH_OFFSET)
} else if (previousEpochs.isEmpty) {
// The requested epoch is smaller than any known epoch, so we return the start offset of the first
// known epoch which is larger than it. This may be inaccurate as there could have been
// epochs in between, but the point is that the data has already been removed from the log
// and we want to ensure that the follower can replicate correctly beginning from the leader's
// start offset.
(requestedEpoch, subsequentEpochs.head.startOffset)
} else {
// We have at least one previous epoch and one subsequent epoch. The result is the first
// prior epoch and the starting offset of the first subsequent epoch.
(previousEpochs.last.epoch, subsequentEpochs.head.startOffset)
}
}
debug(s"Processed end offset request for epoch $requestedEpoch and returning epoch ${epochAndOffset._1} " +
s"with end offset ${epochAndOffset._2} from epoch cache of size ${epochs.size}")
epochAndOffset
}
}
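// Worked example (illustrative only): with cached entries EpochEntry(0, 0),
// EpochEntry(2, 100), EpochEntry(4, 200) and a log end offset of 300:
//   endOffsetFor(4) => (4, 300)   latest epoch, so the current log end offset is returned
//   endOffsetFor(3) => (2, 200)   largest epoch <= 3, ending where epoch 4 begins
//   endOffsetFor(1) => (0, 100)   largest epoch <= 1, ending where epoch 2 begins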
/**
* Removes all epoch entries from the store with start offsets greater than or equal to the passed offset.
*/
def truncateFromEnd(endOffset: Long): Unit = {
inWriteLock(lock) {
if (endOffset >= 0 && latestEntry.exists(_.startOffset >= endOffset)) {
val (subsequentEntries, previousEntries) = epochs.partition(_.startOffset >= endOffset)
epochs = previousEntries
flush()
debug(s"Cleared entries $subsequentEntries from epoch cache after " +
s"truncating to end offset $endOffset, leaving ${epochs.size} entries in the cache.")
}
}
}
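// Illustrative example: with cached entries EpochEntry(0, 0), EpochEntry(1, 100),
// EpochEntry(2, 200), calling truncateFromEnd(100) removes the entries starting at
// offsets 100 and 200, leaving only EpochEntry(0, 0).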
/**
* Clears old epoch entries. This method searches for the latest epoch entry whose start offset is less than or
* equal to the passed offset, updates that entry's start offset to the passed offset, then clears any earlier
* entries.
*
* This method is exclusive: truncateFromStart(6) will retain an entry starting at offset 6.
*
* @param startOffset the offset to clear up to
*/
def truncateFromStart(startOffset: Long): Unit = {
inWriteLock(lock) {
if (epochs.nonEmpty) {
val (subsequentEntries, previousEntries) = epochs.partition(_.startOffset > startOffset)
previousEntries.lastOption.foreach { firstBeforeStartOffset =>
val updatedFirstEntry = EpochEntry(firstBeforeStartOffset.epoch, startOffset)
epochs = updatedFirstEntry +: subsequentEntries
flush()
debug(s"Cleared entries $previousEntries and rewrote first entry $updatedFirstEntry after " +
s"truncating to start offset $startOffset, leaving ${epochs.size} in the cache.")
}
}
}
}
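// Illustrative example: with cached entries EpochEntry(0, 0), EpochEntry(1, 100),
// EpochEntry(2, 200), calling truncateFromStart(150) drops EpochEntry(0, 0) and rewrites
// the epoch-1 entry, leaving [EpochEntry(1, 150), EpochEntry(2, 200)].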
/**
* Delete all entries.
*/
def clearAndFlush(): Unit = {
inWriteLock(lock) {
epochs.clear()
flush()
}
}
def clear(): Unit = {
inWriteLock(lock) {
epochs.clear()
}
}
// Visible for testing
def epochEntries: Seq[EpochEntry] = {
epochs
}
private def latestEntry: Option[EpochEntry] = epochs.lastOption
private def flush(): Unit = {
checkpoint.write(epochs)
}
private def validateAndMaybeWarn(entry: EpochEntry): Unit = {
if (entry.epoch < 0) {
throw new IllegalArgumentException(s"Received invalid partition leader epoch entry $entry")
} else {
// If the latest append violates the monotonicity of epochs or starting offsets, our choices
// are either to raise an error, ignore the append, or allow the append and truncate the
// conflicting entries from the cache. Raising an error risks killing the fetcher threads in
// pathological cases (i.e. cases we are not yet aware of). We instead take the final approach
// and assume that the latest append is always accurate.
latestEntry.foreach { latest =>
if (entry.epoch < latest.epoch)
warn(s"Received leader epoch assignment $entry which has an epoch less than the epoch " +
s"of the latest entry $latest. This implies messages have arrived out of order.")
else if (entry.startOffset < latest.startOffset)
warn(s"Received leader epoch assignment $entry which has a starting offset which is less than " +
s"the starting offset of the latest entry $latest. This implies messages have arrived out of order.")
}
}
}
}
// Mapping of an epoch to the start offset of the first message assigned to that epoch
case class EpochEntry(epoch: Int, startOffset: Long) {
override def toString: String = {
s"EpochEntry(epoch=$epoch, startOffset=$startOffset)"
}
}
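// Usage sketch (illustrative only, not part of the original file). It assumes the
// LeaderEpochCheckpoint trait exposes the read()/write(Seq[EpochEntry]) pair used by
// flush() above; the in-memory checkpoint, topic name and offsets are hypothetical.
//
//   val checkpoint = new LeaderEpochCheckpoint {
//     private var entries: Seq[EpochEntry] = Seq.empty
//     override def write(epochs: Seq[EpochEntry]): Unit = entries = epochs
//     override def read(): Seq[EpochEntry] = entries
//   }
//   val cache = new LeaderEpochFileCache(new TopicPartition("topic", 0), () => 300L, checkpoint)
//   cache.assign(epoch = 0, startOffset = 0L)
//   cache.assign(epoch = 2, startOffset = 100L)
//   cache.endOffsetFor(2)   // => (2, 300): the latest epoch returns the current log end offset
//   cache.endOffsetFor(1)   // => (0, 100): largest cached epoch <= 1, ending where epoch 2 begins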