All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.kafka.clients.consumer.internals.SubscriptionState Maven / Gradle / Ivy

There is a newer version: 1.4.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.clients.consumer.internals;

import org.apache.kafka.clients.ApiVersions;
import org.apache.kafka.clients.Metadata;
import org.apache.kafka.clients.NodeApiVersions;
import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.NoOffsetForPartitionException;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.clients.consumer.OffsetResetStrategy;
import org.apache.kafka.common.IsolationLevel;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.internals.PartitionStates;
import org.apache.kafka.common.message.OffsetForLeaderEpochResponseData.EpochEndOffset;
import org.apache.kafka.common.utils.LogContext;
import org.slf4j.Logger;

import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;
import java.util.function.LongSupplier;
import java.util.function.Predicate;
import java.util.regex.Pattern;

import static org.apache.kafka.clients.consumer.internals.OffsetFetcherUtils.hasUsableOffsetForLeaderEpochVersion;
import static org.apache.kafka.common.requests.OffsetsForLeaderEpochResponse.UNDEFINED_EPOCH;
import static org.apache.kafka.common.requests.OffsetsForLeaderEpochResponse.UNDEFINED_EPOCH_OFFSET;

/**
 * A class for tracking the topics, partitions, and offsets for the consumer. A partition
 * is "assigned" either directly with {@link #assignFromUser(Set)} (manual assignment)
 * or with {@link #assignFromSubscribed(Collection)} (automatic assignment from subscription).
 * 

 * <p>
 * Once assigned, the partition is not considered "fetchable" until its initial position has
 * been set with {@link #seekValidated(TopicPartition, FetchPosition)}. Fetchable partitions
 * track a position which is the last offset that has been returned to the user. You can
 * suspend fetching from a partition through {@link #pause(TopicPartition)} without affecting
 * the consumed position. The partition will remain unfetchable until
 * {@link #resume(TopicPartition)} is called. You can also query the pause state independently
 * with {@link #isPaused(TopicPartition)}.
 *

 * <p>
 * Note that the pause state as well as the consumed positions are not preserved when the
 * partition assignment is changed, whether directly by the user or through a group rebalance.
 * <p>

* Thread Safety: this class is thread-safe. */ public class SubscriptionState { private static final String SUBSCRIPTION_EXCEPTION_MESSAGE = "Subscription to topics, partitions and pattern are mutually exclusive"; private final Logger log; private enum SubscriptionType { NONE, AUTO_TOPICS, AUTO_PATTERN, USER_ASSIGNED } /* the type of subscription */ private SubscriptionType subscriptionType; /* the pattern user has requested */ private Pattern subscribedPattern; /* the list of topics the user has requested */ private Set subscription; /* The list of topics the group has subscribed to. This may include some topics which are not part * of `subscription` for the leader of a group since it is responsible for detecting metadata changes * which require a group rebalance. */ private Set groupSubscription; /* the partitions that are currently assigned, note that the order of partition matters (see FetchBuilder for more details) */ private final PartitionStates assignment; /* Default offset reset strategy */ private final OffsetResetStrategy defaultResetStrategy; /* User-provided listener to be invoked when assignment changes */ private Optional rebalanceListener; private int assignmentId = 0; @Override public synchronized String toString() { return "SubscriptionState{" + "type=" + subscriptionType + ", subscribedPattern=" + subscribedPattern + ", subscription=" + String.join(",", subscription) + ", groupSubscription=" + String.join(",", groupSubscription) + ", defaultResetStrategy=" + defaultResetStrategy + ", assignment=" + assignment.partitionStateValues() + " (id=" + assignmentId + ")}"; } public synchronized String prettyString() { switch (subscriptionType) { case NONE: return "None"; case AUTO_TOPICS: return "Subscribe(" + String.join(",", subscription) + ")"; case AUTO_PATTERN: return "Subscribe(" + subscribedPattern + ")"; case USER_ASSIGNED: return "Assign(" + assignedPartitions() + " , id=" + assignmentId + ")"; default: throw new 
IllegalStateException("Unrecognized subscription type: " + subscriptionType); } } public SubscriptionState(LogContext logContext, OffsetResetStrategy defaultResetStrategy) { this.log = logContext.logger(this.getClass()); this.defaultResetStrategy = defaultResetStrategy; this.subscription = new TreeSet<>(); // use a sorted set for better logging this.assignment = new PartitionStates<>(); this.groupSubscription = new HashSet<>(); this.subscribedPattern = null; this.subscriptionType = SubscriptionType.NONE; } /** * Monotonically increasing id which is incremented after every assignment change. This can * be used to check when an assignment has changed. * * @return The current assignment Id */ synchronized int assignmentId() { return assignmentId; } /** * This method sets the subscription type if it is not already set (i.e. when it is NONE), * or verifies that the subscription type is equal to the give type when it is set (i.e. * when it is not NONE) * @param type The given subscription type */ private void setSubscriptionType(SubscriptionType type) { if (this.subscriptionType == SubscriptionType.NONE) this.subscriptionType = type; else if (this.subscriptionType != type) throw new IllegalStateException(SUBSCRIPTION_EXCEPTION_MESSAGE); } public synchronized boolean subscribe(Set topics, Optional listener) { registerRebalanceListener(listener); setSubscriptionType(SubscriptionType.AUTO_TOPICS); return changeSubscription(topics); } public synchronized void subscribe(Pattern pattern, Optional listener) { registerRebalanceListener(listener); setSubscriptionType(SubscriptionType.AUTO_PATTERN); this.subscribedPattern = pattern; } public synchronized boolean subscribeFromPattern(Set topics) { if (subscriptionType != SubscriptionType.AUTO_PATTERN) throw new IllegalArgumentException("Attempt to subscribe from pattern while subscription type set to " + subscriptionType); return changeSubscription(topics); } private boolean changeSubscription(Set topicsToSubscribe) { if 
(subscription.equals(topicsToSubscribe)) return false; subscription = topicsToSubscribe; return true; } /** * Set the current group subscription. This is used by the group leader to ensure * that it receives metadata updates for all topics that the group is interested in. * * @param topics All topics from the group subscription * @return true if the group subscription contains topics which are not part of the local subscription */ synchronized boolean groupSubscribe(Collection topics) { if (!hasAutoAssignedPartitions()) throw new IllegalStateException(SUBSCRIPTION_EXCEPTION_MESSAGE); groupSubscription = new HashSet<>(topics); return !subscription.containsAll(groupSubscription); } /** * Reset the group's subscription to only contain topics subscribed by this consumer. */ synchronized void resetGroupSubscription() { groupSubscription = Collections.emptySet(); } /** * Change the assignment to the specified partitions provided by the user, * note this is different from {@link #assignFromSubscribed(Collection)} * whose input partitions are provided from the subscribed topics. 
*/ public synchronized boolean assignFromUser(Set partitions) { setSubscriptionType(SubscriptionType.USER_ASSIGNED); if (this.assignment.partitionSet().equals(partitions)) return false; assignmentId++; // update the subscribed topics Set manualSubscribedTopics = new HashSet<>(); Map partitionToState = new HashMap<>(); for (TopicPartition partition : partitions) { TopicPartitionState state = assignment.stateValue(partition); if (state == null) state = new TopicPartitionState(); partitionToState.put(partition, state); manualSubscribedTopics.add(partition.topic()); } this.assignment.set(partitionToState); return changeSubscription(manualSubscribedTopics); } /** * @return true if assignments matches subscription, otherwise false */ public synchronized boolean checkAssignmentMatchedSubscription(Collection assignments) { for (TopicPartition topicPartition : assignments) { if (this.subscribedPattern != null) { if (!this.subscribedPattern.matcher(topicPartition.topic()).matches()) { log.info("Assigned partition {} for non-subscribed topic regex pattern; subscription pattern is {}", topicPartition, this.subscribedPattern); return false; } } else { if (!this.subscription.contains(topicPartition.topic())) { log.info("Assigned partition {} for non-subscribed topic; subscription is {}", topicPartition, this.subscription); return false; } } } return true; } /** * Change the assignment to the specified partitions returned from the coordinator, note this is * different from {@link #assignFromUser(Set)} which directly set the assignment from user inputs. 
*/ public synchronized void assignFromSubscribed(Collection assignments) { if (!this.hasAutoAssignedPartitions()) throw new IllegalArgumentException("Attempt to dynamically assign partitions while manual assignment in use"); Map assignedPartitionStates = new HashMap<>(assignments.size()); for (TopicPartition tp : assignments) { TopicPartitionState state = this.assignment.stateValue(tp); if (state == null) state = new TopicPartitionState(); assignedPartitionStates.put(tp, state); } assignmentId++; this.assignment.set(assignedPartitionStates); } private void registerRebalanceListener(Optional listener) { this.rebalanceListener = Objects.requireNonNull(listener, "RebalanceListener cannot be null"); } /** * Check whether pattern subscription is in use. * */ synchronized boolean hasPatternSubscription() { return this.subscriptionType == SubscriptionType.AUTO_PATTERN; } public synchronized boolean hasNoSubscriptionOrUserAssignment() { return this.subscriptionType == SubscriptionType.NONE; } public synchronized void unsubscribe() { this.subscription = Collections.emptySet(); this.groupSubscription = Collections.emptySet(); this.assignment.clear(); this.subscribedPattern = null; this.subscriptionType = SubscriptionType.NONE; this.assignmentId++; } /** * Check whether a topic matches a subscribed pattern. * * @return true if pattern subscription is in use and the topic matches the subscribed pattern, false otherwise */ synchronized boolean matchesSubscribedPattern(String topic) { Pattern pattern = this.subscribedPattern; if (hasPatternSubscription() && pattern != null) return pattern.matcher(topic).matches(); return false; } public synchronized Set subscription() { if (hasAutoAssignedPartitions()) return this.subscription; return Collections.emptySet(); } public synchronized Set pausedPartitions() { return collectPartitions(TopicPartitionState::isPaused); } /** * Get the subscription topics for which metadata is required. 
For the leader, this will include * the union of the subscriptions of all group members. For followers, it is just that member's * subscription. This is used when querying topic metadata to detect the metadata changes which would * require rebalancing. The leader fetches metadata for all topics in the group so that it * can do the partition assignment (which requires at least partition counts for all topics * to be assigned). * * @return The union of all subscribed topics in the group if this member is the leader * of the current generation; otherwise it returns the same set as {@link #subscription()} */ synchronized Set metadataTopics() { if (groupSubscription.isEmpty()) return subscription; else if (groupSubscription.containsAll(subscription)) return groupSubscription; else { // When subscription changes `groupSubscription` may be outdated, ensure that // new subscription topics are returned. Set topics = new HashSet<>(groupSubscription); topics.addAll(subscription); return topics; } } synchronized boolean needsMetadata(String topic) { return subscription.contains(topic) || groupSubscription.contains(topic); } private TopicPartitionState assignedState(TopicPartition tp) { TopicPartitionState state = this.assignment.stateValue(tp); if (state == null) throw new IllegalStateException("No current assignment for partition " + tp); return state; } private TopicPartitionState assignedStateOrNull(TopicPartition tp) { return this.assignment.stateValue(tp); } public synchronized void seekValidated(TopicPartition tp, FetchPosition position) { assignedState(tp).seekValidated(position); } public void seek(TopicPartition tp, long offset) { seekValidated(tp, new FetchPosition(offset)); } public void seekUnvalidated(TopicPartition tp, FetchPosition position) { assignedState(tp).seekUnvalidated(position); } synchronized void maybeSeekUnvalidated(TopicPartition tp, FetchPosition position, OffsetResetStrategy requestedResetStrategy) { TopicPartitionState state = 
assignedStateOrNull(tp); if (state == null) { log.debug("Skipping reset of partition {} since it is no longer assigned", tp); } else if (!state.awaitingReset()) { log.debug("Skipping reset of partition {} since reset is no longer needed", tp); } else if (requestedResetStrategy != state.resetStrategy) { log.debug("Skipping reset of partition {} since an alternative reset has been requested", tp); } else { log.info("Resetting offset for partition {} to position {}.", tp, position); state.seekUnvalidated(position); } } /** * @return a modifiable copy of the currently assigned partitions */ public synchronized Set assignedPartitions() { return new HashSet<>(this.assignment.partitionSet()); } /** * @return a modifiable copy of the currently assigned partitions as a list */ public synchronized List assignedPartitionsList() { return new ArrayList<>(this.assignment.partitionSet()); } /** * Provides the number of assigned partitions in a thread safe manner. * @return the number of assigned partitions. 
*/ synchronized int numAssignedPartitions() { return this.assignment.size(); } // Visible for testing public synchronized List fetchablePartitions(Predicate isAvailable) { // Since this is in the hot-path for fetching, we do this instead of using java.util.stream API List result = new ArrayList<>(); assignment.forEach((topicPartition, topicPartitionState) -> { // Cheap check is first to avoid evaluating the predicate if possible if (topicPartitionState.isFetchable() && isAvailable.test(topicPartition)) { result.add(topicPartition); } }); return result; } public synchronized boolean hasAutoAssignedPartitions() { return this.subscriptionType == SubscriptionType.AUTO_TOPICS || this.subscriptionType == SubscriptionType.AUTO_PATTERN; } public synchronized void position(TopicPartition tp, FetchPosition position) { assignedState(tp).position(position); } /** * Enter the offset validation state if the leader for this partition is known to support a usable version of the * OffsetsForLeaderEpoch API. If the leader node does not support the API, simply complete the offset validation. 
* * @param apiVersions supported API versions * @param tp topic partition to validate * @param leaderAndEpoch leader epoch of the topic partition * @return true if we enter the offset validation state */ public synchronized boolean maybeValidatePositionForCurrentLeader(ApiVersions apiVersions, TopicPartition tp, Metadata.LeaderAndEpoch leaderAndEpoch) { TopicPartitionState state = assignedStateOrNull(tp); if (state == null) { log.debug("Skipping validating position for partition {} which is not currently assigned.", tp); return false; } if (leaderAndEpoch.leader.isPresent()) { NodeApiVersions nodeApiVersions = apiVersions.get(leaderAndEpoch.leader.get().idString()); if (nodeApiVersions == null || hasUsableOffsetForLeaderEpochVersion(nodeApiVersions)) { return state.maybeValidatePosition(leaderAndEpoch); } else { // If the broker does not support a newer version of OffsetsForLeaderEpoch, we skip validation state.updatePositionLeaderNoValidation(leaderAndEpoch); return false; } } else { return state.maybeValidatePosition(leaderAndEpoch); } } /** * Attempt to complete validation with the end offset returned from the OffsetForLeaderEpoch request. * @return Log truncation details if detected and no reset policy is defined. 
*/ public synchronized Optional maybeCompleteValidation(TopicPartition tp, FetchPosition requestPosition, EpochEndOffset epochEndOffset) { TopicPartitionState state = assignedStateOrNull(tp); if (state == null) { log.debug("Skipping completed validation for partition {} which is not currently assigned.", tp); } else if (!state.awaitingValidation()) { log.debug("Skipping completed validation for partition {} which is no longer expecting validation.", tp); } else { SubscriptionState.FetchPosition currentPosition = state.position; if (!currentPosition.equals(requestPosition)) { log.debug("Skipping completed validation for partition {} since the current position {} " + "no longer matches the position {} when the request was sent", tp, currentPosition, requestPosition); } else if (epochEndOffset.endOffset() == UNDEFINED_EPOCH_OFFSET || epochEndOffset.leaderEpoch() == UNDEFINED_EPOCH) { if (hasDefaultOffsetResetPolicy()) { log.info("Truncation detected for partition {} at offset {}, resetting offset", tp, currentPosition); requestOffsetReset(tp); } else { log.warn("Truncation detected for partition {} at offset {}, but no reset policy is set", tp, currentPosition); return Optional.of(new LogTruncation(tp, requestPosition, Optional.empty())); } } else if (epochEndOffset.endOffset() < currentPosition.offset) { if (hasDefaultOffsetResetPolicy()) { SubscriptionState.FetchPosition newPosition = new SubscriptionState.FetchPosition( epochEndOffset.endOffset(), Optional.of(epochEndOffset.leaderEpoch()), currentPosition.currentLeader); log.info("Truncation detected for partition {} at offset {}, resetting offset to " + "the first offset known to diverge {}", tp, currentPosition, newPosition); state.seekValidated(newPosition); } else { OffsetAndMetadata divergentOffset = new OffsetAndMetadata(epochEndOffset.endOffset(), Optional.of(epochEndOffset.leaderEpoch()), null); log.warn("Truncation detected for partition {} at offset {} (the end offset from the " + "broker is {}), but no 
reset policy is set", tp, currentPosition, divergentOffset); return Optional.of(new LogTruncation(tp, requestPosition, Optional.of(divergentOffset))); } } else { state.completeValidation(); } } return Optional.empty(); } public synchronized boolean awaitingValidation(TopicPartition tp) { return assignedState(tp).awaitingValidation(); } public synchronized void completeValidation(TopicPartition tp) { assignedState(tp).completeValidation(); } public synchronized FetchPosition validPosition(TopicPartition tp) { return assignedState(tp).validPosition(); } public synchronized FetchPosition position(TopicPartition tp) { return assignedState(tp).position; } public synchronized FetchPosition positionOrNull(TopicPartition tp) { final TopicPartitionState state = assignedStateOrNull(tp); if (state == null) { return null; } return assignedState(tp).position; } public synchronized Long partitionLag(TopicPartition tp, IsolationLevel isolationLevel) { TopicPartitionState topicPartitionState = assignedState(tp); if (topicPartitionState.position == null) { return null; } else if (isolationLevel == IsolationLevel.READ_COMMITTED) { return topicPartitionState.lastStableOffset == null ? null : topicPartitionState.lastStableOffset - topicPartitionState.position.offset; } else { return topicPartitionState.highWatermark == null ? 
null : topicPartitionState.highWatermark - topicPartitionState.position.offset; } } public synchronized Long partitionEndOffset(TopicPartition tp, IsolationLevel isolationLevel) { TopicPartitionState topicPartitionState = assignedState(tp); if (isolationLevel == IsolationLevel.READ_COMMITTED) { return topicPartitionState.lastStableOffset; } else { return topicPartitionState.highWatermark; } } public synchronized void requestPartitionEndOffset(TopicPartition tp) { TopicPartitionState topicPartitionState = assignedState(tp); topicPartitionState.requestEndOffset(); } public synchronized boolean partitionEndOffsetRequested(TopicPartition tp) { TopicPartitionState topicPartitionState = assignedState(tp); return topicPartitionState.endOffsetRequested(); } synchronized Long partitionLead(TopicPartition tp) { TopicPartitionState topicPartitionState = assignedState(tp); return topicPartitionState.logStartOffset == null ? null : topicPartitionState.position.offset - topicPartitionState.logStartOffset; } synchronized void updateHighWatermark(TopicPartition tp, long highWatermark) { assignedState(tp).highWatermark(highWatermark); } synchronized boolean tryUpdatingHighWatermark(TopicPartition tp, long highWatermark) { final TopicPartitionState state = assignedStateOrNull(tp); if (state != null) { assignedState(tp).highWatermark(highWatermark); return true; } return false; } synchronized boolean tryUpdatingLogStartOffset(TopicPartition tp, long highWatermark) { final TopicPartitionState state = assignedStateOrNull(tp); if (state != null) { assignedState(tp).logStartOffset(highWatermark); return true; } return false; } synchronized void updateLastStableOffset(TopicPartition tp, long lastStableOffset) { assignedState(tp).lastStableOffset(lastStableOffset); } synchronized boolean tryUpdatingLastStableOffset(TopicPartition tp, long lastStableOffset) { final TopicPartitionState state = assignedStateOrNull(tp); if (state != null) { assignedState(tp).lastStableOffset(lastStableOffset); 
return true; } return false; } /** * Set the preferred read replica with a lease timeout. After this time, the replica will no longer be valid and * {@link #preferredReadReplica(TopicPartition, long)} will return an empty result. * * @param tp The topic partition * @param preferredReadReplicaId The preferred read replica * @param timeMs The time at which this preferred replica is no longer valid */ public synchronized void updatePreferredReadReplica(TopicPartition tp, int preferredReadReplicaId, LongSupplier timeMs) { assignedState(tp).updatePreferredReadReplica(preferredReadReplicaId, timeMs); } /** * Tries to set the preferred read replica with a lease timeout. After this time, the replica will no longer be valid and * {@link #preferredReadReplica(TopicPartition, long)} will return an empty result. If the preferred replica of * the partition could not be updated (e.g. because the partition is not assigned) this method will return * {@code false}, otherwise it will return {@code true}. * * @param tp The topic partition * @param preferredReadReplicaId The preferred read replica * @param timeMs The time at which this preferred replica is no longer valid * @return {@code true} if the preferred read replica was updated, {@code false} otherwise. */ public synchronized boolean tryUpdatingPreferredReadReplica(TopicPartition tp, int preferredReadReplicaId, LongSupplier timeMs) { final TopicPartitionState state = assignedStateOrNull(tp); if (state != null) { assignedState(tp).updatePreferredReadReplica(preferredReadReplicaId, timeMs); return true; } return false; } /** * Get the preferred read replica * * @param tp The topic partition * @param timeMs The current time * @return Returns the current preferred read replica, if it has been set and if it has not expired. 
*/ public synchronized Optional preferredReadReplica(TopicPartition tp, long timeMs) { final TopicPartitionState topicPartitionState = assignedStateOrNull(tp); if (topicPartitionState == null) { return Optional.empty(); } else { return topicPartitionState.preferredReadReplica(timeMs); } } /** * Unset the preferred read replica. This causes the fetcher to go back to the leader for fetches. * * @param tp The topic partition * @return the removed preferred read replica if set, Empty otherwise. */ public synchronized Optional clearPreferredReadReplica(TopicPartition tp) { final TopicPartitionState topicPartitionState = assignedStateOrNull(tp); if (topicPartitionState == null) { return Optional.empty(); } else { return topicPartitionState.clearPreferredReadReplica(); } } public synchronized Map allConsumed() { Map allConsumed = new HashMap<>(); assignment.forEach((topicPartition, partitionState) -> { if (partitionState.hasValidPosition()) allConsumed.put(topicPartition, new OffsetAndMetadata(partitionState.position.offset, partitionState.position.offsetEpoch, "")); }); return allConsumed; } public synchronized void requestOffsetReset(TopicPartition partition, OffsetResetStrategy offsetResetStrategy) { assignedState(partition).reset(offsetResetStrategy); } public synchronized void requestOffsetReset(Collection partitions, OffsetResetStrategy offsetResetStrategy) { partitions.forEach(tp -> { log.info("Seeking to {} offset of partition {}", offsetResetStrategy, tp); assignedState(tp).reset(offsetResetStrategy); }); } public void requestOffsetReset(TopicPartition partition) { requestOffsetReset(partition, defaultResetStrategy); } public synchronized void requestOffsetResetIfPartitionAssigned(TopicPartition partition) { final TopicPartitionState state = assignedStateOrNull(partition); if (state != null) { state.reset(defaultResetStrategy); } } synchronized void setNextAllowedRetry(Set partitions, long nextAllowResetTimeMs) { for (TopicPartition partition : partitions) { 
assignedState(partition).setNextAllowedRetry(nextAllowResetTimeMs); } } boolean hasDefaultOffsetResetPolicy() { return defaultResetStrategy != OffsetResetStrategy.NONE; } public synchronized boolean isOffsetResetNeeded(TopicPartition partition) { return assignedState(partition).awaitingReset(); } public synchronized OffsetResetStrategy resetStrategy(TopicPartition partition) { return assignedState(partition).resetStrategy(); } public synchronized boolean hasAllFetchPositions() { // Since this is in the hot-path for fetching, we do this instead of using java.util.stream API Iterator it = assignment.stateIterator(); while (it.hasNext()) { if (!it.next().hasValidPosition()) { return false; } } return true; } public synchronized Set initializingPartitions() { return collectPartitions(TopicPartitionState::shouldInitialize); } private Set collectPartitions(Predicate filter) { Set result = new HashSet<>(); assignment.forEach((topicPartition, topicPartitionState) -> { if (filter.test(topicPartitionState)) { result.add(topicPartition); } }); return result; } /** * Note: this will not attempt to reset partitions that are in the process of being assigned * and are pending the completion of any {@link ConsumerRebalanceListener#onPartitionsAssigned(Collection)} * callbacks. * *

* * This method only appears to be invoked the by the {@link KafkaConsumer} during its * {@link KafkaConsumer#poll(Duration)} logic. Direct calls to methods like * {@link #requestOffsetReset(TopicPartition)}, {@link #requestOffsetResetIfPartitionAssigned(TopicPartition)}, * etc. do not skip partitions pending assignment. */ public synchronized void resetInitializingPositions() { final Set partitionsWithNoOffsets = new HashSet<>(); assignment.forEach((tp, partitionState) -> { if (partitionState.shouldInitialize()) { if (defaultResetStrategy == OffsetResetStrategy.NONE) partitionsWithNoOffsets.add(tp); else requestOffsetReset(tp); } }); if (!partitionsWithNoOffsets.isEmpty()) throw new NoOffsetForPartitionException(partitionsWithNoOffsets); } public synchronized Set partitionsNeedingReset(long nowMs) { return collectPartitions(state -> state.awaitingReset() && !state.awaitingRetryBackoff(nowMs)); } public synchronized Set partitionsNeedingValidation(long nowMs) { return collectPartitions(state -> state.awaitingValidation() && !state.awaitingRetryBackoff(nowMs)); } public synchronized boolean isAssigned(TopicPartition tp) { return assignment.contains(tp); } public synchronized boolean isPaused(TopicPartition tp) { TopicPartitionState assignedOrNull = assignedStateOrNull(tp); return assignedOrNull != null && assignedOrNull.isPaused(); } synchronized boolean isFetchable(TopicPartition tp) { TopicPartitionState assignedOrNull = assignedStateOrNull(tp); return assignedOrNull != null && assignedOrNull.isFetchable(); } public synchronized boolean hasValidPosition(TopicPartition tp) { TopicPartitionState assignedOrNull = assignedStateOrNull(tp); return assignedOrNull != null && assignedOrNull.hasValidPosition(); } public synchronized void pause(TopicPartition tp) { assignedState(tp).pause(); } public synchronized void markPendingRevocation(Set tps) { tps.forEach(tp -> assignedState(tp).markPendingRevocation()); } // Visible for testing synchronized void 
markPendingOnAssignedCallback(Collection tps, boolean pendingOnAssignedCallback) { tps.forEach(tp -> assignedState(tp).markPendingOnAssignedCallback(pendingOnAssignedCallback)); } /** * Change the assignment to the specified partitions returned from the coordinator and mark * them as awaiting onPartitionsAssigned callback. This will ensure that the partitions are * included in the assignment, but are not fetchable or initialize positions while the * callback runs. This is expected to be used by the async consumer. * * @param fullAssignment Full collection of partitions assigned. Includes previously owned * and newly added partitions. * @param addedPartitions Subset of the fullAssignment containing the added partitions. These * are not fetchable until the onPartitionsAssigned callback completes. */ public synchronized void assignFromSubscribedAwaitingCallback(Collection fullAssignment, Collection addedPartitions) { assignFromSubscribed(fullAssignment); markPendingOnAssignedCallback(addedPartitions, true); } /** * Enable fetching and updating positions for the given partitions that were added to the * assignment, but waiting for the onPartitionsAssigned callback to complete. This is * expected to be used by the async consumer. */ public synchronized void enablePartitionsAwaitingCallback(Collection partitions) { markPendingOnAssignedCallback(partitions, false); } public synchronized void resume(TopicPartition tp) { assignedState(tp).resume(); } synchronized void requestFailed(Set partitions, long nextRetryTimeMs) { for (TopicPartition partition : partitions) { // by the time the request failed, the assignment may no longer // contain this partition any more, in which case we would just ignore. 
final TopicPartitionState state = assignedStateOrNull(partition); if (state != null) state.requestFailed(nextRetryTimeMs); } } synchronized void movePartitionToEnd(TopicPartition tp) { assignment.moveToEnd(tp); } public synchronized Optional rebalanceListener() { return rebalanceListener; } private static class TopicPartitionState { private FetchState fetchState; private FetchPosition position; // last consumed position private Long highWatermark; // the high watermark from last fetch private Long logStartOffset; // the log start offset private Long lastStableOffset; private boolean paused; // whether this partition has been paused by the user private boolean pendingRevocation; private boolean pendingOnAssignedCallback; private OffsetResetStrategy resetStrategy; // the strategy to use if the offset needs resetting private Long nextRetryTimeMs; private Integer preferredReadReplica; private Long preferredReadReplicaExpireTimeMs; private boolean endOffsetRequested; TopicPartitionState() { this.paused = false; this.pendingRevocation = false; this.pendingOnAssignedCallback = false; this.endOffsetRequested = false; this.fetchState = FetchStates.INITIALIZING; this.position = null; this.highWatermark = null; this.logStartOffset = null; this.lastStableOffset = null; this.resetStrategy = null; this.nextRetryTimeMs = null; this.preferredReadReplica = null; } public boolean endOffsetRequested() { return endOffsetRequested; } public void requestEndOffset() { endOffsetRequested = true; } private void transitionState(FetchState newState, Runnable runIfTransitioned) { FetchState nextState = this.fetchState.transitionTo(newState); if (nextState.equals(newState)) { this.fetchState = nextState; runIfTransitioned.run(); if (this.position == null && nextState.requiresPosition()) { throw new IllegalStateException("Transitioned subscription state to " + nextState + ", but position is null"); } else if (!nextState.requiresPosition()) { this.position = null; } } } private Optional 
preferredReadReplica(long timeMs) { if (preferredReadReplicaExpireTimeMs != null && timeMs > preferredReadReplicaExpireTimeMs) { preferredReadReplica = null; return Optional.empty(); } else { return Optional.ofNullable(preferredReadReplica); } } private void updatePreferredReadReplica(int preferredReadReplica, LongSupplier timeMs) { if (this.preferredReadReplica == null || preferredReadReplica != this.preferredReadReplica) { this.preferredReadReplica = preferredReadReplica; this.preferredReadReplicaExpireTimeMs = timeMs.getAsLong(); } } private Optional clearPreferredReadReplica() { if (preferredReadReplica != null) { int removedReplicaId = this.preferredReadReplica; this.preferredReadReplica = null; this.preferredReadReplicaExpireTimeMs = null; return Optional.of(removedReplicaId); } else { return Optional.empty(); } } private void reset(OffsetResetStrategy strategy) { transitionState(FetchStates.AWAIT_RESET, () -> { this.resetStrategy = strategy; this.nextRetryTimeMs = null; }); } /** * Check if the position exists and needs to be validated. If so, enter the AWAIT_VALIDATION state. This method * also will update the position with the current leader and epoch. 
* * @param currentLeaderAndEpoch leader and epoch to compare the offset with * @return true if the position is now awaiting validation */ private boolean maybeValidatePosition(Metadata.LeaderAndEpoch currentLeaderAndEpoch) { if (this.fetchState.equals(FetchStates.AWAIT_RESET)) { return false; } if (!currentLeaderAndEpoch.leader.isPresent()) { return false; } if (position != null && !position.currentLeader.equals(currentLeaderAndEpoch)) { FetchPosition newPosition = new FetchPosition(position.offset, position.offsetEpoch, currentLeaderAndEpoch); validatePosition(newPosition); preferredReadReplica = null; } return this.fetchState.equals(FetchStates.AWAIT_VALIDATION); } /** * For older versions of the API, we cannot perform offset validation so we simply transition directly to FETCHING */ private void updatePositionLeaderNoValidation(Metadata.LeaderAndEpoch currentLeaderAndEpoch) { if (position != null) { transitionState(FetchStates.FETCHING, () -> { this.position = new FetchPosition(position.offset, position.offsetEpoch, currentLeaderAndEpoch); this.nextRetryTimeMs = null; }); } } private void validatePosition(FetchPosition position) { if (position.offsetEpoch.isPresent() && position.currentLeader.epoch.isPresent()) { transitionState(FetchStates.AWAIT_VALIDATION, () -> { this.position = position; this.nextRetryTimeMs = null; }); } else { // If we have no epoch information for the current position, then we can skip validation transitionState(FetchStates.FETCHING, () -> { this.position = position; this.nextRetryTimeMs = null; }); } } /** * Clear the awaiting validation state and enter fetching. 
*/ private void completeValidation() { if (hasPosition()) { transitionState(FetchStates.FETCHING, () -> this.nextRetryTimeMs = null); } } private boolean awaitingValidation() { return fetchState.equals(FetchStates.AWAIT_VALIDATION); } private boolean awaitingRetryBackoff(long nowMs) { return nextRetryTimeMs != null && nowMs < nextRetryTimeMs; } private boolean awaitingReset() { return fetchState.equals(FetchStates.AWAIT_RESET); } private void setNextAllowedRetry(long nextAllowedRetryTimeMs) { this.nextRetryTimeMs = nextAllowedRetryTimeMs; } private void requestFailed(long nextAllowedRetryTimeMs) { this.nextRetryTimeMs = nextAllowedRetryTimeMs; } private boolean hasValidPosition() { return fetchState.hasValidPosition(); } private boolean hasPosition() { return position != null; } private boolean isPaused() { return paused; } private void seekValidated(FetchPosition position) { transitionState(FetchStates.FETCHING, () -> { this.position = position; this.resetStrategy = null; this.nextRetryTimeMs = null; }); } private void seekUnvalidated(FetchPosition fetchPosition) { seekValidated(fetchPosition); validatePosition(fetchPosition); } private void position(FetchPosition position) { if (!hasValidPosition()) throw new IllegalStateException("Cannot set a new position without a valid current position"); this.position = position; } private FetchPosition validPosition() { if (hasValidPosition()) { return position; } else { return null; } } private void pause() { this.paused = true; } private void markPendingRevocation() { this.pendingRevocation = true; } private void markPendingOnAssignedCallback(boolean pendingOnAssignedCallback) { this.pendingOnAssignedCallback = pendingOnAssignedCallback; } private void resume() { this.paused = false; } /** * True if the partition is in {@link FetchStates#INITIALIZING} state. While in this * state, a position for the partition can be retrieved (based on committed offsets or * partitions offsets). 
* Note that retrieving a position does not mean that we can start fetching from the * partition (see {@link #isFetchable()}) */ private boolean shouldInitialize() { return fetchState.equals(FetchStates.INITIALIZING); } private boolean isFetchable() { return !paused && !pendingRevocation && !pendingOnAssignedCallback && hasValidPosition(); } private void highWatermark(Long highWatermark) { this.highWatermark = highWatermark; this.endOffsetRequested = false; } private void logStartOffset(Long logStartOffset) { this.logStartOffset = logStartOffset; } private void lastStableOffset(Long lastStableOffset) { this.lastStableOffset = lastStableOffset; this.endOffsetRequested = false; } private OffsetResetStrategy resetStrategy() { return resetStrategy; } } /** * The fetch state of a partition. This class is used to determine valid state transitions and expose the some of * the behavior of the current fetch state. Actual state variables are stored in the {@link TopicPartitionState}. */ interface FetchState { default FetchState transitionTo(FetchState newState) { if (validTransitions().contains(newState)) { return newState; } else { return this; } } /** * Return the valid states which this state can transition to */ Collection validTransitions(); /** * Test if this state requires a position to be set */ boolean requiresPosition(); /** * Test if this state is considered to have a valid position which can be used for fetching */ boolean hasValidPosition(); } /** * An enumeration of all the possible fetch states. The state transitions are encoded in the values returned by * {@link FetchState#validTransitions}. 
*/
    enum FetchStates implements FetchState {
        /**
         * No position yet. May move to any other state; holds no position and cannot be fetched from.
         */
        INITIALIZING() {
            @Override
            public Collection<FetchState> validTransitions() {
                return Arrays.asList(FetchStates.FETCHING, FetchStates.AWAIT_RESET, FetchStates.AWAIT_VALIDATION);
            }

            @Override
            public boolean requiresPosition() {
                return false;
            }

            @Override
            public boolean hasValidPosition() {
                return false;
            }
        },

        /**
         * The only state with a position that is valid for fetching.
         */
        FETCHING() {
            @Override
            public Collection<FetchState> validTransitions() {
                return Arrays.asList(FetchStates.FETCHING, FetchStates.AWAIT_RESET, FetchStates.AWAIT_VALIDATION);
            }

            @Override
            public boolean requiresPosition() {
                return true;
            }

            @Override
            public boolean hasValidPosition() {
                return true;
            }
        },

        /**
         * Waiting for an offset reset. Holds no position and cannot move directly to
         * {@link #AWAIT_VALIDATION}.
         */
        AWAIT_RESET() {
            @Override
            public Collection<FetchState> validTransitions() {
                return Arrays.asList(FetchStates.FETCHING, FetchStates.AWAIT_RESET);
            }

            @Override
            public boolean requiresPosition() {
                return false;
            }

            @Override
            public boolean hasValidPosition() {
                return false;
            }
        },

        /**
         * Holds a position that must be validated before it may be used for fetching.
         */
        AWAIT_VALIDATION() {
            @Override
            public Collection<FetchState> validTransitions() {
                return Arrays.asList(FetchStates.FETCHING, FetchStates.AWAIT_RESET, FetchStates.AWAIT_VALIDATION);
            }

            @Override
            public boolean requiresPosition() {
                return true;
            }

            @Override
            public boolean hasValidPosition() {
                return false;
            }
        }
    }

    /**
     * Represents the position of a partition subscription.
     *
     * This includes the offset and epoch from the last record in
     * the batch from a FetchResponse. It also includes the leader epoch at the time the batch was consumed.
*/
    public static class FetchPosition {
        public final long offset;
        final Optional<Integer> offsetEpoch;
        final Metadata.LeaderAndEpoch currentLeader;

        /**
         * Create a position with no offset epoch and no known leader or leader epoch.
         *
         * @param offset Offset of the position.
         */
        FetchPosition(long offset) {
            this(offset, Optional.empty(), Metadata.LeaderAndEpoch.noLeaderOrEpoch());
        }

        /**
         * @param offset        Offset of the position.
         * @param offsetEpoch   Epoch associated with the offset, if known; must not be null.
         * @param currentLeader Leader and epoch associated with this position; must not be null.
         * @throws NullPointerException if {@code offsetEpoch} or {@code currentLeader} is null
         */
        public FetchPosition(long offset, Optional<Integer> offsetEpoch, Metadata.LeaderAndEpoch currentLeader) {
            this.offset = offset;
            this.offsetEpoch = Objects.requireNonNull(offsetEpoch);
            this.currentLeader = Objects.requireNonNull(currentLeader);
        }

        /**
         * Two positions are equal when their offset, offset epoch and current leader are all equal.
         */
        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            FetchPosition that = (FetchPosition) o;
            return offset == that.offset &&
                    offsetEpoch.equals(that.offsetEpoch) &&
                    currentLeader.equals(that.currentLeader);
        }

        @Override
        public int hashCode() {
            return Objects.hash(offset, offsetEpoch, currentLeader);
        }

        @Override
        public String toString() {
            return "FetchPosition{" +
                    "offset=" + offset +
                    ", offsetEpoch=" + offsetEpoch +
                    ", currentLeader=" + currentLeader +
                    '}';
        }
    }

    /**
     * Immutable holder pairing a partition with a fetch position and an optional divergent
     * offset. Judging by its name it reports a log truncation; how instances are produced is
     * not visible in this part of the file.
     */
    public static class LogTruncation {
        public final TopicPartition topicPartition;
        public final FetchPosition fetchPosition;
        public final Optional<OffsetAndMetadata> divergentOffsetOpt;

        /**
         * @param topicPartition     Partition this record refers to.
         * @param fetchPosition      Fetch position associated with the partition.
         * @param divergentOffsetOpt Divergent offset, or empty when it is unknown.
         */
        public LogTruncation(TopicPartition topicPartition,
                             FetchPosition fetchPosition,
                             Optional<OffsetAndMetadata> divergentOffsetOpt) {
            this.topicPartition = topicPartition;
            this.fetchPosition = fetchPosition;
            this.divergentOffsetOpt = divergentOffsetOpt;
        }

        /**
         * Render the partition, the fetch offset and epoch, and the divergent offset and epoch,
         * printing {@code unknown} for the latter two when no divergent offset is present.
         */
        @Override
        public String toString() {
            StringBuilder bldr = new StringBuilder()
                .append("(partition=")
                .append(topicPartition)
                .append(", fetchOffset=")
                .append(fetchPosition.offset)
                .append(", fetchEpoch=")
                .append(fetchPosition.offsetEpoch);

            if (divergentOffsetOpt.isPresent()) {
                OffsetAndMetadata divergentOffset = divergentOffsetOpt.get();
                bldr.append(", divergentOffset=")
                    .append(divergentOffset.offset())
                    .append(", divergentEpoch=")
                    .append(divergentOffset.leaderEpoch());
            } else {
                bldr.append(", divergentOffset=unknown")
                    .append(", divergentEpoch=unknown");
            }

            return bldr.append(")").toString();
        }
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy