/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.clients.consumer.internals;

import org.apache.kafka.clients.ApiVersions;
import org.apache.kafka.clients.NodeApiVersions;
import org.apache.kafka.clients.consumer.LogTruncationException;
import org.apache.kafka.clients.consumer.NoOffsetForPartitionException;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.clients.consumer.OffsetAndTimestamp;
import org.apache.kafka.clients.consumer.OffsetResetStrategy;
import org.apache.kafka.common.IsolationLevel;
import org.apache.kafka.common.Node;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.RetriableException;
import org.apache.kafka.common.errors.TopicAuthorizationException;
import org.apache.kafka.common.message.ApiVersionsResponseData;
import org.apache.kafka.common.message.ListOffsetsRequestData;
import org.apache.kafka.common.message.ListOffsetsResponseData;
import org.apache.kafka.common.protocol.ApiKeys;
import org.apache.kafka.common.protocol.Errors;
import org.apache.kafka.common.requests.ListOffsetsRequest;
import org.apache.kafka.common.requests.ListOffsetsResponse;
import org.apache.kafka.common.requests.OffsetsForLeaderEpochRequest;
import org.apache.kafka.common.utils.LogContext;
import org.apache.kafka.common.utils.Time;
import org.slf4j.Logger;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Collectors;

/**
 * Utility functions for fetching offsets, validating and resetting positions.
 */
class OffsetFetcherUtils {
    private final ConsumerMetadata metadata;
    private final SubscriptionState subscriptionState;
    private final Time time;
    private final long retryBackoffMs;
    private final ApiVersions apiVersions;
    private final Logger log;

    /**
     * Exception that occurred while validating positions; it will be propagated on the next
     * call to validate positions. This could be an error received in the
     * OffsetsForLeaderEpoch response, or a LogTruncationException detected when using a
     * successful response to validate the positions. It is cleared when thrown.
     */
    private final AtomicReference<RuntimeException> cachedValidatePositionsException = new AtomicReference<>();
    /**
     * Exception that occurred while resetting positions; it will be propagated on the next
     * call to reset positions. This holds the error received in the response to the
     * ListOffsets request. It is cleared when thrown on the next call to reset.
     */
    private final AtomicReference<RuntimeException> cachedResetPositionsException = new AtomicReference<>();
    private final AtomicInteger metadataUpdateVersion = new AtomicInteger(-1);

    OffsetFetcherUtils(LogContext logContext,
                       ConsumerMetadata metadata,
                       SubscriptionState subscriptionState,
                       Time time,
                       long retryBackoffMs,
                       ApiVersions apiVersions) {
        this.log = logContext.logger(getClass());
        this.metadata = metadata;
        this.subscriptionState = subscriptionState;
        this.time = time;
        this.retryBackoffMs = retryBackoffMs;
        this.apiVersions = apiVersions;
    }
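
    // Illustrative sketch, not part of the original source: a consumer-internal owner (such
    // as the fetcher that issues ListOffsets requests) would construct this once, e.g.
    //
    //   OffsetFetcherUtils utils = new OffsetFetcherUtils(
    //       logContext, metadata, subscriptionState, Time.SYSTEM, retryBackoffMs, apiVersions);
    //
    // All collaborators are the consumer's existing internals; this class performs no
    // network I/O of its own.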

    /**
     * Callback for the response of the list offset call.
     *
     * @param listOffsetsResponse The response from the server.
     * @return {@link OffsetFetcherUtils.ListOffsetResult} extracted from the response, containing the fetched offsets
     * and partitions to retry.
     */
    OffsetFetcherUtils.ListOffsetResult handleListOffsetResponse(ListOffsetsResponse listOffsetsResponse) {
        Map<TopicPartition, OffsetFetcherUtils.ListOffsetData> fetchedOffsets = new HashMap<>();
        Set<TopicPartition> partitionsToRetry = new HashSet<>();
        Set<String> unauthorizedTopics = new HashSet<>();

        for (ListOffsetsResponseData.ListOffsetsTopicResponse topic : listOffsetsResponse.topics()) {
            for (ListOffsetsResponseData.ListOffsetsPartitionResponse partition : topic.partitions()) {
                TopicPartition topicPartition = new TopicPartition(topic.name(), partition.partitionIndex());
                Errors error = Errors.forCode(partition.errorCode());
                switch (error) {
                    case NONE:
                        if (!partition.oldStyleOffsets().isEmpty()) {
                            // Handle v0 response with offsets
                            long offset;
                            if (partition.oldStyleOffsets().size() > 1) {
                                throw new IllegalStateException("Unexpected partitionData response of length " +
                                        partition.oldStyleOffsets().size());
                            } else {
                                offset = partition.oldStyleOffsets().get(0);
                            }
                            log.debug("Handling v0 ListOffsetResponse response for {}. Fetched offset {}",
                                    topicPartition, offset);
                            if (offset != ListOffsetsResponse.UNKNOWN_OFFSET) {
                                OffsetFetcherUtils.ListOffsetData offsetData = new OffsetFetcherUtils.ListOffsetData(offset, null, Optional.empty());
                                fetchedOffsets.put(topicPartition, offsetData);
                            }
                        } else {
                            // Handle v1 and later response or v0 without offsets
                            log.debug("Handling ListOffsetResponse response for {}. Fetched offset {}, timestamp {}",
                                    topicPartition, partition.offset(), partition.timestamp());
                            if (partition.offset() != ListOffsetsResponse.UNKNOWN_OFFSET) {
                                Optional<Integer> leaderEpoch = (partition.leaderEpoch() == ListOffsetsResponse.UNKNOWN_EPOCH)
                                        ? Optional.empty()
                                        : Optional.of(partition.leaderEpoch());
                                OffsetFetcherUtils.ListOffsetData offsetData = new OffsetFetcherUtils.ListOffsetData(partition.offset(), partition.timestamp(),
                                        leaderEpoch);
                                fetchedOffsets.put(topicPartition, offsetData);
                            }
                        }
                        break;
                    case UNSUPPORTED_FOR_MESSAGE_FORMAT:
                        // The message format on the broker side is before 0.10.0, which means it does not
                        // support timestamps. We treat this case the same as if we weren't able to find an
                        // offset corresponding to the requested timestamp and leave it out of the result.
                        log.debug("Cannot search by timestamp for partition {} because the message format version " +
                                "is before 0.10.0", topicPartition);
                        break;
                    case NOT_LEADER_OR_FOLLOWER:
                    case REPLICA_NOT_AVAILABLE:
                    case KAFKA_STORAGE_ERROR:
                    case OFFSET_NOT_AVAILABLE:
                    case LEADER_NOT_AVAILABLE:
                    case FENCED_LEADER_EPOCH:
                    case UNKNOWN_LEADER_EPOCH:
                        log.debug("Attempt to fetch offsets for partition {} failed due to {}, retrying.",
                                topicPartition, error);
                        partitionsToRetry.add(topicPartition);
                        break;
                    case UNKNOWN_TOPIC_OR_PARTITION:
                        log.warn("Received unknown topic or partition error in ListOffset request for partition {}", topicPartition);
                        partitionsToRetry.add(topicPartition);
                        break;
                    case TOPIC_AUTHORIZATION_FAILED:
                        unauthorizedTopics.add(topicPartition.topic());
                        break;
                    default:
                        log.warn("Attempt to fetch offsets for partition {} failed due to unexpected exception: {}, retrying.",
                                topicPartition, error.message());
                        partitionsToRetry.add(topicPartition);
                }
            }
        }

        if (!unauthorizedTopics.isEmpty())
            throw new TopicAuthorizationException(unauthorizedTopics);
        else
            return new OffsetFetcherUtils.ListOffsetResult(fetchedOffsets, partitionsToRetry);
    }
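
    // Worked example of the dispatch above (partition names hypothetical): a response with
    // error NONE for tp0 and NOT_LEADER_OR_FOLLOWER for tp1 yields a ListOffsetResult with
    // tp0 in fetchedOffsets and tp1 in partitionsToRetry, e.g.
    //
    //   ListOffsetResult result = handleListOffsetResponse(response);
    //   // result.fetchedOffsets    -> {tp0=ListOffsetData(...)}
    //   // result.partitionsToRetry -> [tp1]
    //
    // Only TOPIC_AUTHORIZATION_FAILED aborts the whole call with an exception.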

    <T> Map<Node, Map<TopicPartition, T>> regroupPartitionMapByNode(Map<TopicPartition, T> partitionMap) {
        return partitionMap.entrySet()
                .stream()
                .collect(Collectors.groupingBy(entry -> metadata.fetch().leaderFor(entry.getKey()),
                        Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
    }
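
    // For example (nodes and values hypothetical): with tp0 and tp1 led by node1 and tp2 led
    // by node2, regroupPartitionMapByNode({tp0=a, tp1=b, tp2=c}) returns
    // {node1={tp0=a, tp1=b}, node2={tp2=c}}, letting the caller send one request per leader.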

    Map<TopicPartition, SubscriptionState.FetchPosition> getPartitionsToValidate() {
        RuntimeException exception = cachedValidatePositionsException.getAndSet(null);
        if (exception != null)
            throw exception;

        // Validate each partition against the current leader and epoch
        // If we see a new metadata version, check all partitions
        validatePositionsOnMetadataChange();

        // Collect positions needing validation, with backoff
        return subscriptionState
                .partitionsNeedingValidation(time.milliseconds())
                .stream()
                .filter(tp -> subscriptionState.position(tp) != null)
                .collect(Collectors.toMap(Function.identity(), subscriptionState::position));
    }

    void maybeSetValidatePositionsException(RuntimeException e) {
        if (!cachedValidatePositionsException.compareAndSet(null, e)) {
            log.error("Discarding error validating positions because another error is pending", e);
        }
    }

    /**
     * If we have seen new metadata (as tracked by {@link org.apache.kafka.clients.Metadata#updateVersion()}), then
     * we should check that all the assignments have a valid position.
     */
    void validatePositionsOnMetadataChange() {
        int newMetadataUpdateVersion = metadata.updateVersion();
        if (metadataUpdateVersion.getAndSet(newMetadataUpdateVersion) != newMetadataUpdateVersion) {
            subscriptionState.assignedPartitions().forEach(topicPartition -> {
                ConsumerMetadata.LeaderAndEpoch leaderAndEpoch = metadata.currentLeader(topicPartition);
                subscriptionState.maybeValidatePositionForCurrentLeader(apiVersions, topicPartition, leaderAndEpoch);
            });
        }
    }
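
    // Note on the getAndSet above: the full revalidation pass runs at most once per metadata
    // update version, even if this method is called repeatedly between metadata refreshes.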

    Map<TopicPartition, Long> getOffsetResetTimestamp() {
        // Raise exception from previous offset fetch if there is one
        RuntimeException exception = cachedResetPositionsException.getAndSet(null);
        if (exception != null)
            throw exception;

        Set<TopicPartition> partitions = subscriptionState.partitionsNeedingReset(time.milliseconds());
        final Map<TopicPartition, Long> offsetResetTimestamps = new HashMap<>();
        for (final TopicPartition partition : partitions) {
            Long timestamp = offsetResetStrategyTimestamp(partition);
            if (timestamp != null)
                offsetResetTimestamps.put(partition, timestamp);
        }

        return offsetResetTimestamps;
    }
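
    // Note: offsetResetStrategyTimestamp below throws NoOffsetForPartitionException when a
    // partition needs a reset but no reset strategy is configured (auto.offset.reset=none),
    // so that condition surfaces here as an exception rather than a silently empty map.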

    static <T> Map<TopicPartition, T> buildListOffsetsResult(
        final Map<TopicPartition, Long> timestampsToSearch,
        final Map<TopicPartition, ListOffsetData> fetchedOffsets,
        BiFunction<TopicPartition, ListOffsetData, T> resultMapper) {

        HashMap<TopicPartition, T> offsetsResults = new HashMap<>(timestampsToSearch.size());
        for (Map.Entry<TopicPartition, Long> entry : timestampsToSearch.entrySet())
            offsetsResults.put(entry.getKey(), null);

        for (Map.Entry<TopicPartition, ListOffsetData> entry : fetchedOffsets.entrySet()) {
            ListOffsetData offsetData = entry.getValue();
            offsetsResults.put(entry.getKey(), resultMapper.apply(entry.getKey(), offsetData));
        }

        return offsetsResults;
    }
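
    // Note on the helper above: every requested partition appears in the result, mapped to
    // null when the broker returned no usable offset for it, so callers can tell "searched
    // but not found" apart from "never requested".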

    static Map<TopicPartition, OffsetAndTimestamp> buildOffsetsForTimesResult(
        final Map<TopicPartition, Long> timestampsToSearch,
        final Map<TopicPartition, ListOffsetData> fetchedOffsets) {
        return buildListOffsetsResult(timestampsToSearch, fetchedOffsets,
            (topicPartition, offsetData) -> new OffsetAndTimestamp(
                offsetData.offset,
                offsetData.timestamp,
                offsetData.leaderEpoch));
    }

    static Map<TopicPartition, OffsetAndTimestampInternal> buildOffsetsForTimeInternalResult(
            final Map<TopicPartition, Long> timestampsToSearch,
            final Map<TopicPartition, ListOffsetData> fetchedOffsets) {
        HashMap<TopicPartition, OffsetAndTimestampInternal> offsetsResults = new HashMap<>(timestampsToSearch.size());
        for (Map.Entry<TopicPartition, Long> entry : timestampsToSearch.entrySet()) {
            offsetsResults.put(entry.getKey(), null);
        }
        for (Map.Entry<TopicPartition, ListOffsetData> entry : fetchedOffsets.entrySet()) {
            ListOffsetData offsetData = entry.getValue();
            offsetsResults.put(entry.getKey(), new OffsetAndTimestampInternal(
                    offsetData.offset,
                    offsetData.timestamp,
                    offsetData.leaderEpoch));
        }
        return offsetsResults;
    }

    private Long offsetResetStrategyTimestamp(final TopicPartition partition) {
        OffsetResetStrategy strategy = subscriptionState.resetStrategy(partition);
        if (strategy == OffsetResetStrategy.EARLIEST)
            return ListOffsetsRequest.EARLIEST_TIMESTAMP;
        else if (strategy == OffsetResetStrategy.LATEST)
            return ListOffsetsRequest.LATEST_TIMESTAMP;
        else
            throw new NoOffsetForPartitionException(partition);
    }
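
    // The values returned above are ListOffsets sentinel timestamps, not wall-clock times:
    // in current Kafka, EARLIEST_TIMESTAMP is -2L (resolved by the broker to the log start
    // offset) and LATEST_TIMESTAMP is -1L (resolved to the high watermark, or to the last
    // stable offset under read_committed).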

    static Set<String> topicsForPartitions(Collection<TopicPartition> partitions) {
        return partitions.stream().map(TopicPartition::topic).collect(Collectors.toSet());
    }

    void updateSubscriptionState(Map<TopicPartition, ListOffsetData> fetchedOffsets,
                                 IsolationLevel isolationLevel) {
        for (final Map.Entry<TopicPartition, ListOffsetData> entry : fetchedOffsets.entrySet()) {
            final TopicPartition partition = entry.getKey();

            // if the interested partitions are part of the subscriptions, use the returned offset to update
            // the subscription state as well:
            //   * with read-committed, the returned offset would be the last stable offset (LSO);
            //   * with read-uncommitted, the returned offset would be the high watermark (HW);
            if (subscriptionState.isAssigned(partition)) {
                final long offset = entry.getValue().offset;
                if (isolationLevel == IsolationLevel.READ_COMMITTED) {
                    log.trace("Updating last stable offset for partition {} to {}", partition, offset);
                    subscriptionState.updateLastStableOffset(partition, offset);
                } else {
                    log.trace("Updating high watermark for partition {} to {}", partition, offset);
                    subscriptionState.updateHighWatermark(partition, offset);
                }
            }
        }
    }

    static OffsetResetStrategy timestampToOffsetResetStrategy(long timestamp) {
        if (timestamp == ListOffsetsRequest.EARLIEST_TIMESTAMP)
            return OffsetResetStrategy.EARLIEST;
        else if (timestamp == ListOffsetsRequest.LATEST_TIMESTAMP)
            return OffsetResetStrategy.LATEST;
        else
            return null;
    }
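
    // Inverse of offsetResetStrategyTimestamp above, e.g. (illustrative):
    //
    //   timestampToOffsetResetStrategy(ListOffsetsRequest.EARLIEST_TIMESTAMP)
    //       == OffsetResetStrategy.EARLIEST
    //
    // Any non-sentinel timestamp (an actual epoch-millis search) maps to null.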

    void onSuccessfulResponseForResettingPositions(
            final Map<TopicPartition, ListOffsetsRequestData.ListOffsetsPartition> resetTimestamps,
            final ListOffsetResult result) {
        if (!result.partitionsToRetry.isEmpty()) {
            subscriptionState.requestFailed(result.partitionsToRetry, time.milliseconds() + retryBackoffMs);
            metadata.requestUpdate(false);
        }

        for (Map.Entry<TopicPartition, ListOffsetData> fetchedOffset : result.fetchedOffsets.entrySet()) {
            TopicPartition partition = fetchedOffset.getKey();
            ListOffsetData offsetData = fetchedOffset.getValue();
            ListOffsetsRequestData.ListOffsetsPartition requestedReset = resetTimestamps.get(partition);
            resetPositionIfNeeded(
                    partition,
                    timestampToOffsetResetStrategy(requestedReset.timestamp()),
                    offsetData);
        }
    }

    void onFailedResponseForResettingPositions(
            final Map<TopicPartition, ListOffsetsRequestData.ListOffsetsPartition> resetTimestamps,
            final RuntimeException error) {
        subscriptionState.requestFailed(resetTimestamps.keySet(), time.milliseconds() + retryBackoffMs);
        metadata.requestUpdate(false);

        if (!(error instanceof RetriableException) && !cachedResetPositionsException.compareAndSet(null,
                error))
            log.error("Discarding error resetting positions because another error is pending",
                    error);
    }


    void onSuccessfulResponseForValidatingPositions(
            final Map<TopicPartition, SubscriptionState.FetchPosition> fetchPositions,
            final OffsetsForLeaderEpochUtils.OffsetForEpochResult offsetsResult) {
        List<SubscriptionState.LogTruncation> truncations = new ArrayList<>();
        if (!offsetsResult.partitionsToRetry().isEmpty()) {
            subscriptionState.setNextAllowedRetry(offsetsResult.partitionsToRetry(),
                    time.milliseconds() + retryBackoffMs);
            metadata.requestUpdate(false);
        }

        // For each OffsetsForLeader response, check if the end-offset is lower than our current offset
        // for the partition. If so, it means we have experienced log truncation and need to reposition
        // that partition's offset.
        // In addition, check whether the returned offset and epoch are valid. If not, then we should reset
        // its offset if reset policy is configured, or throw out of range exception.
        offsetsResult.endOffsets().forEach((topicPartition, respEndOffset) -> {
            SubscriptionState.FetchPosition requestPosition = fetchPositions.get(topicPartition);
            Optional<SubscriptionState.LogTruncation> truncationOpt =
                    subscriptionState.maybeCompleteValidation(topicPartition, requestPosition,
                            respEndOffset);
            truncationOpt.ifPresent(truncations::add);
        });

        if (!truncations.isEmpty()) {
            maybeSetValidatePositionsException(buildLogTruncationException(truncations));
        }
    }

    void onFailedResponseForValidatingPositions(final Map<TopicPartition, SubscriptionState.FetchPosition> fetchPositions,
                                                final RuntimeException error) {
        subscriptionState.requestFailed(fetchPositions.keySet(), time.milliseconds() + retryBackoffMs);
        metadata.requestUpdate(false);

        if (!(error instanceof RetriableException)) {
            maybeSetValidatePositionsException(error);
        }
    }

    private LogTruncationException buildLogTruncationException(List<SubscriptionState.LogTruncation> truncations) {
        Map<TopicPartition, OffsetAndMetadata> divergentOffsets = new HashMap<>();
        Map<TopicPartition, Long> truncatedFetchOffsets = new HashMap<>();
        for (SubscriptionState.LogTruncation truncation : truncations) {
            truncation.divergentOffsetOpt.ifPresent(divergentOffset ->
                    divergentOffsets.put(truncation.topicPartition, divergentOffset));
            truncatedFetchOffsets.put(truncation.topicPartition, truncation.fetchPosition.offset);
        }
        return new LogTruncationException(truncatedFetchOffsets, divergentOffsets);
    }

    // Visible for testing
    void resetPositionIfNeeded(TopicPartition partition, OffsetResetStrategy requestedResetStrategy,
                               ListOffsetData offsetData) {
        SubscriptionState.FetchPosition position = new SubscriptionState.FetchPosition(
                offsetData.offset,
                Optional.empty(), // This will ensure we skip validation
                metadata.currentLeader(partition));
        offsetData.leaderEpoch.ifPresent(epoch -> metadata.updateLastSeenEpochIfNewer(partition, epoch));
        subscriptionState.maybeSeekUnvalidated(partition, position, requestedResetStrategy);
    }
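
    // The empty epoch above is deliberate: a FetchPosition without an offset epoch is not
    // subject to OffsetsForLeaderEpoch validation, which is reasonable here because the
    // offset was just obtained from a ListOffsets response rather than carried over from a
    // possibly stale previous position.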

    static Map<Node, Map<TopicPartition, SubscriptionState.FetchPosition>> regroupFetchPositionsByLeader(
            Map<TopicPartition, SubscriptionState.FetchPosition> partitionMap) {
        return partitionMap.entrySet()
                .stream()
                .filter(entry -> entry.getValue().currentLeader.leader.isPresent())
                .collect(Collectors.groupingBy(entry -> entry.getValue().currentLeader.leader.get(),
                        Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
    }
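
    // Unlike regroupPartitionMapByNode above, this variant filters out partitions whose
    // current leader is unknown; those are left for the caller to retry, typically after a
    // metadata refresh.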

    static boolean hasUsableOffsetForLeaderEpochVersion(NodeApiVersions nodeApiVersions) {
        ApiVersionsResponseData.ApiVersion apiVersion = nodeApiVersions.apiVersion(ApiKeys.OFFSET_FOR_LEADER_EPOCH);
        if (apiVersion == null)
            return false;

        return OffsetsForLeaderEpochRequest.supportsTopicPermission(apiVersion.maxVersion());
    }
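
    // Background (not in the original comments): supportsTopicPermission() checks that the
    // broker's OffsetsForLeaderEpoch version is new enough (v3 and above, where authorization
    // moved from cluster-level to topic-level Describe) for consumers to use it to validate
    // their positions.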

    static class ListOffsetResult {
        final Map<TopicPartition, ListOffsetData> fetchedOffsets;
        final Set<TopicPartition> partitionsToRetry;

        ListOffsetResult(Map<TopicPartition, ListOffsetData> fetchedOffsets,
                         Set<TopicPartition> partitionsNeedingRetry) {
            this.fetchedOffsets = fetchedOffsets;
            this.partitionsToRetry = partitionsNeedingRetry;
        }

        ListOffsetResult() {
            this.fetchedOffsets = new HashMap<>();
            this.partitionsToRetry = new HashSet<>();
        }
    }

    /**
     * Represents data about an offset returned by a broker.
     */
    static class ListOffsetData {
        final long offset;
        final Long timestamp; //  null if the broker does not support returning timestamps
        final Optional<Integer> leaderEpoch; // empty if the leader epoch is not known

        ListOffsetData(long offset, Long timestamp, Optional<Integer> leaderEpoch) {
            this.offset = offset;
            this.timestamp = timestamp;
            this.leaderEpoch = leaderEpoch;
        }
    }
}