All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.kafka.clients.consumer.internals.OffsetFetcher Maven / Gradle / Ivy

There is a newer version: 3.9.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.clients.consumer.internals;

import org.apache.kafka.clients.ApiVersions;
import org.apache.kafka.clients.ClientResponse;
import org.apache.kafka.clients.Metadata;
import org.apache.kafka.clients.NodeApiVersions;
import org.apache.kafka.clients.StaleMetadataException;
import org.apache.kafka.clients.consumer.LogTruncationException;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.clients.consumer.OffsetAndTimestamp;
import org.apache.kafka.clients.consumer.internals.OffsetFetcherUtils.ListOffsetData;
import org.apache.kafka.clients.consumer.internals.OffsetFetcherUtils.ListOffsetResult;
import org.apache.kafka.clients.consumer.internals.OffsetsForLeaderEpochClient.OffsetForEpochResult;
import org.apache.kafka.clients.consumer.internals.SubscriptionState.FetchPosition;
import org.apache.kafka.common.IsolationLevel;
import org.apache.kafka.common.Node;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.RetriableException;
import org.apache.kafka.common.errors.TimeoutException;
import org.apache.kafka.common.message.ListOffsetsRequestData.ListOffsetsPartition;
import org.apache.kafka.common.requests.ListOffsetsRequest;
import org.apache.kafka.common.requests.ListOffsetsResponse;
import org.apache.kafka.common.utils.LogContext;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.common.utils.Timer;
import org.slf4j.Logger;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.stream.Collectors;

import static org.apache.kafka.clients.consumer.internals.OffsetFetcherUtils.buildOffsetsForTimesResult;
import static org.apache.kafka.clients.consumer.internals.OffsetFetcherUtils.hasUsableOffsetForLeaderEpochVersion;
import static org.apache.kafka.clients.consumer.internals.OffsetFetcherUtils.regroupFetchPositionsByLeader;
import static org.apache.kafka.clients.consumer.internals.OffsetFetcherUtils.topicsForPartitions;

/**
 * {@link OffsetFetcher} is responsible for fetching the {@link OffsetAndTimestamp offsets} for
 * a given set of {@link TopicPartition topic and partition pairs} and for validation and resetting of positions,
 * as needed.
 */
public class OffsetFetcher {

    // Logger created from the LogContext handed to the constructor.
    private final Logger log;
    // Consumer metadata: used for leader lookups, metadata update requests and transient topics.
    private final ConsumerMetadata metadata;
    // Subscription state whose positions are reset/validated and whose retry deadlines are set here.
    private final SubscriptionState subscriptions;
    // Network client used to send requests, poll futures and wait for metadata updates.
    private final ConsumerNetworkClient client;
    // Clock used to compute "next allowed retry" deadlines.
    private final Time time;
    // Added to the current time to delay the retry of partitions after a failed or retriable response.
    private final long retryBackoffMs;
    // Added to the current time to set the next allowed retry at the moment a request is sent.
    private final long requestTimeoutMs;
    // Isolation level passed to ListOffsets requests and to the subscription-state update.
    private final IsolationLevel isolationLevel;
    // Client used to send asynchronous OffsetsForLeaderEpoch requests during position validation.
    private final OffsetsForLeaderEpochClient offsetsForLeaderEpochClient;
    // Per-node API versions, looked up before deciding whether a position can be validated.
    private final ApiVersions apiVersions;
    // Helper holding logic this class delegates to (reset timestamps, response handling, regrouping).
    private final OffsetFetcherUtils offsetFetcherUtils;

    /**
     * Creates an offset fetcher bound to the given network client, metadata and subscription state.
     *
     * @param logContext       context used to create this class's logger; also handed to the helper objects
     * @param client           network client used to send requests and poll for their results
     * @param metadata         consumer metadata used for leader lookups and metadata update requests
     * @param subscriptions    subscription state whose positions are reset and validated
     * @param time             clock used to compute retry deadlines
     * @param retryBackoffMs   milliseconds added to the current time before a failed request may be retried
     * @param requestTimeoutMs milliseconds added to the current time to set the next allowed retry when a
     *                         request is sent
     * @param isolationLevel   isolation level passed to ListOffsets requests
     * @param apiVersions      per-node API versions consulted before validating positions
     */
    public OffsetFetcher(LogContext logContext,
                         ConsumerNetworkClient client,
                         ConsumerMetadata metadata,
                         SubscriptionState subscriptions,
                         Time time,
                         long retryBackoffMs,
                         long requestTimeoutMs,
                         IsolationLevel isolationLevel,
                         ApiVersions apiVersions) {
        // Scalar configuration.
        this.retryBackoffMs = retryBackoffMs;
        this.requestTimeoutMs = requestTimeoutMs;
        this.isolationLevel = isolationLevel;

        // Collaborators supplied by the caller.
        this.client = client;
        this.metadata = metadata;
        this.subscriptions = subscriptions;
        this.apiVersions = apiVersions;
        this.time = time;
        this.log = logContext.logger(getClass());

        // Helpers built from the collaborators above.
        this.offsetFetcherUtils = new OffsetFetcherUtils(logContext, metadata, subscriptions,
                time, retryBackoffMs, apiVersions);
        this.offsetsForLeaderEpochClient = new OffsetsForLeaderEpochClient(client, logContext);
    }

    /**
     * Reset offsets for all assigned partitions that require it.
     * <p>
     * The partitions to reset, and the timestamp to look up for each of them, come from
     * {@code offsetFetcherUtils.getOffsetResetTimestamp()}; no request is sent when that map is empty.
     *
     * @throws org.apache.kafka.clients.consumer.NoOffsetForPartitionException If no offset reset strategy is defined
     *                                                                         and one or more partitions aren't awaiting a seekToBeginning() or seekToEnd().
     */
    public void resetPositionsIfNeeded() {
        Map<TopicPartition, Long> offsetResetTimestamps = offsetFetcherUtils.getOffsetResetTimestamp();

        if (offsetResetTimestamps.isEmpty())
            return;

        resetPositionsAsync(offsetResetTimestamps);
    }

    /**
     * Validate offsets for all assigned partitions for which a leader change has been detected.
     * <p>
     * The partitions and their current fetch positions are obtained from
     * {@code offsetFetcherUtils.getPartitionsToValidate()} and validated asynchronously.
     */
    public void validatePositionsIfNeeded() {
        Map<TopicPartition, FetchPosition> partitionsToValidate =
                offsetFetcherUtils.getPartitionsToValidate();

        validatePositionsAsync(partitionsToValidate);
    }

    /**
     * Look up offsets by timestamp for the given partitions.
     * <p>
     * The topics of the requested partitions are registered as transient topics on the metadata for the
     * duration of the call and are always cleared again, even when the lookup fails.
     *
     * @param timestampsToSearch mapping from partition to the timestamp to look up
     * @param timer              timer bounding how long the lookup may block
     * @return the result built by {@code buildOffsetsForTimesResult} from the requested timestamps and the
     *         offsets that were fetched
     * @throws TimeoutException if the offsets could not be fetched before the timer expired
     */
    public Map<TopicPartition, OffsetAndTimestamp> offsetsForTimes(Map<TopicPartition, Long> timestampsToSearch,
                                                                   Timer timer) {
        metadata.addTransientTopics(topicsForPartitions(timestampsToSearch.keySet()));

        try {
            Map<TopicPartition, ListOffsetData> fetchedOffsets = fetchOffsetsByTimes(timestampsToSearch,
                    timer, true).fetchedOffsets;

            return buildOffsetsForTimesResult(timestampsToSearch, fetchedOffsets);
        } finally {
            metadata.clearTransientTopics();
        }
    }

    /**
     * Fetch offsets for the given partition timestamps, retrying partitions that could not be resolved
     * until everything is fetched or the timer expires.
     *
     * @param timestampsToSearch mapping from partition to the timestamp to look up
     * @param timer              timer bounding the whole operation; a timeout of zero sends the requests
     *                           but returns without polling the network client
     * @param requireTimestamps  forwarded to the ListOffsets requests
     * @return the accumulated result; empty when {@code timestampsToSearch} is empty
     * @throws TimeoutException if the lookup did not complete before the timer expired
     */
    private ListOffsetResult fetchOffsetsByTimes(Map<TopicPartition, Long> timestampsToSearch,
                                                 Timer timer,
                                                 boolean requireTimestamps) {
        ListOffsetResult result = new ListOffsetResult();
        if (timestampsToSearch.isEmpty())
            return result;

        // Working copy: shrinks to the partitions that still need a retry after each successful round.
        Map<TopicPartition, Long> remainingToSearch = new HashMap<>(timestampsToSearch);
        do {
            RequestFuture<ListOffsetResult> future = sendListOffsetsRequests(remainingToSearch, requireTimestamps);

            future.addListener(new RequestFutureListener<ListOffsetResult>() {
                @Override
                public void onSuccess(ListOffsetResult value) {
                    synchronized (future) {
                        result.fetchedOffsets.putAll(value.fetchedOffsets);
                        remainingToSearch.keySet().retainAll(value.partitionsToRetry);

                        offsetFetcherUtils.updateSubscriptionState(value.fetchedOffsets, isolationLevel);
                    }
                }

                @Override
                public void onFailure(RuntimeException e) {
                    // Retriable failures are left for the next loop iteration; anything else is rethrown.
                    if (!(e instanceof RetriableException)) {
                        throw future.exception();
                    }
                }
            });

            // if timeout is set to zero, do not try to poll the network client at all
            // and return empty immediately; otherwise try to get the results synchronously
            // and throw timeout exception if it cannot complete in time
            if (timer.timeoutMs() == 0L)
                return result;

            client.poll(future, timer);

            if (!future.isDone()) {
                // The poll returned without completing the future: fall through to the timeout below.
                break;
            } else if (remainingToSearch.isEmpty()) {
                return result;
            } else {
                // Some partitions must be retried; wait for fresh metadata before the next round.
                client.awaitMetadataUpdate(timer);
            }
        } while (timer.notExpired());

        throw new TimeoutException("Failed to get offsets by times in " + timer.elapsedMs() + "ms");
    }

    /**
     * Fetch the offsets found for {@link ListOffsetsRequest#EARLIEST_TIMESTAMP} for the given partitions.
     *
     * @param partitions partitions to look up
     * @param timer      timer bounding how long the lookup may block
     * @return mapping from partition to the fetched offset
     * @throws TimeoutException if the offsets could not be fetched before the timer expired
     */
    public Map<TopicPartition, Long> beginningOffsets(Collection<TopicPartition> partitions, Timer timer) {
        return beginningOrEndOffset(partitions, ListOffsetsRequest.EARLIEST_TIMESTAMP, timer);
    }

    /**
     * Fetch the offsets found for {@link ListOffsetsRequest#LATEST_TIMESTAMP} for the given partitions.
     *
     * @param partitions partitions to look up
     * @param timer      timer bounding how long the lookup may block
     * @return mapping from partition to the fetched offset
     * @throws TimeoutException if the offsets could not be fetched before the timer expired
     */
    public Map<TopicPartition, Long> endOffsets(Collection<TopicPartition> partitions, Timer timer) {
        return beginningOrEndOffset(partitions, ListOffsetsRequest.LATEST_TIMESTAMP, timer);
    }

    /**
     * Shared implementation of {@code beginningOffsets} and {@code endOffsets}: looks up the same
     * timestamp for every given partition and returns only the offsets.
     * <p>
     * The topics of the partitions are registered as transient topics on the metadata for the duration of
     * the call and are always cleared again.
     *
     * @param partitions partitions to look up; duplicates are collapsed before the lookup
     * @param timestamp  timestamp to search for on every partition
     * @param timer      timer bounding how long the lookup may block
     * @return mapping from partition to the fetched offset
     * @throws TimeoutException if the offsets could not be fetched before the timer expired
     */
    private Map<TopicPartition, Long> beginningOrEndOffset(Collection<TopicPartition> partitions,
                                                           long timestamp,
                                                           Timer timer) {
        metadata.addTransientTopics(topicsForPartitions(partitions));
        try {
            Map<TopicPartition, Long> timestampsToSearch = partitions.stream()
                    .distinct()
                    .collect(Collectors.toMap(Function.identity(), tp -> timestamp));

            ListOffsetResult result = fetchOffsetsByTimes(timestampsToSearch, timer, false);

            return result.fetchedOffsets.entrySet().stream()
                    .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().offset));
        } finally {
            metadata.clearTransientTopics();
        }
    }

    private void resetPositionsAsync(Map partitionResetTimestamps) {
        Map> timestampsToSearchByNode =
                groupListOffsetRequests(partitionResetTimestamps, new HashSet<>());
        for (Map.Entry> entry : timestampsToSearchByNode.entrySet()) {
            Node node = entry.getKey();
            final Map resetTimestamps = entry.getValue();
            subscriptions.setNextAllowedRetry(resetTimestamps.keySet(), time.milliseconds() + requestTimeoutMs);

            RequestFuture future = sendListOffsetRequest(node, resetTimestamps, false);
            future.addListener(new RequestFutureListener() {
                @Override
                public void onSuccess(ListOffsetResult result) {
                    offsetFetcherUtils.onSuccessfulRequestForResettingPositions(resetTimestamps, result);
                }

                @Override
                public void onFailure(RuntimeException e) {
                    offsetFetcherUtils.onFailedRequestForResettingPositions(resetTimestamps, e);
                }
            });
        }
    }

    /**
     * For each partition which needs validation, make an asynchronous request to get the end-offsets for the partition
     * with the epoch less than or equal to the epoch the partition last saw.
     *
     * 

* * Requests are grouped by Node for efficiency. */ private void validatePositionsAsync(Map partitionsToValidate) { final Map> regrouped = regroupFetchPositionsByLeader(partitionsToValidate); long nextResetTimeMs = time.milliseconds() + requestTimeoutMs; regrouped.forEach((node, fetchPositions) -> { if (node.isEmpty()) { metadata.requestUpdate(); return; } NodeApiVersions nodeApiVersions = apiVersions.get(node.idString()); if (nodeApiVersions == null) { client.tryConnect(node); return; } if (!hasUsableOffsetForLeaderEpochVersion(nodeApiVersions)) { log.debug("Skipping validation of fetch offsets for partitions {} since the broker does not " + "support the required protocol version (introduced in Kafka 2.3)", fetchPositions.keySet()); for (TopicPartition partition : fetchPositions.keySet()) { subscriptions.completeValidation(partition); } return; } subscriptions.setNextAllowedRetry(fetchPositions.keySet(), nextResetTimeMs); RequestFuture future = offsetsForLeaderEpochClient.sendAsyncRequest(node, fetchPositions); future.addListener(new RequestFutureListener() { @Override public void onSuccess(OffsetForEpochResult offsetsResult) { List truncations = new ArrayList<>(); if (!offsetsResult.partitionsToRetry().isEmpty()) { subscriptions.setNextAllowedRetry(offsetsResult.partitionsToRetry(), time.milliseconds() + retryBackoffMs); metadata.requestUpdate(); } // For each OffsetsForLeader response, check if the end-offset is lower than our current offset // for the partition. If so, it means we have experienced log truncation and need to reposition // that partition's offset. // // In addition, check whether the returned offset and epoch are valid. If not, then we should reset // its offset if reset policy is configured, or throw out of range exception. 
offsetsResult.endOffsets().forEach((topicPartition, respEndOffset) -> { FetchPosition requestPosition = fetchPositions.get(topicPartition); Optional truncationOpt = subscriptions.maybeCompleteValidation(topicPartition, requestPosition, respEndOffset); truncationOpt.ifPresent(truncations::add); }); if (!truncations.isEmpty()) { offsetFetcherUtils.maybeSetOffsetForLeaderException(buildLogTruncationException(truncations)); } } @Override public void onFailure(RuntimeException e) { subscriptions.requestFailed(fetchPositions.keySet(), time.milliseconds() + retryBackoffMs); metadata.requestUpdate(); if (!(e instanceof RetriableException)) { offsetFetcherUtils.maybeSetOffsetForLeaderException(e); } } }); }); } private LogTruncationException buildLogTruncationException(List truncations) { Map divergentOffsets = new HashMap<>(); Map truncatedFetchOffsets = new HashMap<>(); for (SubscriptionState.LogTruncation truncation : truncations) { truncation.divergentOffsetOpt.ifPresent(divergentOffset -> divergentOffsets.put(truncation.topicPartition, divergentOffset)); truncatedFetchOffsets.put(truncation.topicPartition, truncation.fetchPosition.offset); } return new LogTruncationException("Detected truncated partitions: " + truncations, truncatedFetchOffsets, divergentOffsets); } /** * Search the offsets by target times for the specified partitions. * * @param timestampsToSearch the mapping between partitions and target time * @param requireTimestamps true if we should fail with an UnsupportedVersionException if the broker does * not support fetching precise timestamps for offsets * @return A response which can be polled to obtain the corresponding timestamps and offsets. 
*/ private RequestFuture sendListOffsetsRequests(final Map timestampsToSearch, final boolean requireTimestamps) { final Set partitionsToRetry = new HashSet<>(); Map> timestampsToSearchByNode = groupListOffsetRequests(timestampsToSearch, partitionsToRetry); if (timestampsToSearchByNode.isEmpty()) return RequestFuture.failure(new StaleMetadataException()); final RequestFuture listOffsetRequestsFuture = new RequestFuture<>(); final Map fetchedTimestampOffsets = new HashMap<>(); final AtomicInteger remainingResponses = new AtomicInteger(timestampsToSearchByNode.size()); for (Map.Entry> entry : timestampsToSearchByNode.entrySet()) { RequestFuture future = sendListOffsetRequest(entry.getKey(), entry.getValue(), requireTimestamps); future.addListener(new RequestFutureListener() { @Override public void onSuccess(ListOffsetResult partialResult) { synchronized (listOffsetRequestsFuture) { fetchedTimestampOffsets.putAll(partialResult.fetchedOffsets); partitionsToRetry.addAll(partialResult.partitionsToRetry); if (remainingResponses.decrementAndGet() == 0 && !listOffsetRequestsFuture.isDone()) { ListOffsetResult result = new ListOffsetResult(fetchedTimestampOffsets, partitionsToRetry); listOffsetRequestsFuture.complete(result); } } } @Override public void onFailure(RuntimeException e) { synchronized (listOffsetRequestsFuture) { if (!listOffsetRequestsFuture.isDone()) listOffsetRequestsFuture.raise(e); } } }); } return listOffsetRequestsFuture; } /** * Groups timestamps to search by node for topic partitions in `timestampsToSearch` that have * leaders available. Topic partitions from `timestampsToSearch` that do not have their leader * available are added to `partitionsToRetry` * * @param timestampsToSearch The mapping from partitions to the target timestamps * @param partitionsToRetry A set of topic partitions that will be extended with partitions * that need metadata update or re-connect to the leader. 
*/ private Map> groupListOffsetRequests( Map timestampsToSearch, Set partitionsToRetry) { final Map partitionDataMap = new HashMap<>(); for (Map.Entry entry : timestampsToSearch.entrySet()) { TopicPartition tp = entry.getKey(); Long offset = entry.getValue(); Metadata.LeaderAndEpoch leaderAndEpoch = metadata.currentLeader(tp); if (!leaderAndEpoch.leader.isPresent()) { log.debug("Leader for partition {} is unknown for fetching offset {}", tp, offset); metadata.requestUpdate(); partitionsToRetry.add(tp); } else { Node leader = leaderAndEpoch.leader.get(); if (client.isUnavailable(leader)) { client.maybeThrowAuthFailure(leader); // The connection has failed and we need to await the backoff period before we can // try again. No need to request a metadata update since the disconnect will have // done so already. log.debug("Leader {} for partition {} is unavailable for fetching offset until reconnect backoff expires", leader, tp); partitionsToRetry.add(tp); } else { int currentLeaderEpoch = leaderAndEpoch.epoch.orElse(ListOffsetsResponse.UNKNOWN_EPOCH); partitionDataMap.put(tp, new ListOffsetsPartition() .setPartitionIndex(tp.partition()) .setTimestamp(offset) .setCurrentLeaderEpoch(currentLeaderEpoch)); } } } return offsetFetcherUtils.regroupPartitionMapByNode(partitionDataMap); } /** * Send the ListOffsetRequest to a specific broker for the partitions and target timestamps. * * @param node The node to send the ListOffsetRequest to. * @param timestampsToSearch The mapping from partitions to the target timestamps. * @param requireTimestamp True if we require a timestamp in the response. * @return A response which can be polled to obtain the corresponding timestamps and offsets. 
*/ private RequestFuture sendListOffsetRequest(final Node node, final Map timestampsToSearch, boolean requireTimestamp) { ListOffsetsRequest.Builder builder = ListOffsetsRequest.Builder .forConsumer(requireTimestamp, isolationLevel, false) .setTargetTimes(ListOffsetsRequest.toListOffsetsTopics(timestampsToSearch)); log.debug("Sending ListOffsetRequest {} to broker {}", builder, node); return client.send(node, builder) .compose(new RequestFutureAdapter() { @Override public void onSuccess(ClientResponse response, RequestFuture future) { ListOffsetsResponse lor = (ListOffsetsResponse) response.responseBody(); log.trace("Received ListOffsetResponse {} from broker {}", lor, node); handleListOffsetResponse(lor, future); } }); } /** * Callback for the response of the list offset call above. * * @param listOffsetsResponse The response from the server. * @param future The future to be completed when the response returns. Note that any partition-level errors will * generally fail the entire future result. The one exception is UNSUPPORTED_FOR_MESSAGE_FORMAT, * which indicates that the broker does not support the v1 message format. Partitions with this * particular error are simply left out of the future map. Note that the corresponding timestamp * value of each partition may be null only for v0. In v1 and later the ListOffset API would not * return a null timestamp (-1 is returned instead when necessary). */ private void handleListOffsetResponse(ListOffsetsResponse listOffsetsResponse, RequestFuture future) { try { ListOffsetResult result = offsetFetcherUtils.handleListOffsetResponse(listOffsetsResponse); future.complete(result); } catch (RuntimeException e) { future.raise(e); } } /** * If we have seen new metadata (as tracked by {@link org.apache.kafka.clients.Metadata#updateVersion()}), then * we should check that all the assignments have a valid position. */ public void validatePositionsOnMetadataChange() { offsetFetcherUtils.validatePositionsOnMetadataChange(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy