/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.clients.consumer.internals;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.OffsetOutOfRangeException;
import org.apache.kafka.common.KafkaException;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.RecordTooLargeException;
import org.apache.kafka.common.errors.TopicAuthorizationException;
import org.apache.kafka.common.message.FetchResponseData;
import org.apache.kafka.common.protocol.Errors;
import org.apache.kafka.common.record.RecordBatch;
import org.apache.kafka.common.requests.FetchResponse;
import org.apache.kafka.common.utils.LogContext;
import org.apache.kafka.common.utils.Time;
import org.slf4j.Logger;

import java.util.ArrayDeque;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Queue;

import static org.apache.kafka.clients.consumer.internals.FetchUtils.requestMetadataUpdate;

/**
 * {@code FetchCollector} operates at the {@link RecordBatch} level, as that is what is stored in the
 * {@link FetchBuffer}. Each {@link org.apache.kafka.common.record.Record} in the {@link RecordBatch} is converted
 * to a {@link ConsumerRecord} and added to the returned {@link Fetch}.
 *
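 * <p>
 * A minimal usage sketch (illustrative only; this is a consumer-internal class, and the collaborator instances
 * such as {@code fetchBuffer}, {@code metadata}, and {@code deserializers} below are assumed to exist already):
 *
 * <pre>{@code
 * FetchCollector<String, String> collector = new FetchCollector<>(
 *         logContext, metadata, subscriptions, fetchConfig, deserializers, metricsManager, Time.SYSTEM);
 *
 * Fetch<String, String> fetch = collector.collectFetch(fetchBuffer);
 * if (!fetch.isEmpty()) {
 *     // records were collected and the consumed position may have been advanced
 * }
 * }</pre>
 *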
 * @param <K> Record key type
 * @param <V> Record value type
 */
public class FetchCollector<K, V> {

    private final Logger log;
    private final ConsumerMetadata metadata;
    private final SubscriptionState subscriptions;
    private final FetchConfig fetchConfig;
    private final Deserializers<K, V> deserializers;
    private final FetchMetricsManager metricsManager;
    private final Time time;

    public FetchCollector(final LogContext logContext,
                          final ConsumerMetadata metadata,
                          final SubscriptionState subscriptions,
                          final FetchConfig fetchConfig,
                          final Deserializers<K, V> deserializers,
                          final FetchMetricsManager metricsManager,
                          final Time time) {
        this.log = logContext.logger(FetchCollector.class);
        this.metadata = metadata;
        this.subscriptions = subscriptions;
        this.fetchConfig = fetchConfig;
        this.deserializers = deserializers;
        this.metricsManager = metricsManager;
        this.time = time;
    }

    /**
     * Return the fetched {@link ConsumerRecord records}, empty the {@link FetchBuffer record buffer}, and
     * update the consumed position.
     *
     * NOTE: returning an {@link Fetch#empty() empty} fetch guarantees the consumed position is not updated.
     *
     * @param fetchBuffer {@link FetchBuffer} from which to retrieve the {@link ConsumerRecord records}
     *
     * @return A {@link Fetch} for the requested partitions
     * @throws OffsetOutOfRangeException If there is OffsetOutOfRange error in fetchResponse and
     *                                   the defaultResetPolicy is NONE
     * @throws TopicAuthorizationException If there is TopicAuthorization error in fetchResponse.
     */
    public Fetch<K, V> collectFetch(final FetchBuffer fetchBuffer) {
        final Fetch<K, V> fetch = Fetch.empty();
        final Queue<CompletedFetch> pausedCompletedFetches = new ArrayDeque<>();
        int recordsRemaining = fetchConfig.maxPollRecords;

        try {
            while (recordsRemaining > 0) {
                final CompletedFetch nextInLineFetch = fetchBuffer.nextInLineFetch();

                if (nextInLineFetch == null || nextInLineFetch.isConsumed()) {
                    final CompletedFetch completedFetch = fetchBuffer.peek();

                    if (completedFetch == null)
                        break;

                    if (!completedFetch.isInitialized()) {
                        try {
                            fetchBuffer.setNextInLineFetch(initialize(completedFetch));
                        } catch (Exception e) {
                            // Remove a completedFetch upon a parse with exception if (1) it contains no completedFetch, and
                            // (2) there are no fetched completedFetch with actual content preceding this exception.
                            // The first condition ensures that the completedFetches is not stuck with the same completedFetch
                            // in cases such as the TopicAuthorizationException, and the second condition ensures that no
                            // potential data loss due to an exception in a following record.
                            if (fetch.isEmpty() && FetchResponse.recordsOrFail(completedFetch.partitionData).sizeInBytes() == 0)
                                fetchBuffer.poll();

                            throw e;
                        }
                    } else {
                        fetchBuffer.setNextInLineFetch(completedFetch);
                    }

                    fetchBuffer.poll();
                } else if (subscriptions.isPaused(nextInLineFetch.partition)) {
                    // when the partition is paused we add the records back to the completedFetches queue instead of draining
                    // them so that they can be returned on a subsequent poll if the partition is resumed at that time
                    log.debug("Skipping fetching records for assigned partition {} because it is paused", nextInLineFetch.partition);
                    pausedCompletedFetches.add(nextInLineFetch);
                    fetchBuffer.setNextInLineFetch(null);
                } else {
                    final Fetch<K, V> nextFetch = fetchRecords(nextInLineFetch, recordsRemaining);
                    recordsRemaining -= nextFetch.numRecords();
                    fetch.add(nextFetch);
                }
            }
        } catch (KafkaException e) {
            if (fetch.isEmpty())
                throw e;
        } finally {
            // add any polled completed fetches for paused partitions back to the completed fetches queue to be
            // re-evaluated in the next poll
            fetchBuffer.addAll(pausedCompletedFetches);
        }

        return fetch;
    }

    private Fetch<K, V> fetchRecords(final CompletedFetch nextInLineFetch, int maxRecords) {
        final TopicPartition tp = nextInLineFetch.partition;

        if (!subscriptions.isAssigned(tp)) {
            // this can happen when a rebalance happened before fetched records are returned to the consumer's poll call
            log.debug("Not returning fetched records for partition {} since it is no longer assigned", tp);
        } else if (!subscriptions.isFetchable(tp)) {
            // this can happen when a partition is paused before fetched records are returned to the consumer's
            // poll call or if the offset is being reset
            log.debug("Not returning fetched records for assigned partition {} since it is no longer fetchable", tp);
        } else {
            SubscriptionState.FetchPosition position = subscriptions.position(tp);

            if (position == null)
                throw new IllegalStateException("Missing position for fetchable partition " + tp);

            if (nextInLineFetch.nextFetchOffset() == position.offset) {
                List<ConsumerRecord<K, V>> partRecords = nextInLineFetch.fetchRecords(fetchConfig, deserializers, maxRecords);

                log.trace("Returning {} fetched records at offset {} for assigned partition {}",
                        partRecords.size(), position, tp);

                boolean positionAdvanced = false;

                if (nextInLineFetch.nextFetchOffset() > position.offset) {
                    SubscriptionState.FetchPosition nextPosition = new SubscriptionState.FetchPosition(
                            nextInLineFetch.nextFetchOffset(),
                            nextInLineFetch.lastEpoch(),
                            position.currentLeader);
                    log.trace("Updating fetch position from {} to {} for partition {} and returning {} records from `poll()`",
                            position, nextPosition, tp, partRecords.size());
                    subscriptions.position(tp, nextPosition);
                    positionAdvanced = true;
                }

                Long partitionLag = subscriptions.partitionLag(tp, fetchConfig.isolationLevel);
                if (partitionLag != null)
                    metricsManager.recordPartitionLag(tp, partitionLag);

                Long lead = subscriptions.partitionLead(tp);
                if (lead != null) {
                    metricsManager.recordPartitionLead(tp, lead);
                }

                return Fetch.forPartition(tp, partRecords, positionAdvanced);
            } else {
                // these records aren't next in line based on the last consumed position, ignore them
                // they must be from an obsolete request
                log.debug("Ignoring fetched records for {} at offset {} since the current position is {}",
                        tp, nextInLineFetch.nextFetchOffset(), position);
            }
        }

        log.trace("Draining fetched records for partition {}", tp);
        nextInLineFetch.drain();

        return Fetch.empty();
    }

    /**
     * Initialize a CompletedFetch object.
     */
    protected CompletedFetch initialize(final CompletedFetch completedFetch) {
        final TopicPartition tp = completedFetch.partition;
        final Errors error = Errors.forCode(completedFetch.partitionData.errorCode());
        boolean recordMetrics = true;

        try {
            if (!subscriptions.hasValidPosition(tp)) {
                // this can happen when a rebalance happened while fetch is still in-flight
                log.debug("Ignoring fetched records for partition {} since it no longer has valid position", tp);
                return null;
            } else if (error == Errors.NONE) {
                final CompletedFetch ret = handleInitializeSuccess(completedFetch);
                recordMetrics = ret == null;
                return ret;
            } else {
                handleInitializeErrors(completedFetch, error);
                return null;
            }
        } finally {
            if (recordMetrics) {
                completedFetch.recordAggregatedMetrics(0, 0);
            }

            if (error != Errors.NONE)
                // we move the partition to the end if there was an error. This way, it's more likely that partitions for
                // the same topic can remain together (allowing for more efficient serialization).
                subscriptions.movePartitionToEnd(tp);
        }
    }

    private CompletedFetch handleInitializeSuccess(final CompletedFetch completedFetch) {
        final TopicPartition tp = completedFetch.partition;
        final long fetchOffset = completedFetch.nextFetchOffset();

        // we are interested in this fetch only if the beginning offset matches the
        // current consumed position
        SubscriptionState.FetchPosition position = subscriptions.positionOrNull(tp);
        if (position == null || position.offset != fetchOffset) {
            log.debug("Discarding stale fetch response for partition {} since its offset {} does not match " +
                    "the expected offset {} or the partition has been unassigned", tp, fetchOffset, position);
            return null;
        }

        final FetchResponseData.PartitionData partition = completedFetch.partitionData;
        log.trace("Preparing to read {} bytes of data for partition {} with offset {}",
                FetchResponse.recordsSize(partition), tp, position);
        Iterator<? extends RecordBatch> batches = FetchResponse.recordsOrFail(partition).batches().iterator();

        if (!batches.hasNext() && FetchResponse.recordsSize(partition) > 0) {
            if (completedFetch.requestVersion < 3) {
                // Implement the pre KIP-74 behavior of throwing a RecordTooLargeException.
                Map<TopicPartition, Long> recordTooLargePartitions = Collections.singletonMap(tp, fetchOffset);
                throw new RecordTooLargeException("There are some messages at [Partition=Offset]: " +
                        recordTooLargePartitions + " whose size is larger than the fetch size " + fetchConfig.fetchSize +
                        " and hence cannot be returned. Please considering upgrading your broker to 0.10.1.0 or " +
                        "newer to avoid this issue. Alternately, increase the fetch size on the client (using " +
                        ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG + ")",
                        recordTooLargePartitions);
            } else {
                // This should not happen with brokers that support FetchRequest/Response V3 or higher (i.e. KIP-74)
                throw new KafkaException("Failed to make progress reading messages at " + tp + "=" +
                        fetchOffset + ". Received a non-empty fetch response from the server, but no " +
                        "complete records were found.");
            }
        }

        if (!updatePartitionState(partition, tp)) {
            return null;
        }

        completedFetch.setInitialized();
        return completedFetch;
    }

    private boolean updatePartitionState(final FetchResponseData.PartitionData partitionData,
                                         final TopicPartition tp) {
        if (partitionData.highWatermark() >= 0) {
            log.trace("Updating high watermark for partition {} to {}", tp, partitionData.highWatermark());
            if (!subscriptions.tryUpdatingHighWatermark(tp, partitionData.highWatermark())) {
                return false;
            }
        }

        if (partitionData.logStartOffset() >= 0) {
            log.trace("Updating log start offset for partition {} to {}", tp, partitionData.logStartOffset());
            if (!subscriptions.tryUpdatingLogStartOffset(tp, partitionData.logStartOffset())) {
                return false;
            }
        }

        if (partitionData.lastStableOffset() >= 0) {
            log.trace("Updating last stable offset for partition {} to {}", tp, partitionData.lastStableOffset());
            if (!subscriptions.tryUpdatingLastStableOffset(tp, partitionData.lastStableOffset())) {
                return false;
            }
        }

        if (FetchResponse.isPreferredReplica(partitionData)) {
            return subscriptions.tryUpdatingPreferredReadReplica(
                    tp, partitionData.preferredReadReplica(), () -> {
                        long expireTimeMs = time.milliseconds() + metadata.metadataExpireMs();
                        log.debug("Updating preferred read replica for partition {} to {}, set to expire at {}",
                                tp, partitionData.preferredReadReplica(), expireTimeMs);
                        return expireTimeMs;
                    });
        }

        return true;
    }

    private void handleInitializeErrors(final CompletedFetch completedFetch, final Errors error) {
        final TopicPartition tp = completedFetch.partition;
        final long fetchOffset = completedFetch.nextFetchOffset();

        if (error == Errors.NOT_LEADER_OR_FOLLOWER ||
                error == Errors.REPLICA_NOT_AVAILABLE ||
                error == Errors.KAFKA_STORAGE_ERROR ||
                error == Errors.FENCED_LEADER_EPOCH ||
                error == Errors.OFFSET_NOT_AVAILABLE) {
            log.debug("Error in fetch for partition {}: {}", tp, error.exceptionName());
            requestMetadataUpdate(metadata, subscriptions, tp);
        } else if (error == Errors.UNKNOWN_TOPIC_OR_PARTITION) {
            log.warn("Received unknown topic or partition error in fetch for partition {}", tp);
            requestMetadataUpdate(metadata, subscriptions, tp);
        } else if (error == Errors.UNKNOWN_TOPIC_ID) {
            log.warn("Received unknown topic ID error in fetch for partition {}", tp);
            requestMetadataUpdate(metadata, subscriptions, tp);
        } else if (error == Errors.INCONSISTENT_TOPIC_ID) {
            log.warn("Received inconsistent topic ID error in fetch for partition {}", tp);
            requestMetadataUpdate(metadata, subscriptions, tp);
        } else if (error == Errors.OFFSET_OUT_OF_RANGE) {
            Optional<Integer> clearedReplicaId = subscriptions.clearPreferredReadReplica(tp);

            if (!clearedReplicaId.isPresent()) {
                // If there's no preferred replica to clear, we're fetching from the leader so handle this error normally
                SubscriptionState.FetchPosition position = subscriptions.positionOrNull(tp);

                if (position == null || fetchOffset != position.offset) {
                    log.debug("Discarding stale fetch response for partition {} since the fetched offset {} " +
                            "does not match the current offset {} or the partition has been unassigned", tp, fetchOffset, position);
                } else {
                    String errorMessage = "Fetch position " + position + " is out of range for partition " + tp;

                    if (subscriptions.hasDefaultOffsetResetPolicy()) {
                        log.info("{}, resetting offset", errorMessage);
                        subscriptions.requestOffsetResetIfPartitionAssigned(tp);
                    } else {
                        log.info("{}, raising error to the application since no reset policy is configured", errorMessage);
                        throw new OffsetOutOfRangeException(errorMessage, Collections.singletonMap(tp, position.offset));
                    }
                }
            } else {
                log.debug("Unset the preferred read replica {} for partition {} since we got {} when fetching {}",
                        clearedReplicaId.get(), tp, error, fetchOffset);
            }
        } else if (error == Errors.TOPIC_AUTHORIZATION_FAILED) {
            // we log the actual partition and not just the topic to help with ACL propagation issues in large clusters
            log.warn("Not authorized to read from partition {}.", tp);
            throw new TopicAuthorizationException(Collections.singleton(tp.topic()));
        } else if (error == Errors.UNKNOWN_LEADER_EPOCH) {
            log.debug("Received unknown leader epoch error in fetch for partition {}", tp);
        } else if (error == Errors.UNKNOWN_SERVER_ERROR) {
            log.warn("Unknown server error while fetching offset {} for topic-partition {}", fetchOffset, tp);
        } else if (error == Errors.CORRUPT_MESSAGE) {
            throw new KafkaException("Encountered corrupt message when fetching offset " + fetchOffset +
                    " for topic-partition " + tp);
        } else {
            throw new IllegalStateException("Unexpected error code " + error.code() +
                    " while fetching at offset " + fetchOffset + " from topic-partition " + tp);
        }
    }
}



