org.apache.kafka.common.requests.FetchRequest

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.common.requests;

import org.apache.kafka.common.IsolationLevel;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.protocol.ApiKeys;
import org.apache.kafka.common.protocol.Errors;
import org.apache.kafka.common.protocol.types.Field;
import org.apache.kafka.common.protocol.types.Schema;
import org.apache.kafka.common.protocol.types.Struct;
import org.apache.kafka.common.protocol.types.Type;
import org.apache.kafka.common.record.MemoryRecords;
import org.apache.kafka.common.utils.Utils;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;

import static org.apache.kafka.common.protocol.CommonFields.CURRENT_LEADER_EPOCH;
import static org.apache.kafka.common.protocol.CommonFields.PARTITION_ID;
import static org.apache.kafka.common.protocol.CommonFields.TOPIC_NAME;
import static org.apache.kafka.common.requests.FetchMetadata.FINAL_EPOCH;
import static org.apache.kafka.common.requests.FetchMetadata.INVALID_SESSION_ID;

public class FetchRequest extends AbstractRequest {
    public static final int CONSUMER_REPLICA_ID = -1;

    private static final Field.ComplexArray TOPICS = new Field.ComplexArray("topics",
            "Topics to fetch in the order provided.");
    private static final Field.ComplexArray FORGOTTEN_TOPICS = new Field.ComplexArray("forgotten_topics_data",
            "Topics to remove from the fetch session.");
    private static final Field.Int32 MAX_BYTES = new Field.Int32("max_bytes",
            "Maximum bytes to accumulate in the response. Note that this is not an absolute maximum, " +
                    "if the first message in the first non-empty partition of the fetch is larger than this " +
                    "value, the message will still be returned to ensure that progress can be made.");
    private static final Field.Int8 ISOLATION_LEVEL = new Field.Int8("isolation_level",
            "This setting controls the visibility of transactional records. Using READ_UNCOMMITTED " +
                    "(isolation_level = 0) makes all records visible. With READ_COMMITTED (isolation_level = 1), " +
                    "non-transactional and COMMITTED transactional records are visible. To be more concrete, " +
                    "READ_COMMITTED returns all data from offsets smaller than the current LSO (last stable offset), " +
                    "and enables the inclusion of the list of aborted transactions in the result, which allows " +
                    "consumers to discard ABORTED transactional records");
    private static final Field.Int32 SESSION_ID = new Field.Int32("session_id", "The fetch session ID");
    private static final Field.Int32 SESSION_EPOCH = new Field.Int32("session_epoch", "The fetch session epoch");
    private static final Field.Str RACK_ID = new Field.Str("rack_id", "The consumer's rack id");

    // topic level fields
    private static final Field.ComplexArray PARTITIONS = new Field.ComplexArray("partitions",
            "Partitions to fetch.");

    // partition level fields
    private static final Field.Int32 REPLICA_ID = new Field.Int32("replica_id",
            "Broker id of the follower. For normal consumers, use -1.");
    private static final Field.Int64 FETCH_OFFSET = new Field.Int64("fetch_offset", "Message offset.");
    private static final Field.Int32 PARTITION_MAX_BYTES = new Field.Int32("partition_max_bytes",
            "Maximum bytes to fetch.");
    private static final Field.Int32 MAX_WAIT_TIME = new Field.Int32("max_wait_time",
            "Maximum time in ms to wait for the response.");
    private static final Field.Int32 MIN_BYTES = new Field.Int32("min_bytes",
            "Minimum bytes to accumulate in the response.");
    private static final Field.Int64 LOG_START_OFFSET = new Field.Int64("log_start_offset",
            "Earliest available offset of the follower replica. " +
                    "The field is only used when request is sent by follower. ");

    private static final Field PARTITIONS_V0 = PARTITIONS.withFields(
            PARTITION_ID,
            FETCH_OFFSET,
            PARTITION_MAX_BYTES);

    private static final Field TOPICS_V0 = TOPICS.withFields(
            TOPIC_NAME,
            PARTITIONS_V0);

    private static final Schema FETCH_REQUEST_V0 = new Schema(
            REPLICA_ID,
            MAX_WAIT_TIME,
            MIN_BYTES,
            TOPICS_V0);

    // The V1 Fetch Request body is the same as V0.
    // Only the version number is incremented to indicate a newer client
    private static final Schema FETCH_REQUEST_V1 = FETCH_REQUEST_V0;

    // V2 bumped to indicate that the client supports message format V1, which uses relative offsets and has timestamps.
    private static final Schema FETCH_REQUEST_V2 = FETCH_REQUEST_V1;

    // V3 added top level max_bytes field - the total size of partition data to accumulate in response.
    // The partition ordering is now relevant - partitions will be processed in the order they appear in the request.
    private static final Schema FETCH_REQUEST_V3 = new Schema(
            REPLICA_ID,
            MAX_WAIT_TIME,
            MIN_BYTES,
            MAX_BYTES,
            TOPICS_V0);

    // V4 adds the fetch isolation level and exposes magic v2 (via the response).
    private static final Schema FETCH_REQUEST_V4 = new Schema(
            REPLICA_ID,
            MAX_WAIT_TIME,
            MIN_BYTES,
            MAX_BYTES,
            ISOLATION_LEVEL,
            TOPICS_V0);


    // V5 added log_start_offset field - the earliest available offset of partition data that can be consumed.
    private static final Field PARTITIONS_V5 = PARTITIONS.withFields(
            PARTITION_ID,
            FETCH_OFFSET,
            LOG_START_OFFSET,
            PARTITION_MAX_BYTES);

    private static final Field TOPICS_V5 = TOPICS.withFields(
            TOPIC_NAME,
            PARTITIONS_V5);

    private static final Schema FETCH_REQUEST_V5 = new Schema(
            REPLICA_ID,
            MAX_WAIT_TIME,
            MIN_BYTES,
            MAX_BYTES,
            ISOLATION_LEVEL,
            TOPICS_V5);

    // V6 bumped up to indicate that the client supports KafkaStorageException. The KafkaStorageException will be
    // translated to NotLeaderOrFollowerException in the response if version <= 5
    private static final Schema FETCH_REQUEST_V6 = FETCH_REQUEST_V5;

    // V7 added incremental fetch requests.
    private static final Field.Array FORGOTTEN_PARTITIONS = new Field.Array("partitions", Type.INT32,
            "Partitions to remove from the fetch session.");
    private static final Field FORGOTTEN_TOPIC_DATA_V7 = FORGOTTEN_TOPICS.withFields(
            TOPIC_NAME,
            FORGOTTEN_PARTITIONS);

    private static final Schema FETCH_REQUEST_V7 = new Schema(
            REPLICA_ID,
            MAX_WAIT_TIME,
            MIN_BYTES,
            MAX_BYTES,
            ISOLATION_LEVEL,
            SESSION_ID,
            SESSION_EPOCH,
            TOPICS_V5,
            FORGOTTEN_TOPIC_DATA_V7);

    // V8 bump used to indicate that on quota violation brokers send out responses before throttling.
    private static final Schema FETCH_REQUEST_V8 = FETCH_REQUEST_V7;

    // V9 adds the current leader epoch (see KIP-320)
    private static final Field FETCH_REQUEST_PARTITION_V9 = PARTITIONS.withFields(
            PARTITION_ID,
            CURRENT_LEADER_EPOCH,
            FETCH_OFFSET,
            LOG_START_OFFSET,
            PARTITION_MAX_BYTES);

    private static final Field FETCH_REQUEST_TOPIC_V9 = TOPICS.withFields(
            TOPIC_NAME,
            FETCH_REQUEST_PARTITION_V9);

    private static final Schema FETCH_REQUEST_V9 = new Schema(
            REPLICA_ID,
            MAX_WAIT_TIME,
            MIN_BYTES,
            MAX_BYTES,
            ISOLATION_LEVEL,
            SESSION_ID,
            SESSION_EPOCH,
            FETCH_REQUEST_TOPIC_V9,
            FORGOTTEN_TOPIC_DATA_V7);

    // V10 bumped up to indicate ZStandard capability. (see KIP-110)
    private static final Schema FETCH_REQUEST_V10 = FETCH_REQUEST_V9;

    // V11 added rack ID to support read from followers (KIP-392)
    private static final Schema FETCH_REQUEST_V11 = new Schema(
            REPLICA_ID,
            MAX_WAIT_TIME,
            MIN_BYTES,
            MAX_BYTES,
            ISOLATION_LEVEL,
            SESSION_ID,
            SESSION_EPOCH,
            FETCH_REQUEST_TOPIC_V9,
            FORGOTTEN_TOPIC_DATA_V7,
            RACK_ID);

    public static Schema[] schemaVersions() {
        return new Schema[]{FETCH_REQUEST_V0, FETCH_REQUEST_V1, FETCH_REQUEST_V2, FETCH_REQUEST_V3, FETCH_REQUEST_V4,
            FETCH_REQUEST_V5, FETCH_REQUEST_V6, FETCH_REQUEST_V7, FETCH_REQUEST_V8, FETCH_REQUEST_V9,
            FETCH_REQUEST_V10, FETCH_REQUEST_V11};
    }
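
    // Illustrative sketch: the array above is indexed by API version, so version v resolves to
    // schemaVersions()[v]. Both of the following should refer to the V11 schema:
    //
    //   Schema direct = FetchRequest.schemaVersions()[11];
    //   Schema viaApiKeys = ApiKeys.FETCH.requestSchema((short) 11);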

    // default values for older versions where a request level limit did not exist
    public static final int DEFAULT_RESPONSE_MAX_BYTES = Integer.MAX_VALUE;
    public static final long INVALID_LOG_START_OFFSET = -1L;

    private final int replicaId;
    private final int maxWait;
    private final int minBytes;
    private final int maxBytes;
    private final IsolationLevel isolationLevel;

    // Note: the iteration order of this map is significant, since it determines the order
    // in which partitions appear in the message.  For this reason, this map should have a
    // deterministic iteration order, like LinkedHashMap or TreeMap (but unlike HashMap).
    private final Map<TopicPartition, PartitionData> fetchData;

    private final List<TopicPartition> toForget;
    private final FetchMetadata metadata;
    private final String rackId;

    public static final class PartitionData {
        public final long fetchOffset;
        public final long logStartOffset;
        public final int maxBytes;
        public final Optional<Integer> currentLeaderEpoch;

        public PartitionData(long fetchOffset, long logStartOffset, int maxBytes, Optional<Integer> currentLeaderEpoch) {
            this.fetchOffset = fetchOffset;
            this.logStartOffset = logStartOffset;
            this.maxBytes = maxBytes;
            this.currentLeaderEpoch = currentLeaderEpoch;
        }

        @Override
        public String toString() {
            return "(fetchOffset=" + fetchOffset +
                    ", logStartOffset=" + logStartOffset +
                    ", maxBytes=" + maxBytes +
                    ", currentLeaderEpoch=" + currentLeaderEpoch +
                    ")";
        }

        @Override
        public int hashCode() {
            return Objects.hash(fetchOffset, logStartOffset, maxBytes, currentLeaderEpoch);
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            PartitionData that = (PartitionData) o;
            return fetchOffset == that.fetchOffset &&
                    logStartOffset == that.logStartOffset &&
                    maxBytes == that.maxBytes &&
                    currentLeaderEpoch.equals(that.currentLeaderEpoch);
        }
    }
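
    // Example (illustrative values): a fetch of one partition starting at offset 42, advertising
    // a log start offset of 0, capping this partition's share of the response at 1 MiB, and
    // supplying the last leader epoch the client observed (v9+ only):
    //
    //   PartitionData pd = new PartitionData(42L, 0L, 1024 * 1024, Optional.of(5));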

    static final class TopicAndPartitionData<T> {
        public final String topic;
        public final LinkedHashMap<Integer, T> partitions;

        public TopicAndPartitionData(String topic) {
            this.topic = topic;
            this.partitions = new LinkedHashMap<>();
        }

        public static <T> List<TopicAndPartitionData<T>> batchByTopic(Iterator<Map.Entry<TopicPartition, T>> iter) {
            List<TopicAndPartitionData<T>> topics = new ArrayList<>();
            while (iter.hasNext()) {
                Map.Entry<TopicPartition, T> topicEntry = iter.next();
                String topic = topicEntry.getKey().topic();
                int partition = topicEntry.getKey().partition();
                T partitionData = topicEntry.getValue();
                if (topics.isEmpty() || !topics.get(topics.size() - 1).topic.equals(topic))
                    topics.add(new TopicAndPartitionData<T>(topic));
                topics.get(topics.size() - 1).partitions.put(partition, partitionData);
            }
            return topics;
        }
    }
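
    // Sketch of batchByTopic (illustrative; "a" and "b" are hypothetical topics, pd0..pd2
    // hypothetical PartitionData values): adjacent entries with the same topic are grouped into a
    // single TopicAndPartitionData, preserving iteration order - which is why fetchData must use a
    // map with a deterministic order:
    //
    //   LinkedHashMap<TopicPartition, PartitionData> data = new LinkedHashMap<>();
    //   data.put(new TopicPartition("a", 0), pd0);
    //   data.put(new TopicPartition("a", 1), pd1);
    //   data.put(new TopicPartition("b", 0), pd2);
    //   // batchByTopic(data.entrySet().iterator()) -> [("a", {0, 1}), ("b", {0})]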

    public static class Builder extends AbstractRequest.Builder<FetchRequest> {
        private final int maxWait;
        private final int minBytes;
        private final int replicaId;
        private final Map<TopicPartition, PartitionData> fetchData;
        private IsolationLevel isolationLevel = IsolationLevel.READ_UNCOMMITTED;
        private int maxBytes = DEFAULT_RESPONSE_MAX_BYTES;
        private FetchMetadata metadata = FetchMetadata.LEGACY;
        private List<TopicPartition> toForget = Collections.emptyList();
        private String rackId = "";

        public static Builder forConsumer(int maxWait, int minBytes, Map<TopicPartition, PartitionData> fetchData) {
            return new Builder(ApiKeys.FETCH.oldestVersion(), ApiKeys.FETCH.latestVersion(),
                CONSUMER_REPLICA_ID, maxWait, minBytes, fetchData);
        }

        public static Builder forReplica(short allowedVersion, int replicaId, int maxWait, int minBytes,
                                         Map<TopicPartition, PartitionData> fetchData) {
            return new Builder(allowedVersion, allowedVersion, replicaId, maxWait, minBytes, fetchData);
        }

        public Builder(short minVersion, short maxVersion, int replicaId, int maxWait, int minBytes,
                        Map<TopicPartition, PartitionData> fetchData) {
            super(ApiKeys.FETCH, minVersion, maxVersion);
            this.replicaId = replicaId;
            this.maxWait = maxWait;
            this.minBytes = minBytes;
            this.fetchData = fetchData;
        }

        public Builder isolationLevel(IsolationLevel isolationLevel) {
            this.isolationLevel = isolationLevel;
            return this;
        }

        public Builder metadata(FetchMetadata metadata) {
            this.metadata = metadata;
            return this;
        }

        public Builder rackId(String rackId) {
            this.rackId = rackId;
            return this;
        }

        public Map<TopicPartition, PartitionData> fetchData() {
            return this.fetchData;
        }

        public Builder setMaxBytes(int maxBytes) {
            this.maxBytes = maxBytes;
            return this;
        }

        public List<TopicPartition> toForget() {
            return toForget;
        }

        public Builder toForget(List<TopicPartition> toForget) {
            this.toForget = toForget;
            return this;
        }

        @Override
        public FetchRequest build(short version) {
            if (version < 3) {
                maxBytes = DEFAULT_RESPONSE_MAX_BYTES;
            }

            return new FetchRequest(version, replicaId, maxWait, minBytes, maxBytes, fetchData,
                isolationLevel, toForget, metadata, rackId);
        }

        @Override
        public String toString() {
            StringBuilder bld = new StringBuilder();
            bld.append("(type=FetchRequest").
                    append(", replicaId=").append(replicaId).
                    append(", maxWait=").append(maxWait).
                    append(", minBytes=").append(minBytes).
                    append(", maxBytes=").append(maxBytes).
                    append(", fetchData=").append(fetchData).
                    append(", isolationLevel=").append(isolationLevel).
                    append(", toForget=").append(Utils.join(toForget, ", ")).
                    append(", metadata=").append(metadata).
                    append(", rackId=").append(rackId).
                    append(")");
            return bld.toString();
        }
    }
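
    // Example (illustrative; the topic name and tuning values are hypothetical): building a fetch
    // request the way a consumer would, waiting up to 500 ms for at least 1 byte, reading with
    // READ_COMMITTED isolation, and starting a new fetch session (assuming FetchMetadata.INITIAL
    // denotes the initial session state):
    //
    //   Map<TopicPartition, PartitionData> fetchData = new LinkedHashMap<>();
    //   fetchData.put(new TopicPartition("my-topic", 0),
    //       new PartitionData(0L, 0L, 1024 * 1024, Optional.empty()));
    //   FetchRequest request = FetchRequest.Builder
    //       .forConsumer(500, 1, fetchData)
    //       .isolationLevel(IsolationLevel.READ_COMMITTED)
    //       .metadata(FetchMetadata.INITIAL)
    //       .build(ApiKeys.FETCH.latestVersion());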

    private FetchRequest(short version, int replicaId, int maxWait, int minBytes, int maxBytes,
                         Map<TopicPartition, PartitionData> fetchData, IsolationLevel isolationLevel,
                         List<TopicPartition> toForget, FetchMetadata metadata, String rackId) {
        super(ApiKeys.FETCH, version);
        this.replicaId = replicaId;
        this.maxWait = maxWait;
        this.minBytes = minBytes;
        this.maxBytes = maxBytes;
        this.fetchData = fetchData;
        this.isolationLevel = isolationLevel;
        this.toForget = toForget;
        this.metadata = metadata;
        this.rackId = rackId;
    }

    public FetchRequest(Struct struct, short version) {
        super(ApiKeys.FETCH, version);
        replicaId = struct.get(REPLICA_ID);
        maxWait = struct.get(MAX_WAIT_TIME);
        minBytes = struct.get(MIN_BYTES);
        maxBytes = struct.getOrElse(MAX_BYTES, DEFAULT_RESPONSE_MAX_BYTES);

        if (struct.hasField(ISOLATION_LEVEL))
            isolationLevel = IsolationLevel.forId(struct.get(ISOLATION_LEVEL));
        else
            isolationLevel = IsolationLevel.READ_UNCOMMITTED;
        toForget = new ArrayList<>(0);
        if (struct.hasField(FORGOTTEN_TOPICS)) {
            for (Object forgottenTopicObj : struct.get(FORGOTTEN_TOPICS)) {
                Struct forgottenTopic = (Struct) forgottenTopicObj;
                String topicName = forgottenTopic.get(TOPIC_NAME);
                for (Object partObj : forgottenTopic.get(FORGOTTEN_PARTITIONS)) {
                    Integer part = (Integer) partObj;
                    toForget.add(new TopicPartition(topicName, part));
                }
            }
        }
        metadata = new FetchMetadata(struct.getOrElse(SESSION_ID, INVALID_SESSION_ID),
            struct.getOrElse(SESSION_EPOCH, FINAL_EPOCH));

        fetchData = new LinkedHashMap<>();
        for (Object topicResponseObj : struct.get(TOPICS)) {
            Struct topicResponse = (Struct) topicResponseObj;
            String topic = topicResponse.get(TOPIC_NAME);
            for (Object partitionResponseObj : topicResponse.get(PARTITIONS)) {
                Struct partitionResponse = (Struct) partitionResponseObj;
                int partition = partitionResponse.get(PARTITION_ID);
                long offset = partitionResponse.get(FETCH_OFFSET);
                int maxBytes = partitionResponse.get(PARTITION_MAX_BYTES);
                long logStartOffset = partitionResponse.getOrElse(LOG_START_OFFSET, INVALID_LOG_START_OFFSET);

                // Current leader epoch added in v9
                Optional<Integer> currentLeaderEpoch = RequestUtils.getLeaderEpoch(partitionResponse, CURRENT_LEADER_EPOCH);
                PartitionData partitionData = new PartitionData(offset, logStartOffset, maxBytes, currentLeaderEpoch);
                fetchData.put(new TopicPartition(topic, partition), partitionData);
            }
        }
        rackId = struct.getOrElse(RACK_ID, "");
    }

    @Override
    public AbstractResponse getErrorResponse(int throttleTimeMs, Throwable e) {
        // The error is indicated in two ways: by setting the same error code in all partitions, and by
        // setting the top-level error code.  The form where we set the same error code in all partitions
        // is needed in order to maintain backwards compatibility with older versions of the protocol
        // in which there was no top-level error code. Note that for incremental fetch responses, there
        // may not be any partitions at all in the response.  For this reason, the top-level error code
        // is essential for them.
        Errors error = Errors.forException(e);
        LinkedHashMap<TopicPartition, FetchResponse.PartitionData<MemoryRecords>> responseData = new LinkedHashMap<>();
        for (Map.Entry<TopicPartition, PartitionData> entry : fetchData.entrySet()) {
            FetchResponse.PartitionData<MemoryRecords> partitionResponse = new FetchResponse.PartitionData<>(error,
                FetchResponse.INVALID_HIGHWATERMARK, FetchResponse.INVALID_LAST_STABLE_OFFSET,
                FetchResponse.INVALID_LOG_START_OFFSET, Optional.empty(), null, MemoryRecords.EMPTY);
            responseData.put(entry.getKey(), partitionResponse);
        }
        return new FetchResponse<>(error, responseData, throttleTimeMs, metadata.sessionId());
    }
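
    // Illustrative behavior: any exception maps to a single error code set both per partition and
    // at the top level. For example (request is a hypothetical FetchRequest instance):
    //
    //   FetchResponse<MemoryRecords> resp = (FetchResponse<MemoryRecords>)
    //       request.getErrorResponse(0, Errors.UNKNOWN_SERVER_ERROR.exception());
    //   // resp.error() == Errors.UNKNOWN_SERVER_ERROR; every requested partition carries the same
    //   // code with empty records.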

    public int replicaId() {
        return replicaId;
    }

    public int maxWait() {
        return maxWait;
    }

    public int minBytes() {
        return minBytes;
    }

    public int maxBytes() {
        return maxBytes;
    }

    public Map<TopicPartition, PartitionData> fetchData() {
        return fetchData;
    }

    public List<TopicPartition> toForget() {
        return toForget;
    }

    public boolean isFromFollower() {
        return replicaId >= 0;
    }

    public IsolationLevel isolationLevel() {
        return isolationLevel;
    }

    public FetchMetadata metadata() {
        return metadata;
    }

    public String rackId() {
        return rackId;
    }

    public static FetchRequest parse(ByteBuffer buffer, short version) {
        return new FetchRequest(ApiKeys.FETCH.parseRequest(version, buffer), version);
    }
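
    // Round-trip sketch (illustrative; request is a hypothetical FetchRequest instance): serialize
    // a request to its wire format with toStruct() and parse it back at the same version:
    //
    //   Struct struct = request.toStruct();
    //   ByteBuffer buffer = ByteBuffer.allocate(struct.sizeOf());
    //   struct.writeTo(buffer);
    //   buffer.flip();
    //   FetchRequest parsed = FetchRequest.parse(buffer, request.version());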

    @Override
    protected Struct toStruct() {
        Struct struct = new Struct(ApiKeys.FETCH.requestSchema(version()));
        List<TopicAndPartitionData<PartitionData>> topicsData =
            TopicAndPartitionData.batchByTopic(fetchData.entrySet().iterator());

        struct.set(REPLICA_ID, replicaId);
        struct.set(MAX_WAIT_TIME, maxWait);
        struct.set(MIN_BYTES, minBytes);
        struct.setIfExists(MAX_BYTES, maxBytes);
        struct.setIfExists(ISOLATION_LEVEL, isolationLevel.id());
        struct.setIfExists(SESSION_ID, metadata.sessionId());
        struct.setIfExists(SESSION_EPOCH, metadata.epoch());

        List<Struct> topicArray = new ArrayList<>();
        for (TopicAndPartitionData<PartitionData> topicEntry : topicsData) {
            Struct topicData = struct.instance(TOPICS);
            topicData.set(TOPIC_NAME, topicEntry.topic);
            List<Struct> partitionArray = new ArrayList<>();
            for (Map.Entry<Integer, PartitionData> partitionEntry : topicEntry.partitions.entrySet()) {
                PartitionData fetchPartitionData = partitionEntry.getValue();
                Struct partitionData = topicData.instance(PARTITIONS);
                partitionData.set(PARTITION_ID, partitionEntry.getKey());
                partitionData.set(FETCH_OFFSET, fetchPartitionData.fetchOffset);
                partitionData.set(PARTITION_MAX_BYTES, fetchPartitionData.maxBytes);
                partitionData.setIfExists(LOG_START_OFFSET, fetchPartitionData.logStartOffset);
                RequestUtils.setLeaderEpochIfExists(partitionData, CURRENT_LEADER_EPOCH, fetchPartitionData.currentLeaderEpoch);
                partitionArray.add(partitionData);
            }
            topicData.set(PARTITIONS, partitionArray.toArray());
            topicArray.add(topicData);
        }
        struct.set(TOPICS, topicArray.toArray());
        if (struct.hasField(FORGOTTEN_TOPICS)) {
            Map<String, List<Integer>> topicsToPartitions = new HashMap<>();
            for (TopicPartition part : toForget) {
                List<Integer> partitions = topicsToPartitions.computeIfAbsent(part.topic(), topic -> new ArrayList<>());
                partitions.add(part.partition());
            }
            List<Struct> toForgetStructs = new ArrayList<>();
            for (Map.Entry<String, List<Integer>> entry : topicsToPartitions.entrySet()) {
                Struct toForgetStruct = struct.instance(FORGOTTEN_TOPICS);
                toForgetStruct.set(TOPIC_NAME, entry.getKey());
                toForgetStruct.set(FORGOTTEN_PARTITIONS, entry.getValue().toArray());
                toForgetStructs.add(toForgetStruct);
            }
            struct.set(FORGOTTEN_TOPICS, toForgetStructs.toArray());
        }
        struct.setIfExists(RACK_ID, rackId);
        return struct;
    }
}



