io.streamnative.pulsar.handlers.kop.proxy.KafkaProxyRequestHandler

/**
 * Copyright (c) 2019 - 2024 StreamNative, Inc. All Rights Reserved.
 */
/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.streamnative.pulsar.handlers.kop.proxy;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.kafka.common.message.DeleteRecordsRequestData.DeleteRecordsTopic;
import static org.apache.kafka.common.message.DescribeClusterResponseData.DescribeClusterBroker;
import static org.apache.kafka.common.message.DescribeClusterResponseData.DescribeClusterBrokerCollection;
import static org.apache.kafka.common.message.FetchRequestData.FetchTopic;
import static org.apache.kafka.common.message.FetchResponseData.FetchableTopicResponse;
import static org.apache.kafka.common.message.ListOffsetsRequestData.ListOffsetsPartition;
import static org.apache.kafka.common.message.ListOffsetsRequestData.ListOffsetsTopic;
import static org.apache.kafka.common.message.ListOffsetsResponseData.ListOffsetsPartitionResponse;
import static org.apache.kafka.common.message.ListOffsetsResponseData.ListOffsetsTopicResponse;
import static org.apache.kafka.common.message.ProduceRequestData.TopicProduceData;
import static org.apache.kafka.common.message.ProduceRequestData.TopicProduceDataCollection;
import static org.apache.kafka.common.message.ProduceResponseData.PartitionProduceResponse;
import static org.apache.kafka.common.message.ProduceResponseData.TopicProduceResponse;
import static org.apache.kafka.common.requests.ProduceResponse.PartitionResponse;
import static org.apache.kafka.common.requests.ProduceResponse.RecordError;

import com.github.benmanes.caffeine.cache.Cache;
import com.google.common.annotations.VisibleForTesting;
import io.netty.buffer.ByteBuf;
import io.netty.channel.Channel;
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.ChannelInboundHandlerAdapter;
import io.netty.handler.ssl.SslHandshakeCompletionEvent;
import io.netty.util.ReferenceCountUtil;
import io.streamnative.pulsar.handlers.kop.EndPoint;
import io.streamnative.pulsar.handlers.kop.KafkaRequestHandler;
import io.streamnative.pulsar.handlers.kop.KafkaServiceConfiguration;
import io.streamnative.pulsar.handlers.kop.security.Authenticator;
import io.streamnative.pulsar.handlers.kop.security.ProxySslSaslServer;
import io.streamnative.pulsar.handlers.kop.security.Session;
import io.streamnative.pulsar.handlers.kop.security.SslAuthenticator;
import io.streamnative.pulsar.handlers.kop.utils.CoreUtils;
import io.streamnative.pulsar.handlers.kop.utils.KafkaResponseUtils;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Function;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.kafka.common.Node;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.UnsupportedVersionException;
import org.apache.kafka.common.message.DeleteRecordsRequestData;
import org.apache.kafka.common.message.FetchRequestData;
import org.apache.kafka.common.message.FetchResponseData;
import org.apache.kafka.common.message.FindCoordinatorResponseData;
import org.apache.kafka.common.message.ListOffsetsResponseData;
import org.apache.kafka.common.message.MetadataResponseData;
import org.apache.kafka.common.message.ProduceRequestData;
import org.apache.kafka.common.message.ProduceResponseData;
import org.apache.kafka.common.message.SaslAuthenticateRequestData;
import org.apache.kafka.common.message.SaslHandshakeRequestData;
import org.apache.kafka.common.protocol.ApiKeys;
import org.apache.kafka.common.protocol.Errors;
import org.apache.kafka.common.requests.AddOffsetsToTxnResponse;
import org.apache.kafka.common.requests.AddPartitionsToTxnResponse;
import org.apache.kafka.common.requests.DeleteRecordsRequest;
import org.apache.kafka.common.requests.DeleteRecordsResponse;
import org.apache.kafka.common.requests.DescribeClusterResponse;
import org.apache.kafka.common.requests.EndTxnResponse;
import org.apache.kafka.common.requests.FetchRequest;
import org.apache.kafka.common.requests.FetchResponse;
import org.apache.kafka.common.requests.FindCoordinatorRequest;
import org.apache.kafka.common.requests.FindCoordinatorResponse;
import org.apache.kafka.common.requests.HeartbeatResponse;
import org.apache.kafka.common.requests.InitProducerIdResponse;
import org.apache.kafka.common.requests.JoinGroupResponse;
import org.apache.kafka.common.requests.KopResponseUtils;
import org.apache.kafka.common.requests.LeaveGroupResponse;
import org.apache.kafka.common.requests.ListOffsetsRequest;
import org.apache.kafka.common.requests.ListOffsetsResponse;
import org.apache.kafka.common.requests.MetadataResponse;
import org.apache.kafka.common.requests.OffsetCommitResponse;
import org.apache.kafka.common.requests.OffsetDeleteResponse;
import org.apache.kafka.common.requests.OffsetFetchResponse;
import org.apache.kafka.common.requests.ProduceRequest;
import org.apache.kafka.common.requests.ProduceResponse;
import org.apache.kafka.common.requests.RequestHeader;
import org.apache.kafka.common.requests.SaslAuthenticateRequest;
import org.apache.kafka.common.requests.SaslHandshakeRequest;
import org.apache.kafka.common.requests.SyncGroupResponse;
import org.apache.kafka.common.requests.TxnOffsetCommitResponse;
import org.apache.kafka.common.security.auth.SecurityProtocol;
import org.apache.pulsar.broker.authentication.AuthenticationProviderTls;
import org.apache.pulsar.common.util.FutureUtil;
import org.apache.pulsar.common.util.Murmur3_32Hash;

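/**
 * Netty inbound handler that terminates Kafka client connections on the KoP proxy.
 *
 * <p>Each inbound Kafka request is parsed and then either answered locally (e.g. API_VERSIONS and
 * FIND_COORDINATOR always advertise the proxy itself), rewritten (METADATA and DESCRIBE_CLUSTER
 * expose the proxy as the only broker while the real partition leaders are remembered in
 * {@code leaderCache}), or forwarded to the owning broker through the {@link BrokerConnectionGroup}.
 * Responses are written back to the client strictly in request order via an in-flight request queue.
 */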
@Slf4j
public class KafkaProxyRequestHandler extends ChannelInboundHandlerAdapter {

    private final LinkedBlockingQueue<InflightRequest> requestQueue = new LinkedBlockingQueue<>(500);
    private final AtomicBoolean isActive = new AtomicBoolean(false);
    private final Node selfNode;
    private final List<Integer> replicaIds;
    private final BrokerConnectionGroup connectionGroup;
    @VisibleForTesting
    final Cache<TopicPartition, InetSocketAddress> leaderCache;
    private ChannelHandlerContext ctx;
    private Authenticator authenticator;

    public KafkaProxyRequestHandler(final EndPoint advertisedEndPoint,
                                    final ConnectionFactory connectionFactory,
                                    final boolean isClientAuth,
                                    final Cache<TopicPartition, InetSocketAddress> leaderCache) throws IOException {
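        // Derive a stable synthetic node id from the advertised host:port so that all clients see
        // the proxy as one consistent Kafka node.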
        this.selfNode = new Node(Murmur3_32Hash.getInstance().makeHash(
                (advertisedEndPoint.getHostname() + advertisedEndPoint.getPort()).getBytes(UTF_8)
        ), advertisedEndPoint.getHostname(), advertisedEndPoint.getPort());
        this.replicaIds = Collections.singletonList(selfNode.id());
        this.connectionGroup = new BrokerConnectionGroup(connectionFactory);
        if (advertisedEndPoint.getSecurityProtocol().equals(SecurityProtocol.SSL) && isClientAuth) {
            AuthenticationProviderTls authenticationProviderTls = new AuthenticationProviderTls();
            this.authenticator = new SslAuthenticator(authenticationProviderTls, new KafkaServiceConfiguration());
        }
        this.leaderCache = leaderCache;
    }

    @Override
    public void channelRead(final ChannelHandlerContext ctx, final Object msg) throws Exception {
        final var buf = (ByteBuf) msg;
        KafkaProxyExtension.BYTES_COUNTER.inc(buf.readableBytes());
        try {
            final var channel = ctx.channel();
            final var inflightRequest = new InflightRequest(buf, channel.remoteAddress());
            if (log.isDebugEnabled()) {
                log.debug("[{}] Received kafka cmd {}", channel, inflightRequest);
            }

            final var apiKeys = inflightRequest.getHeader().apiKey();
            inflightRequest.registerCallback(() -> flush(channel), ctx.executor());
            KafkaProxyExtension.OPS_COUNTER.inc();

            if (!ApiKeys.PRODUCE.equals(apiKeys) || ((ProduceRequest) inflightRequest.getRequest()).acks() != 0) {
                requestQueue.put(inflightRequest);
            }
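            // PRODUCE with acks=0 expects no response, so it bypasses the ordered response queue;
            // every other request is tracked there and answered in arrival order.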
            // TODO: *_GROUPS requests might need to be split for multiple group coordinators
            switch (apiKeys) {
                case API_VERSIONS -> handleApiVersions(inflightRequest);
                case METADATA -> handleMetadata(inflightRequest);
                case PRODUCE -> handleProduce(inflightRequest);
                case FIND_COORDINATOR -> handleFindCoordinator(inflightRequest);
                case JOIN_GROUP, SYNC_GROUP, LEAVE_GROUP, OFFSET_FETCH, OFFSET_COMMIT, HEARTBEAT, OFFSET_DELETE,
                        TXN_OFFSET_COMMIT -> handleGroupRequest(apiKeys, inflightRequest);
                case LIST_OFFSETS -> handleListOffsets(inflightRequest);
                case FETCH -> handleFetch(inflightRequest);
                case SASL_HANDSHAKE, SASL_AUTHENTICATE -> connectionGroup.authenticate(inflightRequest);
                case CREATE_TOPICS, DELETE_TOPICS, DESCRIBE_CONFIGS, ALTER_CONFIGS,
                        LIST_GROUPS, DELETE_GROUPS, DESCRIBE_GROUPS ->
                        connectionGroup.getMetadataBroker().forwardRequest(inflightRequest);
                case DESCRIBE_CLUSTER -> handleDescribeCluster(inflightRequest);
                case DELETE_RECORDS -> handleDeleteRecords(inflightRequest);
                case INIT_PRODUCER_ID, ADD_PARTITIONS_TO_TXN, ADD_OFFSETS_TO_TXN, END_TXN ->
                        handleTxnRequest(apiKeys, inflightRequest);
                case WRITE_TXN_MARKERS -> throw new IllegalStateException(apiKeys + " should be handled in broker");
                default -> inflightRequest.complete(inflightRequest.getRequest().getErrorResponse(
                        new UnsupportedVersionException("API " + apiKeys + " is not supported")));
            }
        } catch (IOException e) {
            log.warn("{}", e.getMessage());
            close(ctx);
        } catch (Throwable throwable) {
            log.error("[{}] Unexpected exception when handling request", ctx.channel(), throwable);
            close(ctx);
        } finally {
            ReferenceCountUtil.safeRelease(buf);
        }
    }

    @Override
    public void userEventTriggered(ChannelHandlerContext ctx, Object evt) throws Exception {
        // Handle ssl handshake completion event
        if (evt instanceof SslHandshakeCompletionEvent sslEvent) {
            if (sslEvent.isSuccess()) {
                if (this.authenticator instanceof SslAuthenticator) {
                    this.authenticator.authenticate(ctx,
                        null,
                        null,
                        null,
                        null);
                    Session session = this.authenticator.session();
                    if (session != null && session.getPrincipal() != null && session.getPrincipal().getName() != null) {
                        InflightRequest saslHandshake = newSaslHandshake(ProxySslSaslServer.PROXY_SSL_MECHANISM);
                        connectionGroup.addSaslRequestBuffer(saslHandshake);
                        InflightRequest saslAuthenticate = newSaslAuthenticate(
                            session.getPrincipal().getName().getBytes(UTF_8));
                        connectionGroup.addSaslRequestBuffer(saslAuthenticate);
                    }
                }
            }
        } else {
            super.userEventTriggered(ctx, evt);
        }
    }

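    /**
     * Builds a synthetic SASL_HANDSHAKE request from a placeholder client address; together with
     * {@link #newSaslAuthenticate(byte[])} it replays the TLS-authenticated principal to brokers
     * via the proxy's internal SASL mechanism.
     */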
    private static InflightRequest newSaslHandshake(final String mechanism) {
        var request = new SaslHandshakeRequest.Builder(new SaslHandshakeRequestData()
            .setMechanism(mechanism)).build();
        ByteBuf byteBuf = KopResponseUtils.serializeRequestToPooledBuffer(new RequestHeader(ApiKeys.SASL_HANDSHAKE,
            (short) 1, "kop-proxy", 0), request);
        // fake address
        InetSocketAddress localhost = InetSocketAddress.createUnresolved("localhost", 65535);
        return new InflightRequest(byteBuf, localhost);
    }

    private static InflightRequest newSaslAuthenticate(final byte[] saslAuthBytes) {
        var request = new SaslAuthenticateRequest
            .Builder(new SaslAuthenticateRequestData().setAuthBytes(saslAuthBytes))
            .build();
        ByteBuf byteBuf = KopResponseUtils.serializeRequestToPooledBuffer(new RequestHeader(ApiKeys.SASL_AUTHENTICATE,
            (short) 2, "kop-proxy", 1), request);
        // fake address
        InetSocketAddress localhost = InetSocketAddress.createUnresolved("localhost", 65535);
        return new InflightRequest(byteBuf, localhost);
    }

    @Override
    public void channelActive(ChannelHandlerContext ctx) throws Exception {
        super.channelActive(ctx);
        this.ctx = ctx;
        this.connectionGroup.setClientChannel(ctx);
        isActive.set(true);
        KafkaProxyExtension.ACTIVE_CONNECTIONS.inc();
        KafkaProxyExtension.NEW_CONNECTIONS.inc();
    }

    @Override
    public void channelInactive(final ChannelHandlerContext ctx) throws Exception {
        super.channelInactive(ctx);
        log.info("close channel {}", ctx.channel());
        KafkaProxyExtension.ACTIVE_CONNECTIONS.dec();
        connectionGroup.close();
    }

    @Override
    public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
        log.error("[{}] Unexpected exception", ctx.channel(), cause);
        close(ctx);
    }

    private void close(final ChannelHandlerContext ctx) {
        if (isActive.compareAndSet(true, false)) {
            ctx.close();
            if (!requestQueue.isEmpty()) {
                log.info("[{}] Close with {} pending requests", ctx, requestQueue.size());
            }
            requestQueue.clear();
            connectionGroup.close();
        }
    }

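    /**
     * Drains completed requests from the head of the queue and writes their responses back to the
     * client in arrival order, stopping at the first request that has no response yet.
     */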
    private void flush(final Channel channel) {
        while (isActive.get()) {
            final var inflightRequest = requestQueue.peek();
            if (inflightRequest == null) {
                break;
            }
            if (!inflightRequest.hasReceivedResponse()) {
                break;
            } else if (!requestQueue.remove(inflightRequest)) { // it has been removed by another thread
                continue;
            }

            if (inflightRequest.hasFailed(e -> {
                if (e instanceof ConnectionToBroker.ConnectError) {
                    log.warn("[{}] {} failed with {}", channel, inflightRequest.getHeader(), e.getMessage());
                } else {
                    log.error("[{}] request {} completed exceptionally", channel, inflightRequest.getHeader(), e);
                }
                close(ctx);
            })) {
                return;
            }

            final var buf = inflightRequest.toResponseBuf();
            if (log.isDebugEnabled()) {
                log.debug("[{}] Write kafka cmd to client ({} requests left): {}",
                        channel, requestQueue.size(), inflightRequest.getHeader());
            }
            channel.writeAndFlush(buf).addListener(future -> {
                if (!future.isSuccess()) {
                    log.error("[{}] Failed to write {}", channel, inflightRequest.getHeader(), future.cause());
                }
            });
        }
    }

    private void handleApiVersions(final InflightRequest inflightRequest) {
        short version = inflightRequest.getHeader().apiVersion();
        inflightRequest.complete(KafkaRequestHandler.overloadDefaultApiVersionsResponse(
                !ApiKeys.API_VERSIONS.isVersionSupported(version)));
    }

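    /**
     * Forwards METADATA to a broker, caches the real partition leaders for later routing, and then
     * rewrites the response so the proxy is the only broker (and controller) the client sees.
     */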
    private void handleMetadata(final InflightRequest inflightRequest) throws IOException {
        inflightRequest.setResponseMapper(originalResponse -> {
            final var metadataResponse = (MetadataResponse) originalResponse;
            final var data = metadataResponse.data();
            final var brokers = data.brokers();

            if (log.isDebugEnabled()) {
                final var leaderMap = new HashMap<TopicPartition, String>();
                data.topics().forEach(topic -> topic.partitions().forEach(partition -> {
                    final var topicPartition = new TopicPartition(topic.name(), partition.partitionIndex());
                    final var broker = brokers.find(partition.leaderId());
                    if (broker != null) {
                        leaderMap.put(topicPartition, broker.host() + ":" + broker.port());
                    } else {
                        leaderMap.put(topicPartition, Errors.forCode(partition.errorCode()).message());
                    }
                }));
                log.debug("[{}] MetadataResponse: {}", inflightRequest.getHeader(), leaderMap);
            }

            data.topics().forEach(topic -> {
                final String topicName = topic.name();
                topic.partitions().forEach(partition -> {
                    final var broker = brokers.find(partition.leaderId());
                    if (broker != null) {
                        leaderCache.put(new TopicPartition(topicName, partition.partitionIndex()),
                                InetSocketAddress.createUnresolved(broker.host(), broker.port()));
                    }
                    partition.setLeaderId(selfNode.id());
                    partition.setReplicaNodes(replicaIds);
                    partition.setIsrNodes(replicaIds);
                });
            });

            data.setControllerId(selfNode.id());
            brokers.clear();
            brokers.add(new MetadataResponseData.MetadataResponseBroker().setNodeId(selfNode.id())
                    .setHost(selfNode.host()).setPort(selfNode.port()));
            return metadataResponse;
        });
        connectionGroup.getMetadataBroker().forwardRequest(inflightRequest);
    }

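    /**
     * Routes PRODUCE data to the cached partition leaders. With a single target broker the request
     * is forwarded as-is; otherwise it is split into one request per leader and the partial
     * responses are merged. Partitions without a cached leader fail with NOT_LEADER_OR_FOLLOWER.
     */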
    @VisibleForTesting
    void handleProduce(final InflightRequest inflightRequest) throws IOException {
        final var request = (ProduceRequest) inflightRequest.getRequest();
        final var errorsMap = new HashMap<TopicPartition, Errors>();

        final var partitionDataMap = new HashMap<InetSocketAddress, Map<String, TopicProduceData>>();
        for (var topicData : request.data().topicData()) {
            final var topic = topicData.name();
            for (var partitionData : topicData.partitionData()) {
                final var topicPartition = new TopicPartition(topic, partitionData.index());
                final var leader = leaderCache.getIfPresent(topicPartition);
                if (leader == null) {
                    errorsMap.put(topicPartition, Errors.NOT_LEADER_OR_FOLLOWER);
                    continue;
                }
                partitionDataMap.computeIfAbsent(leader, __ -> new HashMap<>())
                        .computeIfAbsent(topic, __ -> new TopicProduceData().setName(topic))
                        .partitionData().add(partitionData);
            }
        }

        if (partitionDataMap.isEmpty()) {
            log.warn("No leader found for {}", inflightRequest.getHeader());
            inflightRequest.complete(createProduceResponse(errorsMap));
            return;
        }

        final Function<InetSocketAddress, Optional<ConnectionToBroker>> getLeader = address -> {
            try {
                return Optional.of(connectionGroup.getLeader(address));
            } catch (IOException e) {
                log.warn("[{}] Failed to connect to leader {}: {}", ctx, address, e.getMessage());
                Optional.ofNullable(partitionDataMap.get(address)).ifPresent(map -> map.forEach((topic, data) ->
                    data.partitionData().stream().map(__ -> new TopicPartition(topic, __.index()))
                        .forEach(topicPartition -> {
                            leaderCache.invalidate(topicPartition);
                            errorsMap.put(topicPartition, Errors.NOT_LEADER_OR_FOLLOWER);
                        })));
                return Optional.empty();
            }
        };

        final boolean cacheRequest = request.acks() != 0;
        // If there is only 1 broker to send, forward the request directly. Otherwise, N Produce requests need to be
        // created to N brokers.
        if (errorsMap.isEmpty() && partitionDataMap.size() == 1) {
            inflightRequest.setResponseMapper(originalResponse -> {
                if (errorsMap.isEmpty()) {
                    return originalResponse;
                }
                if (originalResponse == null) {
                    return createProduceResponse(errorsMap);
                }
                final var produceResponse = (ProduceResponse) originalResponse;
                return createProduceResponse(errorsMap, produceResponse.data());
            });
            getLeader.apply(partitionDataMap.keySet().iterator().next()).ifPresentOrElse(leader -> {
                // When errorsMap is not empty, the response needs to be merged with errorsMap
                inflightRequest.setSkipParsingResponse(errorsMap.isEmpty());
                leader.forwardRequest(inflightRequest, cacheRequest);
            }, () -> inflightRequest.complete(null));
        } else {
            final var responseFutures = new ArrayList<CompletableFuture<ProduceResponse>>();
            partitionDataMap.forEach((address, topicDataMap) -> {
                getLeader.apply(address).ifPresent(connection -> {
                    final var singleRequest = new ProduceRequest(new ProduceRequestData().setAcks(request.acks())
                            .setTimeoutMs(request.timeout()).setTransactionalId(request.transactionalId())
                            .setTopicData(new TopicProduceDataCollection(topicDataMap.values().iterator())),
                            request.version());
                    final var buf = KopResponseUtils.serializeRequestToPooledBuffer(
                            inflightRequest.getHeader(), singleRequest);
                    final var singleInflightRequest = new InflightRequest(
                            buf, inflightRequest.getRemoteAddress(), false);
                    responseFutures.add(singleInflightRequest.getResponseFuture());
                    connection.forwardRequest(singleInflightRequest);
                });
            });
            FutureUtil.waitForAll(responseFutures).thenAccept(__ -> {
                final var map = CoreUtils.mapValue(errorsMap, PartitionResponse::new);
                responseFutures.stream().map(CompletableFuture::join).forEach(singleResponse -> {
                    singleResponse.data().responses().forEach(topicProduceResponse -> {
                        final var topic = topicProduceResponse.name();
                        topicProduceResponse.partitionResponses().forEach(r -> {
                            final var topicPartition = new TopicPartition(topic, r.index());
                            map.put(topicPartition, new PartitionResponse(Errors.forCode(r.errorCode()), r.baseOffset(),
                                    r.logAppendTimeMs(), r.logStartOffset(), r.recordErrors().stream().map(e ->
                                    new RecordError(e.batchIndex(), e.batchIndexErrorMessage())).toList(),
                                    r.errorMessage()));
                        });
                    });
                });
                if (log.isDebugEnabled()) {
                    log.debug("[{}] ProduceResponse: {}", inflightRequest.getHeader(), CoreUtils.mapValue(map,
                            r -> r.error));
                }
                inflightRequest.complete(new ProduceResponse(map));
            }).exceptionally(e -> {
                log.error("[{}] Failed to wait for the produce responses", ctx, e);
                close(ctx);
                return null;
            });
        }
    }

    private static ProduceResponse createProduceResponse(final Map<TopicPartition, Errors> errorsMap) {
        return createProduceResponse(errorsMap, new ProduceResponseData());
    }

    private static ProduceResponse createProduceResponse(final Map<TopicPartition, Errors> errorsMap,
                                                         final ProduceResponseData responseData) {
        errorsMap.forEach((topicPartition, errors) -> {
            final var topic = topicPartition.topic();
            var topicProduceResponse = responseData.responses().find(topic);
            if (topicProduceResponse == null) {
                topicProduceResponse = new TopicProduceResponse().setName(topic);
                responseData.responses().add(topicProduceResponse);
            }
            topicProduceResponse.partitionResponses().add(new PartitionProduceResponse()
                .setErrorCode(errors.code()).setIndex(topicPartition.partition()));
        });
        return new ProduceResponse(responseData);
    }

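    /**
     * Always reports the proxy itself as the coordinator. The request is still forwarded so the
     * broker can perform any required side effects, but its answer is discarded.
     */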
    private void handleFindCoordinator(final InflightRequest inflightRequest) throws IOException {
        final var request = (FindCoordinatorRequest) inflightRequest.getRequest();
        inflightRequest.setResponseMapper(__ -> {
            // Ignore the original response. This request was sent just to execute the necessary operations in broker.
            final var data = new FindCoordinatorResponseData();
            if (request.version() < FindCoordinatorRequest.MIN_BATCHED_VERSION) {
                data.setErrorCode(Errors.NONE.code()).setErrorMessage(Errors.NONE.message())
                        .setHost(selfNode.host()).setPort(selfNode.port()).setNodeId(selfNode.id());
            } else {
                final var coordinatorKeys = request.data().coordinatorKeys();
                data.setCoordinators(coordinatorKeys.stream().map(key ->
                        new FindCoordinatorResponseData.Coordinator()
                                .setKey(key)
                                .setErrorCode(Errors.NONE.code())
                                .setErrorMessage(Errors.NONE.message())
                                .setHost(selfNode.host())
                                .setPort(selfNode.port())
                                .setNodeId(selfNode.id())
                ).toList());
            }
            return new FindCoordinatorResponse(data);
        });
        connectionGroup.getMetadataBroker().forwardRequest(inflightRequest);
    }

    private void handleListOffsets(final InflightRequest inflightRequest) throws IOException {
        if (inflightRequest.getHeader().apiVersion() == 0) {
            // TODO: handle ListOffset request v0
            throw new RuntimeException("KoP proxy does not support ListOffset v0 yet");
        } else {
            final var request = (ListOffsetsRequest) inflightRequest.getRequest();
            if (request.data().topics().isEmpty()) {
                inflightRequest.complete(new ListOffsetsResponse(new ListOffsetsResponseData()));
                return;
            }
            handleListOffsetsV1OrAbove(inflightRequest, request);
        }
    }

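    /**
     * Splits LIST_OFFSETS (v1+) by partition leader, fans the sub-requests out to the owning
     * brokers, and merges the per-topic partition responses into a single response.
     */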
    private void handleListOffsetsV1OrAbove(final InflightRequest originalRequest, final ListOffsetsRequest request)
            throws IOException {
        final var errorsMap = new HashMap<TopicPartition, Errors>();
        final var leaderToOffsetData = new HashMap<InetSocketAddress, Map<String, List<ListOffsetsPartition>>>();
        request.data().topics().forEach(topic -> {
            topic.partitions().forEach(partitionData -> {
                final var topicPartition = new TopicPartition(topic.name(), partitionData.partitionIndex());
                final var leader = leaderCache.getIfPresent(topicPartition);
                if (leader == null) {
                    errorsMap.put(topicPartition, Errors.UNKNOWN_TOPIC_OR_PARTITION);
                    return;
                }
                leaderToOffsetData.computeIfAbsent(leader, __ -> new HashMap<>())
                        .computeIfAbsent(topic.name(), __ -> new ArrayList<>()).add(partitionData);
            });
        });

        if (leaderToOffsetData.size() == 1) {
            final var leader = leaderToOffsetData.keySet().iterator().next();
            originalRequest.setSkipParsingResponse(true);
            connectionGroup.getLeader(leader).forwardRequest(originalRequest);
        } else {
            final var responseFutures = new ArrayList<CompletableFuture<ListOffsetsResponse>>();
            leaderToOffsetData.forEach((leader, offsetData) -> {
                try {
                    final var connection = connectionGroup.getLeader(leader);
                    final var targetTimes = offsetData.entrySet().stream().map(e -> new ListOffsetsTopic()
                            .setName(e.getKey()).setPartitions(e.getValue())).toList();
                    final var singleRequest = ListOffsetsRequest.Builder
                            .forConsumer(true, request.isolationLevel(), false)
                            .setTargetTimes(targetTimes)
                            .build(request.version());
                    final var buf = KopResponseUtils.serializeRequestToPooledBuffer(
                            originalRequest.getHeader(), singleRequest);
                    final var singleInflightRequest = new InflightRequest(
                            buf, originalRequest.getRemoteAddress(), false);
                    responseFutures.add(singleInflightRequest.getResponseFuture());
                    connection.forwardRequest(singleInflightRequest);
                } catch (IOException e) {
                    log.warn("[{}] Failed to connect to leader {}: {}", ctx, leader, e.getMessage());
                    offsetData.forEach((topic, partitions) -> partitions.forEach(partition -> {
                        final var topicPartition = new TopicPartition(topic, partition.partitionIndex());
                        errorsMap.put(topicPartition, Errors.UNKNOWN_TOPIC_OR_PARTITION);
                        leaderCache.invalidate(topicPartition);
                    }));
                }
            });
            FutureUtil.waitForAll(responseFutures).thenAccept(__ -> {
                final var topicMap = new HashMap<String, List<ListOffsetsPartitionResponse>>();
                responseFutures.stream().map(CompletableFuture::join).forEach(response -> {
                    response.data().topics().forEach(topic -> {
                        final var partitions = topicMap.get(topic.name());
                        if (partitions == null) {
                            topicMap.put(topic.name(), topic.partitions());
                        } else {
                            partitions.addAll(topic.partitions());
                        }
                    });
                });
                errorsMap.forEach((topicPartition, errors) -> topicMap.computeIfAbsent(topicPartition.topic(),
                                topic -> new ArrayList<>()
                ).add(new ListOffsetsPartitionResponse().setErrorCode(errors.code())));
                final var data = topicMap.entrySet().stream().map(e -> new ListOffsetsTopicResponse()
                        .setName(e.getKey())
                        .setPartitions(e.getValue())).toList();
                originalRequest.complete(new ListOffsetsResponse(new ListOffsetsResponseData().setTopics(data)));
            });
        }
    }

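    /**
     * Routes FETCH to the cached partition leaders. When the request spans multiple leaders it is
     * split, and the sub-responses are kept as raw buffers until the merged response is serialized,
     * because the record data is sliced from those buffers.
     */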
    private void handleFetch(final InflightRequest inflightRequest) throws IOException {
        final var request = (FetchRequest) inflightRequest.getRequest();
        final var errorsMap = new HashMap<TopicPartition, Errors>();
        final var fetchPartitionMap = new HashMap<InetSocketAddress, Map<String, FetchTopic>>();
        request.data().topics().forEach(fetchTopic -> {
            final var topic = fetchTopic.topic();
            fetchTopic.partitions().forEach(fetchPartition -> {
                final var topicPartition = new TopicPartition(topic, fetchPartition.partition());
                final var leader = leaderCache.getIfPresent(topicPartition);
                if (leader == null) {
                    errorsMap.put(topicPartition, Errors.NOT_LEADER_OR_FOLLOWER);
                    return;
                }
                fetchPartitionMap.computeIfAbsent(leader, __ -> new HashMap<>()).computeIfAbsent(topic, __ ->
                        new FetchTopic().setTopicId(fetchTopic.topicId())
                                .setTopic(topic)
                ).partitions().add(fetchPartition);
            });
        });

        if (fetchPartitionMap.isEmpty()) {
            log.warn("No leader found for {}", inflightRequest.getRequest());
            inflightRequest.complete(createFetchResponse(errorsMap, new FetchResponseData()));
            return; // avoid completing the request a second time via the merge path below
        }
        if (fetchPartitionMap.size() == 1) {
            final var leader = fetchPartitionMap.keySet().iterator().next();
            inflightRequest.setSkipParsingResponse(true);
            connectionGroup.getLeader(leader).forwardRequest(inflightRequest);
        } else {
            final var responseFutures = new ArrayList<CompletableFuture<ByteBuf>>();
            fetchPartitionMap.forEach((leader, fetchTopics) -> {
                try {
                    final var connection = connectionGroup.getLeader(leader);
                    final var singleRequest = new FetchRequest(new FetchRequestData().setMaxWaitMs(request.maxWait())
                            .setMaxBytes(request.maxBytes()).setMinBytes(request.minBytes())
                            .setIsolationLevel(request.isolationLevel().id())
                            .setSessionEpoch(request.metadata().epoch()).setSessionId(request.metadata().sessionId())
                            .setReplicaId(request.replicaId()).setRackId(request.rackId())
                            .setTopics(fetchTopics.values().stream().toList()), request.version());
                    final var singleInflightRequest = new InflightRequest(
                            KopResponseUtils.serializeRequestToPooledBuffer(inflightRequest.getHeader(), singleRequest),
                            inflightRequest.getRemoteAddress(), false);
                    // The records buffer of a FetchResponse is only valid when the original buffer is valid, so here
                    // we cannot parse the buffer into a FetchResponse in ConnectionBroker because the buffer will be
                    // released after the response future is completed.
                    singleInflightRequest.setSkipParsingResponse(true);
                    responseFutures.add(singleInflightRequest.getResponseFuture());
                    connection.forwardRequest(singleInflightRequest);
                } catch (IOException e) {
                    log.warn("[{}] Failed to connect to leader {}: {}", ctx, leader, e.getMessage());
                    fetchTopics.values().forEach(fetchTopic -> {
                        fetchTopic.partitions().forEach(partition -> {
                            final var topicPartition = new TopicPartition(fetchTopic.topic(), partition.partition());
                            errorsMap.put(topicPartition, Errors.NOT_LEADER_OR_FOLLOWER);
                            leaderCache.invalidate(topicPartition);
                        });
                    });
                }
            });
            FutureUtil.waitForAll(responseFutures).thenAccept(__ -> {
                // The records fields are slices of `buf`, so we have to delay the release after the FETCH response is
                // serialized to a new allocated buffer.
                final var buffersToRelease = responseFutures.stream().map(CompletableFuture::join).toList();
                final var map = new HashMap<String, FetchableTopicResponse>();
                buffersToRelease.forEach(buf -> {
                    final var fetchResponse = (FetchResponse) FetchResponse.parseResponse(buf.nioBuffer(),
                            inflightRequest.getHeader());
                    fetchResponse.data().responses().forEach(topic -> {
                        final var topicResponse = map.get(topic.topic());
                        if (topicResponse == null) {
                            map.put(topic.topic(), new FetchableTopicResponse().setTopicId(topic.topicId())
                                    .setTopic(topic.topic()).setPartitions(topic.partitions()));
                            return;
                        }
                        topicResponse.partitions().addAll(topic.partitions());
                    });
                });
                final var data = new FetchResponseData().setResponses(map.values().stream().toList())
                        .setSessionId(request.metadata().sessionId());
                inflightRequest.complete(Pair.of(createFetchResponse(errorsMap, data), buffersToRelease));
            });
        }
    }

    private static FetchResponse createFetchResponse(final Map<TopicPartition, Errors> errorsMap,
                                                     final FetchResponseData responseData) {
        errorsMap.forEach((topicPartition, errors) -> {
            final var topic = topicPartition.topic();
            var topicResponse = responseData.responses().stream().filter(__ -> __.topic().equals(topic))
                    .findFirst().orElse(null);
            if (topicResponse == null) {
                topicResponse = new FetchableTopicResponse().setTopic(topic);
                responseData.responses().add(topicResponse);
            }
            var partitionResponse = topicResponse.partitions().stream().filter(__ ->
                            __.partitionIndex() == topicPartition.partition()).findFirst().orElse(null);
            if (partitionResponse == null) {
                partitionResponse = new FetchResponseData.PartitionData().setPartitionIndex(topicPartition.partition())
                        .setErrorCode(errors.code());
                topicResponse.partitions().add(partitionResponse);
            }
        });
        return new FetchResponse(responseData);
    }

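    /**
     * Splits DELETE_RECORDS by partition leader and merges the per-partition error codes into one
     * response; partitions without a cached leader fail with NOT_LEADER_OR_FOLLOWER.
     */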
    private void handleDeleteRecords(final InflightRequest inflightRequest) throws IOException {
        final var request = (DeleteRecordsRequest) inflightRequest.getRequest();
        final var deleteRecordsMap = new HashMap<InetSocketAddress, Map<String, DeleteRecordsTopic>>();
        final var errorsMap = new HashMap<TopicPartition, Errors>();
        request.data().topics().forEach(topic -> topic.partitions().forEach(partition -> {
            final var topicPartition = new TopicPartition(topic.name(), partition.partitionIndex());
            final var leader = leaderCache.getIfPresent(topicPartition);
            if (leader == null) {
                errorsMap.put(topicPartition, Errors.NOT_LEADER_OR_FOLLOWER);
                return;
            }
            deleteRecordsMap.computeIfAbsent(leader, __ -> new HashMap<>()).computeIfAbsent(topic.name(), __ ->
                    new DeleteRecordsTopic().setName(__)).partitions().add(partition);
        }));
        if (deleteRecordsMap.size() == 1) {
            inflightRequest.setResponseMapper(originalResponse -> {
                final var response = (DeleteRecordsResponse) originalResponse;
                response.data().topics().forEach(topic -> topic.partitions().forEach(partition -> errorsMap.put(
                        new TopicPartition(topic.name(), partition.partitionIndex()),
                        Errors.forCode(partition.errorCode()))));
                return KafkaResponseUtils.newDeleteRecords(errorsMap);
            });
            connectionGroup.getLeader(deleteRecordsMap.keySet().iterator().next()).forwardRequest(inflightRequest);
        } else {
            final var responseFutures = new ArrayList<CompletableFuture<DeleteRecordsResponse>>();
            deleteRecordsMap.forEach((leader, topics) -> {
                try {
                    final var connection = connectionGroup.getLeader(leader);
                    final var singleRequest = new DeleteRecordsRequest.Builder(new DeleteRecordsRequestData()
                            .setTimeoutMs(request.data().timeoutMs()).setTopics(topics.values().stream().toList())
                    ).build(request.version());
                    final var singleInflightRequest = new InflightRequest(
                            KopResponseUtils.serializeRequestToPooledBuffer(inflightRequest.getHeader(), singleRequest),
                            inflightRequest.getRemoteAddress());
                    final var responseFuture = new CompletableFuture<DeleteRecordsResponse>();
                    responseFutures.add(responseFuture);
                    singleInflightRequest.setResponseMapper(response -> {
                        responseFuture.complete((DeleteRecordsResponse) response);
                        return response;
                    });
                    connection.forwardRequest(singleInflightRequest);
                } catch (IOException e) {
                    log.warn("[{}] Failed to connect to leader {}: {}", ctx, leader, e.getMessage());
                    topics.values().forEach(topic -> topic.partitions().forEach(partition -> {
                        final var topicPartition = new TopicPartition(topic.name(), partition.partitionIndex());
                        errorsMap.put(topicPartition, Errors.NOT_LEADER_OR_FOLLOWER);
                        leaderCache.invalidate(topicPartition);
                    }));
                }
            });
            FutureUtil.waitForAll(responseFutures).thenAccept(__ -> {
                responseFutures.stream().map(CompletableFuture::join).forEach(singleResponse -> {
                    singleResponse.data().topics().forEach(topic -> topic.partitions().forEach(partition ->
                        errorsMap.put(new TopicPartition(topic.name(), partition.partitionIndex()),
                                Errors.forCode(partition.errorCode()))
                    ));
                });
                inflightRequest.complete(KafkaResponseUtils.newDeleteRecords(errorsMap));
            }).exceptionally(e -> {
                log.error("[{}] Failed to wait for the delete records responses", ctx, e);
                close(ctx);
                return null;
            });
        }
    }

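    /**
     * Forwards DESCRIBE_CLUSTER but rewrites the broker list and controller id so that only the
     * proxy node is exposed to the client.
     */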
    private void handleDescribeCluster(final InflightRequest inflightRequest) throws IOException {
        inflightRequest.setResponseMapper(response -> {
            final var clusterResponse = (DescribeClusterResponse) response;
            clusterResponse.data().setControllerId(selfNode.id());
            clusterResponse.data().setBrokers(new DescribeClusterBrokerCollection(Collections.singletonList(
                    new DescribeClusterBroker().setBrokerId(selfNode.id()).setHost(selfNode.host())
                            .setPort(selfNode.port())).iterator()));
            return response;
        });
        connectionGroup.getMetadataBroker().forwardRequest(inflightRequest);
    }

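    /**
     * Forwards group-related requests to the group coordinator resolved from the group id. On a
     * NOT_COORDINATOR error the broker connection is dropped so that subsequent requests
     * re-establish it against the current coordinator.
     */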
    private void handleGroupRequest(final ApiKeys apiKeys, final InflightRequest inflightRequest) throws IOException {
        final var groupId = inflightRequest.groupId();
        inflightRequest.setResponseMapper(response -> {
            final var error = switch (apiKeys) {
                case JOIN_GROUP -> ((JoinGroupResponse) response).error();
                case SYNC_GROUP -> ((SyncGroupResponse) response).error();
                case LEAVE_GROUP -> ((LeaveGroupResponse) response).error();
                case OFFSET_FETCH -> ((OffsetFetchResponse) response).error();
                case OFFSET_COMMIT -> ((OffsetCommitResponse) response).errorCounts().keySet().stream()
                        .filter(__ -> !__.equals(Errors.NONE)).findFirst().orElse(Errors.NONE);
                case HEARTBEAT -> ((HeartbeatResponse) response).error();
                case OFFSET_DELETE -> Errors.forCode(((OffsetDeleteResponse) response).data().errorCode());
                case TXN_OFFSET_COMMIT -> ((TxnOffsetCommitResponse) response).errors().values().stream()
                        .filter(__ -> !__.equals(Errors.NONE)).findFirst().orElse(Errors.NONE);
                default -> throw new IllegalStateException(apiKeys + " is not group request");
            };
            if (error == Errors.NOT_COORDINATOR) {
                log.info("[{}] [group: {}] Disconnect the outdated group coordinator", ctx, groupId);
                try {
                    connectionGroup.getGroupCoordinator(groupId).disconnectBroker();
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
            return response;
        });
        connectionGroup.getGroupCoordinator(groupId).forwardRequest(inflightRequest);
    }

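    /**
     * Forwards transaction-related requests to the transaction coordinator resolved from the
     * transactional id, dropping the broker connection on a NOT_COORDINATOR error.
     */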
    private void handleTxnRequest(final ApiKeys apiKeys, final InflightRequest inflightRequest) throws IOException {
        final var txnId = inflightRequest.txnId();
        inflightRequest.setResponseMapper(response -> {
            final var error = switch (apiKeys) {
                case INIT_PRODUCER_ID -> ((InitProducerIdResponse) response).error();
                case ADD_PARTITIONS_TO_TXN -> ((AddPartitionsToTxnResponse) response).errors().values().stream()
                        .filter(__ -> !__.equals(Errors.NONE)).findFirst().orElse(Errors.NONE);
                case ADD_OFFSETS_TO_TXN -> Errors.forCode(((AddOffsetsToTxnResponse) response).data().errorCode());
                case END_TXN -> ((EndTxnResponse) response).error();
                default -> throw new IllegalStateException(apiKeys + " is not txn request");
            };
            if (error == Errors.NOT_COORDINATOR) {
                log.info("[{}] [txnId: {}] Disconnect the outdated transaction coordinator", ctx, txnId);
                try {
                    connectionGroup.getTransactionCoordinator(txnId).disconnectBroker();
                } catch (IOException e) {
                    throw new RuntimeException(e);
                }
            }
            return response;
        });
        connectionGroup.getTransactionCoordinator(txnId).forwardRequest(inflightRequest);
    }
}