io.streamnative.pulsar.handlers.kop.KafkaRequestHandler Maven / Gradle / Ivy
Kafka on Pulsar implemented using Pulsar Protocol Handler
/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.streamnative.pulsar.handlers.kop;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static io.streamnative.pulsar.handlers.kop.KafkaServiceConfiguration.TENANT_ALLNAMESPACES_PLACEHOLDER;
import static io.streamnative.pulsar.handlers.kop.KafkaServiceConfiguration.TENANT_PLACEHOLDER;
import static io.streamnative.pulsar.handlers.kop.utils.KafkaResponseUtils.buildOffsetFetchResponse;
import static io.streamnative.pulsar.handlers.kop.utils.KafkaResponseUtils.newCoordinator;
import static java.nio.charset.StandardCharsets.UTF_8;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import io.netty.buffer.ByteBuf;
import io.netty.channel.ChannelHandlerContext;
import io.streamnative.pulsar.handlers.kop.coordinator.group.GroupCoordinator;
import io.streamnative.pulsar.handlers.kop.coordinator.group.GroupMetadata.GroupOverview;
import io.streamnative.pulsar.handlers.kop.coordinator.transaction.TransactionCoordinator;
import io.streamnative.pulsar.handlers.kop.exceptions.KoPTopicException;
import io.streamnative.pulsar.handlers.kop.offset.OffsetAndMetadata;
import io.streamnative.pulsar.handlers.kop.offset.OffsetMetadata;
import io.streamnative.pulsar.handlers.kop.security.SaslAuthenticator;
import io.streamnative.pulsar.handlers.kop.security.Session;
import io.streamnative.pulsar.handlers.kop.security.auth.Authorizer;
import io.streamnative.pulsar.handlers.kop.security.auth.Resource;
import io.streamnative.pulsar.handlers.kop.security.auth.ResourceType;
import io.streamnative.pulsar.handlers.kop.security.auth.SimpleAclAuthorizer;
import io.streamnative.pulsar.handlers.kop.storage.AppendRecordsContext;
import io.streamnative.pulsar.handlers.kop.storage.PartitionLog;
import io.streamnative.pulsar.handlers.kop.storage.ReplicaManager;
import io.streamnative.pulsar.handlers.kop.utils.CoreUtils;
import io.streamnative.pulsar.handlers.kop.utils.GroupIdUtils;
import io.streamnative.pulsar.handlers.kop.utils.KafkaRequestUtils;
import io.streamnative.pulsar.handlers.kop.utils.KafkaResponseUtils;
import io.streamnative.pulsar.handlers.kop.utils.KopTopic;
import io.streamnative.pulsar.handlers.kop.utils.MessageMetadataUtils;
import io.streamnative.pulsar.handlers.kop.utils.MetadataUtils;
import io.streamnative.pulsar.handlers.kop.utils.OffsetFinder;
import io.streamnative.pulsar.handlers.kop.utils.TopicNameUtils;
import io.streamnative.pulsar.handlers.kop.utils.delayed.DelayedOperation;
import io.streamnative.pulsar.handlers.kop.utils.delayed.DelayedOperationKey;
import io.streamnative.pulsar.handlers.kop.utils.delayed.DelayedOperationPurgatory;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.CopyOnWriteArraySet;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.apache.bookkeeper.common.util.OrderedScheduler;
import org.apache.bookkeeper.mledger.AsyncCallbacks;
import org.apache.bookkeeper.mledger.ManagedLedgerException;
import org.apache.bookkeeper.mledger.Position;
import org.apache.bookkeeper.mledger.impl.ManagedLedgerImpl;
import org.apache.bookkeeper.mledger.impl.PositionImpl;
import org.apache.commons.collections4.ListUtils;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.kafka.common.InvalidRecordException;
import org.apache.kafka.common.IsolationLevel;
import org.apache.kafka.common.Node;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.acl.AclOperation;
import org.apache.kafka.common.config.ConfigResource;
import org.apache.kafka.common.errors.ApiException;
import org.apache.kafka.common.errors.AuthenticationException;
import org.apache.kafka.common.errors.InvalidTopicException;
import org.apache.kafka.common.errors.LeaderNotAvailableException;
import org.apache.kafka.common.internals.Topic;
import org.apache.kafka.common.message.AddOffsetsToTxnRequestData;
import org.apache.kafka.common.message.AddOffsetsToTxnResponseData;
import org.apache.kafka.common.message.AddPartitionsToTxnRequestData;
import org.apache.kafka.common.message.AddPartitionsToTxnResponseData;
import org.apache.kafka.common.message.AlterConfigsRequestData;
import org.apache.kafka.common.message.AlterConfigsResponseData;
import org.apache.kafka.common.message.CreatePartitionsRequestData;
import org.apache.kafka.common.message.CreateTopicsRequestData;
import org.apache.kafka.common.message.DeleteGroupsRequestData;
import org.apache.kafka.common.message.DeleteRecordsRequestData;
import org.apache.kafka.common.message.DeleteTopicsRequestData;
import org.apache.kafka.common.message.DescribeClusterResponseData;
import org.apache.kafka.common.message.DescribeConfigsRequestData;
import org.apache.kafka.common.message.DescribeConfigsResponseData;
import org.apache.kafka.common.message.EndTxnRequestData;
import org.apache.kafka.common.message.EndTxnResponseData;
import org.apache.kafka.common.message.FetchRequestData;
import org.apache.kafka.common.message.FetchResponseData;
import org.apache.kafka.common.message.FindCoordinatorResponseData;
import org.apache.kafka.common.message.InitProducerIdRequestData;
import org.apache.kafka.common.message.InitProducerIdResponseData;
import org.apache.kafka.common.message.JoinGroupRequestData;
import org.apache.kafka.common.message.LeaveGroupRequestData;
import org.apache.kafka.common.message.ListOffsetsRequestData;
import org.apache.kafka.common.message.ListOffsetsResponseData;
import org.apache.kafka.common.message.OffsetCommitRequestData;
import org.apache.kafka.common.message.OffsetDeleteRequestData;
import org.apache.kafka.common.message.OffsetDeleteResponseData;
import org.apache.kafka.common.message.ProduceRequestData;
import org.apache.kafka.common.message.SaslAuthenticateResponseData;
import org.apache.kafka.common.message.SyncGroupRequestData;
import org.apache.kafka.common.message.TxnOffsetCommitRequestData;
import org.apache.kafka.common.protocol.ApiKeys;
import org.apache.kafka.common.protocol.Errors;
import org.apache.kafka.common.record.ControlRecordType;
import org.apache.kafka.common.record.EndTransactionMarker;
import org.apache.kafka.common.record.MemoryRecords;
import org.apache.kafka.common.record.MutableRecordBatch;
import org.apache.kafka.common.record.RecordBatch;
import org.apache.kafka.common.requests.AbstractRequest;
import org.apache.kafka.common.requests.AbstractResponse;
import org.apache.kafka.common.requests.AddOffsetsToTxnRequest;
import org.apache.kafka.common.requests.AddOffsetsToTxnResponse;
import org.apache.kafka.common.requests.AddPartitionsToTxnRequest;
import org.apache.kafka.common.requests.AddPartitionsToTxnResponse;
import org.apache.kafka.common.requests.AlterConfigsRequest;
import org.apache.kafka.common.requests.AlterConfigsResponse;
import org.apache.kafka.common.requests.ApiError;
import org.apache.kafka.common.requests.ApiVersionsResponse;
import org.apache.kafka.common.requests.CreatePartitionsRequest;
import org.apache.kafka.common.requests.CreateTopicsRequest;
import org.apache.kafka.common.requests.DeleteGroupsRequest;
import org.apache.kafka.common.requests.DeleteRecordsRequest;
import org.apache.kafka.common.requests.DeleteTopicsRequest;
import org.apache.kafka.common.requests.DescribeClusterRequest;
import org.apache.kafka.common.requests.DescribeClusterResponse;
import org.apache.kafka.common.requests.DescribeConfigsRequest;
import org.apache.kafka.common.requests.DescribeConfigsResponse;
import org.apache.kafka.common.requests.DescribeGroupsRequest;
import org.apache.kafka.common.requests.EndTxnRequest;
import org.apache.kafka.common.requests.EndTxnResponse;
import org.apache.kafka.common.requests.FetchRequest;
import org.apache.kafka.common.requests.FetchResponse;
import org.apache.kafka.common.requests.FindCoordinatorRequest;
import org.apache.kafka.common.requests.HeartbeatRequest;
import org.apache.kafka.common.requests.HeartbeatResponse;
import org.apache.kafka.common.requests.InitProducerIdRequest;
import org.apache.kafka.common.requests.InitProducerIdResponse;
import org.apache.kafka.common.requests.JoinGroupRequest;
import org.apache.kafka.common.requests.JoinGroupResponse;
import org.apache.kafka.common.requests.LeaveGroupRequest;
import org.apache.kafka.common.requests.ListGroupsRequest;
import org.apache.kafka.common.requests.ListOffsetRequestV0;
import org.apache.kafka.common.requests.ListOffsetsRequest;
import org.apache.kafka.common.requests.ListOffsetsResponse;
import org.apache.kafka.common.requests.MetadataRequest;
import org.apache.kafka.common.requests.MetadataResponse.PartitionMetadata;
import org.apache.kafka.common.requests.MetadataResponse.TopicMetadata;
import org.apache.kafka.common.requests.OffsetCommitRequest;
import org.apache.kafka.common.requests.OffsetCommitResponse;
import org.apache.kafka.common.requests.OffsetDeleteRequest;
import org.apache.kafka.common.requests.OffsetDeleteResponse;
import org.apache.kafka.common.requests.OffsetFetchRequest;
import org.apache.kafka.common.requests.OffsetFetchResponse;
import org.apache.kafka.common.requests.ProduceRequest;
import org.apache.kafka.common.requests.ProduceResponse;
import org.apache.kafka.common.requests.ProduceResponse.PartitionResponse;
import org.apache.kafka.common.requests.ResponseCallbackWrapper;
import org.apache.kafka.common.requests.SaslAuthenticateResponse;
import org.apache.kafka.common.requests.SyncGroupRequest;
import org.apache.kafka.common.requests.SyncGroupResponse;
import org.apache.kafka.common.requests.TransactionResult;
import org.apache.kafka.common.requests.TxnOffsetCommitRequest;
import org.apache.kafka.common.requests.TxnOffsetCommitResponse;
import org.apache.kafka.common.requests.WriteTxnMarkersRequest;
import org.apache.kafka.common.requests.WriteTxnMarkersResponse;
import org.apache.kafka.common.utils.SystemTime;
import org.apache.kafka.common.utils.Time;
import org.apache.pulsar.broker.PulsarService;
import org.apache.pulsar.broker.service.persistent.PersistentTopic;
import org.apache.pulsar.client.admin.PulsarAdmin;
import org.apache.pulsar.client.admin.PulsarAdminException;
import org.apache.pulsar.common.naming.NamespaceName;
import org.apache.pulsar.common.naming.TopicName;
import org.apache.pulsar.common.schema.KeyValue;
import org.apache.pulsar.common.util.FutureUtil;
import org.apache.pulsar.common.util.Murmur3_32Hash;
import org.apache.pulsar.metadata.api.MetadataStoreException;
import org.apache.pulsar.metadata.api.extended.MetadataStoreExtended;
/**
* This class contains all the request handling methods.
*/
@Slf4j
@Getter
public class KafkaRequestHandler extends KafkaCommandDecoder {
private static final int THROTTLE_TIME_MS = 10;
private static final String POLICY_ROOT = "/admin/policies/";
private final PulsarService pulsarService;
private final KafkaTopicManager topicManager;
private final TenantContextManager tenantContextManager;
private final ReplicaManager replicaManager;
private final KopBrokerLookupManager kopBrokerLookupManager;
@Getter
private final LookupClient lookupClient;
@Getter
private final KafkaTopicManagerSharedState kafkaTopicManagerSharedState;
private final String clusterName;
private final ScheduledExecutorService executor;
private final PulsarAdmin admin;
private final MetadataStoreExtended metadataStore;
private final SaslAuthenticator authenticator;
private final Authorizer authorizer;
private final AdminManager adminManager;
private final Boolean tlsEnabled;
private final EndPoint advertisedEndPoint;
private final boolean skipMessagesWithoutIndex;
private final int defaultNumPartitions;
public final int maxReadEntriesNum;
private final int failedAuthenticationDelayMs;
// store the group name for current connected client.
private final ConcurrentHashMap<String, CompletableFuture<String>> currentConnectedGroup;
private final ConcurrentSkipListSet<String> currentConnectedClientId;
private final String groupIdStoredPath;
private final Set<String> groupIds = new HashSet<>();
// key is the topic(partition), value is the future that indicates whether the PersistentTopic instance of the key
// is found.
private final Map<TopicPartition, PendingTopicFutures> pendingTopicFuturesMap = new ConcurrentHashMap<>();
// DelayedOperation for produce and fetch
private final DelayedOperationPurgatory<DelayedOperation> fetchPurgatory;
// Flag to manage throttling-publish-buffer by atomically enable/disable read-channel.
private final long maxPendingBytes;
private final long resumeThresholdPendingBytes;
private final AtomicLong pendingBytes = new AtomicLong(0);
private final String defaultNamespacePrefix;
private volatile boolean autoReadDisabledPublishBufferLimiting = false;
private String getCurrentTenant() {
return getCurrentTenant(kafkaConfig.getKafkaMetadataTenant());
}
private String getCurrentTenant(String defaultTenant) {
if (kafkaConfig.isKafkaEnableMultiTenantMetadata()
&& authenticator != null
&& authenticator.session() != null
&& authenticator.session().getPrincipal() != null
&& authenticator.session().getPrincipal().getTenantSpec() != null) {
String tenantSpec = authenticator.session().getPrincipal().getTenantSpec();
return extractTenantFromTenantSpec(tenantSpec);
}
// fallback to using system (default) tenant
if (log.isDebugEnabled()) {
log.debug("using {} as tenant", defaultTenant);
}
return defaultTenant;
}
public String currentNamespacePrefix() {
String currentTenant = getCurrentTenant(kafkaConfig.getKafkaTenant());
return MetadataUtils.constructUserTopicsNamespace(currentTenant, kafkaConfig);
}
private static String extractTenantFromTenantSpec(String tenantSpec) {
if (tenantSpec != null && !tenantSpec.isEmpty()) {
String tenant = tenantSpec;
// username can be "tenant" or "tenant/namespace"
if (tenantSpec.contains("/")) {
tenant = tenantSpec.substring(0, tenantSpec.indexOf('/'));
}
if (log.isDebugEnabled()) {
log.debug("using {} as tenant", tenant);
}
return tenant;
} else {
return tenantSpec;
}
}
public GroupCoordinator getGroupCoordinator() {
return tenantContextManager.getGroupCoordinator(getCurrentTenant());
}
public TransactionCoordinator getTransactionCoordinator() {
throwIfTransactionCoordinatorDisabled();
return tenantContextManager.getTransactionCoordinator(getCurrentTenant());
}
public KafkaRequestHandler(PulsarService pulsarService,
KafkaServiceConfiguration kafkaConfig,
TenantContextManager tenantContextManager,
ReplicaManager replicaManager,
KopBrokerLookupManager kopBrokerLookupManager,
AdminManager adminManager,
DelayedOperationPurgatory<DelayedOperation> fetchPurgatory,
Boolean tlsEnabled,
EndPoint advertisedEndPoint,
boolean skipMessagesWithoutIndex,
RequestStats requestStats,
OrderedScheduler sendResponseScheduler,
KafkaTopicManagerSharedState kafkaTopicManagerSharedState,
KafkaTopicLookupService kafkaTopicLookupService,
LookupClient lookupClient) throws Exception {
super(requestStats, kafkaConfig, sendResponseScheduler);
this.pulsarService = pulsarService;
this.tenantContextManager = tenantContextManager;
this.replicaManager = replicaManager;
this.kopBrokerLookupManager = kopBrokerLookupManager;
this.lookupClient = lookupClient;
this.clusterName = kafkaConfig.getClusterName();
this.executor = pulsarService.getExecutor();
this.admin = pulsarService.getAdminClient();
this.metadataStore = pulsarService.getLocalMetadataStore();
final boolean authenticationEnabled = pulsarService.getBrokerService().isAuthenticationEnabled()
&& !kafkaConfig.getSaslAllowedMechanisms().isEmpty();
this.authenticator = authenticationEnabled
? new SaslAuthenticator(pulsarService, kafkaConfig.getSaslAllowedMechanisms(), kafkaConfig)
: null;
final boolean authorizationEnabled = pulsarService.getBrokerService().isAuthorizationEnabled();
this.authorizer = authorizationEnabled && authenticationEnabled
? new SimpleAclAuthorizer(pulsarService, kafkaConfig)
: null;
this.adminManager = adminManager;
this.fetchPurgatory = fetchPurgatory;
this.tlsEnabled = tlsEnabled;
this.advertisedEndPoint = advertisedEndPoint;
this.skipMessagesWithoutIndex = skipMessagesWithoutIndex;
this.topicManager = new KafkaTopicManager(this, kafkaTopicLookupService);
this.defaultNumPartitions = kafkaConfig.getDefaultNumPartitions();
this.maxReadEntriesNum = kafkaConfig.getMaxReadEntriesNum();
this.currentConnectedGroup = new ConcurrentHashMap<>();
this.currentConnectedClientId = new ConcurrentSkipListSet<>();
this.groupIdStoredPath = kafkaConfig.getGroupIdZooKeeperPath();
this.maxPendingBytes = kafkaConfig.getMaxMessagePublishBufferSizeInMB() * 1024L * 1024L;
this.resumeThresholdPendingBytes = this.maxPendingBytes / 2;
this.defaultNamespacePrefix = kafkaConfig.getDefaultNamespacePrefix();
this.failedAuthenticationDelayMs = kafkaConfig.getFailedAuthenticationDelayMs();
this.kafkaTopicManagerSharedState = kafkaTopicManagerSharedState;
// update alive channel count stats
RequestStats.ALIVE_CHANNEL_COUNT_INSTANCE.incrementAndGet();
}
@Override
public void channelActive(ChannelHandlerContext ctx) throws Exception {
super.channelActive(ctx);
topicManager.setRemoteAddress(ctx.channel().remoteAddress());
if (authenticator != null) {
authenticator.reset();
}
// update active channel count stats
RequestStats.ACTIVE_CHANNEL_COUNT_INSTANCE.incrementAndGet();
log.info("channel active: {}", ctx.channel());
}
@Override
public void channelInactive(ChannelHandlerContext ctx) throws Exception {
super.channelInactive(ctx);
// update active channel count stats
RequestStats.ACTIVE_CHANNEL_COUNT_INSTANCE.decrementAndGet();
close();
}
@Override
protected void close() {
if (isActive.getAndSet(false)) {
super.close();
topicManager.close();
String clientHost = ctx.channel().remoteAddress().toString();
if (currentConnectedGroup.containsKey(clientHost)){
log.info("currentConnectedGroup remove {}", clientHost);
currentConnectedGroup.remove(clientHost);
}
// Try to remove all stored groupID on the metadata store.
if (log.isDebugEnabled()) {
log.debug("Try to remove all stored groupID on the metadata store. Current connected clientIds: {}",
currentConnectedClientId);
}
if (kafkaConfig.isKopEnableGroupLevelConsumerMetrics()) {
currentConnectedClientId.forEach(clientId -> {
String path = groupIdStoredPath + GroupIdUtils.groupIdPathFormat(clientHost, clientId);
metadataStore.delete(path, Optional.empty())
.whenComplete((__, ex) -> {
if (ex != null) {
if (ex.getCause() instanceof MetadataStoreException.NotFoundException) {
if (log.isDebugEnabled()) {
log.debug("The groupId store path doesn't exist. Path: [{}]", path);
}
return;
}
log.error("Delete groupId failed. Path: [{}]", path, ex);
return;
}
if (log.isDebugEnabled()) {
log.debug("Delete groupId success. Path: [{}]", path);
}
});
});
}
// update alive channel count stat
RequestStats.ALIVE_CHANNEL_COUNT_INSTANCE.decrementAndGet();
}
}
@Override
protected boolean hasAuthenticated() {
return authenticator == null || authenticator.complete();
}
@Override
protected void channelPrepare(ChannelHandlerContext ctx,
ByteBuf requestBuf,
BiConsumer registerRequestParseLatency,
BiConsumer registerRequestLatency)
throws AuthenticationException {
if (authenticator != null) {
authenticator.authenticate(ctx, requestBuf, registerRequestParseLatency, registerRequestLatency,
this::validateTenantAccessForSession);
if (authenticator.complete() && kafkaConfig.isKafkaEnableMultiTenantMetadata()) {
setRequestStats(requestStats.forTenant(getCurrentTenant()));
}
}
}
@Override
protected void maybeDelayCloseOnAuthenticationFailure() {
if (this.failedAuthenticationDelayMs > 0) {
this.ctx.executor().schedule(
this::completeCloseOnAuthenticationFailure,
this.failedAuthenticationDelayMs,
TimeUnit.MILLISECONDS);
} else {
this.completeCloseOnAuthenticationFailure();
}
}
@Override
protected void completeCloseOnAuthenticationFailure() {
if (isActive.get() && authenticator != null) {
authenticator.sendAuthenticationFailureResponse(__ -> this.close());
}
}
@Override
protected void handleApiVersionsRequest(KafkaHeaderAndRequest apiVersionRequest,
CompletableFuture<AbstractResponse> resultFuture) {
if (!ApiKeys.API_VERSIONS.isVersionSupported(apiVersionRequest.getHeader().apiVersion())) {
// Notify Client that API_VERSION is UNSUPPORTED.
AbstractResponse apiResponse = overloadDefaultApiVersionsResponse(true);
resultFuture.complete(apiResponse);
} else {
AbstractResponse apiResponse = overloadDefaultApiVersionsResponse(false);
resultFuture.complete(apiResponse);
}
}
public static ApiVersionsResponse overloadDefaultApiVersionsResponse(boolean unsupportedApiVersion) {
if (unsupportedApiVersion){
return KafkaResponseUtils.newApiVersions(Errors.UNSUPPORTED_VERSION);
} else {
List<ApiVersion> versionList = new ArrayList<>();
for (ApiKeys apiKey : ApiKeys.values()) {
if (apiKey.minRequiredInterBrokerMagic <= RecordBatch.CURRENT_MAGIC_VALUE) {
switch (apiKey) {
case LIST_OFFSETS:
// V0 is needed for librdkafka
versionList.add(new ApiVersion((short) 2, (short) 0, apiKey.latestVersion()));
break;
default:
versionList.add(new ApiVersion(apiKey));
}
}
}
return KafkaResponseUtils.newApiVersions(versionList);
}
}
@Override
protected void handleInactive(KafkaHeaderAndRequest kafkaHeaderAndRequest,
CompletableFuture<AbstractResponse> resultFuture) {
AbstractRequest request = kafkaHeaderAndRequest.getRequest();
AbstractResponse apiResponse = request.getErrorResponse(new LeaderNotAvailableException("Channel is closing!"));
log.error("Kafka API {} is send to a closing channel", kafkaHeaderAndRequest.getHeader().apiKey());
resultFuture.complete(apiResponse);
}
// Leverage pulsar admin to get partitioned topic metadata
// NOTE: the returned future never completes exceptionally
@VisibleForTesting
protected CompletableFuture<TopicAndMetadata> getTopicMetadataAsync(String topic,
boolean allowAutoTopicCreation) {
final CompletableFuture<TopicAndMetadata> future = new CompletableFuture<>();
final TopicName topicName = TopicName.get(topic);
admin.topics().getPartitionedTopicMetadataAsync(topic).whenComplete((metadata, e) -> {
if (e == null) {
if (log.isDebugEnabled()) {
log.debug("Topic {} has {} partitions", topic, metadata.partitions);
}
if (metadata.properties != null
&& metadata.properties.get(PartitionLog.KAFKA_TOPIC_UUID_PROPERTY_NAME) != null) {
future.complete(TopicAndMetadata.success(topic, metadata.partitions));
} else {
admin.topics().updatePropertiesAsync(topic, Map.of(
PartitionLog.KAFKA_TOPIC_UUID_PROPERTY_NAME, UUID.randomUUID().toString()
)).whenComplete((__, updateException) -> {
if (updateException == null) {
future.complete(TopicAndMetadata.success(topic, metadata.partitions));
return;
}
if (updateException instanceof PulsarAdminException.ConflictException) {
log.warn("[{}] Failed to update properties for topic {}",
ctx.channel(), topicName, updateException);
future.complete(TopicAndMetadata.success(topic, metadata.partitions));
return;
}
log.error("[{}] Failed to update properties for topic {}",
ctx.channel(), topicName, updateException);
future.complete(TopicAndMetadata.failure(topic, Errors.UNKNOWN_SERVER_ERROR));
});
}
} else if (e instanceof PulsarAdminException.NotFoundException) {
(allowAutoTopicCreation ? checkAllowAutoTopicCreation(topicName.getNamespace())
: CompletableFuture.completedFuture(false)
).whenComplete((allowed, err) -> {
if (err != null) {
log.error("[{}] Cannot get policies for namespace {}",
ctx.channel(), topicName.getNamespace(), err);
future.complete(TopicAndMetadata.failure(topic, Errors.UNKNOWN_SERVER_ERROR));
return;
}
if (allowed) {
Map<String, String> properties =
Map.of(PartitionLog.KAFKA_TOPIC_UUID_PROPERTY_NAME, UUID.randomUUID().toString());
admin.topics().createPartitionedTopicAsync(topic, defaultNumPartitions, properties)
.whenComplete((__, createException) -> {
if (createException == null) {
future.complete(TopicAndMetadata.success(topic, defaultNumPartitions));
} else {
log.warn("[{}] Failed to create partitioned topic {}: {}",
ctx.channel(), topicName, createException.getMessage());
future.complete(TopicAndMetadata.failure(topic, Errors.UNKNOWN_SERVER_ERROR));
}
});
} else {
try {
Topic.validate(topicName.getLocalName());
future.complete(TopicAndMetadata.failure(topic, Errors.UNKNOWN_TOPIC_OR_PARTITION));
} catch (InvalidTopicException ignored) {
future.complete(TopicAndMetadata.failure(topic, Errors.INVALID_TOPIC_EXCEPTION));
}
}
});
} else {
log.error("[{}] Failed to get partitioned topic {}", ctx.channel(), topic, e);
future.complete(TopicAndMetadata.failure(topic, Errors.UNKNOWN_SERVER_ERROR));
}
});
return future;
}
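// Determine whether auto topic creation is allowed for the given namespace: a namespace-level
// autoTopicCreationOverride policy takes precedence, otherwise fall back to the broker-level
// allowAutoTopicCreation setting from the KoP configuration.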
private CompletableFuture<Boolean> checkAllowAutoTopicCreation(String namespace) {
return admin.namespaces().getPoliciesAsync(namespace).thenApply(policies -> {
if (policies != null && policies.autoTopicCreationOverride != null) {
return policies.autoTopicCreationOverride.isAllowAutoTopicCreation();
} else {
return kafkaConfig.isAllowAutoTopicCreation();
}
});
}
private CompletableFuture<Set<String>> getAllowedNamespacesAsync() {
return pulsarService.getPulsarResources().getDynamicConfigResources().getDynamicConfigurationAsync()
.thenApply(__ -> __.orElse(Collections.emptyMap()).get(KafkaServiceConfiguration.KOP_ALLOWED_NAMESPACES))
.thenApply(configValue -> {
if (configValue == null || configValue.isEmpty()) {
return kafkaConfig.getKopAllowedNamespaces();
}
final var namespaces = Arrays.stream(configValue.split(",")).collect(Collectors.toSet());
for (String namespace : namespaces) {
final var tokens = namespace.split("/");
if (tokens.length != 2) {
log.warn("Invalid namespace {} ({} tokens) in {}", namespace, tokens.length, configValue);
return kafkaConfig.getKopAllowedNamespaces();
}
try {
NamespaceName.validateNamespaceName(tokens[0], tokens[1]);
} catch (IllegalArgumentException e) {
log.warn("Invalid namespace {}: {}", namespace, e.getMessage());
return kafkaConfig.getKopAllowedNamespaces();
}
}
return namespaces;
})
.thenCompose(allowedNamespaces -> {
final var currentTenant = getCurrentTenant(kafkaConfig.getKafkaTenant());
return expandAllowedNamespaces(allowedNamespaces, currentTenant, pulsarService);
});
}
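// Expand the configured namespace templates: the tenant placeholder is replaced with the tenant of the
// current session, and entries ending with the "all namespaces" placeholder are expanded to every
// namespace that currently exists under that tenant.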
@VisibleForTesting
static CompletableFuture<Set<String>> expandAllowedNamespaces(Set<String> allowedNamespaces,
String currentTenant,
PulsarService pulsarService) {
Set<String> result = new CopyOnWriteArraySet<>();
List<CompletableFuture<?>> results = new ArrayList<>();
for (String namespaceTemplate : allowedNamespaces) {
String namespace = namespaceTemplate.replace(TENANT_PLACEHOLDER, currentTenant);
if (!namespace.endsWith("/" + TENANT_ALLNAMESPACES_PLACEHOLDER)) {
result.add(namespace);
results.add(CompletableFuture.completedFuture(namespace));
} else {
int slash = namespace.lastIndexOf('/');
String tenant = namespace.substring(0, slash);
results.add(pulsarService.getPulsarResources()
.getNamespaceResources()
.listNamespacesAsync(tenant)
.thenAccept(namespaces -> namespaces.forEach(ns -> result.add(tenant + "/" + ns))));
}
}
return CompletableFuture
.allOf(results.toArray(new CompletableFuture<?>[0]))
.thenApply(f -> result);
}
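// Group the full partition names by their partitioned topic name and derive the partition count from the
// collected indexes. Non-partitioned topics (index -1) are reported with 0 partitions, Pulsar's
// "__change_events" system topics are skipped, and topics with non-contiguous indexes trigger creation of
// the missing partitions instead of being returned.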
private List<TopicAndMetadata> analyzeFullTopicNames(final Stream<String> fullTopicNames) {
// key is the topic name, value is a list of the topic's partition indexes
final Map<String, List<Integer>> topicToPartitionIndexes = new HashMap<>();
fullTopicNames.forEach(fullTopicName -> {
final TopicName topicName = TopicName.get(fullTopicName);
// Skip Pulsar's system topic
if (topicName.getLocalName().startsWith("__change_events")
&& topicName.getPartitionedTopicName().endsWith("__change_events")) {
return;
}
topicToPartitionIndexes.computeIfAbsent(
topicName.getPartitionedTopicName(),
ignored -> new ArrayList<>()
).add(topicName.getPartitionIndex());
});
if (topicToPartitionIndexes.isEmpty()) {
return Collections.emptyList();
}
// Check missed partitions
final List<TopicAndMetadata> topicAndMetadataList = new ArrayList<>();
topicToPartitionIndexes.forEach((topic, partitionIndexes) -> {
Collections.sort(partitionIndexes);
final int lastIndex = partitionIndexes.get(partitionIndexes.size() - 1);
if (lastIndex < 0) {
topicAndMetadataList.add(TopicAndMetadata.success(topic, 0)); // non-partitioned topic
} else if (lastIndex == partitionIndexes.size() - 1) {
topicAndMetadataList.add(TopicAndMetadata.success(topic, partitionIndexes.size()));
} else {
// The partitions should be [0, 1, ..., n-1], `n` is the number of partitions. If the last index is not
// `n-1`, there must be some missed partitions.
log.warn("The partitions of topic {} is wrong ({}), try to create missed partitions",
topic, partitionIndexes.size());
admin.topics().createMissedPartitionsAsync(topic);
}
});
return topicAndMetadataList;
}
private CompletableFuture<List<String>> authorizeNamespacesAsync(final Collection<String> namespaces,
final AclOperation aclOperation) {
final Map<String, CompletableFuture<Boolean>> futureMap = namespaces.stream().collect(
Collectors.toMap(
namespace -> namespace,
namespace -> authorize(aclOperation, Resource.of(ResourceType.NAMESPACE, namespace))
));
return CoreUtils.waitForAll(futureMap.values()).thenApply(__ ->
futureMap.entrySet().stream().filter(e -> {
if (!e.getValue().join()) {
log.warn("Failed to authorize {} for ACL operation {}", e.getKey(), aclOperation);
return false;
}
return true;
}).map(Map.Entry::getKey).collect(Collectors.toList())
);
}
private CompletableFuture<List<String>> listAllTopicsFromNamespacesAsync(final List<String> namespaces) {
return CoreUtils.waitForAll(namespaces.stream()
.map(namespace -> pulsarService.getNamespaceService()
.getListOfPersistentTopics(NamespaceName.get(namespace))
).collect(Collectors.toList()),
topics -> topics.stream().flatMap(List::stream));
}
private CompletableFuture<ListPair<String>> authorizeTopicsAsync(final Collection<String> topics,
final AclOperation aclOperation) {
final Map<String, CompletableFuture<Boolean>> futureMap = topics.stream().collect(
Collectors.toMap(
topic -> topic,
topic -> authorize(aclOperation, Resource.of(ResourceType.TOPIC, topic))
));
return CoreUtils.waitForAll(futureMap.values()).thenApply(__ ->
ListPair.of(futureMap.entrySet()
.stream()
.collect(Collectors.groupingBy(e -> e.getValue().join()))
).map(Map.Entry::getKey));
}
private CompletableFuture<List<TopicAndMetadata>> findTopicMetadata(final ListPair<String> listPair,
final boolean allowTopicAutoCreation) {
final Map<String, CompletableFuture<TopicAndMetadata>> futureMap = CoreUtils.listToMap(
listPair.getSuccessfulList(),
topic -> getTopicMetadataAsync(topic, allowTopicAutoCreation)
);
return CoreUtils.waitForAll(futureMap.values()).thenApply(__ ->
CoreUtils.mapToList(futureMap, (___, value) -> value.join())
).thenApply(authorizedTopicAndMetadataList ->
ListUtils.union(authorizedTopicAndMetadataList,
CoreUtils.listToList(listPair.getFailedList(),
topic -> TopicAndMetadata.failure(topic, Errors.TOPIC_AUTHORIZATION_FAILED)))
);
}
private CompletableFuture<List<TopicAndMetadata>> getTopicsAsync(MetadataRequest request,
Set<String> fullTopicNames) {
// The implementation of MetadataRequest#isAllTopics() in kafka-clients 2.0 is wrong.
// Because in version 0, an empty topic list indicates "request metadata for all topics."
if ((request.topics() == null) || (request.topics().isEmpty() && request.version() == 0)) {
// Clear all caches when fetching metadata for all topics (needed for librdkafka < 1.0.0).
kopBrokerLookupManager.clear();
return getAllowedNamespacesAsync()
.thenCompose(namespaces -> authorizeNamespacesAsync(namespaces, AclOperation.DESCRIBE))
.thenCompose(this::listAllTopicsFromNamespacesAsync)
.thenApply(this::analyzeFullTopicNames);
} else {
return authorizeTopicsAsync(fullTopicNames, AclOperation.DESCRIBE)
.thenCompose(authorizedTopicsPair -> findTopicMetadata(authorizedTopicsPair,
request.allowAutoTopicCreation()));
}
}
@Override
protected void handleTopicMetadataRequest(KafkaHeaderAndRequest metadataHar,
CompletableFuture<AbstractResponse> resultFuture) {
// Get all kop brokers in local cache
List<Node> allNodes = Collections.synchronizedList(
new ArrayList<>(adminManager.getBrokers(advertisedEndPoint.getListenerName())));
// Each Pulsar broker can manage metadata like the controller in Kafka, but Kafka's AdminClient
// needs to find a single controller node for metadata management.
// So here we return a random broker as the controller for the given listenerName.
final int controllerId = adminManager.getControllerId(advertisedEndPoint.getListenerName());
final String namespacePrefix = currentNamespacePrefix();
final MetadataRequest request = (MetadataRequest) metadataHar.getRequest();
// This map is used to find the original topic name. Both key and value don't have the "-partition-" suffix.
final Map<String, String> fullTopicNameToOriginal = (request.topics() == null)
? Collections.emptyMap()
: request.topics().stream().distinct().collect(
Collectors.toMap(
topic -> new KopTopic(topic, namespacePrefix).getFullName(),
topic -> topic
));
// NOTE: for all topics METADATA request, remove the default namespace prefix just for backward compatibility.
final Function<String, String> getOriginalTopic = fullTopicName -> fullTopicNameToOriginal.isEmpty()
? KopTopic.convert(fullTopicName, defaultNamespacePrefix)
: fullTopicNameToOriginal.getOrDefault(fullTopicName, fullTopicName);
final String metadataNamespace = kafkaConfig.getKafkaMetadataNamespace();
getTopicsAsync(request, fullTopicNameToOriginal.keySet()).whenComplete((topicAndMetadataList, e) -> {
if (e != null) {
log.error("[{}] Request {}: Exception fetching metadata", ctx.channel(), metadataHar.getHeader(), e);
resultFuture.completeExceptionally(e);
return;
}
final ListPair<TopicAndMetadata> listPair =
ListPair.split(topicAndMetadataList.stream(), TopicAndMetadata::hasNoError);
CoreUtils.waitForAll(listPair.getSuccessfulList().stream()
.map(topicAndMetadata -> topicAndMetadata.lookupAsync(
this::lookup, getOriginalTopic, metadataNamespace))
.collect(Collectors.toList()), successfulTopicMetadataList -> {
final List<TopicMetadata> topicMetadataList = ListUtils.union(successfulTopicMetadataList,
CoreUtils.listToList(listPair.getFailedList(),
metadata -> metadata.toTopicMetadata(getOriginalTopic, metadataNamespace))
);
resultFuture.complete(
KafkaResponseUtils.newMetadata(allNodes, clusterName,
controllerId, topicMetadataList, request.version()));
return null;
})
.exceptionally(lookupException -> {
log.error("[{}] Unexpected exception during lookup", ctx.channel(), lookupException);
resultFuture.completeExceptionally(lookupException);
return null;
});
});
}
private void disableCnxAutoRead() {
if (ctx != null && ctx.channel().config().isAutoRead()) {
ctx.channel().config().setAutoRead(false);
if (log.isDebugEnabled()) {
log.debug("[{}] disable auto read", ctx.channel());
}
}
}
private void enableCnxAutoRead() {
if (ctx != null && !ctx.channel().config().isAutoRead()
&& !autoReadDisabledPublishBufferLimiting) {
// Resume reading from the socket if the pending-request threshold has not been reached
ctx.channel().config().setAutoRead(true);
// triggers channel read
ctx.read();
if (log.isDebugEnabled()) {
log.debug("[{}] enable auto read", ctx.channel());
}
}
}
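// Publish-buffer throttling: each pending produce adds its message size to pendingBytes before the write
// and subtracts it once the send operation completes. When pendingBytes exceeds maxPendingBytes, auto-read
// is disabled on the channel and the broker's paused-connections counter is incremented; reads resume once
// the pending bytes drop below half of the limit.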
private void startSendOperationForThrottling(long msgSize) {
final long currentPendingBytes = pendingBytes.addAndGet(msgSize);
if (currentPendingBytes >= maxPendingBytes && !autoReadDisabledPublishBufferLimiting && maxPendingBytes > 0) {
if (log.isDebugEnabled()) {
log.debug("[{}] disable auto read because currentPendingBytes({}) > maxPendingBytes({})",
ctx.channel(), currentPendingBytes, maxPendingBytes);
}
disableCnxAutoRead();
autoReadDisabledPublishBufferLimiting = true;
setPausedConnections(pulsarService, 1);
}
}
@VisibleForTesting
public static void setPausedConnections(PulsarService pulsarService, int numConnections) {
pulsarService.getBrokerService().pausedConnections(numConnections);
}
private void completeSendOperationForThrottling(long msgSize) {
final long currentPendingBytes = pendingBytes.addAndGet(-msgSize);
if (currentPendingBytes < resumeThresholdPendingBytes && autoReadDisabledPublishBufferLimiting) {
if (log.isDebugEnabled()) {
log.debug("[{}] enable auto read because currentPendingBytes({}) < resumeThreshold({})",
ctx.channel(), currentPendingBytes, resumeThresholdPendingBytes);
}
autoReadDisabledPublishBufferLimiting = false;
enableCnxAutoRead();
resumePausedConnections(pulsarService, 1);
}
}
@VisibleForTesting
public static void resumePausedConnections(PulsarService pulsarService, int numConnections) {
pulsarService.getBrokerService().resumedConnections(numConnections);
}
@Override
protected void handleProduceRequest(KafkaHeaderAndRequest produceHar,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(produceHar.getRequest() instanceof ProduceRequest);
ProduceRequest produceRequest = (ProduceRequest) produceHar.getRequest();
final int numPartitions = produceRequest
.data()
.topicData()
.stream()
.mapToInt(t -> t.partitionData().size())
.sum();
if (numPartitions == 0) {
resultFuture.complete(new ProduceResponse(Collections.emptyMap()));
return;
}
final Map<TopicPartition, PartitionResponse> unauthorizedTopicResponsesMap = new ConcurrentHashMap<>();
final Map<TopicPartition, PartitionResponse> invalidRequestResponses = new HashMap<>();
final Map<TopicPartition, MemoryRecords> authorizedRequestInfo = new ConcurrentHashMap<>();
int timeoutMs = produceRequest.timeout();
short requiredAcks = produceRequest.acks();
String namespacePrefix = currentNamespacePrefix();
final AtomicInteger unfinishedAuthorizationCount = new AtomicInteger(numPartitions);
Runnable completeOne = () -> {
// Invoked after each partition's authorization finishes; once all partitions are handled, send the
// authorized records to the replica manager (or respond immediately if nothing was authorized).
if (unfinishedAuthorizationCount.decrementAndGet() == 0) {
if (authorizedRequestInfo.isEmpty()) {
resultFuture.complete(new ProduceResponse(unauthorizedTopicResponsesMap));
return;
}
AppendRecordsContext appendRecordsContext = AppendRecordsContext.get(
topicManager,
this::startSendOperationForThrottling,
this::completeSendOperationForThrottling,
pendingTopicFuturesMap,
ctx);
ReplicaManager replicaManager = getReplicaManager();
replicaManager.appendRecords(
timeoutMs,
requiredAcks,
false,
namespacePrefix,
authorizedRequestInfo,
PartitionLog.AppendOrigin.Client,
appendRecordsContext
).whenComplete((response, ex) -> {
if (ex != null) {
resultFuture.completeExceptionally(ex.getCause());
return;
}
Map<TopicPartition, PartitionResponse> mergedResponse = new HashMap<>();
mergedResponse.putAll(response);
mergedResponse.putAll(unauthorizedTopicResponsesMap);
mergedResponse.putAll(invalidRequestResponses);
resultFuture.complete(new ProduceResponse(mergedResponse));
response.keySet().forEach(tp -> {
replicaManager.tryCompleteDelayedFetch(new DelayedOperationKey.TopicPartitionOperationKey(tp));
});
});
}
};
produceRequest.data().topicData().forEach((ProduceRequestData.TopicProduceData topicProduceData) -> {
topicProduceData.partitionData().forEach(partitionProduceData -> {
MemoryRecords records = (MemoryRecords) partitionProduceData.records();
int index = partitionProduceData.index();
String name = topicProduceData.name();
TopicPartition topicPartition = new TopicPartition(name, index);
try {
validateRecords(produceHar.getRequest().version(), records);
} catch (ApiException ex) {
invalidRequestResponses.put(topicPartition,
new ProduceResponse.PartitionResponse(Errors.forException(ex)));
completeOne.run();
return;
}
final String fullPartitionName = KopTopic.toString(topicPartition, namespacePrefix);
authorize(AclOperation.WRITE, Resource.of(ResourceType.TOPIC, fullPartitionName))
.whenCompleteAsync((isAuthorized, ex) -> {
if (ex != null) {
log.error("Write topic authorize failed, topic - {}. {}",
fullPartitionName, ex.getMessage());
unauthorizedTopicResponsesMap.put(topicPartition,
new ProduceResponse.PartitionResponse(Errors.TOPIC_AUTHORIZATION_FAILED));
completeOne.run();
return;
}
if (!isAuthorized) {
unauthorizedTopicResponsesMap.put(topicPartition,
new ProduceResponse.PartitionResponse(Errors.TOPIC_AUTHORIZATION_FAILED));
completeOne.run();
return;
}
authorizedRequestInfo.put(topicPartition, records);
completeOne.run();
}, ctx.executor());
});
});
}
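// Produce requests with version >= 3 must carry exactly one record batch per partition, and that batch
// must use magic value 2; anything else is rejected as an invalid record.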
private void validateRecords(short version, MemoryRecords records) {
if (version >= 3) {
Iterator<MutableRecordBatch> iterator = records.batches().iterator();
if (!iterator.hasNext()) {
throw new InvalidRecordException("Produce requests with version " + version + " must have at least "
+ "one record batch");
}
MutableRecordBatch entry = iterator.next();
if (entry.magic() != RecordBatch.MAGIC_VALUE_V2) {
throw new InvalidRecordException("Produce requests with version " + version + " are only allowed to "
+ "contain record batches with magic version 2");
}
if (iterator.hasNext()) {
throw new InvalidRecordException("Produce requests with version " + version + " are only allowed to "
+ "contain exactly one record batch");
}
}
}
@Override
protected void handleFindCoordinatorRequest(KafkaHeaderAndRequest findCoordinator,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(findCoordinator.getRequest() instanceof FindCoordinatorRequest);
FindCoordinatorRequest request = (FindCoordinatorRequest) findCoordinator.getRequest();
List<String> coordinatorKeys = request.version() < FindCoordinatorRequest.MIN_BATCHED_VERSION
? Collections.singletonList(request.data().key()) : request.data().coordinatorKeys();
List<CompletableFuture<FindCoordinatorResponseData.Coordinator>> futures =
new ArrayList<>(coordinatorKeys.size());
for (String coordinatorKey : coordinatorKeys) {
CompletableFuture<FindCoordinatorResponseData.Coordinator> future =
findSingleCoordinator(coordinatorKey, findCoordinator);
futures.add(future);
}
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]))
.whenComplete((ignore, ex) -> {
if (ex != null) {
resultFuture.completeExceptionally(ex);
return;
}
List<FindCoordinatorResponseData.Coordinator> coordinators = new ArrayList<>(futures.size());
for (CompletableFuture<FindCoordinatorResponseData.Coordinator> future : futures) {
coordinators.add(future.join());
}
resultFuture.complete(KafkaResponseUtils.newFindCoordinator(coordinators, request.version()));
});
}
private CompletableFuture<FindCoordinatorResponseData.Coordinator> findSingleCoordinator(
String coordinatorKey, KafkaHeaderAndRequest findCoordinator) {
FindCoordinatorRequest request = (FindCoordinatorRequest) findCoordinator.getRequest();
CompletableFuture<FindCoordinatorResponseData.Coordinator> findSingleCoordinatorResult =
new CompletableFuture<>();
if (request.data().keyType() == FindCoordinatorRequest.CoordinatorType.TRANSACTION.id()) {
TransactionCoordinator transactionCoordinator = getTransactionCoordinator();
int partition = transactionCoordinator.partitionFor(coordinatorKey);
String pulsarTopicName = transactionCoordinator.getTopicPartitionName(partition);
findBroker(TopicName.get(pulsarTopicName))
.whenComplete((KafkaResponseUtils.BrokerLookupResult result, Throwable throwable) -> {
if (result.error != Errors.NONE || throwable != null) {
log.error("[{}] Request {}: Error while find coordinator.",
ctx.channel(), findCoordinator.getHeader(), throwable);
findSingleCoordinatorResult.complete(
newCoordinator(Errors.LEADER_NOT_AVAILABLE, null, coordinatorKey));
return;
}
if (log.isDebugEnabled()) {
log.debug("[{}] Found node {} as coordinator for key {} partition {}.",
ctx.channel(), result.node, request.data().key(), partition);
}
findSingleCoordinatorResult.complete(
newCoordinator(result.error, result.node, coordinatorKey));
});
} else if (request.data().keyType() == FindCoordinatorRequest.CoordinatorType.GROUP.id()) {
authorize(AclOperation.DESCRIBE, Resource.of(ResourceType.GROUP, coordinatorKey))
.whenComplete((isAuthorized, ex) -> {
if (ex != null) {
log.error("Describe group authorize failed, group - {}. {}",
request.data().key(), ex.getMessage());
findSingleCoordinatorResult.complete(
newCoordinator(Errors.GROUP_AUTHORIZATION_FAILED, null, coordinatorKey));
return;
}
if (!isAuthorized) {
findSingleCoordinatorResult.complete(
newCoordinator(Errors.GROUP_AUTHORIZATION_FAILED, null, coordinatorKey));
return;
}
CompletableFuture<Void> storeGroupIdFuture;
int partition = getGroupCoordinator().partitionFor(coordinatorKey);
String pulsarTopicName = getGroupCoordinator().getTopicPartitionName(partition);
if (kafkaConfig.isKopEnableGroupLevelConsumerMetrics()) {
String groupIdPath = GroupIdUtils.groupIdPathFormat(findCoordinator.getClientHost(),
findCoordinator.getHeader().clientId());
currentConnectedClientId.add(findCoordinator.getHeader().clientId());
// Store the group name in the metadata store for the current client; it is used to collect consumer metrics.
storeGroupIdFuture = storeGroupId(coordinatorKey, groupIdPath);
} else {
storeGroupIdFuture = CompletableFuture.completedFuture(null);
}
// Once the group id is stored (or immediately, if group-level metrics are disabled), look up the broker
// that owns the coordinator partition.
storeGroupIdFuture.whenComplete((__, e) -> {
if (e != null) {
log.warn("Store groupId failed, the groupId might already stored.", e);
}
findBroker(TopicName.get(pulsarTopicName))
.whenComplete((KafkaResponseUtils.BrokerLookupResult result,
Throwable throwable) -> {
if (result.error != Errors.NONE || throwable != null) {
log.error("[{}] Request {}: Error while find coordinator.",
ctx.channel(), findCoordinator.getHeader(), throwable);
findSingleCoordinatorResult.complete(
newCoordinator(Errors.LEADER_NOT_AVAILABLE, null, coordinatorKey));
return;
}
if (log.isDebugEnabled()) {
log.debug("[{}] Found node {} as coordinator for key {} partition {}.",
ctx.channel(), result.node, request.data().key(), partition);
}
findSingleCoordinatorResult.complete(
newCoordinator(result.error, result.node, coordinatorKey));
});
});
});
} else {
findSingleCoordinatorResult.completeExceptionally(
new NotImplementedException("FindCoordinatorRequest not support unknown type "
+ request.data().keyType()));
}
return findSingleCoordinatorResult;
}
@VisibleForTesting
protected CompletableFuture<Void> storeGroupId(String groupId, String groupIdPath) {
String path = groupIdStoredPath + groupIdPath;
CompletableFuture<Void> future = new CompletableFuture<>();
metadataStore.put(path, groupId.getBytes(UTF_8), Optional.empty())
.thenAccept(__ -> future.complete(null))
.exceptionally(ex -> {
future.completeExceptionally(ex);
return null;
});
return future;
}
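// Rewrite the keys of the given map from full Pulsar topic names back to the client-facing Kafka topic
// names: entries present in replacingIndex use the original TopicPartition recorded there, the remaining
// entries are converted back relative to the default namespace prefix.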
@VisibleForTesting
public <T> void replaceTopicPartition(Map<TopicPartition, T> replacedMap,
Map<TopicPartition, TopicPartition> replacingIndex) {
Map<TopicPartition, T> newMap = new HashMap<>();
replacedMap.entrySet().removeIf(entry -> {
if (replacingIndex.containsKey(entry.getKey())) {
newMap.put(replacingIndex.get(entry.getKey()), entry.getValue());
} else {
newMap.put(new TopicPartition(KopTopic.convert(entry.getKey().topic(), defaultNamespacePrefix),
entry.getKey().partition()
), entry.getValue());
}
return true;
});
replacedMap.putAll(newMap);
}
@Override
protected void handleOffsetFetchRequest(KafkaHeaderAndRequest offsetFetch,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(offsetFetch.getRequest() instanceof OffsetFetchRequest);
OffsetFetchRequest request = (OffsetFetchRequest) offsetFetch.getRequest();
checkState(getGroupCoordinator() != null,
"Group Coordinator not started");
List<CompletableFuture<KafkaResponseUtils.OffsetFetchResponseGroupData>> futures = new ArrayList<>();
if (request.version() >= 8) {
request.data().groups().forEach(group -> {
String groupId = group.groupId();
List<TopicPartition> partitions = new ArrayList<>();
// null topics means no partitions specified, so we should fetch all partitions
if (group.topics() != null) {
group
.topics()
.forEach(topic -> {
topic.partitionIndexes()
.forEach(partition -> partitions.add(new TopicPartition(topic.name(), partition)));
});
}
futures.add(getOffsetFetchForGroup(groupId, partitions));
});
} else {
// old clients
String groupId = request.data().groupId();
if (request.data().topics() == null) {
futures.add(getOffsetFetchForGroup(groupId, null));
} else {
List<TopicPartition> partitions = new ArrayList<>();
request.data().topics().forEach(topic -> topic.partitionIndexes()
.forEach(partition -> partitions.add(new TopicPartition(topic.name(), partition))));
futures.add(getOffsetFetchForGroup(groupId, partitions));
}
}
FutureUtil.waitForAll(futures).whenComplete((___, error) -> {
if (error != null) {
resultFuture.complete(request.getErrorResponse(error));
return;
}
List<KafkaResponseUtils.OffsetFetchResponseGroupData> partitionsResponses = new ArrayList<>();
futures.forEach(f -> {
partitionsResponses.add(f.join());
});
resultFuture.complete(buildOffsetFetchResponse(partitionsResponses, request.version()));
});
}
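// Fetch committed offsets for a single group: each requested partition is authorized for DESCRIBE against
// its full Pulsar topic name, the authorized partitions are passed to the group coordinator (a null or
// empty list means "all partitions"), and the returned partition keys are mapped back to the original
// topic names before the per-group response is built.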
protected CompletableFuture<KafkaResponseUtils.OffsetFetchResponseGroupData> getOffsetFetchForGroup(
String groupId,
List<TopicPartition> partitions
) {
CompletableFuture<KafkaResponseUtils.OffsetFetchResponseGroupData> resultFuture = new CompletableFuture<>();
CompletableFuture<List<TopicPartition>> authorizeFuture = new CompletableFuture<>();
// maps the authorized full (Pulsar) topic-partition back to the original client-facing topic-partition
Map<TopicPartition, TopicPartition> replacingIndex = new HashMap<>();
List<TopicPartition> authorizedPartitions = new ArrayList<>();
Map<TopicPartition, OffsetFetchResponse.PartitionData> unauthorizedPartitionData =
Maps.newConcurrentMap();
Map<TopicPartition, OffsetFetchResponse.PartitionData> unknownPartitionData =
Maps.newConcurrentMap();
if (partitions == null || partitions.isEmpty()) {
// fetch all partitions
authorizeFuture.complete(null);
} else {
AtomicInteger partitionCount = new AtomicInteger(partitions.size());
Runnable completeOneAuthorization = () -> {
if (partitionCount.decrementAndGet() == 0) {
authorizeFuture.complete(authorizedPartitions);
}
};
final String namespacePrefix = currentNamespacePrefix();
partitions.forEach(tp -> {
try {
String fullName = new KopTopic(tp.topic(), namespacePrefix).getFullName();
authorize(AclOperation.DESCRIBE, Resource.of(ResourceType.TOPIC, fullName))
.whenComplete((isAuthorized, ex) -> {
if (ex != null) {
log.error("Describe topic authorize failed, topic - {}. {}",
fullName, ex.getMessage());
unauthorizedPartitionData.put(tp, OffsetFetchResponse.UNAUTHORIZED_PARTITION);
completeOneAuthorization.run();
return;
}
if (!isAuthorized) {
unauthorizedPartitionData.put(tp, OffsetFetchResponse.UNAUTHORIZED_PARTITION);
completeOneAuthorization.run();
return;
}
TopicPartition newTopicPartition = new TopicPartition(
fullName, tp.partition());
replacingIndex.put(newTopicPartition, tp);
authorizedPartitions.add(newTopicPartition);
completeOneAuthorization.run();
});
} catch (KoPTopicException e) {
log.warn("Invalid topic name: {}", tp.topic(), e);
unknownPartitionData.put(tp, OffsetFetchResponse.UNKNOWN_PARTITION);
}
});
}
authorizeFuture.whenComplete((partitionList, ex) -> {
KeyValue<Errors, Map<TopicPartition, OffsetFetchResponse.PartitionData>> keyValue =
getGroupCoordinator().handleFetchOffsets(
groupId,
Optional.ofNullable(partitionList)
);
if (log.isDebugEnabled()) {
log.debug("OFFSET_FETCH Unknown partitions: {}, Unauthorized partitions: {}.",
unknownPartitionData, unauthorizedPartitionData);
}
if (log.isTraceEnabled()) {
StringBuffer traceInfo = new StringBuffer();
replacingIndex.forEach((inner, outer) ->
traceInfo.append(String.format("\tinnerName:%s, outerName:%s%n", inner, outer)));
log.trace("OFFSET_FETCH TopicPartition relations: \n{}", traceInfo);
}
// recover to original topic name
Map<TopicPartition, OffsetFetchResponse.PartitionData> partitionsResponses = keyValue.getValue();
replaceTopicPartition(partitionsResponses, replacingIndex);
partitionsResponses.putAll(unauthorizedPartitionData);
partitionsResponses.putAll(unknownPartitionData);
Errors errors = keyValue.getKey();
resultFuture.complete(new KafkaResponseUtils.OffsetFetchResponseGroupData(groupId, errors,
partitionsResponses));
});
return resultFuture;
}
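// Resolve an offset for a single partition: LATEST_TIMESTAMP returns the log end offset (or the last
// stable offset when the request uses read_committed), EARLIEST_TIMESTAMP returns the offset of the first
// valid position in the managed ledger, and any other value is resolved with a timestamp lookup via
// fetchOffsetByTimestamp.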
private CompletableFuture<Pair<Errors, Long>> fetchOffset(String topicName,
TopicPartition partition,
String namespacePrefix,
long timestamp,
boolean readCommitted) {
CompletableFuture<Pair<Errors, Long>> partitionData = new CompletableFuture<>();
topicManager.getTopic(topicName).thenAccept((perTopicOpt) -> {
if (!perTopicOpt.isPresent()) {
partitionData.complete(Pair.of(Errors.UNKNOWN_TOPIC_OR_PARTITION, null));
return;
}
PersistentTopic perTopic = perTopicOpt.get();
ManagedLedgerImpl managedLedger = (ManagedLedgerImpl) perTopic.getManagedLedger();
PositionImpl lac = (PositionImpl) managedLedger.getLastConfirmedEntry();
if (lac == null) {
log.error("[{}] Unexpected LastConfirmedEntry for topic {}, managed ledger: {}",
ctx, perTopic.getName(), managedLedger.getName());
partitionData.complete(Pair.of(Errors.UNKNOWN_SERVER_ERROR, -1L));
return;
}
final long logEndOffset = MessageMetadataUtils.getLogEndOffset(managedLedger);
if (timestamp == ListOffsetsRequest.LATEST_TIMESTAMP) {
final CompletableFuture<Long> lsoFuture;
if (readCommitted) {
PartitionLog partitionLog = replicaManager.getPartitionLog(
partition, namespacePrefix);
lsoFuture = partitionLog.awaitInitialisation().thenApply(__ -> {
final long lso = partitionLog.firstUndecidedOffset().orElse(logEndOffset);
if (log.isDebugEnabled()) {
log.debug("[{}] Get last stable offset for topic {} highWatermark: {}, lso: {}",
ctx, perTopic.getName(), logEndOffset, lso);
}
return lso;
});
} else {
lsoFuture = CompletableFuture.completedFuture(logEndOffset);
}
lsoFuture.whenComplete((lso, throwable) -> {
if (throwable != null) {
log.error("[{}] Failed to get last stable offset for topic {}",
ctx, perTopic.getName(), throwable);
partitionData.complete(Pair.of(Errors.UNKNOWN_SERVER_ERROR, null));
return;
}
partitionData.complete(Pair.of(Errors.NONE, lso));
});
} else if (timestamp == ListOffsetsRequest.EARLIEST_TIMESTAMP) {
PositionImpl position = OffsetFinder.getFirstValidPosition(managedLedger);
if (position == null) {
log.error("[{}] Failed to find first valid position for topic {}", ctx, perTopic.getName());
partitionData.complete(Pair.of(Errors.UNKNOWN_SERVER_ERROR, -1L));
return;
}
if (log.isDebugEnabled()) {
log.debug("[{}] Get earliest position for topic {}: {}, lac: {}",
ctx, perTopic.getName(), position, lac);
}
if (position.compareTo(lac) > 0) {
partitionData.complete(Pair.of(Errors.NONE, logEndOffset));
} else {
MessageMetadataUtils.getOffsetOfPosition(managedLedger, position, false,
timestamp, skipMessagesWithoutIndex)
.whenComplete((offset, throwable) -> {
if (throwable != null) {
log.error("[{}] Failed to get offset for position {}",
perTopic, position, throwable);
partitionData.complete(Pair.of(Errors.UNKNOWN_SERVER_ERROR, null));
return;
}
if (log.isDebugEnabled()) {
log.debug("[{}] Get offset of position for topic {}: {}, lac: {}, offset: {}",
ctx, perTopic.getName(), position, lac, offset);
}
partitionData.complete(Pair.of(Errors.NONE, offset));
});
}
} else {
fetchOffsetByTimestamp(partitionData, managedLedger, lac, timestamp, perTopic.getName());
}
}).exceptionally(e -> {
Throwable throwable = FutureUtil.unwrapCompletionException(e);
log.error("Failed while get persistentTopic topic: {} ts: {}. ", topicName, timestamp, throwable);
partitionData.complete(Pair.of(Errors.UNKNOWN_TOPIC_OR_PARTITION, null));
return null;
});
return partitionData;
}
private void fetchOffsetByTimestamp(CompletableFuture<Pair<Errors, Long>> partitionData,
ManagedLedgerImpl managedLedger,
PositionImpl lac,
long timestamp,
String topic) {
// find with real wanted timestamp
OffsetFinder offsetFinder = new OffsetFinder(managedLedger);
offsetFinder.findMessages(timestamp, new AsyncCallbacks.FindEntryCallback() {
@Override
public void findEntryComplete(Position position, Object ctx) {
PositionImpl finalPosition;
if (position == null) {
finalPosition = OffsetFinder.getFirstValidPosition(managedLedger);
if (finalPosition == null) {
log.warn("Unable to find position for topic {} time {}. get NULL position",
topic, timestamp);
partitionData.complete(Pair.of(Errors.UNKNOWN_SERVER_ERROR, null));
return;
}
} else {
finalPosition = (PositionImpl) position;
}
if (log.isDebugEnabled()) {
log.debug("Find position for topic {} time {}. position: {}",
topic, timestamp, finalPosition);
}
if (finalPosition.compareTo(lac) > 0 || MessageMetadataUtils.getCurrentOffset(managedLedger) < 0) {
long offset = Math.max(0, MessageMetadataUtils.getCurrentOffset(managedLedger));
partitionData.complete(Pair.of(Errors.NONE, offset));
} else {
MessageMetadataUtils.getOffsetOfPosition(managedLedger, finalPosition, true,
timestamp, skipMessagesWithoutIndex)
.whenComplete((offset, throwable) -> {
if (throwable != null) {
log.error("[{}] Failed to get offset for position {}",
topic, finalPosition, throwable);
partitionData.complete(Pair.of(Errors.UNKNOWN_SERVER_ERROR, null));
return;
}
partitionData.complete(Pair.of(Errors.NONE, offset));
});
}
}
@Override
public void findEntryFailed(ManagedLedgerException exception,
Optional<Position> position, Object ctx) {
if (exception instanceof ManagedLedgerException.NonRecoverableLedgerException) {
// The position doesn't exist, it usually happens when the rollover of managed ledger leads to
// the deletion of all expired ledgers. In this case, there's only one empty ledger in the managed
// ledger. So here we complete it with the latest offset.
partitionData.complete(Pair.of(Errors.NONE, MessageMetadataUtils.getLogEndOffset(managedLedger)));
} else {
log.warn("Unable to find position for topic {} time {}. Exception:",
topic, timestamp, exception);
partitionData.complete(Pair.of(Errors.UNKNOWN_SERVER_ERROR, null));
}
}
});
}
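/**
* Wait until every per-partition offset future completes, then build a single ListOffsetsResponse
* (using the legacy v0 layout when {@code legacy} is true) and complete the result future.
*/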
private void waitResponseDataComplete(CompletableFuture<AbstractResponse> resultFuture,
Map<TopicPartition, CompletableFuture<Pair<Errors, Long>>> responseData,
boolean legacy) {
CompletableFuture
.allOf(responseData.values().toArray(new CompletableFuture<?>[0]))
.whenComplete((ignore, ex) -> {
ListOffsetsResponse response = KafkaResponseUtils.newListOffset(
CoreUtils.mapValue(responseData, CompletableFuture::join), legacy);
resultFuture.complete(response);
});
}
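// ListOffsets v1+: authorize DESCRIBE on each topic, then resolve the requested
// timestamp (EARLIEST, LATEST, or an actual timestamp) to an offset per partition.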
private void handleListOffsetRequestV1AndAbove(KafkaHeaderAndRequest listOffset,
CompletableFuture<AbstractResponse> resultFuture) {
ListOffsetsRequest request = (ListOffsetsRequest) listOffset.getRequest();
boolean readCommitted = IsolationLevel.READ_COMMITTED.equals(request.isolationLevel());
Map<TopicPartition, CompletableFuture<Pair<Errors, Long>>> responseData =
Maps.newConcurrentMap();
ListOffsetsRequestData data = request.data();
if (data.topics().size() == 0) {
resultFuture.complete(new ListOffsetsResponse(new ListOffsetsResponseData()));
return;
}
AtomicInteger partitions = new AtomicInteger(
data.topics().stream().map(ListOffsetsRequestData.ListOffsetsTopic::partitions)
.mapToInt(Collection::size).sum()
);
Runnable completeOne = () -> {
if (partitions.decrementAndGet() == 0) {
waitResponseDataComplete(resultFuture, responseData, false);
}
};
String namespacePrefix = currentNamespacePrefix();
KafkaRequestUtils.forEachListOffsetRequest(request, (topic, times) -> {
String fullPartitionName = KopTopic.toString(topic, namespacePrefix);
authorize(AclOperation.DESCRIBE, Resource.of(ResourceType.TOPIC, fullPartitionName))
.whenComplete((isAuthorized, ex) -> {
if (ex != null) {
log.error("Describe topic authorize failed, topic - {}. {}",
fullPartitionName, ex.getMessage());
responseData.put(topic, CompletableFuture.completedFuture(
Pair.of(Errors.TOPIC_AUTHORIZATION_FAILED, null)
));
completeOne.run();
return;
}
if (!isAuthorized) {
responseData.put(topic, CompletableFuture.completedFuture(
Pair.of(Errors.TOPIC_AUTHORIZATION_FAILED, null)
));
completeOne.run();
return;
}
responseData.put(topic, fetchOffset(
fullPartitionName,
topic,
namespacePrefix,
times.timestamp(),
readCommitted));
completeOne.run();
});
});
}
// Some info can be found here
// https://cfchou.github.io/blog/2015/04/23/a-closer-look-at-kafka-offsetrequest/ through web.archive.org
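// Note: v0 additionally carries a max_num_offsets field; requests asking for more than one
// offset per partition are answered with an error below.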
private void handleListOffsetRequestV0(KafkaHeaderAndRequest listOffset,
CompletableFuture<AbstractResponse> resultFuture) {
ListOffsetRequestV0 request =
byteBufToListOffsetRequestV0(listOffset.getBuffer());
boolean readCommitted = IsolationLevel.READ_COMMITTED.equals(request.isolationLevel());
Map<TopicPartition, CompletableFuture<Pair<Errors, Long>>> responseData =
Maps.newConcurrentMap();
if (request.offsetData().size() == 0) {
resultFuture.complete(new ListOffsetsResponse(new ListOffsetsResponseData()));
return;
}
AtomicInteger partitions = new AtomicInteger(request.offsetData().size());
Runnable completeOne = () -> {
if (partitions.decrementAndGet() == 0) {
waitResponseDataComplete(resultFuture, responseData, true);
}
};
// in v0, the iterator is offsetData,
// in v1, the iterator is partitionTimestamps,
if (log.isDebugEnabled()) {
log.debug("received a v0 listOffset: {}", request.toString(true));
}
String namespacePrefix = currentNamespacePrefix();
KafkaRequestUtils.LegacyUtils.forEachListOffsetRequest(request, topic -> times -> maxNumOffsets -> {
String fullPartitionName = KopTopic.toString(topic, namespacePrefix);
authorize(AclOperation.DESCRIBE, Resource.of(ResourceType.TOPIC, fullPartitionName))
.whenComplete((isAuthorized, ex) -> {
if (ex != null) {
log.error("Describe topic authorize failed, topic - {}. {}",
fullPartitionName, ex.getMessage());
responseData.put(topic, CompletableFuture.completedFuture(
Pair.of(Errors.TOPIC_AUTHORIZATION_FAILED, null)));
completeOne.run();
return;
}
if (!isAuthorized) {
responseData.put(topic, CompletableFuture.completedFuture(
Pair.of(Errors.TOPIC_AUTHORIZATION_FAILED, null)));
completeOne.run();
return;
}
CompletableFuture<Pair<Errors, Long>> partitionData;
// max_num_offsets > 1 is not handled for now, so return an error instead
if (maxNumOffsets > 1) {
log.warn("request is asking for multiple offsets for {}, not supported for now",
fullPartitionName);
partitionData = new CompletableFuture<>();
partitionData.complete(Pair.of(Errors.UNKNOWN_SERVER_ERROR, null));
} else {
partitionData = fetchOffset(fullPartitionName, topic, namespacePrefix, times, readCommitted);
}
responseData.put(topic, partitionData);
completeOne.run();
});
});
}
// get the offset from the underlying managed ledger
@Override
protected void handleListOffsetRequest(KafkaHeaderAndRequest listOffset,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(listOffset.getRequest() instanceof ListOffsetsRequest);
// the only difference between v0 and v1 is the `max_num_offsets => INT32`
// v0 is required because it is used by librdkafka
if (listOffset.getHeader().apiVersion() == 0) {
handleListOffsetRequestV0(listOffset, resultFuture);
} else {
handleListOffsetRequestV1AndAbove(listOffset, resultFuture);
}
}
// For non-existent topics, handleOffsetCommitRequest returns UNKNOWN_TOPIC_OR_PARTITION
private Map<TopicPartition, Errors> nonExistingTopicErrors(OffsetCommitRequest request) {
// TODO: the Kafka metadata cache includes all topics in the cluster; should we support that?
// We could get all the topic info via listTopic.
// https://github.com/streamnative/kop/issues/51
return Maps.newHashMap();
// return request.offsetData().entrySet().stream()
// .filter(entry ->
// // filter not exist topics
// !topicManager.topicExists(pulsarTopicName(entry.getKey(), namespace).toString()))
// .collect(Collectors.toMap(
// e -> e.getKey(),
// e -> Errors.UNKNOWN_TOPIC_OR_PARTITION
// ));
}
private Map<TopicPartition, Errors> nonExistingTopicErrors() {
// TODO: The check for the existence of the topic is missing
return Maps.newHashMap();
}
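/**
* Apply the offset retention rules of the OffsetCommit request: v2-v4 may carry an explicit
* retention time, while v0/v1 and v5+ always fall back to the broker-side offsets retention config.
* The expiration timestamp is derived from the commit timestamp (or "now" when none is provided).
*/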
@VisibleForTesting
Map<TopicPartition, OffsetAndMetadata> convertOffsetCommitRequestRetentionMs(
Map<TopicPartition, OffsetCommitRequestData.OffsetCommitRequestPartition> convertedOffsetData,
long retentionTime,
short apiVersion,
long currentTimeStamp,
long configOffsetsRetentionMs) {
// V2 adds retention time to the request and V5 removes retention time
long offsetRetention;
if (apiVersion <= 1 || apiVersion >= 5 || retentionTime == OffsetCommitRequest.DEFAULT_RETENTION_TIME) {
offsetRetention = configOffsetsRetentionMs;
} else {
offsetRetention = retentionTime;
}
// commit from kafka
// > commit timestamp is always set to now.
// > "default" expiration timestamp is now + retention (and retention may be overridden if v2)
// > expire timestamp is computed differently for v1 and v2.
// > - If v1 and no explicit commit timestamp is provided we use default expiration timestamp.
// > - If v1 and explicit commit timestamp is provided we calculate retention from
// > that explicit commit timestamp
// > - If v2 we use the default expiration timestamp
// commit from kafka
long defaultExpireTimestamp = offsetRetention + currentTimeStamp;
long finalOffsetRetention = offsetRetention;
return CoreUtils.mapValue(convertedOffsetData, (partitionData) -> {
String metadata;
if (partitionData.committedMetadata() == null) {
metadata = OffsetMetadata.NO_METADATA;
} else {
metadata = partitionData.committedMetadata();
}
long expireTimeStamp;
if (partitionData.commitTimestamp() == OffsetCommitRequest.DEFAULT_TIMESTAMP) {
expireTimeStamp = defaultExpireTimestamp;
} else {
expireTimeStamp = finalOffsetRetention + partitionData.commitTimestamp();
}
return OffsetAndMetadata.apply(
partitionData.committedOffset(),
metadata,
currentTimeStamp,
expireTimeStamp);
});
}
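// OffsetCommit: convert each Kafka topic name to its full KoP name (so that entries in
// __consumer_offsets are globally unique), authorize READ per partition, then delegate to the
// group coordinator and map the results back to the original topic names.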
@Override
protected void handleOffsetCommitRequest(KafkaHeaderAndRequest offsetCommit,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(offsetCommit.getRequest() instanceof OffsetCommitRequest);
checkState(getGroupCoordinator() != null,
"Group Coordinator not started");
OffsetCommitRequest request = (OffsetCommitRequest) offsetCommit.getRequest();
OffsetCommitRequestData data = request.data();
// TODO: non-existent topics are not processed at this time.
Map<TopicPartition, Errors> nonExistingTopicErrors = nonExistingTopicErrors(request);
Map<TopicPartition, Errors> unauthorizedTopicErrors = Maps.newConcurrentMap();
if (data.topics().isEmpty()) {
resultFuture.complete(KafkaResponseUtils.newOffsetCommit(Maps.newHashMap()));
return;
}
// convert raw topic name to KoP full name
// we need to ensure that topic name in __consumer_offsets is globally unique
Map<TopicPartition, OffsetCommitRequestData.OffsetCommitRequestPartition> convertedOffsetData =
Maps.newConcurrentMap();
Map<TopicPartition, TopicPartition> replacingIndex = Maps.newConcurrentMap();
AtomicInteger unfinishedAuthorizationCount = new AtomicInteger(
data.topics().stream().map(OffsetCommitRequestData.OffsetCommitRequestTopic::partitions)
.mapToInt(Collection::size).sum());
Consumer<Runnable> completeOne = (action) -> {
// When complete one authorization or failed, will do the action first.
action.run();
if (unfinishedAuthorizationCount.decrementAndGet() == 0) {
if (log.isTraceEnabled()) {
StringBuffer traceInfo = new StringBuffer();
replacingIndex.forEach((inner, outer) ->
traceInfo.append(String.format("\tinnerName:%s, outerName:%s%n", inner, outer)));
log.trace("OFFSET_COMMIT TopicPartition relations: \n{}", traceInfo);
}
if (convertedOffsetData.isEmpty()) {
Map<TopicPartition, Errors> offsetCommitResult = Maps.newHashMap();
offsetCommitResult.putAll(nonExistingTopicErrors);
offsetCommitResult.putAll(unauthorizedTopicErrors);
OffsetCommitResponse response = KafkaResponseUtils.newOffsetCommit(offsetCommitResult);
resultFuture.complete(response);
} else {
Map<TopicPartition, OffsetAndMetadata> convertedPartitionData =
convertOffsetCommitRequestRetentionMs(
convertedOffsetData,
KafkaRequestUtils.LegacyUtils.getRetentionTime(request),
offsetCommit.getHeader().apiVersion(),
Time.SYSTEM.milliseconds(),
getGroupCoordinator().offsetConfig().offsetsRetentionMs()
);
getGroupCoordinator().handleCommitOffsets(
data.groupId(),
data.memberId(),
data.generationId(),
convertedPartitionData
).thenAccept(offsetCommitResult -> {
// recover to original topic name
replaceTopicPartition(offsetCommitResult, replacingIndex);
offsetCommitResult.putAll(nonExistingTopicErrors);
offsetCommitResult.putAll(unauthorizedTopicErrors);
OffsetCommitResponse response = KafkaResponseUtils.newOffsetCommit(offsetCommitResult);
resultFuture.complete(response);
});
}
}
};
final String namespacePrefix = currentNamespacePrefix();
data.topics().forEach((OffsetCommitRequestData.OffsetCommitRequestTopic topicData) -> {
topicData.partitions().forEach((OffsetCommitRequestData.OffsetCommitRequestPartition partitionData) -> {
TopicPartition tp = new TopicPartition(topicData.name(), partitionData.partitionIndex());
KopTopic kopTopic;
try {
kopTopic = new KopTopic(tp.topic(), namespacePrefix);
} catch (KoPTopicException e) {
log.warn("Invalid topic name: {}", tp.topic(), e);
completeOne.accept(() -> nonExistingTopicErrors.put(tp, Errors.UNKNOWN_TOPIC_OR_PARTITION));
return;
}
String fullTopicName = kopTopic.getFullName();
authorize(AclOperation.READ, Resource.of(ResourceType.TOPIC, fullTopicName))
.whenComplete((isAuthorized, ex) -> {
if (ex != null) {
log.error("OffsetCommit authorize failed, topic - {}. {}",
fullTopicName, ex.getMessage());
completeOne.accept(
() -> unauthorizedTopicErrors.put(tp, Errors.TOPIC_AUTHORIZATION_FAILED));
return;
}
if (!isAuthorized) {
completeOne.accept(
() -> unauthorizedTopicErrors.put(tp, Errors.TOPIC_AUTHORIZATION_FAILED));
return;
}
completeOne.accept(() -> {
TopicPartition newTopicPartition = new TopicPartition(
new KopTopic(tp.topic(), namespacePrefix).getFullName(), tp.partition());
convertedOffsetData.put(newTopicPartition, partitionData);
replacingIndex.put(newTopicPartition, tp);
});
});
});
});
}
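// Fetch: authorize READ on every requested partition; unauthorized partitions are answered with
// TOPIC_AUTHORIZATION_FAILED, while the remaining ("interesting") partitions are read through the
// ReplicaManager, honoring READ_COMMITTED when the transaction coordinator is enabled.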
@Override
protected void handleFetchRequest(KafkaHeaderAndRequest fetch,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(fetch.getRequest() instanceof FetchRequest);
FetchRequest request = (FetchRequest) fetch.getRequest();
FetchRequestData data = request.data();
if (log.isDebugEnabled()) {
log.debug("[{}] Request {} Fetch request. Size: {}. Each item: ",
ctx.channel(), fetch.getHeader(), data.topics().size());
data.topics().forEach((topicData) -> {
log.debug("Fetch request topic: data:{}.", topicData.toString());
});
}
int numPartitions = data.topics().stream().mapToInt(topic -> topic.partitions().size()).sum();
if (numPartitions == 0) {
resultFuture.complete(new FetchResponse(new FetchResponseData()
.setErrorCode(Errors.NONE.code())
.setSessionId(request.metadata().sessionId())
.setResponses(new ArrayList<>())));
return;
}
ConcurrentHashMap<TopicPartition, FetchResponseData.PartitionData> erroneous =
new ConcurrentHashMap<>();
ConcurrentHashMap<TopicPartition, FetchRequestData.FetchPartition> interesting =
new ConcurrentHashMap<>();
AtomicInteger unfinishedAuthorizationCount = new AtomicInteger(numPartitions);
Runnable completeOne = () -> {
if (unfinishedAuthorizationCount.decrementAndGet() == 0) {
TransactionCoordinator transactionCoordinator = null;
if (request.isolationLevel().equals(IsolationLevel.READ_COMMITTED)
&& kafkaConfig.isKafkaTransactionCoordinatorEnabled()) {
transactionCoordinator = getTransactionCoordinator();
}
String namespacePrefix = currentNamespacePrefix();
int fetchMaxBytes = request.maxBytes();
int fetchMinBytes = Math.min(request.minBytes(), fetchMaxBytes);
if (interesting.isEmpty()) {
if (log.isDebugEnabled()) {
log.debug("Fetch interesting is empty. Partitions: [{}]", data.topics());
}
resultFuture.complete(new FetchResponse(new FetchResponseData()
.setErrorCode(Errors.NONE.code())
.setSessionId(request.metadata().sessionId())
.setResponses(buildFetchResponses(erroneous))));
} else {
MessageFetchContext context = MessageFetchContext
.get(this, transactionCoordinator, maxReadEntriesNum, namespacePrefix,
getKafkaTopicManagerSharedState(), this.executor, fetch);
this.getReplicaManager().fetchMessage(
request.maxWait(),
fetchMinBytes,
fetchMaxBytes,
interesting,
request.isolationLevel(),
context
).thenAccept(resultMap -> {
Map<TopicPartition, FetchResponseData.PartitionData> all = new HashMap<>();
resultMap.forEach((tp, results) -> {
all.put(tp, results.toPartitionData());
});
all.putAll(erroneous);
boolean triggeredCompletion = resultFuture.complete(new ResponseCallbackWrapper(
new FetchResponse(new FetchResponseData()
.setErrorCode(Errors.NONE.code())
.setThrottleTimeMs(0)
.setSessionId(request.metadata().sessionId())
.setResponses(buildFetchResponses(all))),
() -> resultMap.forEach((__, readRecordsResult) -> {
readRecordsResult.recycle();
})
));
if (!triggeredCompletion) {
resultMap.forEach((__, readRecordsResult) -> {
readRecordsResult.recycle();
});
}
context.recycle();
});
}
}
};
// Regular Kafka consumers need READ permission on each partition they are fetching.
data.topics().forEach(topicData -> {
topicData.partitions().forEach((partitionData) -> {
TopicPartition topicPartition = new TopicPartition(topicData.topic(), partitionData.partition());
final String fullTopicName = KopTopic.toString(topicPartition, this.currentNamespacePrefix());
authorize(AclOperation.READ, Resource.of(ResourceType.TOPIC, fullTopicName))
.whenComplete((isAuthorized, ex) -> {
if (ex != null) {
log.error("Read topic authorize failed, topic - {}. {}",
fullTopicName, ex.getMessage());
erroneous.put(topicPartition, errorResponse(Errors.TOPIC_AUTHORIZATION_FAILED));
completeOne.run();
return;
}
if (!isAuthorized) {
erroneous.put(topicPartition, errorResponse(Errors.TOPIC_AUTHORIZATION_FAILED));
completeOne.run();
return;
}
interesting.put(topicPartition, partitionData);
completeOne.run();
});
});
});
}
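/**
* Group the per-partition results by topic and convert them into the
* FetchableTopicResponse list expected by FetchResponseData.
*/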
public static List<FetchResponseData.FetchableTopicResponse> buildFetchResponses(
Map<TopicPartition, FetchResponseData.PartitionData> partitionData) {
List<FetchResponseData.FetchableTopicResponse> result = new ArrayList<>();
partitionData.keySet()
.stream()
.map(topicPartition -> topicPartition.topic())
.distinct()
.forEach(topic -> {
FetchResponseData.FetchableTopicResponse fetchableTopicResponse =
new FetchResponseData.FetchableTopicResponse()
.setTopic(topic)
.setPartitions(new ArrayList<>());
result.add(fetchableTopicResponse);
partitionData.forEach((tp, data) -> {
if (tp.topic().equals(topic)) {
fetchableTopicResponse.partitions().add(new FetchResponseData.PartitionData()
.setPartitionIndex(tp.partition())
.setErrorCode(data.errorCode())
.setHighWatermark(data.highWatermark())
.setLastStableOffset(data.lastStableOffset())
.setLogStartOffset(data.logStartOffset())
.setAbortedTransactions(data.abortedTransactions())
.setPreferredReadReplica(data.preferredReadReplica())
.setRecords(data.records()));
}
});
});
return result;
}
private static FetchResponseData.PartitionData errorResponse(Errors error) {
return new FetchResponseData.PartitionData()
.setErrorCode(error.code())
.setHighWatermark(FetchResponse.INVALID_HIGH_WATERMARK)
.setLastStableOffset(FetchResponse.INVALID_LAST_STABLE_OFFSET)
.setLogStartOffset(FetchResponse.INVALID_LOG_START_OFFSET)
.setRecords(MemoryRecords.EMPTY);
}
@Override
protected void handleJoinGroupRequest(KafkaHeaderAndRequest joinGroup,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(joinGroup.getRequest() instanceof JoinGroupRequest);
checkState(getGroupCoordinator() != null,
"Group Coordinator not started");
JoinGroupRequest request = (JoinGroupRequest) joinGroup.getRequest();
JoinGroupRequestData data = request.data();
Map<String, byte[]> protocols = new HashMap<>();
data.protocols()
.forEach(protocol -> protocols.put(protocol.name(), protocol.metadata()));
getGroupCoordinator().handleJoinGroup(
data.groupId(),
data.memberId(),
joinGroup.getHeader().clientId(),
joinGroup.getClientHost(),
data.rebalanceTimeoutMs(),
data.sessionTimeoutMs(),
data.protocolType(),
protocols
).thenAccept(joinGroupResult -> {
Map<String, byte[]> members = new HashMap<>(joinGroupResult.getMembers());
JoinGroupResponse response = KafkaResponseUtils.newJoinGroup(
joinGroupResult.getError(),
joinGroupResult.getGenerationId(),
joinGroupResult.getProtocolName(),
joinGroupResult.getProtocolType(),
joinGroupResult.getMemberId(),
joinGroupResult.getLeaderId(),
members,
request.version()
);
if (log.isTraceEnabled()) {
log.trace("Sending join group response {} for correlation id {} to client {}.",
response, joinGroup.getHeader().correlationId(), joinGroup.getHeader().clientId());
}
resultFuture.complete(response);
});
}
@Override
protected void handleSyncGroupRequest(KafkaHeaderAndRequest syncGroup,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(syncGroup.getRequest() instanceof SyncGroupRequest);
SyncGroupRequest request = (SyncGroupRequest) syncGroup.getRequest();
SyncGroupRequestData data = request.data();
groupIds.add(data.groupId());
Map<String, byte[]> assignments = data.assignments()
.stream()
.collect(Collectors.toMap(
SyncGroupRequestData.SyncGroupRequestAssignment::memberId,
SyncGroupRequestData.SyncGroupRequestAssignment::assignment));
getGroupCoordinator().handleSyncGroup(
data.groupId(),
data.generationId(),
data.memberId(),
assignments
).thenAccept(syncGroupResult -> {
SyncGroupResponse response = KafkaResponseUtils.newSyncGroup(
syncGroupResult.getKey(),
data.protocolType(),
data.protocolName(),
syncGroupResult.getValue()
);
resultFuture.complete(response);
});
}
@Override
protected void handleHeartbeatRequest(KafkaHeaderAndRequest heartbeat,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(heartbeat.getRequest() instanceof HeartbeatRequest);
HeartbeatRequest request = (HeartbeatRequest) heartbeat.getRequest();
// let the coordinator handle the heartbeat
getGroupCoordinator().handleHeartbeat(
request.data().groupId(),
request.data().memberId(),
request.data().generationId()
).thenAccept(errors -> {
HeartbeatResponse response = KafkaResponseUtils.newHeartbeat(errors);
if (log.isTraceEnabled()) {
log.trace("Sending heartbeat response {} for correlation id {} to client {}.",
response, heartbeat.getHeader().correlationId(), heartbeat.getHeader().clientId());
}
resultFuture.complete(response);
});
}
@Override
protected void handleLeaveGroupRequest(KafkaHeaderAndRequest leaveGroup,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(leaveGroup.getRequest() instanceof LeaveGroupRequest);
LeaveGroupRequest request = (LeaveGroupRequest) leaveGroup.getRequest();
LeaveGroupRequestData data = request.data();
Set<String> members = data.members().stream()
.map(LeaveGroupRequestData.MemberIdentity::memberId)
.collect(Collectors.toSet());
if (!data.memberId().isEmpty()) {
// old clients
members.add(data.memberId());
}
// let the coordinator handle the leave-group request
getGroupCoordinator().handleLeaveGroup(
data.groupId(),
members
).thenAccept(errors -> resultFuture.complete(KafkaResponseUtils.newLeaveGroup(errors)));
}
@Override
protected void handleDescribeGroupRequest(KafkaHeaderAndRequest describeGroup,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(describeGroup.getRequest() instanceof DescribeGroupsRequest);
DescribeGroupsRequest request = (DescribeGroupsRequest) describeGroup.getRequest();
// let the coordinator handle the describe-groups request
resultFuture.complete(KafkaResponseUtils.newDescribeGroups(request.data().groups().stream()
.map(groupId -> Pair.of(groupId, getGroupCoordinator().handleDescribeGroup(groupId)))
.collect(Collectors.toMap(Pair::getLeft, Pair::getRight))
));
}
@Override
protected void handleListGroupsRequest(KafkaHeaderAndRequest listGroups,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(listGroups.getRequest() instanceof ListGroupsRequest);
KeyValue<Errors, List<GroupOverview>> listResult = getGroupCoordinator().handleListGroups();
resultFuture.complete(KafkaResponseUtils.newListGroups(listResult.getKey(), listResult.getValue()));
}
@Override
protected void handleDeleteGroupsRequest(KafkaHeaderAndRequest deleteGroups,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(deleteGroups.getRequest() instanceof DeleteGroupsRequest);
DeleteGroupsRequest request = (DeleteGroupsRequest) deleteGroups.getRequest();
DeleteGroupsRequestData data = request.data();
resultFuture.complete(KafkaResponseUtils.newDeleteGroups(
getGroupCoordinator().handleDeleteGroups(data.groupsNames())
));
}
@Override
protected void handleSaslAuthenticate(KafkaHeaderAndRequest saslAuthenticate,
CompletableFuture<AbstractResponse> resultFuture) {
resultFuture.complete(new SaslAuthenticateResponse(
new SaslAuthenticateResponseData()
.setErrorCode(Errors.ILLEGAL_SASL_STATE.code())
.setErrorMessage("SaslAuthenticate request received after successful authentication")));
}
@Override
protected void handleSaslHandshake(KafkaHeaderAndRequest saslHandshake,
CompletableFuture<AbstractResponse> resultFuture) {
resultFuture.complete(KafkaResponseUtils.newSaslHandshake(Errors.ILLEGAL_SASL_STATE));
}
@Override
protected void handleCreateTopics(KafkaHeaderAndRequest createTopics,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(createTopics.getRequest() instanceof CreateTopicsRequest);
CreateTopicsRequest request = (CreateTopicsRequest) createTopics.getRequest();
final Map<String, ApiError> result = Maps.newConcurrentMap();
final Map<String, CreateTopicsRequestData.CreatableTopic> validTopics = Maps.newHashMap();
final Set<String> duplicateTopics = new HashSet<>();
request.data().topics().forEach((CreateTopicsRequestData.CreatableTopic topic) -> {
if (duplicateTopics.add(topic.name())) {
validTopics.put(topic.name(), topic);
} else {
final String errorMessage = "Create topics request from client `" + createTopics.getHeader().clientId()
+ "` contains multiple entries for the following topics: " + duplicateTopics;
result.put(topic.name(), new ApiError(Errors.INVALID_REQUEST, errorMessage));
}
});
if (validTopics.isEmpty()) {
resultFuture.complete(KafkaResponseUtils.newCreateTopics(result));
return;
}
String namespacePrefix = currentNamespacePrefix();
final AtomicInteger validTopicsCount = new AtomicInteger(validTopics.size());
final Map<String, CreateTopicsRequestData.CreatableTopic> authorizedTopics = Maps.newConcurrentMap();
Runnable createTopicsAsync = () -> {
if (authorizedTopics.isEmpty()) {
resultFuture.complete(KafkaResponseUtils.newCreateTopics(result));
return;
}
// TODO: handle request.validateOnly()
adminManager.createTopicsAsync(authorizedTopics, request.data().timeoutMs(), namespacePrefix)
.thenApply(validResult -> {
result.putAll(validResult);
resultFuture.complete(KafkaResponseUtils.newCreateTopics(result));
return null;
});
};
BiConsumer<String, CreateTopicsRequestData.CreatableTopic> completeOneTopic = (topic, topicDetails) -> {
authorizedTopics.put(topic, topicDetails);
if (validTopicsCount.decrementAndGet() == 0) {
createTopicsAsync.run();
}
};
BiConsumer<String, ApiError> completeOneErrorTopic = (topic, error) -> {
result.put(topic, error);
if (validTopicsCount.decrementAndGet() == 0) {
createTopicsAsync.run();
}
};
validTopics.forEach((topic, details) -> {
KopTopic kopTopic;
try {
kopTopic = new KopTopic(topic, namespacePrefix);
} catch (KoPTopicException e) {
completeOneErrorTopic.accept(topic, ApiError.fromThrowable(e));
return;
}
String fullTopicName = kopTopic.getFullName();
authorize(AclOperation.CREATE, Resource.of(ResourceType.TOPIC, fullTopicName))
.whenComplete((isAuthorized, ex) -> {
if (ex != null) {
log.error("CreateTopics authorize failed, topic - {}. {}",
fullTopicName, ex.getMessage());
completeOneErrorTopic
.accept(topic, new ApiError(Errors.TOPIC_AUTHORIZATION_FAILED, ex.getMessage()));
return;
}
if (!isAuthorized) {
log.error("CreateTopics authorize failed, topic - {}.", fullTopicName);
completeOneErrorTopic
.accept(topic, new ApiError(Errors.TOPIC_AUTHORIZATION_FAILED, null));
return;
}
completeOneTopic.accept(topic, details);
});
});
}
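// AlterConfigs is effectively a no-op: every requested change is logged and acknowledged
// with ApiError.NONE so the client sees a successful response.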
@Override
protected void handleAlterConfigs(KafkaHeaderAndRequest alterConfigs,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(alterConfigs.getRequest() instanceof AlterConfigsRequest);
AlterConfigsRequest request = (AlterConfigsRequest) alterConfigs.getRequest();
if (request.configs().isEmpty()) {
resultFuture.complete(new AlterConfigsResponse(new AlterConfigsResponseData()));
return;
}
AlterConfigsResponseData data = new AlterConfigsResponseData();
request.data().resources().forEach((AlterConfigsRequestData.AlterConfigsResource resource) -> {
byte resourceType = resource.resourceType();
String resourceName = resource.resourceName();
resource.configs().forEach((AlterConfigsRequestData.AlterableConfig entry) -> {
log.info("Ignoring ALTER_CONFIG for {} (type {}) {} = {}", resourceName, resourceType,
entry.name(), entry.value());
});
data.responses().add(new AlterConfigsResponseData.AlterConfigsResourceResponse()
.setErrorCode(ApiError.NONE.error().code())
.setErrorMessage(ApiError.NONE.error().message())
.setResourceName(resourceName)
.setResourceType(resourceType));
});
resultFuture.complete(new AlterConfigsResponse(data));
}
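// DescribeConfigs: authorize DESCRIBE_CONFIGS per resource (topics only; BROKER resources are
// accepted as-is) and let the AdminManager resolve the actual configuration entries.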
@Override
protected void handleDescribeConfigs(KafkaHeaderAndRequest describeConfigs,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(describeConfigs.getRequest() instanceof DescribeConfigsRequest);
DescribeConfigsRequest request = (DescribeConfigsRequest) describeConfigs.getRequest();
DescribeConfigsRequestData data = request.data();
if (data.resources().isEmpty()) {
resultFuture.complete(new DescribeConfigsResponse(new DescribeConfigsResponseData()));
return;
}
Collection<ConfigResource> authorizedResources = Collections.synchronizedList(new ArrayList<>());
Map<ConfigResource, DescribeConfigsResponse.Config> failedConfigResourceMap =
Maps.newConcurrentMap();
AtomicInteger unfinishedAuthorizationCount = new AtomicInteger(data.resources().size());
String namespacePrefix = currentNamespacePrefix();
Consumer<Runnable> completeOne = (action) -> {
// When complete one authorization or failed, will do the action first.
action.run();
if (unfinishedAuthorizationCount.decrementAndGet() == 0) {
adminManager.describeConfigsAsync(authorizedResources.stream()
.collect(Collectors.toMap(
configResource -> configResource,
configResource -> data.resources().stream()
.filter(r -> r.resourceName().equals(configResource.name())
&& r.resourceType() == configResource.type().id())
.findAny()
.map(__ -> new HashSet<>())
)),
namespacePrefix
).thenApply(configResourceConfigMap -> {
DescribeConfigsResponseData responseData = new DescribeConfigsResponseData();
configResourceConfigMap.putAll(failedConfigResourceMap);
configResourceConfigMap.forEach((ConfigResource resource,
DescribeConfigsResponse.Config result) -> {
responseData.results().add(new DescribeConfigsResponseData.DescribeConfigsResult()
.setResourceName(resource.name())
.setResourceType(resource.type().id())
.setErrorCode(result.error().error().code())
.setErrorMessage(result.error().messageWithFallback())
.setConfigs(result.entries().stream().map(c -> {
return new DescribeConfigsResponseData.DescribeConfigsResourceResult()
.setName(c.name())
.setConfigSource(c.source().id())
.setReadOnly(c.isReadOnly())
.setConfigType(c.type().id())
.setValue(c.value())
.setDocumentation("");
}).collect(Collectors.toList())));
});
resultFuture.complete(new DescribeConfigsResponse(responseData));
return null;
});
}
};
// Do authorization for each resource
data.resources().forEach((DescribeConfigsRequestData.DescribeConfigsResource configRes) -> {
ConfigResource configResource = new ConfigResource(ConfigResource.Type.forId(configRes.resourceType()),
configRes.resourceName());
switch (configResource.type()) {
case TOPIC:
KopTopic kopTopic;
try {
kopTopic = new KopTopic(configResource.name(), namespacePrefix);
} catch (KoPTopicException e) {
completeOne.accept(() -> {
final ApiError error = new ApiError(
Errors.UNKNOWN_TOPIC_OR_PARTITION,
"Topic " + configResource.name() + " doesn't exist");
failedConfigResourceMap.put(configResource, new DescribeConfigsResponse.Config(
error, Collections.emptyList()));
});
return;
}
String fullTopicName = kopTopic.getFullName();
authorize(AclOperation.DESCRIBE_CONFIGS, Resource.of(ResourceType.TOPIC, fullTopicName))
.whenComplete((isAuthorized, ex) -> {
if (ex != null) {
log.error("DescribeConfigs in topic authorize failed, topic - {}. {}",
fullTopicName, ex.getMessage());
completeOne.accept(() -> failedConfigResourceMap.put(configResource,
new DescribeConfigsResponse.Config(
new ApiError(Errors.TOPIC_AUTHORIZATION_FAILED, null),
Collections.emptyList())));
return;
}
if (isAuthorized) {
completeOne.accept(() -> authorizedResources.add(configResource));
return;
}
completeOne.accept(() -> failedConfigResourceMap.put(configResource,
new DescribeConfigsResponse.Config(
new ApiError(Errors.TOPIC_AUTHORIZATION_FAILED, null),
Collections.emptyList())));
});
break;
case BROKER:
// KoP doesn't currently support the BROKER resource,
// but since nothing is exposed to the client, it is fine to serve the request.
completeOne.accept(() -> authorizedResources.add(configResource));
break;
case UNKNOWN:
case BROKER_LOGGER:
default:
completeOne.accept(() -> log.error("KoP doesn't support resource type: " + configResource.type()));
break;
}
});
}
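// DescribeCluster: report all brokers known for the advertised listener and pick one of them
// as the "controller", since any Pulsar broker can serve metadata operations.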
@Override
protected void handleDescribeCluster(KafkaHeaderAndRequest describeCluster,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(describeCluster.getRequest() instanceof DescribeClusterRequest);
DescribeClusterResponseData data = new DescribeClusterResponseData();
List<Node> allNodes = new ArrayList<>(adminManager.getBrokers(advertisedEndPoint.getListenerName()));
// Each Pulsar broker can manage metadata like controller in Kafka,
// Kafka's AdminClient needs to find a controller node for metadata management.
// So here we return a random broker as the controller for the given listenerName.
final int controllerId = adminManager.getControllerId(advertisedEndPoint.getListenerName());
DescribeClusterResponse response = new DescribeClusterResponse(data);
data.setControllerId(controllerId);
data.setClusterId(clusterName);
data.setErrorCode(Errors.NONE.code());
data.setErrorMessage(Errors.NONE.message());
allNodes.forEach(node -> {
data.brokers().add(new DescribeClusterResponseData.DescribeClusterBroker()
.setBrokerId(node.id())
.setHost(node.host())
.setPort(node.port()));
});
resultFuture.complete(response);
}
@Override
protected void handleInitProducerId(KafkaHeaderAndRequest kafkaHeaderAndRequest,
CompletableFuture<AbstractResponse> response) {
InitProducerIdRequest request = (InitProducerIdRequest) kafkaHeaderAndRequest.getRequest();
InitProducerIdRequestData data = request.data();
TransactionCoordinator transactionCoordinator = getTransactionCoordinator();
transactionCoordinator.handleInitProducerId(
data.transactionalId(), data.transactionTimeoutMs(), Optional.empty(),
(resp) -> {
InitProducerIdResponseData responseData = new InitProducerIdResponseData()
.setErrorCode(resp.getError().code())
.setProducerId(resp.getProducerId())
.setProducerEpoch(resp.getProducerEpoch());
response.complete(new InitProducerIdResponse(responseData));
});
}
@Override
protected void handleAddPartitionsToTxn(KafkaHeaderAndRequest kafkaHeaderAndRequest,
CompletableFuture<AbstractResponse> response) {
AddPartitionsToTxnRequest request = (AddPartitionsToTxnRequest) kafkaHeaderAndRequest.getRequest();
AddPartitionsToTxnRequestData data = request.data();
List<TopicPartition> partitionsToAdd = request.partitions();
Map<TopicPartition, Errors> unauthorizedTopicErrors = Maps.newConcurrentMap();
Map<TopicPartition, Errors> nonExistingTopicErrors = Maps.newConcurrentMap();
Set<TopicPartition> authorizedPartitions = Sets.newConcurrentHashSet();
TransactionCoordinator transactionCoordinator = getTransactionCoordinator();
AtomicInteger unfinishedAuthorizationCount = new AtomicInteger(partitionsToAdd.size());
Consumer<Runnable> completeOne = (action) -> {
action.run();
if (unfinishedAuthorizationCount.decrementAndGet() == 0) {
if (!unauthorizedTopicErrors.isEmpty() || !nonExistingTopicErrors.isEmpty()) {
Map<TopicPartition, Errors> partitionErrors = Maps.newHashMap();
partitionErrors.putAll(unauthorizedTopicErrors);
partitionErrors.putAll(nonExistingTopicErrors);
for (TopicPartition topicPartition : authorizedPartitions) {
partitionErrors.put(topicPartition, Errors.OPERATION_NOT_ATTEMPTED);
}
response.complete(new AddPartitionsToTxnResponse(0, partitionErrors));
} else {
transactionCoordinator.handleAddPartitionsToTransaction(data.transactionalId(),
data.producerId(), data.producerEpoch(), authorizedPartitions, (errors) -> {
AddPartitionsToTxnResponseData responseData = new AddPartitionsToTxnResponseData();
// TODO: handle PRODUCER_FENCED errors
Map<TopicPartition, Errors> topicPartitionErrorsMap =
addPartitionError(partitionsToAdd, errors);
topicPartitionErrorsMap.keySet()
.stream()
.map(TopicPartition::topic)
.distinct()
.forEach(topicName -> {
AddPartitionsToTxnResponseData.AddPartitionsToTxnTopicResult topicResult =
new AddPartitionsToTxnResponseData.AddPartitionsToTxnTopicResult()
.setName(topicName);
responseData.results().add(topicResult);
topicPartitionErrorsMap.forEach((TopicPartition tp, Errors error) -> {
if (tp.topic().equals(topicName)) {
topicResult.results()
.add(new AddPartitionsToTxnResponseData
.AddPartitionsToTxnPartitionResult()
.setPartitionIndex(tp.partition())
.setErrorCode(error.code()));
}
});
});
response.complete(
new AddPartitionsToTxnResponse(responseData));
});
}
}
};
String namespacePrefix = currentNamespacePrefix();
partitionsToAdd.forEach(tp -> {
String fullPartitionName;
try {
fullPartitionName = KopTopic.toString(tp, namespacePrefix);
} catch (KoPTopicException e) {
log.warn("Invalid topic name: {}", tp.topic(), e);
completeOne.accept(() -> nonExistingTopicErrors.put(tp, Errors.UNKNOWN_TOPIC_OR_PARTITION));
return;
}
authorize(AclOperation.WRITE, Resource.of(ResourceType.TOPIC, fullPartitionName))
.whenComplete((isAuthorized, ex) -> {
if (ex != null) {
log.error("AddPartitionsToTxn topic authorize failed, topic - {}. {}",
fullPartitionName, ex.getMessage());
completeOne.accept(() ->
unauthorizedTopicErrors.put(tp, Errors.TOPIC_AUTHORIZATION_FAILED));
return;
}
if (!isAuthorized) {
completeOne.accept(() ->
unauthorizedTopicErrors.put(tp, Errors.TOPIC_AUTHORIZATION_FAILED));
return;
}
completeOne.accept(() -> authorizedPartitions.add(tp));
});
});
}
@Override
protected void handleAddOffsetsToTxn(KafkaHeaderAndRequest kafkaHeaderAndRequest,
CompletableFuture<AbstractResponse> response) {
AddOffsetsToTxnRequest request = (AddOffsetsToTxnRequest) kafkaHeaderAndRequest.getRequest();
AddOffsetsToTxnRequestData data = request.data();
int partition = getGroupCoordinator().partitionFor(data.groupId());
String offsetTopicName = getGroupCoordinator().getGroupManager().getOffsetConfig().offsetsTopicName();
TransactionCoordinator transactionCoordinator = getTransactionCoordinator();
Set<TopicPartition> topicPartitions = Collections.singleton(new TopicPartition(offsetTopicName, partition));
transactionCoordinator.handleAddPartitionsToTransaction(
data.transactionalId(),
data.producerId(),
data.producerEpoch(),
topicPartitions,
(errors) -> {
AddOffsetsToTxnResponseData responseData = new AddOffsetsToTxnResponseData()
.setErrorCode(errors.code());
// TODO: handle PRODUCER_FENCED errors
response.complete(
new AddOffsetsToTxnResponse(responseData));
});
}
private Map<TopicPartition, Errors> addPartitionError(Collection<TopicPartition> partitions, Errors errors) {
Map<TopicPartition, Errors> result = Maps.newHashMap();
for (TopicPartition partition : partitions) {
result.put(partition, errors);
}
return result;
}
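// TxnOffsetCommit: same topic-name conversion and READ authorization as OffsetCommit, but the
// offsets are committed through the transactional path of the group coordinator.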
@Override
protected void handleTxnOffsetCommit(KafkaHeaderAndRequest kafkaHeaderAndRequest,
CompletableFuture<AbstractResponse> response) {
TxnOffsetCommitRequest request = (TxnOffsetCommitRequest) kafkaHeaderAndRequest.getRequest();
TxnOffsetCommitRequestData data = request.data();
if (data.topics().isEmpty()) {
response.complete(new TxnOffsetCommitResponse(0, Maps.newHashMap()));
return;
}
// TODO: non-existent topics are not processed at this time.
Map<TopicPartition, Errors> nonExistingTopicErrors = nonExistingTopicErrors();
Map<TopicPartition, Errors> unauthorizedTopicErrors = Maps.newConcurrentMap();
// convert raw topic name to KoP full name
// we need to ensure that topic name in __consumer_offsets is globally unique
Map<TopicPartition, TxnOffsetCommitRequest.CommittedOffset> convertedOffsetData = Maps.newConcurrentMap();
Map<TopicPartition, TopicPartition> replacingIndex = Maps.newHashMap();
AtomicInteger unfinishedAuthorizationCount = new AtomicInteger(request.offsets().size());
Consumer<Runnable> completeOne = (action) -> {
action.run();
if (unfinishedAuthorizationCount.decrementAndGet() == 0) {
if (log.isTraceEnabled()) {
StringBuffer traceInfo = new StringBuffer();
replacingIndex.forEach((inner, outer) ->
traceInfo.append(String.format("\tinnerName:%s, outerName:%s%n", inner, outer)));
log.trace("TXN_OFFSET_COMMIT TopicPartition relations: \n{}", traceInfo);
}
getGroupCoordinator().handleTxnCommitOffsets(
data.groupId(),
data.producerId(),
data.producerEpoch(),
convertTxnOffsets(convertedOffsetData)
).whenComplete((resultMap, throwable) -> {
// recover to original topic name
replaceTopicPartition(resultMap, replacingIndex);
resultMap.putAll(nonExistingTopicErrors);
resultMap.putAll(unauthorizedTopicErrors);
response.complete(new TxnOffsetCommitResponse(0, resultMap));
});
}
};
final String namespacePrefix = currentNamespacePrefix();
request.offsets().forEach((tp, commitOffset) -> {
KopTopic kopTopic;
try {
kopTopic = new KopTopic(tp.topic(), namespacePrefix);
} catch (KoPTopicException e) {
log.warn("Invalid topic name: {}", tp.topic(), e);
completeOne.accept(() -> nonExistingTopicErrors.put(tp, Errors.UNKNOWN_TOPIC_OR_PARTITION));
return;
}
String fullTopicName = kopTopic.getFullName();
authorize(AclOperation.READ, Resource.of(ResourceType.TOPIC, fullTopicName))
.whenComplete((isAuthorized, ex) -> {
if (ex != null) {
log.error("TxnOffsetCommit authorize failed, topic - {}. {}",
fullTopicName, ex.getMessage());
completeOne.accept(
() -> unauthorizedTopicErrors.put(tp, Errors.TOPIC_AUTHORIZATION_FAILED));
return;
}
if (!isAuthorized) {
completeOne.accept(()-> unauthorizedTopicErrors.put(tp, Errors.TOPIC_AUTHORIZATION_FAILED));
return;
}
completeOne.accept(() -> {
TopicPartition newTopicPartition = new TopicPartition(fullTopicName, tp.partition());
convertedOffsetData.put(newTopicPartition, commitOffset);
replacingIndex.put(newTopicPartition, tp);
});
});
});
}
private Map<TopicPartition, OffsetAndMetadata> convertTxnOffsets(
Map<TopicPartition, TxnOffsetCommitRequest.CommittedOffset> offsetsMap) {
long currentTimestamp = SystemTime.SYSTEM.milliseconds();
Map<TopicPartition, OffsetAndMetadata> offsetAndMetadataMap = new HashMap<>();
for (Map.Entry<TopicPartition, TxnOffsetCommitRequest.CommittedOffset> entry : offsetsMap.entrySet()) {
TxnOffsetCommitRequest.CommittedOffset partitionData = entry.getValue();
String metadata = KafkaRequestUtils.getMetadata(partitionData);
long offset = KafkaRequestUtils.getOffset(partitionData);
offsetAndMetadataMap.put(entry.getKey(),
OffsetAndMetadata.apply(offset, metadata, currentTimestamp, -1));
}
return offsetAndMetadataMap;
}
@Override
protected void handleEndTxn(KafkaHeaderAndRequest kafkaHeaderAndRequest,
CompletableFuture<AbstractResponse> response) {
EndTxnRequest request = (EndTxnRequest) kafkaHeaderAndRequest.getRequest();
EndTxnRequestData data = request.data();
TransactionCoordinator transactionCoordinator = getTransactionCoordinator();
transactionCoordinator.handleEndTransaction(
data.transactionalId(),
data.producerId(),
data.producerEpoch(),
data.committed() ? TransactionResult.COMMIT : TransactionResult.ABORT,
errors -> response.complete(new EndTxnResponse(new EndTxnResponseData().setErrorCode(errors.code()))));
}
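// WriteTxnMarkers: append a COMMIT/ABORT control record to every partition touched by the
// transaction; for __consumer_offsets partitions, also schedule transaction-completion handling
// on the group coordinator so the pending transactional offsets are resolved.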
@Override
protected void handleWriteTxnMarkers(KafkaHeaderAndRequest kafkaHeaderAndRequest,
CompletableFuture<AbstractResponse> response) {
WriteTxnMarkersRequest request = (WriteTxnMarkersRequest) kafkaHeaderAndRequest.getRequest();
Map<Long, Map<TopicPartition, Errors>> errors = new ConcurrentHashMap<>();
List<WriteTxnMarkersRequest.TxnMarkerEntry> markers = request.markers();
AtomicInteger numAppends = new AtomicInteger(markers.size());
if (numAppends.get() == 0) {
response.complete(new WriteTxnMarkersResponse(errors));
return;
}
BiConsumer<Long, Map<TopicPartition, Errors>> updateErrors = (producerId, currentErrors) -> {
Map<TopicPartition, Errors> previousErrors = errors.putIfAbsent(producerId, currentErrors);
if (previousErrors != null) {
previousErrors.putAll(currentErrors);
}
};
Runnable completeOne = () -> {
if (numAppends.decrementAndGet() == 0) {
response.complete(new WriteTxnMarkersResponse(errors));
}
};
for (WriteTxnMarkersRequest.TxnMarkerEntry marker : markers) {
long producerId = marker.producerId();
TransactionResult transactionResult = marker.transactionResult();
Map<TopicPartition, MemoryRecords> controlRecords = generateTxnMarkerRecords(marker);
AppendRecordsContext appendRecordsContext = AppendRecordsContext.get(
topicManager,
this::startSendOperationForThrottling,
this::completeSendOperationForThrottling,
this.pendingTopicFuturesMap,
ctx);
getReplicaManager().appendRecords(
kafkaConfig.getRequestTimeoutMs(),
(short) 1,
true,
currentNamespacePrefix(),
controlRecords,
PartitionLog.AppendOrigin.Coordinator,
appendRecordsContext
).whenComplete((result, ex) -> {
if (ex != null) {
log.error("[{}] Append txn marker ({}) failed.", ctx.channel(), marker, ex);
Map<TopicPartition, Errors> currentErrors = new HashMap<>();
controlRecords.forEach(((topicPartition, partitionResponse) -> currentErrors.put(topicPartition,
Errors.KAFKA_STORAGE_ERROR)));
updateErrors.accept(producerId, currentErrors);
completeOne.run();
return;
}
Map<TopicPartition, Errors> currentErrors = new HashMap<>();
result.forEach(((topicPartition, partitionResponse) -> {
if (log.isDebugEnabled()) {
log.debug("[{}] Append txn marker to topic : [{}], response: [{}].",
ctx.channel(), topicPartition, partitionResponse);
}
currentErrors.put(topicPartition, partitionResponse.error);
}));
updateErrors.accept(producerId, currentErrors);
final String metadataNamespace = kafkaConfig.getKafkaMetadataNamespace();
Set<TopicPartition> successfulOffsetsPartitions = result.keySet()
.stream()
.filter(topicPartition ->
KopTopic.isGroupMetadataTopicName(topicPartition.topic(), metadataNamespace))
.collect(Collectors.toSet());
if (!successfulOffsetsPartitions.isEmpty()) {
getGroupCoordinator().scheduleHandleTxnCompletion(
producerId,
successfulOffsetsPartitions
.stream().map(TopicPartition::partition).collect(Collectors.toSet()),
transactionResult
).whenComplete((__, e) -> {
if (e != null) {
log.error("Received an exception while trying to update the offsets cache on "
+ "transaction marker append", e);
ConcurrentHashMap<TopicPartition, Errors> updatedErrors = new ConcurrentHashMap<>();
successfulOffsetsPartitions.forEach(partition ->
updatedErrors.put(partition, Errors.forException(e.getCause())));
updateErrors.accept(producerId, updatedErrors);
}
completeOne.run();
});
return;
}
completeOne.run();
});
}
}
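/**
* Build one end-transaction control batch (COMMIT or ABORT) per partition of the marker entry.
*/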
private Map<TopicPartition, MemoryRecords> generateTxnMarkerRecords(WriteTxnMarkersRequest.TxnMarkerEntry marker) {
Map<TopicPartition, MemoryRecords> txnMarkerRecordsMap = Maps.newHashMap();
ControlRecordType controlRecordType = marker.transactionResult().equals(TransactionResult.COMMIT)
? ControlRecordType.COMMIT : ControlRecordType.ABORT;
EndTransactionMarker endTransactionMarker = new EndTransactionMarker(
controlRecordType, marker.coordinatorEpoch());
for (TopicPartition topicPartition : marker.partitions()) {
MemoryRecords memoryRecords = MemoryRecords.withEndTransactionMarker(
marker.producerId(), marker.producerEpoch(), endTransactionMarker);
txnMarkerRecordsMap.put(topicPartition, memoryRecords);
}
return txnMarkerRecordsMap;
}
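// DeleteTopics: authorize DELETE per topic, delete it through the AdminManager and, on success,
// write a node under the delete-topics path to trigger the coordinator's DeleteTopicsEvent.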
@Override
protected void handleDeleteTopics(KafkaHeaderAndRequest deleteTopics,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(deleteTopics.getRequest() instanceof DeleteTopicsRequest);
DeleteTopicsRequest request = (DeleteTopicsRequest) deleteTopics.getRequest();
DeleteTopicsRequestData data = request.data();
List<DeleteTopicsRequestData.DeleteTopicState> topicsToDelete = data.topics();
if (topicsToDelete == null || topicsToDelete.isEmpty()) {
resultFuture.complete(KafkaResponseUtils.newDeleteTopics(Maps.newHashMap()));
return;
}
Map<String, Errors> deleteTopicsResponse = Maps.newConcurrentMap();
AtomicInteger topicToDeleteCount = new AtomicInteger(topicsToDelete.size());
BiConsumer<String, Errors> completeOne = (topic, errors) -> {
deleteTopicsResponse.put(topic, errors);
if (errors == Errors.NONE) {
// create the topic ZNode to trigger the coordinator's DeleteTopicsEvent
metadataStore.put(
KopEventManager.getDeleteTopicsPath()
+ "/" + TopicNameUtils.getTopicNameWithUrlEncoded(topic),
new byte[0],
Optional.empty());
}
if (topicToDeleteCount.decrementAndGet() == 0) {
resultFuture.complete(KafkaResponseUtils.newDeleteTopics(deleteTopicsResponse));
}
};
final String namespacePrefix = currentNamespacePrefix();
topicsToDelete.forEach((DeleteTopicsRequestData.DeleteTopicState topicState) -> {
String topic = topicState.name();
KopTopic kopTopic;
try {
kopTopic = new KopTopic(topic, namespacePrefix);
} catch (KoPTopicException e) {
completeOne.accept(topic, Errors.UNKNOWN_TOPIC_OR_PARTITION);
return;
}
String fullTopicName = kopTopic.getFullName();
authorize(AclOperation.DELETE, Resource.of(ResourceType.TOPIC, fullTopicName))
.whenComplete((isAuthorize, ex) -> {
if (ex != null) {
log.error("DeleteTopics authorize failed, topic - {}. {}",
fullTopicName, ex.getMessage());
completeOne.accept(topic, Errors.TOPIC_AUTHORIZATION_FAILED);
return;
}
if (!isAuthorize) {
completeOne.accept(topic, Errors.TOPIC_AUTHORIZATION_FAILED);
return;
}
adminManager.deleteTopic(fullTopicName,
__ -> completeOne.accept(topic, Errors.NONE),
__ -> completeOne.accept(topic, Errors.UNKNOWN_TOPIC_OR_PARTITION));
});
});
}
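// DeleteRecords: map each partition to its target offset, authorize DELETE, then translate the
// offset to a managed-ledger position and truncate the topic up to that position.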
@Override
protected void handleDeleteRecords(KafkaHeaderAndRequest deleteRecords,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(deleteRecords.getRequest() instanceof DeleteRecordsRequest);
DeleteRecordsRequest request = (DeleteRecordsRequest) deleteRecords.getRequest();
Map<TopicPartition, Long> partitionOffsets = new HashMap<>();
request.data().topics().forEach((DeleteRecordsRequestData.DeleteRecordsTopic topic) -> {
String name = topic.name();
topic.partitions().forEach(partition -> {
TopicPartition topicPartition = new TopicPartition(name, partition.partitionIndex());
partitionOffsets.put(topicPartition, partition.offset());
});
});
if (partitionOffsets.isEmpty()) {
resultFuture.complete(KafkaResponseUtils.newDeleteRecords(Maps.newHashMap()));
return;
}
Map<TopicPartition, Errors> deleteRecordsResponse =
Maps.newConcurrentMap();
AtomicInteger topicToDeleteCount = new AtomicInteger(partitionOffsets.size());
BiConsumer<TopicPartition, Errors> completeOne = (topic, errors) -> {
deleteRecordsResponse.put(topic, errors);
if (topicToDeleteCount.decrementAndGet() == 0) {
resultFuture.complete(KafkaResponseUtils.newDeleteRecords(deleteRecordsResponse));
}
};
final String namespacePrefix = currentNamespacePrefix();
partitionOffsets.forEach((topicPartition, offset) -> {
KopTopic kopTopic;
try {
kopTopic = new KopTopic(topicPartition.topic(), namespacePrefix);
} catch (KoPTopicException e) {
completeOne.accept(topicPartition, Errors.UNKNOWN_TOPIC_OR_PARTITION);
return;
}
String fullTopicName = kopTopic.getPartitionName(topicPartition.partition());
authorize(AclOperation.DELETE, Resource.of(ResourceType.TOPIC, fullTopicName))
.whenComplete((isAuthorize, ex) -> {
if (ex != null) {
log.error("DeleteTopics authorize failed, topic - {}. {}",
fullTopicName, ex.getMessage());
completeOne.accept(topicPartition, Errors.TOPIC_AUTHORIZATION_FAILED);
return;
}
if (!isAuthorize) {
completeOne.accept(topicPartition, Errors.TOPIC_AUTHORIZATION_FAILED);
return;
}
topicManager
.getTopicConsumerManager(fullTopicName)
.thenAccept(consumerManager -> consumerManager.findPositionForIndex(offset)
.thenAccept(
position -> adminManager.truncateTopic(fullTopicName, offset, position,
__ -> completeOne.accept(topicPartition, Errors.NONE),
__ -> completeOne.accept(topicPartition,
Errors.UNKNOWN_TOPIC_OR_PARTITION))));
});
});
}
@Override
protected void handleOffsetDelete(KafkaHeaderAndRequest offsetDelete,
CompletableFuture<AbstractResponse> response) {
checkArgument(offsetDelete.getRequest() instanceof OffsetDeleteRequest);
OffsetDeleteRequest request = (OffsetDeleteRequest) offsetDelete.getRequest();
String groupId = request.data().groupId();
Map<TopicPartition, Errors> topicPartitionErrors = Maps.newConcurrentMap();
List<TopicPartition> topicPartitions = new ArrayList<>();
Map<TopicPartition, TopicPartition> replacingIndex = new ConcurrentHashMap<>();
authorize(AclOperation.DELETE, Resource.of(ResourceType.GROUP, groupId)).thenAccept(authorized -> {
if (!authorized) {
response.complete(request.getErrorResponse(-1, Errors.GROUP_AUTHORIZATION_FAILED));
return;
}
final String namespacePrefix = currentNamespacePrefix();
request.data().topics().forEach((OffsetDeleteRequestData.OffsetDeleteRequestTopic topic) -> {
String name = topic.name();
topic.partitions().forEach(partition -> {
TopicPartition topicPartition = new TopicPartition(name, partition.partitionIndex());
KopTopic kopTopic;
try {
kopTopic = new KopTopic(topicPartition.topic(), namespacePrefix);
} catch (KoPTopicException e) {
topicPartitionErrors.put(topicPartition, Errors.UNKNOWN_TOPIC_OR_PARTITION);
return;
}
String fullTopicName = kopTopic.getPartitionName(topicPartition.partition());
authorize(AclOperation.READ, Resource.of(ResourceType.TOPIC, fullTopicName))
.whenComplete((isAuthorize, ex) -> {
if (ex != null) {
log.error("OffsetDelete authorize failed, topic - {}. {}",
fullTopicName, ex.getMessage());
topicPartitionErrors.put(topicPartition, Errors.TOPIC_AUTHORIZATION_FAILED);
return;
}
if (!isAuthorize) {
topicPartitionErrors.put(topicPartition, Errors.TOPIC_AUTHORIZATION_FAILED);
return;
}
String fullName = kopTopic.getFullName();
TopicPartition newTopicPartition = new TopicPartition(
fullName, topicPartition.partition());
replacingIndex.put(newTopicPartition, topicPartition);
topicPartitions.add(newTopicPartition);
});
});
});
getGroupCoordinator().handleDeleteOffsets(groupId, topicPartitions)
.thenAccept(offsetDeleteTopicPartitionResponse -> {
Errors groupError = offsetDeleteTopicPartitionResponse.getLeft();
if (groupError != Errors.NONE) {
response.complete(request.getErrorResponse(0, groupError));
return;
}
topicPartitionErrors.putAll(offsetDeleteTopicPartitionResponse.getRight());
// recover to original topic name
replaceTopicPartition(topicPartitionErrors, replacingIndex);
OffsetDeleteResponseData.OffsetDeleteResponseTopicCollection topics =
new OffsetDeleteResponseData.OffsetDeleteResponseTopicCollection();
Map<String, List<TopicPartition>> topicPartitionMap =
topicPartitionErrors.keySet().stream()
.collect(Collectors.groupingBy(TopicPartition::topic));
for (Map.Entry<String, List<TopicPartition>> entry : topicPartitionMap.entrySet()) {
String topic = entry.getKey();
List<TopicPartition> topicPartitionsList = entry.getValue();
OffsetDeleteResponseData.OffsetDeleteResponsePartitionCollection partitions =
new OffsetDeleteResponseData.OffsetDeleteResponsePartitionCollection();
for (TopicPartition topicPartition : topicPartitionsList) {
Errors error = topicPartitionErrors.get(topicPartition);
partitions.add(new OffsetDeleteResponseData.OffsetDeleteResponsePartition()
.setPartitionIndex(topicPartition.partition())
.setErrorCode(error.code()));
}
topics.add(new OffsetDeleteResponseData.OffsetDeleteResponseTopic()
.setName(topic)
.setPartitions(partitions));
}
response.complete(new OffsetDeleteResponse(new OffsetDeleteResponseData()
.setTopics(topics)));
});
}).exceptionally(ex -> {
log.error("OffsetDelete authorize failed, groupId - {}. {}",
groupId, ex.getMessage());
response.complete(request.getErrorResponse(-1, Errors.GROUP_AUTHORIZATION_FAILED));
return null;
});
}
@Override
protected void handleCreatePartitions(KafkaHeaderAndRequest createPartitions,
CompletableFuture<AbstractResponse> resultFuture) {
checkArgument(createPartitions.getRequest() instanceof CreatePartitionsRequest);
CreatePartitionsRequest request = (CreatePartitionsRequest) createPartitions.getRequest();
final Map<String, ApiError> result = Maps.newConcurrentMap();
final Map validTopics = Maps.newHashMap();
final Set<String> duplicateTopics = new HashSet<>();
KafkaRequestUtils.forEachCreatePartitionsRequest(request, (topic, newPartition) -> {
if (duplicateTopics.add(topic)) {
validTopics.put(topic, newPartition);
} else {
final String errorMessage = "Create topics partitions request from client `"
+ createPartitions.getHeader().clientId()
+ "` contains multiple entries for the following topics: " + duplicateTopics;
result.put(topic, new ApiError(Errors.INVALID_REQUEST, errorMessage));
}
});
if (validTopics.isEmpty()) {
resultFuture.complete(KafkaResponseUtils.newCreatePartitions(result));
return;
}
String namespacePrefix = currentNamespacePrefix();
final AtomicInteger validTopicsCount = new AtomicInteger(validTopics.size());
final Map authorizedTopics = Maps.newConcurrentMap();
Runnable createPartitionsAsync = () -> {
if (authorizedTopics.isEmpty()) {
resultFuture.complete(KafkaResponseUtils.newCreatePartitions(result));
return;
}
adminManager.createPartitionsAsync(authorizedTopics, request.data().timeoutMs(), namespacePrefix)
.thenApply(validResult -> {
result.putAll(validResult);
resultFuture.complete(KafkaResponseUtils.newCreatePartitions(result));
return null;
});
};
BiConsumer completeOneTopic =
(topic, newPartitions) -> {
authorizedTopics.put(topic, newPartitions);
if (validTopicsCount.decrementAndGet() == 0) {
createPartitionsAsync.run();
}
};
BiConsumer<String, ApiError> completeOneErrorTopic = (topic, error) -> {
result.put(topic, error);
if (validTopicsCount.decrementAndGet() == 0) {
createPartitionsAsync.run();
}
};
validTopics.forEach((topic, newPartitions) -> {
try {
KopTopic kopTopic = new KopTopic(topic, namespacePrefix);
String fullTopicName = kopTopic.getFullName();
authorize(AclOperation.ALTER, Resource.of(ResourceType.TOPIC, fullTopicName))
.whenComplete((isAuthorized, ex) -> {
if (ex != null) {
log.error("CreatePartitions authorize failed, topic - {}. {}",
fullTopicName, ex.getMessage());
completeOneErrorTopic.accept(topic,
new ApiError(Errors.TOPIC_AUTHORIZATION_FAILED, ex.getMessage()));
return;
}
if (!isAuthorized) {
completeOneErrorTopic.accept(topic,
new ApiError(Errors.TOPIC_AUTHORIZATION_FAILED, null));
return;
}
completeOneTopic.accept(topic, newPartitions);
});
} catch (KoPTopicException e) {
completeOneErrorTopic.accept(topic, ApiError.fromThrowable(e));
}
});
}
@Override
public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) {
log.error("Caught error in handler, closing channel", cause);
this.close();
}
public CompletableFuture lookup(TopicName topic) {
return findBroker(topic).thenApply(KafkaResponseUtils.BrokerLookupResult::toPartitionMetadata);
}
// The returned future never completes exceptionally
public CompletableFuture<KafkaResponseUtils.BrokerLookupResult> findBroker(TopicName topic) {
if (log.isDebugEnabled()) {
log.debug("[{}] Handle Lookup for {}", ctx.channel(), topic);
}
final CompletableFuture<KafkaResponseUtils.BrokerLookupResult> future = new CompletableFuture<>();
kopBrokerLookupManager.findBroker(topic.toString(), advertisedEndPoint)
.thenApply(listenerInetSocketAddressOpt -> listenerInetSocketAddressOpt
.map(inetSocketAddress -> newPartitionMetadata(topic, newNode(inetSocketAddress)))
.orElse(null)
)
.whenComplete((partitionMetadata, e) -> {
if (e != null || partitionMetadata == null) {
log.warn("[{}] Request {}: Exception while find Broker metadata", ctx.channel(), e);
future.complete(newFailedPartitionMetadata(topic));
} else {
future.complete(partitionMetadata);
}
});
return future;
}
static Node newNode(InetSocketAddress address) {
if (log.isDebugEnabled()) {
log.debug("Return Broker Node of {}. {}:{}", address, address.getHostString(), address.getPort());
}
return new Node(
Murmur3_32Hash.getInstance().makeHash((address.getHostString() + address.getPort()).getBytes(UTF_8)),
address.getHostString(),
address.getPort());
}
static KafkaResponseUtils.BrokerLookupResult newPartitionMetadata(TopicName topicName, Node node) {
int pulsarPartitionIndex = topicName.getPartitionIndex();
int kafkaPartitionIndex = pulsarPartitionIndex == -1 ? 0 : pulsarPartitionIndex;
if (log.isDebugEnabled()) {
log.debug("Return PartitionMetadata node: {}, topicName: {}", node, topicName);
}
TopicPartition topicPartition = new TopicPartition(topicName.toString(), kafkaPartitionIndex);
return KafkaResponseUtils.newMetadataPartition(topicPartition, node);
}
static KafkaResponseUtils.BrokerLookupResult newFailedPartitionMetadata(TopicName topicName) {
int pulsarPartitionIndex = topicName.getPartitionIndex();
int kafkaPartitionIndex = pulsarPartitionIndex == -1 ? 0 : pulsarPartitionIndex;
log.warn("Failed find Broker metadata, create PartitionMetadata with NOT_LEADER_FOR_PARTITION");
TopicPartition topicPartition = new TopicPartition(topicName.toString(), kafkaPartitionIndex);
// This error mostly happens when the topic is in loading/unloading state.
return KafkaResponseUtils.newMetadataPartition(
Errors.NOT_LEADER_OR_FOLLOWER, topicPartition);
}
private void throwIfTransactionCoordinatorDisabled() {
if (!kafkaConfig.isKafkaTransactionCoordinatorEnabled()) {
throw new IllegalArgumentException("Broker has disabled transaction coordinator, "
+ "please enable it before using transaction.");
}
}
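// Configuration sketch (property name inferred from the isKafkaTransactionCoordinatorEnabled()
// getter, so treat it as an assumption): the check above corresponds to a broker-side setting
// such as
//
//   # broker.conf / standalone.conf
//   kafkaTransactionCoordinatorEnabled=true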
@VisibleForTesting
protected CompletableFuture<Boolean> authorize(AclOperation operation, Resource resource) {
Session session = authenticator != null ? authenticator.session() : null;
return authorize(operation, resource, session);
}
protected CompletableFuture<Boolean> authorize(AclOperation operation, Resource resource, Session session) {
if (authorizer == null) {
return CompletableFuture.completedFuture(true);
}
if (session == null) {
return CompletableFuture.completedFuture(false);
}
CompletableFuture<Boolean> isAuthorizedFuture = null;
switch (operation) {
case READ:
isAuthorizedFuture = authorizer.canConsumeAsync(session.getPrincipal(), resource);
break;
case IDEMPOTENT_WRITE:
case WRITE:
isAuthorizedFuture = authorizer.canProduceAsync(session.getPrincipal(), resource);
break;
case DESCRIBE:
if (resource.getResourceType() == ResourceType.TOPIC) {
isAuthorizedFuture = authorizer.canLookupAsync(session.getPrincipal(), resource);
} else if (resource.getResourceType() == ResourceType.NAMESPACE) {
isAuthorizedFuture = authorizer.canGetTopicList(session.getPrincipal(), resource);
} else if (resource.getResourceType() == ResourceType.GROUP) {
isAuthorizedFuture = authorizer.canDescribeConsumerGroup(session.getPrincipal(), resource);
}
break;
case CREATE:
isAuthorizedFuture = authorizer.canCreateTopicAsync(session.getPrincipal(), resource);
break;
case DELETE:
if (resource.getResourceType() == ResourceType.GROUP) {
isAuthorizedFuture = authorizer.canDeleteGroupAsync(session.getPrincipal(), resource);
break;
}
isAuthorizedFuture = authorizer.canDeleteTopicAsync(session.getPrincipal(), resource);
break;
case ALTER:
isAuthorizedFuture = authorizer.canAlterTopicAsync(session.getPrincipal(), resource);
break;
case DESCRIBE_CONFIGS:
isAuthorizedFuture = authorizer.canManageTenantAsync(session.getPrincipal(), resource);
break;
case ANY:
if (resource.getResourceType() == ResourceType.TENANT) {
isAuthorizedFuture = authorizer.canAccessTenantAsync(session.getPrincipal(), resource);
}
break;
case ALTER_CONFIGS:
case CLUSTER_ACTION:
case UNKNOWN:
case ALL:
default:
break;
}
if (isAuthorizedFuture == null) {
return FutureUtil.failedFuture(
new IllegalStateException("AclOperation [" + operation.name() + "] is not supported."));
}
return isAuthorizedFuture;
}
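// Illustrative usage sketch (hypothetical topic name): a DESCRIBE check on a topic resource
// resolves to authorizer.canLookupAsync(...) above and yields a CompletableFuture<Boolean>.
//
//   authorize(AclOperation.DESCRIBE,
//           Resource.of(ResourceType.TOPIC, "persistent://public/default/my-topic"))
//       .thenAccept(isAuthorized -> {
//           if (!isAuthorized) {
//               // reply with Errors.TOPIC_AUTHORIZATION_FAILED, as the request handlers do
//           }
//       });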
/**
* If kafkaEnableMultiTenantMetadata is enabled, we need to ensure
* that the tenant spec of the authenticated principal refers to an existing tenant.
* @param session the authenticated session carrying the principal and its tenant spec
* @return whether the tenant is accessible
*/
private boolean validateTenantAccessForSession(Session session)
throws AuthenticationException {
if (!kafkaConfig.isKafkaEnableMultiTenantMetadata()) {
// we are not leveraging kafkaEnableMultiTenantMetadata feature
// the client will access only system tenant
return true;
}
String tenantSpec = session.getPrincipal().getTenantSpec();
if (tenantSpec == null) {
// the principal did not provide a tenant spec,
// so the client will access only the system tenant
return true;
}
String currentTenant = extractTenantFromTenantSpec(tenantSpec);
try {
Boolean granted = authorize(AclOperation.ANY,
Resource.of(ResourceType.TENANT, currentTenant), session)
.get();
return granted != null && granted;
} catch (ExecutionException | InterruptedException err) {
log.error("Internal error while verifying tenant access", err);
throw new AuthenticationException("Internal error while verifying tenant access:" + err, err);
}
}
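// Illustrative sketch (the tenant spec format is assumed to be "tenant" or "tenant/namespace",
// see extractTenantFromTenantSpec elsewhere in this handler): the check above reduces to an
// ANY-operation authorization on the tenant resource, e.g.
//
//   authorize(AclOperation.ANY, Resource.of(ResourceType.TENANT, "my-tenant"), session);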
/**
* Return the thread pool that performs the conversion of data during fetches.
* We don't want to decode data on critical threads such as the ManagedLedger ordered executor threads.
* @return an executor.
*/
public Executor getDecodeExecutor() {
return this.executor;
}
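// Illustrative usage sketch (decodeFetchedRecords and rawPayload are hypothetical names):
// offload record decoding from the ManagedLedger ordered executor onto this executor.
//
//   CompletableFuture.supplyAsync(() -> decodeFetchedRecords(rawPayload), getDecodeExecutor());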
}