/**
* Copyright 2019 Expedia, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.expediagroup.rhapsody.kafka.factory;

import java.time.Duration;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;

import org.apache.kafka.clients.CommonClientConfigs;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.TopicPartition;
import org.reactivestreams.Publisher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.expediagroup.rhapsody.api.Acknowledgeable;
import com.expediagroup.rhapsody.core.transformer.AutoAcknowledgementConfig;
import com.expediagroup.rhapsody.core.transformer.AutoAcknowledgingTransformer;
import com.expediagroup.rhapsody.kafka.acknowledgement.MultipleReceiverAcknowledgementStrategy;
import com.expediagroup.rhapsody.kafka.acknowledgement.OrderManagingReceiverAcknowledgementStrategy;
import com.expediagroup.rhapsody.kafka.acknowledgement.ReceiverAcknowledgementStrategy;
import com.expediagroup.rhapsody.kafka.extractor.ConsumerRecordExtraction;
import com.expediagroup.rhapsody.util.ConfigLoading;

import reactor.core.publisher.Flux;
import reactor.core.publisher.GroupedFlux;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Scheduler;
import reactor.core.scheduler.Schedulers;
import reactor.kafka.receiver.KafkaReceiver;
import reactor.kafka.receiver.ReceiverOptions;
import reactor.kafka.receiver.ReceiverPartition;
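
/**
 * Reactive factory for consuming Kafka Records as {@link Flux}es of {@link ConsumerRecord}s,
 * optionally wrapped as {@link Acknowledgeable}s and/or grouped by {@link TopicPartition}.
 * Reactive extensions to the native Kafka Consumer configuration are exposed through the
 * {@code *_CONFIG} keys declared below.
 */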
public class KafkaFluxFactory<K, V> {

    public static final String POLL_TIMEOUT_CONFIG = "poll.timeout";
    public static final String CLOSE_TIMEOUT_CONFIG = "close.timeout";
    public static final String MAX_COMMIT_ATTEMPTS_CONFIG = "max.commit.attempts";
    public static final String BLOCK_REQUEST_ON_PARTITION_ASSIGNMENT_CONFIG = "block.request.on.partition.assignment";

    private static final Duration DEFAULT_CLOSE_TIMEOUT = Duration.ofSeconds(30L);
    private static final boolean DEFAULT_BLOCK_REQUEST_ON_PARTITION_ASSIGNMENT = false;

    private static final Logger LOGGER = LoggerFactory.getLogger(KafkaFluxFactory.class);

    private static final Map<String, AtomicLong> REGISTRATION_COUNTS_BY_CLIENT_ID = new ConcurrentHashMap<>();

    private final KafkaConfigFactory configFactory;

    public KafkaFluxFactory(KafkaConfigFactory configFactory) {
        this.configFactory = configFactory;
    }
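
    /**
     * Receives auto-acknowledged Records (see {@link #receiveAuto(Collection, AutoAcknowledgementConfig)}),
     * applies the supplied pregroup transformation, and groups the result by {@link TopicPartition}.
     */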
    public Flux<GroupedFlux<TopicPartition, ConsumerRecord<K, V>>> receiveAutoGroup(Collection<String> topics,
        AutoAcknowledgementConfig autoAcknowledgementConfig,
        Function<? super Flux<ConsumerRecord<K, V>>, ? extends Publisher<ConsumerRecord<K, V>>> pregroup) {
        return receiveAuto(topics, autoAcknowledgementConfig)
            .transform(pregroup)
            .groupBy(ConsumerRecordExtraction::extractTopicPartition);
    }
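
    /**
     * Receives Records from the given topics with automatic acknowledgement governed by the
     * supplied {@link AutoAcknowledgementConfig}, unwrapping each emitted {@link Acknowledgeable}
     * to its underlying {@link ConsumerRecord}.
     */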
    public Flux<ConsumerRecord<K, V>> receiveAuto(Collection<String> topics, AutoAcknowledgementConfig autoAcknowledgementConfig) {
        return receive(topics, new MultipleReceiverAcknowledgementStrategy())
            .transformDeferred(new AutoAcknowledgingTransformer<>(autoAcknowledgementConfig, KafkaFluxFactory::collectAcknowledgers, KafkaFluxFactory::acknowledge))
            .map(Acknowledgeable::get);
    }
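
    /**
     * Receives {@link Acknowledgeable} Records from the given topics using the supplied
     * acknowledgement strategy, applies the supplied pregroup transformation, and groups the
     * result by {@link TopicPartition}.
     */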
    public Flux<GroupedFlux<TopicPartition, Acknowledgeable<ConsumerRecord<K, V>>>> receiveGroup(Collection<String> topics,
        ReceiverAcknowledgementStrategy acknowledgementStrategy,
        Function<? super Flux<Acknowledgeable<ConsumerRecord<K, V>>>, ? extends Publisher<Acknowledgeable<ConsumerRecord<K, V>>>> pregroup) {
        return receive(topics, acknowledgementStrategy)
            .transform(pregroup)
            .groupBy(KafkaFluxFactory::extractTopicPartition);
    }
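
    /**
     * Receives {@link Acknowledgeable} Records from the given topics, defaulting to an
     * {@link OrderManagingReceiverAcknowledgementStrategy}.
     */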
    public Flux<Acknowledgeable<ConsumerRecord<K, V>>> receive(Collection<String> topics) {
        return receive(topics, new OrderManagingReceiverAcknowledgementStrategy());
    }
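
    /**
     * Receives {@link Acknowledgeable} Records from the given topics using the supplied
     * acknowledgement strategy. Reactive configurations documented in the method body are
     * loaded from the properties produced by the backing {@link KafkaConfigFactory}.
     *
     * <p>A minimal usage sketch. This is illustrative only: it assumes the
     * {@code KafkaConfigFactory} exposes a {@code put(String, Object)} setter, that String
     * de/serialization is configured, and that {@code process} stands in for application logic:
     * <pre>{@code
     * KafkaConfigFactory configFactory = new KafkaConfigFactory();
     * configFactory.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
     * configFactory.put(ConsumerConfig.GROUP_ID_CONFIG, "example-group");
     * configFactory.put(KafkaFluxFactory.POLL_TIMEOUT_CONFIG, "PT0.1S"); // parsed via Duration::parse
     *
     * new KafkaFluxFactory<String, String>(configFactory)
     *     .receive(Collections.singletonList("example-topic"))
     *     .doOnNext(acknowledgeable -> process(acknowledgeable.get()))
     *     .subscribe(Acknowledgeable::acknowledge);
     * }</pre>
     */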
    public Flux<Acknowledgeable<ConsumerRecord<K, V>>> receive(Collection<String> topics, ReceiverAcknowledgementStrategy acknowledgementStrategy) {
        Map<String, Object> properties = configFactory.create();
        ReceiverOptions<K, V> receiverOptions = ReceiverOptions.create(properties);

        // Reactor allows controlling the timeout of its polls to Kafka. This config can be
        // increased if the Kafka cluster is slow to respond.
        receiverOptions.pollTimeout(ConfigLoading.load(properties, POLL_TIMEOUT_CONFIG, Duration::parse, receiverOptions.pollTimeout()));

        // Closing the underlying Kafka Consumer can fail. To avoid blocking a Consumer
        // indefinitely during this process (which can lead to non-consumption of assigned
        // partitions), we default to the same timeout used by KafkaConsumer::close
        receiverOptions.closeTimeout(ConfigLoading.load(properties, CLOSE_TIMEOUT_CONFIG, Duration::parse, DEFAULT_CLOSE_TIMEOUT));

        // Reactor takes control of offset committing by disabling the native Kafka auto-commit
        // and periodically committing offsets of acknowledged Records. Since the native
        // auto-commit is disabled, we reuse the native property that configures offset commit
        // intervals.
        receiverOptions.commitInterval(ConfigLoading.load(properties, ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, Long::valueOf)
            .map(Duration::ofMillis)
            .orElse(receiverOptions.commitInterval()));

        // Committing offsets can fail for retriable reasons. This config can be increased if
        // failures to commit offsets prove particularly frequent.
        receiverOptions.maxCommitAttempts(ConfigLoading.load(properties, MAX_COMMIT_ATTEMPTS_CONFIG, Integer::valueOf, receiverOptions.maxCommitAttempts()));

        // Subscribers may want to block request Threads on assignment of partitions AND the
        // subsequent fetching/updating of offset positions on those partitions, such that all
        // imminently produced Records on the subscribed topics will be received by the
        // associated Consumer Group. This can help avoid timing problems, particularly in
        // tests, and avoids having to use `auto.offset.reset = "earliest"` to guarantee
        // receipt of Records immediately produced by the request Thread (directly or indirectly)
        CompletableFuture<Collection<ReceiverPartition>> assignedPartitions =
            ConfigLoading.load(properties, BLOCK_REQUEST_ON_PARTITION_ASSIGNMENT_CONFIG, Boolean::valueOf, DEFAULT_BLOCK_REQUEST_ON_PARTITION_ASSIGNMENT)
                ? new CompletableFuture<>() : CompletableFuture.completedFuture(Collections.emptyList());
        Future<?> assignedPartitionPositions = assignedPartitions.thenAccept(partitions -> partitions.forEach(ReceiverPartition::position));
        receiverOptions.addAssignListener(assignedPartitions::complete);

        // Use a dedicated and identifiable Scheduler for publishing
        Scheduler scheduler = Schedulers.newElastic(KafkaFluxFactory.class.getSimpleName() + "-" + extractClientId(receiverOptions));
        receiverOptions.schedulerSupplier(() -> scheduler);

        // 1) KafkaReceivers are not thread-safe and therefore must not be shared among
        //    Subscriptions, so we defer KafkaReceiver creation on a per-Subscription basis
        // 2) Every time a KafkaReceiver is created for a new Subscription, its Consumer's
        //    Client ID must be made unique in order to avoid conflicting registration with
        //    external resources, i.e. JMX. Since we don't want to recreate the Properties from
        //    the ConfigFactory (and/or reparse those Properties) on every Subscription AND we
        //    want to maintain thread safety, we create a "unique" ReceiverOptions from the
        //    base Options
        // 3) Subscribe to Kafka Records first in order to trigger polling, then block the
        //    requesting Thread on assigned partition positions iff necessary
        return Flux.defer(() -> KafkaReceiver.create(createUniqueReceiverOptions(receiverOptions, topics)).receive())
            .transform(acknowledgementStrategy.createRecordTransformer(properties))
            .transform(records -> assignedPartitionPositions.isDone() ? records : records.mergeWith(blockRequestOn(assignedPartitionPositions)));
    }
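
    // Copies the base ReceiverOptions for a new Subscription, making the Consumer's Client ID
    // unique (via registerNewClient) and applying the requested topic subscription, while
    // carrying over all other configured options and assign listeners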
    private static <K, V> ReceiverOptions<K, V> createUniqueReceiverOptions(ReceiverOptions<K, V> receiverOptions, Collection<String> topics) {
        ReceiverOptions<K, V> uniqueReceiverOptions = ReceiverOptions.<K, V>create(receiverOptions.consumerProperties())
            .consumerProperty(CommonClientConfigs.CLIENT_ID_CONFIG, registerNewClient(extractClientId(receiverOptions)))
            .pollTimeout(receiverOptions.pollTimeout())
            .closeTimeout(receiverOptions.closeTimeout())
            .commitInterval(receiverOptions.commitInterval())
            .maxCommitAttempts(receiverOptions.maxCommitAttempts())
            .schedulerSupplier(receiverOptions.schedulerSupplier())
            .subscription(topics);
        receiverOptions.assignListeners().forEach(uniqueReceiverOptions::addAssignListener);
        return uniqueReceiverOptions;
    }
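
    // Empty Mono that, upon request, blocks the requesting Thread until the provided Future
    // completes; merged with Record emission above to optionally block on partition assignment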
    private static <T> Mono<T> blockRequestOn(Future<?> future) {
        return Mono.<T>empty().doOnRequest(requested -> {
            try {
                future.get();
            } catch (Exception e) {
                LOGGER.error("Failed to block Request Thread on Future", e);
            }
        });
    }
private static Mono