package io.github.clescot.kafka.connect.http.sink;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import dev.failsafe.RetryPolicy;
import io.github.clescot.kafka.connect.http.HttpTask;
import io.github.clescot.kafka.connect.http.VersionUtils;
import io.github.clescot.kafka.connect.http.client.Configuration;
import io.github.clescot.kafka.connect.http.client.HttpClientFactory;
import io.github.clescot.kafka.connect.http.client.HttpException;
import io.github.clescot.kafka.connect.http.core.HttpExchange;
import io.github.clescot.kafka.connect.http.core.HttpRequest;
import io.github.clescot.kafka.connect.http.core.queue.KafkaRecord;
import io.github.clescot.kafka.connect.http.sink.mapper.HttpRequestMapper;
import io.github.clescot.kafka.connect.http.sink.mapper.HttpRequestMapperFactory;
import io.github.clescot.kafka.connect.http.sink.publish.KafkaProducer;
import io.github.clescot.kafka.connect.http.sink.publish.PublishConfigurer;
import io.github.clescot.kafka.connect.http.sink.publish.PublishMode;
import io.micrometer.core.instrument.composite.CompositeMeterRegistry;
import org.apache.commons.jexl3.JexlBuilder;
import org.apache.commons.jexl3.JexlEngine;
import org.apache.commons.jexl3.JexlFeatures;
import org.apache.commons.jexl3.introspection.JexlPermissions;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.kafka.common.config.AbstractConfig;
import org.apache.kafka.connect.connector.ConnectRecord;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.sink.ErrantRecordReporter;
import org.apache.kafka.connect.sink.SinkRecord;
import org.apache.kafka.connect.sink.SinkTask;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.concurrent.*;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static io.github.clescot.kafka.connect.http.sink.HttpSinkConfigDefinition.CONFIGURATION_IDS;
import static io.github.clescot.kafka.connect.http.sink.HttpSinkConfigDefinition.HTTP_CLIENT_ASYNC_FIXED_THREAD_POOL_SIZE;
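/**
 * Abstract {@link SinkTask} which maps each incoming {@link SinkRecord} to an {@link HttpRequest},
 * executes it against a remote endpoint via the configured HTTP client, and optionally publishes the
 * resulting {@link HttpExchange} to an in-memory queue or to a Kafka topic, depending on the publish mode.
 *
 * @param <R> the native request type of the underlying HTTP client implementation
 * @param <S> the native response type of the underlying HTTP client implementation
 */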
public abstract class HttpSinkTask<R, S> extends SinkTask {
private static final Logger LOGGER = LoggerFactory.getLogger(HttpSinkTask.class);
private static final VersionUtils VERSION_UTILS = new VersionUtils();
public static final String DEFAULT = "default";
private HttpRequestMapper defaultHttpRequestMapper;
private List<HttpRequestMapper> httpRequestMappers;
public static final String DEFAULT_CONFIGURATION_ID = DEFAULT;
private final HttpClientFactory<R, S> httpClientFactory;
private ErrantRecordReporter errantRecordReporter;
private HttpTask<R, S> httpTask;
private KafkaProducer<String, Object> producer;
private Queue<KafkaRecord> queue;
private PublishMode publishMode;
private HttpSinkConnectorConfig httpSinkConnectorConfig;
private static CompositeMeterRegistry meterRegistry;
private ExecutorService executorService;
private List<MessageSplitter> messageSplitters;
private List<RequestGrouper> requestGroupers;
@SuppressWarnings("java:S5993")
public HttpSinkTask(HttpClientFactory<R, S> httpClientFactory, KafkaProducer<String, Object> producer) {
this.httpClientFactory = httpClientFactory;
this.producer = producer;
}
@Override
public String version() {
return VERSION_UTILS.getVersion();
}
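/**
 * Builds one {@link Configuration} per identifier listed under {@code CONFIGURATION_IDS}.
 * Any part left unset in a custom configuration (HTTP client, retry policy,
 * retry response code regex) falls back to the corresponding part of the default configuration.
 */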
private List<Configuration<R, S>> buildCustomConfigurations(HttpClientFactory<R, S> httpClientFactory,
AbstractConfig config,
Configuration<R, S> defaultConfiguration,
ExecutorService executorService) {
CopyOnWriteArrayList<Configuration<R, S>> configurations = Lists.newCopyOnWriteArrayList();
for (String configId : Optional.ofNullable(config.getList(CONFIGURATION_IDS)).orElse(Lists.newArrayList())) {
Configuration<R, S> configuration = new Configuration<>(configId, httpClientFactory, config, executorService, meterRegistry);
if (configuration.getHttpClient() == null) {
configuration.setHttpClient(defaultConfiguration.getHttpClient());
}
//we reuse the default retry policy if not set
Optional<RetryPolicy<HttpExchange>> defaultRetryPolicy = defaultConfiguration.getRetryPolicy();
if (configuration.getRetryPolicy().isEmpty() && defaultRetryPolicy.isPresent()) {
configuration.setRetryPolicy(defaultRetryPolicy.get());
}
//the default success response code regex is propagated to every custom configuration
configuration.setSuccessResponseCodeRegex(defaultConfiguration.getSuccessResponseCodeRegex());
Optional<Pattern> defaultRetryResponseCodeRegex = defaultConfiguration.getRetryResponseCodeRegex();
if (configuration.getRetryResponseCodeRegex().isEmpty() && defaultRetryResponseCodeRegex.isPresent()) {
configuration.setRetryResponseCodeRegex(defaultRetryResponseCodeRegex.get());
}
configurations.add(configuration);
}
return configurations;
}
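//the meter registry is static and write-once: the first call wins, so all task instances
//loaded by the same classloader share a single registry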
public static synchronized void setMeterRegistry(CompositeMeterRegistry compositeMeterRegistry) {
if (meterRegistry == null) {
meterRegistry = compositeMeterRegistry;
}
}
protected static synchronized void clearMeterRegistry(){
meterRegistry = null;
}
/**
* @param settings the settings used to configure the connector
*/
@Override
public void start(Map<String, String> settings) {
List<Configuration<R, S>> customConfigurations;
Configuration<R, S> defaultConfiguration;
Preconditions.checkNotNull(settings, "settings cannot be null");
HttpSinkConfigDefinition httpSinkConfigDefinition = new HttpSinkConfigDefinition(settings);
this.httpSinkConnectorConfig = new HttpSinkConnectorConfig(httpSinkConfigDefinition.config(), settings);
//build executorService
Optional<Integer> customFixedThreadPoolSize = Optional.ofNullable(httpSinkConnectorConfig.getInt(HTTP_CLIENT_ASYNC_FIXED_THREAD_POOL_SIZE));
customFixedThreadPoolSize.ifPresent(integer -> this.executorService = buildExecutorService(integer));
//build meterRegistry
MeterRegistryFactory meterRegistryFactory = new MeterRegistryFactory();
setMeterRegistry(meterRegistryFactory.buildMeterRegistry(httpSinkConnectorConfig));
//build httpRequestMappers
JexlEngine jexlEngine = buildJexlEngine();
//message splitters
MessageSplitterFactory messageSplitterFactory = new MessageSplitterFactory();
this.messageSplitters = messageSplitterFactory.buildMessageSplitters(httpSinkConnectorConfig, jexlEngine);
//HttpRequestMappers
HttpRequestMapperFactory httpRequestMapperFactory = new HttpRequestMapperFactory();
this.defaultHttpRequestMapper = httpRequestMapperFactory.buildDefaultHttpRequestMapper(httpSinkConnectorConfig, jexlEngine);
this.httpRequestMappers = httpRequestMapperFactory.buildCustomHttpRequestMappers(httpSinkConnectorConfig, jexlEngine);
//request groupers
RequestGrouperFactory requestGrouperFactory = new RequestGrouperFactory();
this.requestGroupers = requestGrouperFactory.buildRequestGroupers(httpSinkConnectorConfig);
//configurations
defaultConfiguration = new Configuration<>(DEFAULT_CONFIGURATION_ID, httpClientFactory, httpSinkConnectorConfig, executorService, meterRegistry);
customConfigurations = buildCustomConfigurations(httpClientFactory, httpSinkConnectorConfig, defaultConfiguration, executorService);
httpTask = new HttpTask<>(httpSinkConnectorConfig, defaultConfiguration, customConfigurations, meterRegistry, executorService);
try {
errantRecordReporter = context.errantRecordReporter();
if (errantRecordReporter == null) {
LOGGER.warn("Dead Letter Queue (DLQ) is not enabled. it is recommended to configure a Dead Letter Queue for a better error handling.");
}
} catch (NoSuchMethodError | NoClassDefFoundError e) {
LOGGER.warn("errantRecordReporter has been added to Kafka Connect since 2.6.0 release. you should upgrade the Kafka Connect Runtime shortly.");
errantRecordReporter = null;
}
//configure publishMode
this.publishMode = httpSinkConnectorConfig.getPublishMode();
LOGGER.debug("publishMode: {}", publishMode);
PublishConfigurer publishConfigurer = PublishConfigurer.build();
switch (publishMode) {
case PRODUCER:
publishConfigurer.configureProducerPublishMode(httpSinkConnectorConfig, producer);
break;
case IN_MEMORY_QUEUE:
this.queue = publishConfigurer.configureInMemoryQueue(httpSinkConnectorConfig);
break;
case NONE:
default:
LOGGER.debug("NONE publish mode");
}
}
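/**
 * Builds a restricted JEXL engine used to evaluate the expressions of message splitters and
 * HTTP request mappers: loops and side effects are disabled, and introspection is limited
 * to a whitelist of record- and request-related classes.
 */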
private static JexlEngine buildJexlEngine() {
// restrict introspection to the classes exposed to JEXL expressions
JexlPermissions permissions = new JexlPermissions.ClassPermissions(SinkRecord.class, ConnectRecord.class, HttpRequest.class);
// Create the engine
JexlFeatures features = new JexlFeatures()
.loops(false)
.sideEffectGlobal(false)
.sideEffect(false);
return new JexlBuilder().features(features).permissions(permissions).create();
}
/**
* @param customFixedThreadPoolSize fixed number of threads of the executorService.
* @return the built executorService
*/
public ExecutorService buildExecutorService(Integer customFixedThreadPoolSize) {
return Executors.newFixedThreadPool(customFixedThreadPoolSize);
}
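/**
 * Processes a batch of records as a pipeline: records with a null value are skipped, the remaining
 * ones are split (when a message splitter matches), mapped to {@link HttpRequest}s, grouped
 * (when request groupers are configured), executed asynchronously, and the resulting
 * {@link HttpExchange}s published according to the publish mode. The method blocks until
 * every exchange completes.
 */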
@Override
@SuppressWarnings("java:S3864")
public void put(Collection<SinkRecord> records) {
Preconditions.checkNotNull(records, "records collection to be processed is null");
if (records.isEmpty()) {
LOGGER.debug("no records");
return;
}
Preconditions.checkNotNull(httpTask, "httpTask is null. 'start' method must be called once before put");
//we submit futures to the pool
Stream<SinkRecord> stream = records.stream();
List<Pair<SinkRecord, HttpRequest>> requests = stream
.filter(sinkRecord -> sinkRecord.value() != null)
.peek(this::debugConnectRecord)
.map(this::splitMessage)
.flatMap(List::stream)
.map(this::toHttpRequests)
.collect(Collectors.toList());
List<Pair<SinkRecord, HttpRequest>> groupedRequests = groupRequests(requests);
//List<Pair<SinkRecord, HttpRequest>> -> List<CompletableFuture<HttpExchange>>
List<CompletableFuture<HttpExchange>> completableFutures = groupedRequests.stream()
.map(this::call)
.collect(Collectors.toList());
List<HttpExchange> httpExchanges = completableFutures.stream().map(CompletableFuture::join).collect(Collectors.toList());
LOGGER.debug("HttpExchanges created: '{}'", httpExchanges.size());
}
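//applies the configured request groupers, concatenating their outputs;
//without groupers, the requests pass through unchanged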
private List<Pair<SinkRecord, HttpRequest>> groupRequests(List<Pair<SinkRecord, HttpRequest>> pairList) {
if (requestGroupers != null && !requestGroupers.isEmpty()) {
return requestGroupers.stream().map(requestGrouper -> requestGrouper.group(pairList)).reduce(Lists.newArrayList(), (l, r) -> {
l.addAll(r);
return l;
});
} else {
return pairList;
}
}
private List<SinkRecord> splitMessage(SinkRecord sinkRecord) {
Optional<MessageSplitter> splitterFound = messageSplitters.stream().filter(messageSplitter -> messageSplitter.matches(sinkRecord)).findFirst();
//split the record with the first matching splitter, if any
List<SinkRecord> results;
if (splitterFound.isPresent()) {
results = splitterFound.get().split(sinkRecord);
} else {
results = List.of(sinkRecord);
}
return results;
}
private void debugConnectRecord(SinkRecord sinkRecord) {
Object value = sinkRecord.value();
if (value != null) {
Class<?> valueClass = value.getClass();
LOGGER.debug("valueClass is '{}'", valueClass.getName());
LOGGER.debug("value Schema from SinkRecord is '{}'", sinkRecord.valueSchema());
}
}
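/**
 * Executes the HTTP request asynchronously, then publishes the resulting exchange.
 * On failure, the errant record is reported to the Dead Letter Queue when a reporter is
 * available, and the future resolves to null instead of propagating the exception.
 */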
private CompletableFuture<HttpExchange> call(Pair<SinkRecord, HttpRequest> pair) {
return httpTask
.call(pair.getRight())
.thenApply(
publish(pair.getLeft(), this.publishMode, httpSinkConnectorConfig)
)
.exceptionally(throwable -> {
LOGGER.error(throwable.getMessage());
if (errantRecordReporter != null) {
// Send errant record to error reporter
Future<Void> future = errantRecordReporter.report(pair.getLeft(), throwable);
// Optionally wait until the failure's been recorded in Kafka
try {
future.get();
} catch (InterruptedException ex) {
//restore the interrupt status only when the wait itself was interrupted
Thread.currentThread().interrupt();
LOGGER.error(ex.getMessage());
} catch (ExecutionException ex) {
LOGGER.error(ex.getMessage());
}
}
return null;
});
}
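//the first custom mapper matching the record wins; otherwise the default mapper is used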
private @NotNull Pair<SinkRecord, HttpRequest> toHttpRequests(SinkRecord sinkRecord) {
HttpRequestMapper httpRequestMapper = httpRequestMappers.stream()
.filter(mapper -> mapper.matches(sinkRecord))
.findFirst()
.orElse(defaultHttpRequestMapper);
//build HttpRequest
HttpRequest httpRequest = httpRequestMapper.map(sinkRecord);
return Pair.of(sinkRecord, httpRequest);
}
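/**
 * Returns the publication step applied to each {@link HttpExchange}, according to the publish mode:
 * IN_MEMORY_QUEUE hands the exchange to the internal queue (typically consumed by a companion
 * source connector), PRODUCER sends it directly to a success or error topic, and NONE only logs it.
 */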
private @NotNull Function<HttpExchange, HttpExchange> publish(SinkRecord sinkRecord, PublishMode publishMode, HttpSinkConnectorConfig connectorConfig) throws HttpException {
return httpExchange -> {
//optionally publish to the 'in memory' queue
if (PublishMode.IN_MEMORY_QUEUE.equals(publishMode)) {
publishInInMemoryQueueMode(sinkRecord, connectorConfig, httpExchange);
} else if (PublishMode.PRODUCER.equals(publishMode)) {
publishInProducerMode(connectorConfig, httpExchange);
} else {
LOGGER.debug("publish.mode : 'NONE' http exchange NOT published :'{}'", httpExchange);
}
return httpExchange;
};
}
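//non-blocking offer: when the queue rejects the exchange (e.g. it is full),
//the exchange is dropped and an error is logged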
private void publishInInMemoryQueueMode(SinkRecord sinkRecord, HttpSinkConnectorConfig connectorConfig, HttpExchange httpExchange) {
LOGGER.debug("publish.mode : 'IN_MEMORY_QUEUE': http exchange published to queue '{}':{}", connectorConfig.getQueueName(), httpExchange);
boolean offer = queue.offer(new KafkaRecord(sinkRecord.headers(), sinkRecord.keySchema(), sinkRecord.key(), httpExchange));
if (!offer) {
LOGGER.error("sinkRecord(topic:{},partition:{},key:{},timestamp:{}) not added to the 'in memory' queue:{}",
sinkRecord.topic(),
sinkRecord.kafkaPartition(),
sinkRecord.key(),
sinkRecord.timestamp(),
connectorConfig.getQueueName()
);
}
}
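//sends the exchange (or only its HTTP response, depending on the producer content setting)
//to the success or error topic, and waits up to 3 seconds for the broker acknowledgment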
private void publishInProducerMode(HttpSinkConnectorConfig connectorConfig, HttpExchange httpExchange) {
LOGGER.debug("publish.mode : 'PRODUCER' : HttpExchange success will be published at topic : '{}'", connectorConfig.getProducerSuccessTopic());
LOGGER.debug("publish.mode : 'PRODUCER' : HttpExchange error will be published at topic : '{}'", connectorConfig.getProducerErrorTopic());
String targetTopic = httpExchange.isSuccess() ? connectorConfig.getProducerSuccessTopic() : connectorConfig.getProducerErrorTopic();
String producerContent = connectorConfig.getProducerContent();
ProducerRecord<String, Object> myRecord;
if ("response".equalsIgnoreCase(producerContent)) {
myRecord = new ProducerRecord<>(targetTopic, httpExchange.getHttpResponse());
} else {
myRecord = new ProducerRecord<>(targetTopic, httpExchange);
}
LOGGER.trace("before send to {}", targetTopic);
RecordMetadata recordMetadata;
try {
recordMetadata = this.producer.send(myRecord).get(3, TimeUnit.SECONDS);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new HttpException(e);
} catch (Exception e) {
throw new HttpException(e);
}
long offset = recordMetadata.offset();
int partition = recordMetadata.partition();
long timestamp = recordMetadata.timestamp();
String topic = recordMetadata.topic();
LOGGER.debug("✉✉ record sent ✉✉ : topic:{},partition:{},offset:{},timestamp:{}", topic, partition, offset, timestamp);
}
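/**
 * Shuts down the executor service gracefully, waiting up to 30 seconds for in-flight
 * HTTP calls to terminate before the task stops.
 */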
@Override
public void stop() {
if (httpTask == null) {
LOGGER.error("httpTask hasn't been created with the 'start' method");
return;
}
if (executorService != null) {
if (!executorService.isShutdown()) {
executorService.shutdown();
}
try {
boolean awaitTermination = executorService.awaitTermination(30, TimeUnit.SECONDS);
if (!awaitTermination) {
LOGGER.warn("timeout elapsed before executor termination");
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new ConnectException(e);
}
LOGGER.info("executor is shutdown : '{}'", executorService.isShutdown());
LOGGER.info("executor tasks are terminated : '{}'", executorService.isTerminated());
}
}
protected void setQueue(Queue<KafkaRecord> queue) {
this.queue = queue;
}
public Configuration<R, S> getDefaultConfiguration() {
Preconditions.checkNotNull(httpTask, "httpTask has not been initialized in the start method");
return httpTask.getDefaultConfiguration();
}
public List<Configuration<R, S>> getCustomConfigurations() {
Preconditions.checkNotNull(httpTask, "httpTask has not been initialized in the start method");
return httpTask.getCustomConfigurations();
}
public HttpTask<R, S> getHttpTask() {
return httpTask;
}
protected HttpRequestMapper getDefaultHttpRequestMapper() {
return defaultHttpRequestMapper;
}
}