Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want.
io.kestra.plugin.kafka.Consume Maven / Gradle / Ivy
package io.kestra.plugin.kafka;
import io.confluent.kafka.serializers.KafkaAvroSerializerConfig;
import io.kestra.core.exceptions.IllegalVariableEvaluationException;
import io.kestra.core.models.annotations.Example;
import io.kestra.core.models.annotations.Plugin;
import io.kestra.core.models.executions.metrics.Counter;
import io.kestra.core.models.tasks.RunnableTask;
import io.kestra.core.runners.RunContext;
import io.kestra.core.serializers.FileSerde;
import io.kestra.core.utils.Await;
import io.kestra.core.utils.Rethrow;
import io.kestra.plugin.kafka.serdes.SerdeType;
import lombok.AccessLevel;
import lombok.Builder;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.ToString;
import lombok.experimental.SuperBuilder;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.IsolationLevel;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.header.Headers;
import org.apache.kafka.common.serialization.Deserializer;
import org.jetbrains.annotations.Nullable;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.time.Instant;
import java.time.ZonedDateTime;
import java.time.chrono.ChronoZonedDateTime;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import static io.kestra.core.utils.Rethrow.throwConsumer;
title = "Consume messages from one or more Kafka topics."
examples = {
code = {
"topic: test_kestra",
" bootstrap.servers: localhost:9092",
" schema.registry.url: http://localhost:8085",
"keyDeserializer: STRING",
"valueDeserializer: AVRO",
title = "Connect to a Kafka cluster with SSL.",
code = {
" security.protocol: SSL",
" bootstrap.servers: localhost:19092",
" ssl.key.password: my-ssl-password",
" ssl.keystore.type: PKCS12",
" ssl.keystore.location: my-base64-encoded-keystore",
" ssl.keystore.password: my-ssl-password",
" ssl.truststore.location: my-base64-encoded-truststore",
" ssl.truststore.password: my-ssl-password",
"- kestra_workerinstance",
"keyDeserializer: STRING",
"valueDeserializer: STRING"
public class Consume extends AbstractKafkaConnection implements RunnableTask, ConsumeInterface {
// Topic(s) to consume. evaluateTopics accepts a String or a List here and rejects any other type.
private Object topic;
// Regular expression selecting topics; compiled with Pattern.compile in topicSubscription.
// validateConfiguration rejects configurations where both `topic` and `topicPattern` are set.
private String topicPattern;
// Optional partitions; when non-empty together with `topic`, a topic-partitions subscription is built.
private List partitions;
// Consumer group id. When null, run() falls back to the group id found in the consumer properties, if any.
private String groupId;
title = "The deserializer used for the key.",
description = "Possible values are: `STRING`, `INTEGER`, `FLOAT`, `DOUBLE`, `LONG`, `SHORT`, `BYTE_ARRAY`, `BYTE_BUFFER`, `BYTES`, `UUID`, `VOID`, `AVRO`, `JSON`."
// Defaults to STRING.
private SerdeType keyDeserializer = SerdeType.STRING;
title = "The deserializer used for the value.",
description = "Possible values are: `STRING`, `INTEGER`, `FLOAT`, `DOUBLE`, `LONG`, `SHORT`, `BYTE_ARRAY`, `BYTE_BUFFER`, `BYTES`, `UUID`, `VOID`, `AVRO`, `JSON`."
// Defaults to STRING.
private SerdeType valueDeserializer = SerdeType.STRING;
// Optional starting point; resolved by evaluateSince and handed to the topic-partitions subscriptions.
private String since;
// Timeout passed to every consumer.poll call; defaults to 5 seconds.
private Duration pollDuration = Duration.ofSeconds(5);
// Optional record-count threshold checked by ended() after each poll.
private Integer maxRecords;
// Optional time limit checked by ended() after each poll.
private Duration maxDuration;
// Subscription strategy selected in run() through topicSubscription.
private ConsumerSubscription subscription;
/**
 * Consumes records from Kafka and writes each one to a temporary `.ion` file.
 * Polling repeats until {@code ended(...)} reports that consumption should stop.
 *
 * @param runContext used to render templated values, create the temporary file and publish metrics
 * @return the task output
 * @throws Exception declared broadly; the visible body performs file and Kafka I/O
 */
@SuppressWarnings({"unchecked", "rawtypes"})
public Output run(RunContext runContext) throws Exception {
// ugly hack to force use of Kestra plugins classLoader
// NOTE(review): the first argument of createProperties is not visible in this view — confirm against the original file.
final Properties consumerProps = createProperties(, runContext);
// A group id set on the task wins (after rendering); otherwise one supplied through the
// consumer properties is adopted so later checks on this.groupId see it.
if (this.groupId != null) {
consumerProps.put(ConsumerConfig.GROUP_ID_CONFIG, runContext.render(groupId));
} else if (consumerProps.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) {
// groupId can be passed from properties
this.groupId = consumerProps.getProperty(ConsumerConfig.GROUP_ID_CONFIG);
// Apply defaults only when the user did not configure the setting explicitly.
// Properties.contains(Object) is inherited from Hashtable and looks for a VALUE, so the key
// lookup must use containsKey; with contains() these guards were effectively always true and
// silently overwrote user-supplied `enable.auto.commit` / `isolation.level` settings.
if (!consumerProps.containsKey(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG)) {
    // by default, we disable auto-commit
    consumerProps.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
}
if (!consumerProps.containsKey(ConsumerConfig.ISOLATION_LEVEL_CONFIG)) {
    // by default, we only read committed offsets in case of transactions
    consumerProps.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, IsolationLevel.READ_COMMITTED.toString().toLowerCase(Locale.ROOT));
}
// Separate property set handed to both deserializers.
final Properties serdesProperties = createProperties(this.serdeProperties, runContext);
// by default, enable Avro LogicalType
serdesProperties.put(KafkaAvroSerializerConfig.AVRO_USE_LOGICAL_TYPE_CONVERTERS_CONFIG, true);
final Deserializer keyDeserializer = getTypedDeserializer(this.keyDeserializer);
final Deserializer valDeserializer = getTypedDeserializer(this.valueDeserializer);
// Both receive the same properties; the boolean is true for the key deserializer and false for the value one.
keyDeserializer.configure(serdesProperties, true);
valDeserializer.configure(serdesProperties, false);
// Consumed records are written to a temporary `.ion` file.
File tempFile = runContext.tempFile(".ion").toFile();
// try-with-resources: the output stream and the consumer are both closed when the block exits.
try (
BufferedOutputStream output = new BufferedOutputStream(new FileOutputStream(tempFile));
KafkaConsumer consumer = new KafkaConsumer(consumerProps, keyDeserializer, valDeserializer);
) {
// Pick the subscription strategy from the task configuration, then attach the consumer to it.
this.subscription = topicSubscription(runContext);
this.subscription.subscribe(consumer, this);
// Per-topic record counts, keyed by topic name.
Map count = new HashMap<>();
// NOTE(review): no visible statement increments `total`, yet ended() compares it with maxRecords — confirm it is updated per record.
AtomicInteger total = new AtomicInteger();
// NOTE(review): the initializer of `started` is not visible in this view.
ZonedDateTime started =;
ConsumerRecords records;
boolean empty;
// Poll at least once, then keep polling until ended(...) returns true.
do {
records = consumer.poll(this.pollDuration);
empty = records.isEmpty();
records.forEach(throwConsumer(record -> {
// using HashMap for null values
Map map = new HashMap<>();
map.put("key", record.key());
map.put("value", record.value());
map.put("headers", processHeaders(record.headers()));
map.put("topic", record.topic());
map.put("partition", record.partition());
// The record timestamp (epoch milliseconds) is stored as an Instant.
map.put("timestamp", Instant.ofEpochMilli(record.timestamp()));
map.put("offset", record.offset());
FileSerde.write(output, map);
// Increment the counter of the record's topic, starting at 1 on first sight.
count.compute(record.topic(), (s, integer) -> integer == null ? 1 : integer + 1);
while (!this.ended(empty, total, started));
// NOTE(review): the body of this group-id branch is not visible in this view.
if (this.groupId != null) {
// flush & close
// Publishes one `records` counter metric per entry, tagged with the topic name (`s`).
.forEach((s, integer) -> runContext.metric(Counter.of("records", integer, "topic", s)));
return Output.builder()
/**
 * Tells the poll loop in run() whether it should stop.
 *
 * @param empty whether the last poll returned no records
 * @param count the record counter compared against `maxRecords`
 * @param start the time reference used for the `maxDuration` check
 * @return true when consumption should stop, false to keep polling
 */
private boolean ended(Boolean empty, AtomicInteger count, ZonedDateTime start) {
// An empty poll ends consumption.
if (empty) {
return true;
// NOTE(review): with the strict `>` the loop polls once more when exactly `maxRecords`
// records have been counted — confirm whether `>=` was intended.
if (this.maxRecords != null && count.get() > this.maxRecords) {
return true;
// NOTE(review): both operands of this comparison are missing in this view.
if (this.maxDuration != null && > {
return true;
return false;
ConsumerSubscription topicSubscription(final RunContext runContext) throws IllegalVariableEvaluationException {
if (this.topic != null && (partitions != null && !partitions.isEmpty())) {
List topicPartitions = getTopicPartitions(runContext);
return TopicPartitionsSubscription.forTopicPartitions(groupId, topicPartitions, evaluateSince(runContext));
if (this.topic != null && groupId == null) {
return TopicPartitionsSubscription.forTopics(null, evaluateTopics(runContext), evaluateSince(runContext));
if (this.topic != null) {
return new TopicListSubscription(groupId, evaluateTopics(runContext));
if (this.topicPattern != null) {
try {
return new TopicPatternSubscription(groupId, Pattern.compile(this.topicPattern));
} catch (PatternSyntaxException e) {
throw new IllegalArgumentException("Invalid regex for `topicPattern`: " + this.topicPattern);
throw new IllegalArgumentException("Failed to create KafkaConsumer subscription");
/**
 * Builds the {@link TopicPartition} objects for the configured topics.
 * NOTE(review): the stream pipeline is only partially visible in this view; the source of
 * `partition` is presumably the `partitions` field — confirm.
 *
 * @param runContext the context used to render the configured topics
 * @throws IllegalVariableEvaluationException if the topics cannot be rendered
 */
private List getTopicPartitions(RunContext runContext) throws IllegalVariableEvaluationException {
List topics = evaluateTopics(runContext);
// Creates one TopicPartition per (topic, partition) pair.
.flatMap(topic -> -> new TopicPartition(topic, partition)))
* @return the configured `topic` list.
private List evaluateTopics(final RunContext runContext) throws IllegalVariableEvaluationException {
List topics;
if (this.topic instanceof String) {
topics = List.of(runContext.render((String) this.topic));
} else if (this.topic instanceof List) {
topics = runContext.render((List) this.topic);
} else {
throw new IllegalArgumentException("Invalid topics with type '" + this.topic.getClass().getName() + "'");
return topics;
* @return the configured `since` property.
/**
 * Resolves the optional `since` property to a Long.
 * NOTE(review): only the start of the Optional chain is visible in this view; how the value
 * is rendered and converted cannot be confirmed from here.
 *
 * @param runContext the run context passed by the caller
 * @throws IllegalVariableEvaluationException declared by the signature
 */
private Long evaluateSince(final RunContext runContext) throws IllegalVariableEvaluationException {
// Optional.ofNullable yields an empty Optional when `since` is not configured.
return Optional.ofNullable(this.since)
* Validates the task's configurations.
void validateConfiguration() {
if (this.topic == null && this.topicPattern == null) {
throw new IllegalArgumentException(
"Invalid Configuration: You must configure one of the following two settings: `topic` or `topicPattern`."
if (this.topic != null && this.topicPattern != null) {
throw new IllegalArgumentException(
"Invalid Configuration: Both `topic` and `topicPattern` was configured. You must configure only one of the following two settings: `topic` or `topicPattern`."
if (this.topicPattern != null && this.groupId == null) {
throw new IllegalArgumentException(
"Invalid Configuration: `groupId` cannot be null when `topicPattern` is configured."
/**
 * Converts Kafka record headers into key/value pairs, decoding each header value as UTF-8 text.
 * NOTE(review): the return type and the terminal stream operation are mangled or missing in this view.
 *
 * @param headers the headers of a consumed record
 */
static List> processHeaders(final Headers headers) {
return StreamSupport
.stream(headers.spliterator(), false)
// NOTE(review): `new String(byte[], Charset)` throws NullPointerException on a null array;
// Kafka header values are presumably allowed to be null — confirm and guard if so.
.map(header -> Pair.of(header.key(), new String(header.value(), StandardCharsets.UTF_8)))
/**
 * Output of the task: how many messages were consumed and where they were stored.
 */
public static class Output implements io.kestra.core.models.tasks.Output {
title = "Number of messages consumed from a Kafka topic."
// Number of messages consumed.
private final Integer messagesCount;
title = "URI of a file in Kestra's internal storage containing the messages."
// Location of the file holding the consumed messages.
private URI uri;
* Interface to wrap a {@link Consumer} subscription.
interface ConsumerSubscription {
// Attaches the given consumer to whatever this subscription represents.
void subscribe(Consumer consumer, ConsumeInterface consumeInterface);
// Waits until the consumer reports a non-empty subscription, bounded by a timeout.
default void waitForSubscription(final Consumer consumer,
final ConsumeInterface consumeInterface) {
// The task's max duration is used as the timeout when configured.
// NOTE(review): the fallback branch of this ternary is not visible in this view.
var timeout = consumeInterface.getMaxDuration() != null ?
consumeInterface.getMaxDuration() :
// Wait for the subscription to happen, this avoids possible no result for the first poll due to the poll timeout
Await.until(() -> !consumer.subscription().isEmpty(), timeout);
* A topic pattern subscription.
// Subscription described by a consumer group id and a compiled topic-name pattern.
record TopicPatternSubscription(String groupId, Pattern pattern) implements ConsumerSubscription {
// NOTE(review): the body of subscribe is not visible in this view.
public void subscribe(final Consumer consumer,
final ConsumeInterface consumeInterface) {
// Human-readable form showing the pattern and the group id.
public String toString() {
return "[Subscription pattern=" + pattern + ", groupId=" + groupId + "]";
* A topic list subscription.
// Subscription described by a consumer group id and an explicit list of topics.
record TopicListSubscription(String groupId, List topics) implements ConsumerSubscription {
public void subscribe(final Consumer consumer, final ConsumeInterface consumeInterface) {
// Waits until the consumer reports a non-empty subscription.
// NOTE(review): the call that subscribes the consumer to `topics` is not visible in this view.
waitForSubscription(consumer, consumeInterface);
// Human-readable form showing the topics and the group id.
public String toString() {
return "[Subscription topics=" + topics + ", groupId=" + groupId + "]";
* A topic-partitions subscription.
// Subscription expressed as explicit topic-partitions, optionally positioned from a timestamp.
static final class TopicPartitionsSubscription implements ConsumerSubscription {
private final String groupId;
private final List topics;
// Null means no timestamp was configured.
private final Long fromTimestamp;
// Not final: stays null when created through forTopics and is resolved in subscribe().
private List topicPartitions;
// Factory for a subscription on explicitly listed topic-partitions.
// NOTE(review): the constructor arguments are not visible in this view.
public static TopicPartitionsSubscription forTopicPartitions(final String groupId,
final List topicPartitions,
final Long fromTimestamp) {
return new TopicPartitionsSubscription(
// Factory for a subscription on whole topics; the partitions are left null and resolved in subscribe().
public static TopicPartitionsSubscription forTopics(final String groupId,
final List topics,
final Long fromTimestamp) {
return new TopicPartitionsSubscription(groupId, null, topics, fromTimestamp);
// Stores the given values as-is; no validation or copying is performed.
TopicPartitionsSubscription(final String groupId,
final List topicPartitions,
final List topics,
Long fromTimestamp) {
this.groupId = groupId;
this.topicPartitions = topicPartitions;
this.topics = topics;
this.fromTimestamp = fromTimestamp;
public void subscribe(final Consumer consumer, final ConsumeInterface consumeInterface) {
// When no explicit partitions were given, look up every partition of the configured topics.
if (this.topicPartitions == null) {
this.topicPartitions = allPartitionsForTopics(consumer, topics);
// NOTE(review): the body of the no-timestamp branch is not visible in this view.
if (this.fromTimestamp == null) {
// Associates every topic-partition with the same `fromTimestamp`.
Map topicPartitionsTimestamp = this.topicPartitions
.collect(Collectors.toMap(Function.identity(), tp -> fromTimestamp));
// NOTE(review): this statement is truncated in this view; it reads the timestamp of each partition's resolved offset.
.forEach((tp, offsetAndTimestamp) -> {, offsetAndTimestamp.timestamp());
// Accessors exposing the stored values unchanged.
List topics() {
return topics;
Long fromTimestamp() {
return fromTimestamp;
List topicPartitions() {
return topicPartitions;
// Expands each topic into one TopicPartition per partition returned by consumer.partitionsFor.
private List allPartitionsForTopics(final Consumer, ?> consumer, final List topics) {
return topics
.flatMap(s -> consumer.partitionsFor(s).stream())
.map(info -> new TopicPartition(info.topic(), info.partition()))
// Human-readable form showing the topics and the group id.
public String toString() {
return "[Subscription topics=" + topics + ", groupId=" + groupId + "]";