/*
* Copyright (C) 2020 Graylog, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*/
package org.graylog2.inputs.transports;
import com.codahale.metrics.Gauge;
import com.codahale.metrics.InstrumentedExecutorService;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.MetricSet;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableMap;
import com.google.common.eventbus.EventBus;
import com.google.common.eventbus.Subscribe;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.common.util.concurrent.Uninterruptibles;
import com.google.inject.assistedinject.Assisted;
import com.google.inject.assistedinject.AssistedInject;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.InvalidOffsetException;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.KafkaException;
import org.apache.kafka.common.errors.AuthorizationException;
import org.apache.kafka.common.errors.WakeupException;
import org.apache.kafka.common.serialization.ByteArrayDeserializer;
import org.graylog.shaded.kafka09.consumer.Consumer;
import org.graylog.shaded.kafka09.consumer.ConsumerConfig;
import org.graylog.shaded.kafka09.consumer.ConsumerIterator;
import org.graylog.shaded.kafka09.consumer.ConsumerTimeoutException;
import org.graylog.shaded.kafka09.consumer.KafkaStream;
import org.graylog.shaded.kafka09.consumer.TopicFilter;
import org.graylog.shaded.kafka09.consumer.Whitelist;
import org.graylog.shaded.kafka09.javaapi.consumer.ConsumerConnector;
import org.graylog.shaded.kafka09.message.MessageAndMetadata;
import org.graylog2.plugin.LocalMetricRegistry;
import org.graylog2.plugin.ServerStatus;
import org.graylog2.plugin.configuration.Configuration;
import org.graylog2.plugin.configuration.ConfigurationRequest;
import org.graylog2.plugin.configuration.fields.BooleanField;
import org.graylog2.plugin.configuration.fields.ConfigurationField;
import org.graylog2.plugin.configuration.fields.DropdownField;
import org.graylog2.plugin.configuration.fields.NumberField;
import org.graylog2.plugin.configuration.fields.TextField;
import org.graylog2.plugin.inputs.MessageInput;
import org.graylog2.plugin.inputs.annotations.ConfigClass;
import org.graylog2.plugin.inputs.annotations.FactoryClass;
import org.graylog2.plugin.inputs.codecs.CodecAggregator;
import org.graylog2.plugin.inputs.transports.ThrottleableTransport;
import org.graylog2.plugin.inputs.transports.Transport;
import org.graylog2.plugin.journal.RawMessage;
import org.graylog2.plugin.lifecycles.Lifecycle;
import org.graylog2.plugin.system.NodeId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.inject.Named;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.List;
import java.util.Optional;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import static com.codahale.metrics.MetricRegistry.name;
public class KafkaTransport extends ThrottleableTransport {
public static final String CK_LEGACY = "legacy_mode";
public static final String CK_FETCH_MIN_BYTES = "fetch_min_bytes";
public static final String CK_FETCH_WAIT_MAX = "fetch_wait_max";
public static final String CK_ZOOKEEPER = "zookeeper";
public static final String CK_BOOTSTRAP = "bootstrap_server";
public static final String CK_TOPIC_FILTER = "topic_filter";
public static final String CK_THREADS = "threads";
public static final String CK_OFFSET_RESET = "offset_reset";
public static final String CK_GROUP_ID = "group_id";
public static final String CK_CUSTOM_PROPERTIES = "custom_properties";
// See https://kafka.apache.org/090/documentation.html for available values for "auto.offset.reset".
private static final ImmutableMap<String, String> OFFSET_RESET_VALUES = ImmutableMap.of(
"largest", "Automatically reset the offset to the latest offset", // "largest" OR "latest"
"smallest", "Automatically reset the offset to the earliest offset" // "smallest" OR "earliest"
);
private static final String DEFAULT_OFFSET_RESET = "largest";
private static final String DEFAULT_GROUP_ID = "graylog2";
private static final Logger LOG = LoggerFactory.getLogger(KafkaTransport.class);
private final Configuration configuration;
private final MetricRegistry localRegistry;
private final NodeId nodeId;
private final EventBus serverEventBus;
private final ServerStatus serverStatus;
private final ScheduledExecutorService scheduler;
private final MetricRegistry metricRegistry;
private final AtomicLong totalBytesRead = new AtomicLong(0);
private final AtomicLong lastSecBytesRead = new AtomicLong(0);
private final AtomicLong lastSecBytesReadTmp = new AtomicLong(0);
private final ExecutorService executor;
private volatile boolean stopped = false;
private volatile boolean paused = true;
private volatile CountDownLatch pausedLatch = new CountDownLatch(1);
private CountDownLatch stopLatch;
private ConsumerConnector cc;
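/**
 * Creates the transport and registers its gauges: bytes read during the last second and in total.
 * The written-bytes gauges always report zero because this transport only consumes messages.
 */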
@AssistedInject
public KafkaTransport(@Assisted Configuration configuration,
LocalMetricRegistry localRegistry,
NodeId nodeId,
EventBus serverEventBus,
ServerStatus serverStatus,
@Named("daemonScheduler") ScheduledExecutorService scheduler) {
super(serverEventBus, configuration);
this.configuration = configuration;
this.localRegistry = localRegistry;
this.nodeId = nodeId;
this.serverEventBus = serverEventBus;
this.serverStatus = serverStatus;
this.scheduler = scheduler;
this.metricRegistry = localRegistry;
final int numThreads = configuration.getInt(CK_THREADS);
this.executor = executorService(numThreads);
localRegistry.register("read_bytes_1sec", new Gauge<Long>() {
@Override
public Long getValue() {
return lastSecBytesRead.get();
}
});
localRegistry.register("written_bytes_1sec", new Gauge<Long>() {
@Override
public Long getValue() {
return 0L;
}
});
localRegistry.register("read_bytes_total", new Gauge<Long>() {
@Override
public Long getValue() {
return totalBytesRead.get();
}
});
localRegistry.register("written_bytes_total", new Gauge<Long>() {
@Override
public Long getValue() {
return 0L;
}
});
}
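/**
 * Reacts to server lifecycle changes published on the event bus. PAUSED, FAILED, and HALTING suspend
 * message consumption; any other state resumes it by releasing the paused latch.
 */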
@Subscribe
public void lifecycleStateChange(Lifecycle lifecycle) {
LOG.debug("Lifecycle changed to {}", lifecycle);
switch (lifecycle) {
case PAUSED:
case FAILED:
case HALTING:
pausedLatch = new CountDownLatch(1);
paused = true;
break;
default:
paused = false;
pausedLatch.countDown();
break;
}
}
@Override
public void setMessageAggregator(CodecAggregator ignored) {
}
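/**
 * Validates the ZooKeeper or bootstrap server setting for the selected mode, registers for lifecycle
 * events, starts the legacy or new consumer, and schedules the per-second read-rate update.
 */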
@Override
public void doLaunch(final MessageInput input) {
final boolean legacyMode = configuration.getBoolean(CK_LEGACY, true);
if (legacyMode) {
final String zooKeper = configuration.getString(CK_ZOOKEEPER);
if (Strings.isNullOrEmpty(zooKeper)) {
throw new IllegalArgumentException("ZooKeeper configuration setting cannot be empty");
}
} else {
final String bootStrap = configuration.getString(CK_BOOTSTRAP);
if (Strings.isNullOrEmpty(bootStrap)) {
throw new IllegalArgumentException("Bootstrap server configuration setting cannot be empty");
}
}
serverStatus.awaitRunning(() -> lifecycleStateChange(Lifecycle.RUNNING));
// listen for lifecycle changes
serverEventBus.register(this);
if (legacyMode) {
doLaunchLegacy(input);
} else {
doLaunchConsumer(input);
}
scheduler.scheduleAtFixedRate(() -> lastSecBytesRead.set(lastSecBytesReadTmp.getAndSet(0)), 1, 1, TimeUnit.SECONDS);
}
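/**
 * Starts the new consumer API (Kafka 0.9+) path: builds the consumer properties and submits one
 * ConsumerRunnable per configured processor thread.
 */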
private void doLaunchConsumer(final MessageInput input) {
final Properties props = new Properties();
props.put("group.id", configuration.getString(CK_GROUP_ID, DEFAULT_GROUP_ID));
props.put("fetch.min.bytes", String.valueOf(configuration.getInt(CK_FETCH_MIN_BYTES)));
props.put("fetch.max.wait.ms", String.valueOf(configuration.getInt(CK_FETCH_WAIT_MAX)));
//noinspection ConstantConditions
props.put("bootstrap.servers", configuration.getString(CK_BOOTSTRAP));
// Map largest -> latest, smallest -> earliest
final String resetValue = configuration.getString(CK_OFFSET_RESET, DEFAULT_OFFSET_RESET);
props.put("auto.offset.reset", resetValue.equals("largest") ? "latest" : "earliest");
// Default auto commit interval is 60 seconds. Reduce to 1 second to minimize message duplication
// if something breaks.
props.put("auto.commit.interval.ms", "1000");
props.put(org.apache.kafka.clients.consumer.ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
props.put(org.apache.kafka.clients.consumer.ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
insertCustomProperties(props);
final int numThreads = configuration.getInt(CK_THREADS);
// this is being used during shutdown to first stop all submitted jobs before committing the offsets back to zookeeper
// and then shutting down the connection.
// this is to avoid yanking away the connection from the consumer runnables
stopLatch = new CountDownLatch(numThreads);
IntStream.range(0, numThreads).forEach(i -> executor.submit(new ConsumerRunnable(props, input, i)));
}
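/**
 * Polls a dedicated KafkaConsumer in a loop and forwards every non-null record value to the message
 * input as a RawMessage, honoring pause, throttle, and stop signals between records.
 */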
private class ConsumerRunnable implements Runnable {
private final MessageInput input;
private final KafkaConsumer<byte[], byte[]> consumer;
public ConsumerRunnable(Properties props, MessageInput input, int threadId) {
this.input = input;
final Properties nprops = (Properties) props.clone();
nprops.put("client.id", "gl2-" + nodeId.getShortNodeId() + "-" + input.getId() + "-" + threadId);
consumer = new KafkaConsumer<>(nprops);
//noinspection ConstantConditions
consumer.subscribe(Pattern.compile(configuration.getString(CK_TOPIC_FILTER)));
}
private void consumeRecords(ConsumerRecords<byte[], byte[]> consumerRecords) {
for (final ConsumerRecord<byte[], byte[]> record : consumerRecords) {
if (paused) {
// we try not to spin here, so we wait until the lifecycle goes back to running.
LOG.debug("Message processing is paused, blocking until message processing is turned back on.");
Uninterruptibles.awaitUninterruptibly(pausedLatch);
}
// check for being stopped before actually getting the message, otherwise we could end up losing that message
if (stopped) {
break;
}
if (isThrottled()) {
blockUntilUnthrottled();
}
// process the message, this will immediately mark the message as having been processed. this gets tricky
// if we get an exception about processing it down below.
final byte[] bytes = record.value();
// it is possible that the message is null
if (bytes == null) {
continue;
}
totalBytesRead.addAndGet(bytes.length);
lastSecBytesReadTmp.addAndGet(bytes.length);
final RawMessage rawMessage = new RawMessage(bytes);
input.processRawMessage(rawMessage);
}
}
private Optional<ConsumerRecords<byte[], byte[]>> tryPoll() {
try {
final ConsumerRecords<byte[], byte[]> consumerRecords = consumer.poll(Duration.ofSeconds(1));
return Optional.of(consumerRecords);
} catch (WakeupException e) {
LOG.error("WakeupException in poll.");
} catch (InvalidOffsetException | AuthorizationException e) {
LOG.error("Exception in poll.", e);
}
return Optional.empty();
}
@Override
public void run() {
while (!stopped) {
final Optional<ConsumerRecords<byte[], byte[]>> consumerRecords;
try {
consumerRecords = tryPoll();
if (!consumerRecords.isPresent()) {
LOG.error("Caught recoverable exception. Retrying");
Thread.sleep(2000);
continue;
}
} catch (KafkaException | InterruptedException e) {
LOG.error("Caught unrecoverable exception in poll. Stopping input", e);
stopped = true;
break;
}
try {
consumeRecords(consumerRecords.get());
} catch (Exception e) {
LOG.error("Exception in consumer thread. Stopping input", e);
stopped = true;
break;
}
}
// explicitly commit our offsets when stopping.
// this might trigger a couple of times, but it won't hurt
consumer.commitAsync();
stopLatch.countDown();
consumer.close(Duration.ofSeconds(5));
}
}
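/**
 * Starts the legacy ZooKeeper-based high-level consumer: creates message streams for all topics
 * matching the filter and submits one consumer loop per stream to the executor.
 */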
private void doLaunchLegacy(final MessageInput input) {
final Properties props = new Properties();
props.put("group.id", configuration.getString(CK_GROUP_ID, DEFAULT_GROUP_ID));
props.put("client.id", "gl2-" + nodeId.getShortNodeId() + "-" + input.getId());
props.put("fetch.min.bytes", String.valueOf(configuration.getInt(CK_FETCH_MIN_BYTES)));
props.put("fetch.wait.max.ms", String.valueOf(configuration.getInt(CK_FETCH_WAIT_MAX)));
props.put("zookeeper.connect", configuration.getString(CK_ZOOKEEPER));
props.put("auto.offset.reset", configuration.getString(CK_OFFSET_RESET, DEFAULT_OFFSET_RESET));
// Default auto commit interval is 60 seconds. Reduce to 1 second to minimize message duplication
// if something breaks.
props.put("auto.commit.interval.ms", "1000");
// Set a consumer timeout to avoid blocking on the consumer iterator.
props.put("consumer.timeout.ms", "1000");
insertCustomProperties(props);
final int numThreads = configuration.getInt(CK_THREADS);
final ConsumerConfig consumerConfig = new ConsumerConfig(props);
cc = Consumer.createJavaConsumerConnector(consumerConfig);
final TopicFilter filter = new Whitelist(configuration.getString(CK_TOPIC_FILTER));
final List<KafkaStream<byte[], byte[]>> streams = cc.createMessageStreamsByFilter(filter, numThreads);
// this is being used during shutdown to first stop all submitted jobs before committing the offsets back to zookeeper
// and then shutting down the connection.
// this is to avoid yanking away the connection from the consumer runnables
stopLatch = new CountDownLatch(streams.size());
for (final KafkaStream<byte[], byte[]> stream : streams) {
executor.submit(new Runnable() {
@Override
public void run() {
final ConsumerIterator<byte[], byte[]> consumerIterator = stream.iterator();
boolean retry;
do {
retry = false;
try {
// we have to use hasNext() here instead of foreach, because next() marks the message as processed immediately
// noinspection WhileLoopReplaceableByForEach
while (consumerIterator.hasNext()) {
if (paused) {
// we try not to spin here, so we wait until the lifecycle goes back to running.
LOG.debug(
"Message processing is paused, blocking until message processing is turned back on.");
Uninterruptibles.awaitUninterruptibly(pausedLatch);
}
// check for being stopped before actually getting the message, otherwise we could end up losing that message
if (stopped) {
break;
}
if (isThrottled()) {
blockUntilUnthrottled();
}
// process the message, this will immediately mark the message as having been processed. this gets tricky
// if we get an exception about processing it down below.
final MessageAndMetadata<byte[], byte[]> message = consumerIterator.next();
final byte[] bytes = message.message();
// it is possible that the message is null
if (bytes == null) {
continue;
}
totalBytesRead.addAndGet(bytes.length);
lastSecBytesReadTmp.addAndGet(bytes.length);
final RawMessage rawMessage = new RawMessage(bytes);
input.processRawMessage(rawMessage);
}
} catch (ConsumerTimeoutException e) {
// Happens when there is nothing to consume, retry to check again.
retry = true;
} catch (Exception e) {
LOG.error("Kafka consumer error, stopping consumer thread.", e);
}
} while (retry && !stopped);
// explicitly commit our offsets when stopping.
// this might trigger a couple of times, but it won't hurt
cc.commitOffsets();
stopLatch.countDown();
}
});
}
}
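/**
 * Parses the newline-separated custom Kafka properties from the input configuration and merges them
 * into the given properties, overriding keys that were already set.
 */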
private void insertCustomProperties(Properties props) {
try {
final Properties customProperties = new Properties();
customProperties.load(new ByteArrayInputStream(configuration.getString(CK_CUSTOM_PROPERTIES, "").getBytes(StandardCharsets.UTF_8)));
props.putAll(customProperties);
} catch (IOException e) {
LOG.error("Failed to read custom properties", e);
}
}
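/**
 * Creates the fixed-size, metrics-instrumented thread pool ("kafka-transport-%d") used for the
 * consumer threads.
 */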
private ExecutorService executorService(int numThreads) {
final ThreadFactory threadFactory = new ThreadFactoryBuilder().setNameFormat("kafka-transport-%d").build();
return new InstrumentedExecutorService(
Executors.newFixedThreadPool(numThreads, threadFactory),
metricRegistry,
name(this.getClass(), "executor-service"));
}
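/**
 * Stops the transport: signals the consumer threads, unblocks them if paused, waits up to five
 * seconds for them to commit offsets and finish, then shuts down the legacy connector and executor.
 */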
@Override
public void doStop() {
stopped = true;
serverEventBus.unregister(this);
if (stopLatch != null) {
try {
// unpause the processors if they are blocked. this will cause them to see that we are stopping, even if they were paused.
if (pausedLatch != null && pausedLatch.getCount() > 0) {
pausedLatch.countDown();
}
final boolean allStoppedOrderly = stopLatch.await(5, TimeUnit.SECONDS);
stopLatch = null;
if (!allStoppedOrderly) {
// timed out
LOG.info(
"Stopping Kafka input timed out (waited 5 seconds for consumer threads to stop). Forcefully closing connection now. " +
"This is usually harmless when stopping the input.");
}
} catch (InterruptedException e) {
LOG.debug("Interrupted while waiting to stop input.");
}
}
if (cc != null) {
cc.shutdown();
cc = null;
}
executor.shutdown();
try {
executor.awaitTermination(1, TimeUnit.SECONDS);
} catch (InterruptedException e) {
LOG.error("Interrupted in transport executor shutdown.");
}
}
@Override
public MetricSet getMetricSet() {
return localRegistry;
}
@FactoryClass
public interface Factory extends Transport.Factory<KafkaTransport> {
@Override
KafkaTransport create(Configuration configuration);
@Override
Config getConfig();
}
@ConfigClass
public static class Config extends ThrottleableTransport.Config {
@Override
public ConfigurationRequest getRequestedConfiguration() {
final ConfigurationRequest cr = super.getRequestedConfiguration();
cr.addField(new BooleanField(CK_LEGACY,
"Legacy mode",
true,
"Use old ZooKeeper-based consumer API. (Used before Graylog 3.3)",
10
));
cr.addField(new TextField(
CK_BOOTSTRAP,
"Bootstrap Servers",
"127.0.0.1:9092",
"Comma separated list of one or more Kafka brokers. (Format: \"host1:port1,host2:port2\"). " +
"Not used in legacy mode.",
ConfigurationField.Optional.OPTIONAL,
11));
cr.addField(new TextField(
CK_ZOOKEEPER,
"ZooKeeper address (legacy mode only)",
"127.0.0.1:2181",
"Host and port of the ZooKeeper that is managing your Kafka cluster. Not used in consumer API (non-legacy) mode.",
ConfigurationField.Optional.OPTIONAL,
12));
cr.addField(new TextField(
CK_TOPIC_FILTER,
"Topic filter regex",
"^your-topic$",
"Every topic that matches this regular expression will be consumed.",
ConfigurationField.Optional.NOT_OPTIONAL));
cr.addField(new NumberField(
CK_FETCH_MIN_BYTES,
"Fetch minimum bytes",
5,
"Wait for a message batch to reach at least this size or the configured maximum wait time before fetching.",
ConfigurationField.Optional.NOT_OPTIONAL));
cr.addField(new NumberField(
CK_FETCH_WAIT_MAX,
"Fetch maximum wait time (ms)",
100,
"Wait for this time or the configured minimum size of a message batch before fetching.",
ConfigurationField.Optional.NOT_OPTIONAL));
cr.addField(new NumberField(
CK_THREADS,
"Processor threads",
2,
"Number of processor threads to spawn. Use one thread per Kafka topic partition.",
ConfigurationField.Optional.NOT_OPTIONAL));
cr.addField(new DropdownField(
CK_OFFSET_RESET,
"Auto offset reset",
DEFAULT_OFFSET_RESET,
OFFSET_RESET_VALUES,
"What to do when there is no initial offset in Kafka or if an offset is out of range",
ConfigurationField.Optional.OPTIONAL));
cr.addField(new TextField(
CK_GROUP_ID,
"Consumer group id",
DEFAULT_GROUP_ID,
"Name of the consumer group the Kafka input belongs to",
ConfigurationField.Optional.OPTIONAL));
cr.addField(new TextField(
CK_CUSTOM_PROPERTIES,
"Custom Kafka properties",
"",
"A newline separated list of Kafka properties. (e.g.: \"ssl.keystore.location=/etc/graylog/server/kafka.keystore.jks\").",
ConfigurationField.Optional.OPTIONAL,
ConfigurationField.PLACE_AT_END_POSITION,
TextField.Attribute.TEXTAREA,
TextField.Attribute.IS_SENSITIVE
));
return cr;
}
}
}