// com.datasift.dropwizard.kafka.KafkaConsumerFactory (dropwizard-extra-kafka)
// Dropwizard integration for working with Kafka.
package com.datasift.dropwizard.kafka;
import io.dropwizard.util.Duration;
import io.dropwizard.util.Size;
import com.datasift.dropwizard.kafka.consumer.KafkaConsumer;
import com.datasift.dropwizard.kafka.consumer.KafkaConsumerHealthCheck;
import com.datasift.dropwizard.kafka.consumer.StreamProcessor;
import com.datasift.dropwizard.kafka.consumer.SynchronousConsumer;
import io.dropwizard.setup.Environment;
import com.datasift.dropwizard.zookeeper.ZooKeeperFactory;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.ImmutableMap;
import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.message.Message;
import kafka.serializer.Decoder;
import kafka.serializer.DefaultDecoder;
import kafka.utils.VerifiableProperties;
import org.hibernate.validator.constraints.NotEmpty;
import javax.validation.Valid;
import javax.validation.constraints.Min;
import javax.validation.constraints.NotNull;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ScheduledExecutorService;
/**
* A factory for creating and managing {@link KafkaConsumer} instances.
*
* The {@link KafkaConsumer} implementation will be determined by the configuration used to create
* it.
*
* The resultant {@link KafkaConsumer} will have its lifecycle managed by the {@link Environment}
* and will have {@link com.codahale.metrics.health.HealthCheck}s installed to monitor its status.
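 *
 * A minimal usage sketch (the configuration accessor and {@code MyProcessor} here are
 * hypothetical, shown for illustration only):
 *
 * <pre>{@code
 * // in Application#run(Configuration, Environment):
 * KafkaConsumerFactory factory = configuration.getKafkaConsumerFactory(); // assumed accessor
 * KafkaConsumer consumer = factory
 *         .processWith(new MyProcessor()) // implements StreamProcessor<byte[], byte[]>
 *         .build(environment);            // registers lifecycle management and a health check
 * }</pre>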
*/
public class KafkaConsumerFactory extends KafkaClientFactory {
private static final Decoder<byte[]> DEFAULT_DECODER =
        new DefaultDecoder(new VerifiableProperties());
/**
 * A description of the initial offset to consume from a partition when no committed offset
 * exists.
 *
 * <ul>
 *     <li>{@code SMALLEST} - use the smallest (i.e. earliest) available offset; in effect,
 *     consuming the entire log.</li>
 *     <li>{@code LARGEST} - use the largest (i.e. latest) available offset; in effect,
 *     tailing the end of the log.</li>
 * </ul>
 */
public enum InitialOffset { SMALLEST, LARGEST }
@Valid
@NotNull
protected ZooKeeperFactory zookeeper = new ZooKeeperFactory();
@NotEmpty
protected String group = "";
@NotNull
protected Map<String, Integer> partitions = ImmutableMap.of();
protected Duration timeout = null;
@NotNull
protected Size receiveBufferSize = Size.kilobytes(64);
@NotNull
protected Size fetchSize = Size.kilobytes(300);
@NotNull
protected Duration backOffIncrement = Duration.seconds(1);
@Min(0)
protected int queuedChunks = 100;
protected boolean autoCommit = true;
@NotNull
protected Duration autoCommitInterval = Duration.seconds(10);
@NotNull
protected InitialOffset initialOffset = InitialOffset.LARGEST;
@Min(0)
protected int rebalanceRetries = 4;
@NotNull
protected Duration initialRecoveryDelay = Duration.milliseconds(500);
@NotNull
protected Duration maxRecoveryDelay = Duration.minutes(5);
@NotNull
protected Duration retryResetDelay = Duration.minutes(2);
@Min(-1)
protected int maxRecoveryAttempts = 20;
protected boolean shutdownOnFatal = false;
@NotNull
protected Duration shutdownGracePeriod = Duration.seconds(5);
@NotNull
protected Duration startDelay = Duration.seconds(2);
/**
* Returns the {@link ZooKeeperFactory} of the ZooKeeper quorum to use.
*
* @return the ZooKeeper quorum to use.
*/
@JsonProperty
public ZooKeeperFactory getZookeeper() {
return zookeeper;
}
/**
* Sets the {@link ZooKeeperFactory} of the ZooKeeper quorum to use.
*
* @param zookeeper the ZooKeeper quorum to use.
*/
@JsonProperty
public void setZookeeper(final ZooKeeperFactory zookeeper) {
this.zookeeper = zookeeper;
}
/**
* Returns the consumer group the {@link KafkaConsumer} belongs to.
*
* @return the consumer group the {@link KafkaConsumer} belongs to.
*/
@JsonProperty
public String getGroup() {
return group;
}
/**
* Sets the consumer group the {@link KafkaConsumer} belongs to.
*
* @param group the consumer group the {@link KafkaConsumer} belongs to.
*/
@JsonProperty
public void setGroup(final String group) {
this.group = group;
}
/**
* Returns a mapping of the number of partitions to consume from each topic.
*
* Topics not referenced will not be consumed from.
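 *
 * For example, a sketch consuming 4 partitions of a hypothetical {@code "events"} topic:
 *
 * <pre>{@code
 * factory.setPartitions(ImmutableMap.of("events", 4)); // "events" is an example topic name
 * }</pre>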
*
* @return a Map of topics to the number of partitions to consume from them.
*/
@JsonProperty
public Map<String, Integer> getPartitions() {
return partitions;
}
/**
* Sets a mapping of the number of partitions to consume from each topic.
*
* Topics not referenced will not be consumed from.
*
* @param partitions a Map of topics to the number of partitions to consume from them.
*/
@JsonProperty
public void setPartitions(final Map<String, Integer> partitions) {
this.partitions = partitions;
}
/**
* Returns the time the {@link KafkaConsumer} should wait to receive messages before timing out
* the stream.
*
 * When a {@link KafkaConsumer} times out a stream, a {@link
 * kafka.consumer.ConsumerTimeoutException} will be thrown by that stream's {@link
 * kafka.consumer.ConsumerIterator}.
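 *
 * A sketch of handling the timeout when iterating a stream directly (the {@code process}
 * call here is a hypothetical handler):
 *
 * <pre>{@code
 * try {
 *     while (iterator.hasNext()) {
 *         process(iterator.next()); // process(...) is a hypothetical handler
 *     }
 * } catch (ConsumerTimeoutException e) {
 *     // no message arrived within the configured timeout
 * }
 * }</pre>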
*
* @return the maximum time to wait when receiving messages from a broker before timing out.
*
* @see kafka.consumer.ConsumerIterator#next()
*/
@JsonProperty
public Duration getTimeout() {
return timeout == null
? Duration.milliseconds(-1)
: timeout;
}
/**
* Sets the time the {@link KafkaConsumer} should wait to receive messages before timing out
* the stream.
*
 * When a {@link KafkaConsumer} times out a stream, a {@link
 * kafka.consumer.ConsumerTimeoutException} will be thrown by that stream's {@link
 * kafka.consumer.ConsumerIterator}.
*
* @param timeout the maximum time to wait when receiving messages before timing out.
*
* @see kafka.consumer.ConsumerIterator#next()
*/
@JsonProperty
public void setTimeout(final Duration timeout) {
this.timeout = timeout;
}
/**
* Returns the size of the client-side receive buffer.
*
* @return the size of the client-side receive buffer.
*/
@JsonProperty
public Size getReceiveBufferSize() {
return receiveBufferSize;
}
/**
* Sets the size of the client-side receive buffer.
*
* @param size the size of the client-side receive buffer.
*/
@JsonProperty
public void setReceiveBufferSize(final Size size) {
this.receiveBufferSize = size;
}
/**
* Returns the maximum size of a batch of messages to fetch in a single request.
*
* This dictates the maximum size of a message that may be received by the {@link
* KafkaConsumer}. Messages larger than this size will cause a {@link
* kafka.common.InvalidMessageSizeException} to be thrown during iteration of the stream.
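 *
 * For example, a sketch raising the limit to roughly 2 megabytes to accommodate larger
 * messages:
 *
 * <pre>{@code
 * factory.setFetchSize(Size.megabytes(2)); // "factory" is this KafkaConsumerFactory
 * }</pre>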
*
* @return the maximum size of a batch of messages to receive in a single request.
*
* @see kafka.javaapi.message.ByteBufferMessageSet#iterator()
*/
@JsonProperty
public Size getFetchSize() {
return fetchSize;
}
/**
* Sets the maximum size of a batch of messages to fetch in a single request.
*
* This dictates the maximum size of a message that may be received by the {@link
* KafkaConsumer}. Messages larger than this size will cause a {@link
* kafka.common.InvalidMessageSizeException} to be thrown during iteration of the stream.
*
* @param size the maximum size of a batch of messages to receive in a single request.
*
* @see kafka.javaapi.message.ByteBufferMessageSet#iterator()
*/
@JsonProperty
public void setFetchSize(final Size size) {
this.fetchSize = size;
}
/**
 * Returns the amount by which the delay before re-polling a broker grows when no data is
 * returned.
 *
 * When fetching data from a broker, if there is no new data, there will be a delay before
 * polling the broker again. That delay increases linearly by this increment on each
 * successive poll attempt.
 *
 * @return the amount by which the retry delay is increased after each attempt.
 */
@JsonProperty
public Duration getBackOffIncrement() {
return backOffIncrement;
}
/**
 * Sets the amount by which the delay before re-polling a broker grows when no data is
 * returned.
 *
 * When fetching data from a broker, if there is no new data, there will be a delay before
 * polling the broker again. That delay increases linearly by this increment on each
 * successive poll attempt.
 *
 * @param increment the amount by which the retry delay is increased after each attempt.
 */
@JsonProperty
public void setBackOffIncrement(final Duration increment) {
this.backOffIncrement = increment;
}
/**
 * Returns the maximum number of chunks to queue in internal buffers.
 *
 * The consumer internally buffers fetched messages in a set of queues, which are used to
 * iterate the stream. This controls the size of those queues.
 *
 * Once a queue has been filled, it will block subsequent attempts to fill it until (some of)
 * it has been iterated.
 *
 * @return the maximum number of message chunks to buffer in each internal queue.
 */
@JsonProperty
public int getQueuedChunks() {
return queuedChunks;
}
/**
 * Sets the maximum number of chunks to queue in internal buffers.
 *
 * The consumer internally buffers fetched messages in a set of queues, which are used to
 * iterate the stream. This controls the size of those queues.
 *
 * Once a queue has been filled, it will block subsequent attempts to fill it until (some of)
 * it has been iterated.
 *
 * @param maxChunks the maximum number of message chunks to buffer in each internal queue.
 */
@JsonProperty
public void setQueuedChunks(final int maxChunks) {
this.queuedChunks = maxChunks;
}
/**
* Returns whether to automatically commit the offsets that have been consumed.
*
* @return true to commit the last consumed offset periodically; false to never commit offsets.
*
* @see #getAutoCommitInterval
*/
@JsonProperty
public boolean getAutoCommit() {
return autoCommit;
}
/**
* Sets whether to automatically commit the offsets that have been consumed.
*
* @param autoCommit true to commit the last consumed offset periodically;
* false to never commit offsets.
*
* @see #getAutoCommitInterval
*/
@JsonProperty
public void setAutoCommit(final boolean autoCommit) {
this.autoCommit = autoCommit;
}
/**
 * Returns the frequency to automatically commit previously consumed offsets, if enabled.
*
* @return the frequency to automatically commit the previously consumed offsets, when enabled.
*
* @see #getAutoCommit
*/
@JsonProperty
public Duration getAutoCommitInterval() {
return autoCommitInterval;
}
/**
 * Sets the frequency to automatically commit previously consumed offsets, if enabled.
 *
 * @param autoCommitInterval the frequency to automatically commit the previously consumed
 *                           offsets, when enabled.
 *
 * @see #getAutoCommit
 */
@JsonProperty
public void setAutoCommitInterval(final Duration autoCommitInterval) {
this.autoCommitInterval = autoCommitInterval;
}
/**
* Returns the setting for the initial offset to consume from when no committed offset exists.
*
* @return the initial offset to consume from in a partition.
*
* @see InitialOffset
*/
@JsonProperty
public InitialOffset getInitialOffset() {
return initialOffset;
}
/**
 * Sets the initial offset to consume from when no committed offset exists.
*
* @param initialOffset the initial offset to consume from in a partition.
*
* @see InitialOffset
*/
@JsonProperty
public void setInitialOffset(final InitialOffset initialOffset) {
this.initialOffset = initialOffset;
}
/**
* Returns the maximum number of retries during a re-balance.
*
* @return the maximum number of times to retry a re-balance operation.
*/
@JsonProperty
public int getRebalanceRetries() {
return rebalanceRetries;
}
/**
* Sets the maximum number of retries during a re-balance.
*
* @param rebalanceRetries the maximum number of times to retry a re-balance operation.
*/
@JsonProperty
public void setRebalanceRetries(final int rebalanceRetries) {
this.rebalanceRetries = rebalanceRetries;
}
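/**
 * Returns the initial delay before attempting to recover a stream that has failed with an
 * error.
 *
 * @return the delay before the first recovery attempt.
 */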
@JsonProperty
public Duration getInitialRecoveryDelay() {
return initialRecoveryDelay;
}
@JsonProperty
public void setInitialRecoveryDelay(final Duration initialRecoveryDelay) {
this.initialRecoveryDelay = initialRecoveryDelay;
}
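/**
 * Returns the upper bound on the delay between successive recovery attempts for a failed
 * stream.
 *
 * @return the maximum delay between recovery attempts.
 */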
@JsonProperty
public Duration getMaxRecoveryDelay() {
return maxRecoveryDelay;
}
@JsonProperty
public void setMaxRecoveryDelay(final Duration maxRecoveryDelay) {
this.maxRecoveryDelay = maxRecoveryDelay;
}
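/**
 * Returns the period of error-free operation after which the recovery back-off is reset to
 * the initial delay.
 *
 * @return the delay after which the recovery back-off resets.
 */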
@JsonProperty
public Duration getRetryResetDelay() {
return retryResetDelay;
}
@JsonProperty
public void setRetryResetDelay(final Duration retryResetDelay) {
this.retryResetDelay = retryResetDelay;
}
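/**
 * Returns the maximum number of recovery attempts to make for a failed stream before giving
 * up; the minimum permitted value of -1 is taken to mean an unbounded number of attempts.
 *
 * @return the maximum number of recovery attempts.
 */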
@JsonProperty
public int getMaxRecoveryAttempts() {
return maxRecoveryAttempts;
}
@JsonProperty
public void setMaxRecoveryAttempts(final int maxRecoveryAttempts) {
this.maxRecoveryAttempts = maxRecoveryAttempts;
}
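/**
 * Returns whether the server should be shut down when the consumer encounters a fatal,
 * unrecoverable error.
 *
 * @return true to shut the server down on a fatal error; false otherwise.
 */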
@JsonProperty
public boolean isShutdownOnFatal() {
return shutdownOnFatal;
}
@JsonProperty
public void setShutdownOnFatal(final boolean shutdownOnFatal) {
this.shutdownOnFatal = shutdownOnFatal;
}
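/**
 * Returns the grace period to allow the consumer's thread pool to shut down cleanly.
 *
 * @return the shutdown grace period for the consumer's executor.
 */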
@JsonProperty
public Duration getShutdownGracePeriod() {
return shutdownGracePeriod;
}
@JsonProperty
public void setShutdownGracePeriod(final Duration shutdownGracePeriod) {
this.shutdownGracePeriod = shutdownGracePeriod;
}
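/**
 * Returns the delay before the consumer begins consuming once the server has started.
 *
 * @return the delay before consumption begins.
 */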
@JsonProperty
public Duration getStartDelay() {
    return startDelay;
}
@JsonProperty
public void setStartDelay(final Duration startDelay) {
this.startDelay = startDelay;
}
/**
* Prepares a {@link KafkaConsumerBuilder} for a given {@link StreamProcessor}.
*
* @param processor the {@link StreamProcessor} to process the stream with.
* @return a {@link KafkaConsumerBuilder} to build a {@link KafkaConsumer} for the given
* processor.
*/
public KafkaConsumerBuilder<byte[], byte[]> processWith(
        final StreamProcessor<byte[], byte[]> processor) {
    return processWith(DEFAULT_DECODER, processor);
}
/**
* Prepares a {@link KafkaConsumerBuilder} for a given {@link Decoder} and {@link
* StreamProcessor}.
*
* The decoder instance is used to decode {@link Message}s in the stream before being passed to
* the processor.
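 *
 * A sketch using Kafka's bundled {@code StringDecoder} (the {@code MyStringProcessor} type
 * is hypothetical):
 *
 * <pre>{@code
 * factory.processWith(new kafka.serializer.StringDecoder(new VerifiableProperties()),
 *                     new MyStringProcessor()) // implements StreamProcessor<byte[], String>
 *        .build(environment);
 * }</pre>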
*
* @param decoder the {@link Decoder} instance to decode messages with
* @param processor a {@link StreamProcessor} to process the message stream
* @return a {@link KafkaConsumerBuilder} to build a {@link KafkaConsumer} for the given
* processor and decoder.
*/
public <V> KafkaConsumerBuilder<byte[], V> processWith(final Decoder<V> decoder,
                                                       final StreamProcessor<byte[], V> processor) {
    return new KafkaConsumerBuilder<>(DEFAULT_DECODER, decoder, processor);
}
/**
 * Prepares a {@link KafkaConsumerBuilder} for the given key and value {@link Decoder}s and
 * {@link StreamProcessor}.
 *
 * @param keyDecoder the {@link Decoder} instance to decode message keys with.
 * @param valueDecoder the {@link Decoder} instance to decode message values with.
 * @param processor a {@link StreamProcessor} to process the message stream.
 * @return a {@link KafkaConsumerBuilder} to build a {@link KafkaConsumer} for the given
 *         processor and decoders.
 */
public <K, V> KafkaConsumerBuilder<K, V> processWith(final Decoder<K> keyDecoder,
                                                     final Decoder<V> valueDecoder,
                                                     final StreamProcessor<K, V> processor) {
    return new KafkaConsumerBuilder<>(keyDecoder, valueDecoder, processor);
}
/**
 * A Builder for building a configured {@link KafkaConsumer}.
 *
 * @param <K> the type of the message keys the {@link KafkaConsumer} will process.
 * @param <V> the type of the message values the {@link KafkaConsumer} will process.
 */
public class KafkaConsumerBuilder<K, V> {
private final Decoder<K> keyDecoder;
private final Decoder<V> valueDecoder;
private final StreamProcessor<K, V> processor;
private static final String DEFAULT_NAME = "kafka-consumer-default";
private KafkaConsumerBuilder(final Decoder<K> keyDecoder,
                             final Decoder<V> valueDecoder,
                             final StreamProcessor<K, V> processor) {
this.keyDecoder = keyDecoder;
this.valueDecoder = valueDecoder;
this.processor = processor;
}
/**
* Builds a {@link KafkaConsumer} instance for the given {@link Environment}.
*
* @param environment the {@link Environment} to build {@link KafkaConsumer} instances for.
*
* @return a managed and configured {@link KafkaConsumer}.
*/
public KafkaConsumer build(final Environment environment) {
return build(environment, DEFAULT_NAME);
}
/**
 * Builds a {@link KafkaConsumer} instance with the given name, for the given {@link
 * Environment}.
*
* The name is used to identify the returned {@link KafkaConsumer} instance, for example, as
* the name of its {@link com.codahale.metrics.health.HealthCheck}s, thread pool, etc.
*
 * This implementation creates a new {@link ScheduledExecutorService} with a fixed-size
 * thread pool, sized with one thread per partition that the {@link KafkaConsumer} is
 * configured to consume.
*
* @param environment the {@link Environment} to build {@link KafkaConsumer} instances for.
* @param name the name of the {@link KafkaConsumer}.
*
* @return a managed and configured {@link KafkaConsumer}.
*/
public KafkaConsumer build(final Environment environment, final String name) {
int threads = 0;
for (final Integer p : getPartitions().values()) {
threads = threads + p;
}
final ScheduledExecutorService executor = environment.lifecycle()
.scheduledExecutorService(name + "-%d")
.threads(threads)
.shutdownTime(getShutdownGracePeriod())
.build();
return build(environment, executor, name);
}
/**
 * Builds a {@link KafkaConsumer} instance with the given {@link ScheduledExecutorService}
 * and name, for the given {@link Environment}.
*
* The name is used to identify the returned {@link KafkaConsumer} instance, for example, as
* the name of its {@link com.codahale.metrics.health.HealthCheck}s, etc.
*
* @param environment the {@link Environment} to build {@link KafkaConsumer} instances for.
 * @param executor the {@link ScheduledExecutorService} to process messages with.
* @param name the name of the {@link KafkaConsumer}.
*
* @return a managed and configured {@link KafkaConsumer}.
*/
public KafkaConsumer build(final Environment environment,
final ScheduledExecutorService executor,
final String name) {
final SynchronousConsumer<K, V> consumer = build(executor);
// manage the consumer
environment.lifecycle().manage(consumer);
environment.lifecycle().addServerLifecycleListener(consumer);
// add health checks
environment.healthChecks().register(name, new KafkaConsumerHealthCheck(consumer));
return consumer;
}
/**
 * Builds a {@link SynchronousConsumer} instance with this builder's configuration, using
 * the given {@link ScheduledExecutorService}.
*
* If possible, it's always preferable to use one of the overloads that take an {@link
* Environment} directly. This overload exists for situations where you don't have access to
* an {@link Environment} (e.g. some Commands or unit tests).
*
 * @param executor the {@link ScheduledExecutorService} to process messages with.
*
* @return a configured {@link KafkaConsumer}.
*/
public SynchronousConsumer<K, V> build(final ScheduledExecutorService executor) {
return new SynchronousConsumer<>(
Consumer.createJavaConsumerConnector(toConsumerConfig(KafkaConsumerFactory.this)),
getPartitions(),
keyDecoder,
valueDecoder,
processor,
executor,
getInitialRecoveryDelay(),
getMaxRecoveryDelay(),
getRetryResetDelay(),
getMaxRecoveryAttempts(),
isShutdownOnFatal(),
getStartDelay());
}
}
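/**
 * Translates a {@link KafkaConsumerFactory} into a Kafka {@link ConsumerConfig}.
 *
 * @param factory the factory holding the consumer configuration.
 *
 * @return a {@link ConsumerConfig} with the equivalent Kafka consumer properties.
 */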
static ConsumerConfig toConsumerConfig(final KafkaConsumerFactory factory) {
final ZooKeeperFactory zookeeper = factory.getZookeeper();
final Properties props = new Properties();
props.setProperty("zookeeper.connect",
zookeeper.getQuorumSpec() + zookeeper.getNamespace());
props.setProperty("zookeeper.connection.timeout.ms",
String.valueOf(zookeeper.getConnectionTimeout().toMilliseconds()));
props.setProperty("zookeeper.session.timeout.ms",
String.valueOf(zookeeper.getSessionTimeout().toMilliseconds()));
props.setProperty("group.id",
factory.getGroup());
props.setProperty("socket.timeout.ms",
String.valueOf(factory.getSocketTimeout().toMilliseconds()));
props.setProperty("socket.receive.buffer.bytes",
String.valueOf(factory.getReceiveBufferSize().toBytes()));
props.setProperty("fetch.message.max.bytes",
String.valueOf(factory.getFetchSize().toBytes()));
props.setProperty("fetch.wait.max.ms",
String.valueOf(factory.getBackOffIncrement().toMilliseconds()));
props.setProperty("queued.max.message.chunks",
String.valueOf(factory.getQueuedChunks()));
props.setProperty("auto.commit.enable",
String.valueOf(factory.getAutoCommit()));
props.setProperty("auto.commit.interval.ms",
String.valueOf(factory.getAutoCommitInterval().toMilliseconds()));
props.setProperty("auto.offset.reset",
String.valueOf(factory.getInitialOffset()).toLowerCase());
props.setProperty("consumer.timeout.ms",
String.valueOf(factory.getTimeout().toMilliseconds()));
props.setProperty("rebalance.max.retries",
String.valueOf(factory.getRebalanceRetries()));
return new ConsumerConfig(props);
}
}