/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**/
package org.apache.kafka.connect.util;
import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.common.errors.WakeupException;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.KafkaException;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.connect.errors.ConnectException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.Future;
/**
*
* KafkaBasedLog provides a generic implementation of a shared, compacted log of records stored in Kafka that all
* clients need to consume and, at times, agree on their offset / that they have read to the end of the log.
*
*
* This functionality is useful for storing different types of data that all clients may need to agree on --
* offsets or config for example. This class runs a consumer in a background thread to continuously tail the target
* topic, accepts write requests which it writes to the topic using an internal producer, and provides some helpful
* utilities like checking the current log end offset and waiting until the current end of the log is reached.
*
*
* To support different use cases, this class works with either single- or multi-partition topics.
*
*
* Since this class is generic, it delegates the details of data storage via a callback that is invoked for each
* record that is consumed from the topic. The invocation of callbacks is guaranteed to be serialized -- if the
* calling class keeps track of state based on the log and only writes to it when consume callbacks are invoked
* and only reads it in {@link #readToEnd(Callback)} callbacks then no additional synchronization will be required.
*
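*
* A minimal usage sketch (illustrative only; the topic name, bootstrap server, byte-array serdes, and variable
* names below are assumptions rather than requirements of this class, and imports/error handling are elided):
* <pre>{@code
* Map<String, Object> producerProps = new HashMap<>();
* producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
* producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
* producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ByteArraySerializer.class.getName());
*
* Map<String, Object> consumerProps = new HashMap<>();
* consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
* consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
* consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ByteArrayDeserializer.class.getName());
*
* Callback<ConsumerRecord<byte[], byte[]>> consumed = new Callback<ConsumerRecord<byte[], byte[]>>() {
*     public void onCompletion(Throwable error, ConsumerRecord<byte[], byte[]> record) {
*         // apply the consumed record to local, in-memory state
*     }
* };
*
* KafkaBasedLog<byte[], byte[]> offsetLog =
*         new KafkaBasedLog<>("connect-offsets", producerProps, consumerProps, consumed, new SystemTime());
* offsetLog.start();
* offsetLog.send(keyBytes, valueBytes);   // keyBytes/valueBytes: already-serialized key and value
* offsetLog.readToEnd().get();            // block until everything written so far has been consumed
* offsetLog.stop();
* }</pre>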
*/
public class KafkaBasedLog<K, V> {
private static final Logger log = LoggerFactory.getLogger(KafkaBasedLog.class);
private static final long CREATE_TOPIC_TIMEOUT_MS = 30000;
private Time time;
private final String topic;
private final Map<String, Object> producerConfigs;
private final Map<String, Object> consumerConfigs;
private final Callback<ConsumerRecord<K, V>> consumedCallback;
private Consumer<K, V> consumer;
private Producer<K, V> producer;
private Thread thread;
private boolean stopRequested;
private Queue<Callback<Void>> readLogEndOffsetCallbacks;
/**
* Create a new KafkaBasedLog object. This does not start reading the log and writing is not permitted until
* {@link #start()} is invoked.
*
* @param topic the topic to treat as a log
* @param producerConfigs configuration options to use when creating the internal producer. At a minimum this must
* contain compatible serializer settings for the generic types used on this class. Some
* settings, such as the number of acks, will be overridden to ensure correct behavior of this
* class.
* @param consumerConfigs configuration options to use when creating the internal consumer. At a minimum this must
* contain compatible deserializer settings for the generic types used on this class. Some
* settings, such as the auto offset reset policy, will be overridden to ensure correct
* behavior of this class.
* @param consumedCallback callback to invoke for each {@link ConsumerRecord} consumed when tailing the log
* @param time Time interface
*/
public KafkaBasedLog(String topic, Map<String, Object> producerConfigs, Map<String, Object> consumerConfigs,
Callback<ConsumerRecord<K, V>> consumedCallback, Time time) {
this.topic = topic;
this.producerConfigs = producerConfigs;
this.consumerConfigs = consumerConfigs;
this.consumedCallback = consumedCallback;
this.stopRequested = false;
this.readLogEndOffsetCallbacks = new ArrayDeque<>();
this.time = time;
}
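/**
* Start the log. This creates the internal producer and consumer, waits for the topic's partition metadata to
* become available, reads any existing records up to the current end of the log, and then begins tailing the
* topic on a background work thread.
*/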
public void start() {
log.info("Starting KafkaBasedLog with topic " + topic);
producer = createProducer();
consumer = createConsumer();
List<TopicPartition> partitions = new ArrayList<>();
// Until we have admin utilities we can use to check for the existence of this topic and create it if it is missing,
// we rely on topic auto-creation
List<PartitionInfo> partitionInfos = null;
long started = time.milliseconds();
while (partitionInfos == null && time.milliseconds() - started < CREATE_TOPIC_TIMEOUT_MS) {
partitionInfos = consumer.partitionsFor(topic);
Utils.sleep(Math.min(time.milliseconds() - started, 1000));
}
if (partitionInfos == null)
throw new ConnectException("Could not look up partition metadata for offset backing store topic in" +
" allotted period. This could indicate a connectivity issue, unavailable topic partitions, or if" +
" this is your first use of the topic it may have taken too long to create.");
for (PartitionInfo partition : partitionInfos)
partitions.add(new TopicPartition(partition.topic(), partition.partition()));
consumer.assign(partitions);
readToLogEnd();
thread = new WorkThread();
thread.start();
log.info("Finished reading KafakBasedLog for topic " + topic);
log.info("Started KafakBasedLog for topic " + topic);
}
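/**
* Stop the log. This signals the background work thread to shut down, wakes the consumer out of any in-progress
* poll, waits for the thread to exit, and then closes the internal producer and consumer.
*/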
public void stop() {
log.info("Stopping KafkaBasedLog for topic " + topic);
synchronized (this) {
stopRequested = true;
}
consumer.wakeup();
try {
thread.join();
} catch (InterruptedException e) {
throw new ConnectException("Failed to stop KafkaBasedLog. Exiting without cleanly shutting " +
"down it's producer and consumer.", e);
}
try {
producer.close();
} catch (KafkaException e) {
log.error("Failed to stop KafkaBasedLog producer", e);
}
try {
consumer.close();
} catch (KafkaException e) {
log.error("Failed to stop KafkaBasedLog consumer", e);
}
log.info("Stopped KafkaBasedLog for topic " + topic);
}
/**
* Flushes any outstanding writes and then reads to the current end of the log and invokes the specified callback.
* Note that this checks the current offsets, reads to them, and invokes the callback regardless of whether
* additional records have been written to the log. If the caller needs to ensure they have truly reached the end
* of the log, they must ensure there are no other writers during this period.
*
* This waits until the end of all partitions has been reached.
*
* This method is asynchronous. If you need a synchronous version, pass an instance of
* {@link org.apache.kafka.connect.util.FutureCallback} as the {@code callback} parameter and wait on it to block.
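*
* For example, given a KafkaBasedLog instance {@code log} (a sketch; exception handling elided):
* <pre>{@code
* FutureCallback<Void> callback = new FutureCallback<>(null);
* log.readToEnd(callback);
* callback.get();   // blocks until the work thread has read up to the end offsets captured for this request
* }</pre>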
*
* @param callback the callback to invoke once the end of the log has been reached.
*/
public void readToEnd(Callback<Void> callback) {
producer.flush();
synchronized (this) {
readLogEndOffsetCallbacks.add(callback);
}
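// Wake the background work thread out of its long poll so it notices the newly queued callback and performs
// the read to the current log end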
consumer.wakeup();
}
/**
* Same as {@link #readToEnd(Callback)} but provides a {@link Future} instead of using a callback.
* @return the future associated with the operation
*/
public Future<Void> readToEnd() {
FutureCallback<Void> future = new FutureCallback<>(null);
readToEnd(future);
return future;
}
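/**
* Asynchronously write a record to the log's topic using the internal producer. Use
* {@link #send(Object, Object, org.apache.kafka.clients.producer.Callback)} to be notified when the write has
* been acknowledged.
* @param key the record key
* @param value the record value
*/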
public void send(K key, V value) {
send(key, value, null);
}
public void send(K key, V value, org.apache.kafka.clients.producer.Callback callback) {
producer.send(new ProducerRecord<>(topic, key, value), callback);
}
private Producer<K, V> createProducer() {
// Always require producer acks to all to ensure durable writes
producerConfigs.put(ProducerConfig.ACKS_CONFIG, "all");
return new KafkaProducer<>(producerConfigs);
}
private Consumer<K, V> createConsumer() {
// Always force reset to the beginning of the log since this class wants to consume all available log data
consumerConfigs.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
return new KafkaConsumer<>(consumerConfigs);
}
private void poll(long timeoutMs) {
try {
ConsumerRecords<K, V> records = consumer.poll(timeoutMs);
for (ConsumerRecord<K, V> record : records)
consumedCallback.onCompletion(null, record);
} catch (WakeupException e) {
// Expected on get() or stop(). The calling code should handle this
throw e;
} catch (KafkaException e) {
log.error("Error polling: " + e);
}
}
private void readToLogEnd() {
log.trace("Reading to end of offset log");
Set<TopicPartition> assignment = consumer.assignment();
// This approach to getting the current end offset is hacky until we have an API for looking these up directly
Map<TopicPartition, Long> offsets = new HashMap<>();
for (TopicPartition tp : assignment) {
long offset = consumer.position(tp);
offsets.put(tp, offset);
consumer.seekToEnd(tp);
}
Map<TopicPartition, Long> endOffsets = new HashMap<>();
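// A poll is issued after seeking to the end so the consumer resolves its positions for the seeked partitions;
// the finally block then captures those positions as the target end offsets and seeks back to the saved start
// offsets so the log is re-read from where tailing left off.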
try {
poll(0);
} finally {
// If there is an exception, even a possibly expected one like WakeupException, we need to make sure
// the consumer's position is reset or it'll get into an inconsistent state.
for (TopicPartition tp : assignment) {
long startOffset = offsets.get(tp);
long endOffset = consumer.position(tp);
if (endOffset > startOffset) {
endOffsets.put(tp, endOffset);
consumer.seek(tp, startOffset);
}
log.trace("Reading to end of log for {}: starting offset {} to ending offset {}", tp, startOffset, endOffset);
}
}
while (!endOffsets.isEmpty()) {
poll(Integer.MAX_VALUE);
Iterator<Map.Entry<TopicPartition, Long>> it = endOffsets.entrySet().iterator();
while (it.hasNext()) {
Map.Entry<TopicPartition, Long> entry = it.next();
if (consumer.position(entry.getKey()) >= entry.getValue())
it.remove();
else
break;
}
}
}
private class WorkThread extends Thread {
@Override
public void run() {
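// The work thread alternates between two phases: servicing any readToEnd() requests that were queued
// (re-reading to the log end, then completing exactly that many callbacks) and tailing the topic with a long
// poll. A consumer.wakeup() from readToEnd() or stop() raises WakeupException to break out of either phase.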
try {
while (true) {
int numCallbacks;
synchronized (KafkaBasedLog.this) {
if (stopRequested)
break;
numCallbacks = readLogEndOffsetCallbacks.size();
}
if (numCallbacks > 0) {
try {
readToLogEnd();
} catch (WakeupException e) {
// Either received another get() call and need to retry reading to end of log or stop() was
// called. Both are handled by restarting this loop.
continue;
}
}
synchronized (KafkaBasedLog.this) {
// Only invoke exactly the number of callbacks we found before triggering the read to log end
// since it is possible for another write + readToEnd to sneak in in the meantime
for (int i = 0; i < numCallbacks; i++) {
Callback<Void> cb = readLogEndOffsetCallbacks.poll();
cb.onCompletion(null, null);
}
}
try {
poll(Integer.MAX_VALUE);
} catch (WakeupException e) {
// See previous comment, both possible causes of this wakeup are handled by starting this loop again
continue;
}
}
} catch (Throwable t) {
log.error("Unexpected exception in KafkaBasedLog's work thread", t);
}
}
}
}