All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.kafka.connect.util.KafkaBasedLog Maven / Gradle / Ivy

There is a newer version: 3.9.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.connect.util;

import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.KafkaException;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.errors.TimeoutException;
import org.apache.kafka.common.errors.WakeupException;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.connect.errors.ConnectException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.time.Duration;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.Future;


/**
 * 

* KafkaBasedLog provides a generic implementation of a shared, compacted log of records stored in Kafka that all * clients need to consume and, at times, agree on their offset / that they have read to the end of the log. *

*

* This functionality is useful for storing different types of data that all clients may need to agree on -- * offsets or config for example. This class runs a consumer in a background thread to continuously tail the target * topic, accepts write requests which it writes to the topic using an internal producer, and provides some helpful * utilities like checking the current log end offset and waiting until the current end of the log is reached. *

*

* To support different use cases, this class works with either single- or multi-partition topics. *

*

* Since this class is generic, it delegates the details of data storage via a callback that is invoked for each * record that is consumed from the topic. The invocation of callbacks is guaranteed to be serialized -- if the * calling class keeps track of state based on the log and only writes to it when consume callbacks are invoked * and only reads it in {@link #readToEnd(Callback)} callbacks then no additional synchronization will be required. *

*/ public class KafkaBasedLog { private static final Logger log = LoggerFactory.getLogger(KafkaBasedLog.class); private static final long CREATE_TOPIC_TIMEOUT_MS = 30000; private Time time; private final String topic; private final Map producerConfigs; private final Map consumerConfigs; private final Callback> consumedCallback; private Consumer consumer; private Producer producer; private Thread thread; private boolean stopRequested; private Queue> readLogEndOffsetCallbacks; private Runnable initializer; /** * Create a new KafkaBasedLog object. This does not start reading the log and writing is not permitted until * {@link #start()} is invoked. * * @param topic the topic to treat as a log * @param producerConfigs configuration options to use when creating the internal producer. At a minimum this must * contain compatible serializer settings for the generic types used on this class. Some * setting, such as the number of acks, will be overridden to ensure correct behavior of this * class. * @param consumerConfigs configuration options to use when creating the internal consumer. At a minimum this must * contain compatible serializer settings for the generic types used on this class. Some * setting, such as the auto offset reset policy, will be overridden to ensure correct * behavior of this class. * @param consumedCallback callback to invoke for each {@link ConsumerRecord} consumed when tailing the log * @param time Time interface * @param initializer the component that should be run when this log is {@link #start() started}; may be null */ public KafkaBasedLog(String topic, Map producerConfigs, Map consumerConfigs, Callback> consumedCallback, Time time, Runnable initializer) { this.topic = topic; this.producerConfigs = producerConfigs; this.consumerConfigs = consumerConfigs; this.consumedCallback = consumedCallback; this.stopRequested = false; this.readLogEndOffsetCallbacks = new ArrayDeque<>(); this.time = time; this.initializer = initializer != null ? initializer : new Runnable() { @Override public void run() { } }; } public void start() { log.info("Starting KafkaBasedLog with topic " + topic); initializer.run(); producer = createProducer(); consumer = createConsumer(); List partitions = new ArrayList<>(); // We expect that the topics will have been created either manually by the user or automatically by the herder List partitionInfos = null; long started = time.milliseconds(); while (partitionInfos == null && time.milliseconds() - started < CREATE_TOPIC_TIMEOUT_MS) { partitionInfos = consumer.partitionsFor(topic); Utils.sleep(Math.min(time.milliseconds() - started, 1000)); } if (partitionInfos == null) throw new ConnectException("Could not look up partition metadata for offset backing store topic in" + " allotted period. This could indicate a connectivity issue, unavailable topic partitions, or if" + " this is your first use of the topic it may have taken too long to create."); for (PartitionInfo partition : partitionInfos) partitions.add(new TopicPartition(partition.topic(), partition.partition())); consumer.assign(partitions); // Always consume from the beginning of all partitions. Necessary to ensure that we don't use committed offsets // when a 'group.id' is specified (if offsets happen to have been committed unexpectedly). consumer.seekToBeginning(partitions); readToLogEnd(); thread = new WorkThread(); thread.start(); log.info("Finished reading KafkaBasedLog for topic " + topic); log.info("Started KafkaBasedLog for topic " + topic); } public void stop() { log.info("Stopping KafkaBasedLog for topic " + topic); synchronized (this) { stopRequested = true; } consumer.wakeup(); try { thread.join(); } catch (InterruptedException e) { throw new ConnectException("Failed to stop KafkaBasedLog. Exiting without cleanly shutting " + "down it's producer and consumer.", e); } try { producer.close(); } catch (KafkaException e) { log.error("Failed to stop KafkaBasedLog producer", e); } try { consumer.close(); } catch (KafkaException e) { log.error("Failed to stop KafkaBasedLog consumer", e); } log.info("Stopped KafkaBasedLog for topic " + topic); } /** * Flushes any outstanding writes and then reads to the current end of the log and invokes the specified callback. * Note that this checks the current, offsets, reads to them, and invokes the callback regardless of whether * additional records have been written to the log. If the caller needs to ensure they have truly reached the end * of the log, they must ensure there are no other writers during this period. * * This waits until the end of all partitions has been reached. * * This method is asynchronous. If you need a synchronous version, pass an instance of * {@link org.apache.kafka.connect.util.FutureCallback} as the {@param callback} parameter and wait on it to block. * * @param callback the callback to invoke once the end of the log has been reached. */ public void readToEnd(Callback callback) { log.trace("Starting read to end log for topic {}", topic); producer.flush(); synchronized (this) { readLogEndOffsetCallbacks.add(callback); } consumer.wakeup(); } /** * Flush the underlying producer to ensure that all pending writes have been sent. */ public void flush() { producer.flush(); } /** * Same as {@link #readToEnd(Callback)} but provides a {@link Future} instead of using a callback. * @return the future associated with the operation */ public Future readToEnd() { FutureCallback future = new FutureCallback<>(null); readToEnd(future); return future; } public void send(K key, V value) { send(key, value, null); } public void send(K key, V value, org.apache.kafka.clients.producer.Callback callback) { producer.send(new ProducerRecord<>(topic, key, value), callback); } private Producer createProducer() { // Always require producer acks to all to ensure durable writes producerConfigs.put(ProducerConfig.ACKS_CONFIG, "all"); // Don't allow more than one in-flight request to prevent reordering on retry (if enabled) producerConfigs.put(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, 1); return new KafkaProducer<>(producerConfigs); } private Consumer createConsumer() { // Always force reset to the beginning of the log since this class wants to consume all available log data consumerConfigs.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); // Turn off autocommit since we always want to consume the full log consumerConfigs.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false); return new KafkaConsumer<>(consumerConfigs); } private void poll(long timeoutMs) { try { ConsumerRecords records = consumer.poll(Duration.ofMillis(timeoutMs)); for (ConsumerRecord record : records) consumedCallback.onCompletion(null, record); } catch (WakeupException e) { // Expected on get() or stop(). The calling code should handle this throw e; } catch (KafkaException e) { log.error("Error polling: " + e); } } private void readToLogEnd() { log.trace("Reading to end of offset log"); Set assignment = consumer.assignment(); Map endOffsets = consumer.endOffsets(assignment); log.trace("Reading to end of log offsets {}", endOffsets); while (!endOffsets.isEmpty()) { Iterator> it = endOffsets.entrySet().iterator(); while (it.hasNext()) { Map.Entry entry = it.next(); if (consumer.position(entry.getKey()) >= entry.getValue()) it.remove(); else { poll(Integer.MAX_VALUE); break; } } } } private class WorkThread extends Thread { public WorkThread() { super("KafkaBasedLog Work Thread - " + topic); } @Override public void run() { try { log.trace("{} started execution", this); while (true) { int numCallbacks; synchronized (KafkaBasedLog.this) { if (stopRequested) break; numCallbacks = readLogEndOffsetCallbacks.size(); } if (numCallbacks > 0) { try { readToLogEnd(); log.trace("Finished read to end log for topic {}", topic); } catch (TimeoutException e) { log.warn("Timeout while reading log to end for topic '{}'. Retrying automatically. " + "This may occur when brokers are unavailable or unreachable. Reason: {}", topic, e.getMessage()); continue; } catch (WakeupException e) { // Either received another get() call and need to retry reading to end of log or stop() was // called. Both are handled by restarting this loop. continue; } } synchronized (KafkaBasedLog.this) { // Only invoke exactly the number of callbacks we found before triggering the read to log end // since it is possible for another write + readToEnd to sneak in the meantime for (int i = 0; i < numCallbacks; i++) { Callback cb = readLogEndOffsetCallbacks.poll(); cb.onCompletion(null, null); } } try { poll(Integer.MAX_VALUE); } catch (WakeupException e) { // See previous comment, both possible causes of this wakeup are handled by starting this loop again continue; } } } catch (Throwable t) { log.error("Unexpected exception in {}", this, t); } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy