All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.kafka.connect.runtime.WorkerSinkTask Maven / Gradle / Ivy

There is a newer version: 3.9.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 

 *    http://www.apache.org/licenses/LICENSE-2.0
 *

* Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. **/ package org.apache.kafka.connect.runtime; import org.apache.kafka.clients.consumer.ConsumerConfig; import org.apache.kafka.clients.consumer.ConsumerRebalanceListener; import org.apache.kafka.clients.consumer.ConsumerRecord; import org.apache.kafka.clients.consumer.ConsumerRecords; import org.apache.kafka.clients.consumer.KafkaConsumer; import org.apache.kafka.clients.consumer.OffsetAndMetadata; import org.apache.kafka.clients.consumer.OffsetCommitCallback; import org.apache.kafka.common.KafkaException; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.errors.WakeupException; import org.apache.kafka.common.utils.Time; import org.apache.kafka.common.utils.Utils; import org.apache.kafka.connect.data.SchemaAndValue; import org.apache.kafka.connect.errors.ConnectException; import org.apache.kafka.connect.errors.RetriableException; import org.apache.kafka.connect.sink.SinkRecord; import org.apache.kafka.connect.sink.SinkTask; import org.apache.kafka.connect.storage.Converter; import org.apache.kafka.connect.util.ConnectorTaskId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; /** * WorkerTask that uses a SinkTask to export data from Kafka. 
*/ class WorkerSinkTask implements WorkerTask { private static final Logger log = LoggerFactory.getLogger(WorkerSinkTask.class); private final ConnectorTaskId id; private final SinkTask task; private final WorkerConfig workerConfig; private final Time time; private final Converter keyConverter; private final Converter valueConverter; private WorkerSinkTaskThread workThread; private Map taskProps; private KafkaConsumer consumer; private WorkerSinkTaskContext context; private boolean started; private final List messageBatch; private Map lastCommittedOffsets; private Map currentOffsets; private boolean pausedForRedelivery; public WorkerSinkTask(ConnectorTaskId id, SinkTask task, WorkerConfig workerConfig, Converter keyConverter, Converter valueConverter, Time time) { this.id = id; this.task = task; this.workerConfig = workerConfig; this.keyConverter = keyConverter; this.valueConverter = valueConverter; this.time = time; this.started = false; this.messageBatch = new ArrayList<>(); this.currentOffsets = new HashMap<>(); this.pausedForRedelivery = false; } @Override public void start(Map props) { taskProps = props; consumer = createConsumer(); context = new WorkerSinkTaskContext(consumer); workThread = createWorkerThread(); workThread.start(); } @Override public void stop() { // Offset commit is handled upon exit in work thread if (workThread != null) workThread.startGracefulShutdown(); consumer.wakeup(); } @Override public boolean awaitStop(long timeoutMs) { boolean success = true; if (workThread != null) { try { success = workThread.awaitShutdown(timeoutMs, TimeUnit.MILLISECONDS); if (!success) workThread.forceShutdown(); } catch (InterruptedException e) { success = false; } } task.stop(); return success; } @Override public void close() { // FIXME Kafka needs to add a timeout parameter here for us to properly obey the timeout // passed in if (consumer != null) consumer.close(); } /** * Preforms initial join process for consumer group, ensures we have an assignment, and 
initializes + starts the * SinkTask. * * @returns true if successful, false if joining the consumer group was interrupted */ public boolean joinConsumerGroupAndStart() { String topicsStr = taskProps.get(SinkTask.TOPICS_CONFIG); if (topicsStr == null || topicsStr.isEmpty()) throw new ConnectException("Sink tasks require a list of topics."); String[] topics = topicsStr.split(","); log.debug("Task {} subscribing to topics {}", id, topics); consumer.subscribe(Arrays.asList(topics), new HandleRebalance()); // Ensure we're in the group so that if start() wants to rewind offsets, it will have an assignment of partitions // to work with. Any rewinding will be handled immediately when polling starts. try { consumer.poll(0); } catch (WakeupException e) { log.error("Sink task {} was stopped before completing join group. Task initialization and start is being skipped", this); return false; } task.initialize(context); task.start(taskProps); log.info("Sink task {} finished initialization and start", this); started = true; return true; } /** Poll for new messages with the given timeout. Should only be invoked by the worker thread. */ public void poll(long timeoutMs) { try { rewind(); long retryTimeout = context.timeout(); if (retryTimeout > 0) { timeoutMs = Math.min(timeoutMs, retryTimeout); context.timeout(-1L); } log.trace("{} polling consumer with timeout {} ms", id, timeoutMs); ConsumerRecords msgs = consumer.poll(timeoutMs); assert messageBatch.isEmpty() || msgs.isEmpty(); log.trace("{} polling returned {} messages", id, msgs.count()); convertMessages(msgs); deliverMessages(); } catch (WakeupException we) { log.trace("{} consumer woken up", id); } } /** * Starts an offset commit by flushing outstanding messages from the task and then starting * the write commit. This should only be invoked by the WorkerSinkTaskThread. 
**/ public void commitOffsets(boolean sync, final int seqno) { log.info("{} Committing offsets", this); final Map offsets = new HashMap<>(currentOffsets); try { task.flush(offsets); } catch (Throwable t) { log.error("Commit of {} offsets failed due to exception while flushing:", this, t); log.error("Rewinding offsets to last committed offsets"); for (Map.Entry entry : lastCommittedOffsets.entrySet()) { log.debug("{} Rewinding topic partition {} to offset {}", id, entry.getKey(), entry.getValue().offset()); consumer.seek(entry.getKey(), entry.getValue().offset()); } currentOffsets = new HashMap<>(lastCommittedOffsets); workThread.onCommitCompleted(t, seqno); return; } if (sync) { try { consumer.commitSync(offsets); lastCommittedOffsets = offsets; workThread.onCommitCompleted(null, seqno); } catch (KafkaException e) { workThread.onCommitCompleted(e, seqno); } } else { OffsetCommitCallback cb = new OffsetCommitCallback() { @Override public void onComplete(Map offsets, Exception error) { lastCommittedOffsets = offsets; workThread.onCommitCompleted(error, seqno); } }; consumer.commitAsync(offsets, cb); } } public Time time() { return time; } public WorkerConfig workerConfig() { return workerConfig; } private KafkaConsumer createConsumer() { // Include any unknown worker configs so consumer configs can be set globally on the worker // and through to the task Map props = new HashMap<>(); props.put(ConsumerConfig.GROUP_ID_CONFIG, "connect-" + id.connector()); props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, Utils.join(workerConfig.getList(WorkerConfig.BOOTSTRAP_SERVERS_CONFIG), ",")); props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest"); props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArrayDeserializer"); props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArrayDeserializer"); 
props.putAll(workerConfig.originalsWithPrefix("consumer.")); KafkaConsumer newConsumer; try { newConsumer = new KafkaConsumer<>(props); } catch (Throwable t) { throw new ConnectException("Failed to create consumer", t); } return newConsumer; } private WorkerSinkTaskThread createWorkerThread() { return new WorkerSinkTaskThread(this, "WorkerSinkTask-" + id, time, workerConfig); } private void convertMessages(ConsumerRecords msgs) { for (ConsumerRecord msg : msgs) { log.trace("Consuming message with key {}, value {}", msg.key(), msg.value()); SchemaAndValue keyAndSchema = keyConverter.toConnectData(msg.topic(), msg.key()); SchemaAndValue valueAndSchema = valueConverter.toConnectData(msg.topic(), msg.value()); messageBatch.add( new SinkRecord(msg.topic(), msg.partition(), keyAndSchema.schema(), keyAndSchema.value(), valueAndSchema.schema(), valueAndSchema.value(), msg.offset()) ); } } private void deliverMessages() { // Finally, deliver this batch to the sink try { // Since we reuse the messageBatch buffer, ensure we give the task its own copy task.put(new ArrayList<>(messageBatch)); for (SinkRecord record : messageBatch) currentOffsets.put(new TopicPartition(record.topic(), record.kafkaPartition()), new OffsetAndMetadata(record.kafkaOffset() + 1)); messageBatch.clear(); // If we had paused all consumer topic partitions to try to redeliver data, then we should resume any that // the task had not explicitly paused if (pausedForRedelivery) { for (TopicPartition tp : consumer.assignment()) if (!context.pausedPartitions().contains(tp)) consumer.resume(tp); pausedForRedelivery = false; } } catch (RetriableException e) { log.error("RetriableException from SinkTask {}:", id, e); // If we're retrying a previous batch, make sure we've paused all topic partitions so we don't get new data, // but will still be able to poll in order to handle user-requested timeouts, keep group membership, etc. 
pausedForRedelivery = true; for (TopicPartition tp : consumer.assignment()) consumer.pause(tp); // Let this exit normally, the batch will be reprocessed on the next loop. } catch (Throwable t) { log.error("Task {} threw an uncaught and unrecoverable exception", id); log.error("Task is being killed and will not recover until manually restarted:", t); throw new ConnectException("Exiting WorkerSinkTask due to unrecoverable exception."); } } private void rewind() { Map offsets = context.offsets(); if (offsets.isEmpty()) { return; } for (TopicPartition tp: offsets.keySet()) { Long offset = offsets.get(tp); if (offset != null) { log.trace("Rewind {} to offset {}.", tp, offset); consumer.seek(tp, offset); lastCommittedOffsets.put(tp, new OffsetAndMetadata(offset)); currentOffsets.put(tp, new OffsetAndMetadata(offset)); } } context.clearOffsets(); } private class HandleRebalance implements ConsumerRebalanceListener { @Override public void onPartitionsAssigned(Collection partitions) { lastCommittedOffsets = new HashMap<>(); currentOffsets = new HashMap<>(); for (TopicPartition tp : partitions) { long pos = consumer.position(tp); lastCommittedOffsets.put(tp, new OffsetAndMetadata(pos)); currentOffsets.put(tp, new OffsetAndMetadata(pos)); log.debug("{} assigned topic partition {} with offset {}", id, tp, pos); } // If we paused everything for redelivery (which is no longer relevant since we discarded the data), make // sure anything we paused that the task didn't request to be paused *and* which we still own is resumed. // Also make sure our tracking of paused partitions is updated to remove any partitions we no longer own. 
if (pausedForRedelivery) { pausedForRedelivery = false; Set assigned = new HashSet<>(partitions); Set taskPaused = context.pausedPartitions(); for (TopicPartition tp : partitions) { if (!taskPaused.contains(tp)) consumer.resume(tp); } Iterator tpIter = taskPaused.iterator(); while (tpIter.hasNext()) { TopicPartition tp = tpIter.next(); if (assigned.contains(tp)) tpIter.remove(); } } // Instead of invoking the assignment callback on initialization, we guarantee the consumer is ready upon // task start. Since this callback gets invoked during that initial setup before we've started the task, we // need to guard against invoking the user's callback method during that period. if (started) task.onPartitionsAssigned(partitions); } @Override public void onPartitionsRevoked(Collection partitions) { if (started) { task.onPartitionsRevoked(partitions); commitOffsets(true, -1); } // Make sure we don't have any leftover data since offsets will be reset to committed positions messageBatch.clear(); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy