
org.apache.kafka.connect.runtime.WorkerTask Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.connect.runtime;

import org.apache.kafka.common.MetricName;
import org.apache.kafka.common.MetricNameTemplate;
import org.apache.kafka.common.metrics.Gauge;
import org.apache.kafka.common.metrics.Sensor;
import org.apache.kafka.common.metrics.stats.Avg;
import org.apache.kafka.common.metrics.stats.Frequencies;
import org.apache.kafka.common.metrics.stats.Max;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.connect.connector.ConnectRecord;
import org.apache.kafka.connect.runtime.errors.ErrorHandlingMetrics;
import org.apache.kafka.connect.runtime.AbstractStatus.State;
import org.apache.kafka.connect.runtime.ConnectMetrics.MetricGroup;
import org.apache.kafka.connect.runtime.errors.ErrorReporter;
import org.apache.kafka.connect.runtime.errors.RetryWithToleranceOperator;
import org.apache.kafka.connect.storage.StatusBackingStore;
import org.apache.kafka.connect.util.ConnectorTaskId;
import org.apache.kafka.connect.util.LoggingContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;
import java.util.Locale;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;

/**
 * Handles processing for an individual task. This interface only provides the basic methods
 * used by {@link Worker} to manage the tasks. Implementations combine a user-specified Task with
 * Kafka to create a data flow.
 * <p>
 * Note on locking: since the task runs in its own thread, special care must be taken to ensure
 * that state transitions are reported correctly, in particular since some state transitions are
 * asynchronous (e.g. pause/resume). For example, changing the state to paused could cause a race
 * if the task fails at the same time. To protect from these cases, we synchronize status updates
 * using the WorkerTask's monitor.
 *
 * @param <T> The type of record initially entering the processing pipeline from the source or consumer
 * @param <R> The type of record during transformations (must be an implementation of {@link ConnectRecord})
 */
abstract class WorkerTask<T, R extends ConnectRecord<R>> implements Runnable {
    private static final Logger log = LoggerFactory.getLogger(WorkerTask.class);
    private static final String THREAD_NAME_PREFIX = "task-thread-";

    private final TaskStatus.Listener statusListener;
    private final StatusBackingStore statusBackingStore;
    protected final ConnectorTaskId id;
    protected final ClassLoader loader;
    protected final Time time;
    private final CountDownLatch shutdownLatch = new CountDownLatch(1);
    private final TaskMetricsGroup taskMetricsGroup;
    private volatile TargetState targetState;
    private volatile boolean failed;
    private volatile boolean stopping;   // indicates whether the Worker has asked the task to stop
    private volatile boolean cancelled;  // indicates whether the Worker has cancelled the task (e.g. because of slow shutdown)
    private final ErrorHandlingMetrics errorMetrics;

    protected final RetryWithToleranceOperator<T> retryWithToleranceOperator;
    protected final TransformationChain<T, R> transformationChain;
    private final Supplier<List<ErrorReporter<T>>> errorReportersSupplier;

    public WorkerTask(ConnectorTaskId id,
                      TaskStatus.Listener statusListener,
                      TargetState initialState,
                      ClassLoader loader,
                      ConnectMetrics connectMetrics,
                      ErrorHandlingMetrics errorMetrics,
                      RetryWithToleranceOperator<T> retryWithToleranceOperator,
                      TransformationChain<T, R> transformationChain,
                      Supplier<List<ErrorReporter<T>>> errorReportersSupplier,
                      Time time,
                      StatusBackingStore statusBackingStore) {
        this.id = id;
        this.taskMetricsGroup = new TaskMetricsGroup(this.id, connectMetrics, statusListener);
        this.errorMetrics = errorMetrics;
        this.statusListener = taskMetricsGroup;
        this.loader = loader;
        this.targetState = initialState;
        this.failed = false;
        this.stopping = false;
        this.cancelled = false;
        this.taskMetricsGroup.recordState(this.targetState);
        this.retryWithToleranceOperator = retryWithToleranceOperator;
        this.transformationChain = transformationChain;
        this.errorReportersSupplier = errorReportersSupplier;
        this.time = time;
        this.statusBackingStore = statusBackingStore;
    }

    public ConnectorTaskId id() {
        return id;
    }

    public ClassLoader loader() {
        return loader;
    }

    /**
     * Initialize the task for execution.
     *
     * @param taskConfig initial configuration
     */
    public abstract void initialize(TaskConfig taskConfig);

    private void triggerStop() {
        synchronized (this) {
            stopping = true;

            // wakeup any threads that are waiting for unpause
            this.notifyAll();
        }
    }

    /**
     * Stop this task from processing messages. This method does not block, it only triggers
     * shutdown. Use {@link #awaitStop} to block until completion.
     */
    public void stop() {
        triggerStop();
    }

    /**
     * Cancel this task. This won't actually stop it, but it will prevent the state from being
     * updated when it eventually does shutdown.
     */
    public void cancel() {
        cancelled = true;
        retryWithToleranceOperator.triggerStop();
    }

    /**
     * Wait for this task to finish stopping.
     *
     * @param timeoutMs time in milliseconds to await stop
     * @return true if successful, false if the timeout was reached
     */
    public boolean awaitStop(long timeoutMs) {
        try {
            return shutdownLatch.await(timeoutMs, TimeUnit.MILLISECONDS);
        } catch (InterruptedException e) {
            return false;
        }
    }

    /**
     * Remove all metrics published by this task.
     */
    public void removeMetrics() {
        // Close quietly here so that we can be sure to close everything even if one attempt fails
        Utils.closeQuietly(taskMetricsGroup::close, "Task metrics group");
        Utils.closeQuietly(errorMetrics, "Error handling metrics");
    }

    // Visible for testing
    void doStart() {
        retryWithToleranceOperator.reporters(errorReportersSupplier.get());
        initializeAndStart();
        statusListener.onStartup(id);
    }

    protected abstract void initializeAndStart();

    protected abstract void execute();

    protected abstract void close();

    protected boolean isFailed() {
        return failed;
    }

    protected boolean isStopping() {
        // The target state should never be STOPPED, but if things go wrong and it somehow is,
        // we handle that identically to a request to shut down the task
        return stopping || targetState == TargetState.STOPPED;
    }

    protected boolean isCancelled() {
        return cancelled;
    }

    // Visible for testing
    void doClose() {
        try {
            close();
        } catch (Throwable t) {
            log.error("{} Task threw an uncaught and unrecoverable exception during shutdown", this, t);
            throw t;
        } finally {
            Utils.closeQuietly(transformationChain, "transformation chain");
            Utils.closeQuietly(retryWithToleranceOperator, "retry operator");
        }
    }

    private void doRun() throws InterruptedException {
        try {
            synchronized (this) {
                if (isStopping())
                    return;

                if (targetState == TargetState.PAUSED) {
                    onPause();
                    if (!awaitUnpause()) return;
                }
            }

            doStart();
            execute();
        } catch (Throwable t) {
            failed = true;
            if (cancelled) {
                log.warn("{} After being scheduled for shutdown, the orphan task threw an uncaught exception. A newer instance of this task might be already running", this, t);
            } else if (isStopping()) {
                log.warn("{} After being scheduled for shutdown, task threw an uncaught exception.", this, t);
            } else {
                log.error("{} Task threw an uncaught and unrecoverable exception. Task is being killed and will not recover until manually restarted", this, t);
                throw t;
            }
        } finally {
            doClose();
        }
    }

    private void onShutdown() {
        synchronized (this) {
            triggerStop();

            // if we were cancelled, skip the status update since the task may have already been
            // started somewhere else
            if (!cancelled)
                statusListener.onShutdown(id);
        }
    }

    protected void onFailure(Throwable t) {
        synchronized (this) {
            triggerStop();

            // if we were cancelled, skip the status update since the task may have already been
            // started somewhere else
            if (!cancelled)
                statusListener.onFailure(id, t);
        }
    }

    protected synchronized void onPause() {
        statusListener.onPause(id);
    }

    protected synchronized void onResume() {
        statusListener.onResume(id);
    }

    @Override
    public void run() {
        // Clear all MDC parameters, in case this thread is being reused
        LoggingContext.clear();

        try (LoggingContext loggingContext = LoggingContext.forTask(id())) {
            String savedName = Thread.currentThread().getName();
            try {
                Thread.currentThread().setName(THREAD_NAME_PREFIX + id);
                doRun();
                onShutdown();
            } catch (Throwable t) {
                onFailure(t);

                if (t instanceof Error)
                    throw (Error) t;
            } finally {
                Thread.currentThread().setName(savedName);
                shutdownLatch.countDown();
            }
        }
    }

    public boolean shouldPause() {
        return this.targetState == TargetState.PAUSED;
    }

    /**
     * Await task resumption.
     *
     * @return true if the task's target state is not paused, false if the task is shutdown before resumption
     * @throws InterruptedException
     */
    protected boolean awaitUnpause() throws InterruptedException {
        synchronized (this) {
            while (targetState == TargetState.PAUSED) {
                if (isStopping())
                    return false;
                this.wait();
            }
            return true;
        }
    }

    public void transitionTo(TargetState state) {
        synchronized (this) {
            // Ignore the state change if we are stopping.
            // This has the consequence that, if we ever transition to the STOPPED target state (which
            // should never happen since whole point of that state is that it comes with a complete
            // shutdown of all the tasks for the connector), we will never be able to transition out of it.
            // Since part of transitioning to the STOPPED state is that we shut down the task and all of
            // its resources (Kafka clients, SMTs, etc.), this is a reasonable way to do things; otherwise,
            // we'd have to re-instantiate all of those resources to be able to resume (or even just pause)
            // the task.
            if (isStopping()) {
                log.debug("{} Ignoring request to transition stopped task {} to state {}", this, id, state);
                return;
            }
            if (targetState == TargetState.STOPPED)
                log.warn("{} Received unexpected request to transition task {} to state {}; will shut down in response", this, id, TargetState.STOPPED);

            this.targetState = state;
            this.notifyAll();
        }
    }

    /**
     * Include this topic to the set of active topics for the connector that this worker task
     * is running. This information is persisted in the status backing store used by this worker.
     *
     * @param topic the topic to mark as active for this connector
     */
    protected void recordActiveTopic(String topic) {
        if (statusBackingStore.getTopic(id.connector(), topic) != null) {
            // The topic is already recorded as active. No further action is required.
            return;
        }
        statusBackingStore.put(new TopicStatus(topic, id, time.milliseconds()));
    }

    /**
     * Record that offsets have been committed.
     *
     * @param duration the length of time in milliseconds for the commit attempt to complete
     */
    protected void recordCommitSuccess(long duration) {
        taskMetricsGroup.recordCommit(duration, true, null);
    }

    /**
     * Record that offsets have been committed.
     *
     * @param duration the length of time in milliseconds for the commit attempt to complete
     * @param error the unexpected error that occurred; may be null in the case of timeouts or interruptions
     */
    protected void recordCommitFailure(long duration, Throwable error) {
        taskMetricsGroup.recordCommit(duration, false, error);
    }

    /**
     * Record that a batch of records has been processed.
     *
     * @param size the number of records in the batch
     */
    protected void recordBatch(int size) {
        taskMetricsGroup.recordBatch(size);
    }

    TaskMetricsGroup taskMetricsGroup() {
        return taskMetricsGroup;
    }

    static class TaskMetricsGroup implements TaskStatus.Listener {
        private final TaskStatus.Listener delegateListener;
        private final MetricGroup metricGroup;
        private final Time time;
        private final StateTracker taskStateTimer;
        private final Sensor commitTime;
        private final Sensor batchSize;
        private final Sensor commitAttempts;

        public TaskMetricsGroup(ConnectorTaskId id, ConnectMetrics connectMetrics, TaskStatus.Listener statusListener) {
            delegateListener = statusListener;
            time = connectMetrics.time();
            taskStateTimer = new StateTracker();
            ConnectMetricsRegistry registry = connectMetrics.registry();
            metricGroup = connectMetrics.group(registry.taskGroupName(),
                    registry.connectorTagName(), id.connector(),
                    registry.taskTagName(), Integer.toString(id.task()));
            // prevent collisions by removing any previously created metrics in this group.
            metricGroup.close();

            metricGroup.addValueMetric(registry.taskStatus, now ->
                    taskStateTimer.currentState().toString().toLowerCase(Locale.getDefault())
            );

            addRatioMetric(State.RUNNING, registry.taskRunningRatio);
            addRatioMetric(State.PAUSED, registry.taskPauseRatio);

            commitTime = metricGroup.sensor("commit-time");
            commitTime.add(metricGroup.metricName(registry.taskCommitTimeMax), new Max());
            commitTime.add(metricGroup.metricName(registry.taskCommitTimeAvg), new Avg());

            batchSize = metricGroup.sensor("batch-size");
            batchSize.add(metricGroup.metricName(registry.taskBatchSizeMax), new Max());
            batchSize.add(metricGroup.metricName(registry.taskBatchSizeAvg), new Avg());

            MetricName offsetCommitFailures = metricGroup.metricName(registry.taskCommitFailurePercentage);
            MetricName offsetCommitSucceeds = metricGroup.metricName(registry.taskCommitSuccessPercentage);
            Frequencies commitFrequencies = Frequencies.forBooleanValues(offsetCommitFailures, offsetCommitSucceeds);
            commitAttempts = metricGroup.sensor("offset-commit-completion");
            commitAttempts.add(commitFrequencies);
        }

        private void addRatioMetric(final State matchingState, MetricNameTemplate template) {
            MetricName metricName = metricGroup.metricName(template);
            metricGroup.metrics().addMetricIfAbsent(metricName, null,
                    (Gauge<Double>) (config, now) -> taskStateTimer.durationRatio(matchingState, now));
        }

        void close() {
            metricGroup.close();
        }

        void recordCommit(long duration, boolean success, Throwable error) {
            if (success) {
                commitTime.record(duration);
                commitAttempts.record(1.0d);
            } else {
                commitAttempts.record(0.0d);
            }
        }

        void recordBatch(int size) {
            batchSize.record(size);
        }

        @Override
        public void onStartup(ConnectorTaskId id) {
            taskStateTimer.changeState(State.RUNNING, time.milliseconds());
            delegateListener.onStartup(id);
        }

        @Override
        public void onFailure(ConnectorTaskId id, Throwable cause) {
            taskStateTimer.changeState(State.FAILED, time.milliseconds());
            delegateListener.onFailure(id, cause);
        }

        @Override
        public void onPause(ConnectorTaskId id) {
            taskStateTimer.changeState(State.PAUSED, time.milliseconds());
            delegateListener.onPause(id);
        }

        @Override
        public void onResume(ConnectorTaskId id) {
            taskStateTimer.changeState(State.RUNNING, time.milliseconds());
            delegateListener.onResume(id);
        }

        @Override
        public void onShutdown(ConnectorTaskId id) {
            taskStateTimer.changeState(State.UNASSIGNED, time.milliseconds());
            delegateListener.onShutdown(id);
        }

        @Override
        public void onDeletion(ConnectorTaskId id) {
            taskStateTimer.changeState(State.DESTROYED, time.milliseconds());
            delegateListener.onDeletion(id);
        }

        @Override
        public void onRestart(ConnectorTaskId id) {
            taskStateTimer.changeState(State.RESTARTING, time.milliseconds());
            delegateListener.onRestart(id);
        }

        public void recordState(TargetState state) {
            switch (state) {
                case STARTED:
                    taskStateTimer.changeState(State.RUNNING, time.milliseconds());
                    break;
                case PAUSED:
                    taskStateTimer.changeState(State.PAUSED, time.milliseconds());
                    break;
                default:
                    break;
            }
        }

        public State state() {
            return taskStateTimer.currentState();
        }

        protected MetricGroup metricGroup() {
            return metricGroup;
        }
    }
}
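
For orientation, the class javadoc notes that this class only provides the basic lifecycle methods used by Worker. Below is a minimal, hedged sketch of how a caller in the same package could drive that lifecycle (initialize, run on an executor, stop, awaitStop, cancel, removeMetrics). The class WorkerTaskLifecycleSketch, the method runBriefly, and the executor/timeout names are hypothetical placeholders for illustration; the real Worker wires tasks into its own executor and plugin class loaders.

package org.apache.kafka.connect.runtime;

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

// Hypothetical helper, shown only to illustrate the lifecycle; it must live in this
// package because WorkerTask is package-private.
final class WorkerTaskLifecycleSketch {

    static void runBriefly(WorkerTask<?, ?> task, TaskConfig taskConfig, long stopTimeoutMs) {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        task.initialize(taskConfig);          // hand the task its initial configuration
        executor.submit(task);                // run() wraps doRun(), onShutdown() and onFailure()

        task.stop();                          // non-blocking: only flips the stopping flag
        if (!task.awaitStop(stopTimeoutMs)) { // block until the shutdown latch is released, or time out
            task.cancel();                    // a cancelled task skips its final status update
        }
        task.removeMetrics();                 // remove the task's metrics once it is fully shut down
        executor.shutdown();
    }
}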