
org.apache.kafka.connect.runtime.distributed.DistributedHerder

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 **/

package org.apache.kafka.connect.runtime.distributed;

import org.apache.kafka.common.errors.WakeupException;
import org.apache.kafka.common.config.ConfigException;
import org.apache.kafka.common.utils.SystemTime;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.connect.connector.ConnectorContext;
import org.apache.kafka.connect.errors.AlreadyExistsException;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.errors.NotFoundException;
import org.apache.kafka.connect.runtime.ConnectorConfig;
import org.apache.kafka.connect.runtime.Herder;
import org.apache.kafka.connect.runtime.HerderConnectorContext;
import org.apache.kafka.connect.runtime.TaskConfig;
import org.apache.kafka.connect.runtime.Worker;
import org.apache.kafka.connect.runtime.rest.RestServer;
import org.apache.kafka.connect.runtime.rest.entities.ConnectorInfo;
import org.apache.kafka.connect.runtime.rest.entities.TaskInfo;
import org.apache.kafka.connect.sink.SinkConnector;
import org.apache.kafka.connect.storage.KafkaConfigStorage;
import org.apache.kafka.connect.util.Callback;
import org.apache.kafka.connect.util.ConnectorTaskId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 * <p>
 * Distributed "herder" that coordinates with other workers to spread work across multiple processes.
 * </p>
 * <p>
 * Under the hood, this is implemented as a group managed by Kafka's group membership facilities (i.e. the generalized
 * group/consumer coordinator). Each instance of DistributedHerder joins the group and indicates what its current
 * configuration state is (where it is in the configuration log). The group coordinator selects one member to take
 * this information and assign each instance a subset of the active connectors and tasks to execute. This assignment
 * is currently performed in a simple round-robin fashion, but this is not guaranteed -- the herder may also choose,
 * e.g., a sticky assignment to avoid the usual start/stop costs associated with connectors and tasks. Once an
 * assignment is received, the DistributedHerder simply runs its assigned connectors and tasks in a Worker.
 * </p>
 * <p>
 * In addition to distributing work, the DistributedHerder uses the leader determined during the work assignment
 * to select a leader for this generation of the group, who is responsible for actions that can only be performed
 * by a single node at a time. Most importantly, this includes writing updated configurations for connectors and
 * tasks (and therefore also creating, destroying, and scaling connectors up and down).
 * </p>
 */
public class DistributedHerder implements Herder, Runnable {
    private static final Logger log = LoggerFactory.getLogger(DistributedHerder.class);

    private static final long RECONFIGURE_CONNECTOR_TASKS_BACKOFF_MS = 250;

    private final Worker worker;
    private final KafkaConfigStorage configStorage;
    private ClusterConfigState configState;
    private final Time time;

    private final int workerSyncTimeoutMs;
    private final int workerUnsyncBackoffMs;

    private final WorkerGroupMember member;
    private final AtomicBoolean stopping;
    private final CountDownLatch stopLatch = new CountDownLatch(1);

    // Track enough information about the current membership state to be able to determine which requests via the API
    // and from other nodes are safe to process
    private boolean rebalanceResolved;
    private ConnectProtocol.Assignment assignment;

    // To handle most external requests, like creating or destroying a connector, we can use a generic request where
    // the caller specifies all the code that should be executed.
    private final Queue<HerderRequest> requests = new PriorityQueue<>();

    // Config updates can be collected and applied together when possible. Also, we need to take care to rebalance when
    // needed (e.g. task reconfiguration, which requires everyone to coordinate offset commits).
    private Set<String> connectorConfigUpdates = new HashSet<>();
    private boolean needsReconfigRebalance;

    private final ExecutorService forwardRequestExecutor;

    public DistributedHerder(DistributedConfig config, Worker worker, String restUrl) {
        this(config, worker, null, null, restUrl, new SystemTime());
    }

    // public for testing
    public DistributedHerder(DistributedConfig config, Worker worker, KafkaConfigStorage configStorage,
                             WorkerGroupMember member, String restUrl, Time time) {
        this.worker = worker;
        if (configStorage != null) {
            // For testing. Assume configuration has already been performed
            this.configStorage = configStorage;
        } else {
            this.configStorage = new KafkaConfigStorage(worker.getInternalValueConverter(),
                    connectorConfigCallback(), taskConfigCallback());
            this.configStorage.configure(config.originals());
        }
        configState = ClusterConfigState.EMPTY;
        this.time = time;

        this.workerSyncTimeoutMs = config.getInt(DistributedConfig.WORKER_SYNC_TIMEOUT_MS_CONFIG);
        this.workerUnsyncBackoffMs = config.getInt(DistributedConfig.WORKER_UNSYNC_BACKOFF_MS_CONFIG);

        this.member = member != null ? member : new WorkerGroupMember(config, restUrl, this.configStorage, rebalanceListener());
        stopping = new AtomicBoolean(false);

        rebalanceResolved = true; // becomes false when an assignment has been received but its work has not been started yet
        needsReconfigRebalance = false;

        forwardRequestExecutor = Executors.newSingleThreadExecutor();
    }

    @Override
    public void start() {
        Thread thread = new Thread(this, "DistributedHerder");
        thread.start();
    }
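    // Entry point for the dedicated "DistributedHerder" thread created by start(): ticks until stop()
    // sets the stopping flag, then cleans up via halt() and releases stopLatch.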
    public void run() {
        try {
            log.info("Herder starting");

            configStorage.start();

            log.info("Herder started");

            while (!stopping.get()) {
                tick();
            }

            halt();

            log.info("Herder stopped");
        } catch (Throwable t) {
            log.error("Uncaught exception in herder work thread, exiting: ", t);
            stopLatch.countDown();
            System.exit(1);
        } finally {
            stopLatch.countDown();
        }
    }

    // public for testing
    public void tick() {
        // The main loop does two primary things: 1) drive the group membership protocol, responding to rebalance events
        // as they occur, and 2) handle external requests targeted at the leader. All the "real" work of the herder is
        // performed in this thread, which keeps synchronization straightforward at the cost of some operations possibly
        // blocking up this thread (especially those in callbacks due to rebalance events).

        try {
            member.ensureActive();
            // Ensure we're in a good state in our group. If not, restart and everything should be set up to rejoin
            if (!handleRebalanceCompleted()) return;
        } catch (WakeupException e) {
            // May be due to a request from another thread, or might be stopping. If the latter, we need to check the
            // flag immediately. If the former, we need to re-run the ensureActive call since we can't handle requests
            // unless we're in the group.
            return;
        }

        // Process any external requests
        final long now = time.milliseconds();
        long nextRequestTimeoutMs = Long.MAX_VALUE;
        while (true) {
            final HerderRequest next;
            synchronized (this) {
                next = requests.peek();
                if (next == null) {
                    break;
                } else if (now >= next.at) {
                    requests.poll();
                } else {
                    nextRequestTimeoutMs = next.at - now;
                    break;
                }
            }

            try {
                next.action().call();
                next.callback().onCompletion(null, null);
            } catch (Throwable t) {
                next.callback().onCompletion(t, null);
            }
        }

        // Process any configuration updates
        Set<String> connectorConfigUpdatesCopy = null;
        synchronized (this) {
            if (needsReconfigRebalance || !connectorConfigUpdates.isEmpty()) {
                // Connector reconfigs only need local updates since there is no coordination between workers required.
                // However, if connectors were added or removed, work needs to be rebalanced since we have more work
                // items to distribute among workers.
                ClusterConfigState newConfigState = configStorage.snapshot();
                if (!newConfigState.connectors().equals(configState.connectors()))
                    needsReconfigRebalance = true;
                configState = newConfigState;
                if (needsReconfigRebalance) {
                    // Task reconfigs require a rebalance. Request the rebalance, clean out state, and then restart
                    // this loop, which will then ensure the rebalance occurs without any other requests being
                    // processed until it completes.
                    member.requestRejoin();
                    // Any connector config updates will be addressed during the rebalance too
                    connectorConfigUpdates.clear();
                    needsReconfigRebalance = false;
                    return;
                } else if (!connectorConfigUpdates.isEmpty()) {
                    // We can't start/stop while locked since starting connectors can cause task updates that will
                    // require writing configs, which in turn make callbacks into this class from another thread that
                    // require acquiring a lock. This leads to deadlock. Instead, just copy the info we need and process
                    // the updates after unlocking.
                    connectorConfigUpdatesCopy = connectorConfigUpdates;
                    connectorConfigUpdates = new HashSet<>();
                }
            }
        }

        if (connectorConfigUpdatesCopy != null) {
            // If we only have connector config updates, we can just bounce the updated connectors that are
            // currently assigned to this worker.
            Set<String> localConnectors = assignment == null ? Collections.<String>emptySet() : new HashSet<>(assignment.connectors());
            for (String connectorName : connectorConfigUpdatesCopy) {
                if (!localConnectors.contains(connectorName))
                    continue;
                boolean remains = configState.connectors().contains(connectorName);
                log.info("Handling connector-only config update by {} connector {}",
                        remains ? "restarting" : "stopping", connectorName);
                worker.stopConnector(connectorName);
                // The update may be a deletion, so verify we actually need to restart the connector
                if (remains)
                    startConnector(connectorName);
            }
        }

        // Let the group take any actions it needs to
        try {
            member.poll(nextRequestTimeoutMs);
            // Ensure we're in a good state in our group. If not, restart and everything should be set up to rejoin
            if (!handleRebalanceCompleted()) return;
        } catch (WakeupException e) { // FIXME should not be WakeupException
            // Ignore. Just indicates we need to check the exit flag, for requested actions, etc.
        }
    }
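    // Shuts down everything this worker currently owns: stops assigned connectors and tasks, stops the group member,
    // fails any queued requests with a ConnectException, and stops the config storage. Called from the work thread
    // during shutdown once the stopping flag has been observed.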
    // public for testing
    public void halt() {
        synchronized (this) {
            // Clean up any connectors and tasks that are still running.
            log.info("Stopping connectors and tasks that are still assigned to this worker.");
            for (String connName : new HashSet<>(worker.connectorNames())) {
                try {
                    worker.stopConnector(connName);
                } catch (Throwable t) {
                    log.error("Failed to shut down connector " + connName, t);
                }
            }
            for (ConnectorTaskId taskId : new HashSet<>(worker.taskIds())) {
                try {
                    worker.stopTask(taskId);
                } catch (Throwable t) {
                    log.error("Failed to shut down task " + taskId, t);
                }
            }

            member.stop();

            // Explicitly fail any outstanding requests so they actually get a response and get an understandable reason
            // for their failure
            while (!requests.isEmpty()) {
                HerderRequest request = requests.poll();
                request.callback().onCompletion(new ConnectException("Worker is shutting down"), null);
            }

            if (configStorage != null)
                configStorage.stop();
        }
    }

    @Override
    public void stop() {
        log.info("Herder stopping");

        stopping.set(true);
        member.wakeup();
        while (stopLatch.getCount() > 0) {
            try {
                stopLatch.await();
            } catch (InterruptedException e) {
                // ignore, should not happen
            }
        }

        forwardRequestExecutor.shutdown();
        try {
            if (!forwardRequestExecutor.awaitTermination(10000, TimeUnit.MILLISECONDS))
                forwardRequestExecutor.shutdownNow();
        } catch (InterruptedException e) {
            // ignore
        }

        log.info("Herder stopped");
    }

    @Override
    public synchronized void connectors(final Callback<Collection<String>> callback) {
        log.trace("Submitting connector listing request");

        addRequest(
                new Callable<Void>() {
                    @Override
                    public Void call() throws Exception {
                        if (!checkConfigSynced(callback))
                            return null;
                        callback.onCompletion(null, configState.connectors());
                        return null;
                    }
                },
                forwardErrorCallback(callback)
        );
    }

    @Override
    public synchronized void connectorInfo(final String connName, final Callback<ConnectorInfo> callback) {
        log.trace("Submitting connector info request {}", connName);

        addRequest(
                new Callable<Void>() {
                    @Override
                    public Void call() throws Exception {
                        if (!checkConfigSynced(callback))
                            return null;

                        if (!configState.connectors().contains(connName)) {
                            callback.onCompletion(new NotFoundException("Connector " + connName + " not found"), null);
                        } else {
                            callback.onCompletion(null, new ConnectorInfo(connName, configState.connectorConfig(connName),
                                    configState.tasks(connName)));
                        }
                        return null;
                    }
                },
                forwardErrorCallback(callback)
        );
    }

    @Override
    public void connectorConfig(String connName, final Callback<Map<String, String>> callback) {
        // Subset of connectorInfo, so piggy back on that implementation
        log.trace("Submitting connector config read request {}", connName);
        connectorInfo(connName, new Callback<ConnectorInfo>() {
            @Override
            public void onCompletion(Throwable error, ConnectorInfo result) {
                if (error != null)
                    callback.onCompletion(error, null);
                else
                    callback.onCompletion(null, result.config());
            }
        });
    }
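    // Connector config writes are routed to the leader: non-leader workers complete the callback with a
    // NotLeaderException carrying the leader's URL so the request can be forwarded, while the leader validates the
    // request and writes the new config to config storage.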
    @Override
    public void putConnectorConfig(final String connName, Map<String, String> config, final boolean allowReplace,
                                   final Callback<Created<ConnectorInfo>> callback) {
        final Map<String, String> connConfig;
        if (config == null) {
            connConfig = null;
        } else if (!config.containsKey(ConnectorConfig.NAME_CONFIG)) {
            connConfig = new HashMap<>(config);
            connConfig.put(ConnectorConfig.NAME_CONFIG, connName);
        } else {
            connConfig = config;
        }

        log.trace("Submitting connector config write request {}", connName);

        addRequest(
                new Callable<Void>() {
                    @Override
                    public Void call() throws Exception {
                        log.trace("Handling connector config request {}", connName);
                        if (!isLeader()) {
                            callback.onCompletion(new NotLeaderException("Only the leader can set connector configs.", leaderUrl()), null);
                            return null;
                        }

                        boolean exists = configState.connectors().contains(connName);
                        if (!allowReplace && exists) {
                            callback.onCompletion(new AlreadyExistsException("Connector " + connName + " already exists"), null);
                            return null;
                        }

                        if (connConfig == null && !exists) {
                            callback.onCompletion(new NotFoundException("Connector " + connName + " not found"), null);
                            return null;
                        }

                        log.trace("Submitting connector config {} {} {}", connName, allowReplace, configState.connectors());
                        configStorage.putConnectorConfig(connName, connConfig);

                        boolean created = !exists && connConfig != null;
                        // Note that we use the updated connector config despite the fact that we don't have an updated
                        // snapshot yet. The existing task info should still be accurate.
                        ConnectorInfo info = connConfig == null ? null :
                                new ConnectorInfo(connName, connConfig, configState.tasks(connName));
                        callback.onCompletion(null, new Created<>(created, info));
                        return null;
                    }
                },
                forwardErrorCallback(callback)
        );
    }

    @Override
    public synchronized void requestTaskReconfiguration(final String connName) {
        log.trace("Submitting connector task reconfiguration request {}", connName);

        addRequest(
                new Callable<Void>() {
                    @Override
                    public Void call() throws Exception {
                        reconfigureConnectorTasksWithRetry(connName);
                        return null;
                    }
                },
                new Callback<Void>() {
                    @Override
                    public void onCompletion(Throwable error, Void result) {
                        if (error != null) {
                            log.error("Unexpected error during task reconfiguration: ", error);
                            log.error("Task reconfiguration for {} failed unexpectedly, this connector will not be properly reconfigured unless manually triggered.", connName);
                        }
                    }
                }
        );
    }

    @Override
    public synchronized void taskConfigs(final String connName, final Callback<List<TaskInfo>> callback) {
        log.trace("Submitting get task configuration request {}", connName);

        addRequest(
                new Callable<Void>() {
                    @Override
                    public Void call() throws Exception {
                        if (!checkConfigSynced(callback))
                            return null;

                        if (!configState.connectors().contains(connName)) {
                            callback.onCompletion(new NotFoundException("Connector " + connName + " not found"), null);
                        } else {
                            List<TaskInfo> result = new ArrayList<>();
                            for (int i = 0; i < configState.taskCount(connName); i++) {
                                ConnectorTaskId id = new ConnectorTaskId(connName, i);
                                result.add(new TaskInfo(id, configState.taskConfig(id)));
                            }
                            callback.onCompletion(null, result);
                        }
                        return null;
                    }
                },
                forwardErrorCallback(callback)
        );
    }

    @Override
    public synchronized void putTaskConfigs(final String connName, final List<Map<String, String>> configs, final Callback<Void> callback) {
        log.trace("Submitting put task configuration request {}", connName);

        addRequest(
                new Callable<Void>() {
                    @Override
                    public Void call() throws Exception {
                        if (!isLeader())
                            callback.onCompletion(new NotLeaderException("Only the leader may write task configurations.", leaderUrl()), null);
                        else if (!configState.connectors().contains(connName))
                            callback.onCompletion(new NotFoundException("Connector " + connName + " not found"), null);
                        else {
                            configStorage.putTaskConfigs(taskConfigListAsMap(connName, configs));
                            callback.onCompletion(null, null);
                        }

                        return null;
                    }
                },
                forwardErrorCallback(callback)
        );
    }
    // Should only be called from work thread, so synchronization should not be needed
    private boolean isLeader() {
        return assignment != null && member.memberId().equals(assignment.leader());
    }

    /**
     * Get the URL for the leader's REST interface, or null if we do not have the leader's URL yet.
     */
    private String leaderUrl() {
        if (assignment == null)
            return null;
        return assignment.leaderUrl();
    }

    /**
     * Handle post-assignment operations, either trying to resolve issues that kept assignment from completing, or
     * getting this node into sync and its work started.
     *
     * @return false if we couldn't finish
     */
    private boolean handleRebalanceCompleted() {
        if (this.rebalanceResolved)
            return true;

        // We need to handle a variety of cases after a rebalance:
        // 1. Assignment failed
        //  1a. We are the leader for the round. We will be leader again if we rejoin now, so we need to catch up before
        //      even attempting to. If we can't, we should drop out of the group because we will block everyone from
        //      making progress. We can backoff and try rejoining later.
        //  1b. We are not the leader. We might need to catch up. If we're already caught up we can rejoin immediately,
        //      otherwise, we just want to wait indefinitely to catch up and rejoin whenever we're finally ready.
        // 2. Assignment succeeded.
        //  2a. We are caught up on configs. Awesome! We can proceed to run our assigned work.
        //  2b. We need to try to catch up. We can do this potentially indefinitely because if it takes too long, we'll
        //      be kicked out of the group anyway due to lack of heartbeats.

        boolean needsReadToEnd = false;
        long syncConfigsTimeoutMs = Long.MAX_VALUE;
        boolean needsRejoin = false;
        if (assignment.failed()) {
            needsRejoin = true;
            if (isLeader()) {
                log.warn("Join group completed, but assignment failed and we are the leader. Reading to end of config and retrying.");
                needsReadToEnd = true;
                syncConfigsTimeoutMs = workerSyncTimeoutMs;
            } else if (configState.offset() < assignment.offset()) {
                log.warn("Join group completed, but assignment failed and we are lagging. Reading to end of config and retrying.");
                needsReadToEnd = true;
            } else {
                log.warn("Join group completed, but assignment failed. We were up to date, so just retrying.");
            }
        } else {
            if (configState.offset() < assignment.offset()) {
                log.warn("Catching up to assignment's config offset.");
                needsReadToEnd = true;
            }
        }

        if (needsReadToEnd) {
            // Force exiting this method to avoid creating any connectors/tasks and require immediate rejoining if
            // we timed out. This should only happen if we were the leader and didn't finish quickly enough, in which
            // case we've waited a long time and should have already left the group OR the timeout should have been
            // very long and not having finished also indicates we've waited longer than the session timeout.
            if (!readConfigToEnd(syncConfigsTimeoutMs))
                needsRejoin = true;
        }

        if (needsRejoin) {
            member.requestRejoin();
            return false;
        }

        // Should still validate that they match since we may have gone *past* the required offset, in which case we
        // should *not* start any tasks and rejoin
        if (configState.offset() != assignment.offset()) {
            log.info("Current config state offset {} does not match group assignment {}. Forcing rebalance.", configState.offset(), assignment.offset());
            member.requestRejoin();
            return false;
        }

        startWork();

        // We only mark this as resolved once we've actually started work, which allows us to correctly track what
        // work is currently active and running. If we bail early, the main tick loop + having requested rejoin
        // guarantees we'll attempt to rejoin before executing this method again.
        rebalanceResolved = true;
        return true;
    }

    /**
     * Try to read to the end of the config log within the given timeout
     * @param timeoutMs maximum time to wait to sync to the end of the log
     * @return true if successful, false if timed out
     */
    private boolean readConfigToEnd(long timeoutMs) {
        log.info("Current config state offset {} is behind group assignment {}, reading to end of config log",
                configState.offset(), assignment.offset());
        try {
            configStorage.readToEnd().get(timeoutMs, TimeUnit.MILLISECONDS);
            configState = configStorage.snapshot();
            log.info("Finished reading to end of log and updated config snapshot, new config log offset: {}", configState.offset());
            return true;
        } catch (TimeoutException e) {
            log.warn("Didn't reach end of config log quickly enough", e);
            // TODO: With explicit leave group support, it would be good to explicitly leave the group *before* this
            // backoff since it'll be longer than the session timeout
            if (isLeader())
                backoff(workerUnsyncBackoffMs);
            return false;
        } catch (InterruptedException | ExecutionException e) {
            throw new ConnectException("Error trying to catch up after assignment", e);
        }
    }
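    // Blocking sleep on the herder thread; no requests or group polling are processed while backing off.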
    private void backoff(long ms) {
        Utils.sleep(ms);
    }

    private void startWork() {
        // Start assigned connectors and tasks
        log.info("Starting connectors and tasks using config offset {}", assignment.offset());
        for (String connectorName : assignment.connectors()) {
            try {
                startConnector(connectorName);
            } catch (ConfigException e) {
                log.error("Couldn't instantiate connector " + connectorName + " because it has an invalid connector " +
                        "configuration. This connector will not execute until reconfigured.", e);
            }
        }
        for (ConnectorTaskId taskId : assignment.tasks()) {
            try {
                log.info("Starting task {}", taskId);
                Map<String, String> configs = configState.taskConfig(taskId);
                TaskConfig taskConfig = new TaskConfig(configs);
                worker.addTask(taskId, taskConfig);
            } catch (ConfigException e) {
                log.error("Couldn't instantiate task " + taskId + " because it has an invalid task " +
                        "configuration. This task will not execute until reconfigured.", e);
            }
        }
        log.info("Finished starting connectors and tasks");
    }

    // Helper for starting a connector with the given name, which will extract & parse the config, generate connector
    // context and add to the worker. This needs to be called from within the main worker thread for this herder.
    private void startConnector(String connectorName) {
        log.info("Starting connector {}", connectorName);
        Map<String, String> configs = configState.connectorConfig(connectorName);
        ConnectorConfig connConfig = new ConnectorConfig(configs);
        String connName = connConfig.getString(ConnectorConfig.NAME_CONFIG);
        ConnectorContext ctx = new HerderConnectorContext(DistributedHerder.this, connName);
        worker.addConnector(connConfig, ctx);

        // Immediately request configuration since this could be a brand new connector. However, also only update those
        // task configs if they are actually different from the existing ones to avoid unnecessary updates when this is
        // just restoring an existing connector.
        reconfigureConnectorTasksWithRetry(connName);
    }
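    // Ask the connector for its current task configs and write them if they changed; on failure, schedule a retry
    // through the request queue after RECONFIGURE_CONNECTOR_TASKS_BACKOFF_MS so the retry runs on the herder thread.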
    private void reconfigureConnectorTasksWithRetry(final String connName) {
        reconfigureConnector(connName, new Callback<Void>() {
            @Override
            public void onCompletion(Throwable error, Void result) {
                // If we encountered an error, we don't have much choice but to just retry. If we don't, we could get
                // stuck with a connector that thinks it has generated tasks, but wasn't actually successful and therefore
                // never makes progress. The retry has to run through a HerderRequest since this callback could be happening
                // from the HTTP request forwarding thread.
                if (error != null) {
                    log.error("Failed to reconfigure connector's tasks, retrying after backoff:", error);
                    addRequest(RECONFIGURE_CONNECTOR_TASKS_BACKOFF_MS,
                            new Callable<Void>() {
                                @Override
                                public Void call() throws Exception {
                                    reconfigureConnectorTasksWithRetry(connName);
                                    return null;
                                }
                            },
                            new Callback<Void>() {
                                @Override
                                public void onCompletion(Throwable error, Void result) {
                                    log.error("Unexpected error during connector task reconfiguration: ", error);
                                    log.error("Task reconfiguration for {} failed unexpectedly, this connector will not be properly reconfigured unless manually triggered.", connName);
                                }
                            }
                    );
                }
            }
        });
    }

    // Updates configurations for a connector by requesting them from the connector, filling in parameters provided
    // by the system, then checks whether any configs have actually changed before submitting the new configs to storage
    private void reconfigureConnector(final String connName, final Callback<Void> cb) {
        try {
            Map<String, String> configs = configState.connectorConfig(connName);
            ConnectorConfig connConfig = new ConnectorConfig(configs);

            List<String> sinkTopics = null;
            if (SinkConnector.class.isAssignableFrom(connConfig.getClass(ConnectorConfig.CONNECTOR_CLASS_CONFIG)))
                sinkTopics = connConfig.getList(ConnectorConfig.TOPICS_CONFIG);

            final List<Map<String, String>> taskProps
                    = worker.connectorTaskConfigs(connName, connConfig.getInt(ConnectorConfig.TASKS_MAX_CONFIG), sinkTopics);
            boolean changed = false;
            int currentNumTasks = configState.taskCount(connName);
            if (taskProps.size() != currentNumTasks) {
                log.debug("Change in connector task count from {} to {}, writing updated task configurations", currentNumTasks, taskProps.size());
                changed = true;
            } else {
                int index = 0;
                for (Map<String, String> taskConfig : taskProps) {
                    if (!taskConfig.equals(configState.taskConfig(new ConnectorTaskId(connName, index)))) {
                        log.debug("Change in task configurations, writing updated task configurations");
                        changed = true;
                        break;
                    }
                    index++;
                }
            }
            if (changed) {
                if (isLeader()) {
                    configStorage.putTaskConfigs(taskConfigListAsMap(connName, taskProps));
                    cb.onCompletion(null, null);
                } else {
                    // We cannot forward the request to the leader on the same thread: this reconfiguration can run on
                    // the herder's main thread, and blocking it on an HTTP request would stall request processing and
                    // group membership handling.
                    forwardRequestExecutor.submit(new Runnable() {
                        @Override
                        public void run() {
                            try {
                                String reconfigUrl = RestServer.urlJoin(leaderUrl(), "/connectors/" + connName + "/tasks");
                                RestServer.httpRequest(reconfigUrl, "POST", taskProps, null);
                                cb.onCompletion(null, null);
                            } catch (ConnectException e) {
                                log.error("Request to leader to reconfigure connector tasks failed", e);
                                cb.onCompletion(e, null);
                            }
                        }
                    });
                }
            }
        } catch (Throwable t) {
            cb.onCompletion(t, null);
        }
    }

    // Common handling for requests that get config data. Checks if we are in sync with the current config, which allows
    // us to answer requests directly. If we are not, handles invoking the callback with the appropriate error.
    private boolean checkConfigSynced(Callback<?> callback) {
        if (assignment == null || configState.offset() != assignment.offset()) {
            if (!isLeader())
                callback.onCompletion(new NotLeaderException("Cannot get config data because config is not in sync and this is not the leader", leaderUrl()), null);
            else
                callback.onCompletion(new ConnectException("Cannot get config data because this is the leader node, but it does not have the most up to date configs"), null);
            return false;
        }
        return true;
    }
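    // Requests are ordered by scheduled execution time in the PriorityQueue; the group member only needs to be woken
    // up when the new request becomes the head, since that can shorten the timeout the tick loop is polling with.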
    private void addRequest(Callable<Void> action, Callback<Void> callback) {
        addRequest(0, action, callback);
    }

    private void addRequest(long delayMs, Callable<Void> action, Callback<Void> callback) {
        HerderRequest req = new HerderRequest(time.milliseconds() + delayMs, action, callback);
        requests.add(req);
        if (requests.peek() == req)
            member.wakeup();
    }

    private class HerderRequest implements Comparable<HerderRequest> {
        private final long at;
        private final Callable<Void> action;
        private final Callback<Void> callback;

        public HerderRequest(long at, Callable<Void> action, Callback<Void> callback) {
            this.at = at;
            this.action = action;
            this.callback = callback;
        }

        public Callable<Void> action() {
            return action;
        }

        public Callback<Void> callback() {
            return callback;
        }

        @Override
        public int compareTo(HerderRequest o) {
            return Long.compare(at, o.at);
        }
    }

    private static final Callback<Void> forwardErrorCallback(final Callback<?> callback) {
        return new Callback<Void>() {
            @Override
            public void onCompletion(Throwable error, Void result) {
                if (error != null)
                    callback.onCompletion(error, null);
            }
        };
    }

    // Config callbacks are triggered from the KafkaConfigStorage thread
    private Callback<String> connectorConfigCallback() {
        return new Callback<String>() {
            @Override
            public void onCompletion(Throwable error, String connector) {
                log.info("Connector {} config updated", connector);

                // Stage the update and wake up the work thread. Connector config *changes* only need the one connector
                // to be bounced. However, this callback may also indicate a connector *addition*, which does require
                // a rebalance, so we need to be careful about what operation we request.
                synchronized (DistributedHerder.this) {
                    connectorConfigUpdates.add(connector);
                }
                member.wakeup();
            }
        };
    }

    private Callback<List<ConnectorTaskId>> taskConfigCallback() {
        return new Callback<List<ConnectorTaskId>>() {
            @Override
            public void onCompletion(Throwable error, List<ConnectorTaskId> tasks) {
                log.info("Tasks {} configs updated", tasks);

                // Stage the update and wake up the work thread. No need to record the set of tasks here because task
                // reconfigs always need a rebalance to ensure offsets get committed.
                // TODO: As an optimization, some task config updates could avoid a rebalance. In particular, single-task
                // connectors clearly don't need any coordination.
                synchronized (DistributedHerder.this) {
                    needsReconfigRebalance = true;
                }
                member.wakeup();
            }
        };
    }
    // Rebalances are triggered internally from the group member, so these are always executed in the work thread.
    private WorkerRebalanceListener rebalanceListener() {
        return new WorkerRebalanceListener() {
            @Override
            public void onAssigned(ConnectProtocol.Assignment assignment) {
                // This callback just logs the info and saves it. The actual response is handled in the main loop, which
                // ensures the group member's logic for rebalancing can complete, potentially long-running steps to
                // catch up (or backoff if we fail) are not executed in a callback, and so we'll be able to invoke other
                // group membership actions (e.g., we may need to explicitly leave the group if we cannot handle the
                // assigned tasks).
                log.info("Joined group and got assignment: {}", assignment);
                synchronized (DistributedHerder.this) {
                    DistributedHerder.this.assignment = assignment;
                    rebalanceResolved = false;
                }
                // We *must* interrupt any poll() call since this could occur when the poll starts, and we might then
                // sleep in the poll() for a long time. Forcing a wakeup ensures we'll get to process this event in the
                // main thread.
                member.wakeup();
            }

            @Override
            public void onRevoked(String leader, Collection<String> connectors, Collection<ConnectorTaskId> tasks) {
                log.info("Rebalance started");

                // Note that since we don't reset the assignment, we don't revoke leadership here. During a rebalance,
                // it is still important to have a leader that can write configs, offsets, etc.

                if (rebalanceResolved) {
                    // TODO: Parallelize this. We should be able to request all connectors and tasks to stop, then wait
                    // on all of them to finish
                    // TODO: Technically we don't have to stop connectors at all until we know they've really been removed from
                    // this worker. Instead, we can let them continue to run but buffer any update requests (which should be
                    // rare anyway). This would avoid a steady stream of start/stop, which probably also includes lots of
                    // unnecessary repeated connections to the source/sink system.
                    for (String connectorName : connectors)
                        worker.stopConnector(connectorName);

                    // TODO: We need to at least commit task offsets, but if we could commit offsets & pause them instead of
                    // stopping them then state could continue to be reused when the task remains on this worker. For example,
                    // this would avoid having to close a connection and then reopen it when the task is assigned back to this
                    // worker again.
                    for (ConnectorTaskId taskId : tasks)
                        worker.stopTask(taskId);

                    log.info("Finished stopping tasks in preparation for rebalance");
                } else {
                    log.info("Wasn't able to resume work after last rebalance, can skip stopping connectors and tasks");
                }
            }
        };
    }

    // Convert a list of task configs for a connector into a map keyed by ConnectorTaskId, using list order as the task index.
    private static Map<ConnectorTaskId, Map<String, String>> taskConfigListAsMap(String connName, List<Map<String, String>> configs) {
        int index = 0;
        Map<ConnectorTaskId, Map<String, String>> result = new HashMap<>();
        for (Map<String, String> taskConfigMap : configs) {
            ConnectorTaskId taskId = new ConnectorTaskId(connName, index);
            result.put(taskId, taskConfigMap);
            index++;
        }
        return result;
    }
}



