/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**/
package org.apache.kafka.connect.runtime.distributed;
import org.apache.kafka.common.errors.WakeupException;
import org.apache.kafka.common.config.ConfigException;
import org.apache.kafka.common.utils.SystemTime;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.connect.connector.ConnectorContext;
import org.apache.kafka.connect.errors.AlreadyExistsException;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.errors.NotFoundException;
import org.apache.kafka.connect.runtime.ConnectorConfig;
import org.apache.kafka.connect.runtime.Herder;
import org.apache.kafka.connect.runtime.HerderConnectorContext;
import org.apache.kafka.connect.runtime.TaskConfig;
import org.apache.kafka.connect.runtime.Worker;
import org.apache.kafka.connect.runtime.rest.RestServer;
import org.apache.kafka.connect.runtime.rest.entities.ConnectorInfo;
import org.apache.kafka.connect.runtime.rest.entities.TaskInfo;
import org.apache.kafka.connect.sink.SinkConnector;
import org.apache.kafka.connect.storage.KafkaConfigStorage;
import org.apache.kafka.connect.util.Callback;
import org.apache.kafka.connect.util.ConnectorTaskId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
/**
* Distributed "herder" that coordinates with other workers to spread work across multiple processes.
*
* Under the hood, this is implemented as a group managed by Kafka's group membership facilities (i.e. the generalized
* group/consumer coordinator). Each instance of DistributedHerder joins the group and indicates what its current
* configuration state is (where it is in the configuration log). The group coordinator selects one member to take
* this information and assign each instance a subset of the active connectors & tasks to execute. This assignment
* is currently performed in a simple round-robin fashion, but this is not guaranteed -- the herder may also choose
* to, e.g., use a sticky assignment to avoid the usual start/stop costs associated with connectors and tasks. Once
* an assignment is received, the DistributedHerder simply runs its assigned connectors and tasks in a Worker.
*
* In addition to distributing work, the work assignment determines a leader for this generation of the group, and the
* DistributedHerder uses that leader to perform duties that can only be handled by a single node at a time. Most
* importantly, this includes writing updated configurations for connectors and tasks (and therefore also creating,
* destroying, and scaling connectors up and down).
*
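* A rough usage sketch (illustrative only, not taken from this file's callers): construct the herder with its
* DistributedConfig, Worker, and advertised REST URL, start it, and later stop it:
* <pre>{@code
* DistributedHerder herder = new DistributedHerder(config, worker, restUrl);
* herder.start();   // spawns the "DistributedHerder" thread, which runs the tick() loop
* // ... the REST layer submits requests such as connectors() and connectorInfo() ...
* herder.stop();    // signals shutdown and waits for the herder thread to exit
* }</pre>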
*/
public class DistributedHerder implements Herder, Runnable {
private static final Logger log = LoggerFactory.getLogger(DistributedHerder.class);
private static final long RECONFIGURE_CONNECTOR_TASKS_BACKOFF_MS = 250;
private final Worker worker;
private final KafkaConfigStorage configStorage;
private ClusterConfigState configState;
private final Time time;
private final int workerSyncTimeoutMs;
private final int workerUnsyncBackoffMs;
private final WorkerGroupMember member;
private final AtomicBoolean stopping;
private final CountDownLatch stopLatch = new CountDownLatch(1);
// Track enough information about the current membership state to be able to determine which requests via the API
// and from other nodes are safe to process
private boolean rebalanceResolved;
private ConnectProtocol.Assignment assignment;
// To handle most external requests, like creating or destroying a connector, we can use a generic request where
// the caller specifies all the code that should be executed.
private final Queue<HerderRequest> requests = new PriorityQueue<>();
// Config updates can be collected and applied together when possible. Also, we need to take care to rebalance when
// needed (e.g. task reconfiguration, which requires everyone to coordinate offset commits).
private Set<String> connectorConfigUpdates = new HashSet<>();
private boolean needsReconfigRebalance;
private final ExecutorService forwardRequestExecutor;
public DistributedHerder(DistributedConfig config, Worker worker, String restUrl) {
this(config, worker, null, null, restUrl, new SystemTime());
}
// public for testing
public DistributedHerder(DistributedConfig config, Worker worker, KafkaConfigStorage configStorage, WorkerGroupMember member, String restUrl, Time time) {
this.worker = worker;
if (configStorage != null) {
// For testing. Assume configuration has already been performed
this.configStorage = configStorage;
} else {
this.configStorage = new KafkaConfigStorage(worker.getInternalValueConverter(), connectorConfigCallback(), taskConfigCallback());
this.configStorage.configure(config.originals());
}
configState = ClusterConfigState.EMPTY;
this.time = time;
this.workerSyncTimeoutMs = config.getInt(DistributedConfig.WORKER_SYNC_TIMEOUT_MS_CONFIG);
this.workerUnsyncBackoffMs = config.getInt(DistributedConfig.WORKER_UNSYNC_BACKOFF_MS_CONFIG);
this.member = member != null ? member : new WorkerGroupMember(config, restUrl, this.configStorage, rebalanceListener());
stopping = new AtomicBoolean(false);
rebalanceResolved = true; // If we still need to follow up after a rebalance occurred, starting up tasks
needsReconfigRebalance = false;
forwardRequestExecutor = Executors.newSingleThreadExecutor();
}
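/**
 * Starts the herder by spawning a dedicated "DistributedHerder" thread that runs {@link #run()}.
 */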
@Override
public void start() {
Thread thread = new Thread(this, "DistributedHerder");
thread.start();
}
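/**
 * Main herder loop: starts the config storage, repeatedly calls {@link #tick()} until {@link #stop()} is requested,
 * and then cleans up via {@link #halt()}. Any uncaught exception is treated as fatal and exits the process.
 */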
public void run() {
try {
log.info("Herder starting");
configStorage.start();
log.info("Herder started");
while (!stopping.get()) {
tick();
}
halt();
log.info("Herder stopped");
} catch (Throwable t) {
log.error("Uncaught exception in herder work thread, exiting: ", t);
stopLatch.countDown();
System.exit(1);
} finally {
stopLatch.countDown();
}
}
// public for testing
public void tick() {
// The main loop does two primary things: 1) drive the group membership protocol, responding to rebalance events
// as they occur, and 2) handle external requests targeted at the leader. All the "real" work of the herder is
// performed in this thread, which keeps synchronization straightforward at the cost of some operations possibly
// blocking up this thread (especially those in callbacks due to rebalance events).
try {
member.ensureActive();
// Ensure we're in a good state in our group. If not, return so the loop restarts; everything should be set up to rejoin
if (!handleRebalanceCompleted()) return;
} catch (WakeupException e) {
// May be due to a request from another thread, or might be stopping. If the latter, we need to check the
// flag immediately. If the former, we need to re-run the ensureActive call since we can't handle requests
// unless we're in the group.
return;
}
// Process any external requests
final long now = time.milliseconds();
long nextRequestTimeoutMs = Long.MAX_VALUE;
while (true) {
final HerderRequest next;
synchronized (this) {
next = requests.peek();
if (next == null) {
break;
} else if (now >= next.at) {
requests.poll();
} else {
nextRequestTimeoutMs = next.at - now;
break;
}
}
try {
next.action().call();
next.callback().onCompletion(null, null);
} catch (Throwable t) {
next.callback().onCompletion(t, null);
}
}
// Process any configuration updates
Set<String> connectorConfigUpdatesCopy = null;
synchronized (this) {
if (needsReconfigRebalance || !connectorConfigUpdates.isEmpty()) {
// Connector reconfigs only need local updates since there is no coordination between workers required.
// However, if connectors were added or removed, work needs to be rebalanced since we have more work
// items to distribute among workers.
ClusterConfigState newConfigState = configStorage.snapshot();
if (!newConfigState.connectors().equals(configState.connectors()))
needsReconfigRebalance = true;
configState = newConfigState;
if (needsReconfigRebalance) {
// Task reconfigs require a rebalance. Request the rebalance, clean out state, and then restart
// this loop, which will then ensure the rebalance occurs without any other requests being
// processed until it completes.
member.requestRejoin();
// Any connector config updates will be addressed during the rebalance too
connectorConfigUpdates.clear();
needsReconfigRebalance = false;
return;
} else if (!connectorConfigUpdates.isEmpty()) {
// We can't start/stop while locked since starting connectors can cause task updates that will
// require writing configs, which in turn make callbacks into this class from another thread that
// require acquiring a lock. This leads to deadlock. Instead, just copy the info we need and process
// the updates after unlocking.
connectorConfigUpdatesCopy = connectorConfigUpdates;
connectorConfigUpdates = new HashSet<>();
}
}
}
if (connectorConfigUpdatesCopy != null) {
// If we only have connector config updates, we can just bounce the updated connectors that are
// currently assigned to this worker.
Set<String> localConnectors = assignment == null ? Collections.<String>emptySet() : new HashSet<>(assignment.connectors());
for (String connectorName : connectorConfigUpdatesCopy) {
if (!localConnectors.contains(connectorName))
continue;
boolean remains = configState.connectors().contains(connectorName);
log.info("Handling connector-only config update by {} connector {}",
remains ? "restarting" : "stopping", connectorName);
worker.stopConnector(connectorName);
// The update may be a deletion, so verify we actually need to restart the connector
if (remains)
startConnector(connectorName);
}
}
// Let the group take any actions it needs to
try {
member.poll(nextRequestTimeoutMs);
// Ensure we're in a good state in our group. If not, return so the loop restarts; everything should be set up to rejoin
if (!handleRebalanceCompleted()) return;
} catch (WakeupException e) { // FIXME should not be WakeupException
// Ignore. Just indicates we need to check the exit flag, for requested actions, etc.
}
}
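/**
 * Final cleanup when the work thread exits: stops all connectors and tasks still assigned to this worker, stops
 * group membership, fails any outstanding requests with an understandable error, and stops the config storage.
 */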
// public for testing
public void halt() {
synchronized (this) {
// Clean up any connectors and tasks that are still running.
log.info("Stopping connectors and tasks that are still assigned to this worker.");
for (String connName : new HashSet<>(worker.connectorNames())) {
try {
worker.stopConnector(connName);
} catch (Throwable t) {
log.error("Failed to shut down connector " + connName, t);
}
}
for (ConnectorTaskId taskId : new HashSet<>(worker.taskIds())) {
try {
worker.stopTask(taskId);
} catch (Throwable t) {
log.error("Failed to shut down task " + taskId, t);
}
}
member.stop();
// Explicitly fail any outstanding requests so they actually get a response and get an understandable reason
// for their failure
while (!requests.isEmpty()) {
HerderRequest request = requests.poll();
request.callback().onCompletion(new ConnectException("Worker is shutting down"), null);
}
if (configStorage != null)
configStorage.stop();
}
}
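/**
 * Signals the herder to shut down, wakes the work thread, waits for it to finish, and then shuts down the
 * forwardRequestExecutor.
 */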
@Override
public void stop() {
log.info("Herder stopping");
stopping.set(true);
member.wakeup();
while (stopLatch.getCount() > 0) {
try {
stopLatch.await();
} catch (InterruptedException e) {
// ignore, should not happen
}
}
forwardRequestExecutor.shutdown();
try {
if (!forwardRequestExecutor.awaitTermination(10000, TimeUnit.MILLISECONDS))
forwardRequestExecutor.shutdownNow();
} catch (InterruptedException e) {
// ignore
}
log.info("Herder stopped");
}
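/**
 * Asynchronously lists the currently known connector names. The request is queued and run on the herder thread;
 * the result (or an error if this worker's config is not yet synced) is delivered via the callback.
 */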
@Override
public synchronized void connectors(final Callback<Collection<String>> callback) {
log.trace("Submitting connector listing request");
addRequest(
new Callable<Void>() {
@Override
public Void call() throws Exception {
if (!checkConfigSynced(callback))
return null;
callback.onCompletion(null, configState.connectors());
return null;
}
},
forwardErrorCallback(callback)
);
}
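/**
 * Asynchronously fetches the config and task list for a single connector, completing the callback with a
 * NotFoundException if the connector does not exist.
 */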
@Override
public synchronized void connectorInfo(final String connName, final Callback<ConnectorInfo> callback) {
log.trace("Submitting connector info request {}", connName);
addRequest(
new Callable<Void>() {
@Override
public Void call() throws Exception {
if (!checkConfigSynced(callback))
return null;
if (!configState.connectors().contains(connName)) {
callback.onCompletion(new NotFoundException("Connector " + connName + " not found"), null);
} else {
callback.onCompletion(null, new ConnectorInfo(connName, configState.connectorConfig(connName), configState.tasks(connName)));
}
return null;
}
},
forwardErrorCallback(callback)
);
}
@Override
public void connectorConfig(String connName, final Callback