Serving project downloads consumes significant resources. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.connect.runtime.distributed;
import org.apache.kafka.clients.CommonClientConfigs;
import org.apache.kafka.common.KafkaFuture;
import org.apache.kafka.common.config.ConfigDef;
import org.apache.kafka.common.config.ConfigValue;
import org.apache.kafka.common.errors.WakeupException;
import org.apache.kafka.common.metrics.Sensor;
import org.apache.kafka.common.metrics.stats.Avg;
import org.apache.kafka.common.metrics.stats.CumulativeSum;
import org.apache.kafka.common.metrics.stats.Max;
import org.apache.kafka.common.utils.Exit;
import org.apache.kafka.common.utils.ExponentialBackoff;
import org.apache.kafka.common.utils.LogContext;
import org.apache.kafka.common.utils.ThreadUtils;
import org.apache.kafka.common.utils.Time;
import org.apache.kafka.common.utils.Utils;
import org.apache.kafka.connect.connector.policy.ConnectorClientConfigOverridePolicy;
import org.apache.kafka.connect.errors.AlreadyExistsException;
import org.apache.kafka.connect.errors.ConnectException;
import org.apache.kafka.connect.errors.NotFoundException;
import org.apache.kafka.connect.runtime.AbstractHerder;
import org.apache.kafka.connect.runtime.CloseableConnectorContext;
import org.apache.kafka.connect.runtime.ConnectMetrics;
import org.apache.kafka.connect.runtime.ConnectMetrics.MetricGroup;
import org.apache.kafka.connect.runtime.ConnectMetricsRegistry;
import org.apache.kafka.connect.runtime.ConnectorConfig;
import org.apache.kafka.connect.runtime.HerderConnectorContext;
import org.apache.kafka.connect.runtime.HerderRequest;
import org.apache.kafka.connect.runtime.RestartPlan;
import org.apache.kafka.connect.runtime.RestartRequest;
import org.apache.kafka.connect.runtime.SessionKey;
import org.apache.kafka.connect.runtime.SinkConnectorConfig;
import org.apache.kafka.connect.runtime.SourceConnectorConfig;
import org.apache.kafka.connect.runtime.TargetState;
import org.apache.kafka.connect.runtime.TaskStatus;
import org.apache.kafka.connect.runtime.Worker;
import org.apache.kafka.connect.runtime.rest.InternalRequestSignature;
import org.apache.kafka.connect.runtime.rest.RestClient;
import org.apache.kafka.connect.runtime.rest.entities.ConnectorInfo;
import org.apache.kafka.connect.runtime.rest.entities.ConnectorOffsets;
import org.apache.kafka.connect.runtime.rest.entities.ConnectorStateInfo;
import org.apache.kafka.connect.runtime.rest.entities.ConnectorType;
import org.apache.kafka.connect.runtime.rest.entities.Message;
import org.apache.kafka.connect.runtime.rest.entities.TaskInfo;
import org.apache.kafka.connect.runtime.rest.errors.BadRequestException;
import org.apache.kafka.connect.runtime.rest.errors.ConnectRestException;
import org.apache.kafka.connect.sink.SinkConnector;
import org.apache.kafka.connect.source.ConnectorTransactionBoundaries;
import org.apache.kafka.connect.source.ExactlyOnceSupport;
import org.apache.kafka.connect.source.SourceConnector;
import org.apache.kafka.connect.source.SourceTask;
import org.apache.kafka.connect.storage.ClusterConfigState;
import org.apache.kafka.connect.storage.ConfigBackingStore;
import org.apache.kafka.connect.storage.PrivilegedWriteException;
import org.apache.kafka.connect.storage.StatusBackingStore;
import org.apache.kafka.connect.util.Callback;
import org.apache.kafka.connect.util.ConnectUtils;
import org.apache.kafka.connect.util.ConnectorTaskId;
import org.apache.kafka.connect.util.FutureCallback;
import org.apache.kafka.connect.util.SinkUtils;
import org.apache.kafka.connect.util.Stage;
import org.apache.kafka.connect.util.TemporaryStage;
import org.slf4j.Logger;
import javax.crypto.KeyGenerator;
import javax.crypto.SecretKey;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.UriBuilder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;
import static org.apache.kafka.clients.consumer.ConsumerConfig.GROUP_ID_CONFIG;
import static org.apache.kafka.common.utils.Utils.UncheckedCloseable;
import static org.apache.kafka.connect.runtime.ConnectorConfig.CONNECTOR_CLIENT_CONSUMER_OVERRIDES_PREFIX;
import static org.apache.kafka.connect.runtime.WorkerConfig.TOPIC_TRACKING_ENABLE_CONFIG;
import static org.apache.kafka.connect.runtime.distributed.ConnectProtocol.CONNECT_PROTOCOL_V0;
import static org.apache.kafka.connect.runtime.distributed.ConnectProtocolCompatibility.EAGER;
import static org.apache.kafka.connect.runtime.distributed.IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V1;
import static org.apache.kafka.connect.runtime.distributed.IncrementalCooperativeConnectProtocol.CONNECT_PROTOCOL_V2;
/**
*
* Distributed "herder" that coordinates with other workers to spread work across multiple processes.
*
*
* Under the hood, this is implemented as a group managed by Kafka's group membership facilities (i.e. the generalized
* group/consumer coordinator). Each instance of DistributedHerder joins the group and indicates what its current
* configuration state is (where it is in the configuration log). The group coordinator selects one member to take
* this information and assign each instance a subset of the active connectors & tasks to execute. The assignment
* strategy depends on the {@link ConnectAssignor} used. Once an assignment is received, the DistributedHerder simply
* runs its assigned connectors and tasks in a {@link Worker}.
*
*
* In addition to distributing work, the DistributedHerder uses the leader determined during the work assignment
* to select a leader for this generation of the group who is responsible for other tasks that can only be performed
* by a single node at a time. Most importantly, this includes writing updated configurations for connectors and tasks,
* (and therefore, also for creating, destroying, and scaling up/down connectors).
*
*
* The DistributedHerder uses a single thread for most of its processing. This includes processing
* config changes, handling task rebalances and serving requests from the HTTP layer. The latter are pushed
* into a queue until the thread has time to handle them. A consequence of this is that requests can get blocked
* behind a worker rebalance. When the herder knows that a rebalance is expected, it typically returns an error
* immediately to the request, but this is not always possible (in particular when another worker has requested
* the rebalance). Similar to handling HTTP requests, config changes which are observed asynchronously by polling
* the config log are batched for handling in the work thread.
*
*/
public class DistributedHerder extends AbstractHerder implements Runnable {
// Logger created from a per-worker LogContext in the constructor, so it cannot be a static constant
private final Logger log;
// NOTE(review): the two shutdown timeouts and the two RECONFIGURE_* backoff bounds are not used in the
// visible part of this file; presumably they are consumed by shutdown/retry logic further down — confirm there
private static final long FORWARD_REQUEST_SHUTDOWN_TIMEOUT_MS = TimeUnit.SECONDS.toMillis(10);
private static final long START_AND_STOP_SHUTDOWN_TIMEOUT_MS = TimeUnit.SECONDS.toMillis(1);
private static final long RECONFIGURE_CONNECTOR_TASKS_BACKOFF_INITIAL_MS = 250;
private static final long RECONFIGURE_CONNECTOR_TASKS_BACKOFF_MAX_MS = 60000;
// Backoff applied by tick() after a failed attempt to reclaim write privileges for the config topic
private static final long CONFIG_TOPIC_WRITE_PRIVILEGES_BACKOFF_MS = 250;
// Number of threads in the fixed pool backing startAndStopExecutor
private static final int START_STOP_THREAD_POOL_SIZE = 8;
// Initial value assigned to backoffRetries in the constructor
private static final short BACKOFF_RETRIES = 5;
private final AtomicLong requestSeqNum = new AtomicLong();
private final Time time;
private final HerderMetrics herderMetrics;
// Populated from the AutoCloseable[] handed to the constructor
// NOTE(review): declared as a raw List; the type argument appears to have been lost — confirm and restore
private final List uponShutdown;
// The following values are read once from the DistributedConfig in the constructor
private final String workerGroupId;
private final int workerSyncTimeoutMs;
private final int workerUnsyncBackoffMs;
private final int keyRotationIntervalMs;
private final String requestSignatureAlgorithm;
private final List keySignatureVerificationAlgorithms;
private final KeyGenerator keyGenerator;
private final RestClient restClient;
// Visible for testing
// Either injected through the constructor or created there as a single-threaded fixed pool
ExecutorService forwardRequestExecutor;
// Visible for testing
// Single-threaded executor that start() submits this herder to
final ExecutorService herderExecutor;
// Visible for testing
ExecutorService startAndStopExecutor;
private final WorkerGroupMember member;
// Checked by the run() loop before every tick; the loop exits once this is true
private final AtomicBoolean stopping;
private final boolean isTopicTrackingEnabled;
// Track enough information about the current membership state to be able to determine which requests via the API
// and from other nodes are safe to process
private boolean rebalanceResolved;
private ExtendedAssignment runningAssignment = ExtendedAssignment.empty();
private final Set tasksToRestart = new HashSet<>();
// visible for testing
ExtendedAssignment assignment;
// Cleared by tick() when a session key write fails; while false, tick() re-reads the config topic to the end
// before attempting anything else
private boolean canReadConfigs;
// visible for testing
protected ClusterConfigState configState;
// To handle most external requests, like creating or destroying a connector, we can use a generic request where
// the caller specifies all the code that should be executed.
final NavigableSet requests = new ConcurrentSkipListSet<>();
// Config updates can be collected and applied together when possible. Also, we need to take care to rebalance when
// needed (e.g. task reconfiguration, which requires everyone to coordinate offset commits).
private Set connectorConfigUpdates = new HashSet<>();
private Set taskConfigUpdates = new HashSet<>();
// Similarly collect target state changes (when observed by the config storage listener) for handling in the
// herder's main thread.
private Set connectorTargetStateChanges = new HashSet<>();
// Access to this map is protected by the herder's monitor
private final Map activeZombieFencings = new HashMap<>();
private final List restNamespace;
private boolean needsReconfigRebalance;
// Set while this worker has been fenced out of the config topic; tick() either reclaims write privileges
// (when leader) or simply clears the flag (when no longer leader)
private volatile boolean fencedFromConfigTopic;
private volatile int generation;
// Long.MAX_VALUE means that no rebalance is currently scheduled
private volatile long scheduledRebalance;
private volatile SecretKey sessionKey;
// Long.MAX_VALUE means that no key rotation is currently scheduled
private volatile long keyExpiration;
private short currentProtocolVersion;
private short backoffRetries;
// The request most recently polled from the request queue by tick()
private volatile DistributedHerderRequest currentRequest;
private volatile Stage tickThreadStage;
// visible for testing
// The latest pending restart request for each named connector
final Map pendingRestartRequests = new HashMap<>();
// The thread that the herder's tick loop runs on. Would be final, but cannot be set in the constructor,
// and it's also useful to be able to modify it for testing
Thread herderThread;
private final DistributedConfig config;
/**
 * Create a herder that will form a Connect cluster with other {@link DistributedHerder} instances (in this or other JVMs)
 * that have the same group ID.
 *
 * <p>After delegating to the package-private constructor, this constructor registers a new
 * {@code ConfigUpdateListener} on the given config backing store.
 *
 * @param config the configuration for the worker; may not be null
 * @param time the clock to use; may not be null
 * @param worker the {@link Worker} instance to use; may not be null
 * @param kafkaClusterId the identifier of the Kafka cluster to use for internal topics; may not be null
 * @param statusBackingStore the backing store for statuses; may not be null
 * @param configBackingStore the backing store for connector configurations; may not be null
 * @param restUrl the URL of this herder's REST API; may not be null, but may be an arbitrary placeholder
 *                value if this worker does not expose a REST API
 * @param restClient a REST client that can be used to issue requests to other workers in the cluster; may
 *                   be null if inter-worker communication is not enabled
 * @param connectorClientConfigOverridePolicy the policy specifying the client configuration properties that may be overridden
 *                                            in connector configurations; may not be null
 * @param restNamespace zero or more path elements to prepend to the paths of forwarded REST requests; may be empty, but not null
 * @param uponShutdown any {@link AutoCloseable} objects that should be closed when this herder is {@link #stop() stopped},
 *                     after all services and resources owned by this herder are stopped
 */
public DistributedHerder(DistributedConfig config,
                         Time time,
                         Worker worker,
                         String kafkaClusterId,
                         StatusBackingStore statusBackingStore,
                         ConfigBackingStore configBackingStore,
                         String restUrl,
                         RestClient restClient,
                         ConnectorClientConfigOverridePolicy connectorClientConfigOverridePolicy,
                         List<String> restNamespace,
                         AutoCloseable... uponShutdown) {
    // No group member and no forward-request executor are supplied here (both null), so the delegate
    // constructor creates its own instances of each
    this(config, worker, worker.workerId(), kafkaClusterId, statusBackingStore, configBackingStore, null, restUrl, restClient, worker.metrics(),
            time, connectorClientConfigOverridePolicy, restNamespace, null, uponShutdown);
    // Only this constructor registers the listener; the package-private constructor does not
    configBackingStore.setUpdateListener(new ConfigUpdateListener());
}
/**
 * Package-private constructor, visible for testing, that allows the group member and the
 * forward-request executor to be injected.
 *
 * <p>Unlike the public constructor, this one does not register a config update listener on the
 * config backing store.
 *
 * @param member the group member to use; if null, a new {@link WorkerGroupMember} is created
 * @param restNamespace path elements for forwarded REST requests; may not be null
 * @param forwardRequestExecutor the executor to use for forwarded requests; if null, a single-threaded
 *                               fixed pool is created
 * @param uponShutdown the objects to keep for closing on shutdown; passed as an array rather than varargs
 *                     because of https://github.com/mockito/mockito/issues/2601
 */
DistributedHerder(DistributedConfig config,
                  Worker worker,
                  String workerId,
                  String kafkaClusterId,
                  StatusBackingStore statusBackingStore,
                  ConfigBackingStore configBackingStore,
                  WorkerGroupMember member,
                  String restUrl,
                  RestClient restClient,
                  ConnectMetrics metrics,
                  Time time,
                  ConnectorClientConfigOverridePolicy connectorClientConfigOverridePolicy,
                  List<String> restNamespace,
                  ExecutorService forwardRequestExecutor,
                  // https://github.com/mockito/mockito/issues/2601 explains why we can't use varargs here
                  AutoCloseable[] uponShutdown) {
    super(worker, workerId, kafkaClusterId, statusBackingStore, configBackingStore, connectorClientConfigOverridePolicy, time);

    this.time = time;
    this.herderMetrics = new HerderMetrics(metrics);
    this.workerGroupId = config.getString(DistributedConfig.GROUP_ID_CONFIG);
    this.workerSyncTimeoutMs = config.getInt(DistributedConfig.WORKER_SYNC_TIMEOUT_MS_CONFIG);
    this.workerUnsyncBackoffMs = config.getInt(DistributedConfig.WORKER_UNSYNC_BACKOFF_MS_CONFIG);
    this.requestSignatureAlgorithm = config.getString(DistributedConfig.INTER_WORKER_SIGNATURE_ALGORITHM_CONFIG);
    this.keyRotationIntervalMs = config.getInt(DistributedConfig.INTER_WORKER_KEY_TTL_MS_CONFIG);
    this.keySignatureVerificationAlgorithms = config.getList(DistributedConfig.INTER_WORKER_VERIFICATION_ALGORITHMS_CONFIG);
    this.keyGenerator = config.getInternalRequestKeyGenerator();
    this.restClient = restClient;
    this.isTopicTrackingEnabled = config.getBoolean(TOPIC_TRACKING_ENABLE_CONFIG);
    this.restNamespace = Objects.requireNonNull(restNamespace);
    this.uponShutdown = Arrays.asList(uponShutdown);

    // Fall back to a client ID derived from the worker ID when none is configured
    String clientIdConfig = config.getString(CommonClientConfigs.CLIENT_ID_CONFIG);
    String clientId = clientIdConfig.isEmpty() ? "connect-" + workerId : clientIdConfig;
    // Thread factory uses String.format and '%' is handled as a placeholder
    // need to escape if the client.id contains an actual % character
    String escapedClientIdForThreadNameFormat = clientId.replace("%", "%%");
    LogContext logContext = new LogContext("[Worker clientId=" + clientId + ", groupId=" + this.workerGroupId + "] ");
    log = logContext.logger(DistributedHerder.class);

    this.member = member != null
            ? member
            : new WorkerGroupMember(config, restUrl, this.configBackingStore,
                    new RebalanceListener(time), time, clientId, logContext);

    // Exactly one thread with a queue capacity of one: the executor only ever runs this herder's run() loop
    this.herderExecutor = new ThreadPoolExecutor(1, 1, 0L,
            TimeUnit.MILLISECONDS,
            new LinkedBlockingDeque<>(1),
            ThreadUtils.createThreadFactory(
                    this.getClass().getSimpleName() + "-" + escapedClientIdForThreadNameFormat + "-%d", false));

    this.forwardRequestExecutor = forwardRequestExecutor != null
            ? forwardRequestExecutor
            : Executors.newFixedThreadPool(
                    1,
                    ThreadUtils.createThreadFactory("ForwardRequestExecutor-" + escapedClientIdForThreadNameFormat + "-%d", false)
            );
    this.startAndStopExecutor = Executors.newFixedThreadPool(START_STOP_THREAD_POOL_SIZE,
            ThreadUtils.createThreadFactory(
                    "StartAndStopExecutor-" + escapedClientIdForThreadNameFormat + "-%d", false));
    this.config = config;

    stopping = new AtomicBoolean(false);
    configState = ClusterConfigState.EMPTY;
    rebalanceResolved = true; // If we still need to follow up after a rebalance occurred, starting up tasks
    needsReconfigRebalance = false;
    fencedFromConfigTopic = false;
    canReadConfigs = true; // We didn't try yet, but Configs are readable until proven otherwise
    scheduledRebalance = Long.MAX_VALUE;
    keyExpiration = Long.MAX_VALUE;
    sessionKey = null;
    backoffRetries = BACKOFF_RETRIES;

    currentRequest = null;
    tickThreadStage = new Stage("awaiting startup", time.milliseconds());

    currentProtocolVersion = ConnectProtocolCompatibility.compatibility(
            config.getString(DistributedConfig.CONNECT_PROTOCOL_CONFIG)
    ).protocolVersion();
    // Warn loudly when the configured protocol does not allow internal request validation
    if (!internalRequestValidationEnabled(currentProtocolVersion)) {
        log.warn(
                "Internal request verification will be disabled for this cluster as this worker's {} configuration has been set to '{}'. "
                        + "If this is not intentional, either remove the '{}' configuration from the worker config file or change its value "
                        + "to '{}'. If this configuration is left as-is, the cluster will be insecure; for more information, see KIP-507: "
                        + "https://cwiki.apache.org/confluence/display/KAFKA/KIP-507%3A+Securing+Internal+Connect+REST+Endpoints",
                DistributedConfig.CONNECT_PROTOCOL_CONFIG,
                config.getString(DistributedConfig.CONNECT_PROTOCOL_CONFIG),
                DistributedConfig.CONNECT_PROTOCOL_CONFIG,
                ConnectProtocolCompatibility.SESSIONED.name()
        );
    }
}
/**
 * Starts the herder by submitting it to the herder executor, which then invokes {@link #run()}.
 */
@Override
public void start() {
    herderExecutor.submit(this);
}
/**
 * Body of the herder's work thread: starts the herder's services, invokes {@link #tick()} repeatedly
 * until a stop has been requested, and then halts the herder and closes its metrics.
 *
 * <p>Any {@link Throwable} that escapes is logged and followed by {@code Exit.exit(1)}; the
 * {@code running} flag is always reset on the way out.
 */
@Override
public void run() {
    try {
        log.info("Herder starting");
        herderThread = Thread.currentThread();

        try (TickThreadStage startupStage = new TickThreadStage("reading to the end of internal topics")) {
            startServices();
        }

        log.info("Herder started");
        running = true;

        // Keep ticking until the stopping flag is observed
        for (;;) {
            if (stopping.get()) {
                break;
            }
            tick();
        }

        recordTickThreadStage("shutting down");
        halt();

        log.info("Herder stopped");
        herderMetrics.close();
    } catch (Throwable fatal) {
        log.error("Uncaught exception in herder work thread, exiting: ", fatal);
        Exit.exit(1);
    } finally {
        running = false;
    }
}
/**
 * Runs one iteration of the herder's main loop. Public for testing.
 *
 * <p>In order, a single tick:
 * <ol>
 *   <li>re-reads the config topic to the end if a previous attempt failed, then ensures group membership
 *       is active and any completed rebalance has been handled;</li>
 *   <li>resolves a pending "fenced from config topic" condition;</li>
 *   <li>distributes a new session key if {@link #checkForKeyRotation(long)} asks for one;</li>
 *   <li>runs every queued external request whose scheduled time has been reached;</li>
 *   <li>processes pending connector restart requests;</li>
 *   <li>applies collected connector config, target state and (for non-V0 protocols) task config updates;</li>
 *   <li>polls the group until the next scheduled event or until woken up.</li>
 * </ol>
 * The method returns early whenever a step determines that the loop should restart from the top.
 */
public void tick() {
    // The main loop does two primary things: 1) drive the group membership protocol, responding to rebalance events
    // as they occur, and 2) handle external requests targeted at the leader. All the "real" work of the herder is
    // performed in this thread, which keeps synchronization straightforward at the cost of some operations possibly
    // blocking up this thread (especially those in callbacks due to rebalance events).

    try {
        // if we failed to read to end of log before, we need to make sure the issue was resolved before joining group
        // Joining and immediately leaving for failure to read configs is exceedingly impolite
        if (!canReadConfigs) {
            if (readConfigToEnd(workerSyncTimeoutMs)) {
                canReadConfigs = true;
            } else {
                return; // Safe to return and tick immediately because readConfigToEnd will do the backoff for us
            }
        }

        log.debug("Ensuring group membership is still active");
        String stageDescription = "ensuring membership in the cluster";
        member.ensureActive(() -> new TickThreadStage(stageDescription));
        completeTickThreadStage();
        // Ensure we're in a good state in our group. If not restart and everything should be setup to rejoin
        if (!handleRebalanceCompleted()) return;
    } catch (WakeupException e) {
        // May be due to a request from another thread, or might be stopping. If the latter, we need to check the
        // flag immediately. If the former, we need to re-run the ensureActive call since we can't handle requests
        // unless we're in the group.
        log.trace("Woken up while ensure group membership is still active");
        return;
    }

    if (fencedFromConfigTopic) {
        if (isLeader()) {
            // We were accidentally fenced out, possibly by a zombie leader
            try {
                log.debug("Reclaiming write privileges for config topic after being fenced out");
                try (TickThreadStage stage = new TickThreadStage("reclaiming write privileges for the config topic")) {
                    configBackingStore.claimWritePrivileges();
                }
                fencedFromConfigTopic = false;
                log.debug("Successfully reclaimed write privileges for config topic after being fenced out");
            } catch (Exception e) {
                log.warn("Unable to claim write privileges for config topic. Will backoff and possibly retry if still the leader", e);
                backoff(CONFIG_TOPIC_WRITE_PRIVILEGES_BACKOFF_MS);
                return;
            }
        } else {
            log.trace("Relinquished write privileges for config topic after being fenced out, since worker is no longer the leader of the cluster");
            // We were meant to be fenced out because we fell out of the group and a new leader was elected
            fencedFromConfigTopic = false;
        }
    }

    long now = time.milliseconds();

    if (checkForKeyRotation(now)) {
        log.debug("Distributing new session key");
        // Clear the scheduled rotation before attempting the write below
        keyExpiration = Long.MAX_VALUE;
        try {
            SessionKey newSessionKey = new SessionKey(keyGenerator.generateKey(), now);
            writeToConfigTopicAsLeader(
                    "writing a new session key to the config topic",
                    () -> configBackingStore.putSessionKey(newSessionKey)
            );
        } catch (Exception e) {
            log.info("Failed to write new session key to config topic; forcing a read to the end of the config topic before possibly retrying", e);
            canReadConfigs = false;
            return;
        }
    }

    // Process any external requests
    // TODO: Some of these can be performed concurrently or even optimized away entirely.
    //       For example, if three different connectors are slated to be restarted, it's fine to
    //       restart all three at the same time instead.
    //       Another example: if multiple configurations are submitted for the same connector,
    //       the only one that actually has to be written to the config topic is the
    //       most-recently one.
    Long scheduledTick = null;
    while (true) {
        final DistributedHerderRequest next = peekWithoutException();
        if (next == null) {
            break;
        } else if (now >= next.at) {
            currentRequest = requests.pollFirst();
        } else {
            // The earliest request is not due yet; remember when it is so the poll below wakes up in time
            scheduledTick = next.at;
            break;
        }

        runRequest(next.action(), next.callback());
    }

    // Process all pending connector restart requests
    processRestartRequests();

    if (scheduledRebalance < Long.MAX_VALUE) {
        scheduledTick = scheduledTick != null ? Math.min(scheduledTick, scheduledRebalance) : scheduledRebalance;
        rebalanceResolved = false;
        log.debug("Scheduled rebalance at: {} (now: {} scheduledTick: {}) ",
                scheduledRebalance, now, scheduledTick);
    }

    if (isLeader() && internalRequestValidationEnabled() && keyExpiration < Long.MAX_VALUE) {
        scheduledTick = scheduledTick != null ? Math.min(scheduledTick, keyExpiration) : keyExpiration;
        log.debug("Scheduled next key rotation at: {} (now: {} scheduledTick: {}) ",
                keyExpiration, now, scheduledTick);
    }

    // Process any configuration updates
    // The holders below receive the pending updates from the synchronized update methods so that the
    // updates can be processed here, after the herder's monitor has been released
    AtomicReference<Set<String>> connectorConfigUpdatesCopy = new AtomicReference<>();
    AtomicReference<Set<String>> connectorTargetStateChangesCopy = new AtomicReference<>();
    AtomicReference<Set<ConnectorTaskId>> taskConfigUpdatesCopy = new AtomicReference<>();

    boolean shouldReturn;
    if (member.currentProtocolVersion() == CONNECT_PROTOCOL_V0) {
        shouldReturn = updateConfigsWithEager(connectorConfigUpdatesCopy,
                connectorTargetStateChangesCopy);
        // With eager protocol we should return immediately if needsReconfigRebalance has
        // been set to retain the old workflow
        if (shouldReturn) {
            return;
        }

        if (connectorConfigUpdatesCopy.get() != null) {
            processConnectorConfigUpdates(connectorConfigUpdatesCopy.get());
        }

        if (connectorTargetStateChangesCopy.get() != null) {
            processTargetStateChanges(connectorTargetStateChangesCopy.get());
        }
    } else {
        shouldReturn = updateConfigsWithIncrementalCooperative(connectorConfigUpdatesCopy,
                connectorTargetStateChangesCopy, taskConfigUpdatesCopy);

        // Unlike the eager branch, pending updates are applied before honoring shouldReturn
        if (connectorConfigUpdatesCopy.get() != null) {
            processConnectorConfigUpdates(connectorConfigUpdatesCopy.get());
        }

        if (connectorTargetStateChangesCopy.get() != null) {
            processTargetStateChanges(connectorTargetStateChangesCopy.get());
        }

        if (taskConfigUpdatesCopy.get() != null) {
            processTaskConfigUpdatesWithIncrementalCooperative(taskConfigUpdatesCopy.get());
        }

        if (shouldReturn) {
            return;
        }
    }

    // Let the group take any actions it needs to
    try {
        long nextRequestTimeoutMs = scheduledTick != null ? Math.max(scheduledTick - time.milliseconds(), 0L) : Long.MAX_VALUE;
        log.trace("Polling for group activity; will wait for {}ms or until poll is interrupted by "
                + "either config backing store updates or a new external request",
                nextRequestTimeoutMs);
        String pollDurationDescription = scheduledTick != null ? "for up to " + nextRequestTimeoutMs + "ms or " : "";
        String stageDescription = "polling the group coordinator " + pollDurationDescription + "until interrupted";
        member.poll(nextRequestTimeoutMs, () -> new TickThreadStage(stageDescription));
        completeTickThreadStage();
        // Ensure we're in a good state in our group. If not restart and everything should be setup to rejoin
        handleRebalanceCompleted();
    } catch (WakeupException e) { // FIXME should not be WakeupException
        log.trace("Woken up while polling for group activity");
        // Ignore. Just indicates we need to check the exit flag, for requested actions, etc.
    }
}
/**
 * Decides whether a new session key should be distributed at the given time.
 *
 * <p>If no session key is cached yet but the current config state contains one, the cached key and
 * its expiration are first initialized from the config state.
 *
 * @param now the current time in milliseconds
 * @return true only if internal request validation is enabled, this worker is the leader, and the
 *         cached key is missing, expired, or differs in algorithm or encoded length from what the
 *         configured key generator produces; false otherwise
 */
private boolean checkForKeyRotation(long now) {
    final SecretKey currentKey;
    final long currentExpiration;
    synchronized (this) {
        // This happens on startup; the snapshot contains the session key,
        // but no callback in the config update listener has been fired for it yet.
        if (sessionKey == null && configState.sessionKey() != null) {
            sessionKey = configState.sessionKey().key();
            keyExpiration = configState.sessionKey().creationTimestamp() + keyRotationIntervalMs;
        }
        currentKey = sessionKey;
        currentExpiration = keyExpiration;
    }

    // Only a leader with request validation enabled ever rotates keys
    if (!internalRequestValidationEnabled() || !isLeader()) {
        return false;
    }

    if (currentKey == null) {
        log.debug("Internal request signing is enabled but no session key has been distributed yet. "
                + "Distributing new key now.");
        return true;
    }

    if (currentExpiration <= now) {
        log.debug("Existing key has expired. Distributing new key now.");
        return true;
    }

    boolean sameAlgorithm = currentKey.getAlgorithm().equals(keyGenerator.getAlgorithm());
    // The key size is compared by generating a throwaway key; this only happens when the algorithms match
    if (!sameAlgorithm
            || currentKey.getEncoded().length != keyGenerator.generateKey().getEncoded().length) {
        log.debug("Previously-distributed key uses different algorithm/key size "
                + "than required by current worker configuration. Distributing new key now.");
        return true;
    }

    return false;
}
private synchronized boolean updateConfigsWithEager(AtomicReference> connectorConfigUpdatesCopy,
AtomicReference> connectorTargetStateChangesCopy) {
// This branch is here to avoid creating a snapshot if not needed
if (needsReconfigRebalance
|| !connectorConfigUpdates.isEmpty()
|| !connectorTargetStateChanges.isEmpty()) {
log.trace("Handling config updates with eager rebalancing");
// Connector reconfigs only need local updates since there is no coordination between workers required.
// However, if connectors were added or removed, work needs to be rebalanced since we have more work
// items to distribute among workers.
configState = configBackingStore.snapshot();
if (needsReconfigRebalance) {
// Task reconfigs require a rebalance. Request the rebalance, clean out state, and then restart
// this loop, which will then ensure the rebalance occurs without any other requests being
// processed until it completes.
log.debug("Requesting rebalance due to reconfiguration of tasks (needsReconfigRebalance: {})",
needsReconfigRebalance);
member.requestRejoin();
needsReconfigRebalance = false;
// Any connector config updates or target state changes will be addressed during the rebalance too
connectorConfigUpdates.clear();
connectorTargetStateChanges.clear();
return true;
} else {
if (!connectorConfigUpdates.isEmpty()) {
// We can't start/stop while locked since starting connectors can cause task updates that will
// require writing configs, which in turn make callbacks into this class from another thread that
// require acquiring a lock. This leads to deadlock. Instead, just copy the info we need and process
// the updates after unlocking.
connectorConfigUpdatesCopy.set(connectorConfigUpdates);
connectorConfigUpdates = new HashSet<>();
}
if (!connectorTargetStateChanges.isEmpty()) {
// Similarly for target state changes which can cause connectors to be restarted
connectorTargetStateChangesCopy.set(connectorTargetStateChanges);
connectorTargetStateChanges = new HashSet<>();
}
}
} else {
log.trace("Skipping config updates with eager rebalancing "
+ "since no config rebalance is required "
+ "and there are no connector config, task config, or target state changes pending");
}
return false;
}
private synchronized boolean updateConfigsWithIncrementalCooperative(AtomicReference> connectorConfigUpdatesCopy,
AtomicReference> connectorTargetStateChangesCopy,
AtomicReference> taskConfigUpdatesCopy) {
boolean retValue = false;
// This branch is here to avoid creating a snapshot if not needed
if (needsReconfigRebalance
|| !connectorConfigUpdates.isEmpty()
|| !connectorTargetStateChanges.isEmpty()
|| !taskConfigUpdates.isEmpty()) {
log.trace("Handling config updates with incremental cooperative rebalancing");
// Connector reconfigs only need local updates since there is no coordination between workers required.
// However, if connectors were added or removed, work needs to be rebalanced since we have more work
// items to distribute among workers.
configState = configBackingStore.snapshot();
if (needsReconfigRebalance) {
log.debug("Requesting rebalance due to reconfiguration of tasks (needsReconfigRebalance: {})",
needsReconfigRebalance);
member.requestRejoin();
needsReconfigRebalance = false;
retValue = true;
}
if (!connectorConfigUpdates.isEmpty()) {
// We can't start/stop while locked since starting connectors can cause task updates that will
// require writing configs, which in turn make callbacks into this class from another thread that
// require acquiring a lock. This leads to deadlock. Instead, just copy the info we need and process
// the updates after unlocking.
connectorConfigUpdatesCopy.set(connectorConfigUpdates);
connectorConfigUpdates = new HashSet<>();
}
if (!connectorTargetStateChanges.isEmpty()) {
// Similarly for target state changes which can cause connectors to be restarted
connectorTargetStateChangesCopy.set(connectorTargetStateChanges);
connectorTargetStateChanges = new HashSet<>();
}
if (!taskConfigUpdates.isEmpty()) {
// Similarly for task config updates
taskConfigUpdatesCopy.set(taskConfigUpdates);
taskConfigUpdates = new HashSet<>();
}
} else {
log.trace("Skipping config updates with incremental cooperative rebalancing "
+ "since no config rebalance is required "
+ "and there are no connector config, task config, or target state changes pending");
}
return retValue;
}
/**
 * Applies connector config updates to the connectors currently assigned to this worker.
 *
 * <p>Each updated connector that is part of the current assignment is stopped. Connectors that still
 * exist in the config state are then started again as one batch; connectors that no longer exist
 * (i.e. the update was a deletion) stay stopped. Updates for connectors not assigned to this worker
 * are skipped.
 *
 * @param connectorConfigUpdates the names of the connectors whose configs have changed
 */
private void processConnectorConfigUpdates(Set<String> connectorConfigUpdates) {
    // If we only have connector config updates, we can just bounce the updated connectors that are
    // currently assigned to this worker.
    Set<String> localConnectors = assignment == null ? Collections.emptySet() : new HashSet<>(assignment.connectors());
    Collection<Callable<Void>> connectorsToStart = new ArrayList<>();
    log.trace("Processing connector config updates; "
            + "currently-owned connectors are {}, and to-be-updated connectors are {}",
            localConnectors,
            connectorConfigUpdates);
    for (String connectorName : connectorConfigUpdates) {
        if (!localConnectors.contains(connectorName)) {
            log.trace("Skipping config update for connector {} as it is not owned by this worker",
                    connectorName);
            continue;
        }
        boolean remains = configState.contains(connectorName);
        log.info("Handling connector-only config update by {} connector {}",
                remains ? "restarting" : "stopping", connectorName);
        try (TickThreadStage stage = new TickThreadStage("stopping connector " + connectorName)) {
            worker.stopAndAwaitConnector(connectorName);
        }
        // The update may be a deletion, so verify we actually need to restart the connector
        if (remains) {
            connectorsToStart.add(getConnectorStartingCallable(connectorName));
        }
    }
    // Stops above happen one at a time; the restarts are handed over together
    String stageDescription = "restarting " + connectorsToStart.size() + " reconfigured connectors";
    startAndStop(connectorsToStart, stageDescription);
}
/**
 * Propagates target state changes for the given connectors to the worker.
 *
 * <p>Connectors that are not present in {@code configState} are logged and skipped. For the
 * others, the new target state is passed to the worker; when the transition completes without
 * error and the resulting state is {@link TargetState#STARTED}, a task reconfiguration is
 * requested for the connector.
 *
 * @param connectorTargetStateChanges names of the connectors whose target state changed
 */
private void processTargetStateChanges(Set<String> connectorTargetStateChanges) {
    log.trace("Processing target state updates; "
            + "currently-known connectors are {}, and to-be-updated connectors are {}",
            configState.connectors(), connectorTargetStateChanges);
    for (String connector : connectorTargetStateChanges) {
        if (!configState.connectors().contains(connector)) {
            log.debug("Received target state change for unknown connector: {}", connector);
            continue;
        }
        // Only look up the target state once we know the connector exists
        TargetState targetState = configState.targetState(connector);
        // we must propagate the state change to the worker so that the connector's
        // tasks can transition to the new target state
        worker.setTargetState(connector, targetState, (error, newState) -> {
            if (error != null) {
                log.error("Failed to transition connector to target state", error);
                return;
            }
            // additionally, if the worker is running the connector itself, then we need to
            // request reconfiguration to ensure that config changes while paused take effect
            if (newState == TargetState.STARTED) {
                requestTaskReconfiguration(connector);
            }
        });
    }
}
/**
 * Handles task config updates by stopping the locally-assigned tasks of every connector that
 * has at least one updated task.
 *
 * @param taskConfigUpdates IDs of the tasks whose configuration changed
 */
private void processTaskConfigUpdatesWithIncrementalCooperative(Set<ConnectorTaskId> taskConfigUpdates) {
    // Only used for the trace message below; stopReconfiguredTasks derives its own view of the assignment
    Set<ConnectorTaskId> localTasks = assignment == null
            ? Collections.emptySet()
            : new HashSet<>(assignment.tasks());
    log.trace("Processing task config updates with incremental cooperative rebalance protocol; "
            + "currently-owned tasks are {}, and to-be-updated tasks are {}",
            localTasks, taskConfigUpdates);
    // Collapse the updated task IDs to the set of connectors they belong to
    Set<String> connectorsWhoseTasksToStop = taskConfigUpdates.stream()
            .map(ConnectorTaskId::connector)
            .collect(Collectors.toSet());
    stopReconfiguredTasks(connectorsWhoseTasksToStop);
}
/**
 * Stops every task in the current assignment that belongs to one of the given connectors and
 * records the stopped tasks in {@code tasksToRestart}.
 *
 * @param connectors names of the connectors whose locally-assigned tasks should be stopped
 */
private void stopReconfiguredTasks(Set<String> connectors) {
    Set<ConnectorTaskId> localTasks = assignment == null
            ? Collections.emptySet()
            : new HashSet<>(assignment.tasks());
    List<ConnectorTaskId> tasksToStop = localTasks.stream()
            .filter(taskId -> connectors.contains(taskId.connector()))
            .collect(Collectors.toList());
    if (tasksToStop.isEmpty()) {
        // The rest of the method would essentially be a no-op so this isn't strictly necessary,
        // but it prevents an unnecessary log message from being emitted
        return;
    }
    log.info("Handling task config update by stopping tasks {}, which will be restarted after rebalance if still assigned to this worker", tasksToStop);
    try (TickThreadStage stage = new TickThreadStage("stopping " + tasksToStop.size() + " reconfigured tasks")) {
        worker.stopAndAwaitTasks(tasksToStop);
    }
    // Remember what was stopped so it can be brought back later
    tasksToRestart.addAll(tasksToStop);
}
/**
 * Shuts down everything this herder is running: connectors, tasks, the group member, any
 * queued requests, and finally the herder's services. The whole sequence runs while holding
 * this object's monitor.
 *
 * <p>Public for testing.
 */
public void halt() {
    synchronized (this) {
        // Whatever is still assigned to this worker has to be stopped first.
        log.info("Stopping connectors and tasks that are still assigned to this worker.");
        worker.stopAndAwaitConnectors();
        worker.stopAndAwaitTasks();

        member.stop();

        // Drain the request queue, completing each entry with an error so that callers
        // receive a response and a readable explanation instead of waiting forever.
        DistributedHerderRequest pending;
        while ((pending = requests.pollFirst()) != null) {
            pending.callback().onCompletion(new ConnectException("Worker is shutting down"), null);
        }

        stopServices();
    }
}
/**
 * Runs the superclass service shutdown and then closes every entry of {@code uponShutdown}
 * through {@code Utils.closeQuietly}. The entries are closed even if the superclass shutdown
 * throws.
 */
@Override
protected void stopServices() {
    try {
        super.stopServices();
    } finally {
        this.uponShutdown.forEach(closeable -> {
            // Label passed alongside the resource; a null entry gets an empty label
            String label = closeable == null ? "" : closeable.toString();
            Utils.closeQuietly(closeable, label);
        });
    }
}
/**
 * Computes how long to wait for the herderExecutor to terminate gracefully.
 *
 * <p>The value is sized to accommodate reading to the end of the config topic, successfully
 * attempting to stop all connectors and tasks, and a buffer of 10s.
 *
 * @return the termination timeout in milliseconds
 */
private long herderExecutorTimeoutMs() {
    final long configSyncMs = this.workerSyncTimeoutMs;
    final long taskShutdownMs = config.getLong(DistributedConfig.TASK_SHUTDOWN_GRACEFUL_TIMEOUT_MS_CONFIG);
    final long connectorShutdownMs = Worker.CONNECTOR_GRACEFUL_SHUTDOWN_TIMEOUT_MS;
    final long bufferMs = 10000;
    return configSyncMs + taskShutdownMs + connectorShutdownMs + bufferMs;
}
/**
 * Stops the herder: raises the {@code stopping} flag, wakes up the group member, shuts down
 * the herder, forward-request and start/stop executors in that order, and finally clears
 * {@code running}.
 */
@Override
public void stop() {
    log.info("Herder stopping");

    // Flag the shutdown before waking the member
    // NOTE(review): presumably the herder thread checks this flag once woken up; confirm against the tick loop
    stopping.set(true);
    member.wakeup();

    // Each executor is given its own bounded amount of time to wind down
    final long herderTimeoutMs = herderExecutorTimeoutMs();
    ThreadUtils.shutdownExecutorServiceQuietly(
            herderExecutor, herderTimeoutMs, TimeUnit.MILLISECONDS);
    ThreadUtils.shutdownExecutorServiceQuietly(
            forwardRequestExecutor, FORWARD_REQUEST_SHUTDOWN_TIMEOUT_MS, TimeUnit.MILLISECONDS);
    ThreadUtils.shutdownExecutorServiceQuietly(
            startAndStopExecutor, START_AND_STOP_SHUTDOWN_TIMEOUT_MS, TimeUnit.MILLISECONDS);

    log.info("Herder stopped");
    running = false;
}
@Override
public void connectors(final Callback> callback) {
log.trace("Submitting connector listing request");
addRequest(
() -> {
if (!checkRebalanceNeeded(callback))
callback.onCompletion(null, configState.connectors());
return null;
},
forwardErrorAndTickThreadStages(callback)
);
}
/**
 * Queues a request that looks up the info for a single connector.
 *
 * <p>When the request runs, nothing further is done if {@code checkRebalanceNeeded(callback)}
 * returns {@code true}. Otherwise the callback is completed with a {@code NotFoundException}
 * if the connector is absent from the config state, or with the connector's info if present.
 *
 * @param connName name of the connector to look up
 * @param callback receives the connector info, or the error produced while handling the request
 */
@Override
public void connectorInfo(final String connName, final Callback<ConnectorInfo> callback) {
    log.trace("Submitting connector info request {}", connName);
    addRequest(
        () -> {
            if (checkRebalanceNeeded(callback)) {
                return null;
            }
            if (!configState.contains(connName)) {
                callback.onCompletion(
                        new NotFoundException("Connector " + connName + " not found"), null);
            } else {
                callback.onCompletion(null, connectorInfo(connName));
            }
            return null;
        },
        forwardErrorAndTickThreadStages(callback)
    );
}
@Override
public void tasksConfig(String connName, final Callback