org.elasticsearch.cluster.service.ClusterApplierService Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.cluster.service;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.cluster.ClusterChangedEvent;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateApplier;
import org.elasticsearch.cluster.ClusterStateListener;
import org.elasticsearch.cluster.ClusterStateObserver;
import org.elasticsearch.cluster.LocalNodeMasterListener;
import org.elasticsearch.cluster.NodeConnectionsService;
import org.elasticsearch.cluster.TimeoutClusterStateListener;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.service.ClusterApplierRecordingService.Recorder;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.component.AbstractLifecycleComponent;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.concurrent.EsExecutors;
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
import org.elasticsearch.common.util.concurrent.PrioritizedEsThreadPoolExecutor;
import org.elasticsearch.common.util.concurrent.ThreadContext;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.indices.store.IndicesStore;
import org.elasticsearch.threadpool.Scheduler;
import org.elasticsearch.threadpool.ThreadPool;
import java.util.Collection;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import static org.elasticsearch.common.util.concurrent.EsExecutors.daemonThreadFactory;
public class ClusterApplierService extends AbstractLifecycleComponent implements ClusterApplier {
private static final Logger logger = LogManager.getLogger(ClusterApplierService.class);
public static final Setting CLUSTER_SERVICE_SLOW_TASK_LOGGING_THRESHOLD_SETTING = Setting.positiveTimeSetting(
"cluster.service.slow_task_logging_threshold",
TimeValue.timeValueSeconds(30),
Setting.Property.Dynamic,
Setting.Property.NodeScope
);
public static final String CLUSTER_UPDATE_THREAD_NAME = "clusterApplierService#updateTask";
private final ClusterSettings clusterSettings;
private final ThreadPool threadPool;
private volatile TimeValue slowTaskLoggingThreshold;
private volatile PrioritizedEsThreadPoolExecutor threadPoolExecutor;
/**
* Those 3 state listeners are changing infrequently - CopyOnWriteArrayList is just fine
*/
private final Collection highPriorityStateAppliers = new CopyOnWriteArrayList<>();
private final Collection normalPriorityStateAppliers = new CopyOnWriteArrayList<>();
private final Collection lowPriorityStateAppliers = new CopyOnWriteArrayList<>();
private final Collection clusterStateListeners = new CopyOnWriteArrayList<>();
private final Map timeoutClusterStateListeners = new ConcurrentHashMap<>();
private final AtomicReference state; // last applied state
private final String nodeName;
private final ClusterApplierRecordingService recordingService;
private NodeConnectionsService nodeConnectionsService;
public ClusterApplierService(String nodeName, Settings settings, ClusterSettings clusterSettings, ThreadPool threadPool) {
this.clusterSettings = clusterSettings;
this.threadPool = threadPool;
this.state = new AtomicReference<>();
this.nodeName = nodeName;
this.recordingService = new ClusterApplierRecordingService();
this.slowTaskLoggingThreshold = CLUSTER_SERVICE_SLOW_TASK_LOGGING_THRESHOLD_SETTING.get(settings);
this.clusterSettings.addSettingsUpdateConsumer(
CLUSTER_SERVICE_SLOW_TASK_LOGGING_THRESHOLD_SETTING,
this::setSlowTaskLoggingThreshold
);
}
private void setSlowTaskLoggingThreshold(TimeValue slowTaskLoggingThreshold) {
this.slowTaskLoggingThreshold = slowTaskLoggingThreshold;
}
public synchronized void setNodeConnectionsService(NodeConnectionsService nodeConnectionsService) {
assert this.nodeConnectionsService == null : "nodeConnectionsService is already set";
this.nodeConnectionsService = nodeConnectionsService;
}
@Override
public void setInitialState(ClusterState initialState) {
if (lifecycle.started()) {
throw new IllegalStateException("can't set initial state when started");
}
assert state.get() == null : "state is already set";
state.set(initialState);
}
@Override
protected synchronized void doStart() {
Objects.requireNonNull(nodeConnectionsService, "please set the node connection service before starting");
Objects.requireNonNull(state.get(), "please set initial state before starting");
threadPoolExecutor = createThreadPoolExecutor();
}
protected PrioritizedEsThreadPoolExecutor createThreadPoolExecutor() {
return EsExecutors.newSinglePrioritizing(
nodeName + "/" + CLUSTER_UPDATE_THREAD_NAME,
daemonThreadFactory(nodeName, CLUSTER_UPDATE_THREAD_NAME),
threadPool.getThreadContext(),
threadPool.scheduler(),
PrioritizedEsThreadPoolExecutor.StarvationWatcher.NOOP_STARVATION_WATCHER
);
}
class UpdateTask extends SourcePrioritizedRunnable {
private final ActionListener listener;
private final Function updateFunction;
UpdateTask(Priority priority, String source, ActionListener listener, Function updateFunction) {
super(priority, source);
this.listener = listener;
this.updateFunction = updateFunction;
}
@Override
public void run() {
runTask(source(), updateFunction, listener);
}
}
@Override
protected synchronized void doStop() {
for (Map.Entry onGoingTimeout : timeoutClusterStateListeners.entrySet()) {
try {
onGoingTimeout.getValue().cancel();
onGoingTimeout.getKey().onClose();
} catch (Exception ex) {
logger.debug("failed to notify listeners on shutdown", ex);
}
}
ThreadPool.terminate(threadPoolExecutor, 10, TimeUnit.SECONDS);
}
@Override
protected synchronized void doClose() {}
/**
* The current cluster state.
* Should be renamed to appliedClusterState
*/
public ClusterState state() {
assert assertNotCalledFromClusterStateApplier();
ClusterState clusterState = this.state.get();
assert clusterState != null : "initial cluster state not set yet";
return clusterState;
}
/**
* Adds a high priority applier of updated cluster states.
*/
public void addHighPriorityApplier(ClusterStateApplier applier) {
highPriorityStateAppliers.add(applier);
}
/**
* Adds an applier which will be called after all high priority and normal appliers have been called.
*/
public void addLowPriorityApplier(ClusterStateApplier applier) {
lowPriorityStateAppliers.add(applier);
}
/**
* Adds a applier of updated cluster states.
*/
public void addStateApplier(ClusterStateApplier applier) {
normalPriorityStateAppliers.add(applier);
}
/**
* Removes an applier of updated cluster states.
*/
public void removeApplier(ClusterStateApplier applier) {
normalPriorityStateAppliers.remove(applier);
highPriorityStateAppliers.remove(applier);
lowPriorityStateAppliers.remove(applier);
}
/**
* Add a listener for updated cluster states. Listeners are executed in the system thread context.
*/
public void addListener(ClusterStateListener listener) {
clusterStateListeners.add(listener);
}
/**
* Removes a listener for updated cluster states.
*/
public void removeListener(final ClusterStateListener listener) {
clusterStateListeners.remove(listener);
}
/**
* Removes a timeout listener for updated cluster states.
*/
public void removeTimeoutListener(TimeoutClusterStateListener listener) {
final NotifyTimeout timeout = timeoutClusterStateListeners.remove(listener);
if (timeout != null) {
timeout.cancel();
}
}
/**
* Add a listener for on/off local node master events
*/
public void addLocalNodeMasterListener(LocalNodeMasterListener listener) {
addListener(listener);
}
/**
* Adds a cluster state listener that is expected to be removed during a short period of time.
* If provided, the listener will be notified once a specific time has elapsed.
*
* NOTE: the listener is not removed on timeout. This is the responsibility of the caller.
*/
public void addTimeoutListener(@Nullable final TimeValue timeout, final TimeoutClusterStateListener listener) {
if (lifecycle.stoppedOrClosed()) {
listener.onClose();
return;
}
// call the post added notification on the same event thread
try {
threadPoolExecutor.execute(new SourcePrioritizedRunnable(Priority.HIGH, "_add_listener_") {
@Override
public void run() {
final NotifyTimeout notifyTimeout = new NotifyTimeout(listener, timeout);
final NotifyTimeout previous = timeoutClusterStateListeners.put(listener, notifyTimeout);
assert previous == null : "Added same listener [" + listener + "]";
if (lifecycle.stoppedOrClosed()) {
listener.onClose();
return;
}
if (timeout != null) {
notifyTimeout.cancellable = threadPool.schedule(notifyTimeout, timeout, ThreadPool.Names.GENERIC);
}
listener.postAdded();
}
});
} catch (EsRejectedExecutionException e) {
if (lifecycle.stoppedOrClosed()) {
listener.onClose();
} else {
throw e;
}
}
}
/**
* Run the given clusterStateConsumer on the applier thread. Should only be used in tests and by {@link IndicesStore} when it's deleting
* the data behind a shard that moved away from a node.
*
* @param priority {@link Priority#HIGH} unless in tests.
*/
// TODO get rid of this, make it so that shard data can be deleted without blocking the applier thread.
public void runOnApplierThread(
String source,
Priority priority,
Consumer clusterStateConsumer,
ActionListener listener
) {
submitStateUpdateTask(source, priority, (clusterState) -> {
clusterStateConsumer.accept(clusterState);
return clusterState;
}, listener);
}
public ThreadPool threadPool() {
return threadPool;
}
@Override
public void onNewClusterState(
final String source,
final Supplier clusterStateSupplier,
final ActionListener listener
) {
submitStateUpdateTask(source, Priority.HIGH, currentState -> {
ClusterState nextState = clusterStateSupplier.get();
if (nextState != null) {
return nextState;
} else {
return currentState;
}
}, listener);
}
private void submitStateUpdateTask(
final String source,
final Priority priority,
final Function clusterStateUpdate,
final ActionListener listener
) {
if (lifecycle.started() == false) {
return;
}
final ThreadContext threadContext = threadPool.getThreadContext();
final Supplier storedContextSupplier = threadContext.newRestorableContext(true);
try (ThreadContext.StoredContext ignore = threadContext.stashContext()) {
threadContext.markAsSystemContext();
threadPoolExecutor.execute(
new UpdateTask(
priority,
source,
new ClusterApplyActionListener(source, listener, storedContextSupplier),
clusterStateUpdate
)
);
} catch (EsRejectedExecutionException e) {
assert lifecycle.stoppedOrClosed() : e;
// ignore cases where we are shutting down..., there is really nothing interesting to be done here...
if (lifecycle.stoppedOrClosed() == false) {
throw e;
}
}
}
/** asserts that the current thread is NOT the cluster state update thread */
public static boolean assertNotClusterStateUpdateThread(String reason) {
assert Thread.currentThread().getName().contains(CLUSTER_UPDATE_THREAD_NAME) == false
: "Expected current thread ["
+ Thread.currentThread()
+ "] to not be the cluster state update thread. Reason: ["
+ reason
+ "]";
return true;
}
/** asserts that the current stack trace does NOT involve a cluster state applier */
private static boolean assertNotCalledFromClusterStateApplier() {
if (Thread.currentThread().getName().contains(CLUSTER_UPDATE_THREAD_NAME)) {
for (StackTraceElement element : Thread.currentThread().getStackTrace()) {
final String className = element.getClassName();
final String methodName = element.getMethodName();
if (className.equals(ClusterStateObserver.class.getName())) {
// people may start an observer from an applier
return true;
} else if (className.equals(ClusterApplierService.class.getName()) && methodName.equals("callClusterStateAppliers")) {
throw new AssertionError("should not be called by a cluster state applier: the applied state is not yet available");
}
}
}
return true;
}
private void runTask(String source, Function updateFunction, ActionListener clusterApplyListener) {
if (lifecycle.started() == false) {
logger.debug("processing [{}]: ignoring, cluster applier service not started", source);
return;
}
logger.debug("processing [{}]: execute", source);
final ClusterState previousClusterState = state.get();
final long startTimeMillis = threadPool.relativeTimeInMillis();
final Recorder stopWatch = new Recorder(threadPool::rawRelativeTimeInMillis);
final ClusterState newClusterState;
try {
try (Releasable ignored = stopWatch.record("running task [" + source + ']')) {
newClusterState = updateFunction.apply(previousClusterState);
}
} catch (Exception e) {
TimeValue executionTime = getTimeSince(startTimeMillis);
logger.trace(
() -> new ParameterizedMessage(
"failed to execute cluster state applier in [{}], state:\nversion [{}], source [{}]\n{}",
executionTime,
previousClusterState.version(),
source,
previousClusterState
),
e
);
warnAboutSlowTaskIfNeeded(executionTime, source, stopWatch);
clusterApplyListener.onFailure(e);
return;
}
if (previousClusterState == newClusterState) {
TimeValue executionTime = getTimeSince(startTimeMillis);
logger.debug("processing [{}]: took [{}] no change in cluster state", source, executionTime);
warnAboutSlowTaskIfNeeded(executionTime, source, stopWatch);
clusterApplyListener.onResponse(null);
} else {
if (logger.isTraceEnabled()) {
logger.debug("cluster state updated, version [{}], source [{}]\n{}", newClusterState.version(), source, newClusterState);
} else {
logger.debug("cluster state updated, version [{}], source [{}]", newClusterState.version(), source);
}
try {
applyChanges(previousClusterState, newClusterState, source, stopWatch);
TimeValue executionTime = getTimeSince(startTimeMillis);
logger.debug(
"processing [{}]: took [{}] done applying updated cluster state (version: {}, uuid: {})",
source,
executionTime,
newClusterState.version(),
newClusterState.stateUUID()
);
warnAboutSlowTaskIfNeeded(executionTime, source, stopWatch);
clusterApplyListener.onResponse(null);
} catch (Exception e) {
TimeValue executionTime = getTimeSince(startTimeMillis);
if (logger.isTraceEnabled()) {
logger.warn(
new ParameterizedMessage(
"failed to apply updated cluster state in [{}]:\nversion [{}], uuid [{}], source [{}]\n{}",
executionTime,
newClusterState.version(),
newClusterState.stateUUID(),
source,
newClusterState
),
e
);
} else {
logger.warn(
new ParameterizedMessage(
"failed to apply updated cluster state in [{}]:\nversion [{}], uuid [{}], source [{}]",
executionTime,
newClusterState.version(),
newClusterState.stateUUID(),
source
),
e
);
}
// failing to apply a cluster state with an exception indicates a bug in validation or in one of the appliers; if we
// continue we will retry with the same cluster state but that might not help.
assert applicationMayFail();
clusterApplyListener.onFailure(e);
}
}
}
private TimeValue getTimeSince(long startTimeMillis) {
return TimeValue.timeValueMillis(Math.max(0, threadPool.relativeTimeInMillis() - startTimeMillis));
}
private void applyChanges(ClusterState previousClusterState, ClusterState newClusterState, String source, Recorder stopWatch) {
ClusterChangedEvent clusterChangedEvent = new ClusterChangedEvent(source, newClusterState, previousClusterState);
// new cluster state, notify all listeners
final DiscoveryNodes.Delta nodesDelta = clusterChangedEvent.nodesDelta();
if (nodesDelta.hasChanges() && logger.isInfoEnabled()) {
String summary = nodesDelta.shortSummary();
if (summary.length() > 0) {
logger.info("{}, term: {}, version: {}, reason: {}", summary, newClusterState.term(), newClusterState.version(), source);
}
}
logger.trace("connecting to nodes of cluster state with version {}", newClusterState.version());
try (Releasable ignored = stopWatch.record("connecting to new nodes")) {
connectToNodesAndWait(newClusterState);
}
// nothing to do until we actually recover from the gateway or any other block indicates we need to disable persistency
if (clusterChangedEvent.state().blocks().disableStatePersistence() == false && clusterChangedEvent.metadataChanged()) {
logger.debug("applying settings from cluster state with version {}", newClusterState.version());
final Settings incomingSettings = clusterChangedEvent.state().metadata().settings();
try (Releasable ignored = stopWatch.record("applying settings")) {
clusterSettings.applySettings(incomingSettings);
}
}
logger.debug("apply cluster state with version {}", newClusterState.version());
callClusterStateAppliers(clusterChangedEvent, stopWatch);
nodeConnectionsService.disconnectFromNodesExcept(newClusterState.nodes());
assert newClusterState.coordinationMetadata()
.getLastAcceptedConfiguration()
.equals(newClusterState.coordinationMetadata().getLastCommittedConfiguration())
: newClusterState.coordinationMetadata().getLastAcceptedConfiguration()
+ " vs "
+ newClusterState.coordinationMetadata().getLastCommittedConfiguration()
+ " on "
+ newClusterState.nodes().getLocalNode();
logger.debug("set locally applied cluster state to version {}", newClusterState.version());
state.set(newClusterState);
callClusterStateListeners(clusterChangedEvent, stopWatch);
}
protected void connectToNodesAndWait(ClusterState newClusterState) {
// can't wait for an ActionFuture on the cluster applier thread, but we do want to block the thread here, so use a CountDownLatch.
final CountDownLatch countDownLatch = new CountDownLatch(1);
connectToNodesAsync(newClusterState, countDownLatch::countDown);
try {
countDownLatch.await();
} catch (InterruptedException e) {
logger.debug("interrupted while connecting to nodes, continuing", e);
Thread.currentThread().interrupt();
}
}
protected final void connectToNodesAsync(ClusterState newClusterState, Runnable onCompletion) {
nodeConnectionsService.connectToNodes(newClusterState.nodes(), onCompletion);
}
private void callClusterStateAppliers(ClusterChangedEvent clusterChangedEvent, Recorder stopWatch) {
callClusterStateAppliers(clusterChangedEvent, stopWatch, highPriorityStateAppliers);
callClusterStateAppliers(clusterChangedEvent, stopWatch, normalPriorityStateAppliers);
callClusterStateAppliers(clusterChangedEvent, stopWatch, lowPriorityStateAppliers);
}
private static void callClusterStateAppliers(
ClusterChangedEvent clusterChangedEvent,
Recorder stopWatch,
Collection clusterStateAppliers
) {
for (ClusterStateApplier applier : clusterStateAppliers) {
logger.trace("calling [{}] with change to version [{}]", applier, clusterChangedEvent.state().version());
final String name = applier.toString();
try (Releasable ignored = stopWatch.record(name)) {
applier.applyClusterState(clusterChangedEvent);
}
}
}
private void callClusterStateListeners(ClusterChangedEvent clusterChangedEvent, Recorder stopWatch) {
callClusterStateListener(clusterChangedEvent, stopWatch, clusterStateListeners);
callClusterStateListener(clusterChangedEvent, stopWatch, timeoutClusterStateListeners.keySet());
}
private void callClusterStateListener(
ClusterChangedEvent clusterChangedEvent,
Recorder stopWatch,
Collection listeners
) {
for (ClusterStateListener listener : listeners) {
try {
logger.trace("calling [{}] with change to version [{}]", listener, clusterChangedEvent.state().version());
final String name = listener.toString();
try (Releasable ignored = stopWatch.record(name)) {
listener.clusterChanged(clusterChangedEvent);
}
} catch (Exception ex) {
logger.warn("failed to notify ClusterStateListener", ex);
}
}
}
private static class ClusterApplyActionListener implements ActionListener {
private final String source;
private final ActionListener listener;
private final Supplier storedContextSupplier;
ClusterApplyActionListener(
String source,
ActionListener listener,
Supplier storedContextSupplier
) {
this.source = source;
this.listener = listener;
this.storedContextSupplier = storedContextSupplier;
}
@Override
public void onFailure(Exception e) {
try (ThreadContext.StoredContext ignored = storedContextSupplier.get()) {
listener.onFailure(e);
} catch (Exception inner) {
inner.addSuppressed(e);
assert false : inner;
logger.error(new ParameterizedMessage("exception thrown by listener notifying of failure from [{}]", source), inner);
}
}
@Override
public void onResponse(Void unused) {
try (ThreadContext.StoredContext ignored = storedContextSupplier.get()) {
listener.onResponse(null);
} catch (Exception e) {
assert false : e;
logger.error(
new ParameterizedMessage("exception thrown by listener while notifying of cluster state processed from [{}]", source),
e
);
}
}
}
private void warnAboutSlowTaskIfNeeded(TimeValue executionTime, String source, Recorder recorder) {
if (executionTime.getMillis() > slowTaskLoggingThreshold.getMillis()) {
logger.warn(
"cluster state applier task [{}] took [{}] which is above the warn threshold of [{}]: {}",
source,
executionTime,
slowTaskLoggingThreshold,
recorder.getRecordings().stream().map(ti -> '[' + ti.v1() + "] took [" + ti.v2() + "ms]").collect(Collectors.joining(", "))
);
}
recordingService.updateStats(recorder);
}
private class NotifyTimeout implements Runnable {
final TimeoutClusterStateListener listener;
@Nullable
final TimeValue timeout;
volatile Scheduler.Cancellable cancellable;
NotifyTimeout(TimeoutClusterStateListener listener, @Nullable TimeValue timeout) {
this.listener = listener;
this.timeout = timeout;
}
public void cancel() {
if (cancellable != null) {
cancellable.cancel();
}
}
@Override
public void run() {
assert timeout != null : "This should only ever execute if there's an actual timeout set";
if (cancellable != null && cancellable.isCancelled()) {
return;
}
if (lifecycle.stoppedOrClosed()) {
listener.onClose();
} else {
listener.onTimeout(this.timeout);
}
// note, we rely on the listener to remove itself in case of timeout if needed
}
}
// overridden by tests that need to check behaviour in the event of an application failure without tripping assertions
protected boolean applicationMayFail() {
return false;
}
@Override
public ClusterApplierRecordingService.Stats getStats() {
return recordingService.getStats();
}
}