/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.cluster;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.FailedNodeException;
import org.elasticsearch.action.admin.cluster.node.stats.NodeStats;
import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsRequest;
import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse;
import org.elasticsearch.action.admin.indices.stats.IndicesStatsRequest;
import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse;
import org.elasticsearch.action.admin.indices.stats.ShardStats;
import org.elasticsearch.action.support.IndicesOptions;
import org.elasticsearch.action.support.PlainActionFuture;
import org.elasticsearch.action.support.RefCountingRunnable;
import org.elasticsearch.action.support.ThreadedActionListener;
import org.elasticsearch.client.internal.Client;
import org.elasticsearch.cluster.block.ClusterBlockException;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.RoutingTable;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.allocation.DiskThresholdSettings;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.store.StoreStats;
import org.elasticsearch.threadpool.ThreadPool;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.function.Consumer;
import static org.elasticsearch.core.Strings.format;
/**
* InternalClusterInfoService provides the ClusterInfoService interface,
* routinely updated on a timer. The timer can be dynamically changed by
* setting the <code>cluster.info.update.interval</code> setting (defaulting
* to 30 seconds). The InternalClusterInfoService only runs on the master node.
* Listens for changes in the number of data nodes and immediately submits a
* ClusterInfoUpdateJob if a node has been added.
*
* Every time the timer runs, if <code>cluster.routing.allocation.disk.threshold_enabled</code>
* is enabled, the service gathers information about disk usage and shard sizes across the
* cluster, computes a new cluster info, and notifies the registered listeners. If disk
* threshold monitoring is disabled, listeners are called with an empty cluster info.
*/
public class InternalClusterInfoService implements ClusterInfoService, ClusterStateListener {
private static final Logger logger = LogManager.getLogger(InternalClusterInfoService.class);
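// How often the refresh runs while this node is the elected master (default 30s, minimum 10s).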
public static final Setting<TimeValue> INTERNAL_CLUSTER_INFO_UPDATE_INTERVAL_SETTING = Setting.timeSetting(
"cluster.info.update.interval",
TimeValue.timeValueSeconds(30),
TimeValue.timeValueSeconds(10),
Property.Dynamic,
Property.NodeScope
);
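// Upper bound on how long each stats fetch (node stats, indices stats) may run (default 15s).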
public static final Setting<TimeValue> INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING = Setting.positiveTimeSetting(
"cluster.info.update.timeout",
TimeValue.timeValueSeconds(15),
Property.Dynamic,
Property.NodeScope
);
private volatile boolean enabled;
private volatile TimeValue updateFrequency;
private volatile TimeValue fetchTimeout;
private volatile Map<String, DiskUsage> leastAvailableSpaceUsages;
private volatile Map<String, DiskUsage> mostAvailableSpaceUsages;
private volatile IndicesStatsSummary indicesStatsSummary;
private final ThreadPool threadPool;
private final Client client;
private final List<Consumer<ClusterInfo>> listeners = new CopyOnWriteArrayList<>();
private final Object mutex = new Object();
private final List<ActionListener<ClusterInfo>> nextRefreshListeners = new ArrayList<>();
private final ClusterService clusterService;
private AsyncRefresh currentRefresh;
private RefreshScheduler refreshScheduler;
public InternalClusterInfoService(Settings settings, ClusterService clusterService, ThreadPool threadPool, Client client) {
this.leastAvailableSpaceUsages = Map.of();
this.mostAvailableSpaceUsages = Map.of();
this.indicesStatsSummary = IndicesStatsSummary.EMPTY;
this.threadPool = threadPool;
this.client = client;
this.clusterService = clusterService;
this.updateFrequency = INTERNAL_CLUSTER_INFO_UPDATE_INTERVAL_SETTING.get(settings);
this.fetchTimeout = INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING.get(settings);
this.enabled = DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.get(settings);
ClusterSettings clusterSettings = clusterService.getClusterSettings();
clusterSettings.addSettingsUpdateConsumer(INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING, this::setFetchTimeout);
clusterSettings.addSettingsUpdateConsumer(INTERNAL_CLUSTER_INFO_UPDATE_INTERVAL_SETTING, this::setUpdateFrequency);
clusterSettings.addSettingsUpdateConsumer(
DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING,
this::setEnabled
);
}
private void setEnabled(boolean enabled) {
this.enabled = enabled;
}
private void setFetchTimeout(TimeValue fetchTimeout) {
this.fetchTimeout = fetchTimeout;
}
void setUpdateFrequency(TimeValue updateFrequency) {
this.updateFrequency = updateFrequency;
}
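// Runs on every cluster state update. Only the elected master keeps a RefreshScheduler; on
// losing mastership the scheduler is dropped so any in-flight refresh stops rescheduling itself.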
@Override
public void clusterChanged(ClusterChangedEvent event) {
final Runnable newRefresh;
synchronized (mutex) {
if (event.localNodeMaster() == false) {
refreshScheduler = null;
return;
}
if (refreshScheduler == null) {
logger.trace("elected as master, scheduling cluster info update tasks");
refreshScheduler = new RefreshScheduler();
nextRefreshListeners.add(refreshScheduler.getListener());
}
newRefresh = getNewRefresh();
assert assertRefreshInvariant();
}
newRefresh.run();
// Refresh if a data node was added
for (DiscoveryNode addedNode : event.nodesDelta().addedNodes()) {
if (addedNode.canContainData()) {
refreshAsync(new PlainActionFuture<>());
break;
}
}
}
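// A single in-flight refresh: fetches node stats and indices stats in parallel and, once both
// responses (or failures) have been handled, notifies all listeners exactly once via the
// ref-counted callListeners().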
private class AsyncRefresh {
private final List<ActionListener<ClusterInfo>> thisRefreshListeners;
private final RefCountingRunnable fetchRefs = new RefCountingRunnable(this::callListeners);
AsyncRefresh(List<ActionListener<ClusterInfo>> thisRefreshListeners) {
this.thisRefreshListeners = thisRefreshListeners;
}
void execute() {
if (enabled == false) {
logger.trace("skipping collecting info from cluster, notifying listeners with empty cluster info");
leastAvailableSpaceUsages = Map.of();
mostAvailableSpaceUsages = Map.of();
indicesStatsSummary = IndicesStatsSummary.EMPTY;
callListeners();
return;
}
logger.trace("starting async refresh");
try (var ignoredRefs = fetchRefs) {
try (var ignored = threadPool.getThreadContext().clearTraceContext()) {
fetchNodeStats();
}
try (var ignored = threadPool.getThreadContext().clearTraceContext()) {
fetchIndicesStats();
}
}
}
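// Requests store-level stats only (clear() then store(true)) for all open and closed indices,
// including hidden ones, and unwraps per-shard failures so that a node-level outage is logged
// only once per node.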
private void fetchIndicesStats() {
final IndicesStatsRequest indicesStatsRequest = new IndicesStatsRequest();
indicesStatsRequest.clear();
indicesStatsRequest.store(true);
indicesStatsRequest.indicesOptions(IndicesOptions.STRICT_EXPAND_OPEN_CLOSED_HIDDEN);
indicesStatsRequest.timeout(fetchTimeout);
client.admin()
.indices()
.stats(
indicesStatsRequest,
new ThreadedActionListener<>(
threadPool.executor(ThreadPool.Names.MANAGEMENT),
ActionListener.releaseAfter(new ActionListener<>() {
@Override
public void onResponse(IndicesStatsResponse indicesStatsResponse) {
logger.trace("received indices stats response");
if (indicesStatsResponse.getShardFailures().length > 0) {
final Set<String> failedNodeIds = new HashSet<>();
for (final var shardFailure : indicesStatsResponse.getShardFailures()) {
if (shardFailure.getCause() instanceof final FailedNodeException failedNodeException) {
if (failedNodeIds.add(failedNodeException.nodeId())) {
logger.warn(
() -> format(
"failed to retrieve shard stats from node [%s]",
failedNodeException.nodeId()
),
failedNodeException.getCause()
);
}
logger.trace(
() -> format(
"failed to retrieve stats for shard [%s][%s]",
shardFailure.index(),
shardFailure.shardId()
),
shardFailure.getCause()
);
} else {
logger.warn(
() -> format(
"failed to retrieve stats for shard [%s][%s]",
shardFailure.index(),
shardFailure.shardId()
),
shardFailure.getCause()
);
}
}
}
final ShardStats[] stats = indicesStatsResponse.getShards();
final Map<String, Long> shardSizeByIdentifierBuilder = new HashMap<>();
final Map<ShardId, Long> shardDataSetSizeBuilder = new HashMap<>();
final Map<ClusterInfo.NodeAndShard, String> dataPath = new HashMap<>();
final Map<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace.Builder> reservedSpaceBuilders = new HashMap<>();
buildShardLevelInfo(
clusterService.state().routingTable(),
adjustShardStats(stats),
shardSizeByIdentifierBuilder,
shardDataSetSizeBuilder,
dataPath,
reservedSpaceBuilders
);
final Map<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace> reservedSpace = new HashMap<>();
reservedSpaceBuilders.forEach((nodeAndPath, builder) -> reservedSpace.put(nodeAndPath, builder.build()));
indicesStatsSummary = new IndicesStatsSummary(
Map.copyOf(shardSizeByIdentifierBuilder),
Map.copyOf(shardDataSetSizeBuilder),
Map.copyOf(dataPath),
Map.copyOf(reservedSpace)
);
}
@Override
public void onFailure(Exception e) {
if (e instanceof ClusterBlockException) {
logger.trace("failed to retrieve indices stats", e);
} else {
logger.warn("failed to retrieve indices stats", e);
}
indicesStatsSummary = IndicesStatsSummary.EMPTY;
}
}, fetchRefs.acquire())
)
);
}
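// Fetches filesystem stats from data nodes only (the "data:true" node filter), trimming the
// response to the FS metric.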
private void fetchNodeStats() {
final NodesStatsRequest nodesStatsRequest = new NodesStatsRequest("data:true");
nodesStatsRequest.clear();
nodesStatsRequest.addMetric(NodesStatsRequest.Metric.FS.metricName());
nodesStatsRequest.timeout(fetchTimeout);
client.admin().cluster().nodesStats(nodesStatsRequest, ActionListener.releaseAfter(new ActionListener<>() {
@Override
public void onResponse(NodesStatsResponse nodesStatsResponse) {
logger.trace("received node stats response");
for (final FailedNodeException failure : nodesStatsResponse.failures()) {
logger.warn(() -> "failed to retrieve stats for node [" + failure.nodeId() + "]", failure.getCause());
}
Map<String, DiskUsage> leastAvailableUsagesBuilder = new HashMap<>();
Map<String, DiskUsage> mostAvailableUsagesBuilder = new HashMap<>();
fillDiskUsagePerNode(
adjustNodesStats(nodesStatsResponse.getNodes()),
leastAvailableUsagesBuilder,
mostAvailableUsagesBuilder
);
leastAvailableSpaceUsages = Map.copyOf(leastAvailableUsagesBuilder);
mostAvailableSpaceUsages = Map.copyOf(mostAvailableUsagesBuilder);
}
@Override
public void onFailure(Exception e) {
if (e instanceof ClusterBlockException) {
logger.trace("failed to retrieve node stats", e);
} else {
logger.warn("failed to retrieve node stats", e);
}
leastAvailableSpaceUsages = Map.of();
mostAvailableSpaceUsages = Map.of();
}
}, fetchRefs.acquire()));
}
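// Invoked once all stats fetches have completed; computes the new ClusterInfo, notifies the
// registered consumers (isolating any listener exception), then completes this refresh's
// listeners and hands control back to onRefreshComplete.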
private void callListeners() {
try {
logger.trace("stats all received, computing cluster info and notifying listeners");
final ClusterInfo clusterInfo = getClusterInfo();
boolean anyListeners = false;
for (final Consumer<ClusterInfo> listener : listeners) {
anyListeners = true;
try {
logger.trace("notifying [{}] of new cluster info", listener);
listener.accept(clusterInfo);
} catch (Exception e) {
logger.info(() -> "failed to notify [" + listener + "] of new cluster info", e);
}
}
assert anyListeners : "expected to notify at least one listener";
for (final ActionListener<ClusterInfo> listener : thisRefreshListeners) {
listener.onResponse(clusterInfo);
}
} finally {
onRefreshComplete(this);
}
}
}
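// Clears the completed refresh under the mutex and immediately starts another one if listeners
// queued up while it was running.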
private void onRefreshComplete(AsyncRefresh completedRefresh) {
final Runnable newRefresh;
synchronized (mutex) {
assert currentRefresh == completedRefresh;
currentRefresh = null;
// We only ever run one refresh at once; if another refresh was requested while this one was running then we must start another
// to ensure that the stats it sees are up-to-date.
newRefresh = getNewRefresh();
assert assertRefreshInvariant();
}
newRefresh.run();
}
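// Returns the action to run after releasing the mutex: a no-op if a refresh is already running
// or nobody is waiting, otherwise a new AsyncRefresh covering all currently-queued listeners.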
private Runnable getNewRefresh() {
assert Thread.holdsLock(mutex) : "mutex not held";
if (currentRefresh != null) {
return () -> {};
}
if (nextRefreshListeners.isEmpty()) {
return () -> {};
}
final ArrayList<ActionListener<ClusterInfo>> thisRefreshListeners = new ArrayList<>(nextRefreshListeners);
nextRefreshListeners.clear();
currentRefresh = new AsyncRefresh(thisRefreshListeners);
return currentRefresh::execute;
}
private boolean assertRefreshInvariant() {
assert Thread.holdsLock(mutex) : "mutex not held";
// We never leave a refresh listener waiting unless we're already refreshing (which will pick up the waiting listener on completion)
assert nextRefreshListeners.isEmpty() || currentRefresh != null;
return true;
}
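// Keeps the periodic refresh chain alive: each completed refresh schedules the next one after
// updateFrequency, and the chain stops once this scheduler is no longer the current one (i.e.
// this node stopped being master).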
private class RefreshScheduler {
ActionListener<ClusterInfo> getListener() {
return ActionListener.running(() -> {
if (shouldRefresh()) {
threadPool.scheduleUnlessShuttingDown(updateFrequency, ThreadPool.Names.SAME, () -> {
if (shouldRefresh()) {
refreshAsync(getListener());
}
});
}
});
}
private boolean shouldRefresh() {
synchronized (mutex) {
return refreshScheduler == this;
}
}
}
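// Assembles an immutable snapshot from the most recently fetched stats; all inputs are volatile
// reads of immutable maps, so this can be called from any thread.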
@Override
public ClusterInfo getClusterInfo() {
final IndicesStatsSummary indicesStatsSummary = this.indicesStatsSummary; // single volatile read
return new ClusterInfo(
leastAvailableSpaceUsages,
mostAvailableSpaceUsages,
indicesStatsSummary.shardSizes,
indicesStatsSummary.shardDataSetSizes,
indicesStatsSummary.dataPath,
indicesStatsSummary.reservedSpace
);
}
// allow tests to adjust the node stats on receipt
List<NodeStats> adjustNodesStats(List<NodeStats> nodeStats) {
return nodeStats;
}
ShardStats[] adjustShardStats(ShardStats[] shardStats) {
return shardStats;
}
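// Queues the listener for the next refresh and starts one immediately unless a refresh is
// already in flight, in which case the running refresh's completion will pick it up.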
void refreshAsync(ActionListener<ClusterInfo> future) {
final Runnable newRefresh;
synchronized (mutex) {
nextRefreshListeners.add(future);
newRefresh = getNewRefresh();
assert assertRefreshInvariant();
}
newRefresh.run();
}
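// Registers a consumer that is notified with every newly-computed ClusterInfo. A minimal usage
// sketch (hypothetical caller, for illustration only):
//   clusterInfoService.addListener(info -> logger.trace("received new cluster info: {}", info));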
@Override
public void addListener(Consumer<ClusterInfo> clusterInfoConsumer) {
listeners.add(clusterInfoConsumer);
}
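// Aggregates per-shard store stats: the size of each shard copy, the largest data-set size seen
// per ShardId (covering both primaries and replicas), the node and data path hosting each shard,
// and the space reserved on each node/path (e.g. by ongoing recoveries), where known.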
static void buildShardLevelInfo(
RoutingTable routingTable,
ShardStats[] stats,
Map<String, Long> shardSizes,
Map<ShardId, Long> shardDataSetSizeBuilder,
Map<ClusterInfo.NodeAndShard, String> dataPathByShard,
Map<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace.Builder> reservedSpaceByShard
) {
for (ShardStats s : stats) {
final ShardRouting shardRouting = routingTable.deduplicate(s.getShardRouting());
dataPathByShard.put(ClusterInfo.NodeAndShard.from(shardRouting), s.getDataPath());
final StoreStats storeStats = s.getStats().getStore();
if (storeStats == null) {
continue;
}
final long size = storeStats.sizeInBytes();
final long dataSetSize = storeStats.totalDataSetSizeInBytes();
final long reserved = storeStats.getReservedSize().getBytes();
final String shardIdentifier = ClusterInfo.shardIdentifierFromRouting(shardRouting);
logger.trace("shard: {} size: {} reserved: {}", shardIdentifier, size, reserved);
shardSizes.put(shardIdentifier, size);
if (dataSetSize > shardDataSetSizeBuilder.getOrDefault(shardRouting.shardId(), -1L)) {
shardDataSetSizeBuilder.put(shardRouting.shardId(), dataSetSize);
}
if (reserved != StoreStats.UNKNOWN_RESERVED_BYTES) {
final ClusterInfo.ReservedSpace.Builder reservedSpaceBuilder = reservedSpaceByShard.computeIfAbsent(
new ClusterInfo.NodeAndPath(shardRouting.currentNodeId(), s.getDataPath()),
t -> new ClusterInfo.ReservedSpace.Builder()
);
reservedSpaceBuilder.add(shardRouting.shardId(), reserved);
}
}
}
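// Records, per data node, the data path with the least and the most available disk space; nodes
// that reported no usable filesystem stats are skipped.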
private static void fillDiskUsagePerNode(
List<NodeStats> nodeStatsArray,
Map<String, DiskUsage> newLeastAvailableUsages,
Map<String, DiskUsage> newMostAvailableUsages
) {
for (NodeStats nodeStats : nodeStatsArray) {
DiskUsage leastAvailableUsage = DiskUsage.findLeastAvailablePath(nodeStats);
if (leastAvailableUsage != null) {
newLeastAvailableUsages.put(nodeStats.getNode().getId(), leastAvailableUsage);
}
DiskUsage mostAvailableUsage = DiskUsage.findMostAvailable(nodeStats);
if (mostAvailableUsage != null) {
newMostAvailableUsages.put(nodeStats.getNode().getId(), mostAvailableUsage);
}
}
}
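// Immutable holder for the shard-level results of a single indices-stats refresh.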
private record IndicesStatsSummary(
Map<String, Long> shardSizes,
Map<ShardId, Long> shardDataSetSizes,
Map<ClusterInfo.NodeAndShard, String> dataPath,
Map<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace> reservedSpace
) {
static final IndicesStatsSummary EMPTY = new IndicesStatsSummary(Map.of(), Map.of(), Map.of(), Map.of());
}
}