All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.cluster.InternalClusterInfoService Maven / Gradle / Ivy

There is a newer version: 8.14.0
Show newest version
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.cluster;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.FailedNodeException;
import org.elasticsearch.action.admin.cluster.node.stats.NodeStats;
import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsRequest;
import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse;
import org.elasticsearch.action.admin.indices.stats.IndicesStatsRequest;
import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse;
import org.elasticsearch.action.admin.indices.stats.ShardStats;
import org.elasticsearch.action.support.DefaultShardOperationFailedException;
import org.elasticsearch.action.support.IndicesOptions;
import org.elasticsearch.action.support.PlainActionFuture;
import org.elasticsearch.client.internal.Client;
import org.elasticsearch.cluster.block.ClusterBlockException;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.allocation.DiskThresholdSettings;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.concurrent.CountDown;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.store.StoreStats;
import org.elasticsearch.monitor.fs.FsInfo;
import org.elasticsearch.threadpool.ThreadPool;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.function.Consumer;

/**
 * InternalClusterInfoService provides the ClusterInfoService interface,
 * routinely updated on a timer. The timer can be dynamically changed by
 * setting the cluster.info.update.interval setting (defaulting
 * to 30 seconds). The InternalClusterInfoService only runs on the master node.
 * Listens for cluster state changes and immediately triggers an additional
 * refresh if a data node has been added.
 *
 * Every time the timer runs, gathers information about the disk usage and
 * shard sizes across the cluster.
 */
public class InternalClusterInfoService implements ClusterInfoService, ClusterStateListener {

    private static final Logger logger = LogManager.getLogger(InternalClusterInfoService.class);

    public static final Setting INTERNAL_CLUSTER_INFO_UPDATE_INTERVAL_SETTING = Setting.timeSetting(
        "cluster.info.update.interval",
        TimeValue.timeValueSeconds(30),
        TimeValue.timeValueSeconds(10),
        Property.Dynamic,
        Property.NodeScope
    );
    public static final Setting INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING = Setting.positiveTimeSetting(
        "cluster.info.update.timeout",
        TimeValue.timeValueSeconds(15),
        Property.Dynamic,
        Property.NodeScope
    );

    private volatile boolean enabled;
    private volatile TimeValue updateFrequency;
    private volatile TimeValue fetchTimeout;

    private volatile ImmutableOpenMap leastAvailableSpaceUsages;
    private volatile ImmutableOpenMap mostAvailableSpaceUsages;
    private volatile IndicesStatsSummary indicesStatsSummary;

    private final ThreadPool threadPool;
    private final Client client;
    private final List> listeners = new CopyOnWriteArrayList<>();

    private final Object mutex = new Object();
    private final List> nextRefreshListeners = new ArrayList<>();
    private AsyncRefresh currentRefresh;
    private RefreshScheduler refreshScheduler;

    /**
     * Creates the service with empty cluster info, reads the initial values of its settings from {@code settings}, and registers
     * update consumers so that later dynamic changes to those settings are applied to this instance.
     *
     * @param settings       settings supplying the initial update interval, fetch timeout and enabled flag
     * @param clusterService provides the {@link ClusterSettings} on which the update consumers are registered
     * @param threadPool     thread pool used to schedule the periodic refreshes
     * @param client         client used to issue the node-stats and indices-stats requests
     */
    public InternalClusterInfoService(Settings settings, ClusterService clusterService, ThreadPool threadPool, Client client) {
        // Collaborators.
        this.threadPool = threadPool;
        this.client = client;

        // Nothing is known until the first refresh has completed.
        this.indicesStatsSummary = IndicesStatsSummary.EMPTY;
        this.leastAvailableSpaceUsages = ImmutableOpenMap.of();
        this.mostAvailableSpaceUsages = ImmutableOpenMap.of();

        // Initial values of the dynamic settings.
        this.updateFrequency = INTERNAL_CLUSTER_INFO_UPDATE_INTERVAL_SETTING.get(settings);
        this.fetchTimeout = INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING.get(settings);
        this.enabled = DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING.get(settings);

        // Keep those values in sync with later cluster settings updates.
        final ClusterSettings dynamicSettings = clusterService.getClusterSettings();
        dynamicSettings.addSettingsUpdateConsumer(INTERNAL_CLUSTER_INFO_TIMEOUT_SETTING, this::setFetchTimeout);
        dynamicSettings.addSettingsUpdateConsumer(INTERNAL_CLUSTER_INFO_UPDATE_INTERVAL_SETTING, this::setUpdateFrequency);
        dynamicSettings.addSettingsUpdateConsumer(
            DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_THRESHOLD_ENABLED_SETTING,
            this::setEnabled
        );
    }

    /** Applies a dynamic update of the disk threshold enabled setting; registered as a settings update consumer in the constructor. */
    private void setEnabled(boolean enabled) {
        this.enabled = enabled;
    }

    /** Applies a dynamic update of {@code cluster.info.update.timeout}; the value is used as the timeout of later stats requests. */
    private void setFetchTimeout(TimeValue fetchTimeout) {
        this.fetchTimeout = fetchTimeout;
    }

    /** Applies a dynamic update of {@code cluster.info.update.interval}; the value is read when the next refresh is scheduled. */
    void setUpdateFrequency(TimeValue updateFrequency) {
        this.updateFrequency = updateFrequency;
    }

    /**
     * Reacts to a cluster state change. If the local node is not the master the refresh scheduler is discarded and nothing else
     * happens. Otherwise a scheduler is installed if there is none yet, any refresh that became due is started, and one further
     * refresh is requested if the event added at least one node that can contain data.
     *
     * @param event the cluster state change being applied
     */
    @Override
    public void clusterChanged(ClusterChangedEvent event) {
        final Runnable pendingRefresh;
        synchronized (mutex) {
            if (event.localNodeMaster() == false) {
                // Clearing the field makes RefreshScheduler#shouldRefresh return false for the previous scheduler.
                refreshScheduler = null;
                return;
            }

            if (refreshScheduler == null) {
                logger.trace("elected as master, scheduling cluster info update tasks");
                final RefreshScheduler scheduler = new RefreshScheduler();
                refreshScheduler = scheduler;
                nextRefreshListeners.add(scheduler.getListener());
            }
            pendingRefresh = getNewRefresh();
            assert assertRefreshInvariant();
        }
        // The refresh itself is started only after the mutex has been released.
        pendingRefresh.run();

        // Refresh if a data node was added
        boolean dataNodeAdded = false;
        for (final DiscoveryNode joined : event.nodesDelta().addedNodes()) {
            if (joined.canContainData()) {
                dataNodeAdded = true;
                break;
            }
        }
        if (dataNodeAdded) {
            refreshAsync(new PlainActionFuture<>());
        }
    }

    private class AsyncRefresh {

        private final List> thisRefreshListeners;
        private final CountDown countDown = new CountDown(2);

        AsyncRefresh(List> thisRefreshListeners) {
            this.thisRefreshListeners = thisRefreshListeners;
        }

        void execute() {
            assert countDown.isCountedDown() == false;

            logger.trace("starting async refresh");

            final NodesStatsRequest nodesStatsRequest = new NodesStatsRequest("data:true");
            nodesStatsRequest.clear();
            nodesStatsRequest.addMetric(NodesStatsRequest.Metric.FS.metricName());
            nodesStatsRequest.timeout(fetchTimeout);
            client.admin().cluster().nodesStats(nodesStatsRequest, ActionListener.runAfter(new ActionListener<>() {
                @Override
                public void onResponse(NodesStatsResponse nodesStatsResponse) {
                    logger.trace("received node stats response");

                    for (final FailedNodeException failure : nodesStatsResponse.failures()) {
                        logger.warn(() -> "failed to retrieve stats for node [" + failure.nodeId() + "]", failure.getCause());
                    }

                    ImmutableOpenMap.Builder leastAvailableUsagesBuilder = ImmutableOpenMap.builder();
                    ImmutableOpenMap.Builder mostAvailableUsagesBuilder = ImmutableOpenMap.builder();
                    fillDiskUsagePerNode(
                        adjustNodesStats(nodesStatsResponse.getNodes()),
                        leastAvailableUsagesBuilder,
                        mostAvailableUsagesBuilder
                    );
                    leastAvailableSpaceUsages = leastAvailableUsagesBuilder.build();
                    mostAvailableSpaceUsages = mostAvailableUsagesBuilder.build();
                }

                @Override
                public void onFailure(Exception e) {
                    if (e instanceof ClusterBlockException) {
                        logger.trace("failed to retrieve node stats", e);
                    } else {
                        logger.warn("failed to retrieve node stats", e);
                    }
                    leastAvailableSpaceUsages = ImmutableOpenMap.of();
                    mostAvailableSpaceUsages = ImmutableOpenMap.of();
                }
            }, this::onStatsProcessed));

            final IndicesStatsRequest indicesStatsRequest = new IndicesStatsRequest();
            indicesStatsRequest.clear();
            indicesStatsRequest.store(true);
            indicesStatsRequest.indicesOptions(IndicesOptions.STRICT_EXPAND_OPEN_CLOSED_HIDDEN);
            indicesStatsRequest.timeout(fetchTimeout);
            client.admin().indices().stats(indicesStatsRequest, ActionListener.runAfter(new ActionListener<>() {
                @Override
                public void onResponse(IndicesStatsResponse indicesStatsResponse) {
                    logger.trace("received indices stats response");

                    if (indicesStatsResponse.getShardFailures().length > 0) {
                        final Set failedNodeIds = new HashSet<>();
                        for (final DefaultShardOperationFailedException shardFailure : indicesStatsResponse.getShardFailures()) {
                            if (shardFailure.getCause()instanceof final FailedNodeException failedNodeException) {
                                if (failedNodeIds.add(failedNodeException.nodeId())) {
                                    logger.warn(
                                        new ParameterizedMessage(
                                            "failed to retrieve shard stats from node [{}]",
                                            failedNodeException.nodeId()
                                        ),
                                        failedNodeException.getCause()
                                    );
                                }
                                logger.trace(
                                    new ParameterizedMessage(
                                        "failed to retrieve stats for shard [{}][{}]",
                                        shardFailure.index(),
                                        shardFailure.shardId()
                                    ),
                                    shardFailure.getCause()
                                );
                            } else {
                                logger.warn(
                                    new ParameterizedMessage(
                                        "failed to retrieve stats for shard [{}][{}]",
                                        shardFailure.index(),
                                        shardFailure.shardId()
                                    ),
                                    shardFailure.getCause()
                                );
                            }
                        }
                    }

                    final ShardStats[] stats = indicesStatsResponse.getShards();
                    final ImmutableOpenMap.Builder shardSizeByIdentifierBuilder = ImmutableOpenMap.builder();
                    final ImmutableOpenMap.Builder shardDataSetSizeBuilder = ImmutableOpenMap.builder();
                    final ImmutableOpenMap.Builder dataPathByShardRoutingBuilder = ImmutableOpenMap.builder();
                    final Map reservedSpaceBuilders = new HashMap<>();
                    buildShardLevelInfo(
                        stats,
                        shardSizeByIdentifierBuilder,
                        shardDataSetSizeBuilder,
                        dataPathByShardRoutingBuilder,
                        reservedSpaceBuilders
                    );

                    final ImmutableOpenMap.Builder rsrvdSpace = ImmutableOpenMap
                        .builder();
                    reservedSpaceBuilders.forEach((nodeAndPath, builder) -> rsrvdSpace.put(nodeAndPath, builder.build()));

                    indicesStatsSummary = new IndicesStatsSummary(
                        shardSizeByIdentifierBuilder.build(),
                        shardDataSetSizeBuilder.build(),
                        dataPathByShardRoutingBuilder.build(),
                        rsrvdSpace.build()
                    );
                }

                @Override
                public void onFailure(Exception e) {
                    if (e instanceof ClusterBlockException) {
                        logger.trace("failed to retrieve indices stats", e);
                    } else {
                        logger.warn("failed to retrieve indices stats", e);
                    }
                    indicesStatsSummary = IndicesStatsSummary.EMPTY;
                }
            }, this::onStatsProcessed));
        }

        private void onStatsProcessed() {
            if (countDown.countDown()) {
                logger.trace("stats all received, computing cluster info and notifying listeners");
                try {
                    final ClusterInfo clusterInfo = getClusterInfo();
                    boolean anyListeners = false;
                    for (final Consumer listener : listeners) {
                        anyListeners = true;
                        try {
                            logger.trace("notifying [{}] of new cluster info", listener);
                            listener.accept(clusterInfo);
                        } catch (Exception e) {
                            logger.info(() -> "failed to notify [" + listener + "] of new cluster info", e);
                        }
                    }
                    assert anyListeners : "expected to notify at least one listener";

                    for (final ActionListener listener : thisRefreshListeners) {
                        listener.onResponse(clusterInfo);
                    }
                } finally {
                    onRefreshComplete(this);
                }
            }
        }
    }

    /**
     * Called by a refresh once it has finished notifying its listeners. Clears {@code currentRefresh} and, if further listeners
     * were queued while that refresh was running, obtains the follow-up refresh and runs it.
     *
     * @param completedRefresh the refresh that has just finished; asserted to be the current one
     */
    private void onRefreshComplete(AsyncRefresh completedRefresh) {
        final Runnable newRefresh;
        synchronized (mutex) {
            assert currentRefresh == completedRefresh;
            currentRefresh = null;

            // We only ever run one refresh at once; if another refresh was requested while this one was running then we must start another
            // to ensure that the stats it sees are up-to-date.
            newRefresh = getNewRefresh();
            assert assertRefreshInvariant();
        }
        // Run the follow-up (possibly a no-op) only after the mutex has been released.
        newRefresh.run();
    }

    private Runnable getNewRefresh() {
        assert Thread.holdsLock(mutex) : "mutex not held";

        if (currentRefresh != null) {
            return () -> {};
        }

        if (nextRefreshListeners.isEmpty()) {
            return () -> {};
        }

        final ArrayList> thisRefreshListeners = new ArrayList<>(nextRefreshListeners);
        nextRefreshListeners.clear();

        if (enabled) {
            currentRefresh = new AsyncRefresh(thisRefreshListeners);
            return currentRefresh::execute;
        } else {
            return () -> {
                leastAvailableSpaceUsages = ImmutableOpenMap.of();
                mostAvailableSpaceUsages = ImmutableOpenMap.of();
                indicesStatsSummary = IndicesStatsSummary.EMPTY;
                thisRefreshListeners.forEach(l -> l.onResponse(ClusterInfo.EMPTY));
            };
        }
    }

    /**
     * Assertion helper, intended to be used as {@code assert assertRefreshInvariant();} while holding {@code mutex}.
     *
     * @return always {@code true}; violations surface as {@link AssertionError}s from the nested asserts
     */
    private boolean assertRefreshInvariant() {
        assert Thread.holdsLock(mutex) : "mutex not held";
        // We never leave a refresh listener waiting unless we're already refreshing (which will pick up the waiting listener on completion)
        assert nextRefreshListeners.isEmpty() || currentRefresh != null;
        return true;
    }

    /**
     * Keeps the periodic refreshes going for as long as this instance is the one stored in {@code refreshScheduler}. Each refresh
     * it requests carries a listener which, on completion, schedules the next refresh after {@code updateFrequency}.
     */
    private class RefreshScheduler {

        /**
         * @return a listener which, when notified, schedules another refresh after the current {@code updateFrequency}, provided
         *         this scheduler is still the active one both when notified and when the scheduled task fires
         */
        ActionListener<ClusterInfo> getListener() {
            return ActionListener.wrap(() -> {
                if (shouldRefresh()) {
                    threadPool.scheduleUnlessShuttingDown(updateFrequency, ThreadPool.Names.SAME, () -> {
                        // Check again: this scheduler may have been replaced or discarded while waiting.
                        if (shouldRefresh()) {
                            refreshAsync(getListener());
                        }
                    });
                }
            });
        }

        /** @return whether this instance is still the scheduler installed on the enclosing service */
        private boolean shouldRefresh() {
            synchronized (mutex) {
                return refreshScheduler == this;
            }
        }
    }

    /**
     * Builds a {@link ClusterInfo} from the most recently stored disk usages and indices stats summary.
     *
     * @return a new snapshot assembled from the current values of the volatile result fields
     */
    @Override
    public ClusterInfo getClusterInfo() {
        // Read the summary once so that all four shard-level maps come from the same instance.
        final IndicesStatsSummary summary = this.indicesStatsSummary;
        final var leastAvailable = leastAvailableSpaceUsages;
        final var mostAvailable = mostAvailableSpaceUsages;
        return new ClusterInfo(
            leastAvailable,
            mostAvailable,
            summary.shardSizes(),
            summary.shardDataSetSizes(),
            summary.shardRoutingToDataPath(),
            summary.reservedSpace()
        );
    }

    /**
     * Hook that allows tests to adjust the node stats on receipt; this implementation returns its argument unchanged.
     *
     * @param nodeStats the node stats received from the cluster
     * @return the node stats from which disk usages are computed
     */
    List<NodeStats> adjustNodesStats(List<NodeStats> nodeStats) {
        return nodeStats;
    }

    /**
     * Requests a refresh of the cluster info. The listener is queued; if no refresh is currently running a new one is started
     * immediately, otherwise the listener is served by the refresh that starts when the running one completes.
     *
     * @param future listener completed with the {@link ClusterInfo} produced by the refresh that serves this request
     */
    void refreshAsync(ActionListener<ClusterInfo> future) {
        final Runnable newRefresh;
        synchronized (mutex) {
            nextRefreshListeners.add(future);
            newRefresh = getNewRefresh();
            assert assertRefreshInvariant();
        }
        // Start the refresh (or no-op) only after the mutex has been released.
        newRefresh.run();
    }

    /**
     * Registers a consumer that is passed the new {@link ClusterInfo} each time a refresh completes.
     *
     * @param clusterInfoConsumer the consumer to notify
     */
    @Override
    public void addListener(Consumer<ClusterInfo> clusterInfoConsumer) {
        listeners.add(clusterInfoConsumer);
    }

    /**
     * Populates the given builders from the per-shard stats of an indices stats response.
     *
     * @param stats                     the shard stats to process
     * @param shardSizes                receives the store size in bytes of each shard copy, keyed by its shard identifier
     * @param shardDataSetSizeBuilder   receives, per shard id, the largest total data set size seen among that shard's copies
     * @param newShardRoutingToDataPath receives the data path of every shard copy, including copies without store stats
     * @param reservedSpaceByShard      receives, per node and data path, a builder accumulating the reserved bytes of the shards
     *                                  located there; copies whose reserved size is unknown are not added
     */
    static void buildShardLevelInfo(
        ShardStats[] stats,
        ImmutableOpenMap.Builder<String, Long> shardSizes,
        ImmutableOpenMap.Builder<ShardId, Long> shardDataSetSizeBuilder,
        ImmutableOpenMap.Builder<ShardRouting, String> newShardRoutingToDataPath,
        Map<ClusterInfo.NodeAndPath, ClusterInfo.ReservedSpace.Builder> reservedSpaceByShard
    ) {
        for (ShardStats s : stats) {
            final ShardRouting shardRouting = s.getShardRouting();
            newShardRoutingToDataPath.put(shardRouting, s.getDataPath());

            final StoreStats storeStats = s.getStats().getStore();
            if (storeStats == null) {
                // No store stats for this copy: only its data path can be recorded.
                continue;
            }
            final long size = storeStats.sizeInBytes();
            final long dataSetSize = storeStats.totalDataSetSizeInBytes();
            final long reserved = storeStats.getReservedSize().getBytes();

            final String shardIdentifier = ClusterInfo.shardIdentifierFromRouting(shardRouting);
            logger.trace("shard: {} size: {} reserved: {}", shardIdentifier, size, reserved);
            shardSizes.put(shardIdentifier, size);
            // Keep the maximum across all copies of the same shard.
            if (dataSetSize > shardDataSetSizeBuilder.getOrDefault(shardRouting.shardId(), -1L)) {
                shardDataSetSizeBuilder.put(shardRouting.shardId(), dataSetSize);
            }
            if (reserved != StoreStats.UNKNOWN_RESERVED_BYTES) {
                final ClusterInfo.ReservedSpace.Builder reservedSpaceBuilder = reservedSpaceByShard.computeIfAbsent(
                    new ClusterInfo.NodeAndPath(shardRouting.currentNodeId(), s.getDataPath()),
                    t -> new ClusterInfo.ReservedSpace.Builder()
                );
                reservedSpaceBuilder.add(shardRouting.shardId(), reserved);
            }
        }
    }

    /**
     * Computes, for every node in {@code nodeStatsArray}, the disk usage of its filesystem path with the least available bytes
     * and of its path with the most available bytes, and stores them in the given builders keyed by node id.
     * <p>
     * Nodes that report no filesystem stats, or no paths at all, are skipped with a warning. A path whose total size is negative
     * is not recorded.
     *
     * @param nodeStatsArray          the node stats to process
     * @param newLeastAvailableUsages receives the usage of each node's least-available path
     * @param newMostAvailableUsages  receives the usage of each node's most-available path
     */
    static void fillDiskUsagePerNode(
        List<NodeStats> nodeStatsArray,
        ImmutableOpenMap.Builder<String, DiskUsage> newLeastAvailableUsages,
        ImmutableOpenMap.Builder<String, DiskUsage> newMostAvailableUsages
    ) {
        for (NodeStats nodeStats : nodeStatsArray) {
            if (nodeStats.getFs() == null) {
                logger.warn("node [{}/{}] did not return any filesystem stats", nodeStats.getNode().getName(), nodeStats.getNode().getId());
                continue;
            }

            // Single pass over the paths tracking both extremes by available bytes.
            FsInfo.Path leastAvailablePath = null;
            FsInfo.Path mostAvailablePath = null;
            for (FsInfo.Path info : nodeStats.getFs()) {
                if (leastAvailablePath == null) {
                    // noinspection ConstantConditions this assertion is for the benefit of readers, it's always true
                    assert mostAvailablePath == null;
                    mostAvailablePath = leastAvailablePath = info;
                } else if (leastAvailablePath.getAvailable().getBytes() > info.getAvailable().getBytes()) {
                    leastAvailablePath = info;
                } else if (mostAvailablePath.getAvailable().getBytes() < info.getAvailable().getBytes()) {
                    mostAvailablePath = info;
                }
            }
            if (leastAvailablePath == null) {
                // noinspection ConstantConditions this assertion is for the benefit of readers, it's always true
                assert mostAvailablePath == null;
                logger.warn("node [{}/{}] did not return any filesystem stats", nodeStats.getNode().getName(), nodeStats.getNode().getId());
                continue;
            }

            final String nodeId = nodeStats.getNode().getId();
            final String nodeName = nodeStats.getNode().getName();
            if (logger.isTraceEnabled()) {
                logger.trace(
                    "node [{}]: most available: total: {}, available: {} / least available: total: {}, available: {}",
                    nodeId,
                    mostAvailablePath.getTotal(),
                    mostAvailablePath.getAvailable(),
                    leastAvailablePath.getTotal(),
                    leastAvailablePath.getAvailable()
                );
            }
            if (leastAvailablePath.getTotal().getBytes() < 0) {
                if (logger.isTraceEnabled()) {
                    logger.trace(
                        "node: [{}] least available path has less than 0 total bytes of disk [{}], skipping",
                        nodeId,
                        leastAvailablePath.getTotal().getBytes()
                    );
                }
            } else {
                newLeastAvailableUsages.put(
                    nodeId,
                    new DiskUsage(
                        nodeId,
                        nodeName,
                        leastAvailablePath.getPath(),
                        leastAvailablePath.getTotal().getBytes(),
                        leastAvailablePath.getAvailable().getBytes()
                    )
                );
            }
            if (mostAvailablePath.getTotal().getBytes() < 0) {
                if (logger.isTraceEnabled()) {
                    logger.trace(
                        "node: [{}] most available path has less than 0 total bytes of disk [{}], skipping",
                        nodeId,
                        mostAvailablePath.getTotal().getBytes()
                    );
                }
            } else {
                newMostAvailableUsages.put(
                    nodeId,
                    new DiskUsage(
                        nodeId,
                        nodeName,
                        mostAvailablePath.getPath(),
                        mostAvailablePath.getTotal().getBytes(),
                        mostAvailablePath.getAvailable().getBytes()
                    )
                );
            }

        }
    }

    private record IndicesStatsSummary(
        ImmutableOpenMap shardSizes,
        ImmutableOpenMap shardDataSetSizes,
        ImmutableOpenMap shardRoutingToDataPath,
        ImmutableOpenMap reservedSpace
    ) {
        static final IndicesStatsSummary EMPTY = new IndicesStatsSummary(
            ImmutableOpenMap.of(),
            ImmutableOpenMap.of(),
            ImmutableOpenMap.of(),
            ImmutableOpenMap.of()
        );

    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy