All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.cluster.coordination.ClusterFormationFailureHelper Maven / Gradle / Ivy

/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */
package org.elasticsearch.cluster.coordination;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.coordination.CoordinationMetadata.VotingConfiguration;
import org.elasticsearch.cluster.coordination.CoordinationState.VoteCollection;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.gateway.GatewayMetaState;
import org.elasticsearch.monitor.StatusInfo;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.threadpool.ThreadPool.Names;

import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.function.Supplier;

import static org.elasticsearch.cluster.coordination.ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING;
import static org.elasticsearch.monitor.StatusInfo.Status.UNHEALTHY;

public class ClusterFormationFailureHelper {
    private static final Logger logger = LogManager.getLogger(ClusterFormationFailureHelper.class);

    public static final Setting DISCOVERY_CLUSTER_FORMATION_WARNING_TIMEOUT_SETTING = Setting.timeSetting(
        "discovery.cluster_formation_warning_timeout",
        TimeValue.timeValueMillis(10000),
        TimeValue.timeValueMillis(1),
        Setting.Property.NodeScope
    );

    private final Supplier clusterFormationStateSupplier;
    private final ThreadPool threadPool;
    private final TimeValue clusterFormationWarningTimeout;
    private final Runnable logLastFailedJoinAttempt;
    @Nullable // if no warning is scheduled
    private volatile WarningScheduler warningScheduler;

    public ClusterFormationFailureHelper(
        Settings settings,
        Supplier clusterFormationStateSupplier,
        ThreadPool threadPool,
        Runnable logLastFailedJoinAttempt
    ) {
        this.clusterFormationStateSupplier = clusterFormationStateSupplier;
        this.threadPool = threadPool;
        this.clusterFormationWarningTimeout = DISCOVERY_CLUSTER_FORMATION_WARNING_TIMEOUT_SETTING.get(settings);
        this.logLastFailedJoinAttempt = logLastFailedJoinAttempt;
    }

    public boolean isRunning() {
        return warningScheduler != null;
    }

    public void start() {
        assert warningScheduler == null;
        warningScheduler = new WarningScheduler();
        warningScheduler.scheduleNextWarning();
    }

    public void stop() {
        warningScheduler = null;
    }

    private class WarningScheduler {

        private boolean isActive() {
            return warningScheduler == this;
        }

        void scheduleNextWarning() {
            threadPool.scheduleUnlessShuttingDown(clusterFormationWarningTimeout, Names.GENERIC, new AbstractRunnable() {
                @Override
                public void onFailure(Exception e) {
                    logger.debug("unexpected exception scheduling cluster formation warning", e);
                }

                @Override
                protected void doRun() {
                    if (isActive()) {
                        logLastFailedJoinAttempt.run();
                        logger.warn(clusterFormationStateSupplier.get().getDescription());
                    }
                }

                @Override
                public void onAfter() {
                    if (isActive()) {
                        scheduleNextWarning();
                    }
                }

                @Override
                public String toString() {
                    return "emit warning if cluster not formed";
                }
            });
        }
    }

    static class ClusterFormationState {
        private final Settings settings;
        private final ClusterState clusterState;
        private final List resolvedAddresses;
        private final List foundPeers;
        private final long currentTerm;
        private final ElectionStrategy electionStrategy;
        private final StatusInfo statusInfo;

        ClusterFormationState(
            Settings settings,
            ClusterState clusterState,
            List resolvedAddresses,
            List foundPeers,
            long currentTerm,
            ElectionStrategy electionStrategy,
            StatusInfo statusInfo
        ) {
            this.settings = settings;
            this.clusterState = clusterState;
            this.resolvedAddresses = resolvedAddresses;
            this.foundPeers = foundPeers;
            this.currentTerm = currentTerm;
            this.electionStrategy = electionStrategy;
            this.statusInfo = statusInfo;
        }

        String getDescription() {
            if (statusInfo.getStatus() == UNHEALTHY) {
                return String.format(Locale.ROOT, "this node is unhealthy: %s", statusInfo.getInfo());
            }

            final StringBuilder clusterStateNodes = new StringBuilder();
            DiscoveryNodes.addCommaSeparatedNodesWithoutAttributes(clusterState.nodes().getMasterNodes().valuesIt(), clusterStateNodes);

            final String discoveryWillContinueDescription = String.format(
                Locale.ROOT,
                "discovery will continue using %s from hosts providers and [%s] from last-known cluster state; "
                    + "node term %d, last-accepted version %d in term %d",
                resolvedAddresses,
                clusterStateNodes,
                currentTerm,
                clusterState.getVersionOrMetadataVersion(),
                clusterState.term()
            );

            final StringBuilder foundPeersDescription = new StringBuilder();
            DiscoveryNodes.addCommaSeparatedNodesWithoutAttributes(foundPeers.iterator(), foundPeersDescription);

            final String discoveryStateIgnoringQuorum = String.format(
                Locale.ROOT,
                "have discovered [%s]; %s",
                foundPeersDescription,
                discoveryWillContinueDescription
            );

            if (clusterState.nodes().getLocalNode().isMasterNode() == false) {
                return String.format(Locale.ROOT, "master not discovered yet: %s", discoveryStateIgnoringQuorum);
            }

            if (clusterState.getLastAcceptedConfiguration().isEmpty()) {

                // TODO handle the case that there is a 6.x node around here, when rolling upgrades are supported

                final String bootstrappingDescription;

                if (INITIAL_MASTER_NODES_SETTING.get(Settings.EMPTY).equals(INITIAL_MASTER_NODES_SETTING.get(settings))) {
                    bootstrappingDescription = "[" + INITIAL_MASTER_NODES_SETTING.getKey() + "] is empty on this node";
                } else {
                    bootstrappingDescription = String.format(
                        Locale.ROOT,
                        "this node must discover master-eligible nodes %s to bootstrap a cluster",
                        INITIAL_MASTER_NODES_SETTING.get(settings)
                    );
                }

                return String.format(
                    Locale.ROOT,
                    "master not discovered yet, this node has not previously joined a bootstrapped (v%d+) cluster, and %s: %s",
                    Version.V_6_6_0.major + 1,
                    bootstrappingDescription,
                    discoveryStateIgnoringQuorum
                );
            }

            assert clusterState.getLastCommittedConfiguration().isEmpty() == false;

            if (clusterState.getLastCommittedConfiguration().equals(VotingConfiguration.MUST_JOIN_ELECTED_MASTER)) {
                return String.format(
                    Locale.ROOT,
                    "master not discovered yet and this node was detached from its previous cluster, have discovered [%s]; %s",
                    foundPeersDescription,
                    discoveryWillContinueDescription
                );
            }

            final String quorumDescription;
            if (clusterState.getLastAcceptedConfiguration().equals(clusterState.getLastCommittedConfiguration())) {
                quorumDescription = describeQuorum(clusterState.getLastAcceptedConfiguration());
            } else {
                quorumDescription = describeQuorum(clusterState.getLastAcceptedConfiguration())
                    + " and "
                    + describeQuorum(clusterState.getLastCommittedConfiguration());
            }

            final VoteCollection voteCollection = new VoteCollection();
            foundPeers.forEach(voteCollection::addVote);
            final String haveDiscoveredQuorum = electionStrategy.isElectionQuorum(
                clusterState.nodes().getLocalNode(),
                currentTerm,
                clusterState.term(),
                clusterState.getVersionOrMetadataVersion(),
                clusterState.getLastCommittedConfiguration(),
                clusterState.getLastAcceptedConfiguration(),
                voteCollection
            ) ? "have discovered possible quorum" : "have only discovered non-quorum";

            return String.format(
                Locale.ROOT,
                "master not discovered or elected yet, an election requires %s, %s [%s]; %s",
                quorumDescription,
                haveDiscoveredQuorum,
                foundPeersDescription,
                discoveryWillContinueDescription
            );
        }

        private String describeQuorum(VotingConfiguration votingConfiguration) {
            final Set nodeIds = votingConfiguration.getNodeIds();
            assert nodeIds.isEmpty() == false;
            final int requiredNodes = nodeIds.size() / 2 + 1;

            final Set realNodeIds = new HashSet<>(nodeIds);
            realNodeIds.removeIf(ClusterBootstrapService::isBootstrapPlaceholder);
            assert requiredNodes <= realNodeIds.size() : nodeIds;

            if (nodeIds.size() == 1) {
                if (nodeIds.contains(GatewayMetaState.STALE_STATE_CONFIG_NODE_ID)) {
                    return "one or more nodes that have already participated as master-eligible nodes in the cluster but this node was "
                        + "not master-eligible the last time it joined the cluster";
                } else {
                    return "a node with id " + realNodeIds;
                }
            } else if (nodeIds.size() == 2) {
                return "two nodes with ids " + realNodeIds;
            } else {
                if (requiredNodes < realNodeIds.size()) {
                    return "at least " + requiredNodes + " nodes with ids from " + realNodeIds;
                } else {
                    return requiredNodes + " nodes with ids " + realNodeIds;
                }
            }
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy