All downloads are free. The search and download functionality uses the official Maven repository.

org.elasticsearch.cluster.coordination.ClusterFormationFailureHelper Maven / Gradle / Ivy

There is a newer version: 8.13.2
Show newest version
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.cluster.coordination;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.coordination.CoordinationMetaData.VotingConfiguration;
import org.elasticsearch.cluster.coordination.CoordinationState.VoteCollection;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.TransportAddress;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.threadpool.ThreadPool.Names;

import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import static org.elasticsearch.cluster.coordination.ClusterBootstrapService.INITIAL_MASTER_NODES_SETTING;

/**
 * Emits a periodic warning while the helper is running, describing why this node has not yet
 * discovered or elected a master.
 *
 * <p>After {@link #start()} a task is scheduled on the generic thread pool; each time it fires it runs
 * the supplied "log last failed join attempt" callback, logs the current
 * {@link ClusterFormationState#getDescription() description} at WARN level and reschedules itself.
 * {@link #stop()} deactivates the current scheduler so that any pending task neither logs nor reschedules.
 */
public class ClusterFormationFailureHelper {
    private static final Logger logger = LogManager.getLogger(ClusterFormationFailureHelper.class);

    /**
     * Delay between successive warnings. Built from a 10000ms value and a 1ms value, which are
     * presumably the default and the lower bound respectively; node-scoped.
     */
    public static final Setting<TimeValue> DISCOVERY_CLUSTER_FORMATION_WARNING_TIMEOUT_SETTING =
        Setting.timeSetting("discovery.cluster_formation_warning_timeout",
            TimeValue.timeValueMillis(10000), TimeValue.timeValueMillis(1), Setting.Property.NodeScope);

    private final Supplier<ClusterFormationState> clusterFormationStateSupplier;
    private final ThreadPool threadPool;
    private final TimeValue clusterFormationWarningTimeout;
    private final Runnable logLastFailedJoinAttempt;
    @Nullable // if no warning is scheduled
    private volatile WarningScheduler warningScheduler;

    /**
     * @param settings                      node settings, read once for the warning timeout
     * @param clusterFormationStateSupplier supplies a fresh snapshot to describe each time a warning is emitted
     * @param threadPool                    thread pool used to schedule the recurring warning
     * @param logLastFailedJoinAttempt      callback run immediately before each warning is logged
     */
    public ClusterFormationFailureHelper(Settings settings, Supplier<ClusterFormationState> clusterFormationStateSupplier,
                                         ThreadPool threadPool, Runnable logLastFailedJoinAttempt) {
        this.clusterFormationStateSupplier = clusterFormationStateSupplier;
        this.threadPool = threadPool;
        this.clusterFormationWarningTimeout = DISCOVERY_CLUSTER_FORMATION_WARNING_TIMEOUT_SETTING.get(settings);
        this.logLastFailedJoinAttempt = logLastFailedJoinAttempt;
    }

    /**
     * @return {@code true} if a warning scheduler is currently installed, i.e. between {@link #start()} and {@link #stop()}
     */
    public boolean isRunning() {
        return warningScheduler != null;
    }

    /**
     * Installs a new scheduler and schedules the first warning. Must not be called while already running.
     */
    public void start() {
        assert warningScheduler == null;
        // Schedule via the local reference rather than re-reading the volatile field, which stop() may null.
        final WarningScheduler scheduler = new WarningScheduler();
        warningScheduler = scheduler;
        scheduler.scheduleNextWarning();
    }

    /**
     * Deactivates the current scheduler; an already-scheduled task will observe this and do nothing.
     */
    public void stop() {
        warningScheduler = null;
    }

    /**
     * One generation of the recurring warning task. It is active only while it is the instance referenced
     * by the enclosing helper's {@code warningScheduler} field.
     */
    private class WarningScheduler {

        private boolean isActive() {
            return warningScheduler == this;
        }

        /**
         * Schedules a single warning after the configured timeout; the task reschedules itself while active.
         */
        void scheduleNextWarning() {
            threadPool.scheduleUnlessShuttingDown(clusterFormationWarningTimeout, Names.GENERIC, new AbstractRunnable() {
                @Override
                public void onFailure(Exception e) {
                    logger.debug("unexpected exception scheduling cluster formation warning", e);
                }

                @Override
                protected void doRun() {
                    if (isActive()) {
                        logLastFailedJoinAttempt.run();
                        logger.warn(clusterFormationStateSupplier.get().getDescription());
                    }
                }

                @Override
                public void onAfter() {
                    // Re-check activity here so that a stop() during doRun() ends the cycle.
                    if (isActive()) {
                        scheduleNextWarning();
                    }
                }

                @Override
                public String toString() {
                    return "emit warning if cluster not formed";
                }
            });
        }
    }

    /**
     * Snapshot of the information needed to explain why a master has not been discovered or elected.
     */
    static class ClusterFormationState {
        private final Settings settings;
        private final ClusterState clusterState;
        private final List<TransportAddress> resolvedAddresses;
        private final List<DiscoveryNode> foundPeers;
        private final long currentTerm;
        private final ElectionStrategy electionStrategy;

        /**
         * @param settings          node settings, consulted for the initial-master-nodes setting
         * @param clusterState      last-known cluster state of this node
         * @param resolvedAddresses addresses reported in the message as coming "from hosts providers"
         * @param foundPeers        nodes reported in the message as discovered, and counted as votes for the quorum check
         * @param currentTerm       term reported in the message as the "node term"
         * @param electionStrategy  strategy asked whether the discovered peers form an election quorum
         */
        ClusterFormationState(Settings settings, ClusterState clusterState, List<TransportAddress> resolvedAddresses,
                              List<DiscoveryNode> foundPeers, long currentTerm, ElectionStrategy electionStrategy) {
            this.settings = settings;
            this.clusterState = clusterState;
            this.resolvedAddresses = resolvedAddresses;
            this.foundPeers = foundPeers;
            this.currentTerm = currentTerm;
            this.electionStrategy = electionStrategy;
        }

        /**
         * Builds the human-readable warning message. The wording depends on, in order: whether the local node
         * is master-eligible, whether the last-accepted voting configuration is empty (never bootstrapped),
         * whether the last-committed configuration is the "must join elected master" marker (detached node),
         * and otherwise on whether the discovered peers form an election quorum.
         *
         * @return the description to log
         */
        String getDescription() {
            // Each element of the master-nodes collection exposes the node through its value field.
            final List<String> clusterStateNodes = StreamSupport.stream(clusterState.nodes().getMasterNodes().values().spliterator(), false)
                .map(n -> n.value.toString()).collect(Collectors.toList());

            final String discoveryWillContinueDescription = String.format(Locale.ROOT,
                "discovery will continue using %s from hosts providers and %s from last-known cluster state; " +
                    "node term %d, last-accepted version %d in term %d",
                resolvedAddresses, clusterStateNodes, currentTerm, clusterState.getVersionOrMetaDataVersion(), clusterState.term());

            final String discoveryStateIgnoringQuorum = String.format(Locale.ROOT, "have discovered %s; %s",
                foundPeers, discoveryWillContinueDescription);

            // A node that is not master-eligible only needs to report what it has discovered so far.
            if (clusterState.nodes().getLocalNode().isMasterNode() == false) {
                return String.format(Locale.ROOT, "master not discovered yet: %s", discoveryStateIgnoringQuorum);
            }

            if (clusterState.getLastAcceptedConfiguration().isEmpty()) {

                // TODO handle the case that there is a 6.x node around here, when rolling upgrades are supported

                final String bootstrappingDescription;

                // Compare against the setting's value on empty settings to detect that it was left unset here.
                if (INITIAL_MASTER_NODES_SETTING.get(Settings.EMPTY).equals(INITIAL_MASTER_NODES_SETTING.get(settings))) {
                    bootstrappingDescription = "[" + INITIAL_MASTER_NODES_SETTING.getKey() + "] is empty on this node";
                } else {
                    // TODO update this when we can bootstrap on only a quorum of the initial nodes
                    bootstrappingDescription = String.format(Locale.ROOT,
                        "this node must discover master-eligible nodes %s to bootstrap a cluster",
                        INITIAL_MASTER_NODES_SETTING.get(settings));
                }

                return String.format(Locale.ROOT,
                    "master not discovered yet, this node has not previously joined a bootstrapped (v%d+) cluster, and %s: %s",
                    Version.V_6_6_0.major + 1, bootstrappingDescription, discoveryStateIgnoringQuorum);
            }

            assert clusterState.getLastCommittedConfiguration().isEmpty() == false;

            if (clusterState.getLastCommittedConfiguration().equals(VotingConfiguration.MUST_JOIN_ELECTED_MASTER)) {
                return String.format(Locale.ROOT,
                        "master not discovered yet and this node was detached from its previous cluster, have discovered %s; %s",
                        foundPeers, discoveryWillContinueDescription);
            }

            // When the accepted and committed configurations differ, both quorums are described.
            final String quorumDescription;
            if (clusterState.getLastAcceptedConfiguration().equals(clusterState.getLastCommittedConfiguration())) {
                quorumDescription = describeQuorum(clusterState.getLastAcceptedConfiguration());
            } else {
                quorumDescription = describeQuorum(clusterState.getLastAcceptedConfiguration())
                    + " and "
                    + describeQuorum(clusterState.getLastCommittedConfiguration());
            }

            // Treat every discovered peer as a vote and ask the election strategy whether that is a quorum.
            final VoteCollection voteCollection = new VoteCollection();
            foundPeers.forEach(voteCollection::addVote);
            final String isQuorumOrNot
                = electionStrategy.isElectionQuorum(clusterState.nodes().getLocalNode(), currentTerm, clusterState.term(),
                    clusterState.getVersionOrMetaDataVersion(), clusterState.getLastCommittedConfiguration(),
                    clusterState.getLastAcceptedConfiguration(),
                    voteCollection) ? "is a quorum" : "is not a quorum";

            return String.format(Locale.ROOT,
                "master not discovered or elected yet, an election requires %s, have discovered %s which %s; %s",
                quorumDescription, foundPeers, isQuorumOrNot, discoveryWillContinueDescription);
        }

        /**
         * Describes how many nodes of the given voting configuration an election requires.
         *
         * @param votingConfiguration a non-empty voting configuration
         * @return a phrase naming the required node count and the candidate node ids
         */
        private String describeQuorum(VotingConfiguration votingConfiguration) {
            final Set<String> nodeIds = votingConfiguration.getNodeIds();
            assert nodeIds.isEmpty() == false;
            // A strict majority of the configuration, counting bootstrap placeholders.
            final int requiredNodes = nodeIds.size() / 2 + 1;

            // Only ids that are not bootstrap placeholders are shown in the message.
            final Set<String> realNodeIds = new HashSet<>(nodeIds);
            realNodeIds.removeIf(ClusterBootstrapService::isBootstrapPlaceholder);
            assert requiredNodes <= realNodeIds.size() : nodeIds;

            if (nodeIds.size() == 1) {
                return "a node with id " + realNodeIds;
            } else if (nodeIds.size() == 2) {
                return "two nodes with ids " + realNodeIds;
            } else {
                if (requiredNodes < realNodeIds.size()) {
                    return "at least " + requiredNodes + " nodes with ids from " + realNodeIds;
                } else {
                    return requiredNodes + " nodes with ids " + realNodeIds;
                }
            }
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy