
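// org.opensearch.cluster.coordination.Reconfigurator (OpenSearch subproject :server)
// Note: the other text originally on these lines ("Maven / Gradle / Ivy", "Go to download",
// "Show more of this group / Show more artifacts with this name", "Show all versions of opensearch /
// Show documentation") was navigation text from the artifact-browser page this listing was copied
// from; it is not part of the source file.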
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.opensearch.cluster.coordination;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.opensearch.cluster.coordination.CoordinationMetadata.VotingConfiguration;
import org.opensearch.cluster.node.DiscoveryNode;
import org.opensearch.common.settings.ClusterSettings;
import org.opensearch.common.settings.Setting;
import org.opensearch.common.settings.Setting.Property;
import org.opensearch.common.settings.Settings;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;
/**
 * Computes the optimal configuration of voting nodes in the cluster.
 *
 * <p>The only mutable state is the {@code cluster.auto_shrink_voting_configuration} flag, which is read from the node settings at
 * construction time and updated afterwards through a settings-update consumer registered on the supplied {@link ClusterSettings}.
 */
public class Reconfigurator {

    private static final Logger logger = LogManager.getLogger(Reconfigurator.class);

    /**
     * The cluster usually requires a vote from at least half of the master nodes in order to commit a cluster state update, and to achieve
     * the best resilience it makes automatic adjustments to the voting configuration as master nodes join or leave the cluster. Adjustments
     * that fix or increase the size of the voting configuration are always a good idea, but the wisdom of reducing the voting configuration
     * size is less clear. For instance, automatically reducing the voting configuration down to a single node means the cluster requires
     * this node to operate, which is not resilient: if it broke we could restore every other master-eligible node in the cluster to health
     * and still the cluster would be unavailable. However not reducing the voting configuration size can also hamper resilience: in a
     * five-node cluster we could lose two nodes and by reducing the voting configuration to the remaining three nodes we could tolerate the
     * loss of a further node before failing.
     *
     * We offer two options: either we auto-shrink the voting configuration as long as it contains more than three nodes, or we don't and we
     * require the user to control the voting configuration manually using the retirement API. The former, default, option, guarantees that
     * as long as there have been at least three master-eligible nodes in the cluster and no more than one of them is currently unavailable,
     * then the cluster will still operate, which is what almost everyone wants. Manual control is for users who want different guarantees.
     */
    public static final Setting<Boolean> CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION = Setting.boolSetting(
        "cluster.auto_shrink_voting_configuration",
        true,
        Property.NodeScope,
        Property.Dynamic
    );

    // volatile: written by the settings-update consumer registered in the constructor and read by reconfigure()/toString().
    // NOTE(review): presumably those run on different threads - confirm against ClusterSettings.
    private volatile boolean autoShrinkVotingConfiguration;

    /**
     * Creates a reconfigurator whose auto-shrink flag starts at the value of {@link #CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION} in
     * {@code settings} and is kept up to date via an update consumer registered on {@code clusterSettings}.
     *
     * @param settings        Settings from which the initial value of the auto-shrink flag is read.
     * @param clusterSettings Registry on which the update consumer for the auto-shrink setting is registered.
     */
    public Reconfigurator(Settings settings, ClusterSettings clusterSettings) {
        autoShrinkVotingConfiguration = CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION.get(settings);
        clusterSettings.addSettingsUpdateConsumer(CLUSTER_AUTO_SHRINK_VOTING_CONFIGURATION, this::setAutoShrinkVotingConfiguration);
    }

    /**
     * Sets whether {@link #reconfigure} may shrink the voting configuration automatically.
     *
     * @param autoShrinkVotingConfiguration the new value of the auto-shrink flag.
     */
    public void setAutoShrinkVotingConfiguration(boolean autoShrinkVotingConfiguration) {
        this.autoShrinkVotingConfiguration = autoShrinkVotingConfiguration;
    }

    /**
     * Returns {@code size} if it is odd, otherwise {@code size - 1}. Note that an input of {@code 0} yields {@code -1}; the caller
     * combines the result with a non-negative minimum via {@link Math#max(int, int)} before using it.
     */
    private static int roundDownToOdd(int size) {
        return size - (size % 2 == 0 ? 1 : 0);
    }

    @Override
    public String toString() {
        return "Reconfigurator{" + "autoShrinkVotingConfiguration=" + autoShrinkVotingConfiguration + '}';
    }

    /**
     * Compute an optimal configuration for the cluster.
     *
     * @param liveNodes      The live nodes in the cluster. The optimal configuration prefers live nodes over non-live nodes as far as
     *                       possible.
     * @param retiredNodeIds Nodes that are leaving the cluster and which should not appear in the configuration if possible. Nodes that are
     *                       retired and not in the current configuration will never appear in the resulting configuration; this is useful
     *                       for shifting the vote in a 2-node cluster so one of the nodes can be restarted without harming availability.
     * @param currentMaster  The current master. Unless retired, we prefer to keep the current master in the config.
     * @param currentConfig  The current configuration. As far as possible, we prefer to keep the current config as-is.
     * @return An optimal configuration, or the current configuration unchanged if the optimal configuration has no live quorum.
     */
    public VotingConfiguration reconfigure(
        Set<DiscoveryNode> liveNodes,
        Set<String> retiredNodeIds,
        DiscoveryNode currentMaster,
        VotingConfiguration currentConfig
    ) {
        assert liveNodes.contains(currentMaster) : "liveNodes = " + liveNodes + " master = " + currentMaster;
        logger.trace(
            "{} reconfiguring {} based on liveNodes={}, retiredNodeIds={}, currentMaster={}",
            this,
            currentConfig,
            liveNodes,
            retiredNodeIds,
            currentMaster
        );

        // Ids of all live master-eligible nodes, including retired ones: used both to detect non-live members of the current
        // configuration and for the final quorum check.
        final Set<String> liveNodeIds = liveNodes.stream()
            .filter(DiscoveryNode::isMasterNode)
            .map(DiscoveryNode::getId)
            .collect(Collectors.toSet());
        final Set<String> currentConfigNodeIds = currentConfig.getNodeIds();

        // Candidates are kept sorted by VotingConfigNode#compareTo, i.e. most-preferred first.
        final Set<VotingConfigNode> orderedCandidateNodes = new TreeSet<>();

        // Candidates, part 1: live, master-eligible, non-retired nodes.
        liveNodes.stream()
            .filter(DiscoveryNode::isMasterNode)
            .filter(n -> retiredNodeIds.contains(n.getId()) == false)
            .forEach(
                n -> orderedCandidateNodes.add(
                    new VotingConfigNode(n.getId(), true, n.getId().equals(currentMaster.getId()), currentConfigNodeIds.contains(n.getId()))
                )
            );

        // Candidates, part 2: non-retired members of the current configuration that are not currently live.
        currentConfigNodeIds.stream()
            .filter(nid -> liveNodeIds.contains(nid) == false)
            .filter(nid -> retiredNodeIds.contains(nid) == false)
            .forEach(nid -> orderedCandidateNodes.add(new VotingConfigNode(nid, false, false, true)));

        /*
         * Now we work out how many nodes should be in the configuration:
         */
        final int nonRetiredConfigSize = Math.toIntExact(orderedCandidateNodes.stream().filter(n -> n.inCurrentConfig).count());
        // With auto-shrink the configuration may drop to 3 nodes (or to 1 if it already has fewer than 3 non-retired members);
        // without it the configuration never becomes smaller than its current non-retired membership.
        final int minimumConfigEnforcedSize = autoShrinkVotingConfiguration ? (nonRetiredConfigSize < 3 ? 1 : 3) : nonRetiredConfigSize;
        final int nonRetiredLiveNodeCount = Math.toIntExact(orderedCandidateNodes.stream().filter(n -> n.live).count());
        final int targetSize = Math.max(roundDownToOdd(nonRetiredLiveNodeCount), minimumConfigEnforcedSize);

        // Take the targetSize most-preferred candidates (fewer if there are not that many candidates).
        final VotingConfiguration newConfig = new VotingConfiguration(
            orderedCandidateNodes.stream().limit(targetSize).map(n -> n.id).collect(Collectors.toSet())
        );

        // new configuration should have a quorum
        if (newConfig.hasQuorum(liveNodeIds)) {
            return newConfig;
        } else {
            // If there are not enough live nodes to form a quorum in the newly-proposed configuration, it's better to do nothing.
            return currentConfig;
        }
    }

    /**
     * A candidate for membership of the voting configuration. The natural ordering puts the most-preferred candidate first: the
     * current master, then live nodes, then nodes already in the current configuration, with ties broken by node id.
     */
    static class VotingConfigNode implements Comparable<VotingConfigNode> {
        final String id;
        final boolean live;
        final boolean currentMaster;
        final boolean inCurrentConfig;

        /**
         * @param id              The node id.
         * @param live            Whether the node is currently live.
         * @param currentMaster   Whether the node is the current master.
         * @param inCurrentConfig Whether the node is a member of the current voting configuration.
         */
        VotingConfigNode(String id, boolean live, boolean currentMaster, boolean inCurrentConfig) {
            this.id = id;
            this.live = live;
            this.currentMaster = currentMaster;
            this.inCurrentConfig = inCurrentConfig;
        }

        /**
         * Orders candidates so that preferred ones sort first. Each flag is compared with the operands swapped
         * ({@code other} first) so that {@code true} sorts before {@code false}.
         */
        @Override
        public int compareTo(VotingConfigNode other) {
            // prefer current master
            final int currentMasterComp = Boolean.compare(other.currentMaster, currentMaster);
            if (currentMasterComp != 0) {
                return currentMasterComp;
            }
            // prefer nodes that are live
            final int liveComp = Boolean.compare(other.live, live);
            if (liveComp != 0) {
                return liveComp;
            }
            // prefer nodes that are in current config for stability
            final int inCurrentConfigComp = Boolean.compare(other.inCurrentConfig, inCurrentConfig);
            if (inCurrentConfigComp != 0) {
                return inCurrentConfigComp;
            }
            // tiebreak by node id to have stable ordering
            return id.compareTo(other.id);
        }

        @Override
        public String toString() {
            return "VotingConfigNode{"
                + "id='"
                + id
                + '\''
                + ", live="
                + live
                + ", currentMaster="
                + currentMaster
                + ", inCurrentConfig="
                + inCurrentConfig
                + '}';
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy (footer of the page this listing was copied from; not part of the source)