/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.indices;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodeRole;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.ValidationException;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;
import static org.elasticsearch.cluster.metadata.IndexMetadata.INDEX_NUMBER_OF_SHARDS_SETTING;
/**
* This class contains the logic used to check the cluster-wide shard limit before shards are created, and to ensure that the limit is
* updated correctly when the relevant cluster settings change.
*
* NOTE: This is the limit applied at *shard creation time*. If you are looking for the limit applied at *allocation* time, which is
* controlled by a different setting,
* see {@link org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider}.
*/
public class ShardLimitValidator {
public static final Setting<Integer> SETTING_CLUSTER_MAX_SHARDS_PER_NODE = Setting.intSetting(
"cluster.max_shards_per_node",
1000,
1,
Setting.Property.Dynamic,
Setting.Property.NodeScope
);
public static final Setting<Integer> SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN = Setting.intSetting(
"cluster.max_shards_per_node.frozen",
3000,
1,
Setting.Property.Dynamic,
Setting.Property.NodeScope
);
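// Note: both limits above are dynamic cluster settings. As an illustration (not something defined in this class),
// an operator could raise the non-frozen limit at runtime via the cluster settings update API, e.g.:
//
//   PUT _cluster/settings
//   { "persistent": { "cluster.max_shards_per_node": 1500 } }
//
// The update consumers registered in the constructor below then pick up the new value without a node restart.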
public static final String FROZEN_GROUP = "frozen";
static final Set<String> VALID_GROUPS = org.elasticsearch.core.Set.of("normal", FROZEN_GROUP);
public static final Setting<String> INDEX_SETTING_SHARD_LIMIT_GROUP = Setting.simpleString(
"index.shard_limit.group",
"normal",
value -> {
if (VALID_GROUPS.contains(value) == false) {
throw new IllegalArgumentException("[" + value + "] is not a valid shard limit group");
}
},
Setting.Property.IndexScope,
Setting.Property.PrivateIndex,
Setting.Property.NotCopyableOnResize
);
protected final AtomicInteger shardLimitPerNode = new AtomicInteger();
protected final AtomicInteger shardLimitPerNodeFrozen = new AtomicInteger();
public ShardLimitValidator(final Settings settings, ClusterService clusterService) {
this.shardLimitPerNode.set(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.get(settings));
this.shardLimitPerNodeFrozen.set(SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN.get(settings));
clusterService.getClusterSettings().addSettingsUpdateConsumer(SETTING_CLUSTER_MAX_SHARDS_PER_NODE, this::setShardLimitPerNode);
clusterService.getClusterSettings()
.addSettingsUpdateConsumer(SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN, this::setShardLimitPerNodeFrozen);
}
private void setShardLimitPerNode(int newValue) {
this.shardLimitPerNode.set(newValue);
}
private void setShardLimitPerNodeFrozen(int newValue) {
this.shardLimitPerNodeFrozen.set(newValue);
}
/**
* Gets the currently configured value of the {@link ShardLimitValidator#SETTING_CLUSTER_MAX_SHARDS_PER_NODE} setting.
* @return the current value of the setting
*/
public int getShardLimitPerNode() {
return shardLimitPerNode.get();
}
/**
* Checks whether an index can be created without going over the cluster shard limit.
*
* @param settings the settings of the index to be created
* @param state the current cluster state
* @throws ValidationException if creating this index would put the cluster over the cluster shard limit
*/
public void validateShardLimit(final Settings settings, final ClusterState state) {
final int numberOfShards = INDEX_NUMBER_OF_SHARDS_SETTING.get(settings);
final int numberOfReplicas = IndexMetadata.INDEX_NUMBER_OF_REPLICAS_SETTING.get(settings);
final int shardsToCreate = numberOfShards * (1 + numberOfReplicas);
final boolean frozen = FROZEN_GROUP.equals(INDEX_SETTING_SHARD_LIMIT_GROUP.get(settings));
final Optional<String> shardLimit = checkShardLimit(frozen == false ? shardsToCreate : 0, frozen ? shardsToCreate : 0, state);
if (shardLimit.isPresent()) {
final ValidationException e = new ValidationException();
e.addValidationError(shardLimit.get());
throw e;
}
}
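// Worked example (illustrative numbers only): an index created with "index.number_of_shards": 3 and
// "index.number_of_replicas": 1 yields shardsToCreate = 3 * (1 + 1) = 6, counted against the "normal" group
// unless the index carries "index.shard_limit.group": "frozen".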
/**
* Validates whether a list of indices can be opened without going over the cluster shard limit. Only counts indices which are
* currently closed and will be opened; indices which are already open are ignored.
*
* @param currentState The current cluster state.
* @param indicesToOpen The indices which are to be opened.
* @throws ValidationException If this operation would take the cluster over the limit and enforcement is enabled.
*/
public void validateShardLimit(ClusterState currentState, Index[] indicesToOpen) {
int frozen = 0;
int normal = 0;
for (Index index : indicesToOpen) {
IndexMetadata imd = currentState.metadata().index(index);
if (imd.getState().equals(IndexMetadata.State.CLOSE)) {
int totalNewShards = imd.getNumberOfShards() * (1 + imd.getNumberOfReplicas());
if (FROZEN_GROUP.equals(INDEX_SETTING_SHARD_LIMIT_GROUP.get(imd.getSettings()))) {
frozen += totalNewShards;
} else {
normal += totalNewShards;
}
}
}
Optional<String> error = checkShardLimit(normal, frozen, currentState);
if (error.isPresent()) {
ValidationException ex = new ValidationException();
ex.addValidationError(error.get());
throw ex;
}
}
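/**
 * Validates whether updating the number of replicas on the given indices can be done without going over the cluster shard limit.
 * Only the additional shards implied by a replica increase are counted; a replica decrease yields a non-positive count and never
 * trips the limit.
 *
 * @param currentState The current cluster state.
 * @param indices The indices whose replica count is being updated.
 * @param replicas The new number of replicas.
 * @throws ValidationException If this operation would take the cluster over the limit.
 */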
public void validateShardLimitOnReplicaUpdate(ClusterState currentState, Index[] indices, int replicas) {
int frozen = 0;
int normal = 0;
for (Index index : indices) {
IndexMetadata imd = currentState.metadata().index(index);
int totalNewShards = getTotalNewShards(index, currentState, replicas);
if (FROZEN_GROUP.equals(INDEX_SETTING_SHARD_LIMIT_GROUP.get(imd.getSettings()))) {
frozen += totalNewShards;
} else {
normal += totalNewShards;
}
}
Optional<String> error = checkShardLimit(normal, frozen, currentState);
if (error.isPresent()) {
ValidationException ex = new ValidationException();
ex.addValidationError(error.get());
throw ex;
}
}
private int getTotalNewShards(Index index, ClusterState currentState, int updatedNumberOfReplicas) {
IndexMetadata indexMetadata = currentState.metadata().index(index);
int shardsInIndex = indexMetadata.getNumberOfShards();
int oldNumberOfReplicas = indexMetadata.getNumberOfReplicas();
int replicaIncrease = updatedNumberOfReplicas - oldNumberOfReplicas;
return replicaIncrease * shardsInIndex;
}
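// Example (illustrative): raising replicas from 1 to 2 on an index with 5 primaries gives (2 - 1) * 5 = 5 new shards;
// lowering replicas produces a negative value, which checkShardLimit below treats as nothing to validate.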
/**
* Checks to see if an operation can be performed without taking the cluster over the cluster-wide shard limit.
* Returns an error message if appropriate, or an empty {@link Optional} otherwise.
*
* @param newShards The number of normal shards to be added by this operation
* @param newFrozenShards The number of frozen shards to be added by this operation
* @param state The current cluster state
* @return If present, an error message to be given as the reason for failing
* an operation. If empty, a sign that the operation is valid.
*/
private Optional<String> checkShardLimit(int newShards, int newFrozenShards, ClusterState state) {
// We verify the two limits independently. This also means that a node mixing the frozen role with other data roles
// can hold both 1000 normal and 3000 frozen shards. This is the trade-off made to keep the counts simple. We advocate
// against such mixed nodes for production use anyway.
int frozenNodeCount = nodeCount(state, ShardLimitValidator::hasFrozen);
int normalNodeCount = nodeCount(state, ShardLimitValidator::hasNonFrozen);
Optional<String> normal = checkShardLimit(newShards, state, getShardLimitPerNode(), normalNodeCount, "normal");
return normal.isPresent()
? normal
: checkShardLimit(newFrozenShards, state, shardLimitPerNodeFrozen.get(), frozenNodeCount, "frozen");
}
/**
* This method decides whether there is enough room in the cluster to add the given number of shards with the given number of replicas
* without exceeding the "cluster.max_shards_per_node.frozen" setting if the shards are going on frozen nodes or the
* "cluster.max_shards_per_node" setting if the shards are going on normal nodes. This check does not guarantee that the number of
* shards can be added, just that there is theoretically room to add them without exceeding the shards per node configuration.
* @param numberOfNewShards The number of primary shards that we want to be able to add to the cluster
* @param replicas The number of replicas of the primary shards that we want to be able to add to the cluster
* @param state The cluster state, used to get cluster settings and to get the number of open shards already in the cluster
* @param frozenNodes If true, check whether there is room to put these shards onto frozen nodes. If false, check whether there is room
* to put these shards onto normal nodes.
* @return True if there is room to add the requested number of shards to the cluster, and false if there is not
*/
public static boolean canAddShardsToCluster(int numberOfNewShards, int replicas, ClusterState state, boolean frozenNodes) {
Settings clusterSettings = state.getMetadata().settings();
int maxShardsPerNode = frozenNodes
? SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN.get(clusterSettings)
: SETTING_CLUSTER_MAX_SHARDS_PER_NODE.get(clusterSettings);
int nodeCount = nodeCount(state, frozenNodes ? ShardLimitValidator::hasFrozen : ShardLimitValidator::hasNonFrozen);
String nodeGroup = frozenNodes ? FROZEN_GROUP : "normal";
Optional<String> errorMessage = checkShardLimit(numberOfNewShards * (1 + replicas), state, maxShardsPerNode, nodeCount, nodeGroup);
return errorMessage.isPresent() == false;
}
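// Minimal usage sketch (hypothetical caller, not part of this class): check whether 5 primaries with 1 replica
// would fit on the non-frozen tier before building a create-index request.
//
//   boolean fits = ShardLimitValidator.canAddShardsToCluster(5, 1, clusterService.state(), false);
//   if (fits == false) {
//       // surface the problem to the user instead of letting index creation fail later
//   }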
// package-private for testing
static Optional<String> checkShardLimit(int newShards, ClusterState state, int maxShardsPerNode, int nodeCount, String group) {
// Only enforce the shard limit if we have at least one data node, so that we don't block
// index creation during cluster setup
if (nodeCount == 0 || newShards <= 0) {
return Optional.empty();
}
int maxShardsInCluster = maxShardsPerNode * nodeCount;
int currentOpenShards = state.getMetadata().getTotalOpenIndexShards();
if ((currentOpenShards + newShards) > maxShardsInCluster) {
Predicate<IndexMetadata> indexMetadataPredicate = imd -> imd.getState().equals(IndexMetadata.State.OPEN)
&& group.equals(INDEX_SETTING_SHARD_LIMIT_GROUP.get(imd.getSettings()));
long currentFilteredShards = state.metadata()
.indices()
.values()
.stream()
.filter(indexMetadataPredicate)
.mapToInt(IndexMetadata::getTotalNumberOfShards)
.sum();
if ((currentFilteredShards + newShards) > maxShardsInCluster) {
String errorMessage = "this action would add ["
+ newShards
+ "] shards, but this cluster currently has ["
+ currentFilteredShards
+ "]/["
+ maxShardsInCluster
+ "] maximum "
+ group
+ " shards open";
return Optional.of(errorMessage);
}
}
return Optional.empty();
}
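// Arithmetic sketch of the check above (illustrative numbers): with 3 non-frozen data nodes and
// cluster.max_shards_per_node = 1000, maxShardsInCluster = 3 * 1000 = 3000. If the cluster already has 2998 open
// "normal" shards, adding an index worth 6 shards fails with an error such as
// "this action would add [6] shards, but this cluster currently has [2998]/[3000] maximum normal shards open".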
private static int nodeCount(ClusterState state, Predicate<DiscoveryNode> nodePredicate) {
return (int) state.getNodes().getDataNodes().values().stream().filter(nodePredicate).count();
}
private static boolean hasFrozen(DiscoveryNode node) {
return node.getRoles().contains(DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE);
}
private static boolean hasNonFrozen(DiscoveryNode node) {
return node.getRoles().stream().anyMatch(r -> r.canContainData() && r != DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE);
}
}