org.opensearch.cluster.routing.allocation.decider.NodeLoadAwareAllocationDecider Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of opensearch Show documentation
Show all versions of opensearch Show documentation
OpenSearch subproject :server
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
package org.opensearch.cluster.routing.allocation.decider;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.opensearch.cluster.metadata.Metadata;
import org.opensearch.cluster.routing.RoutingNode;
import org.opensearch.cluster.routing.ShardRouting;
import org.opensearch.cluster.routing.allocation.RoutingAllocation;
import org.opensearch.common.settings.ClusterSettings;
import org.opensearch.common.settings.Setting;
import org.opensearch.common.settings.Setting.Property;
import org.opensearch.common.settings.Settings;
import java.util.function.BiPredicate;
/**
* This {@link NodeLoadAwareAllocationDecider} controls shard over-allocation
* due to node failures or otherwise on the surviving nodes. The allocation limits
* are decided by the user provisioned capacity, to determine if there were lost nodes.
* The provisioned capacity as defined by the below settings needs to be updated on every
* cluster scale up and scale down operations.
*
* cluster.routing.allocation.overload_awareness.provisioned_capacity: N
*
*
* and prevent allocation on the surviving nodes of the under capacity cluster
* based on overload factor defined as a percentage and flat skew as absolute allowed skewness by
*
*
* cluster.routing.allocation.load_awareness.skew_factor: X
* cluster.routing.allocation.load_awareness.flat_skew: N
*
* The total limit per node based on skew_factor and flat_skew doesn't limit primaries that previously
* existed on the disk as those shards are force allocated by
* {@link AllocationDeciders#canForceAllocatePrimary(ShardRouting, RoutingNode, RoutingAllocation)}
* however new primaries due to index creation, snapshot restore etc can be controlled via the below settings.
* Setting the value to true allows newly created primaries to get assigned while preventing the replica allocation
* breaching the skew factor.
* Note that setting this to false can result in the primaries not get assigned and the cluster turning RED
*
* cluster.routing.allocation.load_awareness.allow_unassigned_primaries
*
*
* @opensearch.internal
*/
public class NodeLoadAwareAllocationDecider extends AllocationDecider {
public static final String NAME = "load_awareness";
public static final Setting CLUSTER_ROUTING_ALLOCATION_LOAD_AWARENESS_PROVISIONED_CAPACITY_SETTING = Setting.intSetting(
"cluster.routing.allocation.load_awareness.provisioned_capacity",
-1,
-1,
Property.Dynamic,
Property.NodeScope
);
public static final Setting CLUSTER_ROUTING_ALLOCATION_LOAD_AWARENESS_SKEW_FACTOR_SETTING = Setting.doubleSetting(
"cluster.routing.allocation.load_awareness.skew_factor",
50,
-1,
Property.Dynamic,
Property.NodeScope
);
public static final Setting CLUSTER_ROUTING_ALLOCATION_LOAD_AWARENESS_ALLOW_UNASSIGNED_PRIMARIES_SETTING = Setting.boolSetting(
"cluster.routing.allocation.load_awareness.allow_unassigned_primaries",
true,
Setting.Property.Dynamic,
Property.NodeScope
);
public static final Setting CLUSTER_ROUTING_ALLOCATION_LOAD_AWARENESS_FLAT_SKEW_SETTING = Setting.intSetting(
"cluster.routing.allocation.load_awareness.flat_skew",
2,
2,
Property.Dynamic,
Property.NodeScope
);
private volatile int provisionedCapacity;
private volatile double skewFactor;
private volatile boolean allowUnassignedPrimaries;
private volatile int flatSkew;
private static final Logger logger = LogManager.getLogger(NodeLoadAwareAllocationDecider.class);
public NodeLoadAwareAllocationDecider(Settings settings, ClusterSettings clusterSettings) {
this.skewFactor = CLUSTER_ROUTING_ALLOCATION_LOAD_AWARENESS_SKEW_FACTOR_SETTING.get(settings);
this.provisionedCapacity = CLUSTER_ROUTING_ALLOCATION_LOAD_AWARENESS_PROVISIONED_CAPACITY_SETTING.get(settings);
this.allowUnassignedPrimaries = CLUSTER_ROUTING_ALLOCATION_LOAD_AWARENESS_ALLOW_UNASSIGNED_PRIMARIES_SETTING.get(settings);
this.flatSkew = CLUSTER_ROUTING_ALLOCATION_LOAD_AWARENESS_FLAT_SKEW_SETTING.get(settings);
clusterSettings.addSettingsUpdateConsumer(CLUSTER_ROUTING_ALLOCATION_LOAD_AWARENESS_SKEW_FACTOR_SETTING, this::setSkewFactor);
clusterSettings.addSettingsUpdateConsumer(
CLUSTER_ROUTING_ALLOCATION_LOAD_AWARENESS_PROVISIONED_CAPACITY_SETTING,
this::setProvisionedCapacity
);
clusterSettings.addSettingsUpdateConsumer(
CLUSTER_ROUTING_ALLOCATION_LOAD_AWARENESS_ALLOW_UNASSIGNED_PRIMARIES_SETTING,
this::setAllowUnassignedPrimaries
);
clusterSettings.addSettingsUpdateConsumer(CLUSTER_ROUTING_ALLOCATION_LOAD_AWARENESS_FLAT_SKEW_SETTING, this::setFlatSkew);
}
private void setAllowUnassignedPrimaries(boolean allowUnassignedPrimaries) {
this.allowUnassignedPrimaries = allowUnassignedPrimaries;
}
private void setSkewFactor(double skewFactor) {
this.skewFactor = skewFactor;
}
private void setProvisionedCapacity(int provisionedCapacity) {
this.provisionedCapacity = provisionedCapacity;
}
private void setFlatSkew(int flatSkew) {
this.flatSkew = flatSkew;
}
@Override
public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
return underCapacity(shardRouting, node, allocation, (count, limit) -> count >= limit);
}
@Override
public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
return underCapacity(shardRouting, node, allocation, (count, limit) -> count > limit);
}
private Decision underCapacity(
ShardRouting shardRouting,
RoutingNode node,
RoutingAllocation allocation,
BiPredicate decider
) {
if (provisionedCapacity <= 0 || skewFactor < 0) {
return allocation.decision(
Decision.YES,
NAME,
"overload awareness allocation is not enabled, set cluster setting [%s] and cluster setting [%s] to enable it",
CLUSTER_ROUTING_ALLOCATION_LOAD_AWARENESS_SKEW_FACTOR_SETTING.getKey(),
CLUSTER_ROUTING_ALLOCATION_LOAD_AWARENESS_PROVISIONED_CAPACITY_SETTING.getKey()
);
}
if (shardRouting.unassigned() && shardRouting.primary() && allowUnassignedPrimaries) {
return allocation.decision(
Decision.YES,
NAME,
"overload allocation awareness is allowed for unassigned primaries, set cluster setting [%s] to disable it",
CLUSTER_ROUTING_ALLOCATION_LOAD_AWARENESS_ALLOW_UNASSIGNED_PRIMARIES_SETTING.getKey()
);
}
Metadata metadata = allocation.metadata();
float expectedAvgShardsPerNode = (float) metadata.getTotalNumberOfShards() / provisionedCapacity;
int nodeShardCount = node.numberOfOwningShards();
int limit = flatSkew + (int) Math.ceil(expectedAvgShardsPerNode * (1 + skewFactor / 100.0));
if (decider.test(nodeShardCount, limit)) {
logger.debug(
() -> new ParameterizedMessage(
"Too many shards [{}] allocated to this node [{}]. Expected average shards"
+ " per node [{}], overload factor [{}], node limit [{}]",
nodeShardCount,
node.nodeId(),
expectedAvgShardsPerNode,
skewFactor,
limit
)
);
return allocation.decision(
Decision.NO,
NAME,
"too many shards [%d] allocated to this node, limit per node [%d] considering"
+ " overload factor [%.2f] and flat skew [%d] based on capacity [%d]",
nodeShardCount,
limit,
skewFactor,
flatSkew,
provisionedCapacity
);
}
return allocation.decision(Decision.YES, NAME, "node meets all skew awareness attribute requirements");
}
}