org.elasticsearch.cluster.routing.allocation.decider.AwarenessAllocationDecider

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.cluster.routing.allocation.decider;

import com.carrotsearch.hppc.ObjectIntHashMap;
import com.google.common.collect.Maps;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.routing.RoutingNode;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.node.settings.NodeSettingsService;

import java.util.HashMap;
import java.util.Map;

/**
 * This {@link AllocationDecider} controls shard allocation based on
 * awareness key-value pairs defined in the node configuration.
 * Awareness explicitly controls where replicas should be allocated based on
 * attributes like node or physical rack locations. Awareness attributes accept
 * arbitrary configuration keys like a rack or data-center identifier. For
 * example the setting:
 * <pre>
 * cluster.routing.allocation.awareness.attributes: rack_id
 * </pre>
 * <p>
 * will cause allocations to be distributed over different racks such that,
 * ideally, copies of the same shard do not all end up on the same rack.
 * To enable allocation awareness in this example, nodes should contain a
 * value for the rack_id key like:
 * <pre>
 * node.rack_id: 1
 * </pre>
 * <p>
 * Awareness can also be used to prevent over-allocation in the case of node
 * or even "zone" failure. For example, in cloud-computing infrastructures
 * like Amazon AWS a cluster might span multiple "zones". Awareness can be
 * used to distribute replicas to individual zones by setting:
 * <pre>
 * cluster.routing.allocation.awareness.attributes: zone
 * </pre>
 * <p>
 * and forcing allocation to be aware of which zone the data resides in:
 * <pre>
 * cluster.routing.allocation.awareness.force.zone.values: zone1,zone2
 * </pre>
 * <p>
 * In contrast to regular awareness, this setting will prevent over-allocation
 * on zone1 even if zone2 fails partially or becomes entirely unavailable.
 * Nodes that belong to a certain zone / group should be started with the
 * zone id configured in the node-level settings like:
 * <pre>
 * node.zone: zone1
 * </pre>
 */
public class AwarenessAllocationDecider extends AllocationDecider {

    public static final String NAME = "awareness";

    public static final String CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTES = "cluster.routing.allocation.awareness.attributes";
    public static final String CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP = "cluster.routing.allocation.awareness.force.";

    class ApplySettings implements NodeSettingsService.Listener {
        @Override
        public void onRefreshSettings(Settings settings) {
            String[] awarenessAttributes = settings.getAsArray(CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTES, null);
            if (awarenessAttributes == null && "".equals(settings.get(CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTES, null))) {
                awarenessAttributes = Strings.EMPTY_ARRAY; // the empty string resets this
            }
            if (awarenessAttributes != null) {
                logger.info("updating [cluster.routing.allocation.awareness.attributes] from [{}] to [{}]",
                        AwarenessAllocationDecider.this.awarenessAttributes, awarenessAttributes);
                AwarenessAllocationDecider.this.awarenessAttributes = awarenessAttributes;
            }
            Map<String, String[]> forcedAwarenessAttributes = new HashMap<>(AwarenessAllocationDecider.this.forcedAwarenessAttributes);
            Map<String, Settings> forceGroups = settings.getGroups(CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP);
            if (!forceGroups.isEmpty()) {
                for (Map.Entry<String, Settings> entry : forceGroups.entrySet()) {
                    String[] aValues = entry.getValue().getAsArray("values");
                    if (aValues.length > 0) {
                        forcedAwarenessAttributes.put(entry.getKey(), aValues);
                    }
                }
            }
            AwarenessAllocationDecider.this.forcedAwarenessAttributes = forcedAwarenessAttributes;
        }
    }

    private String[] awarenessAttributes;

    private Map<String, String[]> forcedAwarenessAttributes;

    /**
     * Creates a new {@link AwarenessAllocationDecider} instance
     */
    public AwarenessAllocationDecider() {
        this(Settings.Builder.EMPTY_SETTINGS);
    }

    /**
     * Creates a new {@link AwarenessAllocationDecider} instance from given settings
     *
     * @param settings {@link Settings} to use
     */
    public AwarenessAllocationDecider(Settings settings) {
        this(settings, new NodeSettingsService(settings));
    }

    @Inject
    public AwarenessAllocationDecider(Settings settings, NodeSettingsService nodeSettingsService) {
        super(settings);
        this.awarenessAttributes = settings.getAsArray(CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTES);

        forcedAwarenessAttributes = Maps.newHashMap();
        Map<String, Settings> forceGroups = settings.getGroups(CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP);
        for (Map.Entry<String, Settings> entry : forceGroups.entrySet()) {
            String[] aValues = entry.getValue().getAsArray("values");
            if (aValues.length > 0) {
                forcedAwarenessAttributes.put(entry.getKey(), aValues);
            }
        }

        nodeSettingsService.addListener(new ApplySettings());
    }

    /**
     * Get the attributes defined by this instance
     *
     * @return attributes defined by this instance
     */
    public String[] awarenessAttributes() {
        return this.awarenessAttributes;
    }

    @Override
    public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
        return underCapacity(shardRouting, node, allocation, true);
    }

    @Override
    public Decision canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
        return underCapacity(shardRouting, node, allocation, false);
    }

    private Decision underCapacity(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation, boolean moveToNode) {
        if (awarenessAttributes.length == 0) {
            return allocation.decision(Decision.YES, NAME, "no allocation awareness enabled");
        }

        IndexMetaData indexMetaData = allocation.metaData().index(shardRouting.index());
        int shardCount = indexMetaData.getNumberOfReplicas() + 1; // 1 for primary
        for (String awarenessAttribute : awarenessAttributes) {
            // the node the shard exists on must be associated with an awareness attribute
            if (!node.node().attributes().containsKey(awarenessAttribute)) {
                return allocation.decision(Decision.NO, NAME, "node does not contain awareness attribute: [%s]", awarenessAttribute);
            }

            // build attr_value -> nodes map
            ObjectIntHashMap<String> nodesPerAttribute = allocation.routingNodes().nodesPerAttributesCounts(awarenessAttribute);

            // build the count of shards per attribute value
            ObjectIntHashMap<String> shardPerAttribute = new ObjectIntHashMap<>();
            for (ShardRouting assignedShard : allocation.routingNodes().assignedShards(shardRouting)) {
                if (assignedShard.started() || assignedShard.initializing()) {
                    // Note: this also counts relocation targets as that will be the new location of the shard.
                    // Relocation sources should not be counted as the shard is moving away
                    RoutingNode routingNode = allocation.routingNodes().node(assignedShard.currentNodeId());
                    shardPerAttribute.addTo(routingNode.node().attributes().get(awarenessAttribute), 1);
                }
            }

            if (moveToNode) {
                if (shardRouting.assignedToNode()) {
                    String nodeId = shardRouting.relocating() ? shardRouting.relocatingNodeId() : shardRouting.currentNodeId();
                    if (!node.nodeId().equals(nodeId)) {
                        // we work on different nodes, move counts around
                        shardPerAttribute.putOrAdd(allocation.routingNodes().node(nodeId).node().attributes().get(awarenessAttribute), 0, -1);
                        shardPerAttribute.addTo(node.node().attributes().get(awarenessAttribute), 1);
                    }
                } else {
                    shardPerAttribute.addTo(node.node().attributes().get(awarenessAttribute), 1);
                }
            }

            int numberOfAttributes = nodesPerAttribute.size();
            String[] fullValues = forcedAwarenessAttributes.get(awarenessAttribute);
            if (fullValues != null) {
                // forced values count as attribute values even if no shard copy is currently allocated to them
                for (String fullValue : fullValues) {
                    if (!shardPerAttribute.containsKey(fullValue)) {
                        numberOfAttributes++;
                    }
                }
            }
            // TODO should we remove ones that are not part of full list?

            int averagePerAttribute = shardCount / numberOfAttributes;
            int totalLeftover = shardCount % numberOfAttributes;
            int requiredCountPerAttribute;
            if (averagePerAttribute == 0) {
                // if we have more attribute values than shard copies, no leftover
                totalLeftover = 0;
                requiredCountPerAttribute = 1;
            } else {
                requiredCountPerAttribute = averagePerAttribute;
            }
            int leftoverPerAttribute = totalLeftover == 0 ? 0 : 1;

            int currentNodeCount = shardPerAttribute.get(node.node().attributes().get(awarenessAttribute));
            // if we are above the average even after allowing for the leftover, this attribute value is over capacity
            if (currentNodeCount > (requiredCountPerAttribute + leftoverPerAttribute)) {
                return allocation.decision(Decision.NO, NAME,
                        "too many shards on node for attribute: [%s], required per attribute: [%d], node count: [%d], leftover: [%d]",
                        awarenessAttribute, requiredCountPerAttribute, currentNodeCount, leftoverPerAttribute);
            }
            // all is well, we are below or at the average
            if (currentNodeCount <= requiredCountPerAttribute) {
                continue;
            }
        }

        return allocation.decision(Decision.YES, NAME, "node meets awareness requirements");
    }
}
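The balancing rule inside underCapacity is easiest to see with concrete numbers. The following standalone sketch reproduces only the per-attribute-value capacity arithmetic; it has no Elasticsearch dependencies, and the class name, helper method, and scenarios are illustrative assumptions rather than part of the decider above.

import java.util.Arrays;

public class AwarenessMathSketch {

    /**
     * Mirrors the capacity check in AwarenessAllocationDecider#underCapacity:
     * a node may take a shard copy only if the number of copies on its
     * attribute value stays within shardCount / numberOfValues, plus at most
     * one extra copy when the division leaves a remainder.
     *
     * @param shardCount        primary + replicas of the shard being placed
     * @param attributeValues   distinct attribute values with nodes, plus any
     *                          forced values that currently hold no copy
     * @param copiesOnThisValue copies already counted on the candidate node's
     *                          attribute value, including the candidate copy
     */
    static boolean underCapacity(int shardCount, int attributeValues, int copiesOnThisValue) {
        int averagePerAttribute = shardCount / attributeValues;
        int totalLeftover = shardCount % attributeValues;
        int requiredCountPerAttribute;
        if (averagePerAttribute == 0) {
            // more attribute values than shard copies: at most one copy per value
            totalLeftover = 0;
            requiredCountPerAttribute = 1;
        } else {
            requiredCountPerAttribute = averagePerAttribute;
        }
        int leftoverPerAttribute = totalLeftover == 0 ? 0 : 1;
        return copiesOnThisValue <= requiredCountPerAttribute + leftoverPerAttribute;
    }

    public static void main(String[] args) {
        // Rack awareness, 1 primary + 2 replicas over two racks: 3 / 2 = 1 with a
        // leftover of 1, so a rack holding 2 of the 3 copies is still acceptable.
        System.out.println(underCapacity(3, 2, 2));   // true

        // Forced zone awareness (zone1,zone2) with zone2 down: zone2 is still counted
        // as an attribute value, so zone1 is capped at 2 / 2 = 1 copy and the replica
        // stays unassigned instead of over-allocating zone1.
        System.out.println(underCapacity(2, 2, 2));   // false

        // Regular (non-forced) awareness with zone2 down: only zone1 is counted,
        // so both copies are allowed to end up in zone1.
        System.out.println(underCapacity(2, 1, 2));   // true
    }
}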


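Two details of the sketch are worth noting against the real decider. Forced awareness works purely by inflating numberOfAttributes: values listed under cluster.routing.allocation.awareness.force.&lt;attribute&gt;.values are counted even when no node currently carries a copy, which keeps the per-value cap low and stops surviving zones from being over-allocated. And because the decider registers an ApplySettings listener with NodeSettingsService, both the awareness attributes and the forced values can be refreshed on a running cluster without restarting nodes.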

