org.elasticsearch.action.admin.cluster.allocation.TransportClusterAllocationExplainAction Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.action.admin.cluster.allocation;
import org.apache.lucene.index.CorruptIndexException;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.admin.indices.shards.IndicesShardStoresRequest;
import org.elasticsearch.action.admin.indices.shards.IndicesShardStoresResponse;
import org.elasticsearch.action.admin.indices.shards.TransportIndicesShardStoresAction;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.master.TransportMasterNodeAction;
import org.elasticsearch.cluster.ClusterInfo;
import org.elasticsearch.cluster.ClusterInfoService;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.block.ClusterBlockException;
import org.elasticsearch.cluster.block.ClusterBlockLevel;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.RecoverySource;
import org.elasticsearch.cluster.routing.RoutingNode;
import org.elasticsearch.cluster.routing.RoutingNodes;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.UnassignedInfo;
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
import org.elasticsearch.cluster.routing.allocation.allocator.ShardsAllocator;
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders;
import org.elasticsearch.cluster.routing.allocation.decider.Decision;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.collect.ImmutableOpenIntMap;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.gateway.GatewayAllocator;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static org.elasticsearch.cluster.routing.UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING;
/**
* The {@code TransportClusterAllocationExplainAction} is responsible for actually executing the explanation of a shard's allocation on the
* master node in the cluster.
*/
public class TransportClusterAllocationExplainAction
extends TransportMasterNodeAction {
private final ClusterInfoService clusterInfoService;
private final AllocationDeciders allocationDeciders;
private final ShardsAllocator shardAllocator;
private final TransportIndicesShardStoresAction shardStoresAction;
private final GatewayAllocator gatewayAllocator;
@Inject
public TransportClusterAllocationExplainAction(Settings settings, TransportService transportService, ClusterService clusterService,
ThreadPool threadPool, ActionFilters actionFilters,
IndexNameExpressionResolver indexNameExpressionResolver,
ClusterInfoService clusterInfoService, AllocationDeciders allocationDeciders,
ShardsAllocator shardAllocator, TransportIndicesShardStoresAction shardStoresAction,
GatewayAllocator gatewayAllocator) {
super(settings, ClusterAllocationExplainAction.NAME, transportService, clusterService, threadPool, actionFilters,
indexNameExpressionResolver, ClusterAllocationExplainRequest::new);
this.clusterInfoService = clusterInfoService;
this.allocationDeciders = allocationDeciders;
this.shardAllocator = shardAllocator;
this.shardStoresAction = shardStoresAction;
this.gatewayAllocator = gatewayAllocator;
}
@Override
protected String executor() {
return ThreadPool.Names.MANAGEMENT;
}
@Override
protected ClusterBlockException checkBlock(ClusterAllocationExplainRequest request, ClusterState state) {
return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_READ);
}
@Override
protected ClusterAllocationExplainResponse newResponse() {
return new ClusterAllocationExplainResponse();
}
/**
* Return the decisions for the given {@code ShardRouting} on the given {@code RoutingNode}. If {@code includeYesDecisions} is not true,
* only non-YES (NO and THROTTLE) decisions are returned.
*/
public static Decision tryShardOnNode(ShardRouting shard, RoutingNode node, RoutingAllocation allocation, boolean includeYesDecisions) {
Decision d = allocation.deciders().canAllocate(shard, node, allocation);
if (includeYesDecisions) {
return d;
} else {
Decision.Multi nonYesDecisions = new Decision.Multi();
List decisions = d.getDecisions();
for (Decision decision : decisions) {
if (decision.type() != Decision.Type.YES) {
nonYesDecisions.add(decision);
}
}
return nonYesDecisions;
}
}
/**
* Construct a {@code WeightedDecision} object for the given shard given all the metadata. This also attempts to construct the human
* readable FinalDecision and final explanation as part of the explanation.
*/
public static NodeExplanation calculateNodeExplanation(ShardRouting shard,
IndexMetaData indexMetaData,
DiscoveryNode node,
Decision nodeDecision,
Float nodeWeight,
IndicesShardStoresResponse.StoreStatus storeStatus,
String assignedNodeId,
Set activeAllocationIds,
boolean hasPendingAsyncFetch) {
final ClusterAllocationExplanation.FinalDecision finalDecision;
final ClusterAllocationExplanation.StoreCopy storeCopy;
final String finalExplanation;
if (storeStatus == null) {
// No copies of the data
storeCopy = ClusterAllocationExplanation.StoreCopy.NONE;
} else {
final Exception storeErr = storeStatus.getStoreException();
if (storeErr != null) {
if (ExceptionsHelper.unwrapCause(storeErr) instanceof CorruptIndexException) {
storeCopy = ClusterAllocationExplanation.StoreCopy.CORRUPT;
} else {
storeCopy = ClusterAllocationExplanation.StoreCopy.IO_ERROR;
}
} else if (activeAllocationIds.isEmpty()) {
// The ids are only empty if dealing with a legacy index
// TODO: fetch the shard state versions and display here?
storeCopy = ClusterAllocationExplanation.StoreCopy.UNKNOWN;
} else if (activeAllocationIds.contains(storeStatus.getAllocationId())) {
storeCopy = ClusterAllocationExplanation.StoreCopy.AVAILABLE;
} else {
// Otherwise, this is a stale copy of the data (allocation ids don't match)
storeCopy = ClusterAllocationExplanation.StoreCopy.STALE;
}
}
if (node.getId().equals(assignedNodeId)) {
finalDecision = ClusterAllocationExplanation.FinalDecision.ALREADY_ASSIGNED;
finalExplanation = "the shard is already assigned to this node";
} else if (shard.unassigned() && shard.primary() == false &&
shard.unassignedInfo().getReason() != UnassignedInfo.Reason.INDEX_CREATED && nodeDecision.type() != Decision.Type.YES) {
finalExplanation = "the shard cannot be assigned because allocation deciders return a " + nodeDecision.type().name() +
" decision";
finalDecision = ClusterAllocationExplanation.FinalDecision.NO;
} else if (shard.unassigned() && shard.primary() == false &&
shard.unassignedInfo().getReason() != UnassignedInfo.Reason.INDEX_CREATED && hasPendingAsyncFetch) {
finalExplanation = "the shard's state is still being fetched so it cannot be allocated";
finalDecision = ClusterAllocationExplanation.FinalDecision.NO;
} else if (shard.primary() && shard.unassigned() &&
(shard.recoverySource().getType() == RecoverySource.Type.EXISTING_STORE ||
shard.recoverySource().getType() == RecoverySource.Type.SNAPSHOT)
&& hasPendingAsyncFetch) {
finalExplanation = "the shard's state is still being fetched so it cannot be allocated";
finalDecision = ClusterAllocationExplanation.FinalDecision.NO;
} else if (shard.primary() && shard.unassigned() && shard.recoverySource().getType() == RecoverySource.Type.EXISTING_STORE &&
storeCopy == ClusterAllocationExplanation.StoreCopy.STALE) {
finalExplanation = "the copy of the shard is stale, allocation ids do not match";
finalDecision = ClusterAllocationExplanation.FinalDecision.NO;
} else if (shard.primary() && shard.unassigned() && shard.recoverySource().getType() == RecoverySource.Type.EXISTING_STORE &&
storeCopy == ClusterAllocationExplanation.StoreCopy.NONE) {
finalExplanation = "there is no copy of the shard available";
finalDecision = ClusterAllocationExplanation.FinalDecision.NO;
} else if (shard.primary() && shard.unassigned() && shard.recoverySource().getType() == RecoverySource.Type.EXISTING_STORE &&
storeCopy == ClusterAllocationExplanation.StoreCopy.CORRUPT) {
finalExplanation = "the copy of the shard is corrupt";
finalDecision = ClusterAllocationExplanation.FinalDecision.NO;
} else if (shard.primary() && shard.unassigned() && shard.recoverySource().getType() == RecoverySource.Type.EXISTING_STORE &&
storeCopy == ClusterAllocationExplanation.StoreCopy.IO_ERROR) {
finalExplanation = "the copy of the shard cannot be read";
finalDecision = ClusterAllocationExplanation.FinalDecision.NO;
} else {
if (nodeDecision.type() == Decision.Type.NO) {
finalDecision = ClusterAllocationExplanation.FinalDecision.NO;
finalExplanation = "the shard cannot be assigned because one or more allocation decider returns a 'NO' decision";
} else {
// TODO: handle throttling decision better here
finalDecision = ClusterAllocationExplanation.FinalDecision.YES;
if (storeCopy == ClusterAllocationExplanation.StoreCopy.AVAILABLE) {
finalExplanation = "the shard can be assigned and the node contains a valid copy of the shard data";
} else {
finalExplanation = "the shard can be assigned";
}
}
}
return new NodeExplanation(node, nodeDecision, nodeWeight, storeStatus, finalDecision, finalExplanation, storeCopy);
}
/**
* For the given {@code ShardRouting}, return the explanation of the allocation for that shard on all nodes. If {@code
* includeYesDecisions} is true, returns all decisions, otherwise returns only 'NO' and 'THROTTLE' decisions.
*/
public static ClusterAllocationExplanation explainShard(ShardRouting shard, RoutingAllocation allocation, RoutingNodes routingNodes,
boolean includeYesDecisions, ShardsAllocator shardAllocator,
List shardStores,
GatewayAllocator gatewayAllocator, ClusterInfo clusterInfo) {
// don't short circuit deciders, we want a full explanation
allocation.debugDecision(true);
// get the existing unassigned info if available
UnassignedInfo ui = shard.unassignedInfo();
Map nodeToDecision = new HashMap<>();
for (RoutingNode node : routingNodes) {
DiscoveryNode discoNode = node.node();
if (discoNode.isDataNode()) {
Decision d = tryShardOnNode(shard, node, allocation, includeYesDecisions);
nodeToDecision.put(discoNode, d);
}
}
long remainingDelayMillis = 0;
final MetaData metadata = allocation.metaData();
final IndexMetaData indexMetaData = metadata.index(shard.index());
long allocationDelayMillis = INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(indexMetaData.getSettings()).getMillis();
if (ui != null && ui.isDelayed()) {
long remainingDelayNanos = ui.getRemainingDelay(System.nanoTime(), indexMetaData.getSettings());
remainingDelayMillis = TimeValue.timeValueNanos(remainingDelayNanos).millis();
}
// Calculate weights for each of the nodes
Map weights = shardAllocator.weighShard(allocation, shard);
Map nodeToStatus = new HashMap<>(shardStores.size());
for (IndicesShardStoresResponse.StoreStatus status : shardStores) {
nodeToStatus.put(status.getNode(), status);
}
Map explanations = new HashMap<>(shardStores.size());
for (Map.Entry entry : nodeToDecision.entrySet()) {
DiscoveryNode node = entry.getKey();
Decision decision = entry.getValue();
Float weight = weights.get(node);
IndicesShardStoresResponse.StoreStatus storeStatus = nodeToStatus.get(node);
NodeExplanation nodeExplanation = calculateNodeExplanation(shard, indexMetaData, node, decision, weight,
storeStatus, shard.currentNodeId(), indexMetaData.inSyncAllocationIds(shard.getId()),
allocation.hasPendingAsyncFetch());
explanations.put(node, nodeExplanation);
}
return new ClusterAllocationExplanation(shard.shardId(), shard.primary(),
shard.currentNodeId(), allocationDelayMillis, remainingDelayMillis, ui,
gatewayAllocator.hasFetchPending(shard.shardId(), shard.primary()), explanations, clusterInfo);
}
@Override
protected void masterOperation(final ClusterAllocationExplainRequest request, final ClusterState state,
final ActionListener listener) {
final RoutingNodes routingNodes = state.getRoutingNodes();
final ClusterInfo clusterInfo = clusterInfoService.getClusterInfo();
final RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, state,
clusterInfo, System.nanoTime(), false);
ShardRouting foundShard = null;
if (request.useAnyUnassignedShard()) {
// If we can use any shard, just pick the first unassigned one (if there are any)
RoutingNodes.UnassignedShards.UnassignedIterator ui = routingNodes.unassigned().iterator();
if (ui.hasNext()) {
foundShard = ui.next();
}
} else {
String index = request.getIndex();
int shard = request.getShard();
if (request.isPrimary()) {
// If we're looking for the primary shard, there's only one copy, so pick it directly
foundShard = allocation.routingTable().shardRoutingTable(index, shard).primaryShard();
} else {
// If looking for a replica, go through all the replica shards
List replicaShardRoutings = allocation.routingTable().shardRoutingTable(index, shard).replicaShards();
if (replicaShardRoutings.size() > 0) {
// Pick the first replica at the very least
foundShard = replicaShardRoutings.get(0);
// In case there are multiple replicas where some are assigned and some aren't,
// try to find one that is unassigned at least
for (ShardRouting replica : replicaShardRoutings) {
if (replica.unassigned()) {
foundShard = replica;
break;
}
}
}
}
}
if (foundShard == null) {
listener.onFailure(new ElasticsearchException("unable to find any shards to explain [{}] in the routing table", request));
return;
}
final ShardRouting shardRouting = foundShard;
logger.debug("explaining the allocation for [{}], found shard [{}]", request, shardRouting);
getShardStores(shardRouting, new ActionListener() {
@Override
public void onResponse(IndicesShardStoresResponse shardStoreResponse) {
ImmutableOpenIntMap> shardStatuses =
shardStoreResponse.getStoreStatuses().get(shardRouting.getIndexName());
List shardStoreStatus = shardStatuses.get(shardRouting.id());
ClusterAllocationExplanation cae = explainShard(shardRouting, allocation, routingNodes,
request.includeYesDecisions(), shardAllocator, shardStoreStatus, gatewayAllocator,
request.includeDiskInfo() ? clusterInfo : null);
listener.onResponse(new ClusterAllocationExplainResponse(cae));
}
@Override
public void onFailure(Exception e) {
listener.onFailure(e);
}
});
}
private void getShardStores(ShardRouting shard, final ActionListener listener) {
IndicesShardStoresRequest request = new IndicesShardStoresRequest(shard.getIndexName());
request.shardStatuses("all");
shardStoresAction.execute(request, listener);
}
}