org.elasticsearch.cluster.routing.allocation.AllocationService Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
There is a newer version: 8.15.1
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.cluster.routing.allocation;

import org.elasticsearch.cluster.ClusterInfoService;
import org.elasticsearch.cluster.ClusterName;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.health.ClusterHealthStatus;
import org.elasticsearch.cluster.health.ClusterStateHealth;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.cluster.routing.AllocationId;
import org.elasticsearch.cluster.routing.IndexRoutingTable;
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
import org.elasticsearch.cluster.routing.RoutingNode;
import org.elasticsearch.cluster.routing.RoutingNodes;
import org.elasticsearch.cluster.routing.RoutingTable;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.UnassignedInfo;
import org.elasticsearch.cluster.routing.UnassignedInfo.AllocationStatus;
import org.elasticsearch.cluster.routing.allocation.allocator.ShardsAllocator;
import org.elasticsearch.cluster.routing.allocation.command.AllocationCommands;
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.gateway.GatewayAllocator;
import org.elasticsearch.index.shard.ShardId;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;

import static org.elasticsearch.cluster.routing.UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING;


/**
 * This service manages the node allocation of a cluster. For this reason the
 * {@link AllocationService} keeps {@link AllocationDeciders} to choose nodes
 * for shard allocation. This class also manages new nodes joining the cluster
 * and rerouting of shards.
 */
public class AllocationService extends AbstractComponent {

    private final AllocationDeciders allocationDeciders;
    private final GatewayAllocator gatewayAllocator;
    private final ShardsAllocator shardsAllocator;
    private final ClusterInfoService clusterInfoService;
    private final ClusterName clusterName;

    @Inject
    public AllocationService(Settings settings, AllocationDeciders allocationDeciders, GatewayAllocator gatewayAllocator,
                             ShardsAllocator shardsAllocator, ClusterInfoService clusterInfoService) {
        super(settings);
        this.allocationDeciders = allocationDeciders;
        this.gatewayAllocator = gatewayAllocator;
        this.shardsAllocator = shardsAllocator;
        this.clusterInfoService = clusterInfoService;
        clusterName = ClusterName.CLUSTER_NAME_SETTING.get(settings);
    }

    /**
     * Applies the started shards. Note, shards can be called several times within this method.
     * 
     * If the same instance of the routing table is returned, then no change has been made.
     */
    public RoutingAllocation.Result applyStartedShards(ClusterState clusterState, List startedShards) {
        return applyStartedShards(clusterState, startedShards, true);
    }

    public RoutingAllocation.Result applyStartedShards(ClusterState clusterState, List startedShards, boolean withReroute) {
        RoutingNodes routingNodes = getMutableRoutingNodes(clusterState);
        // shuffle the unassigned nodes, just so we won't have things like poison failed shards
        routingNodes.unassigned().shuffle();
        StartedRerouteAllocation allocation = new StartedRerouteAllocation(allocationDeciders, routingNodes, clusterState, startedShards, clusterInfoService.getClusterInfo(), currentNanoTime());
        boolean changed = applyStartedShards(allocation, startedShards);
        if (!changed) {
            return new RoutingAllocation.Result(false, clusterState.routingTable(), clusterState.metaData());
        }
        gatewayAllocator.applyStartedShards(allocation);
        if (withReroute) {
            reroute(allocation);
        }
        String startedShardsAsString = firstListElementsToCommaDelimitedString(startedShards, s -> s.shardId().toString());
        return buildResultAndLogHealthChange(allocation, "shards started [" + startedShardsAsString + "] ...");
    }

    protected RoutingAllocation.Result buildResultAndLogHealthChange(RoutingAllocation allocation, String reason) {
        return buildResultAndLogHealthChange(allocation, reason, new RoutingExplanations());

    }

    protected RoutingAllocation.Result buildResultAndLogHealthChange(RoutingAllocation allocation, String reason, RoutingExplanations explanations) {
        MetaData oldMetaData = allocation.metaData();
        RoutingTable oldRoutingTable = allocation.routingTable();
        RoutingNodes newRoutingNodes = allocation.routingNodes();
        final RoutingTable newRoutingTable = new RoutingTable.Builder().updateNodes(oldRoutingTable.version(), newRoutingNodes).build();
        MetaData newMetaData = updateMetaDataWithRoutingTable(oldMetaData, oldRoutingTable, newRoutingTable);
        assert newRoutingTable.validate(newMetaData); // validates the routing table is coherent with the cluster state metadata
        logClusterHealthStateChange(
            new ClusterStateHealth(ClusterState.builder(clusterName).
                metaData(allocation.metaData()).routingTable(allocation.routingTable()).build()),
            new ClusterStateHealth(ClusterState.builder(clusterName).
                metaData(newMetaData).routingTable(newRoutingTable).build()),
            reason
        );
        return new RoutingAllocation.Result(true, newRoutingTable, newMetaData, explanations);
    }

    /**
     * Updates the current {@link MetaData} based on the newly created {@link RoutingTable}. Specifically
     * we update {@link IndexMetaData#getActiveAllocationIds()} and {@link IndexMetaData#primaryTerm(int)} based on
     * the changes made during this allocation.
     *
     * @param oldMetaData     {@link MetaData} object from before the routing table was changed.
     * @param oldRoutingTable {@link RoutingTable} from before the  change.
     * @param newRoutingTable new {@link RoutingTable} created by the allocation change
     * @return adapted {@link MetaData}, potentially the original one if no change was needed.
     */
    static MetaData updateMetaDataWithRoutingTable(MetaData oldMetaData, RoutingTable oldRoutingTable, RoutingTable newRoutingTable) {
        MetaData.Builder metaDataBuilder = null;
        for (IndexRoutingTable newIndexTable : newRoutingTable) {
            final IndexMetaData oldIndexMetaData = oldMetaData.index(newIndexTable.getIndex());
            if (oldIndexMetaData == null) {
                throw new IllegalStateException("no metadata found for index " + newIndexTable.getIndex().getName());
            }
            IndexMetaData.Builder indexMetaDataBuilder = null;
            for (IndexShardRoutingTable newShardTable : newIndexTable) {
                final ShardId shardId = newShardTable.shardId();

                // update activeAllocationIds
                Set activeAllocationIds = newShardTable.activeShards().stream()
                        .map(ShardRouting::allocationId)
                        .filter(Objects::nonNull)
                        .map(AllocationId::getId)
                        .collect(Collectors.toSet());
                // only update active allocation ids if there is an active shard
                if (activeAllocationIds.isEmpty() == false) {
                    // get currently stored allocation ids
                    Set storedAllocationIds = oldIndexMetaData.activeAllocationIds(shardId.id());
                    if (activeAllocationIds.equals(storedAllocationIds) == false) {
                        if (indexMetaDataBuilder == null) {
                            indexMetaDataBuilder = IndexMetaData.builder(oldIndexMetaData);
                        }
                        indexMetaDataBuilder.putActiveAllocationIds(shardId.id(), activeAllocationIds);
                    }
                }

                // update primary terms
                final ShardRouting newPrimary = newShardTable.primaryShard();
                if (newPrimary == null) {
                    throw new IllegalStateException("missing primary shard for " + newShardTable.shardId());
                }
                final ShardRouting oldPrimary = oldRoutingTable.shardRoutingTable(shardId).primaryShard();
                if (oldPrimary == null) {
                    throw new IllegalStateException("missing primary shard for " + newShardTable.shardId());
                }
                // we update the primary term on initial assignment or when a replica is promoted. Most notably we do *not*
                // update them when a primary relocates
                if (newPrimary.unassigned() ||
                        newPrimary.isSameAllocation(oldPrimary) ||
                        // we do not use newPrimary.isTargetRelocationOf(oldPrimary) because that one enforces newPrimary to
                        // be initializing. However, when the target shard is activated, we still want the primary term to staty
                        // the same
                        (oldPrimary.relocating() && newPrimary.isSameAllocation(oldPrimary.buildTargetRelocatingShard()))) {
                    // do nothing
                } else {
                    // incrementing the primary term
                    if (indexMetaDataBuilder == null) {
                        indexMetaDataBuilder = IndexMetaData.builder(oldIndexMetaData);
                    }
                    indexMetaDataBuilder.primaryTerm(shardId.id(), oldIndexMetaData.primaryTerm(shardId.id()) + 1);
                }
            }
            if (indexMetaDataBuilder != null) {
                if (metaDataBuilder == null) {
                    metaDataBuilder = MetaData.builder(oldMetaData);
                }
                metaDataBuilder.put(indexMetaDataBuilder);
            }
        }
        if (metaDataBuilder != null) {
            return metaDataBuilder.build();
        } else {
            return oldMetaData;
        }
    }

    public RoutingAllocation.Result applyFailedShard(ClusterState clusterState, ShardRouting failedShard) {
        return applyFailedShards(clusterState, Collections.singletonList(new FailedRerouteAllocation.FailedShard(failedShard, null, null)));
    }

    /**
     * Applies the failed shards. Note, shards can be called several times within this method.
     * 
     * If the same instance of the routing table is returned, then no change has been made.
     */
    public RoutingAllocation.Result applyFailedShards(ClusterState clusterState, List failedShards) {
        RoutingNodes routingNodes = getMutableRoutingNodes(clusterState);
        // shuffle the unassigned nodes, just so we won't have things like poison failed shards
        routingNodes.unassigned().shuffle();
        long currentNanoTime = currentNanoTime();
        FailedRerouteAllocation allocation = new FailedRerouteAllocation(allocationDeciders, routingNodes, clusterState, failedShards, clusterInfoService.getClusterInfo(), currentNanoTime);
        boolean changed = false;
        // as failing primaries also fail associated replicas, we fail replicas first here so that their nodes are added to ignore list
        List orderedFailedShards = new ArrayList<>(failedShards);
        orderedFailedShards.sort(Comparator.comparing(failedShard -> failedShard.shard.primary()));
        for (FailedRerouteAllocation.FailedShard failedShard : orderedFailedShards) {
            UnassignedInfo unassignedInfo = failedShard.shard.unassignedInfo();
            final int failedAllocations = unassignedInfo != null ? unassignedInfo.getNumFailedAllocations() : 0;
            changed |= applyFailedShard(allocation, failedShard.shard, true, new UnassignedInfo(UnassignedInfo.Reason.ALLOCATION_FAILED, failedShard.message, failedShard.failure,
                    failedAllocations + 1, currentNanoTime, System.currentTimeMillis(), false, AllocationStatus.NO_ATTEMPT));
        }
        if (!changed) {
            return new RoutingAllocation.Result(false, clusterState.routingTable(), clusterState.metaData());
        }
        gatewayAllocator.applyFailedShards(allocation);
        reroute(allocation);
        String failedShardsAsString = firstListElementsToCommaDelimitedString(failedShards, s -> s.shard.shardId().toString());
        return buildResultAndLogHealthChange(allocation, "shards failed [" + failedShardsAsString + "] ...");
    }

    /**
     * Removes delay markers from unassigned shards based on current time stamp. Returns true if markers were removed.
     */
    private boolean removeDelayMarkers(RoutingAllocation allocation) {
        final RoutingNodes.UnassignedShards.UnassignedIterator unassignedIterator = allocation.routingNodes().unassigned().iterator();
        final MetaData metaData = allocation.metaData();
        boolean changed = false;
        while (unassignedIterator.hasNext()) {
            ShardRouting shardRouting = unassignedIterator.next();
            UnassignedInfo unassignedInfo = shardRouting.unassignedInfo();
            if (unassignedInfo.isDelayed()) {
                final long newComputedLeftDelayNanos = unassignedInfo.getRemainingDelay(allocation.getCurrentNanoTime(),
                    metaData.getIndexSafe(shardRouting.index()).getSettings());
                if (newComputedLeftDelayNanos == 0) {
                    changed = true;
                    unassignedIterator.updateUnassignedInfo(new UnassignedInfo(unassignedInfo.getReason(), unassignedInfo.getMessage(), unassignedInfo.getFailure(),
                        unassignedInfo.getNumFailedAllocations(), unassignedInfo.getUnassignedTimeInNanos(), unassignedInfo.getUnassignedTimeInMillis(), false,
                        unassignedInfo.getLastAllocationStatus()));
                }
            }
        }
        return changed;
    }

    /**
     * Internal helper to cap the number of elements in a potentially long list for logging.
     *
     * @param elements  The elements to log. May be any non-null list. Must not be null.
     * @param formatter A function that can convert list elements to a String. Must not be null.
     * @param        The list element type.
     * @return A comma-separated string of the first few elements.
     */
    private  String firstListElementsToCommaDelimitedString(List elements, Function formatter) {
        final int maxNumberOfElements = 10;
        return elements
                .stream()
                .limit(maxNumberOfElements)
                .map(formatter)
                .collect(Collectors.joining(", "));
    }

    public RoutingAllocation.Result reroute(ClusterState clusterState, AllocationCommands commands, boolean explain, boolean retryFailed) {
        RoutingNodes routingNodes = getMutableRoutingNodes(clusterState);
        // we don't shuffle the unassigned shards here, to try and get as close as possible to
        // a consistent result of the effect the commands have on the routing
        // this allows systems to dry run the commands, see the resulting cluster state, and act on it
        RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState,
            clusterInfoService.getClusterInfo(), currentNanoTime(), retryFailed);
        // don't short circuit deciders, we want a full explanation
        allocation.debugDecision(true);
        // we ignore disable allocation, because commands are explicit
        allocation.ignoreDisable(true);
        RoutingExplanations explanations = commands.execute(allocation, explain);
        // we revert the ignore disable flag, since when rerouting, we want the original setting to take place
        allocation.ignoreDisable(false);
        // the assumption is that commands will move / act on shards (or fail through exceptions)
        // so, there will always be shard "movements", so no need to check on reroute
        reroute(allocation);
        return buildResultAndLogHealthChange(allocation, "reroute commands", explanations);
    }


    /**
     * Reroutes the routing table based on the live nodes.
     * 
     * If the same instance of the routing table is returned, then no change has been made.
     */
    public RoutingAllocation.Result reroute(ClusterState clusterState, String reason) {
        return reroute(clusterState, reason, false);
    }

    /**
     * Reroutes the routing table based on the live nodes.
     * 
     * If the same instance of the routing table is returned, then no change has been made.
     */
    protected RoutingAllocation.Result reroute(ClusterState clusterState, String reason, boolean debug) {
        RoutingNodes routingNodes = getMutableRoutingNodes(clusterState);
        // shuffle the unassigned nodes, just so we won't have things like poison failed shards
        routingNodes.unassigned().shuffle();
        RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, routingNodes, clusterState,
            clusterInfoService.getClusterInfo(), currentNanoTime(), false);
        allocation.debugDecision(debug);
        if (!reroute(allocation)) {
            return new RoutingAllocation.Result(false, clusterState.routingTable(), clusterState.metaData());
        }
        return buildResultAndLogHealthChange(allocation, reason);
    }

    private void logClusterHealthStateChange(ClusterStateHealth previousStateHealth, ClusterStateHealth newStateHealth, String reason) {
        ClusterHealthStatus previousHealth = previousStateHealth.getStatus();
        ClusterHealthStatus currentHealth = newStateHealth.getStatus();
        if (!previousHealth.equals(currentHealth)) {
            logger.info("Cluster health status changed from [{}] to [{}] (reason: [{}]).", previousHealth, currentHealth, reason);
        }
    }

    private boolean reroute(RoutingAllocation allocation) {
        boolean changed = false;
        // first, clear from the shards any node id they used to belong to that is now dead
        changed |= deassociateDeadNodes(allocation);

        // elect primaries *before* allocating unassigned, so backups of primaries that failed
        // will be moved to primary state and not wait for primaries to be allocated and recovered (*from gateway*)
        changed |= electPrimariesAndUnassignedDanglingReplicas(allocation);

        // now allocate all the unassigned to available nodes
        if (allocation.routingNodes().unassigned().size() > 0) {
            changed |= removeDelayMarkers(allocation);
            changed |= gatewayAllocator.allocateUnassigned(allocation);
        }

        changed |= shardsAllocator.allocate(allocation);
        assert RoutingNodes.assertShardStats(allocation.routingNodes());
        return changed;
    }

    private boolean electPrimariesAndUnassignedDanglingReplicas(RoutingAllocation allocation) {
        boolean changed = false;
        final RoutingNodes routingNodes = allocation.routingNodes();
        if (routingNodes.unassigned().getNumPrimaries() == 0) {
            // move out if we don't have unassigned primaries
            return changed;
        }
        // now, go over and elect a new primary if possible, not, from this code block on, if one is elected,
        // routingNodes.hasUnassignedPrimaries() will potentially be false
        final RoutingNodes.UnassignedShards.UnassignedIterator unassignedIterator = routingNodes.unassigned().iterator();
        while (unassignedIterator.hasNext()) {
            ShardRouting shardEntry = unassignedIterator.next();
            if (shardEntry.primary()) {
                // remove dangling replicas that are initializing for primary shards
                changed |= failReplicasForUnassignedPrimary(allocation, shardEntry);
                ShardRouting candidate = allocation.routingNodes().activeReplica(shardEntry.shardId());
                if (candidate != null) {
                    shardEntry = unassignedIterator.demotePrimaryToReplicaShard();
                    ShardRouting primarySwappedCandidate = routingNodes.promoteAssignedReplicaShardToPrimary(candidate);
                    if (primarySwappedCandidate.relocatingNodeId() != null) {
                        changed = true;
                        // its also relocating, make sure to move the other routing to primary
                        RoutingNode node = routingNodes.node(primarySwappedCandidate.relocatingNodeId());
                        if (node != null) {
                            for (ShardRouting shardRouting : node) {
                                if (shardRouting.shardId().equals(primarySwappedCandidate.shardId()) && !shardRouting.primary()) {
                                    routingNodes.promoteAssignedReplicaShardToPrimary(shardRouting);
                                    break;
                                }
                            }
                        }
                    }
                    IndexMetaData index = allocation.metaData().getIndexSafe(primarySwappedCandidate.index());
                    if (IndexMetaData.isIndexUsingShadowReplicas(index.getSettings())) {
                        routingNodes.reinitShadowPrimary(primarySwappedCandidate);
                        changed = true;
                    }
                }
            }
        }

        return changed;
    }

    private boolean deassociateDeadNodes(RoutingAllocation allocation) {
        boolean changed = false;
        for (Iterator it = allocation.routingNodes().mutableIterator(); it.hasNext(); ) {
            RoutingNode node = it.next();
            if (allocation.nodes().getDataNodes().containsKey(node.nodeId())) {
                // its a live node, continue
                continue;
            }
            changed = true;
            // now, go over all the shards routing on the node, and fail them
            for (ShardRouting shardRouting : node.copyShards()) {
                final IndexMetaData indexMetaData = allocation.metaData().getIndexSafe(shardRouting.index());
                boolean delayed = INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.get(indexMetaData.getSettings()).nanos() > 0;
                UnassignedInfo unassignedInfo = new UnassignedInfo(UnassignedInfo.Reason.NODE_LEFT, "node_left[" + node.nodeId() + "]",
                    null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis(), delayed, AllocationStatus.NO_ATTEMPT);
                applyFailedShard(allocation, shardRouting, false, unassignedInfo);
            }
            // its a dead node, remove it, note, its important to remove it *after* we apply failed shard
            // since it relies on the fact that the RoutingNode exists in the list of nodes
            it.remove();
        }
        return changed;
    }

    private boolean failReplicasForUnassignedPrimary(RoutingAllocation allocation, ShardRouting primary) {
        List replicas = new ArrayList<>();
        for (ShardRouting routing : allocation.routingNodes().assignedShards(primary.shardId())) {
            if (!routing.primary() && routing.initializing()) {
                replicas.add(routing);
            }
        }
        boolean changed = false;
        for (ShardRouting routing : replicas) {
            changed |= applyFailedShard(allocation, routing, false,
                    new UnassignedInfo(UnassignedInfo.Reason.PRIMARY_FAILED, "primary failed while replica initializing",
                            null, 0, allocation.getCurrentNanoTime(), System.currentTimeMillis(), false,
                            AllocationStatus.NO_ATTEMPT));
        }
        return changed;
    }

    private boolean applyStartedShards(RoutingAllocation routingAllocation, Iterable startedShardEntries) {
        boolean dirty = false;
        // apply shards might be called several times with the same shard, ignore it
        RoutingNodes routingNodes = routingAllocation.routingNodes();
        for (ShardRouting startedShard : startedShardEntries) {
            assert startedShard.initializing();

            // validate index still exists. strictly speaking this is not needed but it gives clearer logs
            if (routingAllocation.metaData().index(startedShard.index()) == null) {
                logger.debug("{} ignoring shard started, unknown index (routing: {})", startedShard.shardId(), startedShard);
                continue;
            }

            RoutingNode currentRoutingNode = routingNodes.node(startedShard.currentNodeId());
            if (currentRoutingNode == null) {
                logger.debug("{} failed to find shard in order to start it [failed to find node], ignoring (routing: {})", startedShard.shardId(), startedShard);
                continue;
            }

            ShardRouting matchingShard = currentRoutingNode.getByShardId(startedShard.shardId());
            if (matchingShard == null) {
                logger.debug("{} failed to find shard in order to start it [failed to find shard], ignoring (routing: {})", startedShard.shardId(), startedShard);
            } else if (matchingShard.isSameAllocation(startedShard) == false) {
                logger.debug("{} failed to find shard with matching allocation id in order to start it [failed to find matching shard], ignoring (routing: {}, matched shard routing: {})", startedShard.shardId(), startedShard, matchingShard);
            } else {
                startedShard = matchingShard;
                if (startedShard.active()) {
                    logger.trace("{} shard is already started, ignoring (routing: {})", startedShard.shardId(), startedShard);
                } else {
                    assert startedShard.initializing();
                    dirty = true;
                    routingNodes.started(startedShard);
                    logger.trace("{} marked shard as started (routing: {})", startedShard.shardId(), startedShard);

                    if (startedShard.relocatingNodeId() != null) {
                        // relocation target has been started, remove relocation source
                        RoutingNode relocationSourceNode = routingNodes.node(startedShard.relocatingNodeId());
                        ShardRouting relocationSourceShard = relocationSourceNode.getByShardId(startedShard.shardId());
                        assert relocationSourceShard.isRelocationSourceOf(startedShard);
                        routingNodes.remove(relocationSourceShard);
                    }
                }
            }
        }
        return dirty;
    }

    /**
     * Applies the relevant logic to handle a failed shard. Returns true if changes happened that
     * require relocation.
     */
    private boolean applyFailedShard(RoutingAllocation allocation, ShardRouting failedShard, boolean addToIgnoreList, UnassignedInfo unassignedInfo) {
        IndexRoutingTable indexRoutingTable = allocation.routingTable().index(failedShard.index());
        if (indexRoutingTable == null) {
            logger.debug("{} ignoring shard failure, unknown index in {} ({})", failedShard.shardId(), failedShard, unassignedInfo.shortSummary());
            return false;
        }
        RoutingNodes routingNodes = allocation.routingNodes();

        RoutingNode matchedNode = routingNodes.node(failedShard.currentNodeId());
        if (matchedNode == null) {
            logger.debug("{} ignoring shard failure, unknown node in {} ({})", failedShard.shardId(), failedShard, unassignedInfo.shortSummary());
            return false;
        }

        ShardRouting matchedShard = matchedNode.getByShardId(failedShard.shardId());
        if (matchedShard != null && matchedShard.isSameAllocation(failedShard)) {
            logger.debug("{} failed shard {} found in routingNodes, failing it ({})", failedShard.shardId(), failedShard, unassignedInfo.shortSummary());
            // replace incoming instance to make sure we work on the latest one
            failedShard = matchedShard;
        } else {
            logger.debug("{} ignoring shard failure, unknown allocation id in {} ({})", failedShard.shardId(), failedShard, unassignedInfo.shortSummary());
            return false;
        }

        if (failedShard.primary()) {
            // fail replicas first otherwise we move RoutingNodes into an inconsistent state
            failReplicasForUnassignedPrimary(allocation, failedShard);
        }

        if (addToIgnoreList) {
            // make sure we ignore this shard on the relevant node
            allocation.addIgnoreShardForNode(failedShard.shardId(), failedShard.currentNodeId());
        }

        cancelShard(logger, failedShard, unassignedInfo, routingNodes);
        assert matchedNode.getByShardId(failedShard.shardId()) == null : "failedShard " + failedShard + " was matched but wasn't removed";
        return true;
    }

    public static void cancelShard(ESLogger logger, ShardRouting cancelledShard, UnassignedInfo unassignedInfo, RoutingNodes routingNodes) {
        if (cancelledShard.relocatingNodeId() == null) {
            routingNodes.moveToUnassigned(cancelledShard, unassignedInfo);
        } else {
            if (cancelledShard.initializing()) {
                // The shard is a target of a relocating shard. In that case we only
                // need to remove the target shard and cancel the source relocation.
                // No shard is left unassigned
                logger.trace("{} is a relocation target, resolving source to cancel relocation ({})", cancelledShard, unassignedInfo.shortSummary());
                RoutingNode sourceNode = routingNodes.node(cancelledShard.relocatingNodeId());
                ShardRouting sourceShard = sourceNode.getByShardId(cancelledShard.shardId());
                assert sourceShard.isRelocationSourceOf(cancelledShard);
                logger.trace("{}, resolved source to [{}]. canceling relocation ... ({})", cancelledShard.shardId(), sourceShard, unassignedInfo.shortSummary());
                routingNodes.cancelRelocation(sourceShard);
                routingNodes.remove(cancelledShard);
            } else {
                assert cancelledShard.relocating();
                // The cancelled shard is the main copy of the current shard routing.
                // now, find the shard that is initializing on the target node
                RoutingNode targetNode = routingNodes.node(cancelledShard.relocatingNodeId());
                ShardRouting targetShard = targetNode.getByShardId(cancelledShard.shardId());
                assert targetShard.isRelocationTargetOf(cancelledShard);
                if (cancelledShard.primary()) {
                    logger.trace("{} is removed due to the failure/cancellation of the source shard", targetShard);
                    // cancel and remove target shard
                    routingNodes.remove(targetShard);
                    routingNodes.moveToUnassigned(cancelledShard, unassignedInfo);
                } else {
                    logger.trace("{}, relocation source failed / cancelled, mark as initializing without relocation source", targetShard);
                    // promote to initializing shard without relocation source and ensure that removed relocation source
                    // is not added back as unassigned shard
                    routingNodes.removeRelocationSource(targetShard);
                    routingNodes.remove(cancelledShard);
                }
            }
        }
    }

    private RoutingNodes getMutableRoutingNodes(ClusterState clusterState) {
        RoutingNodes routingNodes = new RoutingNodes(clusterState, false); // this is a costly operation - only call this once!
        return routingNodes;
    }

    /** override this to control time based decisions during allocation */
    protected long currentNanoTime() {
        return System.nanoTime();
    }
}