All downloads are free. The search and download functionality uses the official Maven repository.

org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider Maven / Gradle / Ivy

There is a newer version: 8.13.4
Show newest version
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.cluster.routing.allocation.decider;

import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.routing.RoutingNode;
import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.UnassignedInfo;
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
import org.elasticsearch.common.settings.Setting;

/**
 * An allocation decider that prevents shards from being allocated on any node if the shards allocation has been retried N times without
 * success. This means if a shard has been INITIALIZING N times in a row without being moved to STARTED the shard will be ignored until
 * the setting for {@code index.allocation.max_retry} is raised. The default value is {@code 5}.
 * Note: This allocation decider also allows allocation of repeatedly failing shards when the {@code /_cluster/reroute?retry_failed=true}
 * API is manually invoked. This allows single retries without raising the limits.
 *
 */
public class MaxRetryAllocationDecider extends AllocationDecider {

    public static final Setting SETTING_ALLOCATION_MAX_RETRY = Setting.intSetting(
        "index.allocation.max_retries",
        5,
        0,
        Setting.Property.Dynamic,
        Setting.Property.IndexScope,
        Setting.Property.NotCopyableOnResize
    );

    public static final String NAME = "max_retry";

    private static final Decision YES_NO_FAILURES = Decision.single(Decision.Type.YES, NAME, "shard has no previous failures");

    @Override
    public Decision canAllocate(ShardRouting shardRouting, RoutingAllocation allocation) {
        final UnassignedInfo unassignedInfo = shardRouting.unassignedInfo();
        final int numFailedAllocations = unassignedInfo == null ? 0 : unassignedInfo.getNumFailedAllocations();
        if (numFailedAllocations > 0) {
            return decisionWithFailures(shardRouting, allocation, unassignedInfo, numFailedAllocations);
        }
        return YES_NO_FAILURES;
    }

    private static Decision decisionWithFailures(
        ShardRouting shardRouting,
        RoutingAllocation allocation,
        UnassignedInfo unassignedInfo,
        int numFailedAllocations
    ) {
        final IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(shardRouting.index());
        final int maxRetry = SETTING_ALLOCATION_MAX_RETRY.get(indexMetadata.getSettings());
        final Decision res = numFailedAllocations >= maxRetry ? Decision.NO : Decision.YES;
        return allocation.debugDecision() ? debugDecision(res, unassignedInfo, numFailedAllocations, maxRetry) : res;
    }

    private static Decision debugDecision(Decision decision, UnassignedInfo unassignedInfo, int numFailedAllocations, int maxRetry) {
        if (decision.type() == Decision.Type.YES) {
            return Decision.single(
                Decision.Type.NO,
                NAME,
                "shard has exceeded the maximum number of retries [%d] on "
                    + "failed allocation attempts - manually call [/_cluster/reroute?retry_failed=true] to retry, [%s]",
                maxRetry,
                unassignedInfo.toString()
            );
        } else {
            return Decision.single(
                Decision.Type.YES,
                NAME,
                "shard has failed allocating [%d] times but [%d] retries are allowed",
                numFailedAllocations,
                maxRetry
            );
        }
    }

    @Override
    public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
        return canAllocate(shardRouting, allocation);
    }

    @Override
    public Decision canForceAllocatePrimary(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
        assert shardRouting.primary() : "must not call canForceAllocatePrimary on a non-primary shard " + shardRouting;
        // check if we have passed the maximum retry threshold through canAllocate,
        // if so, we don't want to force the primary allocation here
        return canAllocate(shardRouting, node, allocation);
    }

    @Override
    public Decision canForceAllocateDuringReplace(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) {
        return canAllocate(shardRouting, node, allocation);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy