
// org.elasticsearch.cluster.metadata.MetadataIndexStateService Maven / Gradle / Ivy
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.cluster.metadata;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ActionRunnable;
import org.elasticsearch.action.admin.indices.close.CloseIndexClusterStateUpdateRequest;
import org.elasticsearch.action.admin.indices.close.CloseIndexResponse;
import org.elasticsearch.action.admin.indices.close.CloseIndexResponse.IndexResult;
import org.elasticsearch.action.admin.indices.close.CloseIndexResponse.ShardResult;
import org.elasticsearch.action.admin.indices.close.TransportVerifyShardBeforeCloseAction;
import org.elasticsearch.action.admin.indices.open.OpenIndexClusterStateUpdateRequest;
import org.elasticsearch.action.admin.indices.readonly.AddIndexBlockClusterStateUpdateRequest;
import org.elasticsearch.action.admin.indices.readonly.AddIndexBlockResponse;
import org.elasticsearch.action.admin.indices.readonly.AddIndexBlockResponse.AddBlockResult;
import org.elasticsearch.action.admin.indices.readonly.AddIndexBlockResponse.AddBlockShardResult;
import org.elasticsearch.action.admin.indices.readonly.TransportVerifyShardIndexBlockAction;
import org.elasticsearch.action.support.ActiveShardsObserver;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.action.support.master.ShardsAcknowledgedResponse;
import org.elasticsearch.action.support.replication.ReplicationResponse;
import org.elasticsearch.client.internal.node.NodeClient;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateAckListener;
import org.elasticsearch.cluster.ClusterStateTaskExecutor;
import org.elasticsearch.cluster.ClusterStateTaskListener;
import org.elasticsearch.cluster.SimpleBatchedExecutor;
import org.elasticsearch.cluster.block.ClusterBlock;
import org.elasticsearch.cluster.block.ClusterBlockLevel;
import org.elasticsearch.cluster.block.ClusterBlocks;
import org.elasticsearch.cluster.metadata.IndexMetadata.APIBlock;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.IndexRoutingTable;
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
import org.elasticsearch.cluster.routing.RoutingTable;
import org.elasticsearch.cluster.routing.ShardRoutingRoleStrategy;
import org.elasticsearch.cluster.routing.allocation.AllocationService;
import org.elasticsearch.cluster.routing.allocation.allocator.AllocationActionMultiListener;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.cluster.service.MasterServiceTaskQueue;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.concurrent.AtomicArray;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.common.util.concurrent.CountDown;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.core.SuppressForbidden;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexNotFoundException;
import org.elasticsearch.index.shard.IndexLongFieldRange;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.indices.ShardLimitValidator;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.snapshots.RestoreService;
import org.elasticsearch.snapshots.SnapshotInProgressException;
import org.elasticsearch.snapshots.SnapshotsService;
import org.elasticsearch.tasks.TaskId;
import org.elasticsearch.threadpool.ThreadPool;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import static java.util.stream.Collectors.joining;
import static org.elasticsearch.core.Strings.format;
/**
* Service responsible for submitting open/close index requests as well as for adding index blocks
*/
public class MetadataIndexStateService {
private static final Logger logger = LogManager.getLogger(MetadataIndexStateService.class);

/** Id of the "index closed" block; an existing closed block on an index is recognised by comparing against this id. */
public static final int INDEX_CLOSED_BLOCK_ID = 4;

/** The generic "index closed" block: {@link RestStatus#FORBIDDEN} status at the {@link ClusterBlockLevel#READ_WRITE} levels. */
public static final ClusterBlock INDEX_CLOSED_BLOCK = new ClusterBlock(
    INDEX_CLOSED_BLOCK_ID, // same id as the constant above, so the two can never drift apart
    "index closed",
    false,
    false,
    false,
    RestStatus.FORBIDDEN,
    ClusterBlockLevel.READ_WRITE
);

/** Private, index-scoped boolean setting {@code index.verified_before_close}; defaults to {@code false}. */
public static final Setting<Boolean> VERIFIED_BEFORE_CLOSE_SETTING = Setting.boolSetting(
    "index.verified_before_close",
    false,
    Setting.Property.IndexScope,
    Setting.Property.PrivateIndex
);

private final ClusterService clusterService;
private final AllocationService allocationService;
private final IndexMetadataVerifier indexMetadataVerifier;
private final IndicesService indicesService;
private final ShardLimitValidator shardLimitValidator;
private final NodeClient client;
private final ThreadPool threadPool;

// Master-service task queues; each one is created in the constructor together with its matching executor.
// NOTE(review): OpenIndicesTask is assumed to be the task type handled by OpenIndicesExecutor - confirm against its declaration.
private final MasterServiceTaskQueue<OpenIndicesTask> opensQueue;
private final MasterServiceTaskQueue<AddBlocksToCloseTask> addBlocksToCloseQueue;
private final MasterServiceTaskQueue<CloseIndicesTask> closesQueue;
private final MasterServiceTaskQueue<AddBlocksTask> addBlocksQueue;
private final MasterServiceTaskQueue<FinalizeBlocksTask> finalizeBlocksQueue;
@Inject
public MetadataIndexStateService(
ClusterService clusterService,
AllocationService allocationService,
IndexMetadataVerifier indexMetadataVerifier,
IndicesService indicesService,
ShardLimitValidator shardLimitValidator,
NodeClient client,
ThreadPool threadPool
) {
this.clusterService = clusterService;
this.allocationService = allocationService;
this.indexMetadataVerifier = indexMetadataVerifier;
this.indicesService = indicesService;
this.shardLimitValidator = shardLimitValidator;
this.client = client;
this.threadPool = threadPool;
opensQueue = clusterService.createTaskQueue("open-index", Priority.URGENT, new OpenIndicesExecutor());
addBlocksToCloseQueue = clusterService.createTaskQueue("add-blocks-to-close", Priority.URGENT, new AddBlocksToCloseExecutor());
closesQueue = clusterService.createTaskQueue("close-index", Priority.URGENT, new CloseIndicesExecutor());
addBlocksQueue = clusterService.createTaskQueue("add-blocks", Priority.URGENT, new AddBlocksExecutor());
finalizeBlocksQueue = clusterService.createTaskQueue("finalize-blocks", Priority.URGENT, new FinalizeBlocksExecutor());
}
/**
 * Closes one or more indices.
 *
 * Closing indices is a three-step process: first a write block is added to every index to close, then the
 * operations on the shards are waited for until they have terminated, and finally the indices are closed by
 * moving their state to CLOSE.
 *
 * This method only validates the request and enqueues the first step; the later steps are chained from the
 * completion of that task.
 *
 * @param request  the close request; must name at least one index
 * @param listener notified with the final {@link CloseIndexResponse}, or with the failure
 * @throws IllegalArgumentException if the request contains no indices
 */
public void closeIndices(final CloseIndexClusterStateUpdateRequest request, final ActionListener<CloseIndexResponse> listener) {
    if (request.indices() == null || request.indices().length == 0) {
        throw new IllegalArgumentException("Index name is required");
    }

    addBlocksToCloseQueue.submitTask(
        "add-block-index-to-close " + Arrays.toString(request.indices()),
        new AddBlocksToCloseTask(request, listener),
        request.masterNodeTimeout()
    );
}
private class AddBlocksToCloseExecutor extends SimpleBatchedExecutor> {
@Override
public Tuple> executeTask(AddBlocksToCloseTask task, ClusterState clusterState)
throws Exception {
final Map blockedIndices = new HashMap<>(task.request.indices().length);
var updatedClusterState = addIndexClosedBlocks(task.request.indices(), blockedIndices, clusterState);
return Tuple.tuple(updatedClusterState, blockedIndices);
}
@Override
public void taskSucceeded(AddBlocksToCloseTask task, Map blockedIndices) {
if (blockedIndices.isEmpty()) {
task.listener().onResponse(CloseIndexResponse.EMPTY);
} else {
threadPool.executor(ThreadPool.Names.MANAGEMENT)
.execute(
new WaitForClosedBlocksApplied(
blockedIndices,
task.request,
task.listener()
.delegateFailure(
(delegate2, verifyResults) -> closesQueue.submitTask(
"close-indices",
new CloseIndicesTask(task.request, blockedIndices, verifyResults, delegate2),
null
)
)
)
);
}
}
}
/**
 * Task for the first step of closing indices.
 *
 * @param request  the close request the task belongs to
 * @param listener receives the final response, or the failure if the task fails
 */
private record AddBlocksToCloseTask(CloseIndexClusterStateUpdateRequest request, ActionListener<CloseIndexResponse> listener)
    implements
        ClusterStateTaskListener {

    /** Forwards a task failure to the request's listener. */
    @Override
    public void onFailure(Exception e) {
        listener.onFailure(e);
    }
}
private class CloseIndicesExecutor implements ClusterStateTaskExecutor {
@Override
@SuppressForbidden(reason = "consuming published cluster state for legacy reasons")
public ClusterState execute(BatchExecutionContext batchExecutionContext) {
var listener = new AllocationActionMultiListener(threadPool.getThreadContext());
var state = batchExecutionContext.initialState();
for (final var taskContext : batchExecutionContext.taskContexts()) {
final var task = taskContext.getTask();
try {
final Tuple> closingResult = closeRoutingTable(
state,
task.blockedIndices,
task.verifyResults,
allocationService.getShardRoutingRoleStrategy()
);
state = closingResult.v1();
final List indices = closingResult.v2();
assert indices.size() == task.verifyResults.size();
taskContext.success(clusterState -> {
final boolean acknowledged = indices.stream().noneMatch(IndexResult::hasFailures);
final String[] waitForIndices = indices.stream()
.filter(result -> result.hasFailures() == false)
.filter(result -> clusterState.routingTable().hasIndex(result.getIndex()))
.map(result -> result.getIndex().getName())
.toArray(String[]::new);
if (waitForIndices.length > 0) {
ActiveShardsObserver.waitForActiveShards(
clusterService,
waitForIndices,
task.request.waitForActiveShards(),
task.request.ackTimeout(),
listener.delay(task.listener()).map(shardsAcknowledged -> {
if (shardsAcknowledged == false) {
logger.debug(
() -> format(
"[%s] indices closed, but the operation timed out while "
+ "waiting for enough shards to be started.",
Arrays.toString(waitForIndices)
)
);
}
// acknowledged maybe be false but some indices may have been correctly closed,
// so we maintain a kind of coherency by overriding the shardsAcknowledged value
// (see ShardsAcknowledgedResponse constructor)
boolean shardsAcked = acknowledged ? shardsAcknowledged : false;
return new CloseIndexResponse(acknowledged, shardsAcked, indices);
})
);
} else {
listener.delay(task.listener()).onResponse(new CloseIndexResponse(acknowledged, false, indices));
}
});
} catch (Exception e) {
taskContext.onFailure(e);
}
}
try (var ignored = batchExecutionContext.dropHeadersContext()) {
// reroute may encounter deprecated features but the resulting warnings are not associated with any particular task
return allocationService.reroute(state, "indices closed", listener.reroute());
}
}
}
/**
 * Task for the last step of closing indices.
 *
 * @param request        the close request the task belongs to
 * @param blockedIndices the indices that received a closed block, with their blocks
 * @param verifyResults  the per-index results of the verification step
 * @param listener       receives the final response, or the failure if the task fails
 */
private record CloseIndicesTask(
    CloseIndexClusterStateUpdateRequest request,
    Map<Index, ClusterBlock> blockedIndices,
    Map<Index, IndexResult> verifyResults,
    ActionListener<CloseIndexResponse> listener
) implements ClusterStateTaskListener {

    /** Forwards a task failure to the request's listener. */
    @Override
    public void onFailure(Exception e) {
        listener.onFailure(e);
    }
}
/**
 * Step 1 - Start closing indices by adding a write block
 *
 * This step builds the list of indices to close (the ones explicitly requested that are not in CLOSE state) and adds a unique cluster
 * block (or reuses an existing one) to every index to close in the cluster state. After the cluster state is published, the shards
 * should start to reject writing operations and we can proceed with step 2.
 *
 * @param indices        the indices requested to be closed
 * @param blockedIndices output map; receives every index that gets a closed block, mapped to that block
 * @param currentState   the cluster state to update
 * @return the updated cluster state, or {@code currentState} itself when every requested index is already closed
 * @throws IllegalArgumentException    if any index to close is currently being restored
 * @throws SnapshotInProgressException if any index to close is currently being snapshotted
 */
static ClusterState addIndexClosedBlocks(
    final Index[] indices,
    final Map<Index, ClusterBlock> blockedIndices,
    final ClusterState currentState
) {
    final Set<Index> indicesToClose = new HashSet<>();
    for (Index index : indices) {
        final IndexMetadata indexMetadata = currentState.metadata().getIndexSafe(index);
        if (indexMetadata.getState() != IndexMetadata.State.CLOSE) {
            indicesToClose.add(index);
        } else {
            logger.debug("index {} is already closed, ignoring", index);
            assert currentState.blocks().hasIndexBlock(index.getName(), INDEX_CLOSED_BLOCK);
        }
    }

    if (indicesToClose.isEmpty()) {
        return currentState;
    }

    // Check if index closing conflicts with any running restores
    Set<Index> restoringIndices = RestoreService.restoringIndices(currentState, indicesToClose);
    if (restoringIndices.isEmpty() == false) {
        throw new IllegalArgumentException("Cannot close indices that are being restored: " + restoringIndices);
    }

    // Check if index closing conflicts with any running snapshots
    Set<Index> snapshottingIndices = SnapshotsService.snapshottingIndices(currentState, indicesToClose);
    if (snapshottingIndices.isEmpty() == false) {
        throw new SnapshotInProgressException(
            "Cannot close indices that are being snapshotted: "
                + snapshottingIndices
                + ". Try again after snapshot finishes or cancel the currently running snapshot."
        );
    }

    final ClusterBlocks.Builder blocks = ClusterBlocks.builder(currentState.blocks());

    for (Index index : indicesToClose) {
        ClusterBlock indexBlock = null;
        final Set<ClusterBlock> clusterBlocks = currentState.blocks().indices().get(index.getName());
        if (clusterBlocks != null) {
            for (ClusterBlock clusterBlock : clusterBlocks) {
                if (clusterBlock.id() == INDEX_CLOSED_BLOCK_ID) {
                    // Reuse the existing index closed block
                    indexBlock = clusterBlock;
                    break;
                }
            }
        }
        if (indexBlock == null) {
            // Create a new index closed block
            indexBlock = createIndexClosingBlock();
        }
        assert Strings.hasLength(indexBlock.uuid()) : "Closing block should have a UUID";
        blocks.addIndexBlock(index.getName(), indexBlock);
        blockedIndices.put(index, indexBlock);
    }

    logger.info(() -> format("closing indices %s", blockedIndices.keySet().stream().map(Object::toString).collect(joining(","))));
    return ClusterState.builder(currentState).blocks(blocks).build();
}
/**
* Updates the cluster state for the given indices with the given index block,
* and also returns the updated indices (and their blocks) in a map.
* @param indices The indices to add blocks to if needed
* @param currentState The current cluster state
* @param block The type of block to add
* @return a tuple of the updated cluster state, as well as the blocks that got added
*/
private static Tuple> addIndexBlock(
final Index[] indices,
final ClusterState currentState,
final APIBlock block
) {
final Metadata.Builder metadata = Metadata.builder(currentState.metadata());
final Set indicesToAddBlock = new HashSet<>();
for (Index index : indices) {
metadata.getSafe(index); // to check if index exists
if (currentState.blocks().hasIndexBlock(index.getName(), block.block)) {
logger.debug("index {} already has block {}, ignoring", index, block.block);
} else {
indicesToAddBlock.add(index);
}
}
if (indicesToAddBlock.isEmpty()) {
return Tuple.tuple(currentState, Map.of());
}
final ClusterBlocks.Builder blocks = ClusterBlocks.builder(currentState.blocks());
final Map blockedIndices = new HashMap<>();
for (Index index : indicesToAddBlock) {
ClusterBlock indexBlock = null;
final Set clusterBlocks = currentState.blocks().indices().get(index.getName());
if (clusterBlocks != null) {
for (ClusterBlock clusterBlock : clusterBlocks) {
if (clusterBlock.id() == block.block.id()) {
// Reuse the existing UUID-based block
indexBlock = clusterBlock;
break;
}
}
}
if (indexBlock == null) {
// Create a new UUID-based block
indexBlock = createUUIDBasedBlock(block.block);
}
assert Strings.hasLength(indexBlock.uuid()) : "Block should have a UUID";
blocks.addIndexBlock(index.getName(), indexBlock);
blockedIndices.put(index, indexBlock);
// update index settings as well to match the block
final IndexMetadata indexMetadata = metadata.getSafe(index);
if (block.setting().get(indexMetadata.getSettings()) == false) {
final Settings updatedSettings = Settings.builder().put(indexMetadata.getSettings()).put(block.settingName(), true).build();
metadata.put(
IndexMetadata.builder(indexMetadata).settings(updatedSettings).settingsVersion(indexMetadata.getSettingsVersion() + 1)
);
}
}
logger.info(
"adding [index.blocks.{}] block to indices {}",
block.name,
blockedIndices.keySet().stream().map(Object::toString).toList()
);
return Tuple.tuple(ClusterState.builder(currentState).blocks(blocks).metadata(metadata).build(), blockedIndices);
}
/**
 * Adds an index block based on the given request, and notifies the listener upon completion.
 * Adding blocks is done in three steps:
 * - First, a temporary UUID-based block is added to the index
 *   (see {@link #addIndexBlock(Index[], ClusterState, APIBlock)}).
 * - Second, shards are checked to have properly applied the UUID-based block
 *   (see {@link WaitForBlocksApplied}).
 * - Third, the temporary UUID-based block is turned into a full block
 *   (see {@link #finalizeBlock(ClusterState, Map, Map, APIBlock)}).
 * Using this three-step process ensures non-interference by other operations in case where
 * we notify successful completion here.
 *
 * @param request  the request naming the indices and the block to add
 * @param listener notified with the final {@link AddIndexBlockResponse}, or with the failure
 * @throws IllegalArgumentException if the request contains no indices, or if any of the indices is the write
 *                                  index of a data stream
 */
public void addIndexBlock(AddIndexBlockClusterStateUpdateRequest request, ActionListener<AddIndexBlockResponse> listener) {
    final Index[] concreteIndices = request.indices();
    if (concreteIndices == null || concreteIndices.length == 0) {
        throw new IllegalArgumentException("Index name is required");
    }

    // collect the requested indices that are the write index of their parent data stream
    Metadata metadata = clusterService.state().metadata();
    List<String> writeIndices = new ArrayList<>();
    SortedMap<String, IndexAbstraction> lookup = metadata.getIndicesLookup();
    for (Index index : concreteIndices) {
        IndexAbstraction ia = lookup.get(index.getName());
        if (ia != null && ia.getParentDataStream() != null) {
            Index writeIndex = metadata.index(ia.getParentDataStream().getWriteIndex()).getIndex();
            if (writeIndex.equals(index)) {
                writeIndices.add(index.getName());
            }
        }
    }
    if (writeIndices.isEmpty() == false) {
        throw new IllegalArgumentException(
            "cannot add a block to the following data stream write indices ["
                + Strings.collectionToCommaDelimitedString(writeIndices)
                + "]"
        );
    }

    addBlocksQueue.submitTask(
        "add-index-block-[" + request.getBlock().name + "]-" + Arrays.toString(concreteIndices),
        new AddBlocksTask(request, listener),
        request.masterNodeTimeout()
    );
}
private class AddBlocksExecutor extends SimpleBatchedExecutor> {
@Override
public Tuple> executeTask(AddBlocksTask task, ClusterState clusterState) {
return addIndexBlock(task.request.indices(), clusterState, task.request.getBlock());
}
@Override
public void taskSucceeded(AddBlocksTask task, Map blockedIndices) {
if (blockedIndices.isEmpty()) {
task.listener().onResponse(AddIndexBlockResponse.EMPTY);
} else {
threadPool.executor(ThreadPool.Names.MANAGEMENT)
.execute(
new WaitForBlocksApplied(
blockedIndices,
task.request,
task.listener()
.delegateFailure(
(delegate2, verifyResults) -> finalizeBlocksQueue.submitTask(
"finalize-index-block-["
+ task.request.getBlock().name
+ "]-["
+ blockedIndices.keySet().stream().map(Index::getName).collect(Collectors.joining(", "))
+ "]",
new FinalizeBlocksTask(task.request, blockedIndices, verifyResults, delegate2),
null
)
)
)
);
}
}
}
/**
 * Task for the first step of adding an index block.
 *
 * @param request  the add-block request the task belongs to
 * @param listener receives the final response, or the failure if the task fails
 */
private record AddBlocksTask(AddIndexBlockClusterStateUpdateRequest request, ActionListener<AddIndexBlockResponse> listener)
    implements
        ClusterStateTaskListener {

    /** Forwards a task failure to the request's listener. */
    @Override
    public void onFailure(Exception e) {
        listener.onFailure(e);
    }
}
private static class FinalizeBlocksExecutor extends SimpleBatchedExecutor> {
@Override
public Tuple> executeTask(FinalizeBlocksTask task, ClusterState clusterState) throws Exception {
final Tuple> finalizeResult = finalizeBlock(
clusterState,
task.blockedIndices,
task.verifyResults,
task.request.getBlock()
);
assert finalizeResult.v2().size() == task.verifyResults.size();
return finalizeResult;
}
@Override
public void taskSucceeded(FinalizeBlocksTask task, List indices) {
final boolean acknowledged = indices.stream().noneMatch(AddBlockResult::hasFailures);
task.listener().onResponse(new AddIndexBlockResponse(acknowledged, acknowledged, indices));
}
}
/**
 * Task for the last step of adding an index block.
 *
 * @param request        the add-block request the task belongs to
 * @param blockedIndices the indices that received a temporary block, with their blocks
 * @param verifyResults  the per-index results of the verification step
 * @param listener       receives the final response, or the failure if the task fails
 */
private record FinalizeBlocksTask(
    AddIndexBlockClusterStateUpdateRequest request,
    Map<Index, ClusterBlock> blockedIndices,
    Map<Index, AddBlockResult> verifyResults,
    ActionListener<AddIndexBlockResponse> listener
) implements ClusterStateTaskListener {

    /** Forwards a task failure to the request's listener. */
    @Override
    public void onFailure(Exception e) {
        listener.onFailure(e);
    }
}
/**
* Step 2 - Wait for indices to be ready for closing
*
* This step iterates over the indices previously blocked and sends a {@link TransportVerifyShardBeforeCloseAction} to each shard. If
* this action succeed then the shard is considered to be ready for closing. When all shards of a given index are ready for closing,
* the index is considered ready to be closed.
*/
private class WaitForClosedBlocksApplied extends ActionRunnable
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy