
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.cluster.metadata;
import com.carrotsearch.hppc.cursors.IntObjectCursor;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ActionRunnable;
import org.elasticsearch.action.NotifyOnceListener;
import org.elasticsearch.action.admin.indices.close.CloseIndexClusterStateUpdateRequest;
import org.elasticsearch.action.admin.indices.close.CloseIndexResponse;
import org.elasticsearch.action.admin.indices.close.CloseIndexResponse.IndexResult;
import org.elasticsearch.action.admin.indices.close.CloseIndexResponse.ShardResult;
import org.elasticsearch.action.admin.indices.close.TransportVerifyShardBeforeCloseAction;
import org.elasticsearch.action.admin.indices.open.OpenIndexClusterStateUpdateRequest;
import org.elasticsearch.action.admin.indices.readonly.AddIndexBlockClusterStateUpdateRequest;
import org.elasticsearch.action.admin.indices.readonly.AddIndexBlockResponse;
import org.elasticsearch.action.admin.indices.readonly.AddIndexBlockResponse.AddBlockResult;
import org.elasticsearch.action.admin.indices.readonly.AddIndexBlockResponse.AddBlockShardResult;
import org.elasticsearch.action.admin.indices.readonly.TransportVerifyShardIndexBlockAction;
import org.elasticsearch.action.support.ActiveShardsObserver;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.action.support.master.ShardsAcknowledgedResponse;
import org.elasticsearch.action.support.replication.ReplicationResponse;
import org.elasticsearch.cluster.AckedClusterStateUpdateTask;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateUpdateTask;
import org.elasticsearch.cluster.block.ClusterBlock;
import org.elasticsearch.cluster.block.ClusterBlockLevel;
import org.elasticsearch.cluster.block.ClusterBlocks;
import org.elasticsearch.cluster.metadata.IndexMetadata.APIBlock;
import org.elasticsearch.cluster.routing.IndexRoutingTable;
import org.elasticsearch.cluster.routing.IndexShardRoutingTable;
import org.elasticsearch.cluster.routing.RoutingTable;
import org.elasticsearch.cluster.routing.allocation.AllocationService;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.collect.ImmutableOpenIntMap;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.concurrent.AtomicArray;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.common.util.concurrent.CountDown;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexNotFoundException;
import org.elasticsearch.index.shard.IndexLongFieldRange;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.indices.ShardLimitValidator;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.snapshots.RestoreService;
import org.elasticsearch.snapshots.SnapshotInProgressException;
import org.elasticsearch.snapshots.SnapshotsService;
import org.elasticsearch.tasks.TaskId;
import org.elasticsearch.threadpool.ThreadPool;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import static java.util.Collections.singleton;
import static java.util.Collections.unmodifiableMap;
/**
* Service responsible for submitting open/close index requests as well as for adding index blocks
*/
public class MetadataIndexStateService {
private static final Logger logger = LogManager.getLogger(MetadataIndexStateService.class);
public static final int INDEX_CLOSED_BLOCK_ID = 4;
public static final ClusterBlock INDEX_CLOSED_BLOCK = new ClusterBlock(
4,
"index closed",
false,
false,
false,
RestStatus.FORBIDDEN,
ClusterBlockLevel.READ_WRITE
);
public static final Setting<Boolean> VERIFIED_BEFORE_CLOSE_SETTING = Setting.boolSetting(
"index.verified_before_close",
false,
Setting.Property.IndexScope,
Setting.Property.PrivateIndex
);
private final ClusterService clusterService;
private final AllocationService allocationService;
private final IndexMetadataVerifier indexMetadataVerifier;
private final IndicesService indicesService;
private final ShardLimitValidator shardLimitValidator;
private final ThreadPool threadPool;
private final TransportVerifyShardBeforeCloseAction transportVerifyShardBeforeCloseAction;
private final TransportVerifyShardIndexBlockAction transportVerifyShardIndexBlockAction;
private final ActiveShardsObserver activeShardsObserver;
@Inject
public MetadataIndexStateService(
ClusterService clusterService,
AllocationService allocationService,
IndexMetadataVerifier indexMetadataVerifier,
IndicesService indicesService,
ShardLimitValidator shardLimitValidator,
ThreadPool threadPool,
TransportVerifyShardBeforeCloseAction transportVerifyShardBeforeCloseAction,
TransportVerifyShardIndexBlockAction transportVerifyShardIndexBlockAction
) {
this.indicesService = indicesService;
this.clusterService = clusterService;
this.allocationService = allocationService;
this.threadPool = threadPool;
this.transportVerifyShardBeforeCloseAction = transportVerifyShardBeforeCloseAction;
this.transportVerifyShardIndexBlockAction = transportVerifyShardIndexBlockAction;
this.indexMetadataVerifier = indexMetadataVerifier;
this.shardLimitValidator = shardLimitValidator;
this.activeShardsObserver = new ActiveShardsObserver(clusterService, threadPool);
}
/**
* Closes one or more indices.
*
* Closing indices is a three-step process: it first adds a write block to every index to close, then waits for ongoing operations on the
* shards to terminate, and finally closes the indices by moving their state to CLOSE. A caller-side usage sketch follows this method.
*/
public void closeIndices(final CloseIndexClusterStateUpdateRequest request, final ActionListener<CloseIndexResponse> listener) {
final Index[] concreteIndices = request.indices();
if (concreteIndices == null || concreteIndices.length == 0) {
throw new IllegalArgumentException("Index name is required");
}
clusterService.submitStateUpdateTask(
"add-block-index-to-close " + Arrays.toString(concreteIndices),
new ClusterStateUpdateTask(Priority.URGENT, request.masterNodeTimeout()) {
private final Map<Index, ClusterBlock> blockedIndices = new HashMap<>();
@Override
public ClusterState execute(final ClusterState currentState) {
return addIndexClosedBlocks(concreteIndices, blockedIndices, currentState);
}
@Override
public void clusterStateProcessed(final String source, final ClusterState oldState, final ClusterState newState) {
if (oldState == newState) {
assert blockedIndices.isEmpty() : "List of blocked indices is not empty but cluster state wasn't changed";
listener.onResponse(new CloseIndexResponse(true, false, Collections.emptyList()));
} else {
assert blockedIndices.isEmpty() == false : "List of blocked indices is empty but cluster state was changed";
threadPool.executor(ThreadPool.Names.MANAGEMENT)
.execute(
new WaitForClosedBlocksApplied(
blockedIndices,
request,
ActionListener.wrap(
verifyResults -> clusterService.submitStateUpdateTask(
"close-indices",
new ClusterStateUpdateTask(Priority.URGENT) {
private final List<IndexResult> indices = new ArrayList<>();
@Override
public ClusterState execute(final ClusterState currentState) throws Exception {
Tuple<ClusterState, Collection<IndexResult>> closingResult = closeRoutingTable(
currentState,
blockedIndices,
verifyResults
);
assert verifyResults.size() == closingResult.v2().size();
indices.addAll(closingResult.v2());
return allocationService.reroute(closingResult.v1(), "indices closed");
}
@Override
public void onFailure(final String source, final Exception e) {
listener.onFailure(e);
}
@Override
public void clusterStateProcessed(
final String source,
final ClusterState oldState,
final ClusterState newState
) {
final boolean acknowledged = indices.stream().noneMatch(IndexResult::hasFailures);
final String[] waitForIndices = indices.stream()
.filter(result -> result.hasFailures() == false)
.filter(result -> newState.routingTable().hasIndex(result.getIndex()))
.map(result -> result.getIndex().getName())
.toArray(String[]::new);
if (waitForIndices.length > 0) {
activeShardsObserver.waitForActiveShards(
waitForIndices,
request.waitForActiveShards(),
request.ackTimeout(),
shardsAcknowledged -> {
if (shardsAcknowledged == false) {
logger.debug(
"[{}] indices closed, but the operation timed out while waiting "
+ "for enough shards to be started.",
Arrays.toString(waitForIndices)
);
}
// acknowledged may be false even though some indices were correctly closed,
// so we maintain a kind of coherency by overriding the shardsAcknowledged value
// (see ShardsAcknowledgedResponse constructor)
boolean shardsAcked = acknowledged ? shardsAcknowledged : false;
listener.onResponse(
new CloseIndexResponse(acknowledged, shardsAcked, indices)
);
},
listener::onFailure
);
} else {
listener.onResponse(new CloseIndexResponse(acknowledged, false, indices));
}
}
}
),
listener::onFailure
)
)
);
}
}
@Override
public void onFailure(final String source, final Exception e) {
listener.onFailure(e);
}
}
);
}
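// Illustrative caller-side sketch (hedged, not part of the upstream class): roughly how a transport
// action might drive the three-step close flow above. The CloseIndexClusterStateUpdateRequest
// constructor and fluent setters shown here are assumptions inferred from this method's signature
// and may not match the real request API:
//
//   CloseIndexClusterStateUpdateRequest closeRequest = new CloseIndexClusterStateUpdateRequest(taskId)
//       .indices(concreteIndices)
//       .waitForActiveShards(ActiveShardCount.DEFAULT);
//   metadataIndexStateService.closeIndices(closeRequest, ActionListener.wrap(
//       response -> logger.debug("close acknowledged={}, shardsAcknowledged={}",
//           response.isAcknowledged(), response.isShardsAcknowledged()),
//       e -> logger.warn("failed to close indices", e)
//   ));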
/**
* Step 1 - Start closing indices by adding a write block
*
* This step builds the list of indices to close (the ones explicitly requested that are not in CLOSE state) and adds a unique cluster
* block (or reuses an existing one) to every index to close in the cluster state. After the cluster state is published, the shards
* should start to reject writing operations and we can proceed with step 2.
*/
static ClusterState addIndexClosedBlocks(
final Index[] indices,
final Map<Index, ClusterBlock> blockedIndices,
final ClusterState currentState
) {
final Metadata.Builder metadata = Metadata.builder(currentState.metadata());
final Set<Index> indicesToClose = new HashSet<>();
for (Index index : indices) {
final IndexMetadata indexMetadata = metadata.getSafe(index);
if (indexMetadata.getState() != IndexMetadata.State.CLOSE) {
indicesToClose.add(index);
} else {
logger.debug("index {} is already closed, ignoring", index);
assert currentState.blocks().hasIndexBlock(index.getName(), INDEX_CLOSED_BLOCK);
}
}
if (indicesToClose.isEmpty()) {
return currentState;
}
// Check if index closing conflicts with any running restores
Set<Index> restoringIndices = RestoreService.restoringIndices(currentState, indicesToClose);
if (restoringIndices.isEmpty() == false) {
throw new IllegalArgumentException("Cannot close indices that are being restored: " + restoringIndices);
}
// Check if index closing conflicts with any running snapshots
Set<Index> snapshottingIndices = SnapshotsService.snapshottingIndices(currentState, indicesToClose);
if (snapshottingIndices.isEmpty() == false) {
throw new SnapshotInProgressException(
"Cannot close indices that are being snapshotted: "
+ snapshottingIndices
+ ". Try again after snapshot finishes or cancel the currently running snapshot."
);
}
final ClusterBlocks.Builder blocks = ClusterBlocks.builder().blocks(currentState.blocks());
final RoutingTable.Builder routingTable = RoutingTable.builder(currentState.routingTable());
for (Index index : indicesToClose) {
ClusterBlock indexBlock = null;
final Set<ClusterBlock> clusterBlocks = currentState.blocks().indices().get(index.getName());
if (clusterBlocks != null) {
for (ClusterBlock clusterBlock : clusterBlocks) {
if (clusterBlock.id() == INDEX_CLOSED_BLOCK_ID) {
// Reuse the existing index closed block
indexBlock = clusterBlock;
break;
}
}
}
if (indexBlock == null) {
// Create a new index closed block
indexBlock = createIndexClosingBlock();
}
assert Strings.hasLength(indexBlock.uuid()) : "Closing block should have a UUID";
blocks.addIndexBlock(index.getName(), indexBlock);
blockedIndices.put(index, indexBlock);
}
logger.info(
() -> new ParameterizedMessage(
"closing indices {}",
blockedIndices.keySet().stream().map(Object::toString).collect(Collectors.joining(","))
)
);
return ClusterState.builder(currentState).blocks(blocks).metadata(metadata).routingTable(routingTable.build()).build();
}
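// Editor's sketch (hedged): the per-index closing block created by createIndexClosingBlock() is
// expected to be a UUID-tagged, WRITE-level block sharing INDEX_CLOSED_BLOCK_ID, roughly of the
// shape below; the exact description string is an assumption:
//
//   new ClusterBlock(
//       INDEX_CLOSED_BLOCK_ID,
//       UUIDs.randomBase64UUID(),
//       "index preparing to close...",
//       false, false, false,
//       RestStatus.FORBIDDEN,
//       EnumSet.of(ClusterBlockLevel.WRITE));
//
// The UUID is what lets the final step verify that the exact block added here is still present
// before the index is actually moved to the CLOSE state.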
/**
* Updates the cluster state for the given indices with the given index block,
* and also returns the updated indices (and their blocks) in a map.
* @param indices The indices to add blocks to if needed
* @param currentState The current cluster state
* @param block The type of block to add
* @return a tuple of the updated cluster state, as well as the blocks that got added
*/
static Tuple<ClusterState, Map<Index, ClusterBlock>> addIndexBlock(
final Index[] indices,
final ClusterState currentState,
final APIBlock block
) {
final Metadata.Builder metadata = Metadata.builder(currentState.metadata());
final Set<Index> indicesToAddBlock = new HashSet<>();
for (Index index : indices) {
metadata.getSafe(index); // to check if index exists
if (currentState.blocks().hasIndexBlock(index.getName(), block.block)) {
logger.debug("index {} already has block {}, ignoring", index, block.block);
} else {
indicesToAddBlock.add(index);
}
}
if (indicesToAddBlock.isEmpty()) {
return Tuple.tuple(currentState, Collections.emptyMap());
}
final ClusterBlocks.Builder blocks = ClusterBlocks.builder().blocks(currentState.blocks());
final RoutingTable.Builder routingTable = RoutingTable.builder(currentState.routingTable());
final Map<Index, ClusterBlock> blockedIndices = new HashMap<>();
for (Index index : indicesToAddBlock) {
ClusterBlock indexBlock = null;
final Set<ClusterBlock> clusterBlocks = currentState.blocks().indices().get(index.getName());
if (clusterBlocks != null) {
for (ClusterBlock clusterBlock : clusterBlocks) {
if (clusterBlock.id() == block.block.id()) {
// Reuse the existing UUID-based block
indexBlock = clusterBlock;
break;
}
}
}
if (indexBlock == null) {
// Create a new UUID-based block
indexBlock = createUUIDBasedBlock(block.block);
}
assert Strings.hasLength(indexBlock.uuid()) : "Block should have a UUID";
blocks.addIndexBlock(index.getName(), indexBlock);
blockedIndices.put(index, indexBlock);
// update index settings as well to match the block
final IndexMetadata indexMetadata = metadata.getSafe(index);
if (block.setting().get(indexMetadata.getSettings()) == false) {
final Settings updatedSettings = Settings.builder().put(indexMetadata.getSettings()).put(block.settingName(), true).build();
metadata.put(
IndexMetadata.builder(indexMetadata).settings(updatedSettings).settingsVersion(indexMetadata.getSettingsVersion() + 1)
);
}
}
logger.info(
"adding block {} to indices {}",
block.name,
blockedIndices.keySet().stream().map(Object::toString).collect(Collectors.toList())
);
return Tuple.tuple(
ClusterState.builder(currentState).blocks(blocks).metadata(metadata).routingTable(routingTable.build()).build(),
blockedIndices
);
}
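// Editor's sketch (hedged): createUUIDBasedBlock(block) referenced above is expected to wrap the
// API-level block into a temporary block that carries a fresh UUID but keeps the original id,
// status and levels, along the lines of:
//
//   new ClusterBlock(
//       clusterBlock.id(),
//       UUIDs.randomBase64UUID(),
//       "moving to block " + clusterBlock.description(),   // description string is an assumption
//       clusterBlock.retryable(),
//       clusterBlock.disableStatePersistence(),
//       false,
//       clusterBlock.status(),
//       clusterBlock.levels());
//
// Reusing an existing block with the same id (when one is found) keeps the operation idempotent
// across retries.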
/**
* Adds an index block based on the given request, and notifies the listener upon completion.
* Adding blocks is done in three steps:
* - First, a temporary UUID-based block is added to the index
* (see {@link #addIndexBlock(Index[], ClusterState, APIBlock)}).
* - Second, shards are checked to have properly applied the UUID-based block
* (see {@link WaitForBlocksApplied}).
* - Third, the temporary UUID-based block is turned into a full block
* (see {@link #finalizeBlock(ClusterState, Map, Map, APIBlock)}).
* Using this three-step process ensures that other operations cannot interfere in the case where
* we notify successful completion here. A caller-side usage sketch follows this method.
*/
public void addIndexBlock(AddIndexBlockClusterStateUpdateRequest request, ActionListener<AddIndexBlockResponse> listener) {
final Index[] concreteIndices = request.indices();
if (concreteIndices == null || concreteIndices.length == 0) {
throw new IllegalArgumentException("Index name is required");
}
Metadata metadata = clusterService.state().metadata();
List<String> writeIndices = new ArrayList<>();
SortedMap<String, IndexAbstraction> lookup = metadata.getIndicesLookup();
for (Index index : concreteIndices) {
IndexAbstraction ia = lookup.get(index.getName());
if (ia != null && ia.getParentDataStream() != null) {
Index writeIndex = metadata.index(ia.getParentDataStream().getWriteIndex()).getIndex();
if (writeIndex.equals(index)) {
writeIndices.add(index.getName());
}
}
}
if (writeIndices.size() > 0) {
throw new IllegalArgumentException(
"cannot add a block to the following data stream write indices ["
+ Strings.collectionToCommaDelimitedString(writeIndices)
+ "]"
);
}
clusterService.submitStateUpdateTask(
"add-index-block-[" + request.getBlock().name + "]-" + Arrays.toString(concreteIndices),
new ClusterStateUpdateTask(Priority.URGENT, request.masterNodeTimeout()) {
private Map<Index, ClusterBlock> blockedIndices;
@Override
public ClusterState execute(final ClusterState currentState) {
final Tuple<ClusterState, Map<Index, ClusterBlock>> tup = addIndexBlock(
concreteIndices,
currentState,
request.getBlock()
);
blockedIndices = tup.v2();
return tup.v1();
}
@Override
public void clusterStateProcessed(final String source, final ClusterState oldState, final ClusterState newState) {
if (oldState == newState) {
assert blockedIndices.isEmpty() : "List of blocked indices is not empty but cluster state wasn't changed";
listener.onResponse(new AddIndexBlockResponse(true, false, Collections.emptyList()));
} else {
assert blockedIndices.isEmpty() == false : "List of blocked indices is empty but cluster state was changed";
threadPool.executor(ThreadPool.Names.MANAGEMENT)
.execute(
new WaitForBlocksApplied(
blockedIndices,
request,
ActionListener.wrap(
verifyResults -> clusterService.submitStateUpdateTask(
"finalize-index-block-["
+ request.getBlock().name
+ "]-["
+ blockedIndices.keySet().stream().map(Index::getName).collect(Collectors.joining(", "))
+ "]",
new ClusterStateUpdateTask(Priority.URGENT) {
private final List<AddBlockResult> indices = new ArrayList<>();
@Override
public ClusterState execute(final ClusterState currentState) throws Exception {
Tuple<ClusterState, Collection<AddBlockResult>> addBlockResult = finalizeBlock(
currentState,
blockedIndices,
verifyResults,
request.getBlock()
);
assert verifyResults.size() == addBlockResult.v2().size();
indices.addAll(addBlockResult.v2());
return addBlockResult.v1();
}
@Override
public void onFailure(final String source, final Exception e) {
listener.onFailure(e);
}
@Override
public void clusterStateProcessed(
final String source,
final ClusterState oldState,
final ClusterState newState
) {
final boolean acknowledged = indices.stream().noneMatch(AddBlockResult::hasFailures);
listener.onResponse(new AddIndexBlockResponse(acknowledged, acknowledged, indices));
}
}
),
listener::onFailure
)
)
);
}
}
@Override
public void onFailure(final String source, final Exception e) {
listener.onFailure(e);
}
}
);
}
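// Illustrative usage sketch (hedged): adding a WRITE block to a set of concrete indices. The
// AddIndexBlockClusterStateUpdateRequest constructor and setter shown are assumptions based on
// this method's signature, not verified against the real request class:
//
//   AddIndexBlockClusterStateUpdateRequest blockRequest =
//       new AddIndexBlockClusterStateUpdateRequest(APIBlock.WRITE, taskId).indices(concreteIndices);
//   metadataIndexStateService.addIndexBlock(blockRequest, ActionListener.wrap(
//       response -> logger.debug("block added, acknowledged={}", response.isAcknowledged()),
//       e -> logger.warn("failed to add index block", e)
//   ));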
/**
* Step 2 - Wait for indices to be ready for closing
*
* This step iterates over the indices previously blocked and sends a {@link TransportVerifyShardBeforeCloseAction} to each shard. If
* this action succeeds, the shard is considered ready for closing. When all shards of a given index are ready for closing,
* the index is considered ready to be closed.
*/
class WaitForClosedBlocksApplied extends ActionRunnable<Map<Index, IndexResult>> {