/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.opensearch.snapshots;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.opensearch.ExceptionsHelper;
import org.opensearch.LegacyESVersion;
import org.opensearch.Version;
import org.opensearch.action.ActionRunnable;
import org.opensearch.action.StepListener;
import org.opensearch.action.admin.cluster.snapshots.clone.CloneSnapshotRequest;
import org.opensearch.action.admin.cluster.snapshots.create.CreateSnapshotRequest;
import org.opensearch.action.admin.cluster.snapshots.delete.DeleteSnapshotRequest;
import org.opensearch.action.support.ActionFilters;
import org.opensearch.action.support.GroupedActionListener;
import org.opensearch.action.support.clustermanager.TransportClusterManagerNodeAction;
import org.opensearch.cluster.ClusterChangedEvent;
import org.opensearch.cluster.ClusterState;
import org.opensearch.cluster.ClusterStateApplier;
import org.opensearch.cluster.ClusterStateTaskConfig;
import org.opensearch.cluster.ClusterStateTaskExecutor;
import org.opensearch.cluster.ClusterStateTaskListener;
import org.opensearch.cluster.ClusterStateUpdateTask;
import org.opensearch.cluster.NotClusterManagerException;
import org.opensearch.cluster.RepositoryCleanupInProgress;
import org.opensearch.cluster.RestoreInProgress;
import org.opensearch.cluster.SnapshotDeletionsInProgress;
import org.opensearch.cluster.SnapshotsInProgress;
import org.opensearch.cluster.SnapshotsInProgress.ShardSnapshotStatus;
import org.opensearch.cluster.SnapshotsInProgress.ShardState;
import org.opensearch.cluster.SnapshotsInProgress.State;
import org.opensearch.cluster.block.ClusterBlockException;
import org.opensearch.cluster.coordination.FailedToCommitClusterStateException;
import org.opensearch.cluster.metadata.DataStream;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.cluster.metadata.IndexNameExpressionResolver;
import org.opensearch.cluster.metadata.Metadata;
import org.opensearch.cluster.metadata.RepositoriesMetadata;
import org.opensearch.cluster.node.DiscoveryNode;
import org.opensearch.cluster.node.DiscoveryNodes;
import org.opensearch.cluster.routing.IndexRoutingTable;
import org.opensearch.cluster.routing.IndexShardRoutingTable;
import org.opensearch.cluster.routing.RoutingTable;
import org.opensearch.cluster.routing.ShardRouting;
import org.opensearch.cluster.service.ClusterManagerTaskKeys;
import org.opensearch.cluster.service.ClusterManagerTaskThrottler;
import org.opensearch.cluster.service.ClusterService;
import org.opensearch.common.Nullable;
import org.opensearch.common.Priority;
import org.opensearch.common.UUIDs;
import org.opensearch.common.collect.Tuple;
import org.opensearch.common.lifecycle.AbstractLifecycleComponent;
import org.opensearch.common.regex.Regex;
import org.opensearch.common.settings.Setting;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.unit.TimeValue;
import org.opensearch.common.util.concurrent.AbstractRunnable;
import org.opensearch.core.action.ActionListener;
import org.opensearch.core.common.Strings;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.index.Index;
import org.opensearch.core.index.shard.ShardId;
import org.opensearch.index.store.RemoteSegmentStoreDirectoryFactory;
import org.opensearch.index.store.lockmanager.RemoteStoreLockManagerFactory;
import org.opensearch.indices.RemoteStoreSettings;
import org.opensearch.node.remotestore.RemoteStorePinnedTimestampService;
import org.opensearch.repositories.IndexId;
import org.opensearch.repositories.RepositoriesService;
import org.opensearch.repositories.Repository;
import org.opensearch.repositories.RepositoryData;
import org.opensearch.repositories.RepositoryException;
import org.opensearch.repositories.RepositoryMissingException;
import org.opensearch.repositories.RepositoryShardId;
import org.opensearch.repositories.ShardGenerations;
import org.opensearch.threadpool.ThreadPool;
import org.opensearch.transport.TransportService;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Deque;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Executor;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static java.util.Collections.emptySet;
import static java.util.Collections.unmodifiableList;
import static org.opensearch.cluster.SnapshotsInProgress.completed;
import static org.opensearch.common.util.IndexUtils.filterIndices;
import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode;
import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING;
import static org.opensearch.repositories.blobstore.BlobStoreRepository.REMOTE_STORE_INDEX_SHALLOW_COPY;
import static org.opensearch.repositories.blobstore.BlobStoreRepository.SHALLOW_SNAPSHOT_V2;
import static org.opensearch.repositories.blobstore.BlobStoreRepository.SHARD_PATH_TYPE;
import static org.opensearch.snapshots.SnapshotUtils.validateSnapshotsBackingAnyIndex;
/**
* Service responsible for creating snapshots. This service runs all the steps executed on the cluster-manager node during snapshot creation and
* deletion.
* See package level documentation of {@link org.opensearch.snapshots} for details.
*
* @opensearch.internal
*/
public class SnapshotsService extends AbstractLifecycleComponent implements ClusterStateApplier {
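/*
 * Illustrative usage sketch (not part of the original source): callers typically go
 * through executeSnapshot below; the repository name "my-repo", snapshot name
 * "snap-1", and the snapshotsService/logger references are hypothetical.
 *
 *   CreateSnapshotRequest request = new CreateSnapshotRequest("my-repo", "snap-1");
 *   snapshotsService.executeSnapshot(request, ActionListener.wrap(
 *       snapshotInfo -> logger.info("snapshot completed with state [{}]", snapshotInfo.state()),
 *       e -> logger.warn("snapshot failed", e)
 *   ));
 */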
/**
* Minimum node version which does not use {@link Repository#initializeSnapshot(SnapshotId, List, Metadata)} to write snapshot metadata
* when starting a snapshot.
*/
public static final Version NO_REPO_INITIALIZE_VERSION = LegacyESVersion.V_7_5_0;
public static final Version FULL_CONCURRENCY_VERSION = LegacyESVersion.V_7_9_0;
public static final Version CLONE_SNAPSHOT_VERSION = LegacyESVersion.V_7_10_0;
public static final Version SHARD_GEN_IN_REPO_DATA_VERSION = LegacyESVersion.V_7_6_0;
public static final Version INDEX_GEN_IN_REPO_DATA_VERSION = LegacyESVersion.V_7_9_0;
public static final Version OLD_SNAPSHOT_FORMAT = LegacyESVersion.V_7_5_0;
public static final Version MULTI_DELETE_VERSION = LegacyESVersion.V_7_8_0;
private static final Logger logger = LogManager.getLogger(SnapshotsService.class);
public static final String UPDATE_SNAPSHOT_STATUS_ACTION_NAME = "internal:cluster/snapshot/update_snapshot_status";
private final ClusterService clusterService;
private final IndexNameExpressionResolver indexNameExpressionResolver;
private final RepositoriesService repositoriesService;
private final RemoteStoreLockManagerFactory remoteStoreLockManagerFactory;
private final RemoteSegmentStoreDirectoryFactory remoteSegmentStoreDirectoryFactory;
private final ThreadPool threadPool;
private final Map<Snapshot, List<ActionListener<Tuple<RepositoryData, SnapshotInfo>>>> snapshotCompletionListeners =
new ConcurrentHashMap<>();
// Set of snapshots that are currently being initialized by this node
private final Set<Snapshot> initializingSnapshots = Collections.synchronizedSet(new HashSet<>());
/**
* Listeners for snapshot deletion keyed by delete uuid as returned from {@link SnapshotDeletionsInProgress.Entry#uuid()}
*/
private final Map<String, List<ActionListener<Void>>> snapshotDeletionListeners = new HashMap<>();
// Set of repositories currently running either a snapshot finalization or a snapshot delete.
private final Set<String> currentlyFinalizing = Collections.synchronizedSet(new HashSet<>());
// Set of snapshots that are currently being ended by this node
private final Set<Snapshot> endingSnapshots = Collections.synchronizedSet(new HashSet<>());
// Set of currently initializing clone operations
private final Set<Snapshot> initializingClones = Collections.synchronizedSet(new HashSet<>());
private final UpdateSnapshotStatusAction updateSnapshotStatusHandler;
private final TransportService transportService;
private final RemoteStorePinnedTimestampService remoteStorePinnedTimestampService;
private final OngoingRepositoryOperations repositoryOperations = new OngoingRepositoryOperations();
private final ClusterManagerTaskThrottler.ThrottlingKey createSnapshotTaskKey;
private final ClusterManagerTaskThrottler.ThrottlingKey deleteSnapshotTaskKey;
private static ClusterManagerTaskThrottler.ThrottlingKey updateSnapshotStateTaskKey;
/**
* Setting that specifies the maximum number of allowed concurrent snapshot create and delete operations in the
* cluster state. The number of concurrent operations in a cluster state is defined as the sum of the sizes of
* {@link SnapshotsInProgress#entries()} and {@link SnapshotDeletionsInProgress#getEntries()}.
*/
public static final Setting<Integer> MAX_CONCURRENT_SNAPSHOT_OPERATIONS_SETTING = Setting.intSetting(
"snapshot.max_concurrent_operations",
1000,
1,
Setting.Property.NodeScope,
Setting.Property.Dynamic
);
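/*
 * Hedged example (not from the original source): how this node-scoped, dynamic setting
 * resolves against node settings; the Settings instance below is a hypothetical
 * stand-in for the node configuration.
 *
 *   Settings nodeSettings = Settings.builder()
 *       .put("snapshot.max_concurrent_operations", 10)
 *       .build();
 *   int limit = MAX_CONCURRENT_SNAPSHOT_OPERATIONS_SETTING.get(nodeSettings); // 10
 *   // omitting the key falls back to the default of 1000 declared above
 */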
public static final String SNAPSHOT_PINNED_TIMESTAMP_DELIMITER = "__";
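/*
 * Worked example (not from the original source): the delimiter joins the repository name
 * and the snapshot UUID into the pinned-timestamp entity key used below in
 * updateSnapshotPinnedTimestamp; "my-repo" and "AbC123" are hypothetical values.
 *
 *   String pinEntity = "my-repo" + SNAPSHOT_PINNED_TIMESTAMP_DELIMITER + "AbC123";
 *   // pinEntity.equals("my-repo__AbC123")
 */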
/**
* Setting to specify the maximum number of shards that can be included in the result for the snapshot status
* API call. Note that it does not apply to V2-shallow snapshots.
*/
public static final Setting<Integer> MAX_SHARDS_ALLOWED_IN_STATUS_API = Setting.intSetting(
"snapshot.max_shards_allowed_in_status_api",
200000,
1,
Setting.Property.NodeScope,
Setting.Property.Dynamic
);
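/*
 * Hedged example (not from the original source): the status-API shard limit is read the
 * same way as the concurrency setting above, using the same hypothetical nodeSettings.
 *
 *   int statusShardLimit = MAX_SHARDS_ALLOWED_IN_STATUS_API.get(nodeSettings); // default 200000
 */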
private volatile int maxConcurrentOperations;
public SnapshotsService(
Settings settings,
ClusterService clusterService,
IndexNameExpressionResolver indexNameExpressionResolver,
RepositoriesService repositoriesService,
TransportService transportService,
ActionFilters actionFilters,
@Nullable RemoteStorePinnedTimestampService remoteStorePinnedTimestampService,
RemoteStoreSettings remoteStoreSettings
) {
this.clusterService = clusterService;
this.indexNameExpressionResolver = indexNameExpressionResolver;
this.repositoriesService = repositoriesService;
this.remoteStoreLockManagerFactory = new RemoteStoreLockManagerFactory(
() -> repositoriesService,
remoteStoreSettings.getSegmentsPathFixedPrefix()
);
this.threadPool = transportService.getThreadPool();
this.remoteSegmentStoreDirectoryFactory = new RemoteSegmentStoreDirectoryFactory(
() -> repositoriesService,
threadPool,
remoteStoreSettings.getSegmentsPathFixedPrefix()
);
this.transportService = transportService;
this.remoteStorePinnedTimestampService = remoteStorePinnedTimestampService;
// The constructor of UpdateSnapshotStatusAction will register itself to the TransportService.
this.updateSnapshotStatusHandler = new UpdateSnapshotStatusAction(
transportService,
clusterService,
threadPool,
actionFilters,
indexNameExpressionResolver
);
if (DiscoveryNode.isClusterManagerNode(settings)) {
// addLowPriorityApplier to make sure that Repository will be created before snapshot
clusterService.addLowPriorityApplier(this);
maxConcurrentOperations = MAX_CONCURRENT_SNAPSHOT_OPERATIONS_SETTING.get(settings);
clusterService.getClusterSettings()
.addSettingsUpdateConsumer(MAX_CONCURRENT_SNAPSHOT_OPERATIONS_SETTING, i -> maxConcurrentOperations = i);
}
// Tasks are onboarded for throttling; they will get retried from the associated TransportClusterManagerNodeAction.
createSnapshotTaskKey = clusterService.registerClusterManagerTask(ClusterManagerTaskKeys.CREATE_SNAPSHOT_KEY, true);
deleteSnapshotTaskKey = clusterService.registerClusterManagerTask(ClusterManagerTaskKeys.DELETE_SNAPSHOT_KEY, true);
updateSnapshotStateTaskKey = clusterService.registerClusterManagerTask(ClusterManagerTaskKeys.UPDATE_SNAPSHOT_STATE_KEY, true);
}
/**
* Same as {@link #createSnapshot(CreateSnapshotRequest, ActionListener)} but invokes its callback on completion of
* the snapshot.
* Note: This method is only used in clusters that contain a node older than {@link #NO_REPO_INITIALIZE_VERSION} to ensure a backwards
* compatible path for initializing the snapshot in the repository is executed.
*
* @param request snapshot request
* @param listener snapshot completion listener
*/
public void executeSnapshotLegacy(final CreateSnapshotRequest request, final ActionListener<SnapshotInfo> listener) {
createSnapshotLegacy(
request,
ActionListener.wrap(snapshot -> addListener(snapshot, ActionListener.map(listener, Tuple::v2)), listener::onFailure)
);
}
/**
* Initializes the snapshotting process.
*
* This method is used by clients to start a snapshot. It makes sure that no snapshots are currently running and
* creates a snapshot record in cluster state metadata.
* Note: This method is only used in clusters that contain a node older than {@link #NO_REPO_INITIALIZE_VERSION} to ensure a backwards
* compatible path for initializing the snapshot in the repository is executed.
*
* @param request snapshot request
* @param listener snapshot creation listener
*/
public void createSnapshotLegacy(final CreateSnapshotRequest request, final ActionListener<Snapshot> listener) {
final String repositoryName = request.repository();
final String snapshotName = indexNameExpressionResolver.resolveDateMathExpression(request.snapshot());
validate(repositoryName, snapshotName);
final SnapshotId snapshotId = new SnapshotId(snapshotName, UUIDs.randomBase64UUID()); // new UUID for the snapshot
Repository repository = repositoriesService.repository(request.repository());
final Map<String, Object> userMeta = repository.adaptUserMetadata(request.userMetadata());
clusterService.submitStateUpdateTask("create_snapshot [" + snapshotName + ']', new ClusterStateUpdateTask() {
private List<String> indices;
private SnapshotsInProgress.Entry newEntry;
@Override
public ClusterState execute(ClusterState currentState) {
validate(repositoryName, snapshotName, currentState);
SnapshotDeletionsInProgress deletionsInProgress = currentState.custom(SnapshotDeletionsInProgress.TYPE);
if (deletionsInProgress != null && deletionsInProgress.hasDeletionsInProgress()) {
throw new ConcurrentSnapshotExecutionException(
repositoryName,
snapshotName,
"cannot snapshot while a snapshot deletion is in-progress in [" + deletionsInProgress + "]"
);
}
final RepositoryCleanupInProgress repositoryCleanupInProgress = currentState.custom(RepositoryCleanupInProgress.TYPE);
if (repositoryCleanupInProgress != null && repositoryCleanupInProgress.hasCleanupInProgress()) {
throw new ConcurrentSnapshotExecutionException(
repositoryName,
snapshotName,
"cannot snapshot while a repository cleanup is in-progress in [" + repositoryCleanupInProgress + "]"
);
}
SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE);
// Fail if there are any concurrently running snapshots. The only exception to this being a snapshot in INIT state from a
// previous cluster-manager that we can simply ignore and remove from the cluster state because we would clean it up from
// the cluster state anyway in #applyClusterState.
if (snapshots != null
&& snapshots.entries()
.stream()
.anyMatch(
entry -> (entry.state() == State.INIT && initializingSnapshots.contains(entry.snapshot()) == false) == false
)) {
throw new ConcurrentSnapshotExecutionException(repositoryName, snapshotName, " a snapshot is already running");
}
// Store newSnapshot here to be processed in clusterStateProcessed
indices = Arrays.asList(indexNameExpressionResolver.concreteIndexNames(currentState, request));
final List<String> dataStreams = indexNameExpressionResolver.dataStreamNames(
currentState,
request.indicesOptions(),
request.indices()
);
logger.trace("[{}][{}] creating snapshot for indices [{}]", repositoryName, snapshotName, indices);
boolean remoteStoreIndexShallowCopy = REMOTE_STORE_INDEX_SHALLOW_COPY.get(repository.getMetadata().settings());
newEntry = new SnapshotsInProgress.Entry(
new Snapshot(repositoryName, snapshotId),
request.includeGlobalState(),
request.partial(),
State.INIT,
Collections.emptyList(), // We'll resolve the list of indices when moving to the STARTED state in #beginSnapshot
dataStreams,
threadPool.absoluteTimeInMillis(),
RepositoryData.UNKNOWN_REPO_GEN,
Map.of(),
userMeta,
Version.CURRENT,
remoteStoreIndexShallowCopy
);
initializingSnapshots.add(newEntry.snapshot());
snapshots = SnapshotsInProgress.of(Collections.singletonList(newEntry));
return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, snapshots).build();
}
@Override
public void onFailure(String source, Exception e) {
logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to create snapshot", repositoryName, snapshotName), e);
if (newEntry != null) {
initializingSnapshots.remove(newEntry.snapshot());
}
newEntry = null;
listener.onFailure(e);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, final ClusterState newState) {
if (newEntry != null) {
final Snapshot current = newEntry.snapshot();
assert initializingSnapshots.contains(current);
assert indices != null;
beginSnapshot(newState, newEntry, request.partial(), indices, repository, new ActionListener<Snapshot>() {
@Override
public void onResponse(final Snapshot snapshot) {
initializingSnapshots.remove(snapshot);
listener.onResponse(snapshot);
}
@Override
public void onFailure(final Exception e) {
initializingSnapshots.remove(current);
listener.onFailure(e);
}
});
}
}
@Override
public TimeValue timeout() {
return request.clusterManagerNodeTimeout();
}
});
}
/**
* Same as {@link #createSnapshot(CreateSnapshotRequest, ActionListener)} but invokes its callback on completion of
* the snapshot.
*
* @param request snapshot request
* @param listener snapshot completion listener
*/
public void executeSnapshot(final CreateSnapshotRequest request, final ActionListener<SnapshotInfo> listener) {
Repository repository = repositoriesService.repository(request.repository());
boolean isSnapshotV2 = SHALLOW_SNAPSHOT_V2.get(repository.getMetadata().settings());
logger.debug("shallow_snapshot_v2 is set as [{}]", isSnapshotV2);
boolean remoteStoreIndexShallowCopy = remoteStoreShallowCopyEnabled(repository);
if (remoteStoreIndexShallowCopy
&& isSnapshotV2
&& request.indices().length == 0
&& clusterService.state().nodes().getMinNodeVersion().onOrAfter(Version.V_2_17_0)) {
createSnapshotV2(request, listener);
} else {
createSnapshot(
request,
ActionListener.wrap(snapshot -> addListener(snapshot, ActionListener.map(listener, Tuple::v2)), listener::onFailure)
);
}
}
private boolean remoteStoreShallowCopyEnabled(Repository repository) {
boolean remoteStoreIndexShallowCopy = REMOTE_STORE_INDEX_SHALLOW_COPY.get(repository.getMetadata().settings());
logger.debug("remote_store_index_shallow_copy setting is set as [{}]", remoteStoreIndexShallowCopy);
if (remoteStoreIndexShallowCopy
&& clusterService.getClusterSettings().get(REMOTE_STORE_COMPATIBILITY_MODE_SETTING).equals(CompatibilityMode.MIXED)) {
// don't allow shallow snapshots if compatibility mode is not strict
logger.warn("Shallow snapshots are not supported during migration. Falling back to full snapshot.");
remoteStoreIndexShallowCopy = false;
}
return remoteStoreIndexShallowCopy;
}
/**
* Initializes the snapshotting process.
*
* This method is used by clients to start a snapshot. It makes sure that no snapshots are currently running and
* creates a snapshot record in cluster state metadata.
*
* @param request snapshot request
* @param listener snapshot creation listener
*/
public void createSnapshot(final CreateSnapshotRequest request, final ActionListener<Snapshot> listener) {
final String repositoryName = request.repository();
final String snapshotName = indexNameExpressionResolver.resolveDateMathExpression(request.snapshot());
validate(repositoryName, snapshotName);
// TODO: create snapshot UUID in CreateSnapshotRequest and make this operation idempotent to cleanly deal with transport layer
// retries
final SnapshotId snapshotId = new SnapshotId(snapshotName, UUIDs.randomBase64UUID()); // new UUID for the snapshot
Repository repository = repositoriesService.repository(request.repository());
if (repository.isReadOnly()) {
listener.onFailure(new RepositoryException(repository.getMetadata().name(), "cannot create snapshot in a readonly repository"));
return;
}
final Snapshot snapshot = new Snapshot(repositoryName, snapshotId);
final Map<String, Object> userMeta = repository.adaptUserMetadata(request.userMetadata());
repository.executeConsistentStateUpdate(repositoryData -> new ClusterStateUpdateTask() {
private SnapshotsInProgress.Entry newEntry;
@Override
public ClusterState execute(ClusterState currentState) {
createSnapshotPreValidations(currentState, repositoryData, repositoryName, snapshotName);
final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final List<SnapshotsInProgress.Entry> runningSnapshots = snapshots.entries();
final boolean concurrentOperationsAllowed = currentState.nodes().getMinNodeVersion().onOrAfter(FULL_CONCURRENCY_VERSION);
final SnapshotDeletionsInProgress deletionsInProgress = currentState.custom(
SnapshotDeletionsInProgress.TYPE,
SnapshotDeletionsInProgress.EMPTY
);
if (deletionsInProgress.hasDeletionsInProgress() && concurrentOperationsAllowed == false) {
throw new ConcurrentSnapshotExecutionException(
repositoryName,
snapshotName,
"cannot snapshot while a snapshot deletion is in-progress in [" + deletionsInProgress + "]"
);
}
// Fail if there are any concurrently running snapshots. The only exception to this being a snapshot in INIT state from a
// previous cluster-manager that we can simply ignore and remove from the cluster state because we would clean it up from
// the cluster state anyway in #applyClusterState.
if (concurrentOperationsAllowed == false && runningSnapshots.stream().anyMatch(entry -> entry.state() != State.INIT)) {
throw new ConcurrentSnapshotExecutionException(repositoryName, snapshotName, " a snapshot is already running");
}
ensureBelowConcurrencyLimit(repositoryName, snapshotName, snapshots, deletionsInProgress);
// Store newSnapshot here to be processed in clusterStateProcessed
List<String> indices = Arrays.asList(indexNameExpressionResolver.concreteIndexNames(currentState, request));
final List<String> dataStreams = indexNameExpressionResolver.dataStreamNames(
currentState,
request.indicesOptions(),
request.indices()
);
logger.trace("[{}][{}] creating snapshot for indices [{}]", repositoryName, snapshotName, indices);
int pathType = clusterService.state().nodes().getMinNodeVersion().onOrAfter(Version.V_2_17_0)
? SHARD_PATH_TYPE.get(repository.getMetadata().settings()).getCode()
: IndexId.DEFAULT_SHARD_PATH_TYPE;
final List<IndexId> indexIds = repositoryData.resolveNewIndices(
indices,
getInFlightIndexIds(runningSnapshots, repositoryName),
pathType
);
final Version version = minCompatibleVersion(currentState.nodes().getMinNodeVersion(), repositoryData, null);
final Map<ShardId, ShardSnapshotStatus> shards = shards(
snapshots,
deletionsInProgress,
currentState.metadata(),
currentState.routingTable(),
indexIds,
useShardGenerations(version),
repositoryData,
repositoryName
);
if (request.partial() == false) {
Set<String> missing = new HashSet<>();
for (final Map.Entry<ShardId, ShardSnapshotStatus> entry : shards.entrySet()) {
if (entry.getValue().state() == ShardState.MISSING) {
missing.add(entry.getKey().getIndex().getName());
}
}
if (missing.isEmpty() == false) {
throw new SnapshotException(
new Snapshot(repositoryName, snapshotId),
"Indices don't have primary shards " + missing
);
}
}
boolean remoteStoreIndexShallowCopy = REMOTE_STORE_INDEX_SHALLOW_COPY.get(repository.getMetadata().settings());
logger.debug("remote_store_index_shallow_copy setting is set as [{}]", remoteStoreIndexShallowCopy);
if (remoteStoreIndexShallowCopy
&& clusterService.getClusterSettings().get(REMOTE_STORE_COMPATIBILITY_MODE_SETTING).equals(CompatibilityMode.MIXED)) {
// don't allow shallow snapshots if compatibility mode is not strict
logger.warn("Shallow snapshots are not supported during migration. Falling back to full snapshot.");
remoteStoreIndexShallowCopy = false;
}
newEntry = SnapshotsInProgress.startedEntry(
new Snapshot(repositoryName, snapshotId),
request.includeGlobalState(),
request.partial(),
indexIds,
dataStreams,
threadPool.absoluteTimeInMillis(),
repositoryData.getGenId(),
shards,
userMeta,
version,
remoteStoreIndexShallowCopy
);
final List<SnapshotsInProgress.Entry> newEntries = new ArrayList<>(runningSnapshots);
newEntries.add(newEntry);
return ClusterState.builder(currentState)
.putCustom(SnapshotsInProgress.TYPE, SnapshotsInProgress.of(new ArrayList<>(newEntries)))
.build();
}
@Override
public void onFailure(String source, Exception e) {
logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to create snapshot", repositoryName, snapshotName), e);
listener.onFailure(e);
}
@Override
public ClusterManagerTaskThrottler.ThrottlingKey getClusterManagerThrottlingKey() {
return createSnapshotTaskKey;
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, final ClusterState newState) {
try {
logger.info("snapshot [{}] started", snapshot);
listener.onResponse(snapshot);
} finally {
if (newEntry.state().completed()) {
endSnapshot(newEntry, newState.metadata(), repositoryData);
}
}
}
@Override
public TimeValue timeout() {
return request.clusterManagerNodeTimeout();
}
}, "create_snapshot [" + snapshotName + ']', listener::onFailure);
}
/**
* Initializes the snapshotting process for clients when Snapshot v2 is enabled. This method is responsible for taking
* a shallow snapshot and pinning the snapshot timestamp. The entire process is executed on the cluster manager node.
*
* Unlike traditional snapshot operations, this method performs a synchronous snapshot execution and doesn't
* upload any shard metadata to the snapshot repository.
* The pinned timestamp is later reconciled with remote store segment and translog metadata files during the restore
* operation.
*
* @param request snapshot request
* @param listener snapshot creation listener
*/
public void createSnapshotV2(final CreateSnapshotRequest request, final ActionListener<SnapshotInfo> listener) {
long pinnedTimestamp = System.currentTimeMillis();
final String repositoryName = request.repository();
final String snapshotName = indexNameExpressionResolver.resolveDateMathExpression(request.snapshot());
validate(repositoryName, snapshotName);
final SnapshotId snapshotId = new SnapshotId(snapshotName, UUIDs.randomBase64UUID()); // new UUID for the snapshot
Repository repository = repositoriesService.repository(repositoryName);
if (repository.isReadOnly()) {
listener.onFailure(
new RepositoryException(repository.getMetadata().name(), "cannot create snapshot-v2 in a readonly repository")
);
return;
}
final Snapshot snapshot = new Snapshot(repositoryName, snapshotId);
ClusterState currentState = clusterService.state();
final Map<String, Object> userMeta = repository.adaptUserMetadata(request.userMetadata());
try {
final StepListener<RepositoryData> repositoryDataListener = new StepListener<>();
repositoriesService.getRepositoryData(repositoryName, repositoryDataListener);
repositoryDataListener.whenComplete(repositoryData -> {
createSnapshotPreValidations(currentState, repositoryData, repositoryName, snapshotName);
List<String> indices = new ArrayList<>(currentState.metadata().indices().keySet());
final List<String> dataStreams = indexNameExpressionResolver.dataStreamNames(
currentState,
request.indicesOptions(),
request.indices()
);
logger.trace("[{}][{}] creating snapshot-v2 for indices [{}]", repositoryName, snapshotName, indices);
final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final List<SnapshotsInProgress.Entry> runningSnapshots = snapshots.entries();
final List<IndexId> indexIds = repositoryData.resolveNewIndices(
indices,
getInFlightIndexIds(runningSnapshots, repositoryName),
IndexId.DEFAULT_SHARD_PATH_TYPE
);
final Version version = minCompatibleVersion(currentState.nodes().getMinNodeVersion(), repositoryData, null);
final ShardGenerations shardGenerations = buildShardsGenerationFromRepositoryData(
currentState.metadata(),
currentState.routingTable(),
indexIds,
repositoryData
);
if (repositoryData.getGenId() == RepositoryData.UNKNOWN_REPO_GEN) {
logger.debug("[{}] was aborted before starting", snapshot);
throw new SnapshotException(snapshot, "Aborted on initialization");
}
final SnapshotInfo snapshotInfo = new SnapshotInfo(
snapshot.getSnapshotId(),
shardGenerations.indices().stream().map(IndexId::getName).collect(Collectors.toList()),
dataStreams,
pinnedTimestamp,
null,
System.currentTimeMillis(),
shardGenerations.totalShards(),
Collections.emptyList(),
request.includeGlobalState(),
userMeta,
true,
pinnedTimestamp
);
if (!clusterService.state().nodes().isLocalNodeElectedClusterManager()) {
throw new SnapshotException(repositoryName, snapshotName, "Aborting snapshot-v2, no longer cluster manager");
}
final StepListener<RepositoryData> pinnedTimestampListener = new StepListener<>();
pinnedTimestampListener.whenComplete(repoData -> { listener.onResponse(snapshotInfo); }, listener::onFailure);
repository.finalizeSnapshot(
shardGenerations,
repositoryData.getGenId(),
metadataForSnapshot(currentState.metadata(), request.includeGlobalState(), false, dataStreams, indexIds),
snapshotInfo,
version,
state -> state,
Priority.IMMEDIATE,
new ActionListener<RepositoryData>() {
@Override
public void onResponse(RepositoryData repositoryData) {
if (!clusterService.state().nodes().isLocalNodeElectedClusterManager()) {
failSnapshotCompletionListeners(
snapshot,
new SnapshotException(snapshot, "Aborting snapshot-v2, no longer cluster manager")
);
listener.onFailure(
new SnapshotException(repositoryName, snapshotName, "Aborting snapshot-v2, no longer cluster manager")
);
return;
}
updateSnapshotPinnedTimestamp(repositoryData, snapshot, pinnedTimestamp, pinnedTimestampListener);
}
@Override
public void onFailure(Exception e) {
logger.error("Failed to upload files to snapshot repo {} for snapshot-v2 {} ", repositoryName, snapshotName);
listener.onFailure(e);
}
}
);
}, listener::onFailure);
} catch (Exception e) {
assert false : new AssertionError(e);
logger.error("Snapshot-v2 {} creation failed with exception {}", snapshot.getSnapshotId().getName(), e);
listener.onFailure(e);
}
}
private void createSnapshotPreValidations(
ClusterState currentState,
RepositoryData repositoryData,
String repositoryName,
String snapshotName
) {
Repository repository = repositoriesService.repository(repositoryName);
ensureSnapshotNameAvailableInRepo(repositoryData, snapshotName, repository);
final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final List<SnapshotsInProgress.Entry> runningSnapshots = snapshots.entries();
ensureSnapshotNameNotRunning(runningSnapshots, repositoryName, snapshotName);
validate(repositoryName, snapshotName, currentState);
final RepositoryCleanupInProgress repositoryCleanupInProgress = currentState.custom(
RepositoryCleanupInProgress.TYPE,
RepositoryCleanupInProgress.EMPTY
);
if (repositoryCleanupInProgress.hasCleanupInProgress()) {
throw new ConcurrentSnapshotExecutionException(
repositoryName,
snapshotName,
"cannot snapshot-v2 while a repository cleanup is in-progress in [" + repositoryCleanupInProgress + "]"
);
}
ensureNoCleanupInProgress(currentState, repositoryName, snapshotName);
}
private void updateSnapshotPinnedTimestamp(
RepositoryData repositoryData,
Snapshot snapshot,
long timestampToPin,
ActionListener<RepositoryData> listener
) {
remoteStorePinnedTimestampService.pinTimestamp(
timestampToPin,
snapshot.getRepository() + SNAPSHOT_PINNED_TIMESTAMP_DELIMITER + snapshot.getSnapshotId().getUUID(),
new ActionListener<Void>() {
@Override
public void onResponse(Void unused) {
logger.debug("Timestamp pinned successfully for snapshot {}", snapshot.getSnapshotId().getName());
listener.onResponse(repositoryData);
}
@Override
public void onFailure(Exception e) {
logger.error("Failed to pin timestamp for snapshot {} with exception {}", snapshot.getSnapshotId().getName(), e);
listener.onFailure(e);
}
}
);
}
private void cloneSnapshotPinnedTimestamp(
RepositoryData repositoryData,
SnapshotId sourceSnapshot,
Snapshot snapshot,
long timestampToPin,
ActionListener<RepositoryData> listener
) {
remoteStorePinnedTimestampService.cloneTimestamp(
timestampToPin,
snapshot.getRepository() + SNAPSHOT_PINNED_TIMESTAMP_DELIMITER + sourceSnapshot.getUUID(),
snapshot.getRepository() + SNAPSHOT_PINNED_TIMESTAMP_DELIMITER + snapshot.getSnapshotId().getUUID(),
new ActionListener<Void>() {
@Override
public void onResponse(Void unused) {
logger.debug("Timestamp pinned successfully for clone snapshot {}", snapshot.getSnapshotId().getName());
listener.onResponse(repositoryData);
}
@Override
public void onFailure(Exception e) {
logger.error("Failed to pin timestamp for clone snapshot {} with exception {}", snapshot.getSnapshotId().getName(), e);
listener.onFailure(e);
}
}
);
}
private static void ensureSnapshotNameNotRunning(
List<SnapshotsInProgress.Entry> runningSnapshots,
String repositoryName,
String snapshotName
) {
if (runningSnapshots.stream().anyMatch(s -> {
final Snapshot running = s.snapshot();
return running.getRepository().equals(repositoryName) && running.getSnapshotId().getName().equals(snapshotName);
})) {
throw new InvalidSnapshotNameException(repositoryName, snapshotName, "snapshot with the same name is already in-progress");
}
}
private static Map<String, IndexId> getInFlightIndexIds(List<SnapshotsInProgress.Entry> runningSnapshots, String repositoryName) {
return runningSnapshots.stream()
.filter(entry -> entry.repository().equals(repositoryName))
.flatMap(entry -> entry.indices().stream())
.distinct()
.collect(Collectors.toMap(IndexId::getName, Function.identity()));
}
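/*
 * Minimal sketch of the resulting shape (hypothetical data, not from the original
 * source): for running entries in repository "my-repo" that cover indices "logs-1"
 * and "logs-2", the method yields one IndexId per distinct in-flight index name.
 *
 *   Map<String, IndexId> inFlight = getInFlightIndexIds(runningSnapshots, "my-repo");
 *   // {"logs-1" -> IndexId("logs-1", <uuid>), "logs-2" -> IndexId("logs-2", <uuid>)}
 */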
/**
* This method does some pre-validation and checks for the presence of the source snapshot in the repository data.
* For a shallow snapshot v2 clone, it checks that the pinned timestamp of the source snapshot is greater than zero.
*
* @param request snapshot request
* @param listener snapshot completion listener
*/
public void executeClone(CloneSnapshotRequest request, ActionListener<Void> listener) {
final String repositoryName = request.repository();
Repository repository = repositoriesService.repository(repositoryName);
if (repository.isReadOnly()) {
listener.onFailure(new RepositoryException(repositoryName, "cannot create snapshot in a readonly repository"));
return;
}
final String snapshotName = indexNameExpressionResolver.resolveDateMathExpression(request.target());
validate(repositoryName, snapshotName);
final SnapshotId snapshotId = new SnapshotId(snapshotName, UUIDs.randomBase64UUID());
final Snapshot snapshot = new Snapshot(repositoryName, snapshotId);
try {
final StepListener<RepositoryData> repositoryDataListener = new StepListener<>();
repositoriesService.getRepositoryData(repositoryName, repositoryDataListener);
repositoryDataListener.whenComplete(repositoryData -> {
final SnapshotId sourceSnapshotId = repositoryData.getSnapshotIds()
.stream()
.filter(src -> src.getName().equals(request.source()))
.findAny()
.orElseThrow(() -> new SnapshotMissingException(repositoryName, request.source()));
final StepListener<SnapshotInfo> snapshotInfoListener = new StepListener<>();
final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
executor.execute(ActionRunnable.supply(snapshotInfoListener, () -> repository.getSnapshotInfo(sourceSnapshotId)));
snapshotInfoListener.whenComplete(sourceSnapshotInfo -> {
if (sourceSnapshotInfo.getPinnedTimestamp() > 0) {
if (hasWildCardPatterForCloneSnapshotV2(request.indices()) == false) {
throw new SnapshotException(
repositoryName,
snapshotName,
"Aborting clone for Snapshot-v2, only wildcard pattern '*' is supported for indices"
);
}
cloneSnapshotV2(request, snapshot, repositoryName, repository, listener);
} else {
cloneSnapshot(request, snapshot, repositoryName, repository, listener);
}
}, e -> listener.onFailure(e));
}, e -> listener.onFailure(e));
} catch (Exception e) {
assert false : new AssertionError(e);
logger.error("Snapshot {} clone failed with exception {}", snapshot.getSnapshotId().getName(), e);
listener.onFailure(e);
}
}
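/*
 * Illustrative call (not part of the original source): cloning a completed snapshot.
 * For a shallow v2 source snapshot (pinned timestamp > 0) the indices argument must be
 * the single wildcard "*" per the check above; repository and snapshot names are
 * hypothetical.
 *
 *   CloneSnapshotRequest cloneRequest =
 *       new CloneSnapshotRequest("my-repo", "source-snap", "target-snap", new String[] { "*" });
 *   snapshotsService.executeClone(cloneRequest, ActionListener.wrap(
 *       ignored -> logger.info("clone accepted"),
 *       e -> logger.warn("clone failed", e)
 *   ));
 */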
/**
* This method is responsible for creating a clone of the shallow snapshot v2.
* It pins the same timestamp that is pinned by the source snapshot.
*
* Unlike traditional snapshot operations, this method performs a synchronous clone execution and doesn't
* upload any shard metadata to the snapshot repository.
* The pinned timestamp is later reconciled with remote store segment and translog metadata files during the restore
* operation.
*
* @param request snapshot request
* @param snapshot clone snapshot
* @param repositoryName snapshot repository name
* @param repository snapshot repository
* @param listener completion listener
*/
public void cloneSnapshotV2(
CloneSnapshotRequest request,
Snapshot snapshot,
String repositoryName,
Repository repository,
ActionListener<Void> listener
) {
long startTime = System.currentTimeMillis();
ClusterState currentState = clusterService.state();
String snapshotName = snapshot.getSnapshotId().getName();
repository.executeConsistentStateUpdate(repositoryData -> new ClusterStateUpdateTask(Priority.URGENT) {
private SnapshotsInProgress.Entry newEntry;
private SnapshotId sourceSnapshotId;
private List<String> indicesForSnapshot;
@Override
public ClusterState execute(ClusterState currentState) {
createSnapshotPreValidations(currentState, repositoryData, repositoryName, snapshotName);
final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final List<SnapshotsInProgress.Entry> runningSnapshots = snapshots.entries();
sourceSnapshotId = repositoryData.getSnapshotIds()
.stream()
.filter(src -> src.getName().equals(request.source()))
.findAny()
.orElseThrow(() -> new SnapshotMissingException(repositoryName, request.source()));
final SnapshotDeletionsInProgress deletionsInProgress = currentState.custom(
SnapshotDeletionsInProgress.TYPE,
SnapshotDeletionsInProgress.EMPTY
);
if (deletionsInProgress.getEntries().stream().anyMatch(entry -> entry.getSnapshots().contains(sourceSnapshotId))) {
throw new ConcurrentSnapshotExecutionException(
repositoryName,
sourceSnapshotId.getName(),
"cannot clone from snapshot that is being deleted"
);
}
indicesForSnapshot = new ArrayList<>();
for (IndexId indexId : repositoryData.getIndices().values()) {
if (repositoryData.getSnapshots(indexId).contains(sourceSnapshotId)) {
indicesForSnapshot.add(indexId.getName());
}
}
newEntry = SnapshotsInProgress.startClone(
snapshot,
sourceSnapshotId,
repositoryData.resolveIndices(indicesForSnapshot),
threadPool.absoluteTimeInMillis(),
repositoryData.getGenId(),
minCompatibleVersion(currentState.nodes().getMinNodeVersion(), repositoryData, null)
);
final List<SnapshotsInProgress.Entry> newEntries = new ArrayList<>(runningSnapshots);
newEntries.add(newEntry);
return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, SnapshotsInProgress.of(newEntries)).build();
}
@Override
public void onFailure(String source, Exception e) {
logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to clone snapshot-v2", repositoryName, snapshotName), e);
listener.onFailure(e);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, final ClusterState newState) {
logger.info("snapshot-v2 clone [{}] started", snapshot);
final StepListener<SnapshotInfo> snapshotInfoListener = new StepListener<>();
final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
executor.execute(ActionRunnable.supply(snapshotInfoListener, () -> repository.getSnapshotInfo(sourceSnapshotId)));
final ShardGenerations shardGenerations = repositoryData.shardGenerations();
snapshotInfoListener.whenComplete(snapshotInfo -> {
final SnapshotInfo cloneSnapshotInfo = new SnapshotInfo(
snapshot.getSnapshotId(),
indicesForSnapshot,
newEntry.dataStreams(),
startTime,
null,
System.currentTimeMillis(),
snapshotInfo.totalShards(),
Collections.emptyList(),
newEntry.includeGlobalState(),
newEntry.userMetadata(),
true,
snapshotInfo.getPinnedTimestamp()
);
if (!clusterService.state().nodes().isLocalNodeElectedClusterManager()) {
throw new SnapshotException(repositoryName, snapshotName, "Aborting snapshot-v2 clone, no longer cluster manager");
}
final StepListener<RepositoryData> pinnedTimestampListener = new StepListener<>();
pinnedTimestampListener.whenComplete(repoData -> {
logger.info("snapshot-v2 clone [{}] completed successfully", snapshot);
listener.onResponse(null);
}, listener::onFailure);
repository.finalizeSnapshot(
shardGenerations,
repositoryData.getGenId(),
metadataForSnapshot(
currentState.metadata(),
newEntry.includeGlobalState(),
false,
newEntry.dataStreams(),
newEntry.indices()
),
cloneSnapshotInfo,
repositoryData.getVersion(sourceSnapshotId),
state -> stateWithoutSnapshot(state, snapshot),
Priority.IMMEDIATE,
new ActionListener<RepositoryData>() {
@Override
public void onResponse(RepositoryData repositoryData) {
if (!clusterService.state().nodes().isLocalNodeElectedClusterManager()) {
failSnapshotCompletionListeners(
snapshot,
new SnapshotException(snapshot, "Aborting Snapshot-v2 clone, no longer cluster manager")
);
listener.onFailure(
new SnapshotException(
repositoryName,
snapshotName,
"Aborting Snapshot-v2 clone, no longer cluster manager"
)
);
return;
}
cloneSnapshotPinnedTimestamp(
repositoryData,
sourceSnapshotId,
snapshot,
snapshotInfo.getPinnedTimestamp(),
pinnedTimestampListener
);
}
@Override
public void onFailure(Exception e) {
logger.error(
"Failed to upload files to snapshot repo {} for clone snapshot-v2 {} ",
repositoryName,
snapshotName
);
listener.onFailure(e);
}
}
);
}, listener::onFailure);
}
@Override
public TimeValue timeout() {
return request.clusterManagerNodeTimeout();
}
}, "clone_snapshot_v2 [" + request.source() + "][" + snapshotName + ']', listener::onFailure);
}
// TODO: It is worth revisiting the design choice of creating a placeholder entry in snapshots-in-progress here once we have a cache
// for repository metadata and loading it has predictable performance
public void cloneSnapshot(
CloneSnapshotRequest request,
Snapshot snapshot,
String repositoryName,
Repository repository,
ActionListener<Void> listener
) {
String snapshotName = snapshot.getSnapshotId().getName();
initializingClones.add(snapshot);
repository.executeConsistentStateUpdate(repositoryData -> new ClusterStateUpdateTask() {
private SnapshotsInProgress.Entry newEntry;
@Override
public ClusterState execute(ClusterState currentState) {
ensureSnapshotNameAvailableInRepo(repositoryData, snapshotName, repository);
ensureNoCleanupInProgress(currentState, repositoryName, snapshotName);
final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final List<SnapshotsInProgress.Entry> runningSnapshots = snapshots.entries();
ensureSnapshotNameNotRunning(runningSnapshots, repositoryName, snapshotName);
validate(repositoryName, snapshotName, currentState);
final SnapshotId sourceSnapshotId = repositoryData.getSnapshotIds()
.stream()
.filter(src -> src.getName().equals(request.source()))
.findAny()
.orElseThrow(() -> new SnapshotMissingException(repositoryName, request.source()));
final SnapshotDeletionsInProgress deletionsInProgress = currentState.custom(
SnapshotDeletionsInProgress.TYPE,
SnapshotDeletionsInProgress.EMPTY
);
if (deletionsInProgress.getEntries().stream().anyMatch(entry -> entry.getSnapshots().contains(sourceSnapshotId))) {
throw new ConcurrentSnapshotExecutionException(
repositoryName,
sourceSnapshotId.getName(),
"cannot clone from snapshot that is being deleted"
);
}
ensureBelowConcurrencyLimit(repositoryName, snapshotName, snapshots, deletionsInProgress);
final List<String> indicesForSnapshot = new ArrayList<>();
for (IndexId indexId : repositoryData.getIndices().values()) {
if (repositoryData.getSnapshots(indexId).contains(sourceSnapshotId)) {
indicesForSnapshot.add(indexId.getName());
}
}
final List<String> matchingIndices = filterIndices(indicesForSnapshot, request.indices(), request.indicesOptions());
if (matchingIndices.isEmpty()) {
throw new SnapshotException(
new Snapshot(repositoryName, sourceSnapshotId),
"No indices in the source snapshot ["
+ sourceSnapshotId
+ "] matched requested pattern ["
+ Strings.arrayToCommaDelimitedString(request.indices())
+ "]"
);
}
newEntry = SnapshotsInProgress.startClone(
snapshot,
sourceSnapshotId,
repositoryData.resolveIndices(matchingIndices),
threadPool.absoluteTimeInMillis(),
repositoryData.getGenId(),
minCompatibleVersion(currentState.nodes().getMinNodeVersion(), repositoryData, null)
);
final List<SnapshotsInProgress.Entry> newEntries = new ArrayList<>(runningSnapshots);
newEntries.add(newEntry);
return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, SnapshotsInProgress.of(newEntries)).build();
}
@Override
public void onFailure(String source, Exception e) {
initializingClones.remove(snapshot);
logger.warn(() -> new ParameterizedMessage("[{}][{}] failed to clone snapshot", repositoryName, snapshotName), e);
listener.onFailure(e);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, final ClusterState newState) {
logger.info("snapshot clone [{}] started", snapshot);
addListener(snapshot, ActionListener.wrap(r -> listener.onResponse(null), listener::onFailure));
startCloning(repository, newEntry);
}
@Override
public TimeValue timeout() {
return request.clusterManagerNodeTimeout();
}
}, "clone_snapshot [" + request.source() + "][" + snapshotName + ']', listener::onFailure);
}
private static void ensureNoCleanupInProgress(ClusterState currentState, String repositoryName, String snapshotName) {
final RepositoryCleanupInProgress repositoryCleanupInProgress = currentState.custom(
RepositoryCleanupInProgress.TYPE,
RepositoryCleanupInProgress.EMPTY
);
if (repositoryCleanupInProgress.hasCleanupInProgress()) {
throw new ConcurrentSnapshotExecutionException(
repositoryName,
snapshotName,
"cannot snapshot while a repository cleanup is in-progress in [" + repositoryCleanupInProgress + "]"
);
}
}
private static void ensureSnapshotNameAvailableInRepo(RepositoryData repositoryData, String snapshotName, Repository repository) {
// check if the snapshot name already exists in the repository
if (repositoryData.getSnapshotIds().stream().anyMatch(s -> s.getName().equals(snapshotName))) {
throw new InvalidSnapshotNameException(
repository.getMetadata().name(),
snapshotName,
"snapshot with the same name already exists"
);
}
}
/**
* Determine the number of shards in each index of a clone operation and update the cluster state accordingly.
*
* @param repository repository to run operation on
* @param cloneEntry clone operation in the cluster state
*/
private void startCloning(Repository repository, SnapshotsInProgress.Entry cloneEntry) {
final List<IndexId> indices = cloneEntry.indices();
final SnapshotId sourceSnapshot = cloneEntry.source();
final Snapshot targetSnapshot = cloneEntry.snapshot();
final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
// Exception handler for IO exceptions while loading index and repo metadata
final Consumer<Exception> onFailure = e -> {
initializingClones.remove(targetSnapshot);
logger.info(() -> new ParameterizedMessage("Failed to start snapshot clone [{}]", cloneEntry), e);
removeFailedSnapshotFromClusterState(targetSnapshot, e, null, null);
};
// 1. step, load SnapshotInfo to make sure that source snapshot was successful for the indices we want to clone
// TODO: we could skip this step for snapshots with state SUCCESS
final StepListener<SnapshotInfo> snapshotInfoListener = new StepListener<>();
executor.execute(ActionRunnable.supply(snapshotInfoListener, () -> repository.getSnapshotInfo(sourceSnapshot)));
final StepListener<Collection<Tuple<IndexId, Integer>>> allShardCountsListener = new StepListener<>();
final GroupedActionListener<Tuple<IndexId, Integer>> shardCountListener = new GroupedActionListener<>(
allShardCountsListener,
indices.size()
);
snapshotInfoListener.whenComplete(snapshotInfo -> {
for (IndexId indexId : indices) {
if (RestoreService.failed(snapshotInfo, indexId.getName())) {
throw new SnapshotException(
targetSnapshot,
"Can't clone index [" + indexId + "] because its snapshot was not successful."
);
}
}
// 2. step, load the number of shards we have in each index to be cloned from the index metadata.
repository.getRepositoryData(ActionListener.wrap(repositoryData -> {
for (IndexId index : indices) {
executor.execute(ActionRunnable.supply(shardCountListener, () -> {
final IndexMetadata metadata = repository.getSnapshotIndexMetaData(repositoryData, sourceSnapshot, index);
return Tuple.tuple(index, metadata.getNumberOfShards());
}));
}
}, onFailure));
}, onFailure);
// 3. step, we have all the shard counts, now update the cluster state to have clone jobs in the snap entry
allShardCountsListener.whenComplete(counts -> repository.executeConsistentStateUpdate(repoData -> new ClusterStateUpdateTask() {
private SnapshotsInProgress.Entry updatedEntry;
@Override
public ClusterState execute(ClusterState currentState) {
final SnapshotsInProgress snapshotsInProgress = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final List<SnapshotsInProgress.Entry> updatedEntries = new ArrayList<>(snapshotsInProgress.entries());
boolean changed = false;
final String localNodeId = currentState.nodes().getLocalNodeId();
final String repoName = cloneEntry.repository();
final ShardGenerations shardGenerations = repoData.shardGenerations();
for (int i = 0; i < updatedEntries.size(); i++) {
if (cloneEntry.snapshot().equals(updatedEntries.get(i).snapshot())) {
final Map<RepositoryShardId, ShardSnapshotStatus> clonesBuilder = new HashMap<>();
final InFlightShardSnapshotStates inFlightShardStates = InFlightShardSnapshotStates.forRepo(
repoName,
snapshotsInProgress.entries()
);
for (Tuple<IndexId, Integer> count : counts) {
for (int shardId = 0; shardId < count.v2(); shardId++) {
final RepositoryShardId repoShardId = new RepositoryShardId(count.v1(), shardId);
final String indexName = repoShardId.indexName();
if (inFlightShardStates.isActive(indexName, shardId)) {
clonesBuilder.put(repoShardId, ShardSnapshotStatus.UNASSIGNED_QUEUED);
} else {
clonesBuilder.put(
repoShardId,
new ShardSnapshotStatus(
localNodeId,
inFlightShardStates.generationForShard(repoShardId.index(), shardId, shardGenerations)
)
);
}
}
}
updatedEntry = cloneEntry.withClones(clonesBuilder)
.withRemoteStoreIndexShallowCopy(
Boolean.TRUE.equals(snapshotInfoListener.result().isRemoteStoreIndexShallowCopyEnabled())
);
updatedEntries.set(i, updatedEntry);
changed = true;
break;
}
}
return updateWithSnapshots(currentState, changed ? SnapshotsInProgress.of(updatedEntries) : null, null);
}
@Override
public void onFailure(String source, Exception e) {
initializingClones.remove(targetSnapshot);
logger.info(() -> new ParameterizedMessage("Failed to start snapshot clone [{}]", cloneEntry), e);
failAllListenersOnMasterFailOver(e);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
initializingClones.remove(targetSnapshot);
if (updatedEntry != null) {
final Snapshot target = updatedEntry.snapshot();
final SnapshotId sourceSnapshot = updatedEntry.source();
for (final Map.Entry<RepositoryShardId, ShardSnapshotStatus> indexClone : updatedEntry.clones().entrySet()) {
final ShardSnapshotStatus shardStatusBefore = indexClone.getValue();
if (shardStatusBefore.state() != ShardState.INIT) {
continue;
}
final RepositoryShardId repoShardId = indexClone.getKey();
final boolean remoteStoreIndexShallowCopy = Boolean.TRUE.equals(updatedEntry.remoteStoreIndexShallowCopy());
runReadyClone(target, sourceSnapshot, shardStatusBefore, repoShardId, repository, remoteStoreIndexShallowCopy);
}
} else {
// Extremely unlikely corner case of cluster-manager failing over between starting the clone and
// starting shard clones.
logger.warn("Did not find expected entry [{}] in the cluster state", cloneEntry);
}
}
}, "start snapshot clone", onFailure), onFailure);
}
private final Set<RepositoryShardId> currentlyCloning = Collections.synchronizedSet(new HashSet<>());
private void runReadyClone(
Snapshot target,
SnapshotId sourceSnapshot,
ShardSnapshotStatus shardStatusBefore,
RepositoryShardId repoShardId,
Repository repository,
boolean remoteStoreIndexShallowCopy
) {
final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
executor.execute(new AbstractRunnable() {
@Override
public void onFailure(Exception e) {
logger.warn(
"Failed to get repository data while cloning shard [{}] from [{}] to [{}]",
repoShardId,
sourceSnapshot,
target.getSnapshotId()
);
failCloneShardAndUpdateClusterState(target, sourceSnapshot, repoShardId);
}
@Override
protected void doRun() {
final String localNodeId = clusterService.localNode().getId();
repository.getRepositoryData(ActionListener.wrap(repositoryData -> {
try {
final IndexMetadata indexMetadata = repository.getSnapshotIndexMetaData(
repositoryData,
sourceSnapshot,
repoShardId.index()
);
final boolean cloneRemoteStoreIndexShardSnapshot = remoteStoreIndexShallowCopy
&& indexMetadata.getSettings().getAsBoolean(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, false);
final SnapshotId targetSnapshot = target.getSnapshotId();
final ActionListener<String> listener = ActionListener.wrap(
generation -> innerUpdateSnapshotState(
new ShardSnapshotUpdate(
target,
repoShardId,
new ShardSnapshotStatus(localNodeId, ShardState.SUCCESS, generation)
),
ActionListener.runBefore(
ActionListener.wrap(
v -> logger.trace(
"Marked [{}] as successfully cloned from [{}] to [{}]",
repoShardId,
sourceSnapshot,
targetSnapshot
),
e -> {
logger.warn("Cluster state update after successful shard clone [{}] failed", repoShardId);
failAllListenersOnMasterFailOver(e);
}
),
() -> currentlyCloning.remove(repoShardId)
)
),
e -> {
logger.warn("Exception [{}] while trying to clone shard [{}]", e, repoShardId);
failCloneShardAndUpdateClusterState(target, sourceSnapshot, repoShardId);
}
);
if (currentlyCloning.add(repoShardId)) {
if (cloneRemoteStoreIndexShardSnapshot) {
repository.cloneRemoteStoreIndexShardSnapshot(
sourceSnapshot,
targetSnapshot,
repoShardId,
shardStatusBefore.generation(),
remoteStoreLockManagerFactory,
listener
);
} else {
repository.cloneShardSnapshot(
sourceSnapshot,
targetSnapshot,
repoShardId,
shardStatusBefore.generation(),
listener
);
}
}
} catch (IOException e) {
logger.warn("Failed to get index-metadata from repository data for index [{}]", repoShardId.index().getName());
failCloneShardAndUpdateClusterState(target, sourceSnapshot, repoShardId);
}
}, this::onFailure));
}
});
}
private void failCloneShardAndUpdateClusterState(Snapshot target, SnapshotId sourceSnapshot, RepositoryShardId repoShardId) {
// Stale blobs/lock-files will be cleaned up during delete/cleanup operation.
final String localNodeId = clusterService.localNode().getId();
innerUpdateSnapshotState(
new ShardSnapshotUpdate(
target,
repoShardId,
new ShardSnapshotStatus(localNodeId, ShardState.FAILED, "failed to clone shard snapshot", null)
),
ActionListener.runBefore(
ActionListener.wrap(
v -> logger.trace("Marked [{}] as failed clone from [{}] to [{}]", repoShardId, sourceSnapshot, target.getSnapshotId()),
ex -> {
logger.warn("Cluster state update after failed shard clone [{}] failed", repoShardId);
failAllListenersOnMasterFailOver(ex);
}
),
() -> currentlyCloning.remove(repoShardId)
)
);
}
private void ensureBelowConcurrencyLimit(
String repository,
String name,
SnapshotsInProgress snapshotsInProgress,
SnapshotDeletionsInProgress deletionsInProgress
) {
final int inProgressOperations = snapshotsInProgress.entries().size() + deletionsInProgress.getEntries().size();
final int maxOps = maxConcurrentOperations;
if (inProgressOperations >= maxOps) {
throw new ConcurrentSnapshotExecutionException(
repository,
name,
"Cannot start another operation, already running ["
+ inProgressOperations
+ "] operations and the current"
+ " limit for concurrent snapshot operations is set to ["
+ maxOps
+ "]"
);
}
}
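/*
 * Worked example (hypothetical numbers, not from the original source):
 *
 *   int inProgressOperations = 2 + 1; // two snapshot entries plus one deletion entry
 *   // with maxOps == 3: 3 >= 3, so the check above throws
 *   // ConcurrentSnapshotExecutionException before any repository work starts
 */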
/**
* Validates snapshot request
*
* @param repositoryName repository name
* @param snapshotName snapshot name
* @param state current cluster state
*/
private static void validate(String repositoryName, String snapshotName, ClusterState state) {
RepositoriesMetadata repositoriesMetadata = state.getMetadata().custom(RepositoriesMetadata.TYPE);
if (repositoriesMetadata == null || repositoriesMetadata.repository(repositoryName) == null) {
throw new RepositoryMissingException(repositoryName);
}
validate(repositoryName, snapshotName);
}
private static void validate(final String repositoryName, final String snapshotName) {
if (Strings.hasLength(snapshotName) == false) {
throw new InvalidSnapshotNameException(repositoryName, snapshotName, "cannot be empty");
}
if (snapshotName.contains(" ")) {
throw new InvalidSnapshotNameException(repositoryName, snapshotName, "must not contain whitespace");
}
if (snapshotName.contains(",")) {
throw new InvalidSnapshotNameException(repositoryName, snapshotName, "must not contain ','");
}
if (snapshotName.contains("#")) {
throw new InvalidSnapshotNameException(repositoryName, snapshotName, "must not contain '#'");
}
if (snapshotName.charAt(0) == '_') {
throw new InvalidSnapshotNameException(repositoryName, snapshotName, "must not start with '_'");
}
if (snapshotName.toLowerCase(Locale.ROOT).equals(snapshotName) == false) {
throw new InvalidSnapshotNameException(repositoryName, snapshotName, "must be lowercase");
}
if (Strings.validFileName(snapshotName) == false) {
throw new InvalidSnapshotNameException(
repositoryName,
snapshotName,
"must not contain the following characters " + Strings.INVALID_FILENAME_CHARS
);
}
}
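// Illustrative calls against the rules above (snapshot names are hypothetical):
//
//   validate("repo", "nightly-2024.01.01"); // ok: lowercase, no whitespace, ',', '#', or leading '_'
//   validate("repo", "");                   // throws InvalidSnapshotNameException: cannot be empty
//   validate("repo", "my snap");            // throws: must not contain whitespace
//   validate("repo", "_snap");              // throws: must not start with '_'
//   validate("repo", "Snap");               // throws: must be lowercase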
/**
* Starts snapshot.
*
* Creates snapshot in repository and updates snapshot metadata record with the list of shards that need to be processed.
* Note: This method is only used in clusters that contain a node older than {@link #NO_REPO_INITIALIZE_VERSION} to ensure a backwards
* compatible path for initializing the snapshot in the repository is executed.
*
* @param clusterState cluster state
* @param snapshot snapshot meta data
* @param partial allow partial snapshots
* @param indices names of the indices to be snapshotted
* @param repository repository to create the snapshot in
* @param userCreateSnapshotListener listener notified once the snapshot has been accepted for processing
*/
private void beginSnapshot(
final ClusterState clusterState,
final SnapshotsInProgress.Entry snapshot,
final boolean partial,
final List<String> indices,
final Repository repository,
final ActionListener<Snapshot> userCreateSnapshotListener
) {
threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(new AbstractRunnable() {
boolean hadAbortedInitializations;
@Override
protected void doRun() {
assert initializingSnapshots.contains(snapshot.snapshot());
if (repository.isReadOnly()) {
throw new RepositoryException(repository.getMetadata().name(), "cannot create snapshot in a readonly repository");
}
final String snapshotName = snapshot.snapshot().getSnapshotId().getName();
final StepListener<RepositoryData> repositoryDataListener = new StepListener<>();
repository.getRepositoryData(repositoryDataListener);
repositoryDataListener.whenComplete(repositoryData -> {
// check if the snapshot name already exists in the repository
if (repositoryData.getSnapshotIds().stream().anyMatch(s -> s.getName().equals(snapshotName))) {
throw new InvalidSnapshotNameException(
repository.getMetadata().name(),
snapshotName,
"snapshot with the same name already exists"
);
}
if (clusterState.nodes().getMinNodeVersion().onOrAfter(NO_REPO_INITIALIZE_VERSION) == false) {
// In mixed version clusters we initialize the snapshot in the repository so that in case of a
// cluster-manager failover to an older version cluster-manager node, snapshot finalization (which assumes
// initializeSnapshot was called) produces a valid snapshot.
repository.initializeSnapshot(
snapshot.snapshot().getSnapshotId(),
snapshot.indices(),
metadataForSnapshot(
clusterState.metadata(),
snapshot.includeGlobalState(),
snapshot.partial(),
snapshot.dataStreams(),
snapshot.indices()
)
);
}
logger.info("snapshot [{}] started", snapshot.snapshot());
final Version version = minCompatibleVersion(clusterState.nodes().getMinNodeVersion(), repositoryData, null);
if (indices.isEmpty()) {
// No indices in this snapshot - we are done
userCreateSnapshotListener.onResponse(snapshot.snapshot());
endSnapshot(
SnapshotsInProgress.startedEntry(
snapshot.snapshot(),
snapshot.includeGlobalState(),
snapshot.partial(),
Collections.emptyList(),
Collections.emptyList(),
threadPool.absoluteTimeInMillis(),
repositoryData.getGenId(),
Map.of(),
snapshot.userMetadata(),
version,
snapshot.remoteStoreIndexShallowCopy()
),
clusterState.metadata(),
repositoryData
);
return;
}
clusterService.submitStateUpdateTask("update_snapshot [" + snapshot.snapshot() + "]", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE);
List<SnapshotsInProgress.Entry> entries = new ArrayList<>();
for (SnapshotsInProgress.Entry entry : snapshots.entries()) {
if (entry.snapshot().equals(snapshot.snapshot()) == false) {
entries.add(entry);
continue;
}
if (entry.state() == State.ABORTED) {
entries.add(entry);
assert entry.shards().isEmpty();
hadAbortedInitializations = true;
} else {
int pathType = clusterService.state().nodes().getMinNodeVersion().onOrAfter(Version.V_2_17_0)
? SHARD_PATH_TYPE.get(repository.getMetadata().settings()).getCode()
: IndexId.DEFAULT_SHARD_PATH_TYPE;
final List<IndexId> indexIds = repositoryData.resolveNewIndices(
indices,
Collections.emptyMap(),
pathType
);
// Replace the snapshot that was just initialized
final Map<ShardId, ShardSnapshotStatus> shards = shards(
snapshots,
currentState.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY),
currentState.metadata(),
currentState.routingTable(),
indexIds,
useShardGenerations(version),
repositoryData,
entry.repository()
);
if (!partial) {
Tuple<Set<String>, Set<String>> indicesWithMissingShards = indicesWithMissingShards(
shards,
currentState.metadata()
);
Set<String> missing = indicesWithMissingShards.v1();
Set<String> closed = indicesWithMissingShards.v2();
if (missing.isEmpty() == false || closed.isEmpty() == false) {
final StringBuilder failureMessage = new StringBuilder();
if (missing.isEmpty() == false) {
failureMessage.append("Indices don't have primary shards ");
failureMessage.append(missing);
}
if (closed.isEmpty() == false) {
if (failureMessage.length() > 0) {
failureMessage.append("; ");
}
failureMessage.append("Indices are closed ");
failureMessage.append(closed);
}
entries.add(
new SnapshotsInProgress.Entry(
entry,
State.FAILED,
indexIds,
repositoryData.getGenId(),
shards,
version,
failureMessage.toString()
)
);
continue;
}
}
entries.add(
new SnapshotsInProgress.Entry(
entry,
State.STARTED,
indexIds,
repositoryData.getGenId(),
shards,
version,
null
)
);
}
}
return ClusterState.builder(currentState)
.putCustom(SnapshotsInProgress.TYPE, SnapshotsInProgress.of(unmodifiableList(entries)))
.build();
}
@Override
public void onFailure(String source, Exception e) {
logger.warn(
() -> new ParameterizedMessage("[{}] failed to create snapshot", snapshot.snapshot().getSnapshotId()),
e
);
removeFailedSnapshotFromClusterState(
snapshot.snapshot(),
e,
null,
new CleanupAfterErrorListener(userCreateSnapshotListener, e)
);
}
@Override
public void onNoLongerClusterManager(String source) {
// We are no longer a cluster-manager - we shouldn't try to do any cleanup
// The new cluster-manager will take care of it
logger.warn(
"[{}] failed to create snapshot - no longer a cluster-manager",
snapshot.snapshot().getSnapshotId()
);
userCreateSnapshotListener.onFailure(
new SnapshotException(snapshot.snapshot(), "cluster-manager changed during snapshot initialization")
);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
// The userCreateSnapshotListener.onResponse() notifies caller that the snapshot was accepted
// for processing. If client wants to wait for the snapshot completion, it can register snapshot
// completion listener in this method. For the snapshot completion to work properly, the snapshot
// should still exist when listener is registered.
userCreateSnapshotListener.onResponse(snapshot.snapshot());
if (hadAbortedInitializations) {
final SnapshotsInProgress snapshotsInProgress = newState.custom(SnapshotsInProgress.TYPE);
assert snapshotsInProgress != null;
final SnapshotsInProgress.Entry entry = snapshotsInProgress.snapshot(snapshot.snapshot());
assert entry != null;
endSnapshot(entry, newState.metadata(), repositoryData);
} else {
endCompletedSnapshots(newState);
}
}
});
}, this::onFailure);
}
@Override
public void onFailure(Exception e) {
logger.warn(() -> new ParameterizedMessage("failed to create snapshot [{}]", snapshot.snapshot().getSnapshotId()), e);
removeFailedSnapshotFromClusterState(
snapshot.snapshot(),
e,
null,
new CleanupAfterErrorListener(userCreateSnapshotListener, e)
);
}
});
}
private static class CleanupAfterErrorListener {
private final ActionListener<Snapshot> userCreateSnapshotListener;
private final Exception e;
CleanupAfterErrorListener(ActionListener<Snapshot> userCreateSnapshotListener, Exception e) {
this.userCreateSnapshotListener = userCreateSnapshotListener;
this.e = e;
}
public void onFailure(@Nullable Exception e) {
userCreateSnapshotListener.onFailure(ExceptionsHelper.useOrSuppress(e, this.e));
}
public void onNoLongerClusterManager() {
userCreateSnapshotListener.onFailure(e);
}
}
private static ShardGenerations buildGenerations(SnapshotsInProgress.Entry snapshot, Metadata metadata) {
ShardGenerations.Builder builder = ShardGenerations.builder();
final Map<String, IndexId> indexLookup = new HashMap<>();
snapshot.indices().forEach(idx -> indexLookup.put(idx.getName(), idx));
if (snapshot.isClone()) {
snapshot.clones().forEach((id, status) -> {
final IndexId indexId = indexLookup.get(id.indexName());
builder.put(indexId, id.shardId(), status.generation());
});
} else {
snapshot.shards().forEach((id, status) -> {
if (metadata.index(id.getIndex()) == null) {
assert snapshot.partial() : "Index [" + id.getIndex() + "] was deleted during a snapshot but snapshot was not partial.";
return;
}
final IndexId indexId = indexLookup.get(id.getIndexName());
if (indexId != null) {
builder.put(indexId, id.id(), status.generation());
}
});
}
return builder.build();
}
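// Sketch of the result computed above (generation values are assumptions for illustration): for a non-clone
// snapshot of index "idx" with two shards whose statuses carry generations "gen-a" and "gen-b", the builder
// ends up holding the equivalent of
//
//   ShardGenerations.builder()
//       .put(indexIdForIdx, 0, "gen-a")
//       .put(indexIdForIdx, 1, "gen-b")
//       .build();
//
// where indexIdForIdx stands for the IndexId resolved through the indexLookup map.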
private static Metadata metadataForSnapshot(
Metadata metadata,
boolean includeGlobalState,
boolean isPartial,
List<String> dataStreamsList,
List<IndexId> indices
) {
final Metadata.Builder builder;
if (includeGlobalState == false) {
// Remove global state from the cluster state
builder = Metadata.builder();
for (IndexId index : indices) {
final IndexMetadata indexMetadata = metadata.index(index.getName());
if (indexMetadata == null) {
assert isPartial : "Index [" + index + "] was deleted during a snapshot but snapshot was not partial.";
} else {
builder.put(indexMetadata, false);
}
}
} else {
builder = Metadata.builder(metadata);
}
// Only keep those data streams in the metadata that were actually requested by the initial snapshot create operation
Map<String, DataStream> dataStreams = new HashMap<>();
for (String dataStreamName : dataStreamsList) {
DataStream dataStream = metadata.dataStreams().get(dataStreamName);
if (dataStream == null) {
assert isPartial : "Data stream [" + dataStreamName + "] was deleted during a snapshot but snapshot was not partial.";
} else {
dataStreams.put(dataStreamName, dataStream);
}
}
return builder.dataStreams(dataStreams).build();
}
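// Sketch of the filtering above, assuming a cluster with indices "a" and "b" and a data stream "ds":
//
//   // includeGlobalState == false, indices == [a] -> metadata containing only index "a"
//   // includeGlobalState == true                  -> a copy of the full cluster metadata
//   // dataStreamsList == [ds]                     -> only "ds" survives in the returned dataStreams map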
/**
* Returns the status of currently running snapshots
* <p>
* This method is executed on the cluster-manager node
*
* @param snapshotsInProgress snapshots in progress in the cluster state
* @param repository repository id
* @param snapshots list of snapshots that will be used as a filter, empty list means no snapshots are filtered
* @return list of metadata for currently running snapshots
*/
public static List<SnapshotsInProgress.Entry> currentSnapshots(
@Nullable SnapshotsInProgress snapshotsInProgress,
String repository,
List<String> snapshots
) {
if (snapshotsInProgress == null || snapshotsInProgress.entries().isEmpty()) {
return Collections.emptyList();
}
if ("_all".equals(repository)) {
return snapshotsInProgress.entries();
}
if (snapshotsInProgress.entries().size() == 1) {
// Most likely scenario - one snapshot is currently running
// Check this snapshot against the query
SnapshotsInProgress.Entry entry = snapshotsInProgress.entries().get(0);
if (entry.snapshot().getRepository().equals(repository) == false) {
return Collections.emptyList();
}
if (snapshots.isEmpty() == false) {
for (String snapshot : snapshots) {
if (entry.snapshot().getSnapshotId().getName().equals(snapshot)) {
return snapshotsInProgress.entries();
}
}
return Collections.emptyList();
} else {
return snapshotsInProgress.entries();
}
}
List<SnapshotsInProgress.Entry> builder = new ArrayList<>();
for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) {
if (entry.snapshot().getRepository().equals(repository) == false) {
continue;
}
if (snapshots.isEmpty() == false) {
for (String snapshot : snapshots) {
if (entry.snapshot().getSnapshotId().getName().equals(snapshot)) {
builder.add(entry);
break;
}
}
} else {
builder.add(entry);
}
}
return unmodifiableList(builder);
}
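// Usage sketch for the filter semantics above (repository and snapshot names are hypothetical):
//
//   currentSnapshots(inProgress, "_all", List.of());           // every in-progress entry
//   currentSnapshots(inProgress, "repo-1", List.of());         // all entries for repository "repo-1"
//   currentSnapshots(inProgress, "repo-1", List.of("snap-1")); // only "snap-1" if it runs in "repo-1"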
@Override
public void applyClusterState(ClusterChangedEvent event) {
try {
if (event.localNodeClusterManager()) {
// We don't remove the old cluster-manager when the cluster-manager flips anymore, so we need to check for a
// change in cluster-manager
SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final boolean newClusterManager = event.previousState().nodes().isLocalNodeElectedClusterManager() == false;
processExternalChanges(
newClusterManager || removedNodesCleanupNeeded(snapshotsInProgress, event.nodesDelta().removedNodes()),
event.routingTableChanged() && waitingShardsStartedOrUnassigned(snapshotsInProgress, event)
);
} else if (snapshotCompletionListeners.isEmpty() == false) {
// We have snapshot listeners but are not the cluster-manager any more. Fail all waiting listeners except for
// those that already have their snapshots finalizing (those that are already finalizing will fail on their
// own when trying to update the cluster state).
for (Snapshot snapshot : new HashSet<>(snapshotCompletionListeners.keySet())) {
if (endingSnapshots.add(snapshot)) {
failSnapshotCompletionListeners(snapshot, new SnapshotException(snapshot, "no longer cluster-manager"));
}
}
}
} catch (Exception e) {
assert false : new AssertionError(e);
logger.warn("Failed to update snapshot state ", e);
}
assert assertConsistentWithClusterState(event.state());
assert assertNoDanglingSnapshots(event.state());
}
/**
* Clean up all snapshots found in the given cluster state that have no more work left:
* 1. Completed snapshots
* 2. Snapshots in state INIT that a previous cluster-manager of an older version failed to start
* 3. Snapshots in any other state that have all their shard tasks completed
*/
private void endCompletedSnapshots(ClusterState state) {
SnapshotsInProgress snapshotsInProgress = state.custom(SnapshotsInProgress.TYPE);
assert snapshotsInProgress != null;
snapshotsInProgress.entries()
.stream()
.filter(entry -> entry.state().completed() || entry.state() == State.INIT || completed(entry.shards().values()))
.forEach(entry -> endSnapshot(entry, state.metadata(), null));
}
private boolean assertConsistentWithClusterState(ClusterState state) {
final SnapshotsInProgress snapshotsInProgress = state.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
if (snapshotsInProgress.entries().isEmpty() == false) {
synchronized (endingSnapshots) {
final Set<Snapshot> runningSnapshots = Stream.concat(
snapshotsInProgress.entries().stream().map(SnapshotsInProgress.Entry::snapshot),
endingSnapshots.stream()
).collect(Collectors.toSet());
final Set<Snapshot> snapshotListenerKeys = snapshotCompletionListeners.keySet();
assert runningSnapshots.containsAll(snapshotListenerKeys) : "Saw completion listeners for unknown snapshots in "
+ snapshotListenerKeys
+ " but running snapshots are "
+ runningSnapshots;
}
}
final SnapshotDeletionsInProgress snapshotDeletionsInProgress = state.custom(
SnapshotDeletionsInProgress.TYPE,
SnapshotDeletionsInProgress.EMPTY
);
if (snapshotDeletionsInProgress.hasDeletionsInProgress()) {
synchronized (repositoryOperations.runningDeletions) {
final Set<String> runningDeletes = Stream.concat(
snapshotDeletionsInProgress.getEntries().stream().map(SnapshotDeletionsInProgress.Entry::uuid),
repositoryOperations.runningDeletions.stream()
).collect(Collectors.toSet());
final Set<String> deleteListenerKeys = snapshotDeletionListeners.keySet();
assert runningDeletes.containsAll(deleteListenerKeys) : "Saw deletions listeners for unknown uuids in "
+ deleteListenerKeys
+ " but running deletes are "
+ runningDeletes;
}
}
return true;
}
// Assert that there are no snapshots that have a shard that is waiting to be assigned even though the cluster state would allow for it
// to be assigned
private static boolean assertNoDanglingSnapshots(ClusterState state) {
final SnapshotsInProgress snapshotsInProgress = state.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final SnapshotDeletionsInProgress snapshotDeletionsInProgress = state.custom(
SnapshotDeletionsInProgress.TYPE,
SnapshotDeletionsInProgress.EMPTY
);
final Set<String> reposWithRunningDelete = snapshotDeletionsInProgress.getEntries()
.stream()
.filter(entry -> entry.state() == SnapshotDeletionsInProgress.State.STARTED)
.map(SnapshotDeletionsInProgress.Entry::repository)
.collect(Collectors.toSet());
final Set<String> reposSeen = new HashSet<>();
for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) {
if (reposSeen.add(entry.repository())) {
for (final ShardSnapshotStatus status : entry.shards().values()) {
if (status.equals(ShardSnapshotStatus.UNASSIGNED_QUEUED)) {
assert reposWithRunningDelete.contains(entry.repository()) : "Found shard snapshot waiting to be assigned in ["
+ entry
+ "] but it is not blocked by any running delete";
}
}
}
}
return true;
}
/**
* Updates the state of in-progress snapshots in reaction to a change in the configuration of the cluster nodes (cluster-manager fail-over or
* disconnect of a data node that was executing a snapshot) or a routing change that started shards whose snapshot state is
* {@link SnapshotsInProgress.ShardState#WAITING}.
*
* @param changedNodes true iff either a cluster-manager fail-over occurred or a data node that was doing snapshot work got removed from the
* cluster
* @param startShards true iff any waiting shards were started due to a routing change
*/
private void processExternalChanges(boolean changedNodes, boolean startShards) {
if (changedNodes == false && startShards == false) {
// nothing to do, no relevant external change happened
return;
}
clusterService.submitStateUpdateTask(
"update snapshot after shards started [" + startShards + "] or node configuration changed [" + changedNodes + "]",
new ClusterStateUpdateTask() {
private final Collection<SnapshotsInProgress.Entry> finishedSnapshots = new ArrayList<>();
private final Collection<SnapshotDeletionsInProgress.Entry> deletionsToExecute = new ArrayList<>();
@Override
public ClusterState execute(ClusterState currentState) {
RoutingTable routingTable = currentState.routingTable();
final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
DiscoveryNodes nodes = currentState.nodes();
boolean changed = false;
final EnumSet<State> statesToUpdate;
// If we are reacting to a change in the cluster node configuration we have to update the shard states of
// both started and aborted snapshots to potentially fail shards running on the removed nodes
if (changedNodes) {
statesToUpdate = EnumSet.of(State.STARTED, State.ABORTED);
} else {
// We are only reacting to shards that started, which only affects the individual shard states of started
// snapshots
statesToUpdate = EnumSet.of(State.STARTED);
}
ArrayList<SnapshotsInProgress.Entry> updatedSnapshotEntries = new ArrayList<>();
// We keep a cache of shards that failed in this map. If we fail a shardId for a given repository because of
// a node leaving or a shard becoming unassigned for one snapshot, we will also fail it for all subsequent
// enqueued snapshots for the same repository
final Map<String, Map<ShardId, ShardSnapshotStatus>> knownFailures = new HashMap<>();
for (final SnapshotsInProgress.Entry snapshot : snapshots.entries()) {
if (statesToUpdate.contains(snapshot.state())) {
// Currently initializing clone
if (snapshot.isClone() && snapshot.clones().isEmpty()) {
if (initializingClones.contains(snapshot.snapshot())) {
updatedSnapshotEntries.add(snapshot);
} else {
logger.debug("removing not yet start clone operation [{}]", snapshot);
changed = true;
}
} else {
final Map<ShardId, ShardSnapshotStatus> shards = processWaitingShardsAndRemovedNodes(
snapshot.shards(),
routingTable,
nodes,
knownFailures.computeIfAbsent(snapshot.repository(), k -> new HashMap<>())
);
if (shards != null) {
final SnapshotsInProgress.Entry updatedSnapshot = snapshot.withShardStates(shards);
changed = true;
if (updatedSnapshot.state().completed()) {
finishedSnapshots.add(updatedSnapshot);
}
updatedSnapshotEntries.add(updatedSnapshot);
} else {
updatedSnapshotEntries.add(snapshot);
}
}
} else if (snapshot.repositoryStateId() == RepositoryData.UNKNOWN_REPO_GEN) {
// BwC path, older versions could create entries with unknown repo GEN in INIT or ABORTED state that did not yet
// write anything to the repository physically. This means we can simply remove these from the cluster state
// without having to do any additional cleanup.
changed = true;
logger.debug("[{}] was found in dangling INIT or ABORTED state", snapshot);
} else {
if (snapshot.state().completed() || completed(snapshot.shards().values())) {
finishedSnapshots.add(snapshot);
}
updatedSnapshotEntries.add(snapshot);
}
}
final ClusterState res = readyDeletions(
changed
? ClusterState.builder(currentState)
.putCustom(SnapshotsInProgress.TYPE, SnapshotsInProgress.of(unmodifiableList(updatedSnapshotEntries)))
.build()
: currentState
).v1();
for (SnapshotDeletionsInProgress.Entry delete : res.custom(
SnapshotDeletionsInProgress.TYPE,
SnapshotDeletionsInProgress.EMPTY
).getEntries()) {
if (delete.state() == SnapshotDeletionsInProgress.State.STARTED) {
deletionsToExecute.add(delete);
}
}
return res;
}
@Override
public void onFailure(String source, Exception e) {
logger.warn(
() -> new ParameterizedMessage(
"failed to update snapshot state after shards started or nodes removed from [{}] ",
source
),
e
);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
final SnapshotDeletionsInProgress snapshotDeletionsInProgress = newState.custom(
SnapshotDeletionsInProgress.TYPE,
SnapshotDeletionsInProgress.EMPTY
);
if (finishedSnapshots.isEmpty() == false) {
// If we found snapshots that should be finalized as a result of the CS update we try to initiate
// finalization for them unless there is an executing snapshot delete already. If there is an executing
// snapshot delete we don't have to enqueue the snapshot finalizations here because the ongoing delete will
// take care of that when removing the delete from the cluster state
final Set<String> reposWithRunningDeletes = snapshotDeletionsInProgress.getEntries()
.stream()
.filter(entry -> entry.state() == SnapshotDeletionsInProgress.State.STARTED)
.map(SnapshotDeletionsInProgress.Entry::repository)
.collect(Collectors.toSet());
for (SnapshotsInProgress.Entry entry : finishedSnapshots) {
if (reposWithRunningDeletes.contains(entry.repository()) == false) {
endSnapshot(entry, newState.metadata(), null);
}
}
}
startExecutableClones(newState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY), null);
// run newly ready deletes
for (SnapshotDeletionsInProgress.Entry entry : deletionsToExecute) {
if (tryEnterRepoLoop(entry.repository())) {
deleteSnapshotsFromRepository(entry, newState.nodes().getMinNodeVersion());
}
}
}
}
);
}
private static Map<ShardId, ShardSnapshotStatus> processWaitingShardsAndRemovedNodes(
final Map<ShardId, ShardSnapshotStatus> snapshotShards,
RoutingTable routingTable,
DiscoveryNodes nodes,
Map<ShardId, ShardSnapshotStatus> knownFailures
) {
boolean snapshotChanged = false;
final Map<ShardId, ShardSnapshotStatus> shards = new HashMap<>();
for (final Map.Entry<ShardId, ShardSnapshotStatus> shardEntry : snapshotShards.entrySet()) {
ShardSnapshotStatus shardStatus = shardEntry.getValue();
ShardId shardId = shardEntry.getKey();
if (shardStatus.equals(ShardSnapshotStatus.UNASSIGNED_QUEUED)) {
// this shard snapshot is waiting for a previous snapshot to finish execution for this shard
final ShardSnapshotStatus knownFailure = knownFailures.get(shardId);
if (knownFailure == null) {
// if no failure is known for the shard we keep waiting
shards.put(shardId, shardStatus);
} else {
// If a failure is known for an execution we waited on for this shard then we fail with the same exception here
// as well
snapshotChanged = true;
shards.put(shardId, knownFailure);
}
} else if (shardStatus.state() == ShardState.WAITING) {
IndexRoutingTable indexShardRoutingTable = routingTable.index(shardId.getIndex());
if (indexShardRoutingTable != null) {
IndexShardRoutingTable shardRouting = indexShardRoutingTable.shard(shardId.id());
if (shardRouting != null && shardRouting.primaryShard() != null) {
if (shardRouting.primaryShard().started()) {
// Shard that we were waiting for has started on a node, let's process it
snapshotChanged = true;
logger.trace("starting shard that we were waiting for [{}] on node [{}]", shardId, shardStatus.nodeId());
shards.put(
shardId,
new ShardSnapshotStatus(shardRouting.primaryShard().currentNodeId(), shardStatus.generation())
);
continue;
} else if (shardRouting.primaryShard().initializing() || shardRouting.primaryShard().relocating()) {
// Shard that we were waiting for hasn't started yet or still relocating - will continue to wait
shards.put(shardId, shardStatus);
continue;
}
}
}
// Shard that we were waiting for went into unassigned state or disappeared - giving up
snapshotChanged = true;
logger.warn("failing snapshot of shard [{}] on unassigned shard [{}]", shardId, shardStatus.nodeId());
final ShardSnapshotStatus failedState = new ShardSnapshotStatus(
shardStatus.nodeId(),
ShardState.FAILED,
"shard is unassigned",
shardStatus.generation()
);
shards.put(shardId, failedState);
knownFailures.put(shardId, failedState);
} else if (shardStatus.state().completed() == false && shardStatus.nodeId() != null) {
if (nodes.nodeExists(shardStatus.nodeId())) {
shards.put(shardId, shardStatus);
} else {
// TODO: Restart snapshot on another node?
snapshotChanged = true;
logger.warn("failing snapshot of shard [{}] on closed node [{}]", shardId, shardStatus.nodeId());
final ShardSnapshotStatus failedState = new ShardSnapshotStatus(
shardStatus.nodeId(),
ShardState.FAILED,
"node shutdown",
shardStatus.generation()
);
shards.put(shardId, failedState);
knownFailures.put(shardId, failedState);
}
} else {
shards.put(shardId, shardStatus);
}
}
if (snapshotChanged) {
return Collections.unmodifiableMap(shards);
} else {
return null;
}
}
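// Summary of the transitions applied above (a restatement for readability, not new behavior):
//
//   UNASSIGNED_QUEUED + known failure in repo   -> copy the known failure
//   WAITING + primary started                   -> started on the primary's current node
//   WAITING + primary initializing/relocating   -> keep waiting
//   WAITING + primary unassigned or index gone  -> FAILED "shard is unassigned" (recorded in knownFailures)
//   running on a node that left the cluster     -> FAILED "node shutdown" (recorded in knownFailures)
//   anything else                               -> unchanged
//
// A null return signals that nothing changed, letting callers skip a cluster state update.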
private static boolean waitingShardsStartedOrUnassigned(SnapshotsInProgress snapshotsInProgress, ClusterChangedEvent event) {
for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) {
if (entry.state() == State.STARTED) {
for (final Map.Entry<ShardId, ShardSnapshotStatus> shardStatus : entry.shards().entrySet()) {
if (shardStatus.getValue().state() != ShardState.WAITING) {
continue;
}
final ShardId shardId = shardStatus.getKey();
if (event.indexRoutingTableChanged(shardId.getIndexName())) {
IndexRoutingTable indexShardRoutingTable = event.state().getRoutingTable().index(shardId.getIndex());
if (indexShardRoutingTable == null) {
// index got removed concurrently and we have to fail WAITING state shards
return true;
}
ShardRouting shardRouting = indexShardRoutingTable.shard(shardId.id()).primaryShard();
if (shardRouting != null && (shardRouting.started() || shardRouting.unassigned())) {
return true;
}
}
}
}
}
return false;
}
private static boolean removedNodesCleanupNeeded(SnapshotsInProgress snapshotsInProgress, List<DiscoveryNode> removedNodes) {
if (removedNodes.isEmpty()) {
// Nothing to do, no nodes removed
return false;
}
final Set<String> removedNodeIds = removedNodes.stream().map(DiscoveryNode::getId).collect(Collectors.toSet());
return snapshotsInProgress.entries().stream().anyMatch(snapshot -> {
if (snapshot.state().completed()) {
// nothing to do for already completed snapshots
return false;
}
for (final ShardSnapshotStatus shardSnapshotStatus : snapshot.shards().values()) {
if (shardSnapshotStatus.state().completed() == false && removedNodeIds.contains(shardSnapshotStatus.nodeId())) {
// Snapshot had an incomplete shard running on a removed node so we need to adjust that shard's snapshot status
return true;
}
}
return false;
});
}
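// Minimal sketch of the check above (node and snapshot references are hypothetical): a snapshot with an
// incomplete shard assigned to a removed node requires cleanup.
//
//   removedNodesCleanupNeeded(snapshotsInProgress, List.of(removedNode)); // true if a shard ran on it
//   removedNodesCleanupNeeded(snapshotsInProgress, List.of());            // false, no nodes removed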
/**
* Returns the set of indices with missing shards and the set of indices that are closed
*
* @param shards map of shard statuses by shard id
* @param metadata current cluster metadata
* @return tuple of the set of indices with missing shards and the set of closed indices
*/
private static Tuple<Set<String>, Set<String>> indicesWithMissingShards(
final Map<ShardId, ShardSnapshotStatus> shards,
Metadata metadata
) {
Set<String> missing = new HashSet<>();
Set<String> closed = new HashSet<>();
for (final Map.Entry<ShardId, ShardSnapshotStatus> entry : shards.entrySet()) {
if (entry.getValue().state() == ShardState.MISSING) {
if (metadata.hasIndex(entry.getKey().getIndex().getName())
&& metadata.getIndexSafe(entry.getKey().getIndex()).getState() == IndexMetadata.State.CLOSE) {
closed.add(entry.getKey().getIndex().getName());
} else {
missing.add(entry.getKey().getIndex().getName());
}
}
}
return new Tuple<>(missing, closed);
}
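// Sketch of the classification above, assuming shard 0 of open index "a" and shard 0 of closed index "b"
// are both in ShardState.MISSING:
//
//   Tuple<Set<String>, Set<String>> t = indicesWithMissingShards(shards, metadata);
//   t.v1(); // -> ["a"] (open index without a primary)
//   t.v2(); // -> ["b"] (index is closed)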
/**
* Finalizes the snapshot in the repository and then removes it from the cluster state
* <p>
* This is a non-blocking method that runs on a thread from the SNAPSHOT thread pool
*
* @param entry snapshot entry to finalize
* @param metadata current cluster metadata
* @param repositoryData current repository data, or {@code null} if it must first be loaded from the repository
*/
private void endSnapshot(SnapshotsInProgress.Entry entry, Metadata metadata, @Nullable RepositoryData repositoryData) {
final Snapshot snapshot = entry.snapshot();
final boolean newFinalization = endingSnapshots.add(snapshot);
if (entry.repositoryStateId() == RepositoryData.UNKNOWN_REPO_GEN) {
logger.debug("[{}] was aborted before starting", snapshot);
removeFailedSnapshotFromClusterState(
entry.snapshot(),
new SnapshotException(snapshot, "Aborted on initialization"),
repositoryData,
null
);
return;
}
if (entry.isClone() && entry.state() == State.FAILED) {
logger.debug("Removing failed snapshot clone [{}] from cluster state", entry);
removeFailedSnapshotFromClusterState(entry.snapshot(), new SnapshotException(entry.snapshot(), entry.failure()), null, null);
return;
}
final String repoName = entry.repository();
if (tryEnterRepoLoop(repoName)) {
if (repositoryData == null) {
repositoriesService.repository(repoName).getRepositoryData(new ActionListener<RepositoryData>() {
@Override
public void onResponse(RepositoryData repositoryData) {
finalizeSnapshotEntry(entry, metadata, repositoryData);
}
@Override
public void onFailure(Exception e) {
clusterService.submitStateUpdateTask(
"fail repo tasks for [" + repoName + "]",
new FailPendingRepoTasksTask(repoName, e)
);
}
});
} else {
finalizeSnapshotEntry(entry, metadata, repositoryData);
}
} else {
if (newFinalization) {
repositoryOperations.addFinalization(entry, metadata);
}
}
}
/**
* Try starting to run a snapshot finalization or snapshot delete for the given repository. If this method returns
* {@code true} then snapshot finalizations and deletions for the repo may be executed. Once no more operations are
* ready for the repository {@link #leaveRepoLoop(String)} should be invoked so that a subsequent state change that
* causes another operation to become ready can execute.
*
* @return true if a finalization or snapshot delete may be started at this point
*/
private boolean tryEnterRepoLoop(String repository) {
return currentlyFinalizing.add(repository);
}
/**
* Stop polling for ready snapshot finalizations or deletes in state {@link SnapshotDeletionsInProgress.State#STARTED} to execute
* for the given repository.
*/
private void leaveRepoLoop(String repository) {
final boolean removed = currentlyFinalizing.remove(repository);
assert removed;
}
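// Usage pattern for the repo-loop guard above (a sketch of the convention used throughout this class):
//
//   if (tryEnterRepoLoop(repoName)) {
//       // run exactly one finalization or delete, then either chain the next ready operation via
//       // runNextQueuedOperation(...) or release the guard:
//       leaveRepoLoop(repoName);
//   } else {
//       // another finalization/delete holds the guard for repoName; enqueue instead of executing
//   }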
private void finalizeSnapshotEntry(SnapshotsInProgress.Entry entry, Metadata metadata, RepositoryData repositoryData) {
assert currentlyFinalizing.contains(entry.repository());
try {
final String failure = entry.failure();
final Snapshot snapshot = entry.snapshot();
logger.trace("[{}] finalizing snapshot in repository, state: [{}], failure[{}]", snapshot, entry.state(), failure);
ArrayList<SnapshotShardFailure> shardFailures = new ArrayList<>();
for (final Map.Entry<ShardId, ShardSnapshotStatus> shardStatus : entry.shards().entrySet()) {
ShardId shardId = shardStatus.getKey();
ShardSnapshotStatus status = shardStatus.getValue();
final ShardState state = status.state();
if (state.failed()) {
shardFailures.add(new SnapshotShardFailure(status.nodeId(), shardId, status.reason()));
} else if (state.completed() == false) {
shardFailures.add(new SnapshotShardFailure(status.nodeId(), shardId, "skipped"));
} else {
assert state == ShardState.SUCCESS;
}
}
final ShardGenerations shardGenerations = buildGenerations(entry, metadata);
final String repository = snapshot.getRepository();
final SnapshotInfo snapshotInfo = new SnapshotInfo(
snapshot.getSnapshotId(),
shardGenerations.indices().stream().map(IndexId::getName).collect(Collectors.toList()),
entry.dataStreams(),
entry.startTime(),
failure,
threadPool.absoluteTimeInMillis(),
entry.partial() ? shardGenerations.totalShards() : entry.shards().size(),
shardFailures,
entry.includeGlobalState(),
entry.userMetadata(),
entry.remoteStoreIndexShallowCopy(),
0
);
final StepListener<Metadata> metadataListener = new StepListener<>();
final Repository repo = repositoriesService.repository(snapshot.getRepository());
if (entry.isClone()) {
threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.supply(metadataListener, () -> {
final Metadata.Builder metaBuilder = Metadata.builder(repo.getSnapshotGlobalMetadata(entry.source()));
for (IndexId index : entry.indices()) {
metaBuilder.put(repo.getSnapshotIndexMetaData(repositoryData, entry.source(), index), false);
}
return metaBuilder.build();
}));
} else {
metadataListener.onResponse(metadata);
}
metadataListener.whenComplete(
meta -> repo.finalizeSnapshot(
shardGenerations,
repositoryData.getGenId(),
metadataForSnapshot(meta, entry.includeGlobalState(), entry.partial(), entry.dataStreams(), entry.indices()),
snapshotInfo,
entry.version(),
state -> stateWithoutSnapshot(state, snapshot),
Priority.NORMAL,
ActionListener.wrap(newRepoData -> {
completeListenersIgnoringException(endAndGetListenersToResolve(snapshot), Tuple.tuple(newRepoData, snapshotInfo));
logger.info("snapshot [{}] completed with state [{}]", snapshot, snapshotInfo.state());
runNextQueuedOperation(newRepoData, repository, true);
}, e -> handleFinalizationFailure(e, entry, repositoryData))
),
e -> handleFinalizationFailure(e, entry, repositoryData)
);
} catch (Exception e) {
assert false : new AssertionError(e);
handleFinalizationFailure(e, entry, repositoryData);
}
}
/**
* Remove a snapshot from {@link #endingSnapshots} set and return its completion listeners that must be resolved.
*/
private List<ActionListener<Tuple<RepositoryData, SnapshotInfo>>> endAndGetListenersToResolve(Snapshot snapshot) {
// get listeners before removing from the ending snapshots set to not trip assertion in #assertConsistentWithClusterState that
// makes sure we don't have listeners for snapshots that aren't tracked in any internal state of this class
final List<ActionListener<Tuple<RepositoryData, SnapshotInfo>>> listenersToComplete = snapshotCompletionListeners.remove(snapshot);
endingSnapshots.remove(snapshot);
return listenersToComplete;
}
/**
* Handles failure to finalize a snapshot. If the exception indicates that this node was unable to publish a cluster state and stopped
* being the cluster-manager node, then fail all snapshot create and delete listeners executing on this node by delegating to
* {@link #failAllListenersOnMasterFailOver}. Otherwise, i.e. as a result of failing to write to the snapshot repository for some
* reason, remove the snapshot's {@link SnapshotsInProgress.Entry} from the cluster state and move on with other queued snapshot
* operations if there are any.
*
* @param e exception encountered
* @param entry snapshot entry that failed to finalize
* @param repositoryData current repository data for the snapshot's repository
*/
private void handleFinalizationFailure(Exception e, SnapshotsInProgress.Entry entry, RepositoryData repositoryData) {
Snapshot snapshot = entry.snapshot();
if (ExceptionsHelper.unwrap(e, NotClusterManagerException.class, FailedToCommitClusterStateException.class) != null) {
// Failure due to not being cluster-manager any more; don't try to remove the snapshot from the cluster state
// as the next cluster-manager will try ending this snapshot again
logger.debug(() -> new ParameterizedMessage("[{}] failed to update cluster state during snapshot finalization", snapshot), e);
failSnapshotCompletionListeners(
snapshot,
new SnapshotException(snapshot, "Failed to update cluster state during snapshot finalization", e)
);
failAllListenersOnMasterFailOver(e);
} else {
logger.warn(() -> new ParameterizedMessage("[{}] failed to finalize snapshot", snapshot), e);
removeFailedSnapshotFromClusterState(snapshot, e, repositoryData, null);
}
}
/**
* Run the next queued up repository operation for the given repository name.
*
* @param repositoryData current repository data
* @param repository repository name
* @param attemptDelete whether to try and run delete operations that are ready in the cluster state if no
* snapshot create operations remain to execute
*/
private void runNextQueuedOperation(RepositoryData repositoryData, String repository, boolean attemptDelete) {
assert currentlyFinalizing.contains(repository);
final Tuple<SnapshotsInProgress.Entry, Metadata> nextFinalization = repositoryOperations.pollFinalization(repository);
if (nextFinalization == null) {
if (attemptDelete) {
runReadyDeletions(repositoryData, repository);
} else {
leaveRepoLoop(repository);
}
} else {
logger.trace("Moving on to finalizing next snapshot [{}]", nextFinalization);
finalizeSnapshotEntry(nextFinalization.v1(), nextFinalization.v2(), repositoryData);
}
}
/**
* Runs a cluster state update that checks whether we have outstanding snapshot deletions that can be executed and executes them.
*
* TODO: optimize this to execute in a single CS update together with finalizing the latest snapshot
*/
private void runReadyDeletions(RepositoryData repositoryData, String repository) {
clusterService.submitStateUpdateTask("Run ready deletions", new ClusterStateUpdateTask() {
private SnapshotDeletionsInProgress.Entry deletionToRun;
@Override
public ClusterState execute(ClusterState currentState) {
assert readyDeletions(currentState).v1() == currentState
: "Deletes should have been set to ready by finished snapshot deletes and finalizations";
for (SnapshotDeletionsInProgress.Entry entry : currentState.custom(
SnapshotDeletionsInProgress.TYPE,
SnapshotDeletionsInProgress.EMPTY
).getEntries()) {
if (entry.repository().equals(repository) && entry.state() == SnapshotDeletionsInProgress.State.STARTED) {
deletionToRun = entry;
break;
}
}
return currentState;
}
@Override
public void onFailure(String source, Exception e) {
logger.warn("Failed to run ready delete operations", e);
failAllListenersOnMasterFailOver(e);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
if (deletionToRun == null) {
runNextQueuedOperation(repositoryData, repository, false);
} else {
deleteSnapshotsFromRepository(deletionToRun, repositoryData, newState.nodes().getMinNodeVersion());
}
}
});
}
/**
* Finds snapshot delete operations that are ready to execute in the given {@link ClusterState} and computes a new cluster state that
* has all executable deletes marked as executing. Returns a {@link Tuple} of the updated cluster state and all executable deletes.
* This can either be {@link SnapshotDeletionsInProgress.Entry} that were already in state
* {@link SnapshotDeletionsInProgress.State#STARTED} or waiting entries in state {@link SnapshotDeletionsInProgress.State#WAITING}
* that were moved to {@link SnapshotDeletionsInProgress.State#STARTED} in the returned updated cluster state.
*
* @param currentState current cluster state
* @return tuple of an updated cluster state and currently executable snapshot delete operations
*/
private static Tuple<ClusterState, List<SnapshotDeletionsInProgress.Entry>> readyDeletions(ClusterState currentState) {
final SnapshotDeletionsInProgress deletions = currentState.custom(
SnapshotDeletionsInProgress.TYPE,
SnapshotDeletionsInProgress.EMPTY
);
if (deletions.hasDeletionsInProgress() == false) {
return Tuple.tuple(currentState, Collections.emptyList());
}
final SnapshotsInProgress snapshotsInProgress = currentState.custom(SnapshotsInProgress.TYPE);
assert snapshotsInProgress != null;
final Set<String> repositoriesSeen = new HashSet<>();
boolean changed = false;
final ArrayList<SnapshotDeletionsInProgress.Entry> readyDeletions = new ArrayList<>();
final List<SnapshotDeletionsInProgress.Entry> newDeletes = new ArrayList<>();
for (SnapshotDeletionsInProgress.Entry entry : deletions.getEntries()) {
final String repo = entry.repository();
if (repositoriesSeen.add(entry.repository())
&& entry.state() == SnapshotDeletionsInProgress.State.WAITING
&& snapshotsInProgress.entries()
.stream()
.filter(se -> se.repository().equals(repo))
.noneMatch(SnapshotsService::isWritingToRepository)) {
changed = true;
final SnapshotDeletionsInProgress.Entry newEntry = entry.started();
readyDeletions.add(newEntry);
newDeletes.add(newEntry);
} else {
newDeletes.add(entry);
}
}
return Tuple.tuple(
changed
? ClusterState.builder(currentState)
.putCustom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.of(newDeletes))
.build()
: currentState,
readyDeletions
);
}
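// Sketch of the promotion computed above: a WAITING delete for a repository becomes STARTED only if it is
// the first delete seen for that repository and no in-progress snapshot is still writing to it.
//
//   Tuple<ClusterState, List<SnapshotDeletionsInProgress.Entry>> ready = readyDeletions(currentState);
//   ready.v1(); // cluster state with promoted deletes marked STARTED
//   ready.v2(); // the deletes that may now be executed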
/**
* Computes the cluster state resulting from removing a given snapshot create operation from the given state.
*
* @param state current cluster state
* @param snapshot snapshot for which to remove the snapshot operation
* @return updated cluster state
*/
private static ClusterState stateWithoutSnapshot(ClusterState state, Snapshot snapshot) {
SnapshotsInProgress snapshots = state.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
ClusterState result = state;
boolean changed = false;
ArrayList<SnapshotsInProgress.Entry> entries = new ArrayList<>();
for (SnapshotsInProgress.Entry entry : snapshots.entries()) {
if (entry.snapshot().equals(snapshot)) {
changed = true;
} else {
entries.add(entry);
}
}
if (changed) {
result = ClusterState.builder(state)
.putCustom(SnapshotsInProgress.TYPE, SnapshotsInProgress.of(unmodifiableList(entries)))
.build();
}
return readyDeletions(result).v1();
}
/**
* Removes record of running snapshot from cluster state and notifies the listener when this action is complete. This method is only
* used when the snapshot fails for some reason. During normal operation the snapshot repository will remove the
* {@link SnapshotsInProgress.Entry} from the cluster state once it's done finalizing the snapshot.
*
* @param snapshot snapshot that failed
* @param failure exception that failed the snapshot
* @param repositoryData repository data or {@code null} when cleaning up a BwC snapshot that never fully initialized
* @param listener listener to invoke when done; only passed by the BwC path that has {@code repositoryData} set to
* {@code null}
*/
private void removeFailedSnapshotFromClusterState(
Snapshot snapshot,
Exception failure,
@Nullable RepositoryData repositoryData,
@Nullable CleanupAfterErrorListener listener
) {
assert failure != null : "Failure must be supplied";
clusterService.submitStateUpdateTask("remove snapshot metadata", new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
final ClusterState updatedState = stateWithoutSnapshot(currentState, snapshot);
// now check if there are any delete operations that refer to the just failed snapshot and remove the snapshot from them
return updateWithSnapshots(
updatedState,
null,
deletionsWithoutSnapshots(
updatedState.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY),
Collections.singletonList(snapshot.getSnapshotId()),
snapshot.getRepository()
)
);
}
@Override
public void onFailure(String source, Exception e) {
logger.warn(() -> new ParameterizedMessage("[{}] failed to remove snapshot metadata", snapshot), e);
failSnapshotCompletionListeners(
snapshot,
new SnapshotException(snapshot, "Failed to remove snapshot from cluster state", e)
);
failAllListenersOnMasterFailOver(e);
if (listener != null) {
listener.onFailure(e);
}
}
@Override
public void onNoLongerClusterManager(String source) {
failure.addSuppressed(new SnapshotException(snapshot, "no longer cluster-manager"));
failSnapshotCompletionListeners(snapshot, failure);
failAllListenersOnMasterFailOver(new NotClusterManagerException(source));
if (listener != null) {
listener.onNoLongerClusterManager();
}
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
failSnapshotCompletionListeners(snapshot, failure);
if (listener == null) {
if (repositoryData != null) {
runNextQueuedOperation(repositoryData, snapshot.getRepository(), true);
}
} else {
listener.onFailure(null);
}
}
});
}
/**
* Remove the given {@link SnapshotId}s for the given {@code repository} from an instance of {@link SnapshotDeletionsInProgress}.
* If no deletion contained any of the snapshot ids to remove then return {@code null}.
*
* @param deletions snapshot deletions to update
* @param snapshotIds snapshot ids to remove
* @param repository repository that the snapshot ids belong to
* @return updated {@link SnapshotDeletionsInProgress} or {@code null} if unchanged
*/
@Nullable
private static SnapshotDeletionsInProgress deletionsWithoutSnapshots(
SnapshotDeletionsInProgress deletions,
Collection<SnapshotId> snapshotIds,
String repository
) {
boolean changed = false;
List<SnapshotDeletionsInProgress.Entry> updatedEntries = new ArrayList<>(deletions.getEntries().size());
for (SnapshotDeletionsInProgress.Entry entry : deletions.getEntries()) {
if (entry.repository().equals(repository)) {
final List<SnapshotId> updatedSnapshotIds = new ArrayList<>(entry.getSnapshots());
if (updatedSnapshotIds.removeAll(snapshotIds)) {
changed = true;
updatedEntries.add(entry.withSnapshots(updatedSnapshotIds));
} else {
updatedEntries.add(entry);
}
} else {
updatedEntries.add(entry);
}
}
return changed ? SnapshotDeletionsInProgress.of(updatedEntries) : null;
}
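// Minimal sketch of the pruning above, assuming a single delete entry for repository "repo" that contains
// snapshot ids s1 and s2 (hypothetical ids):
//
//   deletionsWithoutSnapshots(deletions, List.of(s1), "repo"); // -> entry now only contains [s2]
//   deletionsWithoutSnapshots(deletions, List.of(s3), "repo"); // -> null, no entry referenced s3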
private void failSnapshotCompletionListeners(Snapshot snapshot, Exception e) {
failListenersIgnoringException(endAndGetListenersToResolve(snapshot), e);
assert repositoryOperations.assertNotQueued(snapshot);
}
/**
* Deletes snapshots from the repository. In-progress snapshots matched by the delete will be aborted before deleting them.
*
* @param request delete snapshot request
* @param listener listener
*/
public void deleteSnapshots(final DeleteSnapshotRequest request, final ActionListener<Void> listener) {
final String[] snapshotNames = request.snapshots();
final String repoName = request.repository();
logger.info(
() -> new ParameterizedMessage(
"deleting snapshots [{}] from repository [{}]",
Strings.arrayToCommaDelimitedString(snapshotNames),
repoName
)
);
final Repository repository = repositoriesService.repository(repoName);
repository.executeConsistentStateUpdate(repositoryData -> new ClusterStateUpdateTask(Priority.NORMAL) {
private Snapshot runningSnapshot;
private ClusterStateUpdateTask deleteFromRepoTask;
private boolean abortedDuringInit = false;
private List<SnapshotId> outstandingDeletes;
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
final Version minNodeVersion = currentState.nodes().getMinNodeVersion();
if (snapshotNames.length > 1 && minNodeVersion.before(MULTI_DELETE_VERSION)) {
throw new IllegalArgumentException(
"Deleting multiple snapshots in a single request is only supported in version [ "
+ MULTI_DELETE_VERSION
+ "] but cluster contained node of version ["
+ currentState.nodes().getMinNodeVersion()
+ "]"
);
}
final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final List<SnapshotsInProgress.Entry> snapshotEntries = findInProgressSnapshots(snapshots, snapshotNames, repoName);
final List<SnapshotId> snapshotIds = matchingSnapshotIds(
snapshotEntries.stream().map(e -> e.snapshot().getSnapshotId()).collect(Collectors.toList()),
repositoryData,
snapshotNames,
repoName
);
validateSnapshotsBackingAnyIndex(currentState.getMetadata().getIndices(), snapshotIds, repoName);
if (snapshotEntries.isEmpty() || minNodeVersion.onOrAfter(SnapshotsService.FULL_CONCURRENCY_VERSION)) {
deleteFromRepoTask = createDeleteStateUpdate(snapshotIds, repoName, repositoryData, Priority.NORMAL, listener);
return deleteFromRepoTask.execute(currentState);
}
assert snapshotEntries.size() == 1 : "Expected just a single running snapshot but saw " + snapshotEntries;
final SnapshotsInProgress.Entry snapshotEntry = snapshotEntries.get(0);
runningSnapshot = snapshotEntry.snapshot();
final Map<ShardId, ShardSnapshotStatus> shards;
final State state = snapshotEntry.state();
final String failure;
outstandingDeletes = new ArrayList<>(snapshotIds);
if (state != State.INIT) {
// INIT state snapshots won't ever be physically written to the repository but all other states will end up in the repo
outstandingDeletes.add(runningSnapshot.getSnapshotId());
}
if (state == State.INIT) {
// snapshot is still initializing, mark it as aborted
shards = snapshotEntry.shards();
assert shards.isEmpty();
failure = "Snapshot was aborted during initialization";
abortedDuringInit = true;
} else if (state == State.STARTED) {
// snapshot is started - mark every non completed shard as aborted
final SnapshotsInProgress.Entry abortedEntry = snapshotEntry.abort();
shards = abortedEntry.shards();
failure = abortedEntry.failure();
} else {
boolean hasUncompletedShards = false;
// Cleanup in case a node has gone missing and the snapshot wasn't updated for some reason
for (final ShardSnapshotStatus shardStatus : snapshotEntry.shards().values()) {
// Check if we still have shard running on existing nodes
if (shardStatus.state().completed() == false
&& shardStatus.nodeId() != null
&& currentState.nodes().get(shardStatus.nodeId()) != null) {
hasUncompletedShards = true;
break;
}
}
if (hasUncompletedShards) {
// snapshot is being finalized - wait for shards to complete finalization process
logger.debug("trying to delete completed snapshot - should wait for shards to finalize on all nodes");
return currentState;
} else {
// no shards to wait for but a node is gone - this is the only case
// where we force to finish the snapshot
logger.debug("trying to delete completed snapshot with no finalizing shards - can delete immediately");
shards = snapshotEntry.shards();
}
failure = snapshotEntry.failure();
}
return ClusterState.builder(currentState)
.putCustom(
SnapshotsInProgress.TYPE,
SnapshotsInProgress.of(
snapshots.entries()
.stream()
// remove init state snapshot we found from a previous cluster-manager if there was one
.filter(existing -> abortedDuringInit == false || existing.equals(snapshotEntry) == false)
.map(existing -> {
if (existing.equals(snapshotEntry)) {
return snapshotEntry.fail(shards, State.ABORTED, failure);
}
return existing;
})
.collect(Collectors.toList())
)
)
.build();
}
@Override
public ClusterManagerTaskThrottler.ThrottlingKey getClusterManagerThrottlingKey() {
return deleteSnapshotTaskKey;
}
@Override
public void onFailure(String source, Exception e) {
listener.onFailure(e);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
if (deleteFromRepoTask != null) {
assert outstandingDeletes == null : "Shouldn't have outstanding deletes after already starting delete task";
deleteFromRepoTask.clusterStateProcessed(source, oldState, newState);
return;
}
if (abortedDuringInit) {
// BwC Path where we removed an outdated INIT state snapshot from the cluster state
logger.info("Successfully aborted snapshot [{}]", runningSnapshot);
if (outstandingDeletes.isEmpty()) {
listener.onResponse(null);
} else {
clusterService.submitStateUpdateTask(
"delete snapshot",
createDeleteStateUpdate(outstandingDeletes, repoName, repositoryData, Priority.IMMEDIATE, listener)
);
}
return;
}
logger.trace("adding snapshot completion listener to wait for deleted snapshot to finish");
addListener(runningSnapshot, ActionListener.wrap(result -> {
logger.debug("deleted snapshot completed - deleting files");
clusterService.submitStateUpdateTask(
"delete snapshot",
createDeleteStateUpdate(outstandingDeletes, repoName, result.v1(), Priority.IMMEDIATE, listener)
);
}, e -> {
if (ExceptionsHelper.unwrap(e, NotClusterManagerException.class, FailedToCommitClusterStateException.class) != null) {
logger.warn("cluster-manager failover before deleted snapshot could complete", e);
// Just pass the exception to the transport handler as is so it is retried on the new cluster-manager
listener.onFailure(e);
} else {
logger.warn("deleted snapshot failed", e);
listener.onFailure(
new SnapshotMissingException(runningSnapshot.getRepository(), runningSnapshot.getSnapshotId(), e)
);
}
}));
}
@Override
public TimeValue timeout() {
return request.clusterManagerNodeTimeout();
}
}, "delete snapshot", listener::onFailure);
}
private static List<SnapshotId> matchingSnapshotIds(
List<SnapshotId> inProgress,
RepositoryData repositoryData,
String[] snapshotsOrPatterns,
String repositoryName
) {
final Map<String, SnapshotId> allSnapshotIds = repositoryData.getSnapshotIds()
.stream()
.collect(Collectors.toMap(SnapshotId::getName, Function.identity()));
final Set<SnapshotId> foundSnapshots = new HashSet<>(inProgress);
for (String snapshotOrPattern : snapshotsOrPatterns) {
if (Regex.isSimpleMatchPattern(snapshotOrPattern)) {
for (Map.Entry<String, SnapshotId> entry : allSnapshotIds.entrySet()) {
if (Regex.simpleMatch(snapshotOrPattern, entry.getKey())) {
foundSnapshots.add(entry.getValue());
}
}
} else {
final SnapshotId foundId = allSnapshotIds.get(snapshotOrPattern);
if (foundId == null) {
if (inProgress.stream().noneMatch(snapshotId -> snapshotId.getName().equals(snapshotOrPattern))) {
throw new SnapshotMissingException(repositoryName, snapshotOrPattern);
}
} else {
foundSnapshots.add(foundId);
}
}
}
return Collections.unmodifiableList(new ArrayList<>(foundSnapshots));
}
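// Sketch of the name/pattern resolution above, assuming the repository holds snapshots "snap-1" and
// "snap-2" and nothing matching is in progress:
//
//   matchingSnapshotIds(List.of(), repositoryData, new String[] { "snap-*" }, "repo"); // [snap-1, snap-2]
//   matchingSnapshotIds(List.of(), repositoryData, new String[] { "snap-9" }, "repo"); // throws SnapshotMissingException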
// Returns in-progress snapshot entries matching the given names and repository in the given cluster state, or an empty list if none match
private static List<SnapshotsInProgress.Entry> findInProgressSnapshots(
SnapshotsInProgress snapshots,
String[] snapshotNames,
String repositoryName
) {
List<SnapshotsInProgress.Entry> entries = new ArrayList<>();
for (SnapshotsInProgress.Entry entry : snapshots.entries()) {
if (entry.repository().equals(repositoryName) && Regex.simpleMatch(snapshotNames, entry.snapshot().getSnapshotId().getName())) {
entries.add(entry);
}
}
return entries;
}
private ClusterStateUpdateTask createDeleteStateUpdate(
List<SnapshotId> snapshotIds,
String repoName,
RepositoryData repositoryData,
Priority priority,
ActionListener<Void> listener
) {
// Short circuit to noop state update if there isn't anything to delete
if (snapshotIds.isEmpty()) {
return new ClusterStateUpdateTask() {
@Override
public ClusterState execute(ClusterState currentState) {
return currentState;
}
@Override
public void onFailure(String source, Exception e) {
listener.onFailure(e);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
listener.onResponse(null);
}
};
}
return new ClusterStateUpdateTask(priority) {
private SnapshotDeletionsInProgress.Entry newDelete;
private boolean reusedExistingDelete = false;
// Snapshots that had all of their shard snapshots in queued state and thus were removed from the
// cluster state right away
private final Collection<Snapshot> completedNoCleanup = new ArrayList<>();
// Snapshots that were aborted and that already wrote data to the repository and now have to be deleted
// from the repository after the cluster state update
private final Collection<SnapshotsInProgress.Entry> completedWithCleanup = new ArrayList<>();
@Override
public ClusterState execute(ClusterState currentState) {
final SnapshotDeletionsInProgress deletionsInProgress = currentState.custom(
SnapshotDeletionsInProgress.TYPE,
SnapshotDeletionsInProgress.EMPTY
);
final Version minNodeVersion = currentState.nodes().getMinNodeVersion();
if (minNodeVersion.before(FULL_CONCURRENCY_VERSION)) {
if (deletionsInProgress.hasDeletionsInProgress()) {
throw new ConcurrentSnapshotExecutionException(
new Snapshot(repoName, snapshotIds.get(0)),
"cannot delete - another snapshot is currently being deleted in [" + deletionsInProgress + "]"
);
}
}
final RepositoryCleanupInProgress repositoryCleanupInProgress = currentState.custom(
RepositoryCleanupInProgress.TYPE,
RepositoryCleanupInProgress.EMPTY
);
if (repositoryCleanupInProgress.hasCleanupInProgress()) {
throw new ConcurrentSnapshotExecutionException(
new Snapshot(repoName, snapshotIds.get(0)),
"cannot delete snapshots while a repository cleanup is in-progress in [" + repositoryCleanupInProgress + "]"
);
}
final RestoreInProgress restoreInProgress = currentState.custom(RestoreInProgress.TYPE, RestoreInProgress.EMPTY);
// don't allow snapshot deletions while a restore is taking place,
// otherwise we could end up deleting a snapshot that is being restored
// and the files the restore depends on would all be gone
for (RestoreInProgress.Entry entry : restoreInProgress) {
if (repoName.equals(entry.snapshot().getRepository()) && snapshotIds.contains(entry.snapshot().getSnapshotId())) {
throw new ConcurrentSnapshotExecutionException(
new Snapshot(repoName, snapshotIds.get(0)),
"cannot delete snapshot during a restore in progress in [" + restoreInProgress + "]"
);
}
}
final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final Set<SnapshotId> activeCloneSources = snapshots.entries()
.stream()
.filter(SnapshotsInProgress.Entry::isClone)
.map(SnapshotsInProgress.Entry::source)
.collect(Collectors.toSet());
for (SnapshotId snapshotId : snapshotIds) {
if (activeCloneSources.contains(snapshotId)) {
throw new ConcurrentSnapshotExecutionException(
new Snapshot(repoName, snapshotId),
"cannot delete snapshot while it is being cloned"
);
}
}
// Snapshot ids that will have to be physically deleted from the repository
final Set<SnapshotId> snapshotIdsRequiringCleanup = new HashSet<>(snapshotIds);
final SnapshotsInProgress updatedSnapshots;
if (minNodeVersion.onOrAfter(FULL_CONCURRENCY_VERSION)) {
updatedSnapshots = SnapshotsInProgress.of(snapshots.entries().stream().map(existing -> {
if (existing.state() == State.STARTED
&& snapshotIdsRequiringCleanup.contains(existing.snapshot().getSnapshotId())) {
// snapshot is started - mark every non-completed shard as aborted
final SnapshotsInProgress.Entry abortedEntry = existing.abort();
if (abortedEntry == null) {
// No work has been done for this snapshot yet so we remove it from the cluster state directly
final Snapshot existingNotYetStartedSnapshot = existing.snapshot();
// Adding the snapshot to #endingSnapshots since we still have to resolve its listeners to not trip
// any leaked listener assertions
if (endingSnapshots.add(existingNotYetStartedSnapshot)) {
completedNoCleanup.add(existingNotYetStartedSnapshot);
}
snapshotIdsRequiringCleanup.remove(existingNotYetStartedSnapshot.getSnapshotId());
} else if (abortedEntry.state().completed()) {
completedWithCleanup.add(abortedEntry);
}
return abortedEntry;
}
return existing;
}).filter(Objects::nonNull).collect(Collectors.toList()));
if (snapshotIdsRequiringCleanup.isEmpty()) {
// We only saw snapshots that could be removed from the cluster state right away, no need to update the deletions
return updateWithSnapshots(currentState, updatedSnapshots, null);
}
} else {
if (snapshots.entries().isEmpty() == false) {
// Other snapshots are still running - cannot continue with the delete
throw new ConcurrentSnapshotExecutionException(
repoName,
snapshotIds.toString(),
"another snapshot is currently running cannot delete"
);
}
updatedSnapshots = snapshots;
}
// add the snapshot deletion to the cluster state
final SnapshotDeletionsInProgress.Entry replacedEntry = deletionsInProgress.getEntries()
.stream()
.filter(entry -> entry.repository().equals(repoName) && entry.state() == SnapshotDeletionsInProgress.State.WAITING)
.findFirst()
.orElse(null);
if (replacedEntry == null) {
final Optional<SnapshotDeletionsInProgress.Entry> foundDuplicate = deletionsInProgress.getEntries()
.stream()
.filter(
entry -> entry.repository().equals(repoName)
&& entry.state() == SnapshotDeletionsInProgress.State.STARTED
&& entry.getSnapshots().containsAll(snapshotIds)
)
.findFirst();
if (foundDuplicate.isPresent()) {
newDelete = foundDuplicate.get();
reusedExistingDelete = true;
return currentState;
}
final List<SnapshotId> toDelete = Collections.unmodifiableList(new ArrayList<>(snapshotIdsRequiringCleanup));
ensureBelowConcurrencyLimit(repoName, toDelete.get(0).getName(), snapshots, deletionsInProgress);
newDelete = new SnapshotDeletionsInProgress.Entry(
toDelete,
repoName,
threadPool.absoluteTimeInMillis(),
repositoryData.getGenId(),
updatedSnapshots.entries()
.stream()
.filter(entry -> repoName.equals(entry.repository()))
.noneMatch(SnapshotsService::isWritingToRepository)
&& deletionsInProgress.getEntries()
.stream()
.noneMatch(
entry -> repoName.equals(entry.repository())
&& entry.state() == SnapshotDeletionsInProgress.State.STARTED
) ? SnapshotDeletionsInProgress.State.STARTED : SnapshotDeletionsInProgress.State.WAITING
);
} else {
newDelete = replacedEntry.withAddedSnapshots(snapshotIdsRequiringCleanup);
}
return updateWithSnapshots(
currentState,
updatedSnapshots,
(replacedEntry == null ? deletionsInProgress : deletionsInProgress.withRemovedEntry(replacedEntry.uuid()))
.withAddedEntry(newDelete)
);
}
@Override
public void onFailure(String source, Exception e) {
endingSnapshots.removeAll(completedNoCleanup);
listener.onFailure(e);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
if (completedNoCleanup.isEmpty() == false) {
logger.info("snapshots {} aborted", completedNoCleanup);
}
for (Snapshot snapshot : completedNoCleanup) {
failSnapshotCompletionListeners(snapshot, new SnapshotException(snapshot, SnapshotsInProgress.ABORTED_FAILURE_TEXT));
}
if (newDelete == null) {
listener.onResponse(null);
} else {
addDeleteListener(newDelete.uuid(), listener);
if (reusedExistingDelete) {
return;
}
if (newDelete.state() == SnapshotDeletionsInProgress.State.STARTED) {
if (tryEnterRepoLoop(repoName)) {
deleteSnapshotsFromRepository(newDelete, repositoryData, newState.nodes().getMinNodeVersion());
} else {
logger.trace("Delete [{}] could not execute directly and was queued", newDelete);
}
} else {
for (SnapshotsInProgress.Entry completedSnapshot : completedWithCleanup) {
endSnapshot(completedSnapshot, newState.metadata(), repositoryData);
}
}
}
}
};
}
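// Illustrative sketch (hypothetical call site, not the original code path): the task built above only takes
// effect once submitted to the cluster service; the provided listener resolves from clusterStateProcessed on
// success or from onFailure if publication fails.
private void exampleSubmitDeleteTask(List<SnapshotId> snapshotIds, String repoName, RepositoryData repositoryData) {
clusterService.submitStateUpdateTask(
"delete snapshot",
createDeleteStateUpdate(snapshotIds, repoName, repositoryData, Priority.NORMAL, ActionListener.wrap(ignored -> {
logger.info("snapshot delete in [{}] accepted", repoName); // cluster state update applied
}, e -> logger.warn(() -> new ParameterizedMessage("snapshot delete in [{}] failed", repoName), e)))
);
}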
/**
* Checks if the given {@link SnapshotsInProgress.Entry} is currently writing to the repository.
*
* @param entry snapshot entry
* @return true if entry is currently writing to the repository
*/
private static boolean isWritingToRepository(SnapshotsInProgress.Entry entry) {
if (entry.state().completed()) {
// Entry is writing to the repo because it's finalizing on cluster-manager
return true;
}
for (final ShardSnapshotStatus value : entry.shards().values()) {
if (value.isActive()) {
// Entry is writing to the repo because it's writing to a shard on a data node or waiting to do so for a concrete shard
return true;
}
}
return false;
}
private void addDeleteListener(String deleteUUID, ActionListener listener) {
snapshotDeletionListeners.computeIfAbsent(deleteUUID, k -> new CopyOnWriteArrayList<>()).add(listener);
}
/**
* Determines the minimum {@link Version} that the snapshot repository must be compatible with from the current nodes in the cluster
* and the contents of the repository. The minimum version is determined as the lowest version found across all snapshots in the
* repository and all nodes in the cluster.
*
* @param minNodeVersion minimum node version in the cluster
* @param repositoryData current {@link RepositoryData} of that repository
* @param excluded snapshot id to ignore when computing the minimum version
* (used to use newer metadata version after a snapshot delete)
* @return minimum node version that must still be able to read the repository metadata
*/
public Version minCompatibleVersion(Version minNodeVersion, RepositoryData repositoryData, @Nullable Collection<SnapshotId> excluded) {
Version minCompatVersion = minNodeVersion;
final Collection<SnapshotId> snapshotIds = repositoryData.getSnapshotIds();
for (SnapshotId snapshotId : snapshotIds.stream()
.filter(excluded == null ? sn -> true : sn -> excluded.contains(sn) == false)
.collect(Collectors.toList())) {
final Version known = repositoryData.getVersion(snapshotId);
// If we don't have the version cached in the repository data yet we load it from the snapshot info blobs
if (known == null) {
assert repositoryData.shardGenerations().totalShards() == 0 : "Saw shard generations ["
+ repositoryData.shardGenerations()
+ "] but did not have versions tracked for snapshot ["
+ snapshotId
+ "]";
return OLD_SNAPSHOT_FORMAT;
} else {
minCompatVersion = minCompatVersion.before(known) ? minCompatVersion : known;
}
}
return minCompatVersion;
}
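// Worked example (hypothetical versions): with a minimum node version of 2.11.0 and a repository holding
// snapshots written by 1.3.0 and 2.9.0, the lowest version wins and the result is 1.3.0; excluding the
// 1.3.0 snapshot's id (e.g. because it is being deleted) would raise the result to 2.9.0.
private Version exampleMinCompatibleVersion(RepositoryData repositoryData) {
return minCompatibleVersion(Version.fromString("2.11.0"), repositoryData, null);
}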
/**
* Checks whether the metadata version supports writing {@link ShardGenerations} to the repository.
*
* @param repositoryMetaVersion version to check
* @return true if version supports {@link ShardGenerations}
*/
public static boolean useShardGenerations(Version repositoryMetaVersion) {
return repositoryMetaVersion.onOrAfter(SHARD_GEN_IN_REPO_DATA_VERSION);
}
/**
* Checks whether the metadata version supports writing index generations to the repository.
*
* @param repositoryMetaVersion version to check
* @return true if version supports index generations in the repository metadata
*/
public static boolean useIndexGenerations(Version repositoryMetaVersion) {
return repositoryMetaVersion.onOrAfter(INDEX_GEN_IN_REPO_DATA_VERSION);
}
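// Minimal sketch: callers gate repository format features on the version computed by minCompatibleVersion,
// so a single snapshot written by an old enough version can keep both checks returning false until it is deleted.
private static boolean exampleSupportsModernRepoFormat(Version repositoryMetaVersion) {
return useShardGenerations(repositoryMetaVersion) && useIndexGenerations(repositoryMetaVersion);
}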
/** Deletes snapshots from the repository
*
* @param deleteEntry delete entry in cluster state
* @param minNodeVersion minimum node version in the cluster
*/
private void deleteSnapshotsFromRepository(SnapshotDeletionsInProgress.Entry deleteEntry, Version minNodeVersion) {
final long expectedRepoGen = deleteEntry.repositoryStateId();
repositoriesService.getRepositoryData(deleteEntry.repository(), new ActionListener<RepositoryData>() {
@Override
public void onResponse(RepositoryData repositoryData) {
assert repositoryData.getGenId() == expectedRepoGen
: "Repository generation should not change as long as a ready delete is found in the cluster state but found ["
+ expectedRepoGen
+ "] in cluster state and ["
+ repositoryData.getGenId()
+ "] in the repository";
deleteSnapshotsFromRepository(deleteEntry, repositoryData, minNodeVersion);
}
@Override
public void onFailure(Exception e) {
clusterService.submitStateUpdateTask(
"fail repo tasks for [" + deleteEntry.repository() + "]",
new FailPendingRepoTasksTask(deleteEntry.repository(), e)
);
}
});
}
/** Deletes snapshots from the repository
*
* @param deleteEntry delete entry in cluster state
* @param repositoryData the {@link RepositoryData} of the repository to delete from
* @param minNodeVersion minimum node version in the cluster
*/
private void deleteSnapshotsFromRepository(
SnapshotDeletionsInProgress.Entry deleteEntry,
RepositoryData repositoryData,
Version minNodeVersion
) {
if (repositoryOperations.startDeletion(deleteEntry.uuid())) {
assert currentlyFinalizing.contains(deleteEntry.repository());
final List<SnapshotId> snapshotIds = deleteEntry.getSnapshots();
assert deleteEntry.state() == SnapshotDeletionsInProgress.State.STARTED : "incorrect state for entry [" + deleteEntry + "]";
final Repository repository = repositoriesService.repository(deleteEntry.repository());
// TODO: Relying on the repository flag to decide the delete flow may lead to shallow snapshot blobs not being taken up for
// cleanup when the repository currently has the flag disabled and we try to delete shallow snapshots taken prior to disabling
// the flag. This can be improved by tracking in RepositoryData whether there were ever any shallow snapshots present in this
// repository.
// SEE https://github.com/opensearch-project/OpenSearch/issues/8610
final boolean remoteStoreShallowCopyEnabled = REMOTE_STORE_INDEX_SHALLOW_COPY.get(repository.getMetadata().settings());
if (remoteStoreShallowCopyEnabled) {
Map<SnapshotId, Long> snapshotsWithPinnedTimestamp = new ConcurrentHashMap<>();
List<SnapshotId> snapshotsWithLockFiles = Collections.synchronizedList(new ArrayList<>());
CountDownLatch latch = new CountDownLatch(1);
threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(() -> {
try {
for (SnapshotId snapshotId : snapshotIds) {
try {
SnapshotInfo snapshotInfo = repository.getSnapshotInfo(snapshotId);
if (snapshotInfo.getPinnedTimestamp() > 0) {
snapshotsWithPinnedTimestamp.put(snapshotId, snapshotInfo.getPinnedTimestamp());
} else {
snapshotsWithLockFiles.add(snapshotId);
}
} catch (Exception e) {
logger.warn("Failed to get snapshot info for {} with exception {}", snapshotId, e);
removeSnapshotDeletionFromClusterState(deleteEntry, e, repositoryData);
}
}
} finally {
latch.countDown();
}
});
try {
latch.await();
if (snapshotsWithLockFiles.size() > 0) {
repository.deleteSnapshotsAndReleaseLockFiles(
snapshotsWithLockFiles,
repositoryData.getGenId(),
minCompatibleVersion(minNodeVersion, repositoryData, snapshotsWithLockFiles),
remoteStoreLockManagerFactory,
ActionListener.wrap(updatedRepoData -> {
logger.info("snapshots {} deleted", snapshotsWithLockFiles);
removeSnapshotDeletionFromClusterState(deleteEntry, null, updatedRepoData);
}, ex -> removeSnapshotDeletionFromClusterState(deleteEntry, ex, repositoryData))
);
}
if (snapshotsWithPinnedTimestamp.size() > 0) {
repository.deleteSnapshotsWithPinnedTimestamp(
snapshotsWithPinnedTimestamp,
repositoryData.getGenId(),
minCompatibleVersion(minNodeVersion, repositoryData, snapshotsWithPinnedTimestamp.keySet()),
remoteSegmentStoreDirectoryFactory,
remoteStorePinnedTimestampService,
ActionListener.wrap(updatedRepoData -> {
logger.info("snapshots {} deleted", snapshotsWithPinnedTimestamp);
removeSnapshotDeletionFromClusterState(deleteEntry, null, updatedRepoData);
}, ex -> removeSnapshotDeletionFromClusterState(deleteEntry, ex, repositoryData))
);
}
} catch (InterruptedException e) {
logger.error("Interrupted while waiting for snapshot info processing", e);
Thread.currentThread().interrupt();
removeSnapshotDeletionFromClusterState(deleteEntry, e, repositoryData);
}
} else {
repository.deleteSnapshots(
snapshotIds,
repositoryData.getGenId(),
minCompatibleVersion(minNodeVersion, repositoryData, snapshotIds),
ActionListener.wrap(updatedRepoData -> {
logger.info("snapshots {} deleted", snapshotIds);
removeSnapshotDeletionFromClusterState(deleteEntry, null, updatedRepoData);
}, ex -> removeSnapshotDeletionFromClusterState(deleteEntry, ex, repositoryData))
);
}
}
}
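// Summary of the flow above: when remote-store shallow copies are enabled, snapshot infos are loaded on the
// SNAPSHOT thread pool and split into timestamp-pinned snapshots and lock-file based ones, each group taking
// its dedicated repository delete path; otherwise all ids go through the plain deleteSnapshots call.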
/**
* Removes a {@link SnapshotDeletionsInProgress.Entry} from {@link SnapshotDeletionsInProgress} in the cluster state after it executed
* on the repository.
*
* @param deleteEntry delete entry to remove from the cluster state
* @param failure failure encountered while executing the delete on the repository or {@code null} if the delete executed
* successfully
* @param repositoryData current {@link RepositoryData} for the repository we just ran the delete on.
*/
private void removeSnapshotDeletionFromClusterState(
final SnapshotDeletionsInProgress.Entry deleteEntry,
@Nullable final Exception failure,
final RepositoryData repositoryData
) {
final ClusterStateUpdateTask clusterStateUpdateTask;
if (failure == null) {
// If we didn't have a failure during the snapshot delete we will remove all snapshot ids that the delete successfully removed
// from the repository from enqueued snapshot delete entries during the cluster state update. After the cluster state update we
// resolve the delete listeners with the latest repository data from after the delete.
clusterStateUpdateTask = new RemoveSnapshotDeletionAndContinueTask(deleteEntry, repositoryData) {
@Override
protected SnapshotDeletionsInProgress filterDeletions(SnapshotDeletionsInProgress deletions) {
final SnapshotDeletionsInProgress updatedDeletions = deletionsWithoutSnapshots(
deletions,
deleteEntry.getSnapshots(),
deleteEntry.repository()
);
return updatedDeletions == null ? deletions : updatedDeletions;
}
@Override
protected void handleListeners(List<ActionListener<Void>> deleteListeners) {
assert repositoryData.getSnapshotIds().stream().noneMatch(deleteEntry.getSnapshots()::contains)
: "Repository data contained snapshot ids "
+ repositoryData.getSnapshotIds()
+ " that should should been deleted by ["
+ deleteEntry
+ "]";
completeListenersIgnoringException(deleteListeners, null);
}
};
} else {
// The delete failed to execute on the repository. We remove it from the cluster state and then fail all listeners associated
// with it.
clusterStateUpdateTask = new RemoveSnapshotDeletionAndContinueTask(deleteEntry, repositoryData) {
@Override
protected void handleListeners(List<ActionListener<Void>> deleteListeners) {
failListenersIgnoringException(deleteListeners, failure);
}
};
}
clusterService.submitStateUpdateTask("remove snapshot deletion metadata", clusterStateUpdateTask);
}
/**
* Handle snapshot or delete failure due to not being cluster-manager any more, so we don't try to run additional cluster state updates.
* The next cluster-manager will try handling the missing operations. All we can do is fail all the listeners on this cluster-manager node so that
* transport requests return and we don't leak listeners.
*
* @param e exception that caused us to realize we are not cluster-manager any longer
*/
private void failAllListenersOnMasterFailOver(Exception e) {
logger.debug("Failing all snapshot operation listeners because this node is not cluster-manager any longer", e);
synchronized (currentlyFinalizing) {
if (ExceptionsHelper.unwrap(e, NotClusterManagerException.class, FailedToCommitClusterStateException.class) != null) {
repositoryOperations.clear();
for (Snapshot snapshot : new HashSet<>(snapshotCompletionListeners.keySet())) {
failSnapshotCompletionListeners(snapshot, new SnapshotException(snapshot, "no longer cluster-manager"));
}
final Exception wrapped = new RepositoryException("_all", "Failed to update cluster state during repository operation", e);
for (Iterator<List<ActionListener<Void>>> iterator = snapshotDeletionListeners.values().iterator(); iterator.hasNext();) {
final List<ActionListener<Void>> listeners = iterator.next();
iterator.remove();
failListenersIgnoringException(listeners, wrapped);
}
assert snapshotDeletionListeners.isEmpty() : "No new listeners should have been added but saw " + snapshotDeletionListeners;
} else {
assert false : new AssertionError(
"Modifying snapshot state should only ever fail because we failed to publish new state",
e
);
logger.error("Unexpected failure during cluster state update", e);
}
currentlyFinalizing.clear();
}
}
/**
* A cluster state update that will remove a given {@link SnapshotDeletionsInProgress.Entry} from the cluster state
* and trigger running the next snapshot-delete or -finalization operation available to execute if there is one
* ready in the cluster state as a result of this state update.
*/
private abstract class RemoveSnapshotDeletionAndContinueTask extends ClusterStateUpdateTask {
// Snapshots that can be finalized after the delete operation has been removed from the cluster state
protected final List<SnapshotsInProgress.Entry> newFinalizations = new ArrayList<>();
private List<SnapshotDeletionsInProgress.Entry> readyDeletions = Collections.emptyList();
protected final SnapshotDeletionsInProgress.Entry deleteEntry;
private final RepositoryData repositoryData;
RemoveSnapshotDeletionAndContinueTask(SnapshotDeletionsInProgress.Entry deleteEntry, RepositoryData repositoryData) {
this.deleteEntry = deleteEntry;
this.repositoryData = repositoryData;
}
@Override
public ClusterState execute(ClusterState currentState) {
final SnapshotDeletionsInProgress deletions = currentState.custom(SnapshotDeletionsInProgress.TYPE);
assert deletions != null : "We only run this if there were deletions in the cluster state before";
final SnapshotDeletionsInProgress updatedDeletions = deletions.withRemovedEntry(deleteEntry.uuid());
if (updatedDeletions == deletions) {
return currentState;
}
final SnapshotDeletionsInProgress newDeletions = filterDeletions(updatedDeletions);
final Tuple<ClusterState, List<SnapshotDeletionsInProgress.Entry>> res = readyDeletions(
updateWithSnapshots(currentState, updatedSnapshotsInProgress(currentState, newDeletions), newDeletions)
);
readyDeletions = res.v2();
return res.v1();
}
@Override
public void onFailure(String source, Exception e) {
logger.warn(() -> new ParameterizedMessage("{} failed to remove snapshot deletion metadata", deleteEntry), e);
repositoryOperations.finishDeletion(deleteEntry.uuid());
failAllListenersOnMasterFailOver(e);
}
protected SnapshotDeletionsInProgress filterDeletions(SnapshotDeletionsInProgress deletions) {
return deletions;
}
@Override
public final void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
final List<ActionListener<Void>> deleteListeners;
repositoryOperations.finishDeletion(deleteEntry.uuid());
deleteListeners = snapshotDeletionListeners.remove(deleteEntry.uuid());
handleListeners(deleteListeners);
if (newFinalizations.isEmpty()) {
if (readyDeletions.isEmpty()) {
leaveRepoLoop(deleteEntry.repository());
} else {
for (SnapshotDeletionsInProgress.Entry readyDeletion : readyDeletions) {
deleteSnapshotsFromRepository(readyDeletion, repositoryData, newState.nodes().getMinNodeVersion());
}
}
} else {
leaveRepoLoop(deleteEntry.repository());
assert readyDeletions.stream().noneMatch(entry -> entry.repository().equals(deleteEntry.repository()))
: "New finalizations " + newFinalizations + " added even though deletes " + readyDeletions + " are ready";
for (SnapshotsInProgress.Entry entry : newFinalizations) {
endSnapshot(entry, newState.metadata(), repositoryData);
}
}
}
/**
* Invoke snapshot delete listeners for {@link #deleteEntry}.
*
* @param deleteListeners delete snapshot listeners or {@code null} if there weren't any for {@link #deleteEntry}.
*/
protected abstract void handleListeners(@Nullable List<ActionListener<Void>> deleteListeners);
/**
* Computes an updated {@link SnapshotsInProgress} that takes into account an updated version of
* {@link SnapshotDeletionsInProgress} that has a {@link SnapshotDeletionsInProgress.Entry} removed from it
* relative to the {@link SnapshotDeletionsInProgress} found in {@code currentState}.
* The removal of a delete from the cluster state can trigger two possible actions on in-progress snapshots:
*
* - Snapshots that had unfinished shard snapshots in state {@link ShardSnapshotStatus#UNASSIGNED_QUEUED} that
* could not be started because the delete was running can have those started.
* - Snapshots that had all their shards reach a completed state while a delete was running (e.g. as a result of
* nodes dropping out of the cluster or another incoming delete aborting them) need not be updated in the cluster
* state but need to have their finalization triggered now that it's possible with the removal of the delete
* from the state.
*
* @param currentState current cluster state
* @param updatedDeletions deletions with removed entry
* @return updated snapshot in progress instance or {@code null} if there are no changes to it
*/
@Nullable
private SnapshotsInProgress updatedSnapshotsInProgress(ClusterState currentState, SnapshotDeletionsInProgress updatedDeletions) {
final SnapshotsInProgress snapshotsInProgress = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final List<SnapshotsInProgress.Entry> snapshotEntries = new ArrayList<>();
// Keep track of shardIds that we started snapshots for as a result of removing this delete so we don't assign
// them to multiple snapshots by accident
final Set<ShardId> reassignedShardIds = new HashSet<>();
boolean changed = false;
final String repoName = deleteEntry.repository();
// Computing the new assignments can be quite costly, only do it once below if actually needed
Map<ShardId, ShardSnapshotStatus> shardAssignments = null;
for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) {
if (entry.repository().equals(repoName)) {
if (entry.state().completed() == false) {
// Collect waiting shards in this entry that we can assign now that we are done with the deletion
final List<ShardId> canBeUpdated = new ArrayList<>();
for (final Map.Entry<ShardId, ShardSnapshotStatus> value : entry.shards().entrySet()) {
if (value.getValue().equals(ShardSnapshotStatus.UNASSIGNED_QUEUED)
&& reassignedShardIds.contains(value.getKey()) == false) {
canBeUpdated.add(value.getKey());
}
}
if (canBeUpdated.isEmpty()) {
// No shards can be updated in this snapshot so we just add it as is again
snapshotEntries.add(entry);
} else {
if (shardAssignments == null) {
shardAssignments = shards(
snapshotsInProgress,
updatedDeletions,
currentState.metadata(),
currentState.routingTable(),
entry.indices(),
entry.version().onOrAfter(SHARD_GEN_IN_REPO_DATA_VERSION),
repositoryData,
repoName
);
}
final Map<ShardId, ShardSnapshotStatus> updatedAssignmentsBuilder = new HashMap<>(entry.shards());
for (ShardId shardId : canBeUpdated) {
final ShardSnapshotStatus updated = shardAssignments.get(shardId);
if (updated == null) {
// We don't have a new assignment for this shard because its index was concurrently deleted
assert currentState.routingTable().hasIndex(shardId.getIndex()) == false : "Missing assignment for ["
+ shardId
+ "]";
updatedAssignmentsBuilder.put(shardId, ShardSnapshotStatus.MISSING);
} else {
final boolean added = reassignedShardIds.add(shardId);
assert added;
updatedAssignmentsBuilder.put(shardId, updated);
}
}
final SnapshotsInProgress.Entry updatedEntry = entry.withShardStates(updatedAssignmentsBuilder);
snapshotEntries.add(updatedEntry);
changed = true;
// When all the required shards for a snapshot are missing, the snapshot state will be "completed"
// and we need to finalize it.
if (updatedEntry.state().completed()) {
newFinalizations.add(entry);
}
}
} else {
// Entry is already completed so we will finalize it now that the delete doesn't block us after
// this CS update finishes
newFinalizations.add(entry);
snapshotEntries.add(entry);
}
} else {
// Entry is for another repository we just keep it as is
snapshotEntries.add(entry);
}
}
return changed ? SnapshotsInProgress.of(snapshotEntries) : null;
}
}
/**
* Shortcut to build new {@link ClusterState} from the current state and updated values of {@link SnapshotsInProgress} and
* {@link SnapshotDeletionsInProgress}.
*
* @param state current cluster state
* @param snapshotsInProgress new value for {@link SnapshotsInProgress} or {@code null} if it's unchanged
* @param snapshotDeletionsInProgress new value for {@link SnapshotDeletionsInProgress} or {@code null} if it's unchanged
* @return updated cluster state
*/
public static ClusterState updateWithSnapshots(
ClusterState state,
@Nullable SnapshotsInProgress snapshotsInProgress,
@Nullable SnapshotDeletionsInProgress snapshotDeletionsInProgress
) {
if (snapshotsInProgress == null && snapshotDeletionsInProgress == null) {
return state;
}
ClusterState.Builder builder = ClusterState.builder(state);
if (snapshotsInProgress != null) {
builder.putCustom(SnapshotsInProgress.TYPE, snapshotsInProgress);
}
if (snapshotDeletionsInProgress != null) {
builder.putCustom(SnapshotDeletionsInProgress.TYPE, snapshotDeletionsInProgress);
}
return builder.build();
}
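// Minimal usage sketch: null means "unchanged", so the following publishes a state with the deletions custom
// cleared while leaving SnapshotsInProgress untouched; passing null for both arguments is a no-op by design.
private static ClusterState exampleClearDeletions(ClusterState state) {
return updateWithSnapshots(state, null, SnapshotDeletionsInProgress.EMPTY);
}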
private static <T> void failListenersIgnoringException(@Nullable List<ActionListener<T>> listeners, Exception failure) {
if (listeners != null) {
try {
ActionListener.onFailure(listeners, failure);
} catch (Exception ex) {
assert false : new AssertionError(ex);
logger.warn("Failed to notify listeners", ex);
}
}
}
private static <T> void completeListenersIgnoringException(@Nullable List<ActionListener<T>> listeners, T result) {
if (listeners != null) {
try {
ActionListener.onResponse(listeners, result);
} catch (Exception ex) {
assert false : new AssertionError(ex);
logger.warn("Failed to notify listeners", ex);
}
}
}
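// Illustrative sketch: listeners for one logical operation are resolved in bulk, and one misbehaving listener
// must not prevent the others from being notified; hence the "IgnoringException" helpers above.
private static void exampleNotifyAll(@Nullable List<ActionListener<Void>> listeners, @Nullable Exception failure) {
if (failure == null) {
completeListenersIgnoringException(listeners, null);
} else {
failListenersIgnoringException(listeners, failure);
}
}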
/**
* Calculates the assignment of shards to data nodes for a new snapshot based on the given cluster state and the
* indices that should be included in the snapshot.
*
* @param indices Indices to snapshot
* @param useShardGenerations whether to write {@link ShardGenerations} during the snapshot
* @return list of shard to be included into current snapshot
*/
private static Map<ShardId, ShardSnapshotStatus> shards(
SnapshotsInProgress snapshotsInProgress,
@Nullable SnapshotDeletionsInProgress deletionsInProgress,
Metadata metadata,
RoutingTable routingTable,
List<IndexId> indices,
boolean useShardGenerations,
RepositoryData repositoryData,
String repoName
) {
final Map<ShardId, ShardSnapshotStatus> builder = new HashMap<>();
final ShardGenerations shardGenerations = repositoryData.shardGenerations();
final InFlightShardSnapshotStates inFlightShardStates = InFlightShardSnapshotStates.forRepo(
repoName,
snapshotsInProgress.entries()
);
final boolean readyToExecute = deletionsInProgress == null
|| deletionsInProgress.getEntries()
.stream()
.noneMatch(entry -> entry.repository().equals(repoName) && entry.state() == SnapshotDeletionsInProgress.State.STARTED);
for (IndexId index : indices) {
final String indexName = index.getName();
final boolean isNewIndex = repositoryData.getIndices().containsKey(indexName) == false;
IndexMetadata indexMetadata = metadata.index(indexName);
if (indexMetadata == null) {
// The index was deleted before we managed to start the snapshot - mark it as missing.
builder.put(new ShardId(indexName, IndexMetadata.INDEX_UUID_NA_VALUE, 0), ShardSnapshotStatus.MISSING);
} else {
final IndexRoutingTable indexRoutingTable = routingTable.index(indexName);
for (int i = 0; i < indexMetadata.getNumberOfShards(); i++) {
final ShardId shardId = indexRoutingTable.shard(i).shardId();
final String shardRepoGeneration;
if (useShardGenerations) {
final String inFlightGeneration = inFlightShardStates.generationForShard(index, shardId.id(), shardGenerations);
if (inFlightGeneration == null && isNewIndex) {
assert shardGenerations.getShardGen(index, shardId.getId()) == null : "Found shard generation for new index ["
+ index
+ "]";
shardRepoGeneration = ShardGenerations.NEW_SHARD_GEN;
} else {
shardRepoGeneration = inFlightGeneration;
}
} else {
shardRepoGeneration = null;
}
final ShardSnapshotStatus shardSnapshotStatus;
if (indexRoutingTable == null) {
shardSnapshotStatus = new SnapshotsInProgress.ShardSnapshotStatus(
null,
ShardState.MISSING,
"missing routing table",
shardRepoGeneration
);
} else {
ShardRouting primary = indexRoutingTable.shard(i).primaryShard();
if (readyToExecute == false || inFlightShardStates.isActive(indexName, i)) {
shardSnapshotStatus = ShardSnapshotStatus.UNASSIGNED_QUEUED;
} else if (primary == null || !primary.assignedToNode()) {
shardSnapshotStatus = new ShardSnapshotStatus(
null,
ShardState.MISSING,
"primary shard is not allocated",
shardRepoGeneration
);
} else if (primary.relocating() || primary.initializing()) {
shardSnapshotStatus = new ShardSnapshotStatus(primary.currentNodeId(), ShardState.WAITING, shardRepoGeneration);
} else if (!primary.started()) {
shardSnapshotStatus = new ShardSnapshotStatus(
primary.currentNodeId(),
ShardState.MISSING,
"primary shard hasn't been started yet",
shardRepoGeneration
);
} else {
shardSnapshotStatus = new ShardSnapshotStatus(primary.currentNodeId(), shardRepoGeneration);
}
}
builder.put(shardId, shardSnapshotStatus);
}
}
}
return Collections.unmodifiableMap(builder);
}
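// Decision summary for the assignment above, in the order the checks run:
// index metadata missing -> MISSING (index was deleted before the snapshot started)
// routing table missing -> MISSING ("missing routing table")
// delete running or shard already snapshotting -> UNASSIGNED_QUEUED (picked up once the repository frees up)
// primary not allocated -> MISSING ("primary shard is not allocated")
// primary relocating or initializing -> WAITING (snapshot starts once the shard settles)
// primary not started -> MISSING ("primary shard hasn't been started yet")
// otherwise -> assigned to the primary's current node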
private static ShardGenerations buildShardsGenerationFromRepositoryData(
Metadata metadata,
RoutingTable routingTable,
List<IndexId> indices,
RepositoryData repositoryData
) {
ShardGenerations.Builder builder = ShardGenerations.builder();
final ShardGenerations shardGenerations = repositoryData.shardGenerations();
for (IndexId index : indices) {
final String indexName = index.getName();
final boolean isNewIndex = repositoryData.getIndices().containsKey(indexName) == false;
IndexMetadata indexMetadata = metadata.index(indexName);
final IndexRoutingTable indexRoutingTable = routingTable.index(indexName);
for (int i = 0; i < indexMetadata.getNumberOfShards(); i++) {
final ShardId shardId = indexRoutingTable.shard(i).shardId();
final String shardRepoGeneration;
if (isNewIndex) {
assert shardGenerations.getShardGen(index, shardId.getId()) == null : "Found shard generation for new index ["
+ index
+ "]";
shardRepoGeneration = ShardGenerations.NEW_SHARD_GEN;
} else {
shardRepoGeneration = shardGenerations.getShardGen(index, shardId.id());
}
builder.put(index, shardId.id(), shardRepoGeneration);
}
}
return builder.build();
}
/**
* Returns the data streams that are currently being snapshotted (with partial == false) and that are contained in the
* indices-to-check set.
*/
public static Set<String> snapshottingDataStreams(final ClusterState currentState, final Set<String> dataStreamsToCheck) {
final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE);
if (snapshots == null) {
return emptySet();
}
Map<String, DataStream> dataStreams = currentState.metadata().dataStreams();
return snapshots.entries()
.stream()
.filter(e -> e.partial() == false)
.flatMap(e -> e.dataStreams().stream())
.filter(ds -> dataStreams.containsKey(ds) && dataStreamsToCheck.contains(ds))
.collect(Collectors.toSet());
}
/**
* Returns the indices that are currently being snapshotted (with partial == false) and that are contained in the indices-to-check set.
*/
public static Set<Index> snapshottingIndices(final ClusterState currentState, final Set<Index> indicesToCheck) {
final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE);
if (snapshots == null) {
return emptySet();
}
final Set<Index> indices = new HashSet<>();
for (final SnapshotsInProgress.Entry entry : snapshots.entries()) {
if (entry.partial() == false) {
for (IndexId index : entry.indices()) {
IndexMetadata indexMetadata = currentState.metadata().index(index.getName());
if (indexMetadata != null && indicesToCheck.contains(indexMetadata.getIndex())) {
indices.add(indexMetadata.getIndex());
}
}
}
}
return indices;
}
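// Minimal sketch (hypothetical caller): a non-empty result means at least one of the given indices is held by
// a running non-partial snapshot and should not, for example, be deleted yet.
private static boolean exampleSafeToDelete(ClusterState state, Set<Index> indicesToDelete) {
return snapshottingIndices(state, indicesToDelete).isEmpty();
}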
/**
* Adds snapshot completion listener
*
* @param snapshot Snapshot to listen for
* @param listener listener
*/
private void addListener(Snapshot snapshot, ActionListener<Tuple<RepositoryData, SnapshotInfo>> listener) {
snapshotCompletionListeners.computeIfAbsent(snapshot, k -> new CopyOnWriteArrayList<>()).add(listener);
}
@Override
protected void doStart() {
assert this.updateSnapshotStatusHandler != null;
assert transportService.getRequestHandler(UPDATE_SNAPSHOT_STATUS_ACTION_NAME) != null;
}
@Override
protected void doStop() {
}
@Override
protected void doClose() {
clusterService.removeApplier(this);
}
/**
* Assert that no in-memory state for any running snapshot-create or -delete operation exists in this instance.
*/
public boolean assertAllListenersResolved() {
final DiscoveryNode localNode = clusterService.localNode();
assert endingSnapshots.isEmpty() : "Found leaked ending snapshots " + endingSnapshots + " on [" + localNode + "]";
assert snapshotCompletionListeners.isEmpty() : "Found leaked snapshot completion listeners "
+ snapshotCompletionListeners
+ " on ["
+ localNode
+ "]";
assert currentlyFinalizing.isEmpty() : "Found leaked finalizations " + currentlyFinalizing + " on [" + localNode + "]";
assert snapshotDeletionListeners.isEmpty() : "Found leaked snapshot delete listeners "
+ snapshotDeletionListeners
+ " on ["
+ localNode
+ "]";
assert repositoryOperations.isEmpty() : "Found leaked snapshots to finalize " + repositoryOperations + " on [" + localNode + "]";
return true;
}
/**
* Executor that applies {@link ShardSnapshotUpdate}s to the current cluster state. The algorithm works as follows:
* Every shard snapshot or clone state update can result in multiple snapshots being updated. In order to determine whether or not a
* shard update has an effect we use an outer loop over all current executing snapshot operations that iterates over them in the order
* they were started in and an inner loop over the list of shard update tasks.
*
* If the inner loop finds that a shard update task applies to a given snapshot and either a shard-snapshot or shard-clone operation in
* it then it will update the state of the snapshot entry accordingly. If that update was a noop, then the task is removed from the
* iteration as it was already applied before and likely just arrived on the cluster-manager node again due to retries upstream.
* If the update was not a noop, then it means that the shard it applied to is now available for another snapshot or clone operation
* to be re-assigned if there is another snapshot operation that is waiting for the shard to become available. We therefore record the
* fact that a task was executed by adding it to a collection of executed tasks. If a subsequent execution of the outer loop finds that
* a task in the executed tasks collection applied to a shard it was waiting for to become available, then the shard snapshot operation
* will be started for that snapshot entry and the task removed from the collection of tasks that need to be applied to snapshot
* entries since it can not have any further effects.
*
* Package private to allow for tests.
*/
static final ClusterStateTaskExecutor<ShardSnapshotUpdate> SHARD_STATE_EXECUTOR = new ClusterStateTaskExecutor<ShardSnapshotUpdate>() {
@Override
public ClusterTasksResult<ShardSnapshotUpdate> execute(ClusterState currentState, List<ShardSnapshotUpdate> tasks)
throws Exception {
return shardStateExecutor.execute(currentState, tasks);
}
@Override
public ClusterManagerTaskThrottler.ThrottlingKey getClusterManagerThrottlingKey() {
return updateSnapshotStateTaskKey;
}
};
static final ClusterStateTaskExecutor<ShardSnapshotUpdate> shardStateExecutor = (currentState, tasks) -> {
int changedCount = 0;
int startedCount = 0;
final List<SnapshotsInProgress.Entry> entries = new ArrayList<>();
final String localNodeId = currentState.nodes().getLocalNodeId();
// Tasks to check for updates for running snapshots.
final List<ShardSnapshotUpdate> unconsumedTasks = new ArrayList<>(tasks);
// Tasks that were used to complete an existing in-progress shard snapshot
final Set<ShardSnapshotUpdate> executedTasks = new HashSet<>();
// Outer loop over all snapshot entries in the order they were created in
for (SnapshotsInProgress.Entry entry : currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY).entries()) {
if (entry.state().completed()) {
// completed snapshots do not require any updates so we just add them to the new list and keep going
entries.add(entry);
continue;
}
Map<ShardId, ShardSnapshotStatus> shards = null;
Map<RepositoryShardId, ShardSnapshotStatus> clones = null;
Map<String, IndexId> indicesLookup = null;
// inner loop over all the shard updates that are potentially applicable to the current snapshot entry
for (Iterator<ShardSnapshotUpdate> iterator = unconsumedTasks.iterator(); iterator.hasNext();) {
final ShardSnapshotUpdate updateSnapshotState = iterator.next();
final Snapshot updatedSnapshot = updateSnapshotState.snapshot;
final String updatedRepository = updatedSnapshot.getRepository();
if (entry.repository().equals(updatedRepository) == false) {
// the update applies to a different repository so it is irrelevant here
continue;
}
if (updateSnapshotState.isClone()) {
// The update applied to a shard clone operation
final RepositoryShardId finishedShardId = updateSnapshotState.repoShardId;
if (entry.snapshot().getSnapshotId().equals(updatedSnapshot.getSnapshotId())) {
assert entry.isClone() : "Non-clone snapshot ["
+ entry
+ "] received update for clone ["
+ updateSnapshotState
+ "]";
final ShardSnapshotStatus existing = entry.clones().get(finishedShardId);
if (existing == null) {
logger.warn(
"Received clone shard snapshot status update [{}] but this shard is not tracked in [{}]",
updateSnapshotState,
entry
);
assert false
: "This should never happen, cluster-manager will not submit a state update for a non-existing clone";
continue;
}
if (existing.state().completed()) {
// No point in doing noop updates that might happen if data nodes resend shard status after a disconnect.
iterator.remove();
continue;
}
logger.trace(
"[{}] Updating shard clone [{}] with status [{}]",
updatedSnapshot,
finishedShardId,
updateSnapshotState.updatedState.state()
);
if (clones == null) {
clones = new HashMap<>(entry.clones());
}
changedCount++;
clones.put(finishedShardId, updateSnapshotState.updatedState);
executedTasks.add(updateSnapshotState);
} else if (executedTasks.contains(updateSnapshotState)) {
// the update was already executed on the clone operation it applied to, now we check if it may be possible to
// start a shard snapshot or clone operation on the current entry
if (entry.isClone()) {
// current entry is a clone operation
final ShardSnapshotStatus existingStatus = entry.clones().get(finishedShardId);
if (existingStatus == null || existingStatus.state() != ShardState.QUEUED) {
continue;
}
if (clones == null) {
clones = new HashMap<>(entry.clones());
}
final ShardSnapshotStatus finishedStatus = updateSnapshotState.updatedState;
logger.trace(
"Starting clone [{}] on [{}] with generation [{}]",
finishedShardId,
finishedStatus.nodeId(),
finishedStatus.generation()
);
assert finishedStatus.nodeId().equals(localNodeId) : "Clone updated with node id ["
+ finishedStatus.nodeId()
+ "] but local node id is ["
+ localNodeId
+ "]";
clones.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), finishedStatus.generation()));
iterator.remove();
} else {
// current entry is a snapshot operation so we must translate the repository shard id to a routing shard id
final IndexMetadata indexMeta = currentState.metadata().index(finishedShardId.indexName());
if (indexMeta == null) {
// The index name that finished cloning does not exist in the cluster state so it isn't relevant to a
// normal snapshot
continue;
}
final ShardId finishedRoutingShardId = new ShardId(indexMeta.getIndex(), finishedShardId.shardId());
final ShardSnapshotStatus existingStatus = entry.shards().get(finishedRoutingShardId);
if (existingStatus == null || existingStatus.state() != ShardState.QUEUED) {
continue;
}
if (shards == null) {
shards = new HashMap<>(entry.shards());
}
final ShardSnapshotStatus finishedStatus = updateSnapshotState.updatedState;
logger.trace(
"Starting [{}] on [{}] with generation [{}]",
finishedShardId,
finishedStatus.nodeId(),
finishedStatus.generation()
);
// A clone was updated, so we must use the correct data node id for the reassignment as an actual
// shard snapshot
final ShardSnapshotStatus shardSnapshotStatus = startShardSnapshotAfterClone(
currentState,
updateSnapshotState.updatedState.generation(),
finishedRoutingShardId
);
shards.put(finishedRoutingShardId, shardSnapshotStatus);
if (shardSnapshotStatus.isActive()) {
// only remove the update from the list of tasks that might hold a reusable shard if we actually
// started a snapshot and didn't just fail
iterator.remove();
}
}
}
} else {
// a (non-clone) shard snapshot operation was updated
final ShardId finishedShardId = updateSnapshotState.shardId;
if (entry.snapshot().getSnapshotId().equals(updatedSnapshot.getSnapshotId())) {
final ShardSnapshotStatus existing = entry.shards().get(finishedShardId);
if (existing == null) {
logger.warn(
"Received shard snapshot status update [{}] but this shard is not tracked in [{}]",
updateSnapshotState,
entry
);
assert false : "This should never happen, data nodes should only send updates for expected shards";
continue;
}
if (existing.state().completed()) {
// No point in doing noop updates that might happen if data nodes resend shard status after a disconnect.
iterator.remove();
continue;
}
logger.trace(
"[{}] Updating shard [{}] with status [{}]",
updatedSnapshot,
finishedShardId,
updateSnapshotState.updatedState.state()
);
if (shards == null) {
shards = new HashMap<>(entry.shards());
}
shards.put(finishedShardId, updateSnapshotState.updatedState);
executedTasks.add(updateSnapshotState);
changedCount++;
} else if (executedTasks.contains(updateSnapshotState)) {
// We applied the update for a shard snapshot state to its snapshot entry, now check if we can update
// either a clone or a snapshot
if (entry.isClone()) {
// Since we updated a normal snapshot we need to translate its shard ids to repository shard ids which requires
// a lookup for the index ids
if (indicesLookup == null) {
indicesLookup = entry.indices().stream().collect(Collectors.toMap(IndexId::getName, Function.identity()));
}
// shard snapshot was completed, we check if we can start a clone operation for the same repo shard
final IndexId indexId = indicesLookup.get(finishedShardId.getIndexName());
// If the lookup finds the index id then at least the entry is concerned with the index id just updated
// so we check on a shard level
if (indexId != null) {
final RepositoryShardId repoShardId = new RepositoryShardId(indexId, finishedShardId.getId());
final ShardSnapshotStatus existingStatus = entry.clones().get(repoShardId);
if (existingStatus == null || existingStatus.state() != ShardState.QUEUED) {
continue;
}
if (clones == null) {
clones = new HashMap<>(entry.clones());
}
final ShardSnapshotStatus finishedStatus = updateSnapshotState.updatedState;
logger.trace(
"Starting clone [{}] on [{}] with generation [{}]",
finishedShardId,
finishedStatus.nodeId(),
finishedStatus.generation()
);
clones.put(repoShardId, new ShardSnapshotStatus(localNodeId, finishedStatus.generation()));
iterator.remove();
startedCount++;
}
} else {
// shard snapshot was completed, we check if we can start another snapshot
final ShardSnapshotStatus existingStatus = entry.shards().get(finishedShardId);
if (existingStatus == null || existingStatus.state() != ShardState.QUEUED) {
continue;
}
if (shards == null) {
shards = new HashMap<>(entry.shards());
}
final ShardSnapshotStatus finishedStatus = updateSnapshotState.updatedState;
logger.trace(
"Starting [{}] on [{}] with generation [{}]",
finishedShardId,
finishedStatus.nodeId(),
finishedStatus.generation()
);
shards.put(finishedShardId, new ShardSnapshotStatus(finishedStatus.nodeId(), finishedStatus.generation()));
iterator.remove();
}
}
}
}
final SnapshotsInProgress.Entry updatedEntry;
if (shards != null) {
assert clones == null : "Should not have updated clones when updating shard snapshots but saw "
+ clones
+ " as well as "
+ shards;
updatedEntry = entry.withShardStates(shards);
} else if (clones != null) {
updatedEntry = entry.withClones(clones);
} else {
updatedEntry = entry;
}
entries.add(updatedEntry);
}
if (changedCount > 0) {
logger.trace(
"changed cluster state triggered by [{}] snapshot state updates and resulted in starting " + "[{}] shard snapshots",
changedCount,
startedCount
);
return ClusterStateTaskExecutor.ClusterTasksResult.<ShardSnapshotUpdate>builder()
.successes(tasks)
.build(ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, SnapshotsInProgress.of(entries)).build());
}
return ClusterStateTaskExecutor.ClusterTasksResult.<ShardSnapshotUpdate>builder().successes(tasks).build(currentState);
};
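// Worked example of the two loops above (hypothetical snapshots S1 and S2 in the same repository, S2 queued
// behind S1 for shard [idx][0]): a task completing [idx][0] for S1 is applied to S1's entry (changedCount++)
// and recorded in executedTasks; when the outer loop then reaches S2, the same task re-assigns [idx][0] to a
// node for S2 (startedCount++) and is removed from the unconsumed tasks.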
/**
* Creates a {@link ShardSnapshotStatus} entry for a snapshot after the shard has become available for snapshotting as a result
* of a snapshot clone completing.
*
* @param currentState current cluster state
* @param shardGeneration shard generation of the shard in the repository
* @param shardId shard id of the shard that just finished cloning
* @return shard snapshot status
*/
private static ShardSnapshotStatus startShardSnapshotAfterClone(ClusterState currentState, String shardGeneration, ShardId shardId) {
final ShardRouting primary = currentState.routingTable().index(shardId.getIndex()).shard(shardId.id()).primaryShard();
final ShardSnapshotStatus shardSnapshotStatus;
if (primary == null || !primary.assignedToNode()) {
shardSnapshotStatus = new ShardSnapshotStatus(null, ShardState.MISSING, "primary shard is not allocated", shardGeneration);
} else if (primary.relocating() || primary.initializing()) {
shardSnapshotStatus = new ShardSnapshotStatus(primary.currentNodeId(), ShardState.WAITING, shardGeneration);
} else if (primary.started() == false) {
shardSnapshotStatus = new ShardSnapshotStatus(
primary.currentNodeId(),
ShardState.MISSING,
"primary shard hasn't been started yet",
shardGeneration
);
} else {
shardSnapshotStatus = new ShardSnapshotStatus(primary.currentNodeId(), shardGeneration);
}
return shardSnapshotStatus;
}
/**
* An update to the snapshot state of a shard.
*
* Package private for testing
*/
static final class ShardSnapshotUpdate {
private final Snapshot snapshot;
private final ShardId shardId;
private final RepositoryShardId repoShardId;
private final ShardSnapshotStatus updatedState;
ShardSnapshotUpdate(Snapshot snapshot, RepositoryShardId repositoryShardId, ShardSnapshotStatus updatedState) {
this.snapshot = snapshot;
this.shardId = null;
this.updatedState = updatedState;
this.repoShardId = repositoryShardId;
}
ShardSnapshotUpdate(Snapshot snapshot, ShardId shardId, ShardSnapshotStatus updatedState) {
this.snapshot = snapshot;
this.shardId = shardId;
this.updatedState = updatedState;
repoShardId = null;
}
public boolean isClone() {
return repoShardId != null;
}
@Override
public boolean equals(Object other) {
if (this == other) {
return true;
}
if ((other instanceof ShardSnapshotUpdate) == false) {
return false;
}
final ShardSnapshotUpdate that = (ShardSnapshotUpdate) other;
return this.snapshot.equals(that.snapshot)
&& Objects.equals(this.shardId, that.shardId)
&& Objects.equals(this.repoShardId, that.repoShardId)
&& this.updatedState == that.updatedState;
}
@Override
public int hashCode() {
return Objects.hash(snapshot, shardId, updatedState, repoShardId);
}
}
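// Illustrative sketch: value-based equality makes retried updates collapse, so adding an equal update to a
// set (as the executor's executedTasks set does) is a no-op the second time around. All arguments here are
// whatever the caller has at hand.
private static boolean exampleUpdatesCollapse(Snapshot snapshot, ShardId shardId, ShardSnapshotStatus status) {
final Set<ShardSnapshotUpdate> seen = new HashSet<>();
seen.add(new ShardSnapshotUpdate(snapshot, shardId, status));
return seen.add(new ShardSnapshotUpdate(snapshot, shardId, status)) == false; // duplicate rejected
}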
/**
* Updates the shard status in the cluster state
*
* @param update shard snapshot status update
*/
private void innerUpdateSnapshotState(ShardSnapshotUpdate update, ActionListener<Void> listener) {
logger.trace("received updated snapshot restore state [{}]", update);
clusterService.submitStateUpdateTask(
"update snapshot state",
update,
ClusterStateTaskConfig.build(Priority.NORMAL),
SHARD_STATE_EXECUTOR,
new ClusterStateTaskListener() {
@Override
public void onFailure(String source, Exception e) {
listener.onFailure(e);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
try {
listener.onResponse(null);
} finally {
// Maybe this state update completed the snapshot. If we are not already ending it because of a concurrent
// state update we check if its state is completed and end it if it is.
final SnapshotsInProgress snapshotsInProgress = newState.custom(
SnapshotsInProgress.TYPE,
SnapshotsInProgress.EMPTY
);
if (endingSnapshots.contains(update.snapshot) == false) {
final SnapshotsInProgress.Entry updatedEntry = snapshotsInProgress.snapshot(update.snapshot);
// If the entry is still in the cluster state and is completed, try finalizing the snapshot in the repo
if (updatedEntry != null && updatedEntry.state().completed()) {
endSnapshot(updatedEntry, newState.metadata(), null);
}
}
startExecutableClones(snapshotsInProgress, update.snapshot.getRepository());
}
}
}
);
}
private void startExecutableClones(SnapshotsInProgress snapshotsInProgress, @Nullable String repoName) {
for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) {
if (entry.isClone() && entry.state() == State.STARTED && (repoName == null || entry.repository().equals(repoName))) {
// this is a clone, see if new work is ready
for (final Map.Entry<RepositoryShardId, ShardSnapshotStatus> clone : entry.clones().entrySet()) {
if (clone.getValue().state() == ShardState.INIT) {
final boolean remoteStoreIndexShallowCopy = Boolean.TRUE.equals(entry.remoteStoreIndexShallowCopy());
runReadyClone(
entry.snapshot(),
entry.source(),
clone.getValue(),
clone.getKey(),
repositoriesService.repository(entry.repository()),
remoteStoreIndexShallowCopy
);
}
}
}
}
}
private boolean hasWildCardPatterForCloneSnapshotV2(String[] indices) {
for (String index : indices) {
if ("*".equals(index)) {
return true;
}
}
return false;
}
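// Note on the check above: only a bare "*" entry counts as a wildcard here; partial patterns such as
// "logs-*" (hypothetical example) are not treated as wildcards by this helper.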
private class UpdateSnapshotStatusAction extends TransportClusterManagerNodeAction<
UpdateIndexShardSnapshotStatusRequest,
UpdateIndexShardSnapshotStatusResponse> {
UpdateSnapshotStatusAction(
TransportService transportService,
ClusterService clusterService,
ThreadPool threadPool,
ActionFilters actionFilters,
IndexNameExpressionResolver indexNameExpressionResolver
) {
super(
UPDATE_SNAPSHOT_STATUS_ACTION_NAME,
false,
transportService,
clusterService,
threadPool,
actionFilters,
UpdateIndexShardSnapshotStatusRequest::new,
indexNameExpressionResolver
);
}
@Override
protected String executor() {
return ThreadPool.Names.SAME;
}
@Override
protected UpdateIndexShardSnapshotStatusResponse read(StreamInput in) throws IOException {
return UpdateIndexShardSnapshotStatusResponse.INSTANCE;
}
@Override
protected void clusterManagerOperation(
UpdateIndexShardSnapshotStatusRequest request,
ClusterState state,
ActionListener<UpdateIndexShardSnapshotStatusResponse> listener
) throws Exception {
innerUpdateSnapshotState(
new ShardSnapshotUpdate(request.snapshot(), request.shardId(), request.status()),
ActionListener.delegateFailure(listener, (l, v) -> l.onResponse(UpdateIndexShardSnapshotStatusResponse.INSTANCE))
);
}
@Override
protected ClusterBlockException checkBlock(UpdateIndexShardSnapshotStatusRequest request, ClusterState state) {
return null;
}
}
/**
* Cluster state update task that removes all {@link SnapshotsInProgress.Entry} and {@link SnapshotDeletionsInProgress.Entry} for a
* given repository from the cluster state and afterwards fails all relevant listeners in {@link #snapshotCompletionListeners} and
* {@link #snapshotDeletionListeners}.
*/
private final class FailPendingRepoTasksTask extends ClusterStateUpdateTask {
// Snapshots to fail after the state update
private final List<Snapshot> snapshotsToFail = new ArrayList<>();
// Delete uuids to fail after the state update
private final List<String> deletionsToFail = new ArrayList<>();
// Failure that caused the decision to fail all snapshots and deletes for a repo
private final Exception failure;
private final String repository;
FailPendingRepoTasksTask(String repository, Exception failure) {
this.repository = repository;
this.failure = failure;
}
@Override
public ClusterState execute(ClusterState currentState) {
final SnapshotDeletionsInProgress deletionsInProgress = currentState.custom(
SnapshotDeletionsInProgress.TYPE,
SnapshotDeletionsInProgress.EMPTY
);
boolean changed = false;
final List<SnapshotDeletionsInProgress.Entry> remainingEntries = deletionsInProgress.getEntries();
List<SnapshotDeletionsInProgress.Entry> updatedEntries = new ArrayList<>(remainingEntries.size());
for (SnapshotDeletionsInProgress.Entry entry : remainingEntries) {
if (entry.repository().equals(repository)) {
changed = true;
deletionsToFail.add(entry.uuid());
} else {
updatedEntries.add(entry);
}
}
final SnapshotDeletionsInProgress updatedDeletions = changed ? SnapshotDeletionsInProgress.of(updatedEntries) : null;
final SnapshotsInProgress snapshotsInProgress = currentState.custom(SnapshotsInProgress.TYPE, SnapshotsInProgress.EMPTY);
final List<SnapshotsInProgress.Entry> snapshotEntries = new ArrayList<>();
boolean changedSnapshots = false;
for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) {
if (entry.repository().equals(repository)) {
// We failed to read repository data for this delete; it is not the job of SnapshotsService to
// retry these kinds of issues so we fail all the pending snapshots
snapshotsToFail.add(entry.snapshot());
changedSnapshots = true;
} else {
// Entry is for another repository we just keep it as is
snapshotEntries.add(entry);
}
}
final SnapshotsInProgress updatedSnapshotsInProgress = changedSnapshots ? SnapshotsInProgress.of(snapshotEntries) : null;
return updateWithSnapshots(currentState, updatedSnapshotsInProgress, updatedDeletions);
}
@Override
public void onFailure(String source, Exception e) {
logger.info(
() -> new ParameterizedMessage("Failed to remove all snapshot tasks for repo [{}] from cluster state", repository),
e
);
failAllListenersOnMasterFailOver(e);
}
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
logger.warn(
() -> new ParameterizedMessage(
"Removed all snapshot tasks for repository [{}] from cluster state, now failing listeners",
repository
),
failure
);
synchronized (currentlyFinalizing) {
Tuple<SnapshotsInProgress.Entry, Metadata> finalization;
while ((finalization = repositoryOperations.pollFinalization(repository)) != null) {
assert snapshotsToFail.contains(finalization.v1().snapshot()) : "["
+ finalization.v1()
+ "] not found in snapshots to fail "
+ snapshotsToFail;
}
leaveRepoLoop(repository);
for (Snapshot snapshot : snapshotsToFail) {
failSnapshotCompletionListeners(snapshot, failure);
}
for (String delete : deletionsToFail) {
failListenersIgnoringException(snapshotDeletionListeners.remove(delete), failure);
repositoryOperations.finishDeletion(delete);
}
}
}
}
private static final class OngoingRepositoryOperations {
/**
* Map of repository name to a deque of {@link SnapshotsInProgress.Entry} that need to be finalized for the repository and the
* {@link Metadata} to use when finalizing.
*/
private final Map<String, Deque<SnapshotsInProgress.Entry>> snapshotsToFinalize = new HashMap<>();
/**
* Set of delete operations currently being executed against the repository. The values in this set are the delete UUIDs returned
* by {@link SnapshotDeletionsInProgress.Entry#uuid()}.
*/
private final Set<String> runningDeletions = Collections.synchronizedSet(new HashSet<>());
@Nullable
private Metadata latestKnownMetaData;
@Nullable
synchronized Tuple<SnapshotsInProgress.Entry, Metadata> pollFinalization(String repository) {
assertConsistent();
final SnapshotsInProgress.Entry nextEntry;
final Deque<SnapshotsInProgress.Entry> queued = snapshotsToFinalize.get(repository);
if (queued == null) {
return null;
}
nextEntry = queued.pollFirst();
assert nextEntry != null;
final Tuple<SnapshotsInProgress.Entry, Metadata> res = Tuple.tuple(nextEntry, latestKnownMetaData);
if (queued.isEmpty()) {
snapshotsToFinalize.remove(repository);
}
if (snapshotsToFinalize.isEmpty()) {
latestKnownMetaData = null;
}
assert assertConsistent();
return res;
}
boolean startDeletion(String deleteUUID) {
return runningDeletions.add(deleteUUID);
}
void finishDeletion(String deleteUUID) {
runningDeletions.remove(deleteUUID);
}
synchronized void addFinalization(SnapshotsInProgress.Entry entry, Metadata metadata) {
snapshotsToFinalize.computeIfAbsent(entry.repository(), k -> new LinkedList<>()).add(entry);
this.latestKnownMetaData = metadata;
assertConsistent();
}
/**
* Clear all state associated with running snapshots. To be used on cluster-manager-failover if the current node stops
* being cluster-manager.
*/
synchronized void clear() {
snapshotsToFinalize.clear();
runningDeletions.clear();
latestKnownMetaData = null;
}
synchronized boolean isEmpty() {
return snapshotsToFinalize.isEmpty();
}
synchronized boolean assertNotQueued(Snapshot snapshot) {
assert snapshotsToFinalize.getOrDefault(snapshot.getRepository(), new LinkedList<>())
.stream()
.noneMatch(entry -> entry.snapshot().equals(snapshot)) : "Snapshot [" + snapshot + "] is still in finalization queue";
return true;
}
synchronized boolean assertConsistent() {
assert (latestKnownMetaData == null && snapshotsToFinalize.isEmpty())
|| (latestKnownMetaData != null && snapshotsToFinalize.isEmpty() == false)
: "Should not hold on to metadata if there are no more queued snapshots";
assert snapshotsToFinalize.values().stream().noneMatch(Collection::isEmpty) : "Found empty queue in " + snapshotsToFinalize;
return true;
}
}
}