org.elasticsearch.snapshots.SnapshotShardsService Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.snapshots;
import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.logging.log4j.util.Supplier;
import org.apache.lucene.index.IndexCommit;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.cluster.ClusterChangedEvent;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateListener;
import org.elasticsearch.cluster.ClusterStateUpdateTask;
import org.elasticsearch.cluster.SnapshotsInProgress;
import org.elasticsearch.cluster.SnapshotsInProgress.ShardSnapshotStatus;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.component.AbstractLifecycleComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.index.engine.SnapshotFailedEngineException;
import org.elasticsearch.index.shard.IndexEventListener;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.index.shard.IndexShardState;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.snapshots.IndexShardSnapshotFailedException;
import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus;
import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus.Stage;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.repositories.IndexId;
import org.elasticsearch.repositories.Repository;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.EmptyTransportResponseHandler;
import org.elasticsearch.transport.TransportChannel;
import org.elasticsearch.transport.TransportRequest;
import org.elasticsearch.transport.TransportRequestHandler;
import org.elasticsearch.transport.TransportResponse;
import org.elasticsearch.transport.TransportService;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Executor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Function;
import java.util.stream.Collectors;
import static java.util.Collections.emptyMap;
import static java.util.Collections.unmodifiableMap;
import static org.elasticsearch.cluster.SnapshotsInProgress.completed;
/**
* This service runs on data and master nodes and controls currently snapshotted shards on these nodes. It is responsible for
* starting and stopping shard level snapshots
*/
public class SnapshotShardsService extends AbstractLifecycleComponent implements ClusterStateListener, IndexEventListener {
public static final String UPDATE_SNAPSHOT_ACTION_NAME = "internal:cluster/snapshot/update_snapshot";
private final ClusterService clusterService;
private final IndicesService indicesService;
private final SnapshotsService snapshotsService;
private final TransportService transportService;
private final ThreadPool threadPool;
private final Lock shutdownLock = new ReentrantLock();
private final Condition shutdownCondition = shutdownLock.newCondition();
private volatile Map shardSnapshots = emptyMap();
private final BlockingQueue updatedSnapshotStateQueue = ConcurrentCollections.newBlockingQueue();
@Inject
public SnapshotShardsService(Settings settings, ClusterService clusterService, SnapshotsService snapshotsService, ThreadPool threadPool,
TransportService transportService, IndicesService indicesService) {
super(settings);
this.indicesService = indicesService;
this.snapshotsService = snapshotsService;
this.transportService = transportService;
this.clusterService = clusterService;
this.threadPool = threadPool;
if (DiscoveryNode.isDataNode(settings)) {
// this is only useful on the nodes that can hold data
// addLast to make sure that Repository will be created before snapshot
clusterService.addLast(this);
}
if (DiscoveryNode.isMasterNode(settings)) {
// This needs to run only on nodes that can become masters
transportService.registerRequestHandler(UPDATE_SNAPSHOT_ACTION_NAME, UpdateIndexShardSnapshotStatusRequest::new, ThreadPool.Names.SAME, new UpdateSnapshotStateRequestHandler());
}
}
@Override
protected void doStart() {
}
@Override
protected void doStop() {
shutdownLock.lock();
try {
while(!shardSnapshots.isEmpty() && shutdownCondition.await(5, TimeUnit.SECONDS)) {
// Wait for at most 5 second for locally running snapshots to finish
}
} catch (InterruptedException ex) {
Thread.currentThread().interrupt();
} finally {
shutdownLock.unlock();
}
}
@Override
protected void doClose() {
clusterService.remove(this);
}
@Override
public void clusterChanged(ClusterChangedEvent event) {
try {
SnapshotsInProgress prev = event.previousState().custom(SnapshotsInProgress.TYPE);
SnapshotsInProgress curr = event.state().custom(SnapshotsInProgress.TYPE);
if ((prev == null && curr != null) || (prev != null && prev.equals(curr) == false)) {
processIndexShardSnapshots(event);
}
String masterNodeId = event.state().nodes().getMasterNodeId();
if (masterNodeId != null && masterNodeId.equals(event.previousState().nodes().getMasterNodeId()) == false) {
syncShardStatsOnNewMaster(event);
}
} catch (Exception e) {
logger.warn("Failed to update snapshot state ", e);
}
}
@Override
public void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexShard, Settings indexSettings) {
// abort any snapshots occurring on the soon-to-be closed shard
Map snapshotShardsMap = shardSnapshots;
for (Map.Entry snapshotShards : snapshotShardsMap.entrySet()) {
Map shards = snapshotShards.getValue().shards;
if (shards.containsKey(shardId)) {
logger.debug("[{}] shard closing, abort snapshotting for snapshot [{}]", shardId, snapshotShards.getKey().getSnapshotId());
shards.get(shardId).abort();
}
}
}
/**
* Returns status of shards that are snapshotted on the node and belong to the given snapshot
*
* This method is executed on data node
*
*
* @param snapshot snapshot
* @return map of shard id to snapshot status
*/
public Map currentSnapshotShards(Snapshot snapshot) {
SnapshotShards snapshotShards = shardSnapshots.get(snapshot);
if (snapshotShards == null) {
return null;
} else {
return snapshotShards.shards;
}
}
/**
* Checks if any new shards should be snapshotted on this node
*
* @param event cluster state changed event
*/
private void processIndexShardSnapshots(ClusterChangedEvent event) {
SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE);
Map survivors = new HashMap<>();
// First, remove snapshots that are no longer there
for (Map.Entry entry : shardSnapshots.entrySet()) {
final Snapshot snapshot = entry.getKey();
if (snapshotsInProgress != null && snapshotsInProgress.snapshot(snapshot) != null) {
survivors.put(entry.getKey(), entry.getValue());
} else {
// abort any running snapshots of shards for the removed entry;
// this could happen if for some reason the cluster state update for aborting
// running shards is missed, then the snapshot is removed is a subsequent cluster
// state update, which is being processed here
for (IndexShardSnapshotStatus snapshotStatus : entry.getValue().shards.values()) {
if (snapshotStatus.stage() == Stage.INIT || snapshotStatus.stage() == Stage.STARTED) {
snapshotStatus.abort();
}
}
}
}
// For now we will be mostly dealing with a single snapshot at a time but might have multiple simultaneously running
// snapshots in the future
Map> newSnapshots = new HashMap<>();
// Now go through all snapshots and update existing or create missing
final String localNodeId = clusterService.localNode().getId();
final Map> snapshotIndices = new HashMap<>();
if (snapshotsInProgress != null) {
for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) {
snapshotIndices.put(entry.snapshot(),
entry.indices().stream().collect(Collectors.toMap(IndexId::getName, Function.identity())));
if (entry.state() == SnapshotsInProgress.State.STARTED) {
Map startedShards = new HashMap<>();
SnapshotShards snapshotShards = shardSnapshots.get(entry.snapshot());
for (ObjectObjectCursor shard : entry.shards()) {
// Add all new shards to start processing on
if (localNodeId.equals(shard.value.nodeId())) {
if (shard.value.state() == SnapshotsInProgress.State.INIT && (snapshotShards == null || !snapshotShards.shards.containsKey(shard.key))) {
logger.trace("[{}] - Adding shard to the queue", shard.key);
startedShards.put(shard.key, new IndexShardSnapshotStatus());
}
}
}
if (!startedShards.isEmpty()) {
newSnapshots.put(entry.snapshot(), startedShards);
if (snapshotShards != null) {
// We already saw this snapshot but we need to add more started shards
Map shards = new HashMap<>();
// Put all shards that were already running on this node
shards.putAll(snapshotShards.shards);
// Put all newly started shards
shards.putAll(startedShards);
survivors.put(entry.snapshot(), new SnapshotShards(unmodifiableMap(shards)));
} else {
// Brand new snapshot that we haven't seen before
survivors.put(entry.snapshot(), new SnapshotShards(unmodifiableMap(startedShards)));
}
}
} else if (entry.state() == SnapshotsInProgress.State.ABORTED) {
// Abort all running shards for this snapshot
SnapshotShards snapshotShards = shardSnapshots.get(entry.snapshot());
if (snapshotShards != null) {
for (ObjectObjectCursor shard : entry.shards()) {
IndexShardSnapshotStatus snapshotStatus = snapshotShards.shards.get(shard.key);
if (snapshotStatus != null) {
switch (snapshotStatus.stage()) {
case INIT:
case STARTED:
snapshotStatus.abort();
break;
case FINALIZE:
logger.debug("[{}] trying to cancel snapshot on shard [{}] that is finalizing, letting it finish", entry.snapshot(), shard.key);
break;
case DONE:
logger.debug("[{}] trying to cancel snapshot on the shard [{}] that is already done, updating status on the master", entry.snapshot(), shard.key);
updateIndexShardSnapshotStatus(entry.snapshot(), shard.key,
new ShardSnapshotStatus(event.state().nodes().getLocalNodeId(), SnapshotsInProgress.State.SUCCESS));
break;
case FAILURE:
logger.debug("[{}] trying to cancel snapshot on the shard [{}] that has already failed, updating status on the master", entry.snapshot(), shard.key);
updateIndexShardSnapshotStatus(entry.snapshot(), shard.key,
new ShardSnapshotStatus(event.state().nodes().getLocalNodeId(), SnapshotsInProgress.State.FAILED, snapshotStatus.failure()));
break;
default:
throw new IllegalStateException("Unknown snapshot shard stage " + snapshotStatus.stage());
}
}
}
}
}
}
}
// Update the list of snapshots that we saw and tried to started
// If startup of these shards fails later, we don't want to try starting these shards again
shutdownLock.lock();
try {
shardSnapshots = unmodifiableMap(survivors);
if (shardSnapshots.isEmpty()) {
// Notify all waiting threads that no more snapshots
shutdownCondition.signalAll();
}
} finally {
shutdownLock.unlock();
}
// We have new shards to starts
if (newSnapshots.isEmpty() == false) {
Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
for (final Map.Entry> entry : newSnapshots.entrySet()) {
Map indicesMap = snapshotIndices.get(entry.getKey());
assert indicesMap != null;
for (final Map.Entry shardEntry : entry.getValue().entrySet()) {
final ShardId shardId = shardEntry.getKey();
try {
final IndexShard indexShard = indicesService.indexServiceSafe(shardId.getIndex()).getShardOrNull(shardId.id());
final IndexId indexId = indicesMap.get(shardId.getIndexName());
assert indexId != null;
executor.execute(new AbstractRunnable() {
@Override
public void doRun() {
snapshot(indexShard, entry.getKey(), indexId, shardEntry.getValue());
updateIndexShardSnapshotStatus(entry.getKey(), shardId, new ShardSnapshotStatus(localNodeId, SnapshotsInProgress.State.SUCCESS));
}
@Override
public void onFailure(Exception e) {
logger.warn((Supplier) () -> new ParameterizedMessage("[{}] [{}] failed to create snapshot", shardId, entry.getKey()), e);
updateIndexShardSnapshotStatus(entry.getKey(), shardId, new ShardSnapshotStatus(localNodeId, SnapshotsInProgress.State.FAILED, ExceptionsHelper.detailedMessage(e)));
}
});
} catch (Exception e) {
updateIndexShardSnapshotStatus(entry.getKey(), shardId, new ShardSnapshotStatus(localNodeId, SnapshotsInProgress.State.FAILED, ExceptionsHelper.detailedMessage(e)));
}
}
}
}
}
/**
* Creates shard snapshot
*
* @param snapshot snapshot
* @param snapshotStatus snapshot status
*/
private void snapshot(final IndexShard indexShard, final Snapshot snapshot, final IndexId indexId, final IndexShardSnapshotStatus snapshotStatus) {
Repository repository = snapshotsService.getRepositoriesService().repository(snapshot.getRepository());
ShardId shardId = indexShard.shardId();
if (!indexShard.routingEntry().primary()) {
throw new IndexShardSnapshotFailedException(shardId, "snapshot should be performed only on primary");
}
if (indexShard.routingEntry().relocating()) {
// do not snapshot when in the process of relocation of primaries so we won't get conflicts
throw new IndexShardSnapshotFailedException(shardId, "cannot snapshot while relocating");
}
if (indexShard.state() == IndexShardState.CREATED || indexShard.state() == IndexShardState.RECOVERING) {
// shard has just been created, or still recovering
throw new IndexShardSnapshotFailedException(shardId, "shard didn't fully recover yet");
}
try {
// we flush first to make sure we get the latest writes snapshotted
IndexCommit snapshotIndexCommit = indexShard.acquireIndexCommit(true);
try {
repository.snapshotShard(indexShard, snapshot.getSnapshotId(), indexId, snapshotIndexCommit, snapshotStatus);
if (logger.isDebugEnabled()) {
StringBuilder sb = new StringBuilder();
sb.append(" index : version [").append(snapshotStatus.indexVersion()).append("], number_of_files [").append(snapshotStatus.numberOfFiles()).append("] with total_size [").append(new ByteSizeValue(snapshotStatus.totalSize())).append("]\n");
logger.debug("snapshot ({}) completed to {}, took [{}]\n{}", snapshot, repository,
TimeValue.timeValueMillis(snapshotStatus.time()), sb);
}
} finally {
indexShard.releaseIndexCommit(snapshotIndexCommit);
}
} catch (SnapshotFailedEngineException e) {
throw e;
} catch (IndexShardSnapshotFailedException e) {
throw e;
} catch (Exception e) {
throw new IndexShardSnapshotFailedException(shardId, "Failed to snapshot", e);
}
}
/**
* Checks if any shards were processed that the new master doesn't know about
*/
private void syncShardStatsOnNewMaster(ClusterChangedEvent event) {
SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE);
if (snapshotsInProgress == null) {
return;
}
for (SnapshotsInProgress.Entry snapshot : snapshotsInProgress.entries()) {
if (snapshot.state() == SnapshotsInProgress.State.STARTED || snapshot.state() == SnapshotsInProgress.State.ABORTED) {
Map localShards = currentSnapshotShards(snapshot.snapshot());
if (localShards != null) {
ImmutableOpenMap masterShards = snapshot.shards();
for(Map.Entry localShard : localShards.entrySet()) {
ShardId shardId = localShard.getKey();
IndexShardSnapshotStatus localShardStatus = localShard.getValue();
ShardSnapshotStatus masterShard = masterShards.get(shardId);
if (masterShard != null && masterShard.state().completed() == false) {
// Master knows about the shard and thinks it has not completed
if (localShardStatus.stage() == Stage.DONE) {
// but we think the shard is done - we need to make new master know that the shard is done
logger.debug("[{}] new master thinks the shard [{}] is not completed but the shard is done locally, updating status on the master", snapshot.snapshot(), shardId);
updateIndexShardSnapshotStatus(snapshot.snapshot(), shardId,
new ShardSnapshotStatus(event.state().nodes().getLocalNodeId(), SnapshotsInProgress.State.SUCCESS));
} else if (localShard.getValue().stage() == Stage.FAILURE) {
// but we think the shard failed - we need to make new master know that the shard failed
logger.debug("[{}] new master thinks the shard [{}] is not completed but the shard failed locally, updating status on master", snapshot.snapshot(), shardId);
updateIndexShardSnapshotStatus(snapshot.snapshot(), shardId,
new ShardSnapshotStatus(event.state().nodes().getLocalNodeId(), SnapshotsInProgress.State.FAILED, localShardStatus.failure()));
}
}
}
}
}
}
}
/**
* Stores the list of shards that has to be snapshotted on this node
*/
private static class SnapshotShards {
private final Map shards;
private SnapshotShards(Map shards) {
this.shards = shards;
}
}
/**
* Internal request that is used to send changes in snapshot status to master
*/
public static class UpdateIndexShardSnapshotStatusRequest extends TransportRequest {
private Snapshot snapshot;
private ShardId shardId;
private ShardSnapshotStatus status;
private volatile boolean processed; // state field, no need to serialize
public UpdateIndexShardSnapshotStatusRequest() {
}
public UpdateIndexShardSnapshotStatusRequest(Snapshot snapshot, ShardId shardId, ShardSnapshotStatus status) {
this.snapshot = snapshot;
this.shardId = shardId;
this.status = status;
}
@Override
public void readFrom(StreamInput in) throws IOException {
super.readFrom(in);
snapshot = new Snapshot(in);
shardId = ShardId.readShardId(in);
status = new ShardSnapshotStatus(in);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
snapshot.writeTo(out);
shardId.writeTo(out);
status.writeTo(out);
}
public Snapshot snapshot() {
return snapshot;
}
public ShardId shardId() {
return shardId;
}
public ShardSnapshotStatus status() {
return status;
}
@Override
public String toString() {
return "" + snapshot + ", shardId [" + shardId + "], status [" + status.state() + "]";
}
public void markAsProcessed() {
processed = true;
}
public boolean isProcessed() {
return processed;
}
}
/**
* Updates the shard status
*/
public void updateIndexShardSnapshotStatus(Snapshot snapshot, ShardId shardId, ShardSnapshotStatus status) {
UpdateIndexShardSnapshotStatusRequest request = new UpdateIndexShardSnapshotStatusRequest(snapshot, shardId, status);
try {
if (clusterService.state().nodes().isLocalNodeElectedMaster()) {
innerUpdateSnapshotState(request);
} else {
transportService.sendRequest(clusterService.state().nodes().getMasterNode(),
UPDATE_SNAPSHOT_ACTION_NAME, request, EmptyTransportResponseHandler.INSTANCE_SAME);
}
} catch (Exception e) {
logger.warn((Supplier) () -> new ParameterizedMessage("[{}] [{}] failed to update snapshot state", request.snapshot(), request.status()), e);
}
}
/**
* Updates the shard status on master node
*
* @param request update shard status request
*/
private void innerUpdateSnapshotState(final UpdateIndexShardSnapshotStatusRequest request) {
logger.trace("received updated snapshot restore state [{}]", request);
updatedSnapshotStateQueue.add(request);
clusterService.submitStateUpdateTask("update snapshot state", new ClusterStateUpdateTask() {
private final List drainedRequests = new ArrayList<>();
@Override
public ClusterState execute(ClusterState currentState) {
// The request was already processed as a part of an early batch - skipping
if (request.isProcessed()) {
return currentState;
}
updatedSnapshotStateQueue.drainTo(drainedRequests);
final int batchSize = drainedRequests.size();
// nothing to process (a previous event has processed it already)
if (batchSize == 0) {
return currentState;
}
final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE);
if (snapshots != null) {
int changedCount = 0;
final List entries = new ArrayList<>();
for (SnapshotsInProgress.Entry entry : snapshots.entries()) {
ImmutableOpenMap.Builder shards = ImmutableOpenMap.builder();
boolean updated = false;
for (int i = 0; i < batchSize; i++) {
final UpdateIndexShardSnapshotStatusRequest updateSnapshotState = drainedRequests.get(i);
updateSnapshotState.markAsProcessed();
if (entry.snapshot().equals(updateSnapshotState.snapshot())) {
logger.trace("[{}] Updating shard [{}] with status [{}]", updateSnapshotState.snapshot(), updateSnapshotState.shardId(), updateSnapshotState.status().state());
if (updated == false) {
shards.putAll(entry.shards());
updated = true;
}
shards.put(updateSnapshotState.shardId(), updateSnapshotState.status());
changedCount++;
}
}
if (updated) {
if (completed(shards.values()) == false) {
entries.add(new SnapshotsInProgress.Entry(entry, shards.build()));
} else {
// Snapshot is finished - mark it as done
// TODO: Add PARTIAL_SUCCESS status?
SnapshotsInProgress.Entry updatedEntry = new SnapshotsInProgress.Entry(entry, SnapshotsInProgress.State.SUCCESS, shards.build());
entries.add(updatedEntry);
// Finalize snapshot in the repository
snapshotsService.endSnapshot(updatedEntry);
logger.info("snapshot [{}] is done", updatedEntry.snapshot());
}
} else {
entries.add(entry);
}
}
if (changedCount > 0) {
logger.trace("changed cluster state triggered by {} snapshot state updates", changedCount);
final SnapshotsInProgress updatedSnapshots = new SnapshotsInProgress(entries.toArray(new SnapshotsInProgress.Entry[entries.size()]));
return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, updatedSnapshots).build();
}
}
return currentState;
}
@Override
public void onFailure(String source, Exception e) {
for (UpdateIndexShardSnapshotStatusRequest request : drainedRequests) {
logger.warn((Supplier) () -> new ParameterizedMessage("[{}][{}] failed to update snapshot status to [{}]", request.snapshot(), request.shardId(), request.status()), e);
}
}
});
}
/**
* Transport request handler that is used to send changes in snapshot status to master
*/
class UpdateSnapshotStateRequestHandler implements TransportRequestHandler {
@Override
public void messageReceived(UpdateIndexShardSnapshotStatusRequest request, final TransportChannel channel) throws Exception {
innerUpdateSnapshotState(request);
channel.sendResponse(TransportResponse.Empty.INSTANCE);
}
}
}