org.elasticsearch.snapshots.SnapshotShardsService Maven / Gradle / Ivy
The newest version!
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.snapshots;
import com.google.common.collect.ImmutableMap;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.cluster.ClusterChangedEvent;
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateListener;
import org.elasticsearch.cluster.ClusterStateUpdateTask;
import org.elasticsearch.cluster.SnapshotsInProgress;
import org.elasticsearch.cluster.metadata.SnapshotId;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.component.AbstractLifecycleComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.index.deletionpolicy.SnapshotIndexCommit;
import org.elasticsearch.index.engine.SnapshotFailedEngineException;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.index.shard.IndexShardState;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.snapshots.IndexShardRepository;
import org.elasticsearch.index.snapshots.IndexShardSnapshotFailedException;
import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.EmptyTransportResponseHandler;
import org.elasticsearch.transport.TransportChannel;
import org.elasticsearch.transport.TransportRequest;
import org.elasticsearch.transport.TransportRequestHandler;
import org.elasticsearch.transport.TransportResponse;
import org.elasticsearch.transport.TransportService;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Executor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import static com.google.common.collect.Maps.newHashMap;
import static org.elasticsearch.cluster.SnapshotsInProgress.completed;
/**
* This service runs on data and master nodes and controls currently snapshotted shards on these nodes. It is responsible for
* starting and stopping shard level snapshots
*/
public class SnapshotShardsService extends AbstractLifecycleComponent implements ClusterStateListener {
public static final String UPDATE_SNAPSHOT_ACTION_NAME = "internal:cluster/snapshot/update_snapshot";
private final ClusterService clusterService;
private final IndicesService indicesService;
private final SnapshotsService snapshotsService;
private final TransportService transportService;
private final ThreadPool threadPool;
private final Lock shutdownLock = new ReentrantLock();
private final Condition shutdownCondition = shutdownLock.newCondition();
private volatile ImmutableMap shardSnapshots = ImmutableMap.of();
private final BlockingQueue updatedSnapshotStateQueue = ConcurrentCollections.newBlockingQueue();
@Inject
public SnapshotShardsService(Settings settings, ClusterService clusterService, SnapshotsService snapshotsService, ThreadPool threadPool,
TransportService transportService, IndicesService indicesService) {
super(settings);
this.indicesService = indicesService;
this.snapshotsService = snapshotsService;
this.transportService = transportService;
this.clusterService = clusterService;
this.threadPool = threadPool;
if (DiscoveryNode.dataNode(settings)) {
// this is only useful on the nodes that can hold data
// addLast to make sure that Repository will be created before snapshot
clusterService.addLast(this);
}
if (DiscoveryNode.masterNode(settings)) {
// This needs to run only on nodes that can become masters
transportService.registerRequestHandler(UPDATE_SNAPSHOT_ACTION_NAME, UpdateIndexShardSnapshotStatusRequest.class, ThreadPool.Names.SAME, new UpdateSnapshotStateRequestHandler());
}
}
@Override
protected void doStart() {
}
@Override
protected void doStop() {
shutdownLock.lock();
try {
while(!shardSnapshots.isEmpty() && shutdownCondition.await(5, TimeUnit.SECONDS)) {
// Wait for at most 5 second for locally running snapshots to finish
}
} catch (InterruptedException ex) {
Thread.currentThread().interrupt();
} finally {
shutdownLock.unlock();
}
}
@Override
protected void doClose() {
clusterService.remove(this);
}
@Override
public void clusterChanged(ClusterChangedEvent event) {
try {
SnapshotsInProgress prev = event.previousState().custom(SnapshotsInProgress.TYPE);
SnapshotsInProgress curr = event.state().custom(SnapshotsInProgress.TYPE);
if (prev == null) {
if (curr != null) {
processIndexShardSnapshots(event);
}
} else if (prev.equals(curr) == false) {
processIndexShardSnapshots(event);
}
String masterNodeId = event.state().nodes().masterNodeId();
if (masterNodeId != null && masterNodeId.equals(event.previousState().nodes().masterNodeId()) == false) {
syncShardStatsOnNewMaster(event);
}
} catch (Throwable t) {
logger.warn("Failed to update snapshot state ", t);
}
}
/**
* Returns status of shards that are snapshotted on the node and belong to the given snapshot
*
* This method is executed on data node
*
*
* @param snapshotId snapshot id
* @return map of shard id to snapshot status
*/
public Map currentSnapshotShards(SnapshotId snapshotId) {
SnapshotShards snapshotShards = shardSnapshots.get(snapshotId);
if (snapshotShards == null) {
return null;
} else {
return snapshotShards.shards;
}
}
/**
* Checks if any new shards should be snapshotted on this node
*
* @param event cluster state changed event
*/
private void processIndexShardSnapshots(ClusterChangedEvent event) {
SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE);
Map survivors = newHashMap();
// First, remove snapshots that are no longer there
for (Map.Entry entry : shardSnapshots.entrySet()) {
if (snapshotsInProgress != null && snapshotsInProgress.snapshot(entry.getKey()) != null) {
survivors.put(entry.getKey(), entry.getValue());
}
}
// For now we will be mostly dealing with a single snapshot at a time but might have multiple simultaneously running
// snapshots in the future
Map> newSnapshots = newHashMap();
// Now go through all snapshots and update existing or create missing
final String localNodeId = clusterService.localNode().id();
if (snapshotsInProgress != null) {
for (SnapshotsInProgress.Entry entry : snapshotsInProgress.entries()) {
if (entry.state() == SnapshotsInProgress.State.STARTED) {
Map startedShards = newHashMap();
SnapshotShards snapshotShards = shardSnapshots.get(entry.snapshotId());
for (Map.Entry shard : entry.shards().entrySet()) {
// Add all new shards to start processing on
if (localNodeId.equals(shard.getValue().nodeId())) {
if (shard.getValue().state() == SnapshotsInProgress.State.INIT && (snapshotShards == null || !snapshotShards.shards.containsKey(shard.getKey()))) {
logger.trace("[{}] - Adding shard to the queue", shard.getKey());
startedShards.put(shard.getKey(), new IndexShardSnapshotStatus());
}
}
}
if (!startedShards.isEmpty()) {
newSnapshots.put(entry.snapshotId(), startedShards);
if (snapshotShards != null) {
// We already saw this snapshot but we need to add more started shards
ImmutableMap.Builder shards = ImmutableMap.builder();
// Put all shards that were already running on this node
shards.putAll(snapshotShards.shards);
// Put all newly started shards
shards.putAll(startedShards);
survivors.put(entry.snapshotId(), new SnapshotShards(shards.build()));
} else {
// Brand new snapshot that we haven't seen before
survivors.put(entry.snapshotId(), new SnapshotShards(ImmutableMap.copyOf(startedShards)));
}
}
} else if (entry.state() == SnapshotsInProgress.State.ABORTED) {
// Abort all running shards for this snapshot
SnapshotShards snapshotShards = shardSnapshots.get(entry.snapshotId());
if (snapshotShards != null) {
for (Map.Entry shard : entry.shards().entrySet()) {
IndexShardSnapshotStatus snapshotStatus = snapshotShards.shards.get(shard.getKey());
if (snapshotStatus != null) {
switch (snapshotStatus.stage()) {
case INIT:
case STARTED:
snapshotStatus.abort();
break;
case FINALIZE:
logger.debug("[{}] trying to cancel snapshot on shard [{}] that is finalizing, letting it finish", entry.snapshotId(), shard.getKey());
break;
case DONE:
logger.debug("[{}] trying to cancel snapshot on the shard [{}] that is already done, updating status on the master", entry.snapshotId(), shard.getKey());
updateIndexShardSnapshotStatus(entry.snapshotId(), shard.getKey(),
new SnapshotsInProgress.ShardSnapshotStatus(event.state().nodes().localNodeId(), SnapshotsInProgress.State.SUCCESS));
break;
case FAILURE:
logger.debug("[{}] trying to cancel snapshot on the shard [{}] that has already failed, updating status on the master", entry.snapshotId(), shard.getKey());
updateIndexShardSnapshotStatus(entry.snapshotId(), shard.getKey(),
new SnapshotsInProgress.ShardSnapshotStatus(event.state().nodes().localNodeId(), SnapshotsInProgress.State.FAILED, snapshotStatus.failure()));
break;
default:
throw new IllegalStateException("Unknown snapshot shard stage " + snapshotStatus.stage());
}
}
}
}
}
}
}
// Update the list of snapshots that we saw and tried to started
// If startup of these shards fails later, we don't want to try starting these shards again
shutdownLock.lock();
try {
shardSnapshots = ImmutableMap.copyOf(survivors);
if (shardSnapshots.isEmpty()) {
// Notify all waiting threads that no more snapshots
shutdownCondition.signalAll();
}
} finally {
shutdownLock.unlock();
}
// We have new shards to starts
if (newSnapshots.isEmpty() == false) {
Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
for (final Map.Entry> entry : newSnapshots.entrySet()) {
for (final Map.Entry shardEntry : entry.getValue().entrySet()) {
final ShardId shardId = shardEntry.getKey();
try {
final IndexShard indexShard = indicesService.indexServiceSafe(shardId.getIndex()).shard(shardId.id());
executor.execute(new AbstractRunnable() {
@Override
public void doRun() {
snapshot(indexShard, entry.getKey(), shardEntry.getValue());
updateIndexShardSnapshotStatus(entry.getKey(), shardId, new SnapshotsInProgress.ShardSnapshotStatus(localNodeId, SnapshotsInProgress.State.SUCCESS));
}
@Override
public void onFailure(Throwable t) {
logger.warn("[{}] [{}] failed to create snapshot", t, shardId, entry.getKey());
updateIndexShardSnapshotStatus(entry.getKey(), shardId, new SnapshotsInProgress.ShardSnapshotStatus(localNodeId, SnapshotsInProgress.State.FAILED, ExceptionsHelper.detailedMessage(t)));
}
});
} catch (Throwable t) {
updateIndexShardSnapshotStatus(entry.getKey(), shardId, new SnapshotsInProgress.ShardSnapshotStatus(localNodeId, SnapshotsInProgress.State.FAILED, ExceptionsHelper.detailedMessage(t)));
}
}
}
}
}
/**
* Creates shard snapshot
*
* @param snapshotId snapshot id
* @param snapshotStatus snapshot status
*/
private void snapshot(final IndexShard indexShard, final SnapshotId snapshotId, final IndexShardSnapshotStatus snapshotStatus) {
IndexShardRepository indexShardRepository = snapshotsService.getRepositoriesService().indexShardRepository(snapshotId.getRepository());
ShardId shardId = indexShard.shardId();
if (!indexShard.routingEntry().primary()) {
throw new IndexShardSnapshotFailedException(shardId, "snapshot should be performed only on primary");
}
if (indexShard.routingEntry().relocating()) {
// do not snapshot when in the process of relocation of primaries so we won't get conflicts
throw new IndexShardSnapshotFailedException(shardId, "cannot snapshot while relocating");
}
if (indexShard.state() == IndexShardState.CREATED || indexShard.state() == IndexShardState.RECOVERING) {
// shard has just been created, or still recovering
throw new IndexShardSnapshotFailedException(shardId, "shard didn't fully recover yet");
}
try {
// we flush first to make sure we get the latest writes snapshotted
SnapshotIndexCommit snapshotIndexCommit = indexShard.snapshotIndex(true);
try {
indexShardRepository.snapshot(snapshotId, shardId, snapshotIndexCommit, snapshotStatus);
if (logger.isDebugEnabled()) {
StringBuilder sb = new StringBuilder();
sb.append("snapshot (").append(snapshotId.getSnapshot()).append(") completed to ").append(indexShardRepository).append(", took [").append(TimeValue.timeValueMillis(snapshotStatus.time())).append("]\n");
sb.append(" index : version [").append(snapshotStatus.indexVersion()).append("], number_of_files [").append(snapshotStatus.numberOfFiles()).append("] with total_size [").append(new ByteSizeValue(snapshotStatus.totalSize())).append("]\n");
logger.debug(sb.toString());
}
} finally {
snapshotIndexCommit.close();
}
} catch (SnapshotFailedEngineException e) {
throw e;
} catch (IndexShardSnapshotFailedException e) {
throw e;
} catch (Throwable e) {
throw new IndexShardSnapshotFailedException(shardId, "Failed to snapshot", e);
}
}
/**
* Checks if any shards were processed that the new master doesn't know about
*/
private void syncShardStatsOnNewMaster(ClusterChangedEvent event) {
SnapshotsInProgress snapshotsInProgress = event.state().custom(SnapshotsInProgress.TYPE);
if (snapshotsInProgress == null) {
return;
}
for (SnapshotsInProgress.Entry snapshot : snapshotsInProgress.entries()) {
if (snapshot.state() == SnapshotsInProgress.State.STARTED || snapshot.state() == SnapshotsInProgress.State.ABORTED) {
Map localShards = currentSnapshotShards(snapshot.snapshotId());
if (localShards != null) {
ImmutableMap masterShards = snapshot.shards();
for(Map.Entry localShard : localShards.entrySet()) {
ShardId shardId = localShard.getKey();
IndexShardSnapshotStatus localShardStatus = localShard.getValue();
SnapshotsInProgress.ShardSnapshotStatus masterShard = masterShards.get(shardId);
if (masterShard != null && masterShard.state().completed() == false) {
// Master knows about the shard and thinks it has not completed
if (localShardStatus.stage() == IndexShardSnapshotStatus.Stage.DONE) {
// but we think the shard is done - we need to make new master know that the shard is done
logger.debug("[{}] new master thinks the shard [{}] is not completed but the shard is done locally, updating status on the master", snapshot.snapshotId(), shardId);
updateIndexShardSnapshotStatus(snapshot.snapshotId(), shardId,
new SnapshotsInProgress.ShardSnapshotStatus(event.state().nodes().localNodeId(), SnapshotsInProgress.State.SUCCESS));
} else if (localShard.getValue().stage() == IndexShardSnapshotStatus.Stage.FAILURE) {
// but we think the shard failed - we need to make new master know that the shard failed
logger.debug("[{}] new master thinks the shard [{}] is not completed but the shard failed locally, updating status on master", snapshot.snapshotId(), shardId);
updateIndexShardSnapshotStatus(snapshot.snapshotId(), shardId,
new SnapshotsInProgress.ShardSnapshotStatus(event.state().nodes().localNodeId(), SnapshotsInProgress.State.FAILED, localShardStatus.failure()));
}
}
}
}
}
}
}
/**
* Stores the list of shards that has to be snapshotted on this node
*/
private static class SnapshotShards {
private final Map shards;
private SnapshotShards(ImmutableMap shards) {
this.shards = shards;
}
}
/**
* Internal request that is used to send changes in snapshot status to master
*/
public static class UpdateIndexShardSnapshotStatusRequest extends TransportRequest {
private SnapshotId snapshotId;
private ShardId shardId;
private SnapshotsInProgress.ShardSnapshotStatus status;
private volatile boolean processed; // state field, no need to serialize
public UpdateIndexShardSnapshotStatusRequest() {
}
public UpdateIndexShardSnapshotStatusRequest(SnapshotId snapshotId, ShardId shardId, SnapshotsInProgress.ShardSnapshotStatus status) {
this.snapshotId = snapshotId;
this.shardId = shardId;
this.status = status;
}
@Override
public void readFrom(StreamInput in) throws IOException {
super.readFrom(in);
snapshotId = SnapshotId.readSnapshotId(in);
shardId = ShardId.readShardId(in);
status = SnapshotsInProgress.ShardSnapshotStatus.readShardSnapshotStatus(in);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
snapshotId.writeTo(out);
shardId.writeTo(out);
status.writeTo(out);
}
public SnapshotId snapshotId() {
return snapshotId;
}
public ShardId shardId() {
return shardId;
}
public SnapshotsInProgress.ShardSnapshotStatus status() {
return status;
}
@Override
public String toString() {
return "" + snapshotId + ", shardId [" + shardId + "], status [" + status.state() + "]";
}
public void markAsProcessed() {
processed = true;
}
public boolean isProcessed() {
return processed;
}
}
/**
* Updates the shard status
*/
public void updateIndexShardSnapshotStatus(SnapshotId snapshotId, ShardId shardId, SnapshotsInProgress.ShardSnapshotStatus status) {
UpdateIndexShardSnapshotStatusRequest request = new UpdateIndexShardSnapshotStatusRequest(snapshotId, shardId, status);
try {
if (clusterService.state().nodes().localNodeMaster()) {
innerUpdateSnapshotState(request);
} else {
transportService.sendRequest(clusterService.state().nodes().masterNode(),
UPDATE_SNAPSHOT_ACTION_NAME, request, EmptyTransportResponseHandler.INSTANCE_SAME);
}
} catch (Throwable t) {
logger.warn("[{}] [{}] failed to update snapshot state", t, request.snapshotId(), request.status());
}
}
/**
* Updates the shard status on master node
*
* @param request update shard status request
*/
private void innerUpdateSnapshotState(final UpdateIndexShardSnapshotStatusRequest request) {
logger.trace("received updated snapshot restore state [{}]", request);
updatedSnapshotStateQueue.add(request);
clusterService.submitStateUpdateTask("update snapshot state", new ClusterStateUpdateTask() {
private final List drainedRequests = new ArrayList<>();
@Override
public ClusterState execute(ClusterState currentState) {
// The request was already processed as a part of an early batch - skipping
if (request.isProcessed()) {
return currentState;
}
updatedSnapshotStateQueue.drainTo(drainedRequests);
final int batchSize = drainedRequests.size();
// nothing to process (a previous event has processed it already)
if (batchSize == 0) {
return currentState;
}
final SnapshotsInProgress snapshots = currentState.custom(SnapshotsInProgress.TYPE);
if (snapshots != null) {
int changedCount = 0;
final List entries = new ArrayList<>();
for (SnapshotsInProgress.Entry entry : snapshots.entries()) {
final Map shards = newHashMap();
boolean updated = false;
for (int i = 0; i < batchSize; i++) {
final UpdateIndexShardSnapshotStatusRequest updateSnapshotState = drainedRequests.get(i);
updateSnapshotState.markAsProcessed();
if (entry.snapshotId().equals(updateSnapshotState.snapshotId())) {
logger.trace("[{}] Updating shard [{}] with status [{}]", updateSnapshotState.snapshotId(), updateSnapshotState.shardId(), updateSnapshotState.status().state());
if (updated == false) {
shards.putAll(entry.shards());
updated = true;
}
shards.put(updateSnapshotState.shardId(), updateSnapshotState.status());
changedCount++;
}
}
if (updated) {
if (completed(shards.values()) == false) {
entries.add(new SnapshotsInProgress.Entry(entry, ImmutableMap.copyOf(shards)));
} else {
// Snapshot is finished - mark it as done
// TODO: Add PARTIAL_SUCCESS status?
SnapshotsInProgress.Entry updatedEntry = new SnapshotsInProgress.Entry(entry, SnapshotsInProgress.State.SUCCESS, ImmutableMap.copyOf(shards));
entries.add(updatedEntry);
// Finalize snapshot in the repository
snapshotsService.endSnapshot(updatedEntry);
logger.info("snapshot [{}] is done", updatedEntry.snapshotId());
}
} else {
entries.add(entry);
}
}
if (changedCount > 0) {
logger.trace("changed cluster state triggered by {} snapshot state updates", changedCount);
final SnapshotsInProgress updatedSnapshots = new SnapshotsInProgress(entries.toArray(new SnapshotsInProgress.Entry[entries.size()]));
return ClusterState.builder(currentState).putCustom(SnapshotsInProgress.TYPE, updatedSnapshots).build();
}
}
return currentState;
}
@Override
public void onFailure(String source, Throwable t) {
for (UpdateIndexShardSnapshotStatusRequest request : drainedRequests) {
logger.warn("[{}][{}] failed to update snapshot status to [{}]", t, request.snapshotId(), request.shardId(), request.status());
}
}
});
}
/**
* Transport request handler that is used to send changes in snapshot status to master
*/
class UpdateSnapshotStateRequestHandler extends TransportRequestHandler {
@Override
public void messageReceived(UpdateIndexShardSnapshotStatusRequest request, final TransportChannel channel) throws Exception {
innerUpdateSnapshotState(request);
channel.sendResponse(TransportResponse.Empty.INSTANCE);
}
}
}