All downloads are free. The search and download features use the official Maven repository.

org.elasticsearch.indices.recovery.RecoveryTarget Maven / Gradle / Ivy

There is a newer version: 8.15.1
Show newest version
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.indices.recovery;

import com.google.common.collect.Sets;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.StopWatch;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.common.util.concurrent.ConcurrentMapLong;
import org.elasticsearch.index.IndexShardMissingException;
import org.elasticsearch.index.engine.RecoveryEngineException;
import org.elasticsearch.index.shard.*;
import org.elasticsearch.index.shard.service.IndexShard;
import org.elasticsearch.index.shard.service.InternalIndexShard;
import org.elasticsearch.index.store.Store;
import org.elasticsearch.index.translog.Translog;
import org.elasticsearch.indices.IndexMissingException;
import org.elasticsearch.indices.IndicesLifecycle;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.*;

import java.util.Collections;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import static org.elasticsearch.common.unit.TimeValue.timeValueMillis;

/**
 * The recovery target handles recoveries of peer shards of the shard+node to recover to.
 * 

*

Note, it can be safely assumed that there will only be a single recovery per shard (index+id) and * not several of them (since we don't allocate several shard replicas to the same node). */ public class RecoveryTarget extends AbstractComponent { public static class Actions { public static final String FILES_INFO = "index/shard/recovery/filesInfo"; public static final String FILE_CHUNK = "index/shard/recovery/fileChunk"; public static final String CLEAN_FILES = "index/shard/recovery/cleanFiles"; public static final String TRANSLOG_OPS = "index/shard/recovery/translogOps"; public static final String PREPARE_TRANSLOG = "index/shard/recovery/prepareTranslog"; public static final String FINALIZE = "index/shard/recovery/finalize"; } private final ThreadPool threadPool; private final TransportService transportService; private final IndicesService indicesService; private final RecoverySettings recoverySettings; private final ConcurrentMapLong onGoingRecoveries = ConcurrentCollections.newConcurrentMapLong(); @Inject public RecoveryTarget(Settings settings, ThreadPool threadPool, TransportService transportService, IndicesService indicesService, IndicesLifecycle indicesLifecycle, RecoverySettings recoverySettings) { super(settings); this.threadPool = threadPool; this.transportService = transportService; this.indicesService = indicesService; this.recoverySettings = recoverySettings; transportService.registerHandler(Actions.FILES_INFO, new FilesInfoRequestHandler()); transportService.registerHandler(Actions.FILE_CHUNK, new FileChunkTransportRequestHandler()); transportService.registerHandler(Actions.CLEAN_FILES, new CleanFilesRequestHandler()); transportService.registerHandler(Actions.PREPARE_TRANSLOG, new PrepareForTranslogOperationsRequestHandler()); transportService.registerHandler(Actions.TRANSLOG_OPS, new TranslogOperationsRequestHandler()); transportService.registerHandler(Actions.FINALIZE, new FinalizeRecoveryRequestHandler()); indicesLifecycle.addListener(new 
IndicesLifecycle.Listener() { @Override public void beforeIndexShardClosed(ShardId shardId, @Nullable IndexShard indexShard) { if (indexShard != null) { removeAndCleanOnGoingRecovery(findRecoveryByShard(indexShard)); } } }); } public RecoveryStatus peerRecoveryStatus(ShardId shardId) { RecoveryStatus peerRecoveryStatus = findRecoveryByShardId(shardId); if (peerRecoveryStatus == null) { return null; } // update how long it takes if we are still recovering... if (peerRecoveryStatus.startTime > 0 && peerRecoveryStatus.stage != RecoveryStatus.Stage.DONE) { peerRecoveryStatus.time = System.currentTimeMillis() - peerRecoveryStatus.startTime; } return peerRecoveryStatus; } public void cancelRecovery(IndexShard indexShard) { RecoveryStatus recoveryStatus = findRecoveryByShard(indexShard); // it might be if the recovery source got canceled first if (recoveryStatus == null) { return; } if (recoveryStatus.sentCanceledToSource) { return; } recoveryStatus.cancel(); try { if (recoveryStatus.recoveryThread != null) { recoveryStatus.recoveryThread.interrupt(); } // give it a grace period of actually getting the sent ack part final long sleepTime = 100; final long maxSleepTime = 10000; long rounds = Math.round(maxSleepTime / sleepTime); while (!recoveryStatus.sentCanceledToSource && rounds > 0) { rounds--; try { Thread.sleep(sleepTime); } catch (InterruptedException e) { Thread.currentThread().interrupt(); break; // interrupted - step out! 
} } } finally { removeAndCleanOnGoingRecovery(recoveryStatus); } } public void startRecovery(final StartRecoveryRequest request, final InternalIndexShard indexShard, final RecoveryListener listener) { try { indexShard.recovering("from " + request.sourceNode()); } catch (IllegalIndexShardStateException e) { // that's fine, since we might be called concurrently, just ignore this, we are already recovering listener.onIgnoreRecovery(false, "already in recovering process, " + e.getMessage()); return; } threadPool.generic().execute(new Runnable() { @Override public void run() { // create a new recovery status, and process... RecoveryStatus recoveryStatus = new RecoveryStatus(request.recoveryId(), indexShard); onGoingRecoveries.put(recoveryStatus.recoveryId, recoveryStatus); doRecovery(request, recoveryStatus, listener); } }); } public void retryRecovery(final StartRecoveryRequest request, final RecoveryStatus status, final RecoveryListener listener) { threadPool.generic().execute(new Runnable() { @Override public void run() { doRecovery(request, status, listener); } }); } private void doRecovery(final StartRecoveryRequest request, final RecoveryStatus recoveryStatus, final RecoveryListener listener) { if (request.sourceNode() == null) { listener.onIgnoreRecovery(false, "No node to recover from, retry on next cluster state update"); return; } final InternalIndexShard shard = recoveryStatus.indexShard; if (shard == null) { listener.onIgnoreRecovery(false, "shard missing locally, stop recovery"); return; } if (shard.state() == IndexShardState.CLOSED) { listener.onIgnoreRecovery(false, "local shard closed, stop recovery"); return; } if (recoveryStatus.isCanceled()) { // don't remove it, the cancellation code will remove it... 
listener.onIgnoreRecovery(false, "canceled recovery"); return; } recoveryStatus.recoveryThread = Thread.currentThread(); try { logger.trace("[{}][{}] starting recovery from {}", request.shardId().index().name(), request.shardId().id(), request.sourceNode()); StopWatch stopWatch = new StopWatch().start(); RecoveryResponse recoveryResponse = transportService.submitRequest(request.sourceNode(), RecoverySource.Actions.START_RECOVERY, request, new FutureTransportResponseHandler() { @Override public RecoveryResponse newInstance() { return new RecoveryResponse(); } }).txGet(); if (shard.state() == IndexShardState.CLOSED) { removeAndCleanOnGoingRecovery(recoveryStatus); listener.onIgnoreRecovery(false, "local shard closed, stop recovery"); return; } stopWatch.stop(); if (logger.isDebugEnabled()) { logger.debug("recovery completed from [{}], took [{}]", request.shardId(), request.sourceNode(), stopWatch.totalTime()); } else if (logger.isTraceEnabled()) { StringBuilder sb = new StringBuilder(); sb.append('[').append(request.shardId().index().name()).append(']').append('[').append(request.shardId().id()).append("] "); sb.append("recovery completed from ").append(request.sourceNode()).append(", took[").append(stopWatch.totalTime()).append("]\n"); sb.append(" phase1: recovered_files [").append(recoveryResponse.phase1FileNames.size()).append("]").append(" with total_size of [").append(new ByteSizeValue(recoveryResponse.phase1TotalSize)).append("]") .append(", took [").append(timeValueMillis(recoveryResponse.phase1Time)).append("], throttling_wait [").append(timeValueMillis(recoveryResponse.phase1ThrottlingWaitTime)).append(']') .append("\n"); sb.append(" : reusing_files [").append(recoveryResponse.phase1ExistingFileNames.size()).append("] with total_size of [").append(new ByteSizeValue(recoveryResponse.phase1ExistingTotalSize)).append("]\n"); sb.append(" phase2: start took [").append(timeValueMillis(recoveryResponse.startTime)).append("]\n"); sb.append(" : recovered 
[").append(recoveryResponse.phase2Operations).append("]").append(" transaction log operations") .append(", took [").append(timeValueMillis(recoveryResponse.phase2Time)).append("]") .append("\n"); sb.append(" phase3: recovered [").append(recoveryResponse.phase3Operations).append("]").append(" transaction log operations") .append(", took [").append(timeValueMillis(recoveryResponse.phase3Time)).append("]"); logger.trace(sb.toString()); } removeAndCleanOnGoingRecovery(recoveryStatus); listener.onRecoveryDone(); } catch (Throwable e) { // logger.trace("[{}][{}] Got exception on recovery", e, request.shardId().index().name(), request.shardId().id()); if (recoveryStatus.isCanceled()) { // don't remove it, the cancellation code will remove it... listener.onIgnoreRecovery(false, "canceled recovery"); return; } if (shard.state() == IndexShardState.CLOSED) { removeAndCleanOnGoingRecovery(recoveryStatus); listener.onIgnoreRecovery(false, "local shard closed, stop recovery"); return; } Throwable cause = ExceptionsHelper.unwrapCause(e); if (cause instanceof RecoveryEngineException) { // unwrap an exception that was thrown as part of the recovery cause = cause.getCause(); } // do it twice, in case we have double transport exception cause = ExceptionsHelper.unwrapCause(cause); if (cause instanceof RecoveryEngineException) { // unwrap an exception that was thrown as part of the recovery cause = cause.getCause(); } // here, we would add checks against exception that need to be retried (and not removeAndClean in this case) if (cause instanceof IndexShardNotStartedException || cause instanceof IndexMissingException || cause instanceof IndexShardMissingException) { // if the target is not ready yet, retry listener.onRetryRecovery(TimeValue.timeValueMillis(500), recoveryStatus); return; } if (cause instanceof DelayRecoveryException) { listener.onRetryRecovery(TimeValue.timeValueMillis(500), recoveryStatus); return; } // here, we check against ignore recovery options // in general, no 
need to clean the shard on ignored recovery, since we want to try and reuse it later // it will get deleted in the IndicesStore if all are allocated and no shard exists on this node... removeAndCleanOnGoingRecovery(recoveryStatus); if (cause instanceof ConnectTransportException) { listener.onIgnoreRecovery(true, "source node disconnected (" + request.sourceNode() + ")"); return; } if (cause instanceof IndexShardClosedException) { listener.onIgnoreRecovery(true, "source shard is closed (" + request.sourceNode() + ")"); return; } if (cause instanceof AlreadyClosedException) { listener.onIgnoreRecovery(true, "source shard is closed (" + request.sourceNode() + ")"); return; } logger.trace("[{}][{}] recovery from [{}] failed", e, request.shardId().index().name(), request.shardId().id(), request.sourceNode()); listener.onRecoveryFailure(new RecoveryFailedException(request, e), true); } } public static interface RecoveryListener { void onRecoveryDone(); void onRetryRecovery(TimeValue retryAfter, RecoveryStatus status); void onIgnoreRecovery(boolean removeShard, String reason); void onRecoveryFailure(RecoveryFailedException e, boolean sendShardFailure); } @Nullable private RecoveryStatus findRecoveryByShardId(ShardId shardId) { for (RecoveryStatus recoveryStatus : onGoingRecoveries.values()) { if (recoveryStatus.shardId.equals(shardId)) { return recoveryStatus; } } return null; } @Nullable private RecoveryStatus findRecoveryByShard(IndexShard indexShard) { for (RecoveryStatus recoveryStatus : onGoingRecoveries.values()) { if (recoveryStatus.indexShard == indexShard) { return recoveryStatus; } } return null; } private void removeAndCleanOnGoingRecovery(@Nullable RecoveryStatus status) { if (status == null) { return; } // clean it from the on going recoveries since it is being closed status = onGoingRecoveries.remove(status.recoveryId); if (status == null) { return; } // just mark it as canceled as well, just in case there are in flight requests // coming from the recovery 
target status.cancel(); // clean open index outputs Set> entrySet = status.cancleAndClearOpenIndexInputs(); Iterator> iterator = entrySet.iterator(); while (iterator.hasNext()) { Map.Entry entry = iterator.next(); synchronized (entry.getValue()) { IOUtils.closeWhileHandlingException(entry.getValue()); } iterator.remove(); } status.checksums = null; } class PrepareForTranslogOperationsRequestHandler extends BaseTransportRequestHandler { @Override public RecoveryPrepareForTranslogOperationsRequest newInstance() { return new RecoveryPrepareForTranslogOperationsRequest(); } @Override public String executor() { return ThreadPool.Names.GENERIC; } @Override public void messageReceived(RecoveryPrepareForTranslogOperationsRequest request, TransportChannel channel) throws Exception { RecoveryStatus onGoingRecovery = onGoingRecoveries.get(request.recoveryId()); if (onGoingRecovery == null) { // shard is getting closed on us throw new IndexShardClosedException(request.shardId()); } if (onGoingRecovery.isCanceled()) { onGoingRecovery.sentCanceledToSource = true; throw new IndexShardClosedException(request.shardId()); } onGoingRecovery.stage = RecoveryStatus.Stage.TRANSLOG; onGoingRecovery.indexShard.performRecoveryPrepareForTranslog(); channel.sendResponse(TransportResponse.Empty.INSTANCE); } } class FinalizeRecoveryRequestHandler extends BaseTransportRequestHandler { @Override public RecoveryFinalizeRecoveryRequest newInstance() { return new RecoveryFinalizeRecoveryRequest(); } @Override public String executor() { return ThreadPool.Names.GENERIC; } @Override public void messageReceived(RecoveryFinalizeRecoveryRequest request, TransportChannel channel) throws Exception { RecoveryStatus onGoingRecovery = onGoingRecoveries.get(request.recoveryId()); if (onGoingRecovery == null) { // shard is getting closed on us throw new IndexShardClosedException(request.shardId()); } if (onGoingRecovery.isCanceled()) { onGoingRecovery.sentCanceledToSource = true; throw new 
IndexShardClosedException(request.shardId()); } onGoingRecovery.stage = RecoveryStatus.Stage.FINALIZE; onGoingRecovery.indexShard.performRecoveryFinalization(false, onGoingRecovery); onGoingRecovery.time = System.currentTimeMillis() - onGoingRecovery.startTime; onGoingRecovery.stage = RecoveryStatus.Stage.DONE; channel.sendResponse(TransportResponse.Empty.INSTANCE); } } class TranslogOperationsRequestHandler extends BaseTransportRequestHandler { @Override public RecoveryTranslogOperationsRequest newInstance() { return new RecoveryTranslogOperationsRequest(); } @Override public String executor() { return ThreadPool.Names.GENERIC; } @Override public void messageReceived(RecoveryTranslogOperationsRequest request, TransportChannel channel) throws Exception { RecoveryStatus onGoingRecovery = onGoingRecoveries.get(request.recoveryId()); if (onGoingRecovery == null) { // shard is getting closed on us throw new IndexShardClosedException(request.shardId()); } if (onGoingRecovery.isCanceled()) { onGoingRecovery.sentCanceledToSource = true; throw new IndexShardClosedException(request.shardId()); } InternalIndexShard shard = (InternalIndexShard) indicesService.indexServiceSafe(request.shardId().index().name()).shardSafe(request.shardId().id()); for (Translog.Operation operation : request.operations()) { if (onGoingRecovery.isCanceled()) { onGoingRecovery.sentCanceledToSource = true; throw new IndexShardClosedException(request.shardId()); } shard.performRecoveryOperation(operation); onGoingRecovery.currentTranslogOperations++; } channel.sendResponse(TransportResponse.Empty.INSTANCE); } } class FilesInfoRequestHandler extends BaseTransportRequestHandler { @Override public RecoveryFilesInfoRequest newInstance() { return new RecoveryFilesInfoRequest(); } @Override public String executor() { return ThreadPool.Names.GENERIC; } @Override public void messageReceived(RecoveryFilesInfoRequest request, TransportChannel channel) throws Exception { RecoveryStatus onGoingRecovery = 
onGoingRecoveries.get(request.recoveryId()); if (onGoingRecovery == null) { // shard is getting closed on us throw new IndexShardClosedException(request.shardId()); } if (onGoingRecovery.isCanceled()) { onGoingRecovery.sentCanceledToSource = true; throw new IndexShardClosedException(request.shardId()); } onGoingRecovery.phase1FileNames = request.phase1FileNames; onGoingRecovery.phase1FileSizes = request.phase1FileSizes; onGoingRecovery.phase1ExistingFileNames = request.phase1ExistingFileNames; onGoingRecovery.phase1ExistingFileSizes = request.phase1ExistingFileSizes; onGoingRecovery.phase1TotalSize = request.phase1TotalSize; onGoingRecovery.phase1ExistingTotalSize = request.phase1ExistingTotalSize; onGoingRecovery.stage = RecoveryStatus.Stage.INDEX; channel.sendResponse(TransportResponse.Empty.INSTANCE); } } class CleanFilesRequestHandler extends BaseTransportRequestHandler { @Override public RecoveryCleanFilesRequest newInstance() { return new RecoveryCleanFilesRequest(); } @Override public String executor() { return ThreadPool.Names.GENERIC; } @Override public void messageReceived(RecoveryCleanFilesRequest request, TransportChannel channel) throws Exception { RecoveryStatus onGoingRecovery = onGoingRecoveries.get(request.recoveryId()); if (onGoingRecovery == null) { // shard is getting closed on us throw new IndexShardClosedException(request.shardId()); } if (onGoingRecovery.isCanceled()) { onGoingRecovery.sentCanceledToSource = true; throw new IndexShardClosedException(request.shardId()); } Store store = onGoingRecovery.indexShard.store(); // first, we go and move files that were created with the recovery id suffix to // the actual names, its ok if we have a corrupted index here, since we have replicas // to recover from in case of a full cluster shutdown just when this code executes... String prefix = "recovery." 
+ onGoingRecovery.startTime + "."; Set filesToRename = Sets.newHashSet(); for (String existingFile : store.directory().listAll()) { if (existingFile.startsWith(prefix)) { filesToRename.add(existingFile.substring(prefix.length(), existingFile.length())); } } Exception failureToRename = null; if (!filesToRename.isEmpty()) { // first, go and delete the existing ones final Directory directory = store.directory(); for (String file : filesToRename) { try { directory.deleteFile(file); } catch (Throwable ex) { logger.debug("failed to delete file [{}]", ex, file); } } for (String fileToRename : filesToRename) { // now, rename the files... and fail it it won't work store.renameFile(prefix + fileToRename, fileToRename); } } // now write checksums store.writeChecksums(onGoingRecovery.checksums); for (String existingFile : store.directory().listAll()) { // don't delete snapshot file, or the checksums file (note, this is extra protection since the Store won't delete checksum) if (!request.snapshotFiles().contains(existingFile) && !Store.isChecksum(existingFile)) { try { store.directory().deleteFile(existingFile); } catch (Exception e) { // ignore, we don't really care, will get deleted later on } } } channel.sendResponse(TransportResponse.Empty.INSTANCE); } } class FileChunkTransportRequestHandler extends BaseTransportRequestHandler { @Override public RecoveryFileChunkRequest newInstance() { return new RecoveryFileChunkRequest(); } @Override public String executor() { return ThreadPool.Names.GENERIC; } @Override public void messageReceived(final RecoveryFileChunkRequest request, TransportChannel channel) throws Exception { RecoveryStatus onGoingRecovery = onGoingRecoveries.get(request.recoveryId()); if (onGoingRecovery == null) { // shard is getting closed on us throw new IndexShardClosedException(request.shardId()); } if (onGoingRecovery.isCanceled()) { onGoingRecovery.sentCanceledToSource = true; throw new IndexShardClosedException(request.shardId()); } Store store = 
onGoingRecovery.indexShard.store(); IndexOutput indexOutput; if (request.position() == 0) { // first request onGoingRecovery.checksums.remove(request.name()); indexOutput = onGoingRecovery.removeOpenIndexOutputs(request.name()); IOUtils.closeWhileHandlingException(indexOutput); // we create an output with no checksum, this is because the pure binary data of the file is not // the checksum (because of seek). We will create the checksum file once copying is done // also, we check if the file already exists, if it does, we create a file name based // on the current recovery "id" and later we make the switch, the reason for that is that // we only want to overwrite the index files once we copied all over, and not create a // case where the index is half moved String fileName = request.name(); if (store.directory().fileExists(fileName)) { fileName = "recovery." + onGoingRecovery.startTime + "." + fileName; } indexOutput = onGoingRecovery.openAndPutIndexOutput(request.name(), fileName, store); } else { indexOutput = onGoingRecovery.getOpenIndexOutput(request.name()); } if (indexOutput == null) { // shard is getting closed on us throw new IndexShardClosedException(request.shardId()); } boolean success = false; synchronized (indexOutput) { try { if (recoverySettings.rateLimiter() != null) { recoverySettings.rateLimiter().pause(request.content().length()); } BytesReference content = request.content(); if (!content.hasArray()) { content = content.toBytesArray(); } indexOutput.writeBytes(content.array(), content.arrayOffset(), content.length()); onGoingRecovery.currentFilesSize.addAndGet(request.length()); if (indexOutput.getFilePointer() == request.length()) { // we are done indexOutput.close(); // write the checksum if (request.checksum() != null) { onGoingRecovery.checksums.put(request.name(), request.checksum()); } store.directory().sync(Collections.singleton(request.name())); IndexOutput remove = onGoingRecovery.removeOpenIndexOutputs(request.name()); assert remove == 
indexOutput; } success = true; } finally { if (!success || onGoingRecovery.isCanceled()) { IndexOutput remove = onGoingRecovery.removeOpenIndexOutputs(request.name()); assert remove == indexOutput; IOUtils.closeWhileHandlingException(indexOutput); } } } if (onGoingRecovery.isCanceled()) { onGoingRecovery.sentCanceledToSource = true; throw new IndexShardClosedException(request.shardId()); } channel.sendResponse(TransportResponse.Empty.INSTANCE); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy