org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardRepository Maven / Gradle / Ivy
Show all versions of elasticsearch Show documentation
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.snapshots.blobstore;
import com.google.common.collect.Iterables;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RateLimiter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.ClusterService;
import org.elasticsearch.cluster.metadata.SnapshotId;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.blobstore.BlobContainer;
import org.elasticsearch.common.blobstore.BlobMetaData;
import org.elasticsearch.common.blobstore.BlobPath;
import org.elasticsearch.common.blobstore.BlobStore;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.store.InputStreamIndexInput;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.deletionpolicy.SnapshotIndexCommit;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.snapshots.IndexShardRepository;
import org.elasticsearch.index.snapshots.IndexShardRestoreFailedException;
import org.elasticsearch.index.snapshots.IndexShardSnapshotException;
import org.elasticsearch.index.snapshots.IndexShardSnapshotFailedException;
import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus;
import org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot.FileInfo;
import org.elasticsearch.index.store.Store;
import org.elasticsearch.index.store.StoreFileMetaData;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.indices.recovery.RecoveryState;
import org.elasticsearch.repositories.RepositoryName;
import org.elasticsearch.repositories.RepositoryVerificationException;
import org.elasticsearch.repositories.blobstore.BlobStoreFormat;
import org.elasticsearch.repositories.blobstore.BlobStoreRepository;
import org.elasticsearch.repositories.blobstore.ChecksumBlobStoreFormat;
import org.elasticsearch.repositories.blobstore.LegacyBlobStoreFormat;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.elasticsearch.repositories.blobstore.BlobStoreRepository.testBlobPrefix;
/**
* Blob store based implementation of IndexShardRepository
*/
public class BlobStoreIndexShardRepository extends AbstractComponent implements IndexShardRepository {
private static final int BUFFER_SIZE = 4096;
private BlobStore blobStore;
private BlobPath basePath;
private final String repositoryName;
private ByteSizeValue chunkSize;
private final IndicesService indicesService;
private final ClusterService clusterService;
private RateLimiter snapshotRateLimiter;
private RateLimiter restoreRateLimiter;
private RateLimiterListener rateLimiterListener;
private RateLimitingInputStream.Listener snapshotThrottleListener;
private RateLimitingInputStream.Listener restoreThrottleListener;
private boolean compress;
private final ParseFieldMatcher parseFieldMatcher;
protected static final String LEGACY_SNAPSHOT_PREFIX = "snapshot-";
protected static final String LEGACY_SNAPSHOT_NAME_FORMAT = LEGACY_SNAPSHOT_PREFIX + "%s";
protected static final String SNAPSHOT_PREFIX = "snap-";
protected static final String SNAPSHOT_NAME_FORMAT = SNAPSHOT_PREFIX + "%s.dat";
protected static final String SNAPSHOT_CODEC = "snapshot";
protected static final String SNAPSHOT_INDEX_PREFIX = "index-";
protected static final String SNAPSHOT_INDEX_NAME_FORMAT = SNAPSHOT_INDEX_PREFIX + "%s";
protected static final String SNAPSHOT_INDEX_CODEC = "snapshots";
protected static final String DATA_BLOB_PREFIX = "__";
private ChecksumBlobStoreFormat indexShardSnapshotFormat;
private LegacyBlobStoreFormat indexShardSnapshotLegacyFormat;
private ChecksumBlobStoreFormat indexShardSnapshotsFormat;
@Inject
public BlobStoreIndexShardRepository(Settings settings, RepositoryName repositoryName, IndicesService indicesService, ClusterService clusterService) {
super(settings);
this.parseFieldMatcher = new ParseFieldMatcher(settings);
this.repositoryName = repositoryName.name();
this.indicesService = indicesService;
this.clusterService = clusterService;
}
/**
* Called by {@link org.elasticsearch.repositories.blobstore.BlobStoreRepository} on repository startup
*
* @param blobStore blob store
* @param basePath base path to blob store
* @param chunkSize chunk size
*/
public void initialize(BlobStore blobStore, BlobPath basePath, ByteSizeValue chunkSize,
RateLimiter snapshotRateLimiter, RateLimiter restoreRateLimiter,
final RateLimiterListener rateLimiterListener, boolean compress) {
this.blobStore = blobStore;
this.basePath = basePath;
this.chunkSize = chunkSize;
this.snapshotRateLimiter = snapshotRateLimiter;
this.restoreRateLimiter = restoreRateLimiter;
this.rateLimiterListener = rateLimiterListener;
this.snapshotThrottleListener = new RateLimitingInputStream.Listener() {
@Override
public void onPause(long nanos) {
rateLimiterListener.onSnapshotPause(nanos);
}
};
this.restoreThrottleListener = new RateLimitingInputStream.Listener() {
@Override
public void onPause(long nanos) {
rateLimiterListener.onRestorePause(nanos);
}
};
this.compress = compress;
indexShardSnapshotFormat = new ChecksumBlobStoreFormat<>(SNAPSHOT_CODEC, SNAPSHOT_NAME_FORMAT, BlobStoreIndexShardSnapshot.PROTO, parseFieldMatcher, isCompress());
indexShardSnapshotLegacyFormat = new LegacyBlobStoreFormat<>(LEGACY_SNAPSHOT_NAME_FORMAT, BlobStoreIndexShardSnapshot.PROTO, parseFieldMatcher);
indexShardSnapshotsFormat = new ChecksumBlobStoreFormat<>(SNAPSHOT_INDEX_CODEC, SNAPSHOT_INDEX_NAME_FORMAT, BlobStoreIndexShardSnapshots.PROTO, parseFieldMatcher, isCompress());
}
/**
* {@inheritDoc}
*/
@Override
public void snapshot(SnapshotId snapshotId, ShardId shardId, SnapshotIndexCommit snapshotIndexCommit, IndexShardSnapshotStatus snapshotStatus) {
SnapshotContext snapshotContext = new SnapshotContext(snapshotId, shardId, snapshotStatus);
snapshotStatus.startTime(System.currentTimeMillis());
try {
snapshotContext.snapshot(snapshotIndexCommit);
snapshotStatus.time(System.currentTimeMillis() - snapshotStatus.startTime());
snapshotStatus.updateStage(IndexShardSnapshotStatus.Stage.DONE);
} catch (Throwable e) {
snapshotStatus.time(System.currentTimeMillis() - snapshotStatus.startTime());
snapshotStatus.updateStage(IndexShardSnapshotStatus.Stage.FAILURE);
snapshotStatus.failure(ExceptionsHelper.detailedMessage(e));
if (e instanceof IndexShardSnapshotFailedException) {
throw (IndexShardSnapshotFailedException) e;
} else {
throw new IndexShardSnapshotFailedException(shardId, e.getMessage(), e);
}
}
}
/**
* {@inheritDoc}
*/
@Override
public void restore(SnapshotId snapshotId, Version version, ShardId shardId, ShardId snapshotShardId, RecoveryState recoveryState) {
final RestoreContext snapshotContext = new RestoreContext(snapshotId, version, shardId, snapshotShardId, recoveryState);
try {
snapshotContext.restore();
} catch (Throwable e) {
throw new IndexShardRestoreFailedException(shardId, "failed to restore snapshot [" + snapshotId.getSnapshot() + "]", e);
}
}
/**
* {@inheritDoc}
*/
@Override
public IndexShardSnapshotStatus snapshotStatus(SnapshotId snapshotId, Version version, ShardId shardId) {
Context context = new Context(snapshotId, version, shardId);
BlobStoreIndexShardSnapshot snapshot = context.loadSnapshot();
IndexShardSnapshotStatus status = new IndexShardSnapshotStatus();
status.updateStage(IndexShardSnapshotStatus.Stage.DONE);
status.startTime(snapshot.startTime());
status.files(snapshot.numberOfFiles(), snapshot.totalSize());
// The snapshot is done which means the number of processed files is the same as total
status.processedFiles(snapshot.numberOfFiles(), snapshot.totalSize());
status.time(snapshot.time());
return status;
}
@Override
public void verify(String seed) {
BlobContainer testBlobContainer = blobStore.blobContainer(basePath.add(testBlobPrefix(seed)));
DiscoveryNode localNode = clusterService.localNode();
if (testBlobContainer.blobExists("master.dat")) {
try {
testBlobContainer.writeBlob("data-" + localNode.getId() + ".dat", new BytesArray(seed));
} catch (IOException exp) {
throw new RepositoryVerificationException(repositoryName, "store location [" + blobStore + "] is not accessible on the node [" + localNode + "]", exp);
}
} else {
throw new RepositoryVerificationException(repositoryName, "a file written by master to the store [" + blobStore + "] cannot be accessed on the node [" + localNode + "]. "
+ "This might indicate that the store [" + blobStore + "] is not shared between this node and the master node or "
+ "that permissions on the store don't allow reading files written by the master node");
}
}
/**
* Delete shard snapshot
*
* @param snapshotId snapshot id
* @param shardId shard id
*/
public void delete(SnapshotId snapshotId, Version version, ShardId shardId) {
Context context = new Context(snapshotId, version, shardId, shardId);
context.delete();
}
@Override
public String toString() {
return "BlobStoreIndexShardRepository[" +
"[" + repositoryName +
"], [" + blobStore + ']' +
']';
}
/**
* Returns true if metadata files should be compressed
*
* @return true if compression is needed
*/
protected boolean isCompress() {
return compress;
}
BlobStoreFormat indexShardSnapshotFormat(Version version) {
if (BlobStoreRepository.legacyMetaData(version)) {
return indexShardSnapshotLegacyFormat;
} else {
return indexShardSnapshotFormat;
}
}
/**
* Context for snapshot/restore operations
*/
private class Context {
protected final SnapshotId snapshotId;
protected final ShardId shardId;
protected final BlobContainer blobContainer;
protected final Version version;
public Context(SnapshotId snapshotId, Version version, ShardId shardId) {
this(snapshotId, version, shardId, shardId);
}
public Context(SnapshotId snapshotId, Version version, ShardId shardId, ShardId snapshotShardId) {
this.snapshotId = snapshotId;
this.version = version;
this.shardId = shardId;
blobContainer = blobStore.blobContainer(basePath.add("indices").add(snapshotShardId.getIndex()).add(Integer.toString(snapshotShardId.getId())));
}
/**
* Delete shard snapshot
*/
public void delete() {
final Map blobs;
try {
blobs = blobContainer.listBlobs();
} catch (IOException e) {
throw new IndexShardSnapshotException(shardId, "Failed to list content of gateway", e);
}
Tuple tuple = buildBlobStoreIndexShardSnapshots(blobs);
BlobStoreIndexShardSnapshots snapshots = tuple.v1();
int fileListGeneration = tuple.v2();
try {
indexShardSnapshotFormat(version).delete(blobContainer, snapshotId.getSnapshot());
} catch (IOException e) {
logger.debug("[{}] [{}] failed to delete shard snapshot file", shardId, snapshotId);
}
// Build a list of snapshots that should be preserved
List newSnapshotsList = new ArrayList<>();
for (SnapshotFiles point : snapshots) {
if (!point.snapshot().equals(snapshotId.getSnapshot())) {
newSnapshotsList.add(point);
}
}
// finalize the snapshot and rewrite the snapshot index with the next sequential snapshot index
finalize(newSnapshotsList, fileListGeneration + 1, blobs);
}
/**
* Loads information about shard snapshot
*/
public BlobStoreIndexShardSnapshot loadSnapshot() {
try {
return indexShardSnapshotFormat(version).read(blobContainer, snapshotId.getSnapshot());
} catch (IOException ex) {
throw new IndexShardRestoreFailedException(shardId, "failed to read shard snapshot file", ex);
}
}
/**
* Removes all unreferenced files from the repository and writes new index file
*
* We need to be really careful in handling index files in case of failures to make sure we have index file that
* points to files that were deleted.
*
*
* @param snapshots list of active snapshots in the container
* @param fileListGeneration the generation number of the snapshot index file
* @param blobs list of blobs in the container
*/
protected void finalize(List snapshots, int fileListGeneration, Map blobs) {
BlobStoreIndexShardSnapshots newSnapshots = new BlobStoreIndexShardSnapshots(snapshots);
List blobsToDelete = new ArrayList<>();
// delete old index files first
for (String blobName : blobs.keySet()) {
// delete old file lists
if (indexShardSnapshotsFormat.isTempBlobName(blobName) || blobName.startsWith(SNAPSHOT_INDEX_PREFIX)) {
blobsToDelete.add(blobName);
}
}
try {
blobContainer.deleteBlobs(blobsToDelete);
} catch (IOException e) {
// We cannot delete index file - this is fatal, we cannot continue, otherwise we might end up
// with references to non-existing files
throw new IndexShardSnapshotFailedException(shardId, "error deleting index files during cleanup, reason: " + e.getMessage(), e);
}
blobsToDelete = new ArrayList<>();
// now go over all the blobs, and if they don't exists in a snapshot, delete them
for (String blobName : blobs.keySet()) {
// delete unused files
if (blobName.startsWith(DATA_BLOB_PREFIX)) {
if (newSnapshots.findNameFile(FileInfo.canonicalName(blobName)) == null) {
blobsToDelete.add(blobName);
}
}
}
try {
blobContainer.deleteBlobs(blobsToDelete);
} catch (IOException e) {
logger.debug("[{}] [{}] error deleting some of the blobs [{}] during cleanup", e, snapshotId, shardId, blobsToDelete);
}
// If we deleted all snapshots - we don't need to create the index file
if (snapshots.size() > 0) {
try {
indexShardSnapshotsFormat.writeAtomic(newSnapshots, blobContainer, Integer.toString(fileListGeneration));
} catch (IOException e) {
throw new IndexShardSnapshotFailedException(shardId, "Failed to write file list", e);
}
}
}
/**
* Generates blob name
*
* @param generation the blob number
* @return the blob name
*/
protected String fileNameFromGeneration(long generation) {
return DATA_BLOB_PREFIX + Long.toString(generation, Character.MAX_RADIX);
}
/**
* Finds the next available blob number
*
* @param blobs list of blobs in the repository
* @return next available blob number
*/
protected long findLatestFileNameGeneration(Map blobs) {
long generation = -1;
for (String name : blobs.keySet()) {
if (!name.startsWith(DATA_BLOB_PREFIX)) {
continue;
}
name = FileInfo.canonicalName(name);
try {
long currentGen = Long.parseLong(name.substring(DATA_BLOB_PREFIX.length()), Character.MAX_RADIX);
if (currentGen > generation) {
generation = currentGen;
}
} catch (NumberFormatException e) {
logger.warn("file [{}] does not conform to the '{}' schema", name, DATA_BLOB_PREFIX);
}
}
return generation;
}
/**
* Loads all available snapshots in the repository
*
* @param blobs list of blobs in repository
* @return tuple of BlobStoreIndexShardSnapshots and the last snapshot index generation
*/
protected Tuple buildBlobStoreIndexShardSnapshots(Map blobs) {
int latest = -1;
for (String name : blobs.keySet()) {
if (name.startsWith(SNAPSHOT_INDEX_PREFIX)) {
try {
int gen = Integer.parseInt(name.substring(SNAPSHOT_INDEX_PREFIX.length()));
if (gen > latest) {
latest = gen;
}
} catch (NumberFormatException ex) {
logger.warn("failed to parse index file name [{}]", name);
}
}
}
if (latest >= 0) {
try {
return new Tuple<>(indexShardSnapshotsFormat.read(blobContainer, Integer.toString(latest)), latest);
} catch (IOException e) {
logger.warn("failed to read index file [{}]", e, SNAPSHOT_INDEX_PREFIX + latest);
}
}
// We couldn't load the index file - falling back to loading individual snapshots
List snapshots = new ArrayList<>();
for (String name : blobs.keySet()) {
try {
BlobStoreIndexShardSnapshot snapshot = null;
if (name.startsWith(SNAPSHOT_PREFIX)) {
snapshot = indexShardSnapshotFormat.readBlob(blobContainer, name);
} else if (name.startsWith(LEGACY_SNAPSHOT_PREFIX)) {
snapshot = indexShardSnapshotLegacyFormat.readBlob(blobContainer, name);
}
if (snapshot != null) {
snapshots.add(new SnapshotFiles(snapshot.snapshot(), snapshot.indexFiles()));
}
} catch (IOException e) {
logger.warn("failed to read commit point [{}]", e, name);
}
}
return new Tuple<>(new BlobStoreIndexShardSnapshots(snapshots), -1);
}
}
/**
* Context for snapshot operations
*/
private class SnapshotContext extends Context {
private final Store store;
private final IndexShardSnapshotStatus snapshotStatus;
/**
* Constructs new context
*
* @param snapshotId snapshot id
* @param shardId shard to be snapshotted
* @param snapshotStatus snapshot status to report progress
*/
public SnapshotContext(SnapshotId snapshotId, ShardId shardId, IndexShardSnapshotStatus snapshotStatus) {
super(snapshotId, Version.CURRENT, shardId);
IndexService indexService = indicesService.indexServiceSafe(shardId.getIndex());
store = indexService.shardInjectorSafe(shardId.id()).getInstance(Store.class);
this.snapshotStatus = snapshotStatus;
}
/**
* Create snapshot from index commit point
*
* @param snapshotIndexCommit snapshot commit point
*/
public void snapshot(SnapshotIndexCommit snapshotIndexCommit) {
logger.debug("[{}] [{}] snapshot to [{}] ...", shardId, snapshotId, repositoryName);
store.incRef();
try {
final Map blobs;
try {
blobs = blobContainer.listBlobs();
} catch (IOException e) {
throw new IndexShardSnapshotFailedException(shardId, "failed to list blobs", e);
}
long generation = findLatestFileNameGeneration(blobs);
Tuple tuple = buildBlobStoreIndexShardSnapshots(blobs);
BlobStoreIndexShardSnapshots snapshots = tuple.v1();
int fileListGeneration = tuple.v2();
final List indexCommitPointFiles = new ArrayList<>();
int indexNumberOfFiles = 0;
long indexTotalFilesSize = 0;
ArrayList filesToSnapshot = new ArrayList<>();
final Store.MetadataSnapshot metadata;
// TODO apparently we don't use the MetadataSnapshot#.recoveryDiff(...) here but we should
try {
metadata = store.getMetadata(snapshotIndexCommit);
} catch (IOException e) {
throw new IndexShardSnapshotFailedException(shardId, "Failed to get store file metadata", e);
}
for (String fileName : snapshotIndexCommit.getFiles()) {
if (snapshotStatus.aborted()) {
logger.debug("[{}] [{}] Aborted on the file [{}], exiting", shardId, snapshotId, fileName);
throw new IndexShardSnapshotFailedException(shardId, "Aborted");
}
logger.trace("[{}] [{}] Processing [{}]", shardId, snapshotId, fileName);
final StoreFileMetaData md = metadata.get(fileName);
FileInfo existingFileInfo = null;
List filesInfo = snapshots.findPhysicalIndexFiles(fileName);
if (filesInfo != null) {
for (FileInfo fileInfo : filesInfo) {
try {
// in 1.3.3 we added additional hashes for .si / segments_N files
// to ensure we don't double the space in the repo since old snapshots
// don't have this hash we try to read that hash from the blob store
// in a bwc compatible way.
maybeRecalculateMetadataHash(blobContainer, fileInfo, metadata);
} catch (Throwable e) {
logger.warn("{} Can't calculate hash from blob for file [{}] [{}]", e, shardId, fileInfo.physicalName(), fileInfo.metadata());
}
if (fileInfo.isSame(md) && snapshotFileExistsInBlobs(fileInfo, blobs)) {
// a commit point file with the same name, size and checksum was already copied to repository
// we will reuse it for this snapshot
existingFileInfo = fileInfo;
break;
}
}
}
if (existingFileInfo == null) {
indexNumberOfFiles++;
indexTotalFilesSize += md.length();
// create a new FileInfo
BlobStoreIndexShardSnapshot.FileInfo snapshotFileInfo = new BlobStoreIndexShardSnapshot.FileInfo(fileNameFromGeneration(++generation), md, chunkSize);
indexCommitPointFiles.add(snapshotFileInfo);
filesToSnapshot.add(snapshotFileInfo);
} else {
indexCommitPointFiles.add(existingFileInfo);
}
}
snapshotStatus.files(indexNumberOfFiles, indexTotalFilesSize);
if (snapshotStatus.aborted()) {
logger.debug("[{}] [{}] Aborted during initialization", shardId, snapshotId);
throw new IndexShardSnapshotFailedException(shardId, "Aborted");
}
snapshotStatus.updateStage(IndexShardSnapshotStatus.Stage.STARTED);
for (FileInfo snapshotFileInfo : filesToSnapshot) {
try {
snapshotFile(snapshotFileInfo);
} catch (IOException e) {
throw new IndexShardSnapshotFailedException(shardId, "Failed to perform snapshot (index files)", e);
}
}
snapshotStatus.indexVersion(snapshotIndexCommit.getGeneration());
// now create and write the commit point
snapshotStatus.updateStage(IndexShardSnapshotStatus.Stage.FINALIZE);
BlobStoreIndexShardSnapshot snapshot = new BlobStoreIndexShardSnapshot(snapshotId.getSnapshot(),
snapshotIndexCommit.getGeneration(), indexCommitPointFiles, snapshotStatus.startTime(),
// snapshotStatus.startTime() is assigned on the same machine, so it's safe to use with VLong
System.currentTimeMillis() - snapshotStatus.startTime(), indexNumberOfFiles, indexTotalFilesSize);
//TODO: The time stored in snapshot doesn't include cleanup time.
logger.trace("[{}] [{}] writing shard snapshot file", shardId, snapshotId);
try {
indexShardSnapshotFormat.write(snapshot, blobContainer, snapshotId.getSnapshot());
} catch (IOException e) {
throw new IndexShardSnapshotFailedException(shardId, "Failed to write commit point", e);
}
// delete all files that are not referenced by any commit point
// build a new BlobStoreIndexShardSnapshot, that includes this one and all the saved ones
List newSnapshotsList = new ArrayList<>();
newSnapshotsList.add(new SnapshotFiles(snapshot.snapshot(), snapshot.indexFiles()));
for (SnapshotFiles point : snapshots) {
newSnapshotsList.add(point);
}
// finalize the snapshot and rewrite the snapshot index with the next sequential snapshot index
finalize(newSnapshotsList, fileListGeneration + 1, blobs);
snapshotStatus.updateStage(IndexShardSnapshotStatus.Stage.DONE);
} finally {
store.decRef();
}
}
/**
* Snapshot individual file
*
* This is asynchronous method. Upon completion of the operation latch is getting counted down and any failures are
* added to the {@code failures} list
*
* @param fileInfo file to be snapshotted
*/
private void snapshotFile(final BlobStoreIndexShardSnapshot.FileInfo fileInfo) throws IOException {
final String file = fileInfo.physicalName();
try (IndexInput indexInput = store.openVerifyingInput(file, IOContext.READONCE, fileInfo.metadata())) {
for (int i = 0; i < fileInfo.numberOfParts(); i++) {
final long partBytes = fileInfo.partBytes(i);
final InputStreamIndexInput inputStreamIndexInput = new InputStreamIndexInput(indexInput, partBytes);
InputStream inputStream = snapshotRateLimiter == null ? inputStreamIndexInput : new RateLimitingInputStream(inputStreamIndexInput, snapshotRateLimiter, snapshotThrottleListener);
inputStream = new AbortableInputStream(inputStream, fileInfo.physicalName());
blobContainer.writeBlob(fileInfo.partName(i), inputStream, partBytes);
}
Store.verify(indexInput);
snapshotStatus.addProcessedFile(fileInfo.length());
} catch (Throwable t) {
failStoreIfCorrupted(t);
snapshotStatus.addProcessedFile(0);
throw t;
}
}
private void failStoreIfCorrupted(Throwable t) {
if (t instanceof CorruptIndexException || t instanceof IndexFormatTooOldException || t instanceof IndexFormatTooNewException) {
try {
store.markStoreCorrupted((IOException) t);
} catch (IOException e) {
logger.warn("store cannot be marked as corrupted", e);
}
}
}
/**
* Checks if snapshot file already exists in the list of blobs
*
* @param fileInfo file to check
* @param blobs list of blobs
* @return true if file exists in the list of blobs
*/
private boolean snapshotFileExistsInBlobs(BlobStoreIndexShardSnapshot.FileInfo fileInfo, Map blobs) {
BlobMetaData blobMetaData = blobs.get(fileInfo.name());
if (blobMetaData != null) {
return blobMetaData.length() == fileInfo.length();
} else if (blobs.containsKey(fileInfo.partName(0))) {
// multi part file sum up the size and check
int part = 0;
long totalSize = 0;
while (true) {
blobMetaData = blobs.get(fileInfo.partName(part++));
if (blobMetaData == null) {
break;
}
totalSize += blobMetaData.length();
}
return totalSize == fileInfo.length();
}
// no file, not exact and not multipart
return false;
}
private class AbortableInputStream extends FilterInputStream {
private final String fileName;
public AbortableInputStream(InputStream delegate, String fileName) {
super(delegate);
this.fileName = fileName;
}
@Override
public int read() throws IOException {
checkAborted();
return in.read();
}
@Override
public int read(byte[] b, int off, int len) throws IOException {
checkAborted();
return in.read(b, off, len);
}
private void checkAborted() {
if (snapshotStatus.aborted()) {
logger.debug("[{}] [{}] Aborted on the file [{}], exiting", shardId, snapshotId, fileName);
throw new IndexShardSnapshotFailedException(shardId, "Aborted");
}
}
}
}
/**
* This is a BWC layer to ensure we update the snapshots metdata with the corresponding hashes before we compare them.
* The new logic for StoreFileMetaData reads the entire .si and segments.n files to strengthen the
* comparison of the files on a per-segment / per-commit level.
*/
private static void maybeRecalculateMetadataHash(final BlobContainer blobContainer, final FileInfo fileInfo, Store.MetadataSnapshot snapshot) throws Throwable {
final StoreFileMetaData metadata;
if (fileInfo != null && (metadata = snapshot.get(fileInfo.physicalName())) != null) {
if (metadata.hash().length > 0 && fileInfo.metadata().hash().length == 0) {
// we have a hash - check if our repo has a hash too otherwise we have
// to calculate it.
// we might have multiple parts even though the file is small... make sure we read all of it.
try (final InputStream stream = new PartSliceStream(blobContainer, fileInfo)) {
BytesRefBuilder builder = new BytesRefBuilder();
Store.MetadataSnapshot.hashFile(builder, stream, fileInfo.length());
BytesRef hash = fileInfo.metadata().hash(); // reset the file infos metadata hash
assert hash.length == 0;
hash.bytes = builder.bytes();
hash.offset = 0;
hash.length = builder.length();
}
}
}
}
private static final class PartSliceStream extends SlicedInputStream {
private final BlobContainer container;
private final FileInfo info;
public PartSliceStream(BlobContainer container, FileInfo info) {
super(info.numberOfParts());
this.info = info;
this.container = container;
}
@Override
protected InputStream openSlice(long slice) throws IOException {
return container.readBlob(info.partName(slice));
}
}
/**
* Context for restore operations
*/
private class RestoreContext extends Context {
private final Store store;
private final RecoveryState recoveryState;
/**
* Constructs new restore context
*
* @param snapshotId snapshot id
* @param shardId shard to be restored
* @param snapshotShardId shard in the snapshot that data should be restored from
* @param recoveryState recovery state to report progress
*/
public RestoreContext(SnapshotId snapshotId, Version version, ShardId shardId, ShardId snapshotShardId, RecoveryState recoveryState) {
super(snapshotId, version, shardId, snapshotShardId);
store = indicesService.indexServiceSafe(shardId.getIndex()).shardInjectorSafe(shardId.id()).getInstance(Store.class);
this.recoveryState = recoveryState;
}
/**
* Performs restore operation
*/
public void restore() throws IOException {
store.incRef();
try {
logger.debug("[{}] [{}] restoring to [{}] ...", snapshotId, repositoryName, shardId);
BlobStoreIndexShardSnapshot snapshot = loadSnapshot();
SnapshotFiles snapshotFiles = new SnapshotFiles(snapshot.snapshot(), snapshot.indexFiles());
final Store.MetadataSnapshot recoveryTargetMetadata;
try {
recoveryTargetMetadata = store.getMetadataOrEmpty();
} catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException e) {
logger.warn("{} Can't read metadata from store", e, shardId);
throw new IndexShardRestoreFailedException(shardId, "Can't restore corrupted shard", e);
}
final List filesToRecover = new ArrayList<>();
final Map snapshotMetaData = new HashMap<>();
final Map fileInfos = new HashMap<>();
for (final FileInfo fileInfo : snapshot.indexFiles()) {
try {
// in 1.3.3 we added additional hashes for .si / segments_N files
// to ensure we don't double the space in the repo since old snapshots
// don't have this hash we try to read that hash from the blob store
// in a bwc compatible way.
maybeRecalculateMetadataHash(blobContainer, fileInfo, recoveryTargetMetadata);
} catch (Throwable e) {
// if the index is broken we might not be able to read it
logger.warn("{} Can't calculate hash from blog for file [{}] [{}]", e, shardId, fileInfo.physicalName(), fileInfo.metadata());
}
snapshotMetaData.put(fileInfo.metadata().name(), fileInfo.metadata());
fileInfos.put(fileInfo.metadata().name(), fileInfo);
}
final Store.MetadataSnapshot sourceMetaData = new Store.MetadataSnapshot(snapshotMetaData, Collections.EMPTY_MAP, 0);
final Store.RecoveryDiff diff = sourceMetaData.recoveryDiff(recoveryTargetMetadata);
for (StoreFileMetaData md : diff.identical) {
FileInfo fileInfo = fileInfos.get(md.name());
recoveryState.getIndex().addFileDetail(fileInfo.name(), fileInfo.length(), true);
if (logger.isTraceEnabled()) {
logger.trace("[{}] [{}] not_recovering [{}] from [{}], exists in local store and is same", shardId, snapshotId, fileInfo.physicalName(), fileInfo.name());
}
}
for (StoreFileMetaData md : Iterables.concat(diff.different, diff.missing)) {
FileInfo fileInfo = fileInfos.get(md.name());
filesToRecover.add(fileInfo);
recoveryState.getIndex().addFileDetail(fileInfo.name(), fileInfo.length(), false);
if (logger.isTraceEnabled()) {
if (md == null) {
logger.trace("[{}] [{}] recovering [{}] from [{}], does not exists in local store", shardId, snapshotId, fileInfo.physicalName(), fileInfo.name());
} else {
logger.trace("[{}] [{}] recovering [{}] from [{}], exists in local store but is different", shardId, snapshotId, fileInfo.physicalName(), fileInfo.name());
}
}
}
final RecoveryState.Index index = recoveryState.getIndex();
if (filesToRecover.isEmpty()) {
logger.trace("no files to recover, all exists within the local store");
}
if (logger.isTraceEnabled()) {
logger.trace("[{}] [{}] recovering_files [{}] with total_size [{}], reusing_files [{}] with reused_size [{}]", shardId, snapshotId,
index.totalRecoverFiles(), new ByteSizeValue(index.totalRecoverBytes()), index.reusedFileCount(), new ByteSizeValue(index.reusedFileCount()));
}
try {
for (final FileInfo fileToRecover : filesToRecover) {
logger.trace("[{}] [{}] restoring file [{}]", shardId, snapshotId, fileToRecover.name());
restoreFile(fileToRecover);
}
} catch (IOException ex) {
throw new IndexShardRestoreFailedException(shardId, "Failed to recover index", ex);
}
final StoreFileMetaData restoredSegmentsFile = sourceMetaData.getSegmentsFile();
if (recoveryTargetMetadata == null) {
throw new IndexShardRestoreFailedException(shardId, "Snapshot has no segments file");
}
assert restoredSegmentsFile != null;
// read the snapshot data persisted
final SegmentInfos segmentCommitInfos;
try {
segmentCommitInfos = Lucene.pruneUnreferencedFiles(restoredSegmentsFile.name(), store.directory());
} catch (IOException e) {
throw new IndexShardRestoreFailedException(shardId, "Failed to fetch index version after copying it over", e);
}
recoveryState.getIndex().updateVersion(segmentCommitInfos.getVersion());
/// now, go over and clean files that are in the store, but were not in the snapshot
try {
for (String storeFile : store.directory().listAll()) {
if (Store.isAutogenerated(storeFile) || snapshotFiles.containPhysicalIndexFile(storeFile)) {
continue; //skip write.lock, checksum files and files that exist in the snapshot
}
try {
store.deleteQuiet("restore", storeFile);
store.directory().deleteFile(storeFile);
} catch (IOException e) {
logger.warn("[{}] failed to delete file [{}] during snapshot cleanup", snapshotId, storeFile);
}
}
} catch (IOException e) {
logger.warn("[{}] failed to list directory - some of files might not be deleted", snapshotId);
}
} finally {
store.decRef();
}
}
/**
* Restores a file
* This is asynchronous method. Upon completion of the operation latch is getting counted down and any failures are
* added to the {@code failures} list
*
* @param fileInfo file to be restored
*/
private void restoreFile(final FileInfo fileInfo) throws IOException {
boolean success = false;
try (InputStream partSliceStream = new PartSliceStream(blobContainer, fileInfo)) {
final InputStream stream;
if (restoreRateLimiter == null) {
stream = partSliceStream;
} else {
stream = new RateLimitingInputStream(partSliceStream, restoreRateLimiter, restoreThrottleListener);
}
try (final IndexOutput indexOutput = store.createVerifyingOutput(fileInfo.physicalName(), fileInfo.metadata(), IOContext.DEFAULT)) {
final byte[] buffer = new byte[BUFFER_SIZE];
int length;
while ((length = stream.read(buffer)) > 0) {
indexOutput.writeBytes(buffer, 0, length);
recoveryState.getIndex().addRecoveredBytesToFile(fileInfo.name(), length);
}
Store.verify(indexOutput);
indexOutput.close();
// write the checksum
if (fileInfo.metadata().hasLegacyChecksum()) {
Store.LegacyChecksums legacyChecksums = new Store.LegacyChecksums();
legacyChecksums.add(fileInfo.metadata());
legacyChecksums.write(store);
}
store.directory().sync(Collections.singleton(fileInfo.physicalName()));
success = true;
} catch (CorruptIndexException | IndexFormatTooOldException | IndexFormatTooNewException ex) {
try {
store.markStoreCorrupted(ex);
} catch (IOException e) {
logger.warn("store cannot be marked as corrupted", e);
}
throw ex;
} finally {
if (success == false) {
store.deleteQuiet(fileInfo.physicalName());
}
}
}
}
}
public interface RateLimiterListener {
void onRestorePause(long nanos);
void onSnapshotPause(long nanos);
}
}