org.elasticsearch.snapshots.SourceOnlySnapshotRepository

/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
package org.elasticsearch.snapshots;

import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.FilterDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.elasticsearch.Version;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.MappingMetadata;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.cluster.metadata.RepositoryMetadata;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.env.ShardLock;
import org.elasticsearch.index.engine.EngineConfig;
import org.elasticsearch.index.engine.EngineFactory;
import org.elasticsearch.index.engine.ReadOnlyEngine;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus;
import org.elasticsearch.index.store.Store;
import org.elasticsearch.index.translog.TranslogStats;
import org.elasticsearch.repositories.FilterRepository;
import org.elasticsearch.repositories.IndexId;
import org.elasticsearch.repositories.Repository;
import org.elasticsearch.repositories.RepositoryData;
import org.elasticsearch.repositories.ShardGenerations;

import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.function.Supplier;

/**
 * <p>
 * This is a filter snapshot repository that only snapshots the minimal required information
 * that is needed to recreate the index. In other words, instead of snapshotting the entire shard
 * with all its Lucene indexed fields, doc values, points etc., it only snapshots the stored
 * fields including _source and _routing, as well as the live docs in order to distinguish between
 * live and deleted docs.
 * </p>
 * <p>
 * The repository can wrap any other repository, delegating the source only snapshot to it and reading
 * from it. For instance, a file repository of type <i>fs</i> can be wrapped by passing
 * <tt>settings.delegate_type=fs</tt> at repository creation time.
 * </p>
 * Snapshots restored from source only snapshots are minimal indices that are read-only and only allow
 * match_all scroll searches in order to reindex the data.
 */
public final class SourceOnlySnapshotRepository extends FilterRepository {

    private static final Setting<String> DELEGATE_TYPE = new Setting<>("delegate_type", "", Function.identity(),
        Setting.Property.NodeScope);
    public static final Setting<Boolean> SOURCE_ONLY = Setting.boolSetting("index.source_only", false,
        Setting.Property.IndexScope, Setting.Property.Final, Setting.Property.PrivateIndex);

    private static final Logger logger = LogManager.getLogger(SourceOnlySnapshotRepository.class);

    private static final String SNAPSHOT_DIR_NAME = "_snapshot";

    SourceOnlySnapshotRepository(Repository in) {
        super(in);
    }

    @Override
    public void initializeSnapshot(SnapshotId snapshotId, List<IndexId> indices, Metadata metadata) {
        // we process the index metadata at snapshot time. This means if somebody tries to restore
        // a _source only snapshot with a plain repository it will be just fine since we already set the
        // required engine, that the index is read-only, and the mapping to a default mapping
        try {
            super.initializeSnapshot(snapshotId, indices, metadataToSnapshot(indices, metadata));
        } catch (IOException ex) {
            throw new UncheckedIOException(ex);
        }
    }

    @Override
    public void finalizeSnapshot(ShardGenerations shardGenerations, long repositoryStateId, Metadata metadata,
                                 SnapshotInfo snapshotInfo, Version repositoryMetaVersion,
                                 Function<ClusterState, ClusterState> stateTransformer,
                                 ActionListener<RepositoryData> listener) {
        // we process the index metadata at snapshot time. This means if somebody tries to restore
        // a _source only snapshot with a plain repository it will be just fine since we already set the
        // required engine, that the index is read-only, and the mapping to a default mapping
        try {
            super.finalizeSnapshot(shardGenerations, repositoryStateId,
                metadataToSnapshot(shardGenerations.indices(), metadata), snapshotInfo, repositoryMetaVersion,
                stateTransformer, listener);
        } catch (IOException ex) {
            listener.onFailure(ex);
        }
    }

    private static Metadata metadataToSnapshot(Collection<IndexId> indices, Metadata metadata) throws IOException {
        Metadata.Builder builder = Metadata.builder(metadata);
        for (IndexId indexId : indices) {
            IndexMetadata index = metadata.index(indexId.getName());
            IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(index);
            // for a minimal restore we basically disable indexing on all fields and only create an index
            // that is valid from an operational perspective, i.e. it will have all metadata fields like version/
            // seqID etc. and an indexed ID field such that we can potentially perform updates on them or delete documents.
            ImmutableOpenMap<String, MappingMetadata> mappings = index.getMappings();
            Iterator<ObjectObjectCursor<String, MappingMetadata>> iterator = mappings.iterator();
            while (iterator.hasNext()) {
                ObjectObjectCursor<String, MappingMetadata> next = iterator.next();
                // we don't need to obey any routing here, stuff is read-only anyway and get is disabled
                final String mapping = "{ \"" + next.key + "\": { \"enabled\": false, \"_meta\": "
                    + next.value.source().string() + " } }";
                indexMetadataBuilder.putMapping(next.key, mapping);
            }
            indexMetadataBuilder.settings(Settings.builder().put(index.getSettings())
                .put(SOURCE_ONLY.getKey(), true)
                .put("index.blocks.write", true)); // read-only!
            indexMetadataBuilder.settingsVersion(1 + indexMetadataBuilder.settingsVersion());
            builder.put(indexMetadataBuilder);
        }
        return builder.build();
    }
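    // Illustration (hypothetical mapping, not taken from this file): metadataToSnapshot rewrites
    //     { "_doc": { "properties": { "title": { "type": "text" } } } }
    // into
    //     { "_doc": { "enabled": false, "_meta": { "_doc": { "properties": { "title": { "type": "text" } } } } } }
    // so the restored index indexes nothing beyond the metadata fields, while the original
    // mapping remains recoverable under "_meta" for a later reindex.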
    @Override
    public void snapshotShard(Store store, MapperService mapperService, SnapshotId snapshotId, IndexId indexId,
                              IndexCommit snapshotIndexCommit, String shardStateIdentifier,
                              IndexShardSnapshotStatus snapshotStatus, Version repositoryMetaVersion,
                              Map<String, Object> userMetadata, ActionListener<String> listener) {
        if (mapperService.documentMapper() != null // if there is no mapping this is null
            && mapperService.documentMapper().sourceMapper().isComplete() == false) {
            listener.onFailure(
                new IllegalStateException("Can't snapshot _source only on an index that has incomplete source ie. has _source disabled "
                    + "or filters the source"));
            return;
        }
        Directory unwrap = FilterDirectory.unwrap(store.directory());
        if (unwrap instanceof FSDirectory == false) {
            throw new AssertionError("expected FSDirectory but got " + unwrap.toString());
        }
        Path dataPath = ((FSDirectory) unwrap).getDirectory().getParent();
        // TODO should we have a snapshot tmp directory per shard that is maintained by the system?
        Path snapPath = dataPath.resolve(SNAPSHOT_DIR_NAME);
        final List<Closeable> toClose = new ArrayList<>(3);
        try {
            SourceOnlySnapshot.LinkedFilesDirectory overlayDir = new SourceOnlySnapshot.LinkedFilesDirectory(
                new SimpleFSDirectory(snapPath));
            toClose.add(overlayDir);
            Store tempStore = new Store(store.shardId(), store.indexSettings(), overlayDir, new ShardLock(store.shardId()) {
                @Override
                protected void closeInternal() {
                    // do nothing
                }
            }, Store.OnClose.EMPTY);
            Supplier<Query> querySupplier = mapperService.hasNested() ? Queries::newNestedFilter : null;
            // SourceOnlySnapshot will take care of soft- and hard-deletes, no special casing needed here
            SourceOnlySnapshot snapshot;
            snapshot = new SourceOnlySnapshot(overlayDir, querySupplier);
            try {
                snapshot.syncSnapshot(snapshotIndexCommit);
            } catch (NoSuchFileException | CorruptIndexException | FileAlreadyExistsException e) {
                logger.warn(() -> new ParameterizedMessage(
                    "Existing staging directory [{}] appears corrupted and will be pruned and recreated.", snapPath), e);
                Lucene.cleanLuceneIndex(overlayDir);
                snapshot.syncSnapshot(snapshotIndexCommit);
            }
            // we will use the Lucene doc ID as the seq ID, so we set the local checkpoint to maxDoc with a new index UUID
            SegmentInfos segmentInfos = tempStore.readLastCommittedSegmentsInfo();
            final long maxDoc = segmentInfos.totalMaxDoc();
            tempStore.bootstrapNewHistory(maxDoc, maxDoc);
            store.incRef();
            toClose.add(store::decRef);
            DirectoryReader reader = DirectoryReader.open(tempStore.directory());
            toClose.add(reader);
            IndexCommit indexCommit = reader.getIndexCommit();
            super.snapshotShard(tempStore, mapperService, snapshotId, indexId, indexCommit, shardStateIdentifier,
                snapshotStatus, repositoryMetaVersion, userMetadata,
                ActionListener.runBefore(listener, () -> IOUtils.close(toClose)));
        } catch (IOException e) {
            try {
                IOUtils.close(toClose);
            } catch (IOException ex) {
                e.addSuppressed(ex);
            }
            listener.onFailure(e);
        }
    }
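    // Note: the factory below is expected to be wired up by the enclosing plugin for indices
    // carrying the index.source_only setting (set in metadataToSnapshot above). The resulting
    // ReadOnlyEngine rejects writes, and SeqIdGeneratingFilterReader derives per-document
    // sequence IDs from Lucene doc IDs, matching the bootstrapNewHistory(maxDoc, maxDoc)
    // call in snapshotShard.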
    /**
     * Returns an {@link EngineFactory} for the source only snapshots.
     */
    public static EngineFactory getEngineFactory() {
        return config -> new ReadOnlyEngine(config, null, new TranslogStats(0, 0, 0, 0, 0), true,
            readerWrapper(config), true, false);
    }

    public static Function<DirectoryReader, DirectoryReader> readerWrapper(EngineConfig engineConfig) {
        return reader -> {
            try {
                return SeqIdGeneratingFilterReader.wrap(reader, engineConfig.getPrimaryTermSupplier().getAsLong());
            } catch (IOException e) {
                throw new UncheckedIOException(e);
            }
        };
    }

    /**
     * Returns a new source only repository factory
     */
    public static Repository.Factory newRepositoryFactory() {
        return new Repository.Factory() {

            @Override
            public Repository create(RepositoryMetadata metadata) {
                throw new UnsupportedOperationException();
            }

            @Override
            public Repository create(RepositoryMetadata metadata, Function<String, Repository.Factory> typeLookup) throws Exception {
                String delegateType = DELEGATE_TYPE.get(metadata.settings());
                if (Strings.hasLength(delegateType) == false) {
                    throw new IllegalArgumentException(DELEGATE_TYPE.getKey() + " must be set");
                }
                Repository.Factory factory = typeLookup.apply(delegateType);
                return new SourceOnlySnapshotRepository(factory.create(new RepositoryMetadata(metadata.name(),
                    delegateType, metadata.settings()), typeLookup));
            }
        };
    }
}
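For context, a minimal usage sketch (not part of the source above): registering a source-only repository with the Elasticsearch 7.x high-level REST client. The repository type "source" is the documented name served by newRepositoryFactory(), and delegate_type corresponds to the DELEGATE_TYPE setting in the class; the repository name and filesystem location here are made up for illustration.

import org.elasticsearch.action.admin.cluster.repositories.put.PutRepositoryRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.settings.Settings;

class SourceOnlyRepositoryExample {
    // Registers a hypothetical source-only repository backed by a shared filesystem.
    static void register(RestHighLevelClient client) throws java.io.IOException {
        PutRepositoryRequest request = new PutRepositoryRequest("my_src_only_repository");
        request.type("source"); // handled by SourceOnlySnapshotRepository
        request.settings(Settings.builder()
            .put("delegate_type", "fs")        // required; selects the wrapped repository type
            .put("location", "/mount/backups") // required by the "fs" delegate
            .build());
        client.snapshot().createRepository(request, RequestOptions.DEFAULT);
    }
}

As the class Javadoc notes, indices restored from such a repository are read-only and only support match_all scroll searches, so the usual follow-up is a reindex into a regular index.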



