// org.elasticsearch.snapshots.SourceOnlySnapshotRepository (Maven / Gradle / Ivy)

/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
package org.elasticsearch.snapshots;

import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.FilterDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.elasticsearch.Version;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.MappingMetadata;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.cluster.metadata.RepositoryMetadata;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.env.ShardLock;
import org.elasticsearch.index.engine.EngineConfig;
import org.elasticsearch.index.engine.EngineFactory;
import org.elasticsearch.index.engine.ReadOnlyEngine;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus;
import org.elasticsearch.index.store.Store;
import org.elasticsearch.index.translog.TranslogStats;
import org.elasticsearch.repositories.FilterRepository;
import org.elasticsearch.repositories.IndexId;
import org.elasticsearch.repositories.Repository;
import org.elasticsearch.repositories.RepositoryData;
import org.elasticsearch.repositories.ShardGenerations;

import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.NoSuchFileException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.function.Supplier;

/**
 * <p>
 * This is a filter snapshot repository that only snapshots the minimal required information
 * that is needed to recreate the index. In other words, instead of snapshotting the entire shard
 * with all its Lucene indexed fields, doc values, points etc., it only snapshots the stored
 * fields including _source and _routing as well as the live docs in order to distinguish between
 * live and deleted docs.
 * </p>
 * <p>
 * The repository can wrap any other repository, delegating the source only snapshot to it and reading
 * from it. For instance, a file repository of type <i>fs</i> can be wrapped by passing
 * <i>settings.delegate_type=fs</i> at repository creation time.
 * </p>
 * Snapshots restored from source only snapshots are minimal indices that are read-only and only allow
 * match_all scroll searches in order to reindex the data.
 */
public final class SourceOnlySnapshotRepository extends FilterRepository {
    /** Repository setting that names the type of the wrapped repository; the empty default is rejected by the factory. */
    private static final Setting<String> DELEGATE_TYPE = new Setting<>("delegate_type", "", Function.identity(),
        Setting.Property.NodeScope);
    /** Final, private index setting that is set to {@code true} on every index written through this repository. */
    public static final Setting<Boolean> SOURCE_ONLY = Setting.boolSetting("index.source_only", false,
        Setting.Property.IndexScope, Setting.Property.Final, Setting.Property.PrivateIndex);

    private static final Logger logger = LogManager.getLogger(SourceOnlySnapshotRepository.class);

    /** Name of the staging directory, resolved next to the shard's index directory, that holds the source-only copy. */
    private static final String SNAPSHOT_DIR_NAME = "_snapshot";

    /**
     * Wraps the given repository; all calls not overridden here are handled by {@link FilterRepository}.
     *
     * @param in the repository to delegate to
     */
    SourceOnlySnapshotRepository(Repository in) {
        super(in);
    }

    /**
     * Initializes the snapshot on the delegate using metadata rewritten by {@code metadataToSnapshot}.
     *
     * @param snapshotId the snapshot being initialized
     * @param indices    the indices that are part of the snapshot
     * @param metadata   the cluster metadata to rewrite and pass on
     * @throws UncheckedIOException if rewriting the metadata fails with an {@link IOException}
     */
    @Override
    public void initializeSnapshot(SnapshotId snapshotId, List<IndexId> indices, Metadata metadata) {
        // we process the index metadata at snapshot time. This means if somebody tries to restore
        // a _source only snapshot with a plain repository it will be just fine since we already set the
        // required engine, that the index is read-only and the mapping to a default mapping
        try {
            super.initializeSnapshot(snapshotId, indices, metadataToSnapshot(indices, metadata));
        } catch (IOException ex) {
            throw new UncheckedIOException(ex);
        }
    }

    /**
     * Finalizes the snapshot on the delegate using metadata rewritten by {@code metadataToSnapshot} for the
     * indices contained in {@code shardGenerations}. An {@link IOException} raised here is reported through
     * {@code listener} instead of being thrown.
     */
    @Override
    public void finalizeSnapshot(ShardGenerations shardGenerations, long repositoryStateId, Metadata metadata,
                                 SnapshotInfo snapshotInfo, Version repositoryMetaVersion,
                                 Function<ClusterState, ClusterState> stateTransformer,
                                 ActionListener<RepositoryData> listener) {
        // we process the index metadata at snapshot time. This means if somebody tries to restore
        // a _source only snapshot with a plain repository it will be just fine since we already set the
        // required engine, that the index is read-only and the mapping to a default mapping
        try {
            super.finalizeSnapshot(shardGenerations, repositoryStateId, metadataToSnapshot(shardGenerations.indices(), metadata),
                    snapshotInfo, repositoryMetaVersion, stateTransformer, listener);
        } catch (IOException ex) {
            listener.onFailure(ex);
        }
    }

    /**
     * Builds a copy of {@code metadata} in which every index in {@code indices} has each of its mappings replaced
     * by a disabled mapping that keeps the original mapping source under {@code _meta}, is flagged with
     * {@link #SOURCE_ONLY}, is write-blocked, and has its settings version incremented by one.
     *
     * @param indices  the indices whose metadata is rewritten
     * @param metadata the metadata to copy from
     * @return the rewritten metadata
     * @throws IOException declared for failures while rendering or storing the replacement mapping
     */
    private static Metadata metadataToSnapshot(Collection<IndexId> indices, Metadata metadata) throws IOException {
        Metadata.Builder builder = Metadata.builder(metadata);
        for (IndexId indexId : indices) {
            IndexMetadata index = metadata.index(indexId.getName());
            IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(index);
            // for a minimal restore we basically disable indexing on all fields and only create an index
            // that is valid from an operational perspective. ie. it will have all metadata fields like version/
            // seqID etc. and an indexed ID field such that we can potentially perform updates on them or delete documents.
            ImmutableOpenMap<String, MappingMetadata> mappings = index.getMappings();
            Iterator<ObjectObjectCursor<String, MappingMetadata>> iterator = mappings.iterator();
            while (iterator.hasNext()) {
                ObjectObjectCursor<String, MappingMetadata> next = iterator.next();
                // we don't need to obey any routing here stuff is read-only anyway and get is disabled
                final String mapping = "{ \"" + next.key + "\": { \"enabled\": false, \"_meta\": " + next.value.source().string()
                    + " } }";
                indexMetadataBuilder.putMapping(next.key, mapping);
            }
            indexMetadataBuilder.settings(Settings.builder().put(index.getSettings())
                .put(SOURCE_ONLY.getKey(), true)
                .put("index.blocks.write", true)); // read-only!
            indexMetadataBuilder.settingsVersion(1 + indexMetadataBuilder.settingsVersion());
            builder.put(indexMetadataBuilder);
        }
        return builder.build();
    }


    /**
     * Snapshots a shard by first syncing a source-only copy of {@code snapshotIndexCommit} into a staging
     * directory next to the shard's index directory and then handing that copy to the delegate repository.
     * <p>
     * The listener is failed without snapshotting anything if the index has a mapping whose source mapper
     * reports the source as incomplete. {@link IOException}s raised while preparing the copy are reported through
     * {@code listener} after the resources opened so far have been closed.
     *
     * @throws AssertionError if the store's unwrapped directory is not an {@link FSDirectory}
     */
    @Override
    public void snapshotShard(Store store, MapperService mapperService, SnapshotId snapshotId, IndexId indexId,
                              IndexCommit snapshotIndexCommit, String shardStateIdentifier, IndexShardSnapshotStatus snapshotStatus,
                              Version repositoryMetaVersion, Map<String, Object> userMetadata, ActionListener<String> listener) {
        if (mapperService.documentMapper() != null // if there is no mapping this is null
            && mapperService.documentMapper().sourceMapper().isComplete() == false) {
            listener.onFailure(
                new IllegalStateException("Can't snapshot _source only on an index that has incomplete source ie. has _source disabled " +
                    "or filters the source"));
            return;
        }
        Directory unwrap = FilterDirectory.unwrap(store.directory());
        if (unwrap instanceof FSDirectory == false) {
            throw new AssertionError("expected FSDirectory but got " + unwrap.toString());
        }
        Path dataPath = ((FSDirectory) unwrap).getDirectory().getParent();
        // TODO should we have a snapshot tmp directory per shard that is maintained by the system?
        Path snapPath = dataPath.resolve(SNAPSHOT_DIR_NAME);
        // resources are registered as they are opened and closed together, either before the listener is
        // notified by the delegate or in the IOException handler below
        final List<Closeable> toClose = new ArrayList<>(3);
        try {
            SourceOnlySnapshot.LinkedFilesDirectory overlayDir = new SourceOnlySnapshot.LinkedFilesDirectory(
                new SimpleFSDirectory(snapPath));
            toClose.add(overlayDir);
            // the temporary store is given a shard lock whose release is a no-op
            Store tempStore = new Store(store.shardId(), store.indexSettings(), overlayDir, new ShardLock(store.shardId()) {
                @Override
                protected void closeInternal() {
                    // do nothing;
                }
            }, Store.OnClose.EMPTY);
            Supplier<Query> querySupplier = mapperService.hasNested() ? Queries::newNestedFilter : null;
            // SourceOnlySnapshot will take care of soft- and hard-deletes no special casing needed here
            SourceOnlySnapshot snapshot;
            snapshot = new SourceOnlySnapshot(overlayDir, querySupplier);
            try {
                snapshot.syncSnapshot(snapshotIndexCommit);
            } catch (NoSuchFileException | CorruptIndexException | FileAlreadyExistsException e) {
                // a broken staging directory is wiped and the sync is retried exactly once
                logger.warn(() -> new ParameterizedMessage(
                        "Existing staging directory [{}] appears corrupted and will be pruned and recreated.", snapPath), e);
                Lucene.cleanLuceneIndex(overlayDir);
                snapshot.syncSnapshot(snapshotIndexCommit);
            }
            // we will use the lucene doc ID as the seq ID so we set the local checkpoint to maxDoc with a new index UUID
            SegmentInfos segmentInfos = tempStore.readLastCommittedSegmentsInfo();
            final long maxDoc = segmentInfos.totalMaxDoc();
            tempStore.bootstrapNewHistory(maxDoc, maxDoc);
            // hold a reference on the original store until the registered resources are closed
            store.incRef();
            toClose.add(store::decRef);
            DirectoryReader reader = DirectoryReader.open(tempStore.directory());
            toClose.add(reader);
            IndexCommit indexCommit = reader.getIndexCommit();
            super.snapshotShard(tempStore, mapperService, snapshotId, indexId, indexCommit, shardStateIdentifier, snapshotStatus,
                repositoryMetaVersion, userMetadata, ActionListener.runBefore(listener, () -> IOUtils.close(toClose)));
        } catch (IOException e) {
            try {
                IOUtils.close(toClose);
            } catch (IOException ex) {
                e.addSuppressed(ex);
            }
            listener.onFailure(e);
        }
    }

    /**
     * Returns an {@link EngineFactory} for the source only snapshots. The factory creates a
     * {@link ReadOnlyEngine} whose readers are wrapped by {@link #readerWrapper(EngineConfig)}.
     */
    public static EngineFactory getEngineFactory() {
        return config -> new ReadOnlyEngine(config, null, new TranslogStats(0, 0, 0, 0, 0), true,
            readerWrapper(config), true, false);
    }

    /**
     * Returns a function that wraps a reader with {@code SeqIdGeneratingFilterReader}, passing the primary term
     * read from {@code engineConfig} each time the function is applied.
     *
     * @param engineConfig the engine configuration supplying the primary term
     * @return the wrapping function; it rethrows an {@link IOException} from wrapping as {@link UncheckedIOException}
     */
    public static Function<DirectoryReader, DirectoryReader> readerWrapper(EngineConfig engineConfig) {
        return reader -> {
            try {
                return SeqIdGeneratingFilterReader.wrap(reader, engineConfig.getPrimaryTermSupplier().getAsLong());
            } catch (IOException e) {
                throw new UncheckedIOException(e);
            }
        };
    }

    /**
     * Returns a new source only repository factory. The factory only supports creation through a type lookup,
     * which it uses to resolve the factory of the repository type named by the {@code delegate_type} setting.
     */
    public static Repository.Factory newRepositoryFactory() {
        return new Repository.Factory() {

            /** Not supported: a source only repository cannot be created without a type lookup for its delegate. */
            @Override
            public Repository create(RepositoryMetadata metadata) {
                throw new UnsupportedOperationException();
            }

            /**
             * Creates the delegate repository from the same name and settings, but with the delegate type, and
             * wraps it.
             *
             * @throws IllegalArgumentException if {@code delegate_type} is not set or the lookup yields no factory for it
             */
            @Override
            public Repository create(RepositoryMetadata metadata, Function<String, Repository.Factory> typeLookup) throws Exception {
                String delegateType = DELEGATE_TYPE.get(metadata.settings());
                if (Strings.hasLength(delegateType) == false) {
                    throw new IllegalArgumentException(DELEGATE_TYPE.getKey() + " must be set");
                }
                Repository.Factory factory = typeLookup.apply(delegateType);
                if (factory == null) {
                    // guards against a lookup that yields null for an unregistered type, which would otherwise
                    // surface as a bare NullPointerException on the call below
                    throw new IllegalArgumentException(
                        DELEGATE_TYPE.getKey() + " [" + delegateType + "] does not refer to a known repository type");
                }
                return new SourceOnlySnapshotRepository(factory.create(new RepositoryMetadata(metadata.name(),
                    delegateType, metadata.settings()), typeLookup));
            }
        };
    }
}