
org.opensearch.index.store.RemoteDirectory

/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

package org.opensearch.index.store;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
import org.opensearch.ExceptionsHelper;
import org.opensearch.action.LatchedActionListener;
import org.opensearch.common.blobstore.AsyncMultiStreamBlobContainer;
import org.opensearch.common.blobstore.BlobContainer;
import org.opensearch.common.blobstore.BlobMetadata;
import org.opensearch.common.blobstore.exception.CorruptFileException;
import org.opensearch.common.blobstore.stream.write.WriteContext;
import org.opensearch.common.blobstore.stream.write.WritePriority;
import org.opensearch.common.blobstore.transfer.RemoteTransferContainer;
import org.opensearch.common.blobstore.transfer.stream.OffsetRangeIndexInputStream;
import org.opensearch.common.blobstore.transfer.stream.OffsetRangeInputStream;
import org.opensearch.common.lucene.store.ByteArrayIndexInput;
import org.opensearch.core.action.ActionListener;
import org.opensearch.core.common.unit.ByteSizeUnit;
import org.opensearch.index.store.exception.ChecksumCombinationException;
import org.opensearch.index.store.remote.utils.BlockIOContext;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.NoSuchFileException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.UnaryOperator;
import java.util.stream.Collectors;

import static org.opensearch.common.blobstore.transfer.RemoteTransferContainer.checksumOfChecksum;

/**
 * A {@code RemoteDirectory} provides an abstraction layer for storing a list of files to a remote store.
 * A RemoteDirectory contains only files (no sub-folder hierarchy). This class does not support all the methods of
 * the Directory interface; currently, it implements only the methods used to copy files to/from
 * the remote store. Implementations of the remaining methods will be added as the remote store is integrated with
 * replication, peer recovery, etc.
 *
 * @opensearch.internal
 */
public class RemoteDirectory extends Directory {

    protected final BlobContainer blobContainer;
    private static final Logger logger = LogManager.getLogger(RemoteDirectory.class);

    private final UnaryOperator<OffsetRangeInputStream> uploadRateLimiter;

    private final UnaryOperator<OffsetRangeInputStream> lowPriorityUploadRateLimiter;

    private final UnaryOperator<InputStream> downloadRateLimiter;

    /**
     * Number of bytes in the segment file to store checksum
     */
    private static final int SEGMENT_CHECKSUM_BYTES = 8;

    public BlobContainer getBlobContainer() {
        return blobContainer;
    }

    public RemoteDirectory(BlobContainer blobContainer) {
        this(blobContainer, UnaryOperator.identity(), UnaryOperator.identity(), UnaryOperator.identity());
    }

    public RemoteDirectory(
        BlobContainer blobContainer,
        UnaryOperator<OffsetRangeInputStream> uploadRateLimiter,
        UnaryOperator<OffsetRangeInputStream> lowPriorityUploadRateLimiter,
        UnaryOperator<InputStream> downloadRateLimiter
    ) {
        this.blobContainer = blobContainer;
        this.lowPriorityUploadRateLimiter = lowPriorityUploadRateLimiter;
        this.uploadRateLimiter = uploadRateLimiter;
        this.downloadRateLimiter = downloadRateLimiter;
    }
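
    // Construction sketch (illustrative, not part of the original class): the three
    // UnaryOperator hooks let callers throttle transfers by wrapping the underlying
    // streams; throttle(...) below is a hypothetical rate-limiting wrapper, not an
    // OpenSearch API.
    //
    //   RemoteDirectory dir = new RemoteDirectory(
    //       blobContainer,
    //       upload -> throttle(upload),      // normal-priority segment uploads
    //       upload -> throttle(upload),      // low-priority (large/background) uploads
    //       download -> throttle(download)   // segment downloads
    //   );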

    /**
     * Returns names of all files stored in this directory. The output must be in sorted (UTF-16,
     * java's {@link String#compareTo}) order.
     */
    @Override
    public String[] listAll() throws IOException {
        return blobContainer.listBlobs().keySet().stream().sorted().toArray(String[]::new);
    }

    /**
     * Returns names of files with given prefix in this directory.
     * @param filenamePrefix The prefix to match against file names in the directory
     * @return The matching filenames in the directory
     * @throws IOException if there were any failures in reading from the blob container
     */
    public Collection<String> listFilesByPrefix(String filenamePrefix) throws IOException {
        return blobContainer.listBlobsByPrefix(filenamePrefix).keySet();
    }

    public List<String> listFilesByPrefixInLexicographicOrder(String filenamePrefix, int limit) throws IOException {
        List<String> sortedBlobList = new ArrayList<>();
        AtomicReference<Exception> exception = new AtomicReference<>();
        final CountDownLatch latch = new CountDownLatch(1);
        // Bridge the async listing API to a blocking call; the latch releases once the listener completes.
        LatchedActionListener<List<BlobMetadata>> actionListener = new LatchedActionListener<>(new ActionListener<>() {
            @Override
            public void onResponse(List<BlobMetadata> blobMetadata) {
                sortedBlobList.addAll(blobMetadata.stream().map(BlobMetadata::name).collect(Collectors.toList()));
            }

            @Override
            public void onFailure(Exception e) {
                exception.set(e);
            }
        }, latch);

        try {
            blobContainer.listBlobsByPrefixInSortedOrder(
                filenamePrefix,
                limit,
                BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC,
                actionListener
            );
            latch.await();
        } catch (InterruptedException e) {
            throw new IOException("Exception in listFilesByPrefixInLexicographicOrder with prefix: " + filenamePrefix, e);
        }
        if (exception.get() != null) {
            throw new IOException(exception.get());
        } else {
            return sortedBlobList;
        }
    }

    /**
     * Returns the stream emitted by the blob object. Should be used with a closeable block.
     *
     * @param fileName Name of file
     * @return Stream from the blob object
     * @throws IOException if fetch of stream fails with IO error
     */
    public InputStream getBlobStream(String fileName) throws IOException {
        return blobContainer.readBlob(fileName);
    }
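
    // Usage sketch (illustrative, not part of the original class): the returned stream
    // is owned by the caller and must be closed, e.g. with try-with-resources:
    //
    //   try (InputStream in = remoteDirectory.getBlobStream("segments_5")) {
    //       byte[] data = in.readAllBytes();
    //   }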

    /**
     * Removes an existing file in the directory.
     *
     * <p>This method will not throw an exception when the file doesn't exist and simply ignores this case.
     * This is a deviation from the {@code Directory} interface where it is expected to throw either
     * {@link NoSuchFileException} or {@link FileNotFoundException} if {@code name} points to a non-existing file.
     *
     * @param name the name of an existing file.
     * @throws IOException if the file exists but could not be deleted.
     */
    @Override
    public void deleteFile(String name) throws IOException {
        // ToDo: Add a check for file existence
        blobContainer.deleteBlobsIgnoringIfNotExists(Collections.singletonList(name));
    }

    /**
     * Creates and returns a new instance of {@link RemoteIndexOutput} which will be used to copy files to the remote
     * store.
     *
     * <p>In the {@link Directory} interface, it is expected to throw {@link java.nio.file.FileAlreadyExistsException}
     * if the file already exists in the remote store. As this method does not open a file, it does not throw the
     * exception.
     *
     * @param name the name of the file to copy to remote store.
     */
    @Override
    public IndexOutput createOutput(String name, IOContext context) {
        return new RemoteIndexOutput(name, blobContainer);
    }

    /**
     * Opens a stream for reading an existing file and returns {@link RemoteIndexInput} enclosing the stream.
     *
     * @param name the name of an existing file.
     * @throws IOException in case of I/O error
     * @throws NoSuchFileException if the file does not exist
     */
    @Override
    public IndexInput openInput(String name, IOContext context) throws IOException {
        return openInput(name, fileLength(name), context);
    }

    public IndexInput openInput(String name, long fileLength, IOContext context) throws IOException {
        InputStream inputStream = null;
        try {
            if (context instanceof BlockIOContext) {
                return getBlockInput(name, fileLength, (BlockIOContext) context);
            } else {
                inputStream = blobContainer.readBlob(name);
                return new RemoteIndexInput(name, downloadRateLimiter.apply(inputStream), fileLength);
            }
        } catch (Exception e) {
            // In case the RemoteIndexInput creation fails, close the input stream to avoid file handler leak.
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (Exception closeEx) {
                    e.addSuppressed(closeEx);
                }
            }
            logger.error("Exception while reading blob for file: " + name + " for path " + blobContainer.path());
            throw e;
        }
    }

    /**
     * Closes the remote directory. Currently, it is a no-op.
     * If the remote directory maintains state in the future, it must be cleaned up before closing the directory.
     */
    @Override
    public void close() throws IOException {
        // Do nothing
    }

    /**
     * Returns the byte length of a file in the directory.
     *
     * @param name the name of an existing file.
     * @throws IOException in case of I/O error
     * @throws NoSuchFileException if the file does not exist
     */
    @Override
    public long fileLength(String name) throws IOException {
        // ToDo: Instead of calling remote store each time, keep a cache with segment metadata
        List<BlobMetadata> metadata = blobContainer.listBlobsByPrefixInSortedOrder(name, 1, BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC);
        if (metadata.size() == 1 && metadata.get(0).name().equals(name)) {
            return metadata.get(0).length();
        }
        throw new NoSuchFileException(name);
    }

    /**
     * Guaranteed to throw an exception and leave the directory unmodified.
     * Once soft deletion of segment files in the remote store is supported, this method will provide details of
     * the number of files marked as deleted but not actually deleted from the remote store.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Set<String> getPendingDeletions() throws IOException {
        throw new UnsupportedOperationException();
    }

    /**
     * Guaranteed to throw an exception and leave the directory unmodified.
     * A temporary IndexOutput is not required while working with the remote store.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) {
        throw new UnsupportedOperationException();
    }

    /**
     * Guaranteed to throw an exception and leave the directory unmodified.
     * Segment upload to the remote store is permanent and does not require a separate sync API.
     * This may change in the future if segment upload to the remote store happens via a cache and we need a sync API
     * to write the cache contents to the store permanently.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void sync(Collection<String> names) throws IOException {
        throw new UnsupportedOperationException();
    }

    /**
     * Guaranteed to throw an exception and leave the directory unmodified.
     * Once the metadata to be stored with each shard is finalized, syncMetaData will be used to sync the directory
     * metadata to the remote store.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void syncMetaData() {
        throw new UnsupportedOperationException();
    }

    /**
     * Guaranteed to throw an exception and leave the directory unmodified.
     * As this method is used by IndexWriter to publish commits, an implementation of this method is required when
     * IndexWriter is backed by a RemoteDirectory.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void rename(String source, String dest) throws IOException {
        throw new UnsupportedOperationException();
    }

    /**
     * Guaranteed to throw an exception and leave the directory unmodified.
     * Once locking segment files in the remote store is supported, an implementation of this method is required with
     * a remote store specific LockFactory.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Lock obtainLock(String name) throws IOException {
        throw new UnsupportedOperationException();
    }

    public void delete() throws IOException {
        blobContainer.delete();
    }

    public boolean copyFrom(
        Directory from,
        String src,
        String remoteFileName,
        IOContext context,
        Runnable postUploadRunner,
        ActionListener<Void> listener,
        boolean lowPriorityUpload
    ) {
        if (blobContainer instanceof AsyncMultiStreamBlobContainer) {
            try {
                uploadBlob(from, src, remoteFileName, context, postUploadRunner, listener, lowPriorityUpload);
            } catch (Exception e) {
                listener.onFailure(e);
            }
            return true;
        }
        return false;
    }

    private void uploadBlob(
        Directory from,
        String src,
        String remoteFileName,
        IOContext ioContext,
        Runnable postUploadRunner,
        ActionListener<Void> listener,
        boolean lowPriorityUpload
    ) throws Exception {
        long expectedChecksum = calculateChecksumOfChecksum(from, src);
        long contentLength;
        try (IndexInput indexInput = from.openInput(src, ioContext)) {
            contentLength = indexInput.length();
        }
        boolean remoteIntegrityEnabled = false;
        if (getBlobContainer() instanceof AsyncMultiStreamBlobContainer) {
            remoteIntegrityEnabled = ((AsyncMultiStreamBlobContainer) getBlobContainer()).remoteIntegrityCheckSupported();
        }
        // Files larger than 15 GB are always uploaded at low priority.
        lowPriorityUpload = lowPriorityUpload || contentLength > ByteSizeUnit.GB.toBytes(15);
        RemoteTransferContainer.OffsetRangeInputStreamSupplier offsetRangeInputStreamSupplier;
        if (lowPriorityUpload) {
            offsetRangeInputStreamSupplier = (size, position) -> lowPriorityUploadRateLimiter.apply(
                new OffsetRangeIndexInputStream(from.openInput(src, ioContext), size, position)
            );
        } else {
            offsetRangeInputStreamSupplier = (size, position) -> uploadRateLimiter.apply(
                new OffsetRangeIndexInputStream(from.openInput(src, ioContext), size, position)
            );
        }
        RemoteTransferContainer remoteTransferContainer = new RemoteTransferContainer(
            src,
            remoteFileName,
            contentLength,
            true,
            lowPriorityUpload ? WritePriority.LOW : WritePriority.NORMAL,
            offsetRangeInputStreamSupplier,
            expectedChecksum,
            remoteIntegrityEnabled
        );
        ActionListener<Void> completionListener = ActionListener.wrap(resp -> {
            try {
                postUploadRunner.run();
                listener.onResponse(null);
            } catch (Exception e) {
                logger.error(() -> new ParameterizedMessage("Exception in segment postUpload for file [{}]", src), e);
                listener.onFailure(e);
            }
        }, ex -> {
            logger.error(() -> new ParameterizedMessage("Failed to upload blob {}", src), ex);
            IOException corruptIndexException = ExceptionsHelper.unwrapCorruption(ex);
            if (corruptIndexException != null) {
                listener.onFailure(corruptIndexException);
                return;
            }
            Throwable throwable = ExceptionsHelper.unwrap(ex, CorruptFileException.class);
            if (throwable != null) {
                CorruptFileException corruptFileException = (CorruptFileException) throwable;
                listener.onFailure(new CorruptIndexException(corruptFileException.getMessage(), corruptFileException.getFileName()));
                return;
            }
            listener.onFailure(ex);
        });
        completionListener = ActionListener.runBefore(completionListener, () -> {
            try {
                remoteTransferContainer.close();
            } catch (Exception e) {
                logger.warn("Error occurred while closing streams", e);
            }
        });
        WriteContext writeContext = remoteTransferContainer.createWriteContext();
        ((AsyncMultiStreamBlobContainer) blobContainer).asyncBlobUpload(writeContext, completionListener);
    }

    private long calculateChecksumOfChecksum(Directory directory, String file) throws IOException {
        try (IndexInput indexInput = directory.openInput(file, IOContext.DEFAULT)) {
            try {
                return checksumOfChecksum(indexInput, SEGMENT_CHECKSUM_BYTES);
            } catch (Exception e) {
                throw new ChecksumCombinationException(
                    "Potentially corrupted file: Checksum combination failed while combining stored checksum "
                        + "and calculated checksum of stored checksum in segment file: "
                        + file
                        + ", directory: "
                        + directory,
                    file,
                    e
                );
            }
        }
    }

    private IndexInput getBlockInput(String name, long fileLength, BlockIOContext blockIOContext) throws IOException {
        long position = blockIOContext.getBlockStart();
        long length = blockIOContext.getBlockSize();
        if (position < 0 || length < 0 || (position + length > fileLength)) {
            throw new IllegalArgumentException("Invalid values of block start and size");
        }
        byte[] bytes;
        try (InputStream inputStream = blobContainer.readBlob(name, position, length)) {
            // TODO - Explore how we can buffer small chunks of data instead of having the whole 8MB block in memory
            bytes = downloadRateLimiter.apply(inputStream).readAllBytes();
        }
        return new ByteArrayIndexInput(name, bytes);
    }
}
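
For orientation, a minimal usage sketch of the read-side API follows. It assumes a BlobContainer has already been obtained from the repository's blob store; the demo helper and the file name "_0.cfe" are illustrative, not part of the class above.

import java.io.IOException;

import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.opensearch.common.blobstore.BlobContainer;
import org.opensearch.index.store.RemoteDirectory;

public class RemoteDirectoryUsageSketch {

    // Hypothetical helper: exercises listAll, fileLength, openInput and deleteFile.
    static void demo(BlobContainer blobContainer) throws IOException {
        RemoteDirectory directory = new RemoteDirectory(blobContainer);

        // Lists every blob in the remote path, sorted by String#compareTo.
        for (String file : directory.listAll()) {
            System.out.println(file + " -> " + directory.fileLength(file) + " bytes");
        }

        // openInput(name, context) resolves the length via fileLength(name) and wraps
        // the blob stream in a RemoteIndexInput.
        try (IndexInput input = directory.openInput("_0.cfe", IOContext.DEFAULT)) {
            byte[] buffer = new byte[(int) input.length()];
            input.readBytes(buffer, 0, buffer.length);
        }

        // deleteFile ignores missing blobs, deviating from the Directory contract.
        directory.deleteFile("_0.cfe");
    }
}

Note that copyFrom(...) returns false when the underlying container is not an AsyncMultiStreamBlobContainer, so callers need their own fallback copy path in that case.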