All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.opensearch.common.blobstore.transfer.RemoteTransferContainer Maven / Gradle / Ivy

There is a newer version: 2.18.0
Show newest version
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

package org.opensearch.common.blobstore.transfer;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.IndexInput;
import org.opensearch.common.CheckedTriFunction;
import org.opensearch.common.SetOnce;
import org.opensearch.common.StreamContext;
import org.opensearch.common.blobstore.stream.write.WriteContext;
import org.opensearch.common.blobstore.stream.write.WritePriority;
import org.opensearch.common.blobstore.transfer.stream.OffsetRangeInputStream;
import org.opensearch.common.blobstore.transfer.stream.RateLimitingOffsetRangeInputStream;
import org.opensearch.common.blobstore.transfer.stream.ResettableCheckedInputStream;
import org.opensearch.common.io.InputStreamContainer;
import org.opensearch.common.util.ByteUtils;

import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Supplier;
import java.util.zip.CRC32;

import com.jcraft.jzlib.JZlib;

/**
 * RemoteTransferContainer is an encapsulation for managing file transfers.
 *
 * @opensearch.internal
 */
public class RemoteTransferContainer implements Closeable {

    private int numberOfParts;
    private long partSize;
    private long lastPartSize;

    private final long contentLength;
    private final SetOnce[]> checksumSuppliers = new SetOnce<>();
    private final String fileName;
    private final String remoteFileName;
    private final boolean failTransferIfFileExists;
    private final WritePriority writePriority;
    private final Long expectedChecksum;
    private final OffsetRangeInputStreamSupplier offsetRangeInputStreamSupplier;
    private final boolean isRemoteDataIntegritySupported;
    private final AtomicBoolean readBlock = new AtomicBoolean();
    private final Map metadata;

    private static final Logger log = LogManager.getLogger(RemoteTransferContainer.class);

    /**
     * Construct a new RemoteTransferContainer object
     *
     * @param fileName                       Name of the local file
     * @param remoteFileName                 Name of the remote file
     * @param contentLength                  Total content length of the file to be uploaded
     * @param failTransferIfFileExists       A boolean to determine if upload has to be failed if file exists
     * @param writePriority                  The {@link WritePriority} of current upload
     * @param offsetRangeInputStreamSupplier A supplier to create OffsetRangeInputStreams
     * @param expectedChecksum               The expected checksum value for the file being uploaded. This checksum will be used for local or remote data integrity checks
     * @param isRemoteDataIntegritySupported A boolean to signify whether the remote repository supports server side data integrity verification
     */
    public RemoteTransferContainer(
        String fileName,
        String remoteFileName,
        long contentLength,
        boolean failTransferIfFileExists,
        WritePriority writePriority,
        OffsetRangeInputStreamSupplier offsetRangeInputStreamSupplier,
        Long expectedChecksum,
        boolean isRemoteDataIntegritySupported
    ) {
        this(
            fileName,
            remoteFileName,
            contentLength,
            failTransferIfFileExists,
            writePriority,
            offsetRangeInputStreamSupplier,
            expectedChecksum,
            isRemoteDataIntegritySupported,
            null
        );
    }

    /**
     * Construct a new RemoteTransferContainer object with metadata.
     *
     * @param fileName                       Name of the local file
     * @param remoteFileName                 Name of the remote file
     * @param contentLength                  Total content length of the file to be uploaded
     * @param failTransferIfFileExists       A boolean to determine if upload has to be failed if file exists
     * @param writePriority                  The {@link WritePriority} of current upload
     * @param offsetRangeInputStreamSupplier A supplier to create OffsetRangeInputStreams
     * @param expectedChecksum               The expected checksum value for the file being uploaded. This checksum will be used for local or remote data integrity checks
     * @param isRemoteDataIntegritySupported A boolean to signify whether the remote repository supports server side data integrity verification
     * @param metadata                       Object metadata to be store with the file.
     */
    public RemoteTransferContainer(
        String fileName,
        String remoteFileName,
        long contentLength,
        boolean failTransferIfFileExists,
        WritePriority writePriority,
        OffsetRangeInputStreamSupplier offsetRangeInputStreamSupplier,
        Long expectedChecksum,
        boolean isRemoteDataIntegritySupported,
        Map metadata
    ) {
        this.fileName = fileName;
        this.remoteFileName = remoteFileName;
        this.contentLength = contentLength;
        this.failTransferIfFileExists = failTransferIfFileExists;
        this.writePriority = writePriority;
        this.offsetRangeInputStreamSupplier = offsetRangeInputStreamSupplier;
        this.expectedChecksum = expectedChecksum;
        this.isRemoteDataIntegritySupported = isRemoteDataIntegritySupported;
        this.metadata = metadata;
    }

    /**
     * @return The {@link  WriteContext} for the current upload
     */
    public WriteContext createWriteContext() {
        return new WriteContext.Builder().fileName(remoteFileName)
            .streamContextSupplier(this::supplyStreamContext)
            .fileSize(contentLength)
            .failIfAlreadyExists(failTransferIfFileExists)
            .writePriority(writePriority)
            .uploadFinalizer(this::finalizeUpload)
            .doRemoteDataIntegrityCheck(isRemoteDataIntegrityCheckPossible())
            .expectedChecksum(isRemoteDataIntegrityCheckPossible() ? expectedChecksum : null)
            .metadata(metadata)
            .build();
    }

    // package-private for testing

    /**
     * This method is called to create the {@link StreamContext} object that will be used by the vendor plugin to
     * open streams during uploads. Calling this method won't actually create the streams, for that the consumer needs
     * to call {@link StreamContext#provideStream}
     *
     * @param partSize Part sizes of all parts apart from the last one, which is determined internally
     * @return The {@link StreamContext} object that will be used by the vendor plugin to retrieve streams during upload
     */
    StreamContext supplyStreamContext(long partSize) {
        try {
            return this.openMultipartStreams(partSize);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    @SuppressWarnings({ "unchecked" })
    private StreamContext openMultipartStreams(long partSize) throws IOException {
        if (checksumSuppliers.get() != null) {
            throw new IOException("Multi-part streams are already created.");
        }

        this.partSize = partSize;
        this.lastPartSize = (contentLength % partSize) != 0 ? contentLength % partSize : partSize;
        this.numberOfParts = (int) ((contentLength % partSize) == 0 ? contentLength / partSize : (contentLength / partSize) + 1);
        Supplier[] suppliers = new Supplier[numberOfParts];
        checksumSuppliers.set(suppliers);

        return new StreamContext(getTransferPartStreamSupplier(), partSize, lastPartSize, numberOfParts);
    }

    private CheckedTriFunction getTransferPartStreamSupplier() {
        return ((partNo, size, position) -> {
            assert checksumSuppliers.get() != null : "expected container to be initialised";
            return getMultipartStreamSupplier(partNo, size, position).get();
        });
    }

    /**
     * OffsetRangeInputStreamSupplier is used to get the offset based input streams at runtime
     */
    public interface OffsetRangeInputStreamSupplier {
        OffsetRangeInputStream get(long size, long position) throws IOException;
    }

    interface LocalStreamSupplier {
        Stream get() throws IOException;
    }

    private LocalStreamSupplier getMultipartStreamSupplier(
        final int streamIdx,
        final long size,
        final long position
    ) {
        return () -> {
            try {
                OffsetRangeInputStream offsetRangeInputStream = offsetRangeInputStreamSupplier.get(size, position);
                if (offsetRangeInputStream instanceof RateLimitingOffsetRangeInputStream) {
                    RateLimitingOffsetRangeInputStream rangeIndexInputStream = (RateLimitingOffsetRangeInputStream) offsetRangeInputStream;
                    rangeIndexInputStream.setReadBlock(readBlock);
                }
                InputStream inputStream;
                if (isRemoteDataIntegrityCheckPossible() == false) {
                    ResettableCheckedInputStream resettableCheckedInputStream = new ResettableCheckedInputStream(
                        offsetRangeInputStream,
                        fileName
                    );
                    Objects.requireNonNull(checksumSuppliers.get())[streamIdx] = resettableCheckedInputStream::getChecksum;
                    inputStream = resettableCheckedInputStream;
                } else {
                    inputStream = offsetRangeInputStream;
                }

                return new InputStreamContainer(inputStream, size, position);
            } catch (IOException e) {
                log.error("Failed to create input stream", e);
                throw e;
            }
        };
    }

    private boolean isRemoteDataIntegrityCheckPossible() {
        return isRemoteDataIntegritySupported && Objects.nonNull(expectedChecksum);
    }

    private void finalizeUpload(boolean uploadSuccessful) throws IOException {
        if (isRemoteDataIntegrityCheckPossible()) {
            return;
        }

        if (uploadSuccessful && Objects.nonNull(expectedChecksum)) {
            long actualChecksum = getActualChecksum();
            if (actualChecksum != expectedChecksum) {
                throw new CorruptIndexException(
                    "Data integrity check done after upload for file "
                        + fileName
                        + " failed, actual checksum: "
                        + actualChecksum
                        + ", expected checksum: "
                        + expectedChecksum,
                    fileName
                );
            }
        }
    }

    /**
     * @return The total content length of current upload
     */
    public long getContentLength() {
        return contentLength;
    }

    private long getActualChecksum() {
        Supplier[] ckSumSuppliers = Objects.requireNonNull(checksumSuppliers.get());
        long checksum = ckSumSuppliers[0].get();
        for (int checkSumIdx = 1; checkSumIdx < ckSumSuppliers.length - 1; checkSumIdx++) {
            checksum = JZlib.crc32_combine(checksum, ckSumSuppliers[checkSumIdx].get(), partSize);
        }
        if (numberOfParts > 1) {
            checksum = JZlib.crc32_combine(checksum, ckSumSuppliers[numberOfParts - 1].get(), lastPartSize);
        }

        return checksum;
    }

    @Override
    public void close() throws IOException {
        // Setting a read block on all streams ever created by the container.
        readBlock.set(true);
    }

    /**
     * Compute final checksum for IndexInput container checksum footer added by {@link CodecUtil}
     * @param indexInput IndexInput with checksum in footer
     * @param checksumBytesLength length of checksum bytes
     * @return final computed checksum of entire indexInput
     */
    public static long checksumOfChecksum(IndexInput indexInput, int checksumBytesLength) throws IOException {
        long storedChecksum = CodecUtil.retrieveChecksum(indexInput);
        CRC32 checksumOfChecksum = new CRC32();
        checksumOfChecksum.update(ByteUtils.toByteArrayBE(storedChecksum));
        return JZlib.crc32_combine(storedChecksum, checksumOfChecksum.getValue(), checksumBytesLength);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy