// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
package com.azure.storage.blob.specialized;

import com.azure.core.util.Context;
import com.azure.core.util.FluxUtil;
import com.azure.core.util.logging.ClientLogger;
import com.azure.storage.blob.BlobAsyncClient;
import com.azure.storage.blob.BlobServiceVersion;
import com.azure.storage.blob.implementation.util.StorageBlockingSink;
import com.azure.storage.blob.models.AccessTier;
import com.azure.storage.blob.models.AppendBlobRequestConditions;
import com.azure.storage.blob.models.BlobHttpHeaders;
import com.azure.storage.blob.models.BlobRequestConditions;
import com.azure.storage.blob.models.BlobStorageException;
import com.azure.storage.blob.models.PageBlobRequestConditions;
import com.azure.storage.blob.models.PageRange;
import com.azure.storage.blob.models.ParallelTransferOptions;
import com.azure.storage.blob.options.BlobParallelUploadOptions;
import com.azure.storage.blob.options.BlockBlobOutputStreamOptions;
import com.azure.storage.common.StorageOutputStream;
import com.azure.storage.common.implementation.Constants;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Map;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

/**
 * BlobOutputStream allows for the uploading of data to a blob using a stream-like approach.
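 *
 * <p>A minimal usage sketch (assuming an already-configured {@link BlobAsyncClient} named {@code client} and a
 * {@code byte[] data} to upload; both are placeholders, not part of this class):</p>
 * <pre>{@code
 * try (BlobOutputStream stream = BlobOutputStream.blockBlobOutputStream(client,
 *         new BlockBlobOutputStreamOptions(), Context.NONE)) {
 *     stream.write(data);
 * } // close() flushes any buffered data and commits the block blob
 * }</pre>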
 */
public abstract class BlobOutputStream extends StorageOutputStream {

    private volatile boolean isClosed;

    /**
     * Creates a new BlobOutputStream with the given write threshold.
     *
     * @param writeThreshold How many bytes the output will retain before it initiates a write to the Storage service.
     */
    BlobOutputStream(final int writeThreshold) {
        super(writeThreshold);
    }

    static BlobOutputStream appendBlobOutputStream(final AppendBlobAsyncClient client,
        final AppendBlobRequestConditions appendBlobRequestConditions) {
        return new AppendBlobOutputStream(client, appendBlobRequestConditions);
    }

    /**
     * Creates a block blob output stream from a {@link BlobAsyncClient}.
     *
     * @param client {@link BlobAsyncClient} The blob client.
     * @param parallelTransferOptions {@link ParallelTransferOptions} used to configure buffered uploading.
     * @param headers {@link BlobHttpHeaders}
     * @param metadata Metadata to associate with the blob. If there is leading or trailing whitespace in any
     * metadata key or value, it must be removed or encoded.
     * @param tier {@link AccessTier} for the destination blob.
     * @param requestConditions {@link BlobRequestConditions}
     * @return {@link BlobOutputStream} associated with the blob.
     */
    public static BlobOutputStream blockBlobOutputStream(final BlobAsyncClient client,
        final ParallelTransferOptions parallelTransferOptions, final BlobHttpHeaders headers,
        final Map<String, String> metadata, final AccessTier tier, final BlobRequestConditions requestConditions) {
        return blockBlobOutputStream(client, parallelTransferOptions, headers, metadata, tier, requestConditions,
            Context.NONE);
    }

    /**
     * Creates a block blob output stream from a {@link BlobAsyncClient}.
     *
     * @param client {@link BlobAsyncClient} The blob client.
     * @param parallelTransferOptions {@link ParallelTransferOptions} used to configure buffered uploading.
     * @param headers {@link BlobHttpHeaders}
     * @param metadata Metadata to associate with the blob. If there is leading or trailing whitespace in any
     * metadata key or value, it must be removed or encoded.
     * @param tier {@link AccessTier} for the destination blob.
     * @param requestConditions {@link BlobRequestConditions}
     * @param context Additional context that is passed through the Http pipeline during the service call.
     * @return {@link BlobOutputStream} associated with the blob.
     */
    public static BlobOutputStream blockBlobOutputStream(final BlobAsyncClient client,
        final ParallelTransferOptions parallelTransferOptions, final BlobHttpHeaders headers,
        final Map<String, String> metadata, final AccessTier tier,
        final BlobRequestConditions requestConditions, Context context) {
        return blockBlobOutputStream(client, new BlockBlobOutputStreamOptions()
                .setParallelTransferOptions(parallelTransferOptions).setHeaders(headers).setMetadata(metadata)
                .setTier(tier).setRequestConditions(requestConditions),
            context);
    }

    /**
     * Creates a block blob output stream from a {@link BlobAsyncClient}.
     *
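     * <p>A sketch of configuring options before opening the stream; the header and tier values below are
     * illustrative choices, not defaults:</p>
     * <pre>{@code
     * BlockBlobOutputStreamOptions options = new BlockBlobOutputStreamOptions()
     *     .setHeaders(new BlobHttpHeaders().setContentType("application/octet-stream"))
     *     .setTier(AccessTier.COOL);
     * BlobOutputStream stream = BlobOutputStream.blockBlobOutputStream(client, options, Context.NONE);
     * }</pre>
     *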
     * @param client {@link BlobAsyncClient} The blob client.
     * @param options {@link BlockBlobOutputStreamOptions}
     * @param context Additional context that is passed through the Http pipeline during the service call.
     * @return {@link BlobOutputStream} associated with the blob.
     */
    public static BlobOutputStream blockBlobOutputStream(final BlobAsyncClient client,
        BlockBlobOutputStreamOptions options, Context context) {
        options = options == null ? new BlockBlobOutputStreamOptions() : options;
        return new BlockBlobOutputStream(client, options.getParallelTransferOptions(), options.getHeaders(),
            options.getMetadata(), options.getTags(), options.getTier(), options.getRequestConditions(), context);
    }

    static BlobOutputStream pageBlobOutputStream(final PageBlobAsyncClient client, final PageRange pageRange,
        final BlobRequestConditions requestConditions) {
        return new PageBlobOutputStream(client, pageRange, requestConditions);
    }

    abstract void commit();

    /**
     * Closes this output stream and releases any system resources associated with this stream. If any data remains in
     * the buffer it is committed to the service.
     *
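     * <p>Subsequent calls to {@code close()} return immediately, while any write after close fails with a
     * stream-closed {@link IOException}. A sketch, assuming {@code stream} was obtained from one of the factory
     * methods above and {@code lastChunk} is a caller-supplied {@code byte[]}:</p>
     * <pre>{@code
     * stream.write(lastChunk);
     * stream.close(); // flushes remaining data and commits
     * stream.close(); // no-op: the stream is already closed
     * }</pre>
     *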
     * @throws IOException If an I/O error occurs.
     */
    @Override
    public synchronized void close() throws IOException {
        try {
            // if the stream is already closed, we can stop executing any further steps to avoid throwing
            // STREAM_CLOSED exception
            if (isClosed) {
                return;
            }
            // if an exception was thrown by any thread in the threadExecutor, realize it now
            this.checkStreamState();

            // flush any remaining data
            this.flush();

            // try to commit the blob
            try {
                this.commit();
            } catch (final BlobStorageException e) {
                throw new IOException("The blob has not been committed. Data has not been persisted.", e);
            }
            /* Need this check because, for block blobs, the buffered upload error only manifests itself after commit
               is called. */
            if (this.lastError != null) {
                throw lastError;
            }
        } finally {
            this.lastError = new IOException(Constants.STREAM_CLOSED);
            isClosed = true;
        }
    }

    private static final class AppendBlobOutputStream extends BlobOutputStream {
        private static final String INVALID_BLOCK_SIZE =
            "Block data should not exceed BlockBlobURL.MAX_STAGE_BLOCK_BYTES";

        private final AppendBlobRequestConditions appendBlobRequestConditions;
        private final AppendBlobAsyncClient client;

        private AppendBlobOutputStream(final AppendBlobAsyncClient client,
            final AppendBlobRequestConditions appendBlobRequestConditions) {
            // Service versions 2022-11-02 and above support append blocks of up to 100MB; all older service
            // versions support up to 4MB.
            super(client.getServiceVersion().ordinal() < BlobServiceVersion.V2022_11_02.ordinal()
                ? AppendBlobClient.MAX_APPEND_BLOCK_BYTES_VERSIONS_2021_12_02_AND_BELOW
                : AppendBlobClient.MAX_APPEND_BLOCK_BYTES_VERSIONS_2022_11_02_AND_ABOVE);

            this.client = client;
            this.appendBlobRequestConditions = (appendBlobRequestConditions == null)
                ? new AppendBlobRequestConditions() : appendBlobRequestConditions;

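            // If the caller did not pin an append position, start appending at the current end of the blob.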
            if (this.appendBlobRequestConditions.getAppendPosition() == null) {
                this.appendBlobRequestConditions.setAppendPosition(client.getProperties().block().getBlobSize());
            }
        }

        private Mono<Void> appendBlock(Flux<ByteBuffer> blockData, long writeLength) {
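            // Compute where the append position will land if this block succeeds; it is only advanced after the
            // service acknowledges the append, and any IO/storage failure is parked in lastError so it surfaces on
            // the next write or on close().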
            long newAppendOffset = appendBlobRequestConditions.getAppendPosition() + writeLength;
            return client.appendBlockWithResponse(blockData, writeLength, null, appendBlobRequestConditions)
                .doOnNext(ignored -> appendBlobRequestConditions.setAppendPosition(newAppendOffset))
                .then()
                .onErrorResume(t -> t instanceof IOException || t instanceof BlobStorageException, e -> {
                    this.lastError = new IOException(e);
                    return Mono.empty();
                });
        }

        @Override
        protected Mono<Void> dispatchWrite(byte[] data, int writeLength, long offset) {
            if (writeLength == 0) {
                return Mono.empty();
            }

            // We cannot differentiate between the max-size condition failing only on the retry versus failing on
            // both the first attempt and the retry, even for a single-writer scenario. So we eliminate the latter
            // case here and handle the former in the append block method.
            if (appendBlobRequestConditions.getMaxSize() != null
                && appendBlobRequestConditions.getAppendPosition() > appendBlobRequestConditions.getMaxSize()) {
                this.lastError = new IOException(INVALID_BLOCK_SIZE);
                return Mono.error(this.lastError);
            }

            return this.appendBlock(Mono.fromCallable(() -> ByteBuffer.wrap(data, (int) offset, writeLength)).flux(),
                writeLength);
        }

        @Override
        void commit() {
            // AppendBlob doesn't need to commit anything.
        }
    }

    private static final class BlockBlobOutputStream extends BlobOutputStream {

        private final Lock lock;
        private final Condition transferComplete;
        private final StorageBlockingSink sink;

        boolean complete;

        private BlockBlobOutputStream(final BlobAsyncClient client,
            final ParallelTransferOptions parallelTransferOptions, final BlobHttpHeaders headers,
            final Map<String, String> metadata, final Map<String, String> tags, final AccessTier tier,
            final BlobRequestConditions requestConditions, Context context) {
            super(Integer.MAX_VALUE); // writeThreshold is effectively not used by BlockBlobOutputStream.
            // There is a bug in reactor core that does not handle converting Context.NONE to a reactor context.
            context = context == null || context.equals(Context.NONE) ? null : context;

            this.lock = new ReentrantLock();
            this.transferComplete = lock.newCondition();
            this.sink = new StorageBlockingSink();

            Flux<ByteBuffer> body = this.sink.asFlux();

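            // Start the buffered parallel upload immediately; it consumes the ByteBuffers that writeInternal()
            // pushes into the sink and terminates once the sink is completed (or errors) in commit().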
            client.uploadWithResponse(new BlobParallelUploadOptions(body)
                .setParallelTransferOptions(parallelTransferOptions).setHeaders(headers).setMetadata(metadata)
                .setTags(tags).setTier(tier).setRequestConditions(requestConditions))
                // This allows the operation to continue while maintaining the error that occurred.
                .onErrorResume(e -> {
                    if (e instanceof IOException) {
                        this.lastError = (IOException) e;
                    } else {
                        this.lastError = new IOException(e);
                    }
                    return Mono.empty();
                })
                // Use doFinally to cover all termination scenarios of the Flux.
                .doFinally(signalType -> {
                    lock.lock();
                    try {
                        complete = true;
                        transferComplete.signal();
                    } finally {
                        lock.unlock();
                    }
                })
                .contextWrite(FluxUtil.toReactorContext(context))
                .subscribe();
        }

        @Override
        void commit() {
            // Need to wait until the upload task completes.
            lock.lock();
            try {
                sink.emitCompleteOrThrow(); /* Allow upload task to try to complete. */

                while (!complete) {
                    transferComplete.await();
                }
            } catch (InterruptedException e) {
                // Should we just throw and not populate this, since it's recoverable?
                this.lastError = new IOException(e.getMessage());
            } catch (Exception e) { // Catch any exceptions by the sink.
                this.lastError = new IOException(e);
            } finally {
                lock.unlock();
            }
        }

        @Override
        protected void writeInternal(final byte[] data, int offset, int length) {
            this.checkStreamState();
            /*
            We need to do a deep copy here because the writing is async in this case. It is a common pattern for
            customers writing to an output stream to perform the writes in a tight loop with a reused buffer. This
            coupled with async network behavior can result in the data being overwritten as the buffer is reused.
             */
            byte[] buffer = new byte[length];
            System.arraycopy(data, offset, buffer, 0, length);

            try {
                this.sink.emitNext(ByteBuffer.wrap(buffer));
            } catch (Exception e) {
                this.lastError = new IOException(e);
            }
        }

        // Never called; BlockBlobOutputStream routes all writes through writeInternal() into the sink instead.
        @Override
        protected Mono<Void> dispatchWrite(byte[] data, int writeLength, long offset) {
            return Mono.empty();
        }
    }

    private static final class PageBlobOutputStream extends BlobOutputStream {
        private static final String INVALID_NUMBER_OF_BYTES_IN_THE_BUFFER =
            "Page data must be a multiple of 512 bytes. Buffer currently contains %d bytes.";

        private static final ClientLogger LOGGER = new ClientLogger(PageBlobOutputStream.class);
        private final PageBlobAsyncClient client;
        private final PageBlobRequestConditions pageBlobRequestConditions;
        private final PageRange pageRange;

        private PageBlobOutputStream(final PageBlobAsyncClient client, final PageRange pageRange,
            final BlobRequestConditions blobRequestConditions) {
            super(PageBlobClient.MAX_PUT_PAGES_BYTES);
            this.client = client;
            this.pageRange = pageRange;

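            // The page blob APIs take PageBlobRequestConditions, so copy over the blob-level conditions when given.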
            if (blobRequestConditions != null) {
                this.pageBlobRequestConditions = new PageBlobRequestConditions()
                    .setLeaseId(blobRequestConditions.getLeaseId())
                    .setIfMatch(blobRequestConditions.getIfMatch())
                    .setIfNoneMatch(blobRequestConditions.getIfNoneMatch())
                    .setIfModifiedSince(blobRequestConditions.getIfModifiedSince())
                    .setIfUnmodifiedSince(blobRequestConditions.getIfUnmodifiedSince());
            } else {
                this.pageBlobRequestConditions = null;
            }
        }

        private Mono<Void> writePages(Flux<ByteBuffer> pageData, int length, long offset) {
            return client.uploadPagesWithResponse(new PageRange().setStart(offset).setEnd(offset + length - 1),
                pageData, null, pageBlobRequestConditions)
                .then()
                .onErrorResume(BlobStorageException.class, e -> {
                    this.lastError = new IOException(e);
                    return Mono.empty();
                });
        }

        @Override
        protected Mono<Void> dispatchWrite(byte[] data, int writeLength, long offset) {
            if (writeLength == 0) {
                return Mono.empty();
            }

            if (writeLength % PageBlobAsyncClient.PAGE_BYTES != 0) {
                return Mono.error(new IOException(String.format(INVALID_NUMBER_OF_BYTES_IN_THE_BUFFER,
                    writeLength)));
            }

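            // The page range acts as a write cursor: ensure this write fits within it, then advance the start for
            // the next dispatch.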
            long pageOffset = pageRange.getStart();
            if (pageOffset + writeLength - 1 > pageRange.getEnd()) {
                throw LOGGER.logExceptionAsError(
                    new RuntimeException("The input data length is larger than the page range."));
            }
            pageRange.setStart(pageRange.getStart() + writeLength);

            return this.writePages(Mono.fromCallable(() -> ByteBuffer.wrap(data, (int) offset, writeLength)).flux(),
                writeLength, pageOffset);
        }

        @Override
        void commit() {
            // PageBlob doesn't need to commit anything.
        }
    }
}