com.microsoft.azure.storage.blob.BlobOutputStreamInternal Maven / Gradle / Ivy

/**
 * Copyright Microsoft Corporation
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.microsoft.azure.storage.blob;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicInteger;

import com.microsoft.azure.storage.AccessCondition;
import com.microsoft.azure.storage.Constants;
import com.microsoft.azure.storage.DoesServiceRequest;
import com.microsoft.azure.storage.OperationContext;
import com.microsoft.azure.storage.StorageErrorCodeStrings;
import com.microsoft.azure.storage.StorageException;
import com.microsoft.azure.storage.core.Base64;
import com.microsoft.azure.storage.core.Logger;
import com.microsoft.azure.storage.core.SR;
import com.microsoft.azure.storage.core.Utility;

/**
 * The class is an append-only stream for writing into storage.
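 * <p>
 * A minimal usage sketch (the container and blob names are hypothetical; callers normally obtain the stream
 * through {@link CloudBlockBlob#openOutputStream()} rather than constructing this class directly):
 * <pre>{@code
 * CloudBlockBlob blob = container.getBlockBlobReference("example.dat");
 * BlobOutputStream out = blob.openOutputStream();
 * try {
 *     out.write(data, 0, data.length);
 * }
 * finally {
 *     out.close(); // commits the uploaded block list
 * }
 * }</pre>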
 */
final class BlobOutputStreamInternal extends BlobOutputStream {

    private static class BlobOutputStreamThreadFactory implements ThreadFactory {
        private final ThreadGroup group;
        private final AtomicInteger threadNumber = new AtomicInteger(1);
        private final String namePrefix;

        BlobOutputStreamThreadFactory() {
            SecurityManager s = System.getSecurityManager();
            group = (s != null) ? s.getThreadGroup() : Thread.currentThread().getThreadGroup();
            namePrefix = "azure-storage-bloboutputstream-thread-";
        }

        @Override
        public Thread newThread(Runnable r) {
            Thread t = new Thread(group, r, namePrefix + threadNumber.getAndIncrement(), 0);
            t.setDaemon(true);
            if (t.getPriority() != Thread.NORM_PRIORITY) {
                t.setPriority(Thread.NORM_PRIORITY);
            }
            return t;
        }
    }

    /**
     * Holds the {@link AccessCondition} object that represents the access conditions for the blob.
     */
    private AccessCondition accessCondition;

    /**
     * Used for block blobs, holds the block id prefix.
     */
    private String blockIdPrefix;

    /**
     * Used for block blobs, holds the block list.
     */
    private ArrayList<BlockEntry> blockList;

    /**
     * The CompletionService used to await task completion for this stream.
     */
    private final ExecutorCompletionService<Void> completionService;
    
    /**
     * Holds the futures of the executing tasks. The starting size of the set is a multiple of the concurrent request
     * count to reduce the cost of resizing the set later.
     */
    private final Set<Future<Void>> futureSet;

    /**
     * Holds the write threshold of number of bytes to buffer prior to dispatching a write. For block blob this is the
     * block size, for page blob this is the Page commit size.
     */
    private int internalWriteThreshold = -1;
    
    /**
     * Holds the last exception this stream encountered.
     */
    private volatile IOException lastError = null;

    /**
     * Holds the reference to the MD5 digest for the blob.
     */
    private MessageDigest md5Digest;

    /**
     * Holds the OperationContext for the current stream.
     */
    private final OperationContext opContext;

    /**
     * Holds the options for the current stream.
     */
    private final BlobRequestOptions options;
    

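    /**
     * Holds the current write offset within the blob; used by page and append blobs to position each dispatched
     * write.
     */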
    private long currentBlobOffset;

    /**
     * A private buffer to store data prior to committing to the cloud.
     */
    private volatile ByteArrayOutputStream outBuffer;

    /**
     * Holds the reference to the blob this stream is associated with.
     */
    private final CloudBlob parentBlobRef;
     
    /**
     * Determines if this stream is used against a page blob or block blob.
     */
    private BlobType streamType = BlobType.UNSPECIFIED;

    /**
     * The ExecutorService used to schedule tasks for this stream.
     */
    private final ThreadPoolExecutor threadExecutor;

    /**
     * Indicates whether the stream has been aborted and therefore closing will skip committing data.
     */
    private boolean aborted;

    /**
     * Initializes a new instance of the BlobOutputStream class.
     * 
     * @param parentBlob
     *            A {@link CloudBlob} object which represents the blob that this stream is associated with.
     * @param accessCondition
     *            An {@link AccessCondition} object which represents the access conditions for the blob.
     * @param options
     *            A {@link BlobRequestOptions} object which specifies any additional options for the request.
     * @param opContext
     *            An {@link OperationContext} object which is used to track the execution of the operation.
     * 
     * @throws StorageException
     *             An exception representing any error which occurred during the operation.
     */
    private BlobOutputStreamInternal(final CloudBlob parentBlob, final AccessCondition accessCondition,
            final BlobRequestOptions options, final OperationContext opContext) throws StorageException {
        this.accessCondition = accessCondition;
        this.parentBlobRef = parentBlob;
        this.parentBlobRef.assertCorrectBlobType();
        this.options = new BlobRequestOptions(options);
        this.outBuffer = new ByteArrayOutputStream();
        this.opContext = opContext;

        if (this.options.getConcurrentRequestCount() < 1) {
            throw new IllegalArgumentException("ConcurrentRequestCount");
        }
        
        this.futureSet = Collections.newSetFromMap(new ConcurrentHashMap<Future<Void>, Boolean>(
                this.options.getConcurrentRequestCount() == null ? 1 : this.options.getConcurrentRequestCount() * 2));

        if (this.options.getStoreBlobContentMD5()) {
            try {
                this.md5Digest = MessageDigest.getInstance("MD5");
            }
            catch (final NoSuchAlgorithmException e) {
                // This won't happen; throw as a fatal error.
                throw Utility.generateNewUnexpectedStorageException(e);
            }
        }

        // V2 cachedThreadPool for perf.        
        this.threadExecutor = new ThreadPoolExecutor(
                this.options.getConcurrentRequestCount(),
                this.options.getConcurrentRequestCount(),
                10,
                TimeUnit.SECONDS,
                new LinkedBlockingQueue<Runnable>(),
                new BlobOutputStreamThreadFactory());
        this.completionService = new ExecutorCompletionService<Void>(this.threadExecutor);
    }

    /**
     * Initializes a new instance of the BlobOutputStream class for a CloudBlockBlob
     * 
     * @param parentBlob
     *            A {@link CloudBlockBlob} object which represents the blob that this stream is associated with.
     * @param accessCondition
     *            An {@link AccessCondition} object which represents the access conditions for the blob.
     * @param options
     *            A {@link BlobRequestOptions} object which specifies any additional options for the request.
     * @param opContext
     *            An {@link OperationContext} object which is used to track the execution of the operation.
     * 
     * @throws StorageException
     *             An exception representing any error which occurred during the operation.
     */
    protected BlobOutputStreamInternal(final CloudBlockBlob parentBlob, final AccessCondition accessCondition,
            final BlobRequestOptions options, final OperationContext opContext) throws StorageException {
        this((CloudBlob) parentBlob, accessCondition, options, opContext);

        this.blockList = new ArrayList<BlockEntry>();
        this.blockIdPrefix = UUID.randomUUID().toString() + "-";
        
        this.streamType = BlobType.BLOCK_BLOB;
        this.internalWriteThreshold = this.parentBlobRef.getStreamWriteSizeInBytes();
    }

    /**
     * Initializes a new instance of the BlobOutputStream class for a CloudPageBlob
     * 
     * @param parentBlob
     *            A {@link CloudPageBlob} object which represents the blob that this stream is associated with.
     * @param length
     *            A long which represents the length of the page blob in bytes, which must be a multiple of
     *            512.
     * @param accessCondition
     *            An {@link AccessCondition} object which represents the access conditions for the blob.
     * @param options
     *            A {@link BlobRequestOptions} object which specifies any additional options for the request
     * @param opContext
     *            An {@link OperationContext} object which is used to track the execution of the operation
     * 
     * @throws StorageException
     *             An exception representing any error which occurred during the operation.
     */
    @DoesServiceRequest
    protected BlobOutputStreamInternal(final CloudPageBlob parentBlob, final long length,
            final AccessCondition accessCondition, final BlobRequestOptions options, final OperationContext opContext)
            throws StorageException {
        this(parentBlob, accessCondition, options, opContext);
        this.streamType = BlobType.PAGE_BLOB;
        
        this.internalWriteThreshold = (int) Math.min(this.parentBlobRef.getStreamWriteSizeInBytes(), length);
    }
    
    /**
     * Initializes a new instance of the BlobOutputStream class for a CloudAppendBlob
     * 
     * @param parentBlob
     *            A {@link CloudAppendBlob} object which represents the blob that this stream is associated with.
     * @param accessCondition
     *            An {@link AccessCondition} object which represents the access conditions for the blob.
     * @param options
     *            A {@link BlobRequestOptions} object which specifies any additional options for the request
     * @param opContext
     *            An {@link OperationContext} object which is used to track the execution of the operation
     * 
     * @throws StorageException
     *             An exception representing any error which occurred during the operation.
     */
    @DoesServiceRequest
    protected BlobOutputStreamInternal(final CloudAppendBlob parentBlob, final AccessCondition accessCondition, 
            final BlobRequestOptions options, final OperationContext opContext)
            throws StorageException {
        this((CloudBlob)parentBlob, accessCondition, options, opContext);
        this.streamType = BlobType.APPEND_BLOB;
        
        this.accessCondition = accessCondition != null ? accessCondition : new AccessCondition();
        if (this.accessCondition.getIfAppendPositionEqual() != null) {
            this.currentBlobOffset = this.accessCondition.getIfAppendPositionEqual();
        } 
        else {
            // If this is an existing blob, we've done a downloadProperties to get the length
            // If this is a new blob, getLength will correctly return 0
            this.currentBlobOffset = parentBlob.getProperties().getLength();
        }
        
        this.internalWriteThreshold = this.parentBlobRef.getStreamWriteSizeInBytes();
    }

    /**
     * Helper function to check if the stream is faulted; if it is, the stored exception is surfaced.
     * 
     * @throws IOException
     *             If an I/O error occurs. In particular, an IOException may be thrown if the output stream has been
     *             closed.
     */
    private void checkStreamState() throws IOException {
        if (this.lastError != null) {
            throw this.lastError;
        }
    }

    /**
     * Closes this output stream and releases any system resources associated with this stream. If any data remains in
     * the buffer it is committed to the service.
     * 
     * @throws IOException
     *             If an I/O error occurs.
     */
    @Override
    @DoesServiceRequest
    public synchronized void close() throws IOException {
        try {
            // if the user has already closed the stream, this will throw a STREAM_CLOSED exception
            // if an exception was thrown by any thread in the threadExecutor, realize it now
            this.checkStreamState();

            // flush any remaining data
            if (!this.aborted) {
                this.flush();
            }

            // shut down the ExecutorService.
            this.threadExecutor.shutdown();

            // try to commit the blob
            if (!this.aborted) {
                try {
                    this.commit();
                } catch (final StorageException e) {
                    throw Utility.initIOException(e);
                }
            }
        }
        finally {
            // if close() is called again, an exception will be thrown
            this.lastError = new IOException(SR.STREAM_CLOSED);

            // if an exception was thrown and the executor was not yet shut down, call shutdownNow() to cancel
            // all tasks and shut down the ExecutorService
            if (!this.threadExecutor.isShutdown()) {
                this.threadExecutor.shutdownNow();
            }
        }
    }

    @Override
    public void abort() throws IOException {
        this.aborted = true;
    }

    /**
     * Commits the blob; for block blobs this uploads the block list.
     * 
     * @throws StorageException
     *             An exception representing any error which occurred during the operation.
     */
    @DoesServiceRequest
    private synchronized void commit() throws StorageException {
        if (this.options.getStoreBlobContentMD5()) {
            this.parentBlobRef.getProperties().setContentMD5(Base64.encode(this.md5Digest.digest()));
        }

        if (this.streamType == BlobType.BLOCK_BLOB) {
            // wait for all blocks to finish
            final CloudBlockBlob blobRef = (CloudBlockBlob) this.parentBlobRef;
            blobRef.commitBlockList(this.blockList, this.accessCondition, this.options, this.opContext);
        }
        else if (this.options.getStoreBlobContentMD5()) {
            this.parentBlobRef.uploadProperties(this.accessCondition, this.options, this.opContext);
        }
    }
    
    /**
     * Dispatches a write operation for the data currently held in the buffer.
     * 
     * @throws IOException
     *             If an I/O error occurs. In particular, an IOException may be thrown if the output stream has been
     *             closed.
     */
    @DoesServiceRequest
    private synchronized void dispatchWrite() throws IOException {
        final int writeLength = this.outBuffer.size();
        if (writeLength == 0) {
            return;
        }
        
        if (this.streamType == BlobType.PAGE_BLOB && (writeLength % Constants.PAGE_SIZE != 0)) {
            throw new IOException(String.format(SR.INVALID_NUMBER_OF_BYTES_IN_THE_BUFFER, writeLength));
        }

        Callable<Void> worker = null;

        if (this.threadExecutor.getQueue().size() >= this.options.getConcurrentRequestCount() * 2) {
            this.waitForTaskToComplete();
        } 
        
        if (this.futureSet.size() >= this.options.getConcurrentRequestCount() * 2) {
            this.clearCompletedFutures();
        }

        final ByteArrayInputStream bufferRef = new ByteArrayInputStream(this.outBuffer.toByteArray());

        if (this.streamType == BlobType.BLOCK_BLOB) {
            final String blockID = this.getCurrentBlockId();

            this.blockList.add(new BlockEntry(blockID, BlockSearchMode.LATEST));

            worker = new Callable<Void>() {
                @Override
                public Void call() {
                    BlobOutputStreamInternal.this.writeBlock(bufferRef, blockID, writeLength);
                    return null;
                }
            };
        }
        else if (this.streamType == BlobType.PAGE_BLOB) {
            final long opOffset = this.currentBlobOffset;
            this.currentBlobOffset += writeLength;

            worker = new Callable<Void>() {
                @Override
                public Void call() {
                    BlobOutputStreamInternal.this.writePages(bufferRef, opOffset, writeLength);
                    return null;
                }
            };
        }
        else if (this.streamType == BlobType.APPEND_BLOB) {
            final long opOffset = this.currentBlobOffset;
            this.currentBlobOffset += writeLength;

            // We cannot differentiate between max size condition failing only in the retry versus failing in the 
            // first attempt and retry even for a single writer scenario. So we will eliminate the latter and handle 
            // the former in the append block method.
            if (this.accessCondition.getIfMaxSizeLessThanOrEqual() != null
                    && this.currentBlobOffset > this.accessCondition.getIfMaxSizeLessThanOrEqual()) {
                this.lastError = new IOException(SR.INVALID_BLOCK_SIZE);
                throw this.lastError;
            }
            
            worker = new Callable<Void>() {
                @Override
                public Void call() {
                    BlobOutputStreamInternal.this.appendBlock(bufferRef, opOffset, writeLength);
                    return null;
                }
            };
        }

        // Add future to set
        this.futureSet.add(this.completionService.submit(worker));
        
        // Reset buffer.
        this.outBuffer = new ByteArrayOutputStream();
    }
    
    private void writeBlock(ByteArrayInputStream blockData, String blockId, long writeLength) {
        final CloudBlockBlob blobRef = (CloudBlockBlob) this.parentBlobRef;

        try {
            blobRef.uploadBlock(blockId, blockData, writeLength, this.accessCondition, this.options, this.opContext);
        }
        catch (final IOException e) {
            this.lastError = e;
        }
        catch (final StorageException e) {
            this.lastError = Utility.initIOException(e);
        }
    }

    private void writePages(ByteArrayInputStream pageData, long offset, long writeLength) {
        final CloudPageBlob blobRef = (CloudPageBlob) this.parentBlobRef;

        try {
            blobRef.uploadPages(pageData, offset, writeLength, this.accessCondition, this.options, this.opContext);
        }
        catch (final IOException e) {
            this.lastError = e;
        }
        catch (final StorageException e) {
            this.lastError = Utility.initIOException(e);
        }
    }

    private void appendBlock(ByteArrayInputStream blockData, long offset, long writeLength) {
        final CloudAppendBlob blobRef = (CloudAppendBlob) this.parentBlobRef;
        this.accessCondition.setIfAppendPositionEqual(offset);

        int previousResultsCount = this.opContext.getRequestResults().size();
        try {
            blobRef.appendBlock(blockData, writeLength, this.accessCondition, this.options, this.opContext);
        }
        catch (final IOException e) {
            this.lastError = e;
        }
        catch (final StorageException e) {
            if (this.options.getAbsorbConditionalErrorsOnRetry()
                    && e.getHttpStatusCode() == HttpURLConnection.HTTP_PRECON_FAILED
                    && e.getExtendedErrorInformation() != null
                    && e.getErrorCode() != null
                    && (e.getErrorCode()
                            .equals(StorageErrorCodeStrings.INVALID_APPEND_POSITION) ||
                            e.getErrorCode().equals(StorageErrorCodeStrings.INVALID_MAX_BLOB_SIZE_CONDITION))
                    && (this.opContext.getRequestResults().size() - previousResultsCount > 1)) {

                // Pre-condition failure on a retry should be ignored in a single writer scenario since 
                // the request succeeded in the first attempt.
                Logger.info(this.opContext, SR.PRECONDITION_FAILURE_IGNORED);
            }
            else {
                this.lastError = Utility.initIOException(e);
            }
        }
    }
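
    /*
     * Usage note (a sketch; the variable names are hypothetical): a single-writer caller that wants a retried
     * append to absorb the precondition failure handled above opts in through the request options before
     * opening the stream, for example:
     *
     *     BlobRequestOptions writeOptions = new BlobRequestOptions();
     *     writeOptions.setAbsorbConditionalErrorsOnRetry(true);
     *     BlobOutputStream out = appendBlob.openWriteExisting(null, writeOptions, null);
     */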

    /**
     * Flushes this output stream and forces any buffered output bytes to be written out. If any data remains in the
     * buffer it is committed to the service.
     * 
     * @throws IOException
     *             If an I/O error occurs.
     */
    @Override
    @DoesServiceRequest
    public void flush() throws IOException {
        this.checkStreamState();

        this.dispatchWrite();

        // Waits for all submitted tasks to complete
        Set<Future<Void>> requests = new HashSet<Future<Void>>(this.futureSet);
        for (Future<Void> request : requests) {
            // wait for the future to complete
            try {
                request.get();
            }
            catch (Exception e) {
                throw Utility.initIOException(e);
            }

            // If that task threw an error, fail fast
            this.checkStreamState();
        }
    }
   
    /**
     * Generates a new block ID to be used for PutBlock.
     * 
     * @return Base64 encoded block ID
     * @throws IOException
     */
    private String getCurrentBlockId() throws IOException
    {
        String blockIdSuffix = String.format("%06d", this.blockList.size());
        
        byte[] blockIdInBytes;
        try {
            blockIdInBytes = (this.blockIdPrefix + blockIdSuffix).getBytes(Constants.UTF8_CHARSET);
        } catch (UnsupportedEncodingException e) {
            // this should never happen; UTF-8 is a standard charset that is always available
            throw new IOException(e);
        }
        
        return Base64.encode(blockIdInBytes);
    }
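
    /*
     * Illustration (with an assumed prefix value): for a blockIdPrefix of "<uuid>-" and blockList.size() == 12,
     * the unencoded id is "<uuid>-000012", which is then Base64 encoded. The fixed-width numeric suffix keeps
     * every id the same length, which the Put Block API requires for all block ids within a single blob.
     */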

    /**
     * Waits for at least one task to complete.
     * 
     * @throws IOException
     *             If an I/O error occurs. In particular, an IOException may be thrown if the output stream has been
     *             closed.
     */
    private void waitForTaskToComplete() throws IOException {
        boolean completed = false;
        while (this.completionService.poll() != null) {
            completed = true;
        }
        
        if (!completed) {
            try {
                this.completionService.take();
            }
            catch (final InterruptedException e) {
                throw Utility.initIOException(e);
            }
        }
    }
    
    
    /**
     * Removes futures which are done from the future set.
     */
    private void clearCompletedFutures() {
        for (Future<Void> request : this.futureSet) {
            if (request.isDone()) {
                this.futureSet.remove(request);
            }
        }
    }

    /**
     * Writes b.length bytes from the specified byte array to this output stream.
     * 
     * If you are using {@link CloudAppendBlob} and are certain of a single writer scenario, please look at
     * {@link BlobRequestOptions#setAbsorbConditionalErrorsOnRetry(Boolean)} and see if setting this flag to
     * true is acceptable for you.
     * 
     * @param data
     *            A byte array which represents the data to write.
     * 
     * @throws IOException
     *             If an I/O error occurs. In particular, an IOException may be thrown if the output stream has been
     *             closed.
     */
    @Override
    @DoesServiceRequest
    public void write(final byte[] data) throws IOException {
        this.write(data, 0, data.length);
    }

    /**
     * Writes length bytes from the specified byte array starting at offset to this output stream.
     * 
     * If you are using {@link CloudAppendBlob} and are certain of a single writer scenario, please look at
     * {@link BlobRequestOptions#setAbsorbConditionalErrorsOnRetry(Boolean)} and see if setting this flag to
     * true is acceptable for you.
     * 
     * @param data
     *            A byte array which represents the data to write.
     * @param offset
     *            An int which represents the start offset in the data.
     * @param length
     *            An int which represents the number of bytes to write.
     * 
     * @throws IOException
     *             If an I/O error occurs. In particular, an IOException may be thrown if the output stream has been
     *             closed.
     */
    @Override
    @DoesServiceRequest
    public void write(final byte[] data, final int offset, final int length) throws IOException {
        if (offset < 0 || length < 0 || length > data.length - offset) {
            throw new IndexOutOfBoundsException();
        }

        this.writeInternal(data, offset, length);
    }

    /**
     * Writes all data from the InputStream to the Blob.
     * 
     * If you are using {@link CloudAppendBlob} and are certain of a single writer scenario, please look at
     * {@link BlobRequestOptions#setAbsorbConditionalErrorsOnRetry(Boolean)} and see if setting this flag to
     * true is acceptable for you.
     * 
     * @param sourceStream
     *            An {@link InputStream} object which specifies the data to write to the Blob.
     * @param writeLength
     *            A long which represents the number of bytes to write from the source stream.
     * 
     * @throws IOException
     *             If an I/O error occurs. In particular, an IOException may be thrown if the output stream has been
     *             closed.
     * @throws StorageException
     *             An exception representing any error which occurred during the operation.
     */
    @DoesServiceRequest
    public void write(final InputStream sourceStream, final long writeLength) throws IOException, StorageException {
        Utility.writeToOutputStream(sourceStream, this, writeLength, false, false, this.opContext, this.options, false);
    }

    /**
     * Writes the specified byte to this output stream. The general contract for write is that one byte is written to
     * the output stream. The byte to be written is the eight low-order bits of the argument b. The 24 high-order bits
     * of b are ignored.
     * 
     * If you are using {@link CloudAppendBlob} and are certain of a single writer scenario, please look at
     * {@link BlobRequestOptions#setAbsorbConditionalErrorsOnRetry(Boolean)} and see if setting this flag to
     * true is acceptable for you.
     * 
     * @param byteVal
     *            An int which represents the byte value to write.
     * 
     * @throws IOException
     *             If an I/O error occurs. In particular, an IOException may be thrown if the output stream has been
     *             closed.
     */
    @Override
    @DoesServiceRequest
    public void write(final int byteVal) throws IOException {
        this.write(new byte[] { (byte) (byteVal & 0xFF) });
    }

    /**
     * Writes the data to the buffer and triggers writes to the service as needed.
     * 
     * @param data
     *            A byte array which represents the data to write.
     * @param offset
     *            An int which represents the start offset in the data.
     * @param length
     *            An int which represents the number of bytes to write.
     * 
     * @throws IOException
     *             If an I/O error occurs. In particular, an IOException may be thrown if the output stream has been
     *             closed.
     */
    @DoesServiceRequest
    private synchronized void writeInternal(final byte[] data, int offset, int length) throws IOException {
        while (length > 0) {
            this.checkStreamState();

            final int availableBufferBytes = this.internalWriteThreshold - this.outBuffer.size();
            final int nextWrite = Math.min(availableBufferBytes, length);

            // If we need to set MD5 then update the digest accordingly
            if (this.options.getStoreBlobContentMD5()) {
                this.md5Digest.update(data, offset, nextWrite);
            }

            this.outBuffer.write(data, offset, nextWrite);
            offset += nextWrite;
            length -= nextWrite;

            if (this.outBuffer.size() == this.internalWriteThreshold) {
                this.dispatchWrite();
            }
        }
    }
}




