All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.microsoft.azure.storage.blob.BlobInputStream Maven / Gradle / Ivy

There is a newer version: 8.6.6
Show newest version
/**
 * Copyright Microsoft Corporation
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.microsoft.azure.storage.blob;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

import com.microsoft.azure.storage.AccessCondition;
import com.microsoft.azure.storage.Constants;
import com.microsoft.azure.storage.DoesServiceRequest;
import com.microsoft.azure.storage.OperationContext;
import com.microsoft.azure.storage.StorageErrorCode;
import com.microsoft.azure.storage.StorageErrorCodeStrings;
import com.microsoft.azure.storage.StorageException;
import com.microsoft.azure.storage.core.Base64;
import com.microsoft.azure.storage.core.SR;
import com.microsoft.azure.storage.core.Utility;

/**
 * Provides an input stream to read a given blob resource.
 * <p>
 * Data is fetched from the service in chunks of at most {@code readSize} bytes and served from an in-memory buffer.
 * When the whole blob is streamed and the service returned a content MD5, the digest is verified once the final byte
 * has been read; repositioning the stream ({@link #skip(long)} or {@link #reset()}) disables that verification.
 * <p>
 * All methods that touch the stream state are synchronized on the stream instance.
 */
public final class BlobInputStream extends InputStream {
    /**
     * Holds the reference to the blob this stream is associated with.
     */
    private final CloudBlob parentBlobRef;

    /**
     * Holds the reference to the MD5 digest for the blob. Only set while whole-blob MD5 validation is active.
     */
    private MessageDigest md5Digest;

    /**
     * A flag to determine if the stream is faulted, if so the last error will be thrown on next operation.
     */
    private volatile boolean streamFaulted;

    /**
     * Holds the last exception this stream encountered.
     */
    private IOException lastError;

    /**
     * Holds the OperationContext for the current stream.
     */
    private final OperationContext opContext;

    /**
     * Holds the options for the current stream
     */
    private final BlobRequestOptions options;

    /**
     * Holds the stream length, i.e. the number of bytes this stream exposes starting at {@link #blobRangeOffset}.
     */
    private long streamLength;

    /**
     * Holds the stream read size for both block and page blobs.
     */
    private final int readSize;

    /**
     * A flag indicating if the Blob MD5 should be validated.
     */
    private boolean validateBlobMd5;

    /**
     * Holds the Blob MD5.
     */
    private final String retrievedContentMD5Value;

    /**
     * Holds the reference to the current buffered data.
     */
    private ByteArrayInputStream currentBuffer;

    /**
     * Holds an absolute byte position for the mark feature.
     */
    private long markedPosition;

    /**
     * Holds the mark delta for which the mark position is expired.
     */
    private int markExpiry;

    /**
     * Holds an absolute byte position of the current read position.
     */
    private long currentAbsoluteReadPosition;

    /**
     * Holds the absolute byte position of the start of the current buffer.
     */
    private long bufferStartOffset;

    /**
     * Holds the length of the current buffer in bytes.
     */
    private int bufferSize;

    /**
     * Holds the {@link AccessCondition} object that represents the access conditions for the blob.
     */
    private AccessCondition accessCondition = null;

    /**
     * Offset of the source blob this class is configured to stream from.
     */
    private final long blobRangeOffset;

    /**
     * Initializes a new instance of the BlobInputStream class that streams the whole blob.
     *
     * @param parentBlob
     *            A {@link CloudBlob} object which represents the blob that this stream is associated with.
     * @param accessCondition
     *            An {@link AccessCondition} object which represents the access conditions for the blob.
     * @param options
     *            A {@link BlobRequestOptions} object which represents that specifies any additional options for the
     *            request.
     * @param opContext
     *            An {@link OperationContext} object which is used to track the execution of the operation.
     *
     * @throws StorageException
     *             An exception representing any error which occurred during the operation.
     */
    @DoesServiceRequest
    protected BlobInputStream(final CloudBlob parentBlob, final AccessCondition accessCondition,
                              final BlobRequestOptions options, final OperationContext opContext) throws StorageException {
        this(0, null, parentBlob, accessCondition, options, opContext);
    }

    /**
     * Initializes a new instance of the BlobInputStream class.
     * Note that if {@code blobRangeOffset} is not {@code 0} or {@code blobRangeLength} is not {@code null}, there will
     * be no content MD5 verification.
     *
     * @param blobRangeOffset
     *            The offset of blob data to begin stream.
     * @param blobRangeLength
     *            How much data the stream should return after blobRangeOffset.
     * @param parentBlob
     *            A {@link CloudBlob} object which represents the blob that this stream is associated with.
     * @param accessCondition
     *            An {@link AccessCondition} object which represents the access conditions for the blob.
     * @param options
     *            A {@link BlobRequestOptions} object which represents that specifies any additional options for the
     *            request.
     * @param opContext
     *            An {@link OperationContext} object which is used to track the execution of the operation.
     * 
     * @throws StorageException
     *             An exception representing any error which occurred during the operation.
     * @throws IllegalArgumentException
     *             If transactional content MD5 is requested while the blob's minimum read size exceeds 4 MB.
     * @throws IndexOutOfBoundsException
     *             If {@code blobRangeOffset} is negative or {@code blobRangeLength} is non-null and not positive.
     */
    @DoesServiceRequest
    protected BlobInputStream(long blobRangeOffset, Long blobRangeLength, final CloudBlob parentBlob,
            final AccessCondition accessCondition, final BlobRequestOptions options, final OperationContext opContext)
            throws StorageException {

        this.blobRangeOffset = blobRangeOffset;
        this.parentBlobRef = parentBlob;
        this.parentBlobRef.assertCorrectBlobType();
        this.options = new BlobRequestOptions(options);
        this.opContext = opContext;
        this.streamFaulted = false;
        this.currentAbsoluteReadPosition = blobRangeOffset;
        // Until mark() is called the implicit mark is the start of the configured range, not absolute position 0.
        this.markedPosition = blobRangeOffset;
        this.readSize = parentBlob.getStreamMinimumReadSizeInBytes();

        if (options.getUseTransactionalContentMD5() && this.readSize > 4 * Constants.MB) {
            throw new IllegalArgumentException(SR.INVALID_RANGE_CONTENT_MD5_HEADER);
        }

        // Validate the requested range before talking to the service; an invalid range can never succeed.
        if (blobRangeOffset < 0 || (blobRangeLength != null && blobRangeLength <= 0)) {
            throw new IndexOutOfBoundsException();
        }

        parentBlob.downloadAttributes(accessCondition, this.options, this.opContext);

        this.retrievedContentMD5Value = parentBlob.getProperties().getContentMD5();

        // Will validate it if it was returned
        this.validateBlobMd5 = !options.getDisableContentMD5Validation()
                && !Utility.isNullOrEmpty(this.retrievedContentMD5Value);

        // Need the whole blob to validate MD5. If we download a range, don't bother trying.
        if (blobRangeOffset != 0 || blobRangeLength != null) {
            this.validateBlobMd5 = false;
        }

        // Validates the first option, and sets future requests to use if match
        // request option.

        // If there is an existing conditional validate it, as we intend to
        // replace it for future requests.
        String previousLeaseId = null;
        if (accessCondition != null) {
            previousLeaseId = accessCondition.getLeaseID();

            if (!accessCondition.verifyConditional(this.parentBlobRef.getProperties().getEtag(), this.parentBlobRef
                    .getProperties().getLastModified())) {
                throw new StorageException(StorageErrorCode.CONDITION_FAILED.toString(),
                        SR.INVALID_CONDITIONAL_HEADERS, HttpURLConnection.HTTP_PRECON_FAILED, null, null);
            }
        }

        // Subsequent range downloads carry only the lease id and, unless disabled, an if-match on the etag read above.
        this.accessCondition = new AccessCondition();
        this.accessCondition.setLeaseID(previousLeaseId);
        if (!options.getSkipEtagLocking()) {
            this.accessCondition.setIfMatch(this.parentBlobRef.getProperties().getEtag());
        }

        this.streamLength = blobRangeLength == null
                ? this.parentBlobRef.getProperties().getLength() - this.blobRangeOffset
                : Math.min(this.parentBlobRef.getProperties().getLength() - this.blobRangeOffset, blobRangeLength);

        if (this.validateBlobMd5) {
            try {
                this.md5Digest = MessageDigest.getInstance("MD5");
            }
            catch (final NoSuchAlgorithmException e) {
                // This wont happen, throw fatal.
                throw Utility.generateNewUnexpectedStorageException(e);
            }
        }

        this.reposition(blobRangeOffset);
    }

    /**
     * Returns an estimate of the number of bytes that can be read (or skipped over) from this input stream without
     * blocking by the next invocation of a method for this input stream. For this stream that is the number of bytes
     * still unread in the currently buffered chunk; no service request is made.
     * 
     * @return An int which represents the number of bytes left in the current buffer, or 0 when the
     *         buffer is exhausted, the stream has just been repositioned, or the stream has been closed.
     * 
     * @throws IOException
     *             If an I/O error occurs.
     */
    @Override
    public synchronized int available() throws IOException {
        // Ask the buffer itself rather than deriving the value from bufferSize/bufferStartOffset, so the answer is
        // never negative or stale after a reposition (skip/reset/ranged construction) or after close().
        final ByteArrayInputStream buffer = this.currentBuffer;
        return buffer == null ? 0 : buffer.available();
    }

    /**
     * Helper function to check if the stream is faulted, if it is it surfaces the exception.
     * 
     * @throws IOException
     *             The last recorded error, if the stream is faulted. In particular, an IOException is thrown if the
     *             input stream has been closed.
     */
    private synchronized void checkStreamState() throws IOException {
        if (this.streamFaulted) {
            throw this.lastError;
        }
    }

    /**
     * Closes this input stream and releases any system resources associated with the stream. The buffered data is
     * dropped and the stream is marked faulted so that subsequent reads fail with a "stream closed" error.
     * 
     * @throws IOException
     *             If an I/O error occurs.
     */
    @Override
    public synchronized void close() throws IOException {
        this.currentBuffer = null;
        this.streamFaulted = true;
        this.lastError = new IOException(SR.STREAM_CLOSED);
    }

    /**
     * Dispatches a read operation of N bytes starting at the current absolute read position and installs the result
     * as the current buffer. When using sparse page blobs, the page ranges are evaluated and zero
     * bytes may be generated on the client side for some ranges that do not exist.
     * 
     * @param readLength
     *            An int which represents the number of bytes to read.
     * 
     * @throws IOException
     *             If the download fails; the stream is marked faulted and the same error is rethrown on later
     *             operations.
     */
    @DoesServiceRequest
    private synchronized void dispatchRead(final int readLength) throws IOException {
        try {
            final byte[] byteBuffer = new byte[readLength];

            this.parentBlobRef.downloadRangeInternal(this.currentAbsoluteReadPosition, (long) readLength, byteBuffer,
                    0, this.accessCondition, this.options, this.opContext);

            this.currentBuffer = new ByteArrayInputStream(byteBuffer);
            this.bufferSize = readLength;
            this.bufferStartOffset = this.currentAbsoluteReadPosition;
        }
        catch (final StorageException e) {
            this.streamFaulted = true;
            this.lastError = Utility.initIOException(e);
            throw this.lastError;
        }
    }

    /**
     * Marks the current position in this input stream. A subsequent call to the reset method repositions this stream at
     * the last marked position so that subsequent reads re-read the same bytes.
     * 
     * @param readlimit
     *            An int which represents the maximum limit of bytes that can be read before the mark
     *            position becomes invalid.
     */
    @Override
    public synchronized void mark(final int readlimit) {
        this.markedPosition = this.currentAbsoluteReadPosition;
        this.markExpiry = readlimit;
    }

    /**
     * Tests if this input stream supports the mark and reset methods. Whether or not mark and reset are supported is an
     * invariant property of a particular input stream instance. This implementation always supports them.
     * 
     * @return True, since this stream supports the mark and reset methods.
     */
    @Override
    public boolean markSupported() {
        return true;
    }

    /**
     * Reads the next byte of data from the input stream. The value byte is returned as an int in the range 0 to 255. If
     * no byte is available because the end of the stream has been reached, the value -1 is returned. This method blocks
     * until input data is available, the end of the stream is detected, or an exception is thrown.
     * 
     * @return An int which represents the next byte of data (0 to 255), or -1 if
     *         there is no more data because the end of the stream has been reached.
     * 
     * @throws IOException
     *             If an I/O error occurs.
     */
    @Override
    @DoesServiceRequest
    public int read() throws IOException {
        final byte[] tBuff = new byte[1];
        final int numberOfBytesRead = this.read(tBuff, 0, 1);

        if (numberOfBytesRead > 0) {
            // Mask to return the byte as an unsigned value.
            return tBuff[0] & 0xFF;
        }
        else if (numberOfBytesRead == 0) {
            throw new IOException(SR.UNEXPECTED_STREAM_READ_ERROR);
        }
        else {
            return -1;
        }
    }

    /**
     * Reads some number of bytes from the input stream and stores them into the buffer array b. The number of bytes
     * actually read is returned as an integer. This method blocks until input data is available, end of file is
     * detected, or an exception is thrown. If the length of b is zero, then no bytes are read and 0 is returned;
     * otherwise, there is an attempt to read at least one byte. If no byte is available because the stream is at the
     * end of the file, the value -1 is returned; otherwise, at least one byte is read and stored into b.
     * 
     * The first byte read is stored into element b[0], the next one into b[1], and so on. The number of bytes read
     * is, at most, equal to the length of b. Let k be the number of bytes actually read; these bytes will be stored
     * in elements b[0] through b[k-1], leaving elements b[k] through b[b.length-1] unaffected.
     * 
     * This method has the same effect as read(b, 0, b.length).
     * 
     * @param b
     *            A byte array which represents the buffer into which the data is read.
     * 
     * @return An int which represents the total number of bytes read into the buffer, or -1 if
     *         there is no more data because the end of the stream has been reached.
     * 
     * @throws IOException
     *             If the first byte cannot be read for any reason other than the end of the file, if the input stream
     *             has been closed, or if some other I/O error occurs.
     * @throws NullPointerException
     *             If the byte array b is null.
     */
    @Override
    @DoesServiceRequest
    public int read(final byte[] b) throws IOException {
        return this.read(b, 0, b.length);
    }

    /**
     * Reads up to len bytes of data from the input stream into an array of bytes. An attempt is made to read as many
     * as len bytes, but a smaller number may be read: a single call returns at most the bytes remaining in the
     * current buffered chunk. The number of bytes actually read is returned as an integer. This method blocks until
     * input data is available, end of file is detected, or an exception is thrown.
     * 
     * If len is zero, then no bytes are read and 0 is returned; otherwise, there is an attempt to read at least one
     * byte. If no byte is available because the stream is at end of file, the value -1 is returned; otherwise, at least
     * one byte is read and stored into b.
     * 
     * The first byte read is stored into element b[off], the next one into b[off+1], and so on. Let k be the number
     * of bytes actually read; these bytes will be stored in elements b[off] through b[off+k-1], leaving elements
     * b[off+k] through b[off+len-1] unaffected.
     * 
     * @param b
     *            A byte array which represents the buffer into which the data is read.
     * @param off
     *            An int which represents the start offset in the byte array at which the data
     *            is written.
     * @param len
     *            An int which represents the maximum number of bytes to read.
     * 
     * @return An int which represents the total number of bytes read into the buffer, or -1 if
     *         there is no more data because the end of the stream has been reached.
     * 
     * @throws IOException
     *             If the first byte cannot be read for any reason other than end of file, or if the input stream has
     *             been closed, or if some other I/O error occurs.
     * @throws NullPointerException
     *             If the byte array b is null.
     * @throws IndexOutOfBoundsException
     *             If off is negative, len is negative, or len is greater than
     *             b.length - off.
     */
    @Override
    @DoesServiceRequest
    public int read(final byte[] b, final int off, final int len) throws IOException {
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException();
        }

        if (len == 0) {
            // Per the InputStream contract a zero-length read returns 0; still surface a closed/faulted stream,
            // but do not trigger a service request for a read that cannot transfer any data.
            this.checkStreamState();
            return 0;
        }

        return this.readInternal(b, off, len);
    }

    /**
     * Performs internal read to the given byte buffer: refills the buffer from the service when it is exhausted,
     * copies bytes to the caller, feeds the MD5 digest when validation is active, and expires the mark when the read
     * limit has been exceeded.
     * 
     * @param b
     *            A byte array which represents the buffer into which the data is read.
     * @param off
     *            An int which represents the start offset in the byte array b at
     *            which the data is written.
     * @param len
     *            An int which represents the maximum number of bytes to read.
     * 
     * @return An int which represents the total number of bytes read into the buffer, or -1 if
     *         there is no more data because the end of the stream has been reached.
     * 
     * @throws IOException
     *             If the first byte cannot be read for any reason other than end of file, or if the input stream has
     *             been closed, or if the MD5 of the fully read blob does not match the value reported by the service,
     *             or if some other I/O error occurs.
     */
    @DoesServiceRequest
    private synchronized int readInternal(final byte[] b, final int off, int len) throws IOException {
        this.checkStreamState();

        // Absolute position one past the last byte this stream exposes.
        final long streamEnd = this.streamLength + this.blobRangeOffset;

        // if buffer is empty do next get operation
        if ((this.currentBuffer == null || this.currentBuffer.available() == 0)
                && this.currentAbsoluteReadPosition < streamEnd) {
            this.dispatchRead((int) Math.min(this.readSize, streamEnd - this.currentAbsoluteReadPosition));
        }

        len = Math.min(len, this.readSize);

        // do read from buffer
        final int numberOfBytesRead = this.currentBuffer.read(b, off, len);

        if (numberOfBytesRead > 0) {
            this.currentAbsoluteReadPosition += numberOfBytesRead;

            if (this.validateBlobMd5) {
                this.md5Digest.update(b, off, numberOfBytesRead);

                if (this.currentAbsoluteReadPosition == streamEnd) {
                    // Reached end of stream, validate md5.
                    final String calculatedMd5 = Base64.encode(this.md5Digest.digest());
                    if (!calculatedMd5.equals(this.retrievedContentMD5Value)) {
                        this.lastError = Utility
                                .initIOException(new StorageException(
                                        StorageErrorCodeStrings.INVALID_MD5,
                                        String.format(
                                                "Blob data corrupted (integrity check failed), Expected value is %s, retrieved %s",
                                                this.retrievedContentMD5Value, calculatedMd5),
                                        Constants.HeaderConstants.HTTP_UNUSED_306, null, null));
                        this.streamFaulted = true;
                        throw this.lastError;
                    }
                }
            }
        }

        // update markers: once more than markExpiry bytes were read past the mark, fall back to the range start
        if (this.markExpiry > 0 && this.markedPosition + this.markExpiry < this.currentAbsoluteReadPosition) {
            this.markedPosition = this.blobRangeOffset;
            this.markExpiry = 0;
        }

        return numberOfBytesRead;
    }

    /**
     * Repositions the stream to the given absolute byte offset and discards any buffered data, so that the next read
     * fetches from the service at the new position.
     * 
     * @param absolutePosition
     *            A long which represents the absolute byte offset within the blob to reposition
     *            the stream to.
     */
    private synchronized void reposition(final long absolutePosition) {
        this.currentAbsoluteReadPosition = absolutePosition;
        this.currentBuffer = new ByteArrayInputStream(new byte[0]);
        // Keep the buffer bookkeeping consistent with the (now empty) buffer.
        this.bufferStartOffset = absolutePosition;
        this.bufferSize = 0;
    }

    /**
     * Repositions this stream to the position at the time the mark method was last called on this input stream. Note
     * repositioning the blob read stream will disable blob MD5 checking.
     * 
     * @throws IOException
     *             If the mark has been invalidated, i.e. more bytes than the mark's read limit have been read since
     *             the mark was set.
     */
    @Override
    public synchronized void reset() throws IOException {
        if (this.markedPosition + this.markExpiry < this.currentAbsoluteReadPosition) {
            throw new IOException(SR.MARK_EXPIRED);
        }

        // The digest only makes sense over a single sequential pass, so validation is abandoned from here on.
        this.validateBlobMd5 = false;
        this.md5Digest = null;
        this.reposition(this.markedPosition);
    }

    /**
     * Skips over and discards n bytes of data from this input stream by moving the read position forward; the
     * skipped bytes are not downloaded. Any currently buffered data is discarded.
     * 
     * Note repositioning the blob read stream will disable blob MD5 checking.
     * 
     * @param n
     *            A long which represents the number of bytes to skip.
     * 
     * @return The number of bytes skipped, which is always {@code n}.
     * 
     * @throws IndexOutOfBoundsException
     *             If n is negative or skipping n bytes would move past the end of the stream.
     */
    @Override
    public synchronized long skip(final long n) throws IOException {
        if (n == 0) {
            return 0;
        }

        // Compare against the remaining byte count instead of computing position + n, which overflows for very
        // large n and would otherwise let the stream be repositioned to a negative offset.
        final long remaining = this.streamLength + this.blobRangeOffset - this.currentAbsoluteReadPosition;
        if (n < 0 || n > remaining) {
            throw new IndexOutOfBoundsException();
        }

        this.validateBlobMd5 = false;
        this.md5Digest = null;
        this.reposition(this.currentAbsoluteReadPosition + n);
        return n;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy