com.microsoft.azure.storage.blob.BlobInputStream Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of azure-storage Show documentation
Show all versions of azure-storage Show documentation
SDK for Microsoft Azure Storage Clients
/**
* Copyright Microsoft Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.microsoft.azure.storage.blob;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import com.microsoft.azure.storage.AccessCondition;
import com.microsoft.azure.storage.Constants;
import com.microsoft.azure.storage.DoesServiceRequest;
import com.microsoft.azure.storage.OperationContext;
import com.microsoft.azure.storage.StorageErrorCode;
import com.microsoft.azure.storage.StorageErrorCodeStrings;
import com.microsoft.azure.storage.StorageException;
import com.microsoft.azure.storage.core.Base64;
import com.microsoft.azure.storage.core.SR;
import com.microsoft.azure.storage.core.Utility;
/**
* Provides an input stream to read a given blob resource.
*/
public final class BlobInputStream extends InputStream {
/**
 * Holds the reference to the blob this stream is associated with.
 */
private final CloudBlob parentBlobRef;
/**
 * Holds the MD5 digest accumulated over the bytes handed to the caller. Only created when whole-blob MD5
 * validation is enabled; cleared when the stream is repositioned through reset or skip.
 */
private MessageDigest md5Digest;
/**
 * A flag to determine if the stream is faulted; if so, the last error is thrown on the next read operation.
 */
private volatile boolean streamFaulted;
/**
 * Holds the last exception this stream encountered (or the "stream closed" error after close).
 */
private IOException lastError;
/**
 * Holds the OperationContext for the current stream.
 */
private final OperationContext opContext;
/**
 * Holds a copy of the request options for the current stream.
 */
private final BlobRequestOptions options;
/**
 * Holds the stream length, that is, the number of bytes this stream exposes starting at
 * {@link #blobRangeOffset}.
 */
private long streamLength;
/**
 * Holds the stream read size for both block and page blobs; a single range download never requests more than
 * this many bytes.
 */
private final int readSize;
/**
 * A flag indicating if the Blob MD5 should be validated once the end of the stream is reached.
 */
private boolean validateBlobMd5;
/**
 * Holds the Blob MD5 as reported by the blob's properties.
 */
private final String retrievedContentMD5Value;
/**
 * Holds the reference to the current buffered data.
 */
private ByteArrayInputStream currentBuffer;
/**
 * Holds an absolute byte position for the mark feature.
 */
private long markedPosition;
/**
 * Holds the mark delta after which the mark position is expired.
 */
private int markExpiry;
/**
 * Holds an absolute byte position of the current read position.
 */
private long currentAbsoluteReadPosition;
/**
 * Holds the absolute byte position of the start of the current buffer.
 */
private long bufferStartOffset;
/**
 * Holds the length of the current buffer in bytes.
 */
private int bufferSize;
/**
 * Holds the {@link AccessCondition} object that is sent with every range download issued by this stream.
 */
private AccessCondition accessCondition = null;
/**
 * Absolute offset within the source blob at which this stream starts.
 */
private final long blobRangeOffset;
/**
 * Creates a stream over the entire blob, starting at offset zero with no length limit.
 *
 * @param parentBlob
 *            The {@link CloudBlob} this stream reads from.
 * @param accessCondition
 *            An {@link AccessCondition} object which represents the access conditions for the blob.
 * @param options
 *            A {@link BlobRequestOptions} object that specifies any additional options for the request.
 * @param opContext
 *            An {@link OperationContext} object which is used to track the execution of the operation.
 *
 * @throws StorageException
 *             An exception representing any error which occurred during the operation.
 */
@DoesServiceRequest
protected BlobInputStream(final CloudBlob parentBlob, final AccessCondition accessCondition,
        final BlobRequestOptions options, final OperationContext opContext) throws StorageException {
    // Offset 0 and a null length select the whole blob in the ranged constructor.
    this(0L, null, parentBlob, accessCondition, options, opContext);
}
/**
 * Initializes a new instance of the BlobInputStream class that streams a range of the blob.
 * Note that if {@code blobRangeOffset} is not {@code 0} or {@code blobRangeLength} is not {@code null}, there will
 * be no content MD5 verification.
 *
 * @param blobRangeOffset
 *            The offset of blob data to begin stream. Must not be negative.
 * @param blobRangeLength
 *            How much data the stream should return after blobRangeOffset, or {@code null} to stream to the end
 *            of the blob. Must be positive when supplied.
 * @param parentBlob
 *            A {@link CloudBlob} object which represents the blob that this stream is associated with.
 * @param accessCondition
 *            An {@link AccessCondition} object which represents the access conditions for the blob.
 * @param options
 *            A {@link BlobRequestOptions} object that specifies any additional options for the request.
 * @param opContext
 *            An {@link OperationContext} object which is used to track the execution of the operation.
 *
 * @throws StorageException
 *             An exception representing any error which occurred during the operation.
 * @throws IllegalArgumentException
 *             If transactional content MD5 is requested and the blob's minimum read size exceeds 4 MB.
 * @throws IndexOutOfBoundsException
 *             If {@code blobRangeOffset} is negative or {@code blobRangeLength} is not positive.
 */
@DoesServiceRequest
protected BlobInputStream(long blobRangeOffset, Long blobRangeLength, final CloudBlob parentBlob,
        final AccessCondition accessCondition, final BlobRequestOptions options, final OperationContext opContext)
        throws StorageException {
    this.blobRangeOffset = blobRangeOffset;
    this.parentBlobRef = parentBlob;
    this.parentBlobRef.assertCorrectBlobType();
    this.options = new BlobRequestOptions(options);
    this.opContext = opContext;
    this.streamFaulted = false;
    this.currentAbsoluteReadPosition = blobRangeOffset;
    this.readSize = parentBlob.getStreamMinimumReadSizeInBytes();

    if (options.getUseTransactionalContentMD5() && this.readSize > 4 * Constants.MB) {
        throw new IllegalArgumentException(SR.INVALID_RANGE_CONTENT_MD5_HEADER);
    }

    // Reject an invalid range before talking to the service: the check depends only on the arguments, so
    // there is no reason to pay for a round trip first.
    if (blobRangeOffset < 0 || (blobRangeLength != null && blobRangeLength <= 0)) {
        throw new IndexOutOfBoundsException();
    }

    parentBlob.downloadAttributes(accessCondition, this.options, this.opContext);

    this.retrievedContentMD5Value = parentBlob.getProperties().getContentMD5();

    // Will validate it if it was returned
    this.validateBlobMd5 = !options.getDisableContentMD5Validation()
            && !Utility.isNullOrEmpty(this.retrievedContentMD5Value);

    // Need the whole blob to validate MD5. If we download a range, don't bother trying.
    if (blobRangeOffset != 0 || blobRangeLength != null) {
        this.validateBlobMd5 = false;
    }

    // Validate the caller's conditions once against the freshly downloaded attributes; subsequent range
    // requests use the replacement condition built below.
    String previousLeaseId = null;
    if (accessCondition != null) {
        previousLeaseId = accessCondition.getLeaseID();

        if (!accessCondition.verifyConditional(this.parentBlobRef.getProperties().getEtag(), this.parentBlobRef
                .getProperties().getLastModified())) {
            throw new StorageException(StorageErrorCode.CONDITION_FAILED.toString(),
                    SR.INVALID_CONDITIONAL_HEADERS, HttpURLConnection.HTTP_PRECON_FAILED, null, null);
        }
    }

    // Future requests carry the original lease id and, unless etag locking is skipped, an If-Match on the
    // etag observed now.
    this.accessCondition = new AccessCondition();
    this.accessCondition.setLeaseID(previousLeaseId);

    if (!options.getSkipEtagLocking()) {
        this.accessCondition.setIfMatch(this.parentBlobRef.getProperties().getEtag());
    }

    // The stream covers the rest of the blob after the offset, capped by the requested length if any.
    this.streamLength = blobRangeLength == null
            ? this.parentBlobRef.getProperties().getLength() - this.blobRangeOffset
            : Math.min(this.parentBlobRef.getProperties().getLength() - this.blobRangeOffset, blobRangeLength);

    if (this.validateBlobMd5) {
        try {
            this.md5Digest = MessageDigest.getInstance("MD5");
        }
        catch (final NoSuchAlgorithmException e) {
            // This wont happen, throw fatal.
            throw Utility.generateNewUnexpectedStorageException(e);
        }
    }

    this.reposition(blobRangeOffset);
}
/**
 * Returns the number of bytes that can be read (or skipped over) from this input stream without blocking, that
 * is, the number of bytes still unread in the currently buffered data.
 *
 * The value is taken from the buffer itself rather than derived from offset bookkeeping, so it stays correct
 * after the stream has been repositioned and when a download returned fewer bytes than requested.
 *
 * @return An <code>int</code> which represents the number of bytes that can be read from this input stream
 *         without issuing another request; 0 when nothing is buffered (including after the stream has been
 *         closed).
 *
 * @throws IOException
 *             If an I/O error occurs.
 */
@Override
public synchronized int available() throws IOException {
    final ByteArrayInputStream buffer = this.currentBuffer;
    // close() drops the buffer; report nothing available instead of failing.
    return buffer == null ? 0 : buffer.available();
}
/**
 * Surfaces the stored error if the stream has been marked as faulted; otherwise does nothing.
 *
 * @throws IOException
 *             The last recorded error, when the stream is faulted (for example after it has been closed).
 */
private synchronized void checkStreamState() throws IOException {
    if (!this.streamFaulted) {
        return;
    }

    throw this.lastError;
}
/**
 * Closes this input stream: the buffered data is released and the stream is marked as faulted so that later
 * reads fail with a "stream closed" error.
 *
 * @throws IOException
 *             If an I/O error occurs.
 */
@Override
public synchronized void close() throws IOException {
    // Record the error first, then flip the fault flag that makes subsequent reads throw it.
    this.lastError = new IOException(SR.STREAM_CLOSED);
    this.streamFaulted = true;
    this.currentBuffer = null;
}
/**
 * Downloads up to {@code readLength} bytes starting at the current absolute read position and installs them as
 * the current buffer.
 *
 * @param readLength
 *            An <code>int</code> which represents the number of bytes to request.
 *
 * @throws IOException
 *             If the download fails; the stream is marked as faulted and the same error is thrown by later
 *             operations.
 */
@DoesServiceRequest
private synchronized void dispatchRead(final int readLength) throws IOException {
    try {
        final byte[] byteBuffer = new byte[readLength];

        final int numBytes = this.parentBlobRef.downloadRangeInternal(this.currentAbsoluteReadPosition,
                (long) readLength, byteBuffer, 0, this.accessCondition, this.options, this.opContext);

        /*
         * In the case of client-side decryption, we may get fewer bytes than we request at the end of the blob
         * when we remove padding. We want to ensure our data is the correct size, even in this case. Also, in
         * this case, we can no longer validate the MD5 because it was calculated on the ciphertext on upload,
         * but this inputstream calculates it on the plaintext.
         */
        if (numBytes < readLength && this.options.getEncryptionPolicy() != null) {
            this.validateBlobMd5 = false;
        }

        this.currentBuffer = new ByteArrayInputStream(byteBuffer, 0, numBytes);
        // Track the number of bytes actually buffered, not the number requested, so the bookkeeping matches
        // the buffer when the download came back short.
        this.bufferSize = numBytes;
        this.bufferStartOffset = this.currentAbsoluteReadPosition;
    }
    catch (final StorageException e) {
        this.streamFaulted = true;
        this.lastError = Utility.initIOException(e);
        throw this.lastError;
    }
}
/**
 * Remembers the current read position so that a later call to reset can return to it.
 *
 * @param readlimit
 *            An <code>int</code> which represents the maximum number of bytes that can be read before the mark
 *            position becomes invalid.
 */
@Override
public synchronized void mark(final int readlimit) {
    this.markExpiry = readlimit;
    this.markedPosition = this.currentAbsoluteReadPosition;
}
/**
 * Tests if this input stream supports the mark and reset methods. This implementation always supports them.
 *
 * @return <code>true</code>, because this stream supports the mark and reset methods.
 */
@Override
public boolean markSupported() {
return true;
}
/**
 * Reads the next byte of data from the input stream. This method blocks until input data is available, the end
 * of the stream is detected, or an exception is thrown.
 *
 * @return An <code>int</code> in the range 0 to 255 holding the next byte, or -1 if there is no more data
 *         because the end of the stream has been reached.
 *
 * @throws IOException
 *             If an I/O error occurs, or if the underlying read unexpectedly returns zero bytes.
 */
@Override
@DoesServiceRequest
public int read() throws IOException {
    final byte[] single = new byte[1];
    final int count = this.read(single, 0, 1);

    if (count < 0) {
        // End of stream.
        return -1;
    }

    if (count == 0) {
        // A one-byte read must either deliver a byte or signal end of stream.
        throw new IOException(SR.UNEXPECTED_STREAM_READ_ERROR);
    }

    // Mask to present the byte as an unsigned value.
    return single[0] & 0xFF;
}
/**
 * Reads some number of bytes from the input stream and stores them into the buffer array <code>b</code>,
 * starting at <code>b[0]</code>. Equivalent to <code>read(b, 0, b.length)</code>.
 *
 * @param b
 *            A <code>byte</code> array which represents the buffer into which the data is read.
 *
 * @return An <code>int</code> which represents the total number of bytes read into the buffer, or -1 if there
 *         is no more data because the end of the stream has been reached.
 *
 * @throws IOException
 *             If the first byte cannot be read for any reason other than the end of the file, if the input
 *             stream has been closed, or if some other I/O error occurs.
 * @throws NullPointerException
 *             If the <code>byte</code> array <code>b</code> is null.
 */
@Override
@DoesServiceRequest
public int read(final byte[] b) throws IOException {
    final int capacity = b.length;
    return this.read(b, 0, capacity);
}
/**
 * Reads up to <code>len</code> bytes of data from the input stream into an array of bytes. An attempt is made
 * to read as many as <code>len</code> bytes, but a smaller number may be read. The number of bytes actually
 * read is returned as an integer. This method blocks until input data is available, end of file is detected,
 * or an exception is thrown.
 *
 * If <code>len</code> is zero, then no bytes are read, no request is sent to the service and 0 is returned;
 * otherwise, there is an attempt to read at least one byte. If no byte is available because the stream is at
 * end of file, the value -1 is returned; otherwise, at least one byte is read and stored into <code>b</code>.
 *
 * The first byte read is stored into element <code>b[off]</code>, the next one into <code>b[off+1]</code>, and
 * so on. Let <code>k</code> be the number of bytes actually read; these bytes will be stored in elements
 * <code>b[off]</code> through <code>b[off+k-1]</code>, leaving elements <code>b[off+k]</code> through
 * <code>b[off+len-1]</code> unaffected.
 *
 * @param b
 *            A <code>byte</code> array which represents the buffer into which the data is read.
 * @param off
 *            An <code>int</code> which represents the start offset in the <code>byte</code> array at which the
 *            data is written.
 * @param len
 *            An <code>int</code> which represents the maximum number of bytes to read.
 *
 * @return An <code>int</code> which represents the total number of bytes read into the buffer, or -1 if there
 *         is no more data because the end of the stream has been reached.
 *
 * @throws IOException
 *             If the first byte cannot be read for any reason other than end of file, or if the input stream
 *             has been closed, or if some other I/O error occurs.
 * @throws NullPointerException
 *             If the <code>byte</code> array <code>b</code> is null.
 * @throws IndexOutOfBoundsException
 *             If <code>off</code> is negative, <code>len</code> is negative, or <code>len</code> is greater
 *             than <code>b.length - off</code>.
 */
@Override
@DoesServiceRequest
public int read(final byte[] b, final int off, final int len) throws IOException {
    if (off < 0 || len < 0 || len > b.length - off) {
        throw new IndexOutOfBoundsException();
    }

    if (len == 0) {
        // A zero-length read must return 0 without downloading anything or reporting end of stream. A
        // faulted or closed stream still surfaces its error, as it does for any other read.
        this.checkStreamState();
        return 0;
    }

    return this.readInternal(b, off, len);
}
/**
 * Copies buffered blob data into the caller's array, downloading the next chunk first when the buffer is
 * drained and the end of the stream has not been reached. Also maintains the running MD5 (when validation is
 * enabled) and expires the mark once more than the mark limit has been read.
 *
 * @param b
 *            A <code>byte</code> array which represents the buffer into which the data is read.
 * @param off
 *            An <code>int</code> which represents the start offset in the <code>byte</code> array
 *            <code>b</code> at which the data is written.
 * @param len
 *            An <code>int</code> which represents the maximum number of bytes to read.
 *
 * @return An <code>int</code> which represents the total number of bytes read into the buffer, or -1 if there
 *         is no more data because the end of the stream has been reached.
 *
 * @throws IOException
 *             If the stream is faulted or closed, if a download fails, or if the MD5 computed at the end of the
 *             stream does not match the blob's content MD5.
 */
@DoesServiceRequest
private synchronized int readInternal(final byte[] b, final int off, int len) throws IOException {
    this.checkStreamState();

    // Absolute position one past the last byte this stream exposes.
    final long endOfStream = this.streamLength + this.blobRangeOffset;

    // Refill the buffer when it is drained and there is still data left to fetch.
    final boolean bufferDrained = this.currentBuffer == null || this.currentBuffer.available() == 0;
    if (bufferDrained && this.currentAbsoluteReadPosition < endOfStream) {
        final long remaining = endOfStream - this.currentAbsoluteReadPosition;
        this.dispatchRead((int) Math.min(this.readSize, remaining));
    }

    len = Math.min(len, this.readSize);

    // Serve the request from the buffer.
    final int bytesCopied = this.currentBuffer.read(b, off, len);

    if (bytesCopied > 0) {
        this.currentAbsoluteReadPosition += bytesCopied;

        if (this.validateBlobMd5) {
            this.md5Digest.update(b, off, bytesCopied);

            if (this.currentAbsoluteReadPosition == endOfStream) {
                // Every byte has been delivered; compare the digest with the value stored on the blob.
                final String calculatedMd5 = Base64.encode(this.md5Digest.digest());
                if (!calculatedMd5.equals(this.retrievedContentMD5Value)) {
                    final String message = String.format(
                            "Blob data corrupted (integrity check failed), Expected value is %s, retrieved %s",
                            this.retrievedContentMD5Value, calculatedMd5);
                    this.lastError = Utility.initIOException(new StorageException(
                            StorageErrorCodeStrings.INVALID_MD5, message,
                            Constants.HeaderConstants.HTTP_UNUSED_306, null, null));
                    this.streamFaulted = true;
                    throw this.lastError;
                }
            }
        }
    }

    // Expire the mark once the caller has read past the mark limit.
    final boolean markExpired = this.markExpiry > 0
            && this.markedPosition + this.markExpiry < this.currentAbsoluteReadPosition;
    if (markExpired) {
        this.markedPosition = this.blobRangeOffset;
        this.markExpiry = 0;
    }

    return bytesCopied;
}
/**
 * Repositions the stream to the given absolute byte offset and discards any buffered data, so the next read
 * downloads from the new position.
 *
 * @param absolutePosition
 *            A <code>long</code> which represents the absolute byte offset within the blob to reposition the
 *            stream to.
 */
private synchronized void reposition(final long absolutePosition) {
    this.currentAbsoluteReadPosition = absolutePosition;
    this.currentBuffer = new ByteArrayInputStream(new byte[0]);
    // Keep the buffer bookkeeping in step with the (now empty) buffer; otherwise values left over from the
    // previous buffer make the stream report bytes that are no longer buffered.
    this.bufferStartOffset = absolutePosition;
    this.bufferSize = 0;
}
/**
 * Returns the stream to the position recorded by the most recent call to mark. Note that repositioning the
 * blob read stream disables blob MD5 checking.
 *
 * @throws IOException
 *             If the current position lies beyond the marked position plus its read limit, which is also the
 *             case when the mark has been invalidated.
 */
@Override
public synchronized void reset() throws IOException {
    final long markLimit = this.markedPosition + this.markExpiry;
    if (this.currentAbsoluteReadPosition > markLimit) {
        throw new IOException(SR.MARK_EXPIRED);
    }

    // The digest no longer covers a contiguous pass over the blob, so stop validating.
    this.md5Digest = null;
    this.validateBlobMd5 = false;

    this.reposition(this.markedPosition);
}
/**
 * Skips over and discards exactly <code>n</code> bytes of data from this input stream by repositioning it; no
 * data is downloaded for the skipped range.
 *
 * Note repositioning the blob read stream will disable blob MD5 checking.
 *
 * @param n
 *            A <code>long</code> which represents the number of bytes to skip.
 *
 * @return <code>n</code>, the number of bytes skipped (0 when <code>n</code> is 0).
 *
 * @throws IOException
 *             If an I/O error occurs.
 * @throws IndexOutOfBoundsException
 *             If <code>n</code> is negative or larger than the number of bytes remaining in the stream.
 */
@Override
public synchronized long skip(final long n) throws IOException {
    if (n == 0) {
        return 0;
    }

    // Compare against the remaining byte count instead of adding n to the position: the addition overflows
    // for very large n (for example Long.MAX_VALUE) and would let the stream move to a negative offset.
    final long remaining = this.streamLength + this.blobRangeOffset - this.currentAbsoluteReadPosition;
    if (n < 0 || n > remaining) {
        throw new IndexOutOfBoundsException();
    }

    // The digest no longer covers a contiguous pass over the blob, so stop validating.
    this.validateBlobMd5 = false;
    this.md5Digest = null;
    this.reposition(this.currentAbsoluteReadPosition + n);
    return n;
}
}