com.microsoft.azure.storage.blob.BlobOutputStreamInternal Maven / Gradle / Ivy
Show all versions of azure-storage Show documentation
/**
* Copyright Microsoft Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.microsoft.azure.storage.blob;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicInteger;
import com.microsoft.azure.storage.AccessCondition;
import com.microsoft.azure.storage.Constants;
import com.microsoft.azure.storage.DoesServiceRequest;
import com.microsoft.azure.storage.OperationContext;
import com.microsoft.azure.storage.StorageErrorCodeStrings;
import com.microsoft.azure.storage.StorageException;
import com.microsoft.azure.storage.core.Base64;
import com.microsoft.azure.storage.core.Logger;
import com.microsoft.azure.storage.core.SR;
import com.microsoft.azure.storage.core.Utility;
/**
* The class is an append-only stream for writing into storage.
*/
final class BlobOutputStreamInternal extends BlobOutputStream {
/**
 * Thread factory for the upload workers of this stream. Every thread it creates is a daemon thread with normal
 * priority and a name of the form {@code azure-storage-bloboutputstream-thread-N}, where N counts up from 1 per
 * factory instance.
 */
private static class BlobOutputStreamThreadFactory implements ThreadFactory {
    private final ThreadGroup group;
    private final AtomicInteger threadNumber = new AtomicInteger(1);
    private final String namePrefix;

    BlobOutputStreamThreadFactory() {
        // Use the security manager's thread group when one is installed, otherwise the creating thread's group.
        final SecurityManager securityManager = System.getSecurityManager();
        if (securityManager == null) {
            this.group = Thread.currentThread().getThreadGroup();
        }
        else {
            this.group = securityManager.getThreadGroup();
        }
        this.namePrefix = "azure-storage-bloboutputstream-thread-";
    }

    @Override
    public Thread newThread(Runnable r) {
        final String threadName = this.namePrefix + this.threadNumber.getAndIncrement();
        final Thread worker = new Thread(this.group, r, threadName, 0);
        worker.setDaemon(true);
        // The new thread inherits the creator's priority; normalize it.
        if (worker.getPriority() != Thread.NORM_PRIORITY) {
            worker.setPriority(Thread.NORM_PRIORITY);
        }
        return worker;
    }
}
/**
* Holds the {@link AccessCondition} object that represents the access conditions for the blob.
*/
private AccessCondition accessCondition;
/**
* Used for block blobs, holds the block id prefix.
*/
private String blockIdPrefix;
/**
* Used for block blobs, holds the block list.
*/
private ArrayList blockList;
/**
* The CompletionService used to await task completion for this stream.
*/
private final ExecutorCompletionService completionService;
/**
* Holds the futures of the executing tasks. The starting size of the set is a multiple of the concurrent request
* count to reduce the cost of resizing the set later.
*/
private final Set> futureSet;
/**
* Holds the write threshold of number of bytes to buffer prior to dispatching a write. For block blob this is the
* block size, for page blob this is the Page commit size.
*/
private int internalWriteThreshold = -1;
/**
* Holds the last exception this stream encountered.
*/
private volatile IOException lastError = null;
/**
* Holds the reference to the MD5 digest for the blob.
*/
private MessageDigest md5Digest;
/**
* Holds the OperationContext for the current stream.
*/
private final OperationContext opContext;
/**
* Holds the options for the current stream.
*/
private final BlobRequestOptions options;
private long currentBlobOffset;
/**
* A private buffer to store data prior to committing to the cloud.
*/
private volatile ByteArrayOutputStream outBuffer;
/**
* Holds the reference to the blob this stream is associated with.
*/
private final CloudBlob parentBlobRef;
/**
* Determines if this stream is used against a page blob or block blob.
*/
private BlobType streamType = BlobType.UNSPECIFIED;
/**
* The ExecutorService used to schedule tasks for this stream.
*/
private final ThreadPoolExecutor threadExecutor;
/**
* Indicates whether the stream has been aborted and therefore closing will skip committing data.
*/
private boolean aborted;
/**
* Initializes a new instance of the BlobOutputStream class.
*
* @param parentBlob
* A {@link CloudBlob} object which represents the blob that this stream is associated with.
* @param accessCondition
* An {@link AccessCondition} object which represents the access conditions for the blob.
* @param options
* A {@link BlobRequestOptions} object which specifies any additional options for the request.
* @param opContext
* An {@link OperationContext} object which is used to track the execution of the operation.
*
* @throws StorageException
* An exception representing any error which occurred during the operation.
*/
private BlobOutputStreamInternal(final CloudBlob parentBlob, final AccessCondition accessCondition,
final BlobRequestOptions options, final OperationContext opContext) throws StorageException {
this.accessCondition = accessCondition;
this.parentBlobRef = parentBlob;
this.parentBlobRef.assertCorrectBlobType();
this.options = new BlobRequestOptions(options);
this.outBuffer = new ByteArrayOutputStream();
this.opContext = opContext;
if (this.options.getConcurrentRequestCount() < 1) {
throw new IllegalArgumentException("ConcurrentRequestCount");
}
this.futureSet = Collections.newSetFromMap(new ConcurrentHashMap, Boolean>(
this.options.getConcurrentRequestCount() == null ? 1 : this.options.getConcurrentRequestCount() * 2));
if (this.options.getStoreBlobContentMD5()) {
try {
this.md5Digest = MessageDigest.getInstance("MD5");
}
catch (final NoSuchAlgorithmException e) {
// This wont happen, throw fatal.
throw Utility.generateNewUnexpectedStorageException(e);
}
}
// V2 cachedThreadPool for perf.
this.threadExecutor = new ThreadPoolExecutor(
this.options.getConcurrentRequestCount(),
this.options.getConcurrentRequestCount(),
10,
TimeUnit.SECONDS,
new LinkedBlockingQueue(),
new BlobOutputStreamThreadFactory());
this.completionService = new ExecutorCompletionService(this.threadExecutor);
}
/**
 * Initializes a new instance of the BlobOutputStream class for a CloudBlockBlob. The stream starts with an empty
 * block list and a random block id prefix, and dispatches a block whenever the buffer reaches the blob's stream
 * write size.
 *
 * @param parentBlob
 *            A {@link CloudBlockBlob} object which represents the blob that this stream is associated with.
 * @param accessCondition
 *            An {@link AccessCondition} object which represents the access conditions for the blob.
 * @param options
 *            A {@link BlobRequestOptions} object which specifies any additional options for the request.
 * @param opContext
 *            An {@link OperationContext} object which is used to track the execution of the operation.
 *
 * @throws StorageException
 *             An exception representing any error which occurred during the operation.
 */
protected BlobOutputStreamInternal(final CloudBlockBlob parentBlob, final AccessCondition accessCondition,
        final BlobRequestOptions options, final OperationContext opContext) throws StorageException {
    this((CloudBlob) parentBlob, accessCondition, options, opContext);

    this.blockList = new ArrayList<BlockEntry>();
    // A fresh UUID per stream keeps the block ids of this stream distinct from those of other streams.
    this.blockIdPrefix = UUID.randomUUID().toString() + "-";

    this.streamType = BlobType.BLOCK_BLOB;
    this.internalWriteThreshold = this.parentBlobRef.getStreamWriteSizeInBytes();
}
/**
* Initializes a new instance of the BlobOutputStream class for a CloudPageBlob
*
* @param parentBlob
* A {@link CloudPageBlob} object which represents the blob that this stream is associated with.
* @param length
* A long
which represents the length of the page blob in bytes, which must be a multiple of
* 512.
* @param accessCondition
* An {@link AccessCondition} object which represents the access conditions for the blob.
* @param options
* A {@link BlobRequestOptions} object which specifies any additional options for the request
* @param opContext
* An {@link OperationContext} object which is used to track the execution of the operation
*
* @throws StorageException
* An exception representing any error which occurred during the operation.
*/
@DoesServiceRequest
protected BlobOutputStreamInternal(final CloudPageBlob parentBlob, final long length,
final AccessCondition accessCondition, final BlobRequestOptions options, final OperationContext opContext)
throws StorageException {
this(parentBlob, accessCondition, options, opContext);
this.streamType = BlobType.PAGE_BLOB;
this.internalWriteThreshold = (int) Math.min(this.parentBlobRef.getStreamWriteSizeInBytes(), length);
}
/**
 * Initializes a new instance of the BlobOutputStream class for a CloudAppendBlob. The starting offset is taken
 * from the access condition's append position when one is set, otherwise from the length recorded in the blob's
 * properties.
 *
 * @param parentBlob
 *            A {@link CloudAppendBlob} object which represents the blob that this stream is associated with.
 * @param accessCondition
 *            An {@link AccessCondition} object which represents the access conditions for the blob. When
 *            {@code null}, a new empty condition is used.
 * @param options
 *            A {@link BlobRequestOptions} object which specifies any additional options for the request.
 * @param opContext
 *            An {@link OperationContext} object which is used to track the execution of the operation.
 *
 * @throws StorageException
 *             An exception representing any error which occurred during the operation.
 */
@DoesServiceRequest
protected BlobOutputStreamInternal(final CloudAppendBlob parentBlob, final AccessCondition accessCondition,
        final BlobRequestOptions options, final OperationContext opContext)
        throws StorageException {
    this((CloudBlob) parentBlob, accessCondition, options, opContext);

    this.streamType = BlobType.APPEND_BLOB;
    this.internalWriteThreshold = this.parentBlobRef.getStreamWriteSizeInBytes();

    // appendBlock later sets the append position on the condition, so it must never be null.
    if (accessCondition == null) {
        this.accessCondition = new AccessCondition();
    }
    else {
        this.accessCondition = accessCondition;
    }

    if (this.accessCondition.getIfAppendPositionEqual() == null) {
        // No explicit append position: start at the length held in the blob's properties. For an existing
        // blob the properties are expected to have been downloaded already; for a new blob the length is 0.
        this.currentBlobOffset = parentBlob.getProperties().getLength();
    }
    else {
        this.currentBlobOffset = this.accessCondition.getIfAppendPositionEqual();
    }
}
/**
 * Surfaces the stored failure of this stream, if there is one.
 *
 * @throws IOException
 *             The last error recorded for this stream. This is either a failure from an upload or the
 *             stream-closed error set by {@code close()}.
 */
private void checkStreamState() throws IOException {
    final IOException failure = this.lastError;
    if (failure == null) {
        return;
    }
    throw failure;
}
/**
* Closes this output stream and shuts down the executor that performs the uploads. Unless the stream was aborted,
* any data remaining in the buffer is flushed and the blob is committed to the service.
*
* After this method returns or throws, the stream is marked closed: later calls that check the stream state,
* including a second close(), fail with an IOException carrying SR.STREAM_CLOSED.
*
* @throws IOException
* If the stream was already closed or faulted, or if flushing or committing fails. A StorageException
* raised by the commit is converted to an IOException.
*/
@Override
@DoesServiceRequest
public synchronized void close() throws IOException {
try {
// if the user has already closed the stream, this will throw a STREAM_CLOSED exception
// if an exception was thrown by any thread in the threadExecutor, realize it now
this.checkStreamState();
// flush any remaining data (skipped when the stream was aborted)
if (!this.aborted) {
this.flush();
}
// orderly shutdown of the ExecutorService: no new tasks are accepted from here on
this.threadExecutor.shutdown();
// try to commit the blob (skipped when the stream was aborted)
if (!this.aborted) {
try {
this.commit();
} catch (final StorageException e) {
throw Utility.initIOException(e);
}
}
}
finally {
// mark the stream closed on both the success and the failure path;
// if close() is called again, an exception will be thrown
this.lastError = new IOException(SR.STREAM_CLOSED);
// if an exception was thrown and the executor was not yet closed, call shutDownNow() to cancel all tasks
// and shutdown the ExecutorService
if (!this.threadExecutor.isShutdown()) {
this.threadExecutor.shutdownNow();
}
}
}
/**
* Marks this stream as aborted. This method only sets a flag; close() reads that flag and, when it is set, skips
* both the final flush and the commit of the blob.
*
* @throws IOException
* Declared by the signature; this implementation does not throw it.
*/
@Override
public void abort() throws IOException {
this.aborted = true;
}
/**
 * Commits the blob. When the options request it, the MD5 of everything written is first stored in the blob's
 * properties. A block blob is then committed by uploading its block list; for the other blob types the
 * properties are uploaded, but only when the MD5 was stored.
 *
 * @throws StorageException
 *             An exception representing any error which occurred during the operation.
 */
@DoesServiceRequest
private synchronized void commit() throws StorageException {
    final boolean storeMd5 = this.options.getStoreBlobContentMD5();
    if (storeMd5) {
        this.parentBlobRef.getProperties().setContentMD5(Base64.encode(this.md5Digest.digest()));
    }

    if (this.streamType == BlobType.BLOCK_BLOB) {
        // The block list upload carries the blob properties, so no separate properties call is made here.
        ((CloudBlockBlob) this.parentBlobRef).commitBlockList(this.blockList, this.accessCondition,
                this.options, this.opContext);
        return;
    }

    if (storeMd5) {
        this.parentBlobRef.uploadProperties(this.accessCondition, this.options, this.opContext);
    }
}
/**
 * Dispatches the current contents of the buffer as one asynchronous write and starts a new, empty buffer. The
 * write length is the number of bytes currently buffered; nothing is dispatched when the buffer is empty.
 *
 * Before submitting, the method throttles itself: it waits for a task to complete when the executor queue holds
 * at least twice the concurrent request count, and prunes finished futures when the future set reaches that same
 * size.
 *
 * @throws IOException
 *             If the buffered length is not a multiple of the page size for a page blob, if the write would exceed
 *             the maximum size condition of an append blob, or if waiting for a task is interrupted.
 */
@DoesServiceRequest
private synchronized void dispatchWrite() throws IOException {
    final int writeLength = this.outBuffer.size();
    if (writeLength == 0) {
        return;
    }

    if (this.streamType == BlobType.PAGE_BLOB && (writeLength % Constants.PAGE_SIZE != 0)) {
        throw new IOException(String.format(SR.INVALID_NUMBER_OF_BYTES_IN_THE_BUFFER, writeLength));
    }

    Callable<Void> worker = null;

    // Back-pressure: do not let the queue of pending uploads grow without bound.
    if (this.threadExecutor.getQueue().size() >= this.options.getConcurrentRequestCount() * 2) {
        this.waitForTaskToComplete();
    }

    if (this.futureSet.size() >= this.options.getConcurrentRequestCount() * 2) {
        this.clearCompletedFutures();
    }

    // Snapshot of the buffered bytes; the worker reads from this copy while the stream keeps buffering.
    final ByteArrayInputStream bufferRef = new ByteArrayInputStream(this.outBuffer.toByteArray());

    if (this.streamType == BlobType.BLOCK_BLOB) {
        final String blockID = this.getCurrentBlockId();

        this.blockList.add(new BlockEntry(blockID, BlockSearchMode.LATEST));

        worker = new Callable<Void>() {
            @Override
            public Void call() {
                BlobOutputStreamInternal.this.writeBlock(bufferRef, blockID, writeLength);
                return null;
            }
        };
    }
    else if (this.streamType == BlobType.PAGE_BLOB) {
        final long opOffset = this.currentBlobOffset;
        this.currentBlobOffset += writeLength;

        worker = new Callable<Void>() {
            @Override
            public Void call() {
                BlobOutputStreamInternal.this.writePages(bufferRef, opOffset, writeLength);
                return null;
            }
        };
    }
    else if (this.streamType == BlobType.APPEND_BLOB) {
        final long opOffset = this.currentBlobOffset;
        this.currentBlobOffset += writeLength;

        // We cannot differentiate between max size condition failing only in the retry versus failing in the
        // first attempt and retry even for a single writer scenario. So we will eliminate the latter and handle
        // the former in the append block method.
        if (this.accessCondition.getIfMaxSizeLessThanOrEqual() != null
                && this.currentBlobOffset > this.accessCondition.getIfMaxSizeLessThanOrEqual()) {
            this.lastError = new IOException(SR.INVALID_BLOCK_SIZE);
            throw this.lastError;
        }

        worker = new Callable<Void>() {
            @Override
            public Void call() {
                BlobOutputStreamInternal.this.appendBlock(bufferRef, opOffset, writeLength);
                return null;
            }
        };
    }

    // Add future to set
    this.futureSet.add(this.completionService.submit(worker));

    // Reset buffer.
    this.outBuffer = new ByteArrayOutputStream();
}
/**
 * Uploads one block of a block blob. Runs on an executor thread; a failure is not thrown but recorded as the
 * stream's last error.
 *
 * @param blockData
 *            The bytes of the block.
 * @param blockId
 *            The Base64 encoded id of the block.
 * @param writeLength
 *            The number of bytes to upload.
 */
private void writeBlock(ByteArrayInputStream blockData, String blockId, long writeLength) {
    final CloudBlockBlob blockBlob = (CloudBlockBlob) this.parentBlobRef;
    IOException failure = null;
    try {
        blockBlob.uploadBlock(blockId, blockData, writeLength, this.accessCondition, this.options, this.opContext);
    }
    catch (final IOException ioFailure) {
        failure = ioFailure;
    }
    catch (final StorageException storageFailure) {
        failure = Utility.initIOException(storageFailure);
    }

    if (failure != null) {
        this.lastError = failure;
    }
}
/**
 * Uploads a range of pages of a page blob. Runs on an executor thread; a failure is not thrown but recorded as the
 * stream's last error.
 *
 * @param pageData
 *            The bytes of the pages.
 * @param offset
 *            The offset in the blob at which the pages start.
 * @param writeLength
 *            The number of bytes to upload.
 */
private void writePages(ByteArrayInputStream pageData, long offset, long writeLength) {
    final CloudPageBlob pageBlob = (CloudPageBlob) this.parentBlobRef;
    IOException failure = null;
    try {
        pageBlob.uploadPages(pageData, offset, writeLength, this.accessCondition, this.options, this.opContext);
    }
    catch (final IOException ioFailure) {
        failure = ioFailure;
    }
    catch (final StorageException storageFailure) {
        failure = Utility.initIOException(storageFailure);
    }

    if (failure != null) {
        this.lastError = failure;
    }
}
/**
 * Appends one block to an append blob at the given position. Runs on an executor thread; a failure is not thrown
 * but recorded as the stream's last error. The exception is a precondition failure that is absorbed as described
 * in the body.
 *
 * @param blockData
 *            The bytes of the block.
 * @param offset
 *            The append position the block must be written at; it is set on the access condition before the call.
 * @param writeLength
 *            The number of bytes to append.
 */
private void appendBlock(ByteArrayInputStream blockData, long offset, long writeLength) {
    final CloudAppendBlob appendBlob = (CloudAppendBlob) this.parentBlobRef;
    this.accessCondition.setIfAppendPositionEqual(offset);

    // Remember how many request results existed so that a retry of this call can be detected afterwards.
    final int resultsBeforeCall = this.opContext.getRequestResults().size();
    try {
        appendBlob.appendBlock(blockData, writeLength, this.accessCondition, this.options, this.opContext);
    }
    catch (final IOException ioFailure) {
        this.lastError = ioFailure;
    }
    catch (final StorageException storageFailure) {
        // Absorb only: option enabled, HTTP 412, an append-position or max-size error code, and more than one
        // request made by this call (i.e. the failure happened on a retry).
        final boolean absorb = this.options.getAbsorbConditionalErrorsOnRetry()
                && storageFailure.getHttpStatusCode() == HttpURLConnection.HTTP_PRECON_FAILED
                && storageFailure.getExtendedErrorInformation() != null
                && storageFailure.getErrorCode() != null
                && (storageFailure.getErrorCode().equals(StorageErrorCodeStrings.INVALID_APPEND_POSITION)
                        || storageFailure.getErrorCode()
                                .equals(StorageErrorCodeStrings.INVALID_MAX_BLOB_SIZE_CONDITION))
                && (this.opContext.getRequestResults().size() - resultsBeforeCall > 1);

        if (!absorb) {
            this.lastError = Utility.initIOException(storageFailure);
            return;
        }

        // Pre-condition failure on a retry should be ignored in a single writer scenario since
        // the request succeeded in the first attempt.
        Logger.info(this.opContext, SR.PRECONDITION_FAILURE_IGNORED);
    }
}
/**
* Flushes this output stream and forces any buffered output bytes to be written out. If any data remains in the
* buffer it is committed to the service.
*
* @throws IOException
* If an I/O error occurs.
*/
@Override
@DoesServiceRequest
public void flush() throws IOException {
this.checkStreamState();
this.dispatchWrite();
// Waits for all submitted tasks to complete
Set> requests = new HashSet>(this.futureSet);
for (Future request : requests) {
// wait for the future to complete
try {
request.get();
}
catch (Exception e) {
throw Utility.initIOException(e);
}
// If that task threw an error, fail fast
this.checkStreamState();
}
}
/**
 * Generates a new block ID to be used for PutBlock. The ID is the stream's block id prefix followed by the current
 * size of the block list as a zero-padded six digit number, UTF-8 encoded and then Base64 encoded.
 *
 * @return Base64 encoded block ID
 * @throws IOException
 *             If the UTF-8 charset is not supported.
 */
private String getCurrentBlockId() throws IOException {
    final String plainBlockId = this.blockIdPrefix + String.format("%06d", this.blockList.size());

    final byte[] rawBlockId;
    try {
        rawBlockId = plainBlockId.getBytes(Constants.UTF8_CHARSET);
    }
    catch (final UnsupportedEncodingException unsupported) {
        // this should never happen, UTF8 is a default charset
        throw new IOException(unsupported);
    }

    return Base64.encode(rawBlockId);
}
/**
 * Waits for at least one task to complete. All tasks that have already completed are drained first; only when
 * none had completed does the method block until the next one finishes.
 *
 * @throws IOException
 *             If the wait is interrupted. The thread's interrupt status is restored before the exception is
 *             thrown.
 */
private void waitForTaskToComplete() throws IOException {
    boolean completed = false;
    while (this.completionService.poll() != null) {
        completed = true;
    }

    if (!completed) {
        try {
            this.completionService.take();
        }
        catch (final InterruptedException e) {
            // Preserve the interrupt for callers further up the stack.
            Thread.currentThread().interrupt();
            throw Utility.initIOException(e);
        }
    }
}
/**
 * Removes futures which are done from the future set. Removing while iterating is safe here because the set is
 * created in the constructor from a ConcurrentHashMap.
 */
private void clearCompletedFutures() {
    for (final Future<?> candidate : this.futureSet) {
        if (!candidate.isDone()) {
            continue;
        }
        this.futureSet.remove(candidate);
    }
}
/**
* Writes {@code data.length} bytes from the specified byte array to this output stream by delegating to
* {@code write(data, 0, data.length)}.
*
* If you are using {@link CloudAppendBlob} and are certain of a single writer scenario, please look at
* {@link BlobRequestOptions#setAbsorbConditionalErrorsOnRetry(Boolean)} and see if setting this flag to
* {@code true} is acceptable for you.
*
* @param data
* A {@code byte} array which represents the data to write.
*
* @throws IOException
* If an I/O error occurs. In particular, an IOException may be thrown if the output stream has been
* closed.
*/
@Override
@DoesServiceRequest
public void write(final byte[] data) throws IOException {
this.write(data, 0, data.length);
}
/**
 * Writes {@code length} bytes from the specified byte array, starting at {@code offset}, to this output stream.
 *
 * If you are using {@link CloudAppendBlob} and are certain of a single writer scenario, please look at
 * {@link BlobRequestOptions#setAbsorbConditionalErrorsOnRetry(Boolean)} and see if setting this flag to
 * {@code true} is acceptable for you.
 *
 * @param data
 *            A {@code byte} array which represents the data to write.
 * @param offset
 *            An {@code int} which represents the start offset in the data.
 * @param length
 *            An {@code int} which represents the number of bytes to write.
 *
 * @throws IOException
 *             If an I/O error occurs. In particular, an IOException may be thrown if the output stream has been
 *             closed.
 * @throws IndexOutOfBoundsException
 *             If {@code offset} or {@code length} is negative, or if the range does not fit inside {@code data}.
 */
@Override
@DoesServiceRequest
public void write(final byte[] data, final int offset, final int length) throws IOException {
    final boolean rangeFits = offset >= 0 && length >= 0 && length <= data.length - offset;
    if (!rangeFits) {
        throw new IndexOutOfBoundsException();
    }

    this.writeInternal(data, offset, length);
}
/**
* Writes data from the InputStream to the Blob by handing the source stream and this stream to
* Utility.writeToOutputStream.
*
* If you are using {@link CloudAppendBlob} and are certain of a single writer scenario, please look at
* {@link BlobRequestOptions#setAbsorbConditionalErrorsOnRetry(Boolean)} and see if setting this flag to
* {@code true} is acceptable for you.
*
* @param sourceStream
* An {@link InputStream} object which specifies the data to write to the Blob.
* @param writeLength
* A {@code long} passed through unchanged to Utility.writeToOutputStream; presumably the number of bytes
* to copy from the source stream (TODO confirm against that helper).
*
* @throws IOException
* If an I/O error occurs. In particular, an IOException may be thrown if the output stream has been
* closed.
* @throws StorageException
* An exception representing any error which occurred during the operation.
*/
@DoesServiceRequest
public void write(final InputStream sourceStream, final long writeLength) throws IOException, StorageException {
Utility.writeToOutputStream(sourceStream, this, writeLength, false, false, this.opContext, this.options, false);
}
/**
 * Writes the specified byte to this output stream. Only the eight low-order bits of the argument are written; the
 * 24 high-order bits are ignored.
 *
 * If you are using {@link CloudAppendBlob} and are certain of a single writer scenario, please look at
 * {@link BlobRequestOptions#setAbsorbConditionalErrorsOnRetry(Boolean)} and see if setting this flag to
 * {@code true} is acceptable for you.
 *
 * @param byteVal
 *            An {@code int} which represents the byte value to write.
 *
 * @throws IOException
 *             If an I/O error occurs. In particular, an IOException may be thrown if the output stream has been
 *             closed.
 */
@Override
@DoesServiceRequest
public void write(final int byteVal) throws IOException {
    // The narrowing cast keeps exactly the low-order eight bits.
    final byte[] singleByte = new byte[1];
    singleByte[0] = (byte) byteVal;
    this.write(singleByte);
}
/**
 * Writes the data to the buffer and triggers writes to the service as needed. The data is copied into the buffer
 * in chunks that fill it up to the write threshold; each time the buffer reaches the threshold it is dispatched
 * and a new buffer is started.
 *
 * @param data
 *            A {@code byte} array which represents the data to write.
 * @param offset
 *            An {@code int} which represents the start offset in the data.
 * @param length
 *            An {@code int} which represents the number of bytes to write.
 *
 * @throws IOException
 *             If an I/O error occurs. In particular, an IOException may be thrown if the output stream has been
 *             closed, or if the stream's write threshold is not positive so that no data can be buffered.
 */
@DoesServiceRequest
private synchronized void writeInternal(final byte[] data, int offset, int length) throws IOException {
    while (length > 0) {
        this.checkStreamState();

        // With a threshold of zero (e.g. a page blob opened with length 0) every pass would copy zero bytes and
        // this loop would never terminate; a negative threshold would fail inside the buffer instead.
        if (this.internalWriteThreshold <= 0) {
            throw new IOException("Cannot write to the stream because its write threshold is "
                    + this.internalWriteThreshold + " bytes.");
        }

        final int availableBufferBytes = this.internalWriteThreshold - this.outBuffer.size();
        final int nextWrite = Math.min(availableBufferBytes, length);

        // If we need to set MD5 then update the digest accordingly
        if (this.options.getStoreBlobContentMD5()) {
            this.md5Digest.update(data, offset, nextWrite);
        }

        this.outBuffer.write(data, offset, nextWrite);
        offset += nextWrite;
        length -= nextWrite;

        // Buffer is full: hand it to the executor and continue with a fresh buffer.
        if (this.outBuffer.size() == this.internalWriteThreshold) {
            this.dispatchWrite();
        }
    }
}
}