All Downloads are FREE. Search and download functionalities are using the official Maven repository.

alex.mojaki.s3upload.MultiPartOutputStream Maven / Gradle / Ivy

Go to download

Manages streaming of data to S3 without knowing the size beforehand and without keeping it all in memory or writing to disk.

There is a newer version: 2.2.4
Show newest version
package alex.mojaki.s3upload;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.OutputStream;
import java.util.concurrent.BlockingQueue;

import static com.amazonaws.services.s3.internal.Constants.MB;

/**
 * An {@code OutputStream} which packages data written to it into discrete {@link StreamPart}s which can be obtained
 * in a separate thread via iteration and uploaded to S3.
 * 

* A single {@code MultiPartOutputStream} is allocated a range of part numbers it can assign to the {@code StreamPart}s * it produces, which is determined at construction. *

* It's essential to call * {@link MultiPartOutputStream#close()} when finished so that it can create the final {@code StreamPart} and consumers * can finish. *

* Writing to the stream may lead to trying to place a completed part on a queue, * which will block if the queue is full and may lead to an {@code InterruptedException}. */ public class MultiPartOutputStream extends OutputStream { private static final Logger log = LoggerFactory.getLogger(MultiPartOutputStream.class); private ConvertibleOutputStream currentStream; public static final int S3_MIN_PART_SIZE = 5 * MB; private static final int STREAM_EXTRA_ROOM = MB; private BlockingQueue queue; private final int partNumberStart; private final int partNumberEnd; private final int partSize; private int currentPartNumber; /** * Creates a new stream that will produce parts of the given size with part numbers in the given range. * * @param partNumberStart the part number of the first part the stream will produce. Minimum 1. * @param partNumberEnd 1 more than the last part number that the parts are allowed to have. Maximum 10 001. * @param partSize the minimum size in bytes of parts to be produced. * @param queue where stream parts are put on production. */ MultiPartOutputStream(int partNumberStart, int partNumberEnd, int partSize, BlockingQueue queue) { if (partNumberStart < 1) { throw new IndexOutOfBoundsException("The lowest allowed part number is 1. The value given was " + partNumberStart); } if (partNumberEnd > 10001) { throw new IndexOutOfBoundsException( "The highest allowed part number is 10 000, so partNumberEnd must be at most 10 001. The value given was " + partNumberEnd); } if (partNumberEnd <= partNumberStart) { throw new IndexOutOfBoundsException( String.format("The part number end (%d) must be greater than the part number start (%d).", partNumberEnd, partNumberStart)); } if (partSize < S3_MIN_PART_SIZE) { throw new IllegalArgumentException(String.format( "The given part size (%d) is less than 5 MB.", partSize)); } this.partNumberStart = partNumberStart; this.partNumberEnd = partNumberEnd; this.queue = queue; this.partSize = partSize; log.debug("Creating {}", this); currentPartNumber = partNumberStart; currentStream = new ConvertibleOutputStream(getStreamAllocatedSize()); } /** * Returns the initial capacity in bytes of the {@code ByteArrayOutputStream} that a part uses. */ private int getStreamAllocatedSize() { /* This consists of the size that the user asks for, the extra 5 MB to avoid small parts (see the comment in checkSize()), and some extra space to make resizing and copying unlikely. */ return partSize + S3_MIN_PART_SIZE + STREAM_EXTRA_ROOM; } /** * Checks if the stream currently contains enough data to create a new part. */ private void checkSize() { /* This class avoids producing parts < 5 MB if possible by only producing a part when it has an extra 5 MB to spare for the next part. For example, suppose the following. A stream is producing parts of 10 MB. Someone writes 10 MB and then calls this method, and then writes only 3 MB more before closing. If the initial 10 MB were immediately packaged into a StreamPart and a new ConvertibleOutputStream was started on for the rest, it would end up producing a part with just 3 MB. So instead the class waits until it contains 15 MB of data and then it splits the stream into two: one with 10 MB that gets produced as a part, and one with 5 MB that it continues with. In this way users of the class are less likely to encounter parts < 5 MB which cause trouble: see the caveat on order in the StreamTransferManager and the way it handles these small parts, referred to as 'leftover'. Such parts are only produced when the user closes a stream that never had more than 5 MB written to it. */ if (currentStream == null) { throw new IllegalStateException("The stream is closed and cannot be written to."); } if (currentStream.size() > partSize + S3_MIN_PART_SIZE) { ConvertibleOutputStream newStream = currentStream.split( currentStream.size() - S3_MIN_PART_SIZE, getStreamAllocatedSize()); putCurrentStream(); currentStream = newStream; } } private void putCurrentStream() { if (currentStream.size() == 0) { return; } if (currentPartNumber >= partNumberEnd) { throw new IndexOutOfBoundsException( String.format("This stream was allocated the part numbers from %d (inclusive) to %d (exclusive)" + "and it has gone beyond the end.", partNumberStart, partNumberEnd)); } StreamPart streamPart = new StreamPart(currentStream, currentPartNumber++); log.debug("Putting {} on queue", streamPart); try { queue.put(streamPart); } catch (InterruptedException e) { Utils.throwRuntimeInterruptedException(e); } } @Override public void write(int b) { currentStream.write(b); checkSize(); } @Override public void write(byte b[], int off, int len) { currentStream.write(b, off, len); checkSize(); } @Override public void write(byte b[]) { write(b, 0, b.length); checkSize(); } /** * Packages any remaining data into a {@link StreamPart} and signals to the {@code StreamTransferManager} that there are no more parts * afterwards. You cannot write to the stream after it has been closed. */ @Override public void close() { log.info("Called close() on {}", this); if (currentStream == null) { log.warn("{} is already closed", this); return; } try { putCurrentStream(); log.debug("Placing poison pill on queue for {}", this); queue.put(StreamPart.POISON); } catch (InterruptedException e) { log.error("Interrupted while closing {}", this); Utils.throwRuntimeInterruptedException(e); } currentStream = null; } @Override public String toString() { return String.format("[MultipartOutputStream for parts %d - %d]", partNumberStart, partNumberEnd - 1); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy