All downloads are free. The search and download functionality uses the official Maven repository.

io.mats3.util.compression.ByteArrayDeflaterOutputStreamWithStats Maven / Gradle / Ivy

Go to download

Mats^3 Utilities - notably the MatsFuturizer, which provides a bridge from synchronous processes to the highly asynchronous Mats^3 services.

There is a newer version: 0.19.22-2024-11-09
Show newest version
package io.mats3.util.compression;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;

/**
 * A specialization of {@link DeflaterOutputStreamWithStats} which writes the compressed data to a byte array, as if the
 * target was a {@link ByteArrayOutputStream}. It is marginally more efficient as it doesn't use an intermediate buffer
 * to write to the target byte array. The growing strategy is also a bit more memory conservative in that the max grow
 * increment is capped at 8 MiB, instead of pure doubling. Also, no method throw IOException, as it is writing to a byte
 * array.
 * 

* It allows you to supply an {@link #ByteArrayDeflaterOutputStreamWithStats(byte[], int) initial byte array}, and a * starting position in that array, which is useful if you want to use an existing array that may contain some existing * data in front. This can be used to e.g. write multiple compressed data streams into the same byte array. You probably * want to know about {@link #getUncroppedInternalArray()} in that case, also read below. *

* If the byte array is filled up, it is grown by allocating a new larger array and copying the data over. It does this * by using a capped exponential growth strategy, starting at an increment of 1KiB, and doubling the increment each * time, capped at 8MiB. Compared to the ByteArrayOutputStream, which grows by doubling each time, this is * a trade-off: This strategy will at large sizes have higher memory churn (as it grows and thus reallocates and copies * more often), but it will have lower max memory usage (as it grows less each time). This becomes pronounced when the * data becomes large: When the size for example tips over 200MiB, this solution will at the grow-point have a max * memory usage of 408MiB (200MiB + 208MiB), while ByteArrayOutputStream will need 600MiB (200MiB + * 400MiB). *

* The method {@link #toByteArray()} returns the compressed data as a byte array of the correct size (chopped to the * correct size). The method {@link #getUncroppedInternalArray()} returns the internal byte array that the compressed * data is written to, which might be the original array if supplied in the construction and the data fits, or a new, * larger array after growing. It is probably not of the correct size. The reason why you would use this latter method * is if you want to add more data to the array, e.g. by using it as the target in a new instance of this class for * adding another compressed "file". The current position in the array is given by {@link #getCurrentPosition()}. *

* Thread-safety: This class is not thread-safe. */ public class ByteArrayDeflaterOutputStreamWithStats extends DeflaterOutputStreamWithStats { private byte[] _outputArray; private int _currentPosition; private long _growTimeNanos; public ByteArrayDeflaterOutputStreamWithStats() { this(new byte[1024], 0); } public ByteArrayDeflaterOutputStreamWithStats(byte[] outputArray, int offset) { super(dummyOutputStream, 1); if (outputArray == null) { throw new IllegalArgumentException("outputArray must not be null."); } if (offset < 0) { throw new IllegalArgumentException("offset must be >= 0, was [" + offset + "]"); } if (offset > outputArray.length) { throw new IllegalArgumentException("offset must be <= outputArray.length, was [" + offset + "]"); } _outputArray = outputArray; _currentPosition = offset; } private byte[] _tempBuffer; // dummy output stream, since super's constructor null-checks the output stream argument. private static final OutputStream dummyOutputStream = new OutputStream() { @Override public void write(int b) { } }; @Override public void write(int b) { try { super.write(b); } catch (IOException e) { throw new UnexpectedException(e); } } @Override public void write(byte[] b, int off, int len) { try { super.write(b, off, len); } catch (IOException e) { throw new UnexpectedException(e); } } @Override public void write(byte[] b) { try { super.write(b); } catch (IOException e) { throw new UnexpectedException(e); } } /** * Returns the current position in the output array - that is, where any subsequent written data would be output. * After finishing and thus completing the compression process, as will be done by any of {@link #finish()}, * {@link #close()}, {@link #toByteArray()} or {@link #getUncroppedInternalArray()}, the value returned by this * method will be equal to the length of the byte array returned by {@link #toByteArray()}. * * @return the current position in the output array. 
*/ public int getCurrentPosition() { return _currentPosition; } /** * Returns the time spent on growing the output array (allocate new, copy over), in nanoseconds. Note that * {@link #getDeflateAndWriteTimeNanos()} includes this time. * * @return the time spent on growing the output array (allocate new, copy over), in nanoseconds. */ public long getGrowTimeNanos() { return _growTimeNanos; } /** * Returns the uncropped internal byte array that the compressed data is written to - this method returns whatever * array is currently in use, which in case the user supplied an array might be the original array, or a new, larger * array after resizing. It is very likely not of the correct size. The reason why you would use this variant as * opposed to {@link #toByteArray()} is if you want to add more data to the array, e.g. by using it as the target in * a new instance of this class for adding another compressed "file". The current position in the array is given by * {@link #getCurrentPosition()}. *

* Note: For convenience, {@link #close()} is invoked for you. This finishes the compression process, and this * instance can no longer be used. * * @return the internal byte array that the compressed data is written to. */ public byte[] getUncroppedInternalArray() { close(); return _outputArray; } /** * Returns the compressed data as a byte array of the correct size (chopped to the correct size). Contrast this with * {@link #getUncroppedInternalArray()} which returns the internal byte array, which is likely larger than the * correct size. *

* Note: For convenience, {@link #close()} is invoked for you. This finishes the compression process, and this * instance can no longer be used. * * @return the compressed data as a byte array of the correct size. */ public byte[] toByteArray() { close(); // ?: Did we by chance hit the right size exactly? if (_currentPosition == _outputArray.length) { // -> Yes, it is exactly the right size, so just return the array. return _outputArray; } // E-> No, it is not exactly the right size, so create a new array of the right size and copy the data. byte[] result = new byte[_currentPosition]; System.arraycopy(_outputArray, 0, result, 0, _currentPosition); return result; } @Override public void flush() { // NOTE: We don't allow SYNC_FLUSH in the constructors, so we don't need to do what super does. // :: Not sure if this makes any sense, but its at least a sensible way to flush the deflater. // ?: Are we finished? if (!def.finished()) { // -> No, we're not finished, so invoke deflate() until the deflater says it needs input. while (!def.needsInput()) { deflate(); } } // We don't have to flush the underlying stream, as we're writing to a byte array. } @Override public void close() { try { super.close(); } catch (IOException e) { throw new UnexpectedException(e); } } /** * Thrown all the places where an IOException may occur by OutputStream contract, which should never happen in this * class since we're writing to a byte array, This to avoid having to declare IOException in the method signatures, * which should make it a bit more convenient to use. 
*/ private static class UnexpectedException extends RuntimeException { public UnexpectedException(Throwable cause) { super("This should never happen, as we're writing to a byte array.", cause); } } // ===== Internals private final static int FIRST_INCREMENT = 1024; // First increment size of 1KiB private final static int MAX_INCREMENT = 8 * 1024 * 1024; // Max increment size of 8MiB private final static int OBJECT_HEADER_SIZE = 24; // Approximate size of array object header private final static int MAX_ARRAY_SIZE = Integer.MAX_VALUE - OBJECT_HEADER_SIZE; private int _increment = FIRST_INCREMENT; private void growOutputArray() { // :: Calculate the target length long targetLength = _outputArray.length + _increment; // Calculate the new increment size _increment = Math.min(MAX_INCREMENT, _increment * 2); // ?: Is the target length larger than the maximum array size? if (targetLength > MAX_ARRAY_SIZE) { // -> Yes, the target length is larger than the maximum array size. // ?: Is the current array size already at the maximum size? if (_outputArray.length >= MAX_ARRAY_SIZE) { // -> Yes, the current array size is already at the maximum size, so we can't grow the array more. throw new OutOfMemoryError("When resizing array, we hit MAX_ARRAY_SIZE=" + MAX_ARRAY_SIZE + "."); } else { // -> No, the current array size is not at the maximum size, so set the target length to max. targetLength = MAX_ARRAY_SIZE; } } // :: Allocate a new array of the target length, and copy the data over. byte[] newOutputArray = new byte[(int) targetLength]; System.arraycopy(_outputArray, 0, newOutputArray, 0, _outputArray.length); _outputArray = newOutputArray; } @Override protected void deflate() { // The Deflater thing is a bit annoying. It doesn't have a "outputBytesAvailable()"-type method, and due to the // way this deflate() method is invoked by super in both write(byte[], int, int) and finish(), we may end up // with growing the array, but we didn't need to. 
Therefore we use a temporary buffer effectively as a "peek" // buffer to see how many bytes are available, and only grow the array if we need to. long nanos_Start = System.nanoTime(); // ?: Check if we're empty of space in the actual output array if (_currentPosition == _outputArray.length) { // -> No, we don't have any bytes left in the output array, so might need to grow the array. // :: Check whether there actually are bytes left in the deflater, using a temp array. This to avoid // growing the array if we don't need to. // ?: Do we have a temporary buffer? if (_tempBuffer == null) { // -> No, we don't have a temporary buffer, so create one. _tempBuffer = new byte[512]; } int len = def.deflate(_tempBuffer, 0, _tempBuffer.length); // ?: Was there any data? if (len > 0) { // -> Yes, there was data, so grow the array and copy the data over. long nanos_StartGrow = System.nanoTime(); growOutputArray(); System.arraycopy(_tempBuffer, 0, _outputArray, _currentPosition, len); _growTimeNanos += (System.nanoTime() - nanos_StartGrow); // Increment the current position. _currentPosition += len; } } else { // -> Yes, we have bytes left in the output array, so just deflate straight into the output array. int len = def.deflate(_outputArray, _currentPosition, _outputArray.length - _currentPosition); // Increment the current position (might have been zero, but no use in checking). _currentPosition += len; } // Record the time spent on this deflate() call. long nanos_Total = System.nanoTime() - nanos_Start; _deflateTimeNanos += nanos_Total; _deflateAndWriteTimeNanos += nanos_Total; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy