/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.orc;
import com.google.common.annotations.VisibleForTesting;
import io.airlift.compress.v3.Compressor;
import io.airlift.compress.v3.deflate.DeflateCompressor;
import io.airlift.compress.v3.lz4.Lz4Compressor;
import io.airlift.compress.v3.snappy.SnappyCompressor;
import io.airlift.compress.v3.zstd.ZstdCompressor;
import io.airlift.slice.SizeOf;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
import io.trino.orc.checkpoint.InputStreamCheckpoint;
import io.trino.orc.metadata.CompressionKind;
import io.trino.plugin.base.io.ChunkedSliceOutput;
import jakarta.annotation.Nullable;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Arrays;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static io.airlift.slice.SizeOf.SIZE_OF_BYTE;
import static io.airlift.slice.SizeOf.SIZE_OF_INT;
import static io.airlift.slice.SizeOf.SIZE_OF_LONG;
import static io.airlift.slice.SizeOf.SIZE_OF_SHORT;
import static io.airlift.slice.SizeOf.instanceSize;
import static io.airlift.slice.Slices.wrappedBuffer;
import static java.lang.Math.clamp;
import static java.lang.Math.min;
import static java.lang.Math.toIntExact;
import static java.util.Objects.requireNonNull;
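/**
 * A {@link SliceOutput} that buffers writes in memory and, when a compressor is
 * configured, compresses each full buffer into an ORC compression chunk before
 * appending it to an internal {@link ChunkedSliceOutput}. With {@code CompressionKind.NONE}
 * the buffered bytes are passed through unchanged.
 *
 * <p>A minimal usage sketch (illustrative only; in practice the ORC writer drives
 * this buffer, and {@code target} here stands for any {@link SliceOutput}):
 * <pre>{@code
 * OrcOutputBuffer buffer = new OrcOutputBuffer(CompressionKind.ZSTD, 256 * 1024);
 * buffer.writeLong(42L);
 * buffer.close(); // flush the final chunk
 * buffer.writeDataTo(target);
 * }</pre>
 */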
public class OrcOutputBuffer
extends SliceOutput
{
private static final int INSTANCE_SIZE = instanceSize(OrcOutputBuffer.class);
private static final int INITIAL_BUFFER_SIZE = 256;
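// writes at least this large bypass the local buffer and go straight to the output stream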
private static final int DIRECT_FLUSH_SIZE = 32 * 1024;
private static final int MINIMUM_OUTPUT_BUFFER_CHUNK_SIZE = 4 * 1024;
private static final int MAXIMUM_OUTPUT_BUFFER_CHUNK_SIZE = 1024 * 1024;
private final int maxBufferSize;
private final ChunkedSliceOutput compressedOutputStream;
@Nullable
private final Compressor compressor;
private byte[] compressionBuffer = new byte[0];
private Slice slice;
private byte[] buffer;
/**
* Offset of buffer within stream.
*/
private long bufferOffset;
/**
* Current position for writing in buffer.
*/
private int bufferPosition;
public OrcOutputBuffer(CompressionKind compression, int maxBufferSize)
{
requireNonNull(compression, "compression is null");
checkArgument(maxBufferSize > 0, "maxBufferSize must be greater than 0");
this.maxBufferSize = maxBufferSize;
this.buffer = new byte[INITIAL_BUFFER_SIZE];
this.slice = wrappedBuffer(buffer);
compressedOutputStream = new ChunkedSliceOutput(MINIMUM_OUTPUT_BUFFER_CHUNK_SIZE, MAXIMUM_OUTPUT_BUFFER_CHUNK_SIZE);
this.compressor = getCompressor(compression);
}
@Nullable
private static Compressor getCompressor(CompressionKind compression)
{
return switch (compression) {
case NONE -> null;
case SNAPPY -> SnappyCompressor.create();
case ZLIB -> new DeflateCompressor();
case LZ4 -> Lz4Compressor.create();
case ZSTD -> ZstdCompressor.create();
};
}
public long getOutputDataSize()
{
checkState(bufferPosition == 0, "Buffer must be flushed before getOutputDataSize can be called");
return compressedOutputStream.size();
}
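/**
 * Estimated output size: compressed bytes already written plus the bytes still
 * sitting in the local buffer, which have not been compressed yet.
 */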
public long estimateOutputDataSize()
{
return compressedOutputStream.size() + bufferPosition;
}
public int writeDataTo(SliceOutput outputStream)
{
checkState(bufferPosition == 0, "Buffer must be closed before writeDataTo can be called");
for (Slice slice : compressedOutputStream.getSlices()) {
outputStream.writeBytes(slice);
}
return compressedOutputStream.size();
}
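/**
 * Returns a checkpoint for the current write position. Without compression this is
 * simply the absolute stream position; with compression it packs the offset of the
 * current compressed block and the position within the uncompressed buffer into a
 * single long (see {@link InputStreamCheckpoint#createInputStreamCheckpoint}).
 */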
public long getCheckpoint()
{
if (compressor == null) {
return size();
}
return InputStreamCheckpoint.createInputStreamCheckpoint(compressedOutputStream.size(), bufferPosition);
}
@Override
public void flush()
{
flushBufferToOutputStream();
}
@Override
public void close()
{
flushBufferToOutputStream();
}
@Override
public void reset()
{
compressedOutputStream.reset();
bufferOffset = 0;
bufferPosition = 0;
}
@Override
public void reset(int position)
{
throw new UnsupportedOperationException();
}
@Override
public int size()
{
return toIntExact(bufferOffset + bufferPosition);
}
@Override
public long getRetainedSize()
{
return INSTANCE_SIZE + compressedOutputStream.getRetainedSize() + slice.getRetainedSize() + SizeOf.sizeOf(compressionBuffer);
}
@Override
public int writableBytes()
{
return Integer.MAX_VALUE;
}
@Override
public boolean isWritable()
{
return true;
}
@Override
public void writeByte(int value)
{
ensureWritableBytes(SIZE_OF_BYTE);
slice.setByte(bufferPosition, value);
bufferPosition += SIZE_OF_BYTE;
}
@Override
public void writeShort(int value)
{
ensureWritableBytes(SIZE_OF_SHORT);
slice.setShort(bufferPosition, value);
bufferPosition += SIZE_OF_SHORT;
}
@Override
public void writeInt(int value)
{
ensureWritableBytes(SIZE_OF_INT);
slice.setInt(bufferPosition, value);
bufferPosition += SIZE_OF_INT;
}
@Override
public void writeLong(long value)
{
ensureWritableBytes(SIZE_OF_LONG);
slice.setLong(bufferPosition, value);
bufferPosition += SIZE_OF_LONG;
}
@Override
public void writeFloat(float value)
{
// This normalizes NaN values like `java.io.DataOutputStream` does
writeInt(Float.floatToIntBits(value));
}
@Override
public void writeDouble(double value)
{
// This normalizes NaN values like `java.io.DataOutputStream` does
writeLong(Double.doubleToLongBits(value));
}
@Override
public void writeBytes(Slice source)
{
writeBytes(source, 0, source.length());
}
@Override
public void writeBytes(Slice source, int sourceIndex, int length)
{
// Write huge chunks directly to the output stream
if (length >= DIRECT_FLUSH_SIZE) {
flushBufferToOutputStream();
writeDirectlyToOutputStream(source.byteArray(), sourceIndex + source.byteArrayOffset(), length);
bufferOffset += length;
}
else {
ensureWritableBytes(length);
slice.setBytes(bufferPosition, source, sourceIndex, length);
bufferPosition += length;
}
}
@Override
public void writeBytes(byte[] source)
{
writeBytes(source, 0, source.length);
}
@Override
public void writeBytes(byte[] source, int sourceIndex, int length)
{
// Write huge chunks directly to the output stream
if (length >= DIRECT_FLUSH_SIZE) {
// todo fill buffer before flushing
flushBufferToOutputStream();
writeDirectlyToOutputStream(source, sourceIndex, length);
bufferOffset += length;
}
else {
ensureWritableBytes(length);
slice.setBytes(bufferPosition, source, sourceIndex, length);
bufferPosition += length;
}
}
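// The primitive array writers below share the same pattern: copy as many whole
// elements as fit in the current buffer, flush when full, and repeat.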
@Override
public void writeShorts(short[] source, int sourceIndex, int length)
{
while (length > 0) {
int flushLength = ensureBatchSize(length * Short.BYTES) / Short.BYTES;
slice.setShorts(bufferPosition, source, sourceIndex, flushLength);
bufferPosition += flushLength * Short.BYTES;
sourceIndex += flushLength;
length -= flushLength;
}
}
@Override
public void writeInts(int[] source, int sourceIndex, int length)
{
while (length > 0) {
int flushLength = ensureBatchSize(length * Integer.BYTES) / Integer.BYTES;
slice.setInts(bufferPosition, source, sourceIndex, flushLength);
bufferPosition += flushLength * Integer.BYTES;
sourceIndex += flushLength;
length -= flushLength;
}
}
@Override
public void writeLongs(long[] source, int sourceIndex, int length)
{
while (length > 0) {
int flushLength = ensureBatchSize(length * Long.BYTES) / Long.BYTES;
slice.setLongs(bufferPosition, source, sourceIndex, flushLength);
bufferPosition += flushLength * Long.BYTES;
sourceIndex += flushLength;
length -= flushLength;
}
}
@Override
public void writeFloats(float[] source, int sourceIndex, int length)
{
while (length > 0) {
int flushLength = ensureBatchSize(length * Float.BYTES) / Float.BYTES;
slice.setFloats(bufferPosition, source, sourceIndex, flushLength);
bufferPosition += flushLength * Float.BYTES;
sourceIndex += flushLength;
length -= flushLength;
}
}
@Override
public void writeDoubles(double[] source, int sourceIndex, int length)
{
while (length > 0) {
int flushLength = ensureBatchSize(length * Double.BYTES) / Double.BYTES;
slice.setDoubles(bufferPosition, source, sourceIndex, flushLength);
bufferPosition += flushLength * Double.BYTES;
sourceIndex += flushLength;
length -= flushLength;
}
}
@Override
public void writeBytes(InputStream in, int length)
throws IOException
{
while (length > 0) {
int batch = ensureBatchSize(length);
slice.setBytes(bufferPosition, in, batch);
bufferPosition += batch;
length -= batch;
}
}
@Override
public void writeZero(int length)
{
checkArgument(length >= 0, "length must be greater than or equal to 0");
while (length > 0) {
int batch = ensureBatchSize(length);
Arrays.fill(buffer, bufferPosition, bufferPosition + batch, (byte) 0);
bufferPosition += batch;
length -= batch;
}
}
@Override
public SliceOutput appendLong(long value)
{
writeLong(value);
return this;
}
@Override
public SliceOutput appendDouble(double value)
{
writeDouble(value);
return this;
}
@Override
public SliceOutput appendInt(int value)
{
writeInt(value);
return this;
}
@Override
public SliceOutput appendShort(int value)
{
writeShort(value);
return this;
}
@Override
public SliceOutput appendByte(int value)
{
writeByte(value);
return this;
}
@Override
public SliceOutput appendBytes(byte[] source, int sourceIndex, int length)
{
writeBytes(source, sourceIndex, length);
return this;
}
@Override
public SliceOutput appendBytes(byte[] source)
{
writeBytes(source);
return this;
}
@Override
public SliceOutput appendBytes(Slice slice)
{
writeBytes(slice);
return this;
}
@Override
public Slice slice()
{
throw new UnsupportedOperationException();
}
@Override
public Slice getUnderlyingSlice()
{
throw new UnsupportedOperationException();
}
@Override
public String toString(Charset charset)
{
throw new UnsupportedOperationException();
}
@Override
public String toString()
{
StringBuilder builder = new StringBuilder("OrcOutputBuffer{");
builder.append("outputStream=").append(compressedOutputStream);
builder.append(", bufferSize=").append(slice.length());
builder.append('}');
return builder.toString();
}
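/**
 * Makes room for at least {@code minWritableBytes}: grows the buffer (doubling,
 * capped at {@code maxBufferSize}) while growth is still possible, otherwise
 * flushes the buffered data to the output stream.
 */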
private void ensureWritableBytes(int minWritableBytes)
{
int neededBufferSize = bufferPosition + minWritableBytes;
if (neededBufferSize <= slice.length()) {
return;
}
if (slice.length() >= maxBufferSize) {
flushBufferToOutputStream();
return;
}
// grow the buffer size
int newBufferSize = clamp(slice.length() * 2L, minWritableBytes, maxBufferSize);
if (neededBufferSize <= newBufferSize) {
// we have capacity in the new buffer; just copy the data to the new buffer
byte[] previousBuffer = buffer;
buffer = new byte[newBufferSize];
slice = wrappedBuffer(buffer);
System.arraycopy(previousBuffer, 0, buffer, 0, bufferPosition);
}
else {
// there is not enough capacity in the new buffer; flush the data and allocate a new buffer
flushBufferToOutputStream();
buffer = new byte[newBufferSize];
slice = wrappedBuffer(buffer);
}
}
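// Returns how many of the requested bytes can be written into the current buffer,
// flushing first if necessary. Callers loop until the full length is consumed.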
private int ensureBatchSize(int length)
{
ensureWritableBytes(min(DIRECT_FLUSH_SIZE, length));
return min(length, slice.length() - bufferPosition);
}
private void flushBufferToOutputStream()
{
if (bufferPosition > 0) {
writeChunkToOutputStream(buffer, 0, bufferPosition);
bufferOffset += bufferPosition;
bufferPosition = 0;
}
}
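/**
 * Writes one ORC compression chunk: a 3-byte little-endian header followed by the
 * chunk body. The header value is {@code length << 1}; the low bit is set when the
 * body is stored uncompressed because compression did not reduce its size.
 */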
private void writeChunkToOutputStream(byte[] chunk, int offset, int length)
{
if (compressor == null) {
compressedOutputStream.write(chunk, offset, length);
return;
}
checkArgument(length <= buffer.length, "Write chunk length must not be larger than the buffer size");
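// size the scratch buffer for the worst-case compressed length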
int minCompressionBufferSize = compressor.maxCompressedLength(length);
if (compressionBuffer.length < minCompressionBufferSize) {
compressionBuffer = new byte[minCompressionBufferSize];
}
int compressedSize = compressor.compress(chunk, offset, length, compressionBuffer, 0, compressionBuffer.length);
if (compressedSize < length) {
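// compression helped: header low bit is 0, upper bits carry the compressed length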
int chunkHeader = (compressedSize << 1);
compressedOutputStream.write(chunkHeader & 0x00_00FF);
compressedOutputStream.write((chunkHeader & 0x00_FF00) >> 8);
compressedOutputStream.write((chunkHeader & 0xFF_0000) >> 16);
compressedOutputStream.writeBytes(compressionBuffer, 0, compressedSize);
}
else {
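// compression did not help: store the original bytes with the header low bit set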
int header = (length << 1) + 1;
compressedOutputStream.write(header & 0x00_00FF);
compressedOutputStream.write((header & 0x00_FF00) >> 8);
compressedOutputStream.write((header & 0xFF_0000) >> 16);
compressedOutputStream.writeBytes(chunk, offset, length);
}
}
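// Large writes skip the local buffer; with compression enabled they are still split
// into buffer-sized chunks so each chunk's uncompressed size stays bounded.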
private void writeDirectlyToOutputStream(byte[] bytes, int bytesOffset, int length)
{
if (compressor == null) {
compressedOutputStream.writeBytes(bytes, bytesOffset, length);
return;
}
while (length > 0) {
int chunkSize = min(length, buffer.length);
writeChunkToOutputStream(bytes, bytesOffset, chunkSize);
length -= chunkSize;
bytesOffset += chunkSize;
}
}
@VisibleForTesting
int getBufferCapacity()
{
return slice.length();
}
}