/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.orc;

import com.facebook.presto.orc.checkpoint.InputStreamCheckpoint;
import com.facebook.presto.orc.metadata.CompressionKind;
import com.facebook.presto.orc.writer.CompressionBufferPool;
import com.facebook.presto.orc.zlib.DeflateCompressor;
import com.facebook.presto.orc.zstd.ZstdJniCompressor;
import com.google.common.annotations.VisibleForTesting;
import io.airlift.compress.Compressor;
import io.airlift.compress.lz4.Lz4Compressor;
import io.airlift.compress.snappy.SnappyCompressor;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
import org.openjdk.jol.info.ClassLayout;

import javax.annotation.Nullable;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Optional;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static io.airlift.slice.SizeOf.SIZE_OF_BYTE;
import static io.airlift.slice.SizeOf.SIZE_OF_INT;
import static io.airlift.slice.SizeOf.SIZE_OF_LONG;
import static io.airlift.slice.SizeOf.SIZE_OF_SHORT;
import static io.airlift.slice.Slices.wrappedBuffer;
import static java.lang.Math.max;
import static java.lang.Math.min;
import static java.lang.Math.toIntExact;
import static java.util.Objects.requireNonNull;
import static sun.misc.Unsafe.ARRAY_BYTE_BASE_OFFSET;

/**
 * A growable write buffer for ORC stream data that optionally compresses and/or
 * encrypts each flushed chunk before appending it to an in-memory
 * {@code ChunkedSliceOutput}.
 *
 * <p>Data written through the {@code write*} methods accumulates in {@code buffer}
 * (grown geometrically up to {@code maxBufferSize}). When the buffer fills, or on
 * {@link #flush()}/{@link #close()}, the buffered bytes are emitted as one chunk:
 * compressed if a compressor is configured and compression actually shrinks the
 * data, encrypted if an encryptor is present, and framed with the ORC 3-byte
 * little-endian chunk header when either compression or encryption is in play.
 *
 * <p>NOTE(review): the {@code Optional} field/parameters in this file appear to
 * have lost their generic type arguments during extraction (likely
 * {@code Optional<DwrfDataEncryptor>} — {@code dwrfEncryptor.get().encrypt(...)}
 * below would not compile against a raw {@code Optional}). Confirm against the
 * original source before reusing this text.
 *
 * <p>This class is not thread-safe (single mutable buffer, no synchronization).
 */
public class OrcOutputBuffer
        extends SliceOutput
{
    private static final int INSTANCE_SIZE = ClassLayout.parseClass(OrcOutputBuffer.class).instanceSize();
    private static final int PAGE_HEADER_SIZE = 3; // ORC spec 3 byte header
    private static final int INITIAL_BUFFER_SIZE = 256;
    // Maximum size of the staging buffer; when compressing, this is the configured
    // max minus PAGE_HEADER_SIZE so a framed chunk never exceeds the configured limit.
    private final int maxBufferSize;
    private final int minOutputBufferChunkSize;
    private final int maxOutputBufferChunkSize;
    // Chunks shorter than this are written uncompressed even when a compressor exists.
    private final int minCompressibleSize;

    // Pool for scratch buffers used during compression (checked out/in per chunk).
    private final CompressionBufferPool compressionBufferPool;
    // Optional encryptor applied to each chunk after compression (see note above
    // about the stripped generic type argument).
    private final Optional dwrfEncryptor;
    // null when CompressionKind.NONE was configured.
    @Nullable
    private final Compressor compressor;

    // Lazily created on first flush; holds the compressed/encrypted output.
    private ChunkedSliceOutput compressedOutputStream;
    // Slice view over 'buffer'; both are replaced together when the buffer grows.
    private Slice slice;
    private byte[] buffer;

    /**
     * Offset of buffer within stream.
     */
    private long bufferOffset;
    /**
     * Current position for writing in buffer.
     */
    private int bufferPosition;

    /**
     * Creates a buffer configured from {@code columnWriterOptions} (compression
     * kind/level, buffer-size limits, compression buffer pool) with an optional
     * per-stream encryptor.
     *
     * @throws IllegalArgumentException if the configured max buffer size does not
     *         exceed the 3-byte page header, or the compression kind is unsupported
     */
    public OrcOutputBuffer(ColumnWriterOptions columnWriterOptions, Optional dwrfEncryptor)
    {
        requireNonNull(columnWriterOptions, "columnWriterOptions is null");
        requireNonNull(dwrfEncryptor, "dwrfEncryptor is null");
        int maxBufferSize = columnWriterOptions.getCompressionMaxBufferSize();
        checkArgument(maxBufferSize > PAGE_HEADER_SIZE, "maximum buffer size should be greater than page header size");

        CompressionKind compressionKind = columnWriterOptions.getCompressionKind();
        // Reserve room for the 3-byte chunk header when compression framing is used.
        this.maxBufferSize = compressionKind == CompressionKind.NONE ? maxBufferSize : maxBufferSize - PAGE_HEADER_SIZE;
        this.minOutputBufferChunkSize = columnWriterOptions.getMinOutputBufferChunkSize();
        this.maxOutputBufferChunkSize = columnWriterOptions.getMaxOutputBufferChunkSize();
        this.minCompressibleSize = compressionKind.getMinCompressibleSize();

        this.buffer = new byte[INITIAL_BUFFER_SIZE];
        this.slice = wrappedBuffer(buffer);

        this.compressionBufferPool = columnWriterOptions.getCompressionBufferPool();
        // NOTE(review): redundant — dwrfEncryptor was already null-checked above.
        this.dwrfEncryptor = requireNonNull(dwrfEncryptor, "dwrfEncryptor is null");

        if (compressionKind == CompressionKind.NONE) {
            this.compressor = null;
        }
        else if (compressionKind == CompressionKind.SNAPPY) {
            this.compressor = new SnappyCompressor();
        }
        else if (compressionKind == CompressionKind.ZLIB) {
            this.compressor = new DeflateCompressor(columnWriterOptions.getCompressionLevel());
        }
        else if (compressionKind == CompressionKind.LZ4) {
            this.compressor = new Lz4Compressor();
        }
        else if (compressionKind == CompressionKind.ZSTD) {
            this.compressor = new ZstdJniCompressor(columnWriterOptions.getCompressionLevel());
        }
        else {
            throw new IllegalArgumentException("Unsupported compression " + compressionKind);
        }
    }

    /**
     * Returns the exact size of the flushed output. The buffer must be fully
     * flushed (via {@link #flush()} or {@link #close()}) before calling.
     *
     * @throws IllegalStateException if unflushed bytes remain in the buffer
     */
    public long getOutputDataSize()
    {
        checkState(bufferPosition == 0, "Buffer must be flushed before getOutputDataSize can be called");
        return getCompressedOutputSize();
    }

    // Size of data already flushed to the (possibly compressed) output; 0 if nothing flushed yet.
    private int getCompressedOutputSize()
    {
        return compressedOutputStream != null ? compressedOutputStream.size() : 0;
    }

    /**
     * Estimates the output size: flushed bytes plus unflushed buffered bytes.
     * The unflushed part is counted uncompressed, so this may overestimate.
     */
    public long estimateOutputDataSize()
    {
        return getCompressedOutputSize() + bufferPosition;
    }

    /**
     * Writes the flushed output to {@code outputStream} and returns the number of
     * bytes written (0 if nothing was ever flushed).
     *
     * @throws IllegalStateException if unflushed bytes remain in the buffer
     */
    public int writeDataTo(SliceOutput outputStream)
    {
        checkState(bufferPosition == 0, "Buffer must be closed before writeDataTo can be called");
        if (compressedOutputStream == null) {
            return 0;
        }

        compressedOutputStream.writeTo(outputStream);
        return compressedOutputStream.size();
    }

    /**
     * Returns a stream checkpoint for the current write position. With no
     * compression and no encryption this is simply the uncompressed size;
     * otherwise it encodes (compressed-stream offset, offset within the current
     * uncompressed block) via {@link InputStreamCheckpoint}.
     */
    public long getCheckpoint()
    {
        if (compressor == null && !dwrfEncryptor.isPresent()) {
            return size();
        }
        return InputStreamCheckpoint.createInputStreamCheckpoint(getCompressedOutputSize(), bufferPosition);
    }

    /** Flushes any buffered bytes to the output stream as one chunk. */
    @Override
    public void flush()
    {
        flushBufferToOutputStream();
    }

    /** Same as {@link #flush()}; does not release any resources. */
    @Override
    public void close()
    {
        flushBufferToOutputStream();
    }

    /** Discards all written data so the buffer can be reused; keeps allocated buffers. */
    @Override
    public void reset()
    {
        if (compressedOutputStream != null) {
            compressedOutputStream.reset();
        }
        bufferOffset = 0;
        bufferPosition = 0;
    }

    /** Not supported: cannot rewind to an arbitrary position once chunks are flushed. */
    @Override
    public void reset(int position)
    {
        throw new UnsupportedOperationException();
    }

    /** Total number of uncompressed bytes written so far (flushed + buffered). */
    @Override
    public int size()
    {
        return toIntExact(bufferOffset + bufferPosition);
    }

    @Override
    public long getRetainedSize()
    {
        return INSTANCE_SIZE
                + (compressedOutputStream != null ? compressedOutputStream.getRetainedSize() : 0L)
                + slice.getRetainedSize();
    }

    // The buffer grows and flushes on demand, so it is always "writable".
    @Override
    public int writableBytes()
    {
        return Integer.MAX_VALUE;
    }

    @Override
    public boolean isWritable()
    {
        return true;
    }

    @Override
    public void writeByte(int value)
    {
        ensureWritableBytes(SIZE_OF_BYTE);
        slice.setByte(bufferPosition, value);
        bufferPosition += SIZE_OF_BYTE;
    }

    @Override
    public void writeShort(int value)
    {
        ensureWritableBytes(SIZE_OF_SHORT);
        slice.setShort(bufferPosition, value);
        bufferPosition += SIZE_OF_SHORT;
    }

    @Override
    public void writeInt(int value)
    {
        ensureWritableBytes(SIZE_OF_INT);
        slice.setInt(bufferPosition, value);
        bufferPosition += SIZE_OF_INT;
    }

    @Override
    public void writeLong(long value)
    {
        ensureWritableBytes(SIZE_OF_LONG);
        slice.setLong(bufferPosition, value);
        bufferPosition += SIZE_OF_LONG;
    }

    @Override
    public void writeFloat(float value)
    {
        // This normalizes NaN values like `java.io.DataOutputStream` does
        writeInt(Float.floatToIntBits(value));
    }

    @Override
    public void writeDouble(double value)
    {
        // This normalizes NaN values like `java.io.DataOutputStream` does
        writeLong(Double.doubleToLongBits(value));
    }

    @Override
    public void writeBytes(Slice source)
    {
        writeBytes(source, 0, source.length());
    }

    // Extracts the backing byte[] of the slice; assumes the slice is heap-backed
    // (address is relative to ARRAY_BYTE_BASE_OFFSET).
    @Override
    public void writeBytes(Slice source, int sourceOffset, int length)
    {
        byte[] bytes = (byte[]) source.getBase();
        int bytesOffset = (int) (source.getAddress() - ARRAY_BYTE_BASE_OFFSET);
        writeBytes(bytes, sourceOffset + bytesOffset, length);
    }

    @Override
    public void writeBytes(byte[] source)
    {
        writeBytes(source, 0, source.length);
    }

    /**
     * Writes {@code length} bytes in three phases: top off the current buffer,
     * stream any full {@code maxBufferSize}-sized chunks directly to the output
     * (bypassing the buffer), then stash the remaining tail in the buffer.
     */
    @Override
    public void writeBytes(byte[] bytes, int bytesOffset, int length)
    {
        if (length == 0) {
            return;
        }

        // finish filling the buffer
        if (bufferPosition != 0) {
            int chunkSize = min(length, maxBufferSize - bufferPosition);
            ensureWritableBytes(chunkSize);
            slice.setBytes(bufferPosition, bytes, bytesOffset, chunkSize);
            bufferPosition += chunkSize;
            length -= chunkSize;
            bytesOffset += chunkSize;
        }

        // write maxBufferSize chunks directly to output
        if (length >= maxBufferSize) {
            flushBufferToOutputStream();
            int bytesOffsetBefore = bytesOffset;
            while (length >= maxBufferSize) {
                writeChunkToOutputStream(bytes, bytesOffset, maxBufferSize);
                length -= maxBufferSize;
                bytesOffset += maxBufferSize;
            }
            // account for the directly-written bytes in the logical stream offset
            bufferOffset += bytesOffset - bytesOffsetBefore;
        }

        // write the tail smaller than maxBufferSize to the buffer
        if (length > 0) {
            ensureWritableBytes(length);
            slice.setBytes(bufferPosition, bytes, bytesOffset, length);
            bufferPosition += length;
        }
    }

    @Override
    public void writeBytes(InputStream in, int length)
            throws IOException
    {
        // copy in buffer-sized batches, flushing between batches as needed
        while (length > 0) {
            int batch = ensureBatchSize(length);
            slice.setBytes(bufferPosition, in, batch);
            bufferPosition += batch;
            length -= batch;
        }
    }

    @Override
    public void writeZero(int length)
    {
        checkArgument(length >= 0, "length must be 0 or greater than 0.");

        while (length > 0) {
            int batch = ensureBatchSize(length);
            Arrays.fill(buffer, bufferPosition, bufferPosition + batch, (byte) 0);
            bufferPosition += batch;
            length -= batch;
        }
    }

    // Makes room for up to 'length' bytes (flushing if the buffer is full) and
    // returns how many bytes can be written into the buffer right now.
    private int ensureBatchSize(int length)
    {
        ensureWritableBytes(min(length, maxBufferSize - bufferPosition));
        if (availableInBuffer() == 0) {
            flushBufferToOutputStream();
        }
        return min(length, availableInBuffer());
    }

    private int availableInBuffer()
    {
        return slice.length() - bufferPosition;
    }

    @Override
    public SliceOutput appendLong(long value)
    {
        writeLong(value);
        return this;
    }

    @Override
    public SliceOutput appendDouble(double value)
    {
        writeDouble(value);
        return this;
    }

    @Override
    public SliceOutput appendInt(int value)
    {
        writeInt(value);
        return this;
    }

    @Override
    public SliceOutput appendShort(int value)
    {
        writeShort(value);
        return this;
    }

    @Override
    public SliceOutput appendByte(int value)
    {
        writeByte(value);
        return this;
    }

    @Override
    public SliceOutput appendBytes(byte[] source, int sourceIndex, int length)
    {
        writeBytes(source, sourceIndex, length);
        return this;
    }

    @Override
    public SliceOutput appendBytes(byte[] source)
    {
        writeBytes(source);
        return this;
    }

    @Override
    public SliceOutput appendBytes(Slice slice)
    {
        writeBytes(slice);
        return this;
    }

    // Not supported: output may be compressed/encrypted, so there is no single
    // contiguous slice view of the written data.
    @Override
    public Slice slice()
    {
        throw new UnsupportedOperationException();
    }

    @Override
    public Slice getUnderlyingSlice()
    {
        throw new UnsupportedOperationException();
    }

    @Override
    public String toString(Charset charset)
    {
        throw new UnsupportedOperationException();
    }

    @Override
    public String toString()
    {
        StringBuilder builder = new StringBuilder("OrcOutputBuffer{");
        builder.append("outputStream=").append(compressedOutputStream);
        builder.append(", bufferSize=").append(slice.length());
        builder.append('}');
        return builder.toString();
    }

    /**
     * Ensures the buffer can accept {@code minWritableBytes} more bytes, by
     * (in order of preference) doing nothing, flushing a full buffer, growing
     * the buffer (doubling, capped at {@code maxBufferSize}), or flushing and
     * reallocating when growth alone cannot make room.
     *
     * @throws IllegalArgumentException if {@code minWritableBytes > maxBufferSize}
     */
    private void ensureWritableBytes(int minWritableBytes)
    {
        checkArgument(minWritableBytes <= maxBufferSize, "Min writable bytes must not exceed max buffer size");

        int neededBufferSize = bufferPosition + minWritableBytes;
        if (neededBufferSize <= slice.length()) {
            return;
        }

        if (slice.length() >= maxBufferSize) {
            flushBufferToOutputStream();
            return;
        }

        // grow the buffer size up to maxBufferSize
        int newBufferSize = min(max(slice.length() * 2, neededBufferSize), maxBufferSize);
        if (newBufferSize >= neededBufferSize) {
            // we have capacity in the new buffer; just copy the data to the new buffer
            byte[] previousBuffer = buffer;
            buffer = new byte[newBufferSize];
            slice = wrappedBuffer(buffer);
            System.arraycopy(previousBuffer, 0, buffer, 0, bufferPosition);
        }
        else {
            // there is no enough capacity in the new buffer; flush the data and allocate the new buffer
            flushBufferToOutputStream();
            buffer = new byte[newBufferSize];
            slice = wrappedBuffer(buffer);
        }
    }

    // Emits buffered bytes as one chunk and advances the logical stream offset.
    private void flushBufferToOutputStream()
    {
        if (bufferPosition > 0) {
            writeChunkToOutputStream(buffer, 0, bufferPosition);
            bufferOffset += bufferPosition;
            bufferPosition = 0;
        }
    }

    private void initCompressedOutputStream()
    {
        checkState(compressedOutputStream == null, "compressedOutputStream is already initialized");
        compressedOutputStream = new ChunkedSliceOutput(minOutputBufferChunkSize, maxOutputBufferChunkSize);
    }

    /**
     * Writes one chunk to the output. With neither compression nor encryption the
     * bytes are copied verbatim. Otherwise the chunk is compressed (only if the
     * result is actually smaller than the input), then encrypted, then framed with
     * the ORC 3-byte header. The compression scratch buffer is always returned to
     * the pool, even on failure.
     */
    private void writeChunkToOutputStream(byte[] chunk, int offset, int length)
    {
        if (compressedOutputStream == null) {
            initCompressedOutputStream();
        }

        if (compressor == null && !dwrfEncryptor.isPresent()) {
            compressedOutputStream.write(chunk, offset, length);
            return;
        }

        checkArgument(length <= maxBufferSize, "Write chunk length must be less than max compression buffer size");

        boolean isCompressed = false;
        byte[] compressionBuffer = null;
        try {
            if (compressor != null && length >= minCompressibleSize) {
                int minCompressionBufferSize = compressor.maxCompressedLength(length);
                compressionBuffer = compressionBufferPool.checkOut(minCompressionBufferSize);
                int compressedSize = compressor.compress(chunk, offset, length, compressionBuffer, 0, compressionBuffer.length);
                // keep the original bytes if compression did not shrink them
                if (compressedSize < length) {
                    isCompressed = true;
                    chunk = compressionBuffer;
                    length = compressedSize;
                    offset = 0;
                }
            }
            if (dwrfEncryptor.isPresent()) {
                chunk = dwrfEncryptor.get().encrypt(chunk, offset, length);
                length = chunk.length;
                offset = 0;
                // size after encryption should not exceed what the 3 byte header can hold (2^23)
                if (length > 8388608) {
                    throw new OrcEncryptionException("Encrypted data size %s exceeds limit of 2^23", length);
                }
            }

            // ORC header: length shifted left one bit; low bit set marks an
            // uncompressed ("original") chunk
            int header = isCompressed ? length << 1 : (length << 1) + 1;
            writeChunkedOutput(chunk, offset, length, header);
        }
        finally {
            if (compressionBuffer != null) {
                compressionBufferPool.checkIn(compressionBuffer);
            }
        }
    }

    // Writes the 3-byte header little-endian (low byte first) followed by the chunk body.
    private void writeChunkedOutput(byte[] chunk, int offset, int length, int header)
    {
        compressedOutputStream.write(header & 0x00_00FF);
        compressedOutputStream.write((header & 0x00_FF00) >> 8);
        compressedOutputStream.write((header & 0xFF_0000) >> 16);
        compressedOutputStream.writeBytes(chunk, offset, length);
    }

    @VisibleForTesting
    int getBufferCapacity()
    {
        return slice.length();
    }
}