All downloads are free. Search and download functionality uses the official Maven repository.

io.trino.orc.stream.CompressedOrcChunkLoader Maven / Gradle / Ivy

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.orc.stream;

import io.airlift.slice.FixedLengthSliceInput;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import io.trino.memory.context.AggregatedMemoryContext;
import io.trino.memory.context.LocalMemoryContext;
import io.trino.orc.OrcCorruptionException;
import io.trino.orc.OrcDataSourceId;
import io.trino.orc.OrcDecompressor;
import io.trino.orc.OrcDecompressor.OutputBuffer;

import java.io.IOException;
import java.util.Arrays;

import static com.google.common.base.MoreObjects.toStringHelper;
import static io.airlift.slice.Slices.EMPTY_SLICE;
import static io.trino.orc.checkpoint.InputStreamCheckpoint.createInputStreamCheckpoint;
import static io.trino.orc.checkpoint.InputStreamCheckpoint.decodeCompressedBlockOffset;
import static io.trino.orc.checkpoint.InputStreamCheckpoint.decodeDecompressedOffset;
import static java.lang.Math.toIntExact;
import static java.util.Objects.requireNonNull;

/**
 * An {@link OrcChunkLoader} that reads a stream made of compressed blocks from an
 * {@link OrcDataReader} and decodes them with the supplied {@link OrcDecompressor}.
 * <p>
 * Every block starts with a 3 byte little-endian header: the lowest bit is set when the
 * block body is stored uncompressed, and the remaining 23 bits hold the length of the
 * block body in bytes.
 * <p>
 * Decompressed chunks wrap a single output buffer owned by this loader that is reused
 * between calls, so a chunk returned by {@link #nextChunk()} should be consumed before the
 * next chunk is requested.
 */
public final class CompressedOrcChunkLoader
        implements OrcChunkLoader
{
    private final OrcDataReader dataReader;
    // tracks dataReader.getRetainedSize(), refreshed after every buffer load
    private final LocalMemoryContext dataReaderMemoryUsage;
    private final OrcDecompressor decompressor;
    // tracks the size of decompressorOutputBuffer
    private final LocalMemoryContext decompressionBufferMemoryUsage;

    // view over the compressed buffer most recently obtained from the data reader
    private FixedLengthSliceInput compressedBufferStream = EMPTY_SLICE.getInput();
    // offset, within the whole compressed stream, of the first byte of compressedBufferStream
    private int compressedBufferStart;
    // number of decompressed bytes to skip at the start of the next chunk (set by a seek)
    private int nextUncompressedOffset;
    private long lastCheckpoint;

    // lazily allocated and reused across chunks; only ever grows
    private byte[] decompressorOutputBuffer;

    /**
     * Creates a loader over the compressed stream exposed by {@code dataReader}.
     *
     * @param dataReader source of the compressed bytes
     * @param decompressor codec used to decode blocks that are flagged as compressed
     * @param memoryContext parent context used to account for the data reader and the decompression buffer
     * @throws NullPointerException if any argument is null
     */
    public CompressedOrcChunkLoader(
            OrcDataReader dataReader,
            OrcDecompressor decompressor,
            AggregatedMemoryContext memoryContext)
    {
        this.dataReader = requireNonNull(dataReader, "dataReader is null");
        this.decompressor = requireNonNull(decompressor, "decompressor is null");
        requireNonNull(memoryContext, "memoryContext is null");
        this.dataReaderMemoryUsage = memoryContext.newLocalMemoryContext(CompressedOrcChunkLoader.class.getSimpleName());
        dataReaderMemoryUsage.setBytes(dataReader.getRetainedSize());
        this.decompressionBufferMemoryUsage = memoryContext.newLocalMemoryContext(CompressedOrcChunkLoader.class.getSimpleName());
    }

    @Override
    public OrcDataSourceId getOrcDataSourceId()
    {
        return dataReader.getOrcDataSourceId();
    }

    /**
     * Returns the current read position expressed as an offset into the whole compressed stream.
     */
    private int getCurrentCompressedOffset()
    {
        return toIntExact(compressedBufferStart + compressedBufferStream.position());
    }

    /**
     * Returns true while the current compressed offset is before the end of the data reader.
     */
    @Override
    public boolean hasNextChunk()
    {
        return getCurrentCompressedOffset() < dataReader.getSize();
    }

    /**
     * Returns the checkpoint recorded by the most recent {@link #seekToCheckpoint(long)} or
     * {@link #nextChunk()} call.
     */
    @Override
    public long getLastCheckpoint()
    {
        return lastCheckpoint;
    }

    /**
     * Positions this loader at the block and decompressed offset encoded in {@code checkpoint}.
     * No data is read here; the decompressed offset is applied by the next call to
     * {@link #nextChunk()}.
     *
     * @throws OrcCorruptionException if the encoded block offset is at or past the end of the stream
     */
    @Override
    public void seekToCheckpoint(long checkpoint)
            throws IOException
    {
        int compressedOffset = decodeCompressedBlockOffset(checkpoint);
        if (compressedOffset >= dataReader.getSize()) {
            throw new OrcCorruptionException(dataReader.getOrcDataSourceId(), "Seek past end of stream");
        }
        // is the compressed offset within the current compressed buffer
        if (compressedBufferStart <= compressedOffset && compressedOffset < compressedBufferStart + compressedBufferStream.length()) {
            compressedBufferStream.setPosition(compressedOffset - compressedBufferStart);
        }
        else {
            // drop the current buffer; the next read will load one starting at the new offset
            compressedBufferStart = compressedOffset;
            compressedBufferStream = EMPTY_SLICE.getInput();
        }

        nextUncompressedOffset = decodeDecompressedOffset(checkpoint);
        lastCheckpoint = checkpoint;
    }

    /**
     * Reads the next block, decompressing it when the header says it is compressed, and
     * records a checkpoint for the start of that block. If a preceding seek left a pending
     * decompressed offset, that many leading bytes are dropped from the result; when this
     * consumes the whole block the following block is returned instead.
     *
     * @return the decoded chunk; for compressed blocks this wraps the reusable output buffer
     * @throws OrcCorruptionException if the block extends past the end of the stream, is larger
     * than the reader's maximum buffer size, or the pending decompressed offset is beyond the
     * end of the decoded block
     */
    @Override
    public Slice nextChunk()
            throws IOException
    {
        // 3 byte header
        // NOTE: this must match BLOCK_HEADER_SIZE
        ensureCompressedBytesAvailable(3);
        lastCheckpoint = createInputStreamCheckpoint(getCurrentCompressedOffset(), nextUncompressedOffset);
        int b0 = compressedBufferStream.readUnsignedByte();
        int b1 = compressedBufferStream.readUnsignedByte();
        int b2 = compressedBufferStream.readUnsignedByte();

        // lowest bit of the little-endian header is the "stored uncompressed" flag,
        // the upper 23 bits are the length of the block body
        boolean isUncompressed = (b0 & 0x01) == 1;
        int chunkLength = (b2 << 15) | (b1 << 7) | (b0 >>> 1);

        ensureCompressedBytesAvailable(chunkLength);
        Slice chunk = compressedBufferStream.readSlice(chunkLength);

        if (!isUncompressed) {
            int uncompressedSize = decompressor.decompress(
                    chunk.byteArray(),
                    chunk.byteArrayOffset(),
                    chunk.length(),
                    createOutputBuffer());
            chunk = Slices.wrappedBuffer(decompressorOutputBuffer, 0, uncompressedSize);
        }
        if (nextUncompressedOffset != 0) {
            // a checkpoint pointing beyond the decoded block can only come from corrupt data;
            // report it as such instead of failing inside slice() with a negative length
            if (nextUncompressedOffset > chunk.length()) {
                throw new OrcCorruptionException(
                        dataReader.getOrcDataSourceId(),
                        "Checkpoint offset (%s bytes) is beyond the end of the chunk (%s bytes)",
                        nextUncompressedOffset,
                        chunk.length());
            }
            chunk = chunk.slice(nextUncompressedOffset, chunk.length() - nextUncompressedOffset);
            nextUncompressedOffset = 0;
            // if we positioned to the end of the chunk, read the next one
            if (chunk.length() == 0) {
                chunk = nextChunk();
            }
        }
        return chunk;
    }

    /**
     * Makes sure at least {@code size} bytes can be read from {@code compressedBufferStream},
     * loading a new buffer from the data reader at the current position when necessary.
     *
     * @throws OrcCorruptionException if {@code size} exceeds the reader's maximum buffer size,
     * the read would run past the end of the stream, or the reader returns fewer bytes than requested
     */
    private void ensureCompressedBytesAvailable(int size)
            throws IOException
    {
        // is this within the current buffer?
        if (size <= compressedBufferStream.remaining()) {
            return;
        }

        // is this a read larger than the buffer
        if (size > dataReader.getMaxBufferSize()) {
            throw new OrcCorruptionException(dataReader.getOrcDataSourceId(), "Requested read size (%s bytes) is greater than max buffer size (%s bytes)", size, dataReader.getMaxBufferSize());
        }

        // is this a read past the end of the stream
        if (compressedBufferStart + compressedBufferStream.position() + size > dataReader.getSize()) {
            throw new OrcCorruptionException(dataReader.getOrcDataSourceId(), "Read past end of stream");
        }

        // re-anchor the buffer at the current read position and fetch from there
        compressedBufferStart = compressedBufferStart + toIntExact(compressedBufferStream.position());
        Slice compressedBuffer = dataReader.seekBuffer(compressedBufferStart);
        dataReaderMemoryUsage.setBytes(dataReader.getRetainedSize());
        if (compressedBuffer.length() < size) {
            throw new OrcCorruptionException(dataReader.getOrcDataSourceId(), "Requested read of %s bytes but only %s bytes were available", size, compressedBuffer.length());
        }
        compressedBufferStream = compressedBuffer.getInput();
    }

    /**
     * Creates the callback handed to the decompressor for obtaining its output array. The
     * callback hands out {@code decompressorOutputBuffer}, replacing it with a larger array
     * only when the requested size exceeds the current capacity, and keeps the memory
     * accounting in sync with the array length.
     */
    private OutputBuffer createOutputBuffer()
    {
        return new OutputBuffer()
        {
            @Override
            public byte[] initialize(int size)
            {
                // previous contents are not needed, so allocate fresh rather than copy
                if (decompressorOutputBuffer == null || size > decompressorOutputBuffer.length) {
                    decompressorOutputBuffer = new byte[size];
                    decompressionBufferMemoryUsage.setBytes(decompressorOutputBuffer.length);
                }
                return decompressorOutputBuffer;
            }

            @Override
            public byte[] grow(int size)
            {
                // existing bytes are preserved when the array is enlarged
                if (size > decompressorOutputBuffer.length) {
                    decompressorOutputBuffer = Arrays.copyOf(decompressorOutputBuffer, size);
                    decompressionBufferMemoryUsage.setBytes(decompressorOutputBuffer.length);
                }
                return decompressorOutputBuffer;
            }
        };
    }

    @Override
    public String toString()
    {
        return toStringHelper(this)
                .add("loader", dataReader)
                .add("compressedOffset", getCurrentCompressedOffset())
                .add("decompressor", decompressor)
                .toString();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy