All downloads are free. Search and download functionality uses the official Maven repository.

io.trino.orc.stream.OrcInputStream Maven / Gradle / Ivy

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.orc.stream;

import com.google.common.primitives.Ints;
import io.airlift.slice.FixedLengthSliceInput;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import io.trino.orc.OrcCorruptionException;
import io.trino.orc.OrcDataSourceId;
import jakarta.annotation.Nullable;

import java.io.IOException;
import java.io.InputStream;

import static com.google.common.base.MoreObjects.toStringHelper;
import static io.airlift.slice.Slices.EMPTY_SLICE;
import static io.trino.orc.checkpoint.InputStreamCheckpoint.createInputStreamCheckpoint;
import static io.trino.orc.checkpoint.InputStreamCheckpoint.decodeCompressedBlockOffset;
import static io.trino.orc.checkpoint.InputStreamCheckpoint.decodeDecompressedOffset;
import static java.lang.Math.min;
import static java.lang.Math.toIntExact;
import static java.util.Objects.requireNonNull;

/**
 * An {@link InputStream} that presents the chunks produced by an {@link OrcChunkLoader}
 * as one continuous stream of bytes.
 * <p>
 * The stream holds at most one chunk at a time in {@code current}. When that chunk is
 * exhausted the next one is pulled from the loader; once the loader reports that no more
 * chunks are available, {@code current} is set to {@code null}, which marks end of stream.
 * <p>
 * Mark/reset is not supported; use {@link #getCheckpoint()} and
 * {@link #seekToCheckpoint(long)} for repositioning.
 */
public final class OrcInputStream
        extends InputStream
{
    private final OrcChunkLoader chunkLoader;

    // 8 byte temp buffer for reading multibyte values that straddle a buffer boundary
    private final Slice tempBuffer8 = Slices.allocate(8);

    // null once the chunk loader has no more chunks (end of stream)
    @Nullable
    private FixedLengthSliceInput current = EMPTY_SLICE.getInput();
    // checkpoint reported by the chunk loader for the buffer held in "current"
    private long lastCheckpoint;

    /**
     * Creates a stream over the chunks supplied by the given loader.
     *
     * @param chunkLoader source of chunks; must not be null
     */
    public OrcInputStream(OrcChunkLoader chunkLoader)
    {
        this.chunkLoader = requireNonNull(chunkLoader, "chunkLoader is null");
    }

    @Override
    public void close()
    {
        // close is never called, so do not add code here
    }

    /**
     * Returns the number of bytes available in the current buffer only, or 0 at end of stream.
     */
    @Override
    public int available()
    {
        if (current == null) {
            return 0;
        }
        return current.available();
    }

    @Override
    public boolean markSupported()
    {
        return false;
    }

    /**
     * Reads a single byte, moving on to following chunks as needed.
     *
     * @return the byte read, or -1 at end of stream
     */
    @Override
    public int read()
            throws IOException
    {
        // Iterate rather than recurse so a long run of empty chunks cannot grow the call stack
        while (current != null) {
            int result = current.read();
            if (result != -1) {
                return result;
            }
            advance();
        }
        return -1;
    }

    /**
     * Reads up to {@code length} bytes into {@code b} starting at {@code off}.
     * Data is taken from a single buffer per call, so fewer than {@code length}
     * bytes may be returned even when more data follows.
     *
     * @return the value returned by the current buffer's read, or -1 at end of stream
     */
    @Override
    public int read(byte[] b, int off, int length)
            throws IOException
    {
        if (current == null) {
            return -1;
        }

        // Skip over any exhausted or empty buffers, consistent with read(); stopping after a
        // single advance could let an empty chunk be mistaken for end of stream
        while (current.remaining() == 0) {
            advance();
            if (current == null) {
                return -1;
            }
        }

        return current.read(b, off, length);
    }

    /**
     * Skips exactly {@code length} bytes.
     *
     * @throws OrcCorruptionException if {@link #skip(long)} reports end of stream first
     */
    public void skipFully(long length)
            throws IOException
    {
        while (length > 0) {
            long result = skip(length);
            if (result < 0) {
                throw new OrcCorruptionException(chunkLoader.getOrcDataSourceId(), "Unexpected end of stream");
            }
            length -= result;
        }
    }

    /**
     * Reads exactly {@code length} bytes into {@code buffer}, starting at index {@code offset}.
     *
     * @param buffer destination array
     * @param offset index of the first byte to write in {@code buffer}
     * @param length number of bytes to read
     * @throws OrcCorruptionException if the stream ends before {@code length} bytes are read
     */
    public void readFully(byte[] buffer, int offset, int length)
            throws IOException
    {
        // "length" is a byte count, not an end index: loop until offset + length is reached
        int end = offset + length;
        while (offset < end) {
            int result = read(buffer, offset, end - offset);
            if (result < 0) {
                throw new OrcCorruptionException(chunkLoader.getOrcDataSourceId(), "Unexpected end of stream");
            }
            offset += result;
        }
    }

    /**
     * Reads exactly {@code length} int values into {@code values}, starting at index {@code offset}.
     * A value whose bytes are split across two buffers is assembled in {@code tempBuffer8}.
     *
     * @throws OrcCorruptionException if the chunks run out before all values are read
     */
    public void readFully(int[] values, int offset, int length)
            throws IOException
    {
        if (current == null) {
            throw new OrcCorruptionException(chunkLoader.getOrcDataSourceId(), "Unexpected end of stream");
        }

        while (length > 0) {
            int remaining = Ints.saturatedCast(current.remaining());
            if (remaining < Integer.BYTES) {
                // there might be a value split across the buffers
                Slice slice = null;
                if (remaining != 0) {
                    // stash the leading bytes of the split value before dropping this buffer
                    slice = tempBuffer8;
                    current.readBytes(slice, 0, remaining);
                }

                advance();
                if (current == null) {
                    throw new OrcCorruptionException(chunkLoader.getOrcDataSourceId(), "Unexpected end of stream");
                }

                if (remaining != 0) {
                    // complete the split value with the first bytes of the new buffer
                    current.readBytes(slice, remaining, Integer.BYTES - remaining);
                    values[offset] = slice.getInt(0);
                    length--;
                    offset++;
                }
                remaining = Ints.saturatedCast(current.remaining());
            }

            // bulk-read as many whole values as the current buffer holds
            int chunkSize = min(length, remaining / Integer.BYTES);
            current.readInts(values, offset, chunkSize);
            length -= chunkSize;
            offset += chunkSize;
        }
    }

    /**
     * Reads exactly {@code length} long values into {@code values}, starting at index {@code offset}.
     * A value whose bytes are split across two buffers is assembled in {@code tempBuffer8}.
     *
     * @throws OrcCorruptionException if the chunks run out before all values are read
     */
    public void readFully(long[] values, int offset, int length)
            throws IOException
    {
        if (current == null) {
            throw new OrcCorruptionException(chunkLoader.getOrcDataSourceId(), "Unexpected end of stream");
        }

        while (length > 0) {
            int remaining = Ints.saturatedCast(current.remaining());
            if (remaining < Long.BYTES) {
                // there might be a value split across the buffers
                Slice slice = null;
                if (remaining != 0) {
                    // stash the leading bytes of the split value before dropping this buffer
                    slice = tempBuffer8;
                    current.readBytes(slice, 0, remaining);
                }

                advance();
                if (current == null) {
                    throw new OrcCorruptionException(chunkLoader.getOrcDataSourceId(), "Unexpected end of stream");
                }

                if (remaining != 0) {
                    // complete the split value with the first bytes of the new buffer
                    current.readBytes(slice, remaining, Long.BYTES - remaining);
                    values[offset] = slice.getLong(0);
                    length--;
                    offset++;
                }
                remaining = Ints.saturatedCast(current.remaining());
            }

            // bulk-read as many whole values as the current buffer holds
            int chunkSize = min(length, remaining / Long.BYTES);
            current.readLongs(values, offset, chunkSize);
            length -= chunkSize;
            offset += chunkSize;
        }
    }

    /**
     * Returns the data source id reported by the underlying chunk loader.
     */
    public OrcDataSourceId getOrcDataSourceId()
    {
        return chunkLoader.getOrcDataSourceId();
    }

    /**
     * Returns a checkpoint for the current read position: the chunk loader's last
     * checkpoint, with the position inside the current buffer added to its decompressed
     * offset when that position is greater than zero.
     */
    public long getCheckpoint()
    {
        long checkpoint = chunkLoader.getLastCheckpoint();
        if (current != null && current.position() > 0) {
            checkpoint = createInputStreamCheckpoint(decodeCompressedBlockOffset(checkpoint), toIntExact(decodeDecompressedOffset(checkpoint) + current.position()));
        }
        return checkpoint;
    }

    /**
     * Repositions the stream to the given checkpoint. If the checkpoint falls inside the
     * buffer currently held, only the position within that buffer is changed; otherwise the
     * buffer is dropped and the chunk loader is asked to seek.
     */
    public void seekToCheckpoint(long checkpoint)
            throws IOException
    {
        int compressedOffset = decodeCompressedBlockOffset(checkpoint);
        int decompressedOffset = decodeDecompressedOffset(checkpoint);
        // if checkpoint is within the current buffer, seek locally
        int currentDecompressedBufferOffset = decodeDecompressedOffset(lastCheckpoint);
        if (current != null && compressedOffset == decodeCompressedBlockOffset(lastCheckpoint)
                && decompressedOffset >= currentDecompressedBufferOffset
                && decompressedOffset < currentDecompressedBufferOffset + current.length()) {
            current.setPosition(decompressedOffset - currentDecompressedBufferOffset);
            return;
        }
        // otherwise, drop the current buffer and seek the underlying data loader
        current = EMPTY_SLICE.getInput();
        chunkLoader.seekToCheckpoint(checkpoint);
        lastCheckpoint = checkpoint;
    }

    /**
     * Skips up to {@code n} bytes.
     * <p>
     * NOTE: unlike the general {@link InputStream#skip(long)} contract, this returns -1
     * when the stream is already at its end or when {@code n <= 0};
     * {@link #skipFully(long)} relies on the negative return to detect end of stream.
     *
     * @return the number of bytes skipped, 0 if the end of stream was reached while
     *         looking for more data, or -1 as described above
     */
    @Override
    public long skip(long n)
            throws IOException
    {
        if (current == null || n <= 0) {
            return -1;
        }

        long result = current.skip(n);
        if (result != 0) {
            return result;
        }
        // Nothing was skipped in the current buffer: consume one byte through read(), which
        // moves on to the next chunk when needed, then skip the rest within that buffer
        if (read() == -1) {
            return 0;
        }
        return 1 + current.skip(n - 1);
    }

    // This comes from the Apache Hive ORC code
    /**
     * Replaces the current buffer with the next chunk from the loader and records the
     * loader's checkpoint for it, or sets {@code current} to null when no chunk remains.
     */
    private void advance()
            throws IOException
    {
        if (!chunkLoader.hasNextChunk()) {
            current = null;
            return;
        }
        current = chunkLoader.nextChunk().getInput();
        lastCheckpoint = chunkLoader.getLastCheckpoint();
    }

    @Override
    public String toString()
    {
        return toStringHelper(this)
                .add("source", chunkLoader)
                .add("uncompressedOffset", current == null ? null : current.position())
                .toString();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy