All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.trino.orc.stream.LongInputStreamV1 Maven / Gradle / Ivy

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.orc.stream;

import io.trino.orc.OrcCorruptionException;
import io.trino.orc.checkpoint.LongStreamCheckpoint;
import io.trino.orc.checkpoint.LongStreamV1Checkpoint;

import java.io.IOException;

import static java.lang.Math.min;
import static java.lang.Math.toIntExact;

public class LongInputStreamV1
        implements LongInputStream
{
    private static final int MIN_REPEAT_SIZE = 3;
    private static final int MAX_LITERAL_SIZE = 128;

    private final OrcInputStream input;
    private final boolean signed;
    private final long[] literals = new long[MAX_LITERAL_SIZE];
    private int numLiterals;
    private int delta;
    private int used;
    private boolean repeat;
    private long lastReadInputCheckpoint;

    public LongInputStreamV1(OrcInputStream input, boolean signed)
    {
        this.input = input;
        this.signed = signed;
        lastReadInputCheckpoint = input.getCheckpoint();
    }

    // This comes from the Apache Hive ORC code
    private void readValues()
            throws IOException
    {
        lastReadInputCheckpoint = input.getCheckpoint();

        int control = input.read();
        if (control == -1) {
            throw new OrcCorruptionException(input.getOrcDataSourceId(), "Read past end of RLE integer");
        }

        if (control < 0x80) {
            numLiterals = control + MIN_REPEAT_SIZE;
            used = 0;
            repeat = true;
            int delta = input.read();
            if (delta == -1) {
                throw new OrcCorruptionException(input.getOrcDataSourceId(), "End of stream in RLE Integer");
            }

            // convert from 0 to 255 to -128 to 127 by converting to a signed byte
            this.delta = (byte) delta;
            literals[0] = LongDecode.readVInt(signed, input);
        }
        else {
            numLiterals = 0x100 - control;
            used = 0;
            repeat = false;
            for (int i = 0; i < numLiterals; ++i) {
                literals[i] = LongDecode.readVInt(signed, input);
            }
        }
    }

    @Override
    // This comes from the Apache Hive ORC code
    public long next()
            throws IOException
    {
        long result;
        if (used == numLiterals) {
            readValues();
        }
        if (repeat) {
            result = literals[0] + used++ * (long) delta;
        }
        else {
            result = literals[used++];
        }
        return result;
    }

    @Override
    public void next(long[] values, int items)
            throws IOException
    {
        int offset = 0;
        while (items > 0) {
            if (used == numLiterals) {
                numLiterals = 0;
                used = 0;
                readValues();
            }

            int chunkSize = min(numLiterals - used, items);
            if (repeat) {
                for (int i = 0; i < chunkSize; i++) {
                    values[offset + i] = literals[0] + ((used + i) * (long) delta);
                }
            }
            else {
                System.arraycopy(literals, used, values, offset, chunkSize);
            }
            used += chunkSize;
            offset += chunkSize;
            items -= chunkSize;
        }
    }

    @Override
    public void next(int[] values, int items)
            throws IOException
    {
        int offset = 0;
        while (items > 0) {
            if (used == numLiterals) {
                numLiterals = 0;
                used = 0;
                readValues();
            }

            int chunkSize = min(numLiterals - used, items);
            if (repeat) {
                for (int i = 0; i < chunkSize; i++) {
                    long literal = literals[0] + ((used + i) * (long) delta);
                    int value = (int) literal;
                    if (literal != value) {
                        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 32bit number");
                    }
                    values[offset + i] = value;
                }
            }
            else {
                for (int i = 0; i < chunkSize; i++) {
                    long literal = literals[used + i];
                    int value = (int) literal;
                    if (literal != value) {
                        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 32bit number");
                    }
                    values[offset + i] = value;
                }
            }
            used += chunkSize;
            offset += chunkSize;
            items -= chunkSize;
        }
    }

    @Override
    public void next(short[] values, int items)
            throws IOException
    {
        int offset = 0;
        while (items > 0) {
            if (used == numLiterals) {
                numLiterals = 0;
                used = 0;
                readValues();
            }

            int chunkSize = min(numLiterals - used, items);
            if (repeat) {
                for (int i = 0; i < chunkSize; i++) {
                    long literal = literals[0] + ((used + i) * (long) delta);
                    short value = (short) literal;
                    if (literal != value) {
                        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 16bit number");
                    }
                    values[offset + i] = value;
                }
            }
            else {
                for (int i = 0; i < chunkSize; i++) {
                    long literal = literals[used + i];
                    short value = (short) literal;
                    if (literal != value) {
                        throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decoded value out of range for a 16bit number");
                    }
                    values[offset + i] = value;
                }
            }
            used += chunkSize;
            offset += chunkSize;
            items -= chunkSize;
        }
    }

    @Override
    public void seekToCheckpoint(LongStreamCheckpoint checkpoint)
            throws IOException
    {
        LongStreamV1Checkpoint v1Checkpoint = (LongStreamV1Checkpoint) checkpoint;

        // if the checkpoint is within the current buffer, just adjust the pointer
        if (lastReadInputCheckpoint == v1Checkpoint.getInputStreamCheckpoint() && v1Checkpoint.getOffset() <= numLiterals) {
            used = v1Checkpoint.getOffset();
        }
        else {
            // otherwise, discard the buffer and start over
            input.seekToCheckpoint(v1Checkpoint.getInputStreamCheckpoint());
            numLiterals = 0;
            used = 0;
            skip(v1Checkpoint.getOffset());
        }
    }

    @Override
    public void skip(long items)
            throws IOException
    {
        while (items > 0) {
            if (used == numLiterals) {
                readValues();
            }
            int consume = toIntExact(min(items, numLiterals - used));
            used += consume;
            items -= consume;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy