All Downloads are FREE. Search and download functionality uses the official Maven repository.

io.trino.parquet.reader.decoders.RleBitPackingHybridDecoder Maven / Gradle / Ivy

There is a newer version: 464
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.parquet.reader.decoders;

import io.trino.parquet.reader.SimpleSliceInputStream;

import java.util.Arrays;

import static com.google.common.base.Preconditions.checkArgument;
import static io.trino.parquet.ParquetReaderUtils.readFixedWidthInt;
import static io.trino.parquet.ParquetReaderUtils.readUleb128Int;
import static io.trino.parquet.reader.decoders.IntBitUnpackers.getIntBitUnpacker;
import static io.trino.parquet.reader.decoders.VectorIntBitUnpackers.getVectorIntBitUnpacker;
import static java.lang.Math.min;
import static java.util.Objects.requireNonNull;

/**
 * Run Length Encoding / Bit-Packing Hybrid (RLE) decoder.
 * <p>
 * This class is similar to {@link io.trino.parquet.reader.flat.NullsDecoders} but specialized for reading integers stored in bit width of 0 - 32.
 * It is used specifically for decoding dictionary ids currently.
 * It can be used for decoding definition and repetition levels of nested columns in future.
 * <p>
 * The input is consumed as a sequence of groups. Every group starts with a ULEB128 header:
 * <ul>
 * <li>lowest bit {@code 0} - RLE group: {@code header >>> 1} repetitions of a single value
 * that follows the header and occupies {@code ceil(bitWidth / 8)} bytes</li>
 * <li>lowest bit {@code 1} - bit-packed group: {@code (header >>> 1) * 8} values, unpacked
 * in batches of 8</li>
 * </ul>
 * Decoding may stop in the middle of a group (or in the middle of a bit-packed batch);
 * the position is kept in the instance fields and resumed by the next {@code read} or {@code skip} call.
 */
public final class RleBitPackingHybridDecoder
        implements ValueDecoder
{
    // Bit-packed values come in batches of 8 according to specs
    private static final int EXTRACT_BATCH_SIZE = 8;

    private final int bitWidth;
    // Number of bytes used to store the repeated value of an RLE group
    private final int byteWidth;
    private final IntBitUnpacker unpacker;

    private SimpleSliceInputStream input;

    // Encoding type if decoding stopped in the middle of the group
    private boolean isRle;
    // Values left to decode in the current group
    private int valuesLeftInGroup;
    // With RLE encoding - the current value
    private int rleValue;
    // With bit-packing - buffer of EXTRACT_BATCH_SIZE values currently read.
    private int[] valuesBuffer;
    // Number of values already read in the current buffer while reading bit-packed values
    private int alreadyReadInBuffer;

    /**
     * @param bitWidth number of bits used per bit-packed value, must be within 0 - 32
     * @param vectorizedDecodingEnabled selects the vectorized bit unpacker instead of the scalar one
     * @throws IllegalArgumentException if {@code bitWidth} is outside of 0 - 32
     */
    public RleBitPackingHybridDecoder(int bitWidth, boolean vectorizedDecodingEnabled)
    {
        checkArgument(bitWidth >= 0 && bitWidth <= 32, "bit width need to be between 0 and 32");
        this.bitWidth = bitWidth;
        this.byteWidth = byteWidth(bitWidth);
        this.unpacker = vectorizedDecodingEnabled ? getVectorIntBitUnpacker(bitWidth) : getIntBitUnpacker(bitWidth);
    }

    /**
     * Binds the decoder to {@code input} and resets all decoding state, so the first
     * {@code read} or {@code skip} call starts with a group header of the new input.
     *
     * @throws NullPointerException if {@code input} is null
     */
    @Override
    public void init(SimpleSliceInputStream input)
    {
        this.input = requireNonNull(input, "input is null");
        this.valuesBuffer = new int[EXTRACT_BATCH_SIZE];
        // Forget any partially consumed group of a previously bound input; otherwise its
        // leftover counters would be applied to the bytes of the new input
        this.isRle = false;
        this.valuesLeftInGroup = 0;
        this.rleValue = 0;
        this.alreadyReadInBuffer = 0;
    }

    /**
     * Decodes {@code length} values into {@code values} starting at index {@code offset}.
     */
    @Override
    public void read(int[] values, int offset, int length)
    {
        while (length > 0) {
            if (valuesLeftInGroup == 0) {
                readGroupHeader();
            }

            if (isRle) {
                int chunkSize = min(length, valuesLeftInGroup);
                Arrays.fill(values, offset, offset + chunkSize, rleValue);
                valuesLeftInGroup -= chunkSize;
                offset += chunkSize;
                length -= chunkSize;
            }
            else if (alreadyReadInBuffer != 0) { // bit-packed - read remaining values stored in the buffer
                int remainingValues = EXTRACT_BATCH_SIZE - alreadyReadInBuffer;
                int chunkSize = min(remainingValues, length);
                System.arraycopy(valuesBuffer, alreadyReadInBuffer, values, offset, chunkSize);
                valuesLeftInGroup -= chunkSize;
                // Wraps back to 0 once the buffered batch is fully consumed
                alreadyReadInBuffer = (alreadyReadInBuffer + chunkSize) % EXTRACT_BATCH_SIZE;
                offset += chunkSize;
                length -= chunkSize;
            }
            else { // bit-packed
                // At this point we have only full batches to read and valuesLeftInGroup is a multiple of 8
                int chunkSize = min(length, valuesLeftInGroup);
                int leftToRead = chunkSize % EXTRACT_BATCH_SIZE;
                int fullBatchesToRead = chunkSize - leftToRead; // number of values, not of batches
                unpacker.unpack(values, offset, input, fullBatchesToRead);
                offset += fullBatchesToRead;
                if (leftToRead > 0) {
                    // The request ends inside a batch: unpack the whole batch to the temporary buffer,
                    // hand out the requested prefix and keep the rest for the next call
                    unpacker.unpack(valuesBuffer, 0, input, EXTRACT_BATCH_SIZE);
                    System.arraycopy(valuesBuffer, 0, values, offset, leftToRead);
                    offset += leftToRead;
                }
                alreadyReadInBuffer = leftToRead;
                valuesLeftInGroup -= chunkSize;
                length -= chunkSize;
            }
        }
    }

    /**
     * Skips the next {@code n} values without materializing them for the caller.
     */
    @Override
    public void skip(int n)
    {
        while (n > 0) {
            if (valuesLeftInGroup == 0) {
                readGroupHeader();
            }

            if (isRle) {
                int chunkSize = min(n, valuesLeftInGroup);
                valuesLeftInGroup -= chunkSize;
                n -= chunkSize;
            }
            else if (alreadyReadInBuffer != 0) { // bit-packed - skip remaining values stored in the buffer
                int remainingValues = EXTRACT_BATCH_SIZE - alreadyReadInBuffer;
                int chunkSize = min(remainingValues, n);
                valuesLeftInGroup -= chunkSize;
                alreadyReadInBuffer = (alreadyReadInBuffer + chunkSize) % EXTRACT_BATCH_SIZE;
                n -= chunkSize;
            }
            else { // bit-packed
                int chunkSize = min(n, valuesLeftInGroup);
                int fullBatchesToSkip = chunkSize / EXTRACT_BATCH_SIZE;
                // Size of a single batch is computed first (at most 32) so that the product below
                // is not inflated by EXTRACT_BATCH_SIZE before being divided by Byte.SIZE,
                // which could overflow int for very long skips
                int bytesPerBatch = bitWidth * EXTRACT_BATCH_SIZE / Byte.SIZE;
                input.skip(fullBatchesToSkip * bytesPerBatch);
                int leftToRead = chunkSize % EXTRACT_BATCH_SIZE;
                if (leftToRead > 0) {
                    // The skip ends inside a batch: unpack it so the unskipped tail can be served later
                    unpacker.unpack(valuesBuffer, 0, input, EXTRACT_BATCH_SIZE);
                }
                alreadyReadInBuffer = leftToRead;
                valuesLeftInGroup -= chunkSize;
                n -= chunkSize;
            }
        }
    }

    /**
     * Reads the header of the next group and sets {@code isRle}, {@code valuesLeftInGroup}
     * and, for RLE groups, {@code rleValue}.
     *
     * @throws ArithmeticException if the value count of a bit-packed group does not fit in an int
     */
    private void readGroupHeader()
    {
        int header = readUleb128Int(input);
        isRle = (header & 1) == 0;
        int headerCount = header >>> 1;
        if (isRle) {
            valuesLeftInGroup = headerCount;
            rleValue = readFixedWidthInt(input, byteWidth);
        }
        else {
            // Only full batches are encoded, the header stores the number of batches.
            // Fail fast on overflow instead of continuing with a negative value count
            valuesLeftInGroup = Math.multiplyExact(headerCount, EXTRACT_BATCH_SIZE);
        }
    }

    private static int byteWidth(int bitWidth)
    {
        // Integer equivalent of Math.ceil(bitWidth / (double) Byte.SIZE), without floating point arithmetic
        return (bitWidth + Byte.SIZE - 1) / Byte.SIZE;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy