All downloads are free. The search and download features use the official Maven repository.

org.apache.parquet.column.values.bitpacking.ByteBitPackingValuesReader Maven / Gradle / Ivy

There is a newer version: 1.15.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.parquet.column.values.bitpacking;

import java.io.IOException;
import org.apache.parquet.bytes.ByteBufferInputStream;
import org.apache.parquet.bytes.BytesUtils;
import org.apache.parquet.column.values.ValuesReader;
import org.apache.parquet.io.ParquetDecodingException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A {@link ValuesReader} that decodes bit-packed integers, eight values at a time, from the slice
 * of a page stream handed to {@link #initFromPage(int, ByteBufferInputStream)}.
 *
 * <p>The bit width is derived once from the {@code bound} passed to the constructor. Each group of
 * eight values occupies exactly {@code bitWidth} bytes in the stream. A reader must be initialized
 * with {@code initFromPage} before {@link #readInteger()} or {@link #skip()} is called.
 */
public class ByteBitPackingValuesReader extends ValuesReader {
  private static final int VALUES_AT_A_TIME = 8; // because we're using unpack8Values()

  private static final Logger LOG = LoggerFactory.getLogger(ByteBitPackingValuesReader.class);

  private final int bitWidth;
  private final BytePacker packer;
  // The most recently unpacked group of values.
  private final int[] decoded = new int[VALUES_AT_A_TIME];
  // Index of the last value handed out; starting on the final slot forces a refill on first read.
  private int decodedPosition = VALUES_AT_A_TIME - 1;
  private ByteBufferInputStream in;
  // Scratch buffer holding the packed bytes of one group (8 values * bitWidth bits = bitWidth bytes).
  private final byte[] tempEncode;

  /**
   * @param bound value from which the bit width is derived via
   *     {@link BytesUtils#getWidthFromMaxInt(int)}
   * @param packer factory used to obtain the {@link BytePacker} for that bit width
   */
  public ByteBitPackingValuesReader(int bound, Packer packer) {
    this.bitWidth = BytesUtils.getWidthFromMaxInt(bound);
    this.packer = packer.newBytePacker(bitWidth);
    // Create and retain byte array to avoid object creation in the critical path
    this.tempEncode = new byte[this.bitWidth];
  }

  /**
   * Loads the next group of packed bytes from the stream and unpacks it into {@link #decoded}.
   *
   * <p>If fewer than {@code bitWidth} bytes are obtained (short final group, or a stream that
   * delivers less than requested), the remainder of the scratch buffer is zero-filled so that no
   * bytes from a previous group are decoded again.
   *
   * @throws ParquetDecodingException if the underlying stream fails
   */
  private void readMore() {
    try {
      // Never request more than the slice still holds; the last group may be truncated.
      int wanted = Math.min(in.available(), bitWidth);
      int filled = 0;
      // A single read() is allowed to return fewer bytes than asked for, so keep going until the
      // request is satisfied or the stream reports that nothing more can be delivered.
      while (filled < wanted) {
        int count = in.read(tempEncode, filled, wanted - filled);
        if (count <= 0) {
          break;
        }
        filled += count;
      }
      // Clear the portion of the array we didn't read into
      for (int i = filled; i < bitWidth; i++) {
        tempEncode[i] = 0;
      }

      // The "deprecated" unpacker is faster than using the one that takes ByteBuffer
      packer.unpack8Values(tempEncode, 0, decoded, 0);
    } catch (IOException e) {
      throw new ParquetDecodingException("Failed to read packed values", e);
    }
    decodedPosition = 0;
  }

  /**
   * Returns the next decoded value, unpacking a new group of eight when the current one is
   * exhausted.
   */
  @Override
  public int readInteger() {
    ++decodedPosition;
    if (decodedPosition == decoded.length) {
      readMore();
    }
    return decoded[decodedPosition];
  }

  /**
   * Binds this reader to the leading bytes of {@code stream} that hold {@code valueCount} packed
   * values and resets the decoding position.
   *
   * @param valueCount number of values expected in the page (expected to be non-negative)
   * @param stream page data positioned at the first packed byte
   * @throws IOException if the stream cannot be queried or sliced
   */
  @Override
  public void initFromPage(int valueCount, ByteBufferInputStream stream) throws IOException {
    // Computed in long: valueCount * bitWidth exceeds the int range for large pages, which would
    // otherwise produce a wrong or negative slice length.
    long effectiveBitLength = (long) valueCount * bitWidth;
    long paddedLength = (effectiveBitLength + 7) / 8; // ceil to whole bytes
    LOG.debug("reading {} bytes for {} values of size {} bits.", paddedLength, valueCount, bitWidth);
    // work-around for null values. this will not happen for repetition or
    // definition levels (never null), but will happen when valueCount has not
    // been adjusted for null values in the data.
    // The result is bounded by available(), so narrowing back to int is safe.
    int length = (int) Math.min(paddedLength, stream.available());
    this.in = stream.sliceStream(length);
    this.decodedPosition = VALUES_AT_A_TIME - 1;
    updateNextOffset(length);
  }

  /** Skips one value by decoding and discarding it. */
  @Override
  public void skip() {
    readInteger();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy