All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.parquet.column.values.bitpacking.ByteBitPackingValuesReader Maven / Gradle / Ivy

/* 
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.parquet.column.values.bitpacking;

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.parquet.bytes.ByteBufferInputStream;
import org.apache.parquet.bytes.BytesUtils;
import org.apache.parquet.column.values.ValuesReader;
import org.apache.parquet.io.ParquetDecodingException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A {@link ValuesReader} for integers that were bit-packed with a fixed bit width.
 *
 * <p>Values are decoded eight at a time with {@code BytePacker.unpack8Values} into a small
 * buffer; {@link #readInteger()} hands them out one by one and refills the buffer whenever
 * it is exhausted. Eight values of {@code bitWidth} bits occupy exactly {@code bitWidth}
 * bytes, which is why every refill consumes {@code bitWidth} bytes from the page stream.
 */
public class ByteBitPackingValuesReader extends ValuesReader {
  private static final int VALUES_AT_A_TIME = 8; // because we're using unpack8Values()

  private static final Logger LOG = LoggerFactory.getLogger(ByteBitPackingValuesReader.class);

  /** Number of bits used to encode each value. */
  private final int bitWidth;
  /** Unpacker created for {@link #bitWidth}. */
  private final BytePacker packer;
  /** The most recently unpacked group of values. */
  private final int[] decoded = new int[VALUES_AT_A_TIME];
  /**
   * Index in {@link #decoded} of the value returned last. It starts on the final slot so
   * that the first call to {@link #readInteger()} triggers a refill.
   */
  private int decodedPosition = VALUES_AT_A_TIME - 1;
  /** Slice of the page stream holding the packed bytes; set by {@link #initFromPage}. */
  private ByteBufferInputStream in;

  /**
   * Creates a reader whose bit width is derived from {@code bound}.
   *
   * @param bound value passed to {@code BytesUtils.getWidthFromMaxInt} to obtain the bit
   *     width (presumably the largest value that can occur; confirm against the writer)
   * @param packer factory used to create the byte packer for that bit width
   */
  public ByteBitPackingValuesReader(int bound, Packer packer) {
    this.bitWidth = BytesUtils.getWidthFromMaxInt(bound);
    this.packer = packer.newBytePacker(bitWidth);
  }

  /**
   * Returns the next value, unpacking the next group of eight values first when the
   * current group has been fully consumed.
   *
   * @return the next decoded value
   * @throws ParquetDecodingException if reading the packed bytes fails with an
   *     {@link IOException}
   */
  @Override
  public int readInteger() {
    ++decodedPosition;
    if (decodedPosition == decoded.length) {
      try {
        if (in.available() < bitWidth) {
          // unpack8Values needs at least bitWidth bytes to read from, so the remaining
          // bytes are copied into a zero-initialised array of that size; the missing
          // trailing bytes therefore read as 0.
          byte[] tempEncode = new byte[bitWidth];
          in.read(tempEncode, 0, in.available());
          packer.unpack8Values(tempEncode, 0, decoded, 0);
        } else {
          ByteBuffer encoded = in.slice(bitWidth);
          packer.unpack8Values(encoded, encoded.position(), decoded, 0);
        }
      } catch (IOException e) {
        throw new ParquetDecodingException("Failed to read packed values", e);
      }
      decodedPosition = 0;
    }
    return decoded[decodedPosition];
  }

  /**
   * Positions this reader on the packed bytes of a page.
   *
   * <p>The number of bytes claimed from {@code stream} is the padded byte size of
   * {@code valueCount} values of {@code bitWidth} bits, capped at the number of bytes the
   * stream still has available.
   *
   * @param valueCount number of values the page is expected to contain
   * @param stream stream positioned at the first packed byte
   * @throws IOException if slicing the stream fails
   */
  @Override
  public void initFromPage(int valueCount, ByteBufferInputStream stream)
      throws IOException {
    // Multiply in long: with a bit width of up to 32, an int product wraps around once
    // valueCount exceeds roughly 67 million and would yield a bogus (possibly negative)
    // length.
    long effectiveBitLength = (long) valueCount * bitWidth;
    long paddedByteCount = (effectiveBitLength + 7) / 8; // ceil to whole bytes
    LOG.debug("reading {} bytes for {} values of size {} bits.",
        paddedByteCount, valueCount, bitWidth);
    // work-around for null values. this will not happen for repetition or
    // definition levels (never null), but will happen when valueCount has not
    // been adjusted for null values in the data.
    // The capped result never exceeds stream.available(), so for a non-negative
    // valueCount it always fits back into an int.
    int length = Math.toIntExact(Math.min(paddedByteCount, (long) stream.available()));
    this.in = stream.sliceStream(length);
    // Force a refill on the next readInteger() so no value from a previous page leaks out.
    this.decodedPosition = VALUES_AT_A_TIME - 1;
    updateNextOffset(length);
  }

  /** Skips one value by reading it and discarding the result. */
  @Override
  public void skip() {
    readInteger();
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy