All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.parquet.column.values.bitpacking.ByteBasedBitPackingEncoder Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.parquet.column.values.bitpacking;

import org.apache.parquet.bytes.BytesInput;
import org.apache.parquet.bytes.BytesUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import static org.apache.parquet.bytes.BytesInput.concat;

/**
 * Uses the generated Byte based bit packing to write ints into a BytesInput.
 *
 * <p>Values are buffered in groups of 8. Each complete group is packed into
 * {@code bitWidth} bytes of the current slab. When a slab is full it is moved to
 * the list of finished slabs and a new one is allocated; the slab size doubles
 * on each allocation until it reaches {@code bitWidth * MAX_SLAB_SIZE_MULT}.
 */
public class ByteBasedBitPackingEncoder {
  private static final Logger LOG = LoggerFactory.getLogger(ByteBasedBitPackingEncoder.class);

  /** Number of values handed to the packer in one call. */
  private static final int VALUES_WRITTEN_AT_A_TIME = 8;
  /** Slabs stop growing once they reach bitWidth * MAX_SLAB_SIZE_MULT bytes. */
  private static final int MAX_SLAB_SIZE_MULT = 64 * 1024;
  /** The first slab is bitWidth * INITIAL_SLAB_SIZE_MULT bytes. */
  private static final int INITIAL_SLAB_SIZE_MULT = 1024;

  private final int bitWidth;
  private final BytePacker packer;
  /** Values waiting for the group of 8 to be complete. */
  private final int[] input = new int[VALUES_WRITTEN_AT_A_TIME];
  /** Size in bytes of the slab currently being filled. */
  private int slabSize;
  /** Sum of the sizes of all slabs already moved to {@link #slabs}. */
  private long totalFullSlabSize;
  /** Number of valid entries in {@link #input}. */
  private int inputSize;
  /** The slab currently being filled. */
  private byte[] packed;
  /** Offset in {@link #packed} at which the next group will be written. */
  private int packedPosition;
  /** Completely filled slabs, in write order. */
  private final List<BytesInput> slabs = new ArrayList<BytesInput>();
  /** Number of values already packed; kept as a long so size computations cannot overflow. */
  private long totalValues;

  /**
   * @param bitWidth the number of bits used to encode an int
   * @param packer factory for bit packing implementations
   */
  public ByteBasedBitPackingEncoder(int bitWidth, Packer packer) {
    this.bitWidth = bitWidth;
    this.inputSize = 0;
    this.totalFullSlabSize = 0;
    // must be a multiple of bitWidth so that a slab always ends on a group boundary;
    // with bitWidth == 0 nothing is ever written, a 1 byte slab is enough
    this.slabSize = (bitWidth == 0) ? 1 : (bitWidth * INITIAL_SLAB_SIZE_MULT);
    initPackedSlab();
    this.packer = packer.newBytePacker(bitWidth);
  }

  /**
   * writes an int using the requested number of bits.
   * accepts only values less than 2^bitWidth
   * @param value the value to write
   * @throws IOException if there is an exception while writing
   */
  public void writeInt(int value) throws IOException {
    input[inputSize] = value;
    ++ inputSize;
    if (inputSize == VALUES_WRITTEN_AT_A_TIME) {
      pack();
      if (packedPosition == slabSize) {
        // current slab is full: archive it and start a new, possibly larger, one
        slabs.add(BytesInput.from(packed));
        totalFullSlabSize += slabSize;
        if (slabSize < bitWidth * MAX_SLAB_SIZE_MULT) {
          slabSize *= 2;
        }
        initPackedSlab();
      }
    }
  }

  /**
   * Packs the buffered group into the current slab, advances the write position
   * by one group (bitWidth bytes) and empties the input buffer.
   */
  private void pack() {
    packer.pack8Values(input, 0, packed, packedPosition);
    packedPosition += bitWidth;
    totalValues += inputSize;
    inputSize = 0;
  }

  /** Allocates a fresh slab of the current slab size and rewinds the write position. */
  private void initPackedSlab() {
    packed = new byte[slabSize];
    packedPosition = 0;
  }

  /**
   * Returns all finished slabs followed by the used part of the current slab.
   * A partially filled last group is padded with zeros and only the bytes needed
   * for the values actually written are exposed.
   *
   * <p>This method does not change the state of the encoder: calling it again
   * without writing in between yields the same bytes.
   *
   * @return the bytes representing the packed values
   * @throws IOException if there is an exception while creating the BytesInput
   */
  public BytesInput toBytes() throws IOException {
    int packedByteLength = packedPosition + BytesUtils.paddedByteCountFromBits(inputSize * bitWidth);

    LOG.debug("writing {} bytes", (totalFullSlabSize + packedByteLength));
    if (inputSize > 0) {
      for (int i = inputSize; i < input.length; i++) {
        input[i] = 0;
      }
      // Pack the zero padded partial group in place without going through pack():
      // advancing packedPosition / resetting inputSize here would make a second
      // call return extra padding bytes and could let a later write overrun the slab.
      // There is always room for one group because packedPosition < slabSize and
      // both are multiples of bitWidth.
      packer.pack8Values(input, 0, packed, packedPosition);
    }
    return concat(concat(slabs), BytesInput.from(packed, 0, packedByteLength));
  }

  /**
   * @return size of the data as it would be written
   */
  public long getBufferSize() {
    // computed in long: an int bit count overflows for large value counts
    long bitCount = (totalValues + inputSize) * bitWidth;
    // round up to whole bytes
    return (bitCount + 7) / 8;
  }

  /**
   * @return total memory allocated
   */
  public long getAllocatedSize() {
    // finished slabs + current slab + the int input buffer (4 bytes per int)
    return totalFullSlabSize + packed.length + input.length * 4;
  }

  /**
   * @param prefix text put in front of the description
   * @return a one line description of the number of finished slabs and the allocated size
   */
  public String memUsageString(String prefix) {
    return String.format("%s ByteBitPacking %d slabs, %d bytes", prefix, slabs.size(), getAllocatedSize());
  }

  /**
   * @return number of full slabs along with the current slab (debug aid)
   */
  int getNumSlabs() {
    return slabs.size() + 1;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy