All Downloads are FREE. Search and download functionalities are using the official Maven repository.

parquet.column.values.bitpacking.ByteBasedBitPackingEncoder Maven / Gradle / Ivy

/* 
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package parquet.column.values.bitpacking;

import static parquet.Log.DEBUG;
import static parquet.bytes.BytesInput.concat;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import parquet.Log;
import parquet.bytes.BytesInput;
import parquet.bytes.BytesUtils;

/**
 * Uses the generated Byte based bit packing to write ints into a BytesInput
 *
 * @author Julien Le Dem
 *
 */
public class ByteBasedBitPackingEncoder {
  private static final Log LOG = Log.getLog(ByteBasedBitPackingEncoder.class);

  private static final int VALUES_WRITTEN_AT_A_TIME = 8;

  private final int bitWidth;
  private final BytePacker packer;
  private final int[] input = new int[VALUES_WRITTEN_AT_A_TIME];
  private final int slabSize;
  private int inputSize;
  private byte[] packed;
  private int packedPosition;
  private final List slabs = new ArrayList();
  private int totalValues;

  /**
   * @param bitWidth the number of bits used to encode an int
   */
  public ByteBasedBitPackingEncoder(int bitWidth, Packer packer) {
    this.bitWidth = bitWidth;
    this.inputSize = 0;
    // must be a multiple of bitWidth
    this.slabSize = bitWidth * 64 * 1024;
    initPackedSlab();
    this.packer = packer.newBytePacker(bitWidth);
  }

  /**
   * writes an int using the requested number of bits.
   * accepts only value < 2^bitWidth
   * @param value the value to write
   * @throws IOException
   */
  public void writeInt(int value) throws IOException {
    input[inputSize] = value;
    ++ inputSize;
    if (inputSize == VALUES_WRITTEN_AT_A_TIME) {
      pack();
      if (packedPosition == slabSize) {
        slabs.add(BytesInput.from(packed));
        initPackedSlab();
      }
    }
  }

  private void pack() {
    packer.pack8Values(input, 0, packed, packedPosition);
    packedPosition += bitWidth;
    totalValues += inputSize;
    inputSize = 0;
  }

  private void initPackedSlab() {
    packed = new byte[slabSize];
    packedPosition = 0;
  }

  /**
   * @return the bytes representing the packed values
   * @throws IOException
   */
  public BytesInput toBytes() throws IOException {
    int packedByteLength = packedPosition + BytesUtils.paddedByteCountFromBits(inputSize * bitWidth);

    if (DEBUG) LOG.debug("writing " + (slabs.size() * slabSize + packedByteLength) + " bytes");
    if (inputSize > 0) {
      for (int i = inputSize; i < input.length; i++) {
        input[i] = 0;
      }
      pack();
    }
    return concat(concat(slabs), BytesInput.from(packed, 0, packedByteLength));
  }

  /**
   * @return size of the data as it would be written
   */
  public long getBufferSize() {
    return BytesUtils.paddedByteCountFromBits(totalValues * bitWidth);
  }

  /**
   * @return total memory allocated
   */
  public long getAllocatedSize() {
    return (slabs.size() * slabSize) + packed.length + input.length * 4;
  }

  public String memUsageString(String prefix) {
    return String.format("%s ByteBitPacking %d slabs, %d bytes", prefix, slabs.size(), getAllocatedSize());
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy