// parquet.column.values.bitpacking.ByteBasedBitPackingEncoder Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package parquet.column.values.bitpacking;
import static parquet.Log.DEBUG;
import static parquet.bytes.BytesInput.concat;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import parquet.Log;
import parquet.bytes.BytesInput;
import parquet.bytes.BytesUtils;
/**
 * Uses the generated byte-based bit packing to write ints into a BytesInput.
 *
 * Values are buffered in groups of {@value #VALUES_WRITTEN_AT_A_TIME}; each full
 * group is packed into {@code bitWidth} bytes of the current slab. When a slab
 * is full it is moved to the list of completed slabs and a new one is allocated.
 *
 * This class performs no synchronization of its own.
 *
 * @author Julien Le Dem
 *
 */
public class ByteBasedBitPackingEncoder {

  private static final Log LOG = Log.getLog(ByteBasedBitPackingEncoder.class);

  /** Number of values handed to the packer in one call. */
  private static final int VALUES_WRITTEN_AT_A_TIME = 8;

  private final int bitWidth;
  private final BytePacker packer;
  /** Values waiting to be packed; only the first {@code inputSize} entries are meaningful. */
  private final int[] input = new int[VALUES_WRITTEN_AT_A_TIME];
  private final int slabSize;
  private int inputSize;
  /** The slab currently being filled. */
  private byte[] packed;
  /** Offset in {@code packed} where the next packed group goes. */
  private int packedPosition;
  /** Completed (full) slabs, in write order. */
  private final List<BytesInput> slabs = new ArrayList<BytesInput>();
  /** Number of values already packed into slabs (excludes the pending {@code input} values). */
  private int totalValues;

  /**
   * @param bitWidth the number of bits used to encode an int
   * @param packer the packer factory used to obtain the byte packer for {@code bitWidth}
   */
  public ByteBasedBitPackingEncoder(int bitWidth, Packer packer) {
    this.bitWidth = bitWidth;
    this.inputSize = 0;
    // must be a multiple of bitWidth: every packed group of 8 values takes bitWidth bytes
    this.slabSize = bitWidth * 64 * 1024;
    initPackedSlab();
    this.packer = packer.newBytePacker(bitWidth);
  }

  /**
   * writes an int using the requested number of bits.
   * accepts only value < 2^bitWidth
   * @param value the value to write
   * @throws IOException
   */
  public void writeInt(int value) throws IOException {
    input[inputSize] = value;
    ++inputSize;
    if (inputSize == VALUES_WRITTEN_AT_A_TIME) {
      pack();
      // With bitWidth == 0 the slab size is 0 and packedPosition never moves, so the
      // equality below would hold after every group and append an empty slab each time.
      if (bitWidth > 0 && packedPosition == slabSize) {
        slabs.add(BytesInput.from(packed));
        initPackedSlab();
      }
    }
  }

  /** Packs the buffered group into the current slab and advances the write position. */
  private void pack() {
    packer.pack8Values(input, 0, packed, packedPosition);
    packedPosition += bitWidth;
    totalValues += inputSize;
    inputSize = 0;
  }

  /** Allocates a fresh slab and resets the write position to its start. */
  private void initPackedSlab() {
    packed = new byte[slabSize];
    packedPosition = 0;
  }

  /**
   * Returns everything written so far. A trailing partial group (fewer than 8 pending
   * values) is zero-padded and packed in place, but the encoder's counters are left
   * untouched, so repeated calls return the same bytes and further {@link #writeInt(int)}
   * calls continue the same group.
   *
   * NOTE(review): the last segment is created with {@code BytesInput.from(packed, 0, n)};
   * it presumably wraps the live slab rather than copying it - confirm before writing
   * more values while still holding a previously returned result.
   *
   * @return the bytes representing the packed values
   * @throws IOException
   */
  public BytesInput toBytes() throws IOException {
    int packedByteLength = packedPosition + BytesUtils.paddedByteCountFromBits(inputSize * bitWidth);
    if (DEBUG) {
      LOG.debug("writing " + ((long) slabs.size() * slabSize + packedByteLength) + " bytes");
    }
    if (inputSize > 0) {
      // zero the unused tail so stale values from an earlier group are not packed
      for (int i = inputSize; i < input.length; i++) {
        input[i] = 0;
      }
      // Pack without advancing packedPosition / totalValues / inputSize: the current slab
      // always has room for one more group here because full slabs are rotated in writeInt.
      packer.pack8Values(input, 0, packed, packedPosition);
    }
    return concat(concat(slabs), BytesInput.from(packed, 0, packedByteLength));
  }

  /**
   * Counts both the values already packed and the values still pending in the input
   * buffer, rounded up to whole bytes. The arithmetic is done in {@code long} so large
   * value counts cannot overflow.
   *
   * @return size of the data as it would be written
   */
  public long getBufferSize() {
    long bits = ((long) totalValues + inputSize) * bitWidth;
    return (bits + 7) / 8;
  }

  /**
   * @return total memory allocated
   */
  public long getAllocatedSize() {
    // completed slabs + current slab + the int[] input buffer (4 bytes per int)
    return ((long) slabs.size() * slabSize) + packed.length + input.length * 4;
  }

  /**
   * @param prefix text placed at the start of the returned string
   * @return a one-line summary of the number of completed slabs and the allocated bytes
   */
  public String memUsageString(String prefix) {
    return String.format("%s ByteBitPacking %d slabs, %d bytes", prefix, slabs.size(), getAllocatedSize());
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy