
parquet.encoding.bitpacking.ByteBasedBitPackingGenerator Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package parquet.encoding.bitpacking;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import parquet.bytes.BytesUtils;
/**
 * Source-code generator for the byte-based bit packers.
 *
 * <p>Running {@link #main(String[])} writes two Java source files: {@code ByteBitPackingBE},
 * whose packers place the most significant bit first, and {@code ByteBitPackingLE}, whose
 * packers place the least significant bit first. Each generated class contains one packer
 * per bit width from 0 to {@code PACKER_COUNT} inclusive.
 *
 * <p>The result of the generation is checked in. To regenerate the code run this class and
 * check in the result.
 *
 * TODO: remove the unnecessary masks for perf
 *
 * @author Julien Le Dem
 */
public class ByteBasedBitPackingGenerator {

  private static final String CLASS_NAME_PREFIX = "ByteBitPacking";

  /** Largest bit width a packer is generated for; widths 0..PACKER_COUNT are all emitted. */
  private static final int PACKER_COUNT = 32;

  /**
   * Generates the BE and LE packer sources.
   *
   * @param args {@code args[0]} is the base directory under which
   *        {@code parquet/column/values/bitpacking/} is created
   * @throws IllegalArgumentException if no base directory is given
   * @throws Exception if a source file cannot be written
   */
  public static void main(String[] args) throws Exception {
    if (args == null || args.length < 1) {
      throw new IllegalArgumentException(
          "usage: " + ByteBasedBitPackingGenerator.class.getName() + " <base output directory>");
    }
    final String basePath = args[0];
    generateScheme(CLASS_NAME_PREFIX + "BE", true, basePath);
    generateScheme(CLASS_NAME_PREFIX + "LE", false, basePath);
  }

  /**
   * Creates (or overwrites) the source file for one generated class and fills it in.
   *
   * @param className simple name of the class to generate; also the file name
   * @param msbFirst {@code true} to pack the most significant bit first
   * @param basePath base directory of the generated sources
   * @throws IOException if the target directory cannot be created or the file cannot be written
   */
  private static void generateScheme(String className, boolean msbFirst, String basePath) throws IOException {
    final File file = new File(basePath + "/parquet/column/values/bitpacking/" + className + ".java").getAbsoluteFile();
    final File parent = file.getParentFile();
    // mkdirs() also returns false when someone else created the directory in the meantime,
    // so only fail if the directory really is not there afterwards.
    if (!parent.exists() && !parent.mkdirs() && !parent.isDirectory()) {
      throw new IOException("could not create directory " + parent);
    }
    final FileWriter fw = new FileWriter(file);
    try {
      writeScheme(fw, className, msbFirst);
    } finally {
      // always release the file handle, even when generation fails half way
      fw.close();
    }
  }

  /** Writes the complete text of one generated class (header, packer table, factory, packers). */
  private static void writeScheme(FileWriter fw, String className, boolean msbFirst) throws IOException {
    fw.append("package parquet.column.values.bitpacking;\n");
    fw.append("\n");
    fw.append("/**\n");
    if (msbFirst) {
      fw.append(" * Packs from the Most Significant Bit first\n");
    } else {
      fw.append(" * Packs from the Least Significant Bit first\n");
    }
    fw.append(" * \n");
    fw.append(" * @author automatically generated\n");
    fw.append(" * @see ByteBasedBitPackingGenerator\n");
    fw.append(" *\n");
    fw.append(" */\n");
    fw.append("public abstract class " + className + " {\n");
    fw.append("\n");
    // one slot per bit width 0..PACKER_COUNT, hence PACKER_COUNT + 1 entries
    fw.append(" private static final BytePacker[] packers = new BytePacker[" + (PACKER_COUNT + 1) + "];\n");
    fw.append(" static {\n");
    for (int i = 0; i <= PACKER_COUNT; i++) {
      fw.append(" packers[" + i + "] = new Packer" + i + "();\n");
    }
    fw.append(" }\n");
    fw.append("\n");
    fw.append(" public static final BytePackerFactory factory = new BytePackerFactory() {\n");
    fw.append(" public BytePacker newBytePacker(int bitWidth) {\n");
    fw.append(" return packers[bitWidth];\n");
    fw.append(" }\n");
    fw.append(" };\n");
    fw.append("\n");
    for (int i = 0; i <= PACKER_COUNT; i++) {
      generateClass(fw, i, msbFirst);
      fw.append("\n");
    }
    fw.append("}\n");
  }

  /**
   * Writes the nested {@code Packer<bitWidth>} class: its constructor followed by the
   * 8- and 32-value pack methods and the 8- and 32-value unpack methods.
   */
  private static void generateClass(FileWriter fw, int bitWidth, boolean msbFirst) throws IOException {
    fw.append(" private static final class Packer" + bitWidth + " extends BytePacker {\n");
    fw.append("\n");
    fw.append(" private Packer" + bitWidth + "() {\n");
    fw.append(" super(" + bitWidth + ");\n");
    fw.append(" }\n");
    fw.append("\n");
    // Packing
    generatePack(fw, bitWidth, 1, msbFirst);
    generatePack(fw, bitWidth, 4, msbFirst);
    // Unpacking
    generateUnpack(fw, bitWidth, 1, msbFirst);
    generateUnpack(fw, bitWidth, 4, msbFirst);
    fw.append(" }\n");
  }

  /**
   * Computes how far a value has to be shifted to line up with a byte, and, as a side
   * effect, writes an ASCII-art comment visualizing that alignment to {@code fw}.
   *
   * <p>The callers interpret the sign of the result: when packing, a positive result is
   * emitted as a right shift of the value and a negative one as a left shift; when
   * unpacking, the directions are reversed because the byte is shifted instead.
   *
   * @param fw writer receiving the visualization comment
   * @param bitWidth number of bits per value
   * @param msbFirst {@code true} to pack the most significant bit first
   * @param byteIndex index of the packed byte
   * @param valueIndex index of the unpacked value
   * @return the shift distance (see above for the meaning of its sign)
   */
  private static int getShift(FileWriter fw, int bitWidth, boolean msbFirst,
      int byteIndex, int valueIndex) throws IOException {
    // relative positions of the start and end of the value to the start and end of the byte:
    // a negative start means the value begins before this byte,
    // a positive end means the value continues past this byte
    int valueStartBitIndex = (valueIndex * bitWidth) - (8 * (byteIndex));
    int valueEndBitIndex = ((valueIndex + 1) * bitWidth) - (8 * (byteIndex + 1));

    // boundaries of the current value that we want
    int valueStartBitWanted;
    int valueEndBitWanted;
    // boundaries of the current byte that will receive them
    int byteStartBitWanted;
    int byteEndBitWanted;

    int shift;

    if (msbFirst) {
      valueStartBitWanted = valueStartBitIndex < 0 ? bitWidth - 1 + valueStartBitIndex : bitWidth - 1;
      valueEndBitWanted = valueEndBitIndex > 0 ? valueEndBitIndex : 0;
      byteStartBitWanted = valueStartBitIndex < 0 ? 8 : 7 - valueStartBitIndex;
      byteEndBitWanted = valueEndBitIndex > 0 ? 0 : -valueEndBitIndex;
      shift = valueEndBitWanted - byteEndBitWanted;
    } else {
      // same boundaries as above, mirrored within the value and within the byte
      valueStartBitWanted = bitWidth - 1 - (valueEndBitIndex > 0 ? valueEndBitIndex : 0);
      valueEndBitWanted = bitWidth - 1 - (valueStartBitIndex < 0 ? bitWidth - 1 + valueStartBitIndex : bitWidth - 1);
      byteStartBitWanted = 7 - (valueEndBitIndex > 0 ? 0 : -valueEndBitIndex);
      byteEndBitWanted = 7 - (valueStartBitIndex < 0 ? 8 : 7 - valueStartBitIndex);
      shift = valueStartBitWanted - byteStartBitWanted;
    }

    visualizeAlignment(
        fw, bitWidth,
        valueStartBitWanted, valueEndBitWanted,
        byteStartBitWanted, byteEndBitWanted,
        shift
        );
    return shift;
  }

  /**
   * Writes a two-line comment showing which bits of the byte (first line) receive which
   * bits of the value (second line); bits outside the wanted ranges are drawn as {@code _}.
   * The second line is offset by the shift so that matching bits sit in the same column.
   */
  private static void visualizeAlignment(FileWriter fw, int bitWidth,
      int valueStartBitWanted, int valueEndBitWanted,
      int byteStartBitWanted, int byteEndBitWanted, int shift) throws IOException {
    // ASCII art to visualize what is happening
    fw.append("//");
    int buf = 2 + Math.max(0, bitWidth + 8);
    for (int i = 0; i < buf; i++) {
      fw.append(" ");
    }
    fw.append("[");
    for (int i = 7; i >= 0; i--) {
      if (i <= byteStartBitWanted && i >= byteEndBitWanted) {
        fw.append(String.valueOf(i));
      } else {
        fw.append("_");
      }
    }
    fw.append("]\n //");
    for (int i = 0; i < buf + (8 - bitWidth + shift); i++) {
      fw.append(" ");
    }
    fw.append("[");
    for (int i = bitWidth - 1; i >= 0; i--) {
      if (i <= valueStartBitWanted && i >= valueEndBitWanted) {
        // only the last digit, so every bit occupies exactly one column
        fw.append(String.valueOf(i % 10));
      } else {
        fw.append("_");
      }
    }
    fw.append("]\n");
    fw.append(" ");
  }

  /**
   * Writes a {@code pack<N>Values} method, where N is {@code batch * 8}. The method body
   * assigns {@code bitWidth * batch} output bytes, each one the OR of the (masked and
   * shifted) input values that overlap it.
   */
  private static void generatePack(FileWriter fw, int bitWidth, int batch, boolean msbFirst) throws IOException {
    int mask = genMask(bitWidth);
    fw.append(" public final void pack" + (batch * 8) + "Values(final int[] in, final int inPos, final byte[] out, final int outPos) {\n");
    // for bitWidth == 0 this loop does not run, so the divisions below never see a zero divisor
    for (int byteIndex = 0; byteIndex < bitWidth * batch; ++byteIndex) {
      fw.append(" out[" + align(byteIndex, 2) + " + outPos] = (byte)((\n");
      // first value overlapping this byte (inclusive) and last one (exclusive, rounded up)
      int startIndex = (byteIndex * 8) / bitWidth;
      int endIndex = ((byteIndex + 1) * 8 + bitWidth - 1) / bitWidth;
      for (int valueIndex = startIndex; valueIndex < endIndex; valueIndex++) {
        if (valueIndex == startIndex) {
          fw.append(" ");
        } else {
          fw.append("\n | ");
        }
        // note: getShift also writes the visualization comment in front of the expression
        int shift = getShift(fw, bitWidth, msbFirst, byteIndex, valueIndex);

        String shiftString = ""; // used when shift == 0
        if (shift > 0) {
          shiftString = " >>> " + shift;
        } else if (shift < 0) {
          shiftString = " << " + (-shift);
        }
        fw.append("((in[" + align(valueIndex, 2) + " + inPos] & " + mask + ")" + shiftString + ")");
      }
      fw.append(") & 255);\n");
    }
    fw.append(" }\n");
  }

  /**
   * Writes an {@code unpack<N>Values} method, where N is {@code batch * 8}. For a positive
   * bit width the method body assigns N output values, each one the OR of the (shifted and
   * masked) input bytes that overlap it; for bit width 0 the generated body is empty.
   */
  private static void generateUnpack(FileWriter fw, int bitWidth, int batch, boolean msbFirst)
      throws IOException {
    fw.append(" public final void unpack" + (batch * 8) + "Values(final byte[] in, final int inPos, final int[] out, final int outPos) {\n");
    if (bitWidth > 0) {
      int mask = genMask(bitWidth);
      for (int valueIndex = 0; valueIndex < (batch * 8); ++valueIndex) {
        fw.append(" out[" + align(valueIndex, 2) + " + outPos] =\n");

        // range of bytes overlapping this value: [startIndex, endIndex)
        int startIndex = valueIndex * bitWidth / 8;
        int endIndex = BytesUtils.paddedByteCountFromBits((valueIndex + 1) * bitWidth);

        for (int byteIndex = startIndex; byteIndex < endIndex; byteIndex++) {
          if (byteIndex == startIndex) {
            fw.append(" ");
          } else {
            fw.append("\n | ");
          }
          // note: getShift also writes the visualization comment in front of the expression
          int shift = getShift(fw, bitWidth, msbFirst, byteIndex, valueIndex);

          // the byte is shifted here, so the directions are the opposite of generatePack
          String shiftString = ""; // when shift == 0
          if (shift < 0) {
            shiftString = ">>> " + (-shift);
          } else if (shift > 0) {
            shiftString = "<< " + shift;
          }
          fw.append(" (((((int)in[" + align(byteIndex, 2) + " + inPos]) & 255) " + shiftString + ") & " + mask + ")");
        }
        fw.append(";\n");
      }
    }
    fw.append(" }\n");
  }

  /**
   * Builds a mask with the {@code bitWidth} lowest bits set.
   *
   * @return 0 for {@code bitWidth == 0}; -1 (all 32 bits set) for {@code bitWidth == 32}
   */
  private static int genMask(int bitWidth) {
    int mask = 0;
    for (int i = 0; i < bitWidth; i++) {
      mask <<= 1;
      mask |= 1;
    }
    return mask;
  }

  /**
   * Renders {@code value} in decimal, left-padded with spaces to at least {@code digits}
   * characters; longer numbers are returned unpadded.
   */
  private static String align(int value, int digits) {
    final String valueString = String.valueOf(value);
    StringBuilder result = new StringBuilder();
    for (int i = valueString.length(); i < digits; i++) {
      result.append(" ");
    }
    result.append(valueString);
    return result.toString();
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy