All downloads are FREE. The search and download functionality uses the official Maven repository.

parquet.encoding.bitpacking.ByteBasedBitPackingGenerator Maven / Gradle / Ivy

The newest version!
/* 
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package parquet.encoding.bitpacking;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;

import parquet.bytes.BytesUtils;

/**
 *
 * This class generates the source code of the byte based bit packers.
 * Two classes are generated: one whose name ends with "BE", which packs the most significant bit first,
 * and one whose name ends with "LE", which packs the least significant bit first.
 * Each generated class contains one packer per bit width, from 0 to PACKER_COUNT inclusive.
 *
 * The result of the generation is checked in. To regenerate the code run this class
 * (its only argument is the base directory of the generated sources) and check in the result.
 *
 * TODO: remove the unnecessary masks for perf
 *
 * @author Julien Le Dem
 *
 */
public class ByteBasedBitPackingGenerator {

  private static final String CLASS_NAME_PREFIX = "ByteBitPacking";
  /** highest bit width for which a packer is generated (packers exist for 0..PACKER_COUNT) */
  private static final int PACKER_COUNT = 32;
  /** package of the generated classes; also determines the directory they are written to */
  private static final String GENERATED_PACKAGE = "parquet.column.values.bitpacking";

  /**
   * Generates the big endian (MSB first) and little endian (LSB first) packer classes.
   *
   * @param args a single element: the base directory under which the sources are generated
   * @throws IllegalArgumentException if the base directory argument is missing
   * @throws Exception if the generated files can not be written
   */
  public static void main(String[] args) throws Exception {
    if (args == null || args.length < 1) {
      throw new IllegalArgumentException(
          "usage: ByteBasedBitPackingGenerator <base path of the generated sources>");
    }
    final String basePath = args[0];
    generateScheme(CLASS_NAME_PREFIX + "BE", true, basePath);
    generateScheme(CLASS_NAME_PREFIX + "LE", false, basePath);
  }

  /**
   * Writes one generated class containing a packer for every bit width and the factory giving access to them.
   *
   * @param className simple name of the class to generate
   * @param msbFirst true to pack the most significant bit first, false for the least significant bit first
   * @param basePath base directory of the generated sources
   * @throws IOException if the target directory can not be created or the file can not be written
   */
  private static void generateScheme(String className, boolean msbFirst, String basePath) throws IOException {
    final String packagePath = GENERATED_PACKAGE.replace('.', '/');
    final File file = new File(basePath + "/" + packagePath + "/" + className + ".java").getAbsoluteFile();
    final File directory = file.getParentFile();
    // the second isDirectory() check covers a directory created concurrently while mkdirs() was running
    if (!directory.isDirectory() && !directory.mkdirs() && !directory.isDirectory()) {
      throw new IOException("could not create directory " + directory);
    }
    final FileWriter fw = new FileWriter(file);
    try {
      // class header
      fw.append("package " + GENERATED_PACKAGE + ";\n");
      fw.append("\n");
      fw.append("/**\n");
      if (msbFirst) {
        fw.append(" * Packs from the Most Significant Bit first\n");
      } else {
        fw.append(" * Packs from the Least Significant Bit first\n");
      }
      fw.append(" * \n");
      fw.append(" * @author automatically generated\n");
      fw.append(" * @see ByteBasedBitPackingGenerator\n");
      fw.append(" *\n");
      fw.append(" */\n");
      fw.append("public abstract class " + className + " {\n");
      fw.append("\n");

      // table of packers indexed by bit width; sized from PACKER_COUNT so it can not get out of sync with the loops
      fw.append("  private static final BytePacker[] packers = new BytePacker[" + (PACKER_COUNT + 1) + "];\n");
      fw.append("  static {\n");
      for (int bitWidth = 0; bitWidth <= PACKER_COUNT; bitWidth++) {
        fw.append("    packers[" + bitWidth + "] = new Packer" + bitWidth + "();\n");
      }
      fw.append("  }\n");
      fw.append("\n");

      // factory
      fw.append("  public static final BytePackerFactory factory = new BytePackerFactory() {\n");
      fw.append("    public BytePacker newBytePacker(int bitWidth) {\n");
      fw.append("      return packers[bitWidth];\n");
      fw.append("    }\n");
      fw.append("  };\n");
      fw.append("\n");

      // one nested packer class per bit width
      for (int bitWidth = 0; bitWidth <= PACKER_COUNT; bitWidth++) {
        generateClass(fw, bitWidth, msbFirst);
        fw.append("\n");
      }
      fw.append("}\n");
    } finally {
      // always release the file handle, including when the generation fails half way
      fw.close();
    }
  }

  /**
   * Writes the nested packer class for one bit width: its constructor and
   * the pack/unpack methods for batches of 8 and 32 values.
   *
   * @param fw the writer of the generated file
   * @param bitWidth the bit width handled by the generated packer
   * @param msbFirst true to pack the most significant bit first
   * @throws IOException if writing fails
   */
  private static void generateClass(FileWriter fw, int bitWidth, boolean msbFirst) throws IOException {
    fw.append("  private static final class Packer" + bitWidth + " extends BytePacker {\n");
    fw.append("\n");
    fw.append("    private Packer" + bitWidth + "() {\n");
    fw.append("      super("+bitWidth+");\n");
    fw.append("    }\n");
    fw.append("\n");
    // Packing
    generatePack(fw, bitWidth, 1, msbFirst);
    generatePack(fw, bitWidth, 4, msbFirst);

    // Unpacking
    generateUnpack(fw, bitWidth, 1, msbFirst);
    generateUnpack(fw, bitWidth, 4, msbFirst);

    fw.append("  }\n");
  }

  /**
   * Computes how the bits of a value line up with the bits of a byte of the packed output
   * and writes an ASCII art comment visualizing this alignment to the generated file.
   *
   * @param fw the writer of the generated file (receives the visualization)
   * @param bitWidth the bit width of the values
   * @param msbFirst true to pack the most significant bit first
   * @param byteIndex index of the byte in the packed output
   * @param valueIndex index of the value in the unpacked input
   * @return the number of bits by which the value is shifted right to be aligned with the byte
   *         (negative when the value is shifted left); the unpacking applies the opposite shift to the byte
   * @throws IOException if writing fails
   */
  private static int getShift(FileWriter fw, int bitWidth, boolean msbFirst,
      int byteIndex, int valueIndex) throws IOException {
    // relative positions of the start and end of the value to the start and end of the byte
    final int valueStartBitIndex = (valueIndex * bitWidth) - (8 * (byteIndex));
    final int valueEndBitIndex = ((valueIndex + 1) * bitWidth) - (8 * (byteIndex + 1));
    final boolean startsBeforeByte = valueStartBitIndex < 0;
    final boolean endsAfterByte = valueEndBitIndex > 0;

    // boundaries when the most significant bit comes first:
    // of the current value that we want
    final int msbValueStart = startsBeforeByte ? bitWidth - 1 + valueStartBitIndex : bitWidth - 1;
    final int msbValueEnd = endsAfterByte ? valueEndBitIndex : 0;
    // and of the current byte that will receive them
    final int msbByteStart = startsBeforeByte ? 8 : 7 - valueStartBitIndex;
    final int msbByteEnd = endsAfterByte ? 0 : -valueEndBitIndex;

    final int valueStartBitWanted;
    final int valueEndBitWanted;
    final int byteStartBitWanted;
    final int byteEndBitWanted;
    final int shift;

    if (msbFirst) {
      valueStartBitWanted = msbValueStart;
      valueEndBitWanted = msbValueEnd;
      byteStartBitWanted = msbByteStart;
      byteEndBitWanted = msbByteEnd;
      shift = valueEndBitWanted - byteEndBitWanted;
    } else {
      // least significant bit first: the boundaries are the mirror image of the MSB ones
      valueStartBitWanted = bitWidth - 1 - msbValueEnd;
      valueEndBitWanted = bitWidth - 1 - msbValueStart;
      byteStartBitWanted = 7 - msbByteEnd;
      byteEndBitWanted = 7 - msbByteStart;
      shift = valueStartBitWanted - byteStartBitWanted;
    }

    visualizeAlignment(
        fw, bitWidth,
        valueStartBitWanted, valueEndBitWanted,
        byteStartBitWanted, byteEndBitWanted,
        shift
        );
    return shift;
  }

  /**
   * Writes a two line comment to the generated file: the first line shows which bits of the byte are used,
   * the second line shows, shifted accordingly, which bits of the value they correspond to.
   *
   * @param fw the writer of the generated file
   * @param bitWidth the bit width of the values
   * @param valueStartBitWanted highest bit of the value that is used
   * @param valueEndBitWanted lowest bit of the value that is used
   * @param byteStartBitWanted highest bit of the byte that is used
   * @param byteEndBitWanted lowest bit of the byte that is used
   * @param shift the shift returned by getShift, used to position the second line under the first
   * @throws IOException if writing fails
   */
  private static void visualizeAlignment(FileWriter fw, int bitWidth,
      int valueStartBitWanted, int valueEndBitWanted,
      int byteStartBitWanted, int byteEndBitWanted, int shift) throws IOException {
    // ASCII art to visualize what is happening
    final int indent = 2 + Math.max(0, bitWidth + 8);

    // first line: the byte
    fw.append("//");
    appendSpaces(fw, indent);
    appendBitRange(fw, 8, byteStartBitWanted, byteEndBitWanted);
    fw.append("\n          //");

    // second line: the value, moved by the shift so that matching bits are in the same column
    appendSpaces(fw, indent + (8 - bitWidth + shift));
    appendBitRange(fw, bitWidth, valueStartBitWanted, valueEndBitWanted);
    fw.append("\n");

    // indentation of the expression that follows the comment
    fw.append("           ");
  }

  /**
   * Writes count spaces; writes nothing when count is not positive.
   */
  private static void appendSpaces(FileWriter fw, int count) throws IOException {
    for (int i = 0; i < count; i++) {
      fw.append(" ");
    }
  }

  /**
   * Writes "[...]" with one character per bit from bit (width - 1) down to bit 0:
   * the last digit of the bit index for the bits between highBit and lowBit (inclusive), "_" for the others.
   */
  private static void appendBitRange(FileWriter fw, int width, int highBit, int lowBit) throws IOException {
    fw.append("[");
    for (int bit = width - 1; bit >= 0; bit--) {
      final boolean used = bit <= highBit && bit >= lowBit;
      fw.append(used ? String.valueOf(bit % 10) : "_");
    }
    fw.append("]");
  }

  /**
   * Writes the method packing (batch * 8) values into (batch * bitWidth) bytes.
   * Each output byte is the bitwise or of the masked and shifted values overlapping it.
   * For a bit width of 0 the generated method has an empty body.
   *
   * @param fw the writer of the generated file
   * @param bitWidth the bit width of the values
   * @param batch the number of groups of 8 values handled by the generated method
   * @param msbFirst true to pack the most significant bit first
   * @throws IOException if writing fails
   */
  private static void generatePack(FileWriter fw, int bitWidth, int batch, boolean msbFirst) throws IOException {
    final int mask = genMask(bitWidth);
    fw.append("    public final void pack" + (batch * 8) + "Values(final int[] in, final int inPos, final byte[] out, final int outPos) {\n");
    final int byteCount = bitWidth * batch;
    for (int byteIndex = 0; byteIndex < byteCount; ++byteIndex) {
      fw.append("      out[" + align(byteIndex, 2) + " + outPos] = (byte)((\n");
      // range of the values having at least one bit in this byte (the end is exclusive, rounded up)
      final int startIndex = (byteIndex * 8) / bitWidth;
      final int endIndex = ((byteIndex + 1) * 8 + bitWidth - 1) / bitWidth;
      for (int valueIndex = startIndex; valueIndex < endIndex; valueIndex++) {
        fw.append(valueIndex == startIndex ? "          " : "\n        | ");
        final int shift = getShift(fw, bitWidth, msbFirst, byteIndex, valueIndex);

        final String shiftString;
        if (shift > 0) {
          shiftString = " >>> " + shift;
        } else if (shift < 0) {
          shiftString = " <<  " + ( - shift);
        } else {
          shiftString = ""; // no shift needed
        }
        fw.append("((in[" + align(valueIndex, 2) + " + inPos] & " + mask + ")" + shiftString + ")");
      }
      fw.append(") & 255);\n");
    }
    fw.append("    }\n");
  }

  /**
   * Writes the method unpacking (batch * 8) values from (batch * bitWidth) bytes.
   * Each output value is the bitwise or of the shifted and masked bytes overlapping it.
   * For a bit width of 0 the generated method has an empty body.
   *
   * @param fw the writer of the generated file
   * @param bitWidth the bit width of the values
   * @param batch the number of groups of 8 values handled by the generated method
   * @param msbFirst true if the most significant bit was packed first
   * @throws IOException if writing fails
   */
  private static void generateUnpack(FileWriter fw, int bitWidth, int batch, boolean msbFirst)
      throws IOException {
    fw.append("    public final void unpack" + (batch * 8) + "Values(final byte[] in, final int inPos, final int[] out, final int outPos) {\n");
    if (bitWidth > 0) {
      final int mask = genMask(bitWidth);
      final int valueCount = batch * 8;
      for (int valueIndex = 0; valueIndex < valueCount; ++valueIndex) {
        fw.append("      out[" + align(valueIndex, 2) + " + outPos] =\n");

        // range of the bytes holding at least one bit of this value
        final int startIndex = valueIndex * bitWidth / 8;
        final int endIndex = BytesUtils.paddedByteCountFromBits((valueIndex + 1) * bitWidth);

        for (int byteIndex = startIndex; byteIndex < endIndex; byteIndex++) {
          fw.append(byteIndex == startIndex ? "          " : "\n        | ");
          final int shift = getShift(fw, bitWidth, msbFirst, byteIndex, valueIndex);

          // the shift is expressed from the point of view of the value: the byte moves the other way
          final String shiftString;
          if (shift < 0) {
            shiftString = ">>>  " + (-shift);
          } else if (shift > 0){
            shiftString = "<<  " + shift;
          } else {
            shiftString = ""; // no shift needed
          }
          fw.append(" (((((int)in[" + align(byteIndex, 2) + " + inPos]) & 255) " + shiftString + ") & " + mask + ")");
        }
        fw.append(";\n");
      }
    }
    fw.append("    }\n");
  }

  /**
   * @param bitWidth the number of bits to keep
   * @return an int whose bitWidth lowest bits are set (0 for a width of 0, all bits set for a width of 32)
   */
  private static int genMask(int bitWidth) {
    int mask = 0;
    for (int i = 0; i < bitWidth; i++) {
      mask = (mask << 1) | 1;
    }
    return mask;
  }

  /**
   * @param value the number to format
   * @param digits the minimum length of the result
   * @return the decimal representation of value, padded with spaces on the left up to digits characters
   */
  private static String align(int value, int digits) {
    final String valueString = String.valueOf(value);
    final StringBuilder result = new StringBuilder();
    for (int i = valueString.length(); i < digits; i++) {
      result.append(" ");
    }
    return result.append(valueString).toString();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy