All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.parquet.column.values.bitpacking.BitPacking Maven / Gradle / Ivy

There is a newer version: 1.15.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.parquet.column.values.bitpacking;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.parquet.bytes.BytesUtils;
import org.apache.parquet.column.values.bitpacking.BitPacking.BitPackingReader;
import org.apache.parquet.column.values.bitpacking.BitPacking.BitPackingWriter;

// TODO: rework the whole thing. It does not need to use streams at all

/**
 * Provides the bit-packing writer or reader implementation that matches a given width in bits.
 * Widths from 0 to 8 bits (inclusive) are supported; any other width is rejected.
 */
public class BitPacking {

  /**
   * Writes ints to a stream, packed so that each value only takes the needed bits.
   * There is no guarantee of correctness if ints larger than the max size are written.
   */
  public abstract static class BitPackingWriter {
    /**
     * Writes the bits to the underlying stream, aligned on the buffer size.
     *
     * @param val the value to encode
     * @throws IOException if there is an exception while writing
     */
    public abstract void write(int val) throws IOException;

    /**
     * Flushes the buffer to the underlying stream (and pads with 0s).
     *
     * @throws IOException if there is an exception while finishing
     */
    public abstract void finish() throws IOException;
  }

  /**
   * Reads back what has been written with the corresponding writer.
   */
  public abstract static class BitPackingReader {

    /**
     * @return an int decoded from the underlying stream
     * @throws IOException if there is an exception while reading
     */
    public abstract int read() throws IOException;
  }

  // static factory holder, never instantiated
  private BitPacking() {}

  /**
   * @param bitLength the width in bits of the integers to write (0 to 8)
   * @param out       the stream to write the bytes to
   * @return the correct implementation for the width
   * @throws UnsupportedOperationException if {@code bitLength} is not in the range 0 to 8
   */
  public static BitPackingWriter getBitPackingWriter(int bitLength, OutputStream out) {
    switch (bitLength) {
      case 0:
        // zero-width values carry no information, so the stream is not needed
        return new ZeroBitPackingWriter();
      case 1:
        return new OneBitPackingWriter(out);
      case 2:
        return new TwoBitPackingWriter(out);
      case 3:
        return new ThreeBitPackingWriter(out);
      case 4:
        return new FourBitPackingWriter(out);
      case 5:
        return new FiveBitPackingWriter(out);
      case 6:
        return new SixBitPackingWriter(out);
      case 7:
        return new SevenBitPackingWriter(out);
      case 8:
        return new EightBitPackingWriter(out);
      default:
        throw unsupportedBitLength(bitLength);
    }
  }

  /**
   * @param bitLength  the width in bits of the integers to read (0 to 8)
   * @param in         the stream to read the bytes from
   * @param valueCount the total number of values that will be read; it is only handed to the
   *                   3, 5, 6 and 7 bit readers, the other widths do not receive it
   * @return the correct implementation for the width
   * @throws UnsupportedOperationException if {@code bitLength} is not in the range 0 to 8
   */
  public static BitPackingReader createBitPackingReader(int bitLength, InputStream in, long valueCount) {
    switch (bitLength) {
      case 0:
        // zero-width values carry no information, so the stream is not needed
        return new ZeroBitPackingReader();
      case 1:
        return new OneBitPackingReader(in);
      case 2:
        return new TwoBitPackingReader(in);
      case 3:
        return new ThreeBitPackingReader(in, valueCount);
      case 4:
        return new FourBitPackingReader(in);
      case 5:
        return new FiveBitPackingReader(in, valueCount);
      case 6:
        return new SixBitPackingReader(in, valueCount);
      case 7:
        return new SevenBitPackingReader(in, valueCount);
      case 8:
        return new EightBitPackingReader(in);
      default:
        throw unsupportedBitLength(bitLength);
    }
  }

  /**
   * Builds the exception thrown for a width that has no implementation, naming the rejected
   * width so that the failure can be diagnosed (this also covers negative widths).
   */
  private static UnsupportedOperationException unsupportedBitLength(int bitLength) {
    return new UnsupportedOperationException(
        "only support up to 8 for now (requested bit length: " + bitLength + ")");
  }
}

/**
 * Base for writers that need to flush a partially filled buffer: the pending bits are padded
 * with trailing zero bits up to the next byte boundary and written most significant byte first.
 */
abstract class BaseBitPackingWriter extends BitPackingWriter {

  /**
   * Flushes the pending bits held in an int buffer.
   *
   * @param numberOfBits how many of the low-order bits of {@code buffer} are pending
   * @param buffer       the pending bits
   * @param out          the stream receiving the padded bytes
   * @throws IOException if writing to {@code out} fails
   */
  void finish(int numberOfBits, int buffer, OutputStream out) throws IOException {
    final int remainder = numberOfBits % 8;
    int padding = 0;
    if (remainder != 0) {
      padding = 8 - remainder;
    }
    // shift the pending bits left so the zero padding ends up in the low-order bits
    final int aligned = buffer << padding;
    final int numberOfBytes = (numberOfBits + padding) / 8;
    for (int byteIndex = numberOfBytes - 1; byteIndex >= 0; byteIndex--) {
      out.write((aligned >>> (byteIndex * 8)) & 0xFF);
    }
  }

  /**
   * Flushes the pending bits held in a long buffer.
   *
   * @param numberOfBits how many of the low-order bits of {@code buffer} are pending
   * @param buffer       the pending bits
   * @param out          the stream receiving the padded bytes
   * @throws IOException if writing to {@code out} fails
   */
  void finish(int numberOfBits, long buffer, OutputStream out) throws IOException {
    final int remainder = numberOfBits % 8;
    int padding = 0;
    if (remainder != 0) {
      padding = 8 - remainder;
    }
    // shift the pending bits left so the zero padding ends up in the low-order bits
    final long aligned = buffer << padding;
    final int numberOfBytes = (numberOfBits + padding) / 8;
    for (int byteIndex = numberOfBytes - 1; byteIndex >= 0; byteIndex--) {
      out.write((int) (aligned >>> (byteIndex * 8)) & 0xFF);
    }
  }
}

/**
 * Base for readers that have to work out how many bytes hold a trailing, partially filled group.
 */
abstract class BaseBitPackingReader extends BitPackingReader {

  /**
   * Delegates to {@code BytesUtils.paddedByteCountFromBits}.
   *
   * @param bitsCount a number of bits
   * @return the byte count reported by {@code BytesUtils} for that many bits
   *         (presumably rounded up to whole bytes; confirm against BytesUtils)
   */
  int alignToBytes(int bitsCount) {
    final int byteCount = BytesUtils.paddedByteCountFromBits(bitsCount);
    return byteCount;
  }
}

/**
 * Writer for zero-width values: it holds no stream and never emits anything.
 */
class ZeroBitPackingWriter extends BitPackingWriter {

  /** Does nothing: there is no buffer to flush. */
  @Override
  public void finish() {
    // intentionally empty
  }

  /** Ignores the value: a zero-width value takes no bits. */
  @Override
  public void write(int val) throws IOException {
    // intentionally empty
  }
}

/**
 * Reader for zero-width values: it holds no stream and every read yields 0.
 */
class ZeroBitPackingReader extends BitPackingReader {

  /** The only value that can be decoded from zero bits. */
  private static final int ONLY_VALUE = 0;

  /**
   * @return always 0
   */
  @Override
  public int read() throws IOException {
    return ONLY_VALUE;
  }
}

/**
 * Packs 1 bit values, eight per byte, the first value written ending up in the highest bit.
 */
class OneBitPackingWriter extends BitPackingWriter {

  private OutputStream out;

  // bits accumulated so far for the current byte
  private int buffer = 0;
  // number of values accumulated in buffer
  private int count = 0;

  public OneBitPackingWriter(OutputStream out) {
    this.out = out;
  }

  /**
   * Appends one value and emits a byte once eight values are pending.
   * The value is OR-ed in unmasked, so it is expected to fit in 1 bit.
   */
  @Override
  public void write(int val) throws IOException {
    buffer = (buffer << 1) | val;
    count++;
    if (count != 8) {
      return;
    }
    out.write(buffer);
    buffer = 0;
    count = 0;
  }

  /**
   * Completes the current byte with zero values, then drops the stream reference.
   */
  @Override
  public void finish() throws IOException {
    // each write(0) moves count forward until the byte is flushed and count returns to 0
    while (count != 0) {
      write(0);
    }
    // check this does not impede perf
    out = null;
  }
}

/**
 * Unpacks 1 bit values, eight per byte, starting with the highest bit of each byte.
 */
class OneBitPackingReader extends BitPackingReader {

  private final InputStream in;

  // the byte currently being decoded
  private int buffer = 0;
  // number of values not yet returned from buffer
  private int count = 0;

  public OneBitPackingReader(InputStream in) {
    this.in = in;
  }

  /**
   * @return the next 1 bit value, fetching a new byte from the stream when the current one
   *         is used up
   */
  @Override
  public int read() throws IOException {
    if (count == 0) {
      // NOTE(review): the result of in.read() is not checked for end of stream
      buffer = in.read();
      count = 8;
    }
    count--;
    return (buffer >> count) & 1;
  }
}

/**
 * Packs 2 bit values, four per byte, the first value written ending up in the highest bits.
 */
class TwoBitPackingWriter extends BitPackingWriter {

  private OutputStream out;

  // bits accumulated so far for the current byte
  private int buffer = 0;
  // number of values accumulated in buffer
  private int count = 0;

  public TwoBitPackingWriter(OutputStream out) {
    this.out = out;
  }

  /**
   * Appends one value and emits a byte once four values are pending.
   * The value is OR-ed in unmasked, so it is expected to fit in 2 bits.
   */
  @Override
  public void write(int val) throws IOException {
    buffer = (buffer << 2) | val;
    count++;
    if (count != 4) {
      return;
    }
    out.write(buffer);
    buffer = 0;
    count = 0;
  }

  /**
   * Completes the current byte with zero values, then drops the stream reference.
   */
  @Override
  public void finish() throws IOException {
    // each write(0) moves count forward until the byte is flushed and count returns to 0
    while (count != 0) {
      write(0);
    }
    // check this does not impede perf
    out = null;
  }
}

/**
 * Unpacks 2 bit values, four per byte, starting with the highest bits of each byte.
 */
class TwoBitPackingReader extends BitPackingReader {

  private final InputStream in;

  // the byte currently being decoded
  private int buffer = 0;
  // number of values not yet returned from buffer
  private int count = 0;

  public TwoBitPackingReader(InputStream in) {
    this.in = in;
  }

  /**
   * @return the next 2 bit value, fetching a new byte from the stream when the current one
   *         is used up
   */
  @Override
  public int read() throws IOException {
    if (count == 0) {
      buffer = in.read();
      count = 4;
    }
    count--;
    return (buffer >> (count * 2)) & 3;
  }
}

/**
 * Packs 3 bit values in groups of eight values, each full group taking three bytes.
 */
class ThreeBitPackingWriter extends BaseBitPackingWriter {

  private OutputStream out;

  // bits accumulated so far for the current group
  private int buffer = 0;
  // number of values accumulated in buffer
  private int count = 0;

  public ThreeBitPackingWriter(OutputStream out) {
    this.out = out;
  }

  /**
   * Appends one value and emits three bytes, most significant first, once eight values are
   * pending. The value is OR-ed in unmasked, so it is expected to fit in 3 bits.
   */
  @Override
  public void write(int val) throws IOException {
    buffer = (buffer << 3) | val;
    count++;
    if (count != 8) {
      return;
    }
    for (int shift = 16; shift >= 0; shift -= 8) {
      out.write((buffer >>> shift) & 0xFF);
    }
    buffer = 0;
    count = 0;
  }

  /**
   * Flushes an incomplete group through the inherited helper (which pads it to whole bytes),
   * then drops the stream reference.
   */
  @Override
  public void finish() throws IOException {
    final boolean hasPendingValues = count != 0;
    if (hasPendingValues) {
      finish(count * 3, buffer, out);
      buffer = 0;
      count = 0;
    }
    // check this does not impede perf
    out = null;
  }
}

/**
 * Unpacks 3 bit values stored in groups of eight values (three bytes per full group).
 * The expected number of values is used to read only the bytes present for the last,
 * possibly incomplete, group.
 */
class ThreeBitPackingReader extends BaseBitPackingReader {

  private final InputStream in;
  private final long valueCount;

  // the group currently being decoded
  private int buffer = 0;
  // number of value slots not yet returned from buffer
  private int count = 0;

  // number of values accounted for by the groups loaded so far
  private long totalRead = 0;

  public ThreeBitPackingReader(InputStream in, long valueCount) {
    this.in = in;
    this.valueCount = valueCount;
  }

  /**
   * @return the next 3 bit value, loading a new group from the stream when the current one
   *         is used up
   */
  @Override
  public int read() throws IOException {
    if (count == 0) {
      loadNextGroup();
    }
    count--;
    return (buffer >> (count * 3)) & 7;
  }

  /** Fills the buffer with the next group, full or trailing. */
  private void loadNextGroup() throws IOException {
    final long remaining = valueCount - totalRead;
    if (remaining >= 8) {
      final int high = in.read();
      final int middle = in.read();
      final int low = in.read();
      buffer = (high << 16) + (middle << 8) + low;
      count = 8;
      totalRead += 8;
      return;
    }
    // trailing group: only the bytes needed for the remaining values are read,
    // and they fill the buffer from its most significant byte down
    buffer = 0;
    final int bytesToRead = alignToBytes(3 * (int) remaining);
    int shift = 16;
    for (int n = 0; n < bytesToRead; n++) {
      buffer |= in.read() << shift;
      shift -= 8;
    }
    count = 8;
    totalRead = valueCount;
  }
}

/**
 * Packs 4 bit values, two per byte, the first value written ending up in the high nibble.
 */
class FourBitPackingWriter extends BitPackingWriter {

  private OutputStream out;

  // bits accumulated so far for the current byte
  private int buffer = 0;
  // number of values accumulated in buffer
  private int count = 0;

  public FourBitPackingWriter(OutputStream out) {
    this.out = out;
  }

  /**
   * Appends one value and emits a byte once two values are pending.
   * The value is OR-ed in unmasked, so it is expected to fit in 4 bits.
   */
  @Override
  public void write(int val) throws IOException {
    buffer = (buffer << 4) | val;
    count++;
    if (count != 2) {
      return;
    }
    out.write(buffer);
    buffer = 0;
    count = 0;
  }

  /**
   * Completes the current byte with zero values, then drops the stream reference.
   */
  @Override
  public void finish() throws IOException {
    // downside: this aligns on whatever the buffer size is.
    while (count != 0) {
      write(0);
    }
    // check this does not impede perf
    out = null;
  }
}

/**
 * Unpacks 4 bit values, two per byte, starting with the high nibble of each byte.
 */
class FourBitPackingReader extends BitPackingReader {

  private final InputStream in;

  // the byte currently being decoded
  private int buffer = 0;
  // number of values not yet returned from buffer
  private int count = 0;

  public FourBitPackingReader(InputStream in) {
    this.in = in;
  }

  /**
   * @return the next 4 bit value, fetching a new byte from the stream when the current one
   *         is used up
   */
  @Override
  public int read() throws IOException {
    if (count == 0) {
      buffer = in.read();
      count = 2;
    }
    count--;
    return (buffer >> (count * 4)) & 15;
  }
}

/**
 * Packs 5 bit values in groups of eight values, each full group taking five bytes.
 */
class FiveBitPackingWriter extends BaseBitPackingWriter {

  private OutputStream out;

  // bits accumulated so far for the current group (40 bits when full, hence a long)
  private long buffer = 0;
  // number of values accumulated in buffer
  private int count = 0;

  public FiveBitPackingWriter(OutputStream out) {
    this.out = out;
  }

  /**
   * Appends one value and emits five bytes, most significant first, once eight values are
   * pending. The value is OR-ed in unmasked, so it is expected to fit in 5 bits.
   */
  @Override
  public void write(int val) throws IOException {
    buffer = buffer << 5;
    buffer |= val;
    count++;
    if (count != 8) {
      return;
    }
    for (int shift = 32; shift >= 0; shift -= 8) {
      out.write((int) (buffer >>> shift) & 0xFF);
    }
    buffer = 0;
    count = 0;
  }

  /**
   * Flushes an incomplete group through the inherited helper (which pads it to whole bytes),
   * then drops the stream reference.
   */
  @Override
  public void finish() throws IOException {
    final boolean hasPendingValues = count != 0;
    if (hasPendingValues) {
      finish(count * 5, buffer, out);
      buffer = 0;
      count = 0;
    }
    // check this does not impede perf
    out = null;
  }
}

/**
 * Unpacks 5 bit values stored in groups of eight values (five bytes per full group).
 * The expected number of values is used to read only the bytes present for the last,
 * possibly incomplete, group.
 */
class FiveBitPackingReader extends BaseBitPackingReader {

  private final InputStream in;
  private final long valueCount;

  // the group currently being decoded (40 bits when full, hence a long)
  private long buffer = 0;
  // number of value slots not yet returned from buffer
  private int count = 0;
  // number of values accounted for by the groups loaded so far
  private long totalRead = 0;

  public FiveBitPackingReader(InputStream in, long valueCount) {
    this.in = in;
    this.valueCount = valueCount;
  }

  /**
   * @return the next 5 bit value, loading a new group from the stream when the current one
   *         is used up
   */
  @Override
  public int read() throws IOException {
    if (count == 0) {
      loadNextGroup();
    }
    count--;
    return ((int) (buffer >> (count * 5))) & 31;
  }

  /** Fills the buffer with the next group, full or trailing. */
  private void loadNextGroup() throws IOException {
    final long remaining = valueCount - totalRead;
    if (remaining >= 8) {
      // the two highest bytes are widened to long before shifting past 32 bits
      final long byte4 = (((long) in.read()) & 255) << 32;
      final long byte3 = (((long) in.read()) & 255) << 24;
      final int byte2 = in.read() << 16;
      final int byte1 = in.read() << 8;
      final int byte0 = in.read();
      buffer = byte4 + byte3 + byte2 + byte1 + byte0;
      count = 8;
      totalRead += 8;
      return;
    }
    // trailing group: only the bytes needed for the remaining values are read,
    // and they fill the buffer from its most significant byte down
    buffer = 0;
    final int bytesToRead = alignToBytes(5 * (int) remaining);
    int shift = 32;
    for (int n = 0; n < bytesToRead; n++) {
      buffer |= (((long) in.read()) & 255) << shift;
      shift -= 8;
    }
    count = 8;
    totalRead = valueCount;
  }
}

/**
 * Packs 6 bit values in groups of four values, each full group taking three bytes.
 */
class SixBitPackingWriter extends BaseBitPackingWriter {

  private OutputStream out;

  // bits accumulated so far for the current group
  private int buffer = 0;
  // number of values accumulated in buffer
  private int count = 0;

  public SixBitPackingWriter(OutputStream out) {
    this.out = out;
  }

  /**
   * Appends one value and emits three bytes, most significant first, once four values are
   * pending. The value is OR-ed in unmasked, so it is expected to fit in 6 bits.
   */
  @Override
  public void write(int val) throws IOException {
    buffer = (buffer << 6) | val;
    count++;
    if (count != 4) {
      return;
    }
    for (int shift = 16; shift >= 0; shift -= 8) {
      out.write((buffer >>> shift) & 0xFF);
    }
    buffer = 0;
    count = 0;
  }

  /**
   * Flushes an incomplete group through the inherited helper (which pads it to whole bytes),
   * then drops the stream reference.
   */
  @Override
  public void finish() throws IOException {
    final boolean hasPendingValues = count != 0;
    if (hasPendingValues) {
      finish(count * 6, buffer, out);
      buffer = 0;
      count = 0;
    }
    // check this does not impede perf
    out = null;
  }
}

/**
 * Unpacks 6 bit values stored in groups of four values (three bytes per full group).
 * The expected number of values is used to read only the bytes present for the last,
 * possibly incomplete, group.
 */
class SixBitPackingReader extends BaseBitPackingReader {

  private final InputStream in;
  private final long valueCount;

  // the group currently being decoded
  private int buffer = 0;
  // number of value slots not yet returned from buffer
  private int count = 0;

  // number of values accounted for by the groups loaded so far
  private long totalRead = 0;

  public SixBitPackingReader(InputStream in, long valueCount) {
    this.in = in;
    this.valueCount = valueCount;
  }

  /**
   * @return the next 6 bit value, loading a new group from the stream when the current one
   *         is used up
   */
  @Override
  public int read() throws IOException {
    if (count == 0) {
      loadNextGroup();
    }
    count--;
    return (buffer >> (count * 6)) & 63;
  }

  /** Fills the buffer with the next group, full or trailing. */
  private void loadNextGroup() throws IOException {
    final long remaining = valueCount - totalRead;
    if (remaining >= 4) {
      final int high = in.read();
      final int middle = in.read();
      final int low = in.read();
      buffer = (high << 16) + (middle << 8) + low;
      count = 4;
      totalRead += 4;
      return;
    }
    // trailing group: only the bytes needed for the remaining values are read,
    // and they fill the buffer from its most significant byte down
    buffer = 0;
    final int bytesToRead = alignToBytes(6 * (int) remaining);
    int shift = 16;
    for (int n = 0; n < bytesToRead; n++) {
      buffer |= in.read() << shift;
      shift -= 8;
    }
    count = 4;
    totalRead = valueCount;
  }
}

/**
 * Packs 7 bit values in groups of eight values, each full group taking seven bytes.
 */
class SevenBitPackingWriter extends BaseBitPackingWriter {

  private OutputStream out;

  // bits accumulated so far for the current group (56 bits when full, hence a long)
  private long buffer = 0;
  // number of values accumulated in buffer
  private int count = 0;

  public SevenBitPackingWriter(OutputStream out) {
    this.out = out;
  }

  /**
   * Appends one value and emits seven bytes, most significant first, once eight values are
   * pending. The value is OR-ed in unmasked, so it is expected to fit in 7 bits.
   */
  @Override
  public void write(int val) throws IOException {
    buffer = buffer << 7;
    buffer |= val;
    count++;
    if (count != 8) {
      return;
    }
    for (int shift = 48; shift >= 0; shift -= 8) {
      out.write((int) (buffer >>> shift) & 0xFF);
    }
    buffer = 0;
    count = 0;
  }

  /**
   * Flushes an incomplete group through the inherited helper (which pads it to whole bytes),
   * then drops the stream reference.
   */
  @Override
  public void finish() throws IOException {
    final boolean hasPendingValues = count != 0;
    if (hasPendingValues) {
      finish(count * 7, buffer, out);
      buffer = 0;
      count = 0;
    }
    // check this does not impede perf
    out = null;
  }
}

/**
 * Unpacks 7 bit values stored in groups of eight values (seven bytes per full group).
 * The expected number of values is used to read only the bytes present for the last,
 * possibly incomplete, group.
 */
class SevenBitPackingReader extends BaseBitPackingReader {

  private final InputStream in;
  private final long valueCount;

  // the group currently being decoded (56 bits when full, hence a long)
  private long buffer = 0;
  // number of value slots not yet returned from buffer
  private int count = 0;
  // number of values accounted for by the groups loaded so far
  private long totalRead = 0;

  public SevenBitPackingReader(InputStream in, long valueCount) {
    this.in = in;
    this.valueCount = valueCount;
  }

  /**
   * @return the next 7 bit value, loading a new group from the stream when the current one
   *         is used up
   */
  @Override
  public int read() throws IOException {
    if (count == 0) {
      loadNextGroup();
    }
    count--;
    return ((int) (buffer >> (count * 7))) & 127;
  }

  /** Fills the buffer with the next group, full or trailing. */
  private void loadNextGroup() throws IOException {
    final long remaining = valueCount - totalRead;
    if (remaining >= 8) {
      // the four highest bytes are widened to long before shifting
      final long byte6 = (((long) in.read()) & 255) << 48;
      final long byte5 = (((long) in.read()) & 255) << 40;
      final long byte4 = (((long) in.read()) & 255) << 32;
      final long byte3 = (((long) in.read()) & 255) << 24;
      final int byte2 = in.read() << 16;
      final int byte1 = in.read() << 8;
      final int byte0 = in.read();
      buffer = byte6 + byte5 + byte4 + byte3 + byte2 + byte1 + byte0;
      count = 8;
      totalRead += 8;
      return;
    }
    // trailing group: only the bytes needed for the remaining values are read,
    // and they fill the buffer from its most significant byte down
    buffer = 0;
    final int bytesToRead = alignToBytes(7 * (int) remaining);
    int shift = 48;
    for (int n = 0; n < bytesToRead; n++) {
      buffer |= (((long) in.read()) & 255) << shift;
      shift -= 8;
    }
    count = 8;
    totalRead = valueCount;
  }
}

/**
 * Writer for 8 bit values: every value is handed straight to the stream, nothing is buffered.
 */
class EightBitPackingWriter extends BitPackingWriter {

  private OutputStream out;

  public EightBitPackingWriter(OutputStream out) {
    this.out = out;
  }

  /**
   * Drops the stream reference; there is nothing to flush.
   */
  @Override
  public void finish() throws IOException {
    // check this does not impede perf
    this.out = null;
  }

  /**
   * Passes the value to the stream's single byte write.
   */
  @Override
  public void write(int val) throws IOException {
    this.out.write(val);
  }
}

/**
 * Reader for 8 bit values: every value is exactly one byte taken from the stream.
 */
class EightBitPackingReader extends BitPackingReader {

  private final InputStream in;

  public EightBitPackingReader(InputStream in) {
    this.in = in;
  }

  /**
   * @return whatever the stream's single byte read returns, unchanged
   */
  @Override
  public int read() throws IOException {
    final int value = in.read();
    return value;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy