All Downloads are FREE. Search and download functionalities are using the official Maven repository.

parquet.column.values.rle.RunLengthBitPackingHybridDecoder Maven / Gradle / Ivy

There is a newer version: 1.6.0
Show newest version
/**
 * Copyright 2012 Twitter, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package parquet.column.values.rle;

import static parquet.Log.DEBUG;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;

import parquet.Log;
import parquet.Preconditions;
import parquet.bytes.BytesUtils;
import parquet.column.values.bitpacking.BytePacker;
import parquet.column.values.bitpacking.Packer;
import parquet.io.ParquetDecodingException;

/**
 * Decodes values written in the grammar described in {@link RunLengthBitPackingHybridEncoder}
 *
 * @author Julien Le Dem
 */
public class RunLengthBitPackingHybridDecoder {
  private static final Log LOG = Log.getLog(RunLengthBitPackingHybridDecoder.class);

  private static enum MODE { RLE, PACKED }

  private final int bitWidth;
  private final BytePacker packer;
  private final ByteArrayInputStream in;

  private MODE mode;
  private int currentCount;
  private int currentValue;
  private int[] currentBuffer;

  public RunLengthBitPackingHybridDecoder(int bitWidth, ByteArrayInputStream in) {
    if (DEBUG) LOG.debug("decoding bitWidth " + bitWidth);

    Preconditions.checkArgument(bitWidth >= 0 && bitWidth <= 32, "bitWidth must be >= 0 and <= 32");
    this.bitWidth = bitWidth;
    this.packer = Packer.LITTLE_ENDIAN.newBytePacker(bitWidth);
    this.in = in;
  }

  public int readInt() throws IOException {
    if (currentCount == 0) {
      readNext();
    }
    -- currentCount;
    int result;
    switch (mode) {
    case RLE:
      result = currentValue;
      break;
    case PACKED:
      result = currentBuffer[currentBuffer.length - 1 - currentCount];
      break;
    default:
      throw new ParquetDecodingException("not a valid mode " + mode);
    }
    return result;
  }

  private void readNext() throws IOException {	
	Preconditions.checkArgument(in.available() > 0, "Reading past RLE/BitPacking stream.");
    final int header = BytesUtils.readUnsignedVarInt(in);
    mode = (header & 1) == 0 ? MODE.RLE : MODE.PACKED;
    switch (mode) {
    case RLE:
      currentCount = header >>> 1;
      if (DEBUG) LOG.debug("reading " + currentCount + " values RLE");
      currentValue = BytesUtils.readIntLittleEndianPaddedOnBitWidth(in, bitWidth);
      break;
    case PACKED:
      int numGroups = header >>> 1;
      currentCount = numGroups * 8;
      if (DEBUG) LOG.debug("reading " + currentCount + " values BIT PACKED");
      currentBuffer = new int[currentCount]; // TODO: reuse a buffer
      byte[] bytes = new byte[numGroups * bitWidth];
      // At the end of the file RLE data though, there might not be that many bytes left. 
      int bytesToRead = (int)Math.ceil(currentCount * bitWidth / 8.0);
      bytesToRead = Math.min(bytesToRead, in.available());
      new DataInputStream(in).readFully(bytes, 0, bytesToRead);
      for (int valueIndex = 0, byteIndex = 0; valueIndex < currentCount; valueIndex += 8, byteIndex += bitWidth) {
        packer.unpack8Values(bytes, byteIndex, currentBuffer, valueIndex);
      }
      break;
    default:
      throw new ParquetDecodingException("not a valid mode " + mode);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy