All downloads are free. Search and download functionality uses the official Maven repository.

net.snowflake.ingest.streaming.internal.ParquetBufferValue Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2024 Snowflake Computing Inc. All rights reserved.
 */

package net.snowflake.ingest.streaming.internal;

/** Parquet internal value representation for buffering. */
class ParquetBufferValue {
  /** Parquet uses bit-packing to encode booleans, hence 1 bit (1/8 of a byte) per value. */
  public static final float BIT_ENCODING_BYTE_LEN = 1.0f / 8;

  /**
   * On average parquet needs 2 bytes / 8 values for the RLE+bitpack encoded definition and
   * repetition level.
   *
   * <p>There are two cases how definition and repetition level (0 for null values, 1 for non-null
   * values) is encoded:
   *
   * <ul>
   *   <li>If there are at least 8 repeated values in a row, they are run-length encoded (length +
   *       value itself). E.g. {@code 11111111 -> 8 1}
   *   <li>If there are less than 8 repeated values, they are written in group as part of a
   *       bit-length encoded run, e.g. {@code 1111 -> 15}. A bit-length encoded run ends when
   *       either 64 groups of 8 values have been written or if a new RLE run starts.
   *       <p>To distinguish between RLE and bitpack run, there is 1 extra byte written as header
   *       when a bitpack run starts.
   * </ul>
   *
   * <p>For more details see ColumnWriterV1#createDLWriter and {@link
   * org.apache.parquet.column.values.rle.RunLengthBitPackingHybridEncoder#writeInt(int)}
   */
  public static final float DEFINITION_LEVEL_ENCODING_BYTE_LEN = 2.0f / 8;

  /** Same per-value estimate as {@link #DEFINITION_LEVEL_ENCODING_BYTE_LEN}: 2 bytes / 8 values. */
  public static final float REPETITION_LEVEL_ENCODING_BYTE_LEN = 2.0f / 8;

  /** Parquet stores length in 4 bytes before the actual data bytes. */
  public static final int BYTE_ARRAY_LENGTH_ENCODING_BYTE_LEN = 4;

  // The buffered value; stored and returned as-is (may be null, no validation is performed).
  private final Object value;

  // Size associated with the value. NOTE(review): presumably an estimated encoded size in bytes,
  // given the *_BYTE_LEN constants above -- confirm against callers.
  private final float size;

  /**
   * Creates an immutable (value, size) pair.
   *
   * @param value the value to buffer; kept by reference, not copied
   * @param size the size to associate with {@code value}
   */
  ParquetBufferValue(Object value, float size) {
    this.value = value;
    this.size = size;
  }

  /**
   * @return the value passed to the constructor
   */
  Object getValue() {
    return value;
  }

  /**
   * @return the size passed to the constructor
   */
  float getSize() {
    return size;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy