org.apache.hadoop.hive.ql.io.orc.RunLengthIntegerWriterV2 Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of hive-exec
There is a newer version: 4.0.0
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.io.orc;

import java.io.IOException;

/**
 * A writer that performs light weight compression over sequence of integers.
 * 
 * There are four types of lightweight integer compression
 * <ul>
 * <li>SHORT_REPEAT</li>
 * <li>DIRECT</li>
 * <li>PATCHED_BASE</li>
 * <li>DELTA</li>
 * </ul>
 * 
 * The description and format for these types are as below:
 * 
 * SHORT_REPEAT: Used for short repeated integer sequences.
 * 

 * 1 byte header
 * 
 * 2 bits for encoding type
 * 3 bits for bytes required for repeating value
 * 3 bits for repeat count (MIN_REPEAT + run length)
 * 
 * 
 * Blob - repeat value (fixed bytes)
 * 
 * 
 * 
 * DIRECT: Used for random integer sequences whose number of bit
 * requirement doesn't vary a lot.
 * 

 * 2 bytes header
 * 
 * 1st byte
 * 2 bits for encoding type
 * 5 bits for fixed bit width of values in blob
 * 1 bit for storing MSB of run length
 * 
 * 
 * 2nd byte
 * 8 bits for lower run length bits
 * 
 * 
 * Blob - stores the direct values using fixed bit width. The length of the
 * data blob is (fixed width * run length) bits long
 * 
 * 
 * 
 * PATCHED_BASE: Used for random integer sequences whose number of bit
 * requirement varies beyond a threshold.
 * 

 * 4 bytes header
 * 
 * 1st byte
 * 2 bits for encoding type
 * 5 bits for fixed bit width of values in blob
 * 1 bit for storing MSB of run length
 * 
 * 
 * 2nd byte
 * 8 bits for lower run length bits
 * 
 * 
 * 3rd byte
 * 3 bits for bytes required to encode base value
 * 5 bits for patch width
 * 
 * 
 * 4th byte
 * 3 bits for patch gap width
 * 5 bits for patch length
 * 
 * 
 * Base value - Stored using fixed number of bytes. If MSB is set, base
 * value is negative else positive. Length of base value is (base width * 8)
 * bits.
 * Data blob - Base reduced values as stored using fixed bit width. Length
 * of data blob is (fixed width * run length) bits.
 * Patch blob - Patch blob is a list of gap and patch value. Each entry in
 * the patch list is (patch width + patch gap width) bits long. Gap between the
 * subsequent elements to be patched are stored in upper part of entry whereas
 * patch values are stored in lower part of entry. Length of patch blob is
 * ((patch width + patch gap width) * patch length) bits.
 * 
 * 
 * 
 * DELTA: Used for monotonically increasing or decreasing sequences,
 * sequences with fixed delta values or long repeated sequences.
 * 

 * 2 bytes header
 * 
 * 1st byte
 * 2 bits for encoding type
 * 5 bits for fixed bit width of values in blob
 * 1 bit for storing MSB of run length
 * 
 * 
 * 2nd byte
 * 8 bits for lower run length bits
 * 
 * 
 * Base value - encoded as varint
 * Delta base - encoded as varint
 * Delta blob - only positive values. monotonicity and orderness are decided
 * based on the sign of the base value and delta base
 * 
 * 
 */
class RunLengthIntegerWriterV2 implements IntegerWriter {

  /**
   * The four run encodings this writer can emit. The declaration order is
   * significant: the ordinal of each constant is written into the two most
   * significant bits of the run header, so constants must not be reordered.
   */
  public enum EncodingType {
    /** Short run of one repeated value. */
    SHORT_REPEAT,
    /** Values bit packed with a single fixed width. */
    DIRECT,
    /** Base-reduced values with a separate list of patches for outliers. */
    PATCHED_BASE,
    /** Base value followed by a fixed delta or a blob of deltas. */
    DELTA
  }

  // Capacity of every buffer below; a variable or fixed run that reaches this
  // many values is written out immediately.
  static final int MAX_SCOPE = 512;
  // Smallest number of identical consecutive values handled as a repeat run;
  // also subtracted from the run length stored in a SHORT_REPEAT header.
  static final int MIN_REPEAT = 3;
  // Longest repeat run written as SHORT_REPEAT; longer repeat runs are written
  // as DELTA with a fixed delta.
  private static final int MAX_SHORT_REPEAT_LENGTH = 10;
  // Difference between the two most recently buffered values.
  private long prevDelta = 0;
  // Number of buffered values counted as a run of identical values.
  private int fixedRunLength = 0;
  // Number of buffered values counted as a run of varying values.
  private int variableRunLength = 0;
  // Raw values buffered since the last run was written.
  private final long[] literals = new long[MAX_SCOPE];
  // Destination stream for encoded runs.
  private final PositionedOutputStream output;
  // When true values are zigzag encoded before bit packing and the DELTA base
  // value is written as a signed varint.
  private final boolean signed;
  // Encoding chosen for the buffered run; null while nothing is pending.
  private EncodingType encoding;
  // Number of valid entries in literals.
  private int numLiterals;
  // Zigzag encoded copy of literals (plain copy when not signed).
  private final long[] zigzagLiterals = new long[MAX_SCOPE];
  // literals with min subtracted; input of PATCHED_BASE.
  private final long[] baseRedLiterals = new long[MAX_SCOPE];
  // Deltas between adjacent literals: entry 0 keeps its sign, later entries
  // hold absolute values.
  private final long[] adjDeltas = new long[MAX_SCOPE];
  // Delta written for a fixed-delta DELTA run.
  private long fixedDelta;
  // Bit widths at the 90th/100th percentile of the zigzag literals.
  private int zzBits90p;
  private int zzBits100p;
  // Bit widths at the 95th/100th percentile of the base reduced literals.
  private int brBits95p;
  private int brBits100p;
  // Bit width of the largest delta in adjDeltas.
  private int bitsDeltaMax;
  // PATCHED_BASE patch list description: bits per patch value, bits per gap
  // and number of entries.
  private int patchWidth;
  private int patchGapWidth;
  private int patchLength;
  // Patch list entries, gap in the upper bits and patch value in the lower
  // patchWidth bits.
  private long[] gapVsPatchList;
  // Smallest buffered literal; used as the PATCHED_BASE base value.
  private long min;
  // True when every adjacent delta of the buffered run is the same.
  private boolean isFixedDelta;
  // Helper for varints, bit packing and bit width computations.
  private SerializationUtils utils;
  // When true DIRECT and DELTA bit widths are rounded with
  // getClosestAlignedFixedBits before packing.
  private boolean alignedBitpacking;

  /**
   * Creates a writer with aligned bit packing enabled.
   *
   * @param output stream that receives the encoded runs
   * @param signed whether the values are signed
   */
  RunLengthIntegerWriterV2(PositionedOutputStream output, boolean signed) {
    // delegate with alignedBitpacking = true
    this(output, signed, true);
  }

  /**
   * Creates a writer.
   *
   * @param output stream that receives the encoded runs
   * @param signed whether the values are signed
   * @param alignedBitpacking whether DIRECT and DELTA bit widths are rounded
   *          with {@code getClosestAlignedFixedBits} before packing
   */
  RunLengthIntegerWriterV2(PositionedOutputStream output, boolean signed,
      boolean alignedBitpacking) {
    this.output = output;
    this.signed = signed;
    this.alignedBitpacking = alignedBitpacking;
    this.utils = new SerializationUtils();
    // start with an empty buffer and no pending encoding
    clear();
  }

  /**
   * Writes the buffered run with the encoding currently stored in
   * {@code encoding} and then resets the per-run state. Does nothing when no
   * values are buffered.
   *
   * @throws IOException if writing to the output stream fails
   */
  private void writeValues() throws IOException {
    if (numLiterals == 0) {
      return;
    }

    switch (encoding) {
      case SHORT_REPEAT:
        writeShortRepeatValues();
        break;
      case DIRECT:
        writeDirectValues();
        break;
      case PATCHED_BASE:
        writePatchedBaseValues();
        break;
      default:
        // only DELTA is left
        writeDeltaValues();
        break;
    }

    // the run is on the stream, forget everything about it
    clear();
  }

  /**
   * Writes the buffered run using DELTA encoding: a two byte header, the first
   * literal as a varint, the first delta as a signed varint and, unless the
   * delta is fixed, the remaining deltas bit packed.
   *
   * @throws IOException if writing to the output stream fails
   */
  private void writeDeltaValues() throws IOException {
    int bitWidth = alignedBitpacking
        ? utils.getClosestAlignedFixedBits(bitsDeltaMax)
        : bitsDeltaMax;
    // stays 0 for fixed delta runs: width 0 marks "no delta blob"
    int encodedWidth = 0;
    final int runLength;

    if (isFixedDelta && fixedRunLength > MIN_REPEAT) {
      // long run of one repeated value, e.g. 2 2 2 2 2 2 2 2
      runLength = fixedRunLength - 1;
      fixedRunLength = 0;
    } else {
      if (!isFixedDelta) {
        // width 0 is reserved for fixed delta runs, so deltas that would
        // need a single bit are packed with two bits instead
        if (bitWidth == 1) {
          bitWidth = 2;
        }
        encodedWidth = utils.encodeBitWidth(bitWidth) << 1;
      }
      // fixed non-zero delta (e.g. 4 6 8 10) or varying deltas
      runLength = variableRunLength - 1;
      variableRunLength = 0;
    }

    // first byte: opcode, encoded width and the 9th bit of the run length;
    // second byte: the low 8 bits of the run length
    final int ninthBit = (runLength & 0x100) >>> 8;
    output.write(getOpcode() | encodedWidth | ninthBit);
    output.write(runLength & 0xff);

    // base value: the first buffered literal
    if (signed) {
      utils.writeVslong(output, literals[0]);
    } else {
      utils.writeVulong(output, literals[0]);
    }

    if (isFixedDelta) {
      // a single delta describes the whole run, no blob is needed
      utils.writeVslong(output, fixedDelta);
      return;
    }

    // the first delta keeps its sign and goes out as a varint
    utils.writeVslong(output, adjDeltas[0]);

    // n literals have n - 1 deltas; one was just written, so the blob holds
    // the remaining numLiterals - 2 deltas
    utils.writeInts(adjDeltas, 1, numLiterals - 2, bitWidth, output);
  }

  /**
   * Writes the buffered run using PATCHED_BASE encoding: a four byte header,
   * the base value, the bit packed base reduced values and the patch list.
   * Relies on the state prepared by {@code preparePatchedBlob()}.
   *
   * @throws IOException if writing to the output stream fails
   */
  private void writePatchedBaseValues() throws IOException {

    // NOTE: aligned bit packing is not used here. A patch supplies the most
    // significant bits of a value (actual = patch << width | stored), so
    // widening the stored part would make the value impossible to rebuild.

    // width of the base reduced values, placed in the 5 bits after the opcode
    final int encodedWidth = utils.encodeBitWidth(brBits95p) << 1;

    // the header stores run length - 1
    variableRunLength -= 1;
    final int runLength = variableRunLength;

    // first byte: opcode, encoded width and the 9th bit of the run length
    final int firstByte = getOpcode() | encodedWidth
        | ((runLength & 0x100) >>> 8);

    // second byte: the low 8 bits of the run length
    final int secondByte = runLength & 0xff;

    // the base is stored as magnitude plus a sign bit
    final boolean negativeBase = min < 0;
    if (negativeBase) {
      min = -min;
    }

    // bytes needed for the magnitude and one extra bit for the sign
    final int baseBits = utils.findClosestNumBits(min) + 1;
    final int baseBytes = (baseBits + 7) / 8;

    // a negative base has the most significant stored bit set
    if (negativeBase) {
      min |= (1L << ((baseBytes * 8) - 1));
    }

    // third byte: 3 bits for (base bytes - 1) and 5 bits for the patch width
    final int thirdByte = ((baseBytes - 1) << 5)
        | utils.encodeBitWidth(patchWidth);

    // fourth byte: 3 bits for (patch gap width - 1) and 5 bits for the patch
    // length
    final int fourthByte = ((patchGapWidth - 1) << 5) | patchLength;

    output.write(firstByte);
    output.write(secondByte);
    output.write(thirdByte);
    output.write(fourthByte);

    // base value, most significant byte first
    for (int shift = (baseBytes - 1) * 8; shift >= 0; shift -= 8) {
      output.write((byte) ((min >>> shift) & 0xff));
    }

    // bit packed base reduced values
    final int dataWidth = utils.getClosestFixedBits(brBits95p);
    utils.writeInts(baseRedLiterals, 0, numLiterals, dataWidth, output);

    // bit packed (gap, patch) entries
    final int entryWidth = utils.getClosestFixedBits(patchGapWidth + patchWidth);
    utils.writeInts(gapVsPatchList, 0, gapVsPatchList.length, entryWidth,
        output);

    // the run has been consumed
    variableRunLength = 0;
  }

  /**
   * Returns the current encoding positioned for the first header byte: the
   * ordinal of {@code encoding} shifted into the two most significant bits.
   *
   * @return opcode bits of the first header byte
   */
  private int getOpcode() {
    final int typeIndex = encoding.ordinal();
    return typeIndex << 6;
  }

  /**
   * Writes the buffered run using DIRECT encoding: a two byte header followed
   * by the zigzag literals bit packed with one fixed width.
   *
   * @throws IOException if writing to the output stream fails
   */
  private void writeDirectValues() throws IOException {

    // width of the widest zigzag literal, optionally rounded for alignment
    final int bitWidth = alignedBitpacking
        ? utils.getClosestAlignedFixedBits(zzBits100p)
        : zzBits100p;

    // the encoded width sits in the 5 bits after the opcode
    final int encodedWidth = utils.encodeBitWidth(bitWidth) << 1;

    // the header stores run length - 1
    final int runLength = --variableRunLength;

    // first byte: opcode, encoded width and the 9th bit of the run length;
    // second byte: the low 8 bits of the run length
    final int ninthBit = (runLength & 0x100) >>> 8;
    output.write(getOpcode() | encodedWidth | ninthBit);
    output.write(runLength & 0xff);

    // data blob
    utils.writeInts(zigzagLiterals, 0, numLiterals, bitWidth, output);

    // the run has been consumed
    variableRunLength = 0;
  }

  /**
   * Writes the buffered run using SHORT_REPEAT encoding: a one byte header
   * followed by the repeated value in big endian byte order.
   *
   * @throws IOException if writing to the output stream fails
   */
  private void writeShortRepeatValues() throws IOException {
    // the value that repeats, zigzag encoded for signed columns
    final long repeated = signed ? utils.zigzagEncode(literals[0]) : literals[0];

    // whole bytes needed to hold it
    final int valueBits = utils.findClosestNumBits(repeated);
    final int valueBytes = (valueBits + 7) >>> 3;

    // header layout: 2 bits opcode, 3 bits (bytes - 1), 3 bits
    // (run length - MIN_REPEAT)
    final int opcode = getOpcode();
    fixedRunLength -= MIN_REPEAT;
    output.write(opcode | ((valueBytes - 1) << 3) | fixedRunLength);

    // repeated value, most significant byte first
    for (int shift = (valueBytes - 1) * 8; shift >= 0; shift -= 8) {
      output.write((int) ((repeated >>> shift) & 0xff));
    }

    // the run has been consumed
    fixedRunLength = 0;
  }

  /**
   * Chooses the encoding for the buffered literals and stores it in
   * {@code encoding}, filling in the state the matching write method needs
   * (zigzag literals, deltas, base reduced literals, patch list, bit widths).
   *
   * The decision order is:
   * 1. runs of at most MIN_REPEAT values: DIRECT
   * 2. max - min overflows: DIRECT
   * 3. all values equal, or all adjacent deltas equal: DELTA with fixed delta
   * 4. monotonic sequence with a non-zero first delta: DELTA
   * 5. outliers worth patching and a base that fits the header: PATCHED_BASE
   * 6. everything else: DIRECT
   */
  private void determineEncoding() {

    // the PATCHED_BASE header has 3 bits for (base bytes - 1), i.e. at most
    // 8 bytes of which one bit is the sign. A magnitude of 2^56 or more needs
    // a 9th byte and cannot be represented.
    final long baseValueLimit = 1L << 56;

    // we need to compute zigzag values for DIRECT encoding if we decide to
    // break early for delta overflows or for shorter runs
    computeZigZagLiterals();

    zzBits100p = utils.percentileBits(zigzagLiterals, 0, numLiterals, 1.0);

    // not a big win for shorter runs to determine encoding
    if (numLiterals <= MIN_REPEAT) {
      encoding = EncodingType.DIRECT;
      return;
    }

    // DELTA encoding check

    // for identifying monotonic sequences
    boolean isIncreasing = true;
    boolean isDecreasing = true;
    this.isFixedDelta = true;

    this.min = literals[0];
    long max = literals[0];
    final long initialDelta = literals[1] - literals[0];
    long currDelta = initialDelta;
    long deltaMax = initialDelta;
    // the first delta keeps its sign, it tells the reader the direction
    this.adjDeltas[0] = initialDelta;

    for (int i = 1; i < numLiterals; i++) {
      final long l1 = literals[i];
      final long l0 = literals[i - 1];
      currDelta = l1 - l0;
      min = Math.min(min, l1);
      max = Math.max(max, l1);

      isIncreasing &= (l0 <= l1);
      isDecreasing &= (l0 >= l1);

      isFixedDelta &= (currDelta == initialDelta);
      if (i > 1) {
        // later deltas are stored as magnitudes only
        adjDeltas[i - 1] = Math.abs(currDelta);
        deltaMax = Math.max(deltaMax, adjDeltas[i - 1]);
      }
    }

    // its faster to exit under delta overflow condition without checking for
    // PATCHED_BASE condition as encoding using DIRECT is faster and has less
    // overhead than PATCHED_BASE
    if (!utils.isSafeSubtract(max, min)) {
      encoding = EncodingType.DIRECT;
      return;
    }

    // invariant - subtracting any number from any other in the literals after
    // this point won't overflow

    // if min is equal to max then the delta is 0, this condition happens for
    // fixed values run >10 which cannot be encoded with SHORT_REPEAT. This
    // check must not depend on initialDelta being non-zero: when all values
    // are equal initialDelta is always 0.
    if (min == max) {
      assert isFixedDelta : min + "==" + max +
          ", isFixedDelta cannot be false";
      assert currDelta == 0 : min + "==" + max + ", currDelta should be zero";
      fixedDelta = 0;
      encoding = EncodingType.DELTA;
      return;
    }

    if (isFixedDelta) {
      assert currDelta == initialDelta
          : "currDelta should be equal to initialDelta for fixed delta encoding";
      encoding = EncodingType.DELTA;
      fixedDelta = currDelta;
      return;
    }

    // if initialDelta is 0 then we cannot delta encode as we cannot identify
    // the sign of deltas (increasing or decreasing)
    if (initialDelta != 0) {
      // stores the number of bits required for packing delta blob in
      // delta encoding
      bitsDeltaMax = utils.findClosestNumBits(deltaMax);

      // monotonic condition
      if (isIncreasing || isDecreasing) {
        encoding = EncodingType.DELTA;
        return;
      }
    }

    // PATCHED_BASE encoding check

    // percentile values are computed for the zigzag encoded values. if the
    // number of bit requirement between 90th and 100th percentile varies
    // beyond a threshold then we need to patch the values. if the variation
    // is not significant then we can use direct encoding
    zzBits90p = utils.percentileBits(zigzagLiterals, 0, numLiterals, 0.9);
    final int diffBitsLH = zzBits100p - zzBits90p;

    // a difference of at most 1 bit is not worth patching
    if (diffBitsLH <= 1) {
      encoding = EncodingType.DIRECT;
      return;
    }

    // patching is done only on base reduced values.
    // remove base from literals
    for (int i = 0; i < numLiterals; i++) {
      baseRedLiterals[i] = literals[i] - min;
    }

    // 95th percentile width is used to determine max allowed value
    // after which patching will be done
    brBits95p = utils.percentileBits(baseRedLiterals, 0, numLiterals, 0.95);

    // 100th percentile is used to compute the max patch width
    brBits100p = utils.percentileBits(baseRedLiterals, 0, numLiterals, 1.0);

    // The decision to use patched base was based on zigzag values, but the
    // actual patching is done on base reduced literals. If base reduction
    // removed the spread there is nothing to patch, and if the base does not
    // fit the header it cannot be written; both cases fall back to DIRECT.
    final boolean hasPatches = (brBits100p - brBits95p) != 0;
    final boolean baseFitsHeader = min > -baseValueLimit && min < baseValueLimit;
    if (hasPatches && baseFitsHeader) {
      encoding = EncodingType.PATCHED_BASE;
      preparePatchedBlob();
    } else {
      encoding = EncodingType.DIRECT;
    }
  }

  /**
   * Fills {@code zigzagLiterals} for the buffered values: zigzag encoded when
   * the writer is signed, copied unchanged otherwise.
   */
  private void computeZigZagLiterals() {
    for (int idx = 0; idx < numLiterals; idx++) {
      final long raw = literals[idx];
      zigzagLiterals[idx] = signed ? utils.zigzagEncode(raw) : raw;
    }
  }

  /**
   * Builds the PATCHED_BASE patch list from the base reduced literals.
   *
   * Every value in {@code baseRedLiterals} that does not fit in
   * {@code brBits95p} bits is truncated in place to its low bits, and its high
   * bits together with the distance to the previous patched position are
   * recorded. On return {@code patchWidth}, {@code patchGapWidth},
   * {@code patchLength} and {@code gapVsPatchList} describe the patch list;
   * {@code brBits95p} may have been changed to 8 (see the 64 bit case below).
   */
  private void preparePatchedBlob() {
    // mask will be max value beyond which patch will be generated
    long mask = (1L << brBits95p) - 1;

    // since we are considering only 95 percentile, the gap and patch arrays
    // are sized for 5% of the values
    patchLength = (int) Math.ceil((numLiterals * 0.05));

    int[] gapList = new int[patchLength];
    long[] patchList = new long[patchLength];

    // #bit for patch
    patchWidth = brBits100p - brBits95p;
    patchWidth = utils.getClosestFixedBits(patchWidth);

    // if patch bit requirement is 64 then it will not possible to pack
    // gap and patch together in a long. To make sure gap and patch can be
    // packed together adjust the patch width
    if (patchWidth == 64) {
      patchWidth = 56;
      brBits95p = 8;
      mask = (1L << brBits95p) - 1;
    }

    int gapIdx = 0;
    int patchIdx = 0;
    int prev = 0;
    int gap = 0;
    int maxGap = 0;

    for(int i = 0; i < numLiterals; i++) {
      // if value is above mask then create the patch and record the gap
      if (baseRedLiterals[i] > mask) {
        gap = i - prev;
        if (gap > maxGap) {
          maxGap = gap;
        }

        // gaps are relative, so store the previous patched value index
        prev = i;
        gapList[gapIdx++] = gap;

        // extract the most significant bits that are over mask bits
        long patch = baseRedLiterals[i] >>> brBits95p;
        patchList[patchIdx++] = patch;

        // strip off the MSB to enable safe bit packing
        baseRedLiterals[i] &= mask;
      }
    }

    // adjust the patch length to number of entries in gap list
    patchLength = gapIdx;

    // if the element to be patched is the first and only element then
    // max gap will be 0, but to store the gap as 0 we need atleast 1 bit
    if (maxGap == 0 && patchLength != 0) {
      patchGapWidth = 1;
    } else {
      patchGapWidth = utils.findClosestNumBits(maxGap);
    }

    // special case: a gap larger than 255 needs more than 8 bits, but the
    // header only has 3 bits for (gap width - 1), i.e. at most 8 bits. Such a
    // gap is split into filler entries of gap 255 with patch value 0,
    // followed by an entry holding the remaining gap and the actual patch
    // value. A gap of 511 (the element to be patched is the last element in
    // the scope) needs two filler entries:
    // 255 gap width => 0 for patch value
    // 255 gap width => 0 for patch value
    // 1 gap width => actual patch value
    // Any other gap above 255 needs one filler entry.
    if (patchGapWidth > 8) {
      patchGapWidth = 8;
      // for gap = 511, we need two additional entries in patch list
      if (maxGap == 511) {
        patchLength += 2;
      } else {
        patchLength += 1;
      }
    }

    // create gap vs patch list
    gapIdx = 0;
    patchIdx = 0;
    gapVsPatchList = new long[patchLength];
    for(int i = 0; i < patchLength; i++) {
      long g = gapList[gapIdx++];
      long p = patchList[patchIdx++];
      // emit filler entries (gap 255, patch 0); note that this advances the
      // output index i as well
      while (g > 255) {
        gapVsPatchList[i++] = (255L << patchWidth);
        g -= 255;
      }

      // store patch value in LSBs and gap in MSBs
      gapVsPatchList[i] = (g << patchWidth) | p;
    }
  }

  /**
   * Resets the per-run state: the buffer becomes empty, no encoding is
   * selected and all derived statistics are zeroed. The run length counters
   * are not touched here.
   */
  private void clear() {
    // buffer and selected encoding
    numLiterals = 0;
    encoding = null;

    // delta bookkeeping
    prevDelta = fixedDelta = min = 0;
    isFixedDelta = true;

    // bit width statistics
    zzBits90p = zzBits100p = 0;
    brBits95p = brBits100p = 0;
    bitsDeltaMax = 0;

    // patch list
    patchGapWidth = patchLength = patchWidth = 0;
    gapVsPatchList = null;
  }

  /**
   * Writes any buffered values as one final run and flushes the output
   * stream.
   *
   * @throws IOException if writing to or flushing the output stream fails
   */
  @Override
  public void flush() throws IOException {
    if (numLiterals != 0) {
      if (variableRunLength != 0) {
        // pending variable run
        determineEncoding();
        writeValues();
      } else if (fixedRunLength != 0) {
        if (fixedRunLength < MIN_REPEAT) {
          // too short to be a repeat run, treat it as a variable run
          variableRunLength = fixedRunLength;
          fixedRunLength = 0;
          determineEncoding();
        } else if (fixedRunLength > MAX_SHORT_REPEAT_LENGTH) {
          // too long for SHORT_REPEAT, use DELTA with a fixed delta
          encoding = EncodingType.DELTA;
          isFixedDelta = true;
        } else {
          encoding = EncodingType.SHORT_REPEAT;
        }
        writeValues();
      }
    }
    output.flush();
  }

  @Override
  public void write(long val) throws IOException {
    if (numLiterals == 0) {
      initializeLiterals(val);
    } else {
      if (numLiterals == 1) {
        prevDelta = val - literals[0];
        literals[numLiterals++] = val;
        // if both values are same count as fixed run else variable run
        if (val == literals[0]) {
          fixedRunLength = 2;
          variableRunLength = 0;
        } else {
          fixedRunLength = 0;
          variableRunLength = 2;
        }
      } else {
        long currentDelta = val - literals[numLiterals - 1];
        if (prevDelta == 0 && currentDelta == 0) {
          // fixed delta run

          literals[numLiterals++] = val;

          // if variable run is non-zero then we are seeing repeating
          // values at the end of variable run in which case keep
          // updating variable and fixed runs
          if (variableRunLength > 0) {
            fixedRunLength = 2;
          }
          fixedRunLength += 1;

          // if fixed run met the minimum condition and if variable
          // run is non-zero then flush the variable run and shift the
          // tail fixed runs to start of the buffer
          if (fixedRunLength >= MIN_REPEAT && variableRunLength > 0) {
            numLiterals -= MIN_REPEAT;
            variableRunLength -= MIN_REPEAT - 1;
            // copy the tail fixed runs
            long[] tailVals = new long[MIN_REPEAT];
            System.arraycopy(literals, numLiterals, tailVals, 0, MIN_REPEAT);

            // determine variable encoding and flush values
            determineEncoding();
            writeValues();

            // shift tail fixed runs to beginning of the buffer
            for(long l : tailVals) {
              literals[numLiterals++] = l;
            }
          }

          // if fixed runs reached max repeat length then write values
          if (fixedRunLength == MAX_SCOPE) {
            determineEncoding();
            writeValues();
          }
        } else {
          // variable delta run

          // if fixed run length is non-zero and if it satisfies the
          // short repeat conditions then write the values as short repeats
          // else use delta encoding
          if (fixedRunLength >= MIN_REPEAT) {
            if (fixedRunLength <= MAX_SHORT_REPEAT_LENGTH) {
              encoding = EncodingType.SHORT_REPEAT;
              writeValues();
            } else {
              encoding = EncodingType.DELTA;
              isFixedDelta = true;
              writeValues();
            }
          }

          // if fixed run length is  0 && fixedRunLength < MIN_REPEAT) {
            if (val != literals[numLiterals - 1]) {
              variableRunLength = fixedRunLength;
              fixedRunLength = 0;
            }
          }

          // after writing values re-initialize the variables
          if (numLiterals == 0) {
            initializeLiterals(val);
          } else {
            // keep updating variable run lengths
            prevDelta = val - literals[numLiterals - 1];
            literals[numLiterals++] = val;
            variableRunLength += 1;

            // if variable run length reach the max scope, write it
            if (variableRunLength == MAX_SCOPE) {
              determineEncoding();
              writeValues();
            }
          }
        }
      }
    }
  }

  /**
   * Appends {@code val} to the buffer and sets both run length counters to 1;
   * the next value decides whether the run is fixed or variable.
   *
   * @param val first value of the new run
   */
  private void initializeLiterals(long val) {
    literals[numLiterals++] = val;
    variableRunLength = 1;
    fixedRunLength = 1;
  }

  /**
   * Records the current position: first the position reported by the output
   * stream, then the number of values buffered but not yet written.
   *
   * @param recorder receives the position entries
   * @throws IOException if the output stream cannot report its position
   */
  @Override
  public void getPosition(PositionRecorder recorder) throws IOException {
    output.getPosition(recorder);
    // values still in the buffer are not on the stream yet
    recorder.addPosition(numLiterals);
  }
}