org.apache.datasketches.cpc.CpcCompression Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of datasketches-java Show documentation
Core sketch algorithms used alone and by other Java repositories in the DataSketches library.
There is a newer version: 6.1.1
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.datasketches.cpc;

import static org.apache.datasketches.cpc.CompressionData.columnPermutationsForDecoding;
import static org.apache.datasketches.cpc.CompressionData.columnPermutationsForEncoding;
import static org.apache.datasketches.cpc.CompressionData.decodingTablesForHighEntropyByte;
import static org.apache.datasketches.cpc.CompressionData.encodingTablesForHighEntropyByte;
import static org.apache.datasketches.cpc.CompressionData.lengthLimitedUnaryDecodingTable65;
import static org.apache.datasketches.cpc.CompressionData.lengthLimitedUnaryEncodingTable65;
import static org.apache.datasketches.cpc.PairTable.introspectiveInsertionSort;
//import static org.apache.datasketches.cpc.RuntimeAsserts.rtAssertEquals;

/**
 * @author Lee Rhodes
 * @author Kevin Lang
 */
final class CpcCompression {
  //visible for test
  static final int NEXT_WORD_IDX = 0; //ptrArr[NEXT_WORD_IDX]
  static final int BIT_BUF = 1;       //ptrArr[BIT_BUF]
  static final int BUF_BITS = 2;      //ptrArr[BUF_BITS]

  //visible for test
  static void writeUnary(
      final int[] compressedWords,
      final long[] ptrArr,
      final int theValue) {

    int nextWordIndex = (int) ptrArr[NEXT_WORD_IDX]; //must be int
    assert (nextWordIndex == ptrArr[NEXT_WORD_IDX]); //catch truncation error
    long bitBuf = ptrArr[BIT_BUF];                   //must be long
    int bufBits = (int) ptrArr[BUF_BITS];            //could be byte

    assert (compressedWords != null);
    assert (nextWordIndex >= 0);
    assert (bitBuf >= 0);
    assert ((bufBits >= 0) && (bufBits <= 31));

    int remaining = theValue;

    while (remaining >= 16) {
      remaining -= 16;
      // Here we output 16 zeros, but we don't need to physically write them into bitbuf
      // because it already contains zeros in that region.
      bufBits += 16; // Record the fact that 16 bits of output have occurred.
      //MAYBE_FLUSH_BITBUF(compressedWords, nextWordIndex);
      if (bufBits >= 32) {
        compressedWords[nextWordIndex++] = (int) bitBuf;
        bitBuf >>>= 32;
        bufBits -= 32;
      }
    }

    assert (remaining >= 0) && (remaining <= 15);

    final long theUnaryCode = 1L << remaining; //must be a long
    bitBuf |= theUnaryCode << bufBits;
    bufBits += (1 + remaining);
    //MAYBE_FLUSH_BITBUF(compressedWords, nextWordIndex);
    if (bufBits >= 32) {
      compressedWords[nextWordIndex++] = (int) bitBuf;
      bitBuf >>>= 32;
      bufBits -= 32;
    }

    ptrArr[NEXT_WORD_IDX] = nextWordIndex;
    assert nextWordIndex == ptrArr[NEXT_WORD_IDX]; //catch sign extension error
    ptrArr[BIT_BUF] = bitBuf;
    ptrArr[BUF_BITS] = bufBits;
  }

  //visible for test
  static long readUnary(
      final int[] compressedWords,
      final long[] ptrArr) {

    int nextWordIndex = (int) ptrArr[NEXT_WORD_IDX]; //must be int
    assert nextWordIndex == ptrArr[NEXT_WORD_IDX]; //catch truncation error
    long bitBuf = ptrArr[BIT_BUF];
    int bufBits = (int) ptrArr[BUF_BITS];

    assert compressedWords != null;
    assert nextWordIndex >= 0;
    assert bitBuf >= 0;
    assert bufBits >= 0;

    long subTotal = 0;
    int trailingZeros;

    //readUnaryLoop:
    while (true) {
      //MAYBE_FILL_BITBUF(compressedWords,nextWordIndex,8); // ensure 8 bits in bit buffer
      if (bufBits < 8) {  // Prepare for an 8-bit peek into the bitstream.
        bitBuf |= ((compressedWords[nextWordIndex++] & 0XFFFF_FFFFL) << bufBits);
        bufBits += 32;
      }

      // These 8 bits include either all or part of the Unary codeword.
      final int peek8 = (int) (bitBuf & 0XFFL);
      trailingZeros = Math.min(8, Integer.numberOfTrailingZeros(peek8));

      assert ((trailingZeros >= 0) && (trailingZeros <= 8)) : "TZ+ " + trailingZeros;

      if (trailingZeros == 8) { // The codeword was partial, so read some more.
        subTotal += 8;
        bufBits -= 8;
        bitBuf >>>= 8;
        continue;
      }
      break;
    }

    bufBits -= (1 + trailingZeros);
    bitBuf >>>= (1 + trailingZeros);

    ptrArr[NEXT_WORD_IDX] = nextWordIndex;
    assert nextWordIndex == ptrArr[NEXT_WORD_IDX]; //catch sign extension error
    ptrArr[BIT_BUF] = bitBuf;
    ptrArr[BUF_BITS] = bufBits;
    return subTotal + trailingZeros;
  }

  /**
   * This returns the number of compressedWords that were actually used.
   * @param byteArray input
   * @param numBytesToEncode input
   * @param encodingTable input
   * @param compressedWords output
   * @return the number of compressedWords that were actually used.
   */
  //visible for test
  //It is the caller's responsibility to ensure that the compressedWords array is long enough.
  static int lowLevelCompressBytes(
      final byte[] byteArray,          // input
      final int numBytesToEncode,      // input, must be an int
      final short[] encodingTable,     // input
      final int[] compressedWords) {   // output

    int nextWordIndex = 0;
    long bitBuf = 0;  // bits are packed into this first, then are flushed to compressedWords
    int bufBits = 0;  // number of bits currently in bitbuf; must be between 0 and 31

    for (int byteIndex = 0; byteIndex < numBytesToEncode; byteIndex++) {
      final int theByte = byteArray[byteIndex] & 0XFF;
      final long codeInfo = (encodingTable[theByte] & 0XFFFFL);
      final long codeVal = codeInfo & 0XFFFL;
      final int codeWordLength = (int) (codeInfo >>> 12);
      bitBuf |= (codeVal << bufBits);
      bufBits += codeWordLength;
      //MAYBE_FLUSH_BITBUF(compressedWords, nextWordIndex);
      if (bufBits >= 32) {
        compressedWords[nextWordIndex++] = (int) bitBuf;
        bitBuf >>>= 32;
        bufBits -= 32;
      }
    }

    //Pad the bitstream with 11 zero-bits so that the decompressor's 12-bit peek
    // can't overrun its input.
    bufBits += 11;
    //MAYBE_FLUSH_BITBUF(compressedWords, nextWordIndex);
    if (bufBits >= 32) {
      compressedWords[nextWordIndex++] = (int) bitBuf;
      bitBuf >>>= 32;
      bufBits -= 32;
    }

    if (bufBits > 0) { // We are done encoding now, so we flush the bit buffer.
      assert (bufBits < 32);
      compressedWords[nextWordIndex++] = (int) bitBuf;
    }
    return nextWordIndex;
  }

  //visible for test
  static void lowLevelUncompressBytes(
      final byte[] byteArray,          // output
      final int numBytesToDecode,      // input (but refers to the output)
      final short[] decodingTable,     // input
      final int[] compressedWords,     // input
      final long numCompressedWords) { // input

    int byteIndex = 0;
    int nextWordIndex = 0;
    long bitBuf = 0;
    int bufBits = 0;

    assert (byteArray != null);
    assert (decodingTable != null);
    assert (compressedWords != null);

    for (byteIndex = 0; byteIndex < numBytesToDecode; byteIndex++) {
      //MAYBE_FILL_BITBUF(compressedWords,wordIndex,12); // ensure 12 bits in bit buffer
      if (bufBits < 12) { // Prepare for a 12-bit peek into the bitstream.
        bitBuf |= ((compressedWords[nextWordIndex++] & 0XFFFF_FFFFL) << bufBits);
        bufBits += 32;
      }

      // These 12 bits will include an entire Huffman codeword.
      final int peek12 = (int) (bitBuf & 0XFFFL);
      final int lookup = decodingTable[peek12] & 0XFFFF;
      final int codeWordLength = lookup >>> 8;
      final byte decodedByte = (byte) (lookup & 0XFF);
      byteArray[byteIndex] = decodedByte;
      bitBuf >>>= codeWordLength;
      bufBits -= codeWordLength;
    }

    // Buffer over-run should be impossible unless there is a bug.
    // However, we might as well check here.
    assert (nextWordIndex <= numCompressedWords);
  }

  /**
   * Here "pairs" refers to row/column pairs that specify the positions of surprising values in
   * the bit matrix.
   * @param pairArray input
   * @param numPairsToEncode input
   * @param numBaseBits input
   * @param compressedWords output
   * @return the number of compressedWords actually used
   */
  //visible for test
  static long lowLevelCompressPairs(
      final int[] pairArray,         // input
      final int numPairsToEncode,    // input
      final int numBaseBits,         // input //cannot exceed 63 or 6 bits, could be byte
      final int[] compressedWords) { // output

    int pairIndex = 0;

    final long[] ptrArr = new long[3];
    int nextWordIndex = 0; //must be int
    long bitBuf = 0;       //must be long
    int bufBits = 0;       //could be byte

    final long golombLoMask = (1L << numBaseBits) - 1L;

    int predictedRowIndex = 0;
    int predictedColIndex = 0;

    for (pairIndex = 0; pairIndex < numPairsToEncode; pairIndex++) {
      final int rowCol = pairArray[pairIndex];
      final int rowIndex = rowCol >>> 6;
      final int colIndex = rowCol & 0X3F; //63

      if (rowIndex != predictedRowIndex) { predictedColIndex = 0; }

      assert (rowIndex >= predictedRowIndex);
      assert (colIndex >= predictedColIndex);

      final long yDelta = rowIndex - predictedRowIndex; //cannot exceed 2^26
      final int xDelta = colIndex - predictedColIndex; //cannot exceed 65

      predictedRowIndex = rowIndex;
      predictedColIndex = colIndex + 1;

      final long codeInfo = lengthLimitedUnaryEncodingTable65[xDelta] & 0XFFFFL;
      final long codeVal = codeInfo & 0XFFFL;
      final int codeLen = (int) (codeInfo >>> 12);
      bitBuf |= (codeVal << bufBits);
      bufBits += codeLen;
      //MAYBE_FLUSH_BITBUF(compressedWords, nextWordIndex);
      if (bufBits >= 32) {
        compressedWords[nextWordIndex++] = (int) bitBuf;
        bitBuf >>>= 32;
        bufBits -= 32;
      }

      final long golombLo = yDelta & golombLoMask; //long for bitBuf
      final long golombHi = yDelta >>> numBaseBits; //cannot exceed 2^26

      //TODO Inline WriteUnary
      ptrArr[NEXT_WORD_IDX] = nextWordIndex;
      ptrArr[BIT_BUF] = bitBuf;
      ptrArr[BUF_BITS] = bufBits;
      assert nextWordIndex == ptrArr[NEXT_WORD_IDX]; //catch sign extension error

      writeUnary(compressedWords, ptrArr, (int) golombHi);

      nextWordIndex = (int) ptrArr[NEXT_WORD_IDX];
      bitBuf = ptrArr[BIT_BUF];
      bufBits = (int) ptrArr[BUF_BITS];
      assert nextWordIndex == ptrArr[NEXT_WORD_IDX]; //catch truncation error
      //END Inline WriteUnary

      bitBuf |= golombLo << bufBits;
      bufBits += numBaseBits;
      //MAYBE_FLUSH_BITBUF(compressedWords, nextWordIndex);
      if (bufBits >= 32) {
        compressedWords[nextWordIndex++] = (int) bitBuf;
        bitBuf >>>= 32;
        bufBits -= 32;
      }
    }

    // Pad the bitstream so that the decompressor's 12-bit peek can't overrun its input.
    int padding = 10 - numBaseBits;
    if (padding < 0) { padding = 0; }
    bufBits += padding;
    //MAYBE_FLUSH_BITBUF(compressedWords, nextWordIndex);
    if (bufBits >= 32) {
      compressedWords[nextWordIndex++] = (int) bitBuf;
      bitBuf >>>= 32;
      bufBits -= 32;
    }

    if (bufBits > 0) { // We are done encoding now, so we flush the bit buffer.
      assert (bufBits < 32);
      compressedWords[nextWordIndex++] = (int) bitBuf;
      //bitBuf = 0;
      //bufBits = 0; // not really necessary
    }
    return nextWordIndex;
  }

  //visible for test
  static void lowLevelUncompressPairs(
      final int[] pairArray,           // output
      final int numPairsToDecode,      // input, size of output, must be int
      final int numBaseBits,           // input, cannot exceed 6 bits
      final int[] compressedWords,     // input
      final long numCompressedWords) { // input

    int pairIndex = 0;

    final long[] ptrArr = new long[3];
    int nextWordIndex = 0;
    long bitBuf = 0;
    int bufBits = 0;

    final long golombLoMask = (1L << numBaseBits) - 1L;

    int predictedRowIndex = 0;
    int predictedColIndex = 0;

    // for each pair we need to read:
    // xDelta (12-bit length-limited unary)
    // yDeltaHi (unary)
    // yDeltaLo (basebits)

    for (pairIndex = 0; pairIndex < numPairsToDecode; pairIndex++) {

      //MAYBE_FILL_BITBUF(compressedWords,wordIndex,12); // ensure 12 bits in bit buffer
      if (bufBits < 12) { // Prepare for a 12-bit peek into the bitstream.
        bitBuf |= ((compressedWords[nextWordIndex++] & 0XFFFF_FFFFL) << bufBits);
        bufBits += 32;
      }

      final int peek12 = (int) (bitBuf & 0XFFFL);
      final int lookup = lengthLimitedUnaryDecodingTable65[peek12] & 0XFFFF;
      final int codeWordLength = lookup >>> 8;
      final int xDelta = lookup & 0XFF;
      bitBuf >>>= codeWordLength;
      bufBits -= codeWordLength;

      //TODO Inline ReadUnary
      ptrArr[NEXT_WORD_IDX] = nextWordIndex;
      ptrArr[BIT_BUF] = bitBuf;
      ptrArr[BUF_BITS] = bufBits;
      assert nextWordIndex == ptrArr[NEXT_WORD_IDX]; //catch sign extension error

      final long golombHi = readUnary(compressedWords, ptrArr);

      nextWordIndex = (int) ptrArr[NEXT_WORD_IDX];
      bitBuf = ptrArr[BIT_BUF];
      bufBits = (int) ptrArr[BUF_BITS];
      assert nextWordIndex == ptrArr[NEXT_WORD_IDX]; //catch truncation error
      //END Inline ReadUnary

      //MAYBE_FILL_BITBUF(compressedWords,wordIndex,numBaseBits); // ensure numBaseBits in bit buffer
      if (bufBits < numBaseBits) { // Prepare for a numBaseBits peek into the bitstream.
        bitBuf |= ((compressedWords[nextWordIndex++] & 0XFFFF_FFFFL) << bufBits);
        bufBits += 32;
      }

      final long golombLo = bitBuf & golombLoMask;
      bitBuf >>>= numBaseBits;
      bufBits -= numBaseBits;
      final long yDelta = (golombHi << numBaseBits) | golombLo;

      // Now that we have yDelta and xDelta, we can compute the pair's row and column.
      if (yDelta > 0) { predictedColIndex = 0; }
      final int rowIndex = predictedRowIndex + (int) yDelta;
      final int colIndex = predictedColIndex + xDelta;
      final int rowCol = (rowIndex << 6) | colIndex;
      pairArray[pairIndex] = rowCol;
      predictedRowIndex = rowIndex;
      predictedColIndex = colIndex + 1;
    }
    // check for buffer over-run
    assert (nextWordIndex <= numCompressedWords)
      : "nextWdIdx: " + nextWordIndex + ", #CompWds: " + numCompressedWords;
  }

  private static int safeLengthForCompressedPairBuf(
      final long k, final long numPairs, final long numBaseBits) {
    assert (numPairs > 0);
    // long ybits = k + numPairs; // simpler and safer UB
    // The following tighter UB on ybits is based on page 198
    // of the textbook "Managing Gigabytes" by Witten, Moffat, and Bell.
    // Notice that if numBaseBits == 0 it coincides with (k + numPairs).
    final long ybits = (numPairs * (1L + numBaseBits)) + (k >>> numBaseBits);
    final long xbits = 12 * numPairs;
    long padding = 10L - numBaseBits;
    if (padding < 0) { padding = 0; }
    final long bits = xbits + ybits + padding;
    //final long words = divideLongsRoundingUp(bits, 32);
    final long words = CpcCompression.divideBy32RoundingUp(bits);
    assert words < (1L << 31);
    return (int) words;
  }

  // Explanation of padding: we write
  // 1) xdelta (huffman, provides at least 1 bit, requires 12-bit lookahead)
  // 2) ydeltaGolombHi (unary, provides at least 1 bit, requires 8-bit lookahead)
  // 3) ydeltaGolombLo (straight B bits).
  // So the 12-bit lookahead is the tight constraint, but there are at least (2 + B) bits emitted,
  // so we would be safe with max (0, 10 - B) bits of padding at the end of the bitstream.
  private static int safeLengthForCompressedWindowBuf(final long k) { // measured in 32-bit words
    // 11 bits of padding, due to 12-bit lookahead, with 1 bit certainly present.
    final long bits = (12 * k) + 11;
    //cannot exceed Integer.MAX_VALUE
    //return (int) (divideLongsRoundingUp(bits, 32));
    return (int) CpcCompression.divideBy32RoundingUp(bits);
  }

  private static int determinePseudoPhase(final int lgK, final long numCoupons) {
    final long k = 1L << lgK;
    final long c = numCoupons;
    // This midrange logic produces pseudo-phases. They are used to select encoding tables.
    // The thresholds were chosen by hand after looking at plots of measured compression.
    if ((1000 * c) < (2375 * k)) {
      if      (   (4 * c) <    (3 * k)) { return ( 16 + 0 ); } // midrange table
      else if (  (10 * c) <   (11 * k)) { return ( 16 + 1 ); } // midrange table
      else if ( (100 * c) <  (132 * k)) { return ( 16 + 2 ); } // midrange table
      else if (   (3 * c) <    (5 * k)) { return ( 16 + 3 ); } // midrange table
      else if ((1000 * c) < (1965 * k)) { return ( 16 + 4 ); } // midrange table
      else if ((1000 * c) < (2275 * k)) { return ( 16 + 5 ); } // midrange table
      else { return 6; } // steady-state table employed before its actual phase
    }
    else {
      // This steady-state logic produces true phases. They are used to select
      // encoding tables, and also column permutations for the "Sliding" flavor.
      assert lgK >= 4;
      final long tmp = c >>> (lgK - 4);
      final int phase = (int) (tmp & 15L);
      assert (phase >= 0) && (phase < 16);
      return phase;
    }
  }

  private static void compressTheWindow(final CompressedState target, final CpcSketch source) {
    final int srcLgK = source.lgK;
    final int srcK = 1 << srcLgK;
    final int windowBufLen = safeLengthForCompressedWindowBuf(srcK);
    final int[] windowBuf = new int[windowBufLen];
    final int pseudoPhase = determinePseudoPhase(srcLgK, source.numCoupons);
    target.cwLengthInts = lowLevelCompressBytes(
        source.slidingWindow,
        srcK,
        encodingTablesForHighEntropyByte[pseudoPhase],
        windowBuf);

    // At this point we free the unused portion of the compression output buffer.
    //  final int[] shorterBuf = Arrays.copyOf(windowBuf, target.cwLength);
    //  target.compressedWindow = shorterBuf;
    target.cwStream = windowBuf; //avoid extra copy
  }

  private static void uncompressTheWindow(final CpcSketch target, final CompressedState source) {
    final int srcLgK = source.lgK;
    final int srcK = 1 << srcLgK;
    final byte[] window = new byte[srcK];
    // bzero ((void *) window, (size_t) k); // zeroing not needed here (unlike the Hybrid Flavor)
    assert (target.slidingWindow == null);
    target.slidingWindow = window;
    final int pseudoPhase = determinePseudoPhase(srcLgK, source.numCoupons);
    assert (source.cwStream != null);
    lowLevelUncompressBytes(target.slidingWindow, srcK,
           decodingTablesForHighEntropyByte[pseudoPhase],
           source.cwStream,
           source.cwLengthInts);
  }

  private static void compressTheSurprisingValues(final CompressedState target, final CpcSketch source,
      final int[] pairs, final int numPairs) {
    assert (numPairs > 0);
    target.numCsv = numPairs;
    final int srcK = 1 << source.lgK;
    final int numBaseBits = CpcCompression.golombChooseNumberOfBaseBits(srcK + numPairs, numPairs);
    final int pairBufLen = safeLengthForCompressedPairBuf(srcK, numPairs, numBaseBits);
    final int[] pairBuf = new int[pairBufLen];

    target.csvLengthInts = (int) lowLevelCompressPairs(pairs, numPairs, numBaseBits, pairBuf);

    // At this point we free the unused portion of the compression output buffer.
    //  final int[] shorterBuf = Arrays.copyOf(pairBuf, target.csvLength);
    //  target.compressedWindow = shorterBuf;

    target.csvStream = pairBuf; //avoid extra copy
  }

  //allocates and returns an array of uncompressed pairs.
  //the length of this array is known to the source sketch.
  private static int[] uncompressTheSurprisingValues(final CompressedState source) {
    final int srcK = 1 << source.lgK;
    final int numPairs = source.numCsv;
    assert numPairs > 0;
    final int[] pairs = new int[numPairs];
    final int numBaseBits = CpcCompression.golombChooseNumberOfBaseBits(srcK + numPairs, numPairs);
    lowLevelUncompressPairs(pairs, numPairs, numBaseBits, source.csvStream, source.csvLengthInts);
    return pairs;
  }

  private static void compressSparseFlavor(final CompressedState target, final CpcSketch source) {
    assert (source.slidingWindow == null); //there is no window to compress
    final PairTable srcPairTable = source.pairTable;
    final int srcNumPairs = srcPairTable.getNumPairs();
    final int[] srcPairArr = PairTable.unwrappingGetItems(srcPairTable, srcNumPairs);
    introspectiveInsertionSort(srcPairArr, 0, srcNumPairs - 1);
    compressTheSurprisingValues(target, source, srcPairArr, srcNumPairs);
  }

  private static void uncompressSparseFlavor(final CpcSketch target, final CompressedState source) {
    assert (source.cwStream == null);
    assert (source.csvStream != null);
    final int[] srcPairArr = uncompressTheSurprisingValues(source);
    final int numPairs = source.numCsv;
    final PairTable table = PairTable.newInstanceFromPairsArray(srcPairArr, numPairs, source.lgK);
    target.pairTable = table;
  }

  //The empty space that this leaves at the beginning of the output array
  // will be filled in later by the caller.
  private static int[] trickyGetPairsFromWindow(final byte[] window, final int k, final int numPairsToGet,
      final int emptySpace) {
    final int outputLength = emptySpace + numPairsToGet;
    final int[] pairs = new int[outputLength];
    int rowIndex = 0;
    int pairIndex = emptySpace;
    for (rowIndex = 0; rowIndex < k; rowIndex++) {
      int wByte = window[rowIndex] & 0XFF;
      while (wByte != 0) {
        final int colIndex = Integer.numberOfTrailingZeros(wByte);
        //      assert (colIndex < 8);
        wByte ^= (1 << colIndex); // erase the 1
        pairs[pairIndex++] = (rowIndex << 6) | colIndex;
      }
    }
    assert (pairIndex == outputLength);
    return (pairs);
  }

  //This is complicated because it effectively builds a Sparse version
  //of a Pinned sketch before compressing it. Hence the name Hybrid.
  private static void compressHybridFlavor(final CompressedState target, final CpcSketch source) {
    final int srcK = 1 << source.lgK;
    final PairTable srcPairTable = source.pairTable;
    final int srcNumPairs = srcPairTable.getNumPairs();
    final int[] srcPairArr = PairTable.unwrappingGetItems(srcPairTable, srcNumPairs);
    introspectiveInsertionSort(srcPairArr, 0, srcNumPairs - 1);
    final byte[] srcSlidingWindow = source.slidingWindow;
    final int srcWindowOffset = source.windowOffset;
    final long srcNumCoupons = source.numCoupons;
    assert (srcSlidingWindow != null);
    assert (srcWindowOffset == 0);
    final long numPairs = srcNumCoupons - srcNumPairs; // because the window offset is zero
    assert numPairs < Integer.MAX_VALUE;
    final int numPairsFromArray = (int) numPairs;

    assert (numPairsFromArray + srcNumPairs) == srcNumCoupons; //for test
    final int[] allPairs
      = trickyGetPairsFromWindow(srcSlidingWindow, srcK, numPairsFromArray, srcNumPairs);

    PairTable.merge(srcPairArr, 0, srcNumPairs,
        allPairs, srcNumPairs, numPairsFromArray,
        allPairs, 0);  // note the overlapping subarray trick

    //FOR TESTING If needed
    //        for (int i = 0; i < (source.numCoupons - 1); i++) {
    //          assert (Integer.compareUnsigned(allPairs[i], allPairs[i + 1]) < 0); }

    compressTheSurprisingValues(target, source, allPairs, (int) srcNumCoupons);
  }

  private static void uncompressHybridFlavor(final CpcSketch target, final CompressedState source) {
    assert (source.cwStream == null);
    assert (source.csvStream != null);
    final int[] pairs = uncompressTheSurprisingValues(source); //fail path 3
    final int numPairs = source.numCsv;
    // In the hybrid flavor, some of these pairs actually
    // belong in the window, so we will separate them out,
    // moving the "true" pairs to the bottom of the array.
    final int srcLgK = source.lgK;
    final int k = 1 << srcLgK;

    final byte[] window = new byte[k];

    int nextTruePair = 0;

    for (int i = 0; i < numPairs; i++) {
      final int rowCol = pairs[i];
      assert (rowCol != -1);
      final int col = rowCol & 63;
      if (col < 8) {
        final int row = rowCol >>> 6;
        window[row] |= (1 << col); // set the window bit
      }
      else {
        pairs[nextTruePair++] = rowCol; // move true pair down
      }
    }

    assert (source.getWindowOffset() == 0);
    target.windowOffset = 0;

    final PairTable table = PairTable.newInstanceFromPairsArray(pairs, nextTruePair, srcLgK);
    target.pairTable = table;
    target.slidingWindow = window;
  }

  private static void compressPinnedFlavor(final CompressedState target, final CpcSketch source) {
    compressTheWindow(target, source);
    final PairTable srcPairTable = source.pairTable;
    final int numPairs = srcPairTable.getNumPairs();

    if (numPairs > 0) {
      final int[] pairs = PairTable.unwrappingGetItems(srcPairTable, numPairs);

      // Here we subtract 8 from the column indices.  Because they are stored in the low 6 bits
      // of each rowCol pair, and because no column index is less than 8 for a "Pinned" sketch,
      // I believe we can simply subtract 8 from the pairs themselves.

      // shift the columns over by 8 positions before compressing (because of the window)
      for (int i = 0; i < numPairs; i++) {
        assert (pairs[i] & 63) >= 8;
        pairs[i] -= 8;
      }

      introspectiveInsertionSort(pairs, 0, numPairs - 1);
      compressTheSurprisingValues(target, source, pairs, numPairs);
    }
  }

  private static void uncompressPinnedFlavor(final CpcSketch target, final CompressedState source) {
    assert (source.cwStream != null);
    uncompressTheWindow(target, source);
    final int srcLgK = source.lgK;
    final int numPairs = source.numCsv;
    if (numPairs == 0) {
      target.pairTable = new PairTable(2, 6 + srcLgK);
    }
    else {
      assert numPairs > 0;
      assert source.csvStream != null;
      final int[] pairs = uncompressTheSurprisingValues(source);
      // undo the compressor's 8-column shift
      for (int i = 0; i < numPairs; i++) {
        assert (pairs[i] & 63) < 56;
        pairs[i] += 8;
      }
      final PairTable table = PairTable.newInstanceFromPairsArray(pairs, numPairs, srcLgK);
      target.pairTable = table;
    }
  }

  //Complicated by the existence of both a left fringe and a right fringe.
  private static void compressSlidingFlavor(final CompressedState target, final CpcSketch source) {

    compressTheWindow(target, source);
    final PairTable srcPairTable = source.pairTable;

    final int numPairs = srcPairTable.getNumPairs();

    if (numPairs > 0) {
      final int[] pairs = PairTable.unwrappingGetItems(srcPairTable, numPairs);

      // Here we apply a complicated transformation to the column indices, which
      // changes the implied ordering of the pairs, so we must do it before sorting.

      final int pseudoPhase = determinePseudoPhase(source.lgK, source.numCoupons); // NB
      assert (pseudoPhase < 16);
      final byte[] permutation = columnPermutationsForEncoding[pseudoPhase];

      final int offset = source.windowOffset;
      assert ((offset > 0) && (offset <= 56));

      for (int i = 0; i < numPairs; i++) {
        final int rowCol = pairs[i];
        final int  row = rowCol >>> 6;
        int col = (rowCol & 63);
        // first rotate the columns into a canonical configuration:
        //   new = ((old - (offset+8)) + 64) mod 64
        col = ((col + 56) - offset) & 63;
        assert (col >= 0) && (col < 56);
        // then apply the permutation
        col = permutation[col];
        pairs[i] = (row << 6) | col;
      }

      introspectiveInsertionSort(pairs, 0, numPairs - 1);
      compressTheSurprisingValues(target, source, pairs, numPairs);
    }
  }

  private static void uncompressSlidingFlavor(final CpcSketch target, final CompressedState source) {
    assert (source.cwStream != null);
    uncompressTheWindow(target, source);
    final int srcLgK = source.lgK;
    final int numPairs = source.numCsv;
    if (numPairs == 0) {
      target.pairTable = new PairTable(2, 6 + srcLgK);
    }
    else {
      assert (numPairs > 0);
      assert (source.csvStream != null);
      final int[] pairs = uncompressTheSurprisingValues(source);
      final int pseudoPhase = determinePseudoPhase(srcLgK, source.numCoupons); // NB
      assert (pseudoPhase < 16);
      final byte[] permutation = columnPermutationsForDecoding[pseudoPhase];

      final int offset = source.getWindowOffset();
      assert (offset > 0) && (offset <= 56);

      for (int i = 0; i < numPairs; i++) {
        final int rowCol = pairs[i];
        final int row = rowCol >>> 6;
        int col = rowCol & 63;
        // first undo the permutation
        col = permutation[col];
        // then undo the rotation: old = (new + (offset+8)) mod 64
        col = (col + (offset + 8)) & 63;
        pairs[i] = (row << 6) | col;
      }

      final PairTable table = PairTable.newInstanceFromPairsArray(pairs, numPairs, srcLgK);
      target.pairTable = table;
    }
  }

  static CompressedState compress(final CpcSketch source, final CompressedState target) {

    final Flavor srcFlavor = source.getFlavor();

    switch (srcFlavor) {
      case EMPTY: break;
      case SPARSE:
        compressSparseFlavor(target, source);
        assert (target.cwStream == null);
        assert (target.csvStream != null);
        break;
      case HYBRID:
        compressHybridFlavor(target, source);
        assert (target.cwStream == null);
        assert (target.csvStream != null);
        break;
      case PINNED:
        compressPinnedFlavor(target, source);
        assert (target.cwStream != null);
        break;
      case SLIDING:
        compressSlidingFlavor(target, source);
        assert (target.cwStream != null);
        break;
        //default: not possible
    }

    return target;
   }

  static CpcSketch uncompress(final CompressedState source, final CpcSketch target) {
    assert (target != null);

    final Flavor srcFlavor = source.getFlavor();
    switch (srcFlavor) {
      case EMPTY: break;
      case SPARSE:
        assert (source.cwStream == null);
        uncompressSparseFlavor(target, source);
        break;
      case HYBRID:
        uncompressHybridFlavor(target, source);
        break;
      case PINNED:
        assert (source.cwStream != null);
        uncompressPinnedFlavor(target, source);
        break;
      case SLIDING:
        uncompressSlidingFlavor(target, source);
        break;
        //default: not possible
    }
    return target;
  }

  private static int golombChooseNumberOfBaseBits(final int k, final long count) {
    assert k >= 1L;
    assert count >= 1L;
    final long quotient = (k - count) / count; // integer division
    if (quotient == 0) { return 0; }
    return (int) floorLog2ofLong(quotient);
  }

  private static long floorLog2ofLong(final long x) { //not a good name
    assert (x >= 1L);
    long p = 0;
    long y = 1;
    while (true) {
      if (y == x) { return p; }
      if (y  > x) { return p - 1; }
      p  += 1;
      y <<= 1;
    }
  }

  private static long divideBy32RoundingUp(final long x) {
    final long tmp = x >>> 5;
    return ((tmp << 5) == x) ? tmp : tmp + 1;
  }

}