All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.tdunning.math.stats.Simple64 Maven / Gradle / Ivy

Go to download

Data structure which allows accurate estimation of quantiles and related rank statistics

The newest version!
/*
 * Licensed to Ted Dunning under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.tdunning.math.stats;


import java.nio.LongBuffer;

/**
 * Very simple variable byte encoding that always uses 64-bit units. The idea is that the next few
 * values are smashed into 64 bits using a few bits to indicate how they are fitted in and the rest
 * of the bits to fit each value into equal-sized chunks.
 *
 * <p>In this encoding, the low 4 bits of each word hold a size code that says how the remaining
 * 60 bits are divided. The possible ways are shown in the following table:
 *
 * <table>
 * <caption>Size codes for Simple64 compression</caption>
 * <tr><th>Code</th><th>Arrangement</th></tr>
 * <tr><td>14</td><td>1 x 60 bits</td></tr>
 * <tr><td>13</td><td>2 x 30 bits</td></tr>
 * <tr><td>12</td><td>3 x 20 bits</td></tr>
 * <tr><td>11</td><td>4 x 15 bits</td></tr>
 * <tr><td>10</td><td>5 x 12 bits</td></tr>
 * <tr><td>9</td><td>6 x 10 bits</td></tr>
 * <tr><td>8</td><td>7 x 8 bits (4 bits left over; the last value may use 12)</td></tr>
 * <tr><td>7</td><td>8 x 7 bits (4 bits left over; the last value may use 11)</td></tr>
 * <tr><td>6</td><td>10 x 6 bits</td></tr>
 * <tr><td>5</td><td>12 x 5 bits</td></tr>
 * <tr><td>4</td><td>15 x 4 bits</td></tr>
 * <tr><td>3</td><td>20 x 3 bits</td></tr>
 * <tr><td>2</td><td>30 x 2 bits</td></tr>
 * <tr><td>1</td><td>60 x 1 bit</td></tr>
 * </table>
 *
 * <p>Only values in the range {@code [0, 2^60)} can be encoded.
 */
public class Simple64 {
    /** Number of payload bits in each 64-bit word. */
    private static final int NUM_DATA_BITS = 60;

    /** Number of low-order bits holding the size code. */
    private static final int STATUS_BITS = 4;
    private static final long STATUS_MASK = (1L << STATUS_BITS) - 1;

    /** Bits left over in the 7 x 8 and 8 x 7 layouts; the last value of the word may use them. */
    private static final int SPARE_BITS = 4;

    /** Width in bits of one value, indexed by size code; 0 marks a code that is not used. */
    private static final int[] BITS_FOR_STATUS = {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 15, 20, 30, 60, 0};

    /** Number of values stored in one word, indexed by size code; 0 marks a code that is not used. */
    private static final int[] COUNT_FOR_STATUS = {0, 60, 30, 20, 15, 12, 10, 8, 7, 6, 5, 4, 3, 2, 1, 0};

    /** Inverse of {@link #BITS_FOR_STATUS}: size code indexed by width; 0 marks an unsupported width. */
    private static final int[] STATUS_FOR_BITS = new int[NUM_DATA_BITS + 1];

    static {
        for (int status = 1; status < BITS_FOR_STATUS.length; status++) {
            if (BITS_FOR_STATUS[status] != 0) {
                STATUS_FOR_BITS[BITS_FOR_STATUS[status]] = status;
            }
        }
    }

    // State of the streaming encoder used by add().
    /** One-based position of the next pending value to examine. */
    private int inputCompressable = 1;
    /** Smallest width that fits every pending value examined so far. */
    private int minBits = 1;
    /** {@code 1 << minBits}: the smallest value that no longer fits in {@link #minBits} bits. */
    private long maxFitPlus1 = 1L << minBits;
    /** Values accepted by add() that have not been written out yet. */
    private final long[] pending = new long[100];
    /** Number of valid entries in {@link #pending}. */
    private int inputCount;

    /** Puts the width scan back to its initial state and forgets all pending values. */
    private void reset() {
        inputCompressable = 1;
        minBits = 1;
        inputCount = 0;
        maxFitPlus1 = 1L << minBits;
    }

    /**
     * Streaming encoder: buffers one more value and emits a packed word once the buffered values
     * no longer fit into a single word together.
     *
     * @param v   the value to add; must be in {@code [0, 2^60)}
     * @param out receives the packed word in {@code out[0]} when one is produced
     * @return 0 if no word was written, otherwise the number of input values stored in
     *         {@code out[0]}
     * @throws IllegalArgumentException if {@code v} is negative or needs more than 60 bits; the
     *                                  encoder state is left untouched in that case
     */
    public int add(long v, long[] out) {
        if (!isEncodable(v)) {
            // Shifting such a value into the word would silently corrupt the other values.
            throw new IllegalArgumentException(
                    "Cannot compress input " + v + " with more than " + NUM_DATA_BITS + " bits");
        }
        pending[inputCount++] = v;

        while (inputCompressable <= inputCount) {
            final long nextData = pending[inputCompressable - 1];
            while (nextData >= maxFitPlus1 && minBits < NUM_DATA_BITS) {
                if (fitsUsingSpareBits(minBits, inputCompressable, nextData, maxFitPlus1)) {
                    break;
                }
                minBits++;
                maxFitPlus1 <<= 1;
                if (inputCompressable * minBits > NUM_DATA_BITS) {
                    // The value at this position cannot share a word with the earlier ones.
                    inputCompressable--;
                    break;
                }
            }
            inputCompressable++;

            if (inputCompressable * minBits > NUM_DATA_BITS) {
                // Time to compress: one more value would not fit.
                inputCompressable--;
                assert inputCompressable > 0;

                // Use the widest layout that still holds this many values.
                while (inputCompressable * (minBits + 1) <= NUM_DATA_BITS) {
                    minBits++;
                }

                final long word = pack(pending, 0, minBits);
                final int consumed = COUNT_FOR_STATUS[STATUS_FOR_BITS[minBits]];
                final int leftover = inputCount - consumed;
                assert leftover >= 0 : "consumed=" + consumed + " vs " + inputCompressable;

                // NOTE(review): the statements from here to the end of this method were missing
                // from the text this class was recovered from; they follow the documented
                // contract (return the count, hand the word back in out[0]) -- confirm upstream.
                System.arraycopy(pending, consumed, pending, 0, leftover);
                reset();
                inputCount = leftover; // leftover values are re-scanned on the next call
                out[0] = word;
                return consumed;
            }
        }
        return 0;
    }

    /**
     * Packs as many leading values of {@code uncompressed[inOffset .. inOffset + inSize)} as fit
     * into one word and appends that word to {@code compressedBuffer}.
     *
     * <p>NOTE(review): the declaration line of this method was missing from the text this class
     * was recovered from. Parameter names and the return type are taken from the method body and
     * from the call in {@link #compress}; the visibility is a guess -- confirm upstream.
     *
     * @param uncompressed     source values, each in {@code [0, 2^60)}
     * @param inOffset         index of the first value to encode
     * @param inSize           number of values available from {@code inOffset}; must be positive
     * @param compressedBuffer destination for the packed word
     * @return the number of values that were encoded
     * @throws IllegalArgumentException if {@code inSize} is not positive or a scanned value is
     *                                  negative or needs more than 60 bits
     */
    public static int compressSingle(final long[] uncompressed, final int inOffset, final int inSize,
                                     final LongBuffer compressedBuffer) {
        if (inSize < 1) {
            throw new IllegalArgumentException("Cannot compress input with non positive size " + inSize);
        }
        int inputCompressable = 1;
        int minBits = 1;
        long maxFitPlus1 = 1L << minBits;

        do {
            final int index = inOffset + inputCompressable - 1;
            final long nextData = uncompressed[index];
            if (!isEncodable(nextData)) {
                throw new IllegalArgumentException("Cannot compress input " + nextData + " with more than "
                        + NUM_DATA_BITS + " bits (at offSet " + index + ")");
            }
            while (nextData >= maxFitPlus1 && minBits < NUM_DATA_BITS) {
                if (fitsUsingSpareBits(minBits, inputCompressable, nextData, maxFitPlus1)) {
                    break;
                }
                minBits++;
                maxFitPlus1 <<= 1;
                if (inputCompressable * minBits > NUM_DATA_BITS) {
                    inputCompressable--;
                    break;
                }
            }
            inputCompressable++;
        } while (inputCompressable * minBits <= NUM_DATA_BITS && inputCompressable <= inSize);
        inputCompressable--;
        assert inputCompressable > 0;

        // Use the widest layout that still holds this many values.
        while (inputCompressable * (minBits + 1) <= NUM_DATA_BITS) {
            minBits++;
        }
        if ((inputCompressable + 1) * minBits <= NUM_DATA_BITS) {
            // Not enough input available to fill this layout: step to the next wider one, which
            // holds fewer values.
            minBits++;
        }

        compressedBuffer.put(pack(uncompressed, inOffset, minBits));
        return COUNT_FOR_STATUS[STATUS_FOR_BITS[minBits]];
    }

    /**
     * Decodes one packed word.
     *
     * @param s9           the packed word
     * @param decompressed destination array
     * @param outOffset    index in {@code decompressed} of the first decoded value
     * @return the number of values decoded
     * @throws IllegalArgumentException if the size code in the low 4 bits is not a known one
     */
    private static int decompressSingle(final long s9, final long[] decompressed, final int outOffset) {
        final int status = (int) (s9 & STATUS_MASK);
        final int count = COUNT_FOR_STATUS[status];
        if (count == 0) {
            throw new IllegalArgumentException("Unknown Simple64 status: " + status);
        }
        final int bits = BITS_FOR_STATUS[status];
        final long mask = (1L << bits) - 1;

        long data = s9 >>> STATUS_BITS;
        final int last = count - 1;
        for (int i = 0; i < last; i++) {
            decompressed[outOffset + i] = data & mask;
            data >>>= bits;
        }
        // The last value owns every remaining bit; this is what lets the 7 x 8 and 8 x 7 layouts
        // carry a 12-bit or 11-bit final value.
        decompressed[outOffset + last] = data;
        return count;
    }

    /**
     * Compresses {@code size} values starting at {@code offset} and appends the packed words to
     * {@code compressedBuffer}.
     *
     * @param compressedBuffer destination for the packed words
     * @param unCompressedData source values, each in {@code [0, 2^60)}
     * @param offset           index of the first value to encode
     * @param size             number of values to encode
     * @throws IllegalArgumentException if a value is negative or needs more than 60 bits
     */
    @SuppressWarnings("WeakerAccess")
    public static void compress(LongBuffer compressedBuffer, long[] unCompressedData,
                                @SuppressWarnings("SameParameterValue") int offset, int size) {
        while (size > 0) {
            final int encoded = compressSingle(unCompressedData, offset, size, compressedBuffer);
            offset += encoded;
            size -= encoded;
        }
    }

    /**
     * Rewinds {@code compressedBuffer} and decodes words from it until {@code unCompressedData}
     * is full.
     *
     * @param compressedBuffer source of packed words; it is rewound before reading
     * @param unCompressedData destination; its length determines how many values are decoded
     * @return the number of values decoded
     */
    @SuppressWarnings("WeakerAccess")
    public static int decompress(LongBuffer compressedBuffer, long[] unCompressedData) {
        compressedBuffer.rewind();
        int totalOut = 0;
        while (totalOut < unCompressedData.length) {
            totalOut += decompressSingle(compressedBuffer.get(), unCompressedData, totalOut);
        }
        return totalOut;
    }

    /** Returns whether {@code value} is non-negative and fits into the 60 payload bits. */
    private static boolean isEncodable(long value) {
        // Any bit at or above position 60 (including the sign bit) rules the value out.
        return (value >>> NUM_DATA_BITS) == 0;
    }

    /**
     * Returns whether {@code value}, although too wide for {@code bits}, still fits because it is
     * the last value of a 7 x 8 or 8 x 7 word and can take the spare bits.
     */
    private static boolean fitsUsingSpareBits(int bits, int position, long value, long maxFitPlus1) {
        final boolean lastSlot = (bits == 7 && position == 8) || (bits == 8 && position == 7);
        return lastSlot && value < (maxFitPlus1 << SPARE_BITS);
    }

    /**
     * Builds one word from the values starting at {@code values[offset]}, using the layout whose
     * value width is {@code bits}.
     */
    private static long pack(long[] values, int offset, int bits) {
        final int status = STATUS_FOR_BITS[bits];
        if (status == 0) {
            throw new IllegalStateException("Simple64 internal error: unknown minBits: " + bits);
        }
        final int count = COUNT_FOR_STATUS[status];
        long word = status;
        int shift = STATUS_BITS;
        for (int i = 0; i < count; i++) {
            word |= values[offset + i] << shift;
            shift += bits;
        }
        return word;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy