All downloads are free. The search and download functionality uses the official Maven repository.

me.lemire.integercompression.Simple9 Maven / Gradle / Ivy

Go to download

It is a library for compressing and uncompressing arrays of integers very quickly. It assumes that most (but not all) values in your array use fewer than 32 bits.

There is a newer version: 0.2.1
Show newest version
/**
 * This code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
 *
 * (c) Daniel Lemire, http://lemire.me/en/
 */

package me.lemire.integercompression;


/**
 * This is an implementation of the popular Simple9 scheme.
 * It is limited to 28-bit integers (between 0 and 2^28-1).
 *
 * Every compressed word carries a 4-bit selector in its top bits; the
 * selector picks one of nine layouts (28 x 1 bit, 14 x 2 bits, 9 x 3 bits,
 * 7 x 4 bits, 5 x 5 bits, 4 x 7 bits, 3 x 9 bits, 2 x 14 bits or
 * 1 x 28 bits) for the values packed in the remaining bits.
 *
 * Values outside the supported range, including negative values, cannot be
 * represented and make compression fail with a {@link RuntimeException}.
 *
 * Note that this does not use differential coding: if you are working on sorted
 * lists, you must compute the deltas separately.
 *
 * @author Daniel Lemire
 *
 */
public final class Simple9 implements IntegerCODEC, SkippableIntegerCODEC {
        /** Bit width of each packed value, indexed by selector. */
        private final static int bitLength[] = { 1, 2, 3, 4, 5, 7, 9, 14, 28 };

        /** Number of values stored in one word, indexed by selector. */
        private final static int codeNum[] = { 28, 14, 9, 7, 5, 4, 3, 2, 1 };

        /** Selector of the fallback layout: a single 28-bit value per word. */
        private final static int LAST_SELECTOR = 8;

        /**
         * Tells whether a value can be stored in the given number of bits.
         * The shift is unsigned, so negative values (sign bit set) never fit.
         *
         * @param value candidate value
         * @param bits  available width, between 1 and 28
         * @return true when no bit at or above position {@code bits} is set
         */
        private static boolean fits(int value, int bits) {
                return (value >>> bits) == 0;
        }

        /**
         * Builds the word that stores one value with the 28-bit selector.
         *
         * @param value value to encode
         * @return the encoded word
         * @throws RuntimeException if the value is negative or needs more than 28 bits
         */
        private static int encodeSingle(int value) {
                if (!fits(value, bitLength[LAST_SELECTOR]))
                        throw new RuntimeException("Too big a number");
                return value | (LAST_SELECTOR << 28);
        }

        /**
         * Compresses {@code inlength} integers from {@code in}, starting at
         * {@code inpos}, into {@code out}, starting at {@code outpos}. No length
         * header is written. Both positions are advanced past the data consumed
         * and produced; they are left untouched if an exception is thrown.
         *
         * @param in       input array
         * @param inpos    position of the first value to compress (updated)
         * @param inlength number of values to compress
         * @param out      output array
         * @param outpos   position of the first word to write (updated)
         * @throws RuntimeException if a value is negative or needs more than 28 bits
         */
        @Override
        public void headlessCompress(int[] in, IntWrapper inpos, int inlength,
                int out[], IntWrapper outpos) {
                int tmpoutpos = outpos.get();
                int currentPos = inpos.get();
                final int finalin = currentPos + inlength;
                // Fast path: more than 28 values remain, so every selector can
                // read its full quota without running past finalin.
                outer: while (currentPos < finalin - 28) {
                        mainloop: for (int selector = 0; selector < LAST_SELECTOR; selector++) {
                                final int compressedNum = codeNum[selector];
                                final int b = bitLength[selector];
                                int res = 0;
                                for (int i = 0; i < compressedNum; i++) {
                                        final int value = in[currentPos + i];
                                        if (!fits(value, b))
                                                continue mainloop; // try a wider layout
                                        res = (res << b) + value;
                                }
                                out[tmpoutpos++] = res | (selector << 28);
                                currentPos += compressedNum;
                                continue outer;
                        }
                        // No multi-value layout fits: store a single value.
                        out[tmpoutpos++] = encodeSingle(in[currentPos]);
                        currentPos++;
                }
                // Tail: at most 28 values remain, so a word may be only partly filled.
                outer: while (currentPos < finalin) {
                        final int remaining = finalin - currentPos;
                        mainloop: for (int selector = 0; selector < LAST_SELECTOR; selector++) {
                                final int capacity = codeNum[selector];
                                final int compressedNum = Math.min(capacity, remaining);
                                final int b = bitLength[selector];
                                int res = 0;
                                for (int i = 0; i < compressedNum; i++) {
                                        final int value = in[currentPos + i];
                                        if (!fits(value, b))
                                                continue mainloop; // try a wider layout
                                        res = (res << b) + value;
                                }
                                // Move the values to the slots a full word would use,
                                // leaving the unused trailing slots at zero.
                                res <<= (capacity - compressedNum) * b;
                                out[tmpoutpos++] = res | (selector << 28);
                                currentPos += compressedNum;
                                continue outer;
                        }
                        out[tmpoutpos++] = encodeSingle(in[currentPos]);
                        currentPos++;
                }
                inpos.set(currentPos);
                outpos.set(tmpoutpos);
        }

        /**
         * Uncompresses {@code outlength} integers from {@code in}, starting at
         * {@code inpos}, into {@code out}, starting at {@code outpos}. Both
         * positions are advanced past the data consumed and produced.
         *
         * @param in        compressed input array
         * @param inpos     position of the first word to read (updated)
         * @param inlength  not used by this implementation
         * @param out       output array
         * @param outpos    position of the first value to write (updated)
         * @param outlength number of values to recover
         * @throws RuntimeException if a word carries a selector greater than 8
         */
        @Override
        public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
                int[] out, IntWrapper outpos, int outlength) {
                int currentPos = outpos.get();
                int tmpinpos = inpos.get();
                final int finalout = currentPos + outlength;
                // Fast path: more than 28 values are still expected, so every
                // word is full and can be unpacked with fixed shifts.
                while (currentPos < finalout - 28) {
                        int val = in[tmpinpos++];
                        int header = val >>> 28;
                        switch (header) {
                        case 0: { // number : 28, bitwidth : 1
                                out[currentPos++] = (val << 4) >>> 31;
                                out[currentPos++] = (val << 5) >>> 31;
                                out[currentPos++] = (val << 6) >>> 31;
                                out[currentPos++] = (val << 7) >>> 31;
                                out[currentPos++] = (val << 8) >>> 31;
                                out[currentPos++] = (val << 9) >>> 31;
                                out[currentPos++] = (val << 10) >>> 31;
                                out[currentPos++] = (val << 11) >>> 31;
                                out[currentPos++] = (val << 12) >>> 31;
                                out[currentPos++] = (val << 13) >>> 31; // 10
                                out[currentPos++] = (val << 14) >>> 31;
                                out[currentPos++] = (val << 15) >>> 31;
                                out[currentPos++] = (val << 16) >>> 31;
                                out[currentPos++] = (val << 17) >>> 31;
                                out[currentPos++] = (val << 18) >>> 31;
                                out[currentPos++] = (val << 19) >>> 31;
                                out[currentPos++] = (val << 20) >>> 31;
                                out[currentPos++] = (val << 21) >>> 31;
                                out[currentPos++] = (val << 22) >>> 31;
                                out[currentPos++] = (val << 23) >>> 31; // 20
                                out[currentPos++] = (val << 24) >>> 31;
                                out[currentPos++] = (val << 25) >>> 31;
                                out[currentPos++] = (val << 26) >>> 31;
                                out[currentPos++] = (val << 27) >>> 31;
                                out[currentPos++] = (val << 28) >>> 31;
                                out[currentPos++] = (val << 29) >>> 31;
                                out[currentPos++] = (val << 30) >>> 31;
                                out[currentPos++] = (val << 31) >>> 31;
                                break;
                        }
                        case 1: { // number : 14, bitwidth : 2
                                out[currentPos++] = (val << 4) >>> 30;
                                out[currentPos++] = (val << 6) >>> 30;
                                out[currentPos++] = (val << 8) >>> 30;
                                out[currentPos++] = (val << 10) >>> 30;
                                out[currentPos++] = (val << 12) >>> 30;
                                out[currentPos++] = (val << 14) >>> 30;
                                out[currentPos++] = (val << 16) >>> 30;
                                out[currentPos++] = (val << 18) >>> 30;
                                out[currentPos++] = (val << 20) >>> 30;
                                out[currentPos++] = (val << 22) >>> 30; // 10
                                out[currentPos++] = (val << 24) >>> 30;
                                out[currentPos++] = (val << 26) >>> 30;
                                out[currentPos++] = (val << 28) >>> 30;
                                out[currentPos++] = (val << 30) >>> 30;
                                break;
                        }
                        case 2: { // number : 9, bitwidth : 3
                                out[currentPos++] = (val << 5) >>> 29;
                                out[currentPos++] = (val << 8) >>> 29;
                                out[currentPos++] = (val << 11) >>> 29;
                                out[currentPos++] = (val << 14) >>> 29;
                                out[currentPos++] = (val << 17) >>> 29;
                                out[currentPos++] = (val << 20) >>> 29;
                                out[currentPos++] = (val << 23) >>> 29;
                                out[currentPos++] = (val << 26) >>> 29;
                                out[currentPos++] = (val << 29) >>> 29;
                                break;
                        }
                        case 3: { // number : 7, bitwidth : 4
                                out[currentPos++] = (val << 4) >>> 28;
                                out[currentPos++] = (val << 8) >>> 28;
                                out[currentPos++] = (val << 12) >>> 28;
                                out[currentPos++] = (val << 16) >>> 28;
                                out[currentPos++] = (val << 20) >>> 28;
                                out[currentPos++] = (val << 24) >>> 28;
                                out[currentPos++] = (val << 28) >>> 28;
                                break;
                        }
                        case 4: { // number : 5, bitwidth : 5
                                out[currentPos++] = (val << 7) >>> 27;
                                out[currentPos++] = (val << 12) >>> 27;
                                out[currentPos++] = (val << 17) >>> 27;
                                out[currentPos++] = (val << 22) >>> 27;
                                out[currentPos++] = (val << 27) >>> 27;
                                break;
                        }
                        case 5: { // number : 4, bitwidth : 7
                                out[currentPos++] = (val << 4) >>> 25;
                                out[currentPos++] = (val << 11) >>> 25;
                                out[currentPos++] = (val << 18) >>> 25;
                                out[currentPos++] = (val << 25) >>> 25;
                                break;
                        }
                        case 6: { // number : 3, bitwidth : 9
                                out[currentPos++] = (val << 5) >>> 23;
                                out[currentPos++] = (val << 14) >>> 23;
                                out[currentPos++] = (val << 23) >>> 23;
                                break;
                        }
                        case 7: { // number : 2, bitwidth : 14
                                out[currentPos++] = (val << 4) >>> 18;
                                out[currentPos++] = (val << 18) >>> 18;
                                break;
                        }
                        case 8: { // number : 1, bitwidth : 28
                                out[currentPos++] = (val << 4) >>> 4;
                                break;
                        }
                        default: {
                                throw new RuntimeException("shouldn't happen: limited to 28-bit integers");
                        }
                        }
                }
                // Tail: at most 28 values are still expected, so the last word
                // of a run may be only partly filled. Decode from the tables.
                while (currentPos < finalout) {
                        final int val = in[tmpinpos++];
                        final int header = val >>> 28;
                        if (header > LAST_SELECTOR)
                                throw new RuntimeException("shouldn't happen");
                        final int b = bitLength[header];
                        final int howmany = Math.min(codeNum[header], finalout
                                - currentPos);
                        // The payload sits in the low codeNum * b bits of the word;
                        // shifting left by the rest brings the first value to the top.
                        int shift = 32 - codeNum[header] * b;
                        for (int k = 0; k < howmany; ++k) {
                                out[currentPos++] = (val << shift) >>> (32 - b);
                                shift += b;
                        }
                }
                outpos.set(currentPos);
                inpos.set(tmpinpos);

        }

        /**
         * Compresses {@code inlength} integers, writing the value count as a
         * one-word header before the packed data. Nothing is written when
         * {@code inlength} is zero.
         *
         * @param in       input array
         * @param inpos    position of the first value to compress (updated)
         * @param inlength number of values to compress
         * @param out      output array
         * @param outpos   position of the first word to write (updated)
         * @throws RuntimeException if a value is negative or needs more than 28 bits
         */
        @Override
        public void compress(int[] in, IntWrapper inpos, int inlength, int[] out,
                IntWrapper outpos) {
                if (inlength == 0)
                        return;
                out[outpos.get()] = inlength;
                outpos.increment();
                headlessCompress(in, inpos, inlength, out, outpos);
        }

        /**
         * Uncompresses data produced by
         * {@link #compress(int[], IntWrapper, int, int[], IntWrapper)}: reads the
         * value count from the first word, then unpacks that many values.
         * Nothing is read when {@code inlength} is zero.
         *
         * @param in       compressed input array
         * @param inpos    position of the header word (updated)
         * @param inlength length of the compressed input; only tested against zero
         * @param out      output array
         * @param outpos   position of the first value to write (updated)
         */
        @Override
        public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out,
                IntWrapper outpos) {
                if (inlength == 0)
                        return;
                final int outlength = in[inpos.get()];
                inpos.increment();
                headlessUncompress(in, inpos, inlength, out, outpos, outlength);

        }

        /**
         * @return the simple name of this class
         */
        @Override
        public String toString() {
                return this.getClass().getSimpleName();
        }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy