All Downloads are FREE. Search and download functionalities are using the official Maven repository.

me.lemire.integercompression.Simple9 Maven / Gradle / Ivy

Go to download

A library for compressing and decompressing arrays of integers very quickly. It assumes that most (but not all) values in your array need fewer than 32 bits.

There is a newer version: 0.2.1
Show newest version
/**
 * This code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
 *
 * (c) Daniel Lemire, http://lemire.me/en/
 */

package me.lemire.integercompression;

/**
 * This is an implementation of the popular Simple9 scheme. It is limited to
 * 28-bit integers (between 0 and 2^28-1).
 * 
 * Every 32-bit output word consists of a 4-bit selector stored in the top four
 * bits, followed by 28 payload bits. The selector (0 to 8) tells how many
 * values the word holds and how wide each one is: 28x1, 14x2, 9x3, 7x4, 5x5,
 * 4x7, 3x9, 2x14 or 1x28 bits. Values are stored most-significant first and
 * right-aligned in the payload (selectors whose groups use fewer than 28 bits
 * leave the bits just below the selector at zero).
 * 
 * Values outside the range 0 to 2^28-1, negative values included, cannot be
 * represented: compression throws a {@link RuntimeException} for them.
 * 
 * Note that this does not use differential coding: if you are working on sorted
 * lists, you must compute the deltas separately.
 * 
 * @author Daniel Lemire
 * 
 */
public final class Simple9 implements IntegerCODEC, SkippableIntegerCODEC {

	/** Selector used for a word that carries one single 28-bit value. */
	private static final int SINGLE_SELECTOR = 8;

	/** Width in bits of each packed value, indexed by selector. */
	private static final int[] bitLength = { 1, 2, 3, 4, 5, 7, 9, 14, 28 };

	/** Number of values stored in one output word, indexed by selector. */
	private static final int[] codeNum = { 28, 14, 9, 7, 5, 4, 3, 2, 1 };

	/**
	 * Packs {@code inlength} integers from {@code in}, starting at
	 * {@code inpos}, into Simple9 words written to {@code out} starting at
	 * {@code outpos}. No length header is written. On normal return both
	 * position wrappers are advanced past the consumed input and the produced
	 * output; if an exception is thrown they are left untouched.
	 * 
	 * For each group the smallest selector whose bit width can hold every value
	 * of the group is used. The last word may hold fewer values than its
	 * selector announces; the payload is then shifted left so that the present
	 * values sit where the decoder expects the first values of a full word.
	 * 
	 * @param in       values to compress, each in the range 0 to 2^28-1
	 * @param inpos    read position in {@code in}, updated on return
	 * @param inlength number of values to compress
	 * @param out      destination array
	 * @param outpos   write position in {@code out}, updated on return
	 * @throws RuntimeException if a value does not fit in 28 bits (this
	 *                          includes negative values)
	 */
	@Override
	public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) {
		int tmpoutpos = outpos.get();
		int currentPos = inpos.get();
		final int finalin = currentPos + inlength;
		// Fast path: more than 28 values remain, so even the largest group
		// (28 values) can be read without clamping to the end of the input.
		outer: while (currentPos < finalin - 28) {
			mainloop: for (int selector = 0; selector < SINGLE_SELECTOR; selector++) {
				int res = 0;
				final int compressedNum = codeNum[selector];
				final int b = bitLength[selector];
				for (int i = 0; i < compressedNum; i++) {
					final int value = in[currentPos + i];
					// Unsigned fit test: a negative value has its top bit set
					// and must never be accepted by a narrow selector.
					if ((value >>> b) != 0) {
						continue mainloop;
					}
					res = (res << b) | value;
				}
				res |= selector << 28;
				out[tmpoutpos++] = res;
				currentPos += compressedNum;
				continue outer;
			}
			// No multi-value selector fits: store the value on its own.
			final int packed = packSingle(in[currentPos]);
			out[tmpoutpos++] = packed;
			currentPos++;
		}
		// Tail: at most 28 values remain, groups are clamped to what is left.
		outer: while (currentPos < finalin) {
			mainloop: for (int selector = 0; selector < SINGLE_SELECTOR; selector++) {
				int res = 0;
				final int compressedNum = Math.min(codeNum[selector], finalin - currentPos);
				final int b = bitLength[selector];
				for (int i = 0; i < compressedNum; i++) {
					final int value = in[currentPos + i];
					if ((value >>> b) != 0) {
						continue mainloop;
					}
					res = (res << b) | value;
				}
				// A short group is moved up to the slots of the first values
				// of a full word; the unused trailing slots stay zero.
				if (compressedNum != codeNum[selector]) {
					res <<= (codeNum[selector] - compressedNum) * b;
				}
				res |= selector << 28;
				out[tmpoutpos++] = res;
				currentPos += compressedNum;
				continue outer;
			}
			final int packed = packSingle(in[currentPos]);
			out[tmpoutpos++] = packed;
			currentPos++;
		}
		inpos.set(currentPos);
		outpos.set(tmpoutpos);
	}

	/**
	 * Builds the output word that stores one value alone (selector 8).
	 * 
	 * @param value the value to store
	 * @return the value combined with selector 8 in the top four bits
	 * @throws RuntimeException if the value needs more than 28 bits, which is
	 *                          also the case for every negative value
	 */
	private static int packSingle(int value) {
		if ((value >>> bitLength[SINGLE_SELECTOR]) != 0) {
			throw new RuntimeException("Too big a number");
		}
		return value | (SINGLE_SELECTOR << 28);
	}

	/**
	 * Decodes Simple9 words from {@code in}, starting at {@code inpos}, until
	 * {@code outlength} integers have been written to {@code out} starting at
	 * {@code outpos}. On normal return both position wrappers are advanced.
	 * 
	 * @param in        compressed words
	 * @param inpos     read position in {@code in}, updated on return
	 * @param inlength  not read by this method
	 * @param out       destination array
	 * @param outpos    write position in {@code out}, updated on return
	 * @param outlength number of integers to produce
	 * @throws RuntimeException if a word carries a selector greater than 8
	 */
	@Override
	public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos,
			int outlength) {
		int currentPos = outpos.get();
		int tmpinpos = inpos.get();
		final int finalout = currentPos + outlength;
		// Fast path: more than 28 outputs are still due, so any word can be
		// unpacked completely with fully unrolled shifts.
		while (currentPos < finalout - 28) {
			int val = in[tmpinpos++];
			int header = val >>> 28;
			switch (header) {
			case 0: { // number : 28, bitwidth : 1
				out[currentPos++] = (val << 4) >>> 31;
				out[currentPos++] = (val << 5) >>> 31;
				out[currentPos++] = (val << 6) >>> 31;
				out[currentPos++] = (val << 7) >>> 31;
				out[currentPos++] = (val << 8) >>> 31;
				out[currentPos++] = (val << 9) >>> 31;
				out[currentPos++] = (val << 10) >>> 31;
				out[currentPos++] = (val << 11) >>> 31;
				out[currentPos++] = (val << 12) >>> 31;
				out[currentPos++] = (val << 13) >>> 31; // 10
				out[currentPos++] = (val << 14) >>> 31;
				out[currentPos++] = (val << 15) >>> 31;
				out[currentPos++] = (val << 16) >>> 31;
				out[currentPos++] = (val << 17) >>> 31;
				out[currentPos++] = (val << 18) >>> 31;
				out[currentPos++] = (val << 19) >>> 31;
				out[currentPos++] = (val << 20) >>> 31;
				out[currentPos++] = (val << 21) >>> 31;
				out[currentPos++] = (val << 22) >>> 31;
				out[currentPos++] = (val << 23) >>> 31; // 20
				out[currentPos++] = (val << 24) >>> 31;
				out[currentPos++] = (val << 25) >>> 31;
				out[currentPos++] = (val << 26) >>> 31;
				out[currentPos++] = (val << 27) >>> 31;
				out[currentPos++] = (val << 28) >>> 31;
				out[currentPos++] = (val << 29) >>> 31;
				out[currentPos++] = (val << 30) >>> 31;
				out[currentPos++] = (val << 31) >>> 31;
				break;
			}
			case 1: { // number : 14, bitwidth : 2
				out[currentPos++] = (val << 4) >>> 30;
				out[currentPos++] = (val << 6) >>> 30;
				out[currentPos++] = (val << 8) >>> 30;
				out[currentPos++] = (val << 10) >>> 30;
				out[currentPos++] = (val << 12) >>> 30;
				out[currentPos++] = (val << 14) >>> 30;
				out[currentPos++] = (val << 16) >>> 30;
				out[currentPos++] = (val << 18) >>> 30;
				out[currentPos++] = (val << 20) >>> 30;
				out[currentPos++] = (val << 22) >>> 30; // 10
				out[currentPos++] = (val << 24) >>> 30;
				out[currentPos++] = (val << 26) >>> 30;
				out[currentPos++] = (val << 28) >>> 30;
				out[currentPos++] = (val << 30) >>> 30;
				break;
			}
			case 2: { // number : 9, bitwidth : 3
				out[currentPos++] = (val << 5) >>> 29;
				out[currentPos++] = (val << 8) >>> 29;
				out[currentPos++] = (val << 11) >>> 29;
				out[currentPos++] = (val << 14) >>> 29;
				out[currentPos++] = (val << 17) >>> 29;
				out[currentPos++] = (val << 20) >>> 29;
				out[currentPos++] = (val << 23) >>> 29;
				out[currentPos++] = (val << 26) >>> 29;
				out[currentPos++] = (val << 29) >>> 29;
				break;
			}
			case 3: { // number : 7, bitwidth : 4
				out[currentPos++] = (val << 4) >>> 28;
				out[currentPos++] = (val << 8) >>> 28;
				out[currentPos++] = (val << 12) >>> 28;
				out[currentPos++] = (val << 16) >>> 28;
				out[currentPos++] = (val << 20) >>> 28;
				out[currentPos++] = (val << 24) >>> 28;
				out[currentPos++] = (val << 28) >>> 28;
				break;
			}
			case 4: { // number : 5, bitwidth : 5
				out[currentPos++] = (val << 7) >>> 27;
				out[currentPos++] = (val << 12) >>> 27;
				out[currentPos++] = (val << 17) >>> 27;
				out[currentPos++] = (val << 22) >>> 27;
				out[currentPos++] = (val << 27) >>> 27;
				break;
			}
			case 5: { // number : 4, bitwidth : 7
				out[currentPos++] = (val << 4) >>> 25;
				out[currentPos++] = (val << 11) >>> 25;
				out[currentPos++] = (val << 18) >>> 25;
				out[currentPos++] = (val << 25) >>> 25;
				break;
			}
			case 6: { // number : 3, bitwidth : 9
				out[currentPos++] = (val << 5) >>> 23;
				out[currentPos++] = (val << 14) >>> 23;
				out[currentPos++] = (val << 23) >>> 23;
				break;
			}
			case 7: { // number : 2, bitwidth : 14
				out[currentPos++] = (val << 4) >>> 18;
				out[currentPos++] = (val << 18) >>> 18;
				break;
			}
			case 8: { // number : 1, bitwidth : 28
				out[currentPos++] = (val << 4) >>> 4;
				break;
			}
			default: {
				throw new RuntimeException("shouldn't happen: limited to 28-bit integers");
			}
			}
		}
		// Tail: at most 28 outputs are still due, so a word may have to be
		// unpacked only partially. One table-driven loop covers all selectors.
		while (currentPos < finalout) {
			final int val = in[tmpinpos++];
			final int header = val >>> 28;
			if (header > SINGLE_SELECTOR) {
				throw new RuntimeException("shouldn't happen");
			}
			final int b = bitLength[header];
			final int howmany = Math.min(codeNum[header], finalout - currentPos);
			// Left shift that brings the first value of the word to the top
			// bits: 32 minus the payload bits actually used by this selector.
			int shift = 32 - codeNum[header] * b;
			for (int k = 0; k < howmany; ++k) {
				out[currentPos++] = (val << shift) >>> (32 - b);
				shift += b;
			}
		}
		outpos.set(currentPos);
		inpos.set(tmpinpos);
	}

	/**
	 * Compresses {@code inlength} integers, writing first a header word that
	 * holds {@code inlength} and then the Simple9 words. Nothing is written
	 * when {@code inlength} is zero.
	 * 
	 * @param in       values to compress, each in the range 0 to 2^28-1
	 * @param inpos    read position in {@code in}, updated on return
	 * @param inlength number of values to compress
	 * @param out      destination array
	 * @param outpos   write position in {@code out}, updated on return
	 * @throws RuntimeException if a value does not fit in 28 bits (this
	 *                          includes negative values)
	 */
	@Override
	public void compress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) {
		if (inlength == 0) {
			return;
		}
		out[outpos.get()] = inlength;
		outpos.increment();
		headlessCompress(in, inpos, inlength, out, outpos);
	}

	/**
	 * Reverses {@link #compress}: reads the number of values from the header
	 * word at {@code inpos}, then decodes that many integers into {@code out}.
	 * Nothing is read or written when {@code inlength} is zero.
	 * 
	 * @param in       compressed data beginning with the header word
	 * @param inpos    read position in {@code in}, updated on return
	 * @param inlength only tested against zero here, then passed on unchanged
	 * @param out      destination array
	 * @param outpos   write position in {@code out}, updated on return
	 */
	@Override
	public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out, IntWrapper outpos) {
		if (inlength == 0) {
			return;
		}
		final int outlength = in[inpos.get()];
		inpos.increment();
		headlessUncompress(in, inpos, inlength, out, outpos, outlength);
	}

	/**
	 * @return the simple name of this class
	 */
	@Override
	public String toString() {
		return this.getClass().getSimpleName();
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy