me.lemire.integercompression.FastPFOR128 Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of JavaFastPFOR Show documentation
Show all versions of JavaFastPFOR Show documentation
JavaFastPFOR is a library that compresses and uncompresses arrays of
integers very quickly. It assumes that most (but not all) values in
your array need fewer than 32 bits.
/**
* This code is released under the
* Apache License Version 2.0 http://www.apache.org/licenses/.
*
* (c) Daniel Lemire, http://lemire.me/en/
*/
package me.lemire.integercompression;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;
/**
 * A FastPFOR-style codec that works on small blocks of {@link #BLOCK_SIZE}
 * integers.
 *
 * Note that this does not use differential coding: if you are working on sorted
 * lists, you should first compute deltas, see
 * {@code me.lemire.integercompression.differential.Delta#delta}.
 *
 * Instances keep mutable working buffers between calls, so for multi-threaded
 * applications each thread should use its own FastPFOR128 object.
 *
 * @author Daniel Lemire
 */
public final class FastPFOR128 implements IntegerCODEC, SkippableIntegerCODEC {
    /**
     * Per-exception overhead, in bits, charged by the cost model in
     * {@link #getBestBFromData(int[], int)} (each exception stores a
     * one-byte position).
     */
    static final int OVERHEAD_OF_EACH_EXCEPT = 8;

    /** Page size, in integers, used by the no-argument constructor. */
    public static final int DEFAULT_PAGE_SIZE = 65536;

    /** Number of integers encoded together with one common bit width. */
    public static final int BLOCK_SIZE = 128;

    /** Maximum number of integers encoded per page. */
    final int pageSize;

    /**
     * Exception values of the current page, bucketed by their width
     * {@code maxbits - b}; slot 0 is never allocated.
     */
    final int[][] dataTobePacked = new int[33][];

    /** Per-block byte metadata: b, exception count, maxbits, exception positions. */
    final ByteBuffer byteContainer;

    // Working area for compress and uncompress.
    /** Fill level (encode) or read cursor (decode) of each exception bucket. */
    final int[] dataPointers = new int[33];

    /** Histogram of bit widths for the block under analysis. */
    final int[] freqs = new int[33];

    /** Result of {@link #getBestBFromData(int[], int)}: {best b, exception count, max bits}. */
    final int[] bestbbestcexceptmaxb = new int[3];

    /**
     * Construct the FastPFOR128 CODEC.
     *
     * @param pagesize
     *            the desired page size (recommended value is
     *            {@link #DEFAULT_PAGE_SIZE})
     * @throws IllegalArgumentException
     *             if pagesize is smaller than {@link #BLOCK_SIZE}; such a
     *             page cannot hold a single block, so encoding and decoding
     *             would never make progress
     */
    public FastPFOR128(int pagesize) {
        if (pagesize < BLOCK_SIZE) {
            throw new IllegalArgumentException("pagesize must be at least "
                    + BLOCK_SIZE + " but was " + pagesize);
        }
        pageSize = pagesize;
        // Initiate arrays.
        byteContainer = ByteBuffer.allocateDirect(3 * pageSize
                / BLOCK_SIZE + pageSize);
        byteContainer.order(ByteOrder.LITTLE_ENDIAN);
        // Heuristic starting capacity, rounded up to a multiple of 32:
        // encodePage packs the exception buckets 32 values at a time and
        // only grows a bucket when it is about to fill up, so the array
        // length must always cover the last (partially used) group of 32.
        final int initialCapacity = Util.greatestMultiple(
                pageSize / 32 * 4 + 31, 32);
        for (int k = 1; k < dataTobePacked.length; ++k) {
            dataTobePacked[k] = new int[initialCapacity];
        }
    }

    /**
     * Construct the FastPFOR128 CODEC with default parameters.
     */
    public FastPFOR128() {
        this(DEFAULT_PAGE_SIZE);
    }

    /**
     * Compress data in blocks of BLOCK_SIZE integers (if fewer than BLOCK_SIZE
     * integers are provided, nothing is done). The length is rounded down to a
     * multiple of BLOCK_SIZE; trailing values beyond that are not consumed.
     *
     * @see SkippableIntegerCODEC#headlessCompress(int[], IntWrapper, int, int[], IntWrapper)
     */
    @Override
    public void headlessCompress(int[] in, IntWrapper inpos, int inlength,
            int[] out, IntWrapper outpos) {
        inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
        final int finalinpos = inpos.get() + inlength;
        while (inpos.get() != finalinpos) {
            final int thissize = Math.min(pageSize, finalinpos - inpos.get());
            encodePage(in, inpos, thissize, out, outpos);
        }
    }

    /**
     * Choose the bit width for the block of BLOCK_SIZE values starting at
     * {@code in[pos]} and store the decision in {@link #bestbbestcexceptmaxb}:
     * [0] the chosen width b, [1] the number of values that need more than b
     * bits (exceptions), [2] the largest width present in the block.
     *
     * @param in
     *            input array
     * @param pos
     *            index of the first value of the block
     */
    private void getBestBFromData(int[] in, int pos) {
        Arrays.fill(freqs, 0);
        for (int k = pos, k_end = pos + BLOCK_SIZE; k < k_end; ++k) {
            freqs[Util.bits(in[k])]++;
        }
        // Largest width that actually occurs. The histogram sums to
        // BLOCK_SIZE, so this scan stops at index 0 at the latest.
        int maxb = 32;
        while (freqs[maxb] == 0) {
            maxb--;
        }
        int bestb = maxb;
        int bestcexcept = 0;
        int bestcost = maxb * BLOCK_SIZE;
        int cexcept = 0;
        for (int b = maxb - 1; b >= 0; --b) {
            cexcept += freqs[b + 1];
            if (cexcept == BLOCK_SIZE) {
                break; // every value would be an exception
            }
            // the extra 8 is the cost of storing maxbits
            int thiscost = cexcept * OVERHEAD_OF_EACH_EXCEPT
                    + cexcept * (maxb - b) + b * BLOCK_SIZE + 8;
            if (maxb - b == 1) {
                // one-bit exceptions are implied by their position alone
                thiscost -= cexcept;
            }
            if (thiscost < bestcost) {
                bestcost = thiscost;
                bestb = b;
                bestcexcept = cexcept;
            }
        }
        bestbbestcexceptmaxb[0] = bestb;
        bestbbestcexceptmaxb[1] = bestcexcept;
        bestbbestcexceptmaxb[2] = maxb;
    }

    /**
     * Encode one page. The page written to {@code out} consists of: one
     * header word holding the offset from the header to the metadata section,
     * the bit-packed low bits of every block, the byte count of the block
     * metadata, the block metadata padded to whole ints, a bitmap of the
     * exception widths in use, and for each such width a count followed by
     * the bit-packed exception values.
     *
     * @param in
     *            input array
     * @param inpos
     *            read position, advanced past the encoded blocks
     * @param thissize
     *            number of integers available for this page
     * @param out
     *            output array
     * @param outpos
     *            write position, advanced past the page
     */
    private void encodePage(int[] in, IntWrapper inpos, int thissize,
            int[] out, IntWrapper outpos) {
        final int headerpos = outpos.get();
        outpos.increment();
        int tmpoutpos = outpos.get();

        // Clear working area.
        Arrays.fill(dataPointers, 0);
        byteContainer.clear();

        int tmpinpos = inpos.get();
        for (final int finalinpos = tmpinpos + thissize - BLOCK_SIZE; tmpinpos <= finalinpos; tmpinpos += BLOCK_SIZE) {
            getBestBFromData(in, tmpinpos);
            final int bestb = bestbbestcexceptmaxb[0];
            final int cexcept = bestbbestcexceptmaxb[1];
            byteContainer.put((byte) bestb);
            byteContainer.put((byte) cexcept);
            if (cexcept > 0) {
                final int maxb = bestbbestcexceptmaxb[2];
                byteContainer.put((byte) maxb);
                final int index = maxb - bestb;
                if (dataPointers[index] + cexcept >= dataTobePacked[index].length) {
                    int newsize = 2 * (dataPointers[index] + cexcept);
                    // make sure it is a multiple of 32
                    newsize = Util.greatestMultiple(newsize + 31, 32);
                    dataTobePacked[index] = Arrays.copyOf(
                            dataTobePacked[index], newsize);
                }
                // Bucket 1 is filled as well but never written out below:
                // the decoder rebuilds one-bit exceptions from positions.
                for (int k = 0; k < BLOCK_SIZE; ++k) {
                    if ((in[k + tmpinpos] >>> bestb) != 0) {
                        // we have an exception
                        byteContainer.put((byte) k);
                        dataTobePacked[index][dataPointers[index]++] = in[k
                                + tmpinpos] >>> bestb;
                    }
                }
            }
            for (int k = 0; k < BLOCK_SIZE; k += 32) {
                BitPacking.fastpack(in, tmpinpos + k, out, tmpoutpos, bestb);
                tmpoutpos += bestb;
            }
        }
        inpos.set(tmpinpos);
        out[headerpos] = tmpoutpos - headerpos;

        // Block metadata: real byte count first, then the bytes padded to
        // a whole number of ints.
        final int bytesize = byteContainer.position();
        while ((byteContainer.position() & 3) != 0) {
            byteContainer.put((byte) 0);
        }
        out[tmpoutpos++] = bytesize;
        final int howmanyints = byteContainer.position() / 4;
        byteContainer.flip();
        byteContainer.asIntBuffer().get(out, tmpoutpos, howmanyints);
        tmpoutpos += howmanyints;

        // Bit (k - 1) of the bitmap is set when bucket k holds exceptions.
        int bitmap = 0;
        for (int k = 2; k <= 32; ++k) {
            if (dataPointers[k] != 0) {
                bitmap |= (1 << (k - 1));
            }
        }
        out[tmpoutpos++] = bitmap;

        for (int k = 2; k <= 32; ++k) {
            if (dataPointers[k] != 0) {
                out[tmpoutpos++] = dataPointers[k]; // size
                int j = 0;
                for (; j < dataPointers[k]; j += 32) {
                    BitPacking.fastpack(dataTobePacked[k], j, out, tmpoutpos, k);
                    tmpoutpos += k;
                }
                // Give back the whole words that only hold padding from
                // the last, partially filled group of 32.
                final int overflow = j - dataPointers[k];
                tmpoutpos -= overflow * k / 32;
            }
        }
        outpos.set(tmpoutpos);
    }

    /**
     * Uncompress data in blocks of integers. The inlength parameter is only
     * tested against zero (in which case nothing is done); the amount of
     * input consumed is deduced from the compressed data. The number of
     * values to produce is {@code mynvalue} rounded down to a multiple of
     * BLOCK_SIZE.
     *
     * @see SkippableIntegerCODEC#headlessUncompress(int[], IntWrapper, int, int[], IntWrapper, int)
     */
    @Override
    public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
            int[] out, IntWrapper outpos, int mynvalue) {
        if (inlength == 0) {
            return;
        }
        mynvalue = Util.greatestMultiple(mynvalue, BLOCK_SIZE);
        final int finalout = outpos.get() + mynvalue;
        while (outpos.get() != finalout) {
            final int thissize = Math.min(pageSize, finalout - outpos.get());
            decodePage(in, inpos, out, outpos, thissize);
        }
    }

    /**
     * Decode one page written by {@link #encodePage}.
     *
     * @param in
     *            compressed input
     * @param inpos
     *            read position, advanced past the page
     * @param out
     *            output array
     * @param outpos
     *            write position, advanced past the decoded values
     * @param thissize
     *            number of integers to decode from this page
     */
    private void decodePage(int[] in, IntWrapper inpos, int[] out,
            IntWrapper outpos, int thissize) {
        final int initpos = inpos.get();
        final int wheremeta = in[inpos.get()];
        inpos.increment();

        // Load the block metadata bytes.
        int inexcept = initpos + wheremeta;
        final int bytesize = in[inexcept++];
        byteContainer.clear();
        byteContainer.asIntBuffer().put(in, inexcept, (bytesize + 3) / 4);
        inexcept += (bytesize + 3) / 4;

        // Unpack every exception bucket flagged in the bitmap.
        final int bitmap = in[inexcept++];
        for (int k = 2; k <= 32; ++k) {
            if ((bitmap & (1 << (k - 1))) == 0) {
                continue;
            }
            final int size = in[inexcept++];
            final int roundedup = Util.greatestMultiple(size + 31, 32);
            if (dataTobePacked[k].length < roundedup) {
                dataTobePacked[k] = new int[roundedup];
            }
            // The encoder trims the padding words of the last group, so the
            // final unpack could read past the end of the input. In that
            // case unpack from a zero-padded copy of the tail instead.
            final int packedints = roundedup / 32 * k;
            int[] src = in;
            int srcbase = 0; // subtracted from inexcept to index into src
            if (inexcept + packedints > in.length) {
                src = new int[packedints];
                System.arraycopy(in, inexcept, src, 0, in.length - inexcept);
                srcbase = inexcept;
            }
            int j = 0;
            for (; j < size; j += 32) {
                BitPacking.fastunpack(src, inexcept - srcbase,
                        dataTobePacked[k], j, k);
                inexcept += k;
            }
            final int overflow = j - size;
            inexcept -= overflow * k / 32;
        }

        // Unpack the blocks and patch the exceptions back in.
        Arrays.fill(dataPointers, 0);
        int tmpoutpos = outpos.get();
        int tmpinpos = inpos.get();
        for (int run = 0, run_end = thissize / BLOCK_SIZE; run < run_end; ++run, tmpoutpos += BLOCK_SIZE) {
            final int b = byteContainer.get();
            final int cexcept = byteContainer.get() & 0xFF;
            for (int k = 0; k < BLOCK_SIZE; k += 32) {
                BitPacking.fastunpack(in, tmpinpos, out, tmpoutpos + k, b);
                tmpinpos += b;
            }
            if (cexcept > 0) {
                final int maxbits = byteContainer.get();
                final int index = maxbits - b;
                if (index == 1) {
                    // A one-bit exception is always the bit just above b.
                    for (int k = 0; k < cexcept; ++k) {
                        final int pos = byteContainer.get() & 0xFF;
                        out[pos + tmpoutpos] |= 1 << b;
                    }
                } else {
                    for (int k = 0; k < cexcept; ++k) {
                        final int pos = byteContainer.get() & 0xFF;
                        final int exceptvalue = dataTobePacked[index][dataPointers[index]++];
                        out[pos + tmpoutpos] |= exceptvalue << b;
                    }
                }
            }
        }
        outpos.set(tmpoutpos);
        inpos.set(inexcept);
    }

    /**
     * Compress the input, prefixing the output with the number of integers
     * encoded (inlength rounded down to a multiple of BLOCK_SIZE). Nothing is
     * written when that number is zero.
     *
     * @see IntegerCODEC#compress(int[], IntWrapper, int, int[], IntWrapper)
     */
    @Override
    public void compress(int[] in, IntWrapper inpos, int inlength, int[] out,
            IntWrapper outpos) {
        inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
        if (inlength == 0) {
            return;
        }
        out[outpos.get()] = inlength;
        outpos.increment();
        headlessCompress(in, inpos, inlength, out, outpos);
    }

    /**
     * Uncompress data written by
     * {@link #compress(int[], IntWrapper, int, int[], IntWrapper)}: the
     * first input word gives the number of integers to decode. Nothing is
     * done when inlength is zero.
     *
     * @see IntegerCODEC#uncompress(int[], IntWrapper, int, int[], IntWrapper)
     */
    @Override
    public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out,
            IntWrapper outpos) {
        if (inlength == 0) {
            return;
        }
        final int outlength = in[inpos.get()];
        inpos.increment();
        headlessUncompress(in, inpos, inlength, out, outpos, outlength);
    }

    /**
     * @return the simple class name of this codec
     */
    @Override
    public String toString() {
        return this.getClass().getSimpleName();
    }
}