
// me.lemire.integercompression.FastPFOR128 Maven / Gradle / Ivy
/**
* This code is released under the
* Apache License Version 2.0 http://www.apache.org/licenses/.
*
* (c) Daniel Lemire, http://lemire.me/en/
*/
package me.lemire.integercompression;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;
/**
 * This class is similar to FastPFOR but uses a small block size
 * ({@link #BLOCK_SIZE} integers per block).
 *
 * Note that this does not use differential coding: if you are working on sorted
 * lists, use IntegratedFastPFOR instead.
 *
 * For multi-threaded applications, each thread should use its own FastPFOR128
 * object: every instance owns mutable working buffers that are reused by
 * each compress and uncompress call.
 *
 * <p>
 * Layout of one encoded page, in 32-bit words, as written by the encoder of
 * this class:
 * </p>
 * <ol>
 * <li>one header word: the offset, relative to the header, of the metadata
 * section;</li>
 * <li>the bit-packed blocks (each block of {@link #BLOCK_SIZE} values packed
 * with its own bit width {@code b});</li>
 * <li>one word holding the number of metadata bytes, followed by those bytes
 * (little-endian, zero-padded to a multiple of four). Per block the bytes
 * are: {@code b}, the number of exceptions and, only when there are
 * exceptions, the maximal bit width followed by one position byte per
 * exception;</li>
 * <li>one bitmap word: bit {@code k - 1} is set when exceptions of width
 * {@code k} (2 to 32) are present;</li>
 * <li>for each such width {@code k}: the number of exception values followed
 * by those values packed with {@code k} bits each.</li>
 * </ol>
 *
 * @author Daniel Lemire
 */
public final class FastPFOR128 implements IntegerCODEC, SkippableIntegerCODEC {
    /**
     * Per-exception overhead, in bits, used by the cost model of
     * {@link #getBestBFromData(int[], int)}: the position of each exception
     * is stored as one byte.
     */
    final static int OVERHEAD_OF_EACH_EXCEPT = 8;
    /**
     * Page size, in integers, used by the no-argument constructor.
     */
    public final static int DEFAULT_PAGE_SIZE = 65536;
    /**
     * Number of integers in one block; each block gets its own bit width.
     */
    public final static int BLOCK_SIZE = 128;
    /** Maximal number of integers handled per page. */
    final int pageSize;
    /**
     * Exception values (already shifted right by the block bit width),
     * grouped by their width in bits. Entry 0 is never allocated.
     */
    final int[][] dataTobePacked = new int[33][];
    /** Per-page metadata bytes (bit widths, exception counts and positions). */
    final ByteBuffer byteContainer;
    // Working area for compress and uncompress.
    /** Number of used entries (encoder) or read cursor (decoder) per width. */
    final int[] dataPointers = new int[33];
    /** Histogram of the bit widths found in the block being analysed. */
    final int[] freqs = new int[33];
    /**
     * Result of {@link #getBestBFromData(int[], int)}: [0] chosen bit width,
     * [1] number of exceptions, [2] maximal bit width in the block.
     */
    final int[] bestbbestcexceptmaxb = new int[3];

    /**
     * Construct the FastPFOR CODEC.
     *
     * @param pagesize
     *            the desired page size (recommended value is
     *            FastPFOR128.DEFAULT_PAGE_SIZE); must be at least
     *            {@link #BLOCK_SIZE}
     * @throws IllegalArgumentException
     *             if pagesize is smaller than {@link #BLOCK_SIZE}
     */
    public FastPFOR128(int pagesize) {
        // A page must hold at least one block: otherwise encodePage and
        // decodePage process nothing, the cursors never advance and the
        // paging loops cannot terminate normally.
        if (pagesize < BLOCK_SIZE) {
            throw new IllegalArgumentException("pagesize must be at least "
                    + BLOCK_SIZE + " but was " + pagesize);
        }
        pageSize = pagesize;
        // Initiate arrays.
        // Up to 3 header bytes per block plus one position byte per value.
        byteContainer = ByteBuffer.allocateDirect(3 * pageSize
                / BLOCK_SIZE + pageSize);
        byteContainer.order(ByteOrder.LITTLE_ENDIAN);
        // Heuristic initial capacity. It is rounded up to a multiple of 32
        // because encodePage hands these arrays to BitPacking.fastpack in
        // groups of 32 values; a capacity that is not a multiple of 32 would
        // let the last group run past the end of the array.
        final int initialCapacity = Util.greatestMultiple(
                pageSize / 32 * 4 + 31, 32);
        for (int k = 1; k < dataTobePacked.length; ++k) {
            dataTobePacked[k] = new int[initialCapacity];
        }
    }

    /**
     * Construct the fastPFOR CODEC with default parameters.
     */
    public FastPFOR128() {
        this(DEFAULT_PAGE_SIZE);
    }

    /**
     * Compress data in blocks of BLOCK_SIZE integers (if fewer than BLOCK_SIZE
     * integers are provided, nothing is done). The input length is first
     * rounded down to a multiple of BLOCK_SIZE; the data is then encoded page
     * by page. No length header is written.
     *
     * @param in
     *            input array
     * @param inpos
     *            position in the input array; advanced past the consumed values
     * @param inlength
     *            number of integers available for compression
     * @param out
     *            output array
     * @param outpos
     *            position in the output array; advanced past the written words
     * @see SkippableIntegerCODEC#headlessCompress(int[], IntWrapper, int, int[], IntWrapper)
     */
    @Override
    public void headlessCompress(int[] in, IntWrapper inpos, int inlength,
            int[] out, IntWrapper outpos) {
        inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
        final int finalinpos = inpos.get() + inlength;
        while (inpos.get() != finalinpos) {
            int thissize = Math.min(pageSize,
                    finalinpos - inpos.get());
            encodePage(in, inpos, thissize, out, outpos);
        }
    }

    /**
     * Analyse the block of BLOCK_SIZE values starting at {@code pos} and
     * store in {@link #bestbbestcexceptmaxb} the bit width with the smallest
     * estimated cost, the matching number of exceptions and the maximal bit
     * width found in the block.
     *
     * @param in
     *            input array
     * @param pos
     *            index of the first value of the block
     */
    private void getBestBFromData(int[] in, int pos) {
        Arrays.fill(freqs, 0);
        for (int k = pos, k_end = pos + BLOCK_SIZE; k < k_end; ++k) {
            freqs[Util.bits(in[k])]++;
        }
        // Largest bit width that actually occurs in the block.
        bestbbestcexceptmaxb[0] = 32;
        while (freqs[bestbbestcexceptmaxb[0]] == 0) {
            bestbbestcexceptmaxb[0]--;
        }
        bestbbestcexceptmaxb[2] = bestbbestcexceptmaxb[0];
        // Baseline: pack everything with the maximal width, no exception.
        int bestcost = bestbbestcexceptmaxb[0] * BLOCK_SIZE;
        int cexcept = 0;
        bestbbestcexceptmaxb[1] = cexcept;
        for (int b = bestbbestcexceptmaxb[0] - 1; b >= 0; --b) {
            // Values needing b + 1 bits become exceptions at width b.
            cexcept += freqs[b + 1];
            if (cexcept == BLOCK_SIZE) {
                break;
            }
            // the extra 8 is the cost of storing maxbits
            int thiscost = cexcept * OVERHEAD_OF_EACH_EXCEPT
                    + cexcept * (bestbbestcexceptmaxb[2] - b) + b
                    * BLOCK_SIZE + 8;
            // One-bit exceptions are implied by their position alone (the
            // page encoder never writes width-1 exception values), so their
            // value bits cost nothing.
            if (bestbbestcexceptmaxb[2] - b == 1) {
                thiscost -= cexcept;
            }
            if (thiscost < bestcost) {
                bestcost = thiscost;
                bestbbestcexceptmaxb[0] = b;
                bestbbestcexceptmaxb[1] = cexcept;
            }
        }
    }

    /**
     * Encode one page of {@code thissize} integers (only whole blocks are
     * processed) using the layout described in the class documentation.
     *
     * @param in
     *            input array
     * @param inpos
     *            position in the input array; advanced past the encoded blocks
     * @param thissize
     *            number of integers in this page
     * @param out
     *            output array
     * @param outpos
     *            position in the output array; advanced past the written words
     */
    private void encodePage(int[] in, IntWrapper inpos, int thissize,
            int[] out, IntWrapper outpos) {
        // Reserve the header word; it is filled in once the packed blocks
        // have been written.
        final int headerpos = outpos.get();
        outpos.increment();
        int tmpoutpos = outpos.get();
        // Clear working area.
        Arrays.fill(dataPointers, 0);
        byteContainer.clear();
        int tmpinpos = inpos.get();
        for (final int finalinpos = tmpinpos + thissize - BLOCK_SIZE; tmpinpos <= finalinpos; tmpinpos += BLOCK_SIZE) {
            getBestBFromData(in, tmpinpos);
            final int tmpbestb = bestbbestcexceptmaxb[0];
            byteContainer.put((byte) bestbbestcexceptmaxb[0]);
            byteContainer.put((byte) bestbbestcexceptmaxb[1]);
            if (bestbbestcexceptmaxb[1] > 0) {
                byteContainer.put((byte) bestbbestcexceptmaxb[2]);
                // Width, in bits, of the exception values of this block.
                final int index = bestbbestcexceptmaxb[2]
                        - bestbbestcexceptmaxb[0];
                if (dataPointers[index]
                        + bestbbestcexceptmaxb[1] >= dataTobePacked[index].length) {
                    int newsize = 2 * (dataPointers[index] + bestbbestcexceptmaxb[1]);
                    // make sure it is a multiple of 32
                    newsize = Util
                            .greatestMultiple(newsize + 31, 32);
                    dataTobePacked[index] = Arrays.copyOf(
                            dataTobePacked[index], newsize);
                }
                for (int k = 0; k < BLOCK_SIZE; ++k) {
                    if ((in[k + tmpinpos] >>> bestbbestcexceptmaxb[0]) != 0) {
                        // we have an exception
                        byteContainer.put((byte) k);
                        dataTobePacked[index][dataPointers[index]++] = in[k
                                + tmpinpos] >>> tmpbestb;
                    }
                }
            }
            // Pack the block, 32 values at a time, with tmpbestb bits each;
            // every group of 32 values occupies tmpbestb output words.
            for (int k = 0; k < BLOCK_SIZE; k += 32) {
                BitPacking.fastpack(in, tmpinpos + k, out,
                        tmpoutpos, tmpbestb);
                tmpoutpos += tmpbestb;
            }
        }
        inpos.set(tmpinpos);
        // Header: where the metadata section starts, relative to the header.
        out[headerpos] = tmpoutpos - headerpos;
        // Metadata bytes: record the unpadded size, then pad to whole words.
        final int bytesize = byteContainer.position();
        while ((byteContainer.position() & 3) != 0) {
            byteContainer.put((byte) 0);
        }
        out[tmpoutpos++] = bytesize;
        final int howmanyints = byteContainer.position() / 4;
        byteContainer.flip();
        byteContainer.asIntBuffer().get(out, tmpoutpos, howmanyints);
        tmpoutpos += howmanyints;
        // Bitmap of the exception widths in use. Width 1 is skipped because
        // its values are implied by the exception positions.
        int bitmap = 0;
        for (int k = 2; k <= 32; ++k) {
            if (dataPointers[k] != 0) {
                bitmap |= (1 << (k - 1));
            }
        }
        out[tmpoutpos++] = bitmap;
        for (int k = 2; k <= 32; ++k) {
            if (dataPointers[k] != 0) {
                out[tmpoutpos++] = dataPointers[k];// size
                int j = 0;
                for (; j < dataPointers[k]; j += 32) {
                    BitPacking.fastpack(dataTobePacked[k],
                            j, out, tmpoutpos, k);
                    tmpoutpos += k;
                }
                // The last group may be partly unused: give back the whole
                // output words that only hold unused slots.
                int overflow = j - dataPointers[k];
                tmpoutpos -= overflow * k / 32;
            }
        }
        outpos.set(tmpoutpos);
    }

    /**
     * Uncompress data in blocks of integers. The inlength parameter is only
     * checked for zero (in which case nothing is done); the amount of
     * compressed data to read is deduced from the compressed data itself.
     * The number of values to decode is rounded down to a multiple of
     * BLOCK_SIZE.
     *
     * @param in
     *            array containing the compressed data
     * @param inpos
     *            position in the input array; advanced past the consumed words
     * @param inlength
     *            length of the compressed data (only tested against zero)
     * @param out
     *            array where the decoded integers are written
     * @param outpos
     *            position in the output array; advanced past the decoded values
     * @param mynvalue
     *            number of integers to decode
     * @see SkippableIntegerCODEC#headlessUncompress(int[], IntWrapper, int, int[], IntWrapper, int)
     */
    @Override
    public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
            int[] out, IntWrapper outpos, int mynvalue) {
        if (inlength == 0) {
            return;
        }
        mynvalue = Util.greatestMultiple(mynvalue, BLOCK_SIZE);
        int finalout = outpos.get() + mynvalue;
        while (outpos.get() != finalout) {
            int thissize = Math.min(pageSize,
                    finalout - outpos.get());
            decodePage(in, inpos, out, outpos, thissize);
        }
    }

    /**
     * Decode one page holding {@code thissize} integers; the inverse of
     * {@link #encodePage(int[], IntWrapper, int, int[], IntWrapper)}.
     *
     * @param in
     *            array containing the compressed data
     * @param inpos
     *            position of the page header; set to the end of the page
     * @param out
     *            array where the decoded integers are written
     * @param outpos
     *            position in the output array; advanced past the decoded values
     * @param thissize
     *            number of integers stored in this page
     */
    private void decodePage(int[] in, IntWrapper inpos, int[] out,
            IntWrapper outpos, int thissize) {
        final int initpos = inpos.get();
        final int wheremeta = in[inpos.get()];
        inpos.increment();
        // Jump to the metadata section and load the metadata bytes.
        int inexcept = initpos + wheremeta;
        final int bytesize = in[inexcept++];
        byteContainer.clear();
        byteContainer.asIntBuffer().put(in, inexcept, (bytesize + 3) / 4);
        inexcept += (bytesize + 3) / 4;
        // Unpack the exception values for every width flagged in the bitmap.
        final int bitmap = in[inexcept++];
        for (int k = 2; k <= 32; ++k) {
            if ((bitmap & (1 << (k - 1))) != 0) {
                int size = in[inexcept++];
                int roundedup = Util
                        .greatestMultiple(size + 31, 32);
                if (dataTobePacked[k].length < roundedup) {
                    dataTobePacked[k] = new int[roundedup];
                }
                if (inexcept + roundedup / 32 * k <= in.length) {
                    int j = 0;
                    for (; j < size; j += 32) {
                        BitPacking.fastunpack(in, inexcept,
                                dataTobePacked[k], j, k);
                        inexcept += k;
                    }
                    // Undo the advance over words the encoder did not emit.
                    int overflow = j - size;
                    inexcept -= overflow * k / 32;
                } else {
                    // The last (truncated) group would make the unpacking
                    // read past the end of the input: unpack from a
                    // zero-padded copy of the remaining input instead.
                    int j = 0;
                    int[] buf = new int[roundedup / 32 * k];
                    int initinexcept = inexcept;
                    System.arraycopy(in, inexcept, buf, 0, in.length - inexcept);
                    for (; j < size; j += 32) {
                        BitPacking.fastunpack(buf, inexcept - initinexcept,
                                dataTobePacked[k], j, k);
                        inexcept += k;
                    }
                    int overflow = j - size;
                    inexcept -= overflow * k / 32;
                }
            }
        }
        // From here on dataPointers holds the read cursor for each width.
        Arrays.fill(dataPointers, 0);
        int tmpoutpos = outpos.get();
        int tmpinpos = inpos.get();
        for (int run = 0, run_end = thissize / BLOCK_SIZE; run < run_end; ++run, tmpoutpos += BLOCK_SIZE) {
            final int b = byteContainer.get();
            final int cexcept = byteContainer.get() & 0xFF;
            for (int k = 0; k < BLOCK_SIZE; k += 32) {
                BitPacking.fastunpack(in, tmpinpos, out,
                        tmpoutpos + k, b);
                tmpinpos += b;
            }
            if (cexcept > 0) {
                final int maxbits = byteContainer.get();
                final int index = maxbits - b;
                if (index == 1) {
                    // One-bit exceptions carry no stored value: the missing
                    // high bit is always 1.
                    for (int k = 0; k < cexcept; ++k) {
                        final int pos = byteContainer.get() & 0xFF;
                        out[pos + tmpoutpos] |= 1 << b;
                    }
                } else {
                    // Patch the high bits back in from the exception arrays.
                    for (int k = 0; k < cexcept; ++k) {
                        final int pos = byteContainer.get() & 0xFF;
                        final int exceptvalue = dataTobePacked[index][dataPointers[index]++];
                        out[pos + tmpoutpos] |= exceptvalue << b;
                    }
                }
            }
        }
        outpos.set(tmpoutpos);
        inpos.set(inexcept);
    }

    /**
     * Compress data in blocks of BLOCK_SIZE integers, writing first one word
     * holding the number of compressed integers (the input length rounded
     * down to a multiple of BLOCK_SIZE). Nothing is written when that number
     * is zero.
     *
     * @param in
     *            input array
     * @param inpos
     *            position in the input array; advanced past the consumed values
     * @param inlength
     *            number of integers available for compression
     * @param out
     *            output array
     * @param outpos
     *            position in the output array; advanced past the written words
     */
    @Override
    public void compress(int[] in, IntWrapper inpos, int inlength, int[] out,
            IntWrapper outpos) {
        inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
        if (inlength == 0) {
            return;
        }
        out[outpos.get()] = inlength;
        outpos.increment();
        headlessCompress(in, inpos, inlength, out, outpos);
    }

    /**
     * Uncompress data written by
     * {@link #compress(int[], IntWrapper, int, int[], IntWrapper)}: the first
     * word is read as the number of integers to decode. Nothing is done when
     * inlength is zero.
     *
     * @param in
     *            array containing the compressed data
     * @param inpos
     *            position in the input array; advanced past the consumed words
     * @param inlength
     *            length of the compressed data (only tested against zero)
     * @param out
     *            array where the decoded integers are written
     * @param outpos
     *            position in the output array; advanced past the decoded values
     */
    @Override
    public void uncompress(int[] in, IntWrapper inpos, int inlength, int[] out,
            IntWrapper outpos) {
        if (inlength == 0) {
            return;
        }
        final int outlength = in[inpos.get()];
        inpos.increment();
        headlessUncompress(in, inpos, inlength, out, outpos, outlength);
    }

    /**
     * @return the simple name of this class
     */
    @Override
    public String toString() {
        return this.getClass().getSimpleName();
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy