me.lemire.integercompression.differential.IntegratedVariableByte Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of JavaFastPFOR Show documentation
Show all versions of JavaFastPFOR Show documentation
JavaFastPFOR is a library that compresses and decompresses arrays of integers
very quickly. It assumes that most (but not all) values in
your array need fewer than 32 bits.
/**
* This code is released under the
* Apache License Version 2.0 http://www.apache.org/licenses/.
*
* (c) Daniel Lemire, http://lemire.me/en/
*/
package me.lemire.integercompression.differential;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.IntBuffer;
import me.lemire.integercompression.IntWrapper;
/**
* Implementation of variable-byte with differential coding. For best
* performance, use it through the IntegratedByteIntegerCODEC interface.
*
* You should only use this scheme on sorted arrays. Use VariableByte if you
* have unsorted arrays.
*
* @author Daniel Lemire
*/
public class IntegratedVariableByte implements IntegratedIntegerCODEC, IntegratedByteIntegerCODEC,
        SkippableIntegratedIntegerCODEC {

    /** Flag carried by the last byte of every encoded value. */
    private static final int STOP_BIT = 1 << 7;

    /** Mask selecting the seven payload bits of an encoded byte. */
    private static final int PAYLOAD_MASK = STOP_BIT - 1;

    /** An encoded value never spans more than five bytes, i.e. 5 * 7 payload bits. */
    private static final int MAX_PAYLOAD_BITS = 35;

    /**
     * Delta-encodes {@code inlength} values of {@code in} and packs the
     * resulting bytes into {@code out}, four bytes per int with the first
     * byte in the lowest-order position. The last word is zero-padded.
     *
     * Each delta is the difference to the previous value, taken as an
     * unsigned 32-bit quantity, and is written seven bits at a time starting
     * with the least significant group; only the final byte has
     * {@link #STOP_BIT} set.
     *
     * @param in         values to encode
     * @param inpos      index of the first value; advanced by {@code inlength}
     * @param inlength   number of values to encode
     * @param out        destination of the packed bytes
     * @param outpos     index of the first word to write; advanced past the last written word
     * @param initoffset value the first delta is computed against
     */
    private static void encodeToInts(int[] in, IntWrapper inpos, int inlength,
            int[] out, IntWrapper outpos, int initoffset) {
        final int start = inpos.get();
        final int end = start + inlength;
        int outIndex = outpos.get();
        int word = 0;      // bytes gathered for the current output word
        int bitsUsed = 0;  // number of bits of 'word' already filled (0, 8, 16 or 24)
        int previous = initoffset;
        for (int k = start; k < end; ++k) {
            // Masking keeps the difference consistent with unsigned integers in C/C++.
            long delta = (in[k] - previous) & 0xFFFFFFFFL;
            previous = in[k];
            for (;;) {
                final boolean last = delta < STOP_BIT;
                final int b = last ? ((int) delta | STOP_BIT) : (int) (delta & PAYLOAD_MASK);
                word |= b << bitsUsed;
                bitsUsed += 8;
                if (bitsUsed == 32) {
                    out[outIndex++] = word;
                    word = 0;
                    bitsUsed = 0;
                }
                if (last) {
                    break;
                }
                delta >>>= 7;
            }
        }
        if (bitsUsed != 0) {
            // Flush the partially filled word; its unused high bytes stay zero.
            out[outIndex++] = word;
        }
        outpos.set(outIndex);
        inpos.add(inlength);
    }

    /**
     * Compresses {@code inlength} values into an int array, using 0 as the
     * reference for the first delta. Does nothing when {@code inlength} is 0.
     *
     * @param in       values to compress
     * @param inpos    index of the first value; advanced by {@code inlength}
     * @param inlength number of values to compress
     * @param out      destination array
     * @param outpos   index of the first word to write; advanced by the number of words written
     */
    @Override
    public void compress(int[] in, IntWrapper inpos, int inlength,
            int[] out, IntWrapper outpos) {
        if (inlength == 0) {
            return;
        }
        encodeToInts(in, inpos, inlength, out, outpos, 0);
    }

    /**
     * Compresses {@code inlength} values into a byte array, using 0 as the
     * reference for the first delta. Does nothing when {@code inlength} is 0.
     *
     * @param in       values to compress
     * @param inpos    index of the first value; advanced by {@code inlength}
     * @param inlength number of values to compress
     * @param out      destination array
     * @param outpos   index of the first byte to write; set to one past the last written byte
     */
    @Override
    public void compress(int[] in, IntWrapper inpos, int inlength,
            byte[] out, IntWrapper outpos) {
        if (inlength == 0) {
            return;
        }
        final int start = inpos.get();
        final int end = start + inlength;
        int outIndex = outpos.get();
        int previous = 0;
        for (int k = start; k < end; ++k) {
            // Masking keeps the difference consistent with unsigned integers in C/C++.
            long delta = (in[k] - previous) & 0xFFFFFFFFL;
            previous = in[k];
            while (delta >= STOP_BIT) {
                out[outIndex++] = (byte) (delta & PAYLOAD_MASK);
                delta >>>= 7;
            }
            out[outIndex++] = (byte) (delta | STOP_BIT);
        }
        outpos.set(outIndex);
        inpos.add(inlength);
    }

    /**
     * Decodes every value found in {@code inlength} words of {@code in},
     * using 0 as the reference for the first delta. Trailing bytes that do
     * not end with a stop bit (such as zero padding) produce no output.
     *
     * @param in       packed input words
     * @param inpos    index of the first word; advanced by {@code inlength}
     * @param inlength number of words to read
     * @param out      destination of the decoded values
     * @param outpos   index of the first value to write; set to one past the last written value
     */
    @Override
    public void uncompress(int[] in, IntWrapper inpos, int inlength,
            int[] out, IntWrapper outpos) {
        int p = inpos.get();
        final int finalp = p + inlength;
        int tmpoutpos = outpos.get();
        int initoffset = 0;
        int s = 0;      // bit offset of the next byte inside in[p]
        int v = 0;      // delta assembled so far
        int shift = 0;  // bit position of the next payload group inside v
        while (p < finalp) {
            final int c = in[p] >>> s; // only the low eight bits are inspected below
            s += 8;
            p += s >> 5;               // move to the next word once all four bytes were read
            s &= 31;
            v += (c & PAYLOAD_MASK) << shift;
            if ((c & STOP_BIT) == STOP_BIT) {
                initoffset += v;
                out[tmpoutpos++] = initoffset;
                v = 0;
                shift = 0;
            } else {
                shift += 7;
            }
        }
        outpos.set(tmpoutpos);
        inpos.add(inlength);
    }

    /**
     * Decodes values from {@code in} until at least {@code inlength} bytes
     * have been consumed, using 0 as the reference for the first delta.
     *
     * @param in       encoded input bytes
     * @param inpos    index of the first byte; set to one past the last consumed byte
     * @param inlength number of bytes to read
     * @param out      destination of the decoded values
     * @param outpos   index of the first value to write; set to one past the last written value
     */
    @Override
    public void uncompress(byte[] in, IntWrapper inpos, int inlength,
            int[] out, IntWrapper outpos) {
        int p = inpos.get();
        final int finalp = p + inlength;
        int tmpoutpos = outpos.get();
        int initoffset = 0;
        while (p < finalp) {
            int v = 0;
            int shift = 0;
            byte b;
            do {
                b = in[p++];
                v |= (b & PAYLOAD_MASK) << shift;
                shift += 7;
                // A negative byte has the stop bit set; the fifth byte always ends a value.
            } while (b >= 0 && shift < MAX_PAYLOAD_BITS);
            initoffset += v;
            out[tmpoutpos++] = initoffset;
        }
        outpos.set(tmpoutpos);
        // p is an absolute index into 'in', so it replaces the position rather than being added to it.
        inpos.set(p);
    }

    /**
     * Returns the simple name of the runtime class.
     */
    @Override
    public String toString() {
        return this.getClass().getSimpleName();
    }

    /**
     * Compresses {@code inlength} values into an int array, computing the
     * first delta against {@code initvalue}. Does nothing when
     * {@code inlength} is 0.
     *
     * @param in        values to compress
     * @param inpos     index of the first value; advanced by {@code inlength}
     * @param inlength  number of values to compress
     * @param out       destination array
     * @param outpos    index of the first word to write; advanced by the number of words written
     * @param initvalue reference for the first delta; set to the last input value
     */
    @Override
    public void headlessCompress(int[] in, IntWrapper inpos, int inlength,
            int[] out, IntWrapper outpos, IntWrapper initvalue) {
        if (inlength == 0) {
            return;
        }
        final int initoffset = initvalue.get();
        initvalue.set(in[inpos.get() + inlength - 1]);
        encodeToInts(in, inpos, inlength, out, outpos, initoffset);
    }

    /**
     * Decodes exactly {@code num} values from {@code in}, computing the first
     * value against {@code initvalue}. {@code inlength} is not consulted.
     *
     * @param in        packed input words
     * @param inpos     index of the first word; set past the last word read, counting a
     *                  partially consumed word as read
     * @param inlength  unused by this implementation
     * @param out       destination of the decoded values
     * @param outpos    index of the first value to write; advanced by {@code num}
     * @param num       number of values to decode
     * @param initvalue reference for the first delta; set to the last decoded value, or left
     *                  unchanged when {@code num} is 0
     */
    @Override
    public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
            int[] out, IntWrapper outpos, int num, IntWrapper initvalue) {
        int p = inpos.get();
        int initoffset = initvalue.get();
        int tmpoutpos = outpos.get();
        final int finaloutpos = num + tmpoutpos;
        int s = 0;      // bit offset of the next byte inside in[p]
        int v = 0;      // delta assembled so far
        int shift = 0;  // bit position of the next payload group inside v
        while (tmpoutpos < finaloutpos) {
            final int c = in[p] >>> s; // only the low eight bits are inspected below
            s += 8;
            p += s >> 5;               // move to the next word once all four bytes were read
            s &= 31;
            v += (c & PAYLOAD_MASK) << shift;
            if ((c & STOP_BIT) == STOP_BIT) {
                initoffset += v;
                out[tmpoutpos++] = initoffset;
                v = 0;
                shift = 0;
            } else {
                shift += 7;
            }
        }
        // initoffset always equals the last value written, and stays valid when num is 0.
        initvalue.set(initoffset);
        outpos.set(tmpoutpos);
        inpos.set(p + (s != 0 ? 1 : 0));
    }
}