All downloads are free. Search and download functionality uses the official Maven repository.

me.lemire.integercompression.differential.IntegratedVariableByte Maven / Gradle / Ivy

Go to download

A library for compressing and uncompressing arrays of integers very quickly. It assumes that most (but not all) values in your array fit in fewer than 32 bits.

There is a newer version: 0.2.1
Show newest version
/**
 * This code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
 *
 * (c) Daniel Lemire, http://lemire.me/en/
 */
package me.lemire.integercompression.differential;

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.IntBuffer;

import me.lemire.integercompression.IntWrapper;

/**
 * Implementation of variable-byte with differential coding. For best
 * performance, use it through the IntegratedByteIntegerCODEC interface.
 *
 * Each input value is replaced by its difference from the previous value
 * (computed modulo 2^32), and that difference is written as one to five
 * bytes carrying seven payload bits each, least significant group first.
 * The last byte of every value has its high bit set. When the output is an
 * int array, the bytes are packed in little-endian order and padded with
 * zero bytes up to a multiple of four.
 *
 * You should only use this scheme on sorted arrays. Use VariableByte if you
 * have unsorted arrays.
 *
 * @author Daniel Lemire
 */
public class IntegratedVariableByte implements IntegratedIntegerCODEC, IntegratedByteIntegerCODEC,
SkippableIntegratedIntegerCODEC  {

    /** High bit of a byte; set on the final byte of every encoded value. */
    private static final int STOP_BIT = 1 << 7;

    /** Returns the i-th group of seven bits of val, with the high bit clear. */
    private static byte extract7bits(int i, long val) {
        return (byte) ((val >> (7 * i)) & ((1 << 7) - 1));
    }

    /**
     * Returns val shifted right by 7*i bits and truncated to a byte. Only
     * used for the top group of a value, where no higher bits remain.
     */
    private static byte extract7bitsmaskless(int i, long val) {
        return (byte) (val >> (7 * i));
    }

    /**
     * Delta-encodes in[start .. start+count) into out, starting at outIndex.
     *
     * @param in         values to encode
     * @param start      index of the first value to encode
     * @param count      number of values to encode
     * @param initoffset value the first delta is computed against
     * @param out        destination byte array
     * @param outIndex   index in out of the first byte to write
     * @return index in out just past the last byte written
     */
    private static int encode(int[] in, int start, int count, int initoffset,
            byte[] out, int outIndex) {
        int previous = initoffset;
        int o = outIndex;
        final int end = start + count;
        for (int k = start; k < end; ++k) {
            // To be consistent with unsigned integers in C/C++
            final long val = (in[k] - previous) & 0xFFFFFFFFL;
            previous = in[k];
            if (val < (1 << 7)) {
                out[o++] = (byte) (val | STOP_BIT);
            } else if (val < (1 << 14)) {
                out[o++] = extract7bits(0, val);
                out[o++] = (byte) (extract7bitsmaskless(1, val) | STOP_BIT);
            } else if (val < (1 << 21)) {
                out[o++] = extract7bits(0, val);
                out[o++] = extract7bits(1, val);
                out[o++] = (byte) (extract7bitsmaskless(2, val) | STOP_BIT);
            } else if (val < (1 << 28)) {
                out[o++] = extract7bits(0, val);
                out[o++] = extract7bits(1, val);
                out[o++] = extract7bits(2, val);
                out[o++] = (byte) (extract7bitsmaskless(3, val) | STOP_BIT);
            } else {
                out[o++] = extract7bits(0, val);
                out[o++] = extract7bits(1, val);
                out[o++] = extract7bits(2, val);
                out[o++] = extract7bits(3, val);
                out[o++] = (byte) (extract7bitsmaskless(4, val) | STOP_BIT);
            }
        }
        return o;
    }

    /**
     * Delta-encodes in[start .. start+count) and stores the resulting bytes,
     * packed little-endian and zero-padded to a whole number of ints, into
     * out starting at outIndex.
     *
     * @return index in out just past the last int written
     */
    private static int encodeToInts(int[] in, int start, int count, int initoffset,
            int[] out, int outIndex) {
        // Worst case is five bytes per value plus up to three padding bytes.
        // The array is zero-initialised, so the padding bytes are already 0.
        final byte[] bytes = new byte[count * 5 + 3];
        final int used = encode(in, start, count, initoffset, bytes, 0);
        final int padded = (used + 3) & ~3;
        final IntBuffer packed = ByteBuffer.wrap(bytes, 0, padded)
                .order(ByteOrder.LITTLE_ENDIAN).asIntBuffer();
        packed.get(out, outIndex, padded / 4);
        return outIndex + padded / 4;
    }

    /**
     * Compresses inlength values of in, starting at inpos, into out starting
     * at outpos. The first delta is taken against 0. Does nothing when
     * inlength is 0.
     *
     * @param in       input array
     * @param inpos    position of the first value to read; advanced by inlength
     * @param inlength number of values to compress
     * @param out      output array
     * @param outpos   position of the first int to write; advanced by the
     *                 number of ints written
     */
    @Override
    public void compress(int[] in, IntWrapper inpos, int inlength,
            int[] out, IntWrapper outpos) {
        if (inlength == 0)
            return;
        outpos.set(encodeToInts(in, inpos.get(), inlength, 0, out, outpos.get()));
        inpos.add(inlength);
    }

    /**
     * Compresses inlength values of in, starting at inpos, into the byte
     * array out starting at outpos. The first delta is taken against 0.
     * Does nothing when inlength is 0.
     *
     * @param in       input array
     * @param inpos    position of the first value to read; advanced by inlength
     * @param inlength number of values to compress
     * @param out      output array
     * @param outpos   position of the first byte to write; set to the
     *                 position just past the last byte written
     */
    @Override
    public void compress(int[] in, IntWrapper inpos, int inlength,
            byte[] out, IntWrapper outpos) {
        if (inlength == 0)
            return;
        outpos.set(encode(in, inpos.get(), inlength, 0, out, outpos.get()));
        inpos.add(inlength);
    }

    /**
     * Decodes every value stored in the inlength ints of in starting at
     * inpos, writing the reconstructed values into out starting at outpos.
     *
     * @param in       compressed input
     * @param inpos    position of the first int to read; advanced by inlength
     * @param inlength number of ints to read
     * @param out      output array
     * @param outpos   position of the first value to write; set to the
     *                 position just past the last value written
     */
    @Override
    public void uncompress(int[] in, IntWrapper inpos, int inlength,
            int[] out, IntWrapper outpos) {
        int s = 0; // bit offset of the current byte inside in[p]
        int p = inpos.get();
        final int finalp = p + inlength;
        int tmpoutpos = outpos.get();
        int initoffset = 0;
        for (int v = 0, shift = 0; p < finalp;) {
            final int c = in[p] >>> s;
            s += 8;
            p += s >> 5; // move to the next int once all four bytes are used
            s &= 31;
            v += (c & 127) << shift;
            if ((c & STOP_BIT) != 0) {
                initoffset += v;
                out[tmpoutpos++] = initoffset;
                v = 0;
                shift = 0;
            } else {
                // Also taken for zero padding bytes, which contribute nothing.
                shift += 7;
            }
        }
        outpos.set(tmpoutpos);
        inpos.add(inlength);
    }

    /**
     * Decodes values from the byte array in, starting at inpos and
     * continuing while the read position is below inpos + inlength, writing
     * the reconstructed values into out starting at outpos.
     *
     * @param in       compressed input
     * @param inpos    position of the first byte to read; set to the
     *                 position just past the last byte consumed
     * @param inlength number of bytes to read
     * @param out      output array
     * @param outpos   position of the first value to write; set to the
     *                 position just past the last value written
     */
    @Override
    public void uncompress(byte[] in, IntWrapper inpos, int inlength,
            int[] out, IntWrapper outpos) {
        int p = inpos.get();
        int initoffset = 0;
        final int finalp = p + inlength;
        int tmpoutpos = outpos.get();
        // The update clause runs after every iteration, including those that
        // end in 'continue', and stores the value that was just decoded.
        for (int v = 0; p < finalp; out[tmpoutpos++] = (initoffset = initoffset + v)) {
            v = in[p] & 0x7F;
            if (in[p] < 0) { // high bit set: last byte of this value
                p += 1;
                continue;
            }
            v = ((in[p + 1] & 0x7F) << 7) | v;
            if (in[p + 1] < 0) {
                p += 2;
                continue;
            }
            v = ((in[p + 2] & 0x7F) << 14) | v;
            if (in[p + 2] < 0) {
                p += 3;
                continue;
            }
            v = ((in[p + 3] & 0x7F) << 21) | v;
            if (in[p + 3] < 0) {
                p += 4;
                continue;
            }
            // A fifth byte is always the last one; its stop bit is not checked.
            v = ((in[p + 4] & 0x7F) << 28) | v;
            p += 5;
        }
        outpos.set(tmpoutpos);
        // p is an absolute index (it started at inpos.get()), so it must be
        // stored as-is rather than added to the current position.
        inpos.set(p);
    }

    @Override
    public String toString() {
        return this.getClass().getSimpleName();
    }

    /**
     * Compresses inlength values of in, starting at inpos, into out starting
     * at outpos. The first delta is taken against initvalue. Does nothing
     * when inlength is 0.
     *
     * @param in        input array
     * @param inpos     position of the first value to read; advanced by inlength
     * @param inlength  number of values to compress
     * @param out       output array
     * @param outpos    position of the first int to write; advanced by the
     *                  number of ints written
     * @param initvalue on entry, the value the first delta is computed
     *                  against; on exit, the last value that was compressed
     */
    @Override
    public void headlessCompress(int[] in, IntWrapper inpos, int inlength,
            int[] out, IntWrapper outpos, IntWrapper initvalue) {
        if (inlength == 0)
            return;
        final int initoffset = initvalue.get();
        initvalue.set(in[inpos.get() + inlength - 1]);
        outpos.set(encodeToInts(in, inpos.get(), inlength, initoffset, out, outpos.get()));
        inpos.add(inlength);
    }

    /**
     * Decodes exactly num values from in, starting at inpos, writing the
     * reconstructed values into out starting at outpos.
     *
     * @param in        compressed input
     * @param inpos     position of the first int to read; set to the position
     *                  just past the last int that was read from, even if
     *                  only part of that int was consumed
     * @param inlength  not used by this implementation
     * @param out       output array
     * @param outpos    position of the first value to write; advanced by num
     * @param num       number of values to decode
     * @param initvalue on entry, the value the first delta is added to; on
     *                  exit, the last value decoded (unchanged when num is 0)
     */
    @Override
    public void headlessUncompress(int[] in, IntWrapper inpos, int inlength,
            int[] out, IntWrapper outpos, int num, IntWrapper initvalue) {
        int s = 0; // bit offset of the current byte inside in[p]
        int p = inpos.get();
        int initoffset = initvalue.get();
        int tmpoutpos = outpos.get();
        final int finaloutpos = num + tmpoutpos;
        for (int v = 0, shift = 0; tmpoutpos < finaloutpos;) {
            final int c = in[p] >>> s;
            s += 8;
            p += s >> 5; // move to the next int once all four bytes are used
            s &= 31;
            v += (c & 127) << shift;
            if ((c & STOP_BIT) != 0) {
                out[tmpoutpos++] = (initoffset = initoffset + v);
                v = 0;
                shift = 0;
            } else {
                shift += 7;
            }
        }
        // initoffset equals the last value written, and is still the incoming
        // value when num is 0, so out[tmpoutpos - 1] is never read.
        initvalue.set(initoffset);
        outpos.set(tmpoutpos);

        // Skip the remainder of a partially consumed int (padding bytes).
        inpos.set(p + (s != 0 ? 1 : 0));
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy