All downloads are free. Search and download functionality uses the official Maven repository.

me.lemire.integercompression.benchmarktools.Benchmark Maven / Gradle / Ivy

Go to download

A library for compressing and uncompressing arrays of integers very quickly. It assumes that most (but not all) values in your array use fewer than 32 bits.

There is a newer version: 0.2.1
Show newest version
/**
 * This code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
 *
 * (c) Daniel Lemire, http://lemire.me/en/
 */
package me.lemire.integercompression.benchmarktools;

import com.kamikaze.pfordelta.PForDelta;

import me.lemire.integercompression.BinaryPacking;
import me.lemire.integercompression.ByteIntegerCODEC;
import me.lemire.integercompression.Composition;
import me.lemire.integercompression.DeltaZigzagBinaryPacking;
import me.lemire.integercompression.DeltaZigzagVariableByte;
import me.lemire.integercompression.FastPFOR;
import me.lemire.integercompression.FastPFOR128;
import me.lemire.integercompression.IntWrapper;
import me.lemire.integercompression.IntegerCODEC;
import me.lemire.integercompression.JustCopy;
import me.lemire.integercompression.NewPFD;
import me.lemire.integercompression.NewPFDS16;
import me.lemire.integercompression.NewPFDS9;
import me.lemire.integercompression.OptPFD;
import me.lemire.integercompression.OptPFDS16;
import me.lemire.integercompression.OptPFDS9;
import me.lemire.integercompression.Simple9;
import me.lemire.integercompression.VariableByte;
import me.lemire.integercompression.differential.Delta;
import me.lemire.integercompression.differential.IntegratedBinaryPacking;
import me.lemire.integercompression.differential.IntegratedByteIntegerCODEC;
import me.lemire.integercompression.differential.IntegratedComposition;
import me.lemire.integercompression.differential.IntegratedIntegerCODEC;
import me.lemire.integercompression.differential.IntegratedVariableByte;
import me.lemire.integercompression.differential.XorBinaryPacking;
import me.lemire.integercompression.synth.ClusteredDataGenerator;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;

/**
 * 
 * Simple class meant to compare the speed of different schemes.
 * 
 * @author Daniel Lemire
 * 
 */
public class Benchmark {

        /**
         * Standard benchmark for codecs that compress into an int array.
         * 
         * Every input array is copied, delta-coded (unless the codec is an
         * IntegratedIntegerCODEC), compressed, uncompressed and then compared
         * with the original. The first value of each array is not handed to
         * the codec: compression starts at index 1 and the first value is
         * copied into the output buffer directly.
         * 
         * @param csvLog
         *                Writer for CSV log.
         * @param sparsity
         *                sparsity of the data, only used as a column in the
         *                CSV log
         * @param c
         *                the codec
         * @param data
         *                arrays of input data
         * @param repeat
         *                How many times to repeat the test
         * @param verbose
         *                whether to output result on screen and in the CSV
         *                log
         * @throws RuntimeException
         *                 if the uncompressed data differs from the input
         *                 in length or content
         */
        private static void testCodec(PrintWriter csvLog, int sparsity,
                IntegerCODEC c, int[][] data, int repeat, boolean verbose) {
                if (verbose) {
                        System.out.println("# " + c.toString());
                        System.out
                                .println("# bits per int, compress speed (mis), decompression speed (mis) ");
                }

                int N = data.length;

                // Kept as a long so that totalSize * repeat below cannot
                // overflow the int range.
                long totalSize = 0;
                int maxLength = 0;
                for (int k = 0; k < N; ++k) {
                        totalSize += data[k].length;
                        if (data[k].length > maxLength) {
                                maxLength = data[k].length;
                        }
                }

                // 4x + 1024 to account for the possibility of some negative
                // compression.
                int[] compressBuffer = new int[4 * maxLength + 1024];
                int[] decompressBuffer = new int[maxLength + 1024];

                // These variables hold time in microseconds (10^-6).
                long compressTime = 0;
                long decompressTime = 0;

                int size = 0;
                IntWrapper inpos = new IntWrapper();
                IntWrapper outpos = new IntWrapper();

                for (int r = 0; r < repeat; ++r) {
                        // Only the size measured in the last repetition is
                        // reported.
                        size = 0;
                        for (int k = 0; k < N; ++k) {
                                int[] backupdata = Arrays.copyOf(data[k],
                                        data[k].length);

                                // compress data.
                                long beforeCompress = System.nanoTime() / 1000;
                                inpos.set(1);
                                outpos.set(0);
                                if (!(c instanceof IntegratedIntegerCODEC)) {
                                        Delta.delta(backupdata);
                                }
                                c.compress(backupdata, inpos, backupdata.length
                                        - inpos.get(), compressBuffer, outpos);
                                long afterCompress = System.nanoTime() / 1000;

                                // measure time of compression.
                                compressTime += afterCompress - beforeCompress;
                                // +1 for the first value, which bypasses the
                                // codec.
                                final int thiscompsize = outpos.get() + 1;
                                size += thiscompsize;

                                // extract (uncompress) data
                                long beforeDecompress = System.nanoTime() / 1000;
                                inpos.set(0);
                                outpos.set(1);
                                decompressBuffer[0] = backupdata[0];
                                c.uncompress(compressBuffer, inpos,
                                        thiscompsize - 1, decompressBuffer,
                                        outpos);
                                if (!(c instanceof IntegratedIntegerCODEC)) {
                                        Delta.fastinverseDelta(decompressBuffer);
                                }
                                long afterDecompress = System.nanoTime() / 1000;

                                // measure time of extraction (uncompression).
                                decompressTime += afterDecompress
                                        - beforeDecompress;
                                if (outpos.get() != data[k].length) {
                                        throw new RuntimeException(
                                                "we have a bug (diff length) "
                                                        + c + " expected "
                                                        + data[k].length
                                                        + " got "
                                                        + outpos.get());
                                }

                                // verify: compare original array with
                                // compressed and
                                // uncompressed.
                                for (int m = 0; m < outpos.get(); ++m) {
                                        if (decompressBuffer[m] != data[k][m]) {
                                                throw new RuntimeException(
                                                        "we have a bug (actual difference), expected "
                                                                + data[k][m]
                                                                + " found "
                                                                + decompressBuffer[m]
                                                                + " at " + m);
                                        }
                                }
                        }
                }

                if (verbose) {
                        double bitsPerInt = size * 32.0 / totalSize;
                        // Integers per microsecond, i.e. millions of integers
                        // per second. The elapsed time is clamped to one
                        // microsecond so that tiny inputs cannot cause a
                        // division by zero.
                        final long processed = totalSize * repeat;
                        long compressSpeed = processed
                                / Math.max(1L, compressTime);
                        long decompressSpeed = processed
                                / Math.max(1L, decompressTime);
                        System.out.println(String.format(
                                "\t%1$.2f\t%2$d\t%3$d", bitsPerInt,
                                compressSpeed, decompressSpeed));
                        csvLog.format("\"%1$s\",%2$d,%3$.2f,%4$d,%5$d\n",
                                c.toString(), sparsity, bitsPerInt,
                                compressSpeed, decompressSpeed);
                        csvLog.flush();
                }
        }

        /**
         * Standard benchmark for byte-aligned schemes, i.e. codecs that
         * compress into a byte array.
         * 
         * Every input array is copied, delta-coded (unless the codec is an
         * IntegratedByteIntegerCODEC), compressed, uncompressed and then
         * compared with the original. The first value of each array is not
         * handed to the codec: compression starts at index 1 and the first
         * value is copied into the output buffer directly.
         * 
         * @param csvLog
         *                Writer for CSV log.
         * @param sparsity
         *                sparsity of the data, only used as a column in the
         *                CSV log
         * @param c
         *                the codec
         * @param data
         *                arrays of input data
         * @param repeat
         *                How many times to repeat the test
         * @param verbose
         *                whether to output result on screen and in the CSV
         *                log
         * @throws RuntimeException
         *                 if the uncompressed data differs from the input
         *                 in length or content
         */
        private static void testByteCodec(PrintWriter csvLog, int sparsity,
                ByteIntegerCODEC c, int[][] data, int repeat, boolean verbose) {
                if (verbose) {
                        System.out.println("# " + c.toString());
                        System.out
                                .println("# bits per int, compress speed (mis), decompression speed (mis) ");
                }

                int N = data.length;

                // Kept as a long so that totalSize * repeat below cannot
                // overflow the int range.
                long totalSize = 0;
                int maxLength = 0;
                for (int k = 0; k < N; ++k) {
                        totalSize += data[k].length;
                        if (data[k].length > maxLength) {
                                maxLength = data[k].length;
                        }
                }

                // 8 bytes per input integer plus some slack for the
                // compressed output.
                byte[] compressBuffer = new byte[8 * maxLength + 1024];
                int[] decompressBuffer = new int[maxLength + 1024];

                // These variables hold time in microseconds (10^-6).
                long compressTime = 0;
                long decompressTime = 0;

                int size = 0;
                IntWrapper inpos = new IntWrapper();
                IntWrapper outpos = new IntWrapper();

                for (int r = 0; r < repeat; ++r) {
                        // Only the size measured in the last repetition is
                        // reported.
                        size = 0;
                        for (int k = 0; k < N; ++k) {
                                int[] backupdata = Arrays.copyOf(data[k],
                                        data[k].length);

                                // compress data.
                                long beforeCompress = System.nanoTime() / 1000;
                                inpos.set(1);
                                outpos.set(0);
                                if (!(c instanceof IntegratedByteIntegerCODEC)) {
                                        Delta.delta(backupdata);
                                }
                                c.compress(backupdata, inpos, backupdata.length
                                        - inpos.get(), compressBuffer, outpos);
                                long afterCompress = System.nanoTime() / 1000;

                                // measure time of compression.
                                compressTime += afterCompress - beforeCompress;
                                // NOTE(review): the first value bypasses the
                                // codec and is counted as a single byte
                                // here -- confirm that this is intended.
                                final int thiscompsize = outpos.get() + 1;
                                size += thiscompsize;

                                // extract (uncompress) data
                                long beforeDecompress = System.nanoTime() / 1000;
                                inpos.set(0);
                                outpos.set(1);
                                decompressBuffer[0] = backupdata[0];
                                c.uncompress(compressBuffer, inpos,
                                        thiscompsize - 1, decompressBuffer,
                                        outpos);
                                if (!(c instanceof IntegratedByteIntegerCODEC)) {
                                        Delta.fastinverseDelta(decompressBuffer);
                                }
                                long afterDecompress = System.nanoTime() / 1000;

                                // measure time of extraction (uncompression).
                                decompressTime += afterDecompress
                                        - beforeDecompress;
                                if (outpos.get() != data[k].length) {
                                        throw new RuntimeException(
                                                "we have a bug (diff length) "
                                                        + c + " expected "
                                                        + data[k].length
                                                        + " got "
                                                        + outpos.get());
                                }

                                // verify: compare original array with
                                // compressed and
                                // uncompressed.
                                for (int m = 0; m < outpos.get(); ++m) {
                                        if (decompressBuffer[m] != data[k][m]) {
                                                throw new RuntimeException(
                                                        "we have a bug (actual difference), expected "
                                                                + data[k][m]
                                                                + " found "
                                                                + decompressBuffer[m]
                                                                + " at " + m);
                                        }
                                }
                        }
                }

                if (verbose) {
                        // The compressed size is counted in bytes here.
                        double bitsPerInt = size * 8.0 / totalSize;
                        // Integers per microsecond, i.e. millions of integers
                        // per second. The elapsed time is clamped to one
                        // microsecond so that tiny inputs cannot cause a
                        // division by zero.
                        final long processed = totalSize * repeat;
                        long compressSpeed = processed
                                / Math.max(1L, compressTime);
                        long decompressSpeed = processed
                                / Math.max(1L, decompressTime);
                        System.out.println(String.format(
                                "\t%1$.2f\t%2$d\t%3$d", bitsPerInt,
                                compressSpeed, decompressSpeed));
                        csvLog.format("\"%1$s\",%2$d,%3$.2f,%4$d,%5$d\n",
                                c.toString(), sparsity, bitsPerInt,
                                compressSpeed, decompressSpeed);
                        csvLog.flush();
                }
        }

        /**
         * Entry point: prints a short banner, creates a time-stamped CSV file
         * in the working directory and runs the whole benchmark suite,
         * logging the results into that file.
         * 
         * @param args
         *                command-line arguments (not used)
         * @throws FileNotFoundException when we fail to create a new file
         */
        public static void main(String args[]) throws FileNotFoundException  {
                final String[] banner = {
                        "# benchmark based on the ClusterData model from:",
                        "# 	 Vo Ngoc Anh and Alistair Moffat. ",
                        "#	 Index compression using 64-bit words.",
                        "# 	 Softw. Pract. Exper.40, 2 (February 2010), 131-147. " };
                for (int i = 0; i < banner.length; ++i) {
                        System.out.println(banner[i]);
                }
                System.out.println();

                // The file name carries the start time of the run.
                final File target = new File(String.format(
                        "benchmark-%1$tY%1$tm%1$tdT%1$tH%1$tM%1$tS.csv",
                        System.currentTimeMillis()));
                // If the file cannot be created, the constructor throws and
                // there is nothing to close.
                final PrintWriter log = new PrintWriter(target);
                try {
                        System.out.println("# Results will be written into a CSV file: "
                                + target.getName());
                        System.out.println();
                        test(log, 20, 18, 10);
                        System.out.println();
                        System.out.println("Results were written into a CSV file: "
                                + target.getName());
                } finally {
                        log.close();
                }
        }

        /**
         * Standard test for the Kamikaze library.
         * 
         * Every input array is copied, delta-coded and compressed in blocks
         * of 128 integers with PForDelta, then uncompressed block by block
         * and compared with the original. Arrays whose length is not a
         * multiple of 128 fail the length check.
         * 
         * @param data
         *                input data
         * @param repeat
         *                how many times to repeat
         * @param verbose
         *                whether to output data on screen
         * @throws RuntimeException
         *                 if the uncompressed data differs from the input
         *                 in length or content
         */
        public static void testKamikaze(int[][] data, int repeat,
                boolean verbose) {
                if (verbose) {
                        System.out.println("# kamikaze PForDelta");
                        System.out
                                .println("# bits per int, compress speed (mis), decompression speed (mis) ");
                }
                long bef, aft;
                int N = data.length;
                // Kept as a long so that totalsize * repeat below cannot
                // overflow the int range.
                long totalsize = 0;
                for (int k = 0; k < N; ++k) {
                        totalsize += data[k].length;
                }
                int size = 0;
                // Both timers hold microseconds and must be long: they
                // accumulate differences of long timestamps.
                long comptime = 0;
                long decomptime = 0;
                for (int r = 0; r < repeat; ++r) {
                        // Only the size measured in the last repetition is
                        // reported.
                        size = 0;
                        for (int k = 0; k < N; ++k) {
                                int outpos = 0;
                                int[] backupdata = Arrays.copyOf(data[k],
                                        data[k].length);
                                // compress data, one block of 128 at a time.
                                bef = System.nanoTime() / 1000;
                                Delta.delta(backupdata);
                                ArrayList<int[]> dataout = new ArrayList<int[]>(
                                        data[k].length / 128);
                                for (int K = 0; K < data[k].length; K += 128) {
                                        final int[] compressedbuf = PForDelta
                                                .compressOneBlockOpt(Arrays
                                                        .copyOfRange(
                                                                backupdata, K,
                                                                K + 128), 128);
                                        dataout.add(compressedbuf);
                                        outpos += compressedbuf.length;
                                }
                                aft = System.nanoTime() / 1000;
                                // measure time of compression.
                                comptime += aft - bef;
                                final int thiscompsize = outpos;
                                size += thiscompsize;
                                // extract (uncompress) data
                                bef = System.nanoTime() / 1000;
                                ArrayList<int[]> datauncomp = new ArrayList<int[]>(
                                        dataout.size());
                                int deltaoffset = 0;

                                for (int[] compbuf : dataout) {
                                        int[] tmpbuf = new int[128];
                                        PForDelta.decompressOneBlock(tmpbuf,
                                                compbuf, 128);
                                        // The deltas were computed over the
                                        // whole array, so each block starts
                                        // from the last value of the
                                        // previous block.
                                        tmpbuf[0] += deltaoffset;
                                        Delta.fastinverseDelta(tmpbuf);
                                        deltaoffset = tmpbuf[127];
                                        datauncomp.add(tmpbuf);
                                }
                                aft = System.nanoTime() / 1000;
                                // measure time of extraction (uncompression).
                                decomptime += aft - bef;
                                if (datauncomp.size() * 128 != data[k].length) {
                                        throw new RuntimeException(
                                                "we have a bug (diff length) "
                                                        + " expected "
                                                        + data[k].length
                                                        + " got "
                                                        + datauncomp.size()
                                                        * 128);
                                }
                                // verify: compare original array with
                                // compressed and uncompressed.
                                for (int m = 0; m < data[k].length; ++m) {
                                        final int found = datauncomp
                                                .get(m / 128)[m % 128];
                                        if (found != data[k][m]) {
                                                throw new RuntimeException(
                                                        "we have a bug (actual difference), expected "
                                                                + data[k][m]
                                                                + " found "
                                                                + found
                                                                + " at " + m);
                                        }
                                }

                        }
                }

                if (verbose) {
                        DecimalFormat df = new DecimalFormat("0.00");
                        DecimalFormat dfspeed = new DecimalFormat("0");
                        // Integers per microsecond, i.e. millions of integers
                        // per second. The elapsed time is clamped to one
                        // microsecond so that tiny inputs cannot cause a
                        // division by zero.
                        final long processed = totalsize * repeat;
                        String line = "";
                        line += "\t" + df.format(size * 32.0 / totalsize);
                        line += "\t"
                                + dfspeed.format(processed
                                        / Math.max(1L, comptime));
                        line += "\t"
                                + dfspeed.format(processed
                                        / Math.max(1L, decomptime));
                        System.out.println(line);
                }
        }

        /**
         * Generate test data.
         * 
         * Asks the generator for N arrays, one after the other, passing
         * (1 << nbr) and (1 << (nbr + sparsity)) to generateClustered each
         * time. Presumably these are the number of values and the exclusive
         * upper bound of the values; verify against ClusteredDataGenerator.
         * 
         * @param dataGen
         *                the generator producing each array
         * @param N
         *                How many input arrays to generate
         * @param nbr
         *                How big (in log2) should the arrays be
         * @param sparsity
         *                How sparse test data generated
         * @return the N generated arrays
         */
        private static int[][] generateTestData(ClusteredDataGenerator dataGen,
                int N, int nbr, int sparsity) {
                final int[][] arrays = new int[N][];
                final int count = 1 << nbr;
                final int limit = 1 << (nbr + sparsity);
                int next = 0;
                while (next < N) {
                        arrays[next] = dataGen.generateClustered(count, limit);
                        ++next;
                }
                return arrays;
        }

        /**
         * Generates data and calls other tests.
         * 
         * Writes the CSV header, then, for each sparsity from 1 up to (but
         * excluding) 31 - nbr, generates one data set and runs every scheme
         * on it three times: two silent passes followed by one verbose pass
         * that prints the result. A fresh codec is created for each pass,
         * except for the last two schemes, which reuse one instance for all
         * three passes.
         * 
         * @param csvLog
         *                Writer for CSV log.
         * @param N
         *                How many input arrays to generate
         * @param nbr
         *                how big (in log2) should the arrays be
         * @param repeat
         *                How many times should we repeat tests.
         */
        private static void test(PrintWriter csvLog, int N, int nbr, int repeat) {
                csvLog.format("\"Algorithm\",\"Sparsity\",\"Bits per int\",\"Compress speed (MiS)\",\"Decompress speed (MiS)\"\n");
                final ClusteredDataGenerator generator = new ClusteredDataGenerator();
                final int sparsityLimit = 31 - nbr;
                // Index of the pass that reports; the earlier passes are
                // silent.
                final int last = 2;
                for (int sparsity = 1; sparsity < sparsityLimit; ++sparsity) {
                        System.out.println("# sparsity " + sparsity);
                        System.out.println("# generating random data...");
                        final int[][] data = generateTestData(generator, N,
                                nbr, sparsity);
                        System.out.println("# generating random data... ok.");

                        for (int pass = 0; pass <= last; ++pass) {
                                testCodec(csvLog, sparsity, new Composition(
                                        new FastPFOR128(), new VariableByte()),
                                        data, repeat, pass == last);
                        }
                        System.out.println();

                        for (int pass = 0; pass <= last; ++pass) {
                                testCodec(csvLog, sparsity, new Composition(
                                        new FastPFOR(), new VariableByte()),
                                        data, repeat, pass == last);
                        }
                        System.out.println();

                        // TODO: support CSV log output.
                        for (int pass = 0; pass <= last; ++pass) {
                                testKamikaze(data, repeat, pass == last);
                        }
                        System.out.println();

                        for (int pass = 0; pass <= last; ++pass) {
                                testCodec(csvLog, sparsity,
                                        new IntegratedComposition(
                                                new IntegratedBinaryPacking(),
                                                new IntegratedVariableByte()),
                                        data, repeat, pass == last);
                        }
                        System.out.println();

                        for (int pass = 0; pass <= last; ++pass) {
                                testCodec(csvLog, sparsity, new JustCopy(),
                                        data, repeat, pass == last);
                        }
                        System.out.println();

                        for (int pass = 0; pass <= last; ++pass) {
                                testByteCodec(csvLog, sparsity,
                                        new VariableByte(), data, repeat,
                                        pass == last);
                        }
                        System.out.println();

                        for (int pass = 0; pass <= last; ++pass) {
                                testByteCodec(csvLog, sparsity,
                                        new IntegratedVariableByte(), data,
                                        repeat, pass == last);
                        }
                        System.out.println();

                        for (int pass = 0; pass <= last; ++pass) {
                                testCodec(csvLog, sparsity, new Composition(
                                        new BinaryPacking(), new VariableByte()),
                                        data, repeat, pass == last);
                        }
                        System.out.println();

                        for (int pass = 0; pass <= last; ++pass) {
                                testCodec(csvLog, sparsity, new Composition(
                                        new NewPFD(), new VariableByte()),
                                        data, repeat, pass == last);
                        }
                        System.out.println();

                        for (int pass = 0; pass <= last; ++pass) {
                                testCodec(csvLog, sparsity, new Composition(
                                        new NewPFDS9(), new VariableByte()),
                                        data, repeat, pass == last);
                        }
                        System.out.println();

                        for (int pass = 0; pass <= last; ++pass) {
                                testCodec(csvLog, sparsity, new Composition(
                                        new NewPFDS16(), new VariableByte()),
                                        data, repeat, pass == last);
                        }
                        System.out.println();

                        for (int pass = 0; pass <= last; ++pass) {
                                testCodec(csvLog, sparsity, new Composition(
                                        new OptPFD(), new VariableByte()),
                                        data, repeat, pass == last);
                        }
                        System.out.println();

                        for (int pass = 0; pass <= last; ++pass) {
                                testCodec(csvLog, sparsity, new Composition(
                                        new OptPFDS9(), new VariableByte()),
                                        data, repeat, pass == last);
                        }
                        System.out.println();

                        for (int pass = 0; pass <= last; ++pass) {
                                testCodec(csvLog, sparsity, new Composition(
                                        new OptPFDS16(), new VariableByte()),
                                        data, repeat, pass == last);
                        }
                        System.out.println();

                        for (int pass = 0; pass <= last; ++pass) {
                                testCodec(csvLog, sparsity, new Simple9(),
                                        data, repeat, pass == last);
                        }
                        System.out.println();

                        // The two remaining schemes share one codec instance
                        // across their three passes.
                        final IntegerCODEC xorCodec = new Composition(
                                new XorBinaryPacking(), new VariableByte());
                        for (int pass = 0; pass <= last; ++pass) {
                                testCodec(csvLog, sparsity, xorCodec, data,
                                        repeat, pass == last);
                        }
                        System.out.println();

                        final IntegerCODEC zigzagCodec = new Composition(
                                new DeltaZigzagBinaryPacking(),
                                new DeltaZigzagVariableByte());
                        for (int pass = 0; pass <= last; ++pass) {
                                testCodec(csvLog, sparsity, zigzagCodec, data,
                                        repeat, pass == last);
                        }
                        System.out.println();

                }
        }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy