All downloads are free. Search and download functionality uses the official Maven repository.

com.yahoo.compress.Compressor Maven / Gradle / Ivy

Go to download

Library for use in Java components of Vespa. Shared code which does not fit anywhere else.

There is a newer version: 8.409.18
Show newest version
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.compress;

import net.jpountz.lz4.LZ4Compressor;
import net.jpountz.lz4.LZ4Factory;
import net.jpountz.lz4.LZ4FastDecompressor;
import net.jpountz.lz4.LZ4SafeDecompressor;

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Optional;
import java.util.Random;

/**
 * Compressor which can compress and decompress in various formats.
 * This class is thread safe. Creating a reusable instance is faster than creating instances as needed.
 *
 * @author bratseth
 */
public class Compressor {

    private final CompressionType type;
    private final int level;
    private final double compressionThresholdFactor;
    private final int compressMinSizeBytes;

    private static final LZ4Factory factory = LZ4Factory.fastestInstance();

    /** Creates a compressor with default settings. */
    public Compressor() {
        this(CompressionType.LZ4);
    }

    /** Creates a compressor with a default compression type. */
    public Compressor(CompressionType type) {
        this(type, 9, 0.95, 0);
    }

    /** Creates a compressor with a default compression type and compression level. */
    public Compressor(CompressionType type, int level) {
        this(type, level, 0.95, 0);
    }

    /**
     * Creates a compressor.
     *
     * @param type the type of compression to use to compress data
     * @param level a number between 0 and 9 where a higher value means more compression
     * @param compressionThresholdFactor the compression factor we need to achieve to return the compressed data
     *                                   instead of raw data
     * @param compressMinSizeBytes the minimal input data size to perform compression
     */
    public Compressor(CompressionType type, int level, double compressionThresholdFactor, int compressMinSizeBytes) {
        this.type = type;
        this.level = level;
        this.compressionThresholdFactor = compressionThresholdFactor;
        this.compressMinSizeBytes = compressMinSizeBytes;
    }

    /** Returns the default compression type used by this */
    public CompressionType type() { return type; }

    /** Returns the compression level this will use - a number between 0 and 9 where higher means more compression */
    public int level() { return level; }

    /** Returns the compression factor we need to achieve to return compressed rather than raw data */
    public double compressionThresholdFactor() { return compressionThresholdFactor; }

    /** Returns the minimal data size required to perform compression */
    public int compressMinSizeBytes() { return compressMinSizeBytes; }

    /**
     * Compresses some data
     *
     * @param requestedCompression the desired compression type, which will be used if the data is deemed suitable.
     *                             Not all the existing types are actually supported.
     * @param data the data to compress. This array is only read by this method.
     * @param uncompressedSize the size in bytes of the data array. If this is not present, it is
     *                         assumed that the size is the same as the data array size, i.e that it is completely
     *                         filled with uncompressed data.
     * @return the compression result
     * @throws IllegalArgumentException if the compression type is not supported
     */
    public Compression compress(CompressionType requestedCompression, byte[] data, Optional<Integer> uncompressedSize) {
        return compress(requestedCompression, data, 0, uncompressedSize.orElse(data.length));
    }

    /**
     * Compresses a slice of some data
     *
     * @param requestedCompression the desired compression type, which will be used if the data is deemed suitable
     * @param data the data to compress. This array is only read by this method.
     * @param offset the index in data of the first byte to compress
     * @param len the number of bytes to compress, starting at offset
     * @return the compression result. Its type is INCOMPRESSIBLE, and its data the raw slice, when the input is
     *         shorter than {@link #compressMinSizeBytes()} or (LZ4 only) when the compressed size does not beat
     *         the {@link #compressionThresholdFactor()}
     * @throws IllegalArgumentException if the compression type is not supported
     */
    public Compression compress(CompressionType requestedCompression, byte[] data, int offset, int len) {
        switch (requestedCompression) {
            case NONE:
                return compact(CompressionType.NONE, data, offset, len);
            case LZ4:
                if (len < compressMinSizeBytes) {
                    return compact(CompressionType.INCOMPRESSIBLE, data, offset, len);
                }
                byte[] compressedData = getCompressor().compress(data, offset, len);
                // NOTE(review): the extra 8 bytes presumably account for per-message overhead of
                // carrying compressed data - confirm against callers.
                if (compressedData.length + 8 >= len * compressionThresholdFactor) {
                    return compact(CompressionType.INCOMPRESSIBLE, data, offset, len);
                }
                return new Compression(CompressionType.LZ4, len, compressedData);
            case ZSTD:
                if (len < compressMinSizeBytes) {
                    return compact(CompressionType.INCOMPRESSIBLE, data, offset, len);
                }
                // Unlike LZ4, the ZSTD result is returned regardless of the achieved compression factor
                ZstdCompressor zstdCompressor = new ZstdCompressor();
                byte[] compressed = zstdCompressor.compress(data, offset, len);
                return new Compression(CompressionType.ZSTD, len, compressed);
            default:
                throw new IllegalArgumentException(requestedCompression + " is not supported");
        }
    }

    /**
     * Wraps the given slice of raw data in a compression result of the given type.
     * The slice is copied to a new array unless it covers the entire input array,
     * in which case the input array itself is used.
     */
    private Compression compact(CompressionType type, byte[] data, int offset, int len) {
        if ((offset != 0) || (len != data.length)) {
            data = Arrays.copyOfRange(data, offset, offset + len);
        }
        return new Compression(type, len, data);
    }

    /** Returns the LZ4 compressor matching the level of this: The fast one below level 7, the high one otherwise */
    private LZ4Compressor getCompressor() {
        return level < 7 ? factory.fastCompressor() : factory.highCompressor();
    }

    /** Compresses some data using the requested compression type */
    public Compression compress(CompressionType requestedCompression, byte[] data) { return compress(requestedCompression, data, 0, data.length); }

    /** Compresses the first uncompressedSize bytes of some data using the compression type of this compressor */
    public Compression compress(byte[] data, int uncompressedSize) { return compress(type, data, 0, uncompressedSize); }

    /** Compresses some data using the compression type of this compressor */
    public Compression compress(byte[] data) { return compress(type, data, Optional.empty()); }

    /**
     * Decompresses some data
     *
     * @param compression the compression type used
     * @param compressedData the compressed data. This array is only read by this method.
     * @param compressedDataOffset the offset in the compressed data at which to start decompression
     * @param expectedUncompressedSize the uncompressed size in bytes of this data
     * @param expectedCompressedSize the expected compressed size of the data in bytes. For NONE and INCOMPRESSIBLE
     *                               this is the number of bytes to copy, for LZ4 it is validated against the number
     *                               of bytes actually consumed, and for ZSTD it is the number of input bytes to
     *                               decompress. If empty, the input is taken to extend to the end of the array
     *                               (and no validation is done for LZ4).
     * @return the uncompressed data, of the given size
     * @throws IllegalArgumentException if the compression type is not supported
     * @throws IllegalStateException if the expected compressed size is non-empty and specifies a different size than the actual size
     */
    public byte[] decompress(CompressionType compression, byte[] compressedData, int compressedDataOffset,
                             int expectedUncompressedSize, Optional<Integer> expectedCompressedSize) {
        switch (compression) {
            case NONE: case INCOMPRESSIBLE: // return a copy of the requested slice of the input buffer
                int endPosition = expectedCompressedSize.isPresent() ? compressedDataOffset + expectedCompressedSize.get() : compressedData.length;
                return Arrays.copyOfRange(compressedData, compressedDataOffset, endPosition);
            case LZ4:
                byte[] uncompressedLZ4Data = new byte[expectedUncompressedSize];
                // The fast decompressor returns the number of compressed bytes it consumed
                int compressedSize = factory.fastDecompressor().decompress(compressedData, compressedDataOffset,
                                                                           uncompressedLZ4Data, 0, expectedUncompressedSize);
                if (expectedCompressedSize.isPresent() && compressedSize != expectedCompressedSize.get())
                    throw new IllegalStateException("Compressed size mismatch. Expected " + expectedCompressedSize.get() + ". Got " + compressedSize);
                return uncompressedLZ4Data;
            case ZSTD:
                // The compressed input must span from the offset to the end of the array:
                // Use that as the input size when none is given, and validate against it otherwise.
                int availableBytes = compressedData.length - compressedDataOffset;
                int compressedLength = expectedCompressedSize.orElse(availableBytes);
                ZstdCompressor zstdCompressor = new ZstdCompressor();
                byte[] decompressedData = zstdCompressor.decompress(compressedData, compressedDataOffset, compressedLength);
                if (compressedLength != availableBytes)
                    throw new IllegalStateException("Compressed size mismatch. Expected " + compressedLength + ". Got " + availableBytes);
                return decompressedData;
            default:
                throw new IllegalArgumentException(compression + " is not supported");
        }
    }

    /** Decompresses some data, starting at the beginning of the given array */
    public byte[] decompress(byte[] compressedData, CompressionType compressionType, int uncompressedSize) {
        return decompress(compressionType, compressedData, 0, uncompressedSize, Optional.empty());
    }

    /** Decompresses the data of a compression result */
    public byte[] decompress(Compression compression) {
        return decompress(compression.type(), compression.data(), 0, compression.uncompressedSize(), Optional.empty());
    }

    /** Compresses the given data with LZ4, without applying the size and compression factor thresholds of this */
    public byte[] compressUnconditionally(byte[] input) {
        return getCompressor().compress(input, 0, input.length);
    }

    /**
     * Compresses the remaining content of the given buffer with LZ4, without applying the size and
     * compression factor thresholds of this. The buffer is accessed through its backing array.
     */
    public byte[] compressUnconditionally(ByteBuffer input) {
        return getCompressor().compress(input.array(), input.arrayOffset()+input.position(), input.remaining());
    }

    /** Decompresses LZ4 data from input into output. Does nothing if the input has no remaining content. */
    public void decompressUnconditionally(ByteBuffer input, ByteBuffer output) {
        if (input.remaining() > 0) {
            factory.fastDecompressor().decompress(input, output);
        }
    }

    /**
     * Decompresses LZ4 data
     *
     * @param input the compressed data
     * @param srcOffset the offset in input at which the compressed data starts
     * @param uncompressedLen the size in bytes of the uncompressed data
     * @return the uncompressed data, or an empty array if the input array is empty
     */
    public byte [] decompressUnconditionally(byte[] input, int srcOffset, int uncompressedLen) {
        if (input.length > 0) {
            return factory.fastDecompressor().decompress(input, srcOffset, uncompressedLen);
        }
        return new byte[0];
    }

    /**
     * Repeatedly compresses and decompresses 16 KiB of random data with all the LZ4 compressors and
     * decompressors until the given time has passed (presumably to warm up the JIT before real use).
     *
     * @param seconds the approximate duration to run for
     * @return the total number of compressed bytes produced
     */
    public long warmup(double seconds) {
        byte [] input = new byte[0x4000];
        new Random().nextBytes(input);
        long startTime = System.nanoTime();
        long compressedBytes = 0;
        byte [] decompressed = new byte [input.length];
        LZ4FastDecompressor fastDecompressor = factory.fastDecompressor();
        LZ4SafeDecompressor safeDecompressor = factory.safeDecompressor();
        LZ4Compressor fastCompressor = factory.fastCompressor();
        LZ4Compressor highCompressor = factory.highCompressor();
        while (System.nanoTime() - startTime < seconds * 1e9) {
            byte [] compressedFast = fastCompressor.compress(input);
            byte [] compressedHigh = highCompressor.compress(input);
            fastDecompressor.decompress(compressedFast, decompressed);
            fastDecompressor.decompress(compressedHigh, decompressed);
            safeDecompressor.decompress(compressedFast, decompressed);
            safeDecompressor.decompress(compressedHigh, decompressed);
            compressedBytes += compressedFast.length + compressedHigh.length;
        }
        return compressedBytes;
    }

    /** The result of a compression: The type actually used, the original size and the resulting data */
    public static class Compression {

        private final CompressionType compressionType;
        private final int uncompressedSize;
        private final byte[] data;

        public Compression(CompressionType compressionType, int uncompressedSize, byte[] data) {
            this.compressionType = compressionType;
            this.uncompressedSize = uncompressedSize;
            this.data = data;
        }

        /**
         * Returns the compression type used to compress this data.
         * This will be either the requested compression or INCOMPRESSIBLE.
         */
        public CompressionType type() { return compressionType; }

        /** Returns the uncompressed size of this data in bytes */
        public int uncompressedSize() { return uncompressedSize; }

        /**
         * Returns the resulting data - compressed according to {@link #type()}, or the raw bytes when
         * the type is NONE or INCOMPRESSIBLE - in a buffer which gets owned by the caller
         */
        public byte[] data() { return data; }

    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy