/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.kafka.common.record;

import org.apache.kafka.common.KafkaException;
import org.apache.kafka.common.compress.KafkaLZ4BlockInputStream;
import org.apache.kafka.common.compress.KafkaLZ4BlockOutputStream;
import org.apache.kafka.common.compress.SnappyFactory;
import org.apache.kafka.common.compress.ZstdFactory;
import org.apache.kafka.common.utils.BufferSupplier;
import org.apache.kafka.common.utils.ByteBufferInputStream;
import org.apache.kafka.common.utils.ByteBufferOutputStream;
import org.apache.kafka.common.utils.ChunkedBytesStream;

import java.io.BufferedOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

/**
 * The compression type to use
 */
public enum CompressionType {
    NONE((byte) 0, "none", 1.0f) {
        @Override
        public OutputStream wrapForOutput(ByteBufferOutputStream buffer, byte messageVersion) {
            return buffer;
        }

        @Override
        public InputStream wrapForInput(ByteBuffer buffer, byte messageVersion, BufferSupplier decompressionBufferSupplier) {
            return new ByteBufferInputStream(buffer);
        }
    },

    // Shipped with the JDK
    GZIP((byte) 1, "gzip", 1.0f) {
        @Override
        public OutputStream wrapForOutput(ByteBufferOutputStream buffer, byte messageVersion) {
            try {
                // Set input buffer (uncompressed) to 16 KB (none by default) and output buffer (compressed) to
                // 8 KB (0.5 KB by default) to ensure reasonable performance in cases where the caller passes a small
                // number of bytes to write (potentially a single byte)
                return new BufferedOutputStream(new GZIPOutputStream(buffer, 8 * 1024), 16 * 1024);
            } catch (Exception e) {
                throw new KafkaException(e);
            }
        }

        @Override
        public InputStream wrapForInput(ByteBuffer buffer, byte messageVersion, BufferSupplier decompressionBufferSupplier) {
            try {
                // Set input buffer (compressed) to 8 KB (GZIPInputStream uses 0.5 KB by default) to ensure reasonable
                // performance in cases where the caller reads a small number of bytes (potentially a single byte).
                //
                // Size of output buffer (uncompressed) is provided by decompressionOutputSize.
                //
                // ChunkedBytesStream is used to wrap the GZIPInputStream because the default implementation of
                // GZIPInputStream does not use an intermediate buffer for decompression in chunks.
                return new ChunkedBytesStream(new GZIPInputStream(new ByteBufferInputStream(buffer), 8 * 1024), decompressionBufferSupplier, decompressionOutputSize(), false);
            } catch (Exception e) {
                throw new KafkaException(e);
            }
        }

        @Override
        public int decompressionOutputSize() {
            // 16 KB has been chosen based on the legacy implementation introduced in https://github.com/apache/kafka/pull/6785
            return 16 * 1024;
        }
    },

    // We should only load classes from a given compression library when we actually use that compression library.
    // Compression libraries ship native code for a fixed set of platforms, and we want to avoid errors on
    // unsupported platforms when the compression library is not actually used.
    // To ensure this, we reference compression library code only from classes that are invoked when actual usage
    // happens.
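    //
    // For illustration, the factory indirection used below looks roughly like the following sketch; the exact body
    // of SnappyFactory may differ, and SnappyOutputStream is the stream type from the xerial snappy-java library:
    //
    //     public class SnappyFactory {
    //         public static OutputStream wrapForOutput(ByteBufferOutputStream buffer) {
    //             try {
    //                 return new SnappyOutputStream(buffer); // first use here triggers loading of snappy-java
    //             } catch (Throwable e) {
    //                 throw new KafkaException(e);
    //             }
    //         }
    //     }
    //
    // Because the library types appear only inside SnappyFactory, the JVM does not load snappy-java (or its native
    // code) until one of these methods is first invoked.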

    SNAPPY((byte) 2, "snappy", 1.0f) {
        @Override
        public OutputStream wrapForOutput(ByteBufferOutputStream buffer, byte messageVersion) {
            return SnappyFactory.wrapForOutput(buffer);
        }

        @Override
        public InputStream wrapForInput(ByteBuffer buffer, byte messageVersion, BufferSupplier decompressionBufferSupplier) {
            // SnappyInputStream inherits the default skip() implementation from InputStream, which allocates a new
            // skip buffer on every call; hence, we prefer our own implementation.
            return new ChunkedBytesStream(SnappyFactory.wrapForInput(buffer), decompressionBufferSupplier, decompressionOutputSize(), false);
        }

        @Override
        public int decompressionOutputSize() {
            // SnappyInputStream already uses an intermediate buffer internally. The size of this buffer matches the
            // skipArray used by the legacy implementation introduced in
            // https://github.com/apache/kafka/pull/6785
            return 2 * 1024; // 2KB
        }
    },

    LZ4((byte) 3, "lz4", 1.0f) {
        @Override
        public OutputStream wrapForOutput(ByteBufferOutputStream buffer, byte messageVersion) {
            try {
                return new KafkaLZ4BlockOutputStream(buffer, messageVersion == RecordBatch.MAGIC_VALUE_V0);
            } catch (Throwable e) {
                throw new KafkaException(e);
            }
        }

        @Override
        public InputStream wrapForInput(ByteBuffer inputBuffer, byte messageVersion, BufferSupplier decompressionBufferSupplier) {
            try {
                return new ChunkedBytesStream(
                    new KafkaLZ4BlockInputStream(inputBuffer, decompressionBufferSupplier, messageVersion == RecordBatch.MAGIC_VALUE_V0),
                    decompressionBufferSupplier, decompressionOutputSize(), true);
            } catch (Throwable e) {
                throw new KafkaException(e);
            }
        }

        @Override
        public int decompressionOutputSize() {
            // KafkaLZ4BlockInputStream uses an internal intermediate buffer to store decompressed data. The size of
            // this buffer matches the skipArray used by the legacy implementation introduced in
            // https://github.com/apache/kafka/pull/6785
            return 2 * 1024; // 2KB
        }
    },

    ZSTD((byte) 4, "zstd", 1.0f) {
        @Override
        public OutputStream wrapForOutput(ByteBufferOutputStream buffer, byte messageVersion) {
            return ZstdFactory.wrapForOutput(buffer);
        }

        @Override
        public InputStream wrapForInput(ByteBuffer buffer, byte messageVersion, BufferSupplier decompressionBufferSupplier) {
            return new ChunkedBytesStream(ZstdFactory.wrapForInput(buffer, messageVersion, decompressionBufferSupplier), decompressionBufferSupplier, decompressionOutputSize(), false);
        }

        /**
         * Size of intermediate buffer which contains uncompressed data.
         * This size should be less than or equal to ZSTD_BLOCKSIZE_MAX (128 KiB at the referenced revision), see:
         * https://github.com/facebook/zstd/blob/189653a9c10c9f4224a5413a6d6a69dd01d7c3bd/lib/zstd.h#L854
         */
        @Override
        public int decompressionOutputSize() {
            // 16 KB has been chosen based on the legacy implementation introduced in https://github.com/apache/kafka/pull/6785
            return 16 * 1024;
        }
    };

    // The compression type is represented by three bits in the attributes field of the record batch header (the
    // bits covered by DefaultRecordBatch.COMPRESSION_CODEC_MASK, 0x07), so `byte` is large enough
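    //
    // For example, code decoding a batch header can recover the type with something like:
    //     CompressionType.forId(attributes & 0x07); // mask off the low three bits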
    public final byte id;
    public final String name;
    public final float rate;

    CompressionType(byte id, String name, float rate) {
        this.id = id;
        this.name = name;
        this.rate = rate;
    }

    /**
     * Wrap bufferStream with an OutputStream that will compress data with this CompressionType.
     * <p>
     * Note: Unlike {@link #wrapForInput}, {@link #wrapForOutput} cannot take {@link ByteBuffer}s directly.
     * Currently, {@link MemoryRecordsBuilder#writeDefaultBatchHeader()} and {@link MemoryRecordsBuilder#writeLegacyCompressedWrapperHeader()}
     * write to the underlying buffer in the given {@link ByteBufferOutputStream} after the compressed data has been written.
     * In the event that the buffer needs to be expanded while writing the data, access to the underlying buffer needs to be preserved.
     */
    public abstract OutputStream wrapForOutput(ByteBufferOutputStream bufferStream, byte messageVersion);

    /**
     * Wrap buffer with an InputStream that will decompress data with this CompressionType.
     *
     * @param decompressionBufferSupplier The supplier of ByteBuffer(s) used for decompression if supported.
     *                                    For small record batches, allocating a potentially large buffer (64 KB for LZ4)
     *                                    will dominate the cost of decompressing and iterating over the records in the
     *                                    batch. As such, a supplier that reuses buffers will have a significant
     *                                    performance impact.
     */
    public abstract InputStream wrapForInput(ByteBuffer buffer, byte messageVersion, BufferSupplier decompressionBufferSupplier);

    /**
     * Recommended size of buffer for storing decompressed output.
     */
    public int decompressionOutputSize() {
        throw new UnsupportedOperationException("Size of decompression buffer is not defined for this compression type=" + this.name);
    }

    public static CompressionType forId(int id) {
        switch (id) {
            case 0:
                return NONE;
            case 1:
                return GZIP;
            case 2:
                return SNAPPY;
            case 3:
                return LZ4;
            case 4:
                return ZSTD;
            default:
                throw new IllegalArgumentException("Unknown compression type id: " + id);
        }
    }

    public static CompressionType forName(String name) {
        if (NONE.name.equals(name))
            return NONE;
        else if (GZIP.name.equals(name))
            return GZIP;
        else if (SNAPPY.name.equals(name))
            return SNAPPY;
        else if (LZ4.name.equals(name))
            return LZ4;
        else if (ZSTD.name.equals(name))
            return ZSTD;
        else
            throw new IllegalArgumentException("Unknown compression name: " + name);
    }

    @Override
    public String toString() {
        return name;
    }
}
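
// Usage sketch (illustrative, not part of the upstream file): a compress/decompress round trip through GZIP.
// It assumes the Kafka-internal helpers referenced here (ByteBufferOutputStream(int), ByteBufferOutputStream#buffer(),
// BufferSupplier.create() and RecordBatch.CURRENT_MAGIC_VALUE) behave as in the surrounding codebase.
//
//     ByteBufferOutputStream out = new ByteBufferOutputStream(1024);
//     try (OutputStream os = CompressionType.GZIP.wrapForOutput(out, RecordBatch.CURRENT_MAGIC_VALUE)) {
//         os.write("hello".getBytes(StandardCharsets.UTF_8)); // closing the stream flushes the GZIP trailer
//     }
//     ByteBuffer compressed = out.buffer();
//     compressed.flip(); // switch the underlying buffer from writing to reading
//     try (InputStream is = CompressionType.GZIP.wrapForInput(compressed, RecordBatch.CURRENT_MAGIC_VALUE,
//             BufferSupplier.create())) {
//         byte[] plain = new byte[5];
//         int n = is.read(plain); // reads back "hello"
//     }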