All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.commons.compress.compressors.snappy.SnappyCompressorOutputStream Maven / Gradle / Ivy

Go to download

Apache Commons Compress software defines an API for working with compression and archive formats. These include: bzip2, gzip, pack200, lzma, xz, Snappy, traditional Unix Compress, DEFLATE, DEFLATE64, LZ4, Brotli, Zstandard and ar, cpio, jar, tar, zip, dump, 7z, arj.

There is a newer version: 1.26.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.compressors.snappy;

import java.io.IOException;
import java.io.OutputStream;

import org.apache.commons.compress.compressors.CompressorOutputStream;
import org.apache.commons.compress.compressors.lz77support.LZ77Compressor;
import org.apache.commons.compress.compressors.lz77support.Parameters;
import org.apache.commons.compress.utils.ByteUtils;

/**
 * CompressorOutputStream for the raw Snappy format.
 *
 * 

This implementation uses an internal buffer in order to handle * the back-references that are at the heart of the LZ77 algorithm. * The size of the buffer must be at least as big as the biggest * offset used in the compressed stream. The current version of the * Snappy algorithm as defined by Google works on 32k blocks and * doesn't contain offsets bigger than 32k which is the default block * size used by this class.

* *

The raw Snappy format requires the uncompressed size to be * written at the beginning of the stream using a varint * representation, i.e. the number of bytes needed to write the * information is not known before the uncompressed size is * known. We've chosen to make the uncompressedSize a parameter of the * constructor in favor of buffering the whole output until the size * is known. When using the {@link FramedSnappyCompressorOutputStream} * this limitation is taken care of by the warpping framing * format.

* * @see Snappy compressed format description * @since 1.14 * @NotThreadSafe */ public class SnappyCompressorOutputStream extends CompressorOutputStream { // literal length is stored as (len - 1) either inside the tag // (six bits minus four flags) or in 1 to 4 bytes after the tag private static final int MAX_LITERAL_SIZE_WITHOUT_SIZE_BYTES = 60; private static final int MAX_LITERAL_SIZE_WITH_ONE_SIZE_BYTE = 1 << 8; private static final int MAX_LITERAL_SIZE_WITH_TWO_SIZE_BYTES = 1 << 16; private static final int MAX_LITERAL_SIZE_WITH_THREE_SIZE_BYTES = 1 << 24; private static final int ONE_SIZE_BYTE_MARKER = 60 << 2; private static final int TWO_SIZE_BYTE_MARKER = 61 << 2; private static final int THREE_SIZE_BYTE_MARKER = 62 << 2; private static final int FOUR_SIZE_BYTE_MARKER = 63 << 2; // Back-references ("copies") have their offset/size information // in two, three or five bytes. private static final int MIN_MATCH_LENGTH_WITH_ONE_OFFSET_BYTE = 4; private static final int MAX_MATCH_LENGTH_WITH_ONE_OFFSET_BYTE = 11; private static final int MAX_OFFSET_WITH_ONE_OFFSET_BYTE = 1 << 11 - 1; private static final int MAX_OFFSET_WITH_TWO_OFFSET_BYTES = 1 << 16 - 1; private static final int ONE_BYTE_COPY_TAG = 1; private static final int TWO_BYTE_COPY_TAG = 2; private static final int FOUR_BYTE_COPY_TAG = 3; // technically the format could use shorter matches but with a // length of three the offset would be encoded as at least two // bytes in addition to the tag, so yield no compression at all private static final int MIN_MATCH_LENGTH = 4; // Snappy stores the match length in six bits of the tag private static final int MAX_MATCH_LENGTH = 64; /** * Returns a builder correctly configured for the Snappy algorithm using the gven block size. * @param blockSize the block size. 
* @return a builder correctly configured for the Snappy algorithm using the gven block size */ public static Parameters.Builder createParameterBuilder(final int blockSize) { // the max offset and max literal length defined by the format // are 2^32 - 1 and 2^32 respectively - with blockSize being // an integer we will never exceed that return Parameters.builder(blockSize) .withMinBackReferenceLength(MIN_MATCH_LENGTH) .withMaxBackReferenceLength(MAX_MATCH_LENGTH) .withMaxOffset(blockSize) .withMaxLiteralLength(blockSize); } private final LZ77Compressor compressor; private final OutputStream os; private final ByteUtils.ByteConsumer consumer; // used in one-arg write method private final byte[] oneByte = new byte[1]; private boolean finished; /** * Constructor using the default block size of 32k. * * @param os the outputstream to write compressed data to * @param uncompressedSize the uncompressed size of data * @throws IOException if writing of the size fails */ public SnappyCompressorOutputStream(final OutputStream os, final long uncompressedSize) throws IOException { this(os, uncompressedSize, SnappyCompressorInputStream.DEFAULT_BLOCK_SIZE); } /** * Constructor using a configurable block size. * * @param os the outputstream to write compressed data to * @param uncompressedSize the uncompressed size of data * @param blockSize the block size used - must be a power of two * @throws IOException if writing of the size fails */ public SnappyCompressorOutputStream(final OutputStream os, final long uncompressedSize, final int blockSize) throws IOException { this(os, uncompressedSize, createParameterBuilder(blockSize).build()); } /** * Constructor providing full control over the underlying LZ77 compressor. 
* * @param os the outputstream to write compressed data to * @param uncompressedSize the uncompressed size of data * @param params the parameters to use by the compressor - note * that the format itself imposes some limits like a maximum match * length of 64 bytes * @throws IOException if writing of the size fails */ public SnappyCompressorOutputStream(final OutputStream os, final long uncompressedSize, final Parameters params) throws IOException { this.os = os; consumer = new ByteUtils.OutputStreamByteConsumer(os); compressor = new LZ77Compressor(params, block -> { switch (block.getType()) { case LITERAL: writeLiteralBlock((LZ77Compressor.LiteralBlock) block); break; case BACK_REFERENCE: writeBackReference((LZ77Compressor.BackReference) block); break; case EOD: break; } }); writeUncompressedSize(uncompressedSize); } @Override public void close() throws IOException { try { finish(); } finally { os.close(); } } /** * Compresses all remaining data and writes it to the stream, * doesn't close the underlying stream. 
* @throws IOException if an error occurs */ public void finish() throws IOException { if (!finished) { compressor.finish(); finished = true; } } @Override public void write(final byte[] data, final int off, final int len) throws IOException { compressor.compress(data, off, len); } @Override public void write(final int b) throws IOException { oneByte[0] = (byte) (b & 0xff); write(oneByte); } private void writeBackReference(final LZ77Compressor.BackReference block) throws IOException { final int len = block.getLength(); final int offset = block.getOffset(); if (len >= MIN_MATCH_LENGTH_WITH_ONE_OFFSET_BYTE && len <= MAX_MATCH_LENGTH_WITH_ONE_OFFSET_BYTE && offset <= MAX_OFFSET_WITH_ONE_OFFSET_BYTE) { writeBackReferenceWithOneOffsetByte(len, offset); } else if (offset < MAX_OFFSET_WITH_TWO_OFFSET_BYTES) { writeBackReferenceWithTwoOffsetBytes(len, offset); } else { writeBackReferenceWithFourOffsetBytes(len, offset); } } private void writeBackReferenceWithFourOffsetBytes(final int len, final int offset) throws IOException { writeBackReferenceWithLittleEndianOffset(FOUR_BYTE_COPY_TAG, 4, len, offset); } private void writeBackReferenceWithLittleEndianOffset(final int tag, final int offsetBytes, final int len, final int offset) throws IOException { os.write(tag | ((len - 1) << 2)); writeLittleEndian(offsetBytes, offset); } private void writeBackReferenceWithOneOffsetByte(final int len, final int offset) throws IOException { os.write(ONE_BYTE_COPY_TAG | ((len - 4) << 2) | ((offset & 0x700) >> 3)); os.write(offset & 0xff); } private void writeBackReferenceWithTwoOffsetBytes(final int len, final int offset) throws IOException { writeBackReferenceWithLittleEndianOffset(TWO_BYTE_COPY_TAG, 2, len, offset); } private void writeLiteralBlock(final LZ77Compressor.LiteralBlock block) throws IOException { final int len = block.getLength(); if (len <= MAX_LITERAL_SIZE_WITHOUT_SIZE_BYTES) { writeLiteralBlockNoSizeBytes(block, len); } else if (len <= MAX_LITERAL_SIZE_WITH_ONE_SIZE_BYTE) { 
writeLiteralBlockOneSizeByte(block, len); } else if (len <= MAX_LITERAL_SIZE_WITH_TWO_SIZE_BYTES) { writeLiteralBlockTwoSizeBytes(block, len); } else if (len <= MAX_LITERAL_SIZE_WITH_THREE_SIZE_BYTES) { writeLiteralBlockThreeSizeBytes(block, len); } else { writeLiteralBlockFourSizeBytes(block, len); } } private void writeLiteralBlockFourSizeBytes(final LZ77Compressor.LiteralBlock block, final int len) throws IOException { writeLiteralBlockWithSize(FOUR_SIZE_BYTE_MARKER, 4, len, block); } private void writeLiteralBlockNoSizeBytes(final LZ77Compressor.LiteralBlock block, final int len) throws IOException { writeLiteralBlockWithSize(len - 1 << 2, 0, len, block); } private void writeLiteralBlockOneSizeByte(final LZ77Compressor.LiteralBlock block, final int len) throws IOException { writeLiteralBlockWithSize(ONE_SIZE_BYTE_MARKER, 1, len, block); } private void writeLiteralBlockThreeSizeBytes(final LZ77Compressor.LiteralBlock block, final int len) throws IOException { writeLiteralBlockWithSize(THREE_SIZE_BYTE_MARKER, 3, len, block); } private void writeLiteralBlockTwoSizeBytes(final LZ77Compressor.LiteralBlock block, final int len) throws IOException { writeLiteralBlockWithSize(TWO_SIZE_BYTE_MARKER, 2, len, block); } private void writeLiteralBlockWithSize(final int tagByte, final int sizeBytes, final int len, final LZ77Compressor.LiteralBlock block) throws IOException { os.write(tagByte); writeLittleEndian(sizeBytes, len - 1); os.write(block.getData(), block.getOffset(), len); } private void writeLittleEndian(final int numBytes, final int num) throws IOException { ByteUtils.toLittleEndian(consumer, num, numBytes); } private void writeUncompressedSize(long uncompressedSize) throws IOException { boolean more = false; do { int currentByte = (int) (uncompressedSize & 0x7F); more = uncompressedSize > currentByte; if (more) { currentByte |= 0x80; } os.write(currentByte); uncompressedSize >>= 7; } while (more); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy