All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.netty.handler.codec.compression.Bzip2BlockDecompressor Maven / Gradle / Ivy

There is a newer version: 2.38.0
Show newest version
/*
 * Copyright 2014 The Netty Project
 *
 * The Netty Project licenses this file to you under the Apache License,
 * version 2.0 (the "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at:
 *
 *   https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package io.netty.handler.codec.compression;

import static io.netty.handler.codec.compression.Bzip2Constants.*;

/**
 * Reads and decompresses a single Bzip2 block.

* * Block decoding consists of the following stages:
* 1. Read block header
* 2. Read Huffman tables
* 3. Read and decode Huffman encoded data - {@link #decodeHuffmanData(Bzip2HuffmanStageDecoder)}
* 4. Run-Length Decoding[2] - {@link #decodeHuffmanData(Bzip2HuffmanStageDecoder)}
* 5. Inverse Move To Front Transform - {@link #decodeHuffmanData(Bzip2HuffmanStageDecoder)}
* 6. Inverse Burrows Wheeler Transform - {@link #initialiseInverseBWT()}
* 7. Run-Length Decoding[1] - {@link #read()}
* 8. Optional Block De-Randomisation - {@link #read()} (through {@link #decodeNextBWTByte()}) */ final class Bzip2BlockDecompressor { /** * A reader that provides bit-level reads. */ private final Bzip2BitReader reader; /** * Calculates the block CRC from the fully decoded bytes of the block. */ private final Crc32 crc = new Crc32(); /** * The CRC of the current block as read from the block header. */ private final int blockCRC; /** * {@code true} if the current block is randomised, otherwise {@code false}. */ private final boolean blockRandomised; /* Huffman Decoding stage */ /** * The end-of-block Huffman symbol. Decoding of the block ends when this is encountered. */ int huffmanEndOfBlockSymbol; /** * Bitmap, of ranges of 16 bytes, present/not present. */ int huffmanInUse16; /** * A map from Huffman symbol index to output character. Some types of data (e.g. ASCII text) * may contain only a limited number of byte values; Huffman symbols are only allocated to * those values that actually occur in the uncompressed data. */ final byte[] huffmanSymbolMap = new byte[256]; /* Move To Front stage */ /** * Counts of each byte value within the {@link Bzip2BlockDecompressor#huffmanSymbolMap} data. * Collected at the Move To Front stage, consumed by the Inverse Burrows Wheeler Transform stage. */ private final int[] bwtByteCounts = new int[256]; /** * The Burrows-Wheeler Transform processed data. Read at the Move To Front stage, consumed by the * Inverse Burrows Wheeler Transform stage. */ private final byte[] bwtBlock; /** * Starting pointer into BWT for after untransform. */ private final int bwtStartPointer; /* Inverse Burrows-Wheeler Transform stage */ /** * At each position contains the union of :- * An output character (8 bits) * A pointer from each position to its successor (24 bits, left shifted 8 bits) * As the pointer cannot exceed the maximum block size of 900k, 24 bits is more than enough to * hold it; Folding the character data into the spare bits while performing the inverse BWT, * when both pieces of information are available, saves a large number of memory accesses in * the final decoding stages. */ private int[] bwtMergedPointers; /** * The current merged pointer into the Burrow-Wheeler Transform array. */ private int bwtCurrentMergedPointer; /** * The actual length in bytes of the current block at the Inverse Burrows Wheeler Transform * stage (before final Run-Length Decoding). */ private int bwtBlockLength; /** * The number of output bytes that have been decoded up to the Inverse Burrows Wheeler Transform stage. */ private int bwtBytesDecoded; /* Run-Length Encoding and Random Perturbation stage */ /** * The most recently RLE decoded byte. */ private int rleLastDecodedByte = -1; /** * The number of previous identical output bytes decoded. After 4 identical bytes, the next byte * decoded is an RLE repeat count. */ private int rleAccumulator; /** * The RLE repeat count of the current decoded byte. When this reaches zero, a new byte is decoded. */ private int rleRepeat; /** * If the current block is randomised, the position within the RNUMS randomisation array. */ private int randomIndex; /** * If the current block is randomised, the remaining count at the current RNUMS position. */ private int randomCount = Bzip2Rand.rNums(0) - 1; /** * Table for Move To Front transformations. */ private final Bzip2MoveToFrontTable symbolMTF = new Bzip2MoveToFrontTable(); // This variables is used to save current state if we haven't got enough readable bits private int repeatCount; private int repeatIncrement = 1; private int mtfValue; Bzip2BlockDecompressor(final int blockSize, final int blockCRC, final boolean blockRandomised, final int bwtStartPointer, final Bzip2BitReader reader) { bwtBlock = new byte[blockSize]; this.blockCRC = blockCRC; this.blockRandomised = blockRandomised; this.bwtStartPointer = bwtStartPointer; this.reader = reader; } /** * Reads the Huffman encoded data from the input stream, performs Run-Length Decoding and * applies the Move To Front transform to reconstruct the Burrows-Wheeler Transform array. */ boolean decodeHuffmanData(final Bzip2HuffmanStageDecoder huffmanDecoder) { final Bzip2BitReader reader = this.reader; final byte[] bwtBlock = this.bwtBlock; final byte[] huffmanSymbolMap = this.huffmanSymbolMap; final int streamBlockSize = this.bwtBlock.length; final int huffmanEndOfBlockSymbol = this.huffmanEndOfBlockSymbol; final int[] bwtByteCounts = this.bwtByteCounts; final Bzip2MoveToFrontTable symbolMTF = this.symbolMTF; int bwtBlockLength = this.bwtBlockLength; int repeatCount = this.repeatCount; int repeatIncrement = this.repeatIncrement; int mtfValue = this.mtfValue; for (;;) { if (!reader.hasReadableBits(HUFFMAN_DECODE_MAX_CODE_LENGTH)) { this.bwtBlockLength = bwtBlockLength; this.repeatCount = repeatCount; this.repeatIncrement = repeatIncrement; this.mtfValue = mtfValue; return false; } final int nextSymbol = huffmanDecoder.nextSymbol(); if (nextSymbol == HUFFMAN_SYMBOL_RUNA) { repeatCount += repeatIncrement; repeatIncrement <<= 1; } else if (nextSymbol == HUFFMAN_SYMBOL_RUNB) { repeatCount += repeatIncrement << 1; repeatIncrement <<= 1; } else { if (repeatCount > 0) { if (bwtBlockLength + repeatCount > streamBlockSize) { throw new DecompressionException("block exceeds declared block size"); } final byte nextByte = huffmanSymbolMap[mtfValue]; bwtByteCounts[nextByte & 0xff] += repeatCount; while (--repeatCount >= 0) { bwtBlock[bwtBlockLength++] = nextByte; } repeatCount = 0; repeatIncrement = 1; } if (nextSymbol == huffmanEndOfBlockSymbol) { break; } if (bwtBlockLength >= streamBlockSize) { throw new DecompressionException("block exceeds declared block size"); } mtfValue = symbolMTF.indexToFront(nextSymbol - 1) & 0xff; final byte nextByte = huffmanSymbolMap[mtfValue]; bwtByteCounts[nextByte & 0xff]++; bwtBlock[bwtBlockLength++] = nextByte; } } if (bwtBlockLength > MAX_BLOCK_LENGTH) { throw new DecompressionException("block length exceeds max block length: " + bwtBlockLength + " > " + MAX_BLOCK_LENGTH); } this.bwtBlockLength = bwtBlockLength; initialiseInverseBWT(); return true; } /** * Set up the Inverse Burrows-Wheeler Transform merged pointer array. */ private void initialiseInverseBWT() { final int bwtStartPointer = this.bwtStartPointer; final byte[] bwtBlock = this.bwtBlock; final int[] bwtMergedPointers = new int[bwtBlockLength]; final int[] characterBase = new int[256]; if (bwtStartPointer < 0 || bwtStartPointer >= bwtBlockLength) { throw new DecompressionException("start pointer invalid"); } // Cumulative character counts System.arraycopy(bwtByteCounts, 0, characterBase, 1, 255); for (int i = 2; i <= 255; i++) { characterBase[i] += characterBase[i - 1]; } // Merged-Array Inverse Burrows-Wheeler Transform // Combining the output characters and forward pointers into a single array here, where we // have already read both of the corresponding values, cuts down on memory accesses in the // final walk through the array for (int i = 0; i < bwtBlockLength; i++) { int value = bwtBlock[i] & 0xff; bwtMergedPointers[characterBase[value]++] = (i << 8) + value; } this.bwtMergedPointers = bwtMergedPointers; bwtCurrentMergedPointer = bwtMergedPointers[bwtStartPointer]; } /** * Decodes a byte from the final Run-Length Encoding stage, pulling a new byte from the * Burrows-Wheeler Transform stage when required. * @return The decoded byte, or -1 if there are no more bytes */ public int read() { while (rleRepeat < 1) { if (bwtBytesDecoded == bwtBlockLength) { return -1; } int nextByte = decodeNextBWTByte(); if (nextByte != rleLastDecodedByte) { // New byte, restart accumulation rleLastDecodedByte = nextByte; rleRepeat = 1; rleAccumulator = 1; crc.updateCRC(nextByte); } else { if (++rleAccumulator == 4) { // Accumulation complete, start repetition int rleRepeat = decodeNextBWTByte() + 1; this.rleRepeat = rleRepeat; rleAccumulator = 0; crc.updateCRC(nextByte, rleRepeat); } else { rleRepeat = 1; crc.updateCRC(nextByte); } } } rleRepeat--; return rleLastDecodedByte; } /** * Decodes a byte from the Burrows-Wheeler Transform stage. If the block has randomisation * applied, reverses the randomisation. * @return The decoded byte */ private int decodeNextBWTByte() { int mergedPointer = bwtCurrentMergedPointer; int nextDecodedByte = mergedPointer & 0xff; bwtCurrentMergedPointer = bwtMergedPointers[mergedPointer >>> 8]; if (blockRandomised) { if (--randomCount == 0) { nextDecodedByte ^= 1; randomIndex = (randomIndex + 1) % 512; randomCount = Bzip2Rand.rNums(randomIndex); } } bwtBytesDecoded++; return nextDecodedByte; } public int blockLength() { return bwtBlockLength; } /** * Verify and return the block CRC. This method may only be called * after all of the block's bytes have been read. * @return The block CRC */ int checkCRC() { final int computedBlockCRC = crc.getCRC(); if (blockCRC != computedBlockCRC) { throw new DecompressionException("block CRC error"); } return computedBlockCRC; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy