io.netty.handler.codec.compression.Bzip2BlockCompressor Maven / Gradle / Ivy
Go to download
This artifact provides a single jar that contains all classes required to use remote EJB and JMS, including
all dependencies. It is intended for use by those not using maven, maven users should just import the EJB and
JMS BOM's instead (shaded JAR's cause lots of problems with maven, as it is very easy to inadvertently end up
with different versions on classes on the class path).
/*
* Copyright 2014 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.handler.codec.compression;
import io.netty.buffer.ByteBuf;
import io.netty.util.ByteProcessor;
import static io.netty.handler.codec.compression.Bzip2Constants.BLOCK_HEADER_MAGIC_1;
import static io.netty.handler.codec.compression.Bzip2Constants.BLOCK_HEADER_MAGIC_2;
import static io.netty.handler.codec.compression.Bzip2Constants.HUFFMAN_SYMBOL_RANGE_SIZE;
/**
* Compresses and writes a single Bzip2 block.
*
* Block encoding consists of the following stages:
* 1. Run-Length Encoding[1] - {@link #write(int)}
* 2. Burrows Wheeler Transform - {@link #close(ByteBuf)} (through {@link Bzip2DivSufSort})
* 3. Write block header - {@link #close(ByteBuf)}
* 4. Move To Front Transform - {@link #close(ByteBuf)} (through {@link Bzip2HuffmanStageEncoder})
* 5. Run-Length Encoding[2] - {@link #close(ByteBuf)} (through {@link Bzip2HuffmanStageEncoder})
* 6. Create and write Huffman tables - {@link #close(ByteBuf)} (through {@link Bzip2HuffmanStageEncoder})
* 7. Huffman encode and write data - {@link #close(ByteBuf)} (through {@link Bzip2HuffmanStageEncoder})
*/
final class Bzip2BlockCompressor {
private final ByteProcessor writeProcessor = new ByteProcessor() {
@Override
public boolean process(byte value) throws Exception {
return write(value);
}
};
/**
* A writer that provides bit-level writes.
*/
private final Bzip2BitWriter writer;
/**
* CRC builder for the block.
*/
private final Crc32 crc = new Crc32();
/**
* The RLE'd block data.
*/
private final byte[] block;
/**
* Current length of the data within the {@link #block} array.
*/
private int blockLength;
/**
* A limit beyond which new data will not be accepted into the block.
*/
private final int blockLengthLimit;
/**
* The values that are present within the RLE'd block data. For each index, {@code true} if that
* value is present within the data, otherwise {@code false}.
*/
private final boolean[] blockValuesPresent = new boolean[256];
/**
* The Burrows Wheeler Transformed block data.
*/
private final int[] bwtBlock;
/**
* The current RLE value being accumulated (undefined when {@link #rleLength} is 0).
*/
private int rleCurrentValue = -1;
/**
* The repeat count of the current RLE value.
*/
private int rleLength;
/**
* @param writer The {@link Bzip2BitWriter} which provides bit-level writes
* @param blockSize The declared block size in bytes. Up to this many bytes will be accepted
* into the block after Run-Length Encoding is applied
*/
Bzip2BlockCompressor(final Bzip2BitWriter writer, final int blockSize) {
this.writer = writer;
// One extra byte is added to allow for the block wrap applied in close()
block = new byte[blockSize + 1];
bwtBlock = new int[blockSize + 1];
blockLengthLimit = blockSize - 6; // 5 bytes for one RLE run plus one byte - see {@link #write(int)}
}
/**
* Write the Huffman symbol to output byte map.
*/
private void writeSymbolMap(ByteBuf out) {
Bzip2BitWriter writer = this.writer;
final boolean[] blockValuesPresent = this.blockValuesPresent;
final boolean[] condensedInUse = new boolean[16];
for (int i = 0; i < condensedInUse.length; i++) {
for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
if (blockValuesPresent[k]) {
condensedInUse[i] = true;
break;
}
}
}
for (boolean isCondensedInUse : condensedInUse) {
writer.writeBoolean(out, isCondensedInUse);
}
for (int i = 0; i < condensedInUse.length; i++) {
if (condensedInUse[i]) {
for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
writer.writeBoolean(out, blockValuesPresent[k]);
}
}
}
}
/**
* Writes an RLE run to the block array, updating the block CRC and present values array as required.
* @param value The value to write
* @param runLength The run length of the value to write
*/
private void writeRun(final int value, int runLength) {
final int blockLength = this.blockLength;
final byte[] block = this.block;
blockValuesPresent[value] = true;
crc.updateCRC(value, runLength);
final byte byteValue = (byte) value;
switch (runLength) {
case 1:
block[blockLength] = byteValue;
this.blockLength = blockLength + 1;
break;
case 2:
block[blockLength] = byteValue;
block[blockLength + 1] = byteValue;
this.blockLength = blockLength + 2;
break;
case 3:
block[blockLength] = byteValue;
block[blockLength + 1] = byteValue;
block[blockLength + 2] = byteValue;
this.blockLength = blockLength + 3;
break;
default:
runLength -= 4;
blockValuesPresent[runLength] = true;
block[blockLength] = byteValue;
block[blockLength + 1] = byteValue;
block[blockLength + 2] = byteValue;
block[blockLength + 3] = byteValue;
block[blockLength + 4] = (byte) runLength;
this.blockLength = blockLength + 5;
break;
}
}
/**
* Writes a byte to the block, accumulating to an RLE run where possible.
* @param value The byte to write
* @return {@code true} if the byte was written, or {@code false} if the block is already full
*/
boolean write(final int value) {
if (blockLength > blockLengthLimit) {
return false;
}
final int rleCurrentValue = this.rleCurrentValue;
final int rleLength = this.rleLength;
if (rleLength == 0) {
this.rleCurrentValue = value;
this.rleLength = 1;
} else if (rleCurrentValue != value) {
// This path commits us to write 6 bytes - one RLE run (5 bytes) plus one extra
writeRun(rleCurrentValue & 0xff, rleLength);
this.rleCurrentValue = value;
this.rleLength = 1;
} else {
if (rleLength == 254) {
writeRun(rleCurrentValue & 0xff, 255);
this.rleLength = 0;
} else {
this.rleLength = rleLength + 1;
}
}
return true;
}
/**
* Writes an array to the block.
* @param buffer The buffer to write
* @param offset The offset within the input data to write from
* @param length The number of bytes of input data to write
* @return The actual number of input bytes written. May be less than the number requested, or
* zero if the block is already full
*/
int write(final ByteBuf buffer, int offset, int length) {
int index = buffer.forEachByte(offset, length, writeProcessor);
return index == -1 ? length : index - offset;
}
/**
* Compresses and writes out the block.
*/
void close(ByteBuf out) {
// If an RLE run is in progress, write it out
if (rleLength > 0) {
writeRun(rleCurrentValue & 0xff, rleLength);
}
// Apply a one byte block wrap required by the BWT implementation
block[blockLength] = block[0];
// Perform the Burrows Wheeler Transform
Bzip2DivSufSort divSufSort = new Bzip2DivSufSort(block, bwtBlock, blockLength);
int bwtStartPointer = divSufSort.bwt();
Bzip2BitWriter writer = this.writer;
// Write out the block header
writer.writeBits(out, 24, BLOCK_HEADER_MAGIC_1);
writer.writeBits(out, 24, BLOCK_HEADER_MAGIC_2);
writer.writeInt(out, crc.getCRC());
writer.writeBoolean(out, false); // Randomised block flag. We never create randomised blocks
writer.writeBits(out, 24, bwtStartPointer);
// Write out the symbol map
writeSymbolMap(out);
// Perform the Move To Front Transform and Run-Length Encoding[2] stages
Bzip2MTFAndRLE2StageEncoder mtfEncoder = new Bzip2MTFAndRLE2StageEncoder(bwtBlock, blockLength,
blockValuesPresent);
mtfEncoder.encode();
// Perform the Huffman Encoding stage and write out the encoded data
Bzip2HuffmanStageEncoder huffmanEncoder = new Bzip2HuffmanStageEncoder(writer,
mtfEncoder.mtfBlock(),
mtfEncoder.mtfLength(),
mtfEncoder.mtfAlphabetSize(),
mtfEncoder.mtfSymbolFrequencies());
huffmanEncoder.encode(out);
}
/**
* Gets available size of the current block.
* @return Number of available bytes which can be written
*/
int availableSize() {
if (blockLength == 0) {
return blockLengthLimit + 2;
}
return blockLengthLimit - blockLength + 1;
}
/**
* Determines if the block is full and ready for compression.
* @return {@code true} if the block is full, otherwise {@code false}
*/
boolean isFull() {
return blockLength > blockLengthLimit;
}
/**
* Determines if any bytes have been written to the block.
* @return {@code true} if one or more bytes has been written to the block, otherwise {@code false}
*/
boolean isEmpty() {
return blockLength == 0 && rleLength == 0;
}
/**
* Gets the CRC of the completed block. Only valid after calling {@link #close(ByteBuf)}.
* @return The block's CRC
*/
int crc() {
return crc.getCRC();
}
}