io.netty.handler.codec.compression.Bzip2BlockCompressor Maven / Gradle / Ivy
/*
* Copyright 2014 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.handler.codec.compression;
import io.netty.buffer.ByteBuf;
import io.netty.util.ByteProcessor;
import static io.netty.handler.codec.compression.Bzip2Constants.BLOCK_HEADER_MAGIC_1;
import static io.netty.handler.codec.compression.Bzip2Constants.BLOCK_HEADER_MAGIC_2;
import static io.netty.handler.codec.compression.Bzip2Constants.HUFFMAN_SYMBOL_RANGE_SIZE;
/**
* Compresses and writes a single Bzip2 block.
*
* Block encoding consists of the following stages:
* 1. Run-Length Encoding[1] - {@link #write(int)}
* 2. Burrows Wheeler Transform - {@link #close(ByteBuf)} (through {@link Bzip2DivSufSort})
* 3. Write block header - {@link #close(ByteBuf)}
* 4. Move To Front Transform - {@link #close(ByteBuf)} (through {@link Bzip2HuffmanStageEncoder})
* 5. Run-Length Encoding[2] - {@link #close(ByteBuf)} (through {@link Bzip2HuffmanStageEncoder})
* 6. Create and write Huffman tables - {@link #close(ByteBuf)} (through {@link Bzip2HuffmanStageEncoder})
* 7. Huffman encode and write data - {@link #close(ByteBuf)} (through {@link Bzip2HuffmanStageEncoder})
*/
final class Bzip2BlockCompressor {
private final ByteProcessor writeProcessor = new ByteProcessor() {
@Override
public boolean process(byte value) throws Exception {
return write(value);
}
};
/**
* A writer that provides bit-level writes.
*/
private final Bzip2BitWriter writer;
/**
* CRC builder for the block.
*/
private final Crc32 crc = new Crc32();
/**
* The RLE'd block data.
*/
private final byte[] block;
/**
* Current length of the data within the {@link #block} array.
*/
private int blockLength;
/**
* A limit beyond which new data will not be accepted into the block.
*/
private final int blockLengthLimit;
/**
* The values that are present within the RLE'd block data. For each index, {@code true} if that
* value is present within the data, otherwise {@code false}.
*/
private final boolean[] blockValuesPresent = new boolean[256];
/**
* The Burrows Wheeler Transformed block data.
*/
private final int[] bwtBlock;
/**
* The current RLE value being accumulated (undefined when {@link #rleLength} is 0).
*/
private int rleCurrentValue = -1;
/**
* The repeat count of the current RLE value.
*/
private int rleLength;
/**
* @param writer The {@link Bzip2BitWriter} which provides bit-level writes
* @param blockSize The declared block size in bytes. Up to this many bytes will be accepted
* into the block after Run-Length Encoding is applied
*/
Bzip2BlockCompressor(final Bzip2BitWriter writer, final int blockSize) {
this.writer = writer;
// One extra byte is added to allow for the block wrap applied in close()
block = new byte[blockSize + 1];
bwtBlock = new int[blockSize + 1];
blockLengthLimit = blockSize - 6; // 5 bytes for one RLE run plus one byte - see {@link #write(int)}
}
/**
* Write the Huffman symbol to output byte map.
*/
private void writeSymbolMap(ByteBuf out) {
Bzip2BitWriter writer = this.writer;
final boolean[] blockValuesPresent = this.blockValuesPresent;
final boolean[] condensedInUse = new boolean[16];
for (int i = 0; i < condensedInUse.length; i++) {
for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
if (blockValuesPresent[k]) {
condensedInUse[i] = true;
break;
}
}
}
for (boolean isCondensedInUse : condensedInUse) {
writer.writeBoolean(out, isCondensedInUse);
}
for (int i = 0; i < condensedInUse.length; i++) {
if (condensedInUse[i]) {
for (int j = 0, k = i << 4; j < HUFFMAN_SYMBOL_RANGE_SIZE; j++, k++) {
writer.writeBoolean(out, blockValuesPresent[k]);
}
}
}
}
/**
* Writes an RLE run to the block array, updating the block CRC and present values array as required.
* @param value The value to write
* @param runLength The run length of the value to write
*/
private void writeRun(final int value, int runLength) {
final int blockLength = this.blockLength;
final byte[] block = this.block;
blockValuesPresent[value] = true;
crc.updateCRC(value, runLength);
final byte byteValue = (byte) value;
switch (runLength) {
case 1:
block[blockLength] = byteValue;
this.blockLength = blockLength + 1;
break;
case 2:
block[blockLength] = byteValue;
block[blockLength + 1] = byteValue;
this.blockLength = blockLength + 2;
break;
case 3:
block[blockLength] = byteValue;
block[blockLength + 1] = byteValue;
block[blockLength + 2] = byteValue;
this.blockLength = blockLength + 3;
break;
default:
runLength -= 4;
blockValuesPresent[runLength] = true;
block[blockLength] = byteValue;
block[blockLength + 1] = byteValue;
block[blockLength + 2] = byteValue;
block[blockLength + 3] = byteValue;
block[blockLength + 4] = (byte) runLength;
this.blockLength = blockLength + 5;
break;
}
}
/**
* Writes a byte to the block, accumulating to an RLE run where possible.
* @param value The byte to write
* @return {@code true} if the byte was written, or {@code false} if the block is already full
*/
boolean write(final int value) {
if (blockLength > blockLengthLimit) {
return false;
}
final int rleCurrentValue = this.rleCurrentValue;
final int rleLength = this.rleLength;
if (rleLength == 0) {
this.rleCurrentValue = value;
this.rleLength = 1;
} else if (rleCurrentValue != value) {
// This path commits us to write 6 bytes - one RLE run (5 bytes) plus one extra
writeRun(rleCurrentValue & 0xff, rleLength);
this.rleCurrentValue = value;
this.rleLength = 1;
} else {
if (rleLength == 254) {
writeRun(rleCurrentValue & 0xff, 255);
this.rleLength = 0;
} else {
this.rleLength = rleLength + 1;
}
}
return true;
}
/**
* Writes an array to the block.
* @param buffer The buffer to write
* @param offset The offset within the input data to write from
* @param length The number of bytes of input data to write
* @return The actual number of input bytes written. May be less than the number requested, or
* zero if the block is already full
*/
int write(final ByteBuf buffer, int offset, int length) {
int index = buffer.forEachByte(offset, length, writeProcessor);
return index == -1 ? length : index - offset;
}
/**
* Compresses and writes out the block.
*/
void close(ByteBuf out) {
// If an RLE run is in progress, write it out
if (rleLength > 0) {
writeRun(rleCurrentValue & 0xff, rleLength);
}
// Apply a one byte block wrap required by the BWT implementation
block[blockLength] = block[0];
// Perform the Burrows Wheeler Transform
Bzip2DivSufSort divSufSort = new Bzip2DivSufSort(block, bwtBlock, blockLength);
int bwtStartPointer = divSufSort.bwt();
Bzip2BitWriter writer = this.writer;
// Write out the block header
writer.writeBits(out, 24, BLOCK_HEADER_MAGIC_1);
writer.writeBits(out, 24, BLOCK_HEADER_MAGIC_2);
writer.writeInt(out, crc.getCRC());
writer.writeBoolean(out, false); // Randomised block flag. We never create randomised blocks
writer.writeBits(out, 24, bwtStartPointer);
// Write out the symbol map
writeSymbolMap(out);
// Perform the Move To Front Transform and Run-Length Encoding[2] stages
Bzip2MTFAndRLE2StageEncoder mtfEncoder = new Bzip2MTFAndRLE2StageEncoder(bwtBlock, blockLength,
blockValuesPresent);
mtfEncoder.encode();
// Perform the Huffman Encoding stage and write out the encoded data
Bzip2HuffmanStageEncoder huffmanEncoder = new Bzip2HuffmanStageEncoder(writer,
mtfEncoder.mtfBlock(),
mtfEncoder.mtfLength(),
mtfEncoder.mtfAlphabetSize(),
mtfEncoder.mtfSymbolFrequencies());
huffmanEncoder.encode(out);
}
/**
* Gets available size of the current block.
* @return Number of available bytes which can be written
*/
int availableSize() {
if (blockLength == 0) {
return blockLengthLimit + 2;
}
return blockLengthLimit - blockLength + 1;
}
/**
* Determines if the block is full and ready for compression.
* @return {@code true} if the block is full, otherwise {@code false}
*/
boolean isFull() {
return blockLength > blockLengthLimit;
}
/**
* Determines if any bytes have been written to the block.
* @return {@code true} if one or more bytes has been written to the block, otherwise {@code false}
*/
boolean isEmpty() {
return blockLength == 0 && rleLength == 0;
}
/**
* Gets the CRC of the completed block. Only valid after calling {@link #close(ByteBuf)}.
* @return The block's CRC
*/
int crc() {
return crc.getCRC();
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy