io.airlift.compress.v2.bzip2.CBZip2InputStream Maven / Gradle / Ivy
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* This package is based on the work done by Keiron Liddle, Aftex Software
* to whom the Ant project is very grateful for his
* great code.
*/
package io.airlift.compress.v2.bzip2;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import static io.airlift.compress.v2.bzip2.BZip2Constants.G_SIZE;
import static io.airlift.compress.v2.bzip2.BZip2Constants.MAX_ALPHA_SIZE;
import static io.airlift.compress.v2.bzip2.BZip2Constants.MAX_SELECTORS;
import static io.airlift.compress.v2.bzip2.BZip2Constants.N_GROUPS;
import static io.airlift.compress.v2.bzip2.BZip2Constants.RUN_A;
import static io.airlift.compress.v2.bzip2.BZip2Constants.RUN_B;
/**
* An input stream that decompresses from the BZip2 format (without the file
* header chars) to be read as any other stream.
*
*
* The decompression requires large amounts of memory. Thus you should call the
* {@link #close() close()} method as soon as possible, to force
* CBZip2InputStream to release the allocated memory. See
* {@link CBZip2OutputStream CBZip2OutputStream} for information about memory
* usage.
*
*
*
* CBZip2InputStream reads bytes from the compressed source stream via
* the single byte {@link InputStream#read() read()} method exclusively.
* Thus you should consider to use a buffered source stream.
*
*
*
* This Ant code was enhanced so that it can de-compress blocks of bzip2 data.
* Current position in the stream is an important statistic for Hadoop. For
* example in LineRecordReader, we solely depend on the current position in the
* stream to know about the progress. The notion of position becomes complicated
* for compressed files. The Hadoop splitting is done in terms of compressed
* file. But a compressed file deflates to a large amount of data. So we have
* handled this problem in the following way.
*
* On object creation time, we find the next block start delimiter. Once such a
* marker is found, the stream stops there (we discard any read compressed data
* in this process) and the position is reported as the beginning of the block
* start delimiter. At this point we are ready for actual reading
* (i.e. decompression) of data.
*
* The subsequent read calls give out data. The position is updated when the
* caller of this class has read off the current block + 1 bytes. In between the
* block reading, position is not updated. (We can only update the position on
* block boundaries).
*
*
*
* Instances of this class are not thread safe.
*
*/
@SuppressWarnings({ "AssignmentToForLoopParameter", "SpellCheckingInspection"})
class CBZip2InputStream
extends InputStream
{
// start of block
private static final long BLOCK_DELIMITER = 0X314159265359L;
private static final int MAX_CODE_LEN = 23;
/**
* End of a BZip2 block
*/
public static final int END_OF_BLOCK = -2;
/**
* End of BZip2 stream.
*/
private static final int END_OF_STREAM = -1;
private static final int DELIMITER_BIT_LENGTH = 48;
// The variable records the current advertised position of the stream.
private long reportedBytesReadFromCompressedStream;
// The following variable keep record of compressed bytes read.
private long bytesReadFromCompressedStream;
private boolean initialized;
private final byte[] array = new byte[1];
/**
* Index of the last char in the block, so the block size == last + 1.
*/
private int last;
/**
* Index in zptr[] of original string after sorting.
*/
private int origPtr;
/**
* always: in the range 0 .. 9. The current block size is 100000 * this
* number.
*/
private int blockSize100k;
private boolean blockRandomised;
private long bsBuff;
private long bsLive;
private final Crc32 crc32 = new Crc32();
private int nInUse;
private BufferedInputStream in;
private int currentChar = -1;
/**
* A state machine to keep track of current state of the de-coder
*/
public enum STATE
{
EOF, START_BLOCK_STATE, RAND_PART_A_STATE, RAND_PART_B_STATE, RAND_PART_C_STATE, NO_RAND_PART_A_STATE, NO_RAND_PART_B_STATE, NO_RAND_PART_C_STATE, NO_PROCESS_STATE
}
private STATE currentState = STATE.START_BLOCK_STATE;
private int storedBlockCRC;
private int storedCombinedCRC;
private int computedCombinedCRC;
// used by skipToNextMarker
private boolean skipResult;
// Variables used by setup* methods exclusively
private int suCount;
private int suCh2;
private int suChPrev;
private int suI2;
private int suJ2;
private int suRNToGo;
private int suRTPos;
private int suTPos;
private char suZ;
/**
* All memory intensive stuff. This field is initialized by initBlock().
*/
private Data data;
/**
* Constructs a new CBZip2InputStream which decompresses bytes read from the
* specified stream.
*
*
* Although BZip2 headers are marked with the magic "Bz" this
* constructor expects the next byte in the stream to be the first one after
* the magic. Thus callers have to skip the first two bytes. Otherwise this
* constructor will throw an exception.
*
*
* @throws IOException if the stream content is malformed or an I/O error occurs.
* @throws NullPointerException if in == null
*/
public CBZip2InputStream(final InputStream in)
{
int blockSize = 0X39; // i.e 9
this.blockSize100k = blockSize - (int) '0';
this.in = new BufferedInputStream(in, 1024 * 9); // >1 MB buffer
}
/**
* This method reports the processed bytes so far. Please note that this
* statistic is only updated on block boundaries and only when the stream is
* initiated in BYBLOCK mode.
*/
public long getProcessedByteCount()
{
return reportedBytesReadFromCompressedStream;
}
/**
* This method keeps track of raw processed compressed
* bytes.
*
* @param count count is the number of bytes to be
* added to raw processed bytes
*/
private void updateProcessedByteCount(int count)
{
this.bytesReadFromCompressedStream += count;
}
/**
* This method reads a Byte from the compressed stream. Whenever we need to
* read from the underlying compressed stream, this method should be called
* instead of directly calling the read method of the underlying compressed
* stream. This method does important record keeping to have the statistic
* that how many bytes have been read off the compressed stream.
*/
private int readAByte(InputStream inStream)
throws IOException
{
int read = inStream.read();
if (read >= 0) {
this.updateProcessedByteCount(1);
}
return read;
}
/**
* This method tries to find the marker (passed to it as the first parameter)
* in the stream. It can find bit patterns of length <= 63 bits. Specifically
* this method is used in CBZip2InputStream to find the end of block (EOB)
* delimiter in the stream, starting from the current position of the stream.
* If marker is found, the stream position will be at the byte containing
* the starting bit of the marker.
*
* @param marker The bit pattern to be found in the stream
* @param markerBitLength No of bits in the marker
* @return true if the marker was found otherwise false
* @throws IllegalArgumentException if marketBitLength is greater than 63
*/
private boolean skipToNextMarker(long marker, int markerBitLength)
throws IllegalArgumentException
{
try {
if (markerBitLength > 63) {
throw new IllegalArgumentException(
"skipToNextMarker can not find patterns greater than 63 bits");
}
// pick next marketBitLength bits in the stream
long bytes;
bytes = this.bsR(markerBitLength);
if (bytes == -1) {
this.reportedBytesReadFromCompressedStream =
this.bytesReadFromCompressedStream;
return false;
}
while (true) {
if (bytes == marker) {
// Report the byte position where the marker starts
long markerBytesRead = (markerBitLength + this.bsLive + 7) / 8;
this.reportedBytesReadFromCompressedStream =
this.bytesReadFromCompressedStream - markerBytesRead;
return true;
}
else {
bytes = bytes << 1;
bytes = bytes & ((1L << markerBitLength) - 1);
int oneBit = (int) this.bsR(1);
if (oneBit != -1) {
bytes = bytes | oneBit;
}
else {
this.reportedBytesReadFromCompressedStream =
this.bytesReadFromCompressedStream;
return false;
}
}
}
}
catch (IOException ex) {
this.reportedBytesReadFromCompressedStream =
this.bytesReadFromCompressedStream;
return false;
}
}
private void makeMaps()
{
final boolean[] inUse = this.data.inUse;
final byte[] seqToUnseq = this.data.seqToUnseq;
int nInUseShadow = 0;
for (int i = 0; i < 256; i++) {
if (inUse[i]) {
seqToUnseq[nInUseShadow++] = (byte) i;
}
}
this.nInUse = nInUseShadow;
}
private void changeStateToProcessABlock()
throws IOException
{
if (skipResult) {
initBlock();
setupBlock();
}
else {
this.currentState = STATE.EOF;
}
}
@Override
public int read()
throws IOException
{
if (this.in != null) {
int result = this.read(array, 0, 1);
int value = 0XFF & array[0];
return (result > 0 ? value : result);
}
else {
throw new IOException("stream closed");
}
}
/**
* In CONTINOUS reading mode, this read method starts from the
* start of the compressed stream and end at the end of file by
* emitting un-compressed data. In this mode stream positioning
* is not announced and should be ignored.
*
* In BYBLOCK reading mode, this read method informs about the end
* of a BZip2 block by returning EOB. At this event, the compressed
* stream position is also announced. This announcement tells that
* how much of the compressed stream has been de-compressed and read
* out of this class. In between EOB events, the stream position is
* not updated.
*
* @return int The return value greater than 0 are the bytes read. A value
* of -1 means end of stream while -2 represents end of block
* @throws IOException if the stream content is malformed or an I/O error occurs.
*/
@Override
public int read(final byte[] dest, final int offs, final int len)
throws IOException
{
if (offs < 0) {
throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
}
if (len < 0) {
throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
}
if (offs + len > dest.length) {
throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
+ len + ") > dest.length(" + dest.length + ").");
}
if (this.in == null) {
throw new IOException("stream closed");
}
if (!initialized) {
this.init();
this.initialized = true;
}
final int hi = offs + len;
int destOffs = offs;
int b = 0;
while (((destOffs < hi) && ((b = read0())) >= 0)) {
dest[destOffs++] = (byte) b;
}
int result = destOffs - offs;
if (result == 0) {
//report 'end of block' or 'end of stream'
result = b;
skipResult = this.skipToNextMarker(BLOCK_DELIMITER, DELIMITER_BIT_LENGTH);
changeStateToProcessABlock();
}
return result;
}
private int read0()
throws IOException
{
final int retChar = this.currentChar;
switch (this.currentState) {
case EOF:
return END_OF_STREAM; // return -1
case NO_PROCESS_STATE:
return END_OF_BLOCK; // return -2
case START_BLOCK_STATE:
throw new IllegalStateException();
case RAND_PART_A_STATE:
throw new IllegalStateException();
case RAND_PART_B_STATE:
setupRandPartB();
break;
case RAND_PART_C_STATE:
setupRandPartC();
break;
case NO_RAND_PART_A_STATE:
throw new IllegalStateException();
case NO_RAND_PART_B_STATE:
setupNoRandPartB();
break;
case NO_RAND_PART_C_STATE:
setupNoRandPartC();
break;
default:
throw new IllegalStateException();
}
return retChar;
}
private void init()
throws IOException
{
int magic2 = this.readAByte(in);
if (magic2 != 'h') {
throw new IOException("Stream is not BZip2 formatted: expected 'h'"
+ " as first byte but got '" + (char) magic2 + "'");
}
int blockSize = this.readAByte(in);
if ((blockSize < '1') || (blockSize > '9')) {
throw new IOException("Stream is not BZip2 formatted: illegal "
+ "blocksize " + (char) blockSize);
}
this.blockSize100k = blockSize - (int) '0';
initBlock();
setupBlock();
}
private void initBlock()
throws IOException
{
char magic0 = bsGetUByte();
char magic1 = bsGetUByte();
char magic2 = bsGetUByte();
char magic3 = bsGetUByte();
char magic4 = bsGetUByte();
char magic5 = bsGetUByte();
if (magic0 == 0x17 && magic1 == 0x72 && magic2 == 0x45
&& magic3 == 0x38 && magic4 == 0x50 && magic5 == 0x90) {
complete(); // end of file
}
else if (magic0 != 0x31 || // '1'
magic1 != 0x41 || // ')'
magic2 != 0x59 || // 'Y'
magic3 != 0x26 || // '&'
magic4 != 0x53 || // 'S'
magic5 != 0x59 /* 'Y' */) {
this.currentState = STATE.EOF;
throw new IOException("bad block header");
}
else {
this.storedBlockCRC = bsGetInt();
this.blockRandomised = bsR(1) == 1;
// Allocate data here instead in constructor, so we do not allocate
// it if the input file is empty.
if (this.data == null) {
this.data = new Data(this.blockSize100k);
}
// currBlockNo++;
getAndMoveToFrontDecode();
this.crc32.initialiseCRC();
this.currentState = STATE.START_BLOCK_STATE;
}
}
private void endBlock()
throws IOException
{
int computedBlockCRC = this.crc32.getFinalCRC();
// A bad CRC is considered a fatal error.
if (this.storedBlockCRC != computedBlockCRC) {
// make next blocks readable without error
// (repair feature, not yet documented, not tested)
this.computedCombinedCRC = (this.storedCombinedCRC << 1)
| (this.storedCombinedCRC >>> 31);
this.computedCombinedCRC ^= this.storedBlockCRC;
throw new IOException("crc error");
}
this.computedCombinedCRC = (this.computedCombinedCRC << 1)
| (this.computedCombinedCRC >>> 31);
this.computedCombinedCRC ^= computedBlockCRC;
}
private void complete()
throws IOException
{
this.storedCombinedCRC = bsGetInt();
this.currentState = STATE.EOF;
this.data = null;
if (this.storedCombinedCRC != this.computedCombinedCRC) {
throw new IOException("crc error");
}
}
@Override
public void close()
throws IOException
{
InputStream inShadow = this.in;
if (inShadow != null) {
try {
if (inShadow != System.in) {
inShadow.close();
}
}
finally {
this.data = null;
this.in = null;
}
}
}
private long bsR(final long n)
throws IOException
{
long bsLiveShadow = this.bsLive;
long bsBuffShadow = this.bsBuff;
if (bsLiveShadow < n) {
final InputStream inShadow = this.in;
do {
int thech = readAByte(inShadow);
if (thech < 0) {
throw new IOException("unexpected end of stream");
}
bsBuffShadow = (bsBuffShadow << 8) | thech;
bsLiveShadow += 8;
} while (bsLiveShadow < n);
this.bsBuff = bsBuffShadow;
}
this.bsLive = bsLiveShadow - n;
return (bsBuffShadow >> (bsLiveShadow - n)) & ((1L << n) - 1);
}
private boolean bsGetBit()
throws IOException
{
long bsLiveShadow = this.bsLive;
long bsBuffShadow = this.bsBuff;
if (bsLiveShadow < 1) {
int thech = this.readAByte(in);
if (thech < 0) {
throw new IOException("unexpected end of stream");
}
bsBuffShadow = (bsBuffShadow << 8) | thech;
bsLiveShadow += 8;
this.bsBuff = bsBuffShadow;
}
this.bsLive = bsLiveShadow - 1;
return ((bsBuffShadow >> (bsLiveShadow - 1)) & 1) != 0;
}
private char bsGetUByte()
throws IOException
{
return (char) bsR(8);
}
private int bsGetInt()
throws IOException
{
return (int) ((((((bsR(8) << 8) | bsR(8)) << 8) | bsR(8)) << 8) | bsR(8));
}
/**
* Called by createHuffmanDecodingTables() exclusively.
*/
private static void hbCreateDecodeTables(final int[] limit,
final int[] base, final int[] perm, final char[] length,
final int minLen, final int maxLen, final int alphaSize)
{
for (int i = minLen, pp = 0; i <= maxLen; i++) {
for (int j = 0; j < alphaSize; j++) {
if (length[j] == i) {
perm[pp++] = j;
}
}
}
for (int i = MAX_CODE_LEN; --i > 0; ) {
base[i] = 0;
limit[i] = 0;
}
for (int i = 0; i < alphaSize; i++) {
base[(int) length[i] + 1]++;
}
for (int i = 1, b = base[0]; i < MAX_CODE_LEN; i++) {
b += base[i];
base[i] = b;
}
for (int i = minLen, vec = 0, b = base[i]; i <= maxLen; i++) {
final int nb = base[i + 1];
vec += nb - b;
b = nb;
limit[i] = vec - 1;
vec <<= 1;
}
for (int i = minLen + 1; i <= maxLen; i++) {
base[i] = ((limit[i - 1] + 1) << 1) - base[i];
}
}
private void recvDecodingTables()
throws IOException
{
final Data dataShadow = this.data;
final boolean[] inUse = dataShadow.inUse;
final byte[] pos = dataShadow.recvDecodingTablesPos;
final byte[] selector = dataShadow.selector;
final byte[] selectorMtf = dataShadow.selectorMtf;
int inUse16 = 0;
/* Receive the mapping table */
for (int i = 0; i < 16; i++) {
if (bsGetBit()) {
inUse16 |= 1 << i;
}
}
for (int i = 256; --i >= 0; ) {
inUse[i] = false;
}
for (int i = 0; i < 16; i++) {
if ((inUse16 & (1 << i)) != 0) {
final int i16 = i << 4;
for (int j = 0; j < 16; j++) {
if (bsGetBit()) {
inUse[i16 + j] = true;
}
}
}
}
makeMaps();
final int alphaSize = this.nInUse + 2;
/* Now the selectors */
final int nGroups = (int) bsR(3);
final int nSelectors = (int) bsR(15);
for (int i = 0; i < nSelectors; i++) {
int j = 0;
while (bsGetBit()) {
j++;
}
selectorMtf[i] = (byte) j;
}
/* Undo the MTF values for the selectors. */
for (int v = nGroups; --v >= 0; ) {
pos[v] = (byte) v;
}
for (int i = 0; i < nSelectors; i++) {
int v = selectorMtf[i] & 0xff;
final byte tmp = pos[v];
while (v > 0) {
// nearly all times v is zero, 4 in most other cases
pos[v] = pos[v - 1];
v--;
}
pos[0] = tmp;
selector[i] = tmp;
}
final char[][] len = dataShadow.tempCharArray2D;
/* Now the coding tables */
for (int t = 0; t < nGroups; t++) {
int curr = (int) bsR(5);
final char[] lenT = len[t];
for (int i = 0; i < alphaSize; i++) {
while (bsGetBit()) {
curr += bsGetBit() ? -1 : 1;
}
lenT[i] = (char) curr;
}
}
// finally create the Huffman tables
createHuffmanDecodingTables(alphaSize, nGroups);
}
/**
* Called by recvDecodingTables() exclusively.
*/
private void createHuffmanDecodingTables(final int alphaSize,
final int nGroups)
{
final Data dataShadow = this.data;
final char[][] len = dataShadow.tempCharArray2D;
final int[] minLens = dataShadow.minLens;
final int[][] limit = dataShadow.limit;
final int[][] base = dataShadow.base;
final int[][] perm = dataShadow.perm;
for (int t = 0; t < nGroups; t++) {
int minLen = 32;
int maxLen = 0;
final char[] lenT = len[t];
for (int i = alphaSize; --i >= 0; ) {
final char lent = lenT[i];
if (lent > maxLen) {
maxLen = lent;
}
if (lent < minLen) {
minLen = lent;
}
}
hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
maxLen, alphaSize);
minLens[t] = minLen;
}
}
private void getAndMoveToFrontDecode()
throws IOException
{
this.origPtr = (int) bsR(24);
recvDecodingTables();
final InputStream inShadow = this.in;
final Data dataShadow = this.data;
final byte[] ll8 = dataShadow.ll8;
final int[] unzftab = dataShadow.unzftab;
final byte[] selector = dataShadow.selector;
final byte[] seqToUnseq = dataShadow.seqToUnseq;
final char[] yy = dataShadow.getAndMoveToFrontDecodeYy;
final int[] minLens = dataShadow.minLens;
final int[][] limit = dataShadow.limit;
final int[][] base = dataShadow.base;
final int[][] perm = dataShadow.perm;
final int limitLast = this.blockSize100k * 100000;
/*
* Setting up the unzftab entries here is not strictly necessary, but it
* does save having to do it later in a separate pass, and so saves a
* block's worth of cache misses.
*/
for (int i = 256; --i >= 0; ) {
yy[i] = (char) i;
unzftab[i] = 0;
}
int groupNo = 0;
int groupPos = G_SIZE - 1;
final int eob = this.nInUse + 1;
int nextSym = getAndMoveToFrontDecode0(0);
int bsBuffShadow = (int) this.bsBuff;
int bsLiveShadow = (int) this.bsLive;
int lastShadow = -1;
int zt = selector[groupNo] & 0xff;
int[] baseZt = base[zt];
int[] limitZt = limit[zt];
int[] permZt = perm[zt];
int minLensZt = minLens[zt];
while (nextSym != eob) {
if ((nextSym == RUN_A) || (nextSym == RUN_B)) {
int s = -1;
for (int n = 1; true; n <<= 1) {
if (nextSym == RUN_A) {
s += n;
}
else if (nextSym == RUN_B) {
s += n << 1;
}
else {
break;
}
if (groupPos == 0) {
groupPos = G_SIZE - 1;
zt = selector[++groupNo] & 0xff;
baseZt = base[zt];
limitZt = limit[zt];
permZt = perm[zt];
minLensZt = minLens[zt];
}
else {
groupPos--;
}
int zn = minLensZt;
while (bsLiveShadow < zn) {
final int thech = readAByte(inShadow);
if (thech >= 0) {
bsBuffShadow = (bsBuffShadow << 8) | thech;
bsLiveShadow += 8;
}
else {
throw new IOException("unexpected end of stream");
}
}
long zvec = (bsBuffShadow >> (bsLiveShadow - zn)) & ((1L << zn) - 1);
bsLiveShadow -= zn;
while (zvec > limitZt[zn]) {
zn++;
while (bsLiveShadow < 1) {
final int thech = readAByte(inShadow);
if (thech >= 0) {
bsBuffShadow = (bsBuffShadow << 8) | thech;
bsLiveShadow += 8;
}
else {
throw new IOException("unexpected end of stream");
}
}
bsLiveShadow--;
zvec = (zvec << 1)
| ((bsBuffShadow >> bsLiveShadow) & 1);
}
nextSym = permZt[(int) (zvec - baseZt[zn])];
}
final byte ch = seqToUnseq[yy[0]];
unzftab[ch & 0xff] += s + 1;
while (s-- >= 0) {
ll8[++lastShadow] = ch;
}
if (lastShadow >= limitLast) {
throw new IOException("block overrun");
}
}
else {
if (++lastShadow >= limitLast) {
throw new IOException("block overrun");
}
final char tmp = yy[nextSym - 1];
unzftab[seqToUnseq[tmp] & 0xff]++;
ll8[lastShadow] = seqToUnseq[tmp];
/*
* This loop is hammered during decompression, hence avoid
* native method call overhead of System.arraycopy for very
* small ranges to copy.
*/
if (nextSym <= 16) {
for (int j = nextSym - 1; j > 0; ) {
yy[j] = yy[--j];
}
}
else {
//noinspection SuspiciousSystemArraycopy
System.arraycopy(yy, 0, yy, 1, nextSym - 1);
}
yy[0] = tmp;
if (groupPos == 0) {
groupPos = G_SIZE - 1;
zt = selector[++groupNo] & 0xff;
baseZt = base[zt];
limitZt = limit[zt];
permZt = perm[zt];
minLensZt = minLens[zt];
}
else {
groupPos--;
}
int zn = minLensZt;
while (bsLiveShadow < zn) {
final int thech = readAByte(inShadow);
if (thech >= 0) {
bsBuffShadow = (bsBuffShadow << 8) | thech;
bsLiveShadow += 8;
}
else {
throw new IOException("unexpected end of stream");
}
}
int zvec = (bsBuffShadow >> (bsLiveShadow - zn))
& ((1 << zn) - 1);
bsLiveShadow -= zn;
while (zvec > limitZt[zn]) {
zn++;
while (bsLiveShadow < 1) {
final int thech = readAByte(inShadow);
if (thech >= 0) {
bsBuffShadow = (bsBuffShadow << 8) | thech;
bsLiveShadow += 8;
}
else {
throw new IOException("unexpected end of stream");
}
}
bsLiveShadow--;
zvec = ((zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1));
}
nextSym = permZt[zvec - baseZt[zn]];
}
}
this.last = lastShadow;
this.bsLive = bsLiveShadow;
this.bsBuff = bsBuffShadow;
}
private int getAndMoveToFrontDecode0(final int groupNo)
throws IOException
{
final InputStream inShadow = this.in;
final Data dataShadow = this.data;
final int zt = dataShadow.selector[groupNo] & 0xff;
final int[] limitZt = dataShadow.limit[zt];
int zn = dataShadow.minLens[zt];
int zvec = (int) bsR(zn);
int bsLiveShadow = (int) this.bsLive;
int bsBuffShadow = (int) this.bsBuff;
while (zvec > limitZt[zn]) {
zn++;
while (bsLiveShadow < 1) {
final int thech = readAByte(inShadow);
if (thech >= 0) {
bsBuffShadow = (bsBuffShadow << 8) | thech;
bsLiveShadow += 8;
}
else {
throw new IOException("unexpected end of stream");
}
}
bsLiveShadow--;
zvec = (zvec << 1) | ((bsBuffShadow >> bsLiveShadow) & 1);
}
this.bsLive = bsLiveShadow;
this.bsBuff = bsBuffShadow;
return dataShadow.perm[zt][zvec - dataShadow.base[zt][zn]];
}
private void setupBlock()
throws IOException
{
if (this.data == null) {
return;
}
final int[] cftab = this.data.cftab;
final int[] tt = this.data.initTT(this.last + 1);
final byte[] ll8 = this.data.ll8;
cftab[0] = 0;
System.arraycopy(this.data.unzftab, 0, cftab, 1, 256);
for (int i = 1, c = cftab[0]; i <= 256; i++) {
c += cftab[i];
cftab[i] = c;
}
for (int i = 0, lastShadow = this.last; i <= lastShadow; i++) {
tt[cftab[ll8[i] & 0xff]++] = i;
}
if ((this.origPtr < 0) || (this.origPtr >= tt.length)) {
throw new IOException("stream corrupted");
}
this.suTPos = tt[this.origPtr];
this.suCount = 0;
this.suI2 = 0;
this.suCh2 = 256; /* not a char and not EOF */
if (this.blockRandomised) {
this.suRNToGo = 0;
this.suRTPos = 0;
setupRandPartA();
}
else {
setupNoRandPartA();
}
}
@SuppressWarnings("checkstyle:InnerAssignment")
private void setupRandPartA()
throws IOException
{
if (this.suI2 <= this.last) {
this.suChPrev = this.suCh2;
int suCh2Shadow = this.data.ll8[this.suTPos] & 0xff;
this.suTPos = this.data.tt[this.suTPos];
if (this.suRNToGo == 0) {
this.suRNToGo = R_NUMS[this.suRTPos] - 1;
if (++this.suRTPos == 512) {
this.suRTPos = 0;
}
}
else {
this.suRNToGo--;
}
this.suCh2 = suCh2Shadow ^= (this.suRNToGo == 1) ? 1 : 0;
this.suI2++;
this.currentChar = suCh2Shadow;
this.currentState = STATE.RAND_PART_B_STATE;
this.crc32.updateCRC(suCh2Shadow);
}
else {
endBlock();
initBlock();
setupBlock();
}
}
private void setupNoRandPartA()
throws IOException
{
if (this.suI2 <= this.last) {
this.suChPrev = this.suCh2;
int suCh2Shadow = this.data.ll8[this.suTPos] & 0xff;
this.suCh2 = suCh2Shadow;
this.suTPos = this.data.tt[this.suTPos];
this.suI2++;
this.currentChar = suCh2Shadow;
this.currentState = STATE.NO_RAND_PART_B_STATE;
this.crc32.updateCRC(suCh2Shadow);
}
else {
this.currentState = STATE.NO_RAND_PART_A_STATE;
endBlock();
initBlock();
setupBlock();
}
}
private void setupRandPartB()
throws IOException
{
if (this.suCh2 != this.suChPrev) {
this.currentState = STATE.RAND_PART_A_STATE;
this.suCount = 1;
setupRandPartA();
}
else if (++this.suCount >= 4) {
this.suZ = (char) (this.data.ll8[this.suTPos] & 0xff);
this.suTPos = this.data.tt[this.suTPos];
if (this.suRNToGo == 0) {
this.suRNToGo = R_NUMS[this.suRTPos] - 1;
if (++this.suRTPos == 512) {
this.suRTPos = 0;
}
}
else {
this.suRNToGo--;
}
this.suJ2 = 0;
this.currentState = STATE.RAND_PART_C_STATE;
if (this.suRNToGo == 1) {
this.suZ ^= 1;
}
setupRandPartC();
}
else {
this.currentState = STATE.RAND_PART_A_STATE;
setupRandPartA();
}
}
private void setupRandPartC()
throws IOException
{
if (this.suJ2 < this.suZ) {
this.currentChar = this.suCh2;
this.crc32.updateCRC(this.suCh2);
this.suJ2++;
}
else {
this.currentState = STATE.RAND_PART_A_STATE;
this.suI2++;
this.suCount = 0;
setupRandPartA();
}
}
private void setupNoRandPartB()
throws IOException
{
if (this.suCh2 != this.suChPrev) {
this.suCount = 1;
setupNoRandPartA();
}
else if (++this.suCount >= 4) {
this.suZ = (char) (this.data.ll8[this.suTPos] & 0xff);
this.suTPos = this.data.tt[this.suTPos];
this.suJ2 = 0;
setupNoRandPartC();
}
else {
setupNoRandPartA();
}
}
private void setupNoRandPartC()
throws IOException
{
if (this.suJ2 < this.suZ) {
int suCh2Shadow = this.suCh2;
this.currentChar = suCh2Shadow;
this.crc32.updateCRC(suCh2Shadow);
this.suJ2++;
this.currentState = STATE.NO_RAND_PART_C_STATE;
}
else {
this.suI2++;
this.suCount = 0;
setupNoRandPartA();
}
}
private static final class Data
{
// (with blockSize 900k)
final boolean[] inUse = new boolean[256]; // 256 byte
final byte[] seqToUnseq = new byte[256]; // 256 byte
final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
/**
* Freq table collected to save a pass over the data during
* decompression.
*/
final int[] unzftab = new int[256]; // 1024 byte
final int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
final int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
final int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 byte
final int[] minLens = new int[N_GROUPS]; // 24 byte
final int[] cftab = new int[257]; // 1028 byte
final char[] getAndMoveToFrontDecodeYy = new char[256]; // 512 byte
final char[][] tempCharArray2D = new char[N_GROUPS][MAX_ALPHA_SIZE]; // 3096
// byte
final byte[] recvDecodingTablesPos = new byte[N_GROUPS]; // 6 byte
// ---------------
// 60798 byte
int[] tt; // 3600000 byte
byte[] ll8; // 900000 byte
// ---------------
// 4560782 byte
// ===============
Data(int blockSize100k)
{
this.ll8 = new byte[blockSize100k * BZip2Constants.BASE_BLOCK_SIZE];
}
/**
* Initializes the {@link #tt} array.
*
* This method is called when the required length of the array is known.
* I don't initialize it at construction time to avoid unnecessary
* memory allocation when compressing small files.
*/
int[] initTT(int length)
{
int[] ttShadow = this.tt;
// tt.length should always be >= length, but theoretically
// it can happen, if the compressor mixed small and large
// blocks. Normally only the last block will be smaller
// than others.
if ((ttShadow == null) || (ttShadow.length < length)) {
ttShadow = new int[length];
this.tt = ttShadow;
}
return ttShadow;
}
}
private static final int[] R_NUMS = {
619, 720, 127, 481, 931, 816, 813, 233, 566, 247,
985, 724, 205, 454, 863, 491, 741, 242, 949, 214, 733, 859, 335,
708, 621, 574, 73, 654, 730, 472, 419, 436, 278, 496, 867, 210,
399, 680, 480, 51, 878, 465, 811, 169, 869, 675, 611, 697, 867,
561, 862, 687, 507, 283, 482, 129, 807, 591, 733, 623, 150, 238,
59, 379, 684, 877, 625, 169, 643, 105, 170, 607, 520, 932, 727,
476, 693, 425, 174, 647, 73, 122, 335, 530, 442, 853, 695, 249,
445, 515, 909, 545, 703, 919, 874, 474, 882, 500, 594, 612, 641,
801, 220, 162, 819, 984, 589, 513, 495, 799, 161, 604, 958, 533,
221, 400, 386, 867, 600, 782, 382, 596, 414, 171, 516, 375, 682,
485, 911, 276, 98, 553, 163, 354, 666, 933, 424, 341, 533, 870,
227, 730, 475, 186, 263, 647, 537, 686, 600, 224, 469, 68, 770,
919, 190, 373, 294, 822, 808, 206, 184, 943, 795, 384, 383, 461,
404, 758, 839, 887, 715, 67, 618, 276, 204, 918, 873, 777, 604,
560, 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, 652, 934, 970,
447, 318, 353, 859, 672, 112, 785, 645, 863, 803, 350, 139, 93,
354, 99, 820, 908, 609, 772, 154, 274, 580, 184, 79, 626, 630, 742,
653, 282, 762, 623, 680, 81, 927, 626, 789, 125, 411, 521, 938,
300, 821, 78, 343, 175, 128, 250, 170, 774, 972, 275, 999, 639,
495, 78, 352, 126, 857, 956, 358, 619, 580, 124, 737, 594, 701,
612, 669, 112, 134, 694, 363, 992, 809, 743, 168, 974, 944, 375,
748, 52, 600, 747, 642, 182, 862, 81, 344, 805, 988, 739, 511, 655,
814, 334, 249, 515, 897, 955, 664, 981, 649, 113, 974, 459, 893,
228, 433, 837, 553, 268, 926, 240, 102, 654, 459, 51, 686, 754,
806, 760, 493, 403, 415, 394, 687, 700, 946, 670, 656, 610, 738,
392, 760, 799, 887, 653, 978, 321, 576, 617, 626, 502, 894, 679,
243, 440, 680, 879, 194, 572, 640, 724, 926, 56, 204, 700, 707,
151, 457, 449, 797, 195, 791, 558, 945, 679, 297, 59, 87, 824, 713,
663, 412, 693, 342, 606, 134, 108, 571, 364, 631, 212, 174, 643,
304, 329, 343, 97, 430, 751, 497, 314, 983, 374, 822, 928, 140,
206, 73, 263, 980, 736, 876, 478, 430, 305, 170, 514, 364, 692,
829, 82, 855, 953, 676, 246, 369, 970, 294, 750, 807, 827, 150,
790, 288, 923, 804, 378, 215, 828, 592, 281, 565, 555, 710, 82,
896, 831, 547, 261, 524, 462, 293, 465, 502, 56, 661, 821, 976,
991, 658, 869, 905, 758, 745, 193, 768, 550, 608, 933, 378, 286,
215, 979, 792, 961, 61, 688, 793, 644, 986, 403, 106, 366, 905,
644, 372, 567, 466, 434, 645, 210, 389, 550, 919, 135, 780, 773,
635, 389, 707, 100, 626, 958, 165, 504, 920, 176, 193, 713, 857,
265, 203, 50, 668, 108, 645, 990, 626, 197, 510, 357, 358, 850,
858, 364, 936, 638};
}