// Source listing of org.apache.tools.bzip2.CBZip2OutputStream (header line from the Maven / Gradle / Ivy artifact page).
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
/*
* This package is based on the work done by Keiron Liddle, Aftex Software
* to whom the Ant project is very grateful for his
* great code.
*/
package org.apache.tools.bzip2;
import java.io.IOException;
import java.io.OutputStream;
/**
* An output stream that compresses into the BZip2 format (without the file
* header chars) into another stream.
*
*
* The compression requires large amounts of memory. Thus you should call the
* {@link #close() close()} method as soon as possible, to force
* CBZip2OutputStream to release the allocated memory.
*
*
* You can shrink the amount of allocated memory and maybe raise
* the compression speed by choosing a lower blocksize, which in turn
* may cause a lower compression ratio. You can avoid unnecessary
* memory allocation by avoiding using a blocksize which is bigger
* than the size of the input.
*
* You can compute the memory usage for compressing by the
* following formula:
*
*
* <code>400k + (9 * blocksize)</code>.
*
*
* To get the memory required for decompression by {@link
* CBZip2InputStream CBZip2InputStream} use
*
*
* <code>65k + (5 * blocksize)</code>.
*
*
*
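* <table border="1">
* <caption>Memory usage by blocksize</caption>
* <tr>
* <th>Blocksize</th>
* <th>Compression<br>memory usage</th>
* <th>Decompression<br>memory usage</th>
* </tr>
* <tr><td>100k</td><td>1300k</td><td>565k</td></tr>
* <tr><td>200k</td><td>2200k</td><td>1065k</td></tr>
* <tr><td>300k</td><td>3100k</td><td>1565k</td></tr>
* <tr><td>400k</td><td>4000k</td><td>2065k</td></tr>
* <tr><td>500k</td><td>4900k</td><td>2565k</td></tr>
* <tr><td>600k</td><td>5800k</td><td>3065k</td></tr>
* <tr><td>700k</td><td>6700k</td><td>3565k</td></tr>
* <tr><td>800k</td><td>7600k</td><td>4065k</td></tr>
* <tr><td>900k</td><td>8500k</td><td>4565k</td></tr>
* </table>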
*
*
*
*
* For decompression CBZip2InputStream allocates less memory if the
* bzipped input is smaller than one block.
*
*
*
* Instances of this class are not threadsafe.
*
*
*
* TODO: Update to BZip2 1.0.1
*
*
*/
public class CBZip2OutputStream extends OutputStream
implements BZip2Constants {
/**
* The minimum supported blocksize == 1, expressed in the same 100k
* units the constructor's <code>blockSize</code> argument uses.
*/
public static final int MIN_BLOCKSIZE = 1;
/**
* The maximum supported blocksize == 9, expressed in the same 100k
* units the constructor's <code>blockSize</code> argument uses.
*/
public static final int MAX_BLOCKSIZE = 9;
/**
* This constant is accessible by subclasses for historical
* purposes. If you don't know what it means then you don't need
* it. It is a single-bit mask (bit 21) and is not referenced by
* any method of this class.
*/
protected static final int SETMASK = (1 << 21);
/**
* This constant is accessible by subclasses for historical
* purposes. If you don't know what it means then you don't need
* it. It is the bitwise complement of {@link #SETMASK}.
*/
protected static final int CLEARMASK = (~SETMASK);
/**
* This constant is accessible by subclasses for historical
* purposes. If you don't know what it means then you don't need
* it. Within this class it is the initial code-length cost given to
* a symbol that lies outside a coding table's preferred range.
*/
protected static final int GREATER_ICOST = 15;
/**
* This constant is accessible by subclasses for historical
* purposes. If you don't know what it means then you don't need
* it. Within this class it is the initial code-length cost given to
* a symbol that lies inside a coding table's preferred range.
*/
protected static final int LESSER_ICOST = 0;
/**
* This constant is accessible by subclasses for historical
* purposes. If you don't know what it means then you don't need
* it. It is not referenced by any method of this class.
*/
protected static final int SMALL_THRESH = 20;
/**
* This constant is accessible by subclasses for historical
* purposes. If you don't know what it means then you don't need
* it. It is not referenced by any method of this class.
*/
protected static final int DEPTH_THRESH = 10;
/**
* This constant is accessible by subclasses for historical
* purposes. If you don't know what it means then you don't need
* it. It is not referenced by any method of this class.
*/
protected static final int WORK_FACTOR = 30;
/**
* This constant is accessible by subclasses for historical
* purposes. If you don't know what it means then you don't need
* it.
* If you are ever unlucky/improbable enough to get a stack
* overflow whilst sorting, increase the following constant and
* try again. In practice I have never seen the stack go above 27
* elems, so the following limit seems very generous.
* It is not referenced by any method of this class.
*/
protected static final int QSORT_STACK_SIZE = 1000;
/**
* Knuth's increments seem to work better than Incerpi-Sedgewick here.
* Possibly because the number of elems to sort is usually small, typically
* &lt;= 20.
* The table is not read anywhere in this class, hence the suppressed
* "unused" warning.
*/
@SuppressWarnings("unused")
private static final int[] INCS = {1, 4, 13, 40, 121, 364, 1093, 3280,
9841, 29524, 88573, 265720, 797161,
2391484};
/**
 * Computes Huffman code lengths for an alphabet from its symbol
 * frequencies, retrying with flattened frequencies until no code is
 * longer than <code>maxLen</code>.
 *
 * <p>This method is accessible by subclasses for historical
 * purposes. If you don't know what it does then you don't need
 * it.</p>
 *
 * @param len receives the code length of symbol <code>i</code> at
 *        index <code>i</code> for <code>0 &lt;= i &lt; alphaSize</code>
 * @param freq frequency of each symbol; a frequency of 0 is treated
 *        as 1
 * @param alphaSize number of symbols in the alphabet
 * @param maxLen the longest code length that is acceptable
 */
protected static void hbMakeCodeLengths(char[] len, int[] freq,
                                        int alphaSize, int maxLen) {
    /*
     * Nodes and heap entries run from 1. Entry 0 for both the heap and
     * nodes is a sentinel.
     */
    final int[] heap = new int[MAX_ALPHA_SIZE * 2];
    final int[] weight = new int[MAX_ALPHA_SIZE * 2];
    final int[] parent = new int[MAX_ALPHA_SIZE * 2];

    // The frequency lives in the upper 24 bits; the low 8 bits are
    // combined separately when two nodes are merged (see below).
    for (int i = alphaSize; --i >= 0;) {
        weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
    }

    for (boolean tooLong = true; tooLong;) {
        tooLong = false;

        int nNodes = alphaSize;
        int nHeap = 0;
        heap[0] = 0;
        weight[0] = 0;
        parent[0] = -2;

        // Insert every leaf into the min-heap (sift up).
        for (int i = 1; i <= alphaSize; i++) {
            parent[i] = -1;
            nHeap++;
            heap[nHeap] = i;

            int zz = nHeap;
            int tmp = heap[zz];
            while (weight[tmp] < weight[heap[zz >> 1]]) {
                heap[zz] = heap[zz >> 1];
                zz >>= 1;
            }
            heap[zz] = tmp;
        }

        // assert (nHeap < (MAX_ALPHA_SIZE + 2)) : nHeap;

        // Repeatedly merge the two lightest nodes into a new parent.
        while (nHeap > 1) {
            // Pop the lightest node (sift down).
            int n1 = heap[1];
            heap[1] = heap[nHeap];
            nHeap--;

            int yy = 0;
            int zz = 1;
            int tmp = heap[1];

            while (true) {
                yy = zz << 1;
                if (yy > nHeap) {
                    break;
                }
                if ((yy < nHeap)
                    && (weight[heap[yy + 1]] < weight[heap[yy]])) {
                    yy++;
                }
                if (weight[tmp] < weight[heap[yy]]) {
                    break;
                }
                heap[zz] = heap[yy];
                zz = yy;
            }
            heap[zz] = tmp;

            // Pop the second lightest node (sift down).
            int n2 = heap[1];
            heap[1] = heap[nHeap];
            nHeap--;

            yy = 0;
            zz = 1;
            tmp = heap[1];

            while (true) {
                yy = zz << 1;
                if (yy > nHeap) {
                    break;
                }
                if ((yy < nHeap)
                    && (weight[heap[yy + 1]] < weight[heap[yy]])) {
                    yy++;
                }
                if (weight[tmp] < weight[heap[yy]]) {
                    break;
                }
                heap[zz] = heap[yy];
                zz = yy;
            }
            heap[zz] = tmp;

            nNodes++;
            parent[n1] = parent[n2] = nNodes;

            // New node: upper 24 bits are the sum of both children's
            // upper bits, low 8 bits are 1 + the larger low byte.
            final int weight_n1 = weight[n1];
            final int weight_n2 = weight[n2];
            weight[nNodes] = ((weight_n1 & 0xffffff00)
                              + (weight_n2 & 0xffffff00))
                | (1 + (((weight_n1 & 0x000000ff)
                         > (weight_n2 & 0x000000ff))
                        ? (weight_n1 & 0x000000ff)
                        : (weight_n2 & 0x000000ff)));

            parent[nNodes] = -1;
            nHeap++;
            heap[nHeap] = nNodes;

            // Sift the new node up to its place in the heap.
            zz = nHeap;
            tmp = heap[zz];
            final int weight_tmp = weight[tmp];
            while (weight_tmp < weight[heap[zz >> 1]]) {
                heap[zz] = heap[zz >> 1];
                zz >>= 1;
            }
            heap[zz] = tmp;
        }

        // assert (nNodes < (MAX_ALPHA_SIZE * 2)) : nNodes;

        // A symbol's code length is its depth: count parent links.
        for (int i = 1; i <= alphaSize; i++) {
            int j = 0;
            int k = i;

            for (int parent_k; (parent_k = parent[k]) >= 0;) {
                k = parent_k;
                j++;
            }

            len[i - 1] = (char) j;
            if (j > maxLen) {
                tooLong = true;
            }
        }

        if (tooLong) {
            // Halve every leaf frequency (keeping it >= 1) and retry.
            // The bound must include alphaSize itself: leaves occupy
            // indices 1..alphaSize, and skipping the last one can stop
            // the retry loop from ever converging.
            for (int i = 1; i <= alphaSize; i++) {
                int j = weight[i] >> 8;
                j = 1 + (j >> 1);
                weight[i] = j << 8;
            }
        }
    }
}
/**
 * Computes Huffman code lengths for an alphabet from its symbol
 * frequencies, retrying with flattened frequencies until no code is
 * longer than <code>maxLen</code>. Works like the
 * <code>char[]</code> overload but reuses the scratch arrays held by
 * <code>dat</code> instead of allocating new ones.
 *
 * @param len receives the code length of symbol <code>i</code> at
 *        index <code>i</code> for <code>0 &lt;= i &lt; alphaSize</code>
 * @param freq frequency of each symbol; a frequency of 0 is treated
 *        as 1
 * @param dat supplies the <code>heap</code>, <code>weight</code> and
 *        <code>parent</code> work arrays, which are overwritten
 * @param alphaSize number of symbols in the alphabet
 * @param maxLen the longest code length that is acceptable
 */
private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
                                      final Data dat, final int alphaSize,
                                      final int maxLen) {
    /*
     * Nodes and heap entries run from 1. Entry 0 for both the heap and
     * nodes is a sentinel.
     */
    final int[] heap = dat.heap;
    final int[] weight = dat.weight;
    final int[] parent = dat.parent;

    // The frequency lives in the upper 24 bits; the low 8 bits are
    // combined separately when two nodes are merged (see below).
    for (int i = alphaSize; --i >= 0;) {
        weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
    }

    for (boolean tooLong = true; tooLong;) {
        tooLong = false;

        int nNodes = alphaSize;
        int nHeap = 0;
        heap[0] = 0;
        weight[0] = 0;
        parent[0] = -2;

        // Insert every leaf into the min-heap (sift up).
        for (int i = 1; i <= alphaSize; i++) {
            parent[i] = -1;
            nHeap++;
            heap[nHeap] = i;

            int zz = nHeap;
            int tmp = heap[zz];
            while (weight[tmp] < weight[heap[zz >> 1]]) {
                heap[zz] = heap[zz >> 1];
                zz >>= 1;
            }
            heap[zz] = tmp;
        }

        // Repeatedly merge the two lightest nodes into a new parent.
        while (nHeap > 1) {
            // Pop the lightest node (sift down).
            int n1 = heap[1];
            heap[1] = heap[nHeap];
            nHeap--;

            int yy = 0;
            int zz = 1;
            int tmp = heap[1];

            while (true) {
                yy = zz << 1;
                if (yy > nHeap) {
                    break;
                }
                if ((yy < nHeap)
                    && (weight[heap[yy + 1]] < weight[heap[yy]])) {
                    yy++;
                }
                if (weight[tmp] < weight[heap[yy]]) {
                    break;
                }
                heap[zz] = heap[yy];
                zz = yy;
            }
            heap[zz] = tmp;

            // Pop the second lightest node (sift down).
            int n2 = heap[1];
            heap[1] = heap[nHeap];
            nHeap--;

            yy = 0;
            zz = 1;
            tmp = heap[1];

            while (true) {
                yy = zz << 1;
                if (yy > nHeap) {
                    break;
                }
                if ((yy < nHeap)
                    && (weight[heap[yy + 1]] < weight[heap[yy]])) {
                    yy++;
                }
                if (weight[tmp] < weight[heap[yy]]) {
                    break;
                }
                heap[zz] = heap[yy];
                zz = yy;
            }
            heap[zz] = tmp;

            nNodes++;
            parent[n1] = parent[n2] = nNodes;

            // New node: upper 24 bits are the sum of both children's
            // upper bits, low 8 bits are 1 + the larger low byte.
            final int weight_n1 = weight[n1];
            final int weight_n2 = weight[n2];
            weight[nNodes] = ((weight_n1 & 0xffffff00)
                              + (weight_n2 & 0xffffff00))
                | (1 + (((weight_n1 & 0x000000ff)
                         > (weight_n2 & 0x000000ff))
                        ? (weight_n1 & 0x000000ff)
                        : (weight_n2 & 0x000000ff)));

            parent[nNodes] = -1;
            nHeap++;
            heap[nHeap] = nNodes;

            // Sift the new node up to its place in the heap.
            zz = nHeap;
            tmp = heap[zz];
            final int weight_tmp = weight[tmp];
            while (weight_tmp < weight[heap[zz >> 1]]) {
                heap[zz] = heap[zz >> 1];
                zz >>= 1;
            }
            heap[zz] = tmp;
        }

        // A symbol's code length is its depth: count parent links.
        for (int i = 1; i <= alphaSize; i++) {
            int j = 0;
            int k = i;

            for (int parent_k; (parent_k = parent[k]) >= 0;) {
                k = parent_k;
                j++;
            }

            len[i - 1] = (byte) j;
            if (j > maxLen) {
                tooLong = true;
            }
        }

        if (tooLong) {
            // Halve every leaf frequency (keeping it >= 1) and retry.
            // The bound must include alphaSize itself: leaves occupy
            // indices 1..alphaSize, and skipping the last one can stop
            // the retry loop from ever converging.
            for (int i = 1; i <= alphaSize; i++) {
                int j = weight[i] >> 8;
                j = 1 + (j >> 1);
                weight[i] = j << 8;
            }
        }
    }
}
/**
* Index of the last char in the block, so the block size == last + 1.
* Reset to -1 whenever a new block is started.
*/
private int last;
/**
* Always: in the range 1 .. 9 (the constructor rejects anything else).
* The block size in 100k units, as passed to the constructor.
*/
private final int blockSize100k;
/** Bit buffer; new bits are placed below the bits already pending, starting from the top. */
private int bsBuff;
/** Number of bits currently pending in {@link #bsBuff}. */
private int bsLive;
/** CRC of the block currently being collected. */
private final CRC crc = new CRC();
/** Number of distinct byte values in the current block; set by generateMTFValues(). */
private int nInUse;
/** Number of symbols stored in data.sfmap for the current block; set by generateMTFValues(). */
private int nMTF;
/** Byte value of the run currently being collected, or -1 if there is none. */
private int currentChar = -1;
/** Length of the run of {@link #currentChar} collected so far. */
private int runLength = 0;
/** CRC of the most recently ended block. */
private int blockCRC;
/** CRC combined over all blocks ended so far; written at the end of the stream. */
private int combinedCRC;
/** A block is ended once {@link #last} reaches this value. */
private final int allowableBlockSize;
/**
* All memory intensive stuff. Set to null by finish().
*/
private Data data;
/** Sorter used on each block; set to null by finish(). */
private BlockSort blockSorter;
/** Destination stream; set to null by finish(), which marks this stream as closed. */
private OutputStream out;
/**
 * Suggests a blocksize for compressing input of the given length.
 *
 * @param inputLength
 *            The number of bytes that will be written to the
 *            CBZip2OutputStream.
 * @return The blocksize, between {@link #MIN_BLOCKSIZE} and
 *         {@link #MAX_BLOCKSIZE} both inclusive. For an
 *         inputLength that is zero or negative this method
 *         always returns MAX_BLOCKSIZE.
 */
public static int chooseBlockSize(long inputLength) {
    if (inputLength <= 0) {
        return MAX_BLOCKSIZE;
    }
    // One unit per started 132000 bytes of input, capped at 9.
    final long wanted = (inputLength / 132000) + 1;
    return (int) Math.min(wanted, 9);
}
/**
* Constructs a new CBZip2OutputStream with a blocksize of 900k.
*
* <p>
* <b>Attention:</b> The caller is responsible to write the two BZip2 magic
* bytes <code>"BZ"</code> to the specified stream prior to calling this
* constructor.
* </p>
*
* @param out
* the destination stream.
*
* @throws IOException
* if an I/O error occurs in the specified stream.
* @throws NullPointerException
* if <code>out == null</code>.
*/
public CBZip2OutputStream(final OutputStream out) throws IOException {
this(out, MAX_BLOCKSIZE);
}
/**
 * Constructs a new CBZip2OutputStream with specified blocksize.
 *
 * <p>
 * <b>Attention:</b> The caller is responsible to write the two BZip2 magic
 * bytes <code>"BZ"</code> to the specified stream prior to calling this
 * constructor.
 * </p>
 *
 * @param out
 *            the destination stream.
 * @param blockSize
 *            the blockSize as 100k units.
 *
 * @throws IOException
 *             if an I/O error occurs in the specified stream.
 * @throws IllegalArgumentException
 *             if <code>(blockSize &lt; 1) || (blockSize &gt; 9)</code>.
 * @throws NullPointerException
 *             if <code>out == null</code>.
 *
 * @see #MIN_BLOCKSIZE
 * @see #MAX_BLOCKSIZE
 */
public CBZip2OutputStream(final OutputStream out, final int blockSize)
    throws IOException {
    super();

    if (blockSize < MIN_BLOCKSIZE) {
        throw new IllegalArgumentException("blockSize(" + blockSize
                                           + ") < " + MIN_BLOCKSIZE);
    }
    if (blockSize > MAX_BLOCKSIZE) {
        throw new IllegalArgumentException("blockSize(" + blockSize
                                           + ") > " + MAX_BLOCKSIZE);
    }
    // Fail fast: without this check the documented NullPointerException
    // is only raised incidentally, after init() has already allocated
    // the large per-block buffers.
    if (out == null) {
        throw new NullPointerException("out");
    }

    this.blockSize100k = blockSize;
    this.out = out;

    /* 20 is just a paranoia constant */
    this.allowableBlockSize = (this.blockSize100k * BZip2Constants.baseBlockSize) - 20;
    init();
}
/**
 * {@inheritDoc}
 *
 * @throws IOException if this stream has already been finished or
 *         closed, or if the underlying stream fails.
 */
@Override
public void write(final int b) throws IOException {
    if (this.out == null) {
        throw new IOException("closed");
    }
    write0(b);
}
/**
 * Stores the pending run (<code>runLength</code> copies of
 * <code>currentChar</code>) in the block buffer.
 *
 * <p>Runs of one to three bytes are stored literally. Any other
 * run length is stored as four copies of the byte followed by one
 * extra byte holding <code>runLength - 4</code>.</p>
 *
 * <p>If the current block is already full it is ended and a new
 * block is started before the run is stored.</p>
 *
 * <p>Bytes are placed in data.block beginning two positions after
 * this.last (so the very first byte of a block lands at index 1,
 * not 0), and this.last is advanced to one less than the index of
 * the last byte stored.</p>
 */
private void writeRun() throws IOException {
    final int lastShadow = this.last;
    if (lastShadow >= this.allowableBlockSize) {
        // No room left: emit this block, start the next, then retry.
        endBlock();
        initBlock();
        writeRun();
        return;
    }

    final int symbol = this.currentChar;
    final Data d = this.data;
    d.inUse[symbol] = true;
    final byte ch = (byte) symbol;
    final int run = this.runLength;
    this.crc.updateCRC(symbol, run);

    final byte[] block = d.block;
    final int base = lastShadow + 2;

    if (run >= 1 && run <= 3) {
        for (int k = 0; k < run; k++) {
            block[base + k] = ch;
        }
        this.last = lastShadow + run;
    } else {
        // The extra count byte is a block symbol too, so mark it used.
        final int extra = run - 4;
        d.inUse[extra] = true;
        for (int k = 0; k < 4; k++) {
            block[base + k] = ch;
        }
        block[base + 4] = (byte) extra;
        this.last = lastShadow + 5;
    }
}
/**
 * Overridden to finish the stream if the caller never did.
 *
 * <p>The superclass finalizer is run even when finishing fails, so
 * an I/O error here cannot skip it.</p>
 *
 * @throws Throwable whatever {@link #finish()} or the superclass
 *         finalizer throws
 */
@Override
protected void finalize() throws Throwable {
    try {
        finish();
    } finally {
        super.finalize();
    }
}
/**
 * Writes out any pending run, the last block and the end-of-stream
 * trailer, then releases the buffers held by this stream. The
 * underlying stream is not closed. Does nothing if this stream has
 * already been finished.
 *
 * <p>After this call, whether it succeeds or fails, the stream
 * counts as closed and rejects further writes.</p>
 *
 * @throws IOException if writing to the underlying stream fails
 */
public void finish() throws IOException {
    if (this.out == null) {
        return;
    }
    try {
        if (this.runLength > 0) {
            writeRun();
        }
        this.currentChar = -1;
        endBlock();
        endCompression();
    } finally {
        // Drop the references even on failure so the memory is freed.
        this.blockSorter = null;
        this.data = null;
        this.out = null;
    }
}
/**
 * Finishes the compressed stream and closes the underlying stream.
 * Does nothing if this stream has already been finished or closed.
 *
 * <p>The underlying stream is closed even when finishing fails, so
 * an error while writing the last block does not leak it.</p>
 *
 * @throws IOException if finishing or closing fails
 */
@Override
public void close() throws IOException {
    // finish() nulls this.out, so keep our own reference for closing.
    final OutputStream outShadow = this.out;
    if (outShadow != null) {
        try {
            finish();
        } finally {
            outShadow.close();
        }
    }
}
/**
 * Flushes the underlying stream, if this stream is still open.
 * Bits still pending in the internal bit buffer and bytes collected
 * for the current block are not written by this call.
 *
 * @throws IOException if the underlying stream fails to flush
 */
@Override
public void flush() throws IOException {
    final OutputStream target = this.out;
    if (target == null) {
        return;
    }
    target.flush();
}
/**
 * Allocates the per-stream buffers, queues the stream header and
 * prepares the first block.
 *
 * <p>The two magic bytes 'B' and 'Z' are not written here; that is
 * left to the code that created this stream.</p>
 */
private void init() throws IOException {
    final Data freshData = new Data(this.blockSize100k);
    this.data = freshData;
    this.blockSorter = new BlockSort(freshData);

    /*
     * Header: 'h' marks the file format as huffmanised, and the
     * following digit gives blockSize100k.
     */
    bsW(8, 'h');
    bsW(8, '0' + this.blockSize100k);

    this.combinedCRC = 0;
    initBlock();
}
/**
 * Resets the per-block state: restarts the CRC, marks the block as
 * empty and clears the record of which byte values occur in it.
 */
private void initBlock() {
    this.crc.initialiseCRC();
    this.last = -1;

    final boolean[] used = this.data.inUse;
    int idx = 256;
    while (idx > 0) {
        used[--idx] = false;
    }
}
/**
 * Ends the current block: folds its CRC into the combined CRC and,
 * unless the block is empty, sorts it and writes it out.
 */
private void endBlock() throws IOException {
    final int finishedCrc = this.crc.getFinalCRC();
    this.blockCRC = finishedCrc;
    // Rotate the combined CRC left by one bit, then mix in this block.
    this.combinedCRC = Integer.rotateLeft(this.combinedCRC, 1) ^ finishedCrc;

    // empty block at end of file
    if (this.last == -1) {
        return;
    }

    /* sort the block and establish posn of original string */
    blockSort();

    /*
     * A 6-byte block header, the value chosen arbitrarily as 0x314159265359
     * :-). A 32 bit value does not really give a strong enough guarantee
     * that the value will not appear by chance in the compressed
     * datastream. Worst-case probability of this event, for a 900k block,
     * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48
     * bits. For a compressed file of size 100Gb -- about 100000 blocks --
     * only a 48-bit marker will do. NB: normal compression/ decompression
     * do not rely on these statistical properties. They are only important
     * when trying to recover blocks from damaged files.
     */
    final int[] blockMagic = {0x31, 0x41, 0x59, 0x26, 0x53, 0x59};
    for (final int magicByte : blockMagic) {
        bsPutUByte(magicByte);
    }

    /* Now the block's CRC, so it is in a known place. */
    bsPutInt(this.blockCRC);

    /* Now a single bit indicating no randomisation. */
    bsW(1, 0);

    /* Finally, block's contents proper. */
    moveToFrontCodeAndSend();
}
/**
 * Writes the end-of-stream trailer: a 48-bit marker, the combined
 * CRC of all blocks, and whatever bits are still pending.
 */
private void endCompression() throws IOException {
    /*
     * Now another magic 48-bit number, 0x177245385090, to indicate the end
     * of the last block. (sqrt(pi), if you want to know. I did want to use
     * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me
     * to feel statistically comfortable. Call me paranoid.)
     */
    final int[] endMagic = {0x17, 0x72, 0x45, 0x38, 0x50, 0x90};
    for (final int magicByte : endMagic) {
        bsPutUByte(magicByte);
    }

    bsPutInt(this.combinedCRC);
    bsFinishedWithStream();
}
/**
 * Returns the blocksize, in 100k units, that this stream was
 * constructed with.
 *
 * @return the blocksize given at construction time
 */
public final int getBlockSize() {
    return blockSize100k;
}
/**
 * Compresses <code>len</code> bytes from <code>buf</code>, starting
 * at index <code>offs</code>.
 *
 * @param buf the bytes to compress
 * @param offs index of the first byte to compress
 * @param len number of bytes to compress
 * @throws IndexOutOfBoundsException if <code>offs</code> or
 *         <code>len</code> is negative, or if the range extends past
 *         the end of <code>buf</code>
 * @throws IOException if this stream has been finished or closed,
 *         or if the underlying stream fails
 */
@Override
public void write(final byte[] buf, int offs, final int len)
    throws IOException {
    if (offs < 0) {
        throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
    }
    if (len < 0) {
        throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
    }
    // Compare against the remaining space rather than computing
    // offs + len, which can overflow int and slip past the check.
    if (len > buf.length - offs) {
        throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
                                            + len + ") > buf.length("
                                            + buf.length + ").");
    }
    if (this.out == null) {
        throw new IOException("stream closed");
    }

    for (int hi = offs + len; offs < hi;) {
        write0(buf[offs++]);
    }
}
/**
 * Accepts one byte and merges it into the run being collected;
 * this is the run-length encoding that forms the first step of
 * the bzip2 algorithm.
 *
 * <p>A run is handed to {@link #writeRun()} when a different byte
 * arrives or when the run grows past 254 bytes.</p>
 */
private void write0(int b) throws IOException {
    final int value = b & 0xff;

    if (this.currentChar == -1) {
        // No run in progress: start one with this byte.
        this.currentChar = value;
        this.runLength++;
        return;
    }

    if (this.currentChar != value) {
        // A different byte ends the run; start a new one.
        writeRun();
        this.runLength = 1;
        this.currentChar = value;
        return;
    }

    this.runLength++;
    if (this.runLength > 254) {
        writeRun();
        this.currentChar = -1;
        this.runLength = 0;
    }
}
/**
 * Assigns code values to symbols from their code lengths. Lengths
 * are visited from <code>minLen</code> to <code>maxLen</code>;
 * within one length, symbols receive consecutive values in index
 * order, and the running value is shifted left by one bit before
 * moving to the next length.
 *
 * @param code receives the code value of each symbol
 * @param length code length of each symbol (read as unsigned)
 * @param minLen shortest code length to assign
 * @param maxLen longest code length to assign
 * @param alphaSize number of symbols
 */
private static void hbAssignCodes(final int[] code, final byte[] length,
                                  final int minLen, final int maxLen,
                                  final int alphaSize) {
    int next = 0;
    for (int bits = minLen; bits <= maxLen; bits++) {
        for (int sym = 0; sym < alphaSize; sym++) {
            if ((length[sym] & 0xff) != bits) {
                continue;
            }
            code[sym] = next++;
        }
        next <<= 1;
    }
}
/**
 * Writes out all bits still pending in the bit buffer, one byte at
 * a time taken from the top of the buffer.
 */
private void bsFinishedWithStream() throws IOException {
    for (; this.bsLive > 0; this.bsLive -= 8) {
        this.out.write(this.bsBuff >> 24); // write 8-bit
        this.bsBuff <<= 8;
    }
}
/**
 * Appends the low <code>n</code> bits of <code>v</code> to the bit
 * buffer, first writing out every complete byte that is pending.
 *
 * @param n number of bits to append
 * @param v value holding the bits to append
 */
private void bsW(final int n, final int v) throws IOException {
    final OutputStream sink = this.out;
    int pending = this.bsLive;
    int buffer = this.bsBuff;

    for (; pending >= 8; pending -= 8) {
        sink.write(buffer >> 24); // write 8-bit
        buffer <<= 8;
    }

    // Place the new bits directly below the bits already pending.
    final int shift = 32 - pending - n;
    this.bsBuff = buffer | (v << shift);
    this.bsLive = pending + n;
}
/**
 * Appends one byte (8 bits) to the bit stream.
 *
 * @param c the byte value to append
 */
private void bsPutUByte(final int c) throws IOException {
    final int bitCount = 8;
    bsW(bitCount, c);
}
/**
 * Appends a 32-bit value to the bit stream as four bytes, most
 * significant byte first.
 *
 * @param u the value to append
 */
private void bsPutInt(final int u) throws IOException {
    for (int shift = 24; shift >= 0; shift -= 8) {
        bsW(8, (u >> shift) & 0xff);
    }
}
/**
 * Encodes and writes the symbols of the current block: chooses the
 * number of coding tables, builds and refines them, then writes
 * the symbol map, the selectors, the tables and the coded data.
 */
private void sendMTFValues() throws IOException {
    final byte[][] lengths = this.data.sendMTFValues_len;
    final int alphaSize = this.nInUse + 2;

    // Start every table with the high cost for every symbol.
    for (int table = 0; table < N_GROUPS; table++) {
        final byte[] row = lengths[table];
        for (int sym = 0; sym < alphaSize; sym++) {
            row[sym] = GREATER_ICOST;
        }
    }

    /* Decide how many coding tables to use */
    // assert (this.nMTF > 0) : this.nMTF;
    final int symbolCount = this.nMTF;
    final int nGroups;
    if (symbolCount < 200) {
        nGroups = 2;
    } else if (symbolCount < 600) {
        nGroups = 3;
    } else if (symbolCount < 1200) {
        nGroups = 4;
    } else if (symbolCount < 2400) {
        nGroups = 5;
    } else {
        nGroups = 6;
    }

    /* Generate an initial set of coding tables */
    sendMTFValues0(nGroups, alphaSize);

    /*
     * Iterate up to N_ITERS times to improve the tables.
     */
    final int nSelectors = sendMTFValues1(nGroups, alphaSize);

    /* Compute MTF values for the selectors. */
    sendMTFValues2(nGroups, nSelectors);

    /* Assign actual codes for the tables. */
    sendMTFValues3(nGroups, alphaSize);

    /* Transmit the mapping table. */
    sendMTFValues4();

    /* Now the selectors. */
    sendMTFValues5(nGroups, nSelectors);

    /* Now the coding tables. */
    sendMTFValues6(nGroups, alphaSize);

    /* And finally, the block data proper */
    sendMTFValues7();
}
/**
* Generates the initial set of coding tables. The symbols
* 0 .. alphaSize - 1 are cut into nGroups consecutive ranges, each
* taking about an equal share of the symbol frequency that is still
* unassigned. The table for a range gets LESSER_ICOST for symbols
* inside the range and GREATER_ICOST for all others.
*
* @param nGroups number of coding tables to initialise
* @param alphaSize number of symbols in the alphabet
*/
private void sendMTFValues0(final int nGroups, final int alphaSize) {
final byte[][] len = this.data.sendMTFValues_len;
final int[] mtfFreq = this.data.mtfFreq;
// frequency total not yet assigned to a range
int remF = this.nMTF;
// first symbol of the range being built
int gs = 0;
for (int nPart = nGroups; nPart > 0; nPart--) {
// target frequency for this range: an equal share of what remains
final int tFreq = remF / nPart;
int ge = gs - 1;
int aFreq = 0;
// extend the range until it reaches the target or the alphabet ends
while (aFreq < tFreq && ge < alphaSize - 1) {
aFreq += mtfFreq[++ge];
}
// for every second inner range, give back the last symbol again
if (ge > gs && nPart != nGroups && nPart != 1 && (nGroups - nPart & 1) != 0) {
aFreq -= mtfFreq[ge--];
}
final byte[] len_np = len[nPart - 1];
for (int v = alphaSize; --v >= 0;) {
if (v >= gs && v <= ge) {
len_np[v] = LESSER_ICOST;
} else {
len_np[v] = GREATER_ICOST;
}
}
gs = ge + 1;
remF -= aFreq;
}
}
/**
* Refines the coding tables over N_ITERS passes. In each pass the
* symbols in data.sfmap are taken in groups of up to G_SIZE; for
* every group the table with the lowest total code length is chosen
* and recorded in data.selector, and the group's symbols are counted
* for that table. At the end of the pass the code lengths of every
* table are recomputed from those counts, with a maximum code length
* of 20.
*
* @param nGroups number of coding tables in use
* @param alphaSize number of symbols in the alphabet
* @return the number of selectors recorded in the last pass
*/
private int sendMTFValues1(final int nGroups, final int alphaSize) {
final Data dataShadow = this.data;
final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
final int[] fave = dataShadow.sendMTFValues_fave;
final short[] cost = dataShadow.sendMTFValues_cost;
final char[] sfmap = dataShadow.sfmap;
final byte[] selector = dataShadow.selector;
final byte[][] len = dataShadow.sendMTFValues_len;
final byte[] len_0 = len[0];
final byte[] len_1 = len[1];
final byte[] len_2 = len[2];
final byte[] len_3 = len[3];
final byte[] len_4 = len[4];
final byte[] len_5 = len[5];
final int nMTFShadow = this.nMTF;
int nSelectors = 0;
for (int iter = 0; iter < N_ITERS; iter++) {
// reset the per-table counters for this pass
for (int t = nGroups; --t >= 0;) {
fave[t] = 0;
int[] rfreqt = rfreq[t];
for (int i = alphaSize; --i >= 0;) {
rfreqt[i] = 0;
}
}
nSelectors = 0;
for (int gs = 0; gs < this.nMTF;) {
/* Set group start & end marks. */
/*
* Calculate the cost of this group as coded by each of the
* coding tables.
*/
final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
if (nGroups == N_GROUPS) {
// unrolled version of the else-block
short cost0 = 0;
short cost1 = 0;
short cost2 = 0;
short cost3 = 0;
short cost4 = 0;
short cost5 = 0;
for (int i = gs; i <= ge; i++) {
final int icv = sfmap[i];
cost0 += len_0[icv] & 0xff;
cost1 += len_1[icv] & 0xff;
cost2 += len_2[icv] & 0xff;
cost3 += len_3[icv] & 0xff;
cost4 += len_4[icv] & 0xff;
cost5 += len_5[icv] & 0xff;
}
cost[0] = cost0;
cost[1] = cost1;
cost[2] = cost2;
cost[3] = cost3;
cost[4] = cost4;
cost[5] = cost5;
} else {
for (int t = nGroups; --t >= 0;) {
cost[t] = 0;
}
for (int i = gs; i <= ge; i++) {
final int icv = sfmap[i];
for (int t = nGroups; --t >= 0;) {
cost[t] += len[t][icv] & 0xff;
}
}
}
/*
* Find the coding table which is best for this group, and
* record its identity in the selector table.
*/
// tables are scanned from the highest index down and only a
// strictly lower cost replaces the best, so ties keep the
// higher-numbered table
int bt = -1;
for (int t = nGroups, bc = 999999999; --t >= 0;) {
final int cost_t = cost[t];
if (cost_t < bc) {
bc = cost_t;
bt = t;
}
}
fave[bt]++;
selector[nSelectors] = (byte) bt;
nSelectors++;
/*
* Increment the symbol frequencies for the selected table.
*/
final int[] rfreq_bt = rfreq[bt];
for (int i = gs; i <= ge; i++) {
rfreq_bt[sfmap[i]]++;
}
gs = ge + 1;
}
/*
* Recompute the tables based on the accumulated frequencies.
*/
for (int t = 0; t < nGroups; t++) {
hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
}
}
return nSelectors;
}
/**
* Computes the move-to-front values of the selectors: for every entry
* of data.selector its current position in a move-to-front list of
* table numbers is stored in data.selectorMtf, and that table number
* is then moved to the front of the list.
*
* @param nGroups number of coding tables in use
* @param nSelectors number of selectors to convert
*/
private void sendMTFValues2(final int nGroups, final int nSelectors) {
// assert (nGroups < 8) : nGroups;
final Data dataShadow = this.data;
byte[] pos = dataShadow.sendMTFValues2_pos;
// the list starts out as 0, 1, ..., nGroups - 1
for (int i = nGroups; --i >= 0;) {
pos[i] = (byte) i;
}
for (int i = 0; i < nSelectors; i++) {
final byte ll_i = dataShadow.selector[i];
byte tmp = pos[0];
int j = 0;
// search for ll_i, shifting the entries passed over back by one
while (ll_i != tmp) {
j++;
byte tmp2 = tmp;
tmp = pos[j];
pos[j] = tmp2;
}
pos[0] = tmp;
dataShadow.selectorMtf[i] = (byte) j;
}
}
/**
 * Assigns the actual code values for each coding table: finds the
 * shortest and longest code length of the table and lets
 * hbAssignCodes fill in the codes.
 *
 * @param nGroups number of coding tables in use
 * @param alphaSize number of symbols in the alphabet
 */
private void sendMTFValues3(final int nGroups, final int alphaSize) {
    final int[][] codes = this.data.sendMTFValues_code;
    final byte[][] lengths = this.data.sendMTFValues_len;

    for (int table = 0; table < nGroups; table++) {
        final byte[] tableLengths = lengths[table];
        int shortest = 32;
        int longest = 0;
        for (int sym = alphaSize - 1; sym >= 0; sym--) {
            final int bits = tableLengths[sym] & 0xff;
            longest = Math.max(longest, bits);
            shortest = Math.min(shortest, bits);
        }

        // assert (longest <= 20) : longest;
        // assert (shortest >= 1) : shortest;

        hbAssignCodes(codes[table], tableLengths, shortest, longest,
                      alphaSize);
    }
}
/**
* Transmits the mapping table of the byte values used in the block.
* First 16 bits are written, one per range of 16 byte values, telling
* whether any value of that range is in use. Then, for every range
* that is in use, 16 more bits are written, one per byte value.
*/
private void sendMTFValues4() throws IOException {
final boolean[] inUse = this.data.inUse;
final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
// inUse16[i] is true if any of the values 16 * i .. 16 * i + 15 is in use
for (int i = 16; --i >= 0;) {
inUse16[i] = false;
final int i16 = i * 16;
for (int j = 16; --j >= 0;) {
if (inUse[i16 + j]) {
inUse16[i] = true;
}
}
}
for (int i = 0; i < 16; i++) {
bsW(1, inUse16[i] ? 1 : 0);
}
// from here on the bit writer is inlined and works on local copies
// of the bit buffer state, which are stored back at the end
final OutputStream outShadow = this.out;
int bsLiveShadow = this.bsLive;
int bsBuffShadow = this.bsBuff;
for (int i = 0; i < 16; i++) {
if (inUse16[i]) {
final int i16 = i * 16;
for (int j = 0; j < 16; j++) {
// inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
while (bsLiveShadow >= 8) {
outShadow.write(bsBuffShadow >> 24); // write 8-bit
bsBuffShadow <<= 8;
bsLiveShadow -= 8;
}
if (inUse[i16 + j]) {
bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
}
bsLiveShadow++;
}
}
}
this.bsBuff = bsBuffShadow;
this.bsLive = bsLiveShadow;
}
/**
* Transmits the selectors: the number of coding tables in 3 bits, the
* number of selectors in 15 bits, and then every value of
* data.selectorMtf as that many 1 bits followed by a single 0 bit.
*
* @param nGroups number of coding tables in use
* @param nSelectors number of selectors to write
*/
private void sendMTFValues5(final int nGroups, final int nSelectors)
throws IOException {
bsW(3, nGroups);
bsW(15, nSelectors);
// from here on the bit writer is inlined and works on local copies
// of the bit buffer state, which are stored back at the end
final OutputStream outShadow = this.out;
final byte[] selectorMtf = this.data.selectorMtf;
int bsLiveShadow = this.bsLive;
int bsBuffShadow = this.bsBuff;
for (int i = 0; i < nSelectors; i++) {
for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
// inlined: bsW(1, 1);
while (bsLiveShadow >= 8) {
outShadow.write(bsBuffShadow >> 24);
bsBuffShadow <<= 8;
bsLiveShadow -= 8;
}
bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
bsLiveShadow++;
}
// inlined: bsW(1, 0);
while (bsLiveShadow >= 8) {
outShadow.write(bsBuffShadow >> 24);
bsBuffShadow <<= 8;
bsLiveShadow -= 8;
}
// bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
bsLiveShadow++;
}
this.bsBuff = bsBuffShadow;
this.bsLive = bsLiveShadow;
}
/**
* Transmits the coding tables. For every table the code length of
* symbol 0 is written in 5 bits. Then, for every symbol, the running
* length is adjusted to that symbol's code length by writing the bits
* 10 for each step up or 11 for each step down, and a single 0 bit
* ends the symbol.
*
* @param nGroups number of coding tables to write
* @param alphaSize number of symbols in the alphabet
*/
private void sendMTFValues6(final int nGroups, final int alphaSize)
throws IOException {
final byte[][] len = this.data.sendMTFValues_len;
// the bit writer is inlined and works on local copies of the bit
// buffer state, which are stored back at the end
final OutputStream outShadow = this.out;
int bsLiveShadow = this.bsLive;
int bsBuffShadow = this.bsBuff;
for (int t = 0; t < nGroups; t++) {
byte[] len_t = len[t];
int curr = len_t[0] & 0xff;
// inlined: bsW(5, curr);
while (bsLiveShadow >= 8) {
outShadow.write(bsBuffShadow >> 24); // write 8-bit
bsBuffShadow <<= 8;
bsLiveShadow -= 8;
}
bsBuffShadow |= curr << (32 - bsLiveShadow - 5);
bsLiveShadow += 5;
for (int i = 0; i < alphaSize; i++) {
int lti = len_t[i] & 0xff;
while (curr < lti) {
// inlined: bsW(2, 2);
while (bsLiveShadow >= 8) {
outShadow.write(bsBuffShadow >> 24); // write 8-bit
bsBuffShadow <<= 8;
bsLiveShadow -= 8;
}
bsBuffShadow |= 2 << (32 - bsLiveShadow - 2);
bsLiveShadow += 2;
curr++; /* 10 */
}
while (curr > lti) {
// inlined: bsW(2, 3);
while (bsLiveShadow >= 8) {
outShadow.write(bsBuffShadow >> 24); // write 8-bit
bsBuffShadow <<= 8;
bsLiveShadow -= 8;
}
bsBuffShadow |= 3 << (32 - bsLiveShadow - 2);
bsLiveShadow += 2;
curr--; /* 11 */
}
// inlined: bsW(1, 0);
while (bsLiveShadow >= 8) {
outShadow.write(bsBuffShadow >> 24); // write 8-bit
bsBuffShadow <<= 8;
bsLiveShadow -= 8;
}
// bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
bsLiveShadow++;
}
}
this.bsBuff = bsBuffShadow;
this.bsLive = bsLiveShadow;
}
/**
* Transmits the block data proper: the symbols in data.sfmap are
* taken in groups of up to G_SIZE, and every symbol of a group is
* written with the code and code length from the coding table that
* data.selector names for that group.
*/
private void sendMTFValues7() throws IOException {
final Data dataShadow = this.data;
final byte[][] len = dataShadow.sendMTFValues_len;
final int[][] code = dataShadow.sendMTFValues_code;
final OutputStream outShadow = this.out;
final byte[] selector = dataShadow.selector;
final char[] sfmap = dataShadow.sfmap;
final int nMTFShadow = this.nMTF;
// index of the selector for the current group
int selCtr = 0;
// local copies of the bit buffer state, stored back at the end
int bsLiveShadow = this.bsLive;
int bsBuffShadow = this.bsBuff;
for (int gs = 0; gs < nMTFShadow;) {
final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
final int selector_selCtr = selector[selCtr] & 0xff;
final int[] code_selCtr = code[selector_selCtr];
final byte[] len_selCtr = len[selector_selCtr];
while (gs <= ge) {
final int sfmap_i = sfmap[gs];
//
// inlined: bsW(len_selCtr[sfmap_i] & 0xff,
// code_selCtr[sfmap_i]);
//
while (bsLiveShadow >= 8) {
outShadow.write(bsBuffShadow >> 24);
bsBuffShadow <<= 8;
bsLiveShadow -= 8;
}
final int n = len_selCtr[sfmap_i] & 0xFF;
bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n);
bsLiveShadow += n;
gs++;
}
gs = ge + 1;
selCtr++;
}
this.bsBuff = bsBuffShadow;
this.bsLive = bsLiveShadow;
}
/**
 * Writes the contents of the sorted block: the 24-bit origPtr
 * first, then the move-to-front coded and Huffman coded data.
 */
private void moveToFrontCodeAndSend() throws IOException {
    final int originPointer = this.data.origPtr;
    bsW(24, originPointer);
    generateMTFValues();
    sendMTFValues();
}
/**
 * Hands the current block, up to and including index
 * <code>last</code>, to the block sorter.
 */
private void blockSort() {
    this.blockSorter.blockSort(this.data, this.last);
}
/**
* Performs Move-To-Front on the Burrows-Wheeler transformed
* buffer, storing the MTFed data in data.sfmap in RUNA/RUNB
* run-length-encoded form.
*
* Keeps track of byte frequencies in data.mtfFreq at the same time.
*
* Also sets nInUse to the number of distinct byte values in the block
* and nMTF to the number of symbols stored in data.sfmap, including
* the end-of-block symbol.
*/
private void generateMTFValues() {
final int lastShadow = this.last;
final Data dataShadow = this.data;
final boolean[] inUse = dataShadow.inUse;
final byte[] block = dataShadow.block;
final int[] fmap = dataShadow.fmap;
final char[] sfmap = dataShadow.sfmap;
final int[] mtfFreq = dataShadow.mtfFreq;
final byte[] unseqToSeq = dataShadow.unseqToSeq;
final byte[] yy = dataShadow.generateMTFValues_yy;
// make maps
// number the byte values that occur in the block consecutively from 0
int nInUseShadow = 0;
for (int i = 0; i < 256; i++) {
if (inUse[i]) {
unseqToSeq[i] = (byte) nInUseShadow;
nInUseShadow++;
}
}
this.nInUse = nInUseShadow;
// symbol number of the end-of-block marker
final int eob = nInUseShadow + 1;
for (int i = eob; i >= 0; i--) {
mtfFreq[i] = 0;
}
// the move-to-front list starts out in ascending order
for (int i = nInUseShadow; --i >= 0;) {
yy[i] = (byte) i;
}
// next index to write in sfmap
int wr = 0;
// number of position-0 hits seen since the last symbol was written
int zPend = 0;
for (int i = 0; i <= lastShadow; i++) {
final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
byte tmp = yy[0];
int j = 0;
// search for ll_i, shifting the entries passed over back by one
while (ll_i != tmp) {
j++;
byte tmp2 = tmp;
tmp = yy[j];
yy[j] = tmp2;
}
yy[0] = tmp;
if (j == 0) {
zPend++;
} else {
if (zPend > 0) {
// write the pending run length as a series of RUNA/RUNB symbols
zPend--;
while (true) {
if ((zPend & 1) == 0) {
sfmap[wr] = RUNA;
wr++;
mtfFreq[RUNA]++;
} else {
sfmap[wr] = RUNB;
wr++;
mtfFreq[RUNB]++;
}
if (zPend >= 2) {
zPend = (zPend - 2) >> 1;
} else {
break;
}
}
zPend = 0;
}
// positions 1 and up are stored as position + 1
sfmap[wr] = (char) (j + 1);
wr++;
mtfFreq[j + 1]++;
}
}
// write a run that is still pending at the end of the block
if (zPend > 0) {
zPend--;
while (true) {
if ((zPend & 1) == 0) {
sfmap[wr] = RUNA;
wr++;
mtfFreq[RUNA]++;
} else {
sfmap[wr] = RUNB;
wr++;
mtfFreq[RUNB]++;
}
if (zPend >= 2) {
zPend = (zPend - 2) >> 1;
} else {
break;
}
}
}
sfmap[wr] = (char) eob;
mtfFreq[eob]++;
this.nMTF = wr + 1;
}
/**
* Holds all working arrays of the compressor, so that they are
* allocated once per stream and can be released together. The sizes
* of block, fmap and sfmap depend on the blocksize given to the
* constructor; all other arrays have a fixed size. The byte counts in
* the comments below are those for a blocksize of 900k.
*/
static final class Data {
// with blockSize 900k
/* maps unsigned byte => "does it occur in block" */
final boolean[] inUse = new boolean[256]; // 256 byte
/* maps unsigned byte => its number among the bytes in use */
final byte[] unseqToSeq = new byte[256]; // 256 byte
/* frequency of each symbol written to sfmap */
final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
/* number of the coding table chosen for each group of symbols */
final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
/* move-to-front coded form of selector */
final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
/* the remaining fixed-size arrays are scratch space for the method
* named in the prefix of the field name */
final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548
// byte
final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
// byte
final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
// byte
final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
/* work arrays of hbMakeCodeLengths */
final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
// ------------
// 333408 byte
/* holds the RLEd block of original data starting at index 1.
* After sorting the last byte added to the buffer is at index
* 0. */
final byte[] block; // 900021 byte
/* maps index in Burrows-Wheeler transformed block => index of
* byte in original block */
final int[] fmap; // 3600000 byte
/* holds the symbols produced by generateMTFValues() */
final char[] sfmap; // 3600000 byte
// ------------
// 8433529 byte
// ============
/**
* Index of original line in Burrows-Wheeler table.
*
* This is the index in fmap that points to the last byte
* of the original data.
*/
int origPtr;
/**
* Allocates the arrays whose size depends on the blocksize.
*
* @param blockSize100k the blocksize in 100k units
*/
Data(int blockSize100k) {
super();
final int n = blockSize100k * BZip2Constants.baseBlockSize;
this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)];
this.fmap = new int[n];
this.sfmap = new char[2 * n];
}
}
}