All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tools.bzip2.CBZip2OutputStream Maven / Gradle / Ivy

There is a newer version: 1.10.15
Show newest version
/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */

/*
 * This package is based on the work done by Keiron Liddle, Aftex Software
 *  to whom the Ant project is very grateful for his
 * great code.
 */

package org.apache.tools.bzip2;

import java.io.IOException;
import java.io.OutputStream;

/**
 * An output stream that compresses into the BZip2 format (without the file
 * header chars) into another stream.
 *
 * 

* The compression requires large amounts of memory. Thus you should call the * {@link #close() close()} method as soon as possible, to force * CBZip2OutputStream to release the allocated memory. *

* *

You can shrink the amount of allocated memory and maybe raise * the compression speed by choosing a lower blocksize, which in turn * may cause a lower compression ratio. You can avoid unnecessary * memory allocation by avoiding using a blocksize which is bigger * than the size of the input.

* *

You can compute the memory usage for compressing by the * following formula:

* *
 * <code>400k + (9 * blocksize)</code>.
 * 
* *

To get the memory required for decompression by {@link * CBZip2InputStream CBZip2InputStream} use

* *
 * <code>65k + (5 * blocksize)</code>.
 * 
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
Memory usage by blocksize
BlocksizeCompression
memory usage
Decompression
memory usage
100k1300k565k
200k2200k1065k
300k3100k1565k
400k4000k2065k
500k4900k2565k
600k5800k3065k
700k6700k3565k
800k7600k4065k
900k8500k4565k
* *

* For decompression CBZip2InputStream allocates less memory if the * bzipped input is smaller than one block. *

* *

* Instances of this class are not threadsafe. *

* *

* TODO: Update to BZip2 1.0.1 *

* */ public class CBZip2OutputStream extends OutputStream implements BZip2Constants { /** * The minimum supported blocksize == 1. */ public static final int MIN_BLOCKSIZE = 1; /** * The maximum supported blocksize == 9. */ public static final int MAX_BLOCKSIZE = 9; /** * This constant is accessible by subclasses for historical * purposes. If you don't know what it means then you don't need * it. */ protected static final int SETMASK = (1 << 21); /** * This constant is accessible by subclasses for historical * purposes. If you don't know what it means then you don't need * it. */ protected static final int CLEARMASK = (~SETMASK); /** * This constant is accessible by subclasses for historical * purposes. If you don't know what it means then you don't need * it. */ protected static final int GREATER_ICOST = 15; /** * This constant is accessible by subclasses for historical * purposes. If you don't know what it means then you don't need * it. */ protected static final int LESSER_ICOST = 0; /** * This constant is accessible by subclasses for historical * purposes. If you don't know what it means then you don't need * it. */ protected static final int SMALL_THRESH = 20; /** * This constant is accessible by subclasses for historical * purposes. If you don't know what it means then you don't need * it. */ protected static final int DEPTH_THRESH = 10; /** * This constant is accessible by subclasses for historical * purposes. If you don't know what it means then you don't need * it. */ protected static final int WORK_FACTOR = 30; /** * This constant is accessible by subclasses for historical * purposes. If you don't know what it means then you don't need * it. *

If you are ever unlucky/improbable enough to get a stack * overflow whilst sorting, increase the following constant and * try again. In practice I have never seen the stack go above 27 * elems, so the following limit seems very generous.

*/ protected static final int QSORT_STACK_SIZE = 1000; /** * Knuth's increments seem to work better than Incerpi-Sedgewick here. * Possibly because the number of elems to sort is usually small, typically * <= 20. */ @SuppressWarnings("unused") private static final int[] INCS = {1, 4, 13, 40, 121, 364, 1093, 3280, 9841, 29524, 88573, 265720, 797161, 2391484}; /** * This method is accessible by subclasses for historical * purposes. If you don't know what it does then you don't need * it. * * @param len char[] * @param freq char[] * @param alphaSize int * @param maxLen int */ protected static void hbMakeCodeLengths(char[] len, int[] freq, int alphaSize, int maxLen) { /* * Nodes and heap entries run from 1. Entry 0 for both the heap and * nodes is a sentinel. */ final int[] heap = new int[MAX_ALPHA_SIZE * 2]; final int[] weight = new int[MAX_ALPHA_SIZE * 2]; final int[] parent = new int[MAX_ALPHA_SIZE * 2]; for (int i = alphaSize; --i >= 0;) { weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; } for (boolean tooLong = true; tooLong;) { tooLong = false; int nNodes = alphaSize; int nHeap = 0; heap[0] = 0; weight[0] = 0; parent[0] = -2; for (int i = 1; i <= alphaSize; i++) { parent[i] = -1; nHeap++; heap[nHeap] = i; int zz = nHeap; int tmp = heap[zz]; while (weight[tmp] < weight[heap[zz >> 1]]) { heap[zz] = heap[zz >> 1]; zz >>= 1; } heap[zz] = tmp; } // assert (nHeap < (MAX_ALPHA_SIZE + 2)) : nHeap; while (nHeap > 1) { int n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; int yy = 0; int zz = 1; int tmp = heap[1]; while (true) { yy = zz << 1; if (yy > nHeap) { break; } if ((yy < nHeap) && (weight[heap[yy + 1]] < weight[heap[yy]])) { yy++; } if (weight[tmp] < weight[heap[yy]]) { break; } heap[zz] = heap[yy]; zz = yy; } heap[zz] = tmp; int n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; yy = 0; zz = 1; tmp = heap[1]; while (true) { yy = zz << 1; if (yy > nHeap) { break; } if ((yy < nHeap) && (weight[heap[yy + 1]] < weight[heap[yy]])) { yy++; } if (weight[tmp] < weight[heap[yy]]) { break; } heap[zz] = heap[yy]; zz = yy; } heap[zz] = tmp; nNodes++; parent[n1] = parent[n2] = nNodes; final int weight_n1 = weight[n1]; final int weight_n2 = weight[n2]; weight[nNodes] = (((weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00)) | (1 + (((weight_n1 & 0x000000ff) > (weight_n2 & 0x000000ff)) ? (weight_n1 & 0x000000ff) : (weight_n2 & 0x000000ff)) )); parent[nNodes] = -1; nHeap++; heap[nHeap] = nNodes; tmp = 0; zz = nHeap; tmp = heap[zz]; final int weight_tmp = weight[tmp]; while (weight_tmp < weight[heap[zz >> 1]]) { heap[zz] = heap[zz >> 1]; zz >>= 1; } heap[zz] = tmp; } // assert (nNodes < (MAX_ALPHA_SIZE * 2)) : nNodes; for (int i = 1; i <= alphaSize; i++) { int j = 0; int k = i; for (int parent_k; (parent_k = parent[k]) >= 0;) { k = parent_k; j++; } len[i - 1] = (char) j; if (j > maxLen) { tooLong = true; } } if (tooLong) { for (int i = 1; i < alphaSize; i++) { int j = weight[i] >> 8; j = 1 + (j >> 1); weight[i] = j << 8; } } } } private static void hbMakeCodeLengths(final byte[] len, final int[] freq, final Data dat, final int alphaSize, final int maxLen) { /* * Nodes and heap entries run from 1. Entry 0 for both the heap and * nodes is a sentinel. */ final int[] heap = dat.heap; final int[] weight = dat.weight; final int[] parent = dat.parent; for (int i = alphaSize; --i >= 0;) { weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; } for (boolean tooLong = true; tooLong;) { tooLong = false; int nNodes = alphaSize; int nHeap = 0; heap[0] = 0; weight[0] = 0; parent[0] = -2; for (int i = 1; i <= alphaSize; i++) { parent[i] = -1; nHeap++; heap[nHeap] = i; int zz = nHeap; int tmp = heap[zz]; while (weight[tmp] < weight[heap[zz >> 1]]) { heap[zz] = heap[zz >> 1]; zz >>= 1; } heap[zz] = tmp; } while (nHeap > 1) { int n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; int yy = 0; int zz = 1; int tmp = heap[1]; while (true) { yy = zz << 1; if (yy > nHeap) { break; } if ((yy < nHeap) && (weight[heap[yy + 1]] < weight[heap[yy]])) { yy++; } if (weight[tmp] < weight[heap[yy]]) { break; } heap[zz] = heap[yy]; zz = yy; } heap[zz] = tmp; int n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; yy = 0; zz = 1; tmp = heap[1]; while (true) { yy = zz << 1; if (yy > nHeap) { break; } if ((yy < nHeap) && (weight[heap[yy + 1]] < weight[heap[yy]])) { yy++; } if (weight[tmp] < weight[heap[yy]]) { break; } heap[zz] = heap[yy]; zz = yy; } heap[zz] = tmp; nNodes++; parent[n1] = parent[n2] = nNodes; final int weight_n1 = weight[n1]; final int weight_n2 = weight[n2]; weight[nNodes] = ((weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00)) | (1 + (((weight_n1 & 0x000000ff) > (weight_n2 & 0x000000ff)) ? (weight_n1 & 0x000000ff) : (weight_n2 & 0x000000ff))); parent[nNodes] = -1; nHeap++; heap[nHeap] = nNodes; tmp = 0; zz = nHeap; tmp = heap[zz]; final int weight_tmp = weight[tmp]; while (weight_tmp < weight[heap[zz >> 1]]) { heap[zz] = heap[zz >> 1]; zz >>= 1; } heap[zz] = tmp; } for (int i = 1; i <= alphaSize; i++) { int j = 0; int k = i; for (int parent_k; (parent_k = parent[k]) >= 0;) { k = parent_k; j++; } len[i - 1] = (byte) j; if (j > maxLen) { tooLong = true; } } if (tooLong) { for (int i = 1; i < alphaSize; i++) { int j = weight[i] >> 8; j = 1 + (j >> 1); weight[i] = j << 8; } } } } /** * Index of the last char in the block, so the block size == last + 1. */ private int last; /** * Always: in the range 0 .. 9. The current block size is 100000 * this * number. */ private final int blockSize100k; private int bsBuff; private int bsLive; private final CRC crc = new CRC(); private int nInUse; private int nMTF; private int currentChar = -1; private int runLength = 0; private int blockCRC; private int combinedCRC; private final int allowableBlockSize; /** * All memory intensive stuff. */ private Data data; private BlockSort blockSorter; private OutputStream out; /** * Chooses a blocksize based on the given length of the data to compress. * * @param inputLength * The length of the data which will be compressed by * CBZip2OutputStream. * @return The blocksize, between {@link #MIN_BLOCKSIZE} and * {@link #MAX_BLOCKSIZE} both inclusive. For a negative * inputLength this method returns MAX_BLOCKSIZE * always. */ public static int chooseBlockSize(long inputLength) { return (inputLength > 0) ? (int) Math .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE; } /** * Constructs a new CBZip2OutputStream with a blocksize of 900k. * *

* Attention: The caller is responsible to write the two BZip2 magic * bytes "BZ" to the specified stream prior to calling this * constructor. *

* * @param out * * the destination stream. * * @throws IOException * if an I/O error occurs in the specified stream. * @throws NullPointerException * if out == null. */ public CBZip2OutputStream(final OutputStream out) throws IOException { this(out, MAX_BLOCKSIZE); } /** * Constructs a new CBZip2OutputStream with specified blocksize. * *

* Attention: The caller is responsible to write the two BZip2 magic * bytes "BZ" to the specified stream prior to calling this * constructor. *

* * * @param out * the destination stream. * @param blockSize * the blockSize as 100k units. * * @throws IOException * if an I/O error occurs in the specified stream. * @throws IllegalArgumentException * if (blockSize < 1) || (blockSize > 9). * @throws NullPointerException * if out == null. * * @see #MIN_BLOCKSIZE * @see #MAX_BLOCKSIZE */ public CBZip2OutputStream(final OutputStream out, final int blockSize) throws IOException { super(); if (blockSize < 1) { throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1"); } if (blockSize > 9) { throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9"); } this.blockSize100k = blockSize; this.out = out; /* 20 is just a paranoia constant */ this.allowableBlockSize = (this.blockSize100k * BZip2Constants.baseBlockSize) - 20; init(); } /** {@inheritDoc} */ @Override public void write(final int b) throws IOException { if (this.out != null) { write0(b); } else { throw new IOException("closed"); } } /** * Writes the current byte to the buffer, run-length encoding it * if it has been repeated at least four times (the first step * RLEs sequences of four identical bytes). * *

Flushes the current block before writing data if it is * full.

* *

"write to the buffer" means adding to data.buffer starting * two steps "after" this.last - initially starting at index 1 * (not 0) - and updating this.last to point to the last index * written minus 1.

*/ private void writeRun() throws IOException { final int lastShadow = this.last; if (lastShadow < this.allowableBlockSize) { final int currentCharShadow = this.currentChar; final Data dataShadow = this.data; dataShadow.inUse[currentCharShadow] = true; final byte ch = (byte) currentCharShadow; int runLengthShadow = this.runLength; this.crc.updateCRC(currentCharShadow, runLengthShadow); final byte[] block = dataShadow.block; switch (runLengthShadow) { case 1: block[lastShadow + 2] = ch; this.last = lastShadow + 1; break; case 2: block[lastShadow + 2] = ch; block[lastShadow + 3] = ch; this.last = lastShadow + 2; break; case 3: block[lastShadow + 2] = ch; block[lastShadow + 3] = ch; block[lastShadow + 4] = ch; this.last = lastShadow + 3; break; default: runLengthShadow -= 4; dataShadow.inUse[runLengthShadow] = true; block[lastShadow + 2] = ch; block[lastShadow + 3] = ch; block[lastShadow + 4] = ch; block[lastShadow + 5] = ch; block[lastShadow + 6] = (byte) runLengthShadow; this.last = lastShadow + 5; break; } } else { endBlock(); initBlock(); writeRun(); } } /** * Overridden to close the stream. */ @Override protected void finalize() throws Throwable { finish(); super.finalize(); } public void finish() throws IOException { if (out != null) { try { if (this.runLength > 0) { writeRun(); } this.currentChar = -1; endBlock(); endCompression(); } finally { this.out = null; this.data = null; this.blockSorter = null; } } } @Override public void close() throws IOException { if (out != null) { OutputStream outShadow = this.out; finish(); outShadow.close(); } } @Override public void flush() throws IOException { OutputStream outShadow = this.out; if (outShadow != null) { outShadow.flush(); } } private void init() throws IOException { // write magic: done by caller who created this stream // this.out.write('B'); // this.out.write('Z'); this.data = new Data(this.blockSize100k); this.blockSorter = new BlockSort(this.data); /* * Write `magic' bytes h indicating file-format == huffmanised, followed * by a digit indicating blockSize100k. */ bsPutUByte('h'); bsPutUByte('0' + this.blockSize100k); this.combinedCRC = 0; initBlock(); } private void initBlock() { // blockNo++; this.crc.initialiseCRC(); this.last = -1; // ch = 0; boolean[] inUse = this.data.inUse; for (int i = 256; --i >= 0;) { inUse[i] = false; } } private void endBlock() throws IOException { this.blockCRC = this.crc.getFinalCRC(); this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31); this.combinedCRC ^= this.blockCRC; // empty block at end of file if (this.last == -1) { return; } /* sort the block and establish posn of original string */ blockSort(); /* * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 * :-). A 32 bit value does not really give a strong enough guarantee * that the value will not appear by chance in the compressed * datastream. Worst-case probability of this event, for a 900k block, * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 * bits. For a compressed file of size 100Gb -- about 100000 blocks -- * only a 48-bit marker will do. NB: normal compression/ decompression * do not rely on these statistical properties. They are only important * when trying to recover blocks from damaged files. */ bsPutUByte(0x31); bsPutUByte(0x41); bsPutUByte(0x59); bsPutUByte(0x26); bsPutUByte(0x53); bsPutUByte(0x59); /* Now the block's CRC, so it is in a known place. */ bsPutInt(this.blockCRC); /* Now a single bit indicating no randomisation. */ bsW(1, 0); /* Finally, block's contents proper. */ moveToFrontCodeAndSend(); } private void endCompression() throws IOException { /* * Now another magic 48-bit number, 0x177245385090, to indicate the end * of the last block. (sqrt(pi), if you want to know. I did want to use * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me * to feel statistically comfortable. Call me paranoid.) */ bsPutUByte(0x17); bsPutUByte(0x72); bsPutUByte(0x45); bsPutUByte(0x38); bsPutUByte(0x50); bsPutUByte(0x90); bsPutInt(this.combinedCRC); bsFinishedWithStream(); } /** * Returns the blocksize parameter specified at construction time. * * @return int */ public final int getBlockSize() { return this.blockSize100k; } @Override public void write(final byte[] buf, int offs, final int len) throws IOException { if (offs < 0) { throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); } if (len < 0) { throw new IndexOutOfBoundsException("len(" + len + ") < 0."); } if (offs + len > buf.length) { throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + len + ") > buf.length(" + buf.length + ")."); } if (this.out == null) { throw new IOException("stream closed"); } for (int hi = offs + len; offs < hi;) { write0(buf[offs++]); } } /** * Keeps track of the last bytes written and implicitly performs * run-length encoding as the first step of the bzip2 algorithm. */ private void write0(int b) throws IOException { if (this.currentChar != -1) { b &= 0xff; if (this.currentChar == b) { if (++this.runLength > 254) { writeRun(); this.currentChar = -1; this.runLength = 0; } // else nothing to do } else { writeRun(); this.runLength = 1; this.currentChar = b; } } else { this.currentChar = b & 0xff; this.runLength++; } } private static void hbAssignCodes(final int[] code, final byte[] length, final int minLen, final int maxLen, final int alphaSize) { int vec = 0; for (int n = minLen; n <= maxLen; n++) { for (int i = 0; i < alphaSize; i++) { if ((length[i] & 0xff) == n) { code[i] = vec; vec++; } } vec <<= 1; } } private void bsFinishedWithStream() throws IOException { while (this.bsLive > 0) { int ch = this.bsBuff >> 24; this.out.write(ch); // write 8-bit this.bsBuff <<= 8; this.bsLive -= 8; } } private void bsW(final int n, final int v) throws IOException { final OutputStream outShadow = this.out; int bsLiveShadow = this.bsLive; int bsBuffShadow = this.bsBuff; while (bsLiveShadow >= 8) { outShadow.write(bsBuffShadow >> 24); // write 8-bit bsBuffShadow <<= 8; bsLiveShadow -= 8; } this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n)); this.bsLive = bsLiveShadow + n; } private void bsPutUByte(final int c) throws IOException { bsW(8, c); } private void bsPutInt(final int u) throws IOException { bsW(8, (u >> 24) & 0xff); bsW(8, (u >> 16) & 0xff); bsW(8, (u >> 8) & 0xff); bsW(8, u & 0xff); } private void sendMTFValues() throws IOException { final byte[][] len = this.data.sendMTFValues_len; final int alphaSize = this.nInUse + 2; for (int t = N_GROUPS; --t >= 0;) { byte[] len_t = len[t]; for (int v = alphaSize; --v >= 0;) { len_t[v] = GREATER_ICOST; } } /* Decide how many coding tables to use */ // assert (this.nMTF > 0) : this.nMTF; final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3 : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6; /* Generate an initial set of coding tables */ sendMTFValues0(nGroups, alphaSize); /* * Iterate up to N_ITERS times to improve the tables. */ final int nSelectors = sendMTFValues1(nGroups, alphaSize); /* Compute MTF values for the selectors. */ sendMTFValues2(nGroups, nSelectors); /* Assign actual codes for the tables. */ sendMTFValues3(nGroups, alphaSize); /* Transmit the mapping table. */ sendMTFValues4(); /* Now the selectors. */ sendMTFValues5(nGroups, nSelectors); /* Now the coding tables. */ sendMTFValues6(nGroups, alphaSize); /* And finally, the block data proper */ sendMTFValues7(); } private void sendMTFValues0(final int nGroups, final int alphaSize) { final byte[][] len = this.data.sendMTFValues_len; final int[] mtfFreq = this.data.mtfFreq; int remF = this.nMTF; int gs = 0; for (int nPart = nGroups; nPart > 0; nPart--) { final int tFreq = remF / nPart; int ge = gs - 1; int aFreq = 0; while (aFreq < tFreq && ge < alphaSize - 1) { aFreq += mtfFreq[++ge]; } if (ge > gs && nPart != nGroups && nPart != 1 && (nGroups - nPart & 1) != 0) { aFreq -= mtfFreq[ge--]; } final byte[] len_np = len[nPart - 1]; for (int v = alphaSize; --v >= 0;) { if (v >= gs && v <= ge) { len_np[v] = LESSER_ICOST; } else { len_np[v] = GREATER_ICOST; } } gs = ge + 1; remF -= aFreq; } } private int sendMTFValues1(final int nGroups, final int alphaSize) { final Data dataShadow = this.data; final int[][] rfreq = dataShadow.sendMTFValues_rfreq; final int[] fave = dataShadow.sendMTFValues_fave; final short[] cost = dataShadow.sendMTFValues_cost; final char[] sfmap = dataShadow.sfmap; final byte[] selector = dataShadow.selector; final byte[][] len = dataShadow.sendMTFValues_len; final byte[] len_0 = len[0]; final byte[] len_1 = len[1]; final byte[] len_2 = len[2]; final byte[] len_3 = len[3]; final byte[] len_4 = len[4]; final byte[] len_5 = len[5]; final int nMTFShadow = this.nMTF; int nSelectors = 0; for (int iter = 0; iter < N_ITERS; iter++) { for (int t = nGroups; --t >= 0;) { fave[t] = 0; int[] rfreqt = rfreq[t]; for (int i = alphaSize; --i >= 0;) { rfreqt[i] = 0; } } nSelectors = 0; for (int gs = 0; gs < this.nMTF;) { /* Set group start & end marks. */ /* * Calculate the cost of this group as coded by each of the * coding tables. */ final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); if (nGroups == N_GROUPS) { // unrolled version of the else-block short cost0 = 0; short cost1 = 0; short cost2 = 0; short cost3 = 0; short cost4 = 0; short cost5 = 0; for (int i = gs; i <= ge; i++) { final int icv = sfmap[i]; cost0 += len_0[icv] & 0xff; cost1 += len_1[icv] & 0xff; cost2 += len_2[icv] & 0xff; cost3 += len_3[icv] & 0xff; cost4 += len_4[icv] & 0xff; cost5 += len_5[icv] & 0xff; } cost[0] = cost0; cost[1] = cost1; cost[2] = cost2; cost[3] = cost3; cost[4] = cost4; cost[5] = cost5; } else { for (int t = nGroups; --t >= 0;) { cost[t] = 0; } for (int i = gs; i <= ge; i++) { final int icv = sfmap[i]; for (int t = nGroups; --t >= 0;) { cost[t] += len[t][icv] & 0xff; } } } /* * Find the coding table which is best for this group, and * record its identity in the selector table. */ int bt = -1; for (int t = nGroups, bc = 999999999; --t >= 0;) { final int cost_t = cost[t]; if (cost_t < bc) { bc = cost_t; bt = t; } } fave[bt]++; selector[nSelectors] = (byte) bt; nSelectors++; /* * Increment the symbol frequencies for the selected table. */ final int[] rfreq_bt = rfreq[bt]; for (int i = gs; i <= ge; i++) { rfreq_bt[sfmap[i]]++; } gs = ge + 1; } /* * Recompute the tables based on the accumulated frequencies. */ for (int t = 0; t < nGroups; t++) { hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20); } } return nSelectors; } private void sendMTFValues2(final int nGroups, final int nSelectors) { // assert (nGroups < 8) : nGroups; final Data dataShadow = this.data; byte[] pos = dataShadow.sendMTFValues2_pos; for (int i = nGroups; --i >= 0;) { pos[i] = (byte) i; } for (int i = 0; i < nSelectors; i++) { final byte ll_i = dataShadow.selector[i]; byte tmp = pos[0]; int j = 0; while (ll_i != tmp) { j++; byte tmp2 = tmp; tmp = pos[j]; pos[j] = tmp2; } pos[0] = tmp; dataShadow.selectorMtf[i] = (byte) j; } } private void sendMTFValues3(final int nGroups, final int alphaSize) { int[][] code = this.data.sendMTFValues_code; byte[][] len = this.data.sendMTFValues_len; for (int t = 0; t < nGroups; t++) { int minLen = 32; int maxLen = 0; final byte[] len_t = len[t]; for (int i = alphaSize; --i >= 0;) { final int l = len_t[i] & 0xff; if (l > maxLen) { maxLen = l; } if (l < minLen) { minLen = l; } } // assert (maxLen <= 20) : maxLen; // assert (minLen >= 1) : minLen; hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); } } private void sendMTFValues4() throws IOException { final boolean[] inUse = this.data.inUse; final boolean[] inUse16 = this.data.sentMTFValues4_inUse16; for (int i = 16; --i >= 0;) { inUse16[i] = false; final int i16 = i * 16; for (int j = 16; --j >= 0;) { if (inUse[i16 + j]) { inUse16[i] = true; } } } for (int i = 0; i < 16; i++) { bsW(1, inUse16[i] ? 1 : 0); } final OutputStream outShadow = this.out; int bsLiveShadow = this.bsLive; int bsBuffShadow = this.bsBuff; for (int i = 0; i < 16; i++) { if (inUse16[i]) { final int i16 = i * 16; for (int j = 0; j < 16; j++) { // inlined: bsW(1, inUse[i16 + j] ? 1 : 0); while (bsLiveShadow >= 8) { outShadow.write(bsBuffShadow >> 24); // write 8-bit bsBuffShadow <<= 8; bsLiveShadow -= 8; } if (inUse[i16 + j]) { bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); } bsLiveShadow++; } } } this.bsBuff = bsBuffShadow; this.bsLive = bsLiveShadow; } private void sendMTFValues5(final int nGroups, final int nSelectors) throws IOException { bsW(3, nGroups); bsW(15, nSelectors); final OutputStream outShadow = this.out; final byte[] selectorMtf = this.data.selectorMtf; int bsLiveShadow = this.bsLive; int bsBuffShadow = this.bsBuff; for (int i = 0; i < nSelectors; i++) { for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) { // inlined: bsW(1, 1); while (bsLiveShadow >= 8) { outShadow.write(bsBuffShadow >> 24); bsBuffShadow <<= 8; bsLiveShadow -= 8; } bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); bsLiveShadow++; } // inlined: bsW(1, 0); while (bsLiveShadow >= 8) { outShadow.write(bsBuffShadow >> 24); bsBuffShadow <<= 8; bsLiveShadow -= 8; } // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); bsLiveShadow++; } this.bsBuff = bsBuffShadow; this.bsLive = bsLiveShadow; } private void sendMTFValues6(final int nGroups, final int alphaSize) throws IOException { final byte[][] len = this.data.sendMTFValues_len; final OutputStream outShadow = this.out; int bsLiveShadow = this.bsLive; int bsBuffShadow = this.bsBuff; for (int t = 0; t < nGroups; t++) { byte[] len_t = len[t]; int curr = len_t[0] & 0xff; // inlined: bsW(5, curr); while (bsLiveShadow >= 8) { outShadow.write(bsBuffShadow >> 24); // write 8-bit bsBuffShadow <<= 8; bsLiveShadow -= 8; } bsBuffShadow |= curr << (32 - bsLiveShadow - 5); bsLiveShadow += 5; for (int i = 0; i < alphaSize; i++) { int lti = len_t[i] & 0xff; while (curr < lti) { // inlined: bsW(2, 2); while (bsLiveShadow >= 8) { outShadow.write(bsBuffShadow >> 24); // write 8-bit bsBuffShadow <<= 8; bsLiveShadow -= 8; } bsBuffShadow |= 2 << (32 - bsLiveShadow - 2); bsLiveShadow += 2; curr++; /* 10 */ } while (curr > lti) { // inlined: bsW(2, 3); while (bsLiveShadow >= 8) { outShadow.write(bsBuffShadow >> 24); // write 8-bit bsBuffShadow <<= 8; bsLiveShadow -= 8; } bsBuffShadow |= 3 << (32 - bsLiveShadow - 2); bsLiveShadow += 2; curr--; /* 11 */ } // inlined: bsW(1, 0); while (bsLiveShadow >= 8) { outShadow.write(bsBuffShadow >> 24); // write 8-bit bsBuffShadow <<= 8; bsLiveShadow -= 8; } // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); bsLiveShadow++; } } this.bsBuff = bsBuffShadow; this.bsLive = bsLiveShadow; } private void sendMTFValues7() throws IOException { final Data dataShadow = this.data; final byte[][] len = dataShadow.sendMTFValues_len; final int[][] code = dataShadow.sendMTFValues_code; final OutputStream outShadow = this.out; final byte[] selector = dataShadow.selector; final char[] sfmap = dataShadow.sfmap; final int nMTFShadow = this.nMTF; int selCtr = 0; int bsLiveShadow = this.bsLive; int bsBuffShadow = this.bsBuff; for (int gs = 0; gs < nMTFShadow;) { final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); final int selector_selCtr = selector[selCtr] & 0xff; final int[] code_selCtr = code[selector_selCtr]; final byte[] len_selCtr = len[selector_selCtr]; while (gs <= ge) { final int sfmap_i = sfmap[gs]; // // inlined: bsW(len_selCtr[sfmap_i] & 0xff, // code_selCtr[sfmap_i]); // while (bsLiveShadow >= 8) { outShadow.write(bsBuffShadow >> 24); bsBuffShadow <<= 8; bsLiveShadow -= 8; } final int n = len_selCtr[sfmap_i] & 0xFF; bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n); bsLiveShadow += n; gs++; } gs = ge + 1; selCtr++; } this.bsBuff = bsBuffShadow; this.bsLive = bsLiveShadow; } private void moveToFrontCodeAndSend() throws IOException { bsW(24, this.data.origPtr); generateMTFValues(); sendMTFValues(); } private void blockSort() { blockSorter.blockSort(data, last); } /* * Performs Move-To-Front on the Burrows-Wheeler transformed * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB * run-length-encoded form. * *

Keeps track of byte frequencies in data.mtfFreq at the same time.

*/ private void generateMTFValues() { final int lastShadow = this.last; final Data dataShadow = this.data; final boolean[] inUse = dataShadow.inUse; final byte[] block = dataShadow.block; final int[] fmap = dataShadow.fmap; final char[] sfmap = dataShadow.sfmap; final int[] mtfFreq = dataShadow.mtfFreq; final byte[] unseqToSeq = dataShadow.unseqToSeq; final byte[] yy = dataShadow.generateMTFValues_yy; // make maps int nInUseShadow = 0; for (int i = 0; i < 256; i++) { if (inUse[i]) { unseqToSeq[i] = (byte) nInUseShadow; nInUseShadow++; } } this.nInUse = nInUseShadow; final int eob = nInUseShadow + 1; for (int i = eob; i >= 0; i--) { mtfFreq[i] = 0; } for (int i = nInUseShadow; --i >= 0;) { yy[i] = (byte) i; } int wr = 0; int zPend = 0; for (int i = 0; i <= lastShadow; i++) { final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; byte tmp = yy[0]; int j = 0; while (ll_i != tmp) { j++; byte tmp2 = tmp; tmp = yy[j]; yy[j] = tmp2; } yy[0] = tmp; if (j == 0) { zPend++; } else { if (zPend > 0) { zPend--; while (true) { if ((zPend & 1) == 0) { sfmap[wr] = RUNA; wr++; mtfFreq[RUNA]++; } else { sfmap[wr] = RUNB; wr++; mtfFreq[RUNB]++; } if (zPend >= 2) { zPend = (zPend - 2) >> 1; } else { break; } } zPend = 0; } sfmap[wr] = (char) (j + 1); wr++; mtfFreq[j + 1]++; } } if (zPend > 0) { zPend--; while (true) { if ((zPend & 1) == 0) { sfmap[wr] = RUNA; wr++; mtfFreq[RUNA]++; } else { sfmap[wr] = RUNB; wr++; mtfFreq[RUNB]++; } if (zPend >= 2) { zPend = (zPend - 2) >> 1; } else { break; } } } sfmap[wr] = (char) eob; mtfFreq[eob]++; this.nMTF = wr + 1; } static final class Data { // with blockSize 900k /* maps unsigned byte => "does it occur in block" */ final boolean[] inUse = new boolean[256]; // 256 byte final byte[] unseqToSeq = new byte[256]; // 256 byte final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte final byte[] generateMTFValues_yy = new byte[256]; // 256 byte final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 // byte final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 // byte final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 // byte final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte // ------------ // 333408 byte /* holds the RLEd block of original data starting at index 1. * After sorting the last byte added to the buffer is at index * 0. */ final byte[] block; // 900021 byte /* maps index in Burrows-Wheeler transformed block => index of * byte in original block */ final int[] fmap; // 3600000 byte final char[] sfmap; // 3600000 byte // ------------ // 8433529 byte // ============ /** * Index of original line in Burrows-Wheeler table. * *

This is the index in fmap that points to the last byte * of the original data.

*/ int origPtr; Data(int blockSize100k) { super(); final int n = blockSize100k * BZip2Constants.baseBlockSize; this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)]; this.fmap = new int[n]; this.sfmap = new char[2 * n]; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy