All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ucar.unidata.io.bzip2.CBZip2InputStream Maven / Gradle / Ivy

The newest version!
/*
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001-2003 The Apache Software Foundation. All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 *
 * 3. The end-user documentation included with the redistribution, if
 * any, must include the following acknowlegement:
 * "This product includes software developed by the
 * Apache Software Foundation (http://www.apache.org/)."
 * Alternately, this acknowlegement may appear in the software itself,
 * if and wherever such third-party acknowlegements normally appear.
 *
 * 4. The names "Ant" and "Apache Software
 * Foundation" must not be used to endorse or promote products derived
 * from this software without prior written permission. For written
 * permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache"
 * nor may "Apache" appear in their names without prior written
 * permission of the Apache Group.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation. For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

/*
 * This package is based on the work done by Keiron Liddle, Aftex Software
 *  to whom the Ant project is very grateful for his
 * great code.
 */

/*
 * There has been further modification of this class by Unidata to
 * support setting the inputstream. This saves many extra memory
 * allocations when doing lots of uncompressions. The read method
 * may throw a BZip2ReadException instead of an IOException because
 * the superclass catches and discards IOExceptions.
 */

package ucar.unidata.io.bzip2;

import java.io.InputStream;
import java.io.IOException;

/**
 * An input stream that decompresses from the BZip2 format (without the file
 * header chars) to be read as any other stream.
 *
 * <p>The wrapped stream must be positioned at the {@code 'h'} byte that
 * precedes the block-size digit ({@code '1'}..{@code '9'}); use
 * {@link #CBZip2InputStream(InputStream, boolean)} with {@code skip == true}
 * to step over the two bytes in front of it.
 *
 * <p>An instance can be reused for several compressed streams through
 * {@link #setStream(InputStream)}; the large work buffers are kept between
 * streams whenever the block size does not change.
 *
 * <p>Decoding problems (truncated input, bad block header, CRC mismatch,
 * block overrun) are reported as unchecked {@code BZip2ReadException}s.
 *
 * @author Keiron Liddle
 */
public class CBZip2InputStream extends InputStream implements BZip2Constants {
  /* States of the run-length expansion state machine driven by read(). */
  private static final int START_BLOCK_STATE = 1;
  private static final int RAND_PART_A_STATE = 2;
  private static final int RAND_PART_B_STATE = 3;
  private static final int RAND_PART_C_STATE = 4;
  private static final int NO_RAND_PART_A_STATE = 5;
  private static final int NO_RAND_PART_B_STATE = 6;
  private static final int NO_RAND_PART_C_STATE = 7;

  /*
   * index of the last char in the block, so
   * the block size == last + 1.
   */
  private int last;

  /*
   * index in zptr[] of original string after sorting.
   */
  private int origPtr;

  /*
   * always: in the range 0 .. 9.
   * The current block size is 100000 * this number.
   */
  private int blockSize100k;

  private boolean blockRandomised;

  /* Bit buffer and the number of not yet consumed bits held in it. */
  private int bsBuff;
  private int bsLive;
  private CRC mCrc = new CRC();

  private boolean[] inUse = new boolean[256];
  private int nInUse;

  private char[] seqToUnseq = new char[256];
  private char[] unseqToSeq = new char[256];

  private char[] selector = new char[MAX_SELECTORS];
  private char[] selectorMtf = new char[MAX_SELECTORS];

  private int[] tt;
  private char[] ll8;

  /*
   * freq table collected to save a pass over the data
   * during decompression.
   */
  private int[] unzftab = new int[256];

  private int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE];
  private int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE];
  private int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE];
  private int[] minLens = new int[N_GROUPS];

  /*
   * Cursor into selector[] while the symbols of one block are decoded:
   * groupNo is the current selector index, groupPos the number of symbols
   * still to be decoded with that selector.
   */
  private int groupNo;
  private int groupPos;

  private InputStream bsStream;

  private boolean streamEnd;
  private int currentChar = -1;
  private int currentState = START_BLOCK_STATE;

  private int storedBlockCRC, storedCombinedCRC;
  private int computedBlockCRC, computedCombinedCRC;

  int i2, count, chPrev, ch2;
  int i, tPos;
  int rNToGo;
  int rTPos;
  int j2;
  char z;

  /**
   * Creates a decoder without an input; {@link #setStream(InputStream)}
   * must be called before reading.
   */
  public CBZip2InputStream() {}

  /**
   * Creates a decoder and immediately starts decoding {@code zStream}.
   *
   * @param zStream compressed input, positioned at the {@code 'h'} header byte
   * @throws BZip2ReadException if the first block cannot be decoded
   */
  public CBZip2InputStream(InputStream zStream) {
    setStream(zStream);
  }

  /**
   * Creates a decoder, optionally skipping the two bytes that precede the
   * {@code 'h'} header byte.
   *
   * @param zStream compressed input
   * @param skip if {@code true}, exactly two bytes are consumed from
   *        {@code zStream} before decoding starts
   * @throws IOException if the stream ends (or fails) while skipping
   * @throws BZip2ReadException if the first block cannot be decoded
   */
  public CBZip2InputStream(InputStream zStream, boolean skip) throws IOException {
    if (skip) {
      // read(byte[]) may legally return after a single byte, which would
      // leave the decoder misaligned; consume the two bytes one at a time.
      for (int skipped = 0; skipped < 2; skipped++) {
        if (zStream.read() == -1) {
          throw new IOException("End of stream reached skipping bytes");
        }
      }
    }
    setStream(zStream);
  }

  /**
   * Added 5-30-2006 to allow for resetting of the input used
   * by this object. This saves in memory allocation costs.
   *
   * <p>Resets all decoder state, reads the stream header and, when the
   * header is valid, decodes the first block so that {@link #read()} can
   * deliver data. If the header is not recognised the input is closed and
   * the decoder reports end of stream.
   *
   * @param zStream compressed input, positioned at the {@code 'h'} header byte
   * @throws BZip2ReadException if the input is truncated or corrupt
   */
  public void setStream(InputStream zStream) {
    last = 0;
    origPtr = 0;
    blockSize100k = 0;
    blockRandomised = false;
    bsBuff = 0;
    bsLive = 0;
    mCrc = new CRC();
    nInUse = 0;
    groupNo = 0;
    groupPos = 0;
    bsStream = null;
    streamEnd = false;
    currentChar = -1;
    currentState = START_BLOCK_STATE;
    storedBlockCRC = storedCombinedCRC = 0;
    computedBlockCRC = computedCombinedCRC = 0;
    i2 = count = chPrev = ch2 = 0;
    i = tPos = 0;
    rNToGo = 0;
    rTPos = 0;
    j2 = 0;
    z = 0;

    bsSetStream(zStream);
    initialize();
    if (!streamEnd) { // Handle if initialize does not detect valid bz2 stream
      initBlock();
      setupBlock();
    }
  }

  /**
   * Reads the stream.
   *
   * @return the next decompressed byte, or {@code -1} once the end of the
   *         compressed stream has been reached
   * @throws BZip2ReadException if there is a problem. InputStream does
   *         not throw an IOException, so we throw a RuntimeException.
   */
  @Override
  public int read() {
    if (streamEnd) {
      return -1;
    }

    // The byte to hand out was prepared by the previous call; advance the
    // state machine now so that the next byte is ready afterwards.
    int retChar = currentChar;
    switch (currentState) {
      case RAND_PART_B_STATE:
        setupRandPartB();
        break;
      case RAND_PART_C_STATE:
        setupRandPartC();
        break;
      case NO_RAND_PART_B_STATE:
        setupNoRandPartB();
        break;
      case NO_RAND_PART_C_STATE:
        setupNoRandPartC();
        break;
      default:
        break;
    }
    return retChar;
  }

  /**
   * Reads the two stream header bytes ({@code 'h'} and the block-size digit)
   * and sizes the work buffers. On an unrecognised header the input is
   * released and the decoder is put into the end-of-stream state.
   */
  private void initialize() {
    char magic3, magic4;
    magic3 = bsGetUChar();
    magic4 = bsGetUChar();
    if (magic3 != 'h' || magic4 < '1' || magic4 > '9') {
      bsFinishedWithStream();
      streamEnd = true;
      return;
    }

    setDecompressStructureSizes(magic4 - '0');
    computedCombinedCRC = 0;
  }

  /**
   * Reads the next six-byte block marker. The end-of-stream marker finishes
   * decoding via {@link #complete()}; a block marker causes the block to be
   * read and Huffman/MTF decoded; anything else is reported as a bad header.
   */
  private void initBlock() {
    char magic1, magic2, magic3, magic4;
    char magic5, magic6;
    magic1 = bsGetUChar();
    magic2 = bsGetUChar();
    magic3 = bsGetUChar();
    magic4 = bsGetUChar();
    magic5 = bsGetUChar();
    magic6 = bsGetUChar();
    if (magic1 == 0x17 && magic2 == 0x72 && magic3 == 0x45 && magic4 == 0x38 && magic5 == 0x50 && magic6 == 0x90) {
      complete();
      return;
    }

    if (magic1 != 0x31 || magic2 != 0x41 || magic3 != 0x59 || magic4 != 0x26 || magic5 != 0x53 || magic6 != 0x59) {
      // Mark the stream as finished before reporting: badBlockHeader()
      // always throws, so the flag has to be set first to keep later
      // read() calls from decoding garbage.
      streamEnd = true;
      badBlockHeader();
      return;
    }

    storedBlockCRC = bsGetInt32();

    blockRandomised = bsR(1) == 1;

    // currBlockNo++;
    getAndMoveToFrontDecode();

    mCrc.initialiseCRC();
    currentState = START_BLOCK_STATE;
  }

  /**
   * Verifies the CRC of the block that was just delivered and folds it into
   * the running combined CRC.
   */
  private void endBlock() {
    computedBlockCRC = mCrc.getFinalCRC();
    /* A bad CRC is considered a fatal error. */
    if (storedBlockCRC != computedBlockCRC) {
      // crcError();
      cadvise("CRC error: storedBlockCRC != computedBlockCRC");
    }

    computedCombinedCRC = (computedCombinedCRC << 1) | (computedCombinedCRC >>> 31);
    computedCombinedCRC ^= computedBlockCRC;
  }

  /**
   * Handles the end-of-stream marker: checks the combined CRC, releases the
   * input and switches the decoder to the end-of-stream state.
   */
  private void complete() {
    storedCombinedCRC = bsGetInt32();
    if (storedCombinedCRC != computedCombinedCRC) {
      // crcError();
      cadvise("CRC error: storedCombinedCRC != computedCombinedCRC");
    }

    bsFinishedWithStream();
    streamEnd = true;
  }

  /** Reports that a block decoded to more data than the block size allows. */
  private void blockOverrun() {
    cadvise("Block Overrun");
  }

  /** Reports an unrecognised six-byte block marker. */
  private void badBlockHeader() {
    cadvise("Bad Block Header");
  }

  /**
   * Closes and forgets the underlying stream, unless it is {@code System.in}.
   * Closing is best effort: a failure to close is deliberately not reported.
   */
  private void bsFinishedWithStream() {
    try {
      if (this.bsStream != null && this.bsStream != System.in) {
        this.bsStream.close();
        this.bsStream = null;
      }
    } catch (IOException ignored) {
      // Best effort only: the data has already been consumed, so a failing
      // close() must not turn a successful decode into an error.
    }
  }

  /** Installs {@code f} as the input and empties the bit buffer. */
  private void bsSetStream(InputStream f) {
    bsStream = f;
    bsLive = 0;
    bsBuff = 0;
  }

  /**
   * Reads {@code n} bits from the input, most significant bit first.
   *
   * @param n number of bits to read
   * @return the bits, right-aligned
   * @throws BZip2ReadException if the input ends or fails before {@code n}
   *         bits are available
   */
  private int bsR(int n) {
    while (bsLive < n) {
      int nextByte = 0;
      try {
        nextByte = bsStream.read();
      } catch (IOException e) {
        // NOTE(review): the IOException is dropped because only the
        // message-only BZip2ReadException constructor is known here.
        compressedStreamEOF();
      }
      if (nextByte == -1) {
        compressedStreamEOF();
      }
      bsBuff = (bsBuff << 8) | (nextByte & 0xff);
      bsLive += 8;
    }

    int v = (bsBuff >> (bsLive - n)) & ((1 << n) - 1);
    bsLive -= n;
    return v;
  }

  /** Reads eight bits and returns them as an unsigned value. */
  private char bsGetUChar() {
    return (char) bsR(8);
  }

  /** Reads {@code numBits} bits; see {@link #bsR(int)}. */
  private int bsGetIntVS(int numBits) {
    return bsR(numBits);
  }

  /** Reads four bytes and combines them, first byte most significant. */
  private int bsGetInt32() {
    int u = 0;
    u = (u << 8) | bsR(8);
    u = (u << 8) | bsR(8);
    u = (u << 8) | bsR(8);
    u = (u << 8) | bsR(8);
    return u;
  }

  /**
   * Builds the decoding tables of one Huffman group from its code lengths.
   *
   * @param limit receives, per code length, the largest code of that length
   * @param base receives, per code length, the offset that maps a code to
   *        its index in {@code perm}
   * @param perm receives the symbols ordered by code length
   * @param length code length of every symbol
   * @param minLen shortest code length in use
   * @param maxLen longest code length in use
   * @param alphaSize number of symbols in the alphabet
   */
  private void hbCreateDecodeTables(int[] limit, int[] base, int[] perm, char[] length, int minLen, int maxLen,
      int alphaSize) {
    int pp, i, j, vec;

    pp = 0;
    for (i = minLen; i <= maxLen; i++) {
      for (j = 0; j < alphaSize; j++) {
        if (length[j] == i) {
          perm[pp] = j;
          pp++;
        }
      }
    }

    for (i = 0; i < MAX_CODE_LEN; i++) {
      base[i] = 0;
    }
    for (i = 0; i < alphaSize; i++) {
      base[length[i] + 1]++;
    }

    for (i = 1; i < MAX_CODE_LEN; i++) {
      base[i] += base[i - 1];
    }

    for (i = 0; i < MAX_CODE_LEN; i++) {
      limit[i] = 0;
    }
    vec = 0;

    for (i = minLen; i <= maxLen; i++) {
      vec += (base[i + 1] - base[i]);
      limit[i] = vec - 1;
      vec <<= 1;
    }
    for (i = minLen + 1; i <= maxLen; i++) {
      base[i] = ((limit[i - 1] + 1) << 1) - base[i];
    }
  }

  /**
   * Reads the per-block tables: the set of byte values in use, the selector
   * list (stored move-to-front encoded) and the delta-coded Huffman code
   * lengths, then builds the decoding tables for every group.
   *
   * <p>NOTE(review): the group count, selector count and code lengths read
   * from the input are not range-checked here, so corrupt input can surface
   * as an ArrayIndexOutOfBoundsException instead of a BZip2ReadException.
   */
  private void recvDecodingTables() {
    char[][] len = new char[N_GROUPS][MAX_ALPHA_SIZE];
    int i, j, t, nGroups, nSelectors, alphaSize;
    int minLen, maxLen;
    boolean[] inUse16 = new boolean[16];

    /* Receive the mapping table */
    for (i = 0; i < 16; i++) {
      inUse16[i] = bsR(1) == 1;
    }

    for (i = 0; i < 256; i++) {
      inUse[i] = false;
    }

    for (i = 0; i < 16; i++) {
      if (inUse16[i]) {
        for (j = 0; j < 16; j++) {
          if (bsR(1) == 1) {
            inUse[i * 16 + j] = true;
          }
        }
      }
    }

    makeMaps();
    alphaSize = nInUse + 2;

    /* Now the selectors */
    nGroups = bsR(3);
    nSelectors = bsR(15);
    for (i = 0; i < nSelectors; i++) {
      j = 0;
      while (bsR(1) == 1) {
        j++;
      }
      selectorMtf[i] = (char) j;
    }

    /* Undo the MTF values for the selectors. */
    {
      char[] pos = new char[N_GROUPS];
      char tmp, v;
      for (v = 0; v < nGroups; v++) {
        pos[v] = v;
      }

      for (i = 0; i < nSelectors; i++) {
        v = selectorMtf[i];
        tmp = pos[v];
        while (v > 0) {
          pos[v] = pos[v - 1];
          v--;
        }
        pos[0] = tmp;
        selector[i] = tmp;
      }
    }

    /* Now the coding tables */
    for (t = 0; t < nGroups; t++) {
      int curr = bsR(5);
      for (i = 0; i < alphaSize; i++) {
        while (bsR(1) == 1) {
          if (bsR(1) == 0) {
            curr++;
          } else {
            curr--;
          }
        }
        len[t][i] = (char) curr;
      }
    }

    /* Create the Huffman decoding tables */
    for (t = 0; t < nGroups; t++) {
      minLen = 32;
      maxLen = 0;
      for (i = 0; i < alphaSize; i++) {
        if (len[t][i] > maxLen) {
          maxLen = len[t][i];
        }
        if (len[t][i] < minLen) {
          minLen = len[t][i];
        }
      }
      hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen, maxLen, alphaSize);
      minLens[t] = minLen;
    }
  }

  /**
   * Decodes the next Huffman symbol of the current block, moving on to the
   * next selector after every {@code G_SIZE} symbols.
   *
   * @return the decoded symbol
   * @throws BZip2ReadException if the input ends inside a code
   */
  private int recvNextSymbol() {
    if (groupPos == 0) {
      groupNo++;
      groupPos = G_SIZE;
    }
    groupPos--;

    int zt = selector[groupNo];
    int zn = minLens[zt];
    int zvec = bsR(zn);
    // Extend the code one bit at a time until it falls inside the range of
    // valid codes for its length.
    while (zvec > limit[zt][zn]) {
      zn++;
      zvec = (zvec << 1) | bsR(1);
    }
    return perm[zt][zvec - base[zt][zn]];
  }

  /**
   * Reads one block: the origin pointer, the decoding tables and then the
   * symbol stream, undoing the run-length (RUNA/RUNB) and move-to-front
   * coding. The resulting bytes are stored in {@code ll8[0..last]} and their
   * frequencies in {@code unzftab}.
   *
   * @throws BZip2ReadException if the input is truncated or the block holds
   *         more data than the block size allows
   */
  private void getAndMoveToFrontDecode() {
    char[] yy = new char[256];
    int i, nextSym;

    final int limitLast = baseBlockSize * blockSize100k;
    origPtr = bsGetIntVS(24);

    recvDecodingTables();
    final int EOB = nInUse + 1;
    groupNo = -1;
    groupPos = 0;

    /*
     * Setting up the unzftab entries here is not strictly
     * necessary, but it does save having to do it later
     * in a separate pass, and so saves a block's worth of
     * cache misses.
     */
    for (i = 0; i <= 255; i++) {
      unzftab[i] = 0;
    }

    for (i = 0; i <= 255; i++) {
      yy[i] = (char) i;
    }

    last = -1;

    nextSym = recvNextSymbol();

    while (nextSym != EOB) {
      if (nextSym == RUNA || nextSym == RUNB) {
        // A sequence of RUNA/RUNB symbols encodes a repeat count of the
        // byte currently at the front of the MTF list.
        int s = -1;
        int N = 1;
        do {
          if (nextSym == RUNA) {
            s = s + N;
          } else {
            s = s + (1 + 1) * N;
          }
          // No valid run can be as long as a whole block; stopping here
          // also keeps N and s from overflowing on corrupt input.
          if (s >= limitLast) {
            blockOverrun();
          }
          N = N * 2;
          nextSym = recvNextSymbol();
        } while (nextSym == RUNA || nextSym == RUNB);

        s++;

        // Check the bound before writing, so that an overlong run is
        // reported as a block overrun rather than as an array index error.
        if (last + s >= limitLast) {
          blockOverrun();
        }

        char ch = seqToUnseq[yy[0]];
        unzftab[ch] += s;

        while (s > 0) {
          last++;
          ll8[last] = ch;
          s--;
        }
      } else {
        last++;
        if (last >= limitLast) {
          blockOverrun();
        }

        char tmp = yy[nextSym - 1];
        unzftab[seqToUnseq[tmp]]++;
        ll8[last] = seqToUnseq[tmp];

        // Move-to-front: shift yy[0 .. nextSym-2] up by one position and
        // put the selected entry at the front.
        System.arraycopy(yy, 0, yy, 1, nextSym - 1);
        yy[0] = tmp;

        nextSym = recvNextSymbol();
      }
    }
  }

  /**
   * Builds the inverse Burrows-Wheeler vector {@code tt} for the block that
   * was just decoded and prepares the first output byte. Does nothing once
   * the end of the stream has been reached, because there is no new block
   * to set up then.
   */
  private void setupBlock() {
    if (streamEnd) {
      return;
    }

    int[] cftab = new int[257];
    char ch;

    cftab[0] = 0;
    for (i = 1; i <= 256; i++) {
      cftab[i] = unzftab[i - 1];
    }
    for (i = 1; i <= 256; i++) {
      cftab[i] += cftab[i - 1];
    }

    for (i = 0; i <= last; i++) {
      ch = ll8[i];
      tt[cftab[ch]] = i;
      cftab[ch]++;
    }

    tPos = tt[origPtr];
    count = 0;
    i2 = 0;
    ch2 = 256; /* not a char and not EOF */

    if (blockRandomised) {
      rNToGo = 0;
      rTPos = 0;
      setupRandPartA();
    } else {
      setupNoRandPartA();
    }
  }

  /**
   * Randomised block, part A: fetches the next byte of the block (undoing
   * the randomisation) or, at the end of the block, checks its CRC and
   * moves on to the next block.
   */
  private void setupRandPartA() {
    if (i2 <= last) {
      chPrev = ch2;
      ch2 = ll8[tPos];
      tPos = tt[tPos];
      if (rNToGo == 0) {
        rNToGo = rNums[rTPos];
        rTPos++;
        if (rTPos == 512) {
          rTPos = 0;
        }
      }
      rNToGo--;
      ch2 ^= ((rNToGo == 1) ? 1 : 0);
      i2++;

      currentChar = ch2;
      currentState = RAND_PART_B_STATE;
      mCrc.updateCRC(ch2);
    } else {
      endBlock();
      initBlock();
      setupBlock();
    }
  }

  /**
   * Plain block, part A: fetches the next byte of the block or, at the end
   * of the block, checks its CRC and moves on to the next block.
   */
  private void setupNoRandPartA() {
    if (i2 <= last) {
      chPrev = ch2;
      ch2 = ll8[tPos];
      tPos = tt[tPos];
      i2++;

      currentChar = ch2;
      currentState = NO_RAND_PART_B_STATE;
      mCrc.updateCRC(ch2);
    } else {
      endBlock();
      initBlock();
      setupBlock();
    }
  }

  /**
   * Randomised block, part B: counts equal consecutive bytes. After four of
   * them the following (de-randomised) byte is taken as a repeat count and
   * part C emits the repeats; otherwise decoding continues with part A.
   */
  private void setupRandPartB() {
    if (ch2 != chPrev) {
      currentState = RAND_PART_A_STATE;
      count = 1;
      setupRandPartA();
    } else {
      count++;
      if (count >= 4) {
        z = ll8[tPos];
        tPos = tt[tPos];
        if (rNToGo == 0) {
          rNToGo = rNums[rTPos];
          rTPos++;
          if (rTPos == 512) {
            rTPos = 0;
          }
        }
        rNToGo--;
        z ^= ((rNToGo == 1) ? 1 : 0);
        j2 = 0;
        currentState = RAND_PART_C_STATE;
        setupRandPartC();
      } else {
        currentState = RAND_PART_A_STATE;
        setupRandPartA();
      }
    }
  }

  /**
   * Randomised block, part C: emits the repeated byte {@code z} times, then
   * returns to part A.
   */
  private void setupRandPartC() {
    if (j2 < (int) z) {
      currentChar = ch2;
      mCrc.updateCRC(ch2);
      j2++;
    } else {
      currentState = RAND_PART_A_STATE;
      i2++;
      count = 0;
      setupRandPartA();
    }
  }

  /**
   * Plain block, part B: counts equal consecutive bytes. After four of them
   * the following byte is taken as a repeat count and part C emits the
   * repeats; otherwise decoding continues with part A.
   */
  private void setupNoRandPartB() {
    if (ch2 != chPrev) {
      currentState = NO_RAND_PART_A_STATE;
      count = 1;
      setupNoRandPartA();
    } else {
      count++;
      if (count >= 4) {
        z = ll8[tPos];
        tPos = tt[tPos];
        currentState = NO_RAND_PART_C_STATE;
        j2 = 0;
        setupNoRandPartC();
      } else {
        currentState = NO_RAND_PART_A_STATE;
        setupNoRandPartA();
      }
    }
  }

  /**
   * Plain block, part C: emits the repeated byte {@code z} times, then
   * returns to part A.
   */
  private void setupNoRandPartC() {
    if (j2 < (int) z) {
      currentChar = ch2;
      mCrc.updateCRC(ch2);
      j2++;
    } else {
      currentState = NO_RAND_PART_A_STATE;
      i2++;
      count = 0;
      setupNoRandPartA();
    }
  }

  /**
   * Records the block size and makes sure the {@code ll8} and {@code tt}
   * work buffers have exactly the matching length; buffers of the right
   * length are kept so that reusing this object avoids reallocation.
   *
   * @param newSize100k block size in units of 100000 bytes, 0..9
   * @throws BZip2ReadException if the size is out of range
   */
  private void setDecompressStructureSizes(int newSize100k) {
    if (!(0 <= newSize100k && newSize100k <= 9 && 0 <= blockSize100k && blockSize100k <= 9)) {
      cadvise("Invalid block size");
    }

    blockSize100k = newSize100k;

    if (newSize100k == 0) {
      return;
    }

    // Modified 5-30-2006 by unidata to allow for reuse of the ll8 and tt buffers

    int n = baseBlockSize * newSize100k;

    if (ll8 == null || ll8.length != n) {
      ll8 = new char[n];
    }
    if (tt == null || tt.length != n) {
      tt = new int[n];
    }
  }

  /**
   * Reports a decoding problem.
   *
   * @param msg description of the problem
   * @throws BZip2ReadException always
   */
  private void cadvise(String msg) {
    throw new BZip2ReadException(msg);
  }

  /** Reports that the compressed input ended (or failed) prematurely. */
  private void compressedStreamEOF() {
    cadvise("Compressed Stream EOF");
  }

  /**
   * Rebuilds the mappings between byte values and their sequence numbers
   * from {@code inUse} and counts the values in use.
   */
  private void makeMaps() {
    int i;
    nInUse = 0;
    for (i = 0; i < 256; i++) {
      if (inUse[i]) {
        seqToUnseq[nInUse] = (char) i;
        unseqToSeq[i] = (char) nInUse;
        nInUse++;
      }
    }
  }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy