All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ning.compress.gzip.GZIPUncompressor Maven / Gradle / Ivy

Go to download

Compression codec for LZF encoding, designed for particularly fast encoding/decoding with reasonable compression. The compressor is a basic Lempel-Ziv codec, without Huffman (deflate/gzip) or statistical post-encoding. See "http://oldhome.schmorp.de/marc/liblzf.html" for more on the original LZF package.

There is a newer version: 1.1.2
Show newest version
package com.ning.compress.gzip;

import java.io.IOException;
import java.util.zip.CRC32;
import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
import java.util.zip.Inflater;

import com.ning.compress.*;

/**
 * {@link com.ning.compress.Uncompressor} implementation for uncompressing
 * GZIP encoded data in "push" mode, in which input is not
 * read using {@link java.io.InputStream} but rather pushed to
 * uncompressor in variable length chunks.
 */
public class GZIPUncompressor extends Uncompressor
{
    /*
    ///////////////////////////////////////////////////////////////////////
    // GZIP constants
    ///////////////////////////////////////////////////////////////////////
     */

    // little-endian marker bytes:
    // Two-byte signature as a single 16-bit little-endian value: the low
    // 8 bits are the first byte expected in the stream, the high 8 bits the second.
    protected final static int GZIP_MAGIC = 0x8b1f;

    // First and second signature byte, as compared against header bytes
    protected final static byte GZIP_MAGIC_0 = (byte) (GZIP_MAGIC & 0xFF);
    protected final static byte GZIP_MAGIC_1 = (byte) (GZIP_MAGIC >> 8);
    
    // // // File header flags.
    // Bit masks tested against the header flag byte to decide which
    // optional header fields are present.

    //protected final static int FTEXT    = 1;    // Extra text
    protected final static int FHCRC      = 2;    // Header CRC
    protected final static int FEXTRA     = 4;    // Extra field
    protected final static int FNAME      = 8;    // File name
    protected final static int FCOMMENT   = 16;   // File comment

    /**
     * Default maximum number of input bytes handed to the underlying
     * {@link Inflater} at a time; used by the constructors that do not
     * take an explicit chunk length.
     */
    protected final static int DEFAULT_CHUNK_SIZE = 4096;

    /**
     * Minimum size requested for the buffer that receives decoded data.
     * It should be big enough to contain a typical amount of decoded data.
     * Note that the value is 0xFFFF (65535), i.e. one byte short of 64k.
     */
    protected final static int DECODE_BUFFER_SIZE = 0xFFFF;
        
    /*
    ///////////////////////////////////////////////////////////////////////
    // State constants
    ///////////////////////////////////////////////////////////////////////
     */

    /**
     * State in which a new compression stream can start.
     *<p>
     * NOTE: numeric ordering of state constants matters. Values below
     * {@link #STATE_TRAILER_INITIAL} are treated as header states, values
     * at or above it (other than {@link #STATE_BODY}) as trailer states.
     */
    protected final static int STATE_INITIAL = 0;

    // State in which first byte of signature has been matched, second expected
    protected final static int STATE_HEADER_SIG1 = 1;

    // State in which 'compression type' byte is expected
    protected final static int STATE_HEADER_COMP_TYPE = 2;
    // State in which flag byte is expected
    protected final static int STATE_HEADER_FLAGS = 3;
    // State in which we are to skip 6 bytes 
    protected final static int STATE_HEADER_SKIP = 4;
    // State in which first byte of optional "extra" field (FEXTRA) is expected
    protected final static int STATE_HEADER_EXTRA0 = 5;
    // State that follows STATE_HEADER_EXTRA0, handling the rest of what
    // the header parser consumes of the "extra" field
    protected final static int STATE_HEADER_EXTRA1 = 6;
    // State in which zero-terminated file name (FNAME) is being skipped
    protected final static int STATE_HEADER_FNAME = 7;
    // State in which zero-terminated comment (FCOMMENT) is being skipped
    protected final static int STATE_HEADER_COMMENT = 8;
    // States in which low / high byte of 16-bit header CRC (FHCRC) is expected
    protected final static int STATE_HEADER_CRC0 = 9;
    protected final static int STATE_HEADER_CRC1 = 10;

    // States in which bytes of the trailer CRC are expected, lowest byte first
    protected final static int STATE_TRAILER_INITIAL = 11;
    protected final static int STATE_TRAILER_CRC1 = 12;
    protected final static int STATE_TRAILER_CRC2 = 13;
    protected final static int STATE_TRAILER_CRC3 = 14;
    // States in which bytes of the trailer byte count are expected, lowest byte first
    protected final static int STATE_TRAILER_LEN0 = 15;
    protected final static int STATE_TRAILER_LEN1 = 16;
    protected final static int STATE_TRAILER_LEN2 = 17;
    protected final static int STATE_TRAILER_LEN3 = 18;

    /**
     * State in which we are buffering compressed data for decompression
     */
    protected final static int STATE_BODY = 20;
    
    /*
    ///////////////////////////////////////////////////////////////////////
    // Configuration, helper objects
    ///////////////////////////////////////////////////////////////////////
     */

    /**
     * Handler that will receive uncompressed data.
     */
    protected final DataHandler _handler;

    /**
     * Object that handles details of buffer recycling
     */
    protected final BufferRecycler _recycler;

    /**
     * Recycler from which {@link #_inflater} is allocated, and to which
     * it is released by {@link #complete()}.
     */
    protected final GZIPRecycler _gzipRecycler;

    /**
     * Inflater used for decoding the compressed body; set to null once
     * released by {@link #complete()}.
     */
    protected Inflater _inflater;
    
    /**
     * Checksum calculator: updated with header bytes while the header is
     * being read, reset when the body starts, and then updated with
     * uncompressed data for comparison against the trailer CRC.
     */
    protected final CRC32 _crc;
   
    /**
     * Maximum number of input bytes passed to the inflater in one go.
     */
    protected final int _inputChunkLength;
    
    /**
     * Buffer into which the inflater writes uncompressed data, before it
     * is passed to the handler; set to null once released by
     * {@link #complete()}.
     */
    protected byte[] _decodeBuffer;
    
    
    /*
    ///////////////////////////////////////////////////////////////////////
    // Decoder state
    ///////////////////////////////////////////////////////////////////////
     */
    
    /**
     * Current decoding state (one of the STATE_ constants), which
     * determines meaning of following byte(s).
     */
    protected int _state = STATE_INITIAL;

    /**
     * Flag set if {@link DataHandler} indicates that processing should be
     * terminated (by returning false when handed data). Once set, further
     * input is refused.
     */
    protected boolean _terminated;

    /**
     * Header flags read from gzip header; tested with {@link #_hasFlag}.
     */
    protected int _flags;

    /**
     * Expected CRC for header, from gzip file itself: a 16-bit value,
     * assembled from two bytes, low byte first.
     */
    protected int _headerCRC;
    
    /**
     * Simple counter used by header handling while skipping bytes.
     */
    protected int _skippedBytes;
    
    /**
     * CRC contained in trailer (assembled from four bytes, lowest first);
     * should match CRC calculated over uncompressed data.
     */
    protected int _trailerCRC;

    /**
     * Number of bytes that trailer indicates preceding data stream
     * should have had (assembled from four bytes, lowest first); compared
     * against the low 32 bits of the inflater's output byte count.
     */
    protected int _trailerCount;
    
    /*
    ///////////////////////////////////////////////////////////////////////
    // Instance creation
    ///////////////////////////////////////////////////////////////////////
     */
    
    /**
     * Creates an uncompressor that feeds the inflater in chunks of
     * {@link #DEFAULT_CHUNK_SIZE} bytes and uses the recyclers returned by
     * {@code BufferRecycler.instance()} and {@code GZIPRecycler.instance()}.
     *
     * @param h Handler that will receive uncompressed data
     */
    public GZIPUncompressor(DataHandler h)
    {
        this(h, DEFAULT_CHUNK_SIZE);
    }

    /**
     * Creates an uncompressor with an explicit input chunk length, using the
     * recyclers returned by {@code BufferRecycler.instance()} and
     * {@code GZIPRecycler.instance()}.
     *
     * @param h Handler that will receive uncompressed data
     * @param inputChunkLength Maximum number of input bytes passed to the
     *   inflater at a time; must be positive
     *
     * @throws IllegalArgumentException if {@code inputChunkLength} is less than 1
     */
    public GZIPUncompressor(DataHandler h, int inputChunkLength)
    {
        this(h, inputChunkLength, BufferRecycler.instance(), GZIPRecycler.instance());
    }

    /**
     * Creates a fully configured uncompressor.
     *
     * @param h Handler that will receive uncompressed data
     * @param inputChunkLength Maximum number of input bytes passed to the
     *   inflater at a time; must be positive
     * @param bufferRecycler Recycler from which the decode buffer is allocated
     * @param gzipRecycler Recycler from which the inflater is allocated
     *
     * @throws IllegalArgumentException if {@code inputChunkLength} is less than 1
     */
    public GZIPUncompressor(DataHandler h, int inputChunkLength, BufferRecycler bufferRecycler, GZIPRecycler gzipRecycler)
    {
        // A chunk length of zero would make feedCompressedData() loop forever
        // (inflater keeps asking for input but never gets any), and a negative
        // one would fail only later, inside Inflater.setInput(). Check before
        // anything is allocated from the recyclers.
        if (inputChunkLength < 1) {
            throw new IllegalArgumentException("Input chunk length must be positive, got "+inputChunkLength);
        }
        _inputChunkLength = inputChunkLength;
        _handler = h;
        _recycler = bufferRecycler;
        _decodeBuffer = bufferRecycler.allocDecodeBuffer(DECODE_BUFFER_SIZE);
        _gzipRecycler = gzipRecycler;
        _inflater = gzipRecycler.allocInflater();
        _crc = new CRC32();
    }

    /*
    ///////////////////////////////////////////////////////////////////////
    // Uncompressor API implementation
    ///////////////////////////////////////////////////////////////////////
     */

    /**
     * Pushes a chunk of gzip-encoded input through the decoder. Depending on
     * current state, bytes are consumed as header, compressed body (inflated
     * and passed to the handler) or trailer.
     *
     * @param comp Buffer that contains the input
     * @param offset Offset of the first input byte within {@code comp}
     * @param len Number of input bytes available
     *
     * @return False if processing has been terminated by the handler (either
     *   earlier, or during this call); true otherwise
     *
     * @throws IOException if the input is not valid gzip content
     */
    @Override
    public boolean feedCompressedData(byte[] comp, int offset, int len) throws IOException
    {
        if (_terminated) {
            return false;
        }

        final int end = offset + len;
        int ptr = offset;

        // Resuming in the middle of a trailer? Then this chunk is all trailer
        if ((_state >= STATE_TRAILER_INITIAL) && (_state != STATE_BODY)) {
            ptr = _handleTrailer(comp, ptr, end);
            if (ptr < end) { // sanity check
                _throwInternal();
            }
            return true;
        }
        // Otherwise, unless already within body, header needs to be handled first
        if (_state != STATE_BODY) {
            ptr = _handleHeader(comp, ptr, end);
            if (ptr >= end) { // all input used up
                return true;
            }
        }

        // Body: alternate between feeding input and draining output
        boolean streamDone;
        do {
            if (_inflater.needsInput()) {
                final int available = end - ptr;
                if (available < 1) { // nothing more to offer for now
                    return true;
                }
                final int chunk = Math.min(available, _inputChunkLength);
                _inflater.setInput(comp, ptr, chunk);
                ptr += chunk;
            }
            // drain whatever inflater can produce from the input it has
            for (;;) {
                final int count;
                try {
                    count = _inflater.inflate(_decodeBuffer);
                } catch (DataFormatException e) {
                    throw new GZIPException("Problems inflating gzip data: "+e.getMessage(), e);
                }
                if (count == 0) {
                    break;
                }
                _crc.update(_decodeBuffer, 0, count);
                final boolean carryOn = _handler.handleData(_decodeBuffer, 0, count);
                if (!carryOn) {
                    _terminated = true;
                    return false;
                }
            }
            streamDone = _inflater.finished() || _inflater.needsDictionary();
        } while (!streamDone);

        // End of compressed data: whatever inflater did not use belongs to trailer
        _state = STATE_TRAILER_INITIAL;
        final int unread = _inflater.getRemaining();
        if (unread > 0) {
            ptr -= unread;
        }
        ptr = _handleTrailer(comp, ptr, end);
        if (ptr < end) { // sanity check
            _throwInternal();
        }
        return !_terminated;
    }

    /**
     * Signals end of input: releases the decode buffer and the inflater back
     * to their recyclers, notifies the handler, and then verifies that input
     * did not end in the middle of a gzip stream.
     *
     * @throws IOException if (and only if processing was not terminated by
     *   the handler) input ended within header, body or trailer
     */
    @Override
    public void complete() throws IOException
    {
        final byte[] buffer = _decodeBuffer;
        if (buffer != null) {
            _decodeBuffer = null;
            _recycler.releaseDecodeBuffer(buffer);
        }
        final Inflater inflater = _inflater;
        if (inflater != null) {
            _inflater = null;
            _gzipRecycler.releaseInflater(inflater);
        }
        // Handler is notified even if the stream turns out to be truncated
        // (original author left it open whether failing first would be better)
        _handler.allDataHandled();

        // Nothing to verify if handler bailed out, or if we are between streams
        if (_terminated || (_state == STATE_INITIAL)) {
            return;
        }
        if (_state == STATE_BODY) {
            throw new GZIPException("Invalid GZIP stream: end-of-input in the middle of compressed data");
        }
        if (_state >= STATE_TRAILER_INITIAL) {
            throw new GZIPException("Invalid GZIP stream: end-of-input in the trailer (state: "+_state+")");
        }
        throw new GZIPException("Invalid GZIP stream: end-of-input in header (state: "+_state+")");
    }

    /*
    ///////////////////////////////////////////////////////////////////////
    // Helper methods, header/trailer
    ///////////////////////////////////////////////////////////////////////
     */

    /**
     * Checks whether all bits of given mask are set in header flags.
     *
     * @param flag Bit mask to check (one of the F-prefixed flag constants)
     *
     * @return True if every bit set in {@code flag} is also set in {@link #_flags}
     */
    protected final boolean _hasFlag(int flag) {
        // bits that are requested but not present in flags
        final int missing = flag & ~_flags;
        return missing == 0;
    }

    /**
     * Consumes bytes of a gzip member header, resuming from (and updating)
     * {@link #_state} so that a header may be split across any number of
     * input chunks.
     *<p>
     * Handles signature, compression type, flags, the 6 fixed bytes that follow
     * the flags, and the optional fields in their defined order: "extra" field
     * (2-byte little-endian length followed by that many payload bytes),
     * zero-terminated file name, zero-terminated comment and 16-bit header CRC.
     * The header CRC is verified against the low 16 bits of CRC-32 calculated
     * over all header bytes that precede the CRC field itself.
     *<p>
     * When the header has been fully read, state is changed to
     * {@link #STATE_BODY} and checksum is reset for use with the body.
     *
     * @param comp Buffer that contains the input
     * @param offset Offset of the first byte to handle
     * @param end Offset after the last available input byte
     *
     * @return Offset of the first byte that was not consumed; equal to
     *   {@code end} unless the header ended before the input did
     *
     * @throws IOException (a {@link GZIPException}) if signature or compression
     *   type is invalid, or if the header CRC does not match
     */
    private final int _handleHeader(byte[] comp, int offset, final int end) throws IOException
    {
        main_loop:
        while (offset < end) {
            byte b = comp[offset++];
            if (_state == STATE_INITIAL) {
                // start of a new stream: checksum must only cover this header
                _crc.reset();
            }
            // the two bytes of header CRC are not part of checksummed content
            if ((_state != STATE_HEADER_CRC0) && (_state != STATE_HEADER_CRC1)) {
                _crc.update(b);
            }

            switch (_state) {
            case STATE_INITIAL:
                if (b != GZIP_MAGIC_0) {
                    _reportBadHeader(comp, offset, end, 0);
                }
                if (offset >= end) {
                    _state = STATE_HEADER_SIG1;
                    break;
                }
                b = comp[offset++];
                _crc.update(b);
                // fall through
            case STATE_HEADER_SIG1:
                if (b != GZIP_MAGIC_1) {
                    _reportBadHeader(comp, offset, end, 1);
                }
                if (offset >= end) {
                    _state = STATE_HEADER_COMP_TYPE;
                    break;
                }
                b = comp[offset++];
                _crc.update(b);
                // fall through
            case STATE_HEADER_COMP_TYPE:
                if (b != Deflater.DEFLATED) {
                    // compression type is header byte #2 (not #1)
                    _reportBadHeader(comp, offset, end, 2);
                }
                if (offset >= end) {
                    _state = STATE_HEADER_FLAGS;
                    break;
                }
                b = comp[offset++];
                _crc.update(b);
                // fall through
            case STATE_HEADER_FLAGS:
                _flags = b & 0xFF; // mask to avoid sign extension
                _skippedBytes = 0;
                _state = STATE_HEADER_SKIP;
                if (offset >= end) {
                    break;
                }
                b = comp[offset++];
                _crc.update(b);
                // fall through
            case STATE_HEADER_SKIP:
                // 'b' is already one of the 6 bytes; counter is bumped before
                // each further read so that it survives running out of input
                while (++_skippedBytes < 6) {
                    if (offset >= end) {
                        break main_loop;
                    }
                    b = comp[offset++];
                    _crc.update(b);
                }
                _state = _nextHeaderState(0);
                if (_state == STATE_BODY) {
                    break main_loop;
                }
                break;
            case STATE_HEADER_EXTRA0:
                // Low byte of payload length. Stored bit-inverted (that is,
                // as a negative value) to mark that high byte is still pending.
                _skippedBytes = ~(b & 0xFF);
                _state = STATE_HEADER_EXTRA1;
                break;
            case STATE_HEADER_EXTRA1:
                if (_skippedBytes < 0) { // 'b' is high byte of payload length
                    _skippedBytes = (~_skippedBytes) | ((b & 0xFF) << 8);
                } else { // 'b' is a payload byte (resumed after input ran out)
                    --_skippedBytes;
                }
                {
                    // skip as much of the payload as this chunk contains
                    final int skip = Math.min(_skippedBytes, end - offset);
                    if (skip > 0) {
                        _crc.update(comp, offset, skip);
                        offset += skip;
                        _skippedBytes -= skip;
                    }
                }
                if (_skippedBytes > 0) { // payload continues in the next chunk
                    break main_loop;
                }
                _state = _nextHeaderState(FEXTRA);
                if (_state == STATE_BODY) {
                    break main_loop;
                }
                break;
            case STATE_HEADER_FNAME: // skip until zero
                while (b != 0) {
                    if (offset >= end) {
                        break main_loop;
                    }
                    b = comp[offset++];
                    _crc.update(b);
                }
                _state = _nextHeaderState(FNAME);
                if (_state == STATE_BODY) {
                    break main_loop;
                }
                break;
            case STATE_HEADER_COMMENT: // skip until zero
                while (b != 0) {
                    if (offset >= end) {
                        break main_loop;
                    }
                    b = comp[offset++];
                    _crc.update(b); // comment is covered by header CRC too
                }
                _state = _nextHeaderState(FCOMMENT);
                if (_state == STATE_BODY) {
                    break main_loop;
                }
                break;
            case STATE_HEADER_CRC0:
                _headerCRC = b & 0xFF;
                if (offset >= end) {
                    _state = STATE_HEADER_CRC1;
                    break;
                }
                b = comp[offset++];
                // fall through
            case STATE_HEADER_CRC1:
                _headerCRC += ((b & 0xFF) << 8);
                final int act = (int)_crc.getValue() & 0xffff;
                if (act != _headerCRC) {
                    throw new GZIPException("Corrupt GZIP header: header CRC 0x"
                                          +Integer.toHexString(act)+", expected 0x"
                                          +Integer.toHexString(_headerCRC));
                }
                _state = STATE_BODY;
                break main_loop;
            default:
                _throwInternal("Unknown header state: "+_state);
            }
        }
        if (_state == STATE_BODY) {
            // from here on checksum is calculated over uncompressed data
            _crc.reset();
        }
        return offset;
    }

    /**
     * Determines the state to enter once a part of the header has been handled:
     * that of the first optional field that is both flagged as present and
     * located after the part just handled, or {@link #STATE_BODY} if none.
     *
     * @param handledFlag Flag of the optional field just handled (FEXTRA, FNAME
     *   or FCOMMENT), or 0 if only the fixed part has been handled. Relies on
     *   numeric order of these flag constants matching order of the fields.
     */
    private int _nextHeaderState(int handledFlag)
    {
        if ((handledFlag < FEXTRA) && _hasFlag(FEXTRA)) {
            return STATE_HEADER_EXTRA0;
        }
        if ((handledFlag < FNAME) && _hasFlag(FNAME)) {
            return STATE_HEADER_FNAME;
        }
        if ((handledFlag < FCOMMENT) && _hasFlag(FCOMMENT)) {
            return STATE_HEADER_COMMENT;
        }
        if (_hasFlag(FHCRC)) {
            return STATE_HEADER_CRC0;
        }
        return STATE_BODY;
    }
    
    /**
     * Consumes bytes of the 8-byte gzip trailer, resuming from (and updating)
     * {@link #_state} so that the trailer may be split across input chunks.
     * Trailer consists of CRC-32 of the uncompressed data followed by the
     * uncompressed byte count, both as 4 bytes with lowest byte first; each is
     * verified as soon as its last byte has been read.
     *<p>
     * Once the trailer has been verified, state returns to
     * {@link #STATE_INITIAL} and both inflater and checksum are reset so that
     * a following gzip stream (fed by a later call) can be decoded. Any input
     * that follows the trailer within the same chunk is reported as an
     * internal error, since the state is then not a trailer state.
     *
     * @param comp Buffer that contains the input
     * @param offset Offset of the first byte to handle
     * @param end Offset after the last available input byte
     *
     * @return Offset after the last byte consumed (always {@code end} unless
     *   an exception is thrown)
     *
     * @throws IOException (a {@link GZIPException}) if CRC or byte count does
     *   not match what was decoded, or if called in a non-trailer state
     */
    private final int _handleTrailer(byte[] comp, int offset, final int end) throws IOException
    {
        while (offset < end) {
            // unsigned value of the next trailer byte
            final int value = comp[offset++] & 0xFF;

            switch (_state) {
            case STATE_TRAILER_INITIAL:
                _trailerCRC = value;
                _state = STATE_TRAILER_CRC1;
                break;
            case STATE_TRAILER_CRC1:
                _trailerCRC |= value << 8;
                _state = STATE_TRAILER_CRC2;
                break;
            case STATE_TRAILER_CRC2:
                _trailerCRC |= value << 16;
                _state = STATE_TRAILER_CRC3;
                break;
            case STATE_TRAILER_CRC3:
                _trailerCRC |= value << 24;
                final int actCRC = (int) _crc.getValue();
                // verify CRC:
                if (_trailerCRC != actCRC) {
                    throw new GZIPException("Corrupt block or trailer: expected CRC "
                            +Integer.toHexString(_trailerCRC)+", computed "+Integer.toHexString(actCRC));
                }
                _state = STATE_TRAILER_LEN0;
                break;
            case STATE_TRAILER_LEN0:
                _trailerCount = value;
                _state = STATE_TRAILER_LEN1;
                break;
            case STATE_TRAILER_LEN1:
                _trailerCount |= value << 8;
                _state = STATE_TRAILER_LEN2;
                break;
            case STATE_TRAILER_LEN2:
                _trailerCount |= value << 16;
                _state = STATE_TRAILER_LEN3;
                break;
            case STATE_TRAILER_LEN3:
                _trailerCount |= value << 24;
                _state = STATE_INITIAL;
                // Verify count; trailer only has the low 32 bits of it
                final int actCount32 = (int) _inflater.getBytesWritten();

                if (actCount32 != _trailerCount) {
                    throw new GZIPException("Corrupt block or trailer: expected byte count "+_trailerCount+", read "+actCount32);
                }
                // Stream fully handled: without a reset the inflater would stay
                // in "finished" state (and keep its byte count), so that a
                // following stream could not be decoded
                _inflater.reset();
                _crc.reset();
                break;
            default:
                _throwInternal("Unknown trailer state: "+_state);
            }
        }
        return offset;
    }

    /*
    ///////////////////////////////////////////////////////////////////////
    // Helper methods, other
    ///////////////////////////////////////////////////////////////////////
     */

    /**
     * Reports a condition that indicates a bug in the decoder itself, rather
     * than a problem with input.
     *
     * @throws GZIPException Always
     */
    protected void _throwInternal() throws GZIPException {
        final String message = "Internal error";
        throw new GZIPException(message);
    }
    
    /**
     * Reports a condition that indicates a bug in the decoder itself, rather
     * than a problem with input, with additional detail.
     *
     * @param msg Description appended to the generic error message
     *
     * @throws GZIPException Always
     */
    protected void _throwInternal(String msg) throws GZIPException {
        final StringBuilder message = new StringBuilder("Internal error: ");
        message.append(msg);
        throw new GZIPException(message.toString());
    }
    
    /**
     * Reports an invalid byte within the fixed part of the gzip header.
     *
     * @param comp Buffer that contains the input
     * @param nextOffset Offset <b>after</b> the offending byte (the header
     *   parser has already advanced past it when calling this method)
     * @param end Offset after the last available input byte (not used)
     * @param relative Index of the offending byte within the header: 0 or 1
     *   for signature bytes, 2 for compression type
     *
     * @throws GZIPException Always
     */
    protected void _reportBadHeader(byte[] comp, int nextOffset, int end, int relative)
        throws GZIPException
    {
        // Offending byte is the one just before 'nextOffset'. Reading at
        // 'nextOffset' itself would report the wrong byte, and fail with an
        // index error if the offending byte is the last one of the buffer.
        final int badOffset = nextOffset - 1;
        final String byteStr;
        if ((badOffset >= 0) && (badOffset < comp.length)) {
            byteStr = "0x"+Integer.toHexString(comp[badOffset] & 0xFF);
        } else { // should not occur, but error reporting must not fail itself
            byteStr = "[unknown]";
        }
        if (relative <= 1) {
            int exp = (relative == 0) ? (GZIP_MAGIC & 0xFF) : (GZIP_MAGIC >> 8);
            throw new GZIPException("Bad GZIP stream: byte #"+relative+" of header not '"
                    +exp+"' (0x"+Integer.toHexString(exp)+") but "+byteStr);
        }
        if (relative == 2) { // compression type: only deflate is supported
            throw new GZIPException("Bad GZIP stream: byte #2 of header invalid: type "+byteStr
                    +" not supported, 0x"+Integer.toHexString(Deflater.DEFLATED)
                    +" expected");
        }
        throw new GZIPException("Bad GZIP stream: byte #"+relative+" of header invalid: "+byteStr);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy