// org.tukaani.xz.XZInputStream Maven / Gradle / Ivy
//
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of xz Show documentation
// XZ data compression
// There is a newer version: 1.10
/*
 * XZInputStream
 *
 * Author: Lasse Collin 
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

package org.tukaani.xz;

import java.io.InputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.EOFException;
import org.tukaani.xz.common.DecoderUtil;

/**
 * Decompresses a .xz file in streamed mode (no seeking).
 * <p>
 * Use this to decompress regular standalone .xz files. This reads from
 * its input stream until the end of the input or until an error occurs.
 * This supports decompressing concatenated .xz files.
 *
 * <h2>Typical use cases</h2>
 * <p>
 * Getting an input stream to decompress a .xz file:
 * <blockquote><pre>
 * InputStream infile = new FileInputStream("foo.xz");
 * XZInputStream inxz = new XZInputStream(infile);
 * </pre></blockquote>
 * <p>
 * It's important to keep in mind that decompressor memory usage depends
 * on the settings used to compress the file. The worst-case memory usage
 * of XZInputStream is currently 1.5 GiB. Still, very few files will
 * require more than about 65 MiB because that's how much decompressing
 * a file created with the highest preset level will need, and only a few
 * people use settings other than the predefined presets.
 * <p>
 * It is possible to specify a memory usage limit for
 * {@code XZInputStream}. If decompression requires more memory than
 * the specified limit, MemoryLimitException will be thrown when reading
 * from the stream. The limit is given in kibibytes (KiB). For example,
 * the following sets the memory usage limit to 100 MiB:
 * <blockquote><pre>
 * InputStream infile = new FileInputStream("foo.xz");
 * XZInputStream inxz = new XZInputStream(infile, 100 * 1024);
 * </pre></blockquote>
 *
 * <h2>When uncompressed size is known beforehand</h2>
 * <p>
 * If you are decompressing complete files and your application knows
 * exactly how much uncompressed data there should be, it is good to try
 * reading one more byte by calling {@code read()} and checking
 * that it returns {@code -1}. This way the decompressor will parse the
 * file footers and verify the integrity checks, giving the caller more
 * confidence that the uncompressed data is valid. (This advice seems to
 * apply to
 * {@link java.util.zip.GZIPInputStream java.util.zip.GZIPInputStream} too.)
 *
 * @see SingleXZInputStream
 */
public class XZInputStream extends InputStream {
    /** Cache handed to every per-Stream decoder for allocating arrays. */
    private final ArrayCache arrayCache;

    /** Memory usage limit passed to every per-Stream decoder. */
    private final int memoryLimit;

    /** Compressed input; set to {@code null} when this stream is closed. */
    private InputStream in;

    /**
     * Decoder of the current .xz Stream, or {@code null} when the current
     * Stream has ended (or this stream has been closed).
     */
    private SingleXZInputStream xzIn;

    /** Passed to every per-Stream decoder. */
    private final boolean verifyCheck;

    /** Set when the end of the input is found at a Stream boundary. */
    private boolean endReached = false;

    /** Pending error that is rethrown by later calls. */
    private IOException exception = null;

    /** One-byte scratch buffer used by {@link #read()}. */
    private final byte[] tempBuf = new byte[1];

    /**
     * Creates a new XZ decompressor without a memory usage limit.
     * <p>
     * This constructor reads and parses the XZ Stream Header (12 bytes)
     * from {@code in}. The header of the first Block is not read
     * until {@code read} is called.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from {@code in}
     *
     * @throws      IOException may be thrown by {@code in}
     */
    public XZInputStream(InputStream in) throws IOException {
        this(in, -1);
    }

    /**
     * Creates a new XZ decompressor without a memory usage limit.
     * <p>
     * This is identical to {@code XZInputStream(InputStream)}
     * except that this takes also the {@code arrayCache} argument.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @param       arrayCache  cache to be used for allocating large arrays
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from {@code in}
     *
     * @throws      IOException may be thrown by {@code in}
     *
     * @since 1.7
     */
    public XZInputStream(InputStream in, ArrayCache arrayCache)
            throws IOException {
        this(in, -1, arrayCache);
    }

    /**
     * Creates a new XZ decompressor with an optional memory usage limit.
     * <p>
     * This is identical to {@code XZInputStream(InputStream)} except
     * that this takes also the {@code memoryLimit} argument.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @param       memoryLimit memory usage limit in kibibytes (KiB)
     *                          or {@code -1} to impose no
     *                          memory usage limit
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from {@code in}
     *
     * @throws      IOException may be thrown by {@code in}
     */
    public XZInputStream(InputStream in, int memoryLimit) throws IOException {
        this(in, memoryLimit, true);
    }

    /**
     * Creates a new XZ decompressor with an optional memory usage limit.
     * <p>
     * This is identical to {@code XZInputStream(InputStream)} except
     * that this takes also the {@code memoryLimit} and
     * {@code arrayCache} arguments.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @param       memoryLimit memory usage limit in kibibytes (KiB)
     *                          or {@code -1} to impose no
     *                          memory usage limit
     *
     * @param       arrayCache  cache to be used for allocating large arrays
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from {@code in}
     *
     * @throws      IOException may be thrown by {@code in}
     *
     * @since 1.7
     */
    public XZInputStream(InputStream in, int memoryLimit,
                         ArrayCache arrayCache) throws IOException {
        this(in, memoryLimit, true, arrayCache);
    }

    /**
     * Creates a new XZ decompressor with an optional memory usage limit
     * and ability to disable verification of integrity checks.
     * <p>
     * This is identical to {@code XZInputStream(InputStream,int)} except
     * that this takes also the {@code verifyCheck} argument.
     * <p>
     * Note that integrity check verification should almost never be disabled.
     * Possible reasons to disable integrity check verification:
     * <ul>
     *   <li>Trying to recover data from a corrupt .xz file.</li>
     *   <li>Speeding up decompression. This matters mostly with SHA-256
     *   or with files that have compressed extremely well. It's recommended
     *   that integrity checking isn't disabled for performance reasons
     *   unless the file integrity is verified externally in some other
     *   way.</li>
     * </ul>
     * <p>
     * {@code verifyCheck} only affects the integrity check of
     * the actual compressed data. The CRC32 fields in the headers
     * are always verified.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @param       memoryLimit memory usage limit in kibibytes (KiB)
     *                          or {@code -1} to impose no
     *                          memory usage limit
     *
     * @param       verifyCheck if {@code true}, the integrity checks
     *                          will be verified; this should almost never
     *                          be set to {@code false}
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from {@code in}
     *
     * @throws      IOException may be thrown by {@code in}
     *
     * @since 1.6
     */
    public XZInputStream(InputStream in, int memoryLimit, boolean verifyCheck)
            throws IOException {
        this(in, memoryLimit, verifyCheck, ArrayCache.getDefaultCache());
    }

    /**
     * Creates a new XZ decompressor with an optional memory usage limit
     * and ability to disable verification of integrity checks.
     * <p>
     * This is identical to {@code XZInputStream(InputStream,int,boolean)}
     * except that this takes also the {@code arrayCache} argument.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @param       memoryLimit memory usage limit in kibibytes (KiB)
     *                          or {@code -1} to impose no
     *                          memory usage limit
     *
     * @param       verifyCheck if {@code true}, the integrity checks
     *                          will be verified; this should almost never
     *                          be set to {@code false}
     *
     * @param       arrayCache  cache to be used for allocating large arrays
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from {@code in}
     *
     * @throws      IOException may be thrown by {@code in}
     *
     * @since 1.7
     */
    public XZInputStream(InputStream in, int memoryLimit, boolean verifyCheck,
                         ArrayCache arrayCache) throws IOException {
        this.arrayCache = arrayCache;
        this.in = in;
        this.memoryLimit = memoryLimit;
        this.verifyCheck = verifyCheck;
        this.xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck,
                                            arrayCache);
    }

    /**
     * Decompresses the next byte from this input stream.
     * <p>
     * Reading lots of data with {@code read()} from this input stream
     * may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
     * if you need to read lots of data one byte at a time.
     *
     * @return      the next decompressed byte, or {@code -1}
     *              to indicate the end of the compressed stream
     *
     * @throws      CorruptedInputException
     * @throws      UnsupportedOptionsException
     * @throws      MemoryLimitException
     *
     * @throws      XZIOException if the stream has been closed
     *
     * @throws      EOFException
     *                          compressed input is truncated or corrupt
     *
     * @throws      IOException may be thrown by {@code in}
     */
    @Override
    public int read() throws IOException {
        // Mask with 0xFF so that the byte is returned as 0-255.
        return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
    }

    /**
     * Decompresses into an array of bytes.
     * <p>
     * If {@code len} is zero, no bytes are read and {@code 0}
     * is returned. Otherwise this will try to decompress {@code len}
     * bytes of uncompressed data. Less than {@code len} bytes may
     * be read only in the following situations:
     * <ul>
     *   <li>The end of the compressed data was reached successfully.</li>
     *   <li>An error is detected after at least one but less than
     *       {@code len} bytes have already been successfully decompressed.
     *       The next call with non-zero {@code len} will immediately
     *       throw the pending exception.</li>
     *   <li>An exception is thrown.</li>
     * </ul>
     *
     * @param       buf         target buffer for uncompressed data
     * @param       off         start offset in {@code buf}
     * @param       len         maximum number of uncompressed bytes to read
     *
     * @return      number of bytes read, or {@code -1} to indicate
     *              the end of the compressed stream
     *
     * @throws      CorruptedInputException
     * @throws      UnsupportedOptionsException
     * @throws      MemoryLimitException
     *
     * @throws      XZIOException if the stream has been closed
     *
     * @throws      EOFException
     *                          compressed input is truncated or corrupt
     *
     * @throws      IOException may be thrown by {@code in}
     */
    @Override
    public int read(byte[] buf, int off, int len) throws IOException {
        // "off + len < 0" catches integer overflow of the sum.
        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
            throw new IndexOutOfBoundsException();

        if (len == 0)
            return 0;

        if (in == null)
            throw new XZIOException("Stream closed");

        if (exception != null)
            throw exception;

        if (endReached)
            return -1;

        int size = 0;

        try {
            while (len > 0) {
                if (xzIn == null) {
                    // The previous Stream ended; look for a concatenated
                    // Stream or the end of the input.
                    prepareNextStream();
                    if (endReached)
                        return size == 0 ? -1 : size;
                }

                int ret = xzIn.read(buf, off, len);

                if (ret > 0) {
                    size += ret;
                    off += ret;
                    len -= ret;
                } else if (ret == -1) {
                    // End of the current Stream.
                    xzIn = null;
                }
            }
        } catch (IOException e) {
            // Remember the error so that later calls rethrow it. If some
            // data was already decompressed, return that data first.
            exception = e;
            if (size == 0)
                throw e;
        }

        return size;
    }

    /**
     * Skips possible Stream Padding and then either detects the end of
     * the input (sets {@code endReached}) or reads the next Stream Header
     * and creates a new decoder into {@code xzIn}.
     *
     * @throws      CorruptedInputException
     *                          the data after the previous Stream isn't
     *                          in the XZ format
     *
     * @throws      EOFException
     *                          input ends in the middle of Stream Padding
     *                          or Stream Header
     *
     * @throws      IOException may be thrown by {@code in} or by
     *                          the initialization of the new decoder
     */
    private void prepareNextStream() throws IOException {
        DataInputStream inData = new DataInputStream(in);
        byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];

        // The size of Stream Padding must be a multiple of four bytes,
        // all bytes zero.
        do {
            // First try to read one byte to see if we have reached the end
            // of the file.
            int ret = inData.read(buf, 0, 1);
            if (ret == -1) {
                endReached = true;
                return;
            }

            // Since we got one byte of input, there must be at least
            // three more available in a valid file.
            inData.readFully(buf, 1, 3);

        } while (buf[0] == 0 && buf[1] == 0 && buf[2] == 0 && buf[3] == 0);

        // Not all bytes are zero. In a valid Stream it indicates the
        // beginning of the next Stream. Read the rest of the Stream Header
        // and initialize the XZ decoder.
        inData.readFully(buf, 4, DecoderUtil.STREAM_HEADER_SIZE - 4);

        try {
            xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck, buf,
                                           arrayCache);
        } catch (XZFormatException e) {
            // Since this isn't the first .xz Stream, it is more
            // logical to tell that the data is corrupt. The original
            // exception is kept as the cause so that it isn't lost.
            CorruptedInputException corrupt = new CorruptedInputException(
                    "Garbage after a valid XZ Stream");
            corrupt.initCause(e);
            throw corrupt;
        }
    }

    /**
     * Returns the number of uncompressed bytes that can be read
     * without blocking. The value is returned with an assumption
     * that the compressed input data will be valid. If the compressed
     * data is corrupt, {@code CorruptedInputException} may get
     * thrown before the number of bytes claimed to be available have
     * been read from this input stream.
     *
     * @return      the number of uncompressed bytes that can be read
     *              without blocking
     *
     * @throws      XZIOException if the stream has been closed
     *
     * @throws      IOException if an earlier read failed (the pending
     *                          exception is rethrown)
     */
    @Override
    public int available() throws IOException {
        if (in == null)
            throw new XZIOException("Stream closed");

        if (exception != null)
            throw exception;

        // Between Streams there is no decoder to ask.
        return xzIn == null ? 0 : xzIn.available();
    }

    /**
     * Closes the stream and calls {@code in.close()}.
     * If the stream was already closed, this does nothing.
     * <p>
     * This is equivalent to {@code close(true)}.
     *
     * @throws  IOException if thrown by {@code in.close()}
     */
    @Override
    public void close() throws IOException {
        close(true);
    }

    /**
     * Closes the stream and optionally calls {@code in.close()}.
     * If the stream was already closed, this does nothing.
     * If {@code close(false)} has been called, a further
     * call of {@code close(true)} does nothing (it doesn't call
     * {@code in.close()}).
     * <p>
     * If you don't want to close the underlying {@code InputStream},
     * there is usually no need to worry about closing this stream either;
     * it's fine to do nothing and let the garbage collector handle it.
     * However, if you are using {@link ArrayCache}, {@code close(false)}
     * can be useful to put the allocated arrays back to the cache without
     * closing the underlying {@code InputStream}.
     * <p>
     * Note that if you successfully reach the end of the stream
     * ({@code read} returns {@code -1}), the arrays are
     * automatically put back to the cache by that {@code read} call. In
     * this situation {@code close(false)} is redundant (but harmless).
     *
     * @param   closeInput  if {@code true}, {@code in.close()} is called
     *
     * @throws  IOException if thrown by {@code in.close()}
     *
     * @since 1.7
     */
    public void close(boolean closeInput) throws IOException {
        if (in != null) {
            if (xzIn != null) {
                // Close only the decoder here; the underlying stream is
                // handled below according to closeInput.
                xzIn.close(false);
                xzIn = null;
            }

            try {
                if (closeInput)
                    in.close();
            } finally {
                // Mark this stream closed even if in.close() throws.
                in = null;
            }
        }
    }
}