org.tukaani.xz.LZMA2InputStream Maven / Gradle / Ivy
/*
* LZMA2InputStream
*
* Authors: Lasse Collin
* Igor Pavlov
*
* This file has been put into the public domain.
* You can do whatever you want with this file.
*/
package org.tukaani.xz;
import java.io.InputStream;
import java.io.DataInputStream;
import java.io.IOException;
import org.tukaani.xz.lz.LZDecoder;
import org.tukaani.xz.rangecoder.RangeDecoderFromBuffer;
import org.tukaani.xz.lzma.LZMADecoder;
/**
* Decompresses a raw LZMA2 stream (no XZ headers).
*/
public class LZMA2InputStream extends InputStream {
/**
* Smallest valid LZMA2 dictionary size.
*
* Very tiny dictionaries would be a performance problem, so
* the minimum is 4 KiB.
*/
public static final int DICT_SIZE_MIN = 4096;
/**
* Largest dictionary size supported by this implementation.
*
* The LZMA2 algorithm allows dictionaries up to one byte less than 4 GiB.
* This implementation supports only 16 bytes less than 2 GiB for raw
* LZMA2 streams, and for .xz files the maximum is 1.5 GiB. This
* limitation is due to Java using signed 32-bit integers for array
* indexing. The limitation shouldn't matter much in practice since so
* huge dictionaries are not normally used.
*/
public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15;
private static final int COMPRESSED_SIZE_MAX = 1 << 16;
private DataInputStream in;
private final LZDecoder lz;
private final RangeDecoderFromBuffer rc
= new RangeDecoderFromBuffer(COMPRESSED_SIZE_MAX);
private LZMADecoder lzma;
private int uncompressedSize = 0;
private boolean isLZMAChunk;
private boolean needDictReset = true;
private boolean needProps = true;
private boolean endReached = false;
private IOException exception = null;
private final byte[] tempBuf = new byte[1];
/**
* Gets approximate decompressor memory requirements as kibibytes for
* the given dictionary size.
*
* @param dictSize LZMA2 dictionary size as bytes, must be
* in the range [DICT_SIZE_MIN
,
* DICT_SIZE_MAX
]
*
* @return approximate memory requirements as kibibytes (KiB)
*/
public static int getMemoryUsage(int dictSize) {
// The base state is around 30-40 KiB (probabilities etc.),
// range decoder needs COMPRESSED_SIZE_MAX bytes for buffering,
// and LZ decoder needs a dictionary buffer.
return 40 + COMPRESSED_SIZE_MAX / 1024 + getDictSize(dictSize) / 1024;
}
private static int getDictSize(int dictSize) {
if (dictSize < DICT_SIZE_MIN || dictSize > DICT_SIZE_MAX)
throw new IllegalArgumentException(
"Unsupported dictionary size " + dictSize);
// Round dictionary size upward to a multiple of 16. This way LZMA
// can use LZDecoder.getPos() for calculating LZMA's posMask.
// Note that this check is needed only for raw LZMA2 streams; it is
// redundant with .xz.
return (dictSize + 15) & ~15;
}
/**
* Creates a new input stream that decompresses raw LZMA2 data
* from in
.
*
* The caller needs to know the dictionary size used when compressing;
* the dictionary size isn't stored as part of a raw LZMA2 stream.
*
* Specifying a too small dictionary size will prevent decompressing
* the stream. Specifying a too big dictionary is waste of memory but
* decompression will work.
*
* There is no need to specify a dictionary bigger than
* the uncompressed size of the data even if a bigger dictionary
* was used when compressing. If you know the uncompressed size
* of the data, this might allow saving some memory.
*
* @param in input stream from which LZMA2-compressed
* data is read
*
* @param dictSize LZMA2 dictionary size as bytes, must be
* in the range [DICT_SIZE_MIN
,
* DICT_SIZE_MAX
]
*/
public LZMA2InputStream(InputStream in, int dictSize) {
this(in, dictSize, null);
}
/**
* Creates a new LZMA2 decompressor using a preset dictionary.
*
* This is like LZMA2InputStream(InputStream, int)
except
* that the dictionary may be initialized using a preset dictionary.
* If a preset dictionary was used when compressing the data, the
* same preset dictionary must be provided when decompressing.
*
* @param in input stream from which LZMA2-compressed
* data is read
*
* @param dictSize LZMA2 dictionary size as bytes, must be
* in the range [DICT_SIZE_MIN
,
* DICT_SIZE_MAX
]
*
* @param presetDict preset dictionary or null
* to use no preset dictionary
*/
public LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict) {
// Check for null because otherwise null isn't detect
// in this constructor.
if (in == null)
throw new NullPointerException();
this.in = new DataInputStream(in);
this.lz = new LZDecoder(getDictSize(dictSize), presetDict);
if (presetDict != null && presetDict.length > 0)
needDictReset = false;
}
/**
* Decompresses the next byte from this input stream.
*
* Reading lots of data with read()
from this input stream
* may be inefficient. Wrap it in java.io.BufferedInputStream
* if you need to read lots of data one byte at a time.
*
* @return the next decompressed byte, or -1
* to indicate the end of the compressed stream
*
* @throws CorruptedInputException
*
* @throws XZIOException if the stream has been closed
*
* @throws EOFException
* compressed input is truncated or corrupt
*
* @throws IOException may be thrown by in
*/
public int read() throws IOException {
return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
}
/**
* Decompresses into an array of bytes.
*
* If len
is zero, no bytes are read and 0
* is returned. Otherwise this will block until len
* bytes have been decompressed, the end of the LZMA2 stream is reached,
* or an exception is thrown.
*
* @param buf target buffer for uncompressed data
* @param off start offset in buf
* @param len maximum number of uncompressed bytes to read
*
* @return number of bytes read, or -1
to indicate
* the end of the compressed stream
*
* @throws CorruptedInputException
*
* @throws XZIOException if the stream has been closed
*
* @throws EOFException
* compressed input is truncated or corrupt
*
* @throws IOException may be thrown by in
*/
public int read(byte[] buf, int off, int len) throws IOException {
if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
throw new IndexOutOfBoundsException();
if (len == 0)
return 0;
if (in == null)
throw new XZIOException("Stream closed");
if (exception != null)
throw exception;
if (endReached)
return -1;
try {
int size = 0;
while (len > 0) {
if (uncompressedSize == 0) {
decodeChunkHeader();
if (endReached)
return size == 0 ? -1 : size;
}
int copySizeMax = Math.min(uncompressedSize, len);
if (!isLZMAChunk) {
lz.copyUncompressed(in, copySizeMax);
} else {
lz.setLimit(copySizeMax);
lzma.decode();
if (!rc.isInBufferOK())
throw new CorruptedInputException();
}
int copiedSize = lz.flush(buf, off);
off += copiedSize;
len -= copiedSize;
size += copiedSize;
uncompressedSize -= copiedSize;
if (uncompressedSize == 0)
if (!rc.isFinished() || lz.hasPending())
throw new CorruptedInputException();
}
return size;
} catch (IOException e) {
exception = e;
throw e;
}
}
private void decodeChunkHeader() throws IOException {
int control = in.readUnsignedByte();
if (control == 0x00) {
endReached = true;
return;
}
if (control >= 0xE0 || control == 0x01) {
needProps = true;
needDictReset = false;
lz.reset();
} else if (needDictReset) {
throw new CorruptedInputException();
}
if (control >= 0x80) {
isLZMAChunk = true;
uncompressedSize = (control & 0x1F) << 16;
uncompressedSize += in.readUnsignedShort() + 1;
int compressedSize = in.readUnsignedShort() + 1;
if (control >= 0xC0) {
needProps = false;
decodeProps();
} else if (needProps) {
throw new CorruptedInputException();
} else if (control >= 0xA0) {
lzma.reset();
}
rc.prepareInputBuffer(in, compressedSize);
} else if (control > 0x02) {
throw new CorruptedInputException();
} else {
isLZMAChunk = false;
uncompressedSize = in.readUnsignedShort() + 1;
}
}
private void decodeProps() throws IOException {
int props = in.readUnsignedByte();
if (props > (4 * 5 + 4) * 9 + 8)
throw new CorruptedInputException();
int pb = props / (9 * 5);
props -= pb * 9 * 5;
int lp = props / 9;
int lc = props - lp * 9;
if (lc + lp > 4)
throw new CorruptedInputException();
lzma = new LZMADecoder(lz, rc, lc, lp, pb);
}
/**
* Returns the number of uncompressed bytes that can be read
* without blocking. The value is returned with an assumption
* that the compressed input data will be valid. If the compressed
* data is corrupt, CorruptedInputException
may get
* thrown before the number of bytes claimed to be available have
* been read from this input stream.
*
* In LZMA2InputStream, the return value will be non-zero when the
* decompressor is in the middle of an LZMA2 chunk. The return value
* will then be the number of uncompressed bytes remaining from that
* chunk.
*
* @return the number of uncompressed bytes that can be read
* without blocking
*/
public int available() throws IOException {
if (in == null)
throw new XZIOException("Stream closed");
if (exception != null)
throw exception;
return uncompressedSize;
}
/**
* Closes the stream and calls in.close()
.
* If the stream was already closed, this does nothing.
*
* @throws IOException if thrown by in.close()
*/
public void close() throws IOException {
if (in != null) {
try {
in.close();
} finally {
in = null;
}
}
}
}