
// Source listing: io.deephaven.io.streams.SevenZipInputStream (Maven / Gradle / Ivy)
//
// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
//
package io.deephaven.io.streams;
import SevenZip.CRC;
import SevenZip.Compression.LZ.OutWindow;
import SevenZip.Compression.LZMA.Base;
import SevenZip.Compression.RangeCoder.BitTreeDecoder;
import io.deephaven.io.InputStreamFactory;
import io.deephaven.base.verify.Assert;
import io.deephaven.base.verify.Require;
import io.deephaven.base.Reference;
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.zip.ZipException;
// --------------------------------------------------------------------
/**
* {@link InputStream} that can read 7zip archives (.7z) (partial implementation).
*/
public class SevenZipInputStream extends InputStream {
/** Mask selecting the low 8 bits of an {@code int}. */
public static final int UBYTE_TO_INT = 0xFF;
/** Mask selecting the low 8 bits of a {@code long}; used below to treat signed bytes as unsigned. */
public static final long UBYTE_TO_LONG = 0xFFL;
/** Mask selecting the low 16 bits of an {@code int}. */
public static final int USHORT_TO_INT = 0xFFFF;
/** Mask selecting the low 32 bits of a {@code long}. */
public static final long UINT_TO_LONG = 0xFFFFFFFFL;
// 7zHeader.cpp
/** The six archive signature bytes; the first two are ASCII '7' and 'z'. */
private static final byte[] SIGNATURE = {0x37, 0x7A, (byte) 0xBC, (byte) 0xAF, (byte) 0x27, (byte) 0x1C};
/** {@link #SIGNATURE} packed into a long with the first signature byte in the least significant position. */
public static final long SIGNATURE_AS_LONG =
(SIGNATURE[0] & UBYTE_TO_LONG) | ((SIGNATURE[1] & UBYTE_TO_LONG) << 8)
| ((SIGNATURE[2] & UBYTE_TO_LONG) << 16) | ((SIGNATURE[3] & UBYTE_TO_LONG) << 24)
| ((SIGNATURE[4] & UBYTE_TO_LONG) << 32) | ((SIGNATURE[5] & UBYTE_TO_LONG) << 40);
/** Number of signature bytes. */
private static final int SIGNATURE_LENGTH = SIGNATURE.length; // 7zHeader.h
/** The only major archive version accepted by {@code readDatabase}. */
private static final byte ARCHIVE_VER_MAJOR = 0; // 7zHeader.h
/** Largest value {@code readNum} accepts before reporting "out of range". */
private static final int NUM_MAX = 0x7FFFFFFF; // 7zItem.h
/** All bits set (-1 as an int); stored as the folder index of files that belong to no folder. */
private static final int NUM_NO_INDEX = 0xFFFFFFFF; // 7zItem.h
/** Size of the start header: two 8-byte values (next header offset, next header size) and a 4-byte CRC. */
public static final int START_HEADER_LENGTH = 8 + 8 + 4; // 7zHeader.h
/** Size of the CRC that protects the start header. */
public static final int START_HEADER_CRC_LENGTH = 4;
/** Size of the version info: one major and one minor version byte. */
private static final int VERSION_INFO_LENGTH = 2;
// ################################################################
// Archive header / "database" parsing
// 7zHeader.h (NID::EEnum)
/**
* Identifiers of the blocks ("properties") that make up an archive header. The parsing code compares values
* returned by {@code readId} against these constants.
*/
private interface BlockType {
int END = 0x00;
int HEADER = 0x01;
int ARCHIVE_PROPERTIES = 0x02;
int ADDITIONAL_STREAMS_INFO = 0x03;
int MAIN_STREAMS_INFO = 0x04;
int FILES_INFO = 0x05;
int PACK_INFO = 0x06;
int UNPACK_INFO = 0x07;
int SUBSTREAMS_INFO = 0x08;
int SIZE = 0x09;
int CRC = 0x0A;
int FOLDER = 0x0B;
int CODERS_UNPACK_SIZE = 0x0C;
int NUM_UNPACK_STREAM = 0x0D;
int EMPTY_STREAM = 0x0E;
int EMPTY_FILE = 0x0F;
int ANTI = 0x10;
int NAME = 0x11;
int CREATION_TIME = 0x12;
int LAST_ACCESS_TIME = 0x13;
int LAST_WRITE_TIME = 0x14;
int WIN_ATTRIBUTES = 0x15;
int COMMENT = 0x16;
int ENCODED_HEADER = 0x17;
int START_POS = 0x18;
}
/**
* An archive consists of:
*
* - A list of packed streams.
*
* - A list of folders, each of which consumes one or more of the packed streams (in order) and produces one
* output unpacked stream.
*
* - A list of substream counts and lengths. Each unpacked stream (in order) is split into one or more substreams.
*
* - A list of files. Each file (in order) may or may not consume one substream. (Directories and anti-files do
* not consume a stream.) Files have things like names, timestamps, attributes, etc.
*
*/
// 7zItem.h
private static class ArchiveDatabase {
    // The lists below are read by index (get(i)) in loops throughout the header parsing code, so they are
    // backed by ArrayList: indexed access on a LinkedList is linear per call.
    // NOTE(review): the element types are not declared here; usage elsewhere in this file reads them as
    // Long, Integer, Folder, Integer and FileItem respectively - confirm and parameterize.

    /** Lengths of each packed stream. */
    List PackSizes = new ArrayList();
    /** CRCs of each packed stream. */
    List PackCRCs = new ArrayList();
    /** List of folders in archive. */
    List Folders = new ArrayList();
    /** Number of substreams in each unpacked stream (folder). */
    List NumUnpackStreamsVector = new ArrayList();
    /** List of files in archive. */
    List Files = new ArrayList();

    /** Empties every list so the database can be filled again from scratch. */
    public void clear() {
        PackSizes.clear();
        PackCRCs.clear();
        Folders.clear();
        NumUnpackStreamsVector.clear();
        Files.clear();
    }
}
// 7zIn.h
private static class ArchiveDatabaseEx extends ArchiveDatabase {
    InArchiveInfo ArchiveInfo = new InArchiveInfo();
    // The index tables are read with get(i); ArrayList keeps those lookups constant-time.
    /** Offsets to the beginning of each packed stream, relative to {@code ArchiveInfo.DataStartPosition}. */
    List<Long> PackStreamStartPositions = new ArrayList<>();
    /** Index of the first packed stream for this folder. */
    List<Integer> FolderStartPackStreamIndex = new ArrayList<>();
    /** Index of the first file for this folder. */
    List<Integer> FolderStartFileIndex = new ArrayList<>();
    /** Index of the folder containing this file ({@code NUM_NO_INDEX} for files outside any folder). */
    List<Integer> FileIndexToFolderIndexMap = new ArrayList<>();

    /** Clears the base database, the archive info and all derived index tables. */
    @Override
    public void clear() {
        super.clear();
        ArchiveInfo.clear();
        PackStreamStartPositions.clear();
        FolderStartPackStreamIndex.clear();
        FolderStartFileIndex.clear();
        FileIndexToFolderIndexMap.clear();
    }

    /**
     * Rebuilds all derived index tables from the parsed folders, pack sizes and files.
     *
     * @throws ZipException if the files reference more folders than the archive declares
     */
    public void fill() throws ZipException {
        fillFolderStartPackStream();
        fillStartPos();
        fillFolderStartFileIndex();
    }

    /** Records, for each folder, the running count of packed streams consumed by the folders before it. */
    private void fillFolderStartPackStream() {
        FolderStartPackStreamIndex.clear();
        int startPos = 0;
        for (Folder folder : Folders) {
            FolderStartPackStreamIndex.add(startPos);
            startPos += folder.PackStreams.size();
        }
    }

    /** Records, for each packed stream, the sum of the sizes of the packed streams before it. */
    private void fillStartPos() {
        PackStreamStartPositions.clear();
        long startPos = 0;
        for (Long packSize : PackSizes) {
            PackStreamStartPositions.add(startPos);
            startPos += packSize;
        }
    }

    /**
     * Builds the file-to-folder map and the first-file-per-folder table.
     *
     * @throws ZipException if a file needs a folder but all folders have been used up
     */
    private void fillFolderStartFileIndex() throws ZipException {
        FolderStartFileIndex.clear();
        FileIndexToFolderIndexMap.clear();
        int folderIndex = 0;
        int indexInFolder = 0;
        for (int i = 0; i < Files.size(); i++) {
            FileItem file = Files.get(i);
            boolean emptyStream = !file.HasStream;
            if (emptyStream && indexInFolder == 0) {
                // A stream-less file seen between folders belongs to no folder.
                FileIndexToFolderIndexMap.add(NUM_NO_INDEX);
                continue;
            }
            if (indexInFolder == 0) {
                // v3.13 incorrectly worked with empty folders
                // v4.07: Loop for skipping empty folders
                while (true) {
                    if (folderIndex >= Folders.size()) {
                        throw new ZipException("Bad header.");
                    }
                    FolderStartFileIndex.add(i); // check it
                    if (NumUnpackStreamsVector.get(folderIndex) != 0) {
                        break;
                    }
                    folderIndex++;
                }
            }
            FileIndexToFolderIndexMap.add(folderIndex);
            if (emptyStream) {
                continue;
            }
            indexInFolder++;
            if (indexInFolder >= NumUnpackStreamsVector.get(folderIndex)) {
                // Every substream of this folder has been assigned; move on to the next folder.
                folderIndex++;
                indexInFolder = 0;
            }
        }
    }

    /**
     * @param folderIndex index of the folder
     * @param indexInFolder index of the packed stream within that folder
     * @return position of that packed stream: data start position plus the stream's start offset
     */
    public long getFolderStreamPos(int folderIndex, int indexInFolder) {
        return ArchiveInfo.DataStartPosition
                + PackStreamStartPositions.get(FolderStartPackStreamIndex.get(folderIndex) + indexInFolder);
    }

    /**
     * @param folderIndex index of the folder
     * @return sum of the sizes of all packed streams consumed by the folder
     */
    public long getFolderFullPackSize(int folderIndex) {
        int packStreamIndex = FolderStartPackStreamIndex.get(folderIndex);
        Folder folder = Folders.get(folderIndex);
        long size = 0;
        for (int i = 0; i < folder.PackStreams.size(); i++) {
            size += PackSizes.get(packStreamIndex + i);
        }
        return size;
    }

    /**
     * @param folderIndex index of the folder
     * @param streamIndex index of the packed stream within that folder
     * @return size of that packed stream
     */
    public long getFolderPackStreamSize(int folderIndex, int streamIndex) {
        return PackSizes.get(FolderStartPackStreamIndex.get(folderIndex) + streamIndex);
    }

    /**
     * @param fileIndex index of the file
     * @return the full packed size of the file's folder if the file is the first one in that folder, else 0
     */
    public long getFilePackSize(int fileIndex) {
        int folderIndex = FileIndexToFolderIndexMap.get(fileIndex);
        // NUM_NO_INDEX is negative, so files without a folder fall through to 0.
        if (folderIndex >= 0) {
            if (FolderStartFileIndex.get(folderIndex) == fileIndex) {
                return getFolderFullPackSize(folderIndex);
            }
        }
        return 0;
    }
}
// 7zHeader.h
/** Archive format version: the two bytes read first by {@code readDatabase}. */
private static class ArchiveVersion {
// Compared against ARCHIVE_VER_MAJOR; a mismatch rejects the archive.
byte Major;
byte Minor;
}
// 7zIn.h
/** Positions and bookkeeping collected while reading the archive headers. */
private static class InArchiveInfo {
ArchiveVersion Version = new ArchiveVersion();
// Stream position at which reading of the database began (as passed to readDatabase).
long StartPosition;
// StartPosition plus the lengths of the version info, start-header CRC and start header.
long StartPositionAfterHeader;
// Data offset reported by the main streams info block.
long DataStartPosition;
// Data offset reported while decoding packed header / additional streams.
long DataStartPosition2;
// Block type ids recorded by readHeader while parsing the files info block.
List FileInfoPopIDs = new LinkedList();
/** Forgets the recorded block type ids; the position fields are left untouched. */
public void clear() {
FileInfoPopIDs.clear();
}
}
/**
* A Folder is one compressed chunk of data. A folder has one codec and the cyphertext is a small number of packed
* streams (usually one). Since the plaintext is one stream, a folder has one CRC. The folder's plaintext stream
* will often be the concatenation of a bunch of files, but the Folder knows nothing of this.
*
* A codec (my term) is a small graph of coders. A coder does a transform from n input streams to m
* output streams. Bind pairs attach the output stream of one coder to the input stream of another coder. A
* codec always has one (unbound) output stream, but can have many (unbound) input streams. The most common codec
* consists of one coder with one input stream, one output stream, and no bind pairs.
*
*
* Input streams and output streams are numbered in the order the coders are listed. PackStreams is used to map from
* the (implied) list of packed (input) streams in the archive to the input streams of the coders.
*/
// 7zItem.h
private static class Folder {
    // All four lists are scanned with get(i); ArrayList keeps each lookup constant-time.
    /**
     * List of coders. Input and output stream indices (as referenced by the BindPairs) are defined by the order of
     * this list.
     */
    List<CoderInfo> Coders = new ArrayList<>();
    /**
     * List of BindPairs. Bind pairs attach the output stream of one coder to the input stream of another coder.
     * Stream indices are defined by the coders list.
     */
    List<BindPair> BindPairs = new ArrayList<>();
    /**
     * Map [ packed stream index (in this folder) -> input stream index (in the list of coders) ]
     */
    List<Integer> PackStreams = new ArrayList<>();
    /**
     * Lengths of the output streams from the coders. Includes bound and unbound output streams.
     */
    List<Long> UnpackSizes = new ArrayList<>();
    /** CRC for the entire codec output stream; {@code null} when the archive does not define one. */
    Integer UnpackCRC;

    /**
     * Returns the size of the folder's unpacked output: the size of the last output stream that is not bound to
     * another coder's input.
     *
     * @return the unpacked size, or 0 when no unpack sizes are known
     * @throws ZipException if every output stream is bound, so no final output can be identified
     */
    public long getUnpackSize() throws ZipException {
        if (UnpackSizes.isEmpty()) {
            return 0;
        }
        for (int i = UnpackSizes.size() - 1; i >= 0; i--) {
            if (findBindPairForOutStream(i) < 0) {
                return UnpackSizes.get(i);
            }
        }
        throw new ZipException("Could not determine unpacked size for folder.");
    }

    /** @return total number of output streams over all coders */
    public int getNumOutStreams() {
        int total = 0;
        for (CoderInfo coder : Coders) {
            total += coder.NumOutStreams;
        }
        return total;
    }

    /**
     * @param inStreamIndex input stream index to look for
     * @return index of the bind pair whose input is {@code inStreamIndex}, or -1 if that input is unbound
     */
    public int findBindPairForInStream(long inStreamIndex) {
        for (int i = 0; i < BindPairs.size(); i++) {
            if (BindPairs.get(i).InIndex == inStreamIndex) {
                return i;
            }
        }
        return -1;
    }

    /**
     * @param outStreamIndex output stream index to look for
     * @return index of the bind pair whose output is {@code outStreamIndex}, or -1 if that output is unbound
     */
    private int findBindPairForOutStream(int outStreamIndex) {
        for (int i = 0; i < BindPairs.size(); i++) {
            if (BindPairs.get(i).OutIndex == outStreamIndex) {
                return i;
            }
        }
        return -1;
    }

    /**
     * @param inStreamIndex input stream index to look for
     * @return position in {@link #PackStreams} that maps to {@code inStreamIndex}, or -1 if there is none
     */
    public int findPackStreamArrayIndex(int inStreamIndex) {
        for (int i = 0; i < PackStreams.size(); i++) {
            if (PackStreams.get(i) == inStreamIndex) {
                return i;
            }
        }
        return -1;
    }
}
// 7zItem.h
private static class FileItem {
    // The three timestamps are NT file times.
    long CreationTime;
    long LastWriteTime;
    long LastAccessTime;
    long UnPackSize;
    long StartPos;
    int Attributes;
    /** CRC of the file's data; may be {@code null}. */
    Integer FileCRC;
    String Name;
    /**
     * Test it !!! it means that there is stream in some folder. It can be empty stream.
     */
    boolean HasStream;
    boolean IsDirectory;
    boolean IsAnti;
    // Each flag below says whether the matching value field above carries a real value.
    boolean AreAttributesDefined;
    boolean IsCreationTimeDefined;
    boolean IsLastWriteTimeDefined;
    boolean IsLastAccessTimeDefined;
    boolean IsStartPosDefined;

    /** Stores the attributes and marks them as defined. */
    void setAttributes(int attributes) {
        this.Attributes = attributes;
        this.AreAttributesDefined = true;
    }

    /** Stores the creation time and marks it as defined. */
    void setCreationTime(long creationTime) {
        this.CreationTime = creationTime;
        this.IsCreationTimeDefined = true;
    }

    /** Stores the last-write time and marks it as defined. */
    void setLastWriteTime(long lastWriteTime) {
        this.LastWriteTime = lastWriteTime;
        this.IsLastWriteTimeDefined = true;
    }

    /** Stores the last-access time and marks it as defined. */
    void setLastAccessTime(long lastAccessTime) {
        this.LastAccessTime = lastAccessTime;
        this.IsLastAccessTimeDefined = true;
    }
}
// 7zItem.h
private static class CoderInfo {
    int NumInStreams;
    int NumOutStreams;
    /** Alternative method descriptions read for this coder. */
    List AltCoders = new LinkedList();

    /** @return {@code true} only when the coder has exactly one input and exactly one output stream */
    public boolean isSimpleCoder() {
        if (NumInStreams != 1) {
            return false;
        }
        return NumOutStreams == 1;
    }
}
// 7zItem.h
/** One alternative coding method of a coder: its method id and optional raw properties. */
private static class AltCoderInfo {
MethodID MethodID = new MethodID();
// Remains null unless the folder description carries a properties blob for this method.
byte[] Properties;
}
// 7zMethodID.h
private static class MethodID {
    /** Raw identifier bytes; {@code null} until assigned. */
    byte[] ID;
    /** Number of identifier bytes. */
    byte IDSize;

    /** Creates an id with no bytes assigned yet. */
    public MethodID() {}

    /**
     * Creates an id from the given bytes. The array is stored as is, not copied.
     *
     * @param id identifier bytes
     */
    public MethodID(byte... id) {
        this.ID = id;
        this.IDSize = (byte) id.length;
    }

    /** Two ids are equal when they have the same exact class, the same size and the same bytes. */
    @Override
    public boolean equals(Object that) {
        if (that == this) {
            return true;
        }
        if (that == null || that.getClass() != getClass()) {
            return false;
        }
        final MethodID other = (MethodID) that;
        return IDSize == other.IDSize && Arrays.equals(ID, other.ID);
    }

    /** Hash combining the size and the identifier bytes, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return 29 * IDSize + Arrays.hashCode(ID);
    }
}
// 7zItem.h
/** Connects one coder's output stream to another coder's input stream within a folder. */
private static class BindPair {
// Index of the bound input stream.
int InIndex;
// Index of the bound output stream.
int OutIndex;
}
// 7zIn.h
private static class InArchive {
// 7zIn.cpp
/**
 * Reads the archive headers from {@code inputStream} (positioned just after the signature, as far as this
 * method is concerned: the first bytes it reads are the version info) and fills {@code database}.
 *
 * @param inputStream stream to read the start header and the next header from
 * @param database database to clear and fill
 * @param inputStreamFactory passed on to the decoder when the header itself is packed
 * @param nBeginStreamPosition recorded as the archive start position
 * @throws ZipException on a version mismatch, CRC mismatch, invalid header size/offset or malformed header
 * @throws IOException if reading from the stream fails
 */
public void readDatabase(InputStream inputStream, ArchiveDatabaseEx database,
        InputStreamFactory inputStreamFactory, int nBeginStreamPosition) throws IOException {
    database.clear();
    database.ArchiveInfo.StartPosition = nBeginStreamPosition;

    // Version info: major byte, minor byte.
    ByteBuffer byteBuffer = readToByteBuffer(inputStream, VERSION_INFO_LENGTH);
    database.ArchiveInfo.Version.Major = byteBuffer.get();
    database.ArchiveInfo.Version.Minor = byteBuffer.get();
    if (ARCHIVE_VER_MAJOR != database.ArchiveInfo.Version.Major) {
        throw new ZipException("Archive version mismatch.");
    }

    // CRC of the start header, followed by the start header itself.
    byteBuffer = readToByteBuffer(inputStream, START_HEADER_CRC_LENGTH);
    int nStartHeaderCrc = byteBuffer.getInt();
    byteBuffer = readToByteBuffer(inputStream, START_HEADER_LENGTH);
    CRC crc = new CRC();
    crc.Update(byteBuffer.array());
    long nNextHeaderOffset = byteBuffer.getLong();
    long nNextHeaderSize = byteBuffer.getLong();
    int nNextHeaderCrc = byteBuffer.getInt();
    database.ArchiveInfo.StartPositionAfterHeader = database.ArchiveInfo.StartPosition + VERSION_INFO_LENGTH
            + START_HEADER_CRC_LENGTH + START_HEADER_LENGTH;
    if (nStartHeaderCrc != crc.GetDigest()) {
        throw new ZipException("Header CRC mismatch.");
    }
    if (0 == nNextHeaderSize) {
        return; // no entries
    }
    // Both values are unsigned 64-bit on disk; a set top bit shows up here as a negative long and must be
    // rejected just like a size that does not fit in an int.
    if (nNextHeaderSize < 0 || nNextHeaderSize > Integer.MAX_VALUE) {
        throw new ZipException("Invalid header size.");
    }
    if (nNextHeaderOffset < 0) {
        throw new ZipException("Invalid header offset.");
    }

    skipFully(inputStream, nNextHeaderOffset);
    byteBuffer = readToByteBuffer(inputStream, (int) nNextHeaderSize);
    crc.Init();
    crc.Update(byteBuffer.array());
    if (nNextHeaderCrc != crc.GetDigest()) {
        throw new ZipException("Header CRC mismatch.");
    }

    // The next header is either a plain HEADER or an ENCODED_HEADER that must be unpacked first; unpacking
    // may again yield an encoded header, hence the loop.
    List<ByteBuffer> dataVector = new LinkedList<>();
    while (true) {
        long type = readId(byteBuffer);
        if (type == BlockType.HEADER) {
            break;
        }
        if (type != BlockType.ENCODED_HEADER) {
            throw new ZipException("Bad block type in header.");
        }
        Reference<Long> startPositionAfterHeaderRef =
                new Reference<>(database.ArchiveInfo.StartPositionAfterHeader);
        Reference<Long> dataStartPosition2Ref = new Reference<>(database.ArchiveInfo.DataStartPosition2);
        readAndDecodePackedStreams(byteBuffer, startPositionAfterHeaderRef, dataStartPosition2Ref, dataVector,
                inputStreamFactory);
        database.ArchiveInfo.StartPositionAfterHeader = startPositionAfterHeaderRef.getValue();
        database.ArchiveInfo.DataStartPosition2 = dataStartPosition2Ref.getValue();
        if (dataVector.isEmpty()) {
            return;
        }
        if (dataVector.size() > 1) {
            throw new ZipException("Bad header.");
        }
        byteBuffer = dataVector.remove(0);
    }
    readHeader(byteBuffer, database, inputStreamFactory);
}
// 7zIn.cpp
/**
* Parses an (unpacked) header block into {@code database}: optional archive properties, optional additional
* streams, the main streams info and finally the per-file properties.
*
* @param byteBuffer buffer positioned just after the HEADER id
* @param database database receiving the parsed streams, folders and files
* @param inputStreamFactory passed on to the decoder when additional streams must be unpacked
* @throws IOException (including ZipException) when the header contains an unexpected block type
*/
private void readHeader(ByteBuffer byteBuffer, ArchiveDatabaseEx database,
InputStreamFactory inputStreamFactory) throws IOException {
long nBlockType = readId(byteBuffer);
// Archive properties are read only to be skipped.
if (BlockType.ARCHIVE_PROPERTIES == nBlockType) {
readArchiveProperties(byteBuffer, database.ArchiveInfo);
nBlockType = readId(byteBuffer);
}
// Buffers decoded from additional streams; later properties may choose to read from one of these.
List dataVector = new ArrayList();
if (BlockType.ADDITIONAL_STREAMS_INFO == nBlockType) {
Reference startPositionAfterHeaderRef =
new Reference(database.ArchiveInfo.StartPositionAfterHeader);
Reference dataStartPosition2Ref = new Reference(database.ArchiveInfo.DataStartPosition2);
readAndDecodePackedStreams(byteBuffer, startPositionAfterHeaderRef, dataStartPosition2Ref, dataVector,
inputStreamFactory);
database.ArchiveInfo.StartPositionAfterHeader = startPositionAfterHeaderRef.getValue();
database.ArchiveInfo.DataStartPosition2 = dataStartPosition2Ref.getValue();
database.ArchiveInfo.DataStartPosition2 += database.ArchiveInfo.StartPositionAfterHeader;
nBlockType = readId(byteBuffer);
}
// Per-substream unpacked sizes and CRCs, consumed in order by the files that have a stream.
List unPackSizes = new LinkedList();
List digests = new LinkedList();
if (BlockType.MAIN_STREAMS_INFO == nBlockType) {
Reference dataStartPositionRef = new Reference(database.ArchiveInfo.DataStartPosition);
readStreamsInfo(byteBuffer, dataVector, dataStartPositionRef, database.PackSizes, database.PackCRCs,
database.Folders, database.NumUnpackStreamsVector, unPackSizes, digests);
database.ArchiveInfo.DataStartPosition = dataStartPositionRef.getValue();
nBlockType = readId(byteBuffer);
} else {
// No main streams info: treat every folder as holding exactly one substream.
for (Folder folder : database.Folders) {
database.NumUnpackStreamsVector.add(1);
unPackSizes.add(folder.getUnpackSize());
digests.add(folder.UnpackCRC);
}
}
database.Files.clear();
if (nBlockType == BlockType.END) {
return;
}
if (nBlockType != BlockType.FILES_INFO) {
throw new ZipException("Bad block type in header.");
}
int numFiles = readNum(byteBuffer);
int i;
for (i = 0; i < numFiles; i++) {
database.Files.add(new FileItem());
}
// Record which kinds of per-file information this archive provides.
database.ArchiveInfo.FileInfoPopIDs.add((long) BlockType.SIZE);
if (!database.PackSizes.isEmpty()) {
database.ArchiveInfo.FileInfoPopIDs.add((long) BlockType.PACK_INFO);
}
if (numFiles > 0 && !digests.isEmpty()) {
database.ArchiveInfo.FileInfoPopIDs.add((long) BlockType.CRC);
}
// Default: every file has a stream, until an EMPTY_STREAM block says otherwise.
List emptyStreamVector = new ArrayList(numFiles);
for (i = 0; i < numFiles; i++) {
emptyStreamVector.add(false);
}
// Both vectors are indexed by position among the stream-less files, not by file index.
List emptyFileVector = new LinkedList();
List antiFileVector = new LinkedList();
int numEmptyStreams = 0;
// Each property is: id, size, payload. The loop ends at the END id.
while (true) {
long type = readId(byteBuffer);
if (type == BlockType.END) {
break;
}
long size = readNumber(byteBuffer);
// Optimistically record the id; the default branch below removes it again for unknown properties.
database.ArchiveInfo.FileInfoPopIDs.add(type);
switch ((int) type) {
case BlockType.NAME: {
ByteBuffer workingByteBuffer = chooseStream(byteBuffer, dataVector);
readFileNames(workingByteBuffer, database.Files);
break;
}
case BlockType.WIN_ATTRIBUTES: {
List boolVector = new ArrayList(database.Files.size());
readBoolVector2(byteBuffer, database.Files.size(), boolVector);
ByteBuffer workingByteBuffer = chooseStream(byteBuffer, dataVector);
// One 4-byte attribute value follows for each file flagged as defined.
for (i = 0; i < numFiles; i++) {
FileItem file = database.Files.get(i);
file.AreAttributesDefined = boolVector.get(i);
if (file.AreAttributesDefined) {
file.Attributes = workingByteBuffer.getInt();
}
}
break;
}
case BlockType.START_POS: {
List boolVector = new ArrayList(database.Files.size());
readBoolVector2(byteBuffer, database.Files.size(), boolVector);
ByteBuffer workingByteBuffer = chooseStream(byteBuffer, dataVector);
// One 8-byte start position follows for each file flagged as defined.
for (i = 0; i < numFiles; i++) {
FileItem file = database.Files.get(i);
file.IsStartPosDefined = boolVector.get(i);
if (file.IsStartPosDefined) {
file.StartPos = workingByteBuffer.getLong();
}
}
break;
}
case BlockType.EMPTY_STREAM: {
readBoolVector(byteBuffer, numFiles, emptyStreamVector);
for (i = 0; i < emptyStreamVector.size(); i++) {
if (emptyStreamVector.get(i)) {
numEmptyStreams++;
}
}
// Pre-fill the defaults used when no EMPTY_FILE / ANTI block follows.
for (i = 0; i < numEmptyStreams; i++) {
emptyFileVector.add(false);
antiFileVector.add(false);
}
break;
}
case BlockType.EMPTY_FILE: {
readBoolVector(byteBuffer, numEmptyStreams, emptyFileVector);
break;
}
case BlockType.ANTI: {
readBoolVector(byteBuffer, numEmptyStreams, antiFileVector);
break;
}
case BlockType.CREATION_TIME:
case BlockType.LAST_WRITE_TIME:
case BlockType.LAST_ACCESS_TIME: {
readTime(byteBuffer, dataVector, database.Files, type);
break;
}
default: {
// Unknown property: undo the id recorded above and skip its payload.
database.ArchiveInfo.FileInfoPopIDs.remove(database.ArchiveInfo.FileInfoPopIDs.size() - 1);
skipData(byteBuffer, size);
}
}
}
// Distribute sizes/CRCs to files with a stream and directory/anti flags to files without one.
int emptyFileIndex = 0;
int sizeIndex = 0;
for (i = 0; i < numFiles; i++) {
FileItem file = database.Files.get(i);
file.HasStream = !emptyStreamVector.get(i);
if (file.HasStream) {
file.IsDirectory = false;
file.IsAnti = false;
file.UnPackSize = unPackSizes.get(sizeIndex);
file.FileCRC = digests.get(sizeIndex);
sizeIndex++;
} else {
// A stream-less entry that is not flagged as an empty file is a directory.
file.IsDirectory = !emptyFileVector.get(emptyFileIndex);
file.IsAnti = antiFileVector.get(emptyFileIndex);
emptyFileIndex++;
file.UnPackSize = 0;
file.FileCRC = null;
}
}
}
// 7zIn.cpp
/**
* Reads one kind of timestamp for every file: a "defined" flag per file, then an 8-byte value for each file
* whose flag is set.
*
* @param byteBuffer buffer holding the flags (and the values, unless an alternate buffer is selected)
* @param alternateByteBuffers buffers that the property may redirect its values to
* @param files files to update
* @param type which timestamp to set: CREATION_TIME, LAST_WRITE_TIME or LAST_ACCESS_TIME; other values
* consume the data but update nothing
* @throws ZipException if selecting the value buffer fails
*/
private void readTime(ByteBuffer byteBuffer, List alternateByteBuffers, List files,
long type) throws ZipException {
List boolVector = new ArrayList(files.size());
readBoolVector2(byteBuffer, files.size(), boolVector);
// From here on the values may come from an alternate buffer.
byteBuffer = chooseStream(byteBuffer, alternateByteBuffers);
for (int i = 0; i < files.size(); i++) {
FileItem file = files.get(i);
long fileTime = 0;
boolean defined = boolVector.get(i);
if (defined) {
fileTime = byteBuffer.getLong();
}
// The "defined" flag is always stored; the value only when it is defined.
switch ((int) type) {
case BlockType.CREATION_TIME:
file.IsCreationTimeDefined = defined;
if (defined) {
file.CreationTime = fileTime;
}
break;
case BlockType.LAST_WRITE_TIME:
file.IsLastWriteTimeDefined = defined;
if (defined) {
file.LastWriteTime = fileTime;
}
break;
case BlockType.LAST_ACCESS_TIME:
file.IsLastAccessTimeDefined = defined;
if (defined) {
file.LastAccessTime = fileTime;
}
break;
}
}
}
// 7zIn.cpp
/**
* Reads one name per file, in file order. Each name is a run of 2-byte chars terminated by a zero char; the
* chars are decoded in the buffer's current byte order.
*
* @param byteBuffer buffer positioned at the first name
* @param files files whose {@code Name} is set
*/
private void readFileNames(ByteBuffer byteBuffer, List files) {
for (FileItem file : files) {
StringBuilder stringBuilder = new StringBuilder();
while (true) {
char c = byteBuffer.getChar();
// The terminator is consumed but not stored.
if (c == '\0') {
break;
}
stringBuilder.append(c);
}
file.Name = stringBuilder.toString();
}
}
// 7zIn.cpp
/**
 * Reads a streams-info block and decodes every folder it describes into memory, appending one little-endian
 * buffer per folder to {@code dataVector}.
 *
 * @param byteBuffer buffer positioned at the streams-info block
 * @param baseOffset base position that the block's data offset is relative to
 * @param dataOffset receives the data offset read from the block
 * @param dataVector receives the decoded buffers, in folder order
 * @param inputStreamFactory source of the packed data handed to the decoder
 * @throws ZipException if a folder's unpacked size is negative or too large, or its CRC does not match
 * @throws IOException if decoding fails
 */
private void readAndDecodePackedStreams(ByteBuffer byteBuffer, Reference<Long> baseOffset,
        Reference<Long> dataOffset, List<ByteBuffer> dataVector, InputStreamFactory inputStreamFactory)
        throws IOException {
    // Read by index below, hence ArrayList rather than LinkedList.
    List<Long> packSizes = new ArrayList<>();
    List<Integer> packCRCs = new ArrayList<>();
    List<Folder> folders = new ArrayList<>();
    List<Integer> numUnPackStreamsInFolders = new ArrayList<>();
    List<Long> unPackSizes = new ArrayList<>();
    List<Integer> digests = new ArrayList<>();
    readStreamsInfo(byteBuffer, null, dataOffset, packSizes, packCRCs, folders, numUnPackStreamsInFolders,
            unPackSizes, digests);

    int packIndex = 0;
    Decoder decoder = new Decoder();
    long dataStartPos = baseOffset.getValue() + dataOffset.getValue();
    for (Folder folder : folders) {
        long unPackSize = folder.getUnpackSize();
        // The whole folder is unpacked into a byte array, so the size must be a non-negative int.
        // (NUM_MAX and Integer.MAX_VALUE are the same value; one check covers both.)
        if (unPackSize < 0 || unPackSize > NUM_MAX) {
            throw new ZipException("Bad header.");
        }
        ByteArrayOutputStream outStream = new ByteArrayOutputStream((int) unPackSize);
        decoder.Decode(inputStreamFactory, dataStartPos,
                packSizes.subList(packIndex, packSizes.size()), folder,
                new SequentialOutStreamWrapper(outStream), null);
        byte[] bytes = outStream.toByteArray();
        if (null != folder.UnpackCRC) {
            CRC crc = new CRC();
            crc.Update(bytes);
            if (folder.UnpackCRC != crc.GetDigest()) {
                throw new ZipException("Bad header.");
            }
        }
        dataVector.add(ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN));
        // Advance past every packed stream this folder consumed.
        for (int j = 0; j < folder.PackStreams.size(); j++) {
            dataStartPos += packSizes.get(packIndex++);
        }
    }
}
// 7zIn.cpp
/**
 * Consumes the archive-properties block without interpreting it: every property up to the END id is skipped.
 *
 * @param byteBuffer buffer positioned at the first property id
 * @param archiveInfo not used
 */
private void readArchiveProperties(ByteBuffer byteBuffer, InArchiveInfo archiveInfo) {
    for (long propertyId = readId(byteBuffer); BlockType.END != propertyId; propertyId = readId(byteBuffer)) {
        skipData(byteBuffer);
    }
}
// 7zIn.cpp
/**
 * Reads a streams-info block, dispatching each sub-block (pack info, unpack info, substreams info) to its
 * reader until the END id is found.
 *
 * @throws ZipException if any other block type is encountered, or a sub-block reader fails
 */
private void readStreamsInfo(ByteBuffer byteBuffer, List alternateByteBuffers,
        Reference dataOffsetRef,
        List packedStreamSizes, List packedStreamDigests, List folders,
        List numUnpackStreamsInFolders, List unpackedStreamSizes, List digests)
        throws ZipException {
    for (;;) {
        final long blockId = readId(byteBuffer);
        if (blockId == BlockType.END) {
            return;
        }
        if (blockId == BlockType.PACK_INFO) {
            readPackInfo(byteBuffer, dataOffsetRef, packedStreamSizes, packedStreamDigests);
            continue;
        }
        if (blockId == BlockType.UNPACK_INFO) {
            readUnpackInfo(byteBuffer, alternateByteBuffers, folders);
            continue;
        }
        if (blockId == BlockType.SUBSTREAMS_INFO) {
            readSubstreamsInfo(byteBuffer, folders, numUnpackStreamsInFolders, unpackedStreamSizes, digests);
            continue;
        }
        throw new ZipException("Bad block type in header.");
    }
}
// 7zIn.cpp
/**
* Reads the substreams-info block: how many substreams each folder's output is split into, the size of each
* substream and, optionally, a CRC per substream.
*
* @param byteBuffer buffer positioned just after the SUBSTREAMS_INFO id
* @param folders folders already read from the unpack info
* @param numUnpackStreamsInFolders cleared, then receives the substream count of each folder
* @param unpackedStreamSizes receives the size of each substream (appended, not cleared)
* @param digests cleared, then receives one CRC (or null) per substream
* @throws ZipException if a count is out of range or a folder's unpacked size cannot be determined
*/
private void readSubstreamsInfo(ByteBuffer byteBuffer, List folders,
List numUnpackStreamsInFolders, List unpackedStreamSizes, List digests)
throws ZipException {
numUnpackStreamsInFolders.clear();
digests.clear(); // (not in original code)
long type;
// Phase 1: optional substream counts; stop at the first SIZE, CRC or END id.
while (true) {
type = readId(byteBuffer);
if (type == BlockType.NUM_UNPACK_STREAM) {
for (int i = 0; i < folders.size(); i++) {
int value = readNum(byteBuffer);
numUnpackStreamsInFolders.add(value);
}
continue;
}
if (type == BlockType.CRC || type == BlockType.SIZE) {
break;
}
if (type == BlockType.END) {
break;
}
skipData(byteBuffer);
}
// No counts given: every folder holds exactly one substream.
if (numUnpackStreamsInFolders.isEmpty()) {
for (int i = 0; i < folders.size(); i++) {
numUnpackStreamsInFolders.add(1);
}
}
// Phase 2: substream sizes. Only the first n-1 sizes of a folder are stored; the last one is whatever
// remains of the folder's total unpacked size.
int i;
for (i = 0; i < numUnpackStreamsInFolders.size(); i++) {
// v3.13 incorrectly worked with empty folders
// v4.07: we check that folder is empty
int numSubstreams = numUnpackStreamsInFolders.get(i);
if (numSubstreams == 0) {
continue;
}
long sum = 0;
for (int j = 1; j < numSubstreams; j++) {
if (type == BlockType.SIZE) {
long size = readNumber(byteBuffer);
unpackedStreamSizes.add(size);
sum += size;
}
}
unpackedStreamSizes.add(folders.get(i).getUnpackSize() - sum);
}
if (type == BlockType.SIZE) {
type = readId(byteBuffer);
}
// Phase 3: CRCs. A folder with a single substream and a known folder CRC needs no stored digest, so only
// the remaining substreams are counted in numDigests.
int numDigests = 0;
int numDigestsTotal = 0;
for (i = 0; i < folders.size(); i++) {
int numSubstreams = numUnpackStreamsInFolders.get(i);
if (numSubstreams != 1 || null == folders.get(i).UnpackCRC) {
numDigests += numSubstreams;
}
numDigestsTotal += numSubstreams;
}
while (true) {
if (type == BlockType.CRC) {
List digests2 = new ArrayList(numDigests);
readHashDigests(byteBuffer, numDigests, digests2);
int digestIndex = 0;
// Merge the stored digests with the folder CRCs that stand in for single-substream folders.
for (i = 0; i < folders.size(); i++) {
int numSubstreams = numUnpackStreamsInFolders.get(i);
Folder folder = folders.get(i);
if (numSubstreams == 1 && null != folder.UnpackCRC) {
digests.add(folder.UnpackCRC);
} else {
for (int j = 0; j < numSubstreams; j++, digestIndex++) {
digests.add(digests2.get(digestIndex));
}
}
}
} else if (type == BlockType.END) {
// No CRC block at all: one undefined (null) digest per substream.
if (digests.isEmpty()) {
for (int k = 0; k < numDigestsTotal; k++) {
digests.add(null);
}
}
return;
} else {
skipData(byteBuffer);
}
type = readId(byteBuffer);
}
}
// 7zIn.cpp
/**
 * Reads the unpack-info block: the folder descriptions, the unpacked size of every coder output stream and,
 * optionally, one CRC per folder.
 *
 * @param byteBuffer buffer positioned just after the UNPACK_INFO id
 * @param alternateByteBuffers buffers the folder descriptions may be redirected to; may be null
 * @param folders cleared, then receives the folders read
 * @throws ZipException if the FOLDER or CODERS_UNPACK_SIZE section is missing, or a value is out of range
 */
private void readUnpackInfo(ByteBuffer byteBuffer, List<ByteBuffer> alternateByteBuffers, List<Folder> folders)
        throws ZipException {
    // Reaching END before the expected section means the block is malformed; continuing would interpret
    // unrelated bytes as folder data.
    if (!skipToBlockType(byteBuffer, BlockType.FOLDER)) {
        throw new ZipException("Bad header.");
    }
    final int numFolders = readNum(byteBuffer);
    final ByteBuffer workingByteBuffer = chooseStream(byteBuffer, alternateByteBuffers);
    folders.clear();
    final List<Folder> readFolders = new ArrayList<>();
    for (int nIndex = 0; nIndex < numFolders; nIndex++) {
        Folder folder = new Folder();
        folders.add(folder);
        readFolders.add(folder);
        readNextFolderItem(workingByteBuffer, folder);
    }

    if (!skipToBlockType(byteBuffer, BlockType.CODERS_UNPACK_SIZE)) {
        throw new ZipException("Bad header.");
    }
    // One size per coder output stream, folder by folder.
    for (Folder folder : readFolders) {
        final int numOutStreams = folder.getNumOutStreams();
        for (int j = 0; j < numOutStreams; j++) {
            long unPackSize = readNumber(byteBuffer);
            folder.UnpackSizes.add(unPackSize);
        }
    }

    while (true) {
        long type = readId(byteBuffer);
        if (type == BlockType.END) {
            return;
        }
        if (type == BlockType.CRC) {
            List<Integer> crcs = new ArrayList<>(numFolders);
            readHashDigests(byteBuffer, numFolders, crcs);
            for (int i = 0; i < numFolders; i++) {
                readFolders.get(i).UnpackCRC = crcs.get(i);
            }
            continue;
        }
        skipData(byteBuffer);
    }
}
// 7zIn.cpp (GetNextFolderItem)
/**
* Reads one folder description: its coders (each with one or more alternative methods), its bind pairs and
* the mapping of packed streams to coder input streams.
*
* @param byteBuffer buffer positioned at the folder's coder count
* @param folder folder to fill
* @throws ZipException if a count or index is out of range
*/
private void readNextFolderItem(ByteBuffer byteBuffer, Folder folder) throws ZipException {
int numCoders = readNum(byteBuffer);
folder.Coders.clear();
int numInStreams = 0;
int numOutStreams = 0;
int i;
for (i = 0; i < numCoders; i++) {
CoderInfo coder = new CoderInfo();
folder.Coders.add(coder);
// One iteration per alternative method of this coder.
while (true) {
AltCoderInfo altCoder = new AltCoderInfo();
coder.AltCoders.add(altCoder);
byte mainByte = byteBuffer.get();
// Low 4 bits of the flag byte: length of the method id that follows.
altCoder.MethodID.IDSize = (byte) (mainByte & 0xF);
altCoder.MethodID.ID = new byte[altCoder.MethodID.IDSize];
byteBuffer.get(altCoder.MethodID.ID);
// Bit 0x10: explicit stream counts follow; otherwise the coder is one-in/one-out.
if ((mainByte & 0x10) != 0) {
coder.NumInStreams = readNum(byteBuffer);
coder.NumOutStreams = readNum(byteBuffer);
} else {
coder.NumInStreams = 1;
coder.NumOutStreams = 1;
}
// Bit 0x20: a length-prefixed properties blob follows.
if ((mainByte & 0x20) != 0) {
int propertiesSize = readNum(byteBuffer);
altCoder.Properties = new byte[propertiesSize];
byteBuffer.get(altCoder.Properties);
}
// Bit 0x80: another alternative method follows for the same coder.
if ((mainByte & 0x80) == 0) {
break;
}
}
numInStreams += coder.NumInStreams;
numOutStreams += coder.NumOutStreams;
}
// Exactly one output stream stays unbound, so there is one bind pair fewer than there are outputs.
int numBindPairs = numOutStreams - 1;
folder.BindPairs.clear();
for (i = 0; i < numBindPairs; i++) {
BindPair bindPair = new BindPair();
bindPair.InIndex = readNum(byteBuffer);
bindPair.OutIndex = readNum(byteBuffer);
folder.BindPairs.add(bindPair);
}
// Every input stream that is not bound must be fed from a packed stream.
int numPackedStreams = numInStreams - numBindPairs;
if (numPackedStreams == 1) {
// Not stored in this case: the single packed stream feeds the first unbound input.
for (int j = 0; j < numInStreams; j++) {
if (folder.findBindPairForInStream(j) < 0) {
folder.PackStreams.add(j);
break;
}
}
} else {
for (i = 0; i < numPackedStreams; i++) {
int packStreamInfo = readNum(byteBuffer);
folder.PackStreams.add(packStreamInfo);
}
}
}
// 7zIn.cpp (CStreamSwitch::Set(CInArchive *, const CObjectVector *)
/**
 * Reads the "external" flag byte that precedes a property's data and returns the buffer the data should be
 * read from: {@code byteBuffer} itself when the flag is zero, otherwise the alternate buffer whose index
 * follows the flag.
 *
 * @param byteBuffer buffer positioned at the flag byte
 * @param alternateByteBuffers candidate external buffers; may be null when none are available
 * @return the buffer to continue reading from
 * @throws ZipException if external data is requested but the index does not name an available buffer
 */
private ByteBuffer chooseStream(ByteBuffer byteBuffer, List<ByteBuffer> alternateByteBuffers)
        throws ZipException {
    if (0 == byteBuffer.get()) {
        return byteBuffer;
    }
    final int dataIndex = readNum(byteBuffer);
    // The index comes straight from the archive; report a bad one as a format error rather than letting it
    // surface as a NullPointerException or IndexOutOfBoundsException.
    if (alternateByteBuffers == null || dataIndex < 0 || dataIndex >= alternateByteBuffers.size()) {
        throw new ZipException("Bad header.");
    }
    return alternateByteBuffers.get(dataIndex);
}
// 7zIn.cpp
/**
 * Reads the pack-info block: the data offset, the size of every packed stream and, optionally, their CRCs.
 *
 * @param byteBuffer buffer positioned just after the PACK_INFO id
 * @param dataOffsetRef receives the data offset
 * @param packSizes cleared, then receives one size per packed stream
 * @param packCRCs receives one CRC (or null) per packed stream
 * @throws ZipException if the SIZE section is missing or the stream count is out of range
 */
private void readPackInfo(ByteBuffer byteBuffer, Reference<Long> dataOffsetRef, List<Long> packSizes,
        List<Integer> packCRCs) throws ZipException {
    dataOffsetRef.setValue(readNumber(byteBuffer));
    final int numPackStreams = readNum(byteBuffer);
    // Reaching END before SIZE means the block is malformed; reading on would treat unrelated bytes as sizes.
    if (!skipToBlockType(byteBuffer, BlockType.SIZE)) {
        throw new ZipException("Bad header.");
    }
    packSizes.clear();
    for (int i = 0; i < numPackStreams; i++) {
        packSizes.add(readNumber(byteBuffer));
    }

    boolean hasCrcs = false;
    while (true) {
        long nBlockType = readId(byteBuffer);
        if (BlockType.END == nBlockType) {
            break;
        }
        if (BlockType.CRC == nBlockType) {
            readHashDigests(byteBuffer, numPackStreams, packCRCs);
            hasCrcs = true;
            continue;
        }
        skipData(byteBuffer);
    }
    if (!hasCrcs) {
        // No CRC section: mark every packed stream's CRC as undefined.
        packCRCs.clear();
        for (int i = 0; i < numPackStreams; i++) {
            packCRCs.add(null);
        }
    }
}
// 7zIn.cpp
/**
 * Reads {@code nItems} optional 4-byte digests: first the "defined" flags, then one int for every defined
 * entry. Undefined entries are stored as {@code null}.
 *
 * @param byteBuffer buffer positioned at the flags
 * @param nItems number of digests
 * @param digests cleared, then receives one entry per item
 */
private void readHashDigests(ByteBuffer byteBuffer, int nItems, List digests) {
    digests.clear();
    final List<Boolean> definedFlags = new ArrayList<>(nItems);
    readBoolVector2(byteBuffer, nItems, definedFlags);
    for (int idx = 0; idx < definedFlags.size(); idx++) {
        final boolean defined = definedFlags.get(idx);
        digests.add(defined ? Integer.valueOf(byteBuffer.getInt()) : null);
    }
}
// 7zIn.cpp
/**
 * Reads {@code nItems} flags packed eight to a byte, most significant bit first. A new byte is fetched only
 * when a flag actually needs it.
 *
 * @param byteBuffer buffer positioned at the first packed byte
 * @param nItems number of flags to read
 * @param booleans cleared, then receives the flags
 */
private void readBoolVector(ByteBuffer byteBuffer, int nItems, List booleans) {
    booleans.clear();
    byte packed = 0;
    for (int item = 0; item < nItems; item++) {
        final int bitInByte = item % 8;
        if (bitInByte == 0) {
            packed = byteBuffer.get();
        }
        final int mask = 0x80 >>> bitInByte;
        booleans.add((packed & mask) != 0);
    }
}
// 7zIn.cpp
/**
 * Reads a flag vector with an "all defined" shortcut: a leading non-zero byte means every flag is true and
 * no packed bits follow; a zero byte means the packed flags follow.
 *
 * @param byteBuffer buffer positioned at the shortcut byte
 * @param nItems number of flags
 * @param booleans cleared, then receives the flags
 */
private void readBoolVector2(ByteBuffer byteBuffer, int nItems, List booleans) {
    final boolean allDefined = byteBuffer.get() != 0;
    if (!allDefined) {
        readBoolVector(byteBuffer, nItems, booleans);
        return;
    }
    booleans.clear();
    for (int remaining = nItems; remaining > 0; remaining--) {
        booleans.add(true);
    }
}
// 7zIn.cpp (WaitAttribute)
/**
 * Skips properties until one with the requested id is found.
 *
 * @param byteBuffer buffer positioned at a property id
 * @param nTargetBlockType id to stop at
 * @return {@code true} if the id was found (the buffer is then positioned just after it), {@code false} if
 *         the END id was reached first
 */
private boolean skipToBlockType(ByteBuffer byteBuffer, long nTargetBlockType) {
    for (long id = readId(byteBuffer); id != nTargetBlockType; id = readId(byteBuffer)) {
        if (id == BlockType.END) {
            return false;
        }
        skipData(byteBuffer);
    }
    return true;
}
// 7zIn.cpp (SkeepData)
/**
 * Skips one length-prefixed payload: reads the length, then advances past that many bytes.
 *
 * @param byteBuffer buffer positioned at the length
 */
private void skipData(ByteBuffer byteBuffer) {
    final long payloadLength = readNumber(byteBuffer);
    skipData(byteBuffer, payloadLength);
}
// 7zIn.cpp (SkeepData)
/**
 * Advances the buffer position by {@code nBytesToSkip}, capped at {@code Integer.MAX_VALUE}.
 *
 * @param byteBuffer buffer to advance
 * @param nBytesToSkip number of bytes to skip
 */
private void skipData(ByteBuffer byteBuffer, long nBytesToSkip) {
    final int step = (int) Math.min(nBytesToSkip, (long) Integer.MAX_VALUE);
    final int target = byteBuffer.position() + step;
    byteBuffer.position(target);
}
// 7zIn.cpp
/**
* Reads a block/property id. Ids use the same variable-length encoding as any other number.
*
* @param byteBuffer buffer positioned at the id
* @return the id
*/
private long readId(ByteBuffer byteBuffer) {
return readNumber(byteBuffer);
}
// 7zIn.cpp
/**
 * Reads a variable-length number that must fit in a non-negative {@code int}.
 *
 * @param byteBuffer buffer positioned at the number
 * @return the value, in the range {@code 0..NUM_MAX}
 * @throws ZipException if the value is larger than {@code NUM_MAX}
 */
private int readNum(ByteBuffer byteBuffer) throws ZipException {
    final long nValue = readNumber(byteBuffer);
    // The encoded value is unsigned 64-bit; anything with the top bit set arrives here as a negative long
    // and is just as out of range as a large positive one.
    if (nValue < 0 || nValue > NUM_MAX) {
        throw new ZipException("Numeric value out of range.");
    }
    return (int) nValue;
}
// 7zIn.cpp
/**
 * Reads a variable-length number. The number of leading one-bits in the first byte says how many further
 * bytes follow; those bytes supply the low-order part, least significant byte first, and the remaining low
 * bits of the first byte supply the high-order part.
 *
 * @param byteBuffer buffer positioned at the first byte
 * @return the decoded value (a value with the top bit set is returned as a negative long)
 */
private long readNumber(ByteBuffer byteBuffer) {
    final int first = byteBuffer.get() & UBYTE_TO_INT;
    long value = 0;
    int mask = 0x80;
    for (int extra = 0; extra < 8; extra++, mask >>>= 1) {
        if ((first & mask) == 0) {
            // First zero bit found: the bits below it are the high-order part of the value.
            final long highPart = first & (mask - 1);
            return value + (highPart << (8 * extra));
        }
        final long next = byteBuffer.get() & UBYTE_TO_LONG;
        value |= next << (8 * extra);
    }
    // All eight flag bits set: the value is exactly the eight bytes that followed.
    return value;
}
}
// ################################################################
// Extracting from archive
/** Marker type for output stream handles passed to the coders; declares no methods. */
private interface ISequentialOutStream {
}
/** Holds an {@link OutputStream} in a {@code Reference} so it can be passed as an {@link ISequentialOutStream}. */
private static class SequentialOutStreamWrapper extends Reference implements ISequentialOutStream {
/** @param value the stream to hold */
public SequentialOutStreamWrapper(OutputStream value) {
super(value);
}
}
/** Marker type for input stream handles passed to the coders; declares no methods. */
private interface ISequentialInStream {
}
/** Holds an {@link InputStream} in a {@code Reference} so it can be passed as an {@link ISequentialInStream}. */
private static class SequentialInStreamWrapper extends Reference implements ISequentialInStream {
/** @param value the stream to hold */
public SequentialInStreamWrapper(InputStream value) {
super(value);
}
}
private static class CoderMixer2 implements ICompressCoder2 {
private BindInfoEx m_bindInfo;
private List m_compressCoders = new LinkedList();
private List m_packSizes = new LinkedList();
private List m_unpackSizes = new LinkedList();
public void SetBindInfo(BindInfoEx bindInfo) {
m_bindInfo = bindInfo;
for (CoderStreamsInfo coderStreamsInfo : m_bindInfo.Coders) {
if (1 != coderStreamsInfo.NumInStreams || 1 != coderStreamsInfo.NumOutStreams) {
Assert.statementNeverExecuted("Not implemented.");
}
}
}
public void ReInit() {
m_packSizes.clear();
m_unpackSizes.clear();
}
public void AddCoder(ICompressCoder decoder) {
m_compressCoders.add(decoder);
}
public void SetCoderInfo(int nCoderIndex, List> packSizes, List> unpackSizes) {
Assert.eq(nCoderIndex, "nCoderIndex", m_packSizes.size(), "m_packSizes.size()");
Assert.eq(packSizes.size(), "packSizes.size()", 1);
Assert.eq(unpackSizes.size(), "unpackSizes.size()", 1);
List packSizesInner = packSizes.get(0);
List unpackSizesInner = unpackSizes.get(0);
Assert.geq(packSizesInner.size(), "packSizesInner.size()", 1);
Assert.geq(unpackSizesInner.size(), "unpackSizesInner.size()", 1);
m_packSizes.add(packSizesInner.get(0));
m_unpackSizes.add(unpackSizesInner.get(0));
}
public void Code(ISequentialInStream inStream, ISequentialOutStream outStream, long inSize, long outSize,
ICompressProgressInfo progress) {
Assert.statementNeverExecuted();
}
public void Code(List inStreams, List> inSizes, int nInStreams,
List outStreams, List> outSizes, int nOutStreams,
ICompressProgressInfo progress) throws IOException {
Assert.eq(inStreams.size(), "inStreams.size()", nInStreams, "nInStreams");
Assert.eq(outStreams.size(), "outStreams.size()", nOutStreams, "nOutStreams");
Assert.eq(nOutStreams, "nOutStreams", m_compressCoders.size(), "m_compressCoders.size()");
Assert.eq(nOutStreams, "nOutStreams", nInStreams, "nInStreams");
Assert.eq(m_compressCoders.size(), "m_compressCoders.size()", m_packSizes.size(), "m_packSizes.size()");
Iterator compressCodersItr = m_compressCoders.iterator();
Iterator inStreamsItr = inStreams.iterator();
Iterator outStreamsItr = outStreams.iterator();
Iterator packSizesItr = m_packSizes.iterator();
Iterator unpackSizesItr = m_unpackSizes.iterator();
while (compressCodersItr.hasNext()) {
compressCodersItr.next().Code(inStreamsItr.next(), outStreamsItr.next(), packSizesItr.next(),
unpackSizesItr.next(), progress);
}
}
}
private static class LzmaWrapper implements ICompressCoder, ICompressSetDecoderProperties2 {
private final SevenZip.Compression.LZMA.Decoder decoder = new SevenZip.Compression.LZMA.Decoder();
public void setDecoderProperties(byte[] properties) throws ZipException {
if (false == decoder.SetDecoderProperties(properties)) {
throw new ZipException("Bad decoder properties.");
}
}
public void Code(ISequentialInStream inStream, ISequentialOutStream outStream, long inSize, long outSize,
ICompressProgressInfo progress) throws IOException {
if (false == decoder.Code(((Reference) inStream).getValue(),
((Reference) outStream).getValue(), outSize)) {
throw new ZipException("Bad compressed data.");
}
}
}
// ICoder.h
/** Implemented by coders that are configured from a raw property byte array before decoding. */
private interface ICompressSetDecoderProperties2 {
/**
 * Supplies the coder's properties.
 *
 * @param properties raw property bytes
 * @throws ZipException if the properties are not acceptable to the coder
 */
void setDecoderProperties(byte[] properties) throws ZipException;
}
// ICoder.h
/** Callback type that is handed to coders alongside the streams they process. */
private interface ICompressProgressInfo {
/**
 * Receives a pair of input/output sizes.
 * NOTE(review): presumably the bytes consumed and produced so far -- confirm against the implementations.
 *
 * @param inSize input-side size
 * @param outSize output-side size
 */
void SetRatioInfo(long inSize, long outSize);
}
// ICoder.h
/** A coder that processes a single input stream into a single output stream. */
private interface ICompressCoder {
/**
 * Processes {@code inStream} into {@code outStream}.
 *
 * @param inStream source stream
 * @param outStream destination stream
 * @param inSize size associated with the input stream
 * @param outSize size associated with the output stream
 * @param progress progress callback handed to the coder
 * @throws IOException if coding fails
 */
void Code(ISequentialInStream inStream, ISequentialOutStream outStream, long inSize, long outSize,
ICompressProgressInfo progress) throws IOException;
}
// ICoder.h
private interface ICompressCoder2 extends ICompressCoder {
void Code(List inStreams, List> inSizes, int nInStreams,
List outStreams, List> outSizes, int nOutStreams,
ICompressProgressInfo progress) throws IOException;
}
// 7zDecode.cpp
// Method id made of the bytes 03 01 01.
// NOTE(review): going by the name this is the id of the LZMA coder -- confirm against the 7z method id list.
private static final MethodID LZMA_METHOD_ID = new MethodID((byte) 0x3, (byte) 0x1, (byte) 0x1);
// Archive/Common/CoderMixer2.h
/** Stream counts of a single coder. */
private static class CoderStreamsInfo {
// number of input streams of the coder
int NumInStreams;
// number of output streams of the coder
int NumOutStreams;
}
// Archive/Common/CoderMixer2.h
/**
 * Describes a set of coders and how their streams are connected. Streams are numbered globally by
 * concatenating the streams of all coders in coder order; the helpers below translate between global stream
 * indices and (coder, stream-within-coder) pairs.
 */
private static class BindInfo {
    List<CoderStreamsInfo> Coders = new LinkedList<CoderStreamsInfo>();
    List<BindPair> BindPairs = new LinkedList<BindPair>();
    // NOTE(review): element type assumed to be Integer (global stream indices) -- confirm against the users
    List<Integer> InStreams = new LinkedList<Integer>();
    List<Integer> OutStreams = new LinkedList<Integer>();

    /**
     * Sums the input and output stream counts over all coders.
     *
     * @param numInStreamsRef receives the total number of input streams
     * @param numOutStreamsRef receives the total number of output streams
     */
    void getNumStreams(Reference<Integer> numInStreamsRef, Reference<Integer> numOutStreamsRef) {
        int numInStreams = 0;
        int numOutStreams = 0;
        for (CoderStreamsInfo coderStreamsInfo : Coders) {
            numInStreams += coderStreamsInfo.NumInStreams;
            numOutStreams += coderStreamsInfo.NumOutStreams;
        }
        numInStreamsRef.setValue(numInStreams);
        numOutStreamsRef.setValue(numOutStreams);
    }

    /**
     * @param inStream global input stream index
     * @return index of the first bind pair whose {@code InIndex} equals {@code inStream}, or -1 if none
     */
    int findBinderForInStream(int inStream) {
        // iterate instead of get(i): the list is a LinkedList, where indexed access is linear
        int i = 0;
        for (BindPair bindPair : BindPairs) {
            if (bindPair.InIndex == inStream) {
                return i;
            }
            i++;
        }
        return -1;
    }

    /**
     * @param outStream global output stream index
     * @return index of the first bind pair whose {@code OutIndex} equals {@code outStream}, or -1 if none
     */
    int findBinderForOutStream(int outStream) {
        int i = 0;
        for (BindPair bindPair : BindPairs) {
            if (bindPair.OutIndex == outStream) {
                return i;
            }
            i++;
        }
        return -1;
    }

    /**
     * @param coderIndex index of a coder
     * @return global index of that coder's first input stream (the input streams of all earlier coders summed)
     */
    int getCoderInStreamIndex(int coderIndex) {
        int streamIndex = 0;
        for (int i = 0; i < coderIndex; i++) {
            streamIndex += Coders.get(i).NumInStreams;
        }
        return streamIndex;
    }

    /**
     * @param coderIndex index of a coder
     * @return global index of that coder's first output stream (the output streams of all earlier coders
     *         summed)
     */
    int getCoderOutStreamIndex(int coderIndex) {
        int streamIndex = 0;
        for (int i = 0; i < coderIndex; i++) {
            streamIndex += Coders.get(i).NumOutStreams;
        }
        return streamIndex;
    }

    /**
     * Translates a global input stream index into a coder index and a stream index within that coder.
     *
     * @param streamIndex global input stream index
     * @param coderIndexRef receives the index of the owning coder
     * @param coderStreamIndexRef receives the stream index relative to that coder
     * @throws ZipException if {@code streamIndex} is beyond the input streams of all coders
     */
    void findInStream(int streamIndex, Reference<Integer> coderIndexRef, Reference<Integer> coderStreamIndexRef)
            throws ZipException {
        int coderIndex = 0;
        for (CoderStreamsInfo coderStreamsInfo : Coders) {
            int curSize = coderStreamsInfo.NumInStreams;
            if (streamIndex < curSize) {
                coderStreamIndexRef.setValue(streamIndex);
                coderIndexRef.setValue(coderIndex);
                return;
            }
            streamIndex -= curSize;
            coderIndex++;
        }
        throw new ZipException("Input stream index out of range.");
    }

    /**
     * Translates a global output stream index into a coder index and a stream index within that coder.
     *
     * @param streamIndex global output stream index
     * @param coderIndexRef receives the index of the owning coder
     * @param coderStreamIndexRef receives the stream index relative to that coder
     * @throws ZipException if {@code streamIndex} is beyond the output streams of all coders
     */
    void findOutStream(int streamIndex, Reference<Integer> coderIndexRef, Reference<Integer> coderStreamIndexRef)
            throws ZipException {
        int coderIndex = 0;
        for (CoderStreamsInfo coderStreamsInfo : Coders) {
            int curSize = coderStreamsInfo.NumOutStreams;
            if (streamIndex < curSize) {
                coderStreamIndexRef.setValue(streamIndex);
                coderIndexRef.setValue(coderIndex);
                return;
            }
            streamIndex -= curSize;
            coderIndex++;
        }
        throw new ZipException("Output stream index out of range.");
    }
}
// 7zDecode.h
/** A {@code BindInfo} that additionally carries a list of method ids for its coders. */
private static class BindInfoEx extends BindInfo {
    // NOTE(review): presumably one entry per coder, in coder order -- confirm against the code that fills it
    List<MethodID> CoderMethodIDs = new LinkedList<MethodID>();
}
// 7zDecode.h,.cpp
private static class Decoder {
boolean _bindInfoExPrevIsDefinded;
BindInfoEx _bindInfoExPrev;
CoderMixer2 _mixerCoderCommon;
ICompressCoder2 _mixerCoder;
List