// All Downloads are FREE. Search and download functionalities are using the official Maven repository.
//
// kala.compress.archivers.sevenz.SevenZArchiveReader Maven / Gradle / Ivy

/*
 *  Licensed to the Apache Software Foundation (ASF) under one or more
 *  contributor license agreements.  See the NOTICE file distributed with
 *  this work for additional information regarding copyright ownership.
 *  The ASF licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 */
package kala.compress.archivers.sevenz;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.SeekableByteChannel;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.*;
import java.util.stream.Collectors;
import java.util.zip.CRC32;

import kala.compress.MemoryLimitException;
import kala.compress.utils.BoundedInputStream;
import kala.compress.utils.ByteUtils;
import kala.compress.utils.CRC32VerifyingInputStream;
import kala.compress.utils.IOUtils;
import kala.compress.utils.InputStreamStatistics;

/**
 * Reads a 7z file, using SeekableByteChannel under the covers.
 *
 * <p>The 7z file format is a flexible container that can contain many compression and
 * encryption types, but at the moment only Copy, LZMA, LZMA2, BZIP2, Deflate and
 * AES-256 + SHA-256 are supported.</p>
 *
 * <p>The format is very Windows/Intel specific, so it uses little-endian byte order,
 * doesn't store user/group or permission bits, and represents times using NTFS timestamps
 * (100 nanosecond units since 1 January 1601). Hence the official tools recommend against
 * using it for backup purposes on *nix, and recommend .tar.7z or .tar.lzma or .tar.xz
 * instead.</p>
 *
 * <p>Both the header and file contents may be compressed and/or encrypted. With both
 * encrypted, neither file names nor file contents can be read, but the use of
 * encryption isn't plausibly deniable.</p>
 *
 * <p>Multi volume archives can be read by concatenating the parts in correct order -
 * either manually or by using {@link kala.compress.utils.MultiReadOnlySeekableByteChannel}
 * for example.</p>
 *
* * @NotThreadSafe * @since 1.6 */ public class SevenZArchiveReader implements Closeable { static final int SIGNATURE_HEADER_SIZE = 32; private static final String DEFAULT_FILE_NAME = "unknown archive"; private final String fileName; private SeekableByteChannel channel; private final Archive archive; private int currentEntryIndex = -1; private int currentFolderIndex = -1; private InputStream currentFolderInputStream; private byte[] password; private final SevenZArchiveReaderOptions options; private long compressedBytesReadFromCurrentEntry; private long uncompressedBytesReadFromCurrentEntry; private final ArrayList deferredBlockStreams = new ArrayList<>(); // shared with SevenZArchiveWriter and tests, neither mutates it static final byte[] sevenZSignature = { //NOSONAR (byte)'7', (byte)'z', (byte)0xBC, (byte)0xAF, (byte)0x27, (byte)0x1C }; /** * Reads a file as unencrypted 7z archive * * @param fileName the file to read * @throws IOException if reading the archive fails */ public SevenZArchiveReader(final File fileName) throws IOException { this(fileName, SevenZArchiveReaderOptions.DEFAULT); } /** * Reads a file as unencrypted 7z archive * * @param fileName the file to read * @param options the options to apply * @throws IOException if reading the archive fails or the memory limit (if set) is too small * @since 1.19 */ public SevenZArchiveReader(final File fileName, final SevenZArchiveReaderOptions options) throws IOException { this(fileName, null, options); } /** * Reads a file as 7z archive * * @param fileName the file to read * @param password optional password if the archive is encrypted * @throws IOException if reading the archive fails * @since 1.17 */ public SevenZArchiveReader(final File fileName, final char[] password) throws IOException { this(fileName, password, SevenZArchiveReaderOptions.DEFAULT); } /** * Reads a file as 7z archive with additional options. 
* * @param fileName the file to read * @param password optional password if the archive is encrypted * @param options the options to apply * @throws IOException if reading the archive fails or the memory limit (if set) is too small * @since 1.19 */ public SevenZArchiveReader(final File fileName, final char[] password, final SevenZArchiveReaderOptions options) throws IOException { this(fileName.toPath(), password, options); } /** * Reads a file as unencrypted 7z archive * * @param fileName the file to read * @throws IOException if reading the archive fails */ public SevenZArchiveReader(final Path fileName) throws IOException { this(fileName, SevenZArchiveReaderOptions.DEFAULT); } /** * Reads a file as unencrypted 7z archive * * @param fileName the file to read * @param options the options to apply * @throws IOException if reading the archive fails or the memory limit (if set) is too small * @since 1.21.0.1 */ public SevenZArchiveReader(final Path fileName, final SevenZArchiveReaderOptions options) throws IOException { this(fileName, null, options); } /** * Reads a file as 7z archive * * @param fileName the file to read * @param password optional password if the archive is encrypted * @throws IOException if reading the archive fails * @since 1.21.0.1 */ public SevenZArchiveReader(final Path fileName, final char[] password) throws IOException { this(fileName, password, SevenZArchiveReaderOptions.DEFAULT); } /** * Reads a file as 7z archive with additional options. 
* * @param fileName the file to read * @param password optional password if the archive is encrypted * @param options the options to apply * @throws IOException if reading the archive fails or the memory limit (if set) is too small * @since 1.21.0.1 */ public SevenZArchiveReader(final Path fileName, final char[] password, final SevenZArchiveReaderOptions options) throws IOException { this(Files.newByteChannel(fileName, Collections.singleton(StandardOpenOption.READ)), // NOSONAR fileName.toAbsolutePath().toString(), utf16Encode(password), true, options); } /** * Reads a SeekableByteChannel as 7z archive * *

{@link * kala.compress.utils.SeekableInMemoryByteChannel} * allows you to read from an in-memory archive.

* * @param channel the channel to read * @throws IOException if reading the archive fails * @since 1.13 */ public SevenZArchiveReader(final SeekableByteChannel channel) throws IOException { this(channel, SevenZArchiveReaderOptions.DEFAULT); } /** * Reads a SeekableByteChannel as 7z archive with addtional options. * *

{@link * kala.compress.utils.SeekableInMemoryByteChannel} * allows you to read from an in-memory archive.

* * @param channel the channel to read * @param options the options to apply * @throws IOException if reading the archive fails or the memory limit (if set) is too small * @since 1.19 */ public SevenZArchiveReader(final SeekableByteChannel channel, final SevenZArchiveReaderOptions options) throws IOException { this(channel, DEFAULT_FILE_NAME, null, options); } /** * Reads a SeekableByteChannel as 7z archive * *

{@link * kala.compress.utils.SeekableInMemoryByteChannel} * allows you to read from an in-memory archive.

* * @param channel the channel to read * @param password optional password if the archive is encrypted * @throws IOException if reading the archive fails * @since 1.17 */ public SevenZArchiveReader(final SeekableByteChannel channel, final char[] password) throws IOException { this(channel, password, SevenZArchiveReaderOptions.DEFAULT); } /** * Reads a SeekableByteChannel as 7z archive with additional options. * *

{@link * kala.compress.utils.SeekableInMemoryByteChannel} * allows you to read from an in-memory archive.

* * @param channel the channel to read * @param password optional password if the archive is encrypted * @param options the options to apply * @throws IOException if reading the archive fails or the memory limit (if set) is too small * @since 1.19 */ public SevenZArchiveReader(final SeekableByteChannel channel, final char[] password, final SevenZArchiveReaderOptions options) throws IOException { this(channel, DEFAULT_FILE_NAME, password, options); } /** * Reads a SeekableByteChannel as 7z archive * *

{@link * kala.compress.utils.SeekableInMemoryByteChannel} * allows you to read from an in-memory archive.

* * @param channel the channel to read * @param fileName name of the archive - only used for error reporting * @param password optional password if the archive is encrypted * @throws IOException if reading the archive fails * @since 1.17 */ public SevenZArchiveReader(final SeekableByteChannel channel, final String fileName, final char[] password) throws IOException { this(channel, fileName, password, SevenZArchiveReaderOptions.DEFAULT); } /** * Reads a SeekableByteChannel as 7z archive with addtional options. * *

{@link * kala.compress.utils.SeekableInMemoryByteChannel} * allows you to read from an in-memory archive.

* * @param channel the channel to read * @param fileName name of the archive - only used for error reporting * @param password optional password if the archive is encrypted * @param options the options to apply * @throws IOException if reading the archive fails or the memory limit (if set) is too small * @since 1.19 */ public SevenZArchiveReader(final SeekableByteChannel channel, final String fileName, final char[] password, final SevenZArchiveReaderOptions options) throws IOException { this(channel, fileName, utf16Encode(password), false, options); } /** * Reads a SeekableByteChannel as 7z archive * *

{@link * kala.compress.utils.SeekableInMemoryByteChannel} * allows you to read from an in-memory archive.

* * @param channel the channel to read * @param fileName name of the archive - only used for error reporting * @throws IOException if reading the archive fails * @since 1.17 */ public SevenZArchiveReader(final SeekableByteChannel channel, final String fileName) throws IOException { this(channel, fileName, SevenZArchiveReaderOptions.DEFAULT); } /** * Reads a SeekableByteChannel as 7z archive with additional options. * *

{@link * kala.compress.utils.SeekableInMemoryByteChannel} * allows you to read from an in-memory archive.

* * @param channel the channel to read * @param fileName name of the archive - only used for error reporting * @param options the options to apply * @throws IOException if reading the archive fails or the memory limit (if set) is too small * @since 1.19 */ public SevenZArchiveReader(final SeekableByteChannel channel, final String fileName, final SevenZArchiveReaderOptions options) throws IOException { this(channel, fileName, null, false, options); } private SevenZArchiveReader(final SeekableByteChannel channel, final String filename, final byte[] password, final boolean closeOnError, final SevenZArchiveReaderOptions options) throws IOException { boolean succeeded = false; this.channel = channel; this.fileName = filename; this.options = options; try { archive = readHeaders(password); if (password != null) { this.password = Arrays.copyOf(password, password.length); } else { this.password = null; } succeeded = true; } finally { if (!succeeded && closeOnError) { this.channel.close(); } } } /** * Closes the archive. * @throws IOException if closing the file fails */ @Override public void close() throws IOException { if (channel != null) { try { channel.close(); } finally { channel = null; if (password != null) { Arrays.fill(password, (byte) 0); } password = null; } } } /** * Returns the next Archive Entry in this archive. 
* @return the next entry, or {@code null} if there are no more entries
* @throws IOException if the next entry could not be read
*/
    public SevenZArchiveEntry getNextEntry() throws IOException {
        if (currentEntryIndex >= archive.files.length - 1) {
            return null;
        }
        ++currentEntryIndex;
        final SevenZArchiveEntry entry = archive.files[currentEntryIndex];
        if (entry.getName() == null && options.getUseDefaultNameForUnnamedEntries()) {
            entry.setName(getDefaultName());
        }
        buildDecodingStream(currentEntryIndex, false);
        uncompressedBytesReadFromCurrentEntry = compressedBytesReadFromCurrentEntry = 0;
        return entry;
    }

    /**
     * Returns a copy of meta-data of all archive entries.
     *
     * <p>This method only provides meta-data, the entries can not be used to read the
     * contents, you still need to process all entries in order using
     * {@link #getNextEntry} for that.</p>
     *
     * <p>The content methods are only available for entries that have already been
     * reached via {@link #getNextEntry}.</p>
     *
     * @return a copy of meta-data of all archive entries.
     * @since 1.11
     */
    public Iterable<SevenZArchiveEntry> getEntries() {
        // was a raw Iterable; archive.files is SevenZArchiveEntry[], so the element type is known
        return new ArrayList<>(Arrays.asList(archive.files));
    }

    /**
     * Reads and validates the signature header, then parses the archive headers.
     *
     * @param password optional UTF-16LE encoded password bytes
     * @return the parsed archive metadata
     * @throws IOException if the signature or headers are invalid
     */
    private Archive readHeaders(final byte[] password) throws IOException {
        final ByteBuffer buf = ByteBuffer.allocate(12 /* signature + 2 bytes version + 4 bytes CRC */)
            .order(ByteOrder.LITTLE_ENDIAN);
        readFully(buf);
        final byte[] signature = new byte[6];
        buf.get(signature);
        if (!Arrays.equals(signature, sevenZSignature)) {
            throw new IOException("Bad 7z signature");
        }
        // 7zFormat.txt has it wrong - it's first major then minor
        final byte archiveVersionMajor = buf.get();
        final byte archiveVersionMinor = buf.get();
        if (archiveVersionMajor != 0) {
            throw new IOException(String.format("Unsupported 7z version (%d,%d)",
                archiveVersionMajor, archiveVersionMinor));
        }
        // See https://www.7-zip.org/recover.html - "There is no correct End Header at the end of archive"
        boolean headerLooksValid = false;
        final long startHeaderCrc = 0xffffFFFFL & buf.getInt();
        if (startHeaderCrc == 0) {
            // This is an indication of a corrupt header - peek the next 20 bytes
            final long currentPosition = channel.position();
            final ByteBuffer peekBuf = ByteBuffer.allocate(20);
            readFully(peekBuf);
            channel.position(currentPosition);
            // Header invalid if all data is 0
            while (peekBuf.hasRemaining()) {
                if (peekBuf.get() != 0) {
                    headerLooksValid = true;
                    break;
                }
            }
        } else {
            headerLooksValid = true;
        }
        if (headerLooksValid) {
            final StartHeader startHeader = readStartHeader(startHeaderCrc);
            return initializeArchive(startHeader, password, true);
        }
        // No valid header found - probably first file of multipart archive was removed too early. Scan for end header.
if (options.getTryToRecoverBrokenArchives()) { return tryToLocateEndHeader(password); } throw new IOException("archive seems to be invalid.\nYou may want to retry and enable the" + " tryToRecoverBrokenArchives if the archive could be a multi volume archive that has been closed" + " prematurely."); } private Archive tryToLocateEndHeader(final byte[] password) throws IOException { final ByteBuffer nidBuf = ByteBuffer.allocate(1); final long searchLimit = 1024L * 1024 * 1; // Main header, plus bytes that readStartHeader would read final long previousDataSize = channel.position() + 20; final long minPos; // Determine minimal position - can't start before current position if (channel.position() + searchLimit > channel.size()) { minPos = channel.position(); } else { minPos = channel.size() - searchLimit; } long pos = channel.size() - 1; // Loop: Try from end of archive while (pos > minPos) { pos--; channel.position(pos); nidBuf.rewind(); if (channel.read(nidBuf) < 1) { throw new EOFException(); } final int nid = nidBuf.array()[0]; // First indicator: Byte equals one of these header identifiers if (nid == NID.kEncodedHeader || nid == NID.kHeader) { try { // Try to initialize Archive structure from here final StartHeader startHeader = new StartHeader(); startHeader.nextHeaderOffset = pos - previousDataSize; startHeader.nextHeaderSize = channel.size() - pos; final Archive result = initializeArchive(startHeader, password, false); // Sanity check: There must be some data... if (result.packSizes.length > 0 && result.files.length > 0) { return result; } } catch (final Exception ignore) { // Wrong guess... 
} } } throw new IOException("Start header corrupt and unable to guess end header"); } private Archive initializeArchive(final StartHeader startHeader, final byte[] password, final boolean verifyCrc) throws IOException { assertFitsIntoNonNegativeInt("nextHeaderSize", startHeader.nextHeaderSize); final int nextHeaderSizeInt = (int) startHeader.nextHeaderSize; channel.position(SIGNATURE_HEADER_SIZE + startHeader.nextHeaderOffset); ByteBuffer buf = ByteBuffer.allocate(nextHeaderSizeInt).order(ByteOrder.LITTLE_ENDIAN); readFully(buf); if (verifyCrc) { final CRC32 crc = new CRC32(); crc.update(buf.array(), 0, buf.array().length); if (startHeader.nextHeaderCrc != crc.getValue()) { throw new IOException("NextHeader CRC mismatch"); } } Archive archive = new Archive(); int nid = getUnsignedByte(buf); if (nid == NID.kEncodedHeader) { buf = readEncodedHeader(buf, archive, password); // Archive gets rebuilt with the new header archive = new Archive(); nid = getUnsignedByte(buf); } if (nid != NID.kHeader) { throw new IOException("Broken or unsupported archive: no Header"); } readHeader(buf, archive); archive.subStreamsInfo = null; return archive; } private StartHeader readStartHeader(final long startHeaderCrc) throws IOException { final StartHeader startHeader = new StartHeader(); // using Stream rather than ByteBuffer for the benefit of the // built-in CRC check try (DataInputStream dataInputStream = new DataInputStream(new CRC32VerifyingInputStream( new BoundedSeekableByteChannelInputStream(channel, 20), 20, startHeaderCrc))) { startHeader.nextHeaderOffset = Long.reverseBytes(dataInputStream.readLong()); if (startHeader.nextHeaderOffset < 0 || startHeader.nextHeaderOffset + SIGNATURE_HEADER_SIZE > channel.size()) { throw new IOException("nextHeaderOffset is out of bounds"); } startHeader.nextHeaderSize = Long.reverseBytes(dataInputStream.readLong()); final long nextHeaderEnd = startHeader.nextHeaderOffset + startHeader.nextHeaderSize; if (nextHeaderEnd < 
startHeader.nextHeaderOffset || nextHeaderEnd + SIGNATURE_HEADER_SIZE > channel.size()) { throw new IOException("nextHeaderSize is out of bounds"); } startHeader.nextHeaderCrc = 0xffffFFFFL & Integer.reverseBytes(dataInputStream.readInt()); return startHeader; } } private void readHeader(final ByteBuffer header, final Archive archive) throws IOException { final int pos = header.position(); final ArchiveStatistics stats = sanityCheckAndCollectStatistics(header); stats.assertValidity(options.getMaxMemoryLimitInKb()); header.position(pos); int nid = getUnsignedByte(header); if (nid == NID.kArchiveProperties) { readArchiveProperties(header); nid = getUnsignedByte(header); } if (nid == NID.kAdditionalStreamsInfo) { throw new IOException("Additional streams unsupported"); //nid = getUnsignedByte(header); } if (nid == NID.kMainStreamsInfo) { readStreamsInfo(header, archive); nid = getUnsignedByte(header); } if (nid == NID.kFilesInfo) { readFilesInfo(header, archive); nid = getUnsignedByte(header); } } private ArchiveStatistics sanityCheckAndCollectStatistics(final ByteBuffer header) throws IOException { final ArchiveStatistics stats = new ArchiveStatistics(); int nid = getUnsignedByte(header); if (nid == NID.kArchiveProperties) { sanityCheckArchiveProperties(header); nid = getUnsignedByte(header); } if (nid == NID.kAdditionalStreamsInfo) { throw new IOException("Additional streams unsupported"); //nid = getUnsignedByte(header); } if (nid == NID.kMainStreamsInfo) { sanityCheckStreamsInfo(header, stats); nid = getUnsignedByte(header); } if (nid == NID.kFilesInfo) { sanityCheckFilesInfo(header, stats); nid = getUnsignedByte(header); } if (nid != NID.kEnd) { throw new IOException("Badly terminated header, found " + nid); } return stats; } private void readArchiveProperties(final ByteBuffer input) throws IOException { // FIXME: the reference implementation just throws them away? 
int nid = getUnsignedByte(input);
    while (nid != NID.kEnd) {
        // Each property is a uint64 size followed by that many payload bytes; ignored.
        final long propertySize = readUint64(input);
        final byte[] property = new byte[(int)propertySize];
        get(input, property);
        nid = getUnsignedByte(input);
    }
}

/**
 * Validation pass over kArchiveProperties: checks each declared property size
 * actually fits in the buffer without materializing the payload.
 *
 * @param header buffer positioned just after the kArchiveProperties id
 * @throws IOException if a declared property size exceeds the remaining data
 */
private void sanityCheckArchiveProperties(final ByteBuffer header) throws IOException {
    int nid = getUnsignedByte(header);
    while (nid != NID.kEnd) {
        final int propertySize = assertFitsIntoNonNegativeInt("propertySize", readUint64(header));
        if (skipBytesFully(header, propertySize) < propertySize) {
            throw new IOException("invalid property size");
        }
        nid = getUnsignedByte(header);
    }
}

/**
 * Decodes an encoded (compressed and/or encrypted) header: parses its StreamsInfo,
 * then decompresses the single folder it describes and returns the decoded header bytes.
 *
 * @param header   buffer positioned at the encoded header's StreamsInfo
 * @param archive  scratch archive used to hold the folder/pack metadata of the encoded header
 * @param password optional password bytes for encrypted headers
 * @return a little-endian buffer wrapping the decoded header
 * @throws IOException if the encoded header is malformed, truncated or uses unsupported coders
 */
private ByteBuffer readEncodedHeader(final ByteBuffer header, final Archive archive,
        final byte[] password) throws IOException {
    final int pos = header.position();
    // Validate before the real parse, mirroring readHeader().
    ArchiveStatistics stats = new ArchiveStatistics();
    sanityCheckStreamsInfo(header, stats);
    stats.assertValidity(options.getMaxMemoryLimitInKb());
    header.position(pos);
    readStreamsInfo(header, archive);
    if (archive.folders == null || archive.folders.length == 0) {
        throw new IOException("no folders, can't read encoded header");
    }
    if (archive.packSizes == null || archive.packSizes.length == 0) {
        throw new IOException("no packed streams, can't read encoded header");
    }
    // FIXME: merge with buildDecodingStream()/buildDecoderStack() at some stage?
    // An encoded header always lives in the first folder / first packed stream.
    final Folder folder = archive.folders[0];
    final int firstPackStreamIndex = 0;
    final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos + 0;
    channel.position(folderOffset);
    InputStream inputStreamStack = new BoundedSeekableByteChannelInputStream(channel,
            archive.packSizes[firstPackStreamIndex]);
    for (final Coder coder : folder.getOrderedCoders()) {
        if (coder.numInStreams != 1 || coder.numOutStreams != 1) {
            throw new IOException("Multi input/output stream coders are not yet supported");
        }
        inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, //NOSONAR
                folder.getUnpackSizeForCoder(coder), coder, password, options.getMaxMemoryLimitInKb());
    }
    if (folder.hasCrc) {
        inputStreamStack = new CRC32VerifyingInputStream(inputStreamStack,
                folder.getUnpackSize(), folder.crc);
    }
    final int unpackSize = assertFitsIntoNonNegativeInt("unpackSize", folder.getUnpackSize());
    final byte[] nextHeader = IOUtils.readRange(inputStreamStack, unpackSize);
    if (nextHeader.length < unpackSize) {
        throw new IOException("premature end of stream");
    }
    inputStreamStack.close();
    return ByteBuffer.wrap(nextHeader).order(ByteOrder.LITTLE_ENDIAN);
}

/**
 * Validation pass over a StreamsInfo block (PackInfo, UnpackInfo, SubStreamsInfo).
 *
 * @param header buffer positioned at the StreamsInfo content
 * @param stats  statistics accumulator
 * @throws IOException if the block is badly terminated or any sub-block is invalid
 */
private void sanityCheckStreamsInfo(final ByteBuffer header,
        final ArchiveStatistics stats) throws IOException {
    int nid = getUnsignedByte(header);
    if (nid == NID.kPackInfo) {
        sanityCheckPackInfo(header, stats);
        nid = getUnsignedByte(header);
    }
    if (nid == NID.kUnpackInfo) {
        sanityCheckUnpackInfo(header, stats);
        nid = getUnsignedByte(header);
    }
    if (nid == NID.kSubStreamsInfo) {
        sanityCheckSubStreamsInfo(header, stats);
        nid = getUnsignedByte(header);
    }
    if (nid != NID.kEnd) {
        throw new IOException("Badly terminated StreamsInfo");
    }
}

/**
 * Parses a StreamsInfo block for real, populating pack, unpack (folder) and
 * sub-stream information on the archive.
 */
private void readStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException {
    int nid = getUnsignedByte(header);
    if (nid == NID.kPackInfo) {
        readPackInfo(header, archive);
        nid = getUnsignedByte(header);
    }
    if (nid == NID.kUnpackInfo) {
        readUnpackInfo(header, archive);
        nid = getUnsignedByte(header);
} else { // archive without unpack/coders info archive.folders = Folder.EMPTY_FOLDER_ARRAY; } if (nid == NID.kSubStreamsInfo) { readSubStreamsInfo(header, archive); nid = getUnsignedByte(header); } } private void sanityCheckPackInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { final long packPos = readUint64(header); if (packPos < 0 || SIGNATURE_HEADER_SIZE + packPos > channel.size() || SIGNATURE_HEADER_SIZE + packPos < 0) { throw new IOException("packPos (" + packPos + ") is out of range"); } final long numPackStreams = readUint64(header); stats.numberOfPackedStreams = assertFitsIntoNonNegativeInt("numPackStreams", numPackStreams); int nid = getUnsignedByte(header); if (nid == NID.kSize) { long totalPackSizes = 0; for (int i = 0; i < stats.numberOfPackedStreams; i++) { final long packSize = readUint64(header); totalPackSizes += packSize; final long endOfPackStreams = SIGNATURE_HEADER_SIZE + packPos + totalPackSizes; if (packSize < 0 || endOfPackStreams > channel.size() || endOfPackStreams < packPos) { throw new IOException("packSize (" + packSize + ") is out of range"); } } nid = getUnsignedByte(header); } if (nid == NID.kCRC) { final int crcsDefined = readAllOrBits(header, stats.numberOfPackedStreams) .cardinality(); if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { throw new IOException("invalid number of CRCs in PackInfo"); } nid = getUnsignedByte(header); } if (nid != NID.kEnd) { throw new IOException("Badly terminated PackInfo (" + nid + ")"); } } private void readPackInfo(final ByteBuffer header, final Archive archive) throws IOException { archive.packPos = readUint64(header); final int numPackStreamsInt = (int) readUint64(header); int nid = getUnsignedByte(header); if (nid == NID.kSize) { archive.packSizes = new long[numPackStreamsInt]; for (int i = 0; i < archive.packSizes.length; i++) { archive.packSizes[i] = readUint64(header); } nid = getUnsignedByte(header); } if (nid == NID.kCRC) { archive.packCrcsDefined 
= readAllOrBits(header, numPackStreamsInt); archive.packCrcs = new long[numPackStreamsInt]; for (int i = 0; i < numPackStreamsInt; i++) { if (archive.packCrcsDefined.get(i)) { archive.packCrcs[i] = 0xffffFFFFL & getInt(header); } } nid = getUnsignedByte(header); } } private void sanityCheckUnpackInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { int nid = getUnsignedByte(header); if (nid != NID.kFolder) { throw new IOException("Expected kFolder, got " + nid); } final long numFolders = readUint64(header); stats.numberOfFolders = assertFitsIntoNonNegativeInt("numFolders", numFolders); final int external = getUnsignedByte(header); if (external != 0) { throw new IOException("External unsupported"); } final List numberOfOutputStreamsPerFolder = new LinkedList<>(); for (int i = 0; i < stats.numberOfFolders; i++) { numberOfOutputStreamsPerFolder.add(sanityCheckFolder(header, stats)); } final long totalNumberOfBindPairs = stats.numberOfOutStreams - stats.numberOfFolders; final long packedStreamsRequiredByFolders = stats.numberOfInStreams - totalNumberOfBindPairs; if (packedStreamsRequiredByFolders < stats.numberOfPackedStreams) { throw new IOException("archive doesn't contain enough packed streams"); } nid = getUnsignedByte(header); if (nid != NID.kCodersUnpackSize) { throw new IOException("Expected kCodersUnpackSize, got " + nid); } for (int numberOfOutputStreams : numberOfOutputStreamsPerFolder) { for (int i = 0; i < numberOfOutputStreams; i++) { final long unpackSize = readUint64(header); if (unpackSize < 0) { throw new IllegalArgumentException("negative unpackSize"); } } } nid = getUnsignedByte(header); if (nid == NID.kCRC) { stats.folderHasCrc = readAllOrBits(header, stats.numberOfFolders); final int crcsDefined = stats.folderHasCrc.cardinality(); if (skipBytesFully(header, 4 * crcsDefined) < 4 * crcsDefined) { throw new IOException("invalid number of CRCs in UnpackInfo"); } nid = getUnsignedByte(header); } if (nid != NID.kEnd) { throw 
new IOException("Badly terminated UnpackInfo"); } } private void readUnpackInfo(final ByteBuffer header, final Archive archive) throws IOException { int nid = getUnsignedByte(header); final int numFoldersInt = (int) readUint64(header); final Folder[] folders = new Folder[numFoldersInt]; archive.folders = folders; /* final int external = */ getUnsignedByte(header); for (int i = 0; i < numFoldersInt; i++) { folders[i] = readFolder(header); } nid = getUnsignedByte(header); for (final Folder folder : folders) { assertFitsIntoNonNegativeInt("totalOutputStreams", folder.totalOutputStreams); folder.unpackSizes = new long[(int)folder.totalOutputStreams]; for (int i = 0; i < folder.totalOutputStreams; i++) { folder.unpackSizes[i] = readUint64(header); } } nid = getUnsignedByte(header); if (nid == NID.kCRC) { final BitSet crcsDefined = readAllOrBits(header, numFoldersInt); for (int i = 0; i < numFoldersInt; i++) { if (crcsDefined.get(i)) { folders[i].hasCrc = true; folders[i].crc = 0xffffFFFFL & getInt(header); } else { folders[i].hasCrc = false; } } nid = getUnsignedByte(header); } } private void sanityCheckSubStreamsInfo(final ByteBuffer header, final ArchiveStatistics stats) throws IOException { int nid = getUnsignedByte(header); final List numUnpackSubStreamsPerFolder = new LinkedList<>(); if (nid == NID.kNumUnpackStream) { for (int i = 0; i < stats.numberOfFolders; i++) { numUnpackSubStreamsPerFolder.add(assertFitsIntoNonNegativeInt("numStreams", readUint64(header))); } stats.numberOfUnpackSubStreams = numUnpackSubStreamsPerFolder.stream().collect(Collectors.summingLong(Integer::longValue)); nid = getUnsignedByte(header); } else { stats.numberOfUnpackSubStreams = stats.numberOfFolders; } assertFitsIntoNonNegativeInt("totalUnpackStreams", stats.numberOfUnpackSubStreams); if (nid == NID.kSize) { for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { if (numUnpackSubStreams == 0) { continue; } for (int i = 0; i < numUnpackSubStreams - 1; i++) { final long 
size = readUint64(header); if (size < 0) { throw new IOException("negative unpackSize"); } } } nid = getUnsignedByte(header); } int numDigests = 0; if (numUnpackSubStreamsPerFolder.isEmpty()) { numDigests = stats.folderHasCrc == null ? stats.numberOfFolders : stats.numberOfFolders - stats.folderHasCrc.cardinality(); } else { int folderIdx = 0; for (final int numUnpackSubStreams : numUnpackSubStreamsPerFolder) { if (numUnpackSubStreams != 1 || stats.folderHasCrc == null || !stats.folderHasCrc.get(folderIdx++)) { numDigests += numUnpackSubStreams; } } } if (nid == NID.kCRC) { assertFitsIntoNonNegativeInt("numDigests", numDigests); final int missingCrcs = readAllOrBits(header, numDigests) .cardinality(); if (skipBytesFully(header, 4 * missingCrcs) < 4 * missingCrcs) { throw new IOException("invalid number of missing CRCs in SubStreamInfo"); } nid = getUnsignedByte(header); } if (nid != NID.kEnd) { throw new IOException("Badly terminated SubStreamsInfo"); } } private void readSubStreamsInfo(final ByteBuffer header, final Archive archive) throws IOException { for (final Folder folder : archive.folders) { folder.numUnpackSubStreams = 1; } long unpackStreamsCount = archive.folders.length; int nid = getUnsignedByte(header); if (nid == NID.kNumUnpackStream) { unpackStreamsCount = 0; for (final Folder folder : archive.folders) { final long numStreams = readUint64(header); folder.numUnpackSubStreams = (int)numStreams; unpackStreamsCount += numStreams; } nid = getUnsignedByte(header); } final int totalUnpackStreams = (int) unpackStreamsCount; final SubStreamsInfo subStreamsInfo = new SubStreamsInfo(); subStreamsInfo.unpackSizes = new long[totalUnpackStreams]; subStreamsInfo.hasCrc = new BitSet(totalUnpackStreams); subStreamsInfo.crcs = new long[totalUnpackStreams]; int nextUnpackStream = 0; for (final Folder folder : archive.folders) { if (folder.numUnpackSubStreams == 0) { continue; } long sum = 0; if (nid == NID.kSize) { for (int i = 0; i < folder.numUnpackSubStreams - 1; 
i++) {
                final long size = readUint64(header);
                subStreamsInfo.unpackSizes[nextUnpackStream++] = size;
                sum += size;
            }
        }
        if (sum > folder.getUnpackSize()) {
            throw new IOException("sum of unpack sizes of folder exceeds total unpack size");
        }
        // Last sub-stream gets whatever remains of the folder's total unpack size.
        subStreamsInfo.unpackSizes[nextUnpackStream++] = folder.getUnpackSize() - sum;
    }
    if (nid == NID.kSize) {
        nid = getUnsignedByte(header);
    }
    // Folders with exactly one sub-stream and a known folder CRC need no extra digest.
    int numDigests = 0;
    for (final Folder folder : archive.folders) {
        if (folder.numUnpackSubStreams != 1 || !folder.hasCrc) {
            numDigests += folder.numUnpackSubStreams;
        }
    }
    if (nid == NID.kCRC) {
        final BitSet hasMissingCrc = readAllOrBits(header, numDigests);
        final long[] missingCrcs = new long[numDigests];
        for (int i = 0; i < numDigests; i++) {
            if (hasMissingCrc.get(i)) {
                // Mask to treat the stored 32-bit CRC as unsigned.
                missingCrcs[i] = 0xffffFFFFL & getInt(header);
            }
        }
        int nextCrc = 0;
        int nextMissingCrc = 0;
        for (final Folder folder: archive.folders) {
            if (folder.numUnpackSubStreams == 1 && folder.hasCrc) {
                // Single sub-stream inherits the folder's CRC directly.
                subStreamsInfo.hasCrc.set(nextCrc, true);
                subStreamsInfo.crcs[nextCrc] = folder.crc;
                ++nextCrc;
            } else {
                for (int i = 0; i < folder.numUnpackSubStreams; i++) {
                    subStreamsInfo.hasCrc.set(nextCrc, hasMissingCrc.get(nextMissingCrc));
                    subStreamsInfo.crcs[nextCrc] = missingCrcs[nextMissingCrc];
                    ++nextCrc;
                    ++nextMissingCrc;
                }
            }
        }
        nid = getUnsignedByte(header);
    }
    archive.subStreamsInfo = subStreamsInfo;
}

/**
 * Validation pass over a single folder definition: checks every coder's id/size
 * bits, the bind-pair indices and the packed-stream indices, accumulating
 * stream/coder counts into {@code stats}.
 *
 * @param header buffer positioned at the folder's coder count
 * @param stats  statistics accumulator
 * @return the folder's total number of output streams
 * @throws IOException if the folder structure is invalid or uses unsupported features
 */
private int sanityCheckFolder(final ByteBuffer header,
        final ArchiveStatistics stats) throws IOException {
    final int numCoders = assertFitsIntoNonNegativeInt("numCoders", readUint64(header));
    if (numCoders == 0) {
        throw new IOException("Folder without coders");
    }
    stats.numberOfCoders += numCoders;
    long totalOutStreams = 0;
    long totalInStreams = 0;
    for (int i = 0; i < numCoders; i++) {
        final int bits = getUnsignedByte(header);
        // Flag byte layout: low nibble = method-id length, 0x10 = complex coder,
        // 0x20 = has attributes, 0x80 = alternative methods follow.
        final int idSize = bits & 0xf;
        get(header, new byte[idSize]);
        final boolean isSimple = (bits & 0x10) == 0;
        final boolean hasAttributes = (bits & 0x20) != 0;
        final boolean moreAlternativeMethods = (bits & 0x80) != 0;
        if (moreAlternativeMethods) {
            throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR
                "The reference implementation doesn't support them either.");
        }
        if (isSimple) {
            totalInStreams++;
            totalOutStreams++;
        } else {
            totalInStreams += assertFitsIntoNonNegativeInt("numInStreams", readUint64(header));
            totalOutStreams += assertFitsIntoNonNegativeInt("numOutStreams", readUint64(header));
        }
        if (hasAttributes) {
            final int propertiesSize = assertFitsIntoNonNegativeInt("propertiesSize", readUint64(header));
            if (skipBytesFully(header, propertiesSize) < propertiesSize) {
                throw new IOException("invalid propertiesSize in folder");
            }
        }
    }
    assertFitsIntoNonNegativeInt("totalInStreams", totalInStreams);
    assertFitsIntoNonNegativeInt("totalOutStreams", totalOutStreams);
    stats.numberOfOutStreams += totalOutStreams;
    stats.numberOfInStreams += totalInStreams;
    if (totalOutStreams == 0) {
        throw new IOException("Total output streams can't be 0");
    }
    final int numBindPairs = assertFitsIntoNonNegativeInt("numBindPairs", totalOutStreams - 1);
    if (totalInStreams < numBindPairs) {
        throw new IOException("Total input streams can't be less than the number of bind pairs");
    }
    final BitSet inStreamsBound = new BitSet((int) totalInStreams);
    for (int i = 0; i < numBindPairs; i++) {
        final int inIndex = assertFitsIntoNonNegativeInt("inIndex", readUint64(header));
        if (totalInStreams <= inIndex) {
            throw new IOException("inIndex is bigger than number of inStreams");
        }
        inStreamsBound.set(inIndex);
        final int outIndex = assertFitsIntoNonNegativeInt("outIndex", readUint64(header));
        if (totalOutStreams <= outIndex) {
            throw new IOException("outIndex is bigger than number of outStreams");
        }
    }
    final int numPackedStreams = assertFitsIntoNonNegativeInt("numPackedStreams",
        totalInStreams - numBindPairs);
    if (numPackedStreams == 1) {
        // NOTE(review): BitSet.nextClearBit never returns -1 (it returns an index
        // past the set's logical size), so this check can never fire — verify the
        // intended condition against the reference implementation.
        if (inStreamsBound.nextClearBit(0) == -1) {
            throw new IOException("Couldn't find stream's bind pair index");
        }
    } else {
        for (int i = 0; i < numPackedStreams; i++)
{
            final int packedStreamIndex = assertFitsIntoNonNegativeInt("packedStreamIndex",
                readUint64(header));
            if (packedStreamIndex >= totalInStreams) {
                throw new IOException("packedStreamIndex is bigger than number of totalInStreams");
            }
        }
    }
    return (int) totalOutStreams;
}

/**
 * Parses a single folder definition for real: coders (method id, stream counts,
 * properties), bind pairs and packed-stream indices.
 * Assumes {@code sanityCheckFolder} has already validated the structure.
 *
 * @param header buffer positioned at the folder's coder count
 * @return the populated folder
 * @throws IOException if the folder uses unsupported alternative methods
 */
private Folder readFolder(final ByteBuffer header) throws IOException {
    final Folder folder = new Folder();
    final long numCoders = readUint64(header);
    final Coder[] coders = new Coder[(int)numCoders];
    long totalInStreams = 0;
    long totalOutStreams = 0;
    for (int i = 0; i < coders.length; i++) {
        coders[i] = new Coder();
        final int bits = getUnsignedByte(header);
        // Same flag byte layout as in sanityCheckFolder.
        final int idSize = bits & 0xf;
        final boolean isSimple = (bits & 0x10) == 0;
        final boolean hasAttributes = (bits & 0x20) != 0;
        final boolean moreAlternativeMethods = (bits & 0x80) != 0;
        coders[i].decompressionMethodId = new byte[idSize];
        get(header, coders[i].decompressionMethodId);
        if (isSimple) {
            coders[i].numInStreams = 1;
            coders[i].numOutStreams = 1;
        } else {
            coders[i].numInStreams = readUint64(header);
            coders[i].numOutStreams = readUint64(header);
        }
        totalInStreams += coders[i].numInStreams;
        totalOutStreams += coders[i].numOutStreams;
        if (hasAttributes) {
            final long propertiesSize = readUint64(header);
            coders[i].properties = new byte[(int)propertiesSize];
            get(header, coders[i].properties);
        }
        // would need to keep looping as above:
        while (moreAlternativeMethods) {
            throw new IOException("Alternative methods are unsupported, please report. " + // NOSONAR
                "The reference implementation doesn't support them either.");
        }
    }
    folder.coders = coders;
    folder.totalInputStreams = totalInStreams;
    folder.totalOutputStreams = totalOutStreams;
    final long numBindPairs = totalOutStreams - 1;
    final BindPair[] bindPairs = new BindPair[(int)numBindPairs];
    for (int i = 0; i < bindPairs.length; i++) {
        bindPairs[i] = new BindPair();
        bindPairs[i].inIndex = readUint64(header);
        bindPairs[i].outIndex = readUint64(header);
    }
    folder.bindPairs = bindPairs;
    final long numPackedStreams = totalInStreams - numBindPairs;
    final long[] packedStreams = new long[(int)numPackedStreams];
    if (numPackedStreams == 1) {
        // The single packed stream is the one input not consumed by a bind pair.
        int i;
        for (i = 0; i < (int)totalInStreams; i++) {
            if (folder.findBindPairForInStream(i) < 0) {
                break;
            }
        }
        packedStreams[0] = i;
    } else {
        for (int i = 0; i < (int)numPackedStreams; i++) {
            packedStreams[i] = readUint64(header);
        }
    }
    folder.packedStreams = packedStreams;
    return folder;
}

/**
 * Reads a bit vector of {@code size} bits that may be preceded by an
 * "all defined" byte: a non-zero byte means every bit is set and no vector follows.
 */
private BitSet readAllOrBits(final ByteBuffer header, final int size) throws IOException {
    final int areAllDefined = getUnsignedByte(header);
    final BitSet bits;
    if (areAllDefined != 0) {
        bits = new BitSet(size);
        for (int i = 0; i < size; i++) {
            bits.set(i, true);
        }
    } else {
        bits = readBits(header, size);
    }
    return bits;
}

/**
 * Reads a packed bit vector of {@code size} bits, most significant bit first.
 */
private BitSet readBits(final ByteBuffer header, final int size) throws IOException {
    final BitSet bits = new BitSet(size);
    int mask = 0;
    int cache = 0;
    for (int i = 0; i < size; i++) {
        if (mask == 0) {
            // Refill the byte cache every 8 bits.
            mask = 0x80;
            cache = getUnsignedByte(header);
        }
        bits.set(i, (cache & mask) != 0);
        mask >>>= 1;
    }
    return bits;
}

/**
 * Validation pass over FilesInfo: walks every file property (names, dates,
 * attributes, empty-stream markers) checking sizes and ordering constraints,
 * and records the entry counts into {@code stats}.
 *
 * @param header buffer positioned just after the kFilesInfo id
 * @param stats  statistics accumulator
 * @throws IOException if any property is malformed or out of order
 */
private void sanityCheckFilesInfo(final ByteBuffer header,
        final ArchiveStatistics stats) throws IOException {
    stats.numberOfEntries = assertFitsIntoNonNegativeInt("numFiles", readUint64(header));
    // -1 means "kEmptyStream not seen yet"; kEmptyFile/kAnti require it first.
    int emptyStreams = -1;
    while (true) {
        final int propertyType = getUnsignedByte(header);
        if (propertyType == 0) {
            break;
        }
        final long size = readUint64(header);
        switch (propertyType) {
            case NID.kEmptyStream: {
                emptyStreams =
readBits(header, stats.numberOfEntries).cardinality();
                break;
            }
            case NID.kEmptyFile: {
                if (emptyStreams == -1) {
                    throw new IOException("Header format error: kEmptyStream must appear before kEmptyFile");
                }
                readBits(header, emptyStreams);
                break;
            }
            case NID.kAnti: {
                if (emptyStreams == -1) {
                    throw new IOException("Header format error: kEmptyStream must appear before kAnti");
                }
                readBits(header, emptyStreams);
                break;
            }
            case NID.kName: {
                final int external = getUnsignedByte(header);
                if (external != 0) {
                    throw new IOException("Not implemented");
                }
                // Names are UTF-16LE, so the byte length must be even.
                final int namesLength = assertFitsIntoNonNegativeInt("file names length", size - 1);
                if ((namesLength & 1) != 0) {
                    throw new IOException("File names length invalid");
                }
                // Count NUL terminators; one per entry is expected.
                int filesSeen = 0;
                for (int i = 0; i < namesLength; i += 2) {
                    final char c = getChar(header);
                    if (c == 0) {
                        filesSeen++;
                    }
                }
                if (filesSeen != stats.numberOfEntries) {
                    throw new IOException("Invalid number of file names (" + filesSeen
                        + " instead of " + stats.numberOfEntries + ")");
                }
                break;
            }
            case NID.kCTime: {
                final int timesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality();
                final int external = getUnsignedByte(header);
                if (external != 0) {
                    throw new IOException("Not implemented");
                }
                // Each defined timestamp occupies 8 bytes.
                if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) {
                    throw new IOException("invalid creation dates size");
                }
                break;
            }
            case NID.kATime: {
                final int timesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality();
                final int external = getUnsignedByte(header);
                if (external != 0) {
                    throw new IOException("Not implemented");
                }
                if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) {
                    throw new IOException("invalid access dates size");
                }
                break;
            }
            case NID.kMTime: {
                final int timesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality();
                final int external = getUnsignedByte(header);
                if (external != 0) {
                    throw new IOException("Not implemented");
                }
                if (skipBytesFully(header, 8 * timesDefined) < 8 * timesDefined) {
                    throw new IOException("invalid modification dates size");
                }
                break;
            }
            case NID.kWinAttributes: {
                final int attributesDefined = readAllOrBits(header, stats.numberOfEntries).cardinality();
                final int external = getUnsignedByte(header);
                if (external != 0) {
                    throw new IOException("Not implemented");
                }
                // Each defined attribute occupies 4 bytes.
                if (skipBytesFully(header, 4 * attributesDefined) < 4 * attributesDefined) {
                    throw new IOException("invalid windows attributes size");
                }
                break;
            }
            case NID.kStartPos: {
                throw new IOException("kStartPos is unsupported, please report");
            }
            case NID.kDummy: {
                // 7z 9.20 asserts the content is all zeros and ignores the property
                // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287
                if (skipBytesFully(header, size) < size) {
                    throw new IOException("Incomplete kDummy property");
                }
                break;
            }
            default: {
                // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287
                if (skipBytesFully(header, size) < size) {
                    throw new IOException("Incomplete property of type " + propertyType);
                }
                break;
            }
        }
    }
    stats.numberOfEntriesWithStream = stats.numberOfEntries - (emptyStreams > 0 ?
emptyStreams : 0); } private void readFilesInfo(final ByteBuffer header, final Archive archive) throws IOException { final int numFilesInt = (int) readUint64(header);; final Map fileMap = new HashMap<>(); BitSet isEmptyStream = null; BitSet isEmptyFile = null; BitSet isAnti = null; while (true) { final int propertyType = getUnsignedByte(header); if (propertyType == 0) { break; } final long size = readUint64(header); switch (propertyType) { case NID.kEmptyStream: { isEmptyStream = readBits(header, numFilesInt); break; } case NID.kEmptyFile: { isEmptyFile = readBits(header, isEmptyStream.cardinality()); break; } case NID.kAnti: { isAnti = readBits(header, isEmptyStream.cardinality()); break; } case NID.kName: { /* final int external = */ getUnsignedByte(header); final byte[] names = new byte[(int) (size - 1)]; final int namesLength = names.length; get(header, names); int nextFile = 0; int nextName = 0; for (int i = 0; i < namesLength; i += 2) { if (names[i] == 0 && names[i + 1] == 0) { checkEntryIsInitialized(fileMap, nextFile); fileMap.get(nextFile).setName(new String(names, nextName, i - nextName, StandardCharsets.UTF_16LE)); nextName = i + 2; nextFile++; } } if (nextName != namesLength || nextFile != numFilesInt) { throw new IOException("Error parsing file names"); } break; } case NID.kCTime: { final BitSet timesDefined = readAllOrBits(header, numFilesInt); /* final int external = */ getUnsignedByte(header); for (int i = 0; i < numFilesInt; i++) { checkEntryIsInitialized(fileMap, i); final SevenZArchiveEntry entryAtIndex = fileMap.get(i); entryAtIndex.setHasCreationDate(timesDefined.get(i)); if (entryAtIndex.getHasCreationDate()) { entryAtIndex.setCreationDate(getLong(header)); } } break; } case NID.kATime: { final BitSet timesDefined = readAllOrBits(header, numFilesInt); /* final int external = */ getUnsignedByte(header); for (int i = 0; i < numFilesInt; i++) { checkEntryIsInitialized(fileMap, i); final SevenZArchiveEntry entryAtIndex = fileMap.get(i); 
entryAtIndex.setHasAccessDate(timesDefined.get(i)); if (entryAtIndex.getHasAccessDate()) { entryAtIndex.setAccessDate(getLong(header)); } } break; } case NID.kMTime: { final BitSet timesDefined = readAllOrBits(header, numFilesInt); /* final int external = */ getUnsignedByte(header); for (int i = 0; i < numFilesInt; i++) { checkEntryIsInitialized(fileMap, i); final SevenZArchiveEntry entryAtIndex = fileMap.get(i); entryAtIndex.setHasLastModifiedDate(timesDefined.get(i)); if (entryAtIndex.getHasLastModifiedDate()) { entryAtIndex.setLastModifiedDate(getLong(header)); } } break; } case NID.kWinAttributes: { final BitSet attributesDefined = readAllOrBits(header, numFilesInt); /* final int external = */ getUnsignedByte(header); for (int i = 0; i < numFilesInt; i++) { checkEntryIsInitialized(fileMap, i); final SevenZArchiveEntry entryAtIndex = fileMap.get(i); entryAtIndex.setHasWindowsAttributes(attributesDefined.get(i)); if (entryAtIndex.getHasWindowsAttributes()) { entryAtIndex.setWindowsAttributes(getInt(header)); } } break; } case NID.kDummy: { // 7z 9.20 asserts the content is all zeros and ignores the property // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 skipBytesFully(header, size); break; } default: { // Compress up to 1.8.1 would throw an exception, now we ignore it (see COMPRESS-287 skipBytesFully(header, size); break; } } } int nonEmptyFileCounter = 0; int emptyFileCounter = 0; for (int i = 0; i < numFilesInt; i++) { final SevenZArchiveEntry entryAtIndex = fileMap.get(i); if (entryAtIndex == null) { continue; } entryAtIndex.setHasStream(isEmptyStream == null || !isEmptyStream.get(i)); if (entryAtIndex.hasStream()) { if (archive.subStreamsInfo == null) { throw new IOException("Archive contains file with streams but no subStreamsInfo"); } entryAtIndex.setDirectory(false); entryAtIndex.setAntiItem(false); entryAtIndex.setHasCrc(archive.subStreamsInfo.hasCrc.get(nonEmptyFileCounter)); 
entryAtIndex.setCrcValue(archive.subStreamsInfo.crcs[nonEmptyFileCounter]); entryAtIndex.setSize(archive.subStreamsInfo.unpackSizes[nonEmptyFileCounter]); if (entryAtIndex.getSize() < 0) { throw new IOException("broken archive, entry with negative size"); } ++nonEmptyFileCounter; } else { entryAtIndex.setDirectory(isEmptyFile == null || !isEmptyFile.get(emptyFileCounter)); entryAtIndex.setAntiItem(isAnti != null && isAnti.get(emptyFileCounter)); entryAtIndex.setHasCrc(false); entryAtIndex.setSize(0); ++emptyFileCounter; } } final List entries = new ArrayList<>(); for (final SevenZArchiveEntry e : fileMap.values()) { if (e != null) { entries.add(e); } } archive.files = entries.toArray(SevenZArchiveEntry.EMPTY_SEVEN_Z_ARCHIVE_ENTRY_ARRAY); calculateStreamMap(archive); } private void checkEntryIsInitialized(final Map archiveEntries, final int index) { if (archiveEntries.get(index) == null) { archiveEntries.put(index, new SevenZArchiveEntry()); } } private void calculateStreamMap(final Archive archive) throws IOException { final StreamMap streamMap = new StreamMap(); int nextFolderPackStreamIndex = 0; final int numFolders = archive.folders != null ? 
archive.folders.length : 0;
    streamMap.folderFirstPackStreamIndex = new int[numFolders];
    for (int i = 0; i < numFolders; i++) {
        streamMap.folderFirstPackStreamIndex[i] = nextFolderPackStreamIndex;
        nextFolderPackStreamIndex += archive.folders[i].packedStreams.length;
    }
    // Prefix sums of packed sizes give each packed stream's offset within the pack area.
    long nextPackStreamOffset = 0;
    final int numPackSizes = archive.packSizes.length;
    streamMap.packStreamOffsets = new long[numPackSizes];
    for (int i = 0; i < numPackSizes; i++) {
        streamMap.packStreamOffsets[i] = nextPackStreamOffset;
        nextPackStreamOffset += archive.packSizes[i];
    }
    streamMap.folderFirstFileIndex = new int[numFolders];
    streamMap.fileFolderIndex = new int[archive.files.length];
    int nextFolderIndex = 0;
    int nextFolderUnpackStreamIndex = 0;
    for (int i = 0; i < archive.files.length; i++) {
        if (!archive.files[i].hasStream() && nextFolderUnpackStreamIndex == 0) {
            // Stream-less entry between folders: belongs to no folder.
            streamMap.fileFolderIndex[i] = -1;
            continue;
        }
        if (nextFolderUnpackStreamIndex == 0) {
            // Advance to the next folder that actually contains sub-streams.
            for (; nextFolderIndex < archive.folders.length; ++nextFolderIndex) {
                streamMap.folderFirstFileIndex[nextFolderIndex] = i;
                if (archive.folders[nextFolderIndex].numUnpackSubStreams > 0) {
                    break;
                }
            }
            if (nextFolderIndex >= archive.folders.length) {
                throw new IOException("Too few folders in archive");
            }
        }
        streamMap.fileFolderIndex[i] = nextFolderIndex;
        if (!archive.files[i].hasStream()) {
            continue;
        }
        ++nextFolderUnpackStreamIndex;
        if (nextFolderUnpackStreamIndex >= archive.folders[nextFolderIndex].numUnpackSubStreams) {
            // Folder exhausted; the next streamed file starts a new folder.
            ++nextFolderIndex;
            nextFolderUnpackStreamIndex = 0;
        }
    }
    archive.streamMap = streamMap;
}

/**
 * Build the decoding stream for the entry to be read.
 * This method may be called from a random access(getInputStream) or
 * sequential access(getNextEntry).
 * If this method is called from a random access, some entries may
 * need to be skipped(we put them to the deferredBlockStreams and
 * skip them when actually needed to improve the performance)
 *
 * @param entryIndex the index of the entry to be read
 * @param isRandomAccess is this called in a random access
 * @throws IOException if there are exceptions when reading the file
 */
private void buildDecodingStream(final int entryIndex, final boolean isRandomAccess) throws IOException {
    if (archive.streamMap == null) {
        throw new IOException("Archive doesn't contain stream information to read entries");
    }
    final int folderIndex = archive.streamMap.fileFolderIndex[entryIndex];
    if (folderIndex < 0) {
        // Entry has no stream (directory/empty file); nothing to decode.
        deferredBlockStreams.clear();
        // TODO: previously it'd return an empty stream?
        // new BoundedInputStream(new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY), 0);
        return;
    }
    final SevenZArchiveEntry file = archive.files[entryIndex];
    boolean isInSameFolder = false;
    if (currentFolderIndex == folderIndex) {
        // (COMPRESS-320).
        // The current entry is within the same (potentially opened) folder. The
        // previous stream has to be fully decoded before we can start reading
        // but don't do it eagerly -- if the user skips over the entire folder nothing
        // is effectively decompressed.
        if (entryIndex > 0) {
            file.setContentMethods(archive.files[entryIndex - 1].getContentMethods());
        }
        // if this is called in a random access, then the content methods of previous entry may be null
        // the content methods should be set to methods of the first entry as it must not be null,
        // and the content methods would only be set if the content methods was not set
        if(isRandomAccess && file.getContentMethods() == null) {
            final int folderFirstFileIndex = archive.streamMap.folderFirstFileIndex[folderIndex];
            final SevenZArchiveEntry folderFirstFile = archive.files[folderFirstFileIndex];
            file.setContentMethods(folderFirstFile.getContentMethods());
        }
        isInSameFolder = true;
    } else {
        currentFolderIndex = folderIndex;
        // We're opening a new folder. Discard any queued streams/ folder stream.
        reopenFolderInputStream(folderIndex, file);
    }
    boolean haveSkippedEntries = false;
    if (isRandomAccess) {
        // entries will only need to be skipped if it's a random access
        haveSkippedEntries = skipEntriesWhenNeeded(entryIndex, isInSameFolder, folderIndex);
    }
    if (isRandomAccess && currentEntryIndex == entryIndex && !haveSkippedEntries) {
        // we don't need to add another entry to the deferredBlockStreams when :
        // 1. If this method is called in a random access and the entry index
        // to be read equals to the current entry index, the input stream
        // has already been put in the deferredBlockStreams
        // 2. If this entry has not been read(which means no entries are skipped)
        return;
    }
    InputStream fileStream = new BoundedInputStream(currentFolderInputStream, file.getSize());
    if (file.getHasCrc()) {
        fileStream = new CRC32VerifyingInputStream(fileStream, file.getSize(), file.getCrcValue());
    }
    deferredBlockStreams.add(fileStream);
}

/**
 * Discard any queued streams/ folder stream, and reopen the current folder input stream.
*
 * @param folderIndex the index of the folder to reopen
 * @param file the 7z entry to read
 * @throws IOException if exceptions occur when reading the 7z file
 */
private void reopenFolderInputStream(final int folderIndex, final SevenZArchiveEntry file) throws IOException {
    deferredBlockStreams.clear();
    if (currentFolderInputStream != null) {
        currentFolderInputStream.close();
        currentFolderInputStream = null;
    }
    final Folder folder = archive.folders[folderIndex];
    final int firstPackStreamIndex = archive.streamMap.folderFirstPackStreamIndex[folderIndex];
    // Absolute offset = signature header + pack area start + stream offset within the pack area.
    final long folderOffset = SIGNATURE_HEADER_SIZE + archive.packPos
        + archive.streamMap.packStreamOffsets[firstPackStreamIndex];
    currentFolderInputStream = buildDecoderStack(folder, folderOffset, firstPackStreamIndex, file);
}

/**
 * Skip all the entries if needed.
 * Entries need to be skipped when:
 * <p>
 * 1. it's a random access
 * 2. one of these 2 condition is meet :
 * <p>
 * 2.1 {@code currentEntryIndex != entryIndex} : this means there are some entries
 * to be skipped({@code currentEntryIndex < entryIndex}) or the entry has already
 * been read({@code currentEntryIndex > entryIndex})
 * <p>
 * 2.2 {@code currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead}:
 * if the entry to be read is the current entry, but some data of it has
 * been read before, then we need to reopen the stream of the folder and
 * skip all the entries before the current entries
 *
 * @param entryIndex the entry to be read
 * @param isInSameFolder are the entry to be read and the current entry in the same folder
 * @param folderIndex the index of the folder which contains the entry
 * @return true if there are entries actually skipped
 * @throws IOException there are exceptions when skipping entries
 * @since 1.21
 */
private boolean skipEntriesWhenNeeded(final int entryIndex, final boolean isInSameFolder,
        final int folderIndex) throws IOException {
    final SevenZArchiveEntry file = archive.files[entryIndex];
    // if the entry to be read is the current entry, and the entry has not
    // been read yet, then there's nothing we need to do
    if (currentEntryIndex == entryIndex && !hasCurrentEntryBeenRead()) {
        return false;
    }
    // 1. if currentEntryIndex < entryIndex :
    // this means there are some entries to be skipped(currentEntryIndex < entryIndex)
    // 2. if currentEntryIndex > entryIndex || (currentEntryIndex == entryIndex && hasCurrentEntryBeenRead) :
    // this means the entry has already been read before, and we need to reopen the
    // stream of the folder and skip all the entries before the current entries
    int filesToSkipStartIndex = archive.streamMap.folderFirstFileIndex[currentFolderIndex];
    if (isInSameFolder) {
        if (currentEntryIndex < entryIndex) {
            // the entries between filesToSkipStartIndex and currentEntryIndex had already been skipped
            filesToSkipStartIndex = currentEntryIndex + 1;
        } else {
            // the entry is in the same folder of current entry, but it has already been read before, we need to reset
            // the position of the currentFolderInputStream to the beginning of folder, and then skip the files
            // from the start entry of the folder again
            reopenFolderInputStream(folderIndex, file);
        }
    }
    for (int i = filesToSkipStartIndex; i < entryIndex; i++) {
        final SevenZArchiveEntry fileToSkip = archive.files[i];
        InputStream fileStreamToSkip = new BoundedInputStream(currentFolderInputStream, fileToSkip.getSize());
        if (fileToSkip.getHasCrc()) {
            fileStreamToSkip = new CRC32VerifyingInputStream(fileStreamToSkip,
                fileToSkip.getSize(), fileToSkip.getCrcValue());
        }
        deferredBlockStreams.add(fileStreamToSkip);
        // set the content methods as well, it equals to file.getContentMethods() because they are in same folder
        fileToSkip.setContentMethods(file.getContentMethods());
    }
    return true;
}

/**
 * Find out if any data of current entry has been read or not.
 * This is achieved by comparing the bytes remaining to read
 * and the size of the file.
* * @return true if any data of current entry has been read * @since 1.21 */ private boolean hasCurrentEntryBeenRead() { boolean hasCurrentEntryBeenRead = false; if (!deferredBlockStreams.isEmpty()) { final InputStream currentEntryInputStream = deferredBlockStreams.get(deferredBlockStreams.size() - 1); // get the bytes remaining to read, and compare it with the size of // the file to figure out if the file has been read if (currentEntryInputStream instanceof CRC32VerifyingInputStream) { hasCurrentEntryBeenRead = ((CRC32VerifyingInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex].getSize(); } if (currentEntryInputStream instanceof BoundedInputStream) { hasCurrentEntryBeenRead = ((BoundedInputStream) currentEntryInputStream).getBytesRemaining() != archive.files[currentEntryIndex].getSize(); } } return hasCurrentEntryBeenRead; } private InputStream buildDecoderStack(final Folder folder, final long folderOffset, final int firstPackStreamIndex, final SevenZArchiveEntry entry) throws IOException { channel.position(folderOffset); InputStream inputStreamStack = new FilterInputStream(new BufferedInputStream( new BoundedSeekableByteChannelInputStream(channel, archive.packSizes[firstPackStreamIndex]))) { @Override public int read() throws IOException { final int r = in.read(); if (r >= 0) { count(1); } return r; } @Override public int read(final byte[] b) throws IOException { return read(b, 0, b.length); } @Override public int read(final byte[] b, final int off, final int len) throws IOException { if (len == 0) { return 0; } final int r = in.read(b, off, len); if (r >= 0) { count(r); } return r; } private void count(final int c) { compressedBytesReadFromCurrentEntry += c; } }; final LinkedList methods = new LinkedList<>(); for (final Coder coder : folder.getOrderedCoders()) { if (coder.numInStreams != 1 || coder.numOutStreams != 1) { throw new IOException("Multi input/output stream coders are not yet supported"); } final SevenZMethod 
method = SevenZMethod.byId(coder.decompressionMethodId); inputStreamStack = Coders.addDecoder(fileName, inputStreamStack, folder.getUnpackSizeForCoder(coder), coder, password, options.getMaxMemoryLimitInKb()); methods.addFirst(new SevenZMethodConfiguration(method, Coders.findByMethod(method).getOptionsFromCoder(coder, inputStreamStack))); } entry.setContentMethods(methods); if (folder.hasCrc) { return new CRC32VerifyingInputStream(inputStreamStack, folder.getUnpackSize(), folder.crc); } return inputStreamStack; } /** * Reads a byte of data. * * @return the byte read, or -1 if end of input is reached * @throws IOException * if an I/O error has occurred */ public int read() throws IOException { final int b = getCurrentStream().read(); if (b >= 0) { uncompressedBytesReadFromCurrentEntry++; } return b; } private InputStream getCurrentStream() throws IOException { if (archive.files[currentEntryIndex].getSize() == 0) { return new ByteArrayInputStream(ByteUtils.EMPTY_BYTE_ARRAY); } if (deferredBlockStreams.isEmpty()) { throw new IllegalStateException("No current 7z entry (call getNextEntry() first)."); } while (deferredBlockStreams.size() > 1) { // In solid compression mode we need to decompress all leading folder' // streams to get access to an entry. We defer this until really needed // so that entire blocks can be skipped without wasting time for decompression. try (final InputStream stream = deferredBlockStreams.remove(0)) { IOUtils.skip(stream, Long.MAX_VALUE); } compressedBytesReadFromCurrentEntry = 0; } return deferredBlockStreams.get(0); } /** * Returns an InputStream for reading the contents of the given entry. * *

For archives using solid compression randomly accessing * entries will be significantly slower than reading the archive * sequentially.

* * @param entry the entry to get the stream for. * @return a stream to read the entry from. * @throws IOException if unable to create an input stream from the zipentry * @since Compress 1.20 */ public InputStream getInputStream(final SevenZArchiveEntry entry) throws IOException { int entryIndex = -1; for (int i = 0; i < this.archive.files.length;i++) { if (entry == this.archive.files[i]) { entryIndex = i; break; } } if (entryIndex < 0) { throw new IllegalArgumentException("Can not find " + entry.getName() + " in " + this.fileName); } buildDecodingStream(entryIndex, true); currentEntryIndex = entryIndex; currentFolderIndex = archive.streamMap.fileFolderIndex[entryIndex]; return getCurrentStream(); } /** * Reads data into an array of bytes. * * @param b the array to write data to * @return the number of bytes read, or -1 if end of input is reached * @throws IOException * if an I/O error has occurred */ public int read(final byte[] b) throws IOException { return read(b, 0, b.length); } /** * Reads data into an array of bytes. * * @param b the array to write data to * @param off offset into the buffer to start filling at * @param len of bytes to read * @return the number of bytes read, or -1 if end of input is reached * @throws IOException * if an I/O error has occurred */ public int read(final byte[] b, final int off, final int len) throws IOException { if (len == 0) { return 0; } final int cnt = getCurrentStream().read(b, off, len); if (cnt > 0) { uncompressedBytesReadFromCurrentEntry += cnt; } return cnt; } /** * Provides statistics for bytes read from the current entry. 
* * @return statistics for bytes read from the current entry * @since 1.17 */ public InputStreamStatistics getStatisticsForCurrentEntry() { return new InputStreamStatistics() { @Override public long getCompressedCount() { return compressedBytesReadFromCurrentEntry; } @Override public long getUncompressedCount() { return uncompressedBytesReadFromCurrentEntry; } }; } private static long readUint64(final ByteBuffer in) throws IOException { // long rather than int as it might get shifted beyond the range of an int final long firstByte = getUnsignedByte(in); int mask = 0x80; long value = 0; for (int i = 0; i < 8; i++) { if ((firstByte & mask) == 0) { return value | ((firstByte & (mask - 1)) << (8 * i)); } final long nextByte = getUnsignedByte(in); value |= nextByte << (8 * i); mask >>>= 1; } return value; } private static char getChar(final ByteBuffer buf) throws IOException { if (buf.remaining() < 2) { throw new EOFException(); } return buf.getChar(); } private static int getInt(final ByteBuffer buf) throws IOException { if (buf.remaining() < 4) { throw new EOFException(); } return buf.getInt(); } private static long getLong(final ByteBuffer buf) throws IOException { if (buf.remaining() < 8) { throw new EOFException(); } return buf.getLong(); } private static void get(final ByteBuffer buf, final byte[] to) throws IOException { if (buf.remaining() < to.length) { throw new EOFException(); } buf.get(to); } private static int getUnsignedByte(final ByteBuffer buf) throws IOException { if (!buf.hasRemaining()) { throw new EOFException(); } return buf.get() & 0xff; } /** * Checks if the signature matches what is expected for a 7z file. * * @param signature * the bytes to check * @param length * the number of bytes to check * @return true, if this is the signature of a 7z archive. 
* @since 1.8 */ public static boolean matches(final byte[] signature, final int length) { if (length < sevenZSignature.length) { return false; } for (int i = 0; i < sevenZSignature.length; i++) { if (signature[i] != sevenZSignature[i]) { return false; } } return true; } private static long skipBytesFully(final ByteBuffer input, long bytesToSkip) throws IOException { if (bytesToSkip < 1) { return 0; } final int current = input.position(); final int maxSkip = input.remaining(); if (maxSkip < bytesToSkip) { bytesToSkip = maxSkip; } input.position(current + (int) bytesToSkip); return bytesToSkip; } private void readFully(final ByteBuffer buf) throws IOException { buf.rewind(); IOUtils.readFully(channel, buf); buf.flip(); } @Override public String toString() { return archive.toString(); } /** * Derives a default file name from the archive name - if known. * *

This implements the same heuristics the 7z tools use. In * 7z's case if an archive contains entries without a name - * i.e. {@link SevenZArchiveEntry#getName} returns {@code null} - * then its command line and GUI tools will use this default name * when extracting the entries.

* * @return null if the name of the archive is unknown. Otherwise * if the name of the archive has got any extension, it is * stripped and the remainder returned. Finally if the name of the * archive hasn't got any extension then a {@code ~} character is * appended to the archive name. * * @since 1.19 */ public String getDefaultName() { if (DEFAULT_FILE_NAME.equals(fileName) || fileName == null) { return null; } final String lastSegment = new File(fileName).getName(); final int dotPos = lastSegment.lastIndexOf("."); if (dotPos > 0) { // if the file starts with a dot then this is not an extension return lastSegment.substring(0, dotPos); } return lastSegment + "~"; } private static byte[] utf16Encode(final char[] chars) throws IOException { if (chars == null) { return null; } byte[] output = new byte[chars.length * 2]; for (int i = 0; i < chars.length; i++) { char ch = chars[i]; output[i * 2] = (byte) ch; output[i * 2 + 1] = (byte) (ch >>> 8); } return output; } private static int assertFitsIntoNonNegativeInt(final String what, final long value) throws IOException { if (value > Integer.MAX_VALUE || value < 0) { throw new IOException("Cannot handle " + what + " " + value); } return (int) value; } private static class ArchiveStatistics { private int numberOfPackedStreams; private long numberOfCoders; private long numberOfOutStreams; private long numberOfInStreams; private long numberOfUnpackSubStreams; private int numberOfFolders; private BitSet folderHasCrc; private int numberOfEntries; private int numberOfEntriesWithStream; @Override public String toString() { return "Archive with " + numberOfEntries + " entries in " + numberOfFolders + " folders. 
Estimated size " + (estimateSize()/1024l) + " kB."; } long estimateSize() { long lowerBound = 16l * numberOfPackedStreams /* packSizes, packCrcs in Archive */ + numberOfPackedStreams / 8 /* packCrcsDefined in Archive */ + numberOfFolders * folderSize() /* folders in Archive */ + numberOfCoders * coderSize() /* coders in Folder */ + (numberOfOutStreams - numberOfFolders) * bindPairSize() /* bindPairs in Folder */ + 8l * (numberOfInStreams - numberOfOutStreams + numberOfFolders) /* packedStreams in Folder */ + 8l * numberOfOutStreams /* unpackSizes in Folder */ + numberOfEntries * entrySize() /* files in Archive */ + streamMapSize() ; return 2 * lowerBound /* conservative guess */; } void assertValidity(int maxMemoryLimitInKb) throws IOException { if (numberOfEntriesWithStream > 0 && numberOfFolders == 0) { throw new IOException("archive with entries but no folders"); } if (numberOfEntriesWithStream > numberOfUnpackSubStreams) { throw new IOException("archive doesn't contain enough substreams for entries"); } final long memoryNeededInKb = estimateSize() / 1024; if (maxMemoryLimitInKb < memoryNeededInKb) { throw new MemoryLimitException(memoryNeededInKb, maxMemoryLimitInKb); } } private long folderSize() { return 30; /* nested arrays are accounted for separately */ } private long coderSize() { return 2 /* methodId is between 1 and four bytes currently, COPY and LZMA2 are the most common with 1 */ + 16 + 4 /* properties, guess */ ; } private long bindPairSize() { return 16; } private long entrySize() { return 100; /* real size depends on name length, everything without name is about 70 bytes */ } private long streamMapSize() { return 8 * numberOfFolders /* folderFirstPackStreamIndex, folderFirstFileIndex */ + 8 * numberOfPackedStreams /* packStreamOffsets */ + 4 * numberOfEntries /* fileFolderIndex */ ; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy