/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package com.feilong.lib.compress.archivers.zip;
import static com.feilong.lib.compress.archivers.zip.ZipConstants.DWORD;
import static com.feilong.lib.compress.archivers.zip.ZipConstants.SHORT;
import static com.feilong.lib.compress.archivers.zip.ZipConstants.WORD;
import static com.feilong.lib.compress.archivers.zip.ZipConstants.ZIP64_MAGIC;
import static com.feilong.lib.compress.archivers.zip.ZipConstants.ZIP64_MAGIC_SHORT;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.SeekableByteChannel;
import java.nio.file.Files;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.zip.Inflater;
import java.util.zip.ZipException;
import com.feilong.lib.compress.archivers.EntryStreamOffsets;
import com.feilong.lib.compress.utils.CountingInputStream;
import com.feilong.lib.compress.utils.IOUtils;
import com.feilong.lib.compress.utils.InputStreamStatistics;
/**
 * Replacement for {@code java.util.zip.ZipFile}.
 *
 * <p>This class adds support for file name encodings other than UTF-8
 * (which is required to work on ZIP files created by native zip tools)
 * and is able to skip a preamble like the one found in self-extracting
 * archives. Furthermore it returns instances of
 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry}
 * instead of {@code java.util.zip.ZipEntry}.</p>
 *
 * <p>It doesn't extend {@code java.util.zip.ZipFile} as it would
 * have to reimplement all methods anyway. Like {@code java.util.zip.ZipFile},
 * it uses SeekableByteChannel under the covers and supports compressed and
 * uncompressed entries. As of Apache Commons Compress 1.3 it also
 * transparently supports Zip64 extensions and thus individual entries
 * and archives larger than 4 GB or with more than 65536 entries.</p>
 *
 * <p>The method signatures mimic the ones of
 * {@code java.util.zip.ZipFile}, with a couple of exceptions:</p>
 * <ul>
 * <li>There is no getName method.</li>
 * <li>entries has been renamed to getEntries.</li>
 * <li>getEntries and getEntry return
 * {@code org.apache.commons.compress.archivers.zip.ZipArchiveEntry} instances.</li>
 * <li>close is allowed to throw IOException.</li>
 * </ul>
*
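 * <p>A typical usage sketch; the archive path is only an example:</p>
 *
 * <pre>{@code
 * try (ZipFile zipFile = new ZipFile(new File("/tmp/archive.zip"))) {
 *     Enumeration<ZipArchiveEntry> en = zipFile.getEntries();
 *     while (en.hasMoreElements()) {
 *         ZipArchiveEntry entry = en.nextElement();
 *         System.out.println(entry.getName() + " (" + entry.getSize() + " bytes)");
 *     }
 * }
 * }</pre>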
*/
public class ZipFile implements Closeable{
private static final int HASH_SIZE = 509;
static final int NIBLET_MASK = 0x0f;
static final int BYTE_SHIFT = 8;
private static final int POS_0 = 0;
private static final int POS_1 = 1;
private static final int POS_2 = 2;
private static final int POS_3 = 3;
private static final byte[] ONE_ZERO_BYTE = new byte[1];
/**
* List of entries in the order they appear inside the central
* directory.
*/
    private final List<ZipArchiveEntry> entries = new LinkedList<>();
/**
* Maps String to list of ZipArchiveEntrys, name -> actual entries.
*/
    private final Map<String, LinkedList<ZipArchiveEntry>> nameMap = new HashMap<>(HASH_SIZE);
/**
* The encoding to use for file names and the file comment.
*
*
* For a list of possible values see http://java.sun.com/j2se/1.5.0/docs/guide/intl/encoding.doc.html.
* Defaults to UTF-8.
*
*/
private final String encoding;
/**
* The zip encoding to use for file names and the file comment.
*/
private final ZipEncoding zipEncoding;
/**
* File name of actual source.
*/
private final String archiveName;
/**
* The actual data source.
*/
private final SeekableByteChannel archive;
/**
* Whether to look for and use Unicode extra fields.
*/
private final boolean useUnicodeExtraFields;
/**
* Whether the file is closed.
*/
private volatile boolean closed = true;
/**
     * Whether the zip archive is a split zip archive.
*/
private final boolean isSplitZipArchive;
// cached buffers - must only be used locally in the class (COMPRESS-172 - reduce garbage collection)
private final byte[] dwordBuf = new byte[DWORD];
private final byte[] wordBuf = new byte[WORD];
private final byte[] cfhBuf = new byte[CFH_LEN];
private final byte[] shortBuf = new byte[SHORT];
private final ByteBuffer dwordBbuf = ByteBuffer.wrap(dwordBuf);
private final ByteBuffer wordBbuf = ByteBuffer.wrap(wordBuf);
private final ByteBuffer cfhBbuf = ByteBuffer.wrap(cfhBuf);
private final ByteBuffer shortBbuf = ByteBuffer.wrap(shortBuf);
/**
* Opens the given file for reading, assuming "UTF8" for file names.
*
* @param f
* the archive.
*
* @throws IOException
* if an error occurs while reading the file.
*/
public ZipFile(final File f) throws IOException{
this(f, ZipEncodingHelper.UTF8);
}
/**
* Opens the given file for reading, assuming "UTF8".
*
* @param name
* name of the archive.
*
* @throws IOException
* if an error occurs while reading the file.
*/
public ZipFile(final String name) throws IOException{
this(new File(name), ZipEncodingHelper.UTF8);
}
/**
* Opens the given file for reading, assuming the specified
* encoding for file names, scanning unicode extra fields.
*
* @param name
* name of the archive.
* @param encoding
* the encoding to use for file names, use null
* for the platform's default encoding
*
* @throws IOException
* if an error occurs while reading the file.
*/
public ZipFile(final String name, final String encoding) throws IOException{
this(new File(name), encoding, true);
}
/**
* Opens the given file for reading, assuming the specified
* encoding for file names and scanning for unicode extra fields.
*
* @param f
* the archive.
* @param encoding
* the encoding to use for file names, use null
* for the platform's default encoding
*
* @throws IOException
* if an error occurs while reading the file.
*/
public ZipFile(final File f, final String encoding) throws IOException{
this(f, encoding, true);
}
/**
* Opens the given file for reading, assuming the specified
* encoding for file names.
*
* @param f
* the archive.
* @param encoding
* the encoding to use for file names, use null
* for the platform's default encoding
* @param useUnicodeExtraFields
* whether to use InfoZIP Unicode
* Extra Fields (if present) to set the file names.
*
* @throws IOException
* if an error occurs while reading the file.
*/
public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields) throws IOException{
this(f, encoding, useUnicodeExtraFields, false);
}
/**
* Opens the given file for reading, assuming the specified
* encoding for file names.
*
*
*
* By default the central directory record and all local file headers of the archive will be read immediately
* which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter
* can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header
* may contain information not present inside of the central directory which will not be available when the argument
* is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will effectively be ignored. Also
* {@link #getRawInputStream} is always going to return {@code null} if {@code ignoreLocalFileHeader} is {@code
* true}.
*
*
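     * <p>A sketch that restricts parsing to the central directory; the file name is
     * only an example:</p>
     *
     * <pre>{@code
     * try (ZipFile zipFile = new ZipFile(new File("big-archive.zip"), "UTF-8", true, true)) {
     *     // only the central directory has been read; getRawInputStream
     *     // will return null for every entry
     * }
     * }</pre>
     *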
* @param f
* the archive.
* @param encoding
* the encoding to use for file names, use null
* for the platform's default encoding
* @param useUnicodeExtraFields
* whether to use InfoZIP Unicode
* Extra Fields (if present) to set the file names.
* @param ignoreLocalFileHeader
* whether to ignore information
* stored inside the local file header (see the notes in this method's javadoc)
*
* @throws IOException
* if an error occurs while reading the file.
* @since 1.19
*/
public ZipFile(final File f, final String encoding, final boolean useUnicodeExtraFields, final boolean ignoreLocalFileHeader)
throws IOException{
this(Files.newByteChannel(f.toPath(), EnumSet.of(StandardOpenOption.READ)), f.getAbsolutePath(), encoding, useUnicodeExtraFields,
true, ignoreLocalFileHeader);
}
/**
* Opens the given channel for reading, assuming "UTF8" for file names.
*
*
* {@link
* com.feilong.lib.compress.utils.SeekableInMemoryByteChannel}
* allows you to read from an in-memory archive.
*
*
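     * <p>A minimal sketch, assuming {@code zipBytes} is a byte array that holds a
     * complete ZIP archive:</p>
     *
     * <pre>{@code
     * SeekableByteChannel channel = new SeekableInMemoryByteChannel(zipBytes);
     * try (ZipFile zipFile = new ZipFile(channel)) {
     *     // work with zipFile
     * }
     * }</pre>
     *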
* @param channel
* the archive.
*
* @throws IOException
* if an error occurs while reading the file.
* @since 1.13
*/
public ZipFile(final SeekableByteChannel channel) throws IOException{
this(channel, "unknown archive", ZipEncodingHelper.UTF8, true);
}
/**
* Opens the given channel for reading, assuming the specified
* encoding for file names.
*
*
* {@link
* com.feilong.lib.compress.utils.SeekableInMemoryByteChannel}
* allows you to read from an in-memory archive.
*
*
* @param channel
* the archive.
* @param encoding
* the encoding to use for file names, use null
* for the platform's default encoding
*
* @throws IOException
* if an error occurs while reading the file.
* @since 1.13
*/
public ZipFile(final SeekableByteChannel channel, final String encoding) throws IOException{
this(channel, "unknown archive", encoding, true);
}
/**
* Opens the given channel for reading, assuming the specified
* encoding for file names.
*
*
* {@link
* com.feilong.lib.compress.utils.SeekableInMemoryByteChannel}
* allows you to read from an in-memory archive.
*
*
* @param channel
* the archive.
* @param archiveName
* name of the archive, used for error messages only.
* @param encoding
* the encoding to use for file names, use null
* for the platform's default encoding
* @param useUnicodeExtraFields
* whether to use InfoZIP Unicode
* Extra Fields (if present) to set the file names.
*
* @throws IOException
* if an error occurs while reading the file.
* @since 1.13
*/
public ZipFile(final SeekableByteChannel channel, final String archiveName, final String encoding, final boolean useUnicodeExtraFields)
throws IOException{
this(channel, archiveName, encoding, useUnicodeExtraFields, false, false);
}
/**
* Opens the given channel for reading, assuming the specified
* encoding for file names.
*
*
* {@link
* com.feilong.lib.compress.utils.SeekableInMemoryByteChannel}
* allows you to read from an in-memory archive.
*
*
*
* By default the central directory record and all local file headers of the archive will be read immediately
* which may take a considerable amount of time when the archive is big. The {@code ignoreLocalFileHeader} parameter
* can be set to {@code true} which restricts parsing to the central directory. Unfortunately the local file header
* may contain information not present inside of the central directory which will not be available when the argument
* is set to {@code true}. This includes the content of the Unicode extra field, so setting {@code
     * ignoreLocalFileHeader} to {@code true} means {@code useUnicodeExtraFields} will effectively be ignored. Also
* {@link #getRawInputStream} is always going to return {@code null} if {@code ignoreLocalFileHeader} is {@code
* true}.
*
*
* @param channel
* the archive.
* @param archiveName
* name of the archive, used for error messages only.
* @param encoding
* the encoding to use for file names, use null
* for the platform's default encoding
* @param useUnicodeExtraFields
* whether to use InfoZIP Unicode
* Extra Fields (if present) to set the file names.
* @param ignoreLocalFileHeader
* whether to ignore information
* stored inside the local file header (see the notes in this method's javadoc)
*
* @throws IOException
* if an error occurs while reading the file.
* @since 1.19
*/
public ZipFile(final SeekableByteChannel channel, final String archiveName, final String encoding, final boolean useUnicodeExtraFields,
final boolean ignoreLocalFileHeader) throws IOException{
this(channel, archiveName, encoding, useUnicodeExtraFields, false, ignoreLocalFileHeader);
}
private ZipFile(final SeekableByteChannel channel, final String archiveName, final String encoding, final boolean useUnicodeExtraFields,
final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException{
isSplitZipArchive = (channel instanceof ZipSplitReadOnlySeekableByteChannel);
this.archiveName = archiveName;
this.encoding = encoding;
this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
this.useUnicodeExtraFields = useUnicodeExtraFields;
archive = channel;
boolean success = false;
try{
            final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = populateFromCentralDirectory();
if (!ignoreLocalFileHeader){
resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
}
fillNameMap();
success = true;
}finally{
closed = !success;
if (!success && closeOnError){
IOUtils.closeQuietly(archive);
}
}
}
/**
* The encoding to use for file names and the file comment.
*
* @return null if using the platform's default character encoding.
*/
public String getEncoding(){
return encoding;
}
/**
* Closes the archive.
*
* @throws IOException
* if an error occurs closing the archive.
*/
@Override
public void close() throws IOException{
// this flag is only written here and read in finalize() which
// can never be run in parallel.
// no synchronization needed.
closed = true;
archive.close();
}
/**
     * Closes a ZipFile quietly; throws no IOException and does nothing
     * on a null parameter.
*
* @param zipfile
* file to close, can be null
*/
public static void closeQuietly(final ZipFile zipfile){
IOUtils.closeQuietly(zipfile);
}
/**
* Returns all entries.
*
*
* Entries will be returned in the same order they appear
* within the archive's central directory.
*
*
* @return all entries as {@link ZipArchiveEntry} instances
*/
    public Enumeration<ZipArchiveEntry> getEntries(){
return Collections.enumeration(entries);
}
/**
* Returns all entries in physical order.
*
*
* Entries will be returned in the same order their contents
* appear within the archive.
*
*
* @return all entries as {@link ZipArchiveEntry} instances
*
* @since 1.1
*/
    public Enumeration<ZipArchiveEntry> getEntriesInPhysicalOrder(){
final ZipArchiveEntry[] allEntries = entries.toArray(new ZipArchiveEntry[entries.size()]);
Arrays.sort(allEntries, offsetComparator);
return Collections.enumeration(Arrays.asList(allEntries));
}
/**
* Returns a named entry - or {@code null} if no entry by
* that name exists.
*
*
* If multiple entries with the same name exist the first entry
* in the archive's central directory by that name is
* returned.
*
*
* @param name
* name of the entry.
* @return the ZipArchiveEntry corresponding to the given name - or
* {@code null} if not present.
*/
public ZipArchiveEntry getEntry(final String name){
        final LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
return entriesOfThatName != null ? entriesOfThatName.getFirst() : null;
}
/**
* Returns all named entries in the same order they appear within
* the archive's central directory.
*
* @param name
* name of the entry.
* @return the Iterable<ZipArchiveEntry> corresponding to the
* given name
* @since 1.6
*/
    public Iterable<ZipArchiveEntry> getEntries(final String name){
        final List<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
        return entriesOfThatName != null ? entriesOfThatName : Collections.<ZipArchiveEntry> emptyList();
}
/**
* Returns all named entries in the same order their contents
* appear within the archive.
*
* @param name
* name of the entry.
* @return the Iterable<ZipArchiveEntry> corresponding to the
* given name
* @since 1.6
*/
    public Iterable<ZipArchiveEntry> getEntriesInPhysicalOrder(final String name){
ZipArchiveEntry[] entriesOfThatName = new ZipArchiveEntry[0];
if (nameMap.containsKey(name)){
entriesOfThatName = nameMap.get(name).toArray(entriesOfThatName);
Arrays.sort(entriesOfThatName, offsetComparator);
}
return Arrays.asList(entriesOfThatName);
}
/**
* Whether this class is able to read the given entry.
*
*
* May return false if it is set up to use encryption or a
* compression method that hasn't been implemented yet.
*
*
* @since 1.1
* @param ze
* the entry
* @return whether this class is able to read the given entry.
*/
public boolean canReadEntryData(final ZipArchiveEntry ze){
return ZipUtil.canHandleEntryData(ze);
}
/**
* Expose the raw stream of the archive entry (compressed form).
*
*
* This method does not relate to how/if we understand the payload in the
* stream, since we really only intend to move it on to somewhere else.
*
*
* @param ze
* The entry to get the stream for
* @return The raw input stream containing (possibly) compressed data.
* @since 1.11
*/
public InputStream getRawInputStream(final ZipArchiveEntry ze){
if (!(ze instanceof Entry)){
return null;
}
final long start = ze.getDataOffset();
if (start == EntryStreamOffsets.OFFSET_UNKNOWN){
return null;
}
return createBoundedInputStream(start, ze.getCompressedSize());
}
/**
     * Transfer selected entries from this zipfile to a given {@link ZipArchiveOutputStream}.
* Compression and all other attributes will be as in this file.
*
* This method transfers entries based on the central directory of the zip file.
*
*
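     * <p>A sketch that copies every entry whose name does not end in {@code ".bak"};
     * the file names are only examples:</p>
     *
     * <pre>{@code
     * try (ZipFile source = new ZipFile(new File("source.zip"));
     *      ZipArchiveOutputStream target = new ZipArchiveOutputStream(new File("filtered.zip"))) {
     *     source.copyRawEntries(target, entry -> !entry.getName().endsWith(".bak"));
     * }
     * }</pre>
     *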
* @param target
* The zipArchiveOutputStream to write the entries to
* @param predicate
* A predicate that selects which entries to write
* @throws IOException
* on error
*/
public void copyRawEntries(final ZipArchiveOutputStream target,final ZipArchiveEntryPredicate predicate) throws IOException{
        final Enumeration<ZipArchiveEntry> src = getEntriesInPhysicalOrder();
while (src.hasMoreElements()){
final ZipArchiveEntry entry = src.nextElement();
if (predicate.test(entry)){
target.addRawArchiveEntry(entry, getRawInputStream(entry));
}
}
}
/**
* Returns an InputStream for reading the contents of the given entry.
*
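     * <p>A minimal sketch, assuming {@code zipFile} is an open ZipFile and
     * {@code entry} was obtained from it, e.g. via {@code getEntry}:</p>
     *
     * <pre>{@code
     * try (InputStream in = zipFile.getInputStream(entry)) {
     *     byte[] content = IOUtils.toByteArray(in);
     * }
     * }</pre>
     *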
* @param ze
* the entry to get the stream for.
* @return a stream to read the entry from. The returned stream
* implements {@link InputStreamStatistics}.
* @throws IOException
* if unable to create an input stream from the zipentry
*/
public InputStream getInputStream(final ZipArchiveEntry ze) throws IOException{
if (!(ze instanceof Entry)){
return null;
}
// cast validity is checked just above
ZipUtil.checkRequestedFeatures(ze);
final long start = getDataOffset(ze);
// doesn't get closed if the method is not supported - which
// should never happen because of the checkRequestedFeatures
// call above
final InputStream is = new BufferedInputStream(createBoundedInputStream(start, ze.getCompressedSize())); //NOSONAR
switch (ZipMethod.getMethodByCode(ze.getMethod())) {
case STORED:
return new StoredStatisticsStream(is);
// case UNSHRINKING:
// return new UnshrinkingInputStream(is);
case IMPLODING:
return new ExplodingInputStream(
ze.getGeneralPurposeBit().getSlidingDictionarySize(),
ze.getGeneralPurposeBit().getNumberOfShannonFanoTrees(),
is);
case DEFLATED:
final Inflater inflater = new Inflater(true);
// Inflater with nowrap=true has this odd contract for a zero padding
// byte following the data stream; this used to be zlib's requirement
// and has been fixed a long time ago, but the contract persists so
// we comply.
// https://docs.oracle.com/javase/7/docs/api/java/util/zip/Inflater.html#Inflater(boolean)
return new InflaterInputStreamWithStatistics(
new SequenceInputStream(is, new ByteArrayInputStream(ONE_ZERO_BYTE)),
inflater){
@Override
public void close() throws IOException{
try{
super.close();
}finally{
inflater.end();
}
}
};
// case BZIP2:
// return new BZip2CompressorInputStream(is);
// case ENHANCED_DEFLATED:
// return new Deflate64CompressorInputStream(is);
case AES_ENCRYPTED:
case EXPANDING_LEVEL_1:
case EXPANDING_LEVEL_2:
case EXPANDING_LEVEL_3:
case EXPANDING_LEVEL_4:
case JPEG:
case LZMA:
case PKWARE_IMPLODING:
case PPMD:
case TOKENIZATION:
case UNKNOWN:
case WAVPACK:
case XZ:
default:
throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(ze.getMethod()), ze);
}
}
/**
*
* Convenience method to return the entry's content as a String if isUnixSymlink()
* returns true for it, otherwise returns null.
*
*
*
* This method assumes the symbolic link's file name uses the
     * same encoding that has been specified for this ZipFile.
*
*
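     * <p>A minimal sketch, assuming {@code zipFile} is an open ZipFile whose archive
     * contains a symbolic link entry named {@code "link"}:</p>
     *
     * <pre>{@code
     * ZipArchiveEntry entry = zipFile.getEntry("link");
     * String target = zipFile.getUnixSymlink(entry); // null unless entry is a symlink
     * }</pre>
     *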
* @param entry
* ZipArchiveEntry object that represents the symbolic link
* @return entry's content as a String
* @throws IOException
* problem with content's input stream
* @since 1.5
*/
public String getUnixSymlink(final ZipArchiveEntry entry) throws IOException{
if (entry != null && entry.isUnixSymlink()){
try (InputStream in = getInputStream(entry)){
return zipEncoding.decode(IOUtils.toByteArray(in));
}
}
return null;
}
/**
* Ensures that the close method of this zipfile is called when
* there are no more references to it.
*
* @see #close()
*/
@Override
protected void finalize() throws Throwable{
try{
if (!closed){
System.err.println("Cleaning up unclosed ZipFile for archive " + archiveName);
close();
}
}finally{
super.finalize();
}
}
/**
* Length of a "central directory" entry structure without file
* name, extra fields or comment.
*/
private static final int CFH_LEN = /* version made by */ SHORT/* version needed to extract */ + SHORT
/* general purpose bit flag */ + SHORT
/* compression method */ + SHORT
/* last mod file time */ + SHORT
/* last mod file date */ + SHORT
/* crc-32 */ + WORD
/* compressed size */ + WORD
/* uncompressed size */ + WORD
/* file name length */ + SHORT
/* extra field length */ + SHORT
/* file comment length */ + SHORT
/* disk number start */ + SHORT
/* internal file attributes */ + SHORT
/* external file attributes */ + WORD
/* relative offset of local header */ + WORD;
private static final long CFH_SIG = ZipLong.getValue(ZipArchiveOutputStream.CFH_SIG);
/**
* Reads the central directory of the given archive and populates
* the internal tables with ZipArchiveEntry instances.
*
*
* The ZipArchiveEntrys will know all data that can be obtained from
* the central directory alone, but not the data that requires the
* local file header or additional data to be read.
*
*
* @return a map of zipentries that didn't have the language
* encoding flag set when read.
*/
    private Map<ZipArchiveEntry, NameAndComment> populateFromCentralDirectory() throws IOException{
        final HashMap<ZipArchiveEntry, NameAndComment> noUTF8Flag = new HashMap<>();
positionAtCentralDirectory();
wordBbuf.rewind();
IOUtils.readFully(archive, wordBbuf);
long sig = ZipLong.getValue(wordBuf);
if (sig != CFH_SIG && startsWithLocalFileHeader()){
throw new IOException("Central directory is empty, can't expand" + " corrupt archive.");
}
while (sig == CFH_SIG){
readCentralDirectoryEntry(noUTF8Flag);
wordBbuf.rewind();
IOUtils.readFully(archive, wordBbuf);
sig = ZipLong.getValue(wordBuf);
}
return noUTF8Flag;
}
/**
     * Reads an individual entry of the central directory, creates a
* ZipArchiveEntry from it and adds it to the global maps.
*
* @param noUTF8Flag
* map used to collect entries that don't have
* their UTF-8 flag set and whose name will be set by data read
* from the local file header later. The current entry may be
* added to this map.
*/
    private void readCentralDirectoryEntry(final Map<ZipArchiveEntry, NameAndComment> noUTF8Flag) throws IOException{
cfhBbuf.rewind();
IOUtils.readFully(archive, cfhBbuf);
int off = 0;
final Entry ze = new Entry();
final int versionMadeBy = ZipShort.getValue(cfhBuf, off);
off += SHORT;
ze.setVersionMadeBy(versionMadeBy);
ze.setPlatform((versionMadeBy >> BYTE_SHIFT) & NIBLET_MASK);
ze.setVersionRequired(ZipShort.getValue(cfhBuf, off));
off += SHORT; // version required
final GeneralPurposeBit gpFlag = GeneralPurposeBit.parse(cfhBuf, off);
final boolean hasUTF8Flag = gpFlag.usesUTF8ForNames();
final ZipEncoding entryEncoding = hasUTF8Flag ? ZipEncodingHelper.UTF8_ZIP_ENCODING : zipEncoding;
if (hasUTF8Flag){
ze.setNameSource(ZipArchiveEntry.NameSource.NAME_WITH_EFS_FLAG);
}
ze.setGeneralPurposeBit(gpFlag);
ze.setRawFlag(ZipShort.getValue(cfhBuf, off));
off += SHORT;
//noinspection MagicConstant
ze.setMethod(ZipShort.getValue(cfhBuf, off));
off += SHORT;
final long time = ZipUtil.dosToJavaTime(ZipLong.getValue(cfhBuf, off));
ze.setTime(time);
off += WORD;
ze.setCrc(ZipLong.getValue(cfhBuf, off));
off += WORD;
ze.setCompressedSize(ZipLong.getValue(cfhBuf, off));
off += WORD;
ze.setSize(ZipLong.getValue(cfhBuf, off));
off += WORD;
final int fileNameLen = ZipShort.getValue(cfhBuf, off);
off += SHORT;
final int extraLen = ZipShort.getValue(cfhBuf, off);
off += SHORT;
final int commentLen = ZipShort.getValue(cfhBuf, off);
off += SHORT;
ze.setDiskNumberStart(ZipShort.getValue(cfhBuf, off));
off += SHORT;
ze.setInternalAttributes(ZipShort.getValue(cfhBuf, off));
off += SHORT;
ze.setExternalAttributes(ZipLong.getValue(cfhBuf, off));
off += WORD;
final byte[] fileName = new byte[fileNameLen];
IOUtils.readFully(archive, ByteBuffer.wrap(fileName));
ze.setName(entryEncoding.decode(fileName), fileName);
// LFH offset,
ze.setLocalHeaderOffset(ZipLong.getValue(cfhBuf, off));
// data offset will be filled later
entries.add(ze);
final byte[] cdExtraData = new byte[extraLen];
IOUtils.readFully(archive, ByteBuffer.wrap(cdExtraData));
ze.setCentralDirectoryExtra(cdExtraData);
setSizesAndOffsetFromZip64Extra(ze);
final byte[] comment = new byte[commentLen];
IOUtils.readFully(archive, ByteBuffer.wrap(comment));
ze.setComment(entryEncoding.decode(comment));
if (!hasUTF8Flag && useUnicodeExtraFields){
noUTF8Flag.put(ze, new NameAndComment(fileName, comment));
}
ze.setStreamContiguous(true);
}
/**
* If the entry holds a Zip64 extended information extra field,
* read sizes from there if the entry's sizes are set to
     * 0xFFFFFFFF, do the same for the offset of the local file
* header.
*
*
* Ensures the Zip64 extra either knows both compressed and
     * uncompressed size or neither of them, as the internal logic in
* ExtraFieldUtils forces the field to create local header data
* even if they are never used - and here a field with only one
* size would be invalid.
*
*/
private void setSizesAndOffsetFromZip64Extra(final ZipArchiveEntry ze) throws IOException{
final Zip64ExtendedInformationExtraField z64 = (Zip64ExtendedInformationExtraField) ze
.getExtraField(Zip64ExtendedInformationExtraField.HEADER_ID);
if (z64 != null){
final boolean hasUncompressedSize = ze.getSize() == ZIP64_MAGIC;
final boolean hasCompressedSize = ze.getCompressedSize() == ZIP64_MAGIC;
final boolean hasRelativeHeaderOffset = ze.getLocalHeaderOffset() == ZIP64_MAGIC;
final boolean hasDiskStart = ze.getDiskNumberStart() == ZIP64_MAGIC_SHORT;
z64.reparseCentralDirectoryData(hasUncompressedSize, hasCompressedSize, hasRelativeHeaderOffset, hasDiskStart);
if (hasUncompressedSize){
ze.setSize(z64.getSize().getLongValue());
}else if (hasCompressedSize){
z64.setSize(new ZipEightByteInteger(ze.getSize()));
}
if (hasCompressedSize){
ze.setCompressedSize(z64.getCompressedSize().getLongValue());
}else if (hasUncompressedSize){
z64.setCompressedSize(new ZipEightByteInteger(ze.getCompressedSize()));
}
if (hasRelativeHeaderOffset){
ze.setLocalHeaderOffset(z64.getRelativeHeaderOffset().getLongValue());
}
if (hasDiskStart){
ze.setDiskNumberStart(z64.getDiskStartNumber().getValue());
}
}
}
/**
* Length of the "End of central directory record" - which is
* supposed to be the last structure of the archive - without file
* comment.
*/
    static final int MIN_EOCD_SIZE = /* end of central dir signature */ WORD
                    /* number of this disk */ + SHORT
                    /* number of the disk with the */
                    /* start of the central directory */ + SHORT
                    /* total number of entries in */
                    /* the central dir on this disk */ + SHORT
                    /* total number of entries in */
                    /* the central dir */ + SHORT
                    /* size of the central directory */ + WORD
                    /* offset of start of central */
                    /* directory with respect to */
                    /* the starting disk number */ + WORD
                    /* zipfile comment length */ + SHORT;
/**
* Maximum length of the "End of central directory record" with a
* file comment.
*/
private static final int MAX_EOCD_SIZE = MIN_EOCD_SIZE
/* maximum length of zipfile comment */ + ZIP64_MAGIC_SHORT;
/**
* Offset of the field that holds the location of the first
* central directory entry inside the "End of central directory
* record" relative to the start of the "End of central directory
* record".
*/
    private static final int CFD_LOCATOR_OFFSET = /* end of central dir signature */ WORD
                    /* number of this disk */ + SHORT
                    /* number of the disk with the */
                    /* start of the central directory */ + SHORT
                    /* total number of entries in */
                    /* the central dir on this disk */ + SHORT
                    /* total number of entries in */
                    /* the central dir */ + SHORT
                    /* size of the central directory */ + WORD;
/**
* Offset of the field that holds the disk number of the first
* central directory entry inside the "End of central directory
* record" relative to the start of the "End of central directory
* record".
*/
private static final int CFD_DISK_OFFSET = /* end of central dir signature */ WORD
/* number of this disk */ + SHORT;
/**
* Offset of the field that holds the location of the first
* central directory entry inside the "End of central directory
* record" relative to the "number of the disk with the start
* of the central directory".
*/
    private static final int CFD_LOCATOR_RELATIVE_OFFSET =
                    /* total number of entries in */
                    /* the central dir on this disk */ + SHORT
                    /* total number of entries in */
                    /* the central dir */ + SHORT
                    /* size of the central directory */ + WORD;
/**
* Length of the "Zip64 end of central directory locator" - which
* should be right in front of the "end of central directory
* record" if one is present at all.
*/
    private static final int ZIP64_EOCDL_LENGTH = /* zip64 end of central dir locator sig */ WORD
                    /* number of the disk with the start */
                    /* of the zip64 end of */
                    /* central directory */ + WORD
                    /* relative offset of the zip64 */
                    /* end of central directory record */ + DWORD
                    /* total number of disks */ + WORD;
/**
* Offset of the field that holds the location of the "Zip64 end
* of central directory record" inside the "Zip64 end of central
* directory locator" relative to the start of the "Zip64 end of
* central directory locator".
*/
    private static final int ZIP64_EOCDL_LOCATOR_OFFSET = /* zip64 end of central dir locator sig */ WORD
                    /* number of the disk with the start */
                    /* of the zip64 end of */
                    /* central directory */ + WORD;
/**
* Offset of the field that holds the location of the first
* central directory entry inside the "Zip64 end of central
* directory record" relative to the start of the "Zip64 end of
* central directory record".
*/
    private static final int ZIP64_EOCD_CFD_LOCATOR_OFFSET =
                    /* zip64 end of central dir */
                    /* signature */ WORD
                    /* size of zip64 end of central */
                    /* directory record */ + DWORD
                    /* version made by */ + SHORT
                    /* version needed to extract */ + SHORT
                    /* number of this disk */ + WORD
                    /* number of the disk with the */
                    /* start of the central directory */ + WORD
                    /* total number of entries in the */
                    /* central directory on this disk */ + DWORD
                    /* total number of entries in the */
                    /* central directory */ + DWORD
                    /* size of the central directory */ + DWORD;
/**
* Offset of the field that holds the disk number of the first
* central directory entry inside the "Zip64 end of central
* directory record" relative to the start of the "Zip64 end of
* central directory record".
*/
    private static final int ZIP64_EOCD_CFD_DISK_OFFSET =
                    /* zip64 end of central dir */
                    /* signature */ WORD
                    /* size of zip64 end of central */
                    /* directory record */ + DWORD
                    /* version made by */ + SHORT
                    /* version needed to extract */ + SHORT
                    /* number of this disk */ + WORD;
/**
* Offset of the field that holds the location of the first
* central directory entry inside the "Zip64 end of central
* directory record" relative to the "number of the disk
* with the start of the central directory".
*/
    private static final int ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET =
                    /* total number of entries in the */
                    /* central directory on this disk */ DWORD
                    /* total number of entries in the */
                    /* central directory */ + DWORD
                    /* size of the central directory */ + DWORD;
/**
* Searches for either the "Zip64 end of central directory
* locator" or the "End of central dir record", parses
* it and positions the stream at the first central directory
* record.
*/
private void positionAtCentralDirectory() throws IOException{
positionAtEndOfCentralDirectoryRecord();
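        // A ZIP64 archive stores the "Zip64 end of central directory locator"
        // (ZIP64_EOCDL_LENGTH bytes) immediately before the "end of central
        // directory record"; look for its signature there to decide which
        // format has to be parsed.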
boolean found = false;
final boolean searchedForZip64EOCD = archive.position() > ZIP64_EOCDL_LENGTH;
if (searchedForZip64EOCD){
archive.position(archive.position() - ZIP64_EOCDL_LENGTH);
wordBbuf.rewind();
IOUtils.readFully(archive, wordBbuf);
found = Arrays.equals(ZipArchiveOutputStream.ZIP64_EOCD_LOC_SIG, wordBuf);
}
if (!found){
// not a ZIP64 archive
if (searchedForZip64EOCD){
skipBytes(ZIP64_EOCDL_LENGTH - WORD);
}
positionAtCentralDirectory32();
}else{
positionAtCentralDirectory64();
}
}
/**
* Parses the "Zip64 end of central directory locator",
* finds the "Zip64 end of central directory record" using the
* parsed information, parses that and positions the stream at the
* first central directory record.
*
* Expects stream to be positioned right behind the "Zip64
* end of central directory locator"'s signature.
*/
private void positionAtCentralDirectory64() throws IOException{
if (isSplitZipArchive){
wordBbuf.rewind();
IOUtils.readFully(archive, wordBbuf);
final long diskNumberOfEOCD = ZipLong.getValue(wordBuf);
dwordBbuf.rewind();
IOUtils.readFully(archive, dwordBbuf);
final long relativeOffsetOfEOCD = ZipEightByteInteger.getLongValue(dwordBuf);
((ZipSplitReadOnlySeekableByteChannel) archive).position(diskNumberOfEOCD, relativeOffsetOfEOCD);
}else{
skipBytes(ZIP64_EOCDL_LOCATOR_OFFSET - WORD /* signature has already been read */);
dwordBbuf.rewind();
IOUtils.readFully(archive, dwordBbuf);
archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
}
wordBbuf.rewind();
IOUtils.readFully(archive, wordBbuf);
if (!Arrays.equals(wordBuf, ZipArchiveOutputStream.ZIP64_EOCD_SIG)){
throw new ZipException("Archive's ZIP64 end of central " + "directory locator is corrupt.");
}
if (isSplitZipArchive){
skipBytes(ZIP64_EOCD_CFD_DISK_OFFSET - WORD /* signature has already been read */);
wordBbuf.rewind();
IOUtils.readFully(archive, wordBbuf);
final long diskNumberOfCFD = ZipLong.getValue(wordBuf);
skipBytes(ZIP64_EOCD_CFD_LOCATOR_RELATIVE_OFFSET);
dwordBbuf.rewind();
IOUtils.readFully(archive, dwordBbuf);
final long relativeOffsetOfCFD = ZipEightByteInteger.getLongValue(dwordBuf);
((ZipSplitReadOnlySeekableByteChannel) archive).position(diskNumberOfCFD, relativeOffsetOfCFD);
}else{
skipBytes(ZIP64_EOCD_CFD_LOCATOR_OFFSET - WORD /* signature has already been read */);
dwordBbuf.rewind();
IOUtils.readFully(archive, dwordBbuf);
archive.position(ZipEightByteInteger.getLongValue(dwordBuf));
}
}
/**
* Parses the "End of central dir record" and positions
* the stream at the first central directory record.
*
* Expects stream to be positioned at the beginning of the
* "End of central dir record".
*/
private void positionAtCentralDirectory32() throws IOException{
if (isSplitZipArchive){
skipBytes(CFD_DISK_OFFSET);
shortBbuf.rewind();
IOUtils.readFully(archive, shortBbuf);
final int diskNumberOfCFD = ZipShort.getValue(shortBuf);
skipBytes(CFD_LOCATOR_RELATIVE_OFFSET);
wordBbuf.rewind();
IOUtils.readFully(archive, wordBbuf);
final long relativeOffsetOfCFD = ZipLong.getValue(wordBuf);
((ZipSplitReadOnlySeekableByteChannel) archive).position(diskNumberOfCFD, relativeOffsetOfCFD);
}else{
skipBytes(CFD_LOCATOR_OFFSET);
wordBbuf.rewind();
IOUtils.readFully(archive, wordBbuf);
archive.position(ZipLong.getValue(wordBuf));
}
}
/**
     * Searches for and positions the stream at the start of the
* "End of central dir record".
*/
private void positionAtEndOfCentralDirectoryRecord() throws IOException{
final boolean found = tryToLocateSignature(MIN_EOCD_SIZE, MAX_EOCD_SIZE, ZipArchiveOutputStream.EOCD_SIG);
if (!found){
throw new ZipException("Archive is not a ZIP archive");
}
}
/**
* Searches the archive backwards from minDistance to maxDistance
     * for the given signature, positions the channel right
* at the signature if it has been found.
*/
private boolean tryToLocateSignature(final long minDistanceFromEnd,final long maxDistanceFromEnd,final byte[] sig) throws IOException{
boolean found = false;
long off = archive.size() - minDistanceFromEnd;
final long stopSearching = Math.max(0L, archive.size() - maxDistanceFromEnd);
if (off >= 0){
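            // scan backwards, starting at the last offset the signature could
            // occupy (archive size minus the minimal record size) down to the
            // earliest offset allowed by maxDistanceFromEnd, comparing the four
            // signature bytes at each position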
for (; off >= stopSearching; off--){
archive.position(off);
try{
wordBbuf.rewind();
IOUtils.readFully(archive, wordBbuf);
wordBbuf.flip();
}catch (EOFException ex){ // NOSONAR
break;
}
int curr = wordBbuf.get();
if (curr == sig[POS_0]){
curr = wordBbuf.get();
if (curr == sig[POS_1]){
curr = wordBbuf.get();
if (curr == sig[POS_2]){
curr = wordBbuf.get();
if (curr == sig[POS_3]){
found = true;
break;
}
}
}
}
}
}
if (found){
archive.position(off);
}
return found;
}
/**
* Skips the given number of bytes or throws an EOFException if
* skipping failed.
*/
private void skipBytes(final int count) throws IOException{
long currentPosition = archive.position();
long newPosition = currentPosition + count;
if (newPosition > archive.size()){
throw new EOFException();
}
archive.position(newPosition);
}
/**
* Number of bytes in local file header up to the "length of
* file name" entry.
*/
private static final long LFH_OFFSET_FOR_FILENAME_LENGTH = /* local file header signature */ WORD/* version needed to extract */ + SHORT
/* general purpose bit flag */ + SHORT
/* compression method */ + SHORT
/* last mod file time */ + SHORT
/* last mod file date */ + SHORT
/* crc-32 */ + WORD
/* compressed size */ + WORD
/* uncompressed size */ + (long) WORD;
/**
* Walks through all recorded entries and adds the data available
* from the local file header.
*
*
* Also records the offsets for the data to read from the
* entries.
*
*/
    private void resolveLocalFileHeaderData(final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag) throws IOException{
for (final ZipArchiveEntry zipArchiveEntry : entries){
// entries is filled in populateFromCentralDirectory and
// never modified
final Entry ze = (Entry) zipArchiveEntry;
int[] lens = setDataOffset(ze);
final int fileNameLen = lens[0];
final int extraFieldLen = lens[1];
skipBytes(fileNameLen);
final byte[] localExtraData = new byte[extraFieldLen];
IOUtils.readFully(archive, ByteBuffer.wrap(localExtraData));
ze.setExtra(localExtraData);
if (entriesWithoutUTF8Flag.containsKey(ze)){
final NameAndComment nc = entriesWithoutUTF8Flag.get(ze);
ZipUtil.setNameAndCommentFromExtraFields(ze, nc.name, nc.comment);
}
}
}
private void fillNameMap(){
for (final ZipArchiveEntry ze : entries){
// entries is filled in populateFromCentralDirectory and
// never modified
final String name = ze.getName();
            LinkedList<ZipArchiveEntry> entriesOfThatName = nameMap.get(name);
if (entriesOfThatName == null){
entriesOfThatName = new LinkedList<>();
nameMap.put(name, entriesOfThatName);
}
entriesOfThatName.addLast(ze);
}
}
private int[] setDataOffset(ZipArchiveEntry ze) throws IOException{
long offset = ze.getLocalHeaderOffset();
if (isSplitZipArchive){
((ZipSplitReadOnlySeekableByteChannel) archive).position(ze.getDiskNumberStart(), offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
// the offset should be updated to the global offset
offset = archive.position() - LFH_OFFSET_FOR_FILENAME_LENGTH;
}else{
archive.position(offset + LFH_OFFSET_FOR_FILENAME_LENGTH);
}
wordBbuf.rewind();
IOUtils.readFully(archive, wordBbuf);
wordBbuf.flip();
wordBbuf.get(shortBuf);
final int fileNameLen = ZipShort.getValue(shortBuf);
wordBbuf.get(shortBuf);
final int extraFieldLen = ZipShort.getValue(shortBuf);
ze.setDataOffset(offset + LFH_OFFSET_FOR_FILENAME_LENGTH + SHORT + SHORT + fileNameLen + extraFieldLen);
return new int[] { fileNameLen, extraFieldLen };
}
private long getDataOffset(ZipArchiveEntry ze) throws IOException{
long s = ze.getDataOffset();
if (s == EntryStreamOffsets.OFFSET_UNKNOWN){
setDataOffset(ze);
return ze.getDataOffset();
}
return s;
}
/**
* Checks whether the archive starts with a LFH. If it doesn't,
* it may be an empty archive.
*/
private boolean startsWithLocalFileHeader() throws IOException{
archive.position(0);
wordBbuf.rewind();
IOUtils.readFully(archive, wordBbuf);
return Arrays.equals(wordBuf, ZipArchiveOutputStream.LFH_SIG);
}
/**
* Creates new BoundedInputStream, according to implementation of
* underlying archive channel.
*/
private BoundedInputStream createBoundedInputStream(long start,long remaining){
return archive instanceof FileChannel ? new BoundedFileChannelInputStream(start, remaining)
: new BoundedInputStream(start, remaining);
}
/**
* InputStream that delegates requests to the underlying
* SeekableByteChannel, making sure that only bytes from a certain
* range can be read.
*/
private class BoundedInputStream extends InputStream{
private ByteBuffer singleByteBuffer;
private final long end;
private long loc;
BoundedInputStream(final long start, final long remaining){
this.end = start + remaining;
if (this.end < start){
// check for potential vulnerability due to overflow
throw new IllegalArgumentException("Invalid length of stream at offset=" + start + ", length=" + remaining);
}
loc = start;
}
@Override
public synchronized int read() throws IOException{
if (loc >= end){
return -1;
}
if (singleByteBuffer == null){
singleByteBuffer = ByteBuffer.allocate(1);
}else{
singleByteBuffer.rewind();
}
int read = read(loc, singleByteBuffer);
if (read < 0){
return read;
}
loc++;
return singleByteBuffer.get() & 0xff;
}
@Override
public synchronized int read(final byte[] b,final int off,int len) throws IOException{
if (len <= 0){
return 0;
}
if (len > end - loc){
if (loc >= end){
return -1;
}
len = (int) (end - loc);
}
ByteBuffer buf;
buf = ByteBuffer.wrap(b, off, len);
int ret = read(loc, buf);
if (ret > 0){
loc += ret;
return ret;
}
return ret;
}
protected int read(long pos,ByteBuffer buf) throws IOException{
int read;
synchronized (archive){
archive.position(pos);
read = archive.read(buf);
}
buf.flip();
return read;
}
}
/**
* Lock-free implementation of BoundedInputStream. The
* implementation uses positioned reads on the underlying archive
     * file channel and therefore performs significantly faster in a
     * concurrent environment.
*/
private class BoundedFileChannelInputStream extends BoundedInputStream{
private final FileChannel archive;
BoundedFileChannelInputStream(final long start, final long remaining){
super(start, remaining);
archive = (FileChannel) ZipFile.this.archive;
}
@Override
protected int read(long pos,ByteBuffer buf) throws IOException{
int read = archive.read(buf, pos);
buf.flip();
return read;
}
}
private static final class NameAndComment{
private final byte[] name;
private final byte[] comment;
private NameAndComment(final byte[] name, final byte[] comment){
this.name = name;
this.comment = comment;
}
}
/**
* Compares two ZipArchiveEntries based on their offset within the archive.
*
*
* Won't return any meaningful results if one of the entries
* isn't part of the archive at all.
*
*
* @since 1.1
*/
    private final Comparator<ZipArchiveEntry> offsetComparator = (e1,e2) -> {
if (e1 == e2){
return 0;
}
final Entry ent1 = e1 instanceof Entry ? (Entry) e1 : null;
final Entry ent2 = e2 instanceof Entry ? (Entry) e2 : null;
if (ent1 == null){
return 1;
}
if (ent2 == null){
return -1;
}
// disk number is prior to relative offset
final long diskNumberStartVal = ent1.getDiskNumberStart() - ent2.getDiskNumberStart();
if (diskNumberStartVal != 0){
return diskNumberStartVal < 0 ? -1 : +1;
}
final long val = (ent1.getLocalHeaderOffset() - ent2.getLocalHeaderOffset());
return val == 0 ? 0 : val < 0 ? -1 : +1;
};
/**
* Extends ZipArchiveEntry to store the offset within the archive.
*/
private static class Entry extends ZipArchiveEntry{
Entry(){
}
@Override
public int hashCode(){
return 3 * super.hashCode() + (int) getLocalHeaderOffset() + (int) (getLocalHeaderOffset() >> 32);
}
@Override
public boolean equals(final Object other){
if (super.equals(other)){
// super.equals would return false if other were not an Entry
final Entry otherEntry = (Entry) other;
return getLocalHeaderOffset() == otherEntry.getLocalHeaderOffset() && super.getDataOffset() == otherEntry.getDataOffset()
&& super.getDiskNumberStart() == otherEntry.getDiskNumberStart();
}
return false;
}
}
private static class StoredStatisticsStream extends CountingInputStream implements InputStreamStatistics{
StoredStatisticsStream(InputStream in){
super(in);
}
@Override
public long getCompressedCount(){
return super.getBytesRead();
}
@Override
public long getUncompressedCount(){
return getCompressedCount();
}
}
}