All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.archivepatcher.generator.MinimalZipParser Maven / Gradle / Ivy

The newest version!
// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.archivepatcher.generator;

import com.google.archivepatcher.shared.RandomAccessFileInputStream;

import java.io.IOException;
import java.io.InputStream;
import java.util.zip.ZipException;

/**
 * A minimal set of zip-parsing utilities just adequate to produce a {@link MinimalZipEntry} and
 * update it. This parser is neither robust nor exhaustive. The parser is built to understand
 * version 2.0 of the ZIP specification, with the notable exception that it does not have support
 * for encrypted central directories.
 * 

* The offsets, lengths and fields that this parser understands and exposes are based on version * 6.3.3 of the ZIP specification (the most recent available at the time of this writing), which may * be found at the following URL: *

  • https://www.pkware.com/documents/APPNOTE/APPNOTE-6.3.3.TXT
*

* Please note that the parser does not attempt to verify the version-needed-to-extract field, since * there is no guarantee that all ZIP implementations have set the value correctly to the minimum * needed to truly support extraction. */ class MinimalZipParser { /** * Standard 32-bit signature for a "end-of-central-directory" record in a ZIP-like archive. This * is in little-endian order. */ public static final int EOCD_SIGNATURE = 0x06054b50; /** * Standard 32-bit signature for a "central directory entry" record in a ZIP-like archive. This is * in little-endian order. */ public static final int CENTRAL_DIRECTORY_ENTRY_SIGNATURE = 0x02014b50; /** * Standard 32-bit signature for a "local file entry" in a ZIP-like archive. This is in * little-endian order. */ public static final int LOCAL_ENTRY_SIGNATURE = 0x04034b50; /** * Read exactly one byte, throwing an exception if unsuccessful. * @param in the stream to read from * @return the byte read * @throws IOException if EOF is reached */ private static int readByteOrDie(InputStream in) throws IOException { int result = in.read(); if (result == -1) { throw new IOException("EOF"); } return result; } /** * Skips exactly the specified number of bytes, throwing an exception if unsuccessful. * @param in the stream to read from * @param numBytes the number of bytes to skip * @throws IOException if EOF is reached or no more bytes can be skipped */ private static void skipOrDie(InputStream in, long numBytes) throws IOException { long numLeft = numBytes; long numSkipped = 0; while ((numSkipped = in.skip(numLeft)) > 0) { numLeft -= numSkipped; } if (numLeft != 0) { throw new IOException("Unable to skip"); } } /** * Reads 2 bytes from the current offset as an unsigned, 32-bit little-endian value. * @param in the stream to read from * @return the value as a java int * @throws IOException if unable to read */ private static int read16BitUnsigned(InputStream in) throws IOException { int value = readByteOrDie(in); value |= readByteOrDie(in) << 8; return value; } /** * Reads 4 bytes from the current offset as an unsigned, 32-bit little-endian value. * @param in the stream to read from * @return the value as a java long * @throws IOException if unable to read */ private static long read32BitUnsigned(InputStream in) throws IOException { long value = readByteOrDie(in); value |= ((long) readByteOrDie(in)) << 8; value |= ((long) readByteOrDie(in)) << 16; value |= ((long) readByteOrDie(in)) << 24; return value; } /** * Read exactly the specified amount of data into the specified buffer, throwing an exception if * unsuccessful. * @param in the stream to read from * @param buffer the buffer to file * @param offset the offset at which to start writing to the buffer * @param length the number of bytes to place into the buffer from the input stream * @throws IOException if unable to read */ private static void readOrDie(InputStream in, byte[] buffer, int offset, int length) throws IOException { if (length < 0) { throw new IllegalArgumentException("length must be >= 0"); } int numRead = 0; while (numRead < length) { int readThisRound = in.read(buffer, offset + numRead, length - numRead); if (numRead == -1) { throw new IOException("EOF"); } numRead += readThisRound; } } /** * Parse one central directory entry, starting at the current file position. * @param in the input stream to read from, assumed to start at the first byte of the entry * @return the entry that was parsed * @throws IOException if unable to complete the parsing */ public static MinimalZipEntry parseCentralDirectoryEntry(InputStream in) throws IOException { // *** 4 bytes encode the CENTRAL_DIRECTORY_ENTRY_SIGNATURE, verify for sanity // 2 bytes encode the version-made-by, ignore // 2 bytes encode the version-needed-to-extract, ignore // *** 2 bytes encode the general-purpose flags, read for language encoding. [READ THIS] // *** 2 bytes encode the compression method, [READ THIS] // 2 bytes encode the MSDOS last modified file time, ignore // 2 bytes encode the MSDOS last modified file date, ignore // *** 4 bytes encode the CRC32 of the uncompressed data [READ THIS] // *** 4 bytes encode the compressed size [READ THIS] // *** 4 bytes encode the uncompressed size [READ THIS] // *** 2 bytes encode the length of the file name [READ THIS] // *** 2 bytes encode the length of the extras, needed to skip the bytes later [READ THIS] // *** 2 bytes encode the length of the comment, needed to skip the bytes later [READ THIS] // 2 bytes encode the disk number, ignore // 2 bytes encode the internal file attributes, ignore // 4 bytes encode the external file attributes, ignore // *** 4 bytes encode the offset of the local section entry, where the data is [READ THIS] // n bytes encode the file name // n bytes encode the extras // n bytes encode the comment if (((int) read32BitUnsigned(in)) != CENTRAL_DIRECTORY_ENTRY_SIGNATURE) { throw new ZipException("Bad central directory header"); } skipOrDie(in, 2 + 2); // Skip version stuff int generalPurposeFlags = read16BitUnsigned(in); int compressionMethod = read16BitUnsigned(in); skipOrDie(in, 2 + 2); // Skip MSDOS junk long crc32OfUncompressedData = read32BitUnsigned(in); long compressedSize = read32BitUnsigned(in); long uncompressedSize = read32BitUnsigned(in); int fileNameLength = read16BitUnsigned(in); int extrasLength = read16BitUnsigned(in); int commentLength = read16BitUnsigned(in); skipOrDie(in, 2 + 2 + 4); // Skip the disk number and file attributes long fileOffsetOfLocalEntry = read32BitUnsigned(in); byte[] fileNameBuffer = new byte[fileNameLength]; readOrDie(in, fileNameBuffer, 0, fileNameBuffer.length); skipOrDie(in, extrasLength + commentLength); // General purpose flag bit 11 is an important hint for the character set used for file names. boolean generalPurposeFlagBit11 = (generalPurposeFlags & (0x1 << 10)) != 0; return new MinimalZipEntry( compressionMethod, crc32OfUncompressedData, compressedSize, uncompressedSize, fileNameBuffer, generalPurposeFlagBit11, fileOffsetOfLocalEntry); } /** * Parses one local file entry and returns the offset from the first byte at which the compressed * data begins * @param in the input stream to read from, assumed to start at the first byte of the entry * @return as described * @throws IOException if unable to complete the parsing */ public static long parseLocalEntryAndGetCompressedDataOffset(InputStream in) throws IOException { // *** 4 bytes encode the LOCAL_ENTRY_SIGNATURE, verify for sanity // 2 bytes encode the version-needed-to-extract, ignore // 2 bytes encode the general-purpose flags, ignore // 2 bytes encode the compression method, ignore (redundant with central directory) // 2 bytes encode the MSDOS last modified file time, ignore // 2 bytes encode the MSDOS last modified file date, ignore // 4 bytes encode the CRC32 of the uncompressed data, ignore (redundant with central directory) // 4 bytes encode the compressed size, ignore (redundant with central directory) // 4 bytes encode the uncompressed size, ignore (redundant with central directory) // *** 2 bytes encode the length of the file name, needed to skip the bytes later [READ THIS] // *** 2 bytes encode the length of the extras, needed to skip the bytes later [READ THIS] // The rest is the data, which is the main attraction here. if (((int) read32BitUnsigned(in)) != LOCAL_ENTRY_SIGNATURE) { throw new ZipException("Bad local entry header"); } int junkLength = 2 + 2 + 2 + 2 + 2 + 4 + 4 + 4; skipOrDie(in, junkLength); // Skip everything up to the length of the file name final int fileNameLength = read16BitUnsigned(in); final int extrasLength = read16BitUnsigned(in); // The file name is already known and will match the central directory, so no need to read it. // The extra field length can be different here versus in the central directory and is used for // things like zipaligning APKs. This single value is the critical part as it dictates where the // actual DATA for the entry begins. return 4 + junkLength + 2 + 2 + fileNameLength + extrasLength; } /** * Find the end-of-central-directory record by scanning backwards from the end of a file looking * for the signature of the record. * @param in the file to read from * @param searchBufferLength the length of the search buffer, starting from the end of the file * @return the offset in the file at which the first byte of the EOCD signature is located, or -1 * if the signature is not found in the search buffer * @throws IOException if there is a problem reading */ public static long locateStartOfEocd(RandomAccessFileInputStream in, int searchBufferLength) throws IOException { final int maxBufferSize = (int) Math.min(searchBufferLength, in.length()); final byte[] buffer = new byte[maxBufferSize]; final long rangeStart = in.length() - buffer.length; in.setRange(rangeStart, buffer.length); readOrDie(in, buffer, 0, buffer.length); int offset = locateStartOfEocd(buffer); if (offset == -1) { return -1; } return rangeStart + offset; } /** * Find the end-of-central-directory record by scanning backwards looking for the signature of the * record. * @param buffer the buffer in which to search * @return the offset in the buffer at which the first byte of the EOCD signature is located, or * -1 if the complete signature is not found */ public static int locateStartOfEocd(byte[] buffer) { int last4Bytes = 0; // This is the 32 bits of data from the file for (int offset = buffer.length - 1; offset >= 0; offset--) { last4Bytes <<= 8; last4Bytes |= buffer[offset]; if (last4Bytes == EOCD_SIGNATURE) { return offset; } } return -1; } /** * Parse the end-of-central-directory record and return the critical information from it. * @param in the input stream to read from, assumed to start at the first byte of the entry * @return the metadata * @throws IOException if unable to read * @throws ZipException if the metadata indicates this is a zip64 archive, which is not supported */ public static MinimalCentralDirectoryMetadata parseEocd(InputStream in) throws IOException, ZipException { if (((int) read32BitUnsigned(in)) != EOCD_SIGNATURE) { throw new ZipException("Bad eocd header"); } // *** 4 bytes encode EOCD_SIGNATURE, ignore (already found and verified). // 2 bytes encode disk number for this archive, ignore. // 2 bytes encode disk number for the central directory, ignore. // 2 bytes encode num entries in the central directory on this disk, ignore. // *** 2 bytes encode num entries in the central directory overall [READ THIS] // *** 4 bytes encode the length of the central directory [READ THIS] // *** 4 bytes encode the file offset of the central directory [READ THIS] // 2 bytes encode the length of the zip file comment, ignore. // Everything else from here to the EOF is the zip file comment, or junk. Ignore. skipOrDie(in, 2 + 2 + 2); int numEntriesInCentralDirectory = read16BitUnsigned(in); if (numEntriesInCentralDirectory == 0xffff) { // If 0xffff, this is a zip64 archive and this code doesn't handle that. throw new ZipException("No support for zip64"); } long lengthOfCentralDirectory = read32BitUnsigned(in); long offsetOfCentralDirectory = read32BitUnsigned(in); return new MinimalCentralDirectoryMetadata( numEntriesInCentralDirectory, offsetOfCentralDirectory, lengthOfCentralDirectory); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy