All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.facebook.presto.druid.segment.ZipIndexFileSource Maven / Gradle / Ivy

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.druid.segment;

import com.facebook.presto.druid.DataInputSource;
import com.facebook.presto.druid.zip.CentralDirectoryFileHeader;
import com.facebook.presto.druid.zip.EndOfCentralDirectoryRecord;
import com.facebook.presto.druid.zip.Zip64EndOfCentralDirectory;
import com.facebook.presto.druid.zip.Zip64EndOfCentralDirectoryLocator;
import com.facebook.presto.druid.zip.ZipFileData;
import com.facebook.presto.druid.zip.ZipFileEntry;
import com.facebook.presto.druid.zip.ZipUtil;
import com.facebook.presto.spi.PrestoException;

import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;
import java.util.zip.ZipException;

import static com.facebook.presto.druid.DruidErrorCode.DRUID_SEGMENT_LOAD_ERROR;
import static com.google.common.base.Preconditions.checkState;
import static java.lang.String.format;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Objects.requireNonNull;

/**
 * {@link IndexFileSource} that serves files out of a zip archive read through a {@link DataInputSource}.
 * <p>
 * The central directory of the archive is parsed once, in the constructor. Every read then locates the
 * requested entry through that directory, loads the entry's compressed bytes into memory and inflates them.
 * Entry data is always decoded as a raw DEFLATE stream.
 */
public class ZipIndexFileSource
        implements IndexFileSource, Closeable, AutoCloseable
{
    public static final int LOCAL_HEADER_SIGNATURE = 0x04034b50;
    public static final int LOCAL_HEADER_FIXED_DATA_SIZE = 30;
    public static final int LOCAL_HEADER_FILENAME_LENGTH_OFFSET = 26;
    public static final int LOCAL_HEADER_EXTRA_FIELD_LENGTH_OFFSET = 28;

    // Number of bytes fetched per step while searching backwards for the end of central directory record
    private static final int END_OF_CENTRAL_DIRECTORY_SCAN_WINDOW = 64;
    // The archive comment length is stored as an unsigned 16-bit value
    private static final int MAX_COMMENT_LENGTH = 0xFFFF;
    // Width in bytes of the comment length field
    private static final int COMMENT_LENGTH_FIELD_SIZE = 2;

    private final DataInputSource dataInputSource;

    private final ZipFileData zipData;

    /**
     * Creates a source over the given archive and eagerly parses its central directory.
     *
     * @param dataInputSource random-access view of the zip archive; closed by {@link #close()}
     * @throws PrestoException if the central directory cannot be located or parsed
     */
    public ZipIndexFileSource(DataInputSource dataInputSource)
    {
        this.dataInputSource = requireNonNull(dataInputSource, "dataInputSource is null");
        zipData = readCentralDirectory();
    }

    /**
     * Reads a whole file stored inside the zip archive.
     *
     * @param fileName name of the entry inside the archive
     * @return the uncompressed content of the entry
     * @throws PrestoException if the entry does not exist, is too large for a byte array, or is malformed
     */
    @Override
    public byte[] readFile(String fileName)
            throws IOException
    {
        ZipFileEntry entry = findEntry(fileName);
        byte[] fileData = new byte[toArrayLength(entry.getSize(), "uncompressed", entry)];
        readFully(entry, 0, fileData, 0, fileData.length);
        return fileData;
    }

    /**
     * Fills {@code buffer} with the uncompressed content of an entry, starting at {@code position}.
     *
     * @param fileName name of the entry inside the archive
     * @param position offset into the uncompressed content at which reading starts
     * @param buffer destination; it is filled completely
     * @throws PrestoException if the entry does not exist or is malformed
     * @throws IllegalStateException if the entry ends before the buffer is full
     */
    @Override
    public final void readFile(String fileName, long position, byte[] buffer)
            throws IOException
    {
        readFully(findEntry(fileName), position, buffer, 0, buffer.length);
    }

    /**
     * Looks up an entry in the parsed central directory, failing if it is absent.
     */
    private ZipFileEntry findEntry(String fileName)
    {
        ZipFileEntry entry = zipData.getEntry(fileName);
        if (entry == null) {
            throw new PrestoException(DRUID_SEGMENT_LOAD_ERROR, format("Zip doesn't contain file: %s", fileName));
        }
        return entry;
    }

    /**
     * Converts an entry size to an array length, rejecting values that do not fit in an {@code int}
     * instead of silently truncating them.
     */
    private static int toArrayLength(long size, String description, ZipFileEntry entry)
    {
        if (size < 0 || size > Integer.MAX_VALUE) {
            throw new PrestoException(DRUID_SEGMENT_LOAD_ERROR, format("Zip entry '%s' has unsupported %s size: %d", entry.getName(), description, size));
        }
        return (int) size;
    }

    /**
     * Inflates {@code entry} and copies {@code bufferLength} uncompressed bytes, starting at
     * {@code position}, into {@code buffer} at {@code bufferOffset}.
     */
    private void readFully(ZipFileEntry entry, long position, byte[] buffer, int bufferOffset, int bufferLength)
            throws IOException
    {
        if (position < 0) {
            throw new IllegalArgumentException(format("position is negative: %d", position));
        }

        long headerOffset = entry.getLocalHeaderOffset();
        byte[] fileHeader = new byte[LOCAL_HEADER_FIXED_DATA_SIZE];
        dataInputSource.readFully(headerOffset, fileHeader);

        if (!ZipUtil.arrayStartsWith(fileHeader, ZipUtil.intToLittleEndian(LOCAL_HEADER_SIGNATURE))) {
            // report the offset the header was expected at, not the offset just past it
            throw new PrestoException(
                    DRUID_SEGMENT_LOAD_ERROR,
                    format("The file '%s' is not a correctly formatted zip file: Expected a File Header at offset %d, but not present.", entry.getName(), headerOffset));
        }

        // the entry data follows the fixed header, the file name and the extra field
        int nameLength = ZipUtil.getUnsignedShort(fileHeader, LOCAL_HEADER_FILENAME_LENGTH_OFFSET);
        int extraFieldLength = ZipUtil.getUnsignedShort(fileHeader, LOCAL_HEADER_EXTRA_FIELD_LENGTH_OFFSET);
        long dataOffset = headerOffset + fileHeader.length + nameLength + extraFieldLength;

        byte[] compressedData = new byte[toArrayLength(entry.getCompressedSize(), "compressed", entry)];
        dataInputSource.readFully(dataOffset, compressedData);

        // nowrap=true: the entry data is a raw DEFLATE stream without zlib header or trailer.
        // An Inflater handed to InflaterInputStream is not released by closing the stream,
        // so it is ended explicitly in the finally block.
        Inflater inflater = new Inflater(true);
        try (InflaterInputStream inflaterInputStream = new InflaterInputStream(new ByteArrayInputStream(compressedData), inflater)) {
            skipFully(inflaterInputStream, position);
            int size = chunkedRead(inflaterInputStream, buffer, bufferOffset, bufferLength);
            checkState(size == bufferLength, "Read %s bytes from zip entry '%s' but expected %s", size, entry.getName(), bufferLength);
        }
        catch (IOException e) {
            throw new PrestoException(DRUID_SEGMENT_LOAD_ERROR, format("Malformed zip file: %s", entry.getName()), e);
        }
        finally {
            inflater.end();
        }
    }

    /**
     * Skips {@code count} uncompressed bytes, repeating the call because a single {@code skip} may
     * skip fewer bytes than requested. Stops early at end of stream; the caller's length check then
     * reports the short read.
     */
    private static void skipFully(InflaterInputStream inflaterInputStream, long count)
            throws IOException
    {
        long remaining = count;
        while (remaining > 0) {
            long skipped = inflaterInputStream.skip(remaining);
            if (skipped <= 0) {
                return;
            }
            remaining -= skipped;
        }
    }

    /**
     * Reads from the inflater stream chunk by chunk until {@code length} bytes were read or the stream ended.
     *
     * @return the number of bytes actually stored into {@code buffer}
     */
    private static int chunkedRead(InflaterInputStream inflaterInputStream, byte[] buffer, int offset, int length)
            throws IOException
    {
        int total = 0;
        while (total < length) {
            int bytesRead = inflaterInputStream.read(buffer, offset + total, length - total);
            if (bytesRead == -1) {
                break;
            }
            total += bytesRead;
        }
        return total;
    }

    /**
     * Finds, reads and parses ZIP file entries from the central directory.
     */
    private ZipFileData readCentralDirectory()
    {
        try {
            long centralDirectoryEndOffset = getCentralDirectoryEndOffset();
            ZipFileData fileData = new ZipFileData(UTF_8);
            EndOfCentralDirectoryRecord.read(fileData, dataInputSource, centralDirectoryEndOffset);

            if (fileData.isMaybeZip64()) {
                try {
                    Zip64EndOfCentralDirectoryLocator.read(fileData, dataInputSource, centralDirectoryEndOffset - Zip64EndOfCentralDirectoryLocator.FIXED_DATA_SIZE);
                    Zip64EndOfCentralDirectory.read(fileData, dataInputSource, fileData.getZip64EndOfCentralDirectoryOffset());
                }
                catch (ZipException ignored) {
                    // expected if not in Zip64 format
                }
            }

            if (fileData.isZip64()) {
                // If in Zip64 format or using strict entry numbers, use the parsed information as is to read
                // the central directory file headers.
                readCentralDirectoryFileHeaders(fileData, dataInputSource, fileData.getCentralDirectoryOffset(), fileData.getCharset(), fileData.getExpectedEntries());
                return fileData;
            }

            // If not in Zip64 format, compute central directory offset by end of central directory record
            // offset and central directory size to allow reading large non-compliant Zip32 directories.
            long centralDirectoryOffset = centralDirectoryEndOffset - fileData.getCentralDirectorySize();
            // If the lower 4 bytes match, the above calculation is correct; otherwise fallback to
            // reported offset.
            if ((int) centralDirectoryOffset == (int) fileData.getCentralDirectoryOffset()) {
                readCentralDirectoryFileHeaders(fileData, dataInputSource, centralDirectoryOffset, fileData.getCharset());
            }
            else {
                readCentralDirectoryFileHeaders(fileData, dataInputSource, fileData.getCentralDirectoryOffset(), fileData.getCharset(), fileData.getExpectedEntries());
            }
            return fileData;
        }
        catch (IOException e) {
            throw new PrestoException(DRUID_SEGMENT_LOAD_ERROR, e);
        }
    }

    /**
     * Finds the file offset of the end of central directory record.
     * <p>
     * The file is scanned backwards from its end in small overlapping windows. A signature match is
     * accepted only when the comment length stored in the candidate record equals the number of bytes
     * that follow the record's fixed part. Because that length is an unsigned 16-bit value, the search
     * never needs to go back further than the fixed record size plus {@link #MAX_COMMENT_LENGTH} bytes.
     *
     * @throws PrestoException if no valid record is found
     */
    private long getCentralDirectoryEndOffset()
            throws IOException
    {
        long fileSize = dataInputSource.getSize();
        if (fileSize < EndOfCentralDirectoryRecord.FIXED_DATA_SIZE) {
            throw missingEndOfCentralDirectory();
        }

        byte[] signature = ZipUtil.intToLittleEndian(EndOfCentralDirectoryRecord.SIGNATURE);
        long lowestCandidate = Math.max(0L, fileSize - EndOfCentralDirectoryRecord.FIXED_DATA_SIZE - MAX_COMMENT_LENGTH);

        long windowEnd = fileSize;
        while (true) {
            long windowStart = Math.max(lowestCandidate, windowEnd - END_OF_CENTRAL_DIRECTORY_SCAN_WINDOW);
            byte[] window = new byte[(int) (windowEnd - windowStart)];
            dataInputSource.readFully(windowStart, window);

            int signatureLocation = scanBackwards(signature, window, window.length);
            while (signatureLocation != -1) {
                long candidateOffset = windowStart + signatureLocation;
                long eocdSize = fileSize - candidateOffset;
                if (eocdSize >= EndOfCentralDirectoryRecord.FIXED_DATA_SIZE) {
                    long readCommentLength = eocdSize - EndOfCentralDirectoryRecord.FIXED_DATA_SIZE;
                    if (readCommentLength(window, windowStart, signatureLocation) == readCommentLength) {
                        return candidateOffset;
                    }
                }
                signatureLocation = scanBackwards(signature, window, signatureLocation - 1);
            }

            if (windowStart == lowestCandidate) {
                break;
            }
            // overlap the next window so that a signature straddling the boundary is still found
            windowEnd = windowStart + signature.length - 1;
        }
        throw missingEndOfCentralDirectory();
    }

    /**
     * Returns the comment length field of the candidate record whose signature starts at
     * {@code signatureLocation} within {@code window}. The field is taken from the window when it lies
     * inside it and fetched from the data source otherwise. The caller guarantees that the fixed part
     * of the candidate record lies within the file.
     */
    private int readCommentLength(byte[] window, long windowStart, int signatureLocation)
            throws IOException
    {
        int fieldIndex = signatureLocation + EndOfCentralDirectoryRecord.COMMENT_LENGTH_OFFSET;
        if (fieldIndex + COMMENT_LENGTH_FIELD_SIZE <= window.length) {
            return ZipUtil.getUnsignedShort(window, fieldIndex);
        }
        byte[] field = new byte[COMMENT_LENGTH_FIELD_SIZE];
        dataInputSource.readFully(windowStart + fieldIndex, field);
        return ZipUtil.getUnsignedShort(field, 0);
    }

    /**
     * Builds the exception reported when the archive has no usable end of central directory record.
     */
    private PrestoException missingEndOfCentralDirectory()
    {
        return new PrestoException(DRUID_SEGMENT_LOAD_ERROR, format("Zip file '%s' is malformed. It does not contain an end of central directory record.", dataInputSource.getId()));
    }

    /**
     * Reads and parses exactly {@code count} ZIP file entries from the central directory starting at {@code fileOffset}.
     */
    private void readCentralDirectoryFileHeaders(ZipFileData fileData, DataInputSource dataInputSource, long fileOffset, Charset charset, long count)
            throws IOException
    {
        try {
            long position = fileOffset;
            for (long i = 0; i < count; i++) {
                position += CentralDirectoryFileHeader.read(fileData, dataInputSource, position, charset);
            }
        }
        catch (ZipException e) {
            throw new PrestoException(DRUID_SEGMENT_LOAD_ERROR, e);
        }
    }

    /**
     * Reads and parses ZIP file entries from the central directory starting at {@code fileOffset} until the
     * central directory size recorded in {@code fileData} has been consumed.
     */
    private void readCentralDirectoryFileHeaders(ZipFileData fileData, DataInputSource dataInputSource, long fileOffset, Charset charset)
            throws IOException
    {
        try {
            long position = fileOffset;
            while ((position - fileOffset) < fileData.getCentralDirectorySize()) {
                position += CentralDirectoryFileHeader.read(fileData, dataInputSource, position, charset);
            }
        }
        catch (ZipException e) {
            throw new PrestoException(DRUID_SEGMENT_LOAD_ERROR, e);
        }
    }

    /**
     * Looks for the target sub array in the buffer scanning backwards starting at offset. Returns the
     * index where the target is found or -1 if not found.
     */
    private static int scanBackwards(byte[] target, byte[] buffer, int offset)
    {
        int start = Math.min(offset, buffer.length - target.length);
        for (int i = start; i >= 0; i--) {
            for (int j = 0; j < target.length; j++) {
                if (buffer[i + j] != target[j]) {
                    break;
                }
                else if (j == target.length - 1) {
                    return i;
                }
            }
        }
        return -1;
    }

    /**
     * Closes the underlying {@link DataInputSource}.
     */
    @Override
    public void close()
            throws IOException
    {
        dataInputSource.close();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy