io.github.lukehutch.fastclasspathscanner.utils.NestedJarHandler Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of fast-classpath-scanner Show documentation
Uber-fast, ultra-lightweight Java classpath scanner. Scans the classpath by parsing the classfile binary format directly rather than by using reflection. See https://github.com/lukehutch/fast-classpath-scanner
There is a newer version: 4.0.0-beta-7
Show newest version
/*
 * This file is part of FastClasspathScanner.
 *
 * Author: Luke Hutchison
 *
 * Hosted at: https://github.com/lukehutch/fast-classpath-scanner
 *
 * --
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2016 Luke Hutchison
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
 * documentation files (the "Software"), to deal in the Software without restriction, including without
 * limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 * LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
 * EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
 * OR OTHER DEALINGS IN THE SOFTWARE.
 */
package io.github.lukehutch.fastclasspathscanner.utils;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.AbstractMap.SimpleEntry;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import io.github.lukehutch.fastclasspathscanner.scanner.ModuleRef;
import io.github.lukehutch.fastclasspathscanner.scanner.ModuleRef.ModuleReaderProxy;

/**
 * Unzip a jarfile within a jarfile to a temporary file on disk. Also handles the download of jars from http(s) URLs
 * to temp files.
 *
 * 
 * Somewhat paradoxically, the fastest way to support scanning zipfiles-within-zipfiles is to unzip the inner
 * zipfile to a temporary file on disk, because the inner zipfile can only be read using ZipInputStream, not ZipFile
 * (the ZipFile constructors only take a File argument). ZipInputStream doesn't have methods for reading the zip
 * directory at the beginning of the stream, so using ZipInputStream rather than ZipFile, you have to decompress the
 * entire zipfile to read all the directory entries. However, there may be many non-whitelisted entries in the
 * zipfile, so this could be a lot of wasted work.
 *
 * 

 * FastClasspathScanner makes two passes, one to read the zipfile directory, which whitelist and blacklist criteria
 * are applied to (this is a fast operation when using ZipFile), and then an additional pass to read only
 * whitelisted (non-blacklisted) entries. Therefore, in the general case, the ZipFile API is always going to be
 * faster than ZipInputStream. Therefore, decompressing the inner zipfile to disk is the only efficient option.
 */
public class NestedJarHandler {
    private final ConcurrentLinkedDeque tempFiles = new ConcurrentLinkedDeque<>();
    private final SingletonMap>> nestedPathToJarfileAndRootRelativePathsMap;
    private final SingletonMap> canonicalPathToZipFileRecyclerMap;
    private final SingletonMap> //
    moduleReaderProxyToModuleReaderRecyclerMap;
    private final InterruptionChecker interruptionChecker;

    public static final String TEMP_FILENAME_LEAF_SEPARATOR = "---";

    public NestedJarHandler(final boolean stripSFXHeader, final InterruptionChecker interruptionChecker,
            final LogNode log) {
        this.interruptionChecker = interruptionChecker;

        // Set up a singleton map from canonical path to ZipFile recycler
        this.canonicalPathToZipFileRecyclerMap = new SingletonMap>() {
            @Override
            public Recycler newInstance(final String canonicalPath, final LogNode log)
                    throws Exception {
                return new Recycler() {
                    @Override
                    public ZipFile newInstance() throws IOException {
                        return new ZipFile(canonicalPath);
                    }
                };
            }
        };

        // Set up a singleton map from ModuleRef object to ModuleReaderProxy recycler
        this.moduleReaderProxyToModuleReaderRecyclerMap = //
                new SingletonMap>() {
                    @Override
                    public Recycler newInstance(final ModuleRef moduleRef,
                            final LogNode log) throws Exception {
                        return new Recycler() {
                            @Override
                            public ModuleReaderProxy newInstance() throws IOException {
                                return moduleRef.open();
                            }
                        };
                    }
                };

        // Create a singleton map from path to zipfile File, in order to eliminate repeatedly unzipping the same
        // file when there are multiple jars-within-jars that need unzipping to temporary files.
        this.nestedPathToJarfileAndRootRelativePathsMap = new SingletonMap>>() {
            @Override
            public Entry> newInstance(final String nestedJarPath, final LogNode log)
                    throws Exception {
                final int lastPlingIdx = nestedJarPath.lastIndexOf('!');
                if (lastPlingIdx < 0) {
                    // nestedJarPath is a simple file path or URL (i.e. doesn't have any '!' sections). This is also
                    // the last frame of recursion for the 'else' clause below.

                    // If the path starts with "http(s)://", download the jar to a temp file
                    final boolean isRemote = nestedJarPath.startsWith("http://")
                            || nestedJarPath.startsWith("https://");
                    final File pathFile = isRemote ? downloadTempFile(nestedJarPath, log) : new File(nestedJarPath);
                    if (isRemote && pathFile == null) {
                        if (log != null) {
                            log.log(nestedJarPath, "Could not download jarfile " + nestedJarPath);
                        }
                        return null;
                    }
                    File canonicalFile;
                    try {
                        canonicalFile = pathFile.getCanonicalFile();
                    } catch (final IOException | SecurityException e) {
                        if (log != null) {
                            log.log(nestedJarPath,
                                    "Path component " + nestedJarPath + " could not be canonicalized: " + e);
                        }
                        return null;
                    }
                    if (!ClasspathUtils.canRead(canonicalFile)) {
                        if (log != null) {
                            log.log(nestedJarPath, "Path component " + nestedJarPath + " does not exist");
                        }
                        return null;
                    }
                    if (!canonicalFile.isFile()) {
                        if (log != null) {
                            log.log(nestedJarPath,
                                    "Path component " + nestedJarPath + "  is not a file (expected a jarfile)");
                        }
                        return null;
                    }
                    // Handle self-extracting archives (they can be created by Spring-Boot)
                    final File bareJarfile = stripSFXHeader ? stripSFXHeader(canonicalFile, log) : canonicalFile;
                    // Return canonical file as the singleton entry for this path
                    final Set rootRelativePaths = new HashSet<>();
                    return new SimpleEntry<>(bareJarfile, rootRelativePaths);

                } else {
                    // This path has one or more '!' sections.
                    final String parentPath = nestedJarPath.substring(0, lastPlingIdx);
                    String childPath = nestedJarPath.substring(lastPlingIdx + 1);
                    if (childPath.startsWith("/")) {
                        // "file.jar!/path_or_jar" -> "file.jar!path_or_jar"
                        childPath = childPath.substring(1);
                    }
                    // Recursively remove one '!' section at a time, back towards the beginning of the URL or
                    // file path. At the last frame of recursion, the toplevel jarfile will be reached and
                    // returned. The recursion is guaranteed to terminate because parentPath gets one
                    // '!'-section shorter with each recursion frame.
                    final Entry> parentJarfileAndRootRelativePaths = //
                            nestedPathToJarfileAndRootRelativePathsMap.getOrCreateSingleton(parentPath, log);
                    // Only the last item in a '!'-delimited list can be a non-jar path, so the parent must
                    // always be a jarfile.
                    final File parentJarFile = parentJarfileAndRootRelativePaths.getKey();
                    if (parentJarFile == null) {
                        // Failed to get topmost jarfile, e.g. file not found
                        return null;
                    }

                    // Avoid decompressing the same nested jarfiles multiple times for different non-canonical
                    // parent paths, by calling getOrCreateSingleton() again using parentJarfile (which has a
                    // canonicalized path). This recursion is guaranteed to terminate after one extra recursion
                    // if File.getCanonicalFile() is idempotent, which it should be by definition.
                    final String parentJarFilePath = FastPathResolver.resolve(parentJarFile.getPath());
                    if (!parentJarFilePath.equals(parentPath)) {
                        // The path normalization process changed the path -- return a mapping
                        // to the NestedJarHandler resolution of the normalized path 
                        return nestedPathToJarfileAndRootRelativePathsMap
                                .getOrCreateSingleton(parentJarFilePath + "!" + childPath, log);
                    }

                    // Get the ZipFile recycler for the parent jar's canonical path
                    final Recycler parentJarRecycler = canonicalPathToZipFileRecyclerMap
                            .getOrCreateSingleton(parentJarFile.getCanonicalPath(), log);
                    ZipFile parentZipFile = null;
                    try {
                        // Look up the child path within the parent zipfile
                        parentZipFile = parentJarRecycler.acquire();
                        ZipEntry childZipEntry;
                        if (childPath.endsWith("/")) {
                            childZipEntry = parentZipFile.getEntry(childPath);
                        } else {
                            // Try appending "/" to childPath when fetching the ZipEntry. This will return
                            // the correct directory entry in buggy versions of the JRE, rather than returning
                            // a non-directory entry for directories (Bug #171). See:
                            // http://www.oracle.com/technetwork/java/javase/8u144-relnotes-3838694.html
                            childZipEntry = parentZipFile.getEntry(childPath + "/");
                            if (childZipEntry == null) {
                                // If there was no directory entry ending in "/", then look up the childPath
                                // without the appended "/".
                                childZipEntry = parentZipFile.getEntry(childPath);
                            }
                        }
                        if (childZipEntry == null) {
                            if (log != null) {
                                log.log(nestedJarPath, "Child path component " + childPath
                                        + " does not exist in jarfile " + parentJarFile);
                            }
                            return null;
                        }

                        // Make sure path component is a file, not a directory (can't unzip directories)
                        if (childZipEntry.isDirectory()) {
                            if (log != null) {
                                log.log(nestedJarPath, "Child path component " + childPath + " in jarfile "
                                        + parentJarFile + " is a directory, not a file -- using as scanning root");
                            }
                            // Add directory path to parent jarfile root relative paths set
                            parentJarfileAndRootRelativePaths.getValue().add(childPath);
                            // Return parent entry
                            return parentJarfileAndRootRelativePaths;
                        }

                        // Unzip the child zipfile to a temporary file
                        final File childTempFile = unzipToTempFile(parentZipFile, childZipEntry, log);

                        try {
                            // Handle self-extracting archives (can be created by Spring-Boot)
                            final File bareChildTempFile = stripSFXHeader ? stripSFXHeader(childTempFile, log)
                                    : childTempFile;

                            // Return the child temp zipfile as a new entry
                            final Set rootRelativePaths = new HashSet<>();
                            return new SimpleEntry<>(bareChildTempFile, rootRelativePaths);

                        } catch (final IOException e) {
                            // Thrown if the extracted file did not have a "PK" header
                            if (log != null) {
                                log.log(nestedJarPath, "File does not appear to be a zipfile: " + childPath);
                            }
                            return null;
                        }

                    } finally {
                        parentJarRecycler.release(parentZipFile);
                    }
                }
            }
        };
    }

    /**
     * Get a ZipFile recycler given the (non-nested) canonical path of a jarfile.
     *
     * @return The ZipFile recycler.
     */
    public Recycler getZipFileRecycler(final String canonicalPath, final LogNode log)
            throws Exception {
        return canonicalPathToZipFileRecyclerMap.getOrCreateSingleton(canonicalPath, log);
    }

    /**
     * Get a ModuleReaderProxy recycler given a ModuleRef.
     *
     * @return The ModuleReaderProxy recycler.
     */
    public Recycler getModuleReaderProxyRecycler(final ModuleRef moduleRef,
            final LogNode log) throws Exception {
        return moduleReaderProxyToModuleReaderRecyclerMap.getOrCreateSingleton(moduleRef, log);
    }

    /**
     * Get a File for a given (possibly nested) jarfile path, unzipping the first N-1 segments of an N-segment
     * '!'-delimited path to temporary files, then returning the File reference for the N-th temporary file.
     *
     * 

     * If the path does not contain '!', returns the File represented by the path.
     *
     * 
     * All path segments should end in a jarfile extension, e.g. ".jar" or ".zip".
     *
     * @return An {@code Entry>}, where the {@code File} is the innermost jar, and the
     *         {@code Set} is the set of all relative paths of scanning roots within the innermost jar (may
     *         be empty, or may contain strings like "target/classes" or similar). If there was an issue with the
     *         path, returns null.
     */
    public Entry> getInnermostNestedJar(final String nestedJarPath, final LogNode log)
            throws Exception {
        return nestedPathToJarfileAndRootRelativePathsMap.getOrCreateSingleton(nestedJarPath, log);
    }

    /** Download a jar from a URL to a temporary file. */
    private File downloadTempFile(final String jarURL, final LogNode log) {
        final LogNode subLog = log == null ? null : log.log(jarURL, "Downloading URL " + jarURL);
        File tempFile = null;
        try {
            final String suffix = TEMP_FILENAME_LEAF_SEPARATOR + jarURL.replace('/', '_').replace(':', '_')
                    .replace('?', '_').replace('&', '_').replace('=', '_');
            tempFile = File.createTempFile("FastClasspathScanner-", suffix);
            tempFile.deleteOnExit();
            tempFiles.add(tempFile);
            final URL url = new URL(jarURL);
            try (InputStream inputStream = url.openStream()) {
                Files.copy(inputStream, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
            }
            if (subLog != null) {
                subLog.addElapsedTime();
            }
        } catch (final Exception e) {
            if (subLog != null) {
                subLog.log("Could not download " + jarURL, e);
            }
            return null;
        }
        if (subLog != null) {
            subLog.log("Downloaded to temporary file " + tempFile);
            subLog.log("***** Note that it is time-consuming to scan jars at http(s) addresses, "
                    + "they must be downloaded for every scan, and the same jars must also be "
                    + "separately downloaded by the ClassLoader *****");
        }
        return tempFile;
    }

    /**
     * Unzip a ZipEntry to a temporary file, then return the temporary file. The temporary file will be removed when
     * NestedJarHandler#close() is called.
     */
    private File unzipToTempFile(final ZipFile zipFile, final ZipEntry zipEntry, final LogNode log)
            throws IOException {
        String zipEntryPath = zipEntry.getName();
        if (zipEntryPath.startsWith("/")) {
            zipEntryPath = zipEntryPath.substring(1);
        }
        final String zipEntryLeaf = zipEntryPath.substring(zipEntryPath.lastIndexOf('/') + 1);
        // The following filename format is also expected by JarUtils.leafName()
        final File tempFile = File.createTempFile("FastClasspathScanner-",
                TEMP_FILENAME_LEAF_SEPARATOR + zipEntryLeaf);
        tempFile.deleteOnExit();
        tempFiles.add(tempFile);
        LogNode subLog = null;
        if (log != null) {
            final String qualifiedPath = zipFile.getName() + "!/" + zipEntryPath;
            subLog = log.log(qualifiedPath, "Unzipping zip entry " + qualifiedPath);
            subLog.log("Extracted to temporary file " + tempFile.getPath());
        }
        try (InputStream inputStream = zipFile.getInputStream(zipEntry)) {
            Files.copy(inputStream, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
        }
        if (subLog != null) {
            subLog.addElapsedTime();
        }
        return tempFile;
    }

    /**
     * Strip self-extracting archive ("ZipSFX") header from zipfile, if present. (Simply strips everything before
     * the first "PK".)
     * 
     * @return The zipfile with the ZipSFX header removed
     * @throws IOException
     *             if the file does not appear to be a zipfile (i.e. if no "PK" marker is found).
     */
    private File stripSFXHeader(final File zipfile, final LogNode log) throws IOException {
        final long sfxHeaderBytes = JarUtils.countBytesBeforePKMarker(zipfile);
        if (sfxHeaderBytes == -1L) {
            throw new IOException("Could not find zipfile \"PK\" marker in file " + zipfile);
        } else if (sfxHeaderBytes == 0L) {
            // No self-extracting zipfile header
            return zipfile;
        } else {
            // Need to strip off ZipSFX header (e.g. Bash script prepended by Spring-Boot)
            final File bareZipfile = File.createTempFile("FastClasspathScanner-",
                    TEMP_FILENAME_LEAF_SEPARATOR + JarUtils.leafName(zipfile.getName()));
            bareZipfile.deleteOnExit();
            tempFiles.add(bareZipfile);
            if (log != null) {
                log.log("Zipfile " + zipfile + " contains a self-extracting executable header of " + sfxHeaderBytes
                        + " bytes. Stripping off header to create bare zipfile " + bareZipfile);
            }
            JarUtils.stripSFXHeader(zipfile, sfxHeaderBytes, bareZipfile);
            return bareZipfile;
        }
    }

    /** Delete temporary files and release other resources. */
    public void close(final LogNode log) {
        final LogNode rmLog = tempFiles.isEmpty() || log == null ? null : log.log("Removing temporary files");
        while (!tempFiles.isEmpty()) {
            final File head = tempFiles.remove();
            final String path = head.getPath();
            final boolean success = head.delete();
            if (log != null) {
                rmLog.log((success ? "Removed" : "Unable to remove") + " " + path);
            }
        }
        List> recyclers = null;
        try {
            recyclers = canonicalPathToZipFileRecyclerMap.values();
        } catch (final InterruptedException e) {
            // Stop other threads
            interruptionChecker.interrupt();
        }
        if (recyclers != null) {
            for (final Recycler recycler : recyclers) {
                recycler.close();
            }
            canonicalPathToZipFileRecyclerMap.clear();
        }
    }
}