io.github.lukehutch.fastclasspathscanner.scanner.Scanner Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of fast-classpath-scanner Show documentation
Uber-fast, ultra-lightweight Java classpath scanner. Scans the classpath by parsing the classfile binary format directly rather than by using reflection. See https://github.com/lukehutch/fast-classpath-scanner
There is a newer version: 4.0.0-beta-7
Show newest version
/*
 * This file is part of FastClasspathScanner.
 *
 * Author: Luke Hutchison
 *
 * Hosted at: https://github.com/lukehutch/fast-classpath-scanner
 *
 * --
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2016 Luke Hutchison
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
 * documentation files (the "Software"), to deal in the Software without restriction, including without
 * limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 * LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
 * EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
 * OR OTHER DEALINGS IN THE SOFTWARE.
 */
package io.github.lukehutch.fastclasspathscanner.scanner;

import java.io.File;
import java.util.AbstractMap.SimpleEntry;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;

import io.github.lukehutch.fastclasspathscanner.utils.InterruptionChecker;
import io.github.lukehutch.fastclasspathscanner.utils.LogNode;
import io.github.lukehutch.fastclasspathscanner.utils.NestedJarHandler;
import io.github.lukehutch.fastclasspathscanner.utils.Recycler;
import io.github.lukehutch.fastclasspathscanner.utils.WorkQueue;
import io.github.lukehutch.fastclasspathscanner.utils.WorkQueue.WorkQueuePreStartHook;
import io.github.lukehutch.fastclasspathscanner.utils.WorkQueue.WorkUnitProcessor;

/** The classpath scanner. */
public class Scanner implements Callable {
    private final ScanSpec scanSpec;
    private final ExecutorService executorService;
    private final int numParallelTasks;
    private final boolean enableRecursiveScanning;
    private final InterruptionChecker interruptionChecker = new InterruptionChecker();
    private final ScanResultProcessor scanResultProcessor;
    private final FailureHandler failureHandler;
    private final LogNode log;
    private NestedJarHandler nestedJarHandler;

    /**
     * The number of files within a given classpath element (directory or zipfile) to send in a chunk to the workers
     * that are calling the classfile binary parser. The smaller this number is, the better the load leveling at the
     * end of the scan, but the higher the overhead in re-opening the same ZipFile in different worker threads.
     */
    private static final int NUM_FILES_PER_CHUNK = 200;

    /** The classpath scanner. */
    public Scanner(final ScanSpec scanSpec, final ExecutorService executorService, final int numParallelTasks,
            final boolean enableRecursiveScanning, final ScanResultProcessor scannResultProcessor,
            final FailureHandler failureHandler, final LogNode log) {
        this.scanSpec = scanSpec;
        this.executorService = executorService;
        this.numParallelTasks = numParallelTasks;
        this.enableRecursiveScanning = enableRecursiveScanning;
        this.scanResultProcessor = scannResultProcessor;
        this.failureHandler = failureHandler;
        this.log = log;
    }

    // -------------------------------------------------------------------------------------------------------------

    /**
     * Recursively perform a depth-first search of jar interdependencies, breaking cycles if necessary, to determine
     * the final classpath element order.
     */
    private static void findClasspathOrder(final ClasspathElement currSingleton,
            final RelativePathToElementMap classpathElementMap,
            final HashSet visitedClasspathElts, final ArrayList order)
            throws InterruptedException {
        if (visitedClasspathElts.add(currSingleton)) {
            order.add(currSingleton);
            if (currSingleton.childClasspathElts != null) {
                for (final RelativePath childClasspathElt : currSingleton.childClasspathElts) {
                    final ClasspathElement childSingleton = classpathElementMap.get(childClasspathElt);
                    if (childSingleton != null && !childSingleton.ioExceptionOnOpen) {
                        findClasspathOrder(childSingleton, classpathElementMap, visitedClasspathElts, order);
                    }
                }
            }
        }
    }

    /**
     * Recursively perform a depth-first search of jar interdependencies, breaking cycles if necessary, to determine
     * the final classpath element order.
     */
    private static List findClasspathOrder(final List rawClasspathElements,
            final RelativePathToElementMap classpathElementMap) throws InterruptedException {
        // Recurse from toplevel classpath elements to determine a total ordering of classpath elements (jars with
        // Class-Path entries in their manifest file should have those child resources included in-place in the
        // classpath).
        final HashSet visitedClasspathElts = new HashSet<>();
        final ArrayList order = new ArrayList<>();
        for (final RelativePath toplevelClasspathElt : rawClasspathElements) {
            final ClasspathElement toplevelSingleton = classpathElementMap.get(toplevelClasspathElt);
            if (toplevelSingleton != null && !toplevelSingleton.ioExceptionOnOpen) {
                findClasspathOrder(toplevelSingleton, classpathElementMap, visitedClasspathElts, order);
            }
        }
        return order;
    }

    // -------------------------------------------------------------------------------------------------------------

    /**
     * Holds range limits for chunks of classpath files that need to be scanned in a given classpath element.
     */
    private static class ClassfileParserChunk {
        private final ClasspathElement classpathElement;
        private final int classfileStartIdx;
        private final int classfileEndIdx;

        public ClassfileParserChunk(final ClasspathElement classpathElementSingleton, final int classfileStartIdx,
                final int classfileEndIdx) {
            this.classpathElement = classpathElementSingleton;
            this.classfileStartIdx = classfileStartIdx;
            this.classfileEndIdx = classfileEndIdx;
        }
    }

    /**
     * Break the classfiles that need to be scanned in each classpath element into chunks of approximately
     * NUM_FILES_PER_CHUNK files. This helps with load leveling so that the worker threads all complete their work
     * at approximately the same time.
     */
    private static List getClassfileParserChunks(
            final List classpathOrder) {
        LinkedList> chunks = new LinkedList<>();
        for (final ClasspathElement classpathElement : classpathOrder) {
            final LinkedList chunksForClasspathElt = new LinkedList<>();
            final int numClassfileMatches = classpathElement.getNumClassfileMatches();
            if (numClassfileMatches > 0) {
                final int numChunks = (int) Math.ceil((float) numClassfileMatches / (float) NUM_FILES_PER_CHUNK);
                final float filesPerChunk = (float) numClassfileMatches / (float) numChunks;
                for (int i = 0; i < numChunks; i++) {
                    final int classfileStartIdx = (int) (i * filesPerChunk);
                    final int classfileEndIdx = i < numChunks - 1 ? (int) ((i + 1) * filesPerChunk)
                            : numClassfileMatches;
                    if (classfileEndIdx > classfileStartIdx) {
                        chunksForClasspathElt.add(
                                new ClassfileParserChunk(classpathElement, classfileStartIdx, classfileEndIdx));
                    }
                }
            }
            chunks.add(chunksForClasspathElt);
        }
        // There should be no overlap between the relative paths in any of the chunks, because classpath masking has
        // already been applied, so these chunks can be scanned in any order. But since a ZipFile instance can only
        // be used by one thread at a time, we want to space the chunks for a given ZipFile as far apart as possible
        // in the work queue to minimize the chance that two threads will try to open the same ZipFile at the same
        // time, as this will cause a second copy of the ZipFile to have to be opened by the ZipFile recycler. The
        // combination of chunking and interleaving therefore lets us achieve load leveling without work stealing or
        // other more complex mechanism.
        final List interleavedChunks = new ArrayList<>();
        while (!chunks.isEmpty()) {
            final LinkedList> nextChunks = new LinkedList<>();
            for (final LinkedList chunksForClasspathElt : chunks) {
                if (!chunksForClasspathElt.isEmpty()) {
                    final ClassfileParserChunk head = chunksForClasspathElt.remove();
                    interleavedChunks.add(head);
                    if (!chunksForClasspathElt.isEmpty()) {
                        nextChunks.add(chunksForClasspathElt);
                    }
                }
            }
            chunks = nextChunks;
        }
        return interleavedChunks;
    }

    // -------------------------------------------------------------------------------------------------------------

    /**
     * Determine the unique ordered classpath elements, and run a scan looking for file or classfile matches if
     * necessary.
     */
    @Override
    public ScanResult call() throws InterruptedException, ExecutionException {
        final LogNode classpathFinderLog = log == null ? null : log.log("Finding classpath entries");
        this.nestedJarHandler = new NestedJarHandler(scanSpec.stripSFXHeader, interruptionChecker,
                classpathFinderLog);
        try {
            final long scanStart = System.nanoTime();

            // Get classpath finder
            final LogNode getRawElementsLog = classpathFinderLog == null ? null
                    : classpathFinderLog.log("Getting raw classpath elements");
            final ClasspathFinder classpathFinder = new ClasspathFinder(scanSpec, nestedJarHandler,
                    getRawElementsLog);
            final ClassLoader[] classLoaderOrder = classpathFinder.getClassLoaderOrder();
            final List rawClasspathEltOrder = new ArrayList<>();

            // Add modules to start of classpath order (in JDK9+)
            if (!scanSpec.blacklistSystemJars && !scanSpec.blacklistSystemPackages) {
                final List systemModules = classpathFinder.getSystemModuleRefs();
                if (systemModules != null) {
                    for (final ModuleRef systemModule : systemModules) {
                        rawClasspathEltOrder
                                .add(new RelativePath(systemModule, nestedJarHandler, getRawElementsLog));
                    }
                }
            }
            final List nonSystemModules = classpathFinder.getNonSystemModuleRefs();
            if (nonSystemModules != null) {
                for (final ModuleRef nonSystemModule : nonSystemModules) {
                    rawClasspathEltOrder
                            .add(new RelativePath(nonSystemModule, nestedJarHandler, getRawElementsLog));
                }
            }

            // Add non-module classpath elements to classpath order
            rawClasspathEltOrder.addAll(classpathFinder.getRawClasspathElements());

            // In parallel, resolve raw classpath elements to canonical paths, creating a ClasspathElement singleton
            // for each unique canonical path. Also check jars against jar whitelist/blacklist.a
            final LogNode preScanLog = classpathFinderLog == null ? null
                    : classpathFinderLog.log("Searching for \"Class-Path:\" entries within manifest files");
            final RelativePathToElementMap classpathElementMap = new RelativePathToElementMap(
                    enableRecursiveScanning, scanSpec, nestedJarHandler, interruptionChecker);
            WorkQueue.runWorkQueue(rawClasspathEltOrder, executorService, numParallelTasks,
                    new WorkUnitProcessor() {
                        @Override
                        public void processWorkUnit(final RelativePath rawClasspathEltPath) throws Exception {
                            // Check if classpath element is already in the singleton map -- saves needlessly
                            // repeating work in isValidClasspathElement() and createSingleton() (need to check for
                            // duplicates again, even though we checked above, since additonal classpath entries can
                            // come from Class-Path entries in manifests)
                            if (classpathElementMap.get(rawClasspathEltPath) != null) {
                                if (preScanLog != null) {
                                    preScanLog.log("Ignoring duplicate classpath element: " + rawClasspathEltPath);
                                }
                            } else if (rawClasspathEltPath.isValidClasspathElement(scanSpec, preScanLog)) {
                                try {
                                    final boolean isModule = rawClasspathEltPath.getModuleRef() != null;
                                    final boolean isFile = !isModule && rawClasspathEltPath.isFile(preScanLog);
                                    final boolean isDir = !isModule && rawClasspathEltPath.isDirectory(preScanLog);
                                    if (isModule) {
                                        // Scan all modules that were not already filtered out as system modules
                                        classpathElementMap.createSingleton(rawClasspathEltPath, preScanLog);
                                    } else if (isFile && !scanSpec.scanJars) {
                                        if (preScanLog != null) {
                                            preScanLog.log("Ignoring because jar scanning has been disabled: "
                                                    + rawClasspathEltPath);
                                        }
                                    } else if (isFile && !scanSpec
                                            .jarIsWhitelisted(rawClasspathEltPath.getCanonicalPath(preScanLog))) {
                                        if (preScanLog != null) {
                                            preScanLog
                                                    .log("Ignoring jarfile that is blacklisted or not whitelisted: "
                                                            + rawClasspathEltPath);
                                        }
                                    } else if (isDir && !scanSpec.scanDirs) {
                                        if (preScanLog != null) {
                                            preScanLog.log("Ignoring because directory scanning has been disabled: "
                                                    + rawClasspathEltPath);
                                        }
                                    } else {
                                        // Classpath element is valid, add as a singleton. This will trigger calling
                                        // the ClasspathElementZip constructor in the case of jarfiles, which will
                                        // check the manifest file for Class-Path entries, and if any are found,
                                        // additional work units will be added to the work queue to scan those
                                        // jarfiles too. If Class-Path entries are found, they are added as child
                                        // elements of the current classpath element, so that they can be inserted
                                        // at the correct location in the classpath order.
                                        classpathElementMap.createSingleton(rawClasspathEltPath, preScanLog);
                                    }
                                } catch (final Exception e) {
                                    if (preScanLog != null) {
                                        // Could not create singleton, probably due to path canonicalization problem
                                        preScanLog.log("Classpath element " + rawClasspathEltPath
                                                + " is not valid (" + e + ") -- skipping");
                                    }
                                }
                            }
                        }
                    }, new WorkQueuePreStartHook() {
                        @Override
                        public void processWorkQueueRef(final WorkQueue workQueue) {
                            // Store a ref back to the work queue in the classpath element map, because some
                            // classpath elements will need to schedule additional classpath elements for scanning,
                            // e.g. "Class-Path:" refs in jar manifest files
                            classpathElementMap.setWorkQueue(workQueue);
                        }
                    }, interruptionChecker, preScanLog);

            // Determine total ordering of classpath elements, inserting jars referenced in manifest Class-Path
            // entries in-place into the ordering, if they haven't been listed earlier in the classpath already.
            final List classpathOrder = findClasspathOrder(rawClasspathEltOrder,
                    classpathElementMap);

            // Print final classpath element order, after inserting Class-Path entries from manifest files
            if (classpathFinderLog != null) {
                final LogNode logNode = classpathFinderLog.log("Final classpath element order:");
                for (int i = 0; i < classpathOrder.size(); i++) {
                    final ClasspathElement classpathElt = classpathOrder.get(i);
                    logNode.log(i + ": " + classpathElt);
                }
            }

            ScanResult scanResult;
            if (enableRecursiveScanning) {

                // Find classpath elements that are path prefixes of other classpath elements
                final List> classpathEltResolvedPathToElement = new ArrayList<>();
                for (int i = 0; i < classpathOrder.size(); i++) {
                    final ClasspathElement classpathElement = classpathOrder.get(i);
                    classpathEltResolvedPathToElement.add(new SimpleEntry<>(
                            classpathElement.classpathEltPath.getResolvedPath(), classpathElement));
                }
                Collections.sort(classpathEltResolvedPathToElement,
                        new Comparator>() {
                            // Sort classpath elements into lexicographic order
                            @Override
                            public int compare(final SimpleEntry o1,
                                    final SimpleEntry o2) {
                                // Path strings will all be unique
                                return o1.getKey().compareTo(o2.getKey());
                            }
                        });
                LogNode nestedClasspathRootNode = null;
                for (int i = 0; i < classpathEltResolvedPathToElement.size(); i++) {
                    // See if each classpath element is a prefix of any others (if so, they will immediately follow
                    // in lexicographic order)
                    final SimpleEntry ei = classpathEltResolvedPathToElement.get(i);
                    final String basePath = ei.getKey();
                    final int basePathLen = basePath.length();
                    for (int j = i + 1; j < classpathEltResolvedPathToElement.size(); j++) {
                        final SimpleEntry ej = classpathEltResolvedPathToElement.get(j);
                        final String comparePath = ej.getKey();
                        final int comparePathLen = comparePath.length();
                        boolean foundNestedClasspathRoot = false;
                        if (comparePath.startsWith(basePath) && comparePathLen > basePathLen) {
                            // Require a separator after the prefix
                            final char nextChar = comparePath.charAt(basePathLen);
                            if (nextChar == '/' || nextChar == '!') {
                                // basePath is a path prefix of comparePath. Ensure that the nested classpath does
                                // not contain another '!' zip-separator (since classpath scanning does not recurse
                                // to jars-within-jars unless they are explicitly listed on the classpath)
                                final String nestedClasspathRelativePath = comparePath.substring(basePathLen + 1);
                                if (nestedClasspathRelativePath.indexOf('!') < 0) {
                                    // Found a nested classpath root
                                    foundNestedClasspathRoot = true;
                                    // Store link from prefix element to nested elements
                                    final ClasspathElement baseElement = ei.getValue();
                                    if (baseElement.nestedClasspathRoots == null) {
                                        baseElement.nestedClasspathRoots = new HashSet<>();
                                    }
                                    baseElement.nestedClasspathRoots.add(nestedClasspathRelativePath + "/");
                                    if (classpathFinderLog != null) {
                                        if (nestedClasspathRootNode == null) {
                                            nestedClasspathRootNode = classpathFinderLog
                                                    .log("Found nested classpath elements");
                                        }
                                        nestedClasspathRootNode.log(
                                                basePath + " is a prefix of the nested element " + comparePath);
                                    }
                                }
                            }
                        }
                        if (!foundNestedClasspathRoot) {
                            // After the first non-match, there can be no more prefix matches in the sorted order
                            break;
                        }
                    }
                }

                // Scan for matching classfiles / files, looking only at filenames / file paths, and not contents
                final LogNode pathScanLog = classpathFinderLog == null ? null
                        : classpathFinderLog.log("Scanning filenames within classpath elements");
                WorkQueue.runWorkQueue(classpathOrder, executorService, numParallelTasks,
                        new WorkUnitProcessor() {
                            @Override
                            public void processWorkUnit(final ClasspathElement classpathElement) throws Exception {
                                // Scan the paths within a directory or jar
                                classpathElement.scanPaths(pathScanLog);
                            }
                        }, interruptionChecker, pathScanLog);

                // Implement classpath masking -- if the same relative classfile path occurs multiple times in the
                // classpath, ignore (remove) the second and subsequent occurrences. Note that classpath masking is
                // performed whether or not a jar is whitelisted, and whether or not jar or dir scanning is enabled,
                // in order to ensure that class references passed into MatchProcessors are the same as those that
                // would be loaded by standard classloading. (See bug #100.)
                final LogNode maskLog = log == null ? null : log.log("Masking classpath files");
                final HashSet classpathRelativePathsFound = new HashSet<>();
                for (int classpathIdx = 0; classpathIdx < classpathOrder.size(); classpathIdx++) {
                    final ClasspathElement classpathElement = classpathOrder.get(classpathIdx);
                    classpathElement.maskFiles(classpathIdx, classpathRelativePathsFound, maskLog);
                }

                // Merge the maps from file to timestamp across all classpath elements (there will be no overlap in
                // keyspace, since file masking was already performed)
                final Map fileToLastModified = new HashMap<>();
                for (final ClasspathElement classpathElement : classpathOrder) {
                    fileToLastModified.putAll(classpathElement.fileToLastModified);
                }

                // Scan classfile binary headers in parallel
                final ConcurrentLinkedQueue classInfoUnlinked = //
                        new ConcurrentLinkedQueue<>();
                final LogNode classfileScanLog = log == null ? null : log.log("Scanning classfile binary headers");
                try (final Recycler classfileBinaryParserRecycler = //
                        new Recycler() {
                            @Override
                            public ClassfileBinaryParser newInstance() {
                                return new ClassfileBinaryParser();
                            }
                        }) {
                    final List classfileParserChunks = getClassfileParserChunks(
                            classpathOrder);
                    WorkQueue.runWorkQueue(classfileParserChunks, executorService, numParallelTasks,
                            new WorkUnitProcessor() {
                                @Override
                                public void processWorkUnit(final ClassfileParserChunk chunk) throws Exception {
                                    ClassfileBinaryParser classfileBinaryParser = null;
                                    try {
                                        classfileBinaryParser = classfileBinaryParserRecycler.acquire();
                                        chunk.classpathElement.parseClassfiles(classfileBinaryParser,
                                                chunk.classfileStartIdx, chunk.classfileEndIdx, classInfoUnlinked,
                                                classfileScanLog);
                                    } finally {
                                        classfileBinaryParserRecycler.release(classfileBinaryParser);
                                        classfileBinaryParser = null;
                                    }
                                }
                            }, interruptionChecker, classfileScanLog);
                }
                if (classfileScanLog != null) {
                    classfileScanLog.addElapsedTime();
                }

                // Build the class graph: convert ClassInfoUnlinked to linked ClassInfo objects.
                final LogNode classGraphLog = log == null ? null : log.log("Building class graph");
                final Map classNameToClassInfo = new HashMap<>();
                for (final ClassInfoUnlinked c : classInfoUnlinked) {
                    // Need to do two passes, so that annotation default parameter vals are available when linking
                    // non-attribute classes. In first pass, link annotations with default parameter vals.
                    if (c.annotationParamDefaultValues != null) {
                        c.link(scanSpec, classNameToClassInfo, classGraphLog);
                    }
                }
                for (final ClassInfoUnlinked c : classInfoUnlinked) {
                    // In second pass, link everything else.
                    if (c.annotationParamDefaultValues == null) {
                        // Create ClassInfo object from ClassInfoUnlinked object, and link into class graph
                        c.link(scanSpec, classNameToClassInfo, classGraphLog);
                    }
                }
                final ClassGraphBuilder classGraphBuilder = new ClassGraphBuilder(scanSpec, classNameToClassInfo);
                if (classGraphLog != null) {
                    classGraphLog.addElapsedTime();
                }

                // Create ScanResult
                scanResult = new ScanResult(scanSpec, classpathOrder, classLoaderOrder, classGraphBuilder,
                        fileToLastModified, nestedJarHandler, interruptionChecker, log);

                // Run scanResultProcessor in the current thread
                if (scanResultProcessor != null) {
                    scanResultProcessor.processScanResult(scanResult);
                }

            } else {
                // This is the result of a call to FastClasspathScanner#getUniqueClasspathElementsAsync(), so just
                // create placeholder ScanResult to contain classpathElementFilesOrdered.
                scanResult = new ScanResult(scanSpec, classpathOrder, classLoaderOrder,
                        /* classGraphBuilder = */ null, /* fileToLastModified = */ null, nestedJarHandler,
                        interruptionChecker, log);
            }
            if (log != null) {
                log.log("Completed scan", System.nanoTime() - scanStart);
            }

            // No exceptions were thrown -- return scan result
            return scanResult;

        } catch (final Throwable e) {
            // Remove temporary files if an exception was thrown
            if (this.nestedJarHandler != null) {
                this.nestedJarHandler.close(log);
            }
            if (log != null) {
                log.log(e);
            }
            if (failureHandler == null) {
                throw e;
            } else {
                failureHandler.onFailure(e);
                // Return null from the Future if a FailureHandler was added and there was an exception
                return null;
            }

        } finally {
            if (log != null) {
                log.flush();
            }
        }
    }
}