All downloads are free. Search and download functionalities use the official Maven repository.

org.zaproxy.zap.spider.Spider Maven / Gradle / Ivy

Go to download

The Zed Attack Proxy (ZAP) is an easy to use integrated penetration testing tool for finding vulnerabilities in web applications. It is designed to be used by people with a wide range of security experience and as such is ideal for developers and functional testers who are new to penetration testing. ZAP provides automated scanners as well as a set of tools that allow you to find security vulnerabilities manually.

There is a newer version: 2.15.0
Show newest version
/*
 * Zed Attack Proxy (ZAP) and its related class files.
 *
 * ZAP is an HTTP/HTTPS proxy for assessing web application security.
 *
 * Copyright 2012 The ZAP Development Team
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.zaproxy.zap.spider;

import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.URIException;
import org.apache.log4j.Logger;
import org.parosproxy.paros.model.Model;
import org.parosproxy.paros.network.ConnectionParam;
import org.parosproxy.paros.network.HttpMessage;
import org.parosproxy.paros.network.HttpRequestHeader;
import org.parosproxy.paros.network.HttpSender;
import org.zaproxy.zap.extension.spider.ExtensionSpider;
import org.zaproxy.zap.model.Context;
import org.zaproxy.zap.spider.filters.DefaultFetchFilter;
import org.zaproxy.zap.spider.filters.DefaultParseFilter;
import org.zaproxy.zap.spider.filters.FetchFilter;
import org.zaproxy.zap.spider.filters.FetchFilter.FetchStatus;
import org.zaproxy.zap.spider.filters.ParseFilter;
import org.zaproxy.zap.spider.parser.SpiderParser;
import org.zaproxy.zap.users.User;

/** The Class Spider. */
public class Spider {

    /** The spider parameters. */
    private SpiderParam spiderParam;

    /** The connection parameters. */
    private ConnectionParam connectionParam;

    /** The model. */
    private Model model;

    /** The listeners for Spider related events. */
    private List<SpiderListener> listeners;

    /** If the spider is currently paused. */
    private volatile boolean paused;

    /** If the spider is currently stopped. */
    private volatile boolean stopped;

    /** The pause lock, used for locking access to the "paused" variable. */
    private ReentrantLock pauseLock = new ReentrantLock();

    /** The controller that manages the spidering process. */
    private SpiderController controller;

    /**
     * The condition that is used for the threads in the pool to wait on, when the Spider crawling
     * is paused. When the Spider is resumed, all the waiting threads are awakened.
     */
    private Condition pausedCondition = pauseLock.newCondition();

    /** The thread pool for spider workers. */
    private ExecutorService threadPool;

    /** The default fetch filter. */
    private DefaultFetchFilter defaultFetchFilter;

    /** The seed list. Seeds are added here and turned into tasks when the spider starts. */
    private LinkedHashSet<URI> seedList;

    /** The extension. */
    private ExtensionSpider extension;

    /** The Constant log. */
    private static final Logger log = Logger.getLogger(Spider.class);

    /** The HTTP sender used to effectively send the data. */
    private HttpSender httpSender;

    /** The count of the tasks finished. */
    private int tasksDoneCount;

    /** The total count of all the submitted tasks. */
    private int tasksTotalCount;

    /** The scan context. If null, the scan is not performed in a context. */
    private Context scanContext;

    /** The scan user. */
    private User scanUser;

    /** The time the scan was started. */
    private long timeStarted;

    /**
     * The initialized marks if the spidering process is completely started. It solves the problem
     * when the first task is processed and the process is finished before the other seeds are
     * added.
     */
    private boolean initialized;

    /**
     * We do not want to recurse into an SVN folder, or a subfolder of an SVN folder, if one was
     * created from a previous Spider run.
     */
    private static final Pattern svnUrlPattern = Pattern.compile("\\.svn/"); // case sensitive

    /**
     * We do not want to recurse into a Git folder, or a subfolder of a Git folder, if one was
     * created from a previous Spider run.
     */
    private static final Pattern gitUrlPattern = Pattern.compile("\\.git/"); // case sensitive

    /** The ID of this spider, used e.g. to name the worker threads. */
    private final String id;

    /**
     * Instantiates a new spider.
     *
     * @param extension the extension
     * @param spiderParam the spider param
     * @param connectionParam the connection param
     * @param model the model
     * @param scanContext if a scan context is set, only URIs within the context are fetched and
     *     processed
     * @deprecated (2.6.0) Use {@link #Spider(String, ExtensionSpider, SpiderParam, ConnectionParam,
     *     Model, Context)} instead.
     */
    @Deprecated
    public Spider(
            ExtensionSpider extension,
            SpiderParam spiderParam,
            ConnectionParam connectionParam,
            Model model,
            Context scanContext) {
        // Delegates with a placeholder ID ("?"); kept only for backward compatibility.
        this("?", extension, spiderParam, connectionParam, model, scanContext);
    }

    /**
     * Constructs a {@code Spider} with the given data.
     *
     * @param id the ID of the spider, usually a unique integer
     * @param extension the extension
     * @param spiderParam the spider param
     * @param connectionParam the connection param
     * @param model the model
     * @param scanContext if a scan context is set, only URIs within the context are fetched and
     *     processed
     * @since 2.6.0
     */
    public Spider(
            String id,
            ExtensionSpider extension,
            SpiderParam spiderParam,
            ConnectionParam connectionParam,
            Model model,
            Context scanContext) {
        log.info("Spider initializing...");

        // Keep references to all collaborators before running the common initialization.
        this.id = id;
        this.extension = extension;
        this.spiderParam = spiderParam;
        this.connectionParam = connectionParam;
        this.model = model;
        this.scanContext = scanContext;

        this.listeners = new LinkedList<>();
        this.seedList = new LinkedHashSet<>();
        this.controller = new SpiderController(this, extension.getCustomParsers());

        init();
    }

    /** Initialize the spider. */
    private void init() {
        // Reset the run state: the spider starts out stopped, unpaused and with no tasks.
        paused = false;
        stopped = true;
        tasksDoneCount = 0;
        tasksTotalCount = 0;
        initialized = false;

        // Register the default fetch filter first, then any custom ones.
        defaultFetchFilter = new DefaultFetchFilter();
        addFetchFilter(defaultFetchFilter);
        for (FetchFilter customFetchFilter : extension.getCustomFetchFilters()) {
            addFetchFilter(customFetchFilter);
        }

        // Register the default parse filter first, then any custom ones.
        addParseFilter(new DefaultParseFilter(spiderParam, extension.getMessages()));
        for (ParseFilter customParseFilter : extension.getCustomParseFilters()) {
            addParseFilter(customParseFilter);
        }

        // Restrict the default fetch filter to the scan context, if one was given.
        defaultFetchFilter.setScanContext(scanContext);
        defaultFetchFilter.setDomainsAlwaysInScope(spiderParam.getDomainsAlwaysInScopeEnabled());
    }

    /* SPIDER Related */
    /**
     * Adds a new seed for the Spider.
     *
     * @param msg the message used for seed. The request URI is used from the Request Header
     */
    public void addSeed(HttpMessage msg) {
        // Delegate to the URI based variant, using the request URI of the message.
        addSeed(msg.getRequestHeader().getURI());
    }

    /**
     * Adds a new seed for the Spider.
     *
     * @param uri the uri
     */
    public void addSeed(URI uri) {
        // Widen the spidering scope with the host of the new seed.
        try {
            defaultFetchFilter.addScopeRegex(uri.getHost());
        } catch (URIException e) {
            log.error("There was an error while adding seed value: " + uri, e);
            return;
        }

        // Remember the seed; it is turned into a crawl task only when the spider is started.
        seedList.add(uri);

        SpiderParam params = getSpiderParam();
        // Seed the well-known root metadata files, when the corresponding parsers are enabled.
        if (params.isParseRobotsTxt()) {
            addRootFileSeed(uri, "robots.txt");
        }
        if (params.isParseSitemapXml()) {
            addRootFileSeed(uri, "sitemap.xml");
        }
        // SVN metadata files, for SVN based spidering.
        if (params.isParseSVNEntries()) {
            addFileSeed(uri, ".svn/entries", svnUrlPattern);
            addFileSeed(uri, ".svn/wc.db", svnUrlPattern);
        }
        // Git index file, for Git based spidering.
        if (params.isParseGit()) {
            addFileSeed(uri, ".git/index", gitUrlPattern);
        }
    }

    /**
     * Adds a file seed, with the given file name, at the root of the base URI.
     *
     * <p>For example, with base URI as {@code http://example.com/some/path/file.html} and file name
     * as {@code sitemap.xml} it's added the seed {@code http://example.com/sitemap.xml}.
     *
     * @param baseUri the base URI.
     * @param fileName the file name.
     */
    private void addRootFileSeed(URI baseUri, String fileName) {
        String seed =
                buildUri(
                        baseUri.getScheme(),
                        baseUri.getRawHost(),
                        baseUri.getPort(),
                        "/" + fileName);
        try {
            this.seedList.add(new URI(seed, true));
        } catch (Exception e) {
            log.warn("Error while creating [" + fileName + "] seed: " + seed, e);
        }
    }

    /**
     * Creates a URI (string) with the given scheme, host, port and path. The port is only added if
     * not the default for the given scheme.
     *
     * @param scheme the scheme, {@code http} or {@code https}.
     * @param host the name of the host.
     * @param port the port.
     * @param path the path, should start with {@code /}.
     * @return the URI with the provided components.
     */
    private static String buildUri(String scheme, char[] host, int port, String path) {
        StringBuilder strBuilder = new StringBuilder(150);
        strBuilder.append(scheme).append("://").append(host);
        if (!isDefaultPort(scheme, port)) {
            strBuilder.append(':').append(port);
        }
        strBuilder.append(path);
        return strBuilder.toString();
    }

    /**
     * Adds a file seed using the given base URI, file name and condition.
     *
     * <p>The file is added as part of the path, without existing file name. For example, with base
     * URI as {@code http://example.com/some/path/file.html} and file name as {@code .git/index}
     * it's added the seed {@code http://example.com/some/path/.git/index}.
     *
     * <p>If the given condition matches the base URI's path without the file name, the file seed is
     * not added (this prevents adding the seed once again).
     *
     * @param baseUri the base URI to construct the file seed.
     * @param fileName the name of the file seed.
     * @param condition the condition to add the file seed.
     */
    private void addFileSeed(URI baseUri, String fileName, Pattern condition) {
        String fullpath = baseUri.getEscapedPath();
        if (fullpath == null) {
            fullpath = "";
        }

        String name = baseUri.getEscapedName();
        if (name == null) {
            name = "";
        }

        // Strip the file name from the path, keeping the directory part.
        String pathminusfilename = fullpath.substring(0, fullpath.lastIndexOf(name));
        if (pathminusfilename.isEmpty()) {
            pathminusfilename = "/";
        }

        // Already inside a matching folder (e.g. ".git/"), do not recurse further.
        if (condition.matcher(pathminusfilename).find()) {
            return;
        }

        String uri =
                buildUri(
                        baseUri.getScheme(),
                        baseUri.getRawHost(),
                        baseUri.getPort(),
                        pathminusfilename + fileName);
        try {
            this.seedList.add(new URI(uri, true));
        } catch (Exception e) {
            log.warn(
                    "Error while creating a seed URI for file ["
                            + fileName
                            + "] from ["
                            + baseUri
                            + "] using ["
                            + uri
                            + "]:",
                    e);
        }
    }

    /**
     * Tells whether or not the given port is the default for the given scheme.
     *
     * <p>Only intended to be used with HTTP/S schemes.
     *
     * @param scheme the scheme.
     * @param port the port.
     * @return {@code true} if the given port is the default for the given scheme, {@code false}
     *     otherwise.
     */
    private static boolean isDefaultPort(String scheme, int port) {
        if (port == -1) {
            // -1 means "no explicit port", which implies the scheme's default.
            return true;
        }
        if ("http".equalsIgnoreCase(scheme)) {
            return port == 80;
        }
        if ("https".equalsIgnoreCase(scheme)) {
            return port == 443;
        }
        return false;
    }

    /**
     * Sets the exclude list which contains a List of strings, defining the uris that should be
     * excluded.
     *
     * @param excludeList the new exclude list
     */
    public void setExcludeList(List<String> excludeList) {
        log.debug("New Exclude list: " + excludeList);
        defaultFetchFilter.setExcludeRegexes(excludeList);
    }

    /**
     * Adds a new fetch filter to the spider.
     *
     * @param filter the filter
     */
    public void addFetchFilter(FetchFilter filter) {
        controller.addFetchFilter(filter);
    }

    /**
     * Adds a new parse filter to the spider.
     *
     * @param filter the filter
     */
    public void addParseFilter(ParseFilter filter) {
        controller.addParseFilter(filter);
    }

    /**
     * Gets the http sender. Can be called from the SpiderTask.
     *
     * @return the http sender
     */
    protected HttpSender getHttpSender() {
        return httpSender;
    }

    /**
     * Gets the spider parameters. Can be called from the SpiderTask.
     *
     * @return the spider parameters
     */
    protected SpiderParam getSpiderParam() {
        return spiderParam;
    }

    /**
     * Gets the controller.
     *
     * @return the controller
     */
    protected SpiderController getController() {
        return controller;
    }

    /**
     * Gets the model.
     *
     * @return the model
     */
    protected Model getModel() {
        return this.model;
    }

    /**
     * Submit a new task to the spidering task pool.
     *
     * @param task the task
     */
    protected synchronized void submitTask(SpiderTask task) {
        if (isStopped()) {
            log.debug("Submitting task skipped (" + task + ") as the Spider process is stopped.");
            return;
        }
        if (isTerminated()) {
            log.debug(
                    "Submitting task skipped (" + task + ") as the Spider process is terminated.");
            return;
        }
        this.tasksTotalCount++;
        try {
            this.threadPool.execute(task);
        } catch (RejectedExecutionException e) {
            // The pool may reject tasks while shutting down; just log, the spider is ending.
            if (log.isDebugEnabled()) {
                log.debug(
                        "Submitted task was rejected ("
                                + task
                                + "), spider state: [stopped="
                                + isStopped()
                                + ", terminated="
                                + isTerminated()
                                + "].");
            }
        }
    }

    /**
     * Gets the extension.
     *
     * @return the extension
     */
    protected ExtensionSpider getExtensionSpider() {
        return this.extension;
    }

    /* SPIDER PROCESS maintenance - pause, resume, shutdown, etc. */

    /** Starts the Spider crawling. */
    public void start() {
        log.info("Starting spider...");
        this.timeStarted = System.currentTimeMillis();

        fetchFilterSeeds();

        // Check if seeds are available, otherwise the Spider will start, but will not have any
        // seeds and will not stop.
        if (seedList == null || seedList.isEmpty()) {
            log.warn("No seeds available for the Spider. Cancelling scan...");
            notifyListenersSpiderComplete(false);
            notifyListenersSpiderProgress(100, 0, 0);
            return;
        }

        if (scanUser != null) {
            log.info(
                    "Scan will be performed from the point of view of User: "
                            + scanUser.getName());
        }

        this.controller.init();
        this.stopped = false;
        this.paused = false;
        this.initialized = false;

        // Initialize the thread pool
        this.threadPool =
                Executors.newFixedThreadPool(
                        spiderParam.getThreadCount(),
                        new SpiderThreadFactory("ZAP-SpiderThreadPool-" + id + "-thread-"));

        // Initialize the HTTP sender; the "cond ? true : x" form was simplified to "cond || x".
        httpSender =
                new HttpSender(
                        connectionParam,
                        connectionParam.isHttpStateEnabled() || !spiderParam.isAcceptCookies(),
                        HttpSender.SPIDER_INITIATOR);
        // Do not follow redirections because the request is not updated, the redirections will be
        // handled manually.
        httpSender.setFollowRedirect(false);

        // Add the seeds
        for (URI uri : seedList) {
            if (log.isDebugEnabled()) {
                log.debug("Adding seed for spider: " + uri);
            }
            controller.addSeed(uri, HttpRequestHeader.GET);
        }
        // Mark the process as completely initialized
        initialized = true;
    }

    /**
     * Filters the seed list using the current fetch filters, preventing any non-valid seed from
     * being accessed.
     *
     * @see #seedList
     * @see FetchFilter
     * @see SpiderController#getFetchFilters()
     * @since 2.5.0
     */
    private void fetchFilterSeeds() {
        if (seedList == null || seedList.isEmpty()) {
            return;
        }

        for (Iterator<URI> it = seedList.iterator(); it.hasNext(); ) {
            URI seed = it.next();
            for (FetchFilter filter : controller.getFetchFilters()) {
                FetchStatus filterReason = filter.checkFilter(seed);
                if (filterReason != FetchStatus.VALID) {
                    if (log.isDebugEnabled()) {
                        log.debug("Seed: " + seed + " was filtered with reason: " + filterReason);
                    }
                    it.remove();
                    break;
                }
            }
        }
    }

    /** Stops the Spider crawling. Must not be called from any of the threads in the thread pool. */
    public void stop() {
        if (stopped) {
            return;
        }
        this.stopped = true;
        log.info("Stopping spidering process by request.");

        if (this.paused) {
            // Have to resume first or we get a deadlock
            this.resume();
        }

        // Issue the shutdown command
        this.threadPool.shutdown();
        try {
            if (!this.threadPool.awaitTermination(2, TimeUnit.SECONDS)) {
                log.warn(
                        "Failed to await for all spider threads to stop in the given time (2s)...");
                for (Runnable task : this.threadPool.shutdownNow()) {
                    ((SpiderTask) task).cleanup();
                }
            }
        } catch (InterruptedException ignore) {
            log.warn("Interrupted while awaiting for all spider threads to stop...");
            // Preserve the interrupt status for the caller instead of swallowing it.
            Thread.currentThread().interrupt();
        }
        if (httpSender != null) {
            this.getHttpSender().shutdown();
            httpSender = null;
        }

        // Notify the controller to clean up memory
        controller.reset();
        this.threadPool = null;

        // Notify the listeners -- in the meanwhile
        notifyListenersSpiderComplete(false);
    }

    /** The Spidering process is complete. */
    private void complete() {
        if (stopped) {
            return;
        }

        log.info("Spidering process is complete. Shutting down...");
        this.stopped = true;
        if (httpSender != null) {
            this.getHttpSender().shutdown();
            httpSender = null;
        }

        // Notify the controller to clean up memory
        controller.reset();

        // Issue the shutdown command on a separate thread, as the current thread is most likely one
        // from the pool
        new Thread(
                        new Runnable() {
                            @Override
                            public void run() {
                                if (threadPool != null) {
                                    threadPool.shutdown();
                                }
                                // Notify the listeners -- in the meanwhile
                                notifyListenersSpiderComplete(true);
                                controller.reset();
                                threadPool = null;
                            }
                        },
                        "ZAP-SpiderShutdownThread-" + id)
                .start();
    }

    /** Pauses the Spider crawling. */
    public void pause() {
        pauseLock.lock();
        try {
            paused = true;
        } finally {
            pauseLock.unlock();
        }
    }

    /** Resumes the Spider crawling. */
    public void resume() {
        pauseLock.lock();
        try {
            paused = false;
            // Wake up all threads that are currently paused
            pausedCondition.signalAll();
        } finally {
            pauseLock.unlock();
        }
    }

    /**
     * Sets the spider so it will scan from the point of view of a user.
     *
     * @param user the user to be scanned as
     */
    public void setScanAsUser(User user) {
        this.scanUser = user;
    }

    /**
     * Gets the user that will be used in the scanning.
     *
     * @return the scan user
     */
    protected User getScanUser() {
        return this.scanUser;
    }

    /**
     * This method is run by each thread in the Thread Pool before the task execution. Particularly,
     * it checks if the Spidering process is paused and, if it is, it waits on the corresponding
     * condition for the process to be resumed. Called from the SpiderTask.
     */
    protected void preTaskExecution() {
        checkPauseAndWait();
    }

    /**
     * This method is run by Threads in the ThreadPool and checks if the scan is paused and, if it
     * is, waits until it's unpaused.
     */
    protected void checkPauseAndWait() {
        pauseLock.lock();
        try {
            while (paused && !stopped) {
                pausedCondition.await();
            }
        } catch (InterruptedException e) {
            // Re-assert the interrupt status instead of swallowing it, so the worker thread can
            // still observe the interruption (e.g. after shutdownNow()).
            Thread.currentThread().interrupt();
        } finally {
            pauseLock.unlock();
        }
    }

    /**
     * This method is run by each thread in the Thread Pool after the task execution. Particularly,
     * it notifies the listeners of the progress and checks if the scan is complete. Called from the
     * SpiderTask.
     */
    protected synchronized void postTaskExecution() {
        if (stopped) {
            // Stopped, so don't count the task(s) as done.
            // (worker threads call this method even if the task was not really executed.)
            return;
        }
        tasksDoneCount++;
        // tasksTotalCount is at least 1 here, as tasks are counted when submitted.
        int percentageComplete = tasksDoneCount * 100 / tasksTotalCount;

        // Compute the progress and notify the listeners
        this.notifyListenersSpiderProgress(
                percentageComplete, tasksDoneCount, tasksTotalCount - tasksDoneCount);

        // Check for ending conditions
        if (tasksDoneCount == tasksTotalCount && initialized) {
            this.complete();
        }
    }

    /**
     * Checks if is paused.
     *
     * @return true, if is paused
     */
    public boolean isPaused() {
        return this.paused;
    }

    /**
     * Checks if is stopped, i.e. a shutdown was issued or it is not running.
     *
     * @return true, if is stopped
     */
    public boolean isStopped() {
        if (!stopped && this.spiderParam.getMaxDuration() > 0) {
            // Check to see if the scan has exceeded the specified maxDuration
            if (TimeUnit.MILLISECONDS.toMinutes(System.currentTimeMillis() - this.timeStarted)
                    > this.spiderParam.getMaxDuration()) {
                log.info(
                        "Spidering process has exceeded maxDuration of "
                                + this.spiderParam.getMaxDuration()
                                + " minute(s)");
                this.complete();
            }
        }
        return stopped;
    }

    /**
     * Checks if is terminated.
     *
     * @return true, if is terminated
     */
    public boolean isTerminated() {
        return threadPool.isTerminated();
    }

    /* LISTENERS SECTION */

    /**
     * Adds a new spider listener.
     *
     * @param listener the listener
     */
    public void addSpiderListener(SpiderListener listener) {
        this.listeners.add(listener);
    }

    /**
     * Removes a spider listener.
     *
     * @param listener the listener
     */
    public void removeSpiderListener(SpiderListener listener) {
        this.listeners.remove(listener);
    }

    /**
     * Notifies all the listeners regarding the spider progress.
     *
     * @param percentageComplete the percentage complete
     * @param numberCrawled the number of pages crawled
     * @param numberToCrawl the number of pages left to crawl
     */
    protected synchronized void notifyListenersSpiderProgress(
            int percentageComplete, int numberCrawled, int numberToCrawl) {
        for (SpiderListener l : listeners) {
            l.spiderProgress(percentageComplete, numberCrawled, numberToCrawl);
        }
    }

    /**
     * Notifies the listeners regarding a found uri.
     *
     * @param uri the uri
     * @param method the method used for fetching the resource
     * @param status the {@link FetchStatus} stating if this uri will be processed, and, if not,
     *     stating the reason of the filtering
     */
    protected synchronized void notifyListenersFoundURI(
            String uri, String method, FetchStatus status) {
        for (SpiderListener l : listeners) {
            l.foundURI(uri, method, status);
        }
    }

    /**
     * Notifies the listeners of a {@link SpiderTask}'s result.
     *
     * @param result the result of a spider task.
     */
    protected synchronized void notifyListenersSpiderTaskResult(SpiderTaskResult result) {
        for (SpiderListener l : listeners) {
            l.notifySpiderTaskResult(result);
        }
    }

    /**
     * Notifies the listeners that the spider is complete.
     *
     * @param successful {@code true} if the spider completed successfully (e.g. was not stopped),
     *     {@code false} otherwise
     */
    protected synchronized void notifyListenersSpiderComplete(boolean successful) {
        for (SpiderListener l : listeners) {
            l.spiderComplete(successful);
        }
    }

    /**
     * Adds a custom spider parser.
     *
     * @param sp the parser to add
     */
    public void addCustomParser(SpiderParser sp) {
        this.controller.addSpiderParser(sp);
    }

    /** Thread factory creating non-daemon, normal-priority, named spider worker threads. */
    private static class SpiderThreadFactory implements ThreadFactory {

        private final AtomicInteger threadNumber;
        private final String namePrefix;
        private final ThreadGroup group;

        public SpiderThreadFactory(String namePrefix) {
            threadNumber = new AtomicInteger(1);
            this.namePrefix = namePrefix;
            SecurityManager s = System.getSecurityManager();
            group = (s != null) ? s.getThreadGroup() : Thread.currentThread().getThreadGroup();
        }

        @Override
        public Thread newThread(Runnable r) {
            Thread t = new Thread(group, r, namePrefix + threadNumber.getAndIncrement(), 0);
            if (t.isDaemon()) {
                t.setDaemon(false);
            }
            if (t.getPriority() != Thread.NORM_PRIORITY) {
                t.setPriority(Thread.NORM_PRIORITY);
            }
            return t;
        }
    }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy