
org.zaproxy.zap.spider.SpiderTask


The Zed Attack Proxy (ZAP) is an easy-to-use integrated penetration testing tool for finding vulnerabilities in web applications. It is designed to be used by people with a wide range of security experience and, as such, is ideal for developers and functional testers who are new to penetration testing. ZAP provides automated scanners as well as a set of tools that allow you to find security vulnerabilities manually.

/*
 * Zed Attack Proxy (ZAP) and its related class files.
 * 
 * ZAP is an HTTP/HTTPS proxy for assessing web application security.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); 
 * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at 
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0 
 *   
 * Unless required by applicable law or agreed to in writing, software 
 * distributed under the License is distributed on an "AS IS" BASIS, 
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 * See the License for the specific language governing permissions and 
 * limitations under the License. 
 */
package org.zaproxy.zap.spider;

import java.io.IOException;
import java.net.ConnectException;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.net.UnknownHostException;
import java.nio.charset.StandardCharsets;
import java.util.List;

import javax.net.ssl.SSLException;

import net.htmlparser.jericho.Source;

import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.log4j.Logger;
import org.parosproxy.paros.Constant;
import org.parosproxy.paros.control.Control;
import org.parosproxy.paros.db.DatabaseException;
import org.parosproxy.paros.extension.history.ExtensionHistory;
import org.parosproxy.paros.model.HistoryReference;
import org.parosproxy.paros.network.HttpHeader;
import org.parosproxy.paros.network.HttpMalformedHeaderException;
import org.parosproxy.paros.network.HttpMessage;
import org.parosproxy.paros.network.HttpRequestHeader;
import org.parosproxy.paros.network.HttpResponseHeader;
import org.zaproxy.zap.spider.filters.ParseFilter;
import org.zaproxy.zap.spider.filters.ParseFilter.FilterResult;
import org.zaproxy.zap.spider.parser.SpiderParser;

/**
 * The SpiderTask representing a spidering task performed during the Spidering process.
 */
public class SpiderTask implements Runnable {

	/** The parent spider. */
	private Spider parent;

	/**
	 * The history reference to the database record where the request message has been partially filled in.
	 * <p>
	 * Might be {@code null} if failed to create or persist the message, if the task was already executed or if a clean up was
	 * performed.
	 * 
	 * @see #cleanup()
	 * @see #deleteHistoryReference()
	 * @see #prepareHttpMessage()
	 */
	private HistoryReference reference;

	/** The depth of crawling where the uri was found. */
	private int depth;

	private ExtensionHistory extHistory = null;

	/** The Constant log. */
	private static final Logger log = Logger.getLogger(SpiderTask.class);

	/**
	 * Instantiates a new spider task using the target URI. The purpose of this task is to crawl the given
	 * uri, using the provided method, find any other uris in the fetched resource and create other tasks.
	 * 
	 * @param parent the spider controlling the crawling process
	 * @param uri the uri that this task should process
	 * @param depth the depth where this uri is located in the spidering process
	 * @param method the HTTP method that should be used to fetch the resource
	 */
	public SpiderTask(Spider parent, URI uri, int depth, String method) {
		this(parent, null, uri, depth, method, null);
	}

	/**
	 * Instantiates a new spider task using the target URI. The purpose of this task is to crawl the given
	 * uri, using the provided method, find any other uris in the fetched resource and create other tasks.
	 * 
	 * @param parent the spider controlling the crawling process
	 * @param sourceUri the URI where the given {@code uri} was found
	 * @param uri the uri that this task should process
	 * @param depth the depth where this uri is located in the spidering process
	 * @param method the HTTP method that should be used to fetch the resource
	 * @since 2.4.0
	 */
	public SpiderTask(Spider parent, URI sourceUri, URI uri, int depth, String method) {
		this(parent, sourceUri, uri, depth, method, null);
	}

	/**
	 * Instantiates a new spider task using the target URI. The purpose of this task is to crawl the given
	 * uri, using the provided method, find any other uris in the fetched resource and create other tasks.
	 * <p>
	 * The body of the request message is also provided in the {@literal requestBody} parameter and will be
	 * used when fetching the resource from the specified uri.
	 * 
	 * @param parent the spider controlling the crawling process
	 * @param uri the uri that this task should process
	 * @param depth the depth where this uri is located in the spidering process
	 * @param method the HTTP method that should be used to fetch the resource
	 * @param requestBody the body of the request
	 */
	public SpiderTask(Spider parent, URI uri, int depth, String method, String requestBody) {
		this(parent, null, uri, depth, method, requestBody);
	}

	/**
	 * Instantiates a new spider task using the target URI. The purpose of this task is to crawl the given
	 * uri, using the provided method, find any other uris in the fetched resource and create other tasks.
	 * <p>
	 * The body of the request message is also provided in the {@literal requestBody} parameter and will be
	 * used when fetching the resource from the specified uri.
	 * 
	 * @param parent the spider controlling the crawling process
	 * @param sourceUri the URI where the given {@code uri} was found
	 * @param uri the uri that this task should process
	 * @param depth the depth where this uri is located in the spidering process
	 * @param method the HTTP method that should be used to fetch the resource
	 * @param requestBody the body of the request
	 * @since 2.4.0
	 */
	public SpiderTask(Spider parent, URI sourceUri, URI uri, int depth, String method, String requestBody) {
		super();
		this.parent = parent;
		this.depth = depth;

		// Log the new task
		if (log.isDebugEnabled()) {
			log.debug("New task submitted for uri: " + uri);
		}

		// Create a new HttpMessage that will be used for the request and persist it in the database using
		// HistoryReference
		try {
			HttpRequestHeader requestHeader =
					new HttpRequestHeader(method, uri, HttpHeader.HTTP11, parent.getConnectionParam());
			if (sourceUri != null && parent.getSpiderParam().isSendRefererHeader()) {
				requestHeader.setHeader(HttpRequestHeader.REFERER, sourceUri.toString());
			}
			HttpMessage msg = new HttpMessage(requestHeader);
			if (requestBody != null) {
				msg.getRequestHeader().setContentLength(requestBody.length());
				msg.setRequestBody(requestBody);
			}
			this.reference = new HistoryReference(
					parent.getModel().getSession(),
					HistoryReference.TYPE_SPIDER_TASK,
					msg);
		} catch (HttpMalformedHeaderException e) {
			log.error("Error while building HttpMessage for uri: " + uri, e);
		} catch (DatabaseException e) {
			log.error("Error while persisting HttpMessage for uri: " + uri, e);
		}
	}

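	// Overview of the task lifecycle implemented by run() below:
	//  1. Bail out early if the request message could not be persisted (reference == null).
	//  2. Honour stop/pause requests from the parent Spider before and after fetching.
	//  3. Reload the request from the database (prepareHttpMessage) and send it (fetchResource);
	//     a failed send is turned into a synthetic error response via setErrorResponse.
	//  4. Apply the configured ParseFilters; if none filters the response and the maximum crawl
	//     depth has not been reached, processResource() lets the parsers extract new URIs.
	//  5. Every path ends with parent.postTaskExecution() so the Spider can update its progress.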
	@Override
	public void run() {
		if (reference == null) {
			log.warn("Null URI. Skipping crawling task: " + this);
			parent.postTaskExecution();
			return;
		}

		// Log the task start
		if (log.isDebugEnabled()) {
			log.debug("Spider Task Started. Processing uri at depth " + depth
					+ " using already constructed message: " + reference.getURI());
		}

		// Check if the spider should stop
		if (parent.isStopped()) {
			log.debug("Spider process is stopped. Skipping crawling task...");
			deleteHistoryReference();
			parent.postTaskExecution();
			return;
		}

		// Check if the crawling process is paused and do any "before execution" processing
		parent.preTaskExecution();

		// Fetch the resource
		HttpMessage msg;
		try {
			msg = prepareHttpMessage();
		} catch (Exception e) {
			log.error("Failed to prepare HTTP message: ", e);
			parent.postTaskExecution();
			return;
		}

		try {
			fetchResource(msg);
		} catch (Exception e) {
			setErrorResponse(msg, e);
			parent.notifyListenersSpiderTaskResult(new SpiderTaskResult(msg, getSkippedMessage("ioerror")));
			// The exception was already logged, in fetchResource, with the URL (which we don't have here)
			parent.postTaskExecution();
			return;
		}

		// Check if the spider should stop
		if (parent.isStopped()) {
			parent.notifyListenersSpiderTaskResult(new SpiderTaskResult(msg, getSkippedMessage("stopped")));
			log.debug("Spider process is stopped. Skipping crawling task...");
			parent.postTaskExecution();
			return;
		}

		// Check if the crawling process is paused
		parent.checkPauseAndWait();

		// Check the parse filters to see if the resource should be skipped from parsing
		for (ParseFilter filter : parent.getController().getParseFilters()) {
			FilterResult filterResult = filter.filtered(msg);
			if (filterResult.isFiltered()) {
				if (log.isDebugEnabled()) {
					log.debug("Resource [" + msg.getRequestHeader().getURI()
							+ "] fetched, but will not be parsed due to a ParseFilter rule: "
							+ filterResult.getReason());
				}
				parent.notifyListenersSpiderTaskResult(new SpiderTaskResult(msg, filterResult.getReason()));
				parent.postTaskExecution();
				return;
			}
		}

		// Check if the spider should stop
		if (parent.isStopped()) {
			parent.notifyListenersSpiderTaskResult(new SpiderTaskResult(msg, getSkippedMessage("stopped")));
			log.debug("Spider process is stopped. Skipping crawling task...");
			parent.postTaskExecution();
			return;
		}

		// Check if the crawling process is paused
		parent.checkPauseAndWait();

		if (depth < parent.getSpiderParam().getMaxDepth()) {
			parent.notifyListenersSpiderTaskResult(new SpiderTaskResult(msg));
			processResource(msg);
		} else {
			parent.notifyListenersSpiderTaskResult(new SpiderTaskResult(msg, getSkippedMessage("maxdepth")));
		}

		// Update the progress and check if the spidering process should stop
		parent.postTaskExecution();
		log.debug("Spider Task finished.");
	}

	private String getSkippedMessage(String key) {
		return parent.getExtensionSpider().getMessages().getString("spider.task.message.skipped." + key);
	}

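	// The request built in the constructor is not kept in memory while the task waits to be
	// executed: it is persisted as a TYPE_SPIDER_TASK history record and reloaded here,
	// presumably to keep the memory footprint low when a large number of URIs are queued.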
	/**
	 * Prepares the HTTP message to be sent to the target server.
	 * <p>
	 * The HTTP message is read from the database and set up with common headers (e.g. User-Agent) and properties (e.g. user).
	 * 
	 * @return the HTTP message
	 * @throws HttpMalformedHeaderException if an error occurred while parsing the HTTP message read from the database
	 * @throws DatabaseException if an error occurred while reading the HTTP message from the database
	 */
	private HttpMessage prepareHttpMessage() throws HttpMalformedHeaderException, DatabaseException {
		// Fetch the request message from the database
		HttpMessage msg;
		try {
			msg = reference.getHttpMessage();
			// HistoryReference is about to be deleted, so there's no point in keeping a reference to it.
			msg.setHistoryRef(null);
		} finally {
			deleteHistoryReference();
		}

		msg.getRequestHeader().setHeader(HttpHeader.IF_MODIFIED_SINCE, null);
		msg.getRequestHeader().setHeader(HttpHeader.IF_NONE_MATCH, null);

		// Check if there is a custom user agent
		if (parent.getSpiderParam().getUserAgent() != null) {
			msg.getRequestHeader().setHeader(HttpHeader.USER_AGENT, parent.getSpiderParam().getUserAgent());
		}

		// Check if there's a need to send the message from the point of view of a User
		if (parent.getScanUser() != null) {
			msg.setRequestingUser(parent.getScanUser());
		}

		return msg;
	}

	/**
	 * Deletes the history reference, should be called when no longer needed.
	 * <p>
	 * The call to this method has no effect if the history reference no longer exists (i.e. {@code null}).
	 * 
	 * @see #reference
	 */
	private void deleteHistoryReference() {
		if (reference == null) {
			return;
		}

		if (getExtensionHistory() != null) {
			getExtensionHistory().delete(reference);
			reference = null;
		}
	}

	private void setErrorResponse(HttpMessage msg, Exception cause) {
		StringBuilder strBuilder = new StringBuilder(250);
		if (cause instanceof SSLException) {
			strBuilder.append(Constant.messages.getString("network.ssl.error.connect"));
			strBuilder.append(msg.getRequestHeader().getURI().toString()).append('\n');
			strBuilder.append(Constant.messages.getString("network.ssl.error.exception"))
					.append(cause.getMessage())
					.append('\n');
			strBuilder.append(Constant.messages.getString("network.ssl.error.exception.rootcause"))
					.append(ExceptionUtils.getRootCauseMessage(cause))
					.append('\n');
			strBuilder.append(
					Constant.messages.getString(
							"network.ssl.error.help",
							Constant.messages.getString("network.ssl.error.help.url")));
			strBuilder.append("\n\nStack Trace:\n");
			for (String stackTraceFrame : ExceptionUtils.getRootCauseStackTrace(cause)) {
				strBuilder.append(stackTraceFrame).append('\n');
			}
		} else {
			strBuilder.append(cause.getClass().getName())
					.append(": ")
					.append(cause.getLocalizedMessage())
					.append("\n\nStack Trace:\n");
			for (String stackTraceFrame : ExceptionUtils.getRootCauseStackTrace(cause)) {
				strBuilder.append(stackTraceFrame).append('\n');
			}
		}

		String message = strBuilder.toString();

		HttpResponseHeader responseHeader;
		try {
			responseHeader = new HttpResponseHeader("HTTP/1.1 400 ZAP IO Error");
			responseHeader.setHeader(HttpHeader.CONTENT_TYPE, "text/plain; charset=UTF-8");
			responseHeader.setHeader(
					HttpHeader.CONTENT_LENGTH,
					Integer.toString(message.getBytes(StandardCharsets.UTF_8).length));
			msg.setResponseHeader(responseHeader);
			msg.setResponseBody(message);
		} catch (HttpMalformedHeaderException e) {
			log.error("Failed to create error response:", e);
		}
	}

	/**
	 * Process a resource, searching for links (uris) to other resources.
	 * 
	 * @param message the HTTP Message
	 */
	private void processResource(HttpMessage message) {
		List<SpiderParser> parsers = parent.getController().getParsers();

		// Prepare the Jericho source
		Source source = new Source(message.getResponseBody().toString());

		// Get the full path of the file
		String path = null;
		try {
			path = message.getRequestHeader().getURI().getPath();
		} catch (URIException e) {
		} finally {
			// Handle null paths.
			if (path == null)
				path = "";
		}

		// Parse the resource
		boolean alreadyConsumed = false;
		for (SpiderParser parser : parsers) {
			if (parser.canParseResource(message, path, alreadyConsumed)) {
				if (log.isDebugEnabled())
					log.debug("Parser " + parser + " can parse resource '" + path + "'");
				if (parser.parseResource(message, source, depth))
					alreadyConsumed = true;
			} else {
				if (log.isDebugEnabled())
					log.debug("Parser " + parser + " cannot parse resource '" + path + "'");
			}
		}
	}

	private ExtensionHistory getExtensionHistory() {
		if (this.extHistory == null) {
			this.extHistory = Control.getSingleton().getExtensionLoader().getExtension(ExtensionHistory.class);
		}
		return this.extHistory;
	}

	/**
	 * Fetches a resource.
	 * 
	 * @param msg the HTTP message that will be sent to the server
	 * @throws IOException Signals that an I/O exception has occurred.
	 */
	private void fetchResource(HttpMessage msg) throws IOException {
		if (parent.getHttpSender() == null) {
			return;
		}

		try {
			parent.getHttpSender().sendAndReceive(msg);
		} catch (ConnectException e) {
			log.debug("Failed to connect to: " + msg.getRequestHeader().getURI(), e);
			throw e;
		} catch (SocketTimeoutException e) {
			log.debug("Socket timeout: " + msg.getRequestHeader().getURI(), e);
			throw e;
		} catch (SocketException e) {
			log.debug("Socket exception: " + msg.getRequestHeader().getURI(), e);
			throw e;
		} catch (UnknownHostException e) {
			log.debug("Unknown host: " + msg.getRequestHeader().getURI(), e);
			throw e;
		} catch (Exception e) {
			log.error("An error occurred while fetching the resource [" + msg.getRequestHeader().getURI()
					+ "]: " + e.getMessage(), e);
			throw e;
		}
	}

	/**
	 * Cleans up the resources used by the task.
	 * <p>
	 * Should be called if the task was not executed.
	 * 
	 * @since 2.5.0
	 */
	void cleanup() {
		deleteHistoryReference();
	}
}
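For orientation, below is a minimal sketch of how a seed task like the one defined above could be created and handed to a running spider. It is not part of the file: the configured Spider instance is assumed to come from the surrounding ZAP runtime, the helper class and method names are hypothetical, and Spider.submitTask is assumed to be the package-level entry point that SpiderController uses to queue tasks; only the SpiderTask constructor is taken directly from the source above.

package org.zaproxy.zap.spider;

import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.URIException;

/** Hypothetical helper, for illustration only; not part of ZAP. */
final class SpiderTaskSeedSketch {

	private SpiderTaskSeedSketch() {
	}

	/**
	 * Builds a seed task (depth 0, plain GET, no request body) for an already configured Spider
	 * and hands it over for execution. The Spider instance must come from the ZAP runtime;
	 * constructing one from scratch is outside the scope of this sketch.
	 */
	static void submitSeed(Spider spider, String seedUrl) throws URIException {
		// commons-httpclient URI; the second argument marks the string as already escaped.
		URI uri = new URI(seedUrl, true);

		// Depth 0 marks a seed; the constructor persists the request message to the database.
		SpiderTask task = new SpiderTask(spider, uri, 0, "GET");

		// Assumption: submitTask is the package-level method SpiderController uses to queue
		// tasks on the Spider's thread pool; if it is not accessible, the task could instead
		// be executed directly via task.run(), since SpiderTask implements Runnable.
		spider.submitTask(task);
	}
}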