org.zaproxy.zap.spider.parser.SpiderParser Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of zap Show documentation
The Zed Attack Proxy (ZAP) is an easy to use integrated penetration testing tool for finding vulnerabilities in web applications. It is designed to be used by people with a wide range of security experience and as such is ideal for developers and functional testers who are new to penetration testing. ZAP provides automated scanners as well as a set of tools that allow you to find security vulnerabilities manually.
There is a newer version: 2.7.0
Show newest version
/*
 * Zed Attack Proxy (ZAP) and its related class files.
 *
 * ZAP is an HTTP/HTTPS proxy for assessing web application security.
 *
 * Copyright 2012 The ZAP Development Team
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.zaproxy.zap.spider.parser;

import java.util.LinkedList;
import java.util.List;
import java.util.Objects;
import net.htmlparser.jericho.Source;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.parosproxy.paros.network.HttpMessage;
import org.parosproxy.paros.network.HttpRequestHeader;

/**
 * The Abstract Class SpiderParser is the base for parsers used by the spider. The main purpose of
 * these Parsers is to find links (uris) to resources in the provided content. Uses the Jericho
 * Library for parsing.
 *
 * <p>Subclasses implement {@link #canParseResource(HttpMessage, String, boolean)} and {@link
 * #parseResource(HttpMessage, Source, int)}, and announce the URIs they find through {@link
 * #processURL(HttpMessage, int, String, String)} or {@link
 * #notifyListenersResourceFound(SpiderResourceFound)}.
 *
 * @deprecated (2.12.0) See the spider add-on in zap-extensions instead.
 */
@Deprecated
public abstract class SpiderParser {

    /** The listeners to spider parsing events. */
    private final List<SpiderParserListener> listeners = new LinkedList<>();

    /**
     * The Constant log.
     *
     * @deprecated (2.10.0) Use {@link #getLogger()} instead.
     */
    @Deprecated
    protected static final org.apache.log4j.Logger log =
            org.apache.log4j.Logger.getLogger(SpiderParser.class);

    /** The logger, created for the runtime class of the concrete parser. */
    private final Logger logger = LogManager.getLogger(getClass());

    /** The spider parameters; {@code null} until set if the no-arg constructor was used. */
    private org.zaproxy.zap.spider.SpiderParam spiderParam;

    /**
     * Constructs a {@code SpiderParser} without spider parameters.
     *
     * <p>The parameters can be provided afterwards with {@link
     * #setSpiderParam(org.zaproxy.zap.spider.SpiderParam)}.
     */
    public SpiderParser() {}

    /**
     * Constructs a {@code SpiderParser} with the given spider parameters.
     *
     * @param spiderParam the spider parameters, must not be {@code null}.
     * @throws NullPointerException if {@code spiderParam} is {@code null}.
     */
    public SpiderParser(org.zaproxy.zap.spider.SpiderParam spiderParam) {
        this.spiderParam =
                Objects.requireNonNull(spiderParam, "Parameter spiderParam must not be null.");
    }

    /**
     * Sets the spider parameters. Unlike the constructor, no {@code null} check is performed.
     *
     * @param spiderParam the spider parameters.
     */
    public void setSpiderParam(org.zaproxy.zap.spider.SpiderParam spiderParam) {
        this.spiderParam = spiderParam;
    }

    /**
     * Gets the spider parameters.
     *
     * @return the spider parameters, {@code null} if none were set.
     */
    protected org.zaproxy.zap.spider.SpiderParam getSpiderParam() {
        return spiderParam;
    }

    /**
     * Gets the logger.
     *
     * @return the logger, never {@code null}.
     * @since 2.10.0
     */
    protected Logger getLogger() {
        return logger;
    }

    /**
     * Adds a listener to spider parsing events.
     *
     * @param listener the listener
     */
    public void addSpiderParserListener(SpiderParserListener listener) {
        listeners.add(listener);
    }

    /**
     * Removes a listener to spider parsing events.
     *
     * @param listener the listener
     */
    public void removeSpiderParserListener(SpiderParserListener listener) {
        listeners.remove(listener);
    }

    /**
     * Notify the listeners that a resource was found.
     *
     * @param resourceFound the resource found.
     * @since 2.11.0
     */
    protected void notifyListenersResourceFound(SpiderResourceFound resourceFound) {
        for (SpiderParserListener l : listeners) {
            l.resourceFound(resourceFound);
        }
    }

    /**
     * Notify the listeners that a resource was found.
     *
     * @param message the http message containing the response.
     * @param depth the depth of this resource in the crawling tree
     * @param uri the uri
     * @deprecated (2.11.0) Use {@link #notifyListenersResourceFound(SpiderResourceFound)} instead.
     */
    @Deprecated
    protected void notifyListenersResourceFound(HttpMessage message, int depth, String uri) {
        notifyListenersResourceFound(
                SpiderResourceFound.builder()
                        .setMessage(message)
                        .setDepth(depth)
                        .setUri(uri)
                        .build());
    }

    /**
     * Notify the listeners that a POST resource was found. You can read more about this call in the
     * documentation for resourcePostURIFound in {@link SpiderParserListener}.
     *
     * @param message the http message containing the response.
     * @param depth the depth of this resource in the crawling tree
     * @param uri the uri
     * @param requestBody the request body
     * @deprecated (2.11.0) Use {@link #notifyListenersResourceFound(SpiderResourceFound)} instead.
     */
    @Deprecated
    protected void notifyListenersPostResourceFound(
            HttpMessage message, int depth, String uri, String requestBody) {
        notifyListenersResourceFound(
                SpiderResourceFound.builder()
                        .setMessage(message)
                        .setDepth(depth)
                        .setUri(uri)
                        .setMethod(HttpRequestHeader.POST)
                        .setBody(requestBody)
                        .build());
    }

    /**
     * Builds a URL and notifies the listeners.
     *
     * <p>The canonical URL is obtained with {@link #getCanonicalURL(String, String)}; if that
     * returns {@code null} no listener is notified. Otherwise the resource is announced with a
     * depth of {@code depth + 1}.
     *
     * @param message the message
     * @param depth the depth
     * @param localURL the local url
     * @param baseURL the base url
     */
    protected void processURL(HttpMessage message, int depth, String localURL, String baseURL) {
        // Build the absolute canonical URL
        String fullURL = getCanonicalURL(localURL, baseURL);
        if (fullURL == null) {
            return;
        }

        getLogger().debug("Canonical URL constructed using '{}': {}", localURL, fullURL);
        notifyListenersResourceFound(
                SpiderResourceFound.builder()
                        .setMessage(message)
                        .setDepth(depth + 1)
                        .setUri(fullURL)
                        .build());
    }

    /**
     * Builds the canonical URL of the given local URL, relative to the given base URL.
     *
     * <p>Delegates to {@code org.zaproxy.zap.spider.URLCanonicalizer}, handing it the spider
     * parameters' {@code isIrrelevantUrlParameter} check.
     *
     * <p><strong>Note:</strong> the spider parameters must have been set (through the constructor
     * or {@link #setSpiderParam(org.zaproxy.zap.spider.SpiderParam)}) before calling this method;
     * the bound method reference is evaluated eagerly and fails with a {@code
     * NullPointerException} when they are {@code null}.
     *
     * @param localURL the local url
     * @param baseURL the base url
     * @return the canonical URL; may be {@code null}, in which case {@link #processURL(HttpMessage,
     *     int, String, String)} does not announce anything.
     * @throws NullPointerException if no spider parameters were set.
     */
    protected String getCanonicalURL(String localURL, String baseURL) {
        return org.zaproxy.zap.spider.URLCanonicalizer.getCanonicalURL(
                localURL, baseURL, spiderParam::isIrrelevantUrlParameter);
    }

    /**
     * Parses the resource. The HTTP message containing the request and the response is given. Also,
     * if possible, a Jericho source with the Response Body is provided.
     *
     * <p>When a link is encountered, implementations can use {@link #processURL(HttpMessage, int,
     * String, String)} and {@link #notifyListenersResourceFound(SpiderResourceFound)} to announce
     * the found URIs.
     *
     * <p>The return value specifies whether the resource should be considered 'completely
     * processed'/consumed and should be treated accordingly by subsequent parsers. For example, any
     * parsers which are meant to be 'fall-back' parsers should skip messages already processed by
     * other parsers.
     *
     * @param message the full http message containing the request and the response
     * @param source a Jericho source with the Response Body from the HTTP message. This parameter
     *     can be {@code null}, in which case the parser implementation should ignore it.
     * @param depth the depth of this resource
     * @return whether the resource is considered to be exhaustively processed
     */
    public abstract boolean parseResource(final HttpMessage message, Source source, int depth);

    /**
     * Checks whether the parser should be called to parse the given HttpMessage.
     *
     * <p>Based on the specifics of the HttpMessage and whether this message was already processed
     * by another Parser, this method should decide whether the {@link #parseResource(HttpMessage,
     * Source, int)} should be invoked.
     *
     * <p>The {@code wasAlreadyConsumed} could be used by parsers which represent a 'fall-back'
     * parser to check whether any other parser has processed the message before.
     *
     * @param message the full http message containing the request and the response
     * @param path the resource path, provided for convenience
     * @param wasAlreadyConsumed if the resource was already parsed by another SpiderParser
     * @return true, if the {@link #parseResource(HttpMessage, Source, int)} should be invoked.
     */
    public abstract boolean canParseResource(
            final HttpMessage message, String path, boolean wasAlreadyConsumed);
}