
/*
* Zed Attack Proxy (ZAP) and its related class files.
*
* ZAP is an HTTP/HTTPS proxy for assessing web application security.
*
* Copyright 2014 The ZAP Development Team
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.zaproxy.zap.spider.parser;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import net.htmlparser.jericho.Source;
import org.apache.logging.log4j.LogManager;
import org.parosproxy.paros.network.HttpMessage;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.zaproxy.zap.utils.XmlUtils;
/**
* The Class SpiderSVNEntriesParser is used for parsing SVN metadata, including SVN "entries" and
* "wc.db" files.
*
* @author 70pointer
* @deprecated (2.12.0) See the spider add-on in zap-extensions instead.
*/
@Deprecated
public class SpiderSVNEntriesParser extends SpiderParser {
/* this class was Cloned from SpiderRobotstxtParser, by Cosmin. Credit where credit is due. */
    /**
     * a pattern to match a SQLite based file (in ".svn/wc.db"); SQLite database files begin with
     * the header string "SQLite format 3"
     */
private static final Pattern svnSQLiteFormatPattern = Pattern.compile("^SQLite format ");
    /** a pattern to match for XML based entries files */
    private static final Pattern svnXMLFormatPattern = Pattern.compile("<wc-entries");

    /** a pattern to match the "file" or "dir" kind lines in the text based entries file */
    private static final Pattern svnTextFormatFileOrDirectoryPattern =
            Pattern.compile("^(file|dir)$");

    /** a pattern to match lines containing the location of the remote SVN repository */
    private static final Pattern svnRepoLocationPattern = Pattern.compile("^https?://");

    /** the pattern to match the file names of files that should be parsed with this parser */
    private static final Pattern SVN_ENTRIES_FILE_PATTERN =
            Pattern.compile(".*/\\.svn/entries$|.*/\\.svn/wc\\.db$");

    /** used to parse the XML based ".svn/entries" file format */
    private static DocumentBuilder dBuilder;

    /* statically initialise the XML DocumentBuilder, with XXE processing disabled */
    static {
        try {
            dBuilder = XmlUtils.newXxeDisabledDocumentBuilderFactory().newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            LogManager.getLogger(SpiderSVNEntriesParser.class).error(e);
        }
    }

    @Override
    public boolean parseResource(HttpMessage message, Source source, int depth) {
        if (message == null) {
            return false;
        }

        // the response body holds the raw ".svn/entries" or ".svn/wc.db" content
        String content = message.getResponseBody().toString();

        // the URL of the SVN metadata file itself, used as the base for relative URLs
        String baseURL = message.getRequestHeader().getURI().toString();

        // work out which of the on-disk SVN metadata formats is in use, based on the content
        Matcher svnSQLiteFormatMatcher = svnSQLiteFormatPattern.matcher(content);
        Matcher svnXMLFormatMatcher = svnXMLFormatPattern.matcher(content);
        if (svnSQLiteFormatMatcher.find()) {
            // SQLite format is being used ( >= SVN working copy format 12, or >= SVN 1.7)
File tempSqliteFile;
try {
                // get the binary data, and put it in a temp file we can use with the SQLite JDBC
                // driver
                // Note: File is not AutoCloseable, so the temp file is deleted in the finally
                // block below (and on JVM exit), rather than via a "try with resources"
                tempSqliteFile = File.createTempFile("sqlite", null);
                tempSqliteFile.deleteOnExit();
                try (OutputStream fos = new FileOutputStream(tempSqliteFile)) {
                    fos.write(message.getResponseBody().getBytes());
                }
// now load the temporary SQLite file using JDBC, and query the file entries within.
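                // Class.forName loads the SQLite JDBC driver class, which registers itself with
                // java.sql.DriverManager so that the "jdbc:sqlite:" URL below can be resolved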
Class.forName("org.sqlite.JDBC");
String sqliteConnectionUrl = "jdbc:sqlite:" + tempSqliteFile.getAbsolutePath();
try (Connection conn = DriverManager.getConnection(sqliteConnectionUrl)) {
if (conn != null) {
Statement stmt = null;
ResultSet rsSVNWCFormat = null;
ResultSet rsNodes = null;
ResultSet rsRepo = null;
try {
stmt = conn.createStatement();
rsSVNWCFormat = stmt.executeQuery("pragma USER_VERSION");
// get the precise internal version of SVN in use
// this will inform how the Spider recurse should proceed in an
// efficient manner.
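                            // SQLite's "pragma user_version" returns an application defined
                            // integer stored in the database header; SVN uses it to record the
                            // working copy format of the wc.db file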
int svnFormat = 0;
while (rsSVNWCFormat.next()) {
getLogger().debug("Got a row from 'pragma USER_VERSION'");
svnFormat = rsSVNWCFormat.getInt(1);
break;
}
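                            // working copy format 29 corresponds to SVN 1.7.x and format 31 to
                            // SVN 1.8.x; format 30 was an interim format used during SVN 1.8
                            // development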
if (svnFormat < 29) {
throw new Exception(
"The SVN Working Copy Format of the SQLite database should be >= 29. We found "
+ svnFormat);
}
if (svnFormat > 31) {
throw new Exception(
"SVN Working Copy Format "
+ svnFormat
+ " is not supported at this time. We support up to and including format 31 (~ SVN 1.8.5)");
}
getLogger()
.debug(
"Internal SVN Working Copy Format for {} is {}",
tempSqliteFile,
svnFormat);
getLogger()
.debug(
"Refer to http://svn.apache.org/repos/asf/subversion/trunk/subversion/libsvn_wc/wc.h for more details!");
// allow future changes to be easily handled
switch (svnFormat) {
case 29:
case 30:
case 31:
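                                    // in the "nodes" table the checksum column holds values of
                                    // the form "$sha1$<hex digest>"; the substr() calls skip
                                    // that 6 character prefix to build the relative path of the
                                    // pristine copy under "pristine/<first two hex chars>/"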
rsNodes =
stmt.executeQuery(
"select kind,local_relpath,'pristine/'||substr(checksum,7,2) || \"/\" || substr(checksum,7)|| \".svn-base\" from nodes order by wc_id");
break;
}
if (rsNodes == null) {
getLogger()
.error(
"There was a problem parsing the resource. rsNodes should not be null.");
return false;
}
                            // now get the list of files stored in the SVN repo (or this folder of
                            // the repo, depending on the SVN working copy format in use)
while (rsNodes.next()) {
getLogger()
.debug(
"Got a Node from the SVN wc.db file (format {})",
svnFormat);
String kind = rsNodes.getString(1);
String filename = rsNodes.getString(2);
String svn_filename = rsNodes.getString(3);
if (filename != null && filename.length() > 0) {
getLogger()
.debug(
"Found a file/directory name in the (SQLite based) SVN wc.db file");
processURL(
message,
depth,
"../" + filename + (kind.equals("dir") ? "/" : ""),
baseURL);
// re-seed the spider for this directory.
// this is not to do with the SVN version, but in case the SVN
// root is not the WEB root..
// in order to be sure we catch all the SVN repos, we recurse.
if (kind.equals("dir")) {
processURL(
message,
depth,
"../" + filename + "/.svn/wc.db",
baseURL);
}
// if we have an internal SVN filename for the file, process it.
// this will probably result in source code disclosure at some
// point.
if (kind.equals("file")
&& svn_filename != null
&& svn_filename.length() > 0) {
processURL(message, depth, svn_filename, baseURL);
}
}
}
rsRepo = stmt.executeQuery("select root from REPOSITORY order by id");
// get additional information on where the SVN repository is located
while (rsRepo.next()) {
getLogger()
.debug(
"Got a potential Repository from the SVN wc.db file (format {})",
svnFormat);
String repos_path = rsRepo.getString(1);
if (repos_path != null && repos_path.length() > 0) {
// exclude local repositories here.. we cannot retrieve or
// spider them
Matcher repoMatcher =
svnRepoLocationPattern.matcher(repos_path);
if (repoMatcher.find()) {
getLogger()
.debug(
"Found an SVN repository location in the (SQLite based) SVN wc.db file");
processURL(message, depth, repos_path + "/", baseURL);
}
}
}
} catch (Exception e) {
getLogger()
.error(
"Error executing SQL on temporary SVN SQLite database '{}': ",
sqliteConnectionUrl,
e);
} finally {
// the JDBC driver in use does not play well with "try with resource"
// construct. I tried!
if (rsRepo != null) rsRepo.close();
if (rsNodes != null) rsNodes.close();
if (rsSVNWCFormat != null) rsSVNWCFormat.close();
if (stmt != null) stmt.close();
}
} else
throw new SQLException(
"Could not open a JDBC connection to SQLite file "
+ tempSqliteFile.getAbsolutePath());
            } catch (Exception e) {
                // the connection will have been closed already, since we used a try with
                // resources
                getLogger()
                        .error(
                                "Error parsing temporary SVN SQLite database {}",
                                sqliteConnectionUrl,
                                e);
} finally {
// delete the temp file.
// this will be deleted when the VM is shut down anyway, but better to be safe
// than to run out of disk space.
tempSqliteFile.delete();
}
} catch (IOException | ClassNotFoundException e) {
getLogger()
.error(
"An error occurred trying to set up to parse the SQLite based file: ",
e);
// We consider the message fully parsed, so it doesn't get parsed by 'fallback'
// parsers
return true;
}
} else if (svnXMLFormatMatcher.find()) {
            // XML format is being used ( < SVN working copy format 7).
            // The XML based file was replaced by the text based format in SVN 1.4, when format
            // 8 went live.
            // Not every working copy format shipped in a released SVN version, so mapping the
            // format to an SVN version is tricky.
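            // An XML based entries file has a <wc-entries> root element containing <entry>
            // elements with "name", "kind", "url" and "copyfrom-url" attributes, roughly
            // (illustrative example only):
            //   <wc-entries>
            //     <entry name="" kind="dir" url="http://www.example.com/svn/trunk"/>
            //     <entry name="index.php" kind="file"/>
            //   </wc-entries>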
Document doc;
try {
// work around the "no protocol" issue by wrapping the content in a
// ByteArrayInputStream
doc =
dBuilder.parse(
new InputSource(
new ByteArrayInputStream(content.getBytes("utf-8"))));
} catch (SAXException | IOException e) {
getLogger()
.error(
"An error occurred trying to parse the XML based .svn/entries file: ",
e);
// We consider the message fully parsed, so it doesn't get parsed by 'fallback'
// parsers
return true;
}
NodeList nodelist = doc.getElementsByTagName("entry");
for (int i = 0; i < nodelist.getLength(); i++) {
Node svnEntryNode = nodelist.item(i);
String svnEntryName = ((Element) svnEntryNode).getAttribute("name");
String svnEntryKind = ((Element) svnEntryNode).getAttribute("kind");
String svnEntryUrl = ((Element) svnEntryNode).getAttribute("url");
String svnEntryCopyFromUrl = ((Element) svnEntryNode).getAttribute("copyfrom-url");
if (svnEntryName != null && svnEntryName.length() > 0) {
getLogger()
.debug(
"Found a file/directory name in the (XML based) SVN < 1.4 entries file");
processURL(
message,
depth,
"../" + svnEntryName + (svnEntryKind.equals("dir") ? "/" : ""),
baseURL);
// get the internal SVN file, probably leading to source code disclosure
if (svnEntryKind.equals("file")) {
processURL(
message, depth, "text-base/" + svnEntryName + ".svn-base", baseURL);
}
// re-seed the spider for this directory.
if (svnEntryKind.equals("dir")) {
processURL(message, depth, "../" + svnEntryName + "/.svn/entries", baseURL);
}
}
// expected to be true for the first entry only (the directory housing other
// entries)
if (svnEntryName != null
&& svnEntryName.length() == 0
&& svnEntryKind.equals("dir")) {
// exclude local repositories here.. we cannot retrieve or spider them
Matcher repoMatcher = svnRepoLocationPattern.matcher(svnEntryUrl);
if (repoMatcher.find()) {
getLogger()
.debug(
"Found an SVN repository location in the (XML based) SVN < 1.4 entries file");
processURL(message, depth, svnEntryUrl + "/", baseURL);
}
}
                // the copyfrom-url attribute seems to be set on various entries; it corresponds
                // to files rather than directories
Matcher urlMatcher = svnRepoLocationPattern.matcher(svnEntryCopyFromUrl);
if (urlMatcher.find()) {
getLogger().debug("Found an SVN URL in the (XML based) SVN < 1.4 entries file");
processURL(message, depth, svnEntryCopyFromUrl, baseURL);
}
}
} else {
            // text based format is being used, so >= SVN 1.4, and < SVN 1.7.x
// Parse each line in the ".svn/entries" file
// we cannot use the StringTokenizer approach used by the robots.txt logic,
// since this causes empty lines to be ignored, which causes problems...
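            // Each entry in the text based format spans several lines: the entry name appears on
            // one line and is immediately followed by a line containing just "file" or "dir",
            // which is why the previously seen line is remembered while scanning for kind lines.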
String previousline = null;
String[] lines = content.split("\n");
for (String line : lines) {
// If the line is empty, skip it
if (line.length() > 0) {
// getLogger().debug("Processing SVN entries line: " + line);
Matcher matcher = svnTextFormatFileOrDirectoryPattern.matcher(line);
if (matcher.find()) {
// filetype is "dir" or "file", as per the contents of the SVN file.
String filetype = matcher.group(0);
// the previous line actually contains the file/directory name.
if (previousline != null && previousline.length() > 0) {
getLogger()
.debug(
"Found a file/directory name in the (text based) SVN 1.4/1.5/1.6 SVN entries file");
processURL(
message,
depth,
"../" + previousline + (filetype.equals("dir") ? "/" : ""),
baseURL);
// get the internal SVN file, probably leading to source code disclosure
if (filetype.equals("file")) {
processURL(
message,
depth,
"text-base/" + previousline + ".svn-base",
baseURL);
}
// re-seed the spider for this directory.
if (filetype.equals("dir")) {
processURL(
message,
depth,
"../" + previousline + "/.svn/entries",
baseURL);
}
}
} else {
// not a "file" or "dir" line, but it may contain details of the SVN repo
// location
Matcher repoMatcher = svnRepoLocationPattern.matcher(line);
if (repoMatcher.find()) {
getLogger()
.debug(
"Found an SVN repository location in the (text based) 1.4/1.5/1.6 SVN entries file");
processURL(message, depth, line + "/", baseURL);
}
}
}
// last thing to do is to record the line as the previous line for the next
// iteration.
previousline = line;
}
}
// We consider the message fully parsed, so it doesn't get parsed by 'fallback' parsers
return true;
}
@Override
public boolean canParseResource(HttpMessage message, String path, boolean wasAlreadyParsed) {
// matches the file name of files that should be parsed with the SVN entries file parser
Matcher matcher = SVN_ENTRIES_FILE_PATTERN.matcher(path);
return matcher.find();
}
}