All downloads are FREE. The search and download functionality uses the official Maven repository.

nu.validator.servlet.VerifierServlet Maven / Gradle / Ivy

Go to download

An HTML-checking library (used by https://html5.validator.nu and the HTML5 facet of the W3C Validator)

There is a newer version: 20.7.2
Show newest version
/*
 * Copyright (c) 2005 Henri Sivonen
 * Copyright (c) 2007-2014 Mozilla Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a 
 * copy of this software and associated documentation files (the "Software"), 
 * to deal in the Software without restriction, including without limitation 
 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 * and/or sell copies of the Software, and to permit persons to whom the 
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in 
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 * DEALINGS IN THE SOFTWARE.
 */

package nu.validator.servlet;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import nu.validator.messages.MessageEmitterAdapter;
import nu.validator.xml.PrudentHttpEntityResolver;

import org.apache.log4j.Logger;


/**
 * Entry-point servlet that dispatches incoming requests by server name and
 * path info to the generic validator, the HTML5 conformance checker or the
 * parse tree printer, and that serves a handful of static resources
 * (robots.txt, style sheet, script, icon, about page) from memory.
 *
 * <p>Host names and paths for the three services are read once from system
 * properties when the class is initialized.
 *
 * @version $Id$
 * @author hsivonen
 */
public class VerifierServlet extends HttpServlet {
    /**
     * Serialization version identifier ({@link HttpServlet} is serializable).
     */
    private static final long serialVersionUID = 7811043632732680935L;

    private static final Logger log4j = Logger.getLogger(VerifierServlet.class);

    // Host names for the three services; an empty string matches any host
    // (see hostMatch).
    static final String GENERIC_HOST = System.getProperty("nu.validator.servlet.host.generic", "");

    static final String HTML5_HOST = System.getProperty("nu.validator.servlet.host.html5", "");

    static final String PARSETREE_HOST = System.getProperty("nu.validator.servlet.host.parsetree", "");

    // Path infos under which the three services are exposed.
    static final String GENERIC_PATH = System.getProperty("nu.validator.servlet.path.generic", "/");

    static final String HTML5_PATH = System.getProperty("nu.validator.servlet.path.html5", "/html5/");

    static final String PARSETREE_PATH = System.getProperty("nu.validator.servlet.path.parsetree", "/parsetree/");

    // Pre-built response bodies, populated once in the static initializer.
    private static final byte[] GENERIC_ROBOTS_TXT;

    private static final byte[] HTML5_ROBOTS_TXT;

    private static final byte[] PARSETREE_ROBOTS_TXT;

    private static final byte[] STYLE_CSS;

    private static final byte[] SCRIPT_JS;

    private static final byte[] ICON_PNG;

    private static final byte[] ABOUT_HTML;

    static {
        try {
            GENERIC_ROBOTS_TXT = buildRobotsTxt(GENERIC_HOST, GENERIC_PATH, HTML5_HOST, HTML5_PATH, PARSETREE_HOST, PARSETREE_PATH);
            HTML5_ROBOTS_TXT = buildRobotsTxt(HTML5_HOST, HTML5_PATH, GENERIC_HOST, GENERIC_PATH, PARSETREE_HOST, PARSETREE_PATH);
            PARSETREE_ROBOTS_TXT = buildRobotsTxt(PARSETREE_HOST, PARSETREE_PATH, HTML5_HOST, HTML5_PATH, GENERIC_HOST, GENERIC_PATH);
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
        try {
            STYLE_CSS = readFromClassLoaderIntoByteArray("nu/validator/localentities/files/style.css");
            SCRIPT_JS = readFromClassLoaderIntoByteArray("nu/validator/localentities/files/script.js");
            ICON_PNG = readFromClassLoaderIntoByteArray("nu/validator/localentities/files/icon.png");
            ABOUT_HTML = readFromClassLoaderIntoByteArray("nu/validator/localentities/files/about.html");
        } catch (IOException e) {
            // A missing or unreadable bundled resource makes the servlet
            // unusable, so fail class initialization.
            throw new RuntimeException(e);
        }
        PrudentHttpEntityResolver.setParams(
            Integer.parseInt(System.getProperty("nu.validator.servlet.connection-timeout","5000")),
            Integer.parseInt(System.getProperty("nu.validator.servlet.socket-timeout","5000")),
            Integer.parseInt(System.getProperty("nu.validator.servlet.max-requests","100")));
        // force some class loading
        new VerifierServletTransaction(null, null);
        new MessageEmitterAdapter(null, null, false, null, 0, false, null);
    }

    /**
     * Builds the body of a robots.txt file for one service host.
     *
     * <p>The result always disallows {@code primaryPath + "?"}. The secondary
     * and tertiary paths are disallowed as well, but only when their host is
     * equal to the primary host.
     *
     * @param primaryHost host the robots.txt is built for
     * @param primaryPath path of the service on the primary host
     * @param secondaryHost host of the second service
     * @param secondaryPath path of the second service
     * @param tertiaryHost host of the third service
     * @param tertiaryPath path of the third service
     * @return the robots.txt content encoded as UTF-8
     * @throws UnsupportedEncodingException if the UTF-8 charset name is not
     *         supported by the runtime
     */
    private static byte[] buildRobotsTxt(String primaryHost, String primaryPath, String secondaryHost, String secondaryPath, String tertiaryHost, String tertiaryPath) throws UnsupportedEncodingException {
        StringBuilder builder = new StringBuilder();
        builder.append("User-agent: *\nDisallow: ");
        builder.append(primaryPath);
        builder.append("?\n");
        if (primaryHost.equals(secondaryHost)) {
            builder.append("Disallow: ");
            builder.append(secondaryPath);
            builder.append("?\n");
        }
        if (primaryHost.equals(tertiaryHost)) {
            builder.append("Disallow: ");
            builder.append(tertiaryPath);
            builder.append("?\n");
        }
        return builder.toString().getBytes("UTF-8");
    }

    /**
     * Reads a classpath resource completely into memory.
     *
     * @param name resource name as understood by
     *        {@link ClassLoader#getResourceAsStream(String)}
     * @return the full content of the resource
     * @throws IOException if the resource does not exist or cannot be read
     */
    private static byte[] readFromClassLoaderIntoByteArray(String name)
            throws IOException {
        try (InputStream in = VerifierServlet.class.getClassLoader().getResourceAsStream(
                name)) {
            // getResourceAsStream signals a missing resource with null rather
            // than an exception; report it with the resource name instead of
            // failing later with a bare NullPointerException.
            if (in == null) {
                throw new IOException("Classpath resource not found: " + name);
            }
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            for (int count = in.read(chunk); count != -1; count = in.read(chunk)) {
                baos.write(chunk, 0, count);
            }
            return baos.toByteArray();
        }
    }

    /**
     * Sends a pre-built body with the given content type, an exact content
     * length and an {@code Expires} header twelve hours in the future.
     *
     * @param buffer the complete response body
     * @param type value for the {@code Content-Type} header
     * @param response the response to write to
     * @throws IOException if the response stream cannot be obtained or
     *         written
     */
    private void writeResponse(byte[] buffer, String type,
            HttpServletResponse response) throws IOException {
        response.setContentType(type);
        response.setContentLength(buffer.length);
        response.setDateHeader("Expires",
                System.currentTimeMillis() + 43200000); // 12 hours
        try (OutputStream out = response.getOutputStream()) {
            out.write(buffer);
            out.flush();
        }
    }

    /**
     * Serves the in-memory static resources and the statistics page; every
     * other GET request is handed to
     * {@link #doPost(HttpServletRequest, HttpServletResponse)}.
     *
     * @see javax.servlet.http.HttpServlet#doGet(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse)
     */
    @Override
    protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        String pathInfo = request.getPathInfo();
        if ("/robots.txt".equals(pathInfo)) {
            String serverName = request.getServerName();
            byte[] robotsTxt;
            if (hostMatch(GENERIC_HOST, serverName)) {
                robotsTxt = GENERIC_ROBOTS_TXT;
            } else if (hostMatch(HTML5_HOST, serverName)) {
                robotsTxt = HTML5_ROBOTS_TXT;
            } else if (hostMatch(PARSETREE_HOST, serverName)) {
                robotsTxt = PARSETREE_ROBOTS_TXT;
            } else {
                response.sendError(HttpServletResponse.SC_NOT_FOUND);
                return;
            }
            writeResponse(robotsTxt, "text/plain; charset=utf-8", response);
            return;
        } else if ("/style.css".equals(pathInfo)) {
            writeResponse(STYLE_CSS, "text/css; charset=utf-8", response);
            return;
        } else if ("/script.js".equals(pathInfo)) {
            writeResponse(SCRIPT_JS, "text/javascript; charset=utf-8", response);
            return;
        } else if ("/icon.png".equals(pathInfo)) {
            writeResponse(ICON_PNG, "image/png", response);
            return;
        } else if ("/about.html".equals(pathInfo)) {
            writeResponse(ABOUT_HTML, "text/html; charset=utf-8", response);
            return;
        } else if (Statistics.STATISTICS != null && "/stats.html".equals(pathInfo)) {
            Statistics.STATISTICS.writeToResponse(response);
            return;
        }
        doPost(request, response);
    }

    /**
     * Tells whether a request host matches a configured host.
     *
     * @param reference the configured host; the empty string matches any host
     * @param host the server name of the request
     * @return {@code true} if {@code reference} is empty or equals
     *         {@code host} ignoring case
     */
    private boolean hostMatch(String reference, String host) {
        if ("".equals(reference)) {
            return true;
        } else {
            // XXX case-sensitivity
            return reference.equalsIgnoreCase(host);
        }
    }

    /**
     * Answers OPTIONS requests for {@code *} and {@code /robots.txt} directly
     * and hands every other OPTIONS request to
     * {@link #doPost(HttpServletRequest, HttpServletResponse)}.
     *
     * @see javax.servlet.http.HttpServlet#doOptions(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse)
     */
    @Override
    protected void doOptions(HttpServletRequest request,
            HttpServletResponse response) throws ServletException, IOException {
        String pathInfo = request.getPathInfo();
        if ("*".equals(pathInfo)) { // useless RFC 2616 complication
            return;
        } else if ("/robots.txt".equals(pathInfo)) {
            String serverName = request.getServerName();
            if (hostMatch(GENERIC_HOST, serverName)
                    || hostMatch(HTML5_HOST, serverName)
                    || hostMatch(PARSETREE_HOST, serverName)) {
                sendGetOnlyOptions(request, response);
                return;
            } else {
                response.sendError(HttpServletResponse.SC_NOT_FOUND);
                return;
            }
        }
        doPost(request, response);
    }

    /**
     * Rejects TRACE requests with 405 Method Not Allowed.
     *
     * @see javax.servlet.http.HttpServlet#doTrace(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse)
     */
    @Override
    protected void doTrace(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
        response.sendError(HttpServletResponse.SC_METHOD_NOT_ALLOWED);
    }

    /**
     * Main dispatcher. Besides POST it also receives the GET and OPTIONS
     * requests that {@code doGet} and {@code doOptions} did not answer
     * themselves, and routes them by server name and path info to the
     * matching transaction class.
     *
     * @see javax.servlet.http.HttpServlet#doPost(javax.servlet.http.HttpServletRequest,
     *      javax.servlet.http.HttpServletResponse)
     */
    @Override
    protected void doPost(HttpServletRequest request,
            HttpServletResponse response) throws ServletException, IOException {
        String method = request.getMethod();
        String pathInfo = request.getPathInfo();
        String referer = request.getHeader("Referer");
        String query = request.getQueryString();
        String serverName = request.getServerName();
        String ua = request.getHeader("User-Agent");
        boolean isOptions = "OPTIONS".equals(method);
        // Refuse requests whose Referer mentions the W3C validator check URL
        // anywhere other than at its very start.
        if (referer != null
                && referer.contains("http://validator.w3.org/check?uri=")
                && !referer.startsWith("http://validator.w3.org/check?uri=")) {
            response.sendError(HttpServletResponse.SC_FORBIDDEN);
            return;
        }
        if (ua == null) {
            response.sendError(HttpServletResponse.SC_BAD_REQUEST,
                    "Bad request. Valid requests must include a User-Agent header.");
            return;
        }
        if (!isOptions) {
            log4j.info(String.format("%s\t%s\t%s\t%s", method, ua, query, referer));
        }
        if (pathInfo == null) {
            pathInfo = "/"; // Fix for Jigsaw
        }
        if ("/robots.txt".equals(pathInfo)) {
            // if we get here, we've got a POST
            response.sendError(HttpServletResponse.SC_METHOD_NOT_ALLOWED);
            return;
        }
        log4j.debug("pathInfo: " + pathInfo);
        log4j.debug("serverName: " + serverName);

        if ("validator.nu".equals(serverName) && "/html5/".equals(pathInfo)) {
            // Permanent redirect to the dedicated HTML5 host, keeping the
            // query string if there is one.
            response.setStatus(HttpServletResponse.SC_MOVED_PERMANENTLY);
            response.setHeader("Location", "http://html5.validator.nu/" + (query == null ? "" : "?" + query));
        } else if (hostMatch(GENERIC_HOST, serverName) && GENERIC_PATH.equals(pathInfo)) {
            response.setHeader("Access-Control-Allow-Origin", "*");
            response.setHeader("Access-Control-Allow-Headers", "content-type");
            if (isOptions) {
                sendOptions(request, response);
            } else {
                new VerifierServletTransaction(request, response).service();
            }
        } else if (hostMatch(HTML5_HOST, serverName) && HTML5_PATH.equals(pathInfo)) {
            response.setHeader("Access-Control-Allow-Origin", "*");
            response.setHeader("Access-Control-Allow-Headers", "content-type");
            if (isOptions) {
                sendOptions(request, response);
            } else {
                new Html5ConformanceCheckerTransaction(request, response).service();
            }
        } else if (hostMatch(PARSETREE_HOST, serverName) && PARSETREE_PATH.equals(pathInfo)) {
            if (isOptions) {
                sendGetOnlyOptions(request, response);
            } else {
                new ParseTreePrinter(request, response).service();
            }
        } else {
            response.sendError(HttpServletResponse.SC_NOT_FOUND);
        }
    }

    /**
     * Writes an empty OPTIONS response whose {@code Allow} header lists only
     * GET, HEAD and OPTIONS.
     *
     * @param request the current request (unused)
     * @param response the response to populate
     */
    private void sendGetOnlyOptions(HttpServletRequest request, HttpServletResponse response) {
        response.setHeader("Allow", "GET, HEAD, OPTIONS");
        // NOTE(review): this header advertises POST although Allow above does
        // not; confirm which of the two lists is intended before changing
        // either one.
        response.setHeader("Access-Control-Allow-Methods", "GET, HEAD, POST, OPTIONS");
        response.setContentType("application/octet-stream");
        response.setContentLength(0);
    }

    /**
     * Writes an empty OPTIONS response that allows GET, HEAD, POST and
     * OPTIONS and sets {@code Access-Control-Max-Age} to twelve hours.
     *
     * @param request the current request (unused)
     * @param response the response to populate
     */
    private void sendOptions(HttpServletRequest request, HttpServletResponse response) {
        response.setHeader("Access-Control-Max-Age", "43200"); // 12 hours
        response.setHeader("Allow", "GET, HEAD, POST, OPTIONS");
        response.setHeader("Access-Control-Allow-Methods", "GET, HEAD, POST, OPTIONS");
        response.setContentType("application/octet-stream");
        response.setContentLength(0);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy