nu.validator.servlet.VerifierServlet Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of validator Show documentation
Show all versions of validator Show documentation
An HTML-checking library (used by https://html5.validator.nu and the HTML5 facet of the W3C Validator)
/*
* Copyright (c) 2005 Henri Sivonen
* Copyright (c) 2007-2014 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.servlet;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import nu.validator.messages.MessageEmitterAdapter;
import nu.validator.xml.PrudentHttpEntityResolver;
import org.apache.log4j.Logger;
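/**
 * Servlet front end of the validator. It serves robots.txt and a few static
 * resources directly and routes all other requests, by server name and path,
 * to the generic validator, the HTML5 conformance checker or the parse tree
 * printer.
 *
 * @version $Id$
 * @author hsivonen
 */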
public class VerifierServlet extends HttpServlet {
/**
 * Serialization version identifier of this servlet class.
 */
private static final long serialVersionUID = 7811043632732680935L;
/** Logger used by this servlet. */
private static final Logger log4j = Logger.getLogger(VerifierServlet.class);
// Host names of the three services, read once from system properties.
// The default is the empty string, which hostMatch treats as "any host".
static final String GENERIC_HOST = System.getProperty("nu.validator.servlet.host.generic", "");
static final String HTML5_HOST = System.getProperty("nu.validator.servlet.host.html5", "");
static final String PARSETREE_HOST = System.getProperty("nu.validator.servlet.host.parsetree", "");
// Request paths (compared against getPathInfo()) of the three services,
// read once from system properties with the defaults shown.
static final String GENERIC_PATH = System.getProperty("nu.validator.servlet.path.generic", "/");
static final String HTML5_PATH = System.getProperty("nu.validator.servlet.path.html5", "/html5/");
static final String PARSETREE_PATH = System.getProperty("nu.validator.servlet.path.parsetree", "/parsetree/");
// Pre-built robots.txt bodies, one per service host; assigned in the
// static initializer.
private static final byte[] GENERIC_ROBOTS_TXT;
private static final byte[] HTML5_ROBOTS_TXT;
private static final byte[] PARSETREE_ROBOTS_TXT;
// Static resources loaded from the class path in the static initializer
// and served as-is by doGet.
private static final byte[] STYLE_CSS;
private static final byte[] SCRIPT_JS;
private static final byte[] ICON_PNG;
private static final byte[] ABOUT_HTML;
// Class initialization: builds the robots.txt bodies, loads the static
// resources, configures the HTTP entity resolver and pre-loads two classes.
// Any failure is rethrown as a RuntimeException, which aborts class
// initialization.
static {
try {
// Each robots.txt is built with the service's own host/path as the
// primary pair and the other two services as secondary/tertiary.
GENERIC_ROBOTS_TXT = buildRobotsTxt(GENERIC_HOST, GENERIC_PATH, HTML5_HOST, HTML5_PATH, PARSETREE_HOST, PARSETREE_PATH);
HTML5_ROBOTS_TXT = buildRobotsTxt(HTML5_HOST, HTML5_PATH, GENERIC_HOST, GENERIC_PATH, PARSETREE_HOST, PARSETREE_PATH);
PARSETREE_ROBOTS_TXT = buildRobotsTxt(PARSETREE_HOST, PARSETREE_PATH, HTML5_HOST, HTML5_PATH, GENERIC_HOST, GENERIC_PATH);
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
try {
// Resources are read once here and kept in memory for the lifetime of
// the class.
STYLE_CSS = readFromClassLoaderIntoByteArray("nu/validator/localentities/files/style.css");
SCRIPT_JS = readFromClassLoaderIntoByteArray("nu/validator/localentities/files/script.js");
ICON_PNG = readFromClassLoaderIntoByteArray("nu/validator/localentities/files/icon.png");
ABOUT_HTML = readFromClassLoaderIntoByteArray("nu/validator/localentities/files/about.html");
} catch (IOException e) {
throw new RuntimeException(e);
}
// Resolver parameters come from system properties, with defaults 5000,
// 5000 and 100. NOTE(review): the timeouts are presumably milliseconds;
// confirm against PrudentHttpEntityResolver.setParams.
PrudentHttpEntityResolver.setParams(
Integer.parseInt(System.getProperty("nu.validator.servlet.connection-timeout","5000")),
Integer.parseInt(System.getProperty("nu.validator.servlet.socket-timeout","5000")),
Integer.parseInt(System.getProperty("nu.validator.servlet.max-requests","100")));
// force some class loading
new VerifierServletTransaction(null, null);
new MessageEmitterAdapter(null, null, false, null, 0, false, null);
}
/**
 * Builds the robots.txt body for one service host.
 *
 * The query form of the primary path is always disallowed. The query forms
 * of the secondary and tertiary paths are disallowed as well, but only when
 * their host equals the primary host.
 *
 * @param primaryHost host the robots.txt is served for
 * @param primaryPath path of the service on that host
 * @param secondaryHost host of the second service
 * @param secondaryPath path of the second service
 * @param tertiaryHost host of the third service
 * @param tertiaryPath path of the third service
 * @return the robots.txt content encoded as UTF-8
 * @throws UnsupportedEncodingException if the UTF-8 charset is unavailable
 */
private static byte[] buildRobotsTxt(String primaryHost, String primaryPath, String secondaryHost, String secondaryPath, String tertiaryHost, String tertiaryPath) throws UnsupportedEncodingException {
    String robots = "User-agent: *\nDisallow: " + primaryPath + "?\n";
    // Only paths that live on the same host belong in this host's file.
    if (primaryHost.equals(secondaryHost)) {
        robots += "Disallow: " + secondaryPath + "?\n";
    }
    if (primaryHost.equals(tertiaryHost)) {
        robots += "Disallow: " + tertiaryPath + "?\n";
    }
    return robots.getBytes("UTF-8");
}
/**
 * Reads a class-path resource completely into a byte array.
 *
 * @param name resource name, resolved through the class loader of this class
 * @return the full content of the resource
 * @throws IOException if the resource does not exist or cannot be read
 */
private static byte[] readFromClassLoaderIntoByteArray(String name)
        throws IOException {
    try (InputStream in = VerifierServlet.class.getClassLoader().getResourceAsStream(name)) {
        if (in == null) {
            // getResourceAsStream reports a missing resource by returning
            // null; fail with the resource name instead of an anonymous
            // NullPointerException on the first read.
            throw new IOException("Resource not found on class path: " + name);
        }
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        // Copy in chunks rather than one byte per read() call.
        byte[] chunk = new byte[8192];
        for (int count = in.read(chunk); count != -1; count = in.read(chunk)) {
            collected.write(chunk, 0, count);
        }
        return collected.toByteArray();
    }
}
/**
 * Sends a fixed byte array as the complete response body.
 *
 * Sets the content type, the content length and an Expires header twelve
 * hours in the future, then writes the bytes and closes the output stream.
 *
 * @param buffer the response body
 * @param type value for the Content-Type header
 * @param response the response to write to
 * @throws IOException if writing to the response stream fails
 */
private void writeResponse(byte[] buffer, String type,
        HttpServletResponse response) throws IOException {
    final long twelveHoursMillis = 12L * 60 * 60 * 1000;
    response.setContentType(type);
    response.setContentLength(buffer.length);
    response.setDateHeader("Expires",
            System.currentTimeMillis() + twelveHoursMillis);
    // I/O failures propagate as the declared IOException rather than being
    // re-wrapped in a RuntimeException, so the container sees them as
    // ordinary I/O errors.
    try (OutputStream out = response.getOutputStream()) {
        out.write(buffer);
        out.flush();
    }
}
/**
 * Serves robots.txt, the bundled static resources and the statistics page;
 * every other GET request is handed to
 * {@link #doPost(HttpServletRequest, HttpServletResponse)}.
 *
 * @see javax.servlet.http.HttpServlet#doGet(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse)
 */
@Override
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    final String path = request.getPathInfo();
    if ("/robots.txt".equals(path)) {
        // Pick the robots.txt variant of the first configured host that
        // matches the requested server name.
        final String host = request.getServerName();
        final byte[] body;
        if (hostMatch(GENERIC_HOST, host)) {
            body = GENERIC_ROBOTS_TXT;
        } else if (hostMatch(HTML5_HOST, host)) {
            body = HTML5_ROBOTS_TXT;
        } else if (hostMatch(PARSETREE_HOST, host)) {
            body = PARSETREE_ROBOTS_TXT;
        } else {
            response.sendError(HttpServletResponse.SC_NOT_FOUND);
            return;
        }
        writeResponse(body, "text/plain; charset=utf-8", response);
        return;
    }
    if ("/style.css".equals(path)) {
        writeResponse(STYLE_CSS, "text/css; charset=utf-8", response);
        return;
    }
    if ("/script.js".equals(path)) {
        writeResponse(SCRIPT_JS, "text/javascript; charset=utf-8", response);
        return;
    }
    if ("/icon.png".equals(path)) {
        writeResponse(ICON_PNG, "image/png", response);
        return;
    }
    if ("/about.html".equals(path)) {
        writeResponse(ABOUT_HTML, "text/html; charset=utf-8", response);
        return;
    }
    // The statistics page is only served when statistics are available.
    if (Statistics.STATISTICS != null && "/stats.html".equals(path)) {
        Statistics.STATISTICS.writeToResponse(response);
        return;
    }
    // Everything else is handled by the same code path as POST.
    doPost(request, response);
}
/**
 * Tells whether a request's server name matches a configured host.
 *
 * @param reference configured host; the empty string matches every host
 * @param host server name taken from the request
 * @return {@code true} if {@code reference} is empty or equals {@code host}
 *         ignoring case
 */
private boolean hostMatch(String reference, String host) {
    // An unconfigured (empty) reference host accepts any server name;
    // otherwise the comparison is case-insensitive.
    return "".equals(reference) || reference.equalsIgnoreCase(host);
}
/**
 * Answers OPTIONS requests. A request for {@code *} gets an empty response,
 * robots.txt is answered with the GET-only option set (or 404 when no
 * configured host matches), and every other path is handed to
 * {@link #doPost(HttpServletRequest, HttpServletResponse)}.
 *
 * @see javax.servlet.http.HttpServlet#doOptions(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse)
 */
@Override
protected void doOptions(HttpServletRequest request,
        HttpServletResponse response) throws ServletException, IOException {
    final String path = request.getPathInfo();
    if ("*".equals(path)) { // useless RFC 2616 complication
        return;
    }
    if (!"/robots.txt".equals(path)) {
        doPost(request, response);
        return;
    }
    final String host = request.getServerName();
    final boolean knownHost = hostMatch(GENERIC_HOST, host)
            || hostMatch(HTML5_HOST, host)
            || hostMatch(PARSETREE_HOST, host);
    if (knownHost) {
        sendGetOnlyOptions(request, response);
    } else {
        response.sendError(HttpServletResponse.SC_NOT_FOUND);
    }
}
/**
 * Refuses TRACE requests with status 405 (Method Not Allowed).
 *
 * @see javax.servlet.http.HttpServlet#doTrace(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse)
 */
@Override
protected void doTrace(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    final int methodNotAllowed = HttpServletResponse.SC_METHOD_NOT_ALLOWED;
    response.sendError(methodNotAllowed);
}
/**
 * Central request router, also reached from doGet and doOptions.
 *
 * Rejects requests with a suspicious Referer (403) or without a User-Agent
 * header (400), logs non-OPTIONS requests, refuses non-GET access to
 * robots.txt (405) and then dispatches by server name and path to the
 * matching transaction class, answering 404 when nothing matches.
 *
 * @see javax.servlet.http.HttpServlet#doPost(javax.servlet.http.HttpServletRequest,
 *      javax.servlet.http.HttpServletResponse)
 */
@Override
protected void doPost(HttpServletRequest request,
        HttpServletResponse response) throws ServletException, IOException {
    final String method = request.getMethod();
    final String referer = request.getHeader("Referer");
    final String query = request.getQueryString();
    final String serverName = request.getServerName();
    final String ua = request.getHeader("User-Agent");
    final boolean isOptions = "OPTIONS".equals(method);
    String pathInfo = request.getPathInfo();

    // Forbidden when the W3C check URL appears in the Referer but not at
    // its very start, i.e. its first occurrence is at an index above 0.
    if (referer != null
            && referer.indexOf("http://validator.w3.org/check?uri=") > 0) {
        response.sendError(HttpServletResponse.SC_FORBIDDEN);
        return;
    }
    if (ua == null) {
        response.sendError(HttpServletResponse.SC_BAD_REQUEST,
                "Bad request. Valid requests must include a User-Agent header.");
        return;
    }
    if (!isOptions) {
        log4j.info(String.format("%s\t%s\t%s\t%s", method, ua, query, referer));
    }
    if (pathInfo == null) {
        pathInfo = "/"; // Fix for Jigsaw
    }
    if ("/robots.txt".equals(pathInfo)) {
        // if we get here, we've got a POST
        response.sendError(HttpServletResponse.SC_METHOD_NOT_ALLOWED);
        return;
    }
    log4j.debug("pathInfo: " + pathInfo);
    log4j.debug("serverName: " + serverName);

    if ("validator.nu".equals(serverName) && "/html5/".equals(pathInfo)) {
        // Permanent redirect to the dedicated HTML5 host, keeping the query.
        final String location;
        if (query == null) {
            location = "http://html5.validator.nu/";
        } else {
            location = "http://html5.validator.nu/" + "?" + query;
        }
        response.setStatus(HttpServletResponse.SC_MOVED_PERMANENTLY);
        response.setHeader("Location", location);
        return;
    }
    if (hostMatch(GENERIC_HOST, serverName) && GENERIC_PATH.equals(pathInfo)) {
        response.setHeader("Access-Control-Allow-Origin", "*");
        response.setHeader("Access-Control-Allow-Headers", "content-type");
        if (isOptions) {
            sendOptions(request, response);
        } else {
            new VerifierServletTransaction(request, response).service();
        }
        return;
    }
    if (hostMatch(HTML5_HOST, serverName) && HTML5_PATH.equals(pathInfo)) {
        response.setHeader("Access-Control-Allow-Origin", "*");
        response.setHeader("Access-Control-Allow-Headers", "content-type");
        if (isOptions) {
            sendOptions(request, response);
        } else {
            new Html5ConformanceCheckerTransaction(request, response).service();
        }
        return;
    }
    if (hostMatch(PARSETREE_HOST, serverName) && PARSETREE_PATH.equals(pathInfo)) {
        // The parse tree service sets no cross-origin headers here.
        if (isOptions) {
            sendGetOnlyOptions(request, response);
        } else {
            new ParseTreePrinter(request, response).service();
        }
        return;
    }
    response.sendError(HttpServletResponse.SC_NOT_FOUND);
}
/**
 * Fills in an empty OPTIONS response for a resource whose Allow header
 * lists GET, HEAD and OPTIONS.
 *
 * @param request the current request (not used)
 * @param response the response to populate
 */
private void sendGetOnlyOptions(HttpServletRequest request, HttpServletResponse response) {
    // NOTE(review): Allow omits POST while Access-Control-Allow-Methods
    // includes it - confirm whether this difference is intentional.
    final String allowed = "GET, HEAD, OPTIONS";
    final String corsAllowed = "GET, HEAD, POST, OPTIONS";
    response.setHeader("Allow", allowed);
    response.setHeader("Access-Control-Allow-Methods", corsAllowed);
    // The response carries no body.
    response.setContentType("application/octet-stream");
    response.setContentLength(0);
}
/**
 * Fills in an empty OPTIONS response for a resource that accepts GET,
 * HEAD, POST and OPTIONS, with an Access-Control-Max-Age of twelve hours.
 *
 * @param request the current request (not used)
 * @param response the response to populate
 */
private void sendOptions(HttpServletRequest request, HttpServletResponse response) {
    final String methods = "GET, HEAD, POST, OPTIONS";
    response.setHeader("Access-Control-Max-Age", "43200"); // 12 hours
    response.setHeader("Allow", methods);
    response.setHeader("Access-Control-Allow-Methods", methods);
    // The response carries no body.
    response.setContentType("application/octet-stream");
    response.setContentLength(0);
}
}