All downloads are free. The search and download functionality uses the official Maven repository.

nu.validator.servlet.VerifierServletTransaction Maven / Gradle / Ivy

Go to download

An HTML-checking library (used by https://html5.validator.nu and the HTML5 facet of the W3C Validator)

There is a newer version: 20.7.2
Show newest version
/*
 * Copyright (c) 2005, 2006 Henri Sivonen
 * Copyright (c) 2007-2014 Mozilla Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a 
 * copy of this software and associated documentation files (the "Software"), 
 * to deal in the Software without restriction, including without limitation 
 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 * and/or sell copies of the Software, and to permit persons to whom the 
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in 
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 * DEALINGS IN THE SOFTWARE.
 */

package nu.validator.servlet;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import nu.validator.checker.XmlPiChecker;
import nu.validator.checker.jing.CheckerSchema;
import nu.validator.gnu.xml.aelfred2.SAXDriver;
import nu.validator.htmlparser.common.DoctypeExpectation;
import nu.validator.htmlparser.common.DocumentMode;
import nu.validator.htmlparser.common.DocumentModeHandler;
import nu.validator.htmlparser.common.Heuristics;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.htmlparser.sax.HtmlParser;
import nu.validator.htmlparser.sax.HtmlSerializer;
import nu.validator.htmlparser.sax.XmlSerializer;
import nu.validator.io.BoundedInputStream;
import nu.validator.io.DataUri;
import nu.validator.io.StreamBoundException;
import nu.validator.localentities.LocalCacheEntityResolver;
import nu.validator.messages.GnuMessageEmitter;
import nu.validator.messages.JsonMessageEmitter;
import nu.validator.messages.MessageEmitterAdapter;
import nu.validator.messages.TextMessageEmitter;
import nu.validator.messages.TooManyErrorsException;
import nu.validator.messages.XhtmlMessageEmitter;
import nu.validator.messages.XmlMessageEmitter;
import nu.validator.servlet.imagereview.ImageCollector;
import nu.validator.servlet.OutlineBuildingXMLReaderWrapper.Section;
import nu.validator.source.SourceCode;
import nu.validator.spec.Spec;
import nu.validator.spec.html5.Html5SpecBuilder;
import nu.validator.xml.AttributesImpl;
import nu.validator.xml.AttributesPermutingXMLReaderWrapper;
import nu.validator.xml.BaseUriTracker;
import nu.validator.xml.CharacterUtil;
import nu.validator.xml.CombineContentHandler;
import nu.validator.xml.ContentTypeParser;
import nu.validator.xml.DataUriEntityResolver;
import nu.validator.xml.IdFilter;
import nu.validator.xml.NamespaceDroppingXMLReaderWrapper;
import nu.validator.xml.NullEntityResolver;
import nu.validator.xml.PrudentHttpEntityResolver;
import nu.validator.xml.SystemErrErrorHandler;
import nu.validator.xml.TypedInputSource;
import nu.validator.xml.WiretapXMLReaderWrapper;
import nu.validator.xml.XhtmlSaxEmitter;
import nu.validator.xml.dataattributes.DataAttributeDroppingSchemaWrapper;
import nu.validator.xml.langattributes.XmlLangAttributeDroppingSchemaWrapper;
import nu.validator.xml.roleattributes.RoleAttributeFilteringSchemaWrapper;

import org.xml.sax.ContentHandler;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.ext.LexicalHandler;

import com.thaiopensource.relaxng.impl.CombineValidator;
import com.thaiopensource.util.PropertyMap;
import com.thaiopensource.util.PropertyMapBuilder;
import com.thaiopensource.validate.IncorrectSchemaException;
import com.thaiopensource.validate.Schema;
import com.thaiopensource.validate.SchemaReader;
import com.thaiopensource.validate.SchemaResolver;
import com.thaiopensource.validate.ValidateProperty;
import com.thaiopensource.validate.Validator;
import com.thaiopensource.validate.auto.AutoSchemaReader;
import com.thaiopensource.validate.prop.rng.RngProperty;
import com.thaiopensource.validate.prop.wrap.WrapProperty;
import com.thaiopensource.validate.rng.CompactSchemaReader;

import org.apache.log4j.Logger;
import com.ibm.icu.text.Normalizer;

/**
 * @version $Id: VerifierServletTransaction.java,v 1.10 2005/07/24 07:32:48
 *          hsivonen Exp $
 * @author hsivonen
 */
class VerifierServletTransaction implements DocumentModeHandler, SchemaResolver {

    /**
     * Response serialization selected for this transaction.
     * <p>
     * In the code visible in this file, {@code service()} maps the
     * {@code out} request parameter only to {@code HTML} (also the default
     * when the parameter is absent), {@code XHTML}, {@code TEXT},
     * {@code GNU}, {@code XML} and {@code JSON}; any other value is answered
     * with {@code SC_BAD_REQUEST}. {@code RELAXED}, {@code SOAP} and
     * {@code UNICORN} are not assigned there.
     */
    private enum OutputFormat {
        HTML, XHTML, TEXT, XML, JSON, RELAXED, SOAP, UNICORN, GNU
    }

    /** log4j logger shared by all instances of this class. */
    private static final Logger log4j = Logger.getLogger(VerifierServletTransaction.class);

    /**
     * One or more whitespace characters. Used to split whitespace-separated
     * lists: the schema URL lists of the presets and the {@code nsfilter}
     * request parameter.
     */
    private static final Pattern SPACE = Pattern.compile("\\s+");

    /**
     * Pattern that the {@code callback} request parameter of JSON output
     * must match in full; {@code service()} rejects anything else with the
     * message "Callback is not a valid ECMA 262 IdentifierName."
     */
    private static final Pattern JS_IDENTIFIER = Pattern.compile("[\\p{Lu}\\p{Ll}\\p{Lt}\\p{Lm}\\p{Lo}\\p{Nl}_\\$][\\p{Lu}\\p{Ll}\\p{Lt}\\p{Lm}\\p{Lo}\\p{Nl}_\\$\\p{Mn}\\p{Mc}\\p{Nd}\\p{Pc}]*");

    /**
     * Words that {@code service()} refuses as a JSON {@code callback} name
     * even though they match {@link #JS_IDENTIFIER}.
     * <p>
     * The array is searched with
     * {@link Arrays#binarySearch(Object[], Object)}, so it must stay sorted
     * in natural {@code String} order; keep that order when adding entries.
     */
    private static final String[] JS_RESERVED_WORDS = { "abstract", "boolean",
            "break", "byte", "case", "catch", "char", "class", "const",
            "continue", "debugger", "default", "delete", "do", "double",
            "else", "enum", "export", "extends", "final", "finally", "float",
            "for", "function", "goto", "if", "implements", "import", "in",
            "instanceof", "int", "interface", "long", "native", "new",
            "package", "private", "protected", "public", "return", "short",
            "static", "super", "switch", "synchronized", "this", "throw",
            "throws", "transient", "try", "typeof", "var", "void", "volatile",
            "while", "with" };

    /**
     * Character encoding labels. Entry {@code i} is described by
     * {@link #CHARSET_DESCRIPTIONS}{@code [i]}; the two arrays must be kept
     * the same length and in the same order.
     * <p>
     * NOTE(review): the consumer is not in the part of the file visible
     * here; presumably these feed the encoding-override selector of the UI
     * (see the {@code charset} request parameter) -- confirm.
     */
    private static final String[] CHARSETS = { "UTF-8", "UTF-16",
            "Windows-1250", "Windows-1251", "Windows-1252", "Windows-1253",
            "Windows-1254", "Windows-1255", "Windows-1256", "Windows-1257",
            "Windows-1258", "ISO-8859-1", "ISO-8859-2", "ISO-8859-3",
            "ISO-8859-4", "ISO-8859-5", "ISO-8859-6", "ISO-8859-7",
            "ISO-8859-8", "ISO-8859-9", "ISO-8859-13", "ISO-8859-15", "KOI8-R",
            "TIS-620", "GBK", "GB18030", "Big5", "Big5-HKSCS", "Shift_JIS",
            "ISO-2022-JP", "EUC-JP", "ISO-2022-KR", "EUC-KR" };

    /**
     * Human-readable labels (encoding name plus script/region) for the
     * entries of {@link #CHARSETS}, index for index, pre-converted to
     * {@code char[]}.
     */
    private static final char[][] CHARSET_DESCRIPTIONS = {
            "UTF-8 (Global)".toCharArray(), "UTF-16 (Global)".toCharArray(),
            "Windows-1250 (Central European)".toCharArray(),
            "Windows-1251 (Cyrillic)".toCharArray(),
            "Windows-1252 (Western)".toCharArray(),
            "Windows-1253 (Greek)".toCharArray(),
            "Windows-1254 (Turkish)".toCharArray(),
            "Windows-1255 (Hebrew)".toCharArray(),
            "Windows-1256 (Arabic)".toCharArray(),
            "Windows-1257 (Baltic)".toCharArray(),
            "Windows-1258 (Vietnamese)".toCharArray(),
            "ISO-8859-1 (Western)".toCharArray(),
            "ISO-8859-2 (Central European)".toCharArray(),
            "ISO-8859-3 (South European)".toCharArray(),
            "ISO-8859-4 (Baltic)".toCharArray(),
            "ISO-8859-5 (Cyrillic)".toCharArray(),
            "ISO-8859-6 (Arabic)".toCharArray(),
            "ISO-8859-7 (Greek)".toCharArray(),
            "ISO-8859-8 (Hebrew)".toCharArray(),
            "ISO-8859-9 (Turkish)".toCharArray(),
            "ISO-8859-13 (Baltic)".toCharArray(),
            "ISO-8859-15 (Western)".toCharArray(),
            "KOI8-R (Russian)".toCharArray(), "TIS-620 (Thai)".toCharArray(),
            "GBK (Chinese, simplified)".toCharArray(),
            "GB18030 (Chinese, simplified)".toCharArray(),
            "Big5 (Chinese, traditional)".toCharArray(),
            "Big5-HKSCS (Chinese, traditional)".toCharArray(),
            "Shift_JIS (Japanese)".toCharArray(),
            "ISO-2022-JP (Japanese)".toCharArray(),
            "EUC-JP (Japanese)".toCharArray(),
            "ISO-2022-KR (Korean)".toCharArray(),
            "EUC-KR (Korean)".toCharArray() };

    /*
     * Numeric identifiers for schema/doctype families.
     * NOTE(review): no use of these constants is visible in this part of the
     * file; presumably they are compared with the numbers stored in
     * presetDoctypes (first column of the presets list) -- confirm before
     * changing any value.
     */

    /** Identifier for the HTML5 schema family. */
    protected static final int HTML5_SCHEMA = 3;

    /** Identifier for the XHTML 1.0 Strict schema family. */
    protected static final int XHTML1STRICT_SCHEMA = 2;

    /** Identifier for the XHTML 1.0 Transitional schema family. */
    protected static final int XHTML1TRANSITIONAL_SCHEMA = 1;

    /** Identifier for the XHTML5 schema family. */
    protected static final int XHTML5_SCHEMA = 7;

    /**
     * Service name followed by a single space. Assigned in the static
     * initializer from the system property
     * {@code nu.validator.servlet.service-name}, falling back to the same
     * key in {@code misc.properties} and finally to "Validator.nu".
     */
    private static final char[] SERVICE_TITLE;

    /** The literal text "Living Validator". */
    private static final char[] LIVING_VERSION = "Living Validator".toCharArray();

    /**
     * Version label. Assigned in the static initializer from
     * {@code nu.validator.servlet.version} (system property, then
     * {@code misc.properties}); defaults to "Living Validator".
     */
    private static final char[] VERSION;

    /**
     * Title for result pages. Assigned in the static initializer from
     * {@code nu.validator.servlet.results-title} (system property, then
     * {@code misc.properties}); defaults to "Validation results".
     */
    private static final char[] RESULTS_TITLE;

    /** The literal text " for " (with surrounding spaces). */
    private static final char[] FOR = " for ".toCharArray();

    /** Link text "About this Service". */
    private static final char[] ABOUT_THIS_SERVICE = "About this Service".toCharArray();

    /** Link text "Simplified Interface". */
    private static final char[] SIMPLE_UI = "Simplified Interface".toCharArray();

    /**
     * Default User-Agent value, used by {@code service()} when the request
     * carries no {@code useragent} parameter. Assigned in the static
     * initializer from {@code nu.validator.servlet.user-agent} (system
     * property, then {@code misc.properties}); defaults to
     * "Validator.nu/LV".
     */
    private static final String USER_AGENT;

    /**
     * HTML5 spec data, parsed once in the static initializer with
     * {@code Html5SpecBuilder.parseSpec}.
     */
    private static Spec html5spec;

    /*
     * The four preset* arrays below are parallel: index i of each describes
     * the same preset. The static initializer fills them from the
     * tab-separated presets list (columns: doctype number, namespace, label,
     * schema URLs), reading lines until the first blank line.
     */

    /** First column of each preset, parsed with {@code Integer.parseInt}. */
    private static int[] presetDoctypes;

    /** Third column of each preset. */
    private static String[] presetLabels;

    /**
     * Fourth column of each preset; an entry may hold several schema URLs
     * separated by whitespace.
     */
    private static String[] presetUrls;

    /**
     * Second column of each preset, interned; a column value of "-" is
     * stored as {@code null}.
     */
    private static String[] presetNamespaces;

    // XXX SVG!!!

    /**
     * XML content types with a known root namespace. Entry {@code i} pairs
     * with {@link #NAMESPACES_FOR_KNOWN_CONTENT_TYPES}{@code [i]}.
     * <p>
     * NOTE(review): the lookup code is not in the part of the file visible
     * here -- confirm how the pairing is used before reordering.
     */
    private static final String[] KNOWN_CONTENT_TYPES = {
            "application/atom+xml", "application/docbook+xml",
            "application/xhtml+xml", "application/xv+xml", "image/svg+xml" };

    /**
     * Namespace URIs for the entries of {@link #KNOWN_CONTENT_TYPES}, index
     * for index.
     */
    private static final String[] NAMESPACES_FOR_KNOWN_CONTENT_TYPES = {
            "http://www.w3.org/2005/Atom", "http://docbook.org/ns/docbook",
            "http://www.w3.org/1999/xhtml", "http://www.w3.org/1999/xhtml",
            "http://www.w3.org/2000/svg" };

    /**
     * Checker pseudo-schema URLs. {@code isCheckerUrl} returns {@code true}
     * for every entry of this array.
     * <p>
     * NOTE(review): presumably also the expansion of the
     * "http://c.validator.nu/all/" alias -- that code is not visible here;
     * confirm.
     */
    private static final String[] ALL_CHECKERS = {
            "http://c.validator.nu/table/", "http://c.validator.nu/nfc/",
            "http://c.validator.nu/text-content/",
            "http://c.validator.nu/unchecked/",
            "http://c.validator.nu/usemap/", "http://c.validator.nu/obsolete/",
            "http://c.validator.nu/xml-pi/", "http://c.validator.nu/unsupported/",
            "http://c.validator.nu/microdata/" };

    /**
     * Subset of checker URLs for HTML4 documents.
     * <p>
     * NOTE(review): no use is visible in this part of the file; presumably
     * the expansion of the "http://c.validator.nu/all-html4/" alias --
     * confirm.
     */
    private static final String[] ALL_CHECKERS_HTML4 = {
            "http://c.validator.nu/table/", "http://c.validator.nu/nfc/",
            "http://c.validator.nu/unchecked/", "http://c.validator.nu/usemap/" };

    /** Wall-clock time in milliseconds at which this object was created. */
    private long start = System.currentTimeMillis();

    /** The request being served; set by the constructor. */
    protected final HttpServletRequest request;

    /** The response being written; set by the constructor. */
    private final HttpServletResponse response;

    /**
     * Address of the document to check. {@code service()} takes it from the
     * {@code Content-Location} header (non-GET requests only), then the
     * {@code doc} parameter, then the {@code file} parameter; an empty
     * string is normalized to {@code null}.
     */
    protected String document = null;

    /**
     * Parser selection from the {@code parser} request parameter (see
     * {@code setup()}); stays {@code AUTO} for absent or unrecognized values.
     */
    private ParserMode parser = ParserMode.AUTO;

    // NOTE(review): not read or written in the part of the file visible here.
    private String profile = "";

    /** {@code true} when the {@code laxtype} request parameter is present. */
    private boolean laxType = false;

    /**
     * Serializer for the result page; created in {@code service()} only for
     * HTML ({@code HtmlSerializer}) and XHTML ({@code XmlSerializer}) output.
     */
    protected ContentHandler contentHandler;

    /** Emitter wrapping {@link #contentHandler}; HTML/XHTML output only. */
    protected XhtmlSaxEmitter emitter;

    /**
     * Message sink for this transaction; {@code service()} builds it around
     * the message emitter that matches the chosen output format.
     */
    protected MessageEmitterAdapter errorHandler;

    // NOTE(review): not used in the part of the file visible here.
    protected final AttributesImpl attrs = new AttributesImpl();

    /** The response's output stream, obtained at the start of {@code service()}. */
    private OutputStream out;

    /** Jing validation properties, built in {@code validate()}. */
    private PropertyMap jingPropertyMap;

    /**
     * Resolver created in {@code validate()}; wraps {@code dataRes} in a
     * {@code LocalCacheEntityResolver}.
     */
    protected LocalCacheEntityResolver entityResolver;

    /**
     * Value sent as {@code Last-Modified} for HTML/XHTML responses that do
     * not validate a document; set to 0 by the static initializer.
     */
    private static long lastModified;

    /**
     * URLs of the schemas preloaded by the static initializer, interned and
     * in the iteration order of a {@code TreeMap} keyed by URL. Parallel to
     * {@link #preloadedSchemas}.
     */
    private static String[] preloadedSchemaUrls;

    /**
     * Preloaded schemas, index for index with {@link #preloadedSchemaUrls};
     * entries for the HTML5/XHTML5 schema URLs are wrapped in the
     * data-attribute, xml:lang and role-attribute schema wrappers.
     */
    private static Schema[] preloadedSchemas;

    /**
     * URL of the "about" page; system property
     * {@code nu.validator.servlet.about-page}, default
     * "https://about.validator.nu/".
     */
    private final static String ABOUT_PAGE = System.getProperty(
            "nu.validator.servlet.about-page", "https://about.validator.nu/");

    /**
     * Address of the HTML5 facet: {@code VerifierServlet.HTML5_PATH},
     * prefixed with "//" and {@code VerifierServlet.HTML5_HOST} when that
     * host is non-empty.
     */
    private final static String HTML5_FACET = (VerifierServlet.HTML5_HOST.isEmpty() ? "" : ("//" + VerifierServlet.HTML5_HOST)) + VerifierServlet.HTML5_PATH;

    /**
     * Style sheet reference; system property
     * {@code nu.validator.servlet.style-sheet}, default "style.css".
     */
    private final static String STYLE_SHEET = System.getProperty(
            "nu.validator.servlet.style-sheet",
            "style.css");

    /**
     * Icon reference; system property {@code nu.validator.servlet.icon},
     * default "icon.png".
     */
    private final static String ICON = System.getProperty(
            "nu.validator.servlet.icon",
            "icon.png");

    /**
     * Script reference; system property {@code nu.validator.servlet.script},
     * default "script.js".
     */
    private final static String SCRIPT = System.getProperty(
            "nu.validator.servlet.script",
            "script.js");

    /**
     * Size limit handed to {@code PrudentHttpEntityResolver} in
     * {@code validate()} (presumably a byte count -- confirm against that
     * class). Read from the system property
     * {@code nu.validator.servlet.max-file-size}; defaults to 2097152.
     * <p>
     * Parsed with {@link Long#parseLong(String)} to match the declared
     * {@code long} type, so a configured limit above
     * {@code Integer.MAX_VALUE} is accepted instead of failing class
     * initialization with a {@code NumberFormatException}.
     */
    private static final long SIZE_LIMIT = Long.parseLong(System.getProperty(
            "nu.validator.servlet.max-file-size", "2097152"));

    /**
     * Whitespace-separated schema URLs for this request. {@code setup()}
     * takes the {@code preset} parameter when it is non-empty, otherwise the
     * {@code schema} parameter, and stores "" when neither is given.
     */
    private String schemaUrls = null;

    /**
     * Validator for the document; when still {@code null} after
     * {@code tryToSetupValidator()}, {@code validate()} forces
     * {@link #checkNormalization} on.
     */
    protected Validator validator = null;

    // NOTE(review): not used in the part of the file visible here.
    private BufferingRootNamespaceSniffer bufferingRootNamespaceSniffer = null;

    /** Content type of the loaded document ({@code documentInput.getType()}). */
    private String contentType = null;

    /**
     * HTML parser, when the document is parsed as HTML. {@code validate()}
     * expects either this or {@link #xmlParser} to be non-null after
     * {@code loadDocAndSetupParser()} and throws a {@code RuntimeException}
     * otherwise.
     */
    protected HtmlParser htmlParser = null;

    /** XML parser, when the document is parsed as XML; see {@link #htmlParser}. */
    protected SAXDriver xmlParser = null;

    /**
     * Reader that finally parses the document; {@code validate()} wraps it
     * successively (wiretap, optional namespace dropping, attribute
     * permuting, optional outline building) before calling {@code parse}.
     */
    protected XMLReader reader;

    /** Input source of the document being checked. */
    protected TypedInputSource documentInput;

    /** HTTP resolver created in {@code validate()} with {@code SIZE_LIMIT}, {@link #laxType} and the error handler. */
    protected PrudentHttpEntityResolver httpRes;

    /** {@code data:} URI resolver created in {@code validate()}; delegates to {@link #httpRes}. */
    protected DataUriEntityResolver dataRes;

    /** Content-type parser created in {@code validate()}. */
    protected ContentTypeParser contentTypeParser;

    // NOTE(review): declared as a raw Set -- the element type is not stated
    // here, and the code that fills it is outside the visible part of the file.
    private Set loadedValidatorUrls = new HashSet<>();

    /**
     * When {@code true}, {@code validate()} enables the SAX feature
     * "http://xml.org/sax/features/unicode-normalization-checking" on the
     * reader.
     */
    private boolean checkNormalization = false;

    // NOTE(review): not used in the part of the file visible here.
    private boolean rootNamespaceSeen = false;

    /** Output format chosen in {@code service()} from the {@code out} parameter. */
    private OutputFormat outputFormat;

    /** {@code Content-Type} of the request; read only for non-GET requests. */
    private String postContentType;

    /** {@code true} when the request method is GET or HEAD. */
    private boolean methodIsGet;

    /**
     * Source text of the checked document; initialized from
     * {@link #documentInput} in {@code validate()} and shared with the
     * message emitter adapter and the image collector.
     */
    private SourceCode sourceCode = new SourceCode();

    private Deque
outline; private boolean showSource; private boolean showOutline; private boolean schemaIsDefault; private String userAgent; private BaseUriTracker baseUriTracker = null; private String charsetOverride = null; private Set filteredNamespaces = new LinkedHashSet<>(); // linked private LexicalHandler lexicalHandler; // for // UI // stability protected ImageCollector imageCollector; private boolean externalSchema = false; private boolean externalSchematron = false; private String schemaListForStats = null; static { try { log4j.debug("Starting static initializer."); lastModified = 0; BufferedReader r = new BufferedReader(new InputStreamReader(LocalCacheEntityResolver.getPresetsAsStream(), "UTF-8")); String line; List doctypes = new LinkedList<>(); List namespaces = new LinkedList<>(); List labels = new LinkedList<>(); List urls = new LinkedList<>(); Properties props = new Properties(); log4j.debug("Reading miscellaneous properties."); props.load(VerifierServlet.class.getClassLoader().getResourceAsStream( "nu/validator/localentities/files/misc.properties")); SERVICE_TITLE = (System.getProperty( "nu.validator.servlet.service-name", props.getProperty("nu.validator.servlet.service-name", "Validator.nu")) + " ").toCharArray(); RESULTS_TITLE = (System.getProperty( "nu.validator.servlet.results-title", props.getProperty( "nu.validator.servlet.results-title", "Validation results"))).toCharArray(); VERSION = (System.getProperty("nu.validator.servlet.version", props.getProperty("nu.validator.servlet.version", "Living Validator"))).toCharArray(); USER_AGENT= (System.getProperty("nu.validator.servlet.user-agent", props.getProperty("nu.validator.servlet.user-agent", "Validator.nu/LV"))); log4j.debug("Starting to loop over config file lines."); while ((line = r.readLine()) != null) { if ("".equals(line.trim())) { break; } String s[] = line.split("\t"); doctypes.add(s[0]); namespaces.add(s[1]); labels.add(s[2]); urls.add(s[3]); } log4j.debug("Finished reading config."); String[] 
presetDoctypesAsStrings = doctypes.toArray(new String[0]); presetNamespaces = namespaces.toArray(new String[0]); presetLabels = labels.toArray(new String[0]); presetUrls = urls.toArray(new String[0]); log4j.debug("Converted config to arrays."); for (int i = 0; i < presetNamespaces.length; i++) { String str = presetNamespaces[i]; if ("-".equals(str)) { presetNamespaces[i] = null; } else { presetNamespaces[i] = presetNamespaces[i].intern(); } } log4j.debug("Prepared namespace array."); presetDoctypes = new int[presetDoctypesAsStrings.length]; for (int i = 0; i < presetDoctypesAsStrings.length; i++) { presetDoctypes[i] = Integer.parseInt(presetDoctypesAsStrings[i]); } log4j.debug("Parsed doctype numbers into ints."); String prefix = System.getProperty("nu.validator.servlet.cachepathprefix"); log4j.debug("The cache path prefix is: " + prefix); ErrorHandler eh = new SystemErrErrorHandler(); LocalCacheEntityResolver er = new LocalCacheEntityResolver(new NullEntityResolver()); er.setAllowRnc(true); PropertyMapBuilder pmb = new PropertyMapBuilder(); pmb.put(ValidateProperty.ERROR_HANDLER, eh); pmb.put(ValidateProperty.ENTITY_RESOLVER, er); pmb.put(ValidateProperty.XML_READER_CREATOR, new VerifierServletXMLReaderCreator(eh, er)); RngProperty.CHECK_ID_IDREF.add(pmb); PropertyMap pMap = pmb.toPropertyMap(); log4j.debug("Parsing set up. 
Starting to read schemas."); SortedMap schemaMap = new TreeMap<>(); schemaMap.put("http://c.validator.nu/table/", CheckerSchema.TABLE_CHECKER); schemaMap.put("http://hsivonen.iki.fi/checkers/table/", CheckerSchema.TABLE_CHECKER); schemaMap.put("http://c.validator.nu/nfc/", CheckerSchema.NORMALIZATION_CHECKER); schemaMap.put("http://hsivonen.iki.fi/checkers/nfc/", CheckerSchema.NORMALIZATION_CHECKER); schemaMap.put("http://c.validator.nu/debug/", CheckerSchema.DEBUG_CHECKER); schemaMap.put("http://hsivonen.iki.fi/checkers/debug/", CheckerSchema.DEBUG_CHECKER); schemaMap.put("http://c.validator.nu/text-content/", CheckerSchema.TEXT_CONTENT_CHECKER); schemaMap.put("http://hsivonen.iki.fi/checkers/text-content/", CheckerSchema.TEXT_CONTENT_CHECKER); schemaMap.put("http://c.validator.nu/usemap/", CheckerSchema.USEMAP_CHECKER); schemaMap.put("http://n.validator.nu/checkers/usemap/", CheckerSchema.USEMAP_CHECKER); schemaMap.put("http://c.validator.nu/unchecked/", CheckerSchema.UNCHECKED_SUBTREE_WARNER); schemaMap.put("http://s.validator.nu/html5/assertions.sch", CheckerSchema.ASSERTION_SCH); schemaMap.put("http://s.validator.nu/html4/assertions.sch", CheckerSchema.HTML4ASSERTION_SCH); schemaMap.put("http://c.validator.nu/obsolete/", CheckerSchema.CONFORMING_BUT_OBSOLETE_WARNER); schemaMap.put("http://c.validator.nu/xml-pi/", CheckerSchema.XML_PI_CHECKER); schemaMap.put("http://c.validator.nu/unsupported/", CheckerSchema.UNSUPPORTED_CHECKER); schemaMap.put("http://c.validator.nu/microdata/", CheckerSchema.MICRODATA_CHECKER); schemaMap.put("http://c.validator.nu/rdfalite/", CheckerSchema.RDFALITE_CHECKER); for (String presetUrl : presetUrls) { for (String url : SPACE.split(presetUrl)) { if (schemaMap.get(url) == null && !isCheckerUrl(url)) { Schema sch = schemaByUrl(url, er, pMap); schemaMap.put(url, sch); } } } log4j.debug("Schemas read."); preloadedSchemaUrls = new String[schemaMap.size()]; preloadedSchemas = new Schema[schemaMap.size()]; int i = 0; for (Map.Entry entry : 
schemaMap.entrySet()) { preloadedSchemaUrls[i] = entry.getKey().intern(); Schema s = entry.getValue(); String u = entry.getKey(); if (isDataAttributeDroppingSchema(u)) { s = new DataAttributeDroppingSchemaWrapper( s); } if (isXmlLangAllowingSchema(u)) { s = new XmlLangAttributeDroppingSchemaWrapper(s); } if (isRoleAttributeFilteringSchema(u)) { s = new RoleAttributeFilteringSchemaWrapper(s); } preloadedSchemas[i] = s; i++; } log4j.debug("Reading spec."); html5spec = Html5SpecBuilder.parseSpec(LocalCacheEntityResolver.getHtml5SpecAsStream()); log4j.debug("Spec read."); log4j.debug("Initialization complete."); } catch (Exception e) { throw new RuntimeException(e); } } protected static String scrub(CharSequence s) { return Normalizer.normalize( CharacterUtil.prudentlyScrubCharacterData(s), Normalizer.NFC); } private static boolean isDataAttributeDroppingSchema(String key) { return ("http://s.validator.nu/xhtml5.rnc".equals(key) || "http://s.validator.nu/html5.rnc".equals(key) || "http://s.validator.nu/html5-all.rnc".equals(key) || "http://s.validator.nu/xhtml5-all.rnc".equals(key) || "http://s.validator.nu/html5-its.rnc".equals(key) || "http://s.validator.nu/xhtml5-rdfalite.rnc".equals(key) || "http://s.validator.nu/html5-rdfalite.rnc".equals(key)); } private static boolean isXmlLangAllowingSchema(String key) { return ("http://s.validator.nu/xhtml5.rnc".equals(key) || "http://s.validator.nu/html5.rnc".equals(key) || "http://s.validator.nu/html5-all.rnc".equals(key) || "http://s.validator.nu/xhtml5-all.rnc".equals(key) || "http://s.validator.nu/html5-its.rnc".equals(key) || "http://s.validator.nu/xhtml5-rdfalite.rnc".equals(key) || "http://s.validator.nu/html5-rdfalite.rnc".equals(key)); } private static boolean isRoleAttributeFilteringSchema(String key) { return ("http://s.validator.nu/xhtml5.rnc".equals(key) || "http://s.validator.nu/html5.rnc".equals(key) || "http://s.validator.nu/html5-all.rnc".equals(key) || "http://s.validator.nu/xhtml5-all.rnc".equals(key) || 
"http://s.validator.nu/html5-its.rnc".equals(key) || "http://s.validator.nu/xhtml5-rdfalite.rnc".equals(key) || "http://s.validator.nu/html5-rdfalite.rnc".equals(key)); } private static boolean isCheckerUrl(String url) { if ("http://c.validator.nu/all/".equals(url) || "http://hsivonen.iki.fi/checkers/all/".equals(url)) { return true; } else if ("http://c.validator.nu/all-html4/".equals(url) || "http://hsivonen.iki.fi/checkers/all-html4/".equals(url)) { return true; } else if ("http://c.validator.nu/base/".equals(url)) { return true; } else if ("http://c.validator.nu/rdfalite/".equals(url)) { return true; } for (String checker : ALL_CHECKERS) { if (checker.equals(url)) { return true; } } return false; } /** * @param request * @param response */ VerifierServletTransaction(HttpServletRequest request, HttpServletResponse response) { this.request = request; this.response = response; } protected boolean willValidate() { if (methodIsGet) { return document != null; } else { // POST return true; } } void service() throws ServletException, IOException { this.methodIsGet = "GET".equals(request.getMethod()) || "HEAD".equals(request.getMethod()); this.out = response.getOutputStream(); try { request.setCharacterEncoding("utf-8"); } catch (NoSuchMethodError e) { log4j.debug("Vintage Servlet API doesn't support setCharacterEncoding().", e); } if (!methodIsGet) { postContentType = request.getContentType(); if (postContentType == null) { response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Content-Type missing"); return; } else if (postContentType.trim().toLowerCase().startsWith( "application/x-www-form-urlencoded")) { response.sendError( HttpServletResponse.SC_UNSUPPORTED_MEDIA_TYPE, "application/x-www-form-urlencoded not supported. 
Please use multipart/form-data."); return; } } String outFormat = request.getParameter("out"); if (outFormat == null) { outputFormat = OutputFormat.HTML; } else { if ("html".equals(outFormat)) { outputFormat = OutputFormat.HTML; } else if ("xhtml".equals(outFormat)) { outputFormat = OutputFormat.XHTML; } else if ("text".equals(outFormat)) { outputFormat = OutputFormat.TEXT; } else if ("gnu".equals(outFormat)) { outputFormat = OutputFormat.GNU; } else if ("xml".equals(outFormat)) { outputFormat = OutputFormat.XML; } else if ("json".equals(outFormat)) { outputFormat = OutputFormat.JSON; } else { response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Unsupported output format"); return; } } if (!methodIsGet) { document = request.getHeader("Content-Location"); } if (document == null) { document = request.getParameter("doc"); } if (document == null) { document = request.getParameter("file"); } document = ("".equals(document)) ? null : document; String callback = null; if (outputFormat == OutputFormat.JSON) { callback = request.getParameter("callback"); if (callback != null) { Matcher m = JS_IDENTIFIER.matcher(callback); if (m.matches()) { if (Arrays.binarySearch(JS_RESERVED_WORDS, callback) >= 0) { response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Callback is a reserved word."); return; } } else { response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Callback is not a valid ECMA 262 IdentifierName."); return; } } } if (willValidate()) { response.setDateHeader("Expires", 0); response.setHeader("Cache-Control", "no-cache"); } else if (outputFormat == OutputFormat.HTML || outputFormat == OutputFormat.XHTML) { response.setDateHeader("Last-Modified", lastModified); } else { response.sendError(HttpServletResponse.SC_BAD_REQUEST, "No input document"); return; } setup(); if (request.getParameter("useragent") != null) { userAgent = scrub(request.getParameter("useragent")); } else { userAgent = USER_AGENT; } Object inputType = 
request.getAttribute("nu.validator.servlet.MultipartFormDataFilter.type"); showSource = (request.getParameter("showsource") != null); showSource = (showSource || "textarea".equals(inputType)); showOutline = (request.getParameter("showoutline") != null); if (request.getParameter("showimagereport") != null) { imageCollector = new ImageCollector(sourceCode); } String charset = request.getParameter("charset"); if (charset != null) { charset = scrub(charset.trim()); if (!"".equals(charset)) { charsetOverride = charset; } } String nsfilter = request.getParameter("nsfilter"); if (nsfilter != null) { for (String ns : SPACE.split(nsfilter)) { if (ns.length() > 0) { filteredNamespaces.add(ns); } } } boolean errorsOnly = ("error".equals(request.getParameter("level"))); boolean asciiQuotes = (request.getParameter("asciiquotes") != null); int lineOffset = 0; String lineOffsetStr = request.getParameter("lineoffset"); if (lineOffsetStr != null) { try { lineOffset = Integer.parseInt(lineOffsetStr); } catch (NumberFormatException e) { } } try { if (outputFormat == OutputFormat.HTML || outputFormat == OutputFormat.XHTML) { if (outputFormat == OutputFormat.HTML) { response.setContentType("text/html; charset=utf-8"); contentHandler = new HtmlSerializer(out); } else { response.setContentType("application/xhtml+xml"); contentHandler = new XmlSerializer(out); } emitter = new XhtmlSaxEmitter(contentHandler); errorHandler = new MessageEmitterAdapter(sourceCode, showSource, imageCollector, lineOffset, false, new XhtmlMessageEmitter(contentHandler)); PageEmitter.emit(contentHandler, this); } else { if (outputFormat == OutputFormat.TEXT) { response.setContentType("text/plain; charset=utf-8"); errorHandler = new MessageEmitterAdapter(sourceCode, showSource, null, lineOffset, false, new TextMessageEmitter(out, asciiQuotes)); } else if (outputFormat == OutputFormat.GNU) { response.setContentType("text/plain; charset=utf-8"); errorHandler = new MessageEmitterAdapter(sourceCode, showSource, null, 
lineOffset, false, new GnuMessageEmitter(out, asciiQuotes)); } else if (outputFormat == OutputFormat.XML) { response.setContentType("application/xml"); errorHandler = new MessageEmitterAdapter(sourceCode, showSource, null, lineOffset, false, new XmlMessageEmitter(new XmlSerializer(out))); } else if (outputFormat == OutputFormat.JSON) { if (callback == null) { response.setContentType("application/json; charset=utf-8"); } else { response.setContentType("application/javascript; charset=utf-8"); } errorHandler = new MessageEmitterAdapter(sourceCode, showSource, null, lineOffset, false, new JsonMessageEmitter( new nu.validator.json.Serializer(out), callback)); } else { throw new RuntimeException("Unreachable."); } errorHandler.setErrorsOnly(errorsOnly); validate(); } } catch (SAXException e) { throw new ServletException(e); } } /** * @throws ServletException */ protected void setup() throws ServletException { String preset = request.getParameter("preset"); if (preset != null && !"".equals(preset)) { schemaUrls = preset; } else { schemaUrls = request.getParameter("schema"); } if (schemaUrls == null) { schemaUrls = ""; } String parserStr = request.getParameter("parser"); if ("html".equals(parserStr)) { parser = ParserMode.HTML_AUTO; } else if ("xmldtd".equals(parserStr)) { parser = ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION; } else if ("xml".equals(parserStr)) { parser = ParserMode.XML_NO_EXTERNAL_ENTITIES; } else if ("html5".equals(parserStr)) { parser = ParserMode.HTML; } else if ("html4".equals(parserStr)) { parser = ParserMode.HTML401_STRICT; } else if ("html4tr".equals(parserStr)) { parser = ParserMode.HTML401_TRANSITIONAL; } // else auto laxType = (request.getParameter("laxtype") != null); } private boolean useHtml5Schema() { if ("".equals(schemaUrls)) { return false; } return (schemaUrls.contains("http://s.validator.nu/html5.rnc") || schemaUrls.contains("http://s.validator.nu/html5-all.rnc") || schemaUrls.contains("http://s.validator.nu/html5-its.rnc") || 
schemaUrls.contains("http://s.validator.nu/html5-rdfalite.rnc")); } private boolean isHtmlUnsafePreset() { if ("".equals(schemaUrls)) { return false; } boolean preset = false; for (String presetUrl : presetUrls) { if (presetUrl.equals(schemaUrls)) { preset = true; break; } } if (!preset) { return false; } return !(schemaUrls.startsWith("http://s.validator.nu/xhtml10/xhtml-basic.rnc") || schemaUrls.startsWith("http://s.validator.nu/xhtml10/xhtml-strict.rnc") || schemaUrls.startsWith("http://s.validator.nu/xhtml10/xhtml-transitional.rnc") || schemaUrls.startsWith("http://s.validator.nu/xhtml10/xhtml-frameset.rnc") || schemaUrls.startsWith("http://s.validator.nu/html5.rnc") || schemaUrls.startsWith("http://s.validator.nu/html5-all.rnc") || schemaUrls.startsWith("http://s.validator.nu/html5-its.rnc") || schemaUrls.startsWith("http://s.validator.nu/html5-rdfalite.rnc")); } /** * @throws SAXException */ @SuppressWarnings({ "deprecation", "unchecked" }) void validate() throws SAXException { if (!willValidate()) { return; } boolean isHtmlOrXhtml = (outputFormat == OutputFormat.HTML || outputFormat == OutputFormat.XHTML); if (isHtmlOrXhtml) { try { out.flush(); } catch (IOException e1) { throw new SAXException(e1); } } httpRes = new PrudentHttpEntityResolver(SIZE_LIMIT, laxType, errorHandler); httpRes.setUserAgent(userAgent); dataRes = new DataUriEntityResolver(httpRes, laxType, errorHandler); contentTypeParser = new ContentTypeParser(errorHandler, laxType); entityResolver = new LocalCacheEntityResolver(dataRes); setAllowRnc(true); try { this.errorHandler.start(document); PropertyMapBuilder pmb = new PropertyMapBuilder(); pmb.put(ValidateProperty.ERROR_HANDLER, errorHandler); pmb.put(ValidateProperty.ENTITY_RESOLVER, entityResolver); pmb.put(ValidateProperty.XML_READER_CREATOR, new VerifierServletXMLReaderCreator(errorHandler, entityResolver)); pmb.put(ValidateProperty.SCHEMA_RESOLVER, this); RngProperty.CHECK_ID_IDREF.add(pmb); jingPropertyMap = pmb.toPropertyMap(); 
tryToSetupValidator(); setAllowRnc(false); loadDocAndSetupParser(); setErrorProfile(); reader.setErrorHandler(errorHandler); contentType = documentInput.getType(); sourceCode.initialize(documentInput); if (validator == null) { checkNormalization = true; } if (checkNormalization) { reader.setFeature( "http://xml.org/sax/features/unicode-normalization-checking", true); } WiretapXMLReaderWrapper wiretap = new WiretapXMLReaderWrapper( reader); ContentHandler recorder = sourceCode.getLocationRecorder(); if (baseUriTracker == null) { wiretap.setWiretapContentHander(recorder); } else { wiretap.setWiretapContentHander(new CombineContentHandler( recorder, baseUriTracker)); } wiretap.setWiretapLexicalHandler((LexicalHandler) recorder); reader = wiretap; if (htmlParser != null) { htmlParser.addCharacterHandler(sourceCode); htmlParser.setMappingLangToXmlLang(true); htmlParser.setErrorHandler(errorHandler.getExactErrorHandler()); htmlParser.setTreeBuilderErrorHandlerOverride(errorHandler); errorHandler.setHtml(true); } else if (xmlParser != null) { // this must be after wiretap! if (!filteredNamespaces.isEmpty()) { reader = new NamespaceDroppingXMLReaderWrapper(reader, filteredNamespaces); } xmlParser.setErrorHandler(errorHandler.getExactErrorHandler()); xmlParser.lockErrorHandler(); } else { throw new RuntimeException("Bug. 
Unreachable."); } reader = new AttributesPermutingXMLReaderWrapper(reader); // make // RNG // validation // better if (charsetOverride != null) { String charset = documentInput.getEncoding(); if (charset == null) { errorHandler.warning(new SAXParseException( "Overriding document character encoding from none to \u201C" + charsetOverride + "\u201D.", null)); } else { errorHandler.warning(new SAXParseException( "Overriding document character encoding from \u201C" + charset + "\u201D to \u201C" + charsetOverride + "\u201D.", null)); } documentInput.setEncoding(charsetOverride); } if (showOutline) { reader = new OutlineBuildingXMLReaderWrapper(reader, request); reader.parse(documentInput); outline = (Deque
) request.getAttribute("http://validator.nu/properties/document-outline"); } else { reader.parse(documentInput); } } catch (TooManyErrorsException e) { log4j.debug("TooManyErrorsException", e); errorHandler.fatalError(e); } catch (SAXException e) { log4j.debug("SAXException", e); } catch (IOException e) { isHtmlOrXhtml = false; log4j.info("IOException", e); errorHandler.ioError(e); } catch (IncorrectSchemaException e) { log4j.debug("IncorrectSchemaException", e); errorHandler.schemaError(e); } catch (RuntimeException e) { isHtmlOrXhtml = false; log4j.error("RuntimeException, doc: " + document + " schema: " + schemaUrls + " lax: " + laxType, e); errorHandler.internalError( e, "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. The admin was notified."); } catch (Error e) { isHtmlOrXhtml = false; log4j.error("Error, doc: " + document + " schema: " + schemaUrls + " lax: " + laxType, e); errorHandler.internalError( e, "Oops. That was not supposed to happen. A bug manifested itself in the application internals. Unable to continue. Sorry. 
The admin was notified."); } finally { errorHandler.end(successMessage(), failureMessage()); gatherStatistics(); } if (isHtmlOrXhtml) { XhtmlOutlineEmitter outlineEmitter = new XhtmlOutlineEmitter( contentHandler, outline); outlineEmitter.emitA11y(); outlineEmitter.emit(); emitDetails(); StatsEmitter.emit(contentHandler, this); } } private void gatherStatistics() { Statistics stats = Statistics.STATISTICS; if (stats == null) { return; } synchronized (stats) { stats.incrementTotal(); if (charsetOverride != null) { stats.incrementField(Statistics.Field.CUSTOM_ENC); } switch (parser) { case HTML401_STRICT: case HTML401_TRANSITIONAL: stats.incrementField(Statistics.Field.PARSER_HTML4); break; case XML_EXTERNAL_ENTITIES_NO_VALIDATION: stats.incrementField(Statistics.Field.PARSER_XML_EXTERNAL); break; case AUTO: case HTML: case HTML_AUTO: case XML_NO_EXTERNAL_ENTITIES: default: break; } if (!filteredNamespaces.isEmpty()) { stats.incrementField(Statistics.Field.XMLNS_FILTER); } if (laxType) { stats.incrementField(Statistics.Field.LAX_TYPE); } if (imageCollector != null) { stats.incrementField(Statistics.Field.IMAGE_REPORT); } if (showSource) { stats.incrementField(Statistics.Field.SHOW_SOURCE); } if (showOutline) { stats.incrementField(Statistics.Field.SHOW_OUTLINE); } if (methodIsGet) { stats.incrementField(Statistics.Field.INPUT_GET); } else { // POST stats.incrementField(Statistics.Field.INPUT_POST); Object inputType = request.getAttribute("nu.validator.servlet.MultipartFormDataFilter.type"); if ("textarea".equals(inputType)) { stats.incrementField(Statistics.Field.INPUT_TEXT_FIELD); } else if ("file".equals(inputType)) { stats.incrementField(Statistics.Field.INPUT_FILE_UPLOAD); } else { stats.incrementField(Statistics.Field.INPUT_ENTITY_BODY); } } if (htmlParser != null) { stats.incrementField(Statistics.Field.INPUT_HTML); } if (xmlParser != null) { stats.incrementField(Statistics.Field.INPUT_XML); } switch (outputFormat) { case GNU: 
stats.incrementField(Statistics.Field.OUTPUT_GNU); break; case HTML: stats.incrementField(Statistics.Field.OUTPUT_HTML); break; case JSON: stats.incrementField(Statistics.Field.OUTPUT_JSON); break; case TEXT: stats.incrementField(Statistics.Field.OUTPUT_TEXT); break; case XHTML: stats.incrementField(Statistics.Field.OUTPUT_XHTML); break; case XML: stats.incrementField(Statistics.Field.OUTPUT_XML); break; case RELAXED: case SOAP: case UNICORN: default: break; } if (schemaListForStats == null) { stats.incrementField(Statistics.Field.LOGIC_ERROR); } else { boolean preset = false; for (int i = 0; i < presetUrls.length; i++) { if (presetUrls[i].equals(schemaListForStats)) { preset = true; if (externalSchema || externalSchematron) { stats.incrementField(Statistics.Field.LOGIC_ERROR); } else { stats.incrementField(Statistics.Field.PRESET_SCHEMA); /* * XXX WARNING WARNING: These mappings correspond to * values in the presets.txt file in the validator * source repo. They might be bogus if a custom * presets file is used instead. 
*/ switch (i) { case 0: case 5: stats.incrementField(Statistics.Field.HTML5_SCHEMA); break; case 1: case 6: stats.incrementField(Statistics.Field.HTML5_RDFA_LITE_SCHEMA); break; case 2: stats.incrementField(Statistics.Field.HTML4_STRICT_SCHEMA); break; case 3: stats.incrementField(Statistics.Field.HTML4_TRANSITIONAL_SCHEMA); break; case 4: stats.incrementField(Statistics.Field.HTML4_FRAMESET_SCHEMA); break; case 7: stats.incrementField(Statistics.Field.XHTML1_COMPOUND_SCHEMA); break; case 8: stats.incrementField(Statistics.Field.SVG_SCHEMA); break; default: stats.incrementField(Statistics.Field.LOGIC_ERROR); break; } } break; } } if (!preset && !externalSchema) { stats.incrementField(Statistics.Field.BUILT_IN_NON_PRESET); } } if ("".equals(schemaUrls)) { stats.incrementField(Statistics.Field.AUTO_SCHEMA); if (externalSchema) { stats.incrementField(Statistics.Field.LOGIC_ERROR); } } else if (externalSchema) { if (externalSchematron) { stats.incrementField(Statistics.Field.EXTERNAL_SCHEMA_SCHEMATRON); } else { stats.incrementField(Statistics.Field.EXTERNAL_SCHEMA_NON_SCHEMATRON); } } else if (externalSchematron) { stats.incrementField(Statistics.Field.LOGIC_ERROR); } } } /** * @return * @throws SAXException */ protected String successMessage() throws SAXException { return "The document validates according to the specified schema(s)."; } protected String failureMessage() throws SAXException { return "There were errors."; } void emitDetails() throws SAXException { Object inputType = request.getAttribute("nu.validator.servlet.MultipartFormDataFilter.type"); String type = documentInput != null ? 
documentInput.getType() : ""; if ("text/html".equals(type) || "text/html-sandboxed".equals(type)) { attrs.clear(); emitter.startElementWithClass("div", "details"); if (schemaIsDefault) { emitter.startElementWithClass("p", "msgschema"); emitter.characters(String.format("Used the schema for %s.", getPresetLabel(HTML5_SCHEMA))); emitter.endElement("p"); } emitter.startElementWithClass("p", "msgmediatype"); if (!isHtmlUnsafePreset()) { emitter.characters("Used the HTML parser."); } if (methodIsGet && !"textarea".equals(inputType) && !"file".equals(inputType)) { String charset = documentInput.getEncoding(); if (charset != null) { emitter.characters(String.format( " Externally specified character encoding was %s.", charset)); } } emitter.endElement("div"); } } /** * @throws SAXException * @throws IOException * @throws IncorrectSchemaException */ protected void tryToSetupValidator() throws SAXException, IOException, IncorrectSchemaException { validator = validatorByUrls(schemaUrls); } protected void setErrorProfile() { profile = request.getParameter("profile"); HashMap profileMap = new HashMap<>(); if ("pedagogical".equals(profile)) { profileMap.put("xhtml1", "warn"); } else if ("polyglot".equals(profile)) { profileMap.put("xhtml1", "warn"); profileMap.put("xhtml2", "warn"); } else { return; // presumed to be permissive } htmlParser.setErrorProfile(profileMap); } /** * @throws SAXException * @throws IOException * @throws IncorrectSchemaException * @throws SAXNotRecognizedException * @throws SAXNotSupportedException */ protected void loadDocAndSetupParser() throws SAXException, IOException, IncorrectSchemaException, SAXNotRecognizedException, SAXNotSupportedException { switch (parser) { case HTML_AUTO: case HTML: case HTML401_STRICT: case HTML401_TRANSITIONAL: if (isHtmlUnsafePreset()) { String message = "The chosen preset schema is not appropriate for HTML."; SAXException se = new SAXException(message); errorHandler.schemaError(se); throw se; } setAllowGenericXml(false); 
setAllowHtml(true); setAcceptAllKnownXmlTypes(false); setAllowXhtml(false); loadDocumentInput(); newHtmlParser(); DoctypeExpectation doctypeExpectation; int schemaId; switch (parser) { case HTML: doctypeExpectation = DoctypeExpectation.HTML; schemaId = HTML5_SCHEMA; break; case HTML401_STRICT: doctypeExpectation = DoctypeExpectation.HTML401_STRICT; schemaId = XHTML1STRICT_SCHEMA; break; case HTML401_TRANSITIONAL: doctypeExpectation = DoctypeExpectation.HTML401_TRANSITIONAL; schemaId = XHTML1TRANSITIONAL_SCHEMA; break; default: doctypeExpectation = DoctypeExpectation.AUTO; schemaId = 0; break; } htmlParser.setDoctypeExpectation(doctypeExpectation); htmlParser.setDocumentModeHandler(this); reader = htmlParser; if (validator == null) { validator = validatorByDoctype(schemaId); } if (validator != null) { reader.setContentHandler(validator.getContentHandler()); } break; case XML_NO_EXTERNAL_ENTITIES: case XML_EXTERNAL_ENTITIES_NO_VALIDATION: setAllowGenericXml(true); setAllowHtml(false); setAcceptAllKnownXmlTypes(true); setAllowXhtml(true); loadDocumentInput(); setupXmlParser(); break; default: setAllowGenericXml(true); setAllowHtml(true); setAcceptAllKnownXmlTypes(true); setAllowXhtml(true); loadDocumentInput(); String type = documentInput.getType(); if ("text/html".equals(type) || "text/html-sandboxed".equals(type)) { if (isHtmlUnsafePreset()) { String message = "The Content-Type was \u201C" + type + "\u201D, but the chosen preset schema is not appropriate for HTML."; SAXException se = new SAXException(message); errorHandler.schemaError(se); throw se; } newHtmlParser(); if (useHtml5Schema()) { htmlParser.setDoctypeExpectation(DoctypeExpectation.HTML); } else { htmlParser.setDoctypeExpectation(DoctypeExpectation.AUTO); } htmlParser.setDocumentModeHandler(this); reader = htmlParser; if (validator != null) { reader.setContentHandler(validator.getContentHandler()); } } else { errorHandler.info("The Content-Type was \u201C" + type + "\u201D. 
Using the XML parser (not resolving external entities)."); setupXmlParser(); } break; } } /** * */ protected void newHtmlParser() { htmlParser = new HtmlParser(); htmlParser.setCommentPolicy(XmlViolationPolicy.ALLOW); htmlParser.setContentNonXmlCharPolicy(XmlViolationPolicy.ALLOW); htmlParser.setContentSpacePolicy(XmlViolationPolicy.ALTER_INFOSET); htmlParser.setNamePolicy(XmlViolationPolicy.ALLOW); htmlParser.setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL); htmlParser.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET); htmlParser.setMappingLangToXmlLang(true); htmlParser.setHtml4ModeCompatibleWithXhtml1Schemata(true); htmlParser.setHeuristics(Heuristics.ALL); } protected Validator validatorByDoctype(int schemaId) throws SAXException, IOException, IncorrectSchemaException { if (schemaId == 0) { return null; } for (int i = 0; i < presetDoctypes.length; i++) { if (presetDoctypes[i] == schemaId) { return validatorByUrls(presetUrls[i]); } } throw new RuntimeException("Doctype mappings not initialized properly."); } /** * @throws SAXNotRecognizedException * @throws SAXNotSupportedException */ protected void setupXmlParser() throws SAXNotRecognizedException, SAXNotSupportedException { xmlParser = new SAXDriver(); xmlParser.setCharacterHandler(sourceCode); if (lexicalHandler != null) { xmlParser.setProperty("http://xml.org/sax/properties/lexical-handler", lexicalHandler); } reader = new IdFilter(xmlParser); reader.setFeature("http://xml.org/sax/features/string-interning", true); reader.setFeature( "http://xml.org/sax/features/external-general-entities", parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION); reader.setFeature( "http://xml.org/sax/features/external-parameter-entities", parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION); if (parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION) { reader.setEntityResolver(entityResolver); } else { reader.setEntityResolver(new NullEntityResolver()); } if (validator == null) { 
bufferingRootNamespaceSniffer = new BufferingRootNamespaceSniffer( this); reader.setContentHandler(bufferingRootNamespaceSniffer); } else { reader.setContentHandler(new RootNamespaceSniffer(this, validator.getContentHandler())); reader.setDTDHandler(validator.getDTDHandler()); } } /** * @param validator * @return * @throws SAXException * @throws IOException * @throws IncorrectSchemaException */ private Validator validatorByUrls(String schemaList) throws SAXException, IOException, IncorrectSchemaException { System.setProperty("nu.validator.schema.rdfa-full", "0"); schemaListForStats = schemaList; Validator v = null; String[] schemas = SPACE.split(schemaList); for (int i = schemas.length - 1; i > -1; i--) { String url = schemas[i]; if ("http://s.validator.nu/html5-all.rnc".equals(url)) { System.setProperty("nu.validator.schema.rdfa-full", "1"); } if ("http://c.validator.nu/all/".equals(url) || "http://hsivonen.iki.fi/checkers/all/".equals(url)) { for (String checker : ALL_CHECKERS) { v = combineValidatorByUrl(v, checker); } } else if ("http://c.validator.nu/all-html4/".equals(url) || "http://hsivonen.iki.fi/checkers/all-html4/".equals(url)) { for (String checker : ALL_CHECKERS_HTML4) { v = combineValidatorByUrl(v, checker); } } else { v = combineValidatorByUrl(v, url); } } if (imageCollector != null && v != null) { v = new CombineValidator(imageCollector, v); } return v; } /** * @param val * @param url * @return * @throws SAXException * @throws IOException * @throws IncorrectSchemaException */ private Validator combineValidatorByUrl(Validator val, String url) throws SAXException, IOException, IncorrectSchemaException { if (!"".equals(url)) { Validator v = validatorByUrl(url); if (val == null) { val = v; } else { val = new CombineValidator(v, val); } } return val; } /** * @param url * @return * @throws SAXException * @throws IOException * @throws IncorrectSchemaException */ private Validator validatorByUrl(String url) throws SAXException, IOException, 
IncorrectSchemaException { if (loadedValidatorUrls.contains(url)) { return null; } loadedValidatorUrls.add(url); if ("http://s.validator.nu/xhtml5.rnc".equals(url) || "http://s.validator.nu/html5.rnc".equals(url) || "http://s.validator.nu/html5-all.rnc".equals(url) || "http://s.validator.nu/xhtml5-all.rnc".equals(url) || "http://s.validator.nu/html5-its.rnc".equals(url) || "http://s.validator.nu/xhtml5-rdfalite.rnc".equals(url) || "http://s.validator.nu/html5-rdfalite.rnc".equals(url)) { errorHandler.setSpec(html5spec); } Schema sch = resolveSchema(url, jingPropertyMap); Validator validator = sch.createValidator(jingPropertyMap); if (validator.getContentHandler() instanceof XmlPiChecker) { lexicalHandler = (LexicalHandler) validator.getContentHandler(); } return validator; } @Override public Schema resolveSchema(String url, PropertyMap options) throws SAXException, IOException, IncorrectSchemaException { int i = Arrays.binarySearch(preloadedSchemaUrls, url); if (i > -1) { Schema rv = preloadedSchemas[i]; if (options.contains(WrapProperty.ATTRIBUTE_OWNER)) { if (rv instanceof CheckerSchema) { errorHandler.error(new SAXParseException( "A non-schema checker cannot be used as an attribute schema.", null, url, -1, -1)); throw new IncorrectSchemaException(); } else { // ugly fall through } } else { return rv; } } externalSchema = true; TypedInputSource schemaInput = (TypedInputSource) entityResolver.resolveEntity( null, url); SchemaReader sr = null; if ("application/relax-ng-compact-syntax".equals(schemaInput.getType())) { sr = CompactSchemaReader.getInstance(); } else { sr = new AutoSchemaReader(); } Schema sch = sr.createSchema(schemaInput, options); if (Statistics.STATISTICS != null && "com.thaiopensource.validate.schematron.SchemaImpl".equals(sch.getClass().getName())) { externalSchematron = true; } return sch; } /** * @param url * @return * @throws SAXException * @throws IOException * @throws IncorrectSchemaException */ private static Schema schemaByUrl(String url, 
EntityResolver resolver, PropertyMap pMap) throws SAXException, IOException, IncorrectSchemaException { log4j.debug("Will load schema: " + url); TypedInputSource schemaInput; try { schemaInput = (TypedInputSource) resolver.resolveEntity( null, url); } catch (ClassCastException e) { log4j.fatal(url, e); throw e; } SchemaReader sr = null; if ("application/relax-ng-compact-syntax".equals(schemaInput.getType())) { sr = CompactSchemaReader.getInstance(); } else { sr = new AutoSchemaReader(); } Schema sch = sr.createSchema(schemaInput, pMap); return sch; } /** * @throws SAXException */ void emitTitle(boolean markupAllowed) throws SAXException { if (willValidate()) { emitter.characters(RESULTS_TITLE); emitter.characters(FOR); if (document != null && document.length() > 0) { emitter.characters(scrub(shortenDataUri(document))); } else if (request.getAttribute("nu.validator.servlet.MultipartFormDataFilter.filename") != null) { emitter.characters("uploaded file " + scrub(request.getAttribute( "nu.validator.servlet.MultipartFormDataFilter.filename").toString())); } else { emitter.characters("contents of text-input area"); } } else { emitter.characters(SERVICE_TITLE); if (markupAllowed && System.getProperty("nu.validator.servlet.service-name", "").equals("Validator.nu")) { emitter.startElement("span"); emitter.characters(LIVING_VERSION); emitter.endElement("span"); } } } protected String shortenDataUri(String uri) { if (DataUri.startsWithData(uri)) { return "data:\u2026"; } else { return uri; } } void emitForm() throws SAXException { attrs.clear(); attrs.addAttribute("method", "get"); // attrs.addAttribute("action", request.getRequestURL().toString()); if (isSimple()) { attrs.addAttribute("class", "simple"); } // attrs.addAttribute("onsubmit", "formSubmission()"); emitter.startElement("form", attrs); emitFormContent(); emitter.endElement("form"); } protected boolean isSimple() { return false; } /** * @throws SAXException */ protected void emitFormContent() throws SAXException { 
FormEmitter.emit(contentHandler, this); } void emitSchemaField() throws SAXException { attrs.clear(); attrs.addAttribute("name", "schema"); attrs.addAttribute("id", "schema"); // attrs.addAttribute("onchange", "schemaChanged();"); attrs.addAttribute( "pattern", "(?:(?:(?:https?://\\S+)|(?:data:\\S+))(?:\\s+(?:(?:https?://\\S+)|(?:data:\\S+)))*)?"); attrs.addAttribute("title", "Space-separated list of schema IRIs. (Leave blank to let the service guess.)"); if (schemaUrls != null) { attrs.addAttribute("value", scrub(schemaUrls)); } emitter.startElement("input", attrs); emitter.endElement("input"); } void emitDocField() throws SAXException { attrs.clear(); attrs.addAttribute("type", "url"); attrs.addAttribute("name", "doc"); attrs.addAttribute("id", "doc"); attrs.addAttribute("pattern", "(?:(?:https?://.+)|(?:data:.+))?"); attrs.addAttribute("title", "Absolute IRI (http, https or data only) of the document to be checked."); attrs.addAttribute("tabindex", "0"); attrs.addAttribute("autofocus", "autofocus"); if (document != null) { attrs.addAttribute("value", scrub(document)); } Object att = request.getAttribute("nu.validator.servlet.MultipartFormDataFilter.type"); if (att != null) { attrs.addAttribute("class", att.toString()); } emitter.startElement("input", attrs); emitter.endElement("input"); } /** * @throws SAXException * */ void emitSchemaDuration() throws SAXException { } /** * @throws SAXException * */ void emitDocDuration() throws SAXException { } /** * @throws SAXException * */ void emitTotalDuration() throws SAXException { emitter.characters("" + (System.currentTimeMillis() - start)); } /** * @throws SAXException * */ void emitPresetOptions() throws SAXException { for (int i = 0; i < presetUrls.length; i++) { emitter.option(presetLabels[i], presetUrls[i], false); } } /** * @throws SAXException * */ void emitParserOptions() throws SAXException { emitter.option("Automatically from Content-Type", "", (parser == ParserMode.AUTO)); emitter.option("XML; don\u2019t 
load external entities", "xml", (parser == ParserMode.XML_NO_EXTERNAL_ENTITIES)); emitter.option("XML; load external entities", "xmldtd", (parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION)); emitter.option("HTML; flavor from doctype", "html", (parser == ParserMode.HTML_AUTO)); emitter.option("HTML5", "html5", (parser == ParserMode.HTML)); emitter.option("HTML 4.01 Strict", "html4", (parser == ParserMode.HTML401_STRICT)); emitter.option("HTML 4.01 Transitional", "html4tr", (parser == ParserMode.HTML401_TRANSITIONAL)); } /** * @throws SAXException * */ void emitProfileOptions() throws SAXException { profile = request.getParameter("profile"); emitter.option("Permissive: only what the spec requires", "", ("".equals(profile))); emitter.option("Pedagogical: suitable for teaching purposes", "pedagogical", ("pedagogical".equals(profile))); emitter.option("Polyglot: works both as HTML and as XML", "polyglot", ("polyglot".equals(profile))); } /** * @throws SAXException * */ void emitLaxTypeField() throws SAXException { emitter.checkbox("laxtype", "yes", laxType); } /** * @throws SAXException * */ void emitShowSourceField() throws SAXException { emitter.checkbox("showsource", "yes", showSource); } /** * @throws SAXException * */ void emitShowOutlineField() throws SAXException { emitter.checkbox("showoutline", "yes", showOutline); } /** * @throws SAXException * */ void emitShowImageReportField() throws SAXException { emitter.checkbox("showimagereport", "yes", imageCollector != null); } void rootNamespace(String namespace, Locator locator) throws SAXException { if (validator == null) { int index = -1; for (int i = 0; i < presetNamespaces.length; i++) { if (namespace.equals(presetNamespaces[i])) { index = i; break; } } if (index == -1) { String message = "Cannot find preset schema for namespace: \u201C" + namespace + "\u201D."; SAXException se = new SAXException(message); errorHandler.schemaError(se); throw se; } String label = presetLabels[index]; String urls = 
presetUrls[index]; errorHandler.info("Using the preset for " + label + " based on the root namespace."); try { validator = validatorByUrls(urls); } catch (IncorrectSchemaException | IOException e) { // At this point the schema comes from memory. throw new RuntimeException(e); } if (bufferingRootNamespaceSniffer == null) { throw new RuntimeException( "Bug! bufferingRootNamespaceSniffer was null."); } bufferingRootNamespaceSniffer.setContentHandler(validator.getContentHandler()); } if (!rootNamespaceSeen) { rootNamespaceSeen = true; if (contentType != null) { int i; if ((i = Arrays.binarySearch(KNOWN_CONTENT_TYPES, contentType)) > -1) { if (!NAMESPACES_FOR_KNOWN_CONTENT_TYPES[i].equals(namespace)) { String message = "".equals(namespace) ? "\u201C" + contentType + "\u201D is not an appropriate Content-Type for a document whose root element is not in a namespace." : "\u201C" + contentType + "\u201D is not an appropriate Content-Type for a document whose root namespace is \u201C" + namespace + "\u201D."; SAXParseException spe = new SAXParseException(message, locator); errorHandler.warning(spe); } } } } } @Override public void documentMode(DocumentMode mode, String publicIdentifier, String systemIdentifier, boolean html4SpecificAdditionalErrorChecks) throws SAXException { if (validator == null) { try { if ("yes".equals(request.getParameter("sniffdoctype"))) { if ("-//W3C//DTD XHTML 1.0 Transitional//EN".equals(publicIdentifier)) { errorHandler.info("XHTML 1.0 Transitional doctype seen. Appendix C is not supported. Proceeding anyway for your convenience. The parser is still an HTML parser, so namespace processing is not performed and \u201Cxml:*\u201D attributes are not supported. Using the schema for " + getPresetLabel(XHTML1TRANSITIONAL_SCHEMA) + "." + (html4SpecificAdditionalErrorChecks ? " HTML4-specific tokenization errors are enabled." 
: "")); validator = validatorByDoctype(XHTML1TRANSITIONAL_SCHEMA); } else if ("-//W3C//DTD XHTML 1.0 Strict//EN".equals(publicIdentifier)) { errorHandler.info("XHTML 1.0 Strict doctype seen. Appendix C is not supported. Proceeding anyway for your convenience. The parser is still an HTML parser, so namespace processing is not performed and \u201Cxml:*\u201D attributes are not supported. Using the schema for " + getPresetLabel(XHTML1STRICT_SCHEMA) + "." + (html4SpecificAdditionalErrorChecks ? " HTML4-specific tokenization errors are enabled." : "")); validator = validatorByDoctype(XHTML1STRICT_SCHEMA); } else if ("-//W3C//DTD HTML 4.01 Transitional//EN".equals(publicIdentifier)) { errorHandler.info("HTML 4.01 Transitional doctype seen. Using the schema for " + getPresetLabel(XHTML1TRANSITIONAL_SCHEMA) + "." + (html4SpecificAdditionalErrorChecks ? "" : " HTML4-specific tokenization errors are not enabled.")); validator = validatorByDoctype(XHTML1TRANSITIONAL_SCHEMA); } else if ("-//W3C//DTD HTML 4.01//EN".equals(publicIdentifier)) { errorHandler.info("HTML 4.01 Strict doctype seen. Using the schema for " + getPresetLabel(XHTML1STRICT_SCHEMA) + "." + (html4SpecificAdditionalErrorChecks ? "" : " HTML4-specific tokenization errors are not enabled.")); validator = validatorByDoctype(XHTML1STRICT_SCHEMA); } else if ("-//W3C//DTD HTML 4.0 Transitional//EN".equals(publicIdentifier)) { errorHandler.info("Legacy HTML 4.0 Transitional doctype seen. Please consider using HTML 4.01 Transitional instead. Proceeding anyway for your convenience with the schema for " + getPresetLabel(XHTML1TRANSITIONAL_SCHEMA) + "." + (html4SpecificAdditionalErrorChecks ? "" : " HTML4-specific tokenization errors are not enabled.")); validator = validatorByDoctype(XHTML1TRANSITIONAL_SCHEMA); } else if ("-//W3C//DTD HTML 4.0//EN".equals(publicIdentifier)) { errorHandler.info("Legacy HTML 4.0 Strict doctype seen. Please consider using HTML 4.01 instead. 
Proceeding anyway for your convenience with the schema for " + getPresetLabel(XHTML1STRICT_SCHEMA) + "." + (html4SpecificAdditionalErrorChecks ? "" : " HTML4-specific tokenization errors are not enabled.")); validator = validatorByDoctype(XHTML1STRICT_SCHEMA); } } else { schemaIsDefault = true; if (html4SpecificAdditionalErrorChecks) { errorHandler.info("HTML4-specific tokenization errors are enabled."); } validator = validatorByDoctype(HTML5_SCHEMA); } } catch (IncorrectSchemaException | IOException e) { // At this point the schema comes from memory. throw new RuntimeException(e); } ContentHandler ch = validator.getContentHandler(); ch.setDocumentLocator(htmlParser.getDocumentLocator()); ch.startDocument(); reader.setContentHandler(ch); } else { if (html4SpecificAdditionalErrorChecks) { errorHandler.info("HTML4-specific tokenization errors are enabled."); } } } private String getPresetLabel(int schemaId) { for (int i = 0; i < presetDoctypes.length; i++) { if (presetDoctypes[i] == schemaId) { return presetLabels[i]; } } return "unknown"; } /** * @param acceptAllKnownXmlTypes * @see nu.validator.xml.ContentTypeParser#setAcceptAllKnownXmlTypes(boolean) */ protected void setAcceptAllKnownXmlTypes(boolean acceptAllKnownXmlTypes) { contentTypeParser.setAcceptAllKnownXmlTypes(acceptAllKnownXmlTypes); dataRes.setAcceptAllKnownXmlTypes(acceptAllKnownXmlTypes); httpRes.setAcceptAllKnownXmlTypes(acceptAllKnownXmlTypes); } /** * @param allowGenericXml * @see nu.validator.xml.ContentTypeParser#setAllowGenericXml(boolean) */ protected void setAllowGenericXml(boolean allowGenericXml) { contentTypeParser.setAllowGenericXml(allowGenericXml); httpRes.setAllowGenericXml(allowGenericXml); dataRes.setAllowGenericXml(allowGenericXml); } /** * @param allowHtml * @see nu.validator.xml.ContentTypeParser#setAllowHtml(boolean) */ protected void setAllowHtml(boolean allowHtml) { contentTypeParser.setAllowHtml(allowHtml); httpRes.setAllowHtml(allowHtml); dataRes.setAllowHtml(allowHtml); } /** 
* @param allowRnc * @see nu.validator.xml.ContentTypeParser#setAllowRnc(boolean) */ protected void setAllowRnc(boolean allowRnc) { contentTypeParser.setAllowRnc(allowRnc); httpRes.setAllowRnc(allowRnc); dataRes.setAllowRnc(allowRnc); entityResolver.setAllowRnc(allowRnc); } /** * @param allowXhtml * @see nu.validator.xml.ContentTypeParser#setAllowXhtml(boolean) */ protected void setAllowXhtml(boolean allowXhtml) { contentTypeParser.setAllowXhtml(allowXhtml); httpRes.setAllowXhtml(allowXhtml); dataRes.setAllowXhtml(allowXhtml); } /** * @throws SAXException * @throws IOException */ protected void loadDocumentInput() throws SAXException, IOException { if (methodIsGet) { documentInput = (TypedInputSource) entityResolver.resolveEntity( null, document); errorHandler.setLoggingOk(true); } else { // POST long len = request.getContentLength(); if (len > SIZE_LIMIT) { throw new StreamBoundException("Resource size exceeds limit."); } documentInput = contentTypeParser.buildTypedInputSource(document, null, postContentType); documentInput.setByteStream(len < 0 ? 
new BoundedInputStream( request.getInputStream(), SIZE_LIMIT, document) : request.getInputStream()); documentInput.setSystemId(request.getHeader("Content-Location")); } if (imageCollector != null) { baseUriTracker = new BaseUriTracker(documentInput.getSystemId(), documentInput.getLanguage()); imageCollector.initializeContext(baseUriTracker); } } void emitStyle() throws SAXException { attrs.clear(); attrs.addAttribute("href", STYLE_SHEET); attrs.addAttribute("rel", "stylesheet"); emitter.startElement("link", attrs); emitter.endElement("link"); } void emitIcon() throws SAXException { attrs.clear(); attrs.addAttribute("href", ICON); attrs.addAttribute("rel", "icon"); emitter.startElement("link", attrs); emitter.endElement("link"); } void emitScript() throws SAXException { attrs.clear(); attrs.addAttribute("src", SCRIPT); emitter.startElement("script", attrs); emitter.endElement("script"); } void emitAbout() throws SAXException { attrs.clear(); attrs.addAttribute("href", ABOUT_PAGE); emitter.startElement("a", attrs); emitter.characters(ABOUT_THIS_SERVICE); emitter.endElement("a"); } void emitVersion() throws SAXException { emitter.characters(VERSION); } void emitUserAgentInput() throws SAXException { attrs.clear(); attrs.addAttribute("name", "useragent"); attrs.addAttribute("list", "useragents"); attrs.addAttribute("value", userAgent); emitter.startElement("input", attrs); emitter.endElement("input"); } void emitOtherFacetLink() throws SAXException { attrs.clear(); attrs.addAttribute("href", HTML5_FACET); emitter.startElement("a", attrs); emitter.characters(SIMPLE_UI); emitter.endElement("a"); } void emitNsfilterField() throws SAXException { attrs.clear(); attrs.addAttribute("name", "nsfilter"); attrs.addAttribute("id", "nsfilter"); attrs.addAttribute("pattern", "(?:.+:.+(?:\\s+.+:.+)*)?"); attrs.addAttribute("title", "Space-separated namespace URIs for vocabularies to be filtered out."); if (!filteredNamespaces.isEmpty()) { StringBuilder sb = new StringBuilder(); 
boolean first = true; for (String ns : filteredNamespaces) { if (!first) { sb.append(' '); } sb.append(ns); first = false; } attrs.addAttribute("value", scrub(sb)); } emitter.startElement("input", attrs); emitter.endElement("input"); } void maybeEmitNsfilterField() throws SAXException { NsFilterEmitter.emit(contentHandler, this); } void emitCharsetOptions() throws SAXException { boolean found = false; for (int i = 0; i < CHARSETS.length; i++) { String charset = CHARSETS[i]; boolean selected = charset.equalsIgnoreCase(charsetOverride); // XXX // use // ASCII-caseinsensitivity emitter.option(CHARSET_DESCRIPTIONS[i], charset, selected); if (selected) { found = true; } } if (!found && charsetOverride != null) { emitter.option(charsetOverride, charsetOverride, true); } } void maybeEmitCharsetField() throws SAXException { CharsetEmitter.emit(contentHandler, this); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy