nu.validator.client.EmbeddedValidator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of validator Show documentation
An HTML-checking library (used by https://html5.validator.nu and the HTML5 facet of the W3C Validator)
/*
* Copyright (c) 2017 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.client;
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.atomic.AtomicBoolean;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import nu.validator.htmlparser.sax.XmlSerializer;
import nu.validator.json.Serializer;
import nu.validator.messages.GnuMessageEmitter;
import nu.validator.messages.JsonMessageEmitter;
import nu.validator.messages.MessageEmitter;
import nu.validator.messages.MessageEmitterAdapter;
import nu.validator.messages.TextMessageEmitter;
import nu.validator.messages.XmlMessageEmitter;
import nu.validator.servlet.imagereview.ImageCollector;
import nu.validator.validation.SimpleDocumentValidator;
import nu.validator.xml.SystemErrErrorHandler;
/**
 * Nu validator client for HTML validation from within another
 * application.
 *
 * <p>
 * An instance only holds configuration (output format, schema URL and a few
 * flags). Every call to one of the {@code validate} methods builds a fresh,
 * single-use validator from the configuration current at that moment and
 * returns the complete report as a {@link String}.
 *
 * <p>
 * The configuration fields are plain, unsynchronized fields; this class does
 * nothing itself to coordinate concurrent calls to setters and
 * {@code validate}.
 */
public class EmbeddedValidator {

    /** Schema URL used when none (or {@code null}) has been configured. */
    public static final String SCHEMA_URL = "http://s.validator.nu/html5-rdfalite.rnc";

    /** Serialization formats available for the validation report. */
    public enum OutputFormat {
        TEXT, XML, JSON, GNU
    }

    private boolean asciiQuotes = false;

    // Named after the public accessors (isNoLangDetect/setNoLangDetect); the
    // value is negated before it is handed to SimpleDocumentValidator.
    private boolean noLangDetect = false;

    private boolean forceHTML = false;

    private int lineOffset = 0;

    private boolean loadEntities = false;

    private boolean noStream = false;

    private OutputFormat outputFormat = OutputFormat.JSON;

    private String schemaUrl = SCHEMA_URL;

    /**
     * Validate the file at the given path.
     *
     * <p>
     * A missing or unreadable file, a file whose name does not end in
     * {@code .html}, {@code .htm}, {@code .xhtml} or {@code .xht}, and any
     * {@link SAXException} raised while checking the file are reported as
     * warnings inside the returned output rather than thrown. Files ending in
     * {@code .xhtml}/{@code .xht} are checked as XML unless
     * {@link #setForceHTML(boolean) forceHTML} is set.
     *
     * @param path
     *            the {@link Path} of the file to check, not {@code null}
     * @return validation output {@link String} in the configured
     *         {@link OutputFormat}, decoded as UTF-8
     * @throws IllegalStateException
     *             if the main schema could not be set up
     * @throws IOException
     *             if the underlying validator fails with an I/O error
     * @throws SAXException
     *             if the message emitter could not be started or the report
     *             could not be written
     */
    public String validate(Path path) throws IOException, SAXException {
        try (OneOffValidator validator = newOneOffValidator()) {
            return validator.validate(path);
        }
    }

    /**
     * Validate the content of the given stream as HTML.
     *
     * <p>
     * The stream is not closed by this method. Unlike
     * {@link #validate(Path)}, a {@link SAXException} raised while checking
     * is propagated to the caller.
     *
     * @param in
     *            an {@link InputStream} supplying the document to check, not
     *            {@code null}
     * @return validation output {@link String} in the configured
     *         {@link OutputFormat}, decoded as UTF-8
     * @throws IllegalStateException
     *             if the main schema could not be set up
     * @throws IOException
     *             if the underlying validator fails with an I/O error
     * @throws SAXException
     *             if the underlying validator or message emitter fails
     */
    public String validate(InputStream in) throws IOException, SAXException {
        try (OneOffValidator validator = newOneOffValidator()) {
            return validator.validate(in);
        }
    }

    /**
     * Builds a single-use validator from the current configuration.
     */
    private OneOffValidator newOneOffValidator() throws SAXException {
        return new OneOffValidator(asciiQuotes, noLangDetect, forceHTML, lineOffset, loadEntities, noStream,
                outputFormat, schemaUrl);
    }

    /**
     * @return the {@link OutputFormat} used for the validation report
     */
    public OutputFormat getOutputFormat() {
        return outputFormat;
    }

    /**
     * Set the output format for the validation
     *
     * @param outputFormat
     *            {@link OutputFormat}, not {@code null}
     * @throws IllegalArgumentException
     *             if argument is {@code null}
     */
    public void setOutputFormat(OutputFormat outputFormat) {
        if (outputFormat == null) {
            throw new IllegalArgumentException("outputFormat can not be null");
        }
        this.outputFormat = outputFormat;
    }

    /**
     * @return the value last passed to {@link #setLoadEntities(boolean)}
     */
    public boolean isLoadEntities() {
        return loadEntities;
    }

    /**
     * @param loadEntities
     *            {@code true} to have XML parser load remote DTDs, etc
     */
    public void setLoadEntities(boolean loadEntities) {
        this.loadEntities = loadEntities;
    }

    /**
     * @return the value last passed to {@link #setNoLangDetect(boolean)}
     */
    public boolean isNoLangDetect() {
        return noLangDetect;
    }

    /**
     * The negation of this flag is passed as the third constructor argument
     * of {@code SimpleDocumentValidator}.
     *
     * <p>
     * NOTE(review): assuming that argument switches language detection on,
     * {@code true} here disables detection (as the method name says) and the
     * default {@code false} leaves it enabled - confirm against
     * {@code SimpleDocumentValidator}.
     *
     * @param noLangDetect
     *            {@code true} to disable language detection, {@code false}
     *            to leave it enabled (see note above)
     */
    public void setNoLangDetect(boolean noLangDetect) {
        this.noLangDetect = noLangDetect;
    }

    /**
     * @return the value last passed to {@link #setNoStream(boolean)}
     */
    public boolean isNoStream() {
        return noStream;
    }

    /**
     * @param noStream
     *            if {@code true}, HTML parser will buffer instead of streaming
     */
    public void setNoStream(boolean noStream) {
        this.noStream = noStream;
    }

    /**
     * @return the value last passed to {@link #setForceHTML(boolean)}
     */
    public boolean isForceHTML() {
        return forceHTML;
    }

    /**
     * @param forceHTML
     *            if {@code true}, files with an {@code .xhtml} or
     *            {@code .xht} extension are checked as HTML instead of XML
     */
    public void setForceHTML(boolean forceHTML) {
        this.forceHTML = forceHTML;
    }

    /**
     * @return the value last passed to {@link #setAsciiQuotes(boolean)}
     */
    public boolean isAsciiQuotes() {
        return asciiQuotes;
    }

    /**
     * Only the {@link OutputFormat#TEXT} and {@link OutputFormat#GNU}
     * emitters receive this flag.
     *
     * @param asciiQuotes
     *            {@code true} if curly quotes (U+2018 and U+2019) in emitted
     *            warnings and errors should be replaced with ascii quotes
     */
    public void setAsciiQuotes(boolean asciiQuotes) {
        this.asciiQuotes = asciiQuotes;
    }

    /**
     * @return the value last passed to {@link #setLineOffset(int)}
     */
    public int getLineOffset() {
        return lineOffset;
    }

    /**
     * @param lineOffset
     *            offset to add or subtract from the line number in emitted
     *            warnings and errors
     */
    public void setLineOffset(int lineOffset) {
        this.lineOffset = lineOffset;
    }

    /**
     * @return the configured schema URL; {@code null} if {@code null} was
     *         passed to {@link #setSchemaUrl(String)}, in which case
     *         {@link #SCHEMA_URL} is used when validating
     */
    public String getSchemaUrl() {
        return schemaUrl;
    }

    /**
     * Default value is {@value #SCHEMA_URL}
     *
     * @param schemaUrl
     *            url to the required schema; must start with {@code http:},
     *            or be {@code null} to fall back to {@link #SCHEMA_URL}
     * @throws IllegalArgumentException
     *             if the argument is not {@code null} and does not start
     *             with {@code http:}
     */
    public void setSchemaUrl(String schemaUrl) {
        if (schemaUrl != null && !schemaUrl.startsWith("http:")) {
            throw new IllegalArgumentException("schemaUrl should be a URL");
        }
        this.schemaUrl = schemaUrl;
    }

    /**
     * Self-contained, single use class for encapsulated building of an embedded
     * validator.
     *
     * <p>
     * All messages are emitted into an in-memory buffer which is decoded and
     * returned by the {@code validate} methods. A second call to either
     * {@code validate} method fails with {@link IllegalStateException}.
     * Declared {@code static} because it uses no state of the enclosing
     * instance.
     */
    private static final class OneOffValidator implements AutoCloseable {

        private static final String MSG_SUCCESS = "Document checking completed. No errors found.";

        private static final String MSG_FAIL = "Document checking completed.";

        private static final String EXTENSION_ERROR = "File was not checked. Files must have .html, .xhtml, .htm, or .xht extensions.";

        /** Flipped to {@code true} by the first {@code validate} call. */
        private final AtomicBoolean used = new AtomicBoolean(false);

        /** Receives everything the message emitter writes. */
        private final ByteArrayOutputStream out = new ByteArrayOutputStream();

        private final SimpleDocumentValidator validator;

        private final MessageEmitterAdapter errorHandler;

        private final boolean forceHtml;

        /**
         * Creates the document validator and its error handler, loads the
         * main schema and sets up the parsers.
         *
         * @throws IllegalStateException
         *             wrapping any exception thrown while setting up the
         *             main schema
         * @throws SAXException
         *             if the error handler cannot be started or the
         *             validator and parsers cannot be set up
         */
        private OneOffValidator(boolean asciiQuotes, boolean noLangDetect, boolean forceHtml, int lineOffset,
                boolean loadEntities, boolean noStream, OutputFormat outputFormat, String schemaUrl)
                throws SAXException {
            this.validator = new SimpleDocumentValidator(true, false, !noLangDetect);
            // Must come after this.validator is assigned: the error handler
            // is built around the validator's source code object.
            this.errorHandler = newErrorHandler(lineOffset, asciiQuotes, outputFormat);
            this.forceHtml = forceHtml;
            try {
                this.validator.setUpMainSchema(schemaUrl == null ? SCHEMA_URL : schemaUrl, new SystemErrErrorHandler());
            } catch (Exception e) {
                throw new IllegalStateException(e);
            }
            this.validator.setUpValidatorAndParsers(errorHandler, noStream, loadEntities);
        }

        /**
         * Creates and starts the adapter that routes validator messages to
         * the emitter for the requested output format.
         */
        private MessageEmitterAdapter newErrorHandler(int lineOffset, boolean asciiQuotes, OutputFormat outputFormat)
                throws SAXException {
            boolean showSource = true;
            boolean batchMode = true;
            MessageEmitterAdapter adapter = new MessageEmitterAdapter(null, this.validator.getSourceCode(), showSource,
                    new ImageCollector(this.validator.getSourceCode()), lineOffset, batchMode,
                    newEmitter(asciiQuotes, outputFormat));
            adapter.setErrorsOnly(false);
            adapter.setHtml(true);
            adapter.start(null);
            return adapter;
        }

        /**
         * Creates the emitter for the given format, writing to the in-memory
         * buffer.
         */
        private MessageEmitter newEmitter(boolean asciiQuotes, OutputFormat outputFormat) {
            switch (outputFormat) {
            case TEXT:
                return new TextMessageEmitter(this.out, asciiQuotes);
            case GNU:
                return new GnuMessageEmitter(this.out, asciiQuotes);
            case JSON:
                return new JsonMessageEmitter(new Serializer(this.out), null);
            case XML:
                return new XmlMessageEmitter(new XmlSerializer(this.out));
            default:
                throw new UnsupportedOperationException("OutputFormat " + outputFormat + " not supported");
            }
        }

        /**
         * Checks the file at {@code path} and returns the report. Problems
         * with the file itself and any {@link SAXException} raised while
         * checking are turned into warnings in the report.
         */
        private String validate(Path path) throws IOException, SAXException {
            markUsed();
            try {
                if (Files.notExists(path) || !Files.isReadable(path)) {
                    errorHandler.warning(new SAXParseException(
                            "File not found.", null, path.toString(), -1, -1));
                } else {
                    File file = path.toFile();
                    if (isXhtml(file) && !forceHtml) {
                        validator.checkXmlFile(file);
                    } else if (isXhtml(file) || isHtml(file)) {
                        validator.checkHtmlFile(file, true);
                    } else {
                        errorHandler.warning(new SAXParseException(EXTENSION_ERROR, null, path.toString(), -1, -1));
                    }
                }
            } catch (SAXException e) {
                errorHandler.warning(new SAXParseException(e.getMessage(), null, path.toString(), -1, -1));
            }
            return finish();
        }

        /** Tells whether the file name ends in {@code .xhtml} or {@code .xht} (case-sensitive). */
        private static boolean isXhtml(File file) {
            String name = file.getName();
            return name.endsWith(".xhtml") || name.endsWith(".xht");
        }

        /** Tells whether the file name ends in {@code .html} or {@code .htm} (case-sensitive). */
        private static boolean isHtml(File file) {
            String name = file.getName();
            return name.endsWith(".html") || name.endsWith(".htm");
        }

        /**
         * Checks the content of {@code in} as HTML and returns the report.
         * Exceptions from the validator are propagated; the stream is left
         * open.
         */
        private String validate(InputStream in) throws IOException, SAXException {
            markUsed();
            validator.checkHtmlInputSource(new InputSource(in));
            return finish();
        }

        /**
         * Enforces the single-use contract.
         *
         * @throws IllegalStateException
         *             if a {@code validate} method was already called
         */
        private void markUsed() {
            if (!used.compareAndSet(false, true)) {
                throw new IllegalStateException("OneOffValidator instances are not reusable");
            }
        }

        /**
         * Ends the report and returns the buffered output decoded as UTF-8.
         */
        private String finish() throws SAXException {
            errorHandler.end(MSG_SUCCESS, MSG_FAIL, "");
            return new String(out.toByteArray(), StandardCharsets.UTF_8);
        }

        @Override
        public void close() {
            try {
                out.close();
            } catch (IOException ignored) {
                // Deliberately ignored: the buffer is purely in-memory, and
                // the JDK documents closing a ByteArrayOutputStream as
                // having no effect, so there is nothing to report or retry.
            }
        }
    }
}