All downloads are free. The search and download features use the official Maven repository.

nu.validator.client.EmbeddedValidator Maven / Gradle / Ivy

Go to download

An HTML-checking library (used by https://html5.validator.nu and the HTML5 facet of the W3C Validator)

There is a newer version: 20.7.2
Show newest version
/*
 * Copyright (c) 2017 Mozilla Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
package nu.validator.client;

import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.atomic.AtomicBoolean;

import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

import nu.validator.htmlparser.sax.XmlSerializer;
import nu.validator.json.Serializer;
import nu.validator.messages.GnuMessageEmitter;
import nu.validator.messages.JsonMessageEmitter;
import nu.validator.messages.MessageEmitter;
import nu.validator.messages.MessageEmitterAdapter;
import nu.validator.messages.TextMessageEmitter;
import nu.validator.messages.XmlMessageEmitter;
import nu.validator.servlet.imagereview.ImageCollector;
import nu.validator.validation.SimpleDocumentValidator;
import nu.validator.xml.SystemErrErrorHandler;

/**
 * Nu validator client for HTML validation from within another
 * application.
 *
 * <p>Configure an instance through its setters, then call
 * {@link #validate(Path)} or {@link #validate(InputStream)}. Every call
 * builds a fresh, single-use validator from the settings current at that
 * moment and returns the emitted messages as a string in the selected
 * {@link OutputFormat}.</p>
 *
 * <p>The configuration is held in plain, unsynchronized fields.</p>
 */
public class EmbeddedValidator {

    /** Schema URL used when no other schema URL has been configured. */
    public static final String SCHEMA_URL = "http://s.validator.nu/html5-rdfalite.rnc";

    /** Formats in which validation messages can be emitted. */
    public static enum OutputFormat {
        TEXT, XML, JSON, GNU
    }

    private boolean asciiQuotes = false;
    private boolean detectLanguages = false;
    private boolean forceHTML = false;
    private int lineOffset = 0;
    private boolean loadEntities = false;
    private boolean noStream = false;
    private OutputFormat outputFormat = OutputFormat.JSON;
    private String schemaUrl = SCHEMA_URL;

    /**
     * Validate the file at the given path.
     *
     * <p>A missing or unreadable file, or a file whose name does not end in
     * {@code .html}, {@code .htm}, {@code .xhtml} or {@code .xht}, is
     * reported as a warning in the returned output rather than as an
     * exception.</p>
     *
     * @param path
     *            a valid {@link Path} to a readable file
     * @return validation output {@link String}
     * @throws IllegalStateException
     *             if the main schema can not be set up
     * @throws IOException
     *             if thrown by the underlying validator
     * @throws SAXException
     *             if the message emitter fails
     */
    public String validate(Path path) throws IOException, SAXException {
        try (OneOffValidator validator = newOneOffValidator()) {
            return validator.validate(path);
        }
    }

    /**
     * Validate the input source. The content is always checked as HTML.
     *
     * @param in
     *            a valid {@link InputStream} to a readable file
     * @return validation output {@link String}
     * @throws IllegalStateException
     *             if the main schema can not be set up
     * @throws IOException
     *             if thrown by the underlying validator
     * @throws SAXException
     *             if thrown by the underlying validator or the message
     *             emitter
     */
    public String validate(InputStream in) throws IOException, SAXException {
        try (OneOffValidator validator = newOneOffValidator()) {
            return validator.validate(in);
        }
    }

    /**
     * Builds a single-use validator from a snapshot of the current settings.
     */
    private OneOffValidator newOneOffValidator() throws SAXException {
        return new OneOffValidator(asciiQuotes, detectLanguages, forceHTML, lineOffset, loadEntities, noStream,
                outputFormat, schemaUrl);
    }

    /**
     * @return the configured output format, {@link OutputFormat#JSON} by
     *         default
     */
    public OutputFormat getOutputFormat() {
        return outputFormat;
    }

    /**
     * Set the output format for the validation
     *
     * @param outputFormat
     *            {@link OutputFormat}, not {@code null}
     * @throws IllegalArgumentException
     *             if argument is {@code null}
     */
    public void setOutputFormat(OutputFormat outputFormat) {
        if (outputFormat == null) {
            throw new IllegalArgumentException("outputFormat can not be null");
        }
        this.outputFormat = outputFormat;
    }

    /**
     * @return the value last passed to {@link #setLoadEntities(boolean)},
     *         {@code false} by default
     */
    public boolean isLoadEntities() {
        return loadEntities;
    }

    /**
     * @param loadEntities
     *            {@code true} to have XML parser load remote DTDs, etc
     */
    public void setLoadEntities(boolean loadEntities) {
        this.loadEntities = loadEntities;
    }

    /**
     * @return the value last passed to {@link #setNoLangDetect(boolean)},
     *         {@code false} by default
     */
    public boolean isNoLangDetect() {
        return detectLanguages;
    }

    /**
     * Sets the language-detection flag. The negation of this value is what
     * gets handed to the underlying {@code SimpleDocumentValidator} as its
     * third constructor argument.
     *
     * <p>NOTE(review): the method name suggests that {@code true} disables
     * language detection, while the backing field is named
     * {@code detectLanguages}; confirm the intended polarity against
     * {@code SimpleDocumentValidator}.</p>
     *
     * @param noLangDetect
     *            the flag value to store
     */
    public void setNoLangDetect(boolean noLangDetect) {
        this.detectLanguages = noLangDetect;
    }

    /**
     * @return the value last passed to {@link #setNoStream(boolean)},
     *         {@code false} by default
     */
    public boolean isNoStream() {
        return noStream;
    }

    /**
     * @param noStream
     *            if {@code true}, HTML parser will buffer instead of streaming
     */
    public void setNoStream(boolean noStream) {
        this.noStream = noStream;
    }

    /**
     * @return the value last passed to {@link #setForceHTML(boolean)},
     *         {@code false} by default
     */
    public boolean isForceHTML() {
        return forceHTML;
    }

    /**
     * @param forceHTML
     *            if {@code true}, input will be validated as HTML regardless of
     *            its actual document type
     */
    public void setForceHTML(boolean forceHTML) {
        this.forceHTML = forceHTML;
    }

    /**
     * @return the value last passed to {@link #setAsciiQuotes(boolean)},
     *         {@code false} by default
     */
    public boolean isAsciiQuotes() {
        return asciiQuotes;
    }

    /**
     * @param asciiQuotes
     *            {@code true} if curly quotes ({@code '\u2018'} and
     *            {@code '\u2019'}) in emitted warnings and errors should be
     *            replaced with ascii quotes
     */
    public void setAsciiQuotes(boolean asciiQuotes) {
        this.asciiQuotes = asciiQuotes;
    }

    /**
     * @return the configured line offset, {@code 0} by default
     */
    public int getLineOffset() {
        return lineOffset;
    }

    /**
     * @param lineOffset
     *            offset to add or subtract from the line number in emitted
     *            warnings and errors
     */
    public void setLineOffset(int lineOffset) {
        this.lineOffset = lineOffset;
    }

    /**
     * @return the configured schema URL; may be {@code null}, in which case
     *         {@value #SCHEMA_URL} is used when validating
     */
    public String getSchemaUrl() {
        return schemaUrl;
    }

    /**
     * Default value is {@value #SCHEMA_URL}
     *
     * @param schemaUrl
     *            url to the required schema; {@code null} is accepted and
     *            makes validation fall back to the default
     * @throws IllegalArgumentException
     *             if the argument is non-{@code null} and does not start
     *             with {@code "http:"}
     */
    public void setSchemaUrl(String schemaUrl) {
        if (schemaUrl != null && !schemaUrl.startsWith("http:")) {
            throw new IllegalArgumentException("schemaUrl should be a URL");
        }
        this.schemaUrl = schemaUrl;
    }

    /**
     * Self-contained, single use class for encapsulated building of an embedded
     * validator.
     *
     * <p>Declared {@code static} because it receives every setting through
     * its constructor and never needs the enclosing instance.</p>
     */
    private static final class OneOffValidator implements AutoCloseable {

        private static final String MSG_SUCCESS = "Document checking completed. No errors found.";
        private static final String MSG_FAIL = "Document checking completed.";
        private static final String EXTENSION_ERROR = "File was not checked. Files must have .html, .xhtml, .htm, or .xht extensions.";

        /** Flips to {@code true} on the first validate call; guards against reuse. */
        private final AtomicBoolean used = new AtomicBoolean(false);
        /** In-memory sink that the message emitter writes to. */
        private final ByteArrayOutputStream out = new ByteArrayOutputStream();
        // NOTE(review): nothing in this class writes to bufOut; the emitters
        // are given 'out' directly. It is kept so close() behaves as before.
        private final BufferedOutputStream bufOut = new BufferedOutputStream(out);

        private final SimpleDocumentValidator validator;
        private final MessageEmitterAdapter errorHandler;
        private final boolean forceHtml;

        /**
         * Builds the document validator, its message emitter and its parsers.
         *
         * @throws IllegalStateException
         *             if setting up the main schema fails for any reason
         * @throws SAXException
         *             if starting the message emitter fails
         */
        private OneOffValidator(boolean asciiQuotes, boolean detectLanguages, boolean forceHtml, int lineOffset, boolean loadEntities,
                boolean noStream, OutputFormat outputFormat, String schemaUrl) throws SAXException {
            // Order matters: newErrorHandler() reads this.validator.
            this.validator = new SimpleDocumentValidator(true, false, !detectLanguages);
            this.errorHandler = newErrorHandler(lineOffset, asciiQuotes, outputFormat);
            this.forceHtml = forceHtml;
            String mainSchemaUrl = schemaUrl == null ? SCHEMA_URL : schemaUrl;
            try {
                this.validator.setUpMainSchema(mainSchemaUrl, new SystemErrErrorHandler());
            } catch (Exception e) {
                throw new IllegalStateException("Unable to set up main schema " + mainSchemaUrl, e);
            }
            this.validator.setUpValidatorAndParsers(errorHandler, noStream, loadEntities);
        }

        /**
         * Creates and starts the adapter that receives validation messages and
         * forwards them to the emitter for the requested output format.
         */
        private MessageEmitterAdapter newErrorHandler(int lineOffset, boolean asciiQuotes, OutputFormat outputFormat) throws SAXException {
            boolean showSource = true;
            boolean batchMode = true;
            MessageEmitterAdapter adapter = new MessageEmitterAdapter(null, this.validator.getSourceCode(), showSource,
                    new ImageCollector(this.validator.getSourceCode()), lineOffset, batchMode, newEmitter(asciiQuotes, outputFormat));
            adapter.setErrorsOnly(false);
            adapter.setHtml(true);
            adapter.start(null);
            return adapter;
        }

        /**
         * Picks the emitter implementation for the output format; all of them
         * write to the in-memory stream.
         */
        private MessageEmitter newEmitter(boolean asciiQuotes, OutputFormat outputFormat) {
            switch (outputFormat) {
            case TEXT:
                return new TextMessageEmitter(this.out, asciiQuotes);
            case GNU:
                return new GnuMessageEmitter(this.out, asciiQuotes);
            case JSON:
                return new JsonMessageEmitter(new Serializer(this.out), null);
            case XML:
                return new XmlMessageEmitter(new XmlSerializer(this.out));
            default:
                throw new UnsupportedOperationException("OutputFormat " + outputFormat + " not supported");
            }
        }

        /**
         * Checks a file, choosing the XML or HTML checker from its extension.
         * Problems with the file itself and any {@link SAXException} raised
         * while checking are reported as warnings in the output.
         */
        private String validate(Path path) throws IOException, SAXException {
            markUsed();
            try {
                if (Files.notExists(path) || !Files.isReadable(path)) {
                    warn("File not found.", path);
                } else {
                    // Converted only once the file is known to exist, as before.
                    File file = path.toFile();
                    if (isXhtml(file)) {
                        if (forceHtml) {
                            validator.checkHtmlFile(file, true);
                        } else {
                            validator.checkXmlFile(file);
                        }
                    } else if (isHtml(file)) {
                        validator.checkHtmlFile(file, true);
                    } else {
                        warn(EXTENSION_ERROR, path);
                    }
                }
            } catch (SAXException e) {
                warn(e.getMessage(), path);
            }
            return finish();
        }

        /** Checks the stream as HTML; exceptions from the checker propagate. */
        private String validate(InputStream in) throws IOException, SAXException {
            markUsed();
            validator.checkHtmlInputSource(new InputSource(in));
            return finish();
        }

        /** Enforces the single-use contract of this class. */
        private void markUsed() {
            if (!used.compareAndSet(false, true)) {
                throw new IllegalStateException("OneOffValidator instances are not reusable");
            }
        }

        /** Emits a warning that is attributed to the given path, without line or column. */
        private void warn(String message, Path path) throws SAXException {
            errorHandler.warning(new SAXParseException(message, null, path.toString(), -1, -1));
        }

        /** Ends the message stream and decodes everything emitted as UTF-8. */
        private String finish() throws SAXException {
            errorHandler.end(MSG_SUCCESS, MSG_FAIL, "");
            return new String(out.toByteArray(), StandardCharsets.UTF_8);
        }

        /** Case-sensitive check for the {@code .xhtml} / {@code .xht} extensions. */
        private static boolean isXhtml(File file) {
            String name = file.getName();
            return name.endsWith(".xhtml") || name.endsWith(".xht");
        }

        /** Case-sensitive check for the {@code .html} / {@code .htm} extensions. */
        private static boolean isHtml(File file) {
            String name = file.getName();
            return name.endsWith(".html") || name.endsWith(".htm");
        }

        /**
         * Closes both streams on a best-effort basis. A failure to close is
         * deliberately ignored so that it can not mask the validation result
         * or an exception that is already propagating.
         */
        @Override
        public void close() {
            try {
                bufOut.close();
            } catch (IOException ignored) {
                // best-effort cleanup, see method comment
            }
            try {
                out.close();
            } catch (IOException ignored) {
                // best-effort cleanup, see method comment
            }
        }

    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy