All Downloads are FREE. Search and download functionality uses the official Maven repository.

nu.validator.client.SimpleCommandLineValidator Maven / Gradle / Ivy

Go to download

An HTML-checking library (used by https://html5.validator.nu and the HTML5 facet of the W3C Validator)

There is a newer version: 20.7.2
Show newest version
/*
 * Copyright (c) 2013-2018 Mozilla Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

package nu.validator.client;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.InputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.regex.Pattern;

import nu.validator.htmlparser.sax.XmlSerializer;
import nu.validator.io.SystemIdIOException;
import nu.validator.messages.GnuMessageEmitter;
import nu.validator.messages.JsonMessageEmitter;
import nu.validator.messages.MessageEmitterAdapter;
import nu.validator.messages.TextMessageEmitter;
import nu.validator.messages.XmlMessageEmitter;
import nu.validator.servlet.imagereview.ImageCollector;
import nu.validator.source.SourceCode;
import nu.validator.validation.SimpleDocumentValidator;
import nu.validator.validation.SimpleDocumentValidator.SchemaReadException;
import nu.validator.xml.SystemErrErrorHandler;

import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

/**
 * Simple command-line validator for HTML/XHTML files.
 *
 * <p>
 * Documents are named on the command line as file paths, directories (walked
 * recursively), {@code http://} / {@code https://} URLs, or {@code -} for
 * standard input. Validation messages are written to standard error in the
 * format selected with {@code --format} (GNU format when the option is
 * absent).
 *
 * <p>
 * The process exits with status 1 when errors were reported (or warnings,
 * with {@code --Werror}), unless {@code --exit-zero-always} was given.
 *
 * <p>
 * All state lives in static fields; {@link #main(String[])} reinitializes the
 * option flags each time it is entered.
 */
public class SimpleCommandLineValidator {

    /** Schema used for HTML documents; also the default main schema. */
    private static final String HTML_SCHEMA_URL = "http://s.validator.nu/html5-rdfalite.rnc";

    /** Schema switched to before checking .xhtml/.xht files as XML. */
    private static final String XHTML_SCHEMA_URL = "http://s.validator.nu/xhtml5-rdfalite.rnc";

    /** Schema switched to before checking SVG files. */
    private static final String SVG_SCHEMA_URL = "http://s.validator.nu/svg-xhtml5-rdf-mathml.rnc";

    private static Package pkg = SimpleCommandLineValidator.class.getPackage();

    /** Implementation version from the package metadata; may be null. */
    private static String version = pkg.getImplementationVersion();

    /** User-Agent passed to the validator when checking URLs. */
    private static String userAgent;

    private static SimpleDocumentValidator validator;

    /** Stream the message emitters write to (set to System.err by main). */
    private static OutputStream out;

    /** Compiled from --filterfile/--filterpattern input; null when unset. */
    private static Pattern filterPattern;

    private static MessageEmitterAdapter errorHandler;

    private static boolean verbose;

    private static boolean errorsOnly;

    /** --Werror: warnings also produce a non-zero exit status. */
    private static boolean wError;

    private static boolean exitZeroAlways;

    private static boolean loadEntities;

    private static boolean noLangDetect;

    private static boolean noStream;

    private static boolean alsoCheckCSS;

    private static boolean skipNonCSS;

    private static boolean forceCSS;

    private static boolean alsoCheckSVG;

    private static boolean skipNonSVG;

    private static boolean forceSVG;

    private static boolean skipNonHTML;

    private static boolean forceHTML;

    private static boolean asciiQuotes;

    private static int lineOffset;

    private static enum OutputFormat {
        HTML, XHTML, TEXT, XML, JSON, RELAXED, SOAP, UNICORN, GNU
    }

    private static OutputFormat outputFormat;

    private static String schemaUrl;

    /** True when --schema was given; disables automatic schema switching. */
    private static boolean hasSchemaOption;

    /**
     * Command-line entry point: parses options, then checks standard input
     * or the listed files/URLs.
     *
     * <p>
     * Option parsing stops at the first argument that is {@code -} (read
     * standard input) or that does not start with {@code --} (first
     * document). Options starting with {@code --} that are not recognized
     * are ignored.
     *
     * @param args
     *            options followed by documents to check
     * @throws SAXException
     *             propagated from validator setup or checking
     * @throws Exception
     *             propagated from validator setup or checking
     */
    public static void main(String[] args) throws SAXException, Exception {
        out = System.err;
        userAgent = "Validator.nu/LV";
        System.setProperty("nu.validator.datatype.warn", "true");
        errorsOnly = false;
        wError = false;
        alsoCheckCSS = false;
        skipNonCSS = false;
        forceCSS = false;
        alsoCheckSVG = false;
        skipNonSVG = false;
        forceSVG = false;
        skipNonHTML = false;
        forceHTML = false;
        loadEntities = false;
        exitZeroAlways = false;
        noLangDetect = false;
        noStream = false;
        lineOffset = 0;
        asciiQuotes = false;
        verbose = false;

        filterPattern = null;
        String filterString = "";
        String outFormat = null;
        schemaUrl = null;
        hasSchemaOption = false;
        boolean hasFileArgs = false;
        boolean readFromStdIn = false;
        int fileArgsStart = 0;
        if (args.length == 0) {
            usage();
            System.exit(1);
        }
        for (int i = 0; i < args.length; i++) {
            String arg = args[i];
            if (arg.equals("-")) {
                readFromStdIn = true;
                break;
            } else if (!arg.startsWith("--")) {
                hasFileArgs = true;
                fileArgsStart = i;
                break;
            } else if ("--verbose".equals(arg)) {
                verbose = true;
            } else if ("--errors-only".equals(arg)) {
                errorsOnly = true;
                System.setProperty("nu.validator.datatype.warn", "false");
            } else if ("--Werror".equals(arg)) {
                wError = true;
            } else if ("--exit-zero-always".equals(arg)) {
                exitZeroAlways = true;
            } else if ("--asciiquotes".equals(arg)) {
                asciiQuotes = true;
            } else if ("--filterfile".equals(arg)) {
                String fromFile = readFilterFile(
                        new File(optionValue(args, ++i)));
                // A file with no usable lines contributes nothing.
                if (fromFile.length() != 0) {
                    filterString = appendAlternative(filterString, fromFile);
                }
            } else if ("--filterpattern".equals(arg)) {
                filterString = appendAlternative(filterString,
                        optionValue(args, ++i));
            } else if ("--format".equals(arg)) {
                outFormat = optionValue(args, ++i);
            } else if ("--user-agent".equals(arg)) {
                userAgent = optionValue(args, ++i);
            } else if ("--version".equals(arg)) {
                if (version != null) {
                    System.out.println(version);
                } else {
                    System.out.println("[unknown version]");
                }
                System.exit(0);
            } else if ("--help".equals(arg)) {
                help();
                System.exit(0);
            } else if ("--also-check-css".equals(arg)) {
                alsoCheckCSS = true;
            } else if ("--skip-non-css".equals(arg)) {
                skipNonCSS = true;
            } else if ("--css".equals(arg)) {
                forceCSS = true;
            } else if ("--also-check-svg".equals(arg)) {
                alsoCheckSVG = true;
            } else if ("--skip-non-svg".equals(arg)) {
                skipNonSVG = true;
            } else if ("--svg".equals(arg)) {
                forceSVG = true;
            } else if ("--skip-non-html".equals(arg)) {
                skipNonHTML = true;
            } else if ("--html".equals(arg)) {
                forceHTML = true;
            } else if ("--entities".equals(arg)) {
                loadEntities = true;
            } else if ("--no-langdetect".equals(arg)) {
                noLangDetect = true;
            } else if ("--no-stream".equals(arg)) {
                noStream = true;
            } else if ("--schema".equals(arg)) {
                hasSchemaOption = true;
                schemaUrl = optionValue(args, ++i);
                // NOTE(review): only "http:" URLs are accepted here, so
                // "https:" schema URLs are rejected - confirm intended.
                if (!schemaUrl.startsWith("http:")) {
                    System.err.println("error: The \"--schema\" option"
                            + " requires a URL for a schema.");
                    System.exit(1);
                }
            }
            // Any other "--" option falls through and is ignored.
        }
        if (!"".equals(filterString)) {
            try {
                filterPattern = Pattern.compile(filterString);
            } catch (IllegalArgumentException e) {
                // PatternSyntaxException is an IllegalArgumentException;
                // report a bad user-supplied regex instead of a stack trace.
                System.err.println("error: Invalid filter pattern: "
                        + e.getMessage());
                System.exit(1);
            }
        }
        if (schemaUrl == null) {
            schemaUrl = HTML_SCHEMA_URL;
        }
        outputFormat = parseOutputFormat(outFormat);
        if (readFromStdIn) {
            InputSource is = new InputSource(System.in);
            if (noLangDetect) {
                validator = new SimpleDocumentValidator(true, false, false);
            } else {
                validator = new SimpleDocumentValidator();
            }
            setup(schemaUrl);
            validator.checkHtmlInputSource(is);
            end();
        } else if (hasFileArgs) {
            if (noLangDetect) {
                validator = new SimpleDocumentValidator(true, false, false);
            } else {
                validator = new SimpleDocumentValidator(true, false, true);
            }
            setup(schemaUrl);
            checkFiles(args, fileArgsStart);
            end();
        } else {
            System.err.printf("\nError: No documents specified.\n");
            usage();
            System.exit(1);
        }
    }

    /**
     * Returns the value following an option, or reports an error and exits
     * with status 1 when the option is the last argument.
     *
     * @param args
     *            the full argument array
     * @param valueIndex
     *            index at which the option's value is expected; the option
     *            itself is at {@code valueIndex - 1}
     * @return the option's value
     */
    private static String optionValue(String[] args, int valueIndex) {
        if (valueIndex >= args.length) {
            System.err.printf("error: The \"%s\" option requires an"
                    + " argument.\n", args[valueIndex - 1]);
            System.exit(1);
        }
        return args[valueIndex];
    }

    /**
     * Joins a new regex alternative onto an existing filter expression.
     *
     * @return {@code alternative} alone when {@code pattern} is empty,
     *         otherwise {@code pattern + "|" + alternative}
     */
    private static String appendAlternative(String pattern,
            String alternative) {
        if ("".equals(pattern)) {
            return alternative;
        }
        return pattern + "|" + alternative;
    }

    /**
     * Reads a filter file into one regex, joining its lines with "|". Lines
     * starting with "#" are skipped.
     *
     * <p>
     * A missing or unreadable file is reported on standard error and the
     * process exits with status 1, so that checking never proceeds with a
     * filter the user asked for silently missing.
     *
     * @param filterFile
     *            the file named by {@code --filterfile}
     * @return the joined expression; empty when the file has no usable lines
     */
    private static String readFilterFile(File filterFile) {
        StringBuilder sb = new StringBuilder();
        try (BufferedReader reader = //
                new BufferedReader(new FileReader(filterFile))) {
            String line;
            String pipe = "";
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("#")) {
                    continue;
                }
                sb.append(pipe);
                sb.append(line);
                pipe = "|";
            }
            return sb.toString();
        } catch (FileNotFoundException e) {
            System.err.println("error: File not found: "
                    + filterFile.getPath());
            System.exit(1);
        } catch (IOException e) {
            System.err.println("error: Could not read filter file: "
                    + filterFile.getPath() + ": " + e.getMessage());
            System.exit(1);
        }
        return "";
    }

    /**
     * Maps the {@code --format} value to an {@link OutputFormat}.
     *
     * @param outFormat
     *            the option value, or null when the option was not given
     * @return GNU for null; otherwise the format named by "text", "gnu",
     *         "xml" or "json". Any other value is reported and the process
     *         exits with status 1.
     */
    private static OutputFormat parseOutputFormat(String outFormat) {
        if (outFormat == null || "gnu".equals(outFormat)) {
            return OutputFormat.GNU;
        } else if ("text".equals(outFormat)) {
            return OutputFormat.TEXT;
        } else if ("xml".equals(outFormat)) {
            return OutputFormat.XML;
        } else if ("json".equals(outFormat)) {
            return OutputFormat.JSON;
        }
        System.err.printf("Error: Unsupported output format \"%s\"."
                + " Must be \"gnu\", \"xml\", \"json\","
                + " or \"text\".\n", outFormat);
        System.exit(1);
        return null;
    }

    /**
     * Creates and starts a fresh error handler, loads the given main schema
     * and (re)configures the validator and its parsers.
     *
     * <p>
     * Exits with status 1 when the schema cannot be read or when evaluating
     * it overflows the thread stack.
     *
     * @param schemaUrl
     *            URL of the main schema to load
     */
    private static void setup(String schemaUrl) throws SAXException, Exception {
        setErrorHandler();
        if (cssCheckingEnabled()) {
            errorHandler.setLineOffset(-1);
        }
        errorHandler.setHtml(true);
        errorHandler.start(null);
        validator.setAllowCss(cssCheckingEnabled());
        try {
            validator.setUpMainSchema(schemaUrl, new SystemErrErrorHandler());
        } catch (SchemaReadException e) {
            System.out.println(e.getMessage() + " Terminating.");
            System.exit(1);
        } catch (StackOverflowError e) {
            System.out.println("StackOverflowError"
                    + " while evaluating HTML schema.");
            System.out.println("The checker requires a java thread stack size"
                    + " of at least 512k.");
            System.out.println("Consider invoking java with the -Xss"
                    + " option. For example:");
            System.out.println("\n  java -Xss512k -jar ~/vnu.jar FILE.html");
            System.exit(1);
        }
        validator.setUpValidatorAndParsers(errorHandler, noStream, loadEntities);
    }

    /**
     * Finishes message output and exits when problems were found: status 1,
     * or 0 with --exit-zero-always. Returns normally otherwise.
     */
    private static void end() throws SAXException {
        errorHandler.end("Document checking completed. No errors found.",
                "Document checking completed.", "");
        if (errorHandler.getErrors() > 0 || errorHandler.getFatalErrors() > 0
                || (wError && errorHandler.getWarnings() > 0)) {
            System.exit(exitZeroAlways ? 0 : 1);
        }
    }

    /**
     * Checks every document argument from {@code fileArgsStart} onward.
     * Arguments starting with "http://" or "https://" are fetched and
     * checked as URLs; all others are treated as file system paths.
     *
     * @param args
     *            the full argument array
     * @param fileArgsStart
     *            index of the first document argument
     */
    private static void checkFiles(String[] args, int fileArgsStart)
            throws IOException, Exception, SAXException {
        for (int i = fileArgsStart; i < args.length; i++) {
            String arg = args[i];
            if (arg.startsWith("http://") || arg.startsWith("https://")) {
                emitFilename(arg);
                try {
                    validator.checkHttpURL(arg, userAgent, errorHandler);
                } catch (IOException e) {
                    // Report the fetch failure as a fatal error for this
                    // URL and continue with the remaining arguments.
                    errorHandler.fatalError(new SAXParseException(e.getMessage(),
                            null, arg, -1, -1,
                            new SystemIdIOException(arg, e.getMessage())));
                }
            } else {
                checkPath(new File(arg));
            }
        }
    }

    /**
     * Dispatches one file system entry: directories are walked recursively;
     * files go to the CSS, SVG or HTML checker according to the
     * --css/--svg/--skip-non-*, --also-check-* flags and the file name.
     * CSS flags take precedence over SVG flags.
     */
    private static void checkPath(File file) throws IOException, Exception {
        if (file.isDirectory()) {
            recurseDirectory(file);
        } else if (forceCSS) {
            checkCssFile(file);
        } else if (skipNonCSS) {
            if (isCss(file)) {
                checkCssFile(file);
            }
        } else if (alsoCheckCSS && isCss(file)) {
            checkCssFile(file);
        } else if (forceSVG) {
            checkSvgFile(file);
        } else if (skipNonSVG) {
            if (isSvg(file)) {
                checkSvgFile(file);
            }
        } else if (alsoCheckSVG && isSvg(file)) {
            checkSvgFile(file);
        } else {
            checkHtmlFile(file);
        }
    }

    /**
     * Checks every entry of a directory, descending into subdirectories.
     * Directories that are unreadable or cannot be listed are skipped
     * silently.
     */
    private static void recurseDirectory(File directory)
            throws IOException, Exception {
        if (!directory.canRead()) {
            return;
        }
        File[] files = directory.listFiles();
        if (files == null) {
            // listFiles() returns null when an I/O error occurs.
            return;
        }
        for (File file : files) {
            checkPath(file);
        }
    }

    /**
     * Checks a file as XML against the SVG schema, switching the main schema
     * first unless --schema was given.
     */
    private static void checkSvgFile(File file) throws IOException, Exception {
        try {
            if (!file.exists()) {
                warnFileNotFound(file);
                return;
            }
            emitFilename(file.getPath());
            ensureMainSchema(SVG_SCHEMA_URL);
            validator.checkXmlFile(file);
        } catch (SAXException e) {
            reportSaxException(file, e);
        }
    }

    /** Checks a file as a CSS style sheet. */
    private static void checkCssFile(File file) throws IOException, Exception {
        try {
            if (!file.exists()) {
                warnFileNotFound(file);
                return;
            }
            emitFilename(file.getPath());
            validator.checkCssFile(file, true);
        } catch (SAXException e) {
            reportSaxException(file, e);
        }
    }

    /**
     * Checks a file as XHTML or HTML depending on its extension.
     *
     * <p>
     * .xhtml/.xht files are checked as XML against the XHTML schema, or with
     * the HTML checker when --html was given. Other files are checked as
     * HTML unless --skip-non-html is set and the name does not end in
     * .html/.htm; skipped files produce a warning in verbose mode only.
     */
    private static void checkHtmlFile(File file) throws IOException, Exception {
        try {
            if (!file.exists()) {
                warnFileNotFound(file);
                return;
            }
            if (isXhtml(file)) {
                emitFilename(file.getPath());
                if (forceHTML) {
                    // NOTE(review): the main schema is left as-is on this
                    // path; confirm that is intended after an earlier file
                    // switched it away from the HTML schema.
                    validator.checkHtmlFile(file, true);
                } else {
                    ensureMainSchema(XHTML_SCHEMA_URL);
                    validator.checkXmlFile(file);
                }
            } else if (isHtml(file)) {
                emitFilename(file.getPath());
                ensureMainSchema(HTML_SCHEMA_URL);
                validator.checkHtmlFile(file, true);
            } else if (verbose) {
                errorHandler.warning(new SAXParseException(
                        "File was not checked. Files must have .html,"
                                + " .xhtml, .htm, or .xht extensions.",
                        null, file.toURI().toURL().toString(), -1, -1));
            }
        } catch (SAXException e) {
            reportSaxException(file, e);
        }
    }

    /**
     * Re-runs {@link #setup(String)} with the given schema when it is not
     * already the validator's main schema. Does nothing when the user chose
     * a schema with --schema.
     */
    private static void ensureMainSchema(String url)
            throws SAXException, Exception {
        if (!url.equals(validator.getMainSchemaUrl()) && !hasSchemaOption) {
            setup(url);
        }
    }

    /** In verbose mode, emits a "File not found." warning for the file. */
    private static void warnFileNotFound(File file)
            throws SAXException, IOException {
        if (verbose) {
            errorHandler.warning(new SAXParseException(
                    "File not found.", null,
                    file.toURI().toURL().toString(), -1, -1));
        }
    }

    /**
     * Prints a SAXException raised while checking a file as a GNU-style
     * warning line on standard error, unless --errors-only is in effect.
     */
    private static void reportSaxException(File file, SAXException e)
            throws IOException {
        if (!errorsOnly) {
            System.err.printf("\"%s\":-1:-1: warning: %s\n",
                    file.toURI().toURL().toString(), e.getMessage());
        }
    }

    private static boolean cssCheckingEnabled() {
        return forceCSS || alsoCheckCSS;
    }

    private static boolean isCss(File file) {
        return file.getName().endsWith(".css");
    }

    private static boolean isSvg(File file) {
        return file.getName().endsWith(".svg");
    }

    private static boolean isXhtml(File file) {
        String name = file.getName();
        return name.endsWith(".xhtml") || name.endsWith(".xht");
    }

    /**
     * Tells whether a file should be checked as HTML: always true unless
     * --skip-non-html is set, in which case the name must end in .html or
     * .htm.
     */
    private static boolean isHtml(File file) {
        String name = file.getName();
        return name.endsWith(".html") || name.endsWith(".htm") || !skipNonHTML;
    }

    /** In verbose mode, prints the name of the document about to be checked. */
    private static void emitFilename(String name) {
        if (verbose) {
            System.out.println(name);
        }
    }

    /**
     * Replaces {@code errorHandler} with a new adapter that emits messages
     * to {@code out} in the selected output format.
     */
    private static void setErrorHandler() {
        SourceCode sourceCode = validator.getSourceCode();
        ImageCollector imageCollector = new ImageCollector(sourceCode);
        boolean showSource = false;
        if (outputFormat == OutputFormat.TEXT) {
            errorHandler = new MessageEmitterAdapter(filterPattern, sourceCode,
                    showSource, imageCollector, lineOffset, true,
                    new TextMessageEmitter(out, asciiQuotes));
        } else if (outputFormat == OutputFormat.GNU) {
            errorHandler = new MessageEmitterAdapter(filterPattern, sourceCode,
                    showSource, imageCollector, lineOffset, true,
                    new GnuMessageEmitter(out, asciiQuotes));
        } else if (outputFormat == OutputFormat.XML) {
            errorHandler = new MessageEmitterAdapter(filterPattern, sourceCode,
                    showSource, imageCollector, lineOffset, true,
                    new XmlMessageEmitter(new XmlSerializer(out)));
        } else if (outputFormat == OutputFormat.JSON) {
            String callback = null;
            errorHandler = new MessageEmitterAdapter(filterPattern, sourceCode,
                    showSource, imageCollector, lineOffset, true,
                    new JsonMessageEmitter(
                            new nu.validator.json.Serializer(out), callback));
        } else {
            throw new RuntimeException("Bug. Should be unreachable.");
        }
        errorHandler.setErrorsOnly(errorsOnly);
    }

    /** Prints the short usage summary to standard output. */
    private static void usage() {
        System.out.println("Usage:");
        System.out.println("");
        System.out.println("    java -jar vnu.jar [--errors-only] [--Werror] [--exit-zero-always]");
        System.out.println("         [--asciiquotes] [--user-agent USER_AGENT] [--no-langdetect]");
        System.out.println("         [--no-stream] [--filterfile FILENAME] [--filterpattern PATTERN]");
        System.out.println("         [--css] [--skip-non-css] [--also-check-css]");
        System.out.println("         [--svg] [--skip-non-svg] [--also-check-svg]");
        System.out.println("         [--html] [--skip-non-html] [--format gnu|xml|json|text]");
        System.out.println("         [--help] [--verbose] [--version] FILES");
        System.out.println("");
        System.out.println("    java -cp vnu.jar nu.validator.servlet.Main 8888");
        System.out.println("");
        System.out.println("    java -cp vnu.jar nu.validator.client.HttpClient FILES");
        System.out.println("");
        System.out.println("For detailed usage information, use \"java -jar vnu.jar --help\" or see:");
        System.out.println("");
        System.out.println("  http://validator.github.io/");
        System.out.println("");
        System.out.println("To read from stdin, use \"-\" as the filename, like this: \"java -jar vnu.jar - \".");
    }

    /**
     * Copies the bundled detailed help text to standard output. Falls back
     * to the short usage summary when the help resource is not on the class
     * path.
     */
    private static void help() {
        try (InputStream in = SimpleCommandLineValidator.class.getClassLoader().getResourceAsStream(
                "nu/validator/localentities/files/cli-help")) {
            if (in == null) {
                // getResourceAsStream returns null for a missing resource.
                System.err.println("error: Detailed help text is not"
                        + " available.");
                usage();
                return;
            }
            System.out.println("");
            for (int b = in.read(); b != -1; b = in.read()) {
                System.out.write(b);
            }
            // The caller exits right after this; make sure a final line
            // without a trailing newline is not left in the buffer.
            System.out.flush();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy