All Downloads are FREE. Search and download functionalities are using the official Maven repository.

nu.validator.checker.MicrodataChecker Maven / Gradle / Ivy

Go to download

An HTML-checking library (used by https://html5.validator.nu and the HTML5 facet of the W3C Validator)

There is a newer version: 20.7.2
Show newest version
/*
 * Copyright (c) 2011 Mozilla Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

package nu.validator.checker;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.xml.sax.Attributes;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

/**
 * Checker for microdata constraints that require tree traversal.
 *
 * The SAX events are used to construct a minimal tree with only the
 * relevant elements and attributes, which is then validated.
 *
 * The following constraints are supported:
 *
 * - The itemref attribute, if specified, must have a value that is an
 *   unordered set of unique space-separated tokens that are
 *   case-sensitive, consisting of IDs of elements in the same home
 *   subtree.
 *
 * - A document must not contain any items for which the algorithm to
 *   find the properties of an item finds any microdata errors.
 *
 * - All itemref attributes in a Document must be such that there are
 *   no cycles in the graph formed from representing each item in the
 *   Document as a node in the graph and each property of an item
 *   whose value is another item as an edge in the graph connecting
 *   those two items.
 *
 * - A document must not contain any elements that have an itemprop
 *   attribute that would not be found to be a property of any of the
 *   items in that document were their properties all to be determined.
 *
 * Not all checks are unconditional. For example, the itemref
 * constraints are only checked if the itemscope attribute is
 * present. However, the checks are chained such that if a document
 * validates with no errors, then all the constraints are satisfied.
 */
public class MicrodataChecker extends Checker {
    /** Namespace URI of the only elements this checker records. */
    private static final String XHTML_NS = "http://www.w3.org/1999/xhtml";

    /**
     * The relevant aspects of an HTML element.
     *
     * There is no Document class as there is no need for a root
     * element from which all Elements can be reached.
     */
    class Element {
        /** Location handed to {@code err} when reporting about this element. */
        public final Locator locator;

        /** Tokens of the itemprop attribute, or null if it was absent. */
        public final String[] itemProp;

        /** Tokens of the itemref attribute, or null if it was absent. */
        public final String[] itemRef;

        /** Whether the itemscope attribute was present. */
        public final boolean itemScope;

        /**
         * Recorded elements nested directly below this one. Elements
         * that were not recorded (see startElement) are skipped, so a
         * child here need not be a direct child in the input.
         */
        public final List<Element> children;

        // tree order of the element, for cheap sorting and hashing
        private final int order;

        /**
         * Creates an element with no children and assigns it the next
         * tree-order number of the enclosing checker.
         *
         * @param locator where the element was seen
         * @param itemProp itemprop tokens, or null
         * @param itemRef itemref tokens, or null
         * @param itemScope whether itemscope was specified
         */
        public Element(Locator locator, String[] itemProp, String[] itemRef,
                boolean itemScope) {
            this.locator = locator;
            this.itemProp = itemProp;
            this.itemRef = itemRef;
            this.itemScope = itemScope;
            this.children = new LinkedList<>();
            this.order = counter++;
        }

        /**
         * Elements are only ever equal to themselves.
         */
        @Override
        public boolean equals(Object that) {
            return this == that;
        }

        /**
         * Uses the tree order as the hash; consistent with the
         * identity-based equals above.
         */
        @Override
        public int hashCode() {
            return order;
        }

        /**
         * Helper for building the Element tree(s).
         *
         * Each Builder belongs to one Element and remembers the Builder
         * that was current when it was created, forming a stack of the
         * recorded elements that are currently open.
         */
        class Builder {
            /** Builder of the enclosing recorded element, or null. */
            public final Builder parent;

            public final int depth; // nesting depth in the input

            public Builder(Builder parent, int depth) {
                this.parent = parent;
                this.depth = depth;
            }

            /**
             * Appends elm to the children of the Element this Builder
             * belongs to.
             */
            public void appendChild(Element elm) {
                Element.this.children.add(elm);
            }
        }
    }

    private int depth; // nesting depth in the input

    // builder of the innermost recorded element that is still open
    private Element.Builder builder;

    // Number of Elements created since the last reset(). Kept per
    // instance: reset() zeroes it, so sharing it between checkers would
    // let one checker's reset disturb another's numbering mid-document.
    private int counter;

    // top-level items (itemscope but not itemprop)
    private List<Element> items;

    // property elements (itemprop)
    private Set<Element> properties;

    // mapping from id to Element (like getElementById)
    private Map<String, Element> idmap;

    private Locator locator;

    /**
     * Discards all state collected for the previous document.
     *
     * @see nu.validator.checker.Checker#reset()
     */
    @Override
    public void reset() {
        depth = 0;
        builder = null;
        counter = 0;
        items = new LinkedList<>();
        properties = new LinkedHashSet<>();
        idmap = new HashMap<>();
    }

    /**
     * Records the element in the minimal tree if it is an XHTML element
     * with an id, itemprop or itemscope attribute. Only attributes in no
     * namespace are considered. An itemref attribute is stored on the
     * element but does not by itself cause the element to be recorded.
     *
     * @see nu.validator.checker.Checker#startElement(java.lang.String,
     *      java.lang.String, java.lang.String, org.xml.sax.Attributes)
     */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes atts) throws SAXException {
        // counted for every element so that endElement can match depths
        depth++;

        // equals() rather than ==, so that the check does not depend on
        // the event source interning its strings
        if (!XHTML_NS.equals(uri)) {
            return;
        }

        String id = null;
        String[] itemProp = null;
        String[] itemRef = null;
        boolean itemScope = false;

        int len = atts.getLength();
        for (int i = 0; i < len; i++) {
            if (atts.getURI(i).isEmpty()) {
                String attLocal = atts.getLocalName(i);
                String attValue = atts.getValue(i);
                if ("id".equals(attLocal)) {
                    id = attValue;
                } else if ("itemprop".equals(attLocal)) {
                    itemProp = AttributeUtil.split(attValue);
                } else if ("itemref".equals(attLocal)) {
                    itemRef = AttributeUtil.split(attValue);
                } else if ("itemscope".equals(attLocal)) {
                    itemScope = true;
                }
            }
        }

        if (id != null || itemProp != null || itemScope) {
            Element elm = new Element(new LocatorImpl(locator), itemProp,
                    itemRef, itemScope);

            if (itemProp != null) {
                properties.add(elm);
            } else if (itemScope) {
                items.add(elm);
            }
            // The first element with a given id wins. Elements without
            // an id are not registered at all.
            if (id != null && !idmap.containsKey(id)) {
                idmap.put(id, elm);
            }

            if (builder != null) {
                builder.appendChild(elm);
            }
            builder = elm.new Builder(builder, depth);
        }
    }

    /**
     * Closes the current builder if the element being ended is the one
     * it was created for, identified by its nesting depth.
     *
     * @see nu.validator.checker.Checker#endElement(java.lang.String,
     *      java.lang.String, java.lang.String)
     */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        if (builder != null && builder.depth == depth) {
            builder = builder.parent;
        }
        depth--;
    }

    /**
     * Validates the collected tree: checks every top-level item and then
     * reports each itemprop element that no item claimed.
     *
     * @see org.xml.sax.helpers.XMLFilterImpl#endDocument()
     */
    @Override
    public void endDocument() throws SAXException {
        // check all top-level items
        for (Element item : items) {
            checkItem(item, new ArrayDeque<Element>());
        }

        // emit errors for unreferenced properties
        // (checkItem removed every property it reached)
        for (Element prop : properties) {
            err("The \u201Citemprop\u201D attribute was specified,"
                + " but the element is not a property of any item.",
                    prop.locator);
        }
    }

    /**
     * Check itemref constraints.
     *
     * This mirrors the "the properties of an item" algorithm, modified to
     * recursively check sub-items.
     *
     * http://www.whatwg.org/specs/web-apps/current-work/multipage/microdata.html#the-properties-of-an-item
     *
     * Every property element reached from root is removed from
     * {@code properties}, so that what remains afterwards is
     * unreferenced.
     *
     * @param root the item whose properties are collected
     * @param parents the items currently being checked above root; used
     *        to detect circular references
     * @throws SAXException if reporting an error fails
     */
    private void checkItem(Element root, Deque<Element> parents)
            throws SAXException {
        Deque<Element> pending = new ArrayDeque<>();
        // elements already visited for this item
        Set<Element> memory = new HashSet<>();
        memory.add(root);
        for (Element child : root.children) {
            pending.push(child);
        }
        if (root.itemRef != null) {
            for (String id : root.itemRef) {
                Element refElm = idmap.get(id);
                if (refElm != null) {
                    pending.push(refElm);
                } else {
                    err("The \u201Citemref\u201D attribute referenced \u201C"
                            + id
                            + "\u201D, but there is no element with an"
                            + " \u201Cid\u201D attribute with that value.",
                            root.locator);
                }
            }
        }
        // reported once after the loop, however many repeats were seen
        boolean memoryError = false;
        while (!pending.isEmpty()) {
            Element current = pending.pop();
            if (memory.contains(current)) {
                memoryError = true;
                continue;
            }
            memory.add(current);
            // the contents of a nested item belong to that item, not to root
            if (!current.itemScope) {
                for (Element child : current.children) {
                    pending.push(child);
                }
            }
            if (current.itemProp != null) {
                properties.remove(current);
                if (current.itemScope) {
                    if (!parents.contains(current)) {
                        parents.push(root);
                        checkItem(current, parents);
                        parents.pop();
                    } else {
                        err("The \u201Citemref\u201D attribute created a"
                            + " circular reference with another item.",
                                current.locator);
                    }
                }
            }
        }
        if (memoryError) {
            err("The \u201Citemref\u201D attribute contained redundant references.",
                    root.locator);
        }
    }

    /**
     * Remembers the locator so that each recorded element can capture
     * its position.
     *
     * @see org.xml.sax.helpers.XMLFilterImpl#setDocumentLocator(org.xml.sax.Locator)
     */
    @Override
    public void setDocumentLocator(Locator locator) {
        this.locator = locator;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy