nu.validator.checker.MicrodataChecker Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of validator Show documentation
Show all versions of validator Show documentation
An HTML-checking library (used by https://html5.validator.nu and the HTML5 facet of the W3C Validator)
/*
* Copyright (c) 2011 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.checker;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.xml.sax.Attributes;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
/**
* Checker for microdata constraints that require tree traversal.
*
* The SAX events are used to construct a minimal tree with only the
* relevant elements and attributes, which is then validated.
*
* The followings constraints are supported:
*
* - The itemref attribute, if specified, must have a value that is an
* unordered set of unique space-separated tokens that are
* case-sensitive, consisting of IDs of elements in the same home
* subtree.
*
* - A document must not contain any items for which the algorithm to
* find the properties of an item finds any microdata errors.
*
* - All itemref attributes in a Document must be such that there are
* no cycles in the graph formed from representing each item in the
* Document as a node in the graph and each property of an item
* whose value is another item as an edge in the graph connecting
* those two items.
*
* - A document must not contain any elements that have an itemprop
* attribute that would not be found to be a property of any of the
* items in that document were their properties all to be determined
*
* Not all checks are unconditional. For example, the itemref
* constraints are only checked if the itemscope attribute is
* present. However, the checks are chained such that if a document
* validates with no errors, then all the constraints are satisfied.
*/
public class MicrodataChecker extends Checker {
/**
* The relevant aspects of an HTML element.
*
* There is no Document class as there is no need for a root
* element from which all Elements can be reached.
*/
class Element {
public final Locator locator;
public final String[] itemProp;
public final String[] itemRef;
public final boolean itemScope;
public final List children;
// tree order of the element, for cheap sorting and hashing
private final int order;
public Element(Locator locator, String[] itemProp, String[] itemRef,
boolean itemScope) {
this.locator = locator;
this.itemProp = itemProp;
this.itemRef = itemRef;
this.itemScope = itemScope;
this.children = new LinkedList<>();
this.order = counter++;
}
@Override
public boolean equals(Object that) {
return this == that;
}
@Override
public int hashCode() {
return order;
}
/**
* Helper for building the Element tree(s).
*/
class Builder {
public final Builder parent;
public final int depth; // nesting depth in the input
public Builder(Builder parent, int depth) {
this.parent = parent;
this.depth = depth;
}
public void appendChild(Element elm) {
Element.this.children.add(elm);
}
}
}
private int depth; // nesting depth in the input
private Element.Builder builder;
private static int counter;
// top-level items (itemscope but not itemprop)
private List items;
// property elements (itemprop)
private Set properties;
// mapping from id to Element (like getElementById)
private Map idmap;
private Locator locator;
/**
* @see nu.validator.checker.Checker#reset()
*/
@Override
public void reset() {
depth = 0;
builder = null;
counter = 0;
items = new LinkedList<>();
properties = new LinkedHashSet<>();
idmap = new HashMap<>();
}
/**
* @see nu.validator.checker.Checker#startElement(java.lang.String,
* java.lang.String, java.lang.String, org.xml.sax.Attributes)
*/
@Override
public void startElement(String uri, String localName, String qName,
Attributes atts) throws SAXException {
depth++;
if ("http://www.w3.org/1999/xhtml" != uri) {
return;
}
String id = null;
String[] itemProp = null;
String[] itemRef = null;
boolean itemScope = false;
int len = atts.getLength();
for (int i = 0; i < len; i++) {
if (atts.getURI(i).isEmpty()) {
String attLocal = atts.getLocalName(i);
String attValue = atts.getValue(i);
if ("id" == attLocal) {
id = attValue;
} else if ("itemprop" == attLocal) {
itemProp = AttributeUtil.split(attValue);
} else if ("itemref" == attLocal) {
itemRef = AttributeUtil.split(attValue);
} else if ("itemscope" == attLocal) {
itemScope = true;
}
}
}
if (id != null || itemProp != null || itemScope) {
Element elm = new Element(new LocatorImpl(locator), itemProp,
itemRef, itemScope);
if (itemProp != null) {
properties.add(elm);
} else if (itemScope) {
items.add(elm);
}
if (!idmap.containsKey(id)) {
idmap.put(id, elm);
}
if (builder != null) {
builder.appendChild(elm);
}
builder = elm.new Builder(builder, depth);
}
}
/**
* @see nu.validator.checker.Checker#endElement(java.lang.String,
* java.lang.String, java.lang.String)
*/
@Override
public void endElement(String uri, String localName, String qName)
throws SAXException {
if (builder != null && builder.depth == depth) {
builder = builder.parent;
}
depth--;
}
/**
* @see org.xml.sax.helpers.XMLFilterImpl#endDocument()
*/
@Override
public void endDocument() throws SAXException {
// check all top-level items
for (Element item : items) {
checkItem(item, new ArrayDeque());
}
// emit errors for unreferenced properties
for (Element prop : properties) {
err("The \u201Citemprop\u201D attribute was specified,"
+ " but the element is not a property of any item.",
prop.locator);
}
}
/**
* Check itemref constraints.
*
* This mirrors the "the properties of an item" algorithm, modified to
* recursively check sub-items.
*
* http://www.whatwg.org/specs/web-apps/current-work/multipage/microdata.html#the-properties-of-an-item
*/
private void checkItem(Element root, Deque parents)
throws SAXException {
Deque pending = new ArrayDeque<>();
Set memory = new HashSet<>();
memory.add(root);
for (Element child : root.children) {
pending.push(child);
}
if (root.itemRef != null) {
for (String id : root.itemRef) {
Element refElm = idmap.get(id);
if (refElm != null) {
pending.push(refElm);
} else {
err("The \u201Citemref\u201D attribute referenced \u201C"
+ id
+ "\u201D, but there is no element with an"
+ " \u201Cid\u201D attribute with that value.",
root.locator);
}
}
}
boolean memoryError = false;
while (pending.size() > 0) {
Element current = pending.pop();
if (memory.contains(current)) {
memoryError = true;
continue;
}
memory.add(current);
if (!current.itemScope) {
for (Element child : current.children) {
pending.push(child);
}
}
if (current.itemProp != null) {
properties.remove(current);
if (current.itemScope) {
if (!parents.contains(current)) {
parents.push(root);
checkItem(current, parents);
parents.pop();
} else {
err("The \u201Citemref\u201D attribute created a"
+ " circular reference with another item.",
current.locator);
}
}
}
}
if (memoryError) {
err("The \u201Citemref\u201D attribute contained redundant references.",
root.locator);
}
}
/**
* @see org.xml.sax.helpers.XMLFilterImpl#setDocumentLocator(org.xml.sax.Locator)
*/
@Override
public void setDocumentLocator(Locator locator) {
this.locator = locator;
}
}