com.adobe.epubcheck.ops.OPSHandler30 Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of epubcheck Show documentation
Show all versions of epubcheck Show documentation
EpubCheck is a tool to validate IDPF EPUB files. It can detect many types of errors in EPUB.
OCF container structure, OPF and OPS mark-up, and internal reference consistency are checked.
EpubCheck can be run as a standalone command-line tool, installed as a Java server-side web application
or used as a Java library.
The newest version!
package com.adobe.epubcheck.ops;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.adobe.epubcheck.api.EPUBLocation;
import com.adobe.epubcheck.api.EPUBProfile;
import com.adobe.epubcheck.messages.MessageId;
import com.adobe.epubcheck.opf.OPFChecker;
import com.adobe.epubcheck.opf.OPFChecker30;
import com.adobe.epubcheck.opf.ValidationContext;
import com.adobe.epubcheck.opf.XRefChecker;
import com.adobe.epubcheck.util.EpubConstants;
import com.adobe.epubcheck.util.FeatureEnum;
import com.adobe.epubcheck.util.PathUtil;
import com.adobe.epubcheck.vocab.AggregateVocab;
import com.adobe.epubcheck.vocab.AltStylesheetVocab;
import com.adobe.epubcheck.vocab.ComicsVocab;
import com.adobe.epubcheck.vocab.DataNavVocab;
import com.adobe.epubcheck.vocab.DictVocab;
import com.adobe.epubcheck.vocab.EnumVocab;
import com.adobe.epubcheck.vocab.EpubCheckVocab;
import com.adobe.epubcheck.vocab.IndexVocab;
import com.adobe.epubcheck.vocab.PackageVocabs;
import com.adobe.epubcheck.vocab.PackageVocabs.ITEM_PROPERTIES;
import com.adobe.epubcheck.vocab.Property;
import com.adobe.epubcheck.vocab.StagingEdupubVocab;
import com.adobe.epubcheck.vocab.StructureVocab;
import com.adobe.epubcheck.vocab.StructureVocab.EPUB_TYPES;
import com.adobe.epubcheck.vocab.Vocab;
import com.adobe.epubcheck.vocab.VocabUtil;
import com.adobe.epubcheck.xml.XMLAttribute;
import com.adobe.epubcheck.xml.XMLElement;
import com.adobe.epubcheck.xml.XMLParser;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Strings;
import com.google.common.collect.Collections2;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
public class OPSHandler30 extends OPSHandler
{
// Recognises "data:" URIs. Group 1 captures the media type, i.e. the text between "data:" and
// the first ';' or ','. The comma is excluded from the capture so that a payload containing
// commas (e.g. "data:image/svg+xml,a,b") cannot leak into the media type.
private static final Pattern DATA_URI_PATTERN = Pattern.compile("^data:([^;,]*)[^,]*,.*");
// Prefix map used for epub:type parsing by default: the empty prefix maps to the aggregate of
// the structure, staging EDUPUB, data-nav, dictionary, index and comics vocabularies.
private static Map RESERVED_VOCABS = ImmutableMap. of("",
AggregateVocab.of(StructureVocab.VOCAB, StagingEdupubVocab.VOCAB, DataNavVocab.VOCAB,
DictVocab.VOCAB, IndexVocab.VOCAB, ComicsVocab.VOCAB));
// Prefix map used by processLink to parse the class attribute of link elements.
private static Map ALTCSS_VOCABS = ImmutableMap. of("",
AltStylesheetVocab.VOCAB);
// Passed to VocabUtil.parsePrefixDeclaration in startElement; empty.
private static Map KNOWN_VOCAB_URIS = ImmutableMap.of();
// Passed to VocabUtil.parsePrefixDeclaration in startElement; holds only the structure vocab URI.
private static Set DEFAULT_VOCAB_URIS = ImmutableSet.of(StructureVocab.URI);
// Prefix map currently in effect: starts as RESERVED_VOCABS and is replaced with the result of
// parsing the epub:prefix attribute when the html element is seen.
private Map vocabs = RESERVED_VOCABS;
// Item properties recorded as the matching content is encountered (MATHML, SVG, SCRIPTED,
// SWITCH and REMOTE_RESOURCES are added in the visible part of this class).
private final Set requiredProperties = EnumSet.noneOf(ITEM_PROPERTIES.class);
// Item properties recorded as permitted (INDEX and SVG are added in the visible part of this
// class).
private final Set allowedProperties = EnumSet.noneOf(ITEM_PROPERTIES.class);
// Set once in the constructor: true unless the context properties contain the EpubCheck
// NON_LINEAR property.
private final boolean isLinear;
// Set to true by processVideo.
protected boolean inVideo = false;
// Set to true by processAudio.
protected boolean inAudio = false;
// Set to true when fallback content (non-blank text, an img, a video poster, or a src with a
// core media type) is seen while inside audio, video, object or canvas.
protected boolean hasValidFallback = false;
// Incremented by processObject for every object start tag.
protected int imbricatedObjects = 0;
// Incremented by processCanvas for every canvas start tag.
protected int imbricatedCanvases = 0;
// Set to true when an "a" element starts; cleared by processAnchor when there is no href and by
// characters() on any character data.
protected boolean anchorNeedsText = false;
// Set to true when a "math" element starts.
protected boolean inMathML = false;
// Read by processAnchor; where it is set is not visible in this part of the file.
protected boolean inSvg = false;
// Not referenced in the visible part of the file.
protected boolean inBody = false;
// Set to true by checkType when a nav element in a data-nav item carries the region-based type.
protected boolean inRegionBasedNav = false;
// For "math": whether an alttext attribute is present; also set to true on "annotation-xml".
protected boolean hasAltorAnnotation = false;
// Set to true on an SVG "title" element; set by processAnchor from xlink:title in SVG content.
protected boolean hasTitle = false;
/**
 * Names of the non-mouse event handler attributes whose presence marks content as scripted.
 * "onhashchange" is the real handler name; the historical misspelling "onhaschange" is kept so
 * that existing users of this array see no entry disappear.
 */
static protected final String[] scriptEventsStrings = { "onafterprint", "onbeforeprint",
    "onbeforeunload", "onerror", "onhashchange", "onhaschange", "onload", "onmessage",
    "onoffline", "onpagehide",
    "onpageshow", "onpopstate", "onredo", "onresize", "onstorage", "onundo", "onunload",
    "onblur", "onchange", "oncontextmenu", "onfocus", "onformchange", "onforminput", "oninput",
    "oninvalid", "onreset", "onselect", "onsubmit",
    "onkeydown", "onkeypress", "onkeyup",
    "onabort", "oncanplay", "oncanplaythrough", "ondurationchange", "onemptied", "onended",
    "onerror", "onloadeddata", "onloadedmetadata", "onloadstart", "onpause", "onplay",
    "onplaying", "onprogress", "onratechange", "onreadystatechange", "onseeked", "onseeking",
    "onstalled", "onsuspend", "ontimeupdate", "onvolumechange", "onwaiting" };
/** Lazily built by {@link #getScriptEvents()}; {@code null} until first requested. */
static protected HashSet<String> scriptEvents;

/**
 * Returns the set of all known event handler attribute names: every entry of
 * {@code scriptEventsStrings} plus every entry of {@code mouseEventsStrings}.
 *
 * @return the shared (mutable) set instance, created on first call
 */
public static HashSet<String> getScriptEvents()
{
  if (scriptEvents == null)
  {
    // Build the set locally and assign the field only once it is complete
    HashSet<String> events = new HashSet<String>();
    Collections.addAll(events, scriptEventsStrings);
    Collections.addAll(events, mouseEventsStrings);
    scriptEvents = events;
  }
  return scriptEvents;
}

/** Names of the mouse-related event handler attributes. */
static protected final String[] mouseEventsStrings = { "onclick", "ondblclick", "ondrag",
    "ondragend", "ondragenter", "ondragleave", "ondragover", "ondragstart", "ondrop",
    "onmousedown", "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onmousewheel",
    "onscroll" };
/** Lazily built by {@link #getMouseEvents()}; {@code null} until first requested. */
static protected HashSet<String> mouseEvents;

/**
 * Returns the set of mouse-related event handler attribute names.
 *
 * @return the shared (mutable) set instance, created on first call
 */
public static HashSet<String> getMouseEvents()
{
  if (mouseEvents == null)
  {
    // Build the set locally and assign the field only once it is complete
    HashSet<String> events = new HashSet<String>();
    Collections.addAll(events, mouseEventsStrings);
    mouseEvents = events;
  }
  return mouseEvents;
}
public OPSHandler30(ValidationContext context, XMLParser parser)
{
super(context, parser);
checkedUnsupportedXMLVersion = false;
isLinear = !context.properties
.contains(EpubCheckVocab.VOCAB.get(EpubCheckVocab.PROPERTIES.NON_LINEAR));
}
/**
 * Checks the value of an epub:type attribute.
 *
 * The value is parsed against the prefix map currently in effect, the structure-vocabulary
 * types are handed to {@link #checkTypes(Set)}, and the data-nav 'region-based' and dictionary
 * types get dedicated handling.
 *
 * @param e the element carrying the attribute
 * @param type the raw attribute value, or {@code null} when the attribute is absent
 */
protected void checkType(XMLElement e, String type)
{
  if (type == null)
  {
    // no epub:type attribute: nothing to check
    return;
  }

  EPUBLocation here = EPUBLocation.create(path, parser.getLineNumber(),
      parser.getColumnNumber());
  Set declared = VocabUtil.parsePropertyList(type, vocabs, report, here);
  checkTypes(Property.filter(declared, StructureVocab.EPUB_TYPES.class));

  // 'region-based' (Data Navigation Documents) is only valid on a nav element of an item
  // declared with the data-nav property
  if (declared.contains(DataNavVocab.VOCAB.get(DataNavVocab.EPUB_TYPES.REGION_BASED)))
  {
    boolean navInDataNavItem = "nav".equals(e.getName()) && context.properties
        .contains(PackageVocabs.ITEM_VOCAB.get(PackageVocabs.ITEM_PROPERTIES.DATA_NAV));
    if (navInDataNavItem)
    {
      inRegionBasedNav = true;
    }
    else
    {
      report.message(MessageId.HTM_052, parser.getLocation());
    }
  }

  // Record that the document contains dictionary content
  if (declared.contains(DictVocab.VOCAB.get(DictVocab.EPUB_TYPES.DICTIONARY)))
  {
    context.featureReport.report(FeatureEnum.DICTIONARY, parser.getLocation(), null);
  }
}
/**
 * Reports the features implied by structure-vocabulary epub:type values.
 *
 * A page break is reported as a feature; an index is reported as a feature and additionally
 * makes the INDEX item property allowed.
 *
 * @param types the structure-vocabulary types found on the current element
 */
protected void checkTypes(Set types)
{
  final boolean marksPageBreak = types.contains(EPUB_TYPES.PAGEBREAK);
  final boolean marksIndex = types.contains(EPUB_TYPES.INDEX);

  if (marksPageBreak)
  {
    context.featureReport.report(FeatureEnum.PAGE_BREAK, parser.getLocation(), null);
  }
  if (marksIndex)
  {
    allowedProperties.add(ITEM_PROPERTIES.INDEX);
    context.featureReport.report(FeatureEnum.INDEX, parser.getLocation(), null);
  }
}
/**
 * Checks the value of an ssml:ph attribute and reports HTM_007 when it is present but blank.
 *
 * @param ph the attribute value, or {@code null} when the attribute is absent
 */
protected void checkSSMLPh(String ph)
{
  // issue 139; a possible enhancement is a real syntax check for IPA and x-SAMPA
  final boolean presentButBlank = ph != null && ph.trim().isEmpty();
  if (presentButBlank)
  {
    report.message(MessageId.HTM_007,
        EPUBLocation.create(path, parser.getLineNumber(), parser.getColumnNumber()));
  }
}
/**
 * Handles character data.
 *
 * Non-blank text inside audio, video, object or canvas counts as fallback content. Any
 * character data, blank or not, clears the pending "anchor needs text" flag.
 */
@Override
public void characters(char[] chars, int arg1, int arg2)
{
  super.characters(chars, arg1, arg2);

  final String text = new String(chars, arg1, arg2).trim();
  final boolean insideFallbackHost = inAudio || inVideo || imbricatedObjects > 0
      || imbricatedCanvases > 0;
  if (text.length() > 0 && insideFallbackHost)
  {
    hasValidFallback = true;
  }

  // the flag ends up false whether or not it was set before
  anchorNeedsText = false;
}
/**
 * Handles the start of an element.
 *
 * After the superclass handling and the semantics/sectioning processing, the element is
 * dispatched on its name (and, for epub:switch and SVG title, its namespace) to record
 * required item properties and update the handler state. Independently of that dispatch, every
 * element is then checked for inline script attributes, a src attribute, an epub:type
 * attribute and an ssml:ph attribute.
 */
public void startElement()
{
  super.startElement();
  XMLElement e = parser.getCurrentElement();
  String name = e.getName();
  processSemantics(e);
  processSectioning(e);
  if (name.equals("html"))
  {
    // the epub:prefix declaration replaces the prefix map used for all later epub:type checks
    vocabs = VocabUtil.parsePrefixDeclaration(
        e.getAttributeNS(EpubConstants.EpubTypeNamespaceUri, "prefix"), RESERVED_VOCABS,
        KNOWN_VOCAB_URIS, DEFAULT_VOCAB_URIS, report,
        EPUBLocation.create(path, parser.getLineNumber(), parser.getColumnNumber()));
  }
  else if (name.equals("link"))
  {
    processLink(e);
  }
  else if (name.equals("object"))
  {
    processObject(e);
  }
  else if (name.equals("math"))
  {
    requiredProperties.add(ITEM_PROPERTIES.MATHML);
    inMathML = true;
    hasAltorAnnotation = (null != e.getAttribute("alttext"));
  }
  else if (!context.mimeType.equals("image/svg+xml") && name.equals("svg"))
  {
    // only svg embedded in a non-SVG document requires the svg property
    requiredProperties.add(ITEM_PROPERTIES.SVG);
    processStartSvg(e);
  }
  else if (name.equals("script"))
  {
    requiredProperties.add(ITEM_PROPERTIES.SCRIPTED);
  }
  else if (EpubConstants.EpubTypeNamespaceUri.equals(e.getNamespace()) && name.equals("switch"))
  {
    requiredProperties.add(ITEM_PROPERTIES.SWITCH);
  }
  else if (name.equals("audio"))
  {
    processAudio();
  }
  else if (name.equals("video"))
  {
    processVideo(e);
  }
  else if (name.equals("figure"))
  {
    processFigure(e);
  }
  else if (name.equals("table"))
  {
    processTable(e);
  }
  else if (name.equals("canvas"))
  {
    processCanvas();
  }
  else if (name.equals("img"))
  {
    processImg();
  }
  else if (name.equals("a"))
  {
    anchorNeedsText = true;
    processAnchor(e);
  }
  else if (name.equals("annotation-xml"))
  {
    hasAltorAnnotation = true;
  }
  else if ("http://www.w3.org/2000/svg".equals(e.getNamespace()) && name.equals("title"))
  {
    hasTitle = true;
  }
  processInlineScripts(e);
  // A "source" element is checked under the name of its parent (audio/video). Guard against a
  // "source" with no parent (e.g. as document root), where getParent() presumably returns
  // null; in that case the element's own name is used.
  String srcOwner = name;
  if ("source".equals(name) && e.getParent() != null)
  {
    srcOwner = e.getParent().getName();
  }
  processSrc(srcOwner, e.getAttribute("src"));
  checkType(e, e.getAttributeNS(EpubConstants.EpubTypeNamespaceUri, "type"));
  checkSSMLPh(e.getAttributeNS("http://www.w3.org/2001/10/synthesis", "ph"));
}
/**
 * Marks the document as requiring the SCRIPTED property when the element carries at least one
 * attribute whose lower-cased name is a known event handler name.
 *
 * @param e the element whose attributes are inspected
 */
protected void processInlineScripts(com.adobe.epubcheck.xml.XMLElement e)
{
  final HashSet generalHandlers = getScriptEvents();
  final HashSet pointerHandlers = getMouseEvents();

  int index = 0;
  while (index < e.getAttributeCount())
  {
    XMLAttribute candidate = e.getAttribute(index);
    index++;
    String handlerName = candidate.getName().toLowerCase(Locale.ROOT);
    if (generalHandlers.contains(handlerName) || pointerHandlers.contains(handlerName))
    {
      // one match is enough; the remaining attributes need not be inspected
      requiredProperties.add(ITEM_PROPERTIES.SCRIPTED);
      break;
    }
  }
}
/**
 * Checks the class attribute of a link element for conflicting alternate style sheet tags.
 *
 * CSS_005 is reported when the classes contain both 'vertical' and 'horizontal', or both 'day'
 * and 'night'. Nothing is checked when there is no class attribute or when exactly one
 * property was parsed from it.
 *
 * @param e the link element
 */
protected void processLink(XMLElement e)
{
  final String classAttribute = e.getAttribute("class");
  if (classAttribute == null)
  {
    return;
  }

  Set parsed = VocabUtil.parsePropertyList(classAttribute, ALTCSS_VOCABS, report,
      EPUBLocation.create(path, parser.getLineNumber(), parser.getColumnNumber()));
  Set altTags = Property.filter(parsed, AltStylesheetVocab.PROPERTIES.class);
  if (parsed.size() == 1)
  {
    // a single property cannot conflict with anything
    return;
  }

  boolean orientationClash = altTags.contains(AltStylesheetVocab.PROPERTIES.VERTICAL)
      && altTags.contains(AltStylesheetVocab.PROPERTIES.HORIZONTAL);
  boolean lightingClash = altTags.contains(AltStylesheetVocab.PROPERTIES.DAY)
      && altTags.contains(AltStylesheetVocab.PROPERTIES.NIGHT);
  if (orientationClash || lightingClash)
  {
    report.message(MessageId.CSS_005,
        EPUBLocation.create(path, parser.getLineNumber(), parser.getColumnNumber()),
        classAttribute);
  }
}
/**
 * Updates the anchor-related state for an "a" element.
 *
 * An anchor without an href does not need text. Inside SVG content (inline or in an SVG
 * document) the title flag is set from the presence of a non-empty xlink:title attribute.
 *
 * @param e the anchor element
 */
protected void processAnchor(XMLElement e)
{
  if (null == e.getAttribute("href"))
  {
    anchorNeedsText = false;
  }

  final boolean svgContent = inSvg || context.mimeType.equals("image/svg+xml");
  if (svgContent)
  {
    String xlinkTitle = e.getAttributeNS(EpubConstants.XLinkNamespaceUri, "title");
    hasTitle = (null != Strings.emptyToNull(xlinkTitle));
  }
}
/**
 * Records an img element as fallback content when it occurs inside audio, video, object or
 * canvas.
 */
protected void processImg()
{
  if (!inAudio && !inVideo && imbricatedObjects <= 0 && imbricatedCanvases <= 0)
  {
    // not inside any element that takes fallback content
    return;
  }
  hasValidFallback = true;
}
/**
 * Records the start of a canvas element by incrementing the canvas nesting counter.
 */
protected void processCanvas()
{
  imbricatedCanvases += 1;
}
/**
 * Records the start of an audio element: sets the in-audio flag and reports the AUDIO feature
 * at the current parser location.
 */
protected void processAudio()
{
  this.inAudio = true;
  context.featureReport.report(
      FeatureEnum.AUDIO,
      parser.getLocation());
}
/**
 * Records the start of a video element and checks its poster attribute.
 *
 * The VIDEO feature is always reported. When a poster is given it counts as fallback content
 * and is processed like a src reference; if its media type is known and is not a blessed
 * image type, MED_001 is reported.
 *
 * @param e the video element
 */
protected void processVideo(XMLElement e)
{
  inVideo = true;
  context.featureReport.report(FeatureEnum.VIDEO, parser.getLocation());

  final String posterSrc = e.getAttribute("poster");
  if (posterSrc == null)
  {
    // no poster: nothing further to check
    return;
  }

  if (xrefChecker.isPresent())
  {
    String posterMimeType = xrefChecker.get()
        .getMimeType(PathUtil.resolveRelativeReference(base, posterSrc));
    if (posterMimeType != null && !OPFChecker.isBlessedImageType(posterMimeType))
    {
      report.message(MessageId.MED_001,
          EPUBLocation.create(path, parser.getLineNumber(), parser.getColumnNumber()));
    }
  }

  hasValidFallback = true;
  processSrc(e.getName(), posterSrc);
}
/**
 * Processes a hyperlink target. In addition to the superclass handling, links found inside a
 * region-based nav are registered with the cross-reference checker (when one is present) as
 * REGION_BASED_NAV references.
 *
 * @param href the hyperlink target
 */
protected void processHyperlink(String href)
{
  super.processHyperlink(href);
  if (!inRegionBasedNav || !xrefChecker.isPresent())
  {
    return;
  }
  xrefChecker.get().registerReference(path, parser.getLineNumber(), parser.getColumnNumber(),
      href, XRefChecker.Type.REGION_BASED_NAV);
}
// Matches references of the form "scheme://..."; compiled once (like DATA_URI_PATTERN) instead
// of being recompiled by String.matches for every element.
private static final Pattern REMOTE_SRC_PATTERN = Pattern.compile("^[^:/?#]+://.*");

/**
 * Checks a src-like reference found on an element.
 *
 * An empty (after trimming) value is reported as HTM_008. When a cross-reference checker is
 * present, the media type of the reference is determined, either from the data URI itself or
 * by looking up the (registered) resource, and used to update the allowed item properties
 * and the fallback state.
 *
 * @param name the name of the element owning the reference (for "source" elements the caller
 *        passes the parent's name)
 * @param src the reference value, or {@code null} when the attribute is absent
 */
protected void processSrc(String name, String src)
{
  if (src != null)
  {
    src = src.trim();
    if (src.equals(""))
    {
      // NOTE(review): processing continues with the empty reference after this report, so it
      // is still resolved and registered below - confirm that this is intended.
      report.message(MessageId.HTM_008,
          EPUBLocation.create(path, parser.getLineNumber(), parser.getColumnNumber(), name));
    }
  }
  if (src == null || !xrefChecker.isPresent())
  {
    return;
  }
  String srcMimeType = null;
  Matcher matcher = DATA_URI_PATTERN.matcher(src);
  if (matcher.matches())
  {
    // data URI: the media type is carried by the URI itself, nothing to register
    srcMimeType = matcher.group(1);
  }
  else
  {
    if (REMOTE_SRC_PATTERN.matcher(src).matches())
    {
      requiredProperties.add(ITEM_PROPERTIES.REMOTE_RESOURCES);
    }
    else
    {
      src = PathUtil.resolveRelativeReference(base, src);
    }
    XRefChecker.Type refType;
    if ("audio".equals(name))
    {
      refType = XRefChecker.Type.AUDIO;
    }
    else if ("video".equals(name))
    {
      refType = XRefChecker.Type.VIDEO;
    }
    else
    {
      refType = XRefChecker.Type.GENERIC;
    }
    if (!"img".equals(name)) // img already registered in super class
    {
      xrefChecker.get().registerReference(path, parser.getLineNumber(), parser.getColumnNumber(),
          src, refType);
    }
    srcMimeType = xrefChecker.get().getMimeType(src);
  }
  if (srcMimeType == null)
  {
    return;
  }
  // SVG referenced from a non-SVG document makes the svg item property allowed
  if (!context.mimeType.equals("image/svg+xml") && srcMimeType.equals("image/svg+xml"))
  {
    allowedProperties.add(ITEM_PROPERTIES.SVG);
  }
  // a core media type resource inside audio/video/object/canvas is a valid fallback, except
  // when it is referenced from a track element
  if ((inAudio || inVideo || imbricatedObjects > 0 || imbricatedCanvases > 0)
      && OPFChecker30.isCoreMediaType(srcMimeType) && !name.equals("track"))
  {
    hasValidFallback = true;
  }
}
protected void processObject(XMLElement e)
{
imbricatedObjects++;
String type = e.getAttribute("type");
String data = e.getAttribute("data");
if (data != null)
{
processSrc(e.getName(), data);
data = PathUtil.resolveRelativeReference(base, data);
}
if (type != null && data != null && xrefChecker.isPresent()
&& !type.equals(xrefChecker.get().getMimeType(data)))
{
String context = "