com.adobe.epubcheck.ctc.EpubExtLinksCheck Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of epubcheck Show documentation
Show all versions of epubcheck Show documentation
EpubCheck is a tool to validate IDPF EPUB files. It can detect many types of errors in EPUB.
OCF container structure, OPF and OPS mark-up, and internal reference consistency are checked.
EpubCheck can be run as a standalone command-line tool, installed as a Java server-side web application
or used as a Java library.
package com.adobe.epubcheck.ctc;
import com.adobe.epubcheck.api.Report;
import com.adobe.epubcheck.ctc.epubpackage.EpubPackage;
import com.adobe.epubcheck.ctc.epubpackage.ManifestItem;
import com.adobe.epubcheck.ctc.xml.AnchorTagHandler;
import com.adobe.epubcheck.ctc.xml.XMLContentDocParser;
import com.adobe.epubcheck.messages.MessageId;
import com.adobe.epubcheck.messages.MessageLocation;
import com.adobe.epubcheck.opf.DocumentValidator;
import com.adobe.epubcheck.util.PathUtil;
import com.adobe.epubcheck.util.SearchDictionary;
import com.adobe.epubcheck.util.SearchDictionary.DictionaryType;
import com.adobe.epubcheck.util.TextSearchDictionaryEntry;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
/**
 * Validator that inspects the href-style attribute values collected from the
 * text content documents listed in an EPUB package manifest.
 *
 * <p>For each manifest item whose media type is accepted by the
 * {@code VALID_TEXT_MEDIA_TYPES} dictionary, the document is parsed with an
 * {@link AnchorTagHandler}. Every collected value is matched against the
 * {@code LINK_VALUES} dictionary, and values whose type is {@code img} or
 * {@code altimg} are additionally checked to resolve to an entry of the
 * package zip.
 */
public class EpubExtLinksCheck implements DocumentValidator
{
  /**
   * Matches a reference that begins with {@code scheme://}. Compiled once
   * because it is applied to every image reference of every parsed document.
   */
  private static final Pattern ABSOLUTE_URL = Pattern.compile("^[^:/?#]+://.*");

  private final Report report;
  private final EpubPackage epack;

  /**
   * @param epack  package whose manifest items and zip entries are inspected
   * @param report sink that receives every message produced by this check
   */
  public EpubExtLinksCheck(EpubPackage epack, Report report)
  {
    this.epack = epack;
    this.report = report;
  }

  /**
   * Runs the check over all manifest items with an accepted text media type.
   *
   * <p>A manifest item whose file has no zip entry is reported as
   * {@code RSC_001} against the package file and skipped.
   *
   * @return always {@code true}; findings are delivered through the report
   */
  public boolean validate()
  {
    SearchDictionary tsd = new SearchDictionary(DictionaryType.LINK_VALUES);
    SearchDictionary validTypes = new SearchDictionary(DictionaryType.VALID_TEXT_MEDIA_TYPES);

    for (int i = 0; i < epack.getManifest().itemsLength(); i++)
    {
      ManifestItem itemEntry = epack.getManifest().getItem(i);
      if (!validTypes.isValidMediaType(itemEntry.getMediaType()))
      {
        continue;
      }

      String fileToParse = epack.getManifestItemFileName(itemEntry);
      // Construction order is kept as in the original: parser and handler are
      // created before the zip entry lookup.
      XMLContentDocParser parser = new XMLContentDocParser(epack.getZip(), report);
      AnchorTagHandler h = new AnchorTagHandler();

      ZipEntry entry = epack.getZip().getEntry(fileToParse);
      if (entry == null)
      {
        report.message(MessageId.RSC_001, new MessageLocation(epack.getFileName(), -1, -1), fileToParse);
        continue;
      }

      parser.parseDoc(fileToParse, h);

      // The element type must be stated; with a raw Vector the typed loop
      // variable below would not compile.
      Vector<AnchorTagHandler.DocTagContent> values = h.getHrefAttributesValues();
      for (AnchorTagHandler.DocTagContent value : values)
      {
        searchInsideValue(value, tsd, fileToParse);
        checkImageReference(value, fileToParse);
      }
    }
    return true;
  }

  /**
   * Verifies that an {@code img} / {@code altimg} reference points at an entry
   * of the package zip. Values of any other type (including a {@code null}
   * type) are ignored.
   *
   * <p>References matching {@code scheme://...} are reported as
   * {@code RSC_006} and not looked up. A missing target is reported as
   * {@code RSC_001} for {@code img} and {@code RSC_018} for {@code altimg}.
   *
   * @param value       collected attribute value with its location data
   * @param fileToParse document the value was found in; also the base against
   *                    which the reference is resolved
   */
  private void checkImageReference(AnchorTagHandler.DocTagContent value, String fileToParse)
  {
    String type = value.getType();
    // equalsIgnoreCase is null-safe, unlike "img".compareToIgnoreCase(type).
    boolean isImg = "img".equalsIgnoreCase(type);
    if (!isImg && !"altimg".equalsIgnoreCase(type))
    {
      return;
    }

    // ensure that this image is in the manifest
    String imageFile = value.getValue();
    if (ABSOLUTE_URL.matcher(imageFile).matches())
    {
      report.message(MessageId.RSC_006, new MessageLocation(fileToParse, value.getLine(), value.getColumn(), value.getContext()), value.getValue());
      return;
    }

    imageFile = PathUtil.resolveRelativeReference(fileToParse, imageFile, null);

    // Drop a trailing fragment identifier before the zip lookup.
    int index = imageFile.lastIndexOf("#");
    if (index > 0)
    {
      imageFile = imageFile.substring(0, index);
    }

    ZipEntry imgentry = epack.getZip().getEntry(imageFile);
    if (imgentry == null)
    {
      MessageId id = isImg ? MessageId.RSC_001 : MessageId.RSC_018;
      report.message(id, new MessageLocation(fileToParse, value.getLine(), value.getColumn(), value.getContext()), value.getValue());
    }
  }

  /**
   * Reports one message per match of each dictionary pattern inside the
   * value of {@code entry}, using the error code attached to the dictionary
   * entry that matched.
   *
   * @param entry collected attribute value to search in
   * @param tds   dictionary supplying the patterns and their error codes
   * @param file  document name used for the message location
   */
  private void searchInsideValue(AnchorTagHandler.DocTagContent entry, SearchDictionary tds, String file)
  {
    for (TextSearchDictionaryEntry de : tds.getDictEntries())
    {
      MessageId messageCode = de.getErrorCode();
      Matcher matcher = de.getPattern().matcher(entry.getValue());
      // Stateful find() continues after the previous match and steps past
      // zero-length matches; restarting with find(matcher.end()) would spin
      // forever on a pattern that can match the empty string.
      while (matcher.find())
      {
        report.message(messageCode, new MessageLocation(file, entry.getLine(), entry.getColumn(), entry.getValue()));
      }
    }
  }
}