com.adobe.epubcheck.ctc.EpubLangCheck Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of epubcheck Show documentation
Show all versions of epubcheck Show documentation
EpubCheck is a tool to validate IDPF EPUB files. It can detect many types of errors in EPUB.
OCF container structure, OPF and OPS mark-up, and internal reference consistency are checked.
EpubCheck can be run as a standalone command-line tool, installed as a Java server-side web application
or used as a Java library.
package com.adobe.epubcheck.ctc;
import com.adobe.epubcheck.api.Report;
import com.adobe.epubcheck.ctc.epubpackage.EpubPackage;
import com.adobe.epubcheck.ctc.epubpackage.ManifestItem;
import com.adobe.epubcheck.ctc.xml.LangAttributeHandler;
import com.adobe.epubcheck.ctc.xml.XMLContentDocParser;
import com.adobe.epubcheck.messages.MessageId;
import com.adobe.epubcheck.messages.MessageLocation;
import com.adobe.epubcheck.opf.DocumentValidator;
import com.adobe.epubcheck.util.PathUtil;
import com.adobe.epubcheck.util.SearchDictionary;
import com.adobe.epubcheck.util.SearchDictionary.DictionaryType;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
/**
 * Checks the {@code lang} and {@code xml:lang} values that
 * {@link LangAttributeHandler} extracts from each text content document listed
 * in the package manifest.
 *
 * <p>For every manifest item whose media type is accepted by the
 * {@code VALID_TEXT_MEDIA_TYPES} dictionary, the document is parsed and the
 * following messages may be reported:</p>
 * <ul>
 *   <li>{@code RSC_001} - the manifest item has no matching entry in the zip;</li>
 *   <li>{@code HTM_017} - both attributes are present but differ (ignoring case);</li>
 *   <li>{@code HTM_018} - {@code xml:lang} is not a known language code;</li>
 *   <li>{@code HTM_019} - {@code lang} is not a known language code;</li>
 *   <li>{@code HTM_020} - {@code xml:lang} is missing;</li>
 *   <li>{@code HTM_021} - {@code lang} is missing.</li>
 * </ul>
 */
public class EpubLangCheck implements DocumentValidator
{
  private final ZipFile zip;
  private final Report report;
  private final EpubPackage epack;

  /**
   * Creates a check bound to the given package.
   *
   * @param epack  package whose manifest and zip archive are inspected
   * @param report sink for every message produced by {@link #validate()}
   */
  public EpubLangCheck(EpubPackage epack, Report report)
  {
    this.zip = epack.getZip();
    this.report = report;
    this.epack = epack;
  }

  /**
   * Runs the language-attribute check over all text documents in the manifest.
   *
   * @return always {@code false}; findings are communicated only through the
   *         {@link Report}. NOTE(review): the return value has never carried
   *         information in this class - confirm that no caller relies on it
   *         before giving it a meaning.
   */
  @Override
  public boolean validate()
  {
    SearchDictionary vtsd = new SearchDictionary(DictionaryType.VALID_TEXT_MEDIA_TYPES);
    for (int i = 0; i < epack.getManifest().itemsLength(); i++)
    {
      ManifestItem mi = epack.getManifest().getItem(i);
      if (!vtsd.isValidMediaType(mi.getMediaType()))
      {
        // Not a text document: nothing to inspect.
        continue;
      }

      XMLContentDocParser parser = new XMLContentDocParser(this.zip, report);
      LangAttributeHandler sh = new LangAttributeHandler();
      String fileToParse = epack.getManifestItemFileName(mi);
      ZipEntry entry = this.zip.getEntry(fileToParse);
      if (entry == null)
      {
        // The manifest references a file that is not in the archive.
        report.message(MessageId.RSC_001, new MessageLocation(this.epack.getFileName(), -1, -1), fileToParse);
        continue;
      }

      parser.parseDoc(fileToParse, sh);
      reportLangIssues(fileToParse, sh.getLangAttr(), sh.getXmlLangAttr());
    }
    // Kept from the original implementation, which never updated its result flag.
    return false;
  }

  /**
   * Reports the language-related messages for one parsed document.
   *
   * @param fileToParse      name of the document, used as the message location
   * @param langAttribute    value of {@code lang}, or {@code null} when absent
   * @param xmlLangAttribute value of {@code xml:lang}, or {@code null} when absent
   */
  private void reportLangIssues(String fileToParse, String langAttribute, String xmlLangAttribute)
  {
    if (langAttribute == null || xmlLangAttribute == null)
    {
      // At least one attribute is missing; the values are not validated in this case.
      if (xmlLangAttribute == null)
      {
        report.message(MessageId.HTM_020, new MessageLocation(fileToParse, -1, -1));
      }
      if (langAttribute == null)
      {
        report.message(MessageId.HTM_021, new MessageLocation(fileToParse, -1, -1));
      }
      return;
    }

    if (!xmlLangAttribute.equalsIgnoreCase(langAttribute))
    {
      report.message(MessageId.HTM_017, new MessageLocation(fileToParse, -1, -1));
    }
    if (!isValidLanguageDefinition(xmlLangAttribute))
    {
      report.message(MessageId.HTM_018, new MessageLocation(fileToParse, -1, -1));
    }
    if (!isValidLanguageDefinition(langAttribute))
    {
      report.message(MessageId.HTM_019, new MessageLocation(fileToParse, -1, -1));
    }
  }

  /**
   * Tells whether the primary subtag of a language tag is one of the codes in
   * {@link #LANG_VALUES}.
   *
   * <p>Everything from the first {@code '-'} onwards is ignored, so
   * {@code en-us} and {@code fr-ca} are checked as {@code en} and {@code fr}.
   * The comparison is case-insensitive. Blank placeholder codes in the table
   * never match.</p>
   *
   * @param language the attribute value to check; {@code null} is treated as invalid
   * @return {@code true} if the primary subtag is a known code
   */
  private static boolean isValidLanguageDefinition(String language)
  {
    if (language == null)
    {
      return false;
    }

    // ignore language subclasses like en-us or fr-ca.
    int pos = language.indexOf('-');
    String primary = (pos >= 0) ? language.substring(0, pos) : language;

    for (String[] row : LANG_VALUES)
    {
      // Index 0 is the display name; every following element is a code.
      for (int i = 1; i < row.length; i++)
      {
        String code = row[i];
        if (code.trim().length() == 0)
        {
          // Placeholder for a language without a two-letter code.
          continue;
        }
        if (primary.equalsIgnoreCase(code))
        {
          return true;
        }
      }
    }
    return false;
  }

  /**
   * Known languages. Each row is {@code {display name, code[, alternate code...]}}.
   * A blank code ({@code " "}) marks a language for which the table has no
   * two-letter code; such entries are skipped during matching.
   */
  private static final String[][] LANG_VALUES = new String[][]
    {
      {"Abkhazian", "ab"},
      {"Afar", "aa"},
      {"Afrikaans", "af"},
      {"Albanian", "sq"},
      {"Amharic", "am"},
      {"Arabic", "ar"},
      {"Aragonese", "an"},
      {"Armenian", "hy"},
      {"Assamese", "as"},
      {"Aymara", "ay"},
      {"Azerbaijani", "az"},
      {"Bashkir", "ba"},
      {"Basque", "eu"},
      {"Bengali (Bangla)", "bn"},
      {"Bhutani", "dz"},
      {"Bihari", "bh"},
      {"Bislama", "bi"},
      {"Breton", "br"},
      {"Bulgarian", "bg"},
      {"Burmese", "my"},
      {"Byelorussian (Belarusian)", "be"},
      {"Cambodian", "km"},
      {"Catalan", "ca"},
      {"Cherokee", " "},
      {"Chewa", " "},
      {"Chinese (Simplified)", "zh"},
      {"Chinese (Traditional)", "zh"},
      {"Corsican", "co"},
      {"Croatian", "hr"},
      {"Czech", "cs"},
      {"Danish", "da"},
      {"Divehi", " "},
      {"Dutch", "nl"},
      {"Edo", " "},
      {"English", "en"},
      {"Esperanto", "eo"},
      {"Estonian", "et"},
      {"Faeroese", "fo"},
      {"Farsi", "fa"},
      {"Fiji", "fj"},
      {"Finnish", "fi"},
      {"Flemish", " "},
      {"French", "fr"},
      {"Frisian", "fy"},
      {"Fulfulde", " "},
      {"Galician", "gl"},
      {"Gaelic (Scottish)", "gd"},
      {"Gaelic (Manx)", "gv"},
      {"Georgian", "ka"},
      {"German", "de"},
      {"Greek", "el"},
      {"Greenlandic", "kl"},
      {"Guarani", "gn"},
      {"Gujarati", "gu"},
      {"Haitian Creole", "ht"},
      {"Hausa", "ha"},
      {"Hawaiian", " "},
      // "iw" is the legacy code for Hebrew.
      {"Hebrew", "he", "iw"},
      {"Hindi", "hi"},
      {"Hungarian", "hu"},
      {"Ibibio", " "},
      {"Icelandic", "is"},
      {"Ido", "io"},
      {"Igbo", " "},
      // "in" is the legacy code for Indonesian.
      {"Indonesian", "id", "in"},
      {"Interlingua", "ia"},
      {"Interlingue", "ie"},
      {"Inuktitut", "iu"},
      {"Inupiak", "ik"},
      {"Irish", "ga"},
      {"Italian", "it"},
      {"Japanese", "ja"},
      {"Javanese", "jv"},
      {"Kannada", "kn"},
      {"Kanuri", " "},
      {"Kashmiri", "ks"},
      {"Kazakh", "kk"},
      {"Kinyarwanda (Ruanda)", "rw"},
      {"Kirghiz", "ky"},
      {"Kirundi (Rundi)", "rn"},
      {"Konkani", " "},
      {"Korean", "ko"},
      {"Kurdish", "ku"},
      {"Laothian", "lo"},
      {"Latin", "la"},
      {"Latvian (Lettish)", "lv"},
      {"Limburgish ( Limburger)", "li"},
      {"Lingala", "ln"},
      {"Lithuanian", "lt"},
      {"Macedonian", "mk"},
      {"Malagasy", "mg"},
      {"Malay", "ms"},
      {"Malayalam", "ml"},
      {" ", " "},
      {"Maltese", "mt"},
      {"Maori", "mi"},
      {"Marathi", "mr"},
      {"Moldavian", "mo"},
      {"Mongolian", "mn"},
      {"Nauru", "na"},
      {"Nepali", "ne"},
      {"Norwegian", "no"},
      {"Occitan", "oc"},
      {"Oriya", "or"},
      {"Oromo (Afaan Oromo)", "om"},
      {"Papiamentu", " "},
      {"Pashto (Pushto)", "ps"},
      {"Polish", "pl"},
      {"Portuguese", "pt"},
      {"Punjabi", "pa"},
      {"Quechua", "qu"},
      {"Rhaeto-Romance", "rm"},
      {"Romanian", "ro"},
      {"Russian", "ru"},
      {"Sami (Lappish)", " "},
      {"Samoan", "sm"},
      {"Sangro", "sg"},
      {"Sanskrit", "sa"},
      {"Serbian", "sr"},
      {"Serbo-Croatian", "sh"},
      {"Sesotho", "st"},
      {"Setswana", "tn"},
      {"Shona", "sn"},
      {"Sichuan Yi", "ii"},
      {"Sindhi", "sd"},
      {"Sinhalese", "si"},
      {"Siswati", "ss"},
      {"Slovak", "sk"},
      {"Slovenian", "sl"},
      {"Somali", "so"},
      {"Spanish", "es"},
      {"Sundanese", "su"},
      {"Swahili (Kiswahili)", "sw"},
      {"Swedish", "sv"},
      {"Syriac", " "},
      {"Tagalog", "tl"},
      {"Tajik", "tg"},
      {"Tamazight", " "},
      {"Tamil", "ta"},
      {"Tatar", "tt"},
      {"Telugu", "te"},
      {"Thai", "th"},
      {"Tibetan", "bo"},
      {"Tigrinya", "ti"},
      {"Tonga", "to"},
      {"Tsonga", "ts"},
      {"Turkish", "tr"},
      {"Turkmen", "tk"},
      {"Twi", "tw"},
      {"Uighur", "ug"},
      {"Ukrainian", "uk"},
      {"Urdu", "ur"},
      {"Uzbek", "uz"},
      {"Venda", " "},
      {"Vietnamese", "vi"},
      {"Volapük", "vo"},
      {"Wallon", "wa"},
      {"Welsh", "cy"},
      {"Wolof", "wo"},
      {"Xhosa", "xh"},
      {"Yi", " "},
      // "ji" is the legacy code for Yiddish.
      {"Yiddish", "yi", "ji"},
      {"Yoruba", "yo"},
      {"Zulu", "zu"},
    };
}