All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.adobe.epubcheck.ctc.EpubLangCheck Maven / Gradle / Ivy

Go to download

EpubCheck is a tool to validate IDPF EPUB files. It can detect many types of errors in EPUB. OCF container structure, OPF and OPS mark-up, and internal reference consistency are checked. EpubCheck can be run as a standalone command-line tool, installed as a Java server-side web application or used as a Java library.

There is a newer version: 4.1.1
Show newest version
package com.adobe.epubcheck.ctc;

import com.adobe.epubcheck.api.Report;
import com.adobe.epubcheck.ctc.epubpackage.EpubPackage;
import com.adobe.epubcheck.ctc.epubpackage.ManifestItem;
import com.adobe.epubcheck.ctc.xml.LangAttributeHandler;
import com.adobe.epubcheck.ctc.xml.XMLContentDocParser;
import com.adobe.epubcheck.messages.MessageId;
import com.adobe.epubcheck.messages.MessageLocation;
import com.adobe.epubcheck.opf.DocumentValidator;
import com.adobe.epubcheck.util.PathUtil;
import com.adobe.epubcheck.util.SearchDictionary;
import com.adobe.epubcheck.util.SearchDictionary.DictionaryType;

import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

public class EpubLangCheck implements DocumentValidator
{
  private final ZipFile zip;
  private final Report report;
  private final EpubPackage epack;

  public EpubLangCheck(EpubPackage epack, Report report)
  {
    this.zip = epack.getZip();
    this.report = report;
    this.epack = epack;
  }

  @Override
  public boolean validate()
  {
    boolean result = false;
    SearchDictionary vtsd = new SearchDictionary(DictionaryType.VALID_TEXT_MEDIA_TYPES);
    for (int i = 0; i < epack.getManifest().itemsLength(); i++)
    {
      ManifestItem mi = epack.getManifest().getItem(i);
      if (vtsd.isValidMediaType(mi.getMediaType()))
      {
        XMLContentDocParser parser = new XMLContentDocParser(this.zip, report);
        LangAttributeHandler sh = new LangAttributeHandler();
        String fileToParse = epack.getManifestItemFileName(mi);

        ZipEntry entry = this.zip.getEntry(fileToParse);
        if (entry == null)
        {
          report.message(MessageId.RSC_001, new MessageLocation(this.epack.getFileName(), -1, -1), fileToParse);
          continue;
        }

        parser.parseDoc(fileToParse, sh);
        String langAttribute = sh.getLangAttr();
        String xmlLangAttribute = sh.getXmlLangAttr();
        if (langAttribute != null && xmlLangAttribute != null)
        {
          if (xmlLangAttribute.compareToIgnoreCase(langAttribute) != 0)
          {
            report.message(MessageId.HTM_017, new MessageLocation(fileToParse, -1, -1));
          }

          if (!isValidLanguageDefinition(xmlLangAttribute))
          {
            report.message(MessageId.HTM_018, new MessageLocation(fileToParse, -1, -1));
          }
          if (!isValidLanguageDefinition(langAttribute))
          {
            report.message(MessageId.HTM_019, new MessageLocation(fileToParse, -1, -1));
          }
        }
        else
        {
          if (xmlLangAttribute == null)
          {
            report.message(MessageId.HTM_020, new MessageLocation(fileToParse, -1, -1));
          }
          if (langAttribute == null)
          {
            report.message(MessageId.HTM_021, new MessageLocation(fileToParse, -1, -1));
          }
        }
      }
    }
    return result;
  }

  private boolean isValidLanguageDefinition(String language)
  {
    // ignore language subclasses like en-us or fr-ca.
    int pos = language.indexOf("-");
    if (pos >= 0)
    {
      language = language.substring(0, pos);
    }

    for (String[] langValue : langValues)
    {
      if (language.compareToIgnoreCase(langValue[1]) == 0)
      {
        return true;
      }
    }
    return false;
  }

  private final String[][] langValues = new String[][]
      {
          {"Abkhazian", "ab"},
          {"Afar", "aa"},
          {"Afrikaans", "af"},
          {"Albanian", "sq"},
          {"Amharic", "am"},
          {"Arabic", "ar"},
          {"Aragonese", "an"},
          {"Armenian", "hy"},
          {"Assamese", "as"},
          {"Aymara", "ay"},
          {"Azerbaijani", "az"},
          {"Bashkir", "ba"},
          {"Basque", "eu"},
          {"Bengali (Bangla)", "bn"},
          {"Bhutani", "dz"},
          {"Bihari", "bh"},
          {"Bislama", "bi"},
          {"Breton", "br"},
          {"Bulgarian", "bg"},
          {"Burmese", "my"},
          {"Byelorussian (Belarusian)", "be"},
          {"Cambodian", "km"},
          {"Catalan", "ca"},
          {"Cherokee", " "},
          {"Chewa", " "},
          {"Chinese (Simplified)", "zh"},
          {"Chinese (Traditional)", "zh"},
          {"Corsican", "co"},
          {"Croatian", "hr"},
          {"Czech", "cs"},
          {"Danish", "da"},
          {"Divehi", " "},
          {"Dutch", "nl"},
          {"Edo", " "},
          {"English", "en"},
          {"Esperanto", "eo"},
          {"Estonian", "et"},
          {"Faeroese", "fo"},
          {"Farsi", "fa"},
          {"Fiji", "fj"},
          {"Finnish", "fi"},
          {"Flemish", " "},
          {"French", "fr"},
          {"Frisian", "fy"},
          {"Fulfulde", " "},
          {"Galician", "gl"},
          {"Gaelic (Scottish)", "gd"},
          {"Gaelic (Manx)", "gv"},
          {"Georgian", "ka"},
          {"German", "de"},
          {"Greek", "el"},
          {"Greenlandic", "kl"},
          {"Guarani", "gn"},
          {"Gujarati", "gu"},
          {"Haitian Creole", "ht"},
          {"Hausa", "ha"},
          {"Hawaiian", " "},
          {"Hebrew", "he, iw"},
          {"Hindi", "hi"},
          {"Hungarian", "hu"},
          {"Ibibio", " "},
          {"Icelandic", "is"},
          {"Ido", "io"},
          {"Igbo", " "},
          {"Indonesian", "id, in"},
          {"Interlingua", "ia"},
          {"Interlingue", "ie"},
          {"Inuktitut", "iu"},
          {"Inupiak", "ik"},
          {"Irish", "ga"},
          {"Italian", "it"},
          {"Japanese", "ja"},
          {"Javanese", "jv"},
          {"Kannada", "kn"},
          {"Kanuri", " "},
          {"Kashmiri", "ks"},
          {"Kazakh", "kk"},
          {"Kinyarwanda (Ruanda)", "rw"},
          {"Kirghiz", "ky"},
          {"Kirundi (Rundi)", "rn"},
          {"Konkani", " "},
          {"Korean", "ko"},
          {"Kurdish", "ku"},
          {"Laothian", "lo"},
          {"Latin", "la"},
          {"Latvian (Lettish)", "lv"},
          {"Limburgish ( Limburger)", "li"},
          {"Lingala", "ln"},
          {"Lithuanian", "lt"},
          {"Macedonian", "mk"},
          {"Malagasy", "mg"},
          {"Malay", "ms"},
          {"Malayalam", "ml"},
          {" ", " "},
          {"Maltese", "mt"},
          {"Maori", "mi"},
          {"Marathi", "mr"},
          {"Moldavian", "mo"},
          {"Mongolian", "mn"},
          {"Nauru", "na"},
          {"Nepali", "ne"},
          {"Norwegian", "no"},
          {"Occitan", "oc"},
          {"Oriya", "or"},
          {"Oromo (Afaan Oromo)", "om"},
          {"Papiamentu", " "},
          {"Pashto (Pushto)", "ps"},
          {"Polish", "pl"},
          {"Portuguese", "pt"},
          {"Punjabi", "pa"},
          {"Quechua", "qu"},
          {"Rhaeto-Romance", "rm"},
          {"Romanian", "ro"},
          {"Russian", "ru"},
          {"Sami (Lappish)", " "},
          {"Samoan", "sm"},
          {"Sangro", "sg"},
          {"Sanskrit", "sa"},
          {"Serbian", "sr"},
          {"Serbo-Croatian", "sh"},
          {"Sesotho", "st"},
          {"Setswana", "tn"},
          {"Shona", "sn"},
          {"Sichuan Yi", "ii"},
          {"Sindhi", "sd"},
          {"Sinhalese", "si"},
          {"Siswati", "ss"},
          {"Slovak", "sk"},
          {"Slovenian", "sl"},
          {"Somali", "so"},
          {"Spanish", "es"},
          {"Sundanese", "su"},
          {"Swahili (Kiswahili)", "sw"},
          {"Swedish", "sv"},
          {"Syriac", " "},
          {"Tagalog", "tl"},
          {"Tajik", "tg"},
          {"Tamazight", " "},
          {"Tamil", "ta"},
          {"Tatar", "tt"},
          {"Telugu", "te"},
          {"Thai", "th"},
          {"Tibetan", "bo"},
          {"Tigrinya", "ti"},
          {"Tonga", "to"},
          {"Tsonga", "ts"},
          {"Turkish", "tr"},
          {"Turkmen", "tk"},
          {"Twi", "tw"},
          {"Uighur", "ug"},
          {"Ukrainian", "uk"},
          {"Urdu", "ur"},
          {"Uzbek", "uz"},
          {"Venda", " "},
          {"Vietnamese", "vi"},
          {"Volapük", "vo"},
          {"Wallon", "wa"},
          {"Welsh", "cy"},
          {"Wolof", "wo"},
          {"Xhosa", "xh"},
          {"Yi", " "},
          {"Yiddish", "yi, ji"},
          {"Yoruba", "yo"},
          {"Zulu", "zu"},
      };
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy