All downloads are free. Search and download functionality uses the official Maven repository.

com.adobe.epubcheck.ctc.EntitySearch Maven / Gradle / Ivy

Go to download

EPUBCheck is a tool to validate the conformance of EPUB publications against the EPUB specifications. EPUBCheck can be run as a standalone command-line tool or used as a Java library.

There is a newer version: 5.1.0
Show newest version
package com.adobe.epubcheck.ctc;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Scanner;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import com.adobe.epubcheck.api.EPUBLocation;
import com.adobe.epubcheck.api.Report;
import com.adobe.epubcheck.messages.MessageId;
import com.adobe.epubcheck.ocf.EncryptionFilter;
import com.adobe.epubcheck.util.EPUBVersion;

/**
 *  ===  WARNING  ==========================================
 *  This class is scheduled to be refactored and integrated
 *  in another package.
 *  Please keep changes minimal (bug fixes only) until then.
 *  ========================================================
 */
class EntitySearch extends TextSearch
{
  static final Pattern entityPattern = Pattern.compile("&([A-Za-z0-9]+)([;|\\s])");
  static final HashSet legalEntities2_0;
  static final HashSet legalEntities3_0;

  MessageId ENTITY_INVALID = MessageId.HTM_023;
  MessageId ENTITY_OK = MessageId.HTM_006;
  MessageId ENTITY_IMPROPER = MessageId.HTM_024;

  static
  {
    legalEntities3_0 = new HashSet();
    Collections.addAll(legalEntities3_0, "&", "'", """, "<", ">");

    legalEntities2_0 = new HashSet();
    Collections.addAll(legalEntities2_0, " ", "¡", "¢", "£", "¤",
        "¥", "¦", "§", "¨", "©", "ª", "«", "¬", "­", "®",
        "¯", "°", "±", "²", "³", "´", "µ", "¶", "·", "¸",
        "¹", "º", "»", "¼", "½", "¾", "¿", "À", "Á",
        "Â", "Ã", "Ä", "Å", "Æ", "Ç", "È", "É", "Ê", "Ë",
        "Ì", "Í", "Î", "Ï", "Ð", "Ñ", "Ò", "Ó", "Ô", "Õ",
        "Ö", "×", "Ø", "Ù", "Ú", "Û", "Ü", "Ý", "Þ", "ß",
        "à", "á", "â", "ã", "ä", "å", "æ", "ç", "è", "é",
        "ê", "ë", "ì", "í", "î", "ï", "ð", "ñ", "ò", "ó",
        "ô", "õ", "ö", "÷", "ø", "ù", "ú", "û", "ü", "ý",
        "þ", "ÿ", "Œ", "œ", "Š", "š", "Ÿ", "ƒ", "ˆ", "˜",
        "Α", "Β", "Γ", "Δ", "Ε", "Ζ", "Η", "Θ", "Ι", "Κ",
        "Λ", "Μ", "Ν", "Ξ", "Ο", "Π", "Ρ", "Σ", "Τ", "Υ", "Φ", "Χ",
        "Ψ", "Ω", "α", "β", "γ", "δ", "ε", "ζ", "η", "θ", "ι",
        "κ", "λ", "μ", "ν", "ξ", "ο", "π", "ρ", "ς", "σ", "τ",
        "υ", "φ", "χ", "ψ", "ω", "ϑ", "ϒ", "ϖ", " ", " ", " ",
        "‌", "‍", "‎", "‏", "–", "—", "‘", "’", "‚", "“", "”",
        "„", "†", "‡", "•", "…", "‰", "′", "″", "‹", "›",
        "‾", "⁄", "€", "ℑ", "℘", "ℜ", "™", "ℵ", "←", "↑",
        "→", "↓", "↔", "↵", "⇐", "⇑", "⇒", "⇓", "⇔", "∀", "∂",
        "∃", "∅", "∇", "∈", "∉", "∋", "∏", "∑", "−", "∗", "√",
        "∝", "∞", "∠", "∧", "∨", "∩", "∪", "∫", "∴", "∼", "≅", "≈",
        "≠", "≡", "≤", "≥", "⊂", "⊃", "⊄", "⊆", "⊇", "⊕", "⊗", "⊥",
        "⋅", "⋮", "⌈", "⌉", "⌊", "⌋", "⟨", "⟩", "◊", "♠", "♣",
        "♥", "♦");

  }

  public EntitySearch(EPUBVersion version, ZipFile zip, Report report)
  {
    super(version, zip, report);
  }


  public Vector Search(String entry)
  {
    Vector result = new Vector();
    InputStream is = null;
    Scanner in = null;
    try
    {
      is = getInputStream(entry);
      in = new Scanner(is);
      int lineCounter = 1;

      while (in.hasNextLine())
      {
        String line = in.nextLine();
        Matcher matcher = entityPattern.matcher(line);
        int position = 0;

        while (matcher.find(position))
        {
          MessageId messageCode = ENTITY_INVALID;
          position = matcher.end();
          String matchedText = line.substring(matcher.start(), matcher.end());
          if (version == EPUBVersion.VERSION_2)
          {
            if (legalEntities3_0.contains(matchedText) || legalEntities2_0.contains(matchedText))
            {
              // its in either the legal 2.0 list or the 3.0 list. Simply emit a usage message
              messageCode = ENTITY_OK;
            }
          }
          else if (version == EPUBVersion.VERSION_3)
          {
            if (legalEntities3_0.contains(matchedText))
            {
              // its in the 3.0 list.  just emit a usage message
              messageCode = ENTITY_OK;
            }
            else if (legalEntities2_0.contains(matchedText))
            {
              // its in the 2.0 list.  Emit a usage message saying that only & ' etc. are allowed
              messageCode = ENTITY_IMPROPER;
            }
          }

          int contextStart = Math.max(0, matcher.start() - 20);
          int contextEnd = Math.min(contextStart + 40, line.length() - 1);
          String context = line.substring(contextStart, contextEnd);

          if (messageCode == ENTITY_INVALID)
          {
            // emit the erroneous text along with the message
            report.message(messageCode, EPUBLocation.create(entry, lineCounter, matcher.start(), context.trim()), matchedText);
          }
          else
          {
            report.message(messageCode, EPUBLocation.create(entry, lineCounter, matcher.start(), context.trim()));
          }
        }
        lineCounter++;
      }
    }
    catch (FileNotFoundException e1)
    {
      String fileName = new File(zip.getName()).getName();
      report.message(MessageId.RSC_001, EPUBLocation.create(fileName), entry);
    }
    catch (IOException e1)
    {
      String fileName = new File(zip.getName()).getName();
      report.message(MessageId.PKG_008, EPUBLocation.create(fileName), entry);
    }
    catch (Exception e)
    {
      e.printStackTrace();
      report.message(MessageId.RSC_005, EPUBLocation.create(entry), e.getMessage());
    }
    finally
    {
      if (is != null)
      {
        try
        {
          is.close();
        }
        catch (Exception ignored)
        {
        }
      }
      if (in != null) {
	    in.close();
      }
    }
    return result;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy