// All downloads are free. The search and download functionality uses the official Maven repository.
//
// org.daisy.braille.utils.pef.XPathPEFBook (Maven / Gradle / Ivy)

package org.daisy.braille.utils.pef;

import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

class XPathPEFBook {
    private static final Pattern eightDotPattern = Pattern.compile("[\u2840-\u28ff]");
    private static final Logger logger = Logger.getLogger(XPathPEFBook.class.getCanonicalName());

    static PEFBook load(
        URI uri
    ) throws ParserConfigurationException, SAXException, XPathExpressionException, IOException {
        return load(uri, false);
    }

    static PEFBook load(
        URI uri,
        boolean continueOnError
    ) throws ParserConfigurationException, SAXException, IOException, XPathExpressionException {
        Map> metadata;
        // Book properties
        int volumes;
        int pageTags;
        int pages;
        int maxWidth;
        int maxHeight;
        String inputEncoding;
        boolean containsEightDot;
        int[] startPages;
        int tmp = 0;
        Document d = null;
        String encoding = null;
        metadata = new HashMap<>();

        try {
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            dbf.setNamespaceAware(true);
            DocumentBuilder db = dbf.newDocumentBuilder();
            d = db.parse(uri.toString());
            encoding = d.getInputEncoding();
            List al;
            NodeList nl = d.getDocumentElement().getElementsByTagName("meta").item(0).getChildNodes();
            for (int i = 0; i < nl.getLength(); i++) {
                Node n = nl.item(i);
                if (
                    n != null &&
                    n.getNodeType() == Node.ELEMENT_NODE &&
                    "http://purl.org/dc/elements/1.1/".equals(n.getNamespaceURI())
                ) {
                    String name = n.getLocalName();
                    if (metadata.containsKey(name)) {
                        al = metadata.remove(name);
                    } else {
                        al = new ArrayList<>();
                    }
                    al.add(n.getTextContent());
                    metadata.put(name, al);
                }
            }
        } catch (ParserConfigurationException e) {
            if (continueOnError) {
                logger.throwing("XPathPEFBook", e.getMessage(), e);
            } else {
                throw e;
            }
        } catch (SAXException e) {
            if (continueOnError) {
                logger.throwing("XPathPEFBook", e.getMessage(), e);
            } else {
                throw e;
            }
        } catch (IOException e) {
            if (continueOnError) {
                logger.throwing("XPathPEFBook", e.getMessage(), e);
            } else {
                throw e;
            }
        }

        inputEncoding = encoding;

        XPath xp = XPathFactory.newInstance().newXPath();
        xp.setNamespaceContext(new PEFNamespaceContext());

        // Count volumes
        tmp = 0;
        try {
            tmp = ((Double) xp.evaluate("count(//pef:volume)", d, XPathConstants.NUMBER)).intValue();
        } catch (XPathExpressionException e) {
            tmp = 0;
        }
        volumes = tmp;

        // Count page tags
        tmp = 0;
        try {
            tmp = ((Double) xp.evaluate("count(//pef:page)", d, XPathConstants.NUMBER)).intValue();
        } catch (XPathExpressionException e) {
            if (continueOnError) {
                tmp = 0;
            } else {
                throw e;
            }
        }
        pageTags = tmp;

        // Count pages including blank
        tmp = 0;
        try {
            tmp = ((Double) xp.evaluate(
                "count(//pef:section[ancestor-or-self::pef:*[@duplex][1][@duplex='false']]" +
                "/descendant::pef:page)*2 + count(//pef:section[ancestor-or-self::pef:*[@duplex][1][@duplex='true']]" +
                "/descendant::pef:page) + count(//pef:section[count(descendant::pef:page) mod 2 = 1]" +
                "[ancestor-or-self::pef:*[@duplex][1][@duplex='true']])-count(((//pef:section)[last()])[" +
                "count(descendant::pef:page) mod 2 = 1][ancestor-or-self::pef:*[@duplex][1][@duplex='true']])",
                d,
                XPathConstants.NUMBER)
            ).intValue();
        } catch (XPathExpressionException e) {
            if (continueOnError) {
                tmp = 0;
            } else {
                throw e;
            }
        }
        pages = tmp;

        // Get max width
        tmp = 0;
        try {
            NodeList ns = (NodeList) xp.evaluate("//pef:*/@cols", d, XPathConstants.NODESET);
            for (int i = 0; i < ns.getLength(); ++i) {
                Attr attr = (Attr) ns.item(i);
                String colsValue = attr.getNodeValue();
                tmp = Math.max(tmp, Integer.valueOf(colsValue));
            }
        } catch (XPathExpressionException e) {
            if (continueOnError) {
                tmp = 0;
            } else {
                throw e;
            }
        }
        maxWidth = tmp;

        // Get max height
        tmp = 0;
        try {
            NodeList ns = (NodeList) xp.evaluate("//pef:*/@rows", d, XPathConstants.NODESET);
            for (int i = 0; i < ns.getLength(); ++i) {
                Attr attr = (Attr) ns.item(i);
                String colsValue = attr.getNodeValue();
                tmp = Math.max(tmp, Integer.valueOf(colsValue));
            }
        } catch (XPathExpressionException e) {
            if (continueOnError) {
                tmp = 0;
            } else {
                throw e;
            }
        }
        maxHeight = tmp;

        // Contains eight dot?
        boolean bTmp = false;
        try {
            NodeList texts = (NodeList) xp.evaluate("//pef:row/text()", d, XPathConstants.NODESET);
            for (int i = 0; i < texts.getLength(); ++i) {
                String text = texts.item(i).getTextContent();
                if (eightDotPattern.matcher(text).find()) {
                    bTmp = true;
                }
            }
        } catch (XPathExpressionException e) {
            if (!continueOnError) {
                throw e;
            }
        }
        containsEightDot = bTmp;

        // get start pages
        startPages = new int[volumes];
        for (int i = 1; i <= volumes; i++) {
            try {
                Node page = (Node) xp.evaluate(
                "(//pef:volume)[position()=" + (i) + "]/descendant::pef:page[1]",
                    d,
                    XPathConstants.NODE
                );
                int pageOffset = ((Double) xp.evaluate(
                "count(preceding::pef:section[ancestor-or-self::pef:*[@duplex][1][@duplex='false']]" +
                    "/descendant::pef:page)*2 + " +
                    "count(preceding::pef:section[ancestor-or-self::pef:*[@duplex][1][@duplex='true']]" +
                    "/descendant::pef:page) + count(preceding::pef:section[count(descendant::pef:page) mod 2 = 1]" +
                    "[ancestor-or-self::pef:*[@duplex][1][@duplex='true']])",
                    page,
                    XPathConstants.NUMBER)
                ).intValue();
                startPages[i - 1] = pageOffset + 1;
            } catch (XPathExpressionException e) {
                if (continueOnError) {
                    logger.throwing("XPathPEFBook", e.getMessage(), e);
                    startPages[i - 1] = 0;
                } else {
                    throw e;
                }
            }
        }
        return new PEFBook(
            uri,
            metadata,
            volumes,
            pages,
            pageTags,
            maxWidth,
            maxHeight,
            inputEncoding,
            containsEightDot,
            startPages
        );
    }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy