org.fcrepo.utilities.xml.DOM Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of fcrepo-common Show documentation
Common, generally useful utilities
There is a newer version: 6.5.0
package org.fcrepo.utilities.xml;


import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.fcrepo.utilities.XmlTransformUtility;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Helpers for doing DOM parsing and manipulations.
 * <p>
 * The parsing helpers ({@link #stringToDOM(String, boolean)} and
 * {@link #streamToDOM(InputStream, boolean)}) never throw on bad input; they
 * log a warning and return {@code null}. Parsers created by this class have
 * resolution of external entities and external DTDs switched off, so that
 * parsing a document cannot trigger file or network access.
 * <p>
 * Ported from the State and University Library project sbutils.
 */
public class DOM {

    private static final Logger log = LoggerFactory.getLogger(DOM.class);

    /**
     * The XML declaration for a UTF-8 encoded XML 1.0 document.
     */
    // NOTE(review): this constant was found as an empty string, which looks
    // like markup that was stripped from the file; the standard declaration
    // is restored here -- confirm against the upstream source.
    public static final String XML_HEADER =
            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";

    /**
     * Second constructor argument passed to every {@link XPathSelectorImpl}
     * created by this class.
     */
    // NOTE(review): presumably the capacity of the selector's expression
    // cache (see clearXPathCache) -- confirm against XPathSelectorImpl.
    private static final int SELECTOR_CACHE_SIZE = 50;

    /**
     * Parser features that are switched off on every factory created by
     * {@link #newDocumentBuilderFactory(boolean)} to prevent XML External
     * Entity (XXE) processing.
     */
    private static final String[] EXTERNAL_ENTITY_FEATURES = {
        "http://xml.org/sax/features/external-general-entities",
        "http://xml.org/sax/features/external-parameter-entities",
        "http://apache.org/xml/features/nonvalidating/load-external-dtd"
    };

    /** Shared selector without a namespace context, used by the static {@code select*} methods. */
    private static final XPathSelectorImpl selector =
            new XPathSelectorImpl(null, SELECTOR_CACHE_SIZE);

    /**
     * Extracts all textual and CDATA content from the direct children of the
     * given element node. Text inside nested child elements is not included.
     *
     * @param node the node to get the content from.
     * @return the concatenated text and CDATA content of the node's direct
     *         children, or the empty string if {@code node} is {@code null}
     *         or is not an element node.
     */
    public static String getElementNodeValue(Node node) {
        if (node == null || node.getNodeType() != Node.ELEMENT_NODE) {
            return "";
        }
        StringBuilder content = new StringBuilder();
        NodeList children = node.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            short type = child.getNodeType();
            if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) {
                // TODO: Check if we exceed the limit for getNodeValue
                content.append(child.getNodeValue());
            }
        }
        return content.toString();
    }

    /* **************************************** */

    /**
     * Creates a {@link DocumentBuilderFactory} with the requested namespace
     * awareness and with external entity and external DTD resolution
     * disabled.
     * <p>
     * The hardening is best-effort: a parser implementation that does not
     * recognise one of the features is still returned, and the unsupported
     * feature is logged at debug level.
     *
     * @param namespaceAware whether the produced parsers should be namespace
     *                       aware.
     * @return a newly created, configured factory.
     */
    private static DocumentBuilderFactory newDocumentBuilderFactory(
            boolean namespaceAware) {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(namespaceAware);
        for (String feature : EXTERNAL_ENTITY_FEATURES) {
            try {
                factory.setFeature(feature, false);
            } catch (ParserConfigurationException e) {
                // Not fatal: keep parsing working on parsers that do not
                // know this particular feature.
                log.debug("XML parser does not support feature " + feature, e);
            }
        }
        return factory;
    }

    /**
     * Parses an XML document from a String to a DOM.
     *
     * @param xmlString      a String containing an XML document.
     * @param namespaceAware if {@code true} the parsed DOM will reflect any
     *                       XML namespaces declared in the document
     * @return The document in a DOM or {@code null} on errors, including a
     *         {@code null} {@code xmlString}.
     */
    public static Document stringToDOM(String xmlString,
                                       boolean namespaceAware) {
        if (xmlString == null) {
            // StringReader rejects null with a NullPointerException; honour
            // the documented "null on errors" contract instead.
            log.warn("Unable to parse XML: the given string was null");
            return null;
        }
        try {
            InputSource in = new InputSource();
            in.setCharacterStream(new StringReader(xmlString));

            return newDocumentBuilderFactory(namespaceAware)
                    .newDocumentBuilder().parse(in);
        } catch (IOException e) {
            log.warn("I/O error when parsing XML :" + e.getMessage() + "\n"
                     + xmlString, e);
        } catch (SAXException e) {
            log.warn("Parse error when parsing XML :" + e.getMessage() + "\n"
                     + xmlString, e);
        } catch (ParserConfigurationException e) {
            log.warn("Parser configuration error when parsing XML :"
                     + e.getMessage() + "\n"
                     + xmlString, e);
        }
        return null;
    }

    /**
     * Parses an XML document from a String disregarding namespaces
     *
     * @param xmlString a String containing an XML document.
     * @return The document in a DOM or {@code null} on errors.
     */
    public static Document stringToDOM(String xmlString) {
        return stringToDOM(xmlString, false);
    }

    /**
     * Parses a XML document from a stream to a DOM or return
     * {@code null} on error. The stream is not closed by this method.
     *
     * @param xmlStream      a stream containing an XML document.
     * @param namespaceAware if {@code true} the constructed DOM will reflect
     *                       the namespaces declared in the XML document
     * @return The document in a DOM or {@code null} in case of errors,
     *         including a {@code null} {@code xmlStream}.
     */
    public static Document streamToDOM(InputStream xmlStream,
                                       boolean namespaceAware) {
        if (xmlStream == null) {
            // DocumentBuilder.parse rejects null with an
            // IllegalArgumentException; honour the documented contract.
            log.warn("Unable to parse XML: the given stream was null");
            return null;
        }
        try {
            return newDocumentBuilderFactory(namespaceAware)
                    .newDocumentBuilder().parse(xmlStream);
        } catch (IOException e) {
            log.warn("I/O error when parsing stream :" + e.getMessage(), e);
        } catch (SAXException e) {
            log.warn("Parse error when parsing stream :" + e.getMessage(), e);
        } catch (ParserConfigurationException e) {
            log.warn("Parser configuration error when parsing XML stream: "
                     + e.getMessage(), e);
        }
        return null;
    }

    /**
     * Parses a XML document from a stream to a DOM disregarding namespaces.
     * Returns {@code null} on error.
     *
     * @param xmlStream a stream containing an XML document.
     * @return The document in a DOM or {@code null} in case of errors
     */
    public static Document streamToDOM(InputStream xmlStream) {
        return streamToDOM(xmlStream, false);
    }

    /**
     * Convert the given DOM to an UTF-8 XML String without an XML
     * declaration.
     *
     * @param dom the Document to convert.
     * @return the dom as an XML String.
     * @throws Exception if the dom could not be converted; a failed
     *                   transformation surfaces as a
     *                   {@link TransformerException}.
     */
    public static String domToString(Node dom) throws Exception {
        return domToString(dom, false);
    }

    /**
     * Convert the given DOM to an UTF-8 XML String.
     *
     * @param dom                the Document to convert.
     * @param withXmlDeclaration if true, an XML-declaration is prepended.
     * @return the dom as an XML String.
     * @throws Exception if the dom could not be converted; a failed
     *                   transformation surfaces as a
     *                   {@link TransformerException}.
     */
    // TODO: Consider optimizing this with ThreadLocal Transformers
    public static String domToString(Node dom, boolean withXmlDeclaration)
            throws Exception {
        Transformer t = XmlTransformUtility.getTransformer();
        t.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        t.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION,
                            withXmlDeclaration ? "no" : "yes");
        t.setOutputProperty(OutputKeys.METHOD, "xml");

        /* Transformer */
        StringWriter sw = new StringWriter();
        t.transform(new DOMSource(dom), new StreamResult(sw));

        return sw.toString();
    }

    /**
     * Create a new {@link XPathSelector} instance with a given namespace
     * mapping. The arguments are parsed as
     * {@code prefix1, uri1, prefix2, uri2, ...}.
     * <p>
     * If you want to apply XPath expressions without namespaces use the static
     * {@code select*} methods directly on the {@code DOM} class.
     * <p>
     * Note that if you want to apply XPath selections on a DOM constructed from
     * either {@link DOM#streamToDOM(InputStream, boolean)} or
     * {@link DOM#stringToDOM(String, boolean)} you must pass
     * {@code namespaceAware=true} as the boolean argument. Namespaced
     * selections will fail on a DOM constructed without namespaces.
     *
     * @param nsContext prefix, uri pairs
     * @return a newly allocated {@link XPathSelector}
     * @throws IllegalArgumentException if an uneven number of arguments are
     *                                  passed
     */
    public static XPathSelector createXPathSelector(String... nsContext) {
        return new XPathSelectorImpl(
                new DefaultNamespaceContext(null, nsContext),
                SELECTOR_CACHE_SIZE);
    }

    /**
     * Extract an integer value from {@code node} or return {@code defaultValue}
     * if it is not found.
     *
     * @param node         the node with the wanted attribute.
     * @param xpath        the XPath to extract.
     * @param defaultValue the default value.
     * @return the value of the path, if existing, else
     *         defaultValue
     */
    public static Integer selectInteger(Node node, String xpath, Integer defaultValue) {
        return selector.selectInteger(node, xpath, defaultValue);
    }

    /**
     * Extract an integer value from {@code node} or return {@code null} if it
     * is not found
     *
     * @param node  the node with the wanted attribute.
     * @param xpath the XPath to extract.
     * @return the value of the path or {@code null}
     */
    public static Integer selectInteger(Node node, String xpath) {
        return selector.selectInteger(node, xpath);
    }

    /**
     * Extract a double precision floating point value from {@code node} or
     * return {@code defaultValue} if it is not found
     *
     * @param node         the node with the wanted attribute.
     * @param xpath        the XPath to extract.
     * @param defaultValue the default value.
     * @return the value of the path, if existing, else
     *         defaultValue
     */
    public static Double selectDouble(Node node,
                                      String xpath, Double defaultValue) {
        return selector.selectDouble(node, xpath, defaultValue);
    }

    /**
     * Extract a double precision floating point value from {@code node} or
     * return {@code null} if it is not found
     *
     * @param node  the node with the wanted attribute.
     * @param xpath the XPath to extract.
     * @return the value of the path or {@code null}
     */
    public static Double selectDouble(Node node, String xpath) {
        return selector.selectDouble(node, xpath);
    }

    /**
     * Extract a boolean value from {@code node} or return {@code defaultValue}
     * if there is no boolean value at {@code xpath}
     *
     * @param node         the node with the wanted attribute.
     * @param xpath        the path to extract.
     * @param defaultValue the default value.
     * @return the value of the path, if existing, else
     *         {@code defaultValue}
     */
    public static Boolean selectBoolean(Node node,
                                        String xpath, Boolean defaultValue) {
        return selector.selectBoolean(node, xpath, defaultValue);
    }

    /**
     * Extract a boolean value from {@code node} or return {@code false}
     * if there is no boolean value at {@code xpath}
     *
     * @param node  the node with the wanted attribute.
     * @param xpath the path to extract.
     * @return the value of the path, if existing, else
     *         {@code false}
     */
    public static Boolean selectBoolean(Node node, String xpath) {
        return selector.selectBoolean(node, xpath);
    }

    /**
     * Extract the given value from the node as a String or if the value cannot
     * be extracted, {@code defaultValue} is returned.
     * <p>
     * Example: To get the value of the attribute "foo" in the node, specify
     * "@foo" as the path.
     * <p>
     * Note: This method does not handle namespaces explicitly.
     *
     * @param node         the node with the wanted attribute
     * @param xpath        the XPath to extract.
     * @param defaultValue the default value
     * @return the value of the path, if existing, else
     *         {@code defaultValue}
     */
    public static String selectString(Node node, String xpath, String defaultValue) {
        return selector.selectString(node, xpath, defaultValue);
    }

    /**
     * Extract the given value from the node as a String or if the value cannot
     * be extracted, the empty string is returned
     * <p>
     * Example: To get the value of the attribute "foo" in the node, specify
     * "@foo" as the path.
     * <p>
     * Note: This method does not handle namespaces explicitly.
     *
     * @param node  the node with the wanted attribute
     * @param xpath the XPath to extract
     * @return the value of the path, if existing, else
     *         the empty string
     */
    public static String selectString(Node node, String xpath) {
        return selector.selectString(node, xpath);
    }

    /**
     * Select the {@link NodeList} with the given XPath.
     * <p>
     * Note: This is a convenience method that logs exceptions instead of
     * throwing them.
     *
     * @param node  the root document.
     * @param xpath the xpath for the Node list.
     * @return the NodeList requested or an empty NodeList if unattainable
     */
    public static NodeList selectNodeList(Node node, String xpath) {
        return selector.selectNodeList(node, xpath);
    }

    /**
     * Select the Node with the given XPath.
     * <p>
     * Note: This is a convenience method that logs exceptions instead of
     * throwing them.
     *
     * @param dom   the root document.
     * @param xpath the xpath for the node.
     * @return the Node or null if unattainable.
     */
    public static Node selectNode(Node dom, String xpath) {
        return selector.selectNode(dom, xpath);
    }

    /**
     * Package private method to clear the expression cache
     * - used for unit testing
     */
    static void clearXPathCache() {
        selector.clearCache();
    }
}