org.identityconnectors.common.XmlUtil Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of connector-framework Show documentation
There is a newer version: 1.6.0.0
/*
 * ====================
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2008-2009 Sun Microsystems, Inc. All rights reserved.
 *
 * The contents of this file are subject to the terms of the Common Development
 * and Distribution License("CDDL") (the "License").  You may not use this file
 * except in compliance with the License.
 *
 * You can obtain a copy of the License at
 * http://opensource.org/licenses/cddl1.php
 * See the License for the specific language governing permissions and limitations
 * under the License.
 *
 * When distributing the Covered Code, include this CDDL Header Notice in each file
 * and include the License file at http://opensource.org/licenses/cddl1.php.
 * If applicable, add the following below this CDDL Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyrighted [year] [name of copyright owner]"
 * ====================
 */
package org.identityconnectors.common;

import java.io.IOException;
import java.io.StringReader;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.Text;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Static helpers for parsing XML strings, navigating DOM trees and escaping
 * text for inclusion in XML documents.
 */
public final class XmlUtil {

    private XmlUtil() {
        // static utility class; never instantiated
    }

    // ///////////////////////////////////////////////////////////
    //
    // Constants
    //
    // //////////////////////////////////////////////////////////

    /** Delimiter value meaning "do not escape any quote character". */
    public static final char NO_DELIM = 0;
    /** Delimiter value for text that will be enclosed in double quotes. */
    public static final char DOUBLE_QUOTE = '"';
    /** Delimiter value for text that will be enclosed in single quotes. */
    public static final char SINGLE_QUOTE = '\'';

    // ///////////////////////////////////////////////////////////
    //
    // Parsing
    //
    // //////////////////////////////////////////////////////////

    /**
     * Parses a string without validation and returns the Document.
     *
     * External entities whose public or system identifier ends in
     * {@code .dtd} are replaced by an empty stream so the parser never
     * fetches a DTD. Any other external entity is left to the parser's
     * default resolution; keep that in mind before passing untrusted input.
     *
     * @param xml
     *            the XML text to parse
     * @return the parsed DOM document
     * @throws IOException
     *             if the parser reports an I/O error
     * @throws SAXException
     *             if the text cannot be parsed
     * @throws ParserConfigurationException
     *             if a document builder cannot be created
     */
    public static Document parseString(String xml) throws IOException, SAXException,
            ParserConfigurationException {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(false);
        DocumentBuilder builder = factory.newDocumentBuilder();
        // some parsers will attempt to find and parse the dtd even
        // if not validating, and that makes parsing very slow
        builder.setEntityResolver(new DummyDTDResolver());
        return builder.parse(new InputSource(new StringReader(xml)));
    }

    /**
     * Entity resolver that answers every request for a {@code .dtd} with an
     * empty document and defers everything else to the parser (by returning
     * null).
     */
    private static final class DummyDTDResolver implements EntityResolver {
        @Override
        public InputSource resolveEntity(String publicID, String systemID) {
            boolean publicIsDtd = publicID != null && publicID.endsWith(".dtd");
            boolean systemIsDtd = systemID != null && systemID.endsWith(".dtd");
            if (publicIsDtd || systemIsDtd) {
                return new InputSource(new StringReader(""));
            }
            return null;
        }
    }

    // ///////////////////////////////////////////////////////////
    //
    // DOM Navigation utilities
    //
    // //////////////////////////////////////////////////////////

    /**
     * Return the value of an attribute on an element.
     *
     * The DOM getAttribute method returns an empty string if the attribute
     * doesn't exist. Here, we detect this and return null. As a consequence
     * an attribute that is present but empty is also reported as null.
     *
     * @param e
     *            the element to read from
     * @param name
     *            the attribute name
     * @return the attribute value, or null if it is missing or empty
     */
    public static String getAttribute(Element e, String name) {
        String value = e.getAttribute(name);
        if (value != null && value.length() == 0) {
            return null;
        }
        return value;
    }

    /**
     * Find the first immediate child element with the given tag name.
     *
     * @param node
     *            the parent node. May be null.
     * @param name
     *            the tag name to look for
     * @return the first matching child element, or null if there is none
     */
    public static Element findImmediateChildElement(Node node, String name) {
        if (node == null) {
            return null;
        }
        for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                Element candidate = (Element) child;
                if (candidate.getTagName().equals(name)) {
                    return candidate;
                }
            }
        }
        return null;
    }

    /**
     * Returns the first child element or null if none found.
     *
     * @param node
     *            The node. May be null.
     * @return the first child element or null if none found
     */
    public static Element getFirstChildElement(Node node) {
        if (node == null) {
            return null;
        }
        Node child = node.getFirstChild();
        if (child instanceof Element) {
            return (Element) child;
        }
        // first child is not an element (or there is none): scan its siblings
        return getNextElement(child);
    }

    /**
     * Get the next right sibling that is an element.
     *
     * @param node
     *            the node whose following siblings are searched. May be null.
     * @return the next sibling element, or null if there is none
     */
    public static Element getNextElement(Node node) {
        if (node == null) {
            return null;
        }
        for (Node next = node.getNextSibling(); next != null; next = next.getNextSibling()) {
            if (next.getNodeType() == Node.ELEMENT_NODE) {
                return (Element) next;
            }
        }
        return null;
    }

    /**
     * Locate the first text (or CDATA) node at or below the given node, in
     * document order. If the ignoreEmpty flag is true, text nodes that
     * contain only whitespace characters are skipped.
     *
     * Note that if you're trying to extract element content, you probably
     * don't want this since parsers can break up pcdata into multiple
     * adjacent text nodes. See getContent() for a more useful method.
     */
    private static Text findText(Node node, boolean ignoreEmpty) {
        if (node == null) {
            return null;
        }

        short type = node.getNodeType();
        if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) {
            Text text = (Text) node;
            if (!ignoreEmpty || text.getData().trim().length() > 0) {
                return text;
            }
        }

        for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
            Text nested = findText(child, ignoreEmpty);
            if (nested != null) {
                return nested;
            }
        }
        return null;
    }

    /**
     * Return the content of the given element.
     *
     * We will descend to an arbitrary depth looking for the first text node.
     *
     * Note that the parser may break what was originally a single string of
     * pcdata into multiple adjacent text nodes, so the first text node and
     * all text or CDATA nodes that directly follow it as siblings are
     * concatenated.
     *
     * We will NOT ignore text nodes that have only whitespace.
     *
     * @param e
     *            the element. May be null.
     * @return the concatenated text, or null if the element is null or has
     *         no text node below it
     */
    public static String getContent(Element e) {
        if (e == null) {
            return null;
        }
        Text first = findText(e, false);
        if (first == null) {
            return null;
        }

        StringBuilder content = new StringBuilder();
        Node current = first;
        while (current != null
                && (current.getNodeType() == Node.TEXT_NODE
                        || current.getNodeType() == Node.CDATA_SECTION_NODE)) {
            content.append(((Text) current).getData());
            current = current.getNextSibling();
        }
        return content.toString();
    }

    // ///////////////////////////////////////////////////////////
    //
    // Xml Encoding Utilities
    //
    // //////////////////////////////////////////////////////////

    /**
     * Escapes the given string and appends it to the given buffer.
     *
     * The markup characters {@code &}, {@code <} and {@code >} are written
     * as the predefined entities; line feed, carriage return and tab are
     * written as numeric character references so they survive attribute
     * value normalization. The quote character is escaped only when it
     * matches the requested delimiter. Anything that is not a legal XML 1.0
     * character (other control characters, U+FFFE, U+FFFF, unpaired
     * surrogates) is silently dropped.
     *
     * @param b
     *            The buffer
     * @param s
     *            The script to be escaped. May be null, in which case
     *            nothing is appended.
     * @param delim
     *            May be {@link #SINGLE_QUOTE}, {@link #DOUBLE_QUOTE}, or
     *            {@link #NO_DELIM}.
     */
    public static void escape(StringBuilder b, String s, char delim) {
        if (s == null) {
            return;
        }

        final int length = s.length();
        // Walk by code point so that surrogate pairs are validated (and
        // copied) as the single supplementary character they encode.
        for (int i = 0; i < length;) {
            final int cp = s.codePointAt(i);
            i += Character.charCount(cp);

            if (cp == '&') {
                // Ampersand: introduces a character entity.
                b.append("&amp;");
            } else if (cp == '<') {
                // LessThan: introduces a tag.
                b.append("&lt;");
            } else if (cp == '>') {
                // GreaterThan: some browsers impute an opening "<".
                b.append("&gt;");
            } else if (cp == 0xA) {
                // LineFeed: preserve format.
                b.append("&#xA;");
            } else if (cp == 0xD) {
                // CarriageReturn: preserve format.
                b.append("&#xD;");
            } else if (cp == 0x9) {
                // HorizontalTab: preserve format.
                b.append("&#x9;");
            } else if (cp == delim && delim == SINGLE_QUOTE) {
                // Accept only single or double quote as delimiter.
                b.append("&apos;");
            } else if (cp == delim && delim == DOUBLE_QUOTE) {
                // Accept only single or double quote as delimiter.
                // The quot entity is predefined in XML, so this is safe.
                b.append("&quot;");
            } else if (validXmlChar(cp)) {
                b.appendCodePoint(cp);
            }
            // else: not a legal XML character, drop it
        }
    }

    /**
     * Tests a code point against the XML 1.0 Char production (see
     * http://www.xml.com/axml/testaxml.htm): Char ::= #x9 | #xA | #xD |
     * [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
     */
    private static boolean validXmlChar(int codePoint) {
        if (codePoint >= 0x20 && codePoint < 0x7f) {
            return true; // short circuit test for printable ASCII
        }

        return codePoint == 0x09 || codePoint == 0x0A || codePoint == 0x0D
                || (codePoint >= 0x20 && codePoint <= 0xD7FF)
                || (codePoint >= 0xE000 && codePoint <= 0xFFFD)
                || (codePoint >= 0x10000 && codePoint <= 0x10FFFF);
    }
}