All downloads are free. Search and download functionality uses the official Maven repository.

org.identityconnectors.common.XmlUtil Maven / Gradle / Ivy

/*
 * ====================
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 * 
 * Copyright 2008-2009 Sun Microsystems, Inc. All rights reserved.     
 * 
 * The contents of this file are subject to the terms of the Common Development 
 * and Distribution License("CDDL") (the "License").  You may not use this file 
 * except in compliance with the License.
 * 
 * You can obtain a copy of the License at 
 * http://IdentityConnectors.dev.java.net/legal/license.txt
 * See the License for the specific language governing permissions and limitations 
 * under the License. 
 * 
 * When distributing the Covered Code, include this CDDL Header Notice in each file
 * and include the License file at identityconnectors/legal/license.txt.
 * If applicable, add the following below this CDDL Header, with the fields 
 * enclosed by brackets [] replaced by your own identifying information: 
 * "Portions Copyrighted [year] [name of copyright owner]"
 * ====================
 */
package org.identityconnectors.common;

import java.io.IOException;
import java.io.StringReader;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.Text;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Static helpers for parsing XML strings, navigating DOM trees and escaping
 * text so that it can be embedded in XML element content or attribute values.
 */
public final class XmlUtil {
    private XmlUtil() {
        // static utility class; never instantiated
    }

    /////////////////////////////////////////////////////////////
    //
    // Constants
    //
    ////////////////////////////////////////////////////////////

    /** Delimiter argument for {@link #escape}: escape neither quote character. */
    public static final char NO_DELIM     = 0;
    /** Delimiter argument for {@link #escape}: escape double quotes. */
    public static final char DOUBLE_QUOTE = '"';
    /** Delimiter argument for {@link #escape}: escape single quotes. */
    public static final char SINGLE_QUOTE = '\'';

    /////////////////////////////////////////////////////////////
    //
    // Parsing
    //
    ////////////////////////////////////////////////////////////

    /**
     * Parses a string without validation and returns the Document.
     * <p>
     * External DTDs and external entities are never fetched; they are
     * resolved to empty content (see {@link DummyDTDResolver}).
     *
     * @param xml the XML text to parse. Must not be null.
     * @return the parsed document
     * @throws IOException if reading the input fails
     * @throws SAXException if the text is not well-formed XML
     * @throws ParserConfigurationException if no parser can be created
     */
    public static Document parseString(String xml)
        throws IOException, SAXException, ParserConfigurationException
    {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setValidating(false);
        DocumentBuilder db = dbf.newDocumentBuilder();
        //some parsers will attempt to find and parse dtd even
        //if not validating and that makes it very slow
        db.setEntityResolver(new DummyDTDResolver());
        InputSource is = new InputSource(new StringReader(xml));
        return db.parse(is);
    }

    /**
     * Entity resolver that resolves every external entity (the external DTD
     * subset as well as external general/parameter entities) to an empty
     * document. Nothing is ever loaded from the file system or the network,
     * which keeps non-validating parsing fast and prevents documents from
     * pulling in external resources (XXE).
     */
    private static class DummyDTDResolver implements EntityResolver {
        @Override
        public InputSource resolveEntity(String publicID, String systemID) {
            // Returning null would make the parser open the system id itself.
            return new InputSource(new StringReader(""));
        }
    }

    /////////////////////////////////////////////////////////////
    //
    // DOM Navigation utilities
    //
    ////////////////////////////////////////////////////////////

    /**
     * Return the value of an attribute on an element.
     * <p>
     * The DOM getAttribute method returns an empty string if the attribute
     * doesn't exist. Here, we detect this and return null.
     *
     * @param e the element. Must not be null.
     * @param name the attribute name
     * @return the attribute value, or null if it is absent or empty
     */
    public static String getAttribute(Element e, String name) {
        String value = e.getAttribute(name);
        if (value != null && value.length() == 0) {
            value = null;
        }
        return value;
    }

    /**
     * Find an immediate child element of the given name.
     *
     * @param node the parent node. May be null.
     * @param name the tag name to look for
     * @return the first direct child element whose tag name equals
     *         {@code name}, or null if there is none
     */
    public static Element findImmediateChildElement(Node node, String name) {
        if (node == null) {
            return null;
        }
        for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                Element candidate = (Element) child;
                if (candidate.getTagName().equals(name)) {
                    return candidate;
                }
            }
        }
        return null;
    }

    /**
     * Returns the first child element or null if none found.
     *
     * @param node The node. May be null.
     * @return the first child element or null if none found
     */
    public static Element getFirstChildElement(Node node) {
        if (node == null) {
            return null;
        }
        Node child = node.getFirstChild();
        if (child instanceof Element) {
            return (Element) child;
        }
        // getNextElement tolerates a null child (node without children).
        return getNextElement(child);
    }

    /**
     * Get the next right sibling that is an element.
     *
     * @param node the starting node. May be null.
     * @return the next sibling element, or null if there is none
     */
    public static Element getNextElement(Node node) {
        if (node == null) {
            return null;
        }
        for (Node next = node.getNextSibling(); next != null; next = next.getNextSibling()) {
            if (next.getNodeType() == Node.ELEMENT_NODE) {
                return (Element) next;
            }
        }
        return null;
    }

    /**
     * Locate the first text node at any level below the given node. If the
     * ignoreEmpty flag is true, we will ignore text nodes that contain only
     * whitespace characters.
     * <p>
     * Note that if you're trying to extract element content, you probably
     * don't want this since parsers can break up pcdata into multiple
     * adjacent text nodes. See getContent() for a more useful method.
     */
    private static Text findText(Node node, boolean ignoreEmpty) {
        if (node == null) {
            return null;
        }
        short type = node.getNodeType();
        if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) {
            Text t = (Text) node;
            if (!ignoreEmpty || t.getData().trim().length() > 0) {
                return t;
            }
        }
        // Depth-first search through the children, in document order.
        for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
            Text found = findText(child, ignoreEmpty);
            if (found != null) {
                return found;
            }
        }
        return null;
    }

    /**
     * Return the content of the given element.
     * <p>
     * We will descend to an arbitrary depth looking for the first text node.
     * <p>
     * Note that the parser may break what was originally a single string of
     * pcdata into multiple adjacent text nodes. Here, we will congeal
     * adjacent text nodes.
     * <p>
     * We will NOT ignore text nodes that have only whitespace.
     *
     * @param e the element. May be null.
     * @return the concatenated text, or null if {@code e} is null or
     *         contains no text node
     */
    public static String getContent(Element e) {
        if (e == null) {
            return null;
        }
        // find the first inner text node
        Text t = findText(e, false);
        if (t == null) {
            return null;
        }
        // we have at least some text; append the run of adjacent text nodes
        StringBuilder b = new StringBuilder();
        while (t != null) {
            b.append(t.getData());
            Node n = t.getNextSibling();
            t = null;
            if (n != null
                    && (n.getNodeType() == Node.TEXT_NODE
                        || n.getNodeType() == Node.CDATA_SECTION_NODE)) {
                t = (Text) n;
            }
        }
        return b.toString();
    }

    /////////////////////////////////////////////////////////////
    //
    // Xml Encoding Utilities
    //
    ////////////////////////////////////////////////////////////

    /**
     * Escapes the given string and appends to the given buffer.
     * <p>
     * Markup characters are replaced by their predefined entities, line
     * feed, carriage return and tab by numeric character references (so
     * that they survive attribute-value and line-end normalization), and
     * the quote character matching {@code delim} by its entity. Characters
     * that are not legal in XML 1.0 (control characters, unpaired
     * surrogates, U+FFFE, U+FFFF) are dropped.
     *
     * @param b The buffer
     * @param s The script to be escaped. May be null.
     * @param delim May be {@link #SINGLE_QUOTE}, {@link #DOUBLE_QUOTE}, or {@link #NO_DELIM}.
     */
    public static void escape(StringBuilder b, String s, char delim) {
        if (s == null) {
            return;
        }
        final int length = s.length();
        // Walk by code point so that surrogate pairs are validated and
        // copied as a unit instead of as two separate chars.
        for (int i = 0; i < length; ) {
            final int cp = s.codePointAt(i);
            i += Character.charCount(cp);
            if (cp == '&') {
                // Ampersand: introduces a character entity.
                b.append("&amp;");
            } else if (cp == '<') {
                // LessThan: introduces a tag.
                b.append("&lt;");
            } else if (cp == '>') {
                // GreaterThan: some browsers impute an opening "<".
                b.append("&gt;");
            } else if (cp == 0xA) {
                // LineFeed: preserve format.
                b.append("&#xA;");
            } else if (cp == 0xD) {
                // CarriageReturn: preserve format.
                b.append("&#xD;");
            } else if (cp == 0x9) {
                // HorizontalTab: preserve format.
                b.append("&#x9;");
            } else if (cp == delim && delim == SINGLE_QUOTE) {
                // Accept only single or double quote as delimiter.
                b.append("&apos;");
            } else if (cp == delim && delim == DOUBLE_QUOTE) {
                // Accept only single or double quote as delimiter.
                b.append("&quot;");
            } else if (validXmlChar(cp)) {
                b.appendCodePoint(cp);
            }
            // anything else is not a legal XML character and is dropped
        }
    }

    /**
     * Tests a code point against the XML 1.0 Char production:
     * Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
     * [#x10000-#x10FFFF]
     */
    private static boolean validXmlChar(int codePoint) {
        if (codePoint >= 0x20 && codePoint < 0x7f) {
            return true; // short circuit test for printable ASCII
        }
        return codePoint == 0x09 || codePoint == 0x0A || codePoint == 0x0D
                || (codePoint >= 0x20 && codePoint <= 0xD7FF)
                || (codePoint >= 0xE000 && codePoint <= 0xFFFD)
                || (codePoint >= 0x10000 && codePoint <= 0x10FFFF);
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy