// All downloads are free. Search and download functionality uses the official Maven repository.
//
// org.openide.xml.XMLUtil Maven / Gradle / Ivy
//
// The newest version!
/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
 *
 * The contents of this file are subject to the terms of either the GNU
 * General Public License Version 2 only ("GPL") or the Common
 * Development and Distribution License("CDDL") (collectively, the
 * "License"). You may not use this file except in compliance with the
 * License. You can obtain a copy of the License at
 * http://www.netbeans.org/cddl-gplv2.html
 * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
 * specific language governing permissions and limitations under the
 * License.  When distributing the software, include this License Header
 * Notice in each file and include the License file at
 * nbbuild/licenses/CDDL-GPL-2-CP.  Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the GPL Version 2 section of the License file that
 * accompanied this code. If applicable, add the following below the
 * License Header, with the fields enclosed by brackets [] replaced by
 * your own identifying information:
 * "Portions Copyrighted [year] [name of copyright owner]"
 *
 * Contributor(s):
 *
 * The Original Software is NetBeans. The Initial Developer of the Original
 * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
 * Microsystems, Inc. All Rights Reserved.
 *
 * If you wish your version of this file to be governed by only the CDDL
 * or only the GPL Version 2, indicate your decision by adding
 * "[Contributor] elects to include this software in this distribution
 * under the [CDDL or GPL Version 2] license." If you do not indicate a
 * single choice of license, a recipient has the option to distribute
 * your version of this file under either the CDDL, the GPL Version 2 or
 * to extend the choice of license to its licensees as provided above.
 * However, if you add GPL Version 2 code and therefore, elected the GPL
 * Version 2 license, then the option applies only if the new code is
 * made subject to such option by the copyright holder.
 */

package org.openide.xml;

import java.io.CharConversionException;
import java.io.IOException;
import java.io.OutputStream;
import java.io.StringReader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.Validator;
import org.w3c.dom.Attr;
import org.w3c.dom.CDATASection;
import org.w3c.dom.DOMException;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;

/**
 * Utility class collecting library methods related to XML processing.
 *
 * <div class="nonnormative">
 *
 * <p>Remember that when parsing XML files you often want to set an explicit
 * entity resolver. For example, consider a file such as this:</p>
 *
 * <pre>
 * &lt;?xml version="1.0" encoding="UTF-8"?&gt;
 * &lt;!DOCTYPE root PUBLIC "-//NetBeans//DTD Foo 1.0//EN" "http://www.netbeans.org/dtds/foo-1_0.dtd"&gt;
 * &lt;root/&gt;
 * </pre>
 *
 * <p>If you parse this with a null entity resolver, or you use the
 * default resolver ({@link EntityCatalog#getDefault}) but do not do
 * anything special with this DTD, you will probably find the parse
 * blocking to make a network connection <em>even when you are not
 * validating</em>. That is because DTDs can be used to define
 * entities and other XML oddities, and are not a pure constraint
 * language like Schema or RELAX-NG.</p>
 *
 * <p>There are three basic ways to avoid the network connection.</p>
 *
 * <ol>
 *
 * <li><p>Register the DTD. This is generally the best thing to do. See
 * {@link EntityCatalog}'s documentation for details, but for example
 * in your layer use:</p>
 *
 * <pre>
 * &lt;filesystem&gt;
 *   &lt;folder name="xml"&gt;
 *     &lt;folder name="entities"&gt;
 *       &lt;folder name="NetBeans"&gt;
 *         &lt;file name="DTD_Foo_1_0"
 *               url="nbres:/org/netbeans/modules/mymod/resources/foo-1_0.dtd"&gt;
 *           &lt;attr name="hint.originalPublicID"
 *                 stringvalue="-//NetBeans//DTD Foo 1.0//EN"/&gt;
 *         &lt;/file&gt;
 *       &lt;/folder&gt;
 *     &lt;/folder&gt;
 *   &lt;/folder&gt;
 * &lt;/filesystem&gt;
 * </pre>
 *
 * <p>Now the default system entity catalog will resolve the public ID
 * to the local copy in your module, not the network copy.
 * Additionally, anyone who mounts the "NetBeans Catalog" in the XML
 * Entity Catalogs node in the Runtime tab will be able to use your
 * local copy of the DTD automatically, for validation, code
 * completion, etc. (The network URL should really exist, though, for
 * the benefit of other tools!)</p></li>
 *
 * <li><p>You can also set an explicit entity resolver which maps that
 * particular public ID to some local copy of the DTD, if you do not
 * want to register it globally in the system for some reason. If
 * handed other public IDs, just return null to indicate that the
 * system ID should be loaded.</p></li>
 *
 * <li><p>In some cases where XML parsing is very
 * performance-sensitive, and you know that you do not need validation
 * and furthermore that the DTD defines no infoset (there are no
 * entity or character definitions, etc.), you can speed up the parse.
 * Turn off validation, but also supply a custom entity resolver that
 * does not even bother to load the DTD at all:</p>
 *
 * <pre>
 * public InputSource resolveEntity(String pubid, String sysid)
 *     throws SAXException, IOException {
 *   if (pubid.equals("-//NetBeans//DTD Foo 1.0//EN")) {
 *     return new InputSource(new ByteArrayInputStream(new byte[0]));
 *   } else {
 *     return EntityCatalog.getDefault().resolveEntity(pubid, sysid);
 *   }
 * }
 * </pre></li>
 *
 * </ol>
 *
 * </div>
 *
 * @author Petr Kuzel
 * @since release 3.2
 */
public final class XMLUtil {

    /** Lower-case hexadecimal digits indexed by nibble value; used by {@link #toHex}. */
    private static final char[] DEC2HEX = {
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
    };

    /** Cached SAX factories, indexed as {@code [validate ? 0 : 1][namespaceAware ? 0 : 1]}. */
    private static final SAXParserFactory[][] saxes = new SAXParserFactory[2][2];

    /** Cached DOM factories, indexed as {@code [validate ? 0 : 1][namespaceAware ? 0 : 1]}. */
    private static final DocumentBuilderFactory[][] doms = new DocumentBuilderFactory[2][2];

    /**
     * Identity transformation in XSLT with indentation added.
     * Just using the identity transform and calling
     * t.setOutputProperty(OutputKeys.INDENT, "yes");
     * t.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
     * does not work currently.
     * You really have to use this bogus stylesheet.
     * @see "JDK bug #5064280"
     */
    private static final String IDENTITY_XSLT_WITH_INDENT =
            "<xsl:stylesheet version='1.0' " // NOI18N
            + "xmlns:xsl='http://www.w3.org/1999/XSL/Transform' " // NOI18N
            + "xmlns:xalan='http://xml.apache.org/xslt' " // NOI18N
            + "exclude-result-prefixes='xalan'>" // NOI18N
            + "<xsl:output method='xml' indent='yes' xalan:indent-amount='4'/>" // NOI18N
            + "<xsl:template match='@*|node()'>" // NOI18N
            + "<xsl:copy>" // NOI18N
            + "<xsl:apply-templates select='@*|node()'/>" // NOI18N
            + "</xsl:copy>" // NOI18N
            + "</xsl:template>" // NOI18N
            + "</xsl:stylesheet>"; // NOI18N

    /** Forbids creating new XMLUtil */
    private XMLUtil() {
    }

    // ~~~~~~~~~~~~~~~~~~~~~ SAX related ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    /** Create a simple parser.
     * @return <code>createXMLReader(false, false)</code>
     * @throws SAXException if the parser cannot be created
     */
    public static XMLReader createXMLReader() throws SAXException {
        return createXMLReader(false, false);
    }

    /** Create a simple parser, possibly validating.
     * @param validate if true, a validating parser is returned
     * @return <code>createXMLReader(validate, false)</code>
     * @throws SAXException if the parser cannot be created
     */
    public static XMLReader createXMLReader(boolean validate) throws SAXException {
        return createXMLReader(validate, false);
    }

    /** Create a SAX parser from the JAXP factory.
     * The result can be used to parse XML files.
     *
     * <p>See class Javadoc for hints on setting an entity resolver.
     * This method does not itself install an entity resolver on the returned reader.
     *
     * @param validate if true, a validating parser is returned
     * @param namespaceAware if true, a namespace aware parser is returned
     *
     * @throws FactoryConfigurationError Application developers should never need to directly catch errors of this type.
     * @throws SAXException if a parser fulfilling given parameters can not be created
     *
     * @return XMLReader configured according to passed parameters
     */
    public static XMLReader createXMLReader(boolean validate, boolean namespaceAware)
            throws SAXException {
        int v = validate ? 0 : 1;
        int n = namespaceAware ? 0 : 1;
        SAXParserFactory factory = saxes[v][n];

        if (factory == null) {
            // Factory lookup is expensive, so one configured factory is kept per flag combination.
            factory = SAXParserFactory.newInstance();
            factory.setValidating(validate);
            factory.setNamespaceAware(namespaceAware);
            saxes[v][n] = factory;
        }

        try {
            return factory.newSAXParser().getXMLReader();
        } catch (ParserConfigurationException ex) {
            throw new SAXException("Cannot create parser satisfying configuration parameters", ex); //NOI18N
        }
    }

    // ~~~~~~~~~~~~~~~~~~~~~ DOM related ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    /**
     * Creates empty DOM Document using JAXP factoring. E.g.:
     * <pre>
     * Document doc = createDocument("book", null, null, null);
     * </pre>
     * creates new DOM of a well-formed document with root element named book.
     *
     * @param rootQName qualified name of root element. e.g. <code>myroot</code> or <code>ns:myroot</code>
     * @param namespaceURI URI of root element namespace or <code>null</code>
     * @param doctypePublicID public ID of DOCTYPE or <code>null</code>
     * @param doctypeSystemID system ID of DOCTYPE or <code>null</code> if no DOCTYPE
     *        required and doctypePublicID is also <code>null</code>
     *
     * @throws DOMException if new DOM with passed parameters can not be created
     * @throws IllegalArgumentException if a public ID is given without a system ID
     * @throws FactoryConfigurationError Application developers should never need to directly catch errors of this type.
     *
     * @return new DOM Document
     */
    public static Document createDocument(
        String rootQName, String namespaceURI, String doctypePublicID, String doctypeSystemID
    ) throws DOMException {
        // Reject inconsistent arguments before paying for a DocumentBuilder.
        if ((doctypePublicID != null) && (doctypeSystemID == null)) {
            throw new IllegalArgumentException("System ID cannot be null if public ID specified. "); //NOI18N
        }

        DOMImplementation impl = getDOMImplementation();
        DocumentType dtd = null;

        if (doctypeSystemID != null) {
            dtd = impl.createDocumentType(rootQName, doctypePublicID, doctypeSystemID);
        }

        return impl.createDocument(namespaceURI, rootQName, dtd);
    }

    /**
     * Obtains DOMImplementation interface providing a number of methods for performing
     * operations that are independent of any particular DOM instance.
     *
     * @throws DOMException <code>NOT_SUPPORTED_ERR</code> if cannot get DOMImplementation
     * @throws FactoryConfigurationError Application developers should never need to directly catch errors of this type.
     *
     * @return DOMImplementation implementation
     */
    private static DOMImplementation getDOMImplementation() throws DOMException { //can be made public
        DocumentBuilderFactory factory = getFactory(false, false);

        try {
            return factory.newDocumentBuilder().getDOMImplementation();
        } catch (ParserConfigurationException ex) {
            // Keep the original failure reachable through getCause().
            throw (DOMException) new DOMException(
                DOMException.NOT_SUPPORTED_ERR, "Cannot create parser satisfying configuration parameters"
            ).initCause(ex); //NOI18N
        } catch (RuntimeException e) {
            // E.g. #36578, IllegalArgumentException. Try to recover gracefully.
            throw (DOMException) new DOMException(DOMException.NOT_SUPPORTED_ERR, e.toString()).initCause(e);
        }
    }

    /**
     * Returns the cached DocumentBuilderFactory for the given flag combination,
     * creating and configuring it on first use.
     */
    private static DocumentBuilderFactory getFactory(boolean validate, boolean namespaceAware) {
        int v = validate ? 0 : 1;
        int n = namespaceAware ? 0 : 1;
        DocumentBuilderFactory factory = doms[v][n];

        if (factory == null) {
            factory = DocumentBuilderFactory.newInstance();
            factory.setValidating(validate);
            factory.setNamespaceAware(namespaceAware);
            doms[v][n] = factory;
        }

        return factory;
    }

    /**
     * Create from factory a DocumentBuilder and let it create a org.w3c.dom.Document.
     * This method takes InputSource. After successful finish the document tree is returned.
     *
     * @param input a parser input (for URL users use: <code>new InputSource(url.toExternalForm())</code>
     * @param validate if true validating parser is used
     * @param namespaceAware if true DOM is created by namespace aware parser
     * @param errorHandler a error handler to notify about exception or <code>null</code>
     * @param entityResolver SAX entity resolver or <code>null</code>; see class Javadoc for hints
     *
     * @throws IOException if an I/O problem during parsing occurs
     * @throws SAXException is thrown if a parser error occurs
     * @throws FactoryConfigurationError Application developers should never need to directly catch errors of this type.
     *
     * @return document representing given input
     */
    public static Document parse(
        InputSource input, boolean validate, boolean namespaceAware, ErrorHandler errorHandler,
        EntityResolver entityResolver
    ) throws IOException, SAXException {
        DocumentBuilder builder;

        try {
            builder = getFactory(validate, namespaceAware).newDocumentBuilder();
        } catch (ParserConfigurationException ex) {
            throw new SAXException("Cannot create parser satisfying configuration parameters", ex); //NOI18N
        }

        // Null handler/resolver means "leave the builder's defaults alone".
        if (errorHandler != null) {
            builder.setErrorHandler(errorHandler);
        }

        if (entityResolver != null) {
            builder.setEntityResolver(entityResolver);
        }

        return builder.parse(input);
    }

    /**
     * Writes a DOM document to a stream.
     * The precise output format is not guaranteed but this method will attempt to indent it sensibly.
     *
     * <p class="nonnormative"><b>Important</b>: There might be some problems with
     * <code>&lt;![CDATA[ ]]&gt;</code> sections in the DOM tree you pass into this method. Specifically,
     * some CDATA sections may not be written as CDATA section or may be merged with
     * other CDATA section at the same level. Also if plain text nodes are mixed with
     * CDATA sections at the same level all text is likely to end up in one big CDATA section.
     * <br>
     * For nodes that only have one CDATA section this method should work fine.
     * </p>
     *
     * @param doc DOM document to be written
     * @param out data sink
     * @param enc XML-defined encoding name (e.g. "UTF-8")
     * @throws IOException if JAXP fails or the stream cannot be written to
     * @throws NullPointerException if <code>enc</code> is null
     */
    public static void write(Document doc, OutputStream out, String enc) throws IOException {
        if (enc == null) {
            throw new NullPointerException("You must set an encoding; use \"UTF-8\" unless you have a good reason not to!"); // NOI18N
        }

        Document doc2 = normalize(doc);

        // XXX the following DOM 3 LS implementation of the rest of this method works fine on JDK 6 but pretty-printing is broken on JDK 5:
        /*
        DOMImplementationLS ls = (DOMImplementationLS) doc.getImplementation().getFeature("LS", "3.0"); // NOI18N
        assert ls != null : "No DOM 3 LS supported in " + doc.getClass().getName();
        LSOutput output = ls.createLSOutput();
        output.setEncoding(enc);
        output.setByteStream(out);
        LSSerializer ser = ls.createLSSerializer();
        String fpp = "format-pretty-print"; // NOI18N
        if (ser.getDomConfig().canSetParameter(fpp, true)) {
            ser.getDomConfig().setParameter(fpp, true);
        }
        ser.write(doc2, output);
         */
        try {
            Transformer t = TransformerFactory.newInstance().newTransformer(
                    new StreamSource(new StringReader(IDENTITY_XSLT_WITH_INDENT)));
            DocumentType dt = doc2.getDoctype();

            if (dt != null) {
                String pub = dt.getPublicId();

                if (pub != null) {
                    t.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, pub);
                }

                // setOutputProperty rejects null values, so only emit a system ID that exists.
                String sys = dt.getSystemId();

                if (sys != null) {
                    t.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, sys);
                }
            }

            t.setOutputProperty(OutputKeys.ENCODING, enc);

            // See #123816
            Set<String> cdataQNames = new HashSet<String>();
            collectCDATASections(doc2, cdataQNames);

            if (!cdataQNames.isEmpty()) {
                StringBuilder cdataSections = new StringBuilder();

                for (String s : cdataQNames) {
                    cdataSections.append(s).append(' '); //NOI18N
                }

                t.setOutputProperty(OutputKeys.CDATA_SECTION_ELEMENTS, cdataSections.toString());
            }

            Source source = new DOMSource(doc2);
            Result result = new StreamResult(out);
            t.transform(source, result);
        } catch (Exception e) {
            throw (IOException) new IOException(e.toString()).initCause(e);
        }
    }

    /**
     * Recursively records the name of every element that directly contains a CDATA section,
     * in the <code>{uri}name</code> form when the element has a namespace URI.
     */
    private static void collectCDATASections(Node node, Set<String> cdataQNames) {
        if (node instanceof CDATASection) {
            Node parent = node.getParentNode();

            if (parent != null) {
                String uri = parent.getNamespaceURI();

                if (uri != null) {
                    cdataQNames.add("{" + uri + "}" + parent.getNodeName()); //NOI18N
                } else {
                    cdataQNames.add(parent.getNodeName());
                }
            }
        }

        NodeList children = node.getChildNodes();

        for (int i = 0; i < children.getLength(); i++) {
            collectCDATASections(children.item(i), cdataQNames);
        }
    }

    /**
     * Check whether a DOM tree is valid according to a schema.
     * Example of usage:
     * <pre>
     * Element fragment = ...;
     * SchemaFactory f = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
     * Schema s = f.newSchema(This.class.getResource("something.xsd"));
     * try {
     *     XMLUtil.validate(fragment, s);
     *     // valid
     * } catch (SAXException x) {
     *     // invalid
     * }
     * </pre>
     * @param data a DOM tree
     * @param schema a parsed schema
     * @throws SAXException if validation failed
     * @since org.openide.util 7.17
     */
    public static void validate(Element data, Schema schema) throws SAXException {
        Validator v = schema.newValidator();
        // Single-element array so the anonymous handler can report back to this method.
        final SAXException[] error = {null};

        v.setErrorHandler(new ErrorHandler() {
            public void warning(SAXParseException x) throws SAXException {
            }

            public void error(SAXParseException x) throws SAXException {
                // Just rethrowing it is bad because it will also print it to stderr.
                error[0] = x;
            }

            public void fatalError(SAXParseException x) throws SAXException {
                error[0] = x;
            }
        });

        try {
            v.validate(new DOMSource(fixupAttrs(data)));
        } catch (IOException x) {
            // Validating an in-memory DOM should never do I/O.
            assert false : x;
        }

        if (error[0] != null) {
            throw error[0];
        }
    }

    /**
     * Returns a deep copy of <code>root</code> in which every element has had its
     * attributes normalized by {@link #fixupAttrsSingle}; the input tree is not modified.
     */
    private static Element fixupAttrs(Element root) { // #140905
        // #6529766/#6531160: some versions of JAXP reject attributes set using setAttribute
        // (rather than setAttributeNS) even though the schema calls for no-NS attrs!
        // JDK 5 is fine; JDK 6 broken; JDK 6u2+ fixed
        // #146081: xml:base attributes mess up validation too.
        Element copy = (Element) root.cloneNode(true);
        fixupAttrsSingle(copy);

        NodeList nl = copy.getElementsByTagName("*"); // NOI18N

        for (int i = 0; i < nl.getLength(); i++) {
            fixupAttrsSingle((Element) nl.item(i));
        }

        return copy;
    }

    /**
     * Removes any <code>xml:base</code> attribute from the element and re-sets every
     * attribute that has no namespace URI (other than <code>xmlns</code>) via
     * <code>setAttributeNS(null, ...)</code>.
     */
    private static void fixupAttrsSingle(Element e) throws DOMException {
        e.removeAttributeNS("http://www.w3.org/XML/1998/namespace", "base"); // NOI18N

        // Collect first, modify afterwards: the NamedNodeMap is live.
        Map<String, String> replace = new HashMap<String, String>();
        NamedNodeMap attrs = e.getAttributes();

        for (int j = 0; j < attrs.getLength(); j++) {
            Attr attr = (Attr) attrs.item(j);

            if (attr.getNamespaceURI() == null && !attr.getName().equals("xmlns")) { // NOI18N
                replace.put(attr.getName(), attr.getValue());
            }
        }

        for (Map.Entry<String, String> entry : replace.entrySet()) {
            e.removeAttribute(entry.getKey());
            e.setAttributeNS(null, entry.getKey(), entry.getValue());
        }
    }

    /**
     * Escape passed string as XML attribute value
     * (<code>&lt;</code>, <code>&amp;</code>, <code>'</code> and <code>"</code>
     * will be escaped).
     * Note: An XML processor returns normalized value that can be different.
     *
     * @param val a string to be escaped
     *
     * @return escaped value
     * @throws CharConversionException if val is null or contains an improper XML character
     *
     * @since 1.40
     */
    public static String toAttributeValue(String val) throws CharConversionException {
        if (val == null) {
            throw new CharConversionException("null"); // NOI18N
        }

        // Fast path: nothing to escape, hand back the same instance.
        if (checkAttributeCharacters(val)) {
            return val;
        }

        StringBuilder buf = new StringBuilder(val.length() + 16);

        for (int i = 0; i < val.length(); i++) {
            char ch = val.charAt(i);

            switch (ch) {
                case '<':
                    buf.append("&lt;");
                    break;
                case '&':
                    buf.append("&amp;");
                    break;
                case '\'':
                    buf.append("&apos;");
                    break;
                case '"':
                    buf.append("&quot;");
                    break;
                default:
                    buf.append(ch);
            }
        }

        return buf.toString();
    }

    /**
     * Escape passed string as XML element content (<code>&lt;</code>,
     * <code>&amp;</code> and <code>&gt;</code> in <code>]]&gt;</code> sequences).
     *
     * @param val a string to be escaped
     *
     * @return escaped value
     * @throws CharConversionException if val is null or contains an improper XML character
     *
     * @since 1.40
     */
    public static String toElementContent(String val) throws CharConversionException {
        if (val == null) {
            throw new CharConversionException("null"); // NOI18N
        }

        // Fast path: nothing to escape, hand back the same instance.
        if (checkContentCharacters(val)) {
            return val;
        }

        StringBuilder buf = new StringBuilder(val.length() + 16);

        for (int i = 0; i < val.length(); i++) {
            char ch = val.charAt(i);

            if ('<' == ch) {
                buf.append("&lt;");
            } else if ('&' == ch) {
                buf.append("&amp;");
            } else if (('>' == ch) && (i > 1) && (val.charAt(i - 2) == ']') && (val.charAt(i - 1) == ']')) {
                // '>' is only illegal in content when it closes a "]]>" sequence.
                buf.append("&gt;");
            } else {
                buf.append(ch);
            }
        }

        return buf.toString();
    }

    /**
     * Can be used to encode values that contain invalid XML characters.
     * At SAX parser end must be used pair method to get original value.
     *
     * @param val data to be converted
     * @param start offset
     * @param len count
     * @return lower-case hexadecimal text, two characters per input byte
     *
     * @since 1.29
     */
    public static String toHex(byte[] val, int start, int len) {
        StringBuilder buf = new StringBuilder(Math.max(0, len) * 2);

        for (int i = 0; i < len; i++) {
            byte b = val[start + i];

            buf.append(DEC2HEX[(b & 0xf0) >> 4]);
            buf.append(DEC2HEX[b & 0x0f]);
        }

        return buf.toString();
    }

    /**
     * Decodes data encoded using {@link #toHex(byte[],int,int) toHex}.
     * Only the <code>len</code> characters starting at <code>start</code> are decoded.
     *
     * @param hex data to be converted
     * @param start offset
     * @param len count
     * @return decoded bytes, <code>len / 2</code> of them
     *
     * @throws IOException if input does not represent hex encoded value, i.e. it is null,
     *         the range lies outside the array, the length is odd, or a non-hex character is met
     *
     * @since 1.29
     */
    public static byte[] fromHex(char[] hex, int start, int len) throws IOException {
        if (hex == null) {
            throw new IOException("null");
        }

        // Written to be overflow-safe: never computes start + len.
        if ((start < 0) || (len < 0) || (start > hex.length - len)) {
            throw new IOException("range out of bounds: start=" + start + ", len=" + len
                    + ", length=" + hex.length);
        }

        if ((len % 2) != 0) {
            throw new IOException("odd length");
        }

        byte[] magic = new byte[len / 2];

        for (int i = 0; i < magic.length; i++) {
            int pos = start + (2 * i);
            int hi = Character.digit(hex[pos], 16);
            int lo = Character.digit(hex[pos + 1], 16);

            if ((hi < 0) || (lo < 0)) {
                throw new IOException("not a hex pair: \"" + hex[pos] + hex[pos + 1] + "\"");
            }

            magic[i] = (byte) ((hi << 4) | lo);
        }

        return magic;
    }

    /**
     * Check if all passed characters match XML expression [2].
     * @return true if no escaping necessary
     * @throws CharConversionException if contains invalid chars
     */
    private static boolean checkAttributeCharacters(String chars)
            throws CharConversionException {
        boolean escape = false;

        for (int i = 0; i < chars.length(); i++) {
            char ch = chars.charAt(i);

            if (((int) ch) <= 93) { // we are UNICODE ']'

                switch (ch) {
                    case 0x9:
                    case 0xA:
                    case 0xD:
                        continue;

                    case '\'':
                    case '"':
                    case '<':
                    case '&':
                        escape = true;
                        continue;

                    default:
                        if (((int) ch) < 0x20) {
                            throw new CharConversionException("Invalid XML character &#" + ((int) ch) + ";.");
                        }
                }
            }
        }

        return escape == false;
    }

    /**
     * Check if all passed characters match XML expression [2].
     * @return true if no escaping necessary
     * @throws CharConversionException if contains invalid chars
     */
    private static boolean checkContentCharacters(String chars)
            throws CharConversionException {
        boolean escape = false;

        for (int i = 0; i < chars.length(); i++) {
            char ch = chars.charAt(i);

            if (((int) ch) <= 93) { // we are UNICODE ']'

                switch (ch) {
                    case 0x9:
                    case 0xA:
                    case 0xD:
                        continue;

                    case '>': // only ]]> is dangerous
                        if (escape) {
                            continue;
                        }

                        // Conservative: "]>" is enough to send the caller down the slow path,
                        // which then checks for the full "]]>" sequence.
                        escape = (i > 0) && (chars.charAt(i - 1) == ']');
                        continue;

                    case '<':
                    case '&':
                        escape = true;
                        continue;

                    default:
                        if (((int) ch) < 0x20) {
                            throw new CharConversionException("Invalid XML character &#" + ((int) ch) + ";.");
                        }
                }
            }
        }

        return escape == false;
    }

    /**
     * Try to normalize a document by removing nonsignificant whitespace.
     * Works on a copy; the original document is not modified.
     * @see "#62006"
     */
    private static Document normalize(Document orig) throws IOException {
        DocumentBuilder builder;

        try {
            builder = getFactory(false, false).newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            throw (IOException) new IOException("Cannot create parser satisfying configuration parameters: " + e).initCause(e); //NOI18N
        }

        DocumentType doctype = null;
        NodeList nl = orig.getChildNodes();

        for (int i = 0; i < nl.getLength(); i++) {
            if (nl.item(i) instanceof DocumentType) {
                // We cannot import DocumentType's, so we need to manually copy it.
                doctype = (DocumentType) nl.item(i);
            }
        }

        Document doc;

        if (doctype != null) {
            doc = builder.getDOMImplementation().createDocument(
                orig.getDocumentElement().getNamespaceURI(),
                orig.getDocumentElement().getTagName(),
                builder.getDOMImplementation().createDocumentType(
                    orig.getDoctype().getName(),
                    orig.getDoctype().getPublicId(),
                    orig.getDoctype().getSystemId()));
            // XXX what about entity decls inside the DOCTYPE?
            // createDocument made a placeholder root; drop it so the real one can be imported below.
            doc.removeChild(doc.getDocumentElement());
        } else {
            doc = builder.newDocument();
        }

        for (int i = 0; i < nl.getLength(); i++) {
            if (!(nl.item(i) instanceof DocumentType)) {
                doc.appendChild(doc.importNode(nl.item(i), true));
            }
        }

        doc.normalize();
        nl = doc.getElementsByTagName("*"); // NOI18N

        for (int i = 0; i < nl.getLength(); i++) {
            Element e = (Element) nl.item(i);
            NodeList nl2 = e.getChildNodes();

            for (int j = 0; j < nl2.getLength(); j++) {
                Node n = nl2.item(j);

                if (n instanceof Text && ((Text) n).getNodeValue().trim().length() == 0) {
                    e.removeChild(n);
                    j--; // since list is dynamic
                }
            }
        }

        return doc;
    }
}