org.openide.xml.XMLUtil Maven / Gradle / Ivy
* Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
* The contents of this file are subject to the terms of either the GNU
* General Public License Version 2 only ("GPL") or the Common
* Development and Distribution License("CDDL") (collectively, the
* "License"). You may not use this file except in compliance with the
* License. You can obtain a copy of the License at
* http://www.netbeans.org/cddl-gplv2.html
* or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
* specific language governing permissions and limitations under the
* License. When distributing the software, include this License Header
* Notice in each file and include the License file at
* nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
* particular file as subject to the "Classpath" exception as provided
* by Sun in the GPL Version 2 section of the License file that
* accompanied this code. If applicable, add the following below the
* License Header, with the fields enclosed by brackets [] replaced by
* your own identifying information:
* "Portions Copyrighted [year] [name of copyright owner]"
* Contributor(s):
* The Original Software is NetBeans. The Initial Developer of the Original
* Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
* Microsystems, Inc. All Rights Reserved.
* If you wish your version of this file to be governed by only the CDDL
* or only the GPL Version 2, indicate your decision by adding
* "[Contributor] elects to include this software in this distribution
* under the [CDDL or GPL Version 2] license." If you do not indicate a
* single choice of license, a recipient has the option to distribute
* your version of this file under either the CDDL, the GPL Version 2 or
* to extend the choice of license to its licensees as provided above.
* However, if you add GPL Version 2 code and therefore, elected the GPL
* Version 2 license, then the option applies only if the new code is
* made subject to such option by the copyright holder.
package org.openide.xml;
import java.io.CharConversionException;
import java.io.IOException;
import java.io.OutputStream;
import java.io.StringReader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.Validator;
import org.w3c.dom.Attr;
import org.w3c.dom.CDATASection;
import org.w3c.dom.DOMException;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
/**
 * Utility class collecting library methods related to XML processing.
* Remember that when parsing XML files you often want to set an explicit
* entity resolver. For example, consider a file such as this:
* <?xml version="1.0" encoding="UTF-8"?>
* <!DOCTYPE root PUBLIC "-//NetBeans//DTD Foo 1.0//EN" "http://www.netbeans.org/dtds/foo-1_0.dtd">
* <root/>
* If you parse this with a null entity resolver, or you use the
* default resolver ({@link EntityCatalog#getDefault}) but do not do
* anything special with this DTD, you will probably find the parse
* blocking to make a network connection even when you are not
* validating. That is because DTDs can be used to define
* entities and other XML oddities, and are not a pure constraint
* language like Schema or RELAX-NG.
* There are three basic ways to avoid the network connection.
* Register the DTD. This is generally the best thing to do. See
* {@link EntityCatalog}'s documentation for details, but for example
* in your layer use:
* <filesystem>
* <folder name="xml">
* <folder name="entities">
* <folder name="NetBeans">
* <file name="DTD_Foo_1_0"
* url="nbres:/org/netbeans/modules/mymod/resources/foo-1_0.dtd">
* <attr name="hint.originalPublicID"
* stringvalue="-//NetBeans//DTD Foo 1.0//EN"/>
* </file>
* </folder>
* </folder>
* </folder>
* </filesystem>
* Now the default system entity catalog will resolve the public ID
* to the local copy in your module, not the network copy.
* Additionally, anyone who mounts the "NetBeans Catalog" in the XML
* Entity Catalogs node in the Runtime tab will be able to use your
* local copy of the DTD automatically, for validation, code
* completion, etc. (The network URL should really exist, though, for
* the benefit of other tools!)
* You can also set an explicit entity resolver which maps that
* particular public ID to some local copy of the DTD, if you do not
* want to register it globally in the system for some reason. If
* handed other public IDs, just return null to indicate that the
* system ID should be loaded.
* In some cases where XML parsing is very
* performance-sensitive, and you know that you do not need validation
* and furthermore that the DTD defines no infoset (there are no
* entity or character definitions, etc.), you can speed up the parse.
* Turn off validation, but also supply a custom entity resolver that
* does not even bother to load the DTD at all:
* public InputSource resolveEntity(String pubid, String sysid)
* throws SAXException, IOException {
* if (pubid.equals("-//NetBeans//DTD Foo 1.0//EN")) {
* return new InputSource(new ByteArrayInputStream(new byte[0]));
* } else {
* return EntityCatalog.getDefault().resolveEntity(pubid, sysid);
* }
* }
* @author Petr Kuzel
* @since release 3.2 */
public final class XMLUtil extends Object {
public static String toCDATA(String val) throws IOException {
private static final char[] DEC2HEX = {
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
/** Forbids creating new XMLUtil; every member of this class is static. */
private XMLUtil() {
}
// ~~~~~~~~~~~~~~~~~~~~~ SAX related ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/** Create a simple parser.
* @return createXMLReader(false, false)
public static XMLReader createXMLReader() throws SAXException {
return createXMLReader(false, false);
/** Create a simple parser, possibly validating.
* @param validate if true, a validating parser is returned
* @return createXMLReader(validate, false)
public static XMLReader createXMLReader(boolean validate)
throws SAXException {
return createXMLReader(validate, false);
private static SAXParserFactory[][] saxes = new SAXParserFactory[2][2];
/** Create a SAX parser from the JAXP factory.
* The result can be used to parse XML files.
* See class Javadoc for hints on setting an entity resolver.
* This parser has its entity resolver set to the system entity resolver chain.
* @param validate if true, a validating parser is returned
* @param namespaceAware if true, a namespace aware parser is returned
* @throws FactoryConfigurationError Application developers should never need to directly catch errors of this type.
* @throws SAXException if a parser fulfilling given parameters can not be created
* @return XMLReader configured according to passed parameters
public static XMLReader createXMLReader(boolean validate, boolean namespaceAware)
throws SAXException {
SAXParserFactory factory = saxes[validate ? 0 : 1][namespaceAware ? 0 : 1];
if (factory == null) {
factory = SAXParserFactory.newInstance();
saxes[validate ? 0 : 1][namespaceAware ? 0 : 1] = factory;
try {
return factory.newSAXParser().getXMLReader();
} catch (ParserConfigurationException ex) {
throw new SAXException("Cannot create parser satisfying configuration parameters", ex); //NOI18N
// ~~~~~~~~~~~~~~~~~~~~~ DOM related ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Creates empty DOM Document using JAXP factoring. E.g.:
* Document doc = createDocument("book", null, null, null);
* creates new DOM of a well-formed document with root element named book.
* @param rootQName qualified name of root element. e.g. myroot
or ns:myroot
* @param namespaceURI URI of root element namespace or null
* @param doctypePublicID public ID of DOCTYPE or null
* @param doctypeSystemID system ID of DOCTYPE or null
* required and doctypePublicID is also null
* @throws DOMException if new DOM with passed parameters can not be created
* @throws FactoryConfigurationError Application developers should never need to directly catch errors of this type.
* @return new DOM Document
public static Document createDocument(
String rootQName, String namespaceURI, String doctypePublicID, String doctypeSystemID
) throws DOMException {
DOMImplementation impl = getDOMImplementation();
if ((doctypePublicID != null) && (doctypeSystemID == null)) {
throw new IllegalArgumentException("System ID cannot be null if public ID specified. "); //NOI18N
DocumentType dtd = null;
if (doctypeSystemID != null) {
dtd = impl.createDocumentType(rootQName, doctypePublicID, doctypeSystemID);
return impl.createDocument(namespaceURI, rootQName, dtd);
* Obtains DOMImpementaton interface providing a number of methods for performing
* operations that are independent of any particular DOM instance.
* @throw DOMException NOT_SUPPORTED_ERR
if cannot get DOMImplementation
* @throw FactoryConfigurationError Application developers should never need to directly catch errors of this type.
* @return DOMImplementation implementation
private static DOMImplementation getDOMImplementation()
throws DOMException { //can be made public
DocumentBuilderFactory factory = getFactory(false, false);
try {
return factory.newDocumentBuilder().getDOMImplementation();
} catch (ParserConfigurationException ex) {
throw new DOMException(
DOMException.NOT_SUPPORTED_ERR, "Cannot create parser satisfying configuration parameters"
); //NOI18N
} catch (RuntimeException e) {
// E.g. #36578, IllegalArgumentException. Try to recover gracefully.
throw (DOMException) new DOMException(DOMException.NOT_SUPPORTED_ERR, e.toString()).initCause(e);
private static DocumentBuilderFactory[][] doms = new DocumentBuilderFactory[2][2];
private static DocumentBuilderFactory getFactory(boolean validate, boolean namespaceAware) {
DocumentBuilderFactory factory = doms[validate ? 0 : 1][namespaceAware ? 0 : 1];
if (factory == null) {
factory = DocumentBuilderFactory.newInstance();
doms[validate ? 0 : 1][namespaceAware ? 0 : 1] = factory;
return factory;
* Create from factory a DocumentBuilder and let it create a org.w3c.dom.Document.
* This method takes InputSource. After successful finish the document tree is returned.
* @param input a parser input (for URL users use: new InputSource(url.toExternalForm())
* @param validate if true validating parser is used
* @param namespaceAware if true DOM is created by namespace aware parser
* @param errorHandler a error handler to notify about exception or null
* @param entityResolver SAX entity resolver or null
; see class Javadoc for hints
* @throws IOException if an I/O problem during parsing occurs
* @throws SAXException is thrown if a parser error occurs
* @throws FactoryConfigurationError Application developers should never need to directly catch errors of this type.
* @return document representing given input
public static Document parse(
InputSource input, boolean validate, boolean namespaceAware, ErrorHandler errorHandler,
EntityResolver entityResolver
) throws IOException, SAXException {
DocumentBuilder builder = null;
DocumentBuilderFactory factory = getFactory(validate, namespaceAware);
try {
builder = factory.newDocumentBuilder();
} catch (ParserConfigurationException ex) {
throw new SAXException("Cannot create parser satisfying configuration parameters", ex); //NOI18N
if (errorHandler != null) {
if (entityResolver != null) {
return builder.parse(input);
* Identity transformation in XSLT with indentation added.
* Just using the identity transform and calling
* t.setOutputProperty(OutputKeys.INDENT, "yes");
* t.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
* does not work currently.
* You really have to use this bogus stylesheet.
* @see "JDK bug #5064280"
private static final String IDENTITY_XSLT_WITH_INDENT =
"" + // NOI18N
" " + // NOI18N
"" + // NOI18N
"" + // NOI18N
" " + // NOI18N
" " + // NOI18N
" " + // NOI18N
" "; // NOI18N
* Writes a DOM document to a stream.
* The precise output format is not guaranteed but this method will attempt to indent it sensibly.
Important: There might be some problems with
* <![CDATA[ ]]>
sections in the DOM tree you pass into this method. Specifically,
* some CDATA sections my not be written as CDATA section or may be merged with
* other CDATA section at the same level. Also if plain text nodes are mixed with
* CDATA sections at the same level all text is likely to end up in one big CDATA section.
* For nodes that only have one CDATA section this method should work fine.
* @param doc DOM document to be written
* @param out data sink
* @param enc XML-defined encoding name (e.g. "UTF-8")
* @throws IOException if JAXP fails or the stream cannot be written to
public static void write(Document doc, OutputStream out, String enc) throws IOException {
if (enc == null) {
throw new NullPointerException("You must set an encoding; use \"UTF-8\" unless you have a good reason not to!"); // NOI18N
Document doc2 = normalize(doc);
// XXX the following DOM 3 LS implementation of the rest of this method works fine on JDK 6 but pretty-printing is broken on JDK 5:
DOMImplementationLS ls = (DOMImplementationLS) doc.getImplementation().getFeature("LS", "3.0"); // NOI18N
assert ls != null : "No DOM 3 LS supported in " + doc.getClass().getName();
LSOutput output = ls.createLSOutput();
LSSerializer ser = ls.createLSSerializer();
String fpp = "format-pretty-print"; // NOI18N
if (ser.getDomConfig().canSetParameter(fpp, true)) {
ser.getDomConfig().setParameter(fpp, true);
ser.write(doc2, output);
try {
Transformer t = TransformerFactory.newInstance().newTransformer(
new StreamSource(new StringReader(IDENTITY_XSLT_WITH_INDENT)));
DocumentType dt = doc2.getDoctype();
if (dt != null) {
String pub = dt.getPublicId();
if (pub != null) {
t.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, pub);
t.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, dt.getSystemId());
t.setOutputProperty(OutputKeys.ENCODING, enc);
// See #123816
Set cdataQNames = new HashSet();
collectCDATASections(doc2, cdataQNames);
if (cdataQNames.size() > 0) {
StringBuilder cdataSections = new StringBuilder();
for(String s : cdataQNames) {
cdataSections.append(s).append(' '); //NOI18N
t.setOutputProperty(OutputKeys.CDATA_SECTION_ELEMENTS, cdataSections.toString());
Source source = new DOMSource(doc2);
Result result = new StreamResult(out);
t.transform(source, result);
} catch (Exception e) {
throw (IOException) new IOException(e.toString()).initCause(e);
private static void collectCDATASections(Node node, Set cdataQNames) {
if (node instanceof CDATASection) {
Node parent = node.getParentNode();
if (parent != null) {
String uri = parent.getNamespaceURI();
if (uri != null) {
cdataQNames.add("{" + uri + "}" + parent.getNodeName()); //NOI18N
} else {
NodeList children = node.getChildNodes();
for(int i = 0; i < children.getLength(); i++) {
collectCDATASections(children.item(i), cdataQNames);
* Check whether a DOM tree is valid according to a schema.
* Example of usage:
* Element fragment = ...;
* SchemaFactory f = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
* Schema s = f.newSchema(This.class.getResource("something.xsd"));
* try {
* XMLUtil.validate(fragment, s);
* // valid
* } catch (SAXException x) {
* // invalid
* }
* @param data a DOM tree
* @param schema a parsed schema
* @throws SAXException if validation failed
* @since org.openide.util 7.17
public static void validate(Element data, Schema schema) throws SAXException {
Validator v = schema.newValidator();
final SAXException[] error = {null};
v.setErrorHandler(new ErrorHandler() {
public void warning(SAXParseException x) throws SAXException {}
public void error(SAXParseException x) throws SAXException {
// Just rethrowing it is bad because it will also print it to stderr.
error[0] = x;
public void fatalError(SAXParseException x) throws SAXException {
error[0] = x;
try {
v.validate(new DOMSource(fixupAttrs(data)));
} catch (IOException x) {
assert false : x;
if (error[0] != null) {
throw error[0];
private static Element fixupAttrs(Element root) { // #140905
// #6529766/#6531160: some versions of JAXP reject attributes set using setAttribute
// (rather than setAttributeNS) even though the schema calls for no-NS attrs!
// JDK 5 is fine; JDK 6 broken; JDK 6u2+ fixed
// #146081: xml:base attributes mess up validation too.
Element copy = (Element) root.cloneNode(true);
NodeList nl = copy.getElementsByTagName("*"); // NOI18N
for (int i = 0; i < nl.getLength(); i++) {
fixupAttrsSingle((Element) nl.item(i));
return copy;
private static void fixupAttrsSingle(Element e) throws DOMException {
e.removeAttributeNS("http://www.w3.org/XML/1998/namespace", "base"); // NOI18N
Map replace = new HashMap();
NamedNodeMap attrs = e.getAttributes();
for (int j = 0; j < attrs.getLength(); j++) {
Attr attr = (Attr) attrs.item(j);
if (attr.getNamespaceURI() == null && !attr.getName().equals("xmlns")) { // NOI18N
replace.put(attr.getName(), attr.getValue());
for (Map.Entry entry : replace.entrySet()) {
e.setAttributeNS(null, entry.getKey(), entry.getValue());
* Escape passed string as XML attibute value
* (<
, &
, '
and "
* will be escaped.
* Note: An XML processor returns normalized value that can be different.
* @param val a string to be escaped
* @return escaped value
* @throws CharConversionException if val contains an improper XML character
* @since 1.40
public static String toAttributeValue(String val) throws CharConversionException {
if (val == null) {
throw new CharConversionException("null"); // NOI18N
if (checkAttributeCharacters(val)) {
return val;
StringBuffer buf = new StringBuffer();
for (int i = 0; i < val.length(); i++) {
char ch = val.charAt(i);
if ('<' == ch) {
} else if ('&' == ch) {
} else if ('\'' == ch) {
} else if ('"' == ch) {
return buf.toString();
* Escape passed string as XML element content (<
* &
and > in ]]>
* @param val a string to be escaped
* @return escaped value
* @throws CharConversionException if val contains an improper XML character
* @since 1.40
public static String toElementContent(String val) throws CharConversionException {
if (val == null) {
throw new CharConversionException("null"); // NOI18N
if (checkContentCharacters(val)) {
return val;
StringBuilder buf = new StringBuilder();
for (int i = 0; i < val.length(); i++) {
char ch = val.charAt(i);
if ('<' == ch) {
} else if ('&' == ch) {
} else if (('>' == ch) && (i > 1) && (val.charAt(i - 2) == ']') && (val.charAt(i - 1) == ']')) {
return buf.toString();
* Can be used to encode values that contain invalid XML characters.
* At SAX parser end must be used pair method to get original value.
* @param val data to be converted
* @param start offset
* @param len count
* @since 1.29
public static String toHex(byte[] val, int start, int len) {
StringBuffer buf = new StringBuffer();
for (int i = 0; i < len; i++) {
byte b = val[start + i];
buf.append(DEC2HEX[(b & 0xf0) >> 4]);
buf.append(DEC2HEX[b & 0x0f]);
return buf.toString();
* Decodes data encoded using {@link #toHex(byte[],int,int) toHex}.
* @param hex data to be converted
* @param start offset
* @param len count
* @throws IOException if input does not represent hex encoded value
* @since 1.29
public static byte[] fromHex(char[] hex, int start, int len)
throws IOException {
if (hex == null) {
throw new IOException("null");
int i = hex.length;
if ((i % 2) != 0) {
throw new IOException("odd length");
byte[] magic = new byte[i / 2];
for (; i > 0; i -= 2) {
String g = new String(hex, i - 2, 2);
try {
magic[(i / 2) - 1] = (byte) Integer.parseInt(g, 16);
} catch (NumberFormatException ex) {
throw new IOException(ex.getLocalizedMessage());
return magic;
* Check if all passed characters match XML expression [2].
* @return true if no escaping necessary
* @throws CharConversionException if contains invalid chars
private static boolean checkAttributeCharacters(String chars)
throws CharConversionException {
boolean escape = false;
for (int i = 0; i < chars.length(); i++) {
char ch = chars.charAt(i);
if (((int) ch) <= 93) { // we are UNICODE ']'
switch (ch) {
case 0x9:
case 0xA:
case 0xD:
case '\'':
case '"':
case '<':
case '&':
escape = true;
if (((int) ch) < 0x20) {
throw new CharConversionException("Invalid XML character " + ((int) ch) + ";.");
return escape == false;
* Check if all passed characters match XML expression [2].
* @return true if no escaping necessary
* @throws CharConversionException if contains invalid chars
private static boolean checkContentCharacters(String chars)
throws CharConversionException {
boolean escape = false;
for (int i = 0; i < chars.length(); i++) {
char ch = chars.charAt(i);
if (((int) ch) <= 93) { // we are UNICODE ']'
switch (ch) {
case 0x9:
case 0xA:
case 0xD:
case '>': // only ]]> is dangerous
if (escape) {
escape = (i > 0) && (chars.charAt(i - 1) == ']');
case '<':
case '&':
escape = true;
if (((int) ch) < 0x20) {
throw new CharConversionException("Invalid XML character " + ((int) ch) + ";.");
return escape == false;
* Try to normalize a document by removing nonsignificant whitespace.
* @see "#62006"
private static Document normalize(Document orig) throws IOException {
DocumentBuilder builder = null;
DocumentBuilderFactory factory = getFactory(false, false);
try {
builder = factory.newDocumentBuilder();
} catch (ParserConfigurationException e) {
throw (IOException) new IOException("Cannot create parser satisfying configuration parameters: " + e).initCause(e); //NOI18N
DocumentType doctype = null;
NodeList nl = orig.getChildNodes();
for (int i = 0; i < nl.getLength(); i++) {
if (nl.item(i) instanceof DocumentType) {
// We cannot import DocumentType's, so we need to manually copy it.
doctype = (DocumentType) nl.item(i);
Document doc;
if (doctype != null) {
doc = builder.getDOMImplementation().createDocument(
// XXX what about entity decls inside the DOCTYPE?
} else {
doc = builder.newDocument();
for (int i = 0; i < nl.getLength(); i++) {
if (!(nl.item(i) instanceof DocumentType)) {
doc.appendChild(doc.importNode(nl.item(i), true));
nl = doc.getElementsByTagName("*"); // NOI18N
for (int i = 0; i < nl.getLength(); i++) {
Element e = (Element) nl.item(i);
NodeList nl2 = e.getChildNodes();
for (int j = 0; j < nl2.getLength(); j++) {
Node n = nl2.item(j);
if (n instanceof Text && ((Text) n).getNodeValue().trim().length() == 0) {
j--; // since list is dynamic
return doc;