
org.xwiki.xml.XMLUtils Maven / Gradle / Ivy
/*
* See the NOTICE file distributed with this work for additional
* information regarding copyright ownership.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.xwiki.xml;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXResult;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.bootstrap.DOMImplementationRegistry;
import org.w3c.dom.ls.DOMImplementationLS;
import org.w3c.dom.ls.LSInput;
import org.w3c.dom.ls.LSOutput;
import org.w3c.dom.ls.LSParser;
import org.w3c.dom.ls.LSSerializer;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
/**
* XML Utility methods.
*
* @version $Id: 945e152a847a57f16964a0d774d5e0653c2950a7 $
* @since 1.6M1
*/
public final class XMLUtils
{
/** Logging helper object. */
private static final Logger LOGGER = LoggerFactory.getLogger(XMLUtils.class);
/** XML encoding of the "ampersand" character. */
private static final String AMP = "&";
/** Regular expression recognizing XML-escaped "ampersand" characters. */
private static final Pattern AMP_PATTERN = Pattern.compile("&(?:amp|#0*+38|#x0*+26);");
/** XML encoding of the "single quote" character. */
private static final String APOS = "'";
/** Regular expression recognizing XML-escaped "single quote" characters. */
private static final Pattern APOS_PATTERN = Pattern.compile("&(?:apos|#0*+39|#x0*+27);");
/** XML encoding of the "double quote" character. */
private static final String QUOT = """;
/** Regular expression recognizing XML-escaped "double quote" characters. */
private static final Pattern QUOT_PATTERN = Pattern.compile("&(?:quot|#0*+34|#x0*+22);");
/** XML encoding of the "left curly bracket". */
private static final String LCURL = "{";
/** Regular expression recognizing XML-escaped "left curly bracket" characters. */
private static final Pattern LCURL_PATTERN = Pattern.compile("&(?:#0*+123|#x0*+7[bB]);");
/** XML encoding of the "less than" character. */
private static final String LT = "<";
/** Regular expression recognizing XML-escaped "less than" characters. */
private static final Pattern LT_PATTERN = Pattern.compile("&(?:lt|#0*+60|#x0*+3[cC]);");
/** XML encoding of the "greater than" character. */
private static final String GT = ">";
/** Regular expression recognizing XML-escaped "greater than" characters. */
private static final Pattern GT_PATTERN = Pattern.compile("&(?:gt|#0*+62|#x0*+3[eE]);");
/** Helper object for manipulating DOM Level 3 Load and Save APIs. */
private static final DOMImplementationLS LS_IMPL;
/** Xerces configuration parameter for disabling fetching and checking XMLs against their DTD. */
private static final String DISABLE_DTD_PARAM = "http://apache.org/xml/features/nonvalidating/load-external-dtd";
static {
DOMImplementationLS implementation = null;
try {
implementation =
(DOMImplementationLS) DOMImplementationRegistry.newInstance().getDOMImplementation("LS 3.0");
} catch (Exception ex) {
LOGGER.warn("Cannot initialize the XML Script Service: [{}]", ex.getMessage());
}
LS_IMPL = implementation;
}
/**
* Private constructor since this is a utility class that shouldn't be instantiated (all methods are static).
*/
private XMLUtils()
{
// Nothing to do
}
/**
* Extracts a well-formed XML fragment from the given DOM tree.
*
* @param node the root of the DOM tree where the extraction takes place
* @param start the index of the first character
* @param length the maximum number of characters in text nodes to include in the returned fragment
* @return a well-formed XML fragment starting at the given character index and having up to the specified length,
* summing only the characters in text nodes
* @since 1.6M2
*/
public static String extractXML(Node node, int start, int length)
{
ExtractHandler handler = null;
try {
handler = new ExtractHandler(start, length);
Transformer xformer = TransformerFactory.newInstance().newTransformer();
xformer.transform(new DOMSource(node), new SAXResult(handler));
return handler.getResult();
} catch (Throwable t) {
if (handler != null && handler.isFinished()) {
return handler.getResult();
} else {
throw new RuntimeException("Failed to extract XML", t);
}
}
}
/**
* XML comment does not support some characters inside its content but there is no official escaping/unescaping for
* it so we made our own.
*
* - 1) Escape existing \
* - 2) Escape --
* - 3) Add {@code \} (unescaped as {@code ""}) at the end if the last char is {@code -}
*
*
* @param content the XML comment content to escape
* @return the escaped content.
* @since 1.9M2
*/
public static String escapeXMLComment(String content)
{
StringBuffer str = new StringBuffer(content.length());
char[] buff = content.toCharArray();
char lastChar = 0;
for (char c : buff) {
if (c == '\\') {
str.append('\\');
} else if (c == '-' && lastChar == '-') {
str.append('\\');
}
str.append(c);
lastChar = c;
}
if (lastChar == '-') {
str.append('\\');
}
return str.toString();
}
/**
* XML comment does not support some characters inside its content but there is no official escaping/unescaping for
* it so we made our own.
*
* @param content the XML comment content to unescape
* @return the unescaped content.
* @see #escapeXMLComment(String)
* @since 1.9M2
*/
public static String unescapeXMLComment(String content)
{
StringBuffer str = new StringBuffer(content.length());
char[] buff = content.toCharArray();
boolean escaped = false;
for (char c : buff) {
if (!escaped && c == '\\') {
escaped = true;
continue;
}
str.append(c);
escaped = false;
}
return str.toString();
}
/**
* Escapes all the XML special characters in a String
using numerical XML entities. Specifically,
* escapes <, >, ", ', & and {. Left curly bracket is included here to protect against {{/html}} in
* xwiki 2.x syntax.
*
* @param content the text to escape, may be {@code null}
* @return a new escaped {@code String}, {@code null} if {@code null} input
*/
public static String escape(Object content)
{
return escapeAttributeValue(content);
}
/**
* Escapes all the XML special characters and left curly bracket in a String
using numerical XML
* entities, so that the resulting string can safely be used as an XML attribute value. Specifically, escapes <,
* >, ", ', & and {. Left curly bracket is included here to protect against {{/html}} in xwiki 2.x syntax.
*
* @param content the text to escape, may be {@code null}
* @return a new escaped {@code String}, {@code null} if {@code null} input
*/
public static String escapeAttributeValue(Object content)
{
if (content == null) {
return null;
}
String str = String.valueOf(content);
StringBuilder result = new StringBuilder((int) (str.length() * 1.1));
int length = str.length();
char c;
for (int i = 0; i < length; ++i) {
c = str.charAt(i);
switch (c) {
case '&':
result.append(AMP);
break;
case '\'':
result.append(APOS);
break;
case '"':
result.append(QUOT);
break;
case '<':
result.append(LT);
break;
case '>':
result.append(GT);
break;
case '{':
result.append(LCURL);
break;
default:
result.append(c);
}
}
return result.toString();
}
/**
* Escapes the XML special characters in a String
using numerical XML entities, so that the resulting
* string can safely be used as an XML text node. Specifically, escapes <, >, and &.
*
* @param content the text to escape, may be {@code null}
* @return a new escaped {@code String}, {@code null} if {@code null} input
*/
public static String escapeElementContent(Object content)
{
if (content == null) {
return null;
}
String str = String.valueOf(content);
StringBuilder result = new StringBuilder((int) (str.length() * 1.1));
int length = str.length();
char c;
for (int i = 0; i < length; ++i) {
c = str.charAt(i);
switch (c) {
case '&':
result.append(AMP);
break;
case '<':
result.append(LT);
break;
case '>':
result.append(GT);
break;
default:
result.append(c);
}
}
return result.toString();
}
/**
* Unescape encoded special XML characters. Only >, < &, ", ' and { are unescaped, since they are the only
* ones that affect the resulting markup.
*
* @param content the text to decode, may be {@code null}
* @return unescaped content, {@code null} if {@code null} input
*/
public static String unescape(Object content)
{
if (content == null) {
return null;
}
String str = String.valueOf(content);
str = APOS_PATTERN.matcher(str).replaceAll("'");
str = QUOT_PATTERN.matcher(str).replaceAll("\"");
str = LT_PATTERN.matcher(str).replaceAll("<");
str = GT_PATTERN.matcher(str).replaceAll(">");
str = AMP_PATTERN.matcher(str).replaceAll("&");
str = LCURL_PATTERN.matcher(str).replaceAll("{");
return str;
}
/**
* Construct a new (empty) DOM Document and return it.
*
* @return an empty DOM Document
*/
public static Document createDOMDocument()
{
try {
return DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
} catch (ParserConfigurationException ex) {
LOGGER.error("Cannot create DOM Documents", ex);
return null;
}
}
/**
* Parse a DOM Document from a source.
*
* @param source the source input to parse
* @return the equivalent DOM Document, or {@code null} if the parsing failed.
*/
public static Document parse(LSInput source)
{
try {
LSParser p = LS_IMPL.createLSParser(DOMImplementationLS.MODE_SYNCHRONOUS, null);
// Disable validation, since this takes a lot of time and causes unneeded network traffic
p.getDomConfig().setParameter("validate", false);
if (p.getDomConfig().canSetParameter(DISABLE_DTD_PARAM, false)) {
p.getDomConfig().setParameter(DISABLE_DTD_PARAM, false);
}
return p.parse(source);
} catch (Exception ex) {
LOGGER.warn("Cannot parse XML document: [{}]", ex.getMessage());
return null;
}
}
/**
* Serialize a DOM Node into a string, including the XML declaration at the start.
*
* @param node the node to export
* @return the serialized node, or an empty string if the serialization fails
*/
public static String serialize(Node node)
{
return serialize(node, true);
}
/**
* Serialize a DOM Node into a string, with an optional XML declaration at the start.
*
* @param node the node to export
* @param withXmlDeclaration whether to output the XML declaration or not
* @return the serialized node, or an empty string if the serialization fails or the node is {@code null}
*/
public static String serialize(Node node, boolean withXmlDeclaration)
{
if (node == null) {
return "";
}
try {
LSOutput output = LS_IMPL.createLSOutput();
StringWriter result = new StringWriter();
output.setCharacterStream(result);
LSSerializer serializer = LS_IMPL.createLSSerializer();
serializer.getDomConfig().setParameter("xml-declaration", withXmlDeclaration);
serializer.setNewLine("\n");
String encoding = "UTF-8";
if (node instanceof Document) {
encoding = ((Document) node).getXmlEncoding();
} else if (node.getOwnerDocument() != null) {
encoding = node.getOwnerDocument().getXmlEncoding();
}
output.setEncoding(encoding);
serializer.write(node, output);
return result.toString();
} catch (Exception ex) {
LOGGER.warn("Failed to serialize node to XML String: [{}]", ex.getMessage());
return "";
}
}
/**
* Apply an XSLT transformation to a Document.
*
* @param xml the document to transform
* @param xslt the stylesheet to apply
* @return the transformation result, or {@code null} if an error occurs or {@code null} xml or xslt input
*/
public static String transform(Source xml, Source xslt)
{
if (xml != null && xslt != null) {
try {
StringWriter output = new StringWriter();
Result result = new StreamResult(output);
javax.xml.transform.TransformerFactory.newInstance().newTransformer(xslt).transform(xml, result);
return output.toString();
} catch (Exception ex) {
LOGGER.warn("Failed to apply XSLT transformation: [{}]", ex.getMessage());
}
}
return null;
}
/**
* Parse and pretty print a XML content.
*
* @param content the XML content to format
* @return the formated version of the passed XML content
* @throws TransformerFactoryConfigurationError when failing to create a
* {@link TransformerFactoryConfigurationError}
* @throws TransformerException when failing to transform the content
* @since 5.2M1
*/
public static String formatXMLContent(String content) throws TransformerFactoryConfigurationError,
TransformerException
{
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
StreamResult result = new StreamResult(new StringWriter());
// Use a SAX Source instead of a StreamSource so that we can control the XMLReader used and set up one that
// doesn't resolve entities (and thus doesn't go out on the internet to fetch DTDs!).
SAXSource source = new SAXSource(new InputSource(new StringReader(content)));
try {
XMLReader reader = org.xml.sax.helpers.XMLReaderFactory.createXMLReader();
reader.setEntityResolver(new org.xml.sax.EntityResolver() {
@Override
public InputSource resolveEntity(String publicId, String systemId)
throws SAXException, IOException
{
// Return an empty resolved entity. Note that we don't return null since this would tell the reader
// to go on the internet to fetch the DTD.
return new InputSource(new StringReader(""));
}
});
source.setXMLReader(reader);
} catch (Exception e) {
throw new TransformerException(String.format(
"Failed to create XML Reader while pretty-printing content [%s]", content), e);
}
transformer.transform(source, result);
return result.getWriter().toString();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy