// com.twelvemonkeys.xml.XMLSerializer Maven / Gradle / Ivy
/*
* Copyright (c) 2008, Harald Kuhr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name "TwelveMonkeys" nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.twelvemonkeys.xml;
import com.twelvemonkeys.lang.StringUtil;
import org.w3c.dom.*;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.*;
import java.nio.charset.Charset;
import java.util.Date;
/**
* XMLSerializer
*
* @author Harald Kuhr
* @author last modified by $Author: haku $
* @version $Id: //depot/branches/personal/haraldk/twelvemonkeys/release-2/twelvemonkeys-core/src/main/java/com/twelvemonkeys/xml/XMLSerializer.java#1 $
*/
public class XMLSerializer {
// TODO: Replace with DOMSerializer? Test performance, pretty printing etc...
// Main problem: Sun's Java 5 does not have LS 3.0 support
// This class has no dependencies, which probably makes it more useful
// TODO: Support line breaking (at configurable width)
// TODO: Support skipping XML declaration?
// TODO: Support standalone?
// TODO: Support more than version 1.0?
// TODO: Consider using IOException to communicate trouble, rather than RTE,
// to be more compatible...
// TODO: Support not inserting line-breaks, to preserve space
// TODO: Idea: Create a SerializationContext that stores attributes on
// serialization, to keep the serialization thread-safe
// Store preserveSpace attribute in this context, to avoid costly traversals
// Store user options here too
// TODO: Push/pop?
/** Destination of the serialized bytes; {@link #serialize(Document)} flushes it but does not close it. */
private final OutputStream mOutput;

/** Charset used both for encoding the output and in the XML declaration. */
private final Charset mEncoding;

/** User configurable options; a copy is handed to each serialization run. */
private final SerializationContext mContext;

/**
 * Creates a serializer writing to the given stream, using the given encoding.
 *
 * @param pOutput the stream to write the serialized document to, must not be {@code null}.
 * @param pEncoding the name of the charset to encode the output with.
 * @throws NullPointerException if {@code pOutput} is {@code null}.
 * @throws IllegalArgumentException if {@code pEncoding} is {@code null}, is not a legal
 *         charset name or names a charset that is not supported (as reported by
 *         {@link Charset#forName(String)}).
 */
public XMLSerializer(final OutputStream pOutput, final String pEncoding) {
    // Fail fast: without this check the null would only surface later, inside serialize()
    if (pOutput == null) {
        throw new NullPointerException("pOutput == null");
    }

    mOutput = pOutput;
    mEncoding = Charset.forName(pEncoding);
    mContext = new SerializationContext();
}
/**
 * Sets the string that is written once per nesting level in front of indented nodes.
 *
 * @param pIndent the indentation unit, or {@code null} to fall back to a single space.
 */
public void setIndentation(String pIndent) {
    if (pIndent == null) {
        mContext.indent = " ";
    }
    else {
        mContext.indent = pIndent;
    }
}
/**
 * Controls whether comment nodes are left out of the serialized output.
 *
 * @param pStrip {@code true} to omit comments, {@code false} to write them.
 */
public void setStripComments(boolean pStrip) {
    this.mContext.stripComments = pStrip;
}
/**
 * Serializes the given document to the output stream of this serializer.
 * <p>
 * The XML declaration is written first, followed by the document itself.
 * The writer is always flushed, also when writing fails with an exception,
 * but the underlying stream is left open.
 * </p>
 *
 * @param pDocument the document to serialize.
 */
public void serialize(final Document pDocument) {
    final OutputStreamWriter encoder = new OutputStreamWriter(mOutput, mEncoding);
    final PrintWriter writer = new PrintWriter(encoder);

    try {
        writeXMLDeclararion(writer);

        // Work on a copy, so that state changed while serializing never leaks into the options
        final SerializationContext runContext = mContext.copy();
        writeXML(writer, pDocument, runContext);
    }
    finally {
        writer.flush();
    }
}
/**
 * Writes the XML declaration, stating XML version 1.0 and the name of the
 * charset this serializer encodes its output with, followed by a line break.
 *
 * @param pOut the writer to write to.
 */
private void writeXMLDeclararion(final PrintWriter pOut) {
    pOut.print("<?xml version=\"1.0\" encoding=\"");
    pOut.print(mEncoding.name());
    pOut.println("\"?>");
}
/**
 * Writes the complete document by handing the document node to the
 * recursive node writer.
 *
 * @param pOut the writer to write to.
 * @param pDocument the document to write.
 * @param pContext the context holding options and state for this run.
 */
private void writeXML(final PrintWriter pOut, final Document pDocument, final SerializationContext pContext) {
    final Node documentNode = pDocument;
    writeNodeRecursive(pOut, documentNode, pContext);
}
/**
 * Writes a single node, dispatching on its node type. Container nodes
 * (documents, fragments and elements) write their children through this
 * method again.
 *
 * @param pOut the writer to write to.
 * @param pNode the node to write.
 * @param pContext the context holding options and state for the current level.
 * @throws IllegalArgumentException if an attribute node is encountered outside an element.
 * @throws InternalError if the node is of a type that is not supported
 *         (entity, entity reference, notation or unknown types).
 */
private void writeNodeRecursive(final PrintWriter pOut, final Node pNode, final SerializationContext pContext) {
    final short type = pNode.getNodeType();

    // A stripped comment writes nothing at all. Indenting for it would leave the
    // indentation dangling in front of the next sibling, which then gets indented twice.
    final boolean strippedComment = type == Node.COMMENT_NODE && pContext.stripComments;

    // Text nodes handle their own indentation, as it depends on the preserveSpace setting
    if (type != Node.TEXT_NODE && !strippedComment) {
        indentToLevel(pOut, pContext);
    }

    switch (type) {
        case Node.DOCUMENT_NODE:
        case Node.DOCUMENT_FRAGMENT_NODE:
            writeDocument(pOut, pNode, pContext);
            break;
        case Node.DOCUMENT_TYPE_NODE:
            writeDoctype(pOut, (DocumentType) pNode);
            break;
        case Node.ELEMENT_NODE:
            // xml:space on this element only applies to the element and its content,
            // so the previous setting is restored once the element is written
            boolean preserveSpace = pContext.preserveSpace;
            updatePreserveSpace(pNode, pContext);
            writeElement(pOut, (Element) pNode, pContext);
            pContext.preserveSpace = preserveSpace;
            break;
        case Node.CDATA_SECTION_NODE:
            writeCData(pOut, pNode);
            break;
        case Node.TEXT_NODE:
            writeText(pOut, pNode, pContext);
            break;
        case Node.COMMENT_NODE:
            writeComment(pOut, pNode, pContext);
            break;
        case Node.PROCESSING_INSTRUCTION_NODE:
            writeProcessingInstruction(pOut, pNode);
            break;
        case Node.ATTRIBUTE_NODE:
            throw new IllegalArgumentException("Malformed input Document: Attribute nodes should only occur inside Element nodes");
        case Node.ENTITY_NODE:
        case Node.ENTITY_REFERENCE_NODE:
            // Would be: ( '&' | '%' ) + getNodeName + ';'
        case Node.NOTATION_NODE:
            // Not implemented, falls through to the failure below
        default:
            throw new InternalError("Lazy programmer never implemented serialization of " + pNode.getClass());
    }
}
/**
 * Writes a processing instruction on a line of its own, as
 * {@code <?target data?>}, where the target is the node name and the data
 * is the node value.
 *
 * @param pOut the writer to write to.
 * @param pNode the processing instruction node to write.
 */
private void writeProcessingInstruction(final PrintWriter pOut, final Node pNode) {
    pOut.print("\n<?");
    pOut.print(pNode.getNodeName());
    pOut.print(" ");
    pOut.print(pNode.getNodeValue());
    pOut.println("?>");
}
/**
 * Writes the escaped value of a text node.
 * <p>
 * When white space is preserved, the value is written exactly as is, with
 * no indentation or line break added. Otherwise, values that
 * {@code StringUtil.isEmpty} reports as empty are skipped, and all other
 * values are trimmed and written indented, on a line of their own.
 * </p>
 *
 * @param pOut the writer to write to.
 * @param pNode the text node to write.
 * @param pContext the context holding options and state for the current level.
 */
private void writeText(final PrintWriter pOut, final Node pNode, final SerializationContext pContext) {
    // TODO: Is this really as specified?
    final String text = pNode.getNodeValue();

    if (pContext.preserveSpace) {
        pOut.print(maybeEscapeElementValue(text));
        return;
    }

    if (StringUtil.isEmpty(text)) {
        return;
    }

    indentToLevel(pOut, pContext);
    pOut.println(maybeEscapeElementValue(text.trim()));
}
/**
 * Writes a CDATA section, as {@code <![CDATA[value]]>}, followed by a line break.
 * The value is written unescaped.
 *
 * @param pOut the writer to write to.
 * @param pNode the CDATA section node to write.
 * @throws IllegalArgumentException if the value contains the string {@code ]]>},
 *         which would terminate the section prematurely.
 */
private void writeCData(final PrintWriter pOut, final Node pNode) {
    pOut.print("<![CDATA[");
    pOut.print(validateCDataValue(pNode.getNodeValue()));
    pOut.println("]]>");
}
/**
 * Updates the preserveSpace setting of the context from the
 * {@code xml:space} attribute of the given node, if it has one.
 * The value {@code preserve} turns preservation on, {@code default} turns
 * it off. Without the attribute, the context is left untouched.
 *
 * @param pNode the node to inspect.
 * @param pContext the context to update.
 */
private static void updatePreserveSpace(final Node pNode, final SerializationContext pContext) {
    final NamedNodeMap attrs = pNode.getAttributes();
    if (attrs == null) {
        return;
    }

    final Node spaceAttr = attrs.getNamedItem("xml:space");
    if (spaceAttr == null) {
        return;
    }

    final String mode = spaceAttr.getNodeValue();
    if ("preserve".equals(mode)) {
        pContext.preserveSpace = true;
    }
    else if ("default".equals(mode)) {
        pContext.preserveSpace = false;
    }
    // No other values are allowed per spec, ignore
}
/**
 * Writes the indentation string of the context once for each nesting level.
 *
 * @param pOut the writer to write to.
 * @param pContext the context holding the indentation string and current level.
 */
private static void indentToLevel(final PrintWriter pOut, final SerializationContext pContext) {
    int written = 0;

    while (written < pContext.level) {
        pOut.print(pContext.indent);
        written++;
    }
}
/**
 * Writes a comment, as {@code <!-- value -->}, followed by a line break.
 * Nothing is written if comments are stripped.
 * <p>
 * A single space is inserted between the delimiters and the value, unless
 * the value already starts or ends with a space.
 * </p>
 *
 * @param pOut the writer to write to.
 * @param pNode the comment node to write.
 * @param pContext the context holding the stripComments option.
 * @throws IllegalArgumentException if the value contains the string {@code --}.
 */
private void writeComment(final PrintWriter pOut, final Node pNode, final SerializationContext pContext) {
    if (pContext.stripComments) {
        return;
    }

    String value = pNode.getNodeValue();
    validateCommenValue(value);

    if (value.startsWith(" ")) {
        pOut.print("<!--");
    }
    else {
        pOut.print("<!-- ");
    }

    pOut.print(value);

    if (value.endsWith(" ")) {
        pOut.println("-->");
    }
    else {
        pOut.println(" -->");
    }
}
/**
* Returns an escaped version of the input string. The string is guaranteed
* to not contain illegal XML characters ({@code &<>}).
* If no escaping is needed, the input string is returned as is.
*
* @param pValue the input string that might need escaping.
* @return an escaped version of the input string.
*/
static String maybeEscapeElementValue(final String pValue) {
int startEscape = needsEscapeElement(pValue);
if (startEscape < 0) {
// If no escpaing is needed, simply return original
return pValue;
}
else {
// Otherwise, start replacing
StringBuilder builder = new StringBuilder(pValue.substring(0, startEscape));
builder.ensureCapacity(pValue.length() + 30);
int pos = startEscape;
for (int i = pos; i < pValue.length(); i++) {
switch (pValue.charAt(i)) {
case '&':
pos = appendAndEscape(pValue, pos, i, builder, "&");
break;
case '<':
pos = appendAndEscape(pValue, pos, i, builder, "<");
break;
case '>':
pos = appendAndEscape(pValue, pos, i, builder, ">");
break;
//case '\'':
// pos = appendAndEscape(pString, pos, i, builder, "'");
// break;
//case '"':
// pos = appendAndEscape(pString, pos, i, builder, """);
// break;
default:
break;
}
}
builder.append(pValue.substring(pos));
return builder.toString();
}
}
private static int appendAndEscape(final String pString, int pStart, final int pEnd, final StringBuilder pBuilder, final String pEntity) {
pBuilder.append(pString.substring(pStart, pEnd));
pBuilder.append(pEntity);
return pEnd + 1;
}
/**
* Returns an the first index from the input string that should be escaped
* if escaping is needed, otherwise {@code -1}.
*
* @param pString the input string that might need escaping.
* @return the first index from the input string that should be escaped,
* or {@code -1}.
*/
private static int needsEscapeElement(final String pString) {
for (int i = 0; i < pString.length(); i++) {
switch (pString.charAt(i)) {
case '&':
case '<':
case '>':
//case '\'':
//case '"':
return i;
default:
}
}
return -1;
}
private static String maybeEscapeAttributeValue(final String pValue) {
int startEscape = needsEscapeAttribute(pValue);
if (startEscape < 0) {
return pValue;
}
else {
StringBuilder builder = new StringBuilder(pValue.substring(0, startEscape));
builder.ensureCapacity(pValue.length() + 16);
int pos = startEscape;
for (int i = pos; i < pValue.length(); i++) {
switch (pValue.charAt(i)) {
case '&':
pos = appendAndEscape(pValue, pos, i, builder, "&");
break;
case '"':
pos = appendAndEscape(pValue, pos, i, builder, """);
break;
default:
break;
}
}
//StringBuilder builder = new StringBuilder(pValue.length() + 30);
//
//int start = 0;
//while (end >= 0) {
// builder.append(pValue.substring(start, end));
// builder.append(""");
// start = end + 1;
// end = pValue.indexOf('"', start);
//}
//builder.append(pValue.substring(start));
builder.append(pValue.substring(pos));
return builder.toString();
}
}
/**
* Returns an the first index from the input string that should be escaped
* if escaping is needed, otherwise {@code -1}.
*
* @param pString the input string that might need escaping.
* @return the first index from the input string that should be escaped,
* or {@code -1}.
*/
private static int needsEscapeAttribute(final String pString) {
for (int i = 0; i < pString.length(); i++) {
switch (pString.charAt(i)) {
case '&':
//case '<':
//case '>':
//case '\'':
case '"':
return i;
default:
}
}
return -1;
}
/**
 * Verifies that the given value can be written inside a CDATA section.
 *
 * @param pValue the value to verify.
 * @return the value, unchanged.
 * @throws IllegalArgumentException if the value contains the string {@code ]]>}.
 */
private static String validateCDataValue(final String pValue) {
    if (!pValue.contains("]]>")) {
        return pValue;
    }

    throw new IllegalArgumentException("Malformed input document: CDATA block may not contain the string ']]>'");
}
/**
 * Verifies that the given value can be written inside a comment.
 *
 * @param pValue the value to verify.
 * @return the value, unchanged.
 * @throws IllegalArgumentException if the value contains the string {@code --}.
 */
private static String validateCommenValue(final String pValue) {
    if (!pValue.contains("--")) {
        return pValue;
    }

    throw new IllegalArgumentException("Malformed input document: Comment may not contain the string '--'");
}
/**
 * Writes all children of a document or document fragment node, in
 * document order, using the same context for each of them.
 *
 * @param pOut the writer to write to.
 * @param pNode the document or document fragment node.
 * @param pContext the context holding options and state for the current level.
 */
private void writeDocument(final PrintWriter pOut, final Node pNode, final SerializationContext pContext) {
    // Document fragments might not have child nodes, in which case the loop never runs
    for (Node child = pNode.getFirstChild(); child != null; child = child.getNextSibling()) {
        writeNodeRecursive(pOut, child, pContext);
    }
}
/**
 * Writes an element: the start tag with a namespace declaration (if
 * needed) and attributes, the children, and the end tag. Elements
 * without children are written as an empty-element tag.
 * <p>
 * A namespace declaration is written when the element has a namespace URI
 * that differs from the default namespace recorded in the context.
 * Existing {@code xmlns} and {@code xmlns:*} attributes are skipped.
 * Children are written with a context one level deeper. Unless white space
 * is preserved, a line break follows the start tag and the end tag is indented.
 * </p>
 *
 * @param pOut the writer to write to.
 * @param pNode the element to write.
 * @param pContext the context holding options and state for the current level.
 */
private void writeElement(final PrintWriter pOut, final Element pNode, final SerializationContext pContext) {
    pOut.print("<");
    pOut.print(pNode.getTagName());

    // TODO: Attributes should probably include namespaces, so that it works
    // even if the document was created using attributes instead of namespaces...

    // Handle namespace
    String namespace = pNode.getNamespaceURI();
    if (namespace != null && !namespace.equals(pContext.defaultNamespace)) {
        String prefix = pNode.getPrefix();

        if (prefix == null) {
            // Unprefixed element in a new namespace: this becomes the default for the subtree
            pContext.defaultNamespace = namespace;
            pOut.print(" xmlns");
        }
        else {
            pOut.print(" xmlns:");
            pOut.print(prefix);
        }

        pOut.print("=\"");
        // The URI ends up in an attribute value, and must be escaped like one
        pOut.print(maybeEscapeAttributeValue(namespace));
        pOut.print("\"");
    }

    // Iterate attributes if any
    if (pNode.hasAttributes()) {
        NamedNodeMap attributes = pNode.getAttributes();

        for (int i = 0; i < attributes.getLength(); i++) {
            Attr attribute = (Attr) attributes.item(i);
            String name = attribute.getName();

            // Skip namespace declarations ("xmlns" or "xmlns:prefix"), these are handled above
            if (!(name.startsWith("xmlns") && (name.length() == 5 || name.charAt(5) == ':'))) {
                pOut.print(" ");
                pOut.print(name);
                pOut.print("=\"");
                pOut.print(maybeEscapeAttributeValue(attribute.getValue()));
                pOut.print("\"");
            }
        }
    }

    // Iterate children if any
    if (pNode.hasChildNodes()) {
        pOut.print(">");

        if (!pContext.preserveSpace) {
            pOut.println();
        }

        NodeList children = pNode.getChildNodes();

        for (int i = 0; i < children.getLength(); i++) {
            // Each child gets its own context copy, one level deeper
            writeNodeRecursive(pOut, children.item(i), pContext.push());
        }

        if (!pContext.preserveSpace) {
            indentToLevel(pOut, pContext);
        }

        pOut.print("</");
        pOut.print(pNode.getTagName());
        pOut.println(">");
    }
    else {
        pOut.println("/>");
    }
}
/**
 * Writes the document type declaration, as
 * {@code <!DOCTYPE name PUBLIC "publicId" "systemId" [internalSubset]>},
 * followed by a line break. Nothing is written for a {@code null} doctype.
 * <p>
 * Identifiers and internal subset that are {@code null} or empty are left
 * out. With a system identifier only, the {@code SYSTEM} keyword is used.
 * </p>
 *
 * @param pOut the writer to write to.
 * @param pDoctype the document type to write, may be {@code null}.
 */
private void writeDoctype(final PrintWriter pOut, final DocumentType pDoctype) {
    // NOTE: The DOMImplementationLS LSSerializer actually inserts SYSTEM or
    // PUBLIC identifiers even if they are empty strings. The result is, it
    // will create invalid documents.
    // Testing for empty strings seems to be more compatible.
    if (pDoctype == null) {
        return;
    }

    pOut.print("<!DOCTYPE ");
    pOut.print(pDoctype.getName());

    String publicId = pDoctype.getPublicId();
    String systemId = pDoctype.getSystemId();
    boolean hasPublicId = publicId != null && publicId.length() > 0;
    boolean hasSystemId = systemId != null && systemId.length() > 0;

    if (hasPublicId) {
        // Public identifiers can not contain double quotes, so this quoting is always safe
        pOut.print(" PUBLIC \"");
        pOut.print(publicId);
        pOut.print("\"");
    }
    else if (hasSystemId) {
        pOut.print(" SYSTEM");
    }

    if (hasPublicId || hasSystemId) {
        // A PUBLIC identifier must be followed by a system literal, even if it is empty
        String literal = hasSystemId ? systemId : "";
        // System literals may contain either kind of quote, pick the one not in use
        char quote = literal.indexOf('"') >= 0 ? '\'' : '"';

        pOut.print(' ');
        pOut.print(quote);
        pOut.print(literal);
        pOut.print(quote);
    }

    String internalSubset = pDoctype.getInternalSubset();
    if (internalSubset != null && internalSubset.length() > 0) {
        pOut.print(" [");
        pOut.print(internalSubset);
        pOut.print("]");
    }

    pOut.println(">");
}
/**
 * Demo entry point: builds a small namespace aware test document, writes it
 * to {@code System.out} using both this class and {@code DOMSerializer},
 * then parses the output of each serializer back and writes the resulting
 * documents with both serializers again, for comparison.
 *
 * @param pArgs command line arguments, not used.
 * @throws IOException if the document builder can not be created, or parsing fails.
 * @throws SAXException if the serialized output can not be parsed back.
 */
public static void main(String[] pArgs) throws IOException, SAXException {
    // Set up a namespace aware parser/DOM implementation
    DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
    builderFactory.setNamespaceAware(true);

    DocumentBuilder parser;
    try {
        parser = builderFactory.newDocumentBuilder();
    }
    catch (ParserConfigurationException e) {
        IOException wrapped = new IOException(e.getMessage());
        wrapped.initCause(e);
        throw wrapped;
    }

    // Build the test document, with a doctype and a root element in a default namespace
    DOMImplementation implementation = parser.getDOMImplementation();
    DocumentType doctype = implementation.createDocumentType("test", null, null);
    Document doc = implementation.createDocument("http://www.twelvemonkeys.com/xml/test", "test", doctype);
    Element rootElement = doc.getDocumentElement();

    // A comment in front of the root element
    doc.insertBefore(doc.createComment(new Date().toString()), rootElement);

    Element sub = doc.createElement("sub");
    rootElement.appendChild(sub);

    // Prefixed element in another namespace, with attributes and mixed content
    Element prefixed = doc.createElementNS("http://more.com/1999/namespace", "more:more");
    prefixed.setAttribute("foo", "test");
    prefixed.setAttribute("bar", "'really' \"legal\" & ok");
    sub.appendChild(prefixed);
    prefixed.appendChild(doc.createTextNode("Simply some text."));
    prefixed.appendChild(doc.createCDATASection("&something escaped;"));
    prefixed.appendChild(doc.createTextNode("More & !"));
    prefixed.appendChild(doc.createTextNode("\"<<'&'>>\""));

    // Empty elements, without and with attributes
    Element empty = doc.createElement("another");
    sub.appendChild(empty);

    Element emptyWithAttribute = doc.createElement("yet-another");
    emptyWithAttribute.setAttribute("this-one", "with-params");
    sub.appendChild(emptyWithAttribute);

    // Element with preserved white space
    Element preserved = doc.createElementNS("http://www.twelvemonkeys.com/xml/test", "pre");
    preserved.setAttributeNS("http://www.w3.org/XML/1998/namespace", "xml:space", "preserve");
    preserved.appendChild(doc.createTextNode(" \t \n\r some text & white ' ' \n "));
    sub.appendChild(preserved);

    // Write the document to the console with both serializers
    System.out.println("XMLSerializer:");
    XMLSerializer consoleXml = new XMLSerializer(System.out, "UTF-8");
    consoleXml.serialize(doc);
    System.out.println();

    System.out.println("DOMSerializer:");
    DOMSerializer consoleDom = new DOMSerializer(System.out, "UTF-8");
    consoleDom.setPrettyPrint(true);
    consoleDom.serialize(doc);
    System.out.println();

    System.out.println("\n");

    // Write the document to memory with both serializers
    ByteArrayOutputStream xmlBytes = new ByteArrayOutputStream();
    XMLSerializer memoryXml = new XMLSerializer(xmlBytes, "UTF-8");
    memoryXml.serialize(doc);

    ByteArrayOutputStream domBytes = new ByteArrayOutputStream();
    DOMSerializer memoryDom = new DOMSerializer(domBytes, "UTF-8");
    memoryDom.serialize(doc);

    // Round trip: parse the XMLSerializer output and write it with both serializers
    Document fromXml = parser.parse(new ByteArrayInputStream(xmlBytes.toByteArray()));

    System.out.println("XMLSerializer reparsed XMLSerializer:");
    consoleXml.serialize(fromXml);
    System.out.println();

    System.out.println("DOMSerializer reparsed XMLSerializer:");
    consoleDom.serialize(fromXml);
    System.out.println();

    // Round trip: parse the DOMSerializer output and write it with both serializers
    Document fromDom = parser.parse(new ByteArrayInputStream(domBytes.toByteArray()));

    System.out.println("XMLSerializer reparsed DOMSerializer:");
    consoleXml.serialize(fromDom);
    System.out.println();

    System.out.println("DOMSerializer reparsed DOMSerializer:");
    consoleDom.serialize(fromDom);
    System.out.println();
}
/**
 * Holds the user options and the state that changes while traversing the
 * document. Copies are independent of the instance they were copied from.
 */
static class SerializationContext implements Cloneable {
    /** The string written once per nesting level when indenting. */
    String indent = " ";

    /** The current nesting level. */
    int level = 0;

    /** Whether white space in text content is written as is. */
    boolean preserveSpace = false;

    /** Whether comment nodes are left out of the output. */
    boolean stripComments = false;

    /** The default namespace currently in effect, {@code null} if none was declared yet. */
    String defaultNamespace;

    /**
     * Creates a copy of this context, with all values equal to this context.
     *
     * @return a new context, independent of this context.
     */
    public SerializationContext copy() {
        final Object duplicate;

        try {
            duplicate = clone();
        }
        catch (CloneNotSupportedException e) {
            throw new Error(e);
        }

        return (SerializationContext) duplicate;
    }

    /**
     * Creates a copy of this context, for the level below this one.
     * This context is left unchanged.
     *
     * @return a new context, with the level increased by one.
     */
    public SerializationContext push() {
        final SerializationContext nested = copy();
        nested.level = level + 1;

        return nested;
    }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy