com.twelvemonkeys.xml.XMLSerializer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of common-io Show documentation
Show all versions of common-io Show documentation
TwelveMonkeys Common I/O support classes.
/*
* Copyright (c) 2008, Harald Kuhr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.twelvemonkeys.xml;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.util.Date;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.*;
import org.xml.sax.SAXException;
import com.twelvemonkeys.lang.StringUtil;
/**
* XMLSerializer
*
* @author Harald Kuhr
* @author last modified by $Author: haku $
* @version $Id: //depot/branches/personal/haraldk/twelvemonkeys/release-2/twelvemonkeys-core/src/main/java/com/twelvemonkeys/xml/XMLSerializer.java#1 $
*/
public class XMLSerializer {
// TODO: Replace with DOMSerializer? Test performance, pretty printing etc...
// Main problem: Sun's Java 5 does not have LS 3.0 support
// This class has no dependencies, which probably makes it more useful
// TODO: Don't insert initial and ending line-break for text-nodes
// TODO: Support not inserting line-breaks, to preserve space
// TODO: Support line breaking (at configurable width)
// TODO: Support standalone?
// TODO: Support more than version 1.0?
// TODO: Consider using IOException to communicate trouble, rather than RTE,
// to be more compatible...
private final OutputStream output;
private final Charset encoding;
private final SerializationContext context;
public XMLSerializer(final OutputStream pOutput, final String pEncoding) {
output = pOutput;
encoding = Charset.forName(pEncoding);
context = new SerializationContext();
}
public final XMLSerializer indentation(String pIndent) {
// TODO: Verify that indent value is only whitespace?
context.indent = pIndent != null ? pIndent : "\t";
return this;
}
public final XMLSerializer stripComments(boolean pStrip) {
context.stripComments = pStrip;
return this;
}
/**
* Serializes the entire document, along with the XML declaration
* ({@code <?xml version="1.0" encoding="..."?>}).
*
* @param pDocument the document to serialize.
*/
public void serialize(final Document pDocument) {
serialize(pDocument, true);
}
/**
* Serializes the entire sub tree starting at {@code pRootNode}, along with an optional XML declaration
* ({@code <?xml version="1.0" encoding="..."?>}).
*
* @param pRootNode the root node to serialize.
* @param pWriteXMLDeclaration {@code true} if the XML declaration should be included, otherwise {@code false}.
*/
public void serialize(final Node pRootNode, final boolean pWriteXMLDeclaration) {
PrintWriter out = new PrintWriter(new OutputStreamWriter(output, encoding));
try {
if (pWriteXMLDeclaration) {
writeXMLDeclaration(out);
}
writeXML(out, pRootNode, context.copy());
}
finally {
out.flush();
}
}
private void writeXMLDeclaration(final PrintWriter pOut) {
pOut.print("");
}
private void writeXML(final PrintWriter pOut, final Node pDocument, final SerializationContext pContext) {
writeNodeRecursive(pOut, pDocument, pContext);
}
private void writeNodeRecursive(final PrintWriter pOut, final Node pNode, final SerializationContext pContext) {
if (pNode.getNodeType() != Node.TEXT_NODE) {
indentToLevel(pOut, pContext);
}
switch (pNode.getNodeType()) {
case Node.DOCUMENT_NODE:
case Node.DOCUMENT_FRAGMENT_NODE:
writeDocument(pOut, pNode, pContext);
break;
case Node.DOCUMENT_TYPE_NODE:
writeDoctype(pOut, (DocumentType) pNode);
break;
case Node.ELEMENT_NODE:
boolean preserveSpace = pContext.preserveSpace;
updatePreserveSpace(pNode, pContext);
writeElement(pOut, (Element) pNode, pContext);
pContext.preserveSpace = preserveSpace;
break;
case Node.CDATA_SECTION_NODE:
writeCData(pOut, pNode);
break;
case Node.TEXT_NODE:
writeText(pOut, pNode, pContext);
break;
case Node.COMMENT_NODE:
writeComment(pOut, pNode, pContext);
break;
case Node.PROCESSING_INSTRUCTION_NODE:
writeProcessingInstruction(pOut, (ProcessingInstruction) pNode);
break;
case Node.ATTRIBUTE_NODE:
throw new IllegalArgumentException("Malformed input Document: Attribute nodes should only occur inside Element nodes");
case Node.ENTITY_NODE:
// ''
case Node.ENTITY_REFERENCE_NODE:
// ( '&' | '%' ) + getNodeName + ';'
case Node.NOTATION_NODE:
// ''
default:
throw new InternalError("Lazy programmer never implemented serialization of " + pNode.getClass());
}
}
private void writeProcessingInstruction(final PrintWriter pOut, final ProcessingInstruction pNode) {
pOut.print("\n");
pOut.print(pNode.getTarget());
String value = pNode.getData();
if (value != null) {
pOut.print(" ");
pOut.print(value);
}
pOut.println("?>");
}
private void writeText(final PrintWriter pOut, final Node pNode, final SerializationContext pContext) {
// TODO: Is this really as specified?
String value = pNode.getNodeValue();
if (pContext.preserveSpace) {
pOut.print(maybeEscapeElementValue(value));
}
else if (!StringUtil.isEmpty(value)) {
String escapedValue = maybeEscapeElementValue(value.trim());
//if (escapedValue.length() + (pContext.level * pContext.indent.length()) > 78) {
indentToLevel(pOut, pContext);
//}
pOut.println(escapedValue);
}
}
private void writeCData(final PrintWriter pOut, final Node pNode) {
pOut.print("");
}
private static void updatePreserveSpace(final Node pNode, final SerializationContext pContext) {
NamedNodeMap attributes = pNode.getAttributes();
if (attributes != null) {
Node space = attributes.getNamedItem("xml:space");
if (space != null) {
if ("preserve".equals(space.getNodeValue())) {
pContext.preserveSpace = true;
}
else if ("default".equals(space.getNodeValue())) {
pContext.preserveSpace = false;
}
// No other values are allowed per spec, ignore
}
}
}
private static void indentToLevel(final PrintWriter pOut, final SerializationContext pContext) {
for (int i = 0; i < pContext.level; i++) {
pOut.print(pContext.indent);
}
}
private void writeComment(final PrintWriter pOut, final Node pNode, final SerializationContext pContext) {
if (pContext.stripComments) {
return;
}
String value = pNode.getNodeValue();
validateCommentValue(value);
if (value.startsWith(" ")) {
pOut.print("");
}
else {
pOut.println(" -->");
}
}
/**
* Returns an escaped version of the input string. The string is guaranteed
* to not contain illegal XML characters ({@code &<>}).
* If no escaping is needed, the input string is returned as is.
*
* @param pValue the input string that might need escaping.
* @return an escaped version of the input string.
*/
static String maybeEscapeElementValue(final String pValue) {
int startEscape = needsEscapeElement(pValue);
if (startEscape < 0) {
// If no escaping is needed, simply return original
return pValue;
}
else {
// Otherwise, start replacing
StringBuilder builder = new StringBuilder(pValue.substring(0, startEscape));
builder.ensureCapacity(pValue.length() + 30);
int pos = startEscape;
for (int i = pos; i < pValue.length(); i++) {
switch (pValue.charAt(i)) {
case '&':
pos = appendAndEscape(pValue, pos, i, builder, "&");
break;
case '<':
pos = appendAndEscape(pValue, pos, i, builder, "<");
break;
case '>':
pos = appendAndEscape(pValue, pos, i, builder, ">");
break;
//case '\'':
//case '"':
default:
break;
}
}
builder.append(pValue.substring(pos));
return builder.toString();
}
}
private static int appendAndEscape(final String pString, int pStart, final int pEnd, final StringBuilder pBuilder, final String pEntity) {
pBuilder.append(pString, pStart, pEnd);
pBuilder.append(pEntity);
return pEnd + 1;
}
/**
* Returns an the first index from the input string that should be escaped
* if escaping is needed, otherwise {@code -1}.
*
* @param pString the input string that might need escaping.
* @return the first index from the input string that should be escaped,
* or {@code -1}.
*/
private static int needsEscapeElement(final String pString) {
for (int i = 0; i < pString.length(); i++) {
switch (pString.charAt(i)) {
case '&':
case '<':
case '>':
//case '\'':
//case '"':
return i;
default:
}
}
return -1;
}
private static String maybeEscapeAttributeValue(final String pValue) {
int startEscape = needsEscapeAttribute(pValue);
if (startEscape < 0) {
return pValue;
}
else {
StringBuilder builder = new StringBuilder(pValue.substring(0, startEscape));
builder.ensureCapacity(pValue.length() + 16);
int pos = startEscape;
for (int i = pos; i < pValue.length(); i++) {
switch (pValue.charAt(i)) {
case '&':
pos = appendAndEscape(pValue, pos, i, builder, "&");
break;
case '"':
pos = appendAndEscape(pValue, pos, i, builder, """);
break;
default:
break;
}
}
builder.append(pValue.substring(pos));
return builder.toString();
}
}
/**
* Returns an the first index from the input string that should be escaped
* if escaping is needed, otherwise {@code -1}.
*
* @param pString the input string that might need escaping.
* @return the first index from the input string that should be escaped,
* or {@code -1}.
*/
private static int needsEscapeAttribute(final String pString) {
for (int i = 0; i < pString.length(); i++) {
switch (pString.charAt(i)) {
case '&':
//case '<':
//case '>':
//case '\'':
case '"':
return i;
default:
}
}
return -1;
}
private static String validateCDataValue(final String pValue) {
if (pValue.contains("]]>")) {
throw new IllegalArgumentException("Malformed input document: CDATA block may not contain the string ']]>'");
}
return pValue;
}
private static String validateCommentValue(final String pValue) {
if (pValue.contains("--")) {
throw new IllegalArgumentException("Malformed input document: Comment may not contain the string '--'");
}
return pValue;
}
private void writeDocument(final PrintWriter pOut, final Node pNode, final SerializationContext pContext) {
// Document fragments might not have child nodes...
if (pNode.hasChildNodes()) {
Node child = pNode.getFirstChild();
while (child != null) {
writeNodeRecursive(pOut, child, pContext);
child = child.getNextSibling();
}
}
}
private void writeElement(final PrintWriter pOut, final Element pNode, final SerializationContext pContext) {
pOut.print("<");
pOut.print(pNode.getTagName());
// TODO: Attributes should probably include namespaces, so that it works
// even if the document was created using attributes instead of namespaces...
// In that case, prefix will be null...
// Handle namespace
String namespace = pNode.getNamespaceURI();
if (namespace != null && !namespace.equals(pContext.defaultNamespace)) {
String prefix = pNode.getPrefix();
if (prefix == null) {
pContext.defaultNamespace = namespace;
pOut.print(" xmlns");
}
else {
pOut.print(" xmlns:");
pOut.print(prefix);
}
pOut.print("=\"");
pOut.print(namespace);
pOut.print("\"");
}
// Iterate attributes if any
if (pNode.hasAttributes()) {
NamedNodeMap attributes = pNode.getAttributes();
for (int i = 0; i < attributes.getLength(); i++) {
Attr attribute = (Attr) attributes.item(i);
String name = attribute.getName();
if (!(name.startsWith("xmlns") && (name.length() == 5 || name.charAt(5) == ':'))) {
pOut.print(" ");
pOut.print(name);
pOut.print("=\"");
pOut.print(maybeEscapeAttributeValue(attribute.getValue()));
pOut.print("\"");
}
//else {
// System.err.println("attribute.getName(): " + name);
//}
}
}
// TODO: Consider not indenting/newline if the first child is a text node
// Iterate children if any
if (pNode.hasChildNodes()) {
pOut.print(">");
if (!pContext.preserveSpace) {
pOut.println();
}
Node child = pNode.getFirstChild();
while (child != null) {
writeNodeRecursive(pOut, child, pContext.push());
child = child.getNextSibling();
}
if (!pContext.preserveSpace) {
indentToLevel(pOut, pContext);
}
pOut.print("");
pOut.print(pNode.getTagName());
pOut.println(">");
}
else if (pNode.getNodeValue() != null) {
// NOTE: This is NOT AS SPECIFIED, but we do this to support
// the weirdness that is the javax.imageio.metadata.IIOMetadataNode.
// According to the spec, the nodeValue of an Element is null.
pOut.print(">");
pOut.print(pNode.getNodeValue());
pOut.print("");
pOut.print(pNode.getTagName());
pOut.println(">");
}
else {
pOut.println("/>");
}
}
private void writeDoctype(final PrintWriter pOut, final DocumentType pDoctype) {
// NOTE: The DOMImplementationLS LSSerializer actually inserts SYSTEM or
// PUBLIC identifiers even if they are empty strings. The result is, it
// will create invalid documents.
// Testing for empty strings seems to be more compatible.
if (pDoctype != null) {
pOut.print("");
}
}
public static void main(String[] pArgs) throws IOException, SAXException {
// Build XML tree (Document) and write
// Find the implementation
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
DocumentBuilder builder;
try {
builder = factory.newDocumentBuilder();
}
catch (ParserConfigurationException e) {
throw new IOException(e);
}
DOMImplementation dom = builder.getDOMImplementation();
Document document = dom.createDocument("http://www.twelvemonkeys.com/xml/test", "test", dom.createDocumentType("test", null, null));
Element root = document.getDocumentElement();
// This is probably not the correct way of setting a default namespace
//root.setAttribute("xmlns", "http://www.twelvemonkeys.com/xml/test");
// Create and insert the normal Properties headers as XML comments
document.insertBefore(document.createComment(new Date().toString()), root);
Element test = document.createElement("sub");
root.appendChild(test);
Element more = document.createElementNS("http://more.com/1999/namespace", "more:more");
more.setAttribute("foo", "test");
more.setAttribute("bar", "'really' \"legal\" & ok");
test.appendChild(more);
more.appendChild(document.createTextNode("Simply some text."));
more.appendChild(document.createCDATASection("&something escaped;"));
more.appendChild(document.createTextNode("More & !"));
more.appendChild(document.createTextNode("\"<<'&'>>\""));
Element another = document.createElement("another");
test.appendChild(another);
Element yet = document.createElement("yet-another");
yet.setAttribute("this-one", "with-params");
test.appendChild(yet);
Element pre = document.createElementNS("http://www.twelvemonkeys.com/xml/test", "pre");
pre.setAttributeNS("http://www.w3.org/XML/1998/namespace", "xml:space", "preserve");
pre.appendChild(document.createTextNode(" \t \n\r some text & white ' ' \n "));
test.appendChild(pre);
Element pre2 = document.createElementNS("http://www.twelvemonkeys.com/xml/test", "tight");
pre2.setAttributeNS("http://www.w3.org/XML/1998/namespace", "xml:space", "preserve");
pre2.appendChild(document.createTextNode("no-space-around-me"));
test.appendChild(pre2);
// Create serializer and output document
//XMLSerializer serializer = new XMLSerializer(pOutput, new OutputFormat(document, UTF_8_ENCODING, true));
System.out.println("XMLSerializer:");
XMLSerializer serializer = new XMLSerializer(System.out, "UTF-8");
serializer.serialize(document);
System.out.println();
System.out.println("DOMSerializer:");
DOMSerializer serializerD = new DOMSerializer(System.out, "UTF-8");
serializerD.setPrettyPrint(true);
serializerD.serialize(document);
System.out.println();
System.out.println("\n");
ByteArrayOutputStream out = new ByteArrayOutputStream();
XMLSerializer serializer2 = new XMLSerializer(out, "UTF-8");
serializer2.serialize(document);
ByteArrayOutputStream outD = new ByteArrayOutputStream();
DOMSerializer serializer2D = new DOMSerializer(outD, "UTF-8");
serializer2D.serialize(document);
Document document2 = builder.parse(new ByteArrayInputStream(out.toByteArray()));
System.out.println("XMLSerializer reparsed XMLSerializer:");
serializer.serialize(document2);
System.out.println();
System.out.println("DOMSerializer reparsed XMLSerializer:");
serializerD.serialize(document2);
System.out.println();
Document documentD = builder.parse(new ByteArrayInputStream(outD.toByteArray()));
System.out.println("XMLSerializer reparsed DOMSerializer:");
serializer.serialize(documentD);
System.out.println();
System.out.println("DOMSerializer reparsed DOMSerializer:");
serializerD.serialize(documentD);
System.out.println();
}
static class SerializationContext implements Cloneable {
String indent = "\t";
int level = 0;
boolean preserveSpace = false;
boolean stripComments = false;
String defaultNamespace;
public SerializationContext copy() {
try {
return (SerializationContext) clone();
}
catch (CloneNotSupportedException e) {
throw new Error(e);
}
}
public SerializationContext push() {
SerializationContext context = copy();
context.level++;
return context;
}
}
}