com.twelvemonkeys.xml.XMLSerializer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of org.apache.fop Show documentation
Show all versions of org.apache.fop Show documentation
The core maven build properties
The newest version!
/*
* Copyright (c) 2008, Harald Kuhr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* * Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.twelvemonkeys.xml;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.util.Date;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.*;
import org.xml.sax.SAXException;
import com.twelvemonkeys.lang.StringUtil;
/**
* XMLSerializer
*
* @author Harald Kuhr
* @author last modified by $Author: haku $
* @version $Id: //depot/branches/personal/haraldk/twelvemonkeys/release-2/twelvemonkeys-core/src/main/java/com/twelvemonkeys/xml/XMLSerializer.java#1 $
*/
public class XMLSerializer {
// TODO: Replace with DOMSerializer? Test performance, pretty printing etc...
// Main problem: Sun's Java 5 does not have LS 3.0 support
// This class has no dependencies, which probably makes it more useful
// TODO: Don't insert initial and ending line-break for text-nodes
// TODO: Support not inserting line-breaks, to preserve space
// TODO: Support line breaking (at configurable width)
// TODO: Support standalone?
// TODO: Support more than version 1.0?
// TODO: Consider using IOException to communicate trouble, rather than RTE,
// to be more compatible...
private final OutputStream output;
private final Charset encoding;
private final SerializationContext context;
public XMLSerializer(final OutputStream pOutput, final String pEncoding) {
output = pOutput;
encoding = Charset.forName(pEncoding);
context = new SerializationContext();
}
public final XMLSerializer indentation(String pIndent) {
// TODO: Verify that indent value is only whitespace?
context.indent = pIndent != null ? pIndent : "\t";
return this;
}
public final XMLSerializer stripComments(boolean pStrip) {
context.stripComments = pStrip;
return this;
}
/**
* Serializes the entire document, along with the XML declaration
* ({@code <?xml version="1.0" encoding="..."?>}).
*
* @param pDocument the document to serialize.
*/
public void serialize(final Document pDocument) {
serialize(pDocument, true);
}
/**
* Serializes the entire sub tree starting at {@code pRootNode}, along with an optional XML declaration
* ({@code <?xml version="1.0" encoding="..."?>}).
*
* @param pRootNode the root node to serialize.
* @param pWriteXMLDeclaration {@code true} if the XML declaration should be included, otherwise {@code false}.
*/
public void serialize(final Node pRootNode, final boolean pWriteXMLDeclaration) {
PrintWriter out = new PrintWriter(new OutputStreamWriter(output, encoding));
try {
if (pWriteXMLDeclaration) {
writeXMLDeclaration(out);
}
writeXML(out, pRootNode, context.copy());
}
finally {
out.flush();
}
}
private void writeXMLDeclaration(final PrintWriter pOut) {
pOut.print("");
}
private void writeXML(final PrintWriter pOut, final Node pDocument, final SerializationContext pContext) {
writeNodeRecursive(pOut, pDocument, pContext);
}
private void writeNodeRecursive(final PrintWriter pOut, final Node pNode, final SerializationContext pContext) {
if (pNode.getNodeType() != Node.TEXT_NODE) {
indentToLevel(pOut, pContext);
}
switch (pNode.getNodeType()) {
case Node.DOCUMENT_NODE:
case Node.DOCUMENT_FRAGMENT_NODE:
writeDocument(pOut, pNode, pContext);
break;
case Node.DOCUMENT_TYPE_NODE:
writeDoctype(pOut, (DocumentType) pNode);
break;
case Node.ELEMENT_NODE:
boolean preserveSpace = pContext.preserveSpace;
updatePreserveSpace(pNode, pContext);
writeElement(pOut, (Element) pNode, pContext);
pContext.preserveSpace = preserveSpace;
break;
case Node.CDATA_SECTION_NODE:
writeCData(pOut, pNode);
break;
case Node.TEXT_NODE:
writeText(pOut, pNode, pContext);
break;
case Node.COMMENT_NODE:
writeComment(pOut, pNode, pContext);
break;
case Node.PROCESSING_INSTRUCTION_NODE:
writeProcessingInstruction(pOut, (ProcessingInstruction) pNode);
break;
case Node.ATTRIBUTE_NODE:
throw new IllegalArgumentException("Malformed input Document: Attribute nodes should only occur inside Element nodes");
case Node.ENTITY_NODE:
// ''
case Node.ENTITY_REFERENCE_NODE:
// ( '&' | '%' ) + getNodeName + ';'
case Node.NOTATION_NODE:
// ''
default:
throw new InternalError("Lazy programmer never implemented serialization of " + pNode.getClass());
}
}
private void writeProcessingInstruction(final PrintWriter pOut, final ProcessingInstruction pNode) {
pOut.print("\n");
pOut.print(pNode.getTarget());
String value = pNode.getData();
if (value != null) {
pOut.print(" ");
pOut.print(value);
}
pOut.println("?>");
}
private void writeText(final PrintWriter pOut, final Node pNode, final SerializationContext pContext) {
// TODO: Is this really as specified?
String value = pNode.getNodeValue();
if (pContext.preserveSpace) {
pOut.print(maybeEscapeElementValue(value));
}
else if (!StringUtil.isEmpty(value)) {
String escapedValue = maybeEscapeElementValue(value.trim());
//if (escapedValue.length() + (pContext.level * pContext.indent.length()) > 78) {
indentToLevel(pOut, pContext);
//}
pOut.println(escapedValue);
}
}
private void writeCData(final PrintWriter pOut, final Node pNode) {
pOut.print("");
}
private static void updatePreserveSpace(final Node pNode, final SerializationContext pContext) {
NamedNodeMap attributes = pNode.getAttributes();
if (attributes != null) {
Node space = attributes.getNamedItem("xml:space");
if (space != null) {
if ("preserve".equals(space.getNodeValue())) {
pContext.preserveSpace = true;
}
else if ("default".equals(space.getNodeValue())) {
pContext.preserveSpace = false;
}
// No other values are allowed per spec, ignore
}
}
}
private static void indentToLevel(final PrintWriter pOut, final SerializationContext pContext) {
for (int i = 0; i < pContext.level; i++) {
pOut.print(pContext.indent);
}
}
private void writeComment(final PrintWriter pOut, final Node pNode, final SerializationContext pContext) {
if (pContext.stripComments) {
return;
}
String value = pNode.getNodeValue();
validateCommentValue(value);
if (value.startsWith(" ")) {
pOut.print("");
}
else {
pOut.println(" -->");
}
}
/**
* Returns an escaped version of the input string. The string is guaranteed
* to not contain illegal XML characters ({@code &<>}).
* If no escaping is needed, the input string is returned as is.
*
* @param pValue the input string that might need escaping.
* @return an escaped version of the input string.
*/
static String maybeEscapeElementValue(final String pValue) {
int startEscape = needsEscapeElement(pValue);
if (startEscape < 0) {
// If no escaping is needed, simply return original
return pValue;
}
else {
// Otherwise, start replacing
StringBuilder builder = new StringBuilder(pValue.substring(0, startEscape));
builder.ensureCapacity(pValue.length() + 30);
int pos = startEscape;
for (int i = pos; i < pValue.length(); i++) {
switch (pValue.charAt(i)) {
case '&':
pos = appendAndEscape(pValue, pos, i, builder, "&");
break;
case '<':
pos = appendAndEscape(pValue, pos, i, builder, "<");
break;
case '>':
pos = appendAndEscape(pValue, pos, i, builder, ">");
break;
//case '\'':
//case '"':
default:
break;
}
}
builder.append(pValue.substring(pos));
return builder.toString();
}
}
private static int appendAndEscape(final String pString, int pStart, final int pEnd, final StringBuilder pBuilder, final String pEntity) {
pBuilder.append(pString, pStart, pEnd);
pBuilder.append(pEntity);
return pEnd + 1;
}
/**
* Returns an the first index from the input string that should be escaped
* if escaping is needed, otherwise {@code -1}.
*
* @param pString the input string that might need escaping.
* @return the first index from the input string that should be escaped,
* or {@code -1}.
*/
private static int needsEscapeElement(final String pString) {
for (int i = 0; i < pString.length(); i++) {
switch (pString.charAt(i)) {
case '&':
case '<':
case '>':
//case '\'':
//case '"':
return i;
default:
}
}
return -1;
}
private static String maybeEscapeAttributeValue(final String pValue) {
int startEscape = needsEscapeAttribute(pValue);
if (startEscape < 0) {
return pValue;
}
else {
StringBuilder builder = new StringBuilder(pValue.substring(0, startEscape));
builder.ensureCapacity(pValue.length() + 16);
int pos = startEscape;
for (int i = pos; i < pValue.length(); i++) {
switch (pValue.charAt(i)) {
case '&':
pos = appendAndEscape(pValue, pos, i, builder, "&");
break;
case '"':
pos = appendAndEscape(pValue, pos, i, builder, """);
break;
default:
break;
}
}
builder.append(pValue.substring(pos));
return builder.toString();
}
}
/**
* Returns an the first index from the input string that should be escaped
* if escaping is needed, otherwise {@code -1}.
*
* @param pString the input string that might need escaping.
* @return the first index from the input string that should be escaped,
* or {@code -1}.
*/
private static int needsEscapeAttribute(final String pString) {
for (int i = 0; i < pString.length(); i++) {
switch (pString.charAt(i)) {
case '&':
//case '<':
//case '>':
//case '\'':
case '"':
return i;
default:
}
}
return -1;
}
private static String validateCDataValue(final String pValue) {
if (pValue.contains("]]>")) {
throw new IllegalArgumentException("Malformed input document: CDATA block may not contain the string ']]>'");
}
return pValue;
}
private static String validateCommentValue(final String pValue) {
if (pValue.contains("--")) {
throw new IllegalArgumentException("Malformed input document: Comment may not contain the string '--'");
}
return pValue;
}
private void writeDocument(final PrintWriter pOut, final Node pNode, final SerializationContext pContext) {
// Document fragments might not have child nodes...
if (pNode.hasChildNodes()) {
Node child = pNode.getFirstChild();
while (child != null) {
writeNodeRecursive(pOut, child, pContext);
child = child.getNextSibling();
}
}
}
private void writeElement(final PrintWriter pOut, final Element pNode, final SerializationContext pContext) {
pOut.print("<");
pOut.print(pNode.getTagName());
// TODO: Attributes should probably include namespaces, so that it works
// even if the document was created using attributes instead of namespaces...
// In that case, prefix will be null...
// Handle namespace
String namespace = pNode.getNamespaceURI();
if (namespace != null && !namespace.equals(pContext.defaultNamespace)) {
String prefix = pNode.getPrefix();
if (prefix == null) {
pContext.defaultNamespace = namespace;
pOut.print(" xmlns");
}
else {
pOut.print(" xmlns:");
pOut.print(prefix);
}
pOut.print("=\"");
pOut.print(namespace);
pOut.print("\"");
}
// Iterate attributes if any
if (pNode.hasAttributes()) {
NamedNodeMap attributes = pNode.getAttributes();
for (int i = 0; i < attributes.getLength(); i++) {
Attr attribute = (Attr) attributes.item(i);
String name = attribute.getName();
if (!(name.startsWith("xmlns") && (name.length() == 5 || name.charAt(5) == ':'))) {
pOut.print(" ");
pOut.print(name);
pOut.print("=\"");
pOut.print(maybeEscapeAttributeValue(attribute.getValue()));
pOut.print("\"");
}
//else {
// System.err.println("attribute.getName(): " + name);
//}
}
}
// TODO: Consider not indenting/newline if the first child is a text node
// Iterate children if any
if (pNode.hasChildNodes()) {
pOut.print(">");
if (!pContext.preserveSpace) {
pOut.println();
}
Node child = pNode.getFirstChild();
while (child != null) {
writeNodeRecursive(pOut, child, pContext.push());
child = child.getNextSibling();
}
if (!pContext.preserveSpace) {
indentToLevel(pOut, pContext);
}
pOut.print("");
pOut.print(pNode.getTagName());
pOut.println(">");
}
else if (pNode.getNodeValue() != null) {
// NOTE: This is NOT AS SPECIFIED, but we do this to support
// the weirdness that is the javax.imageio.metadata.IIOMetadataNode.
// According to the spec, the nodeValue of an Element is null.
pOut.print(">");
pOut.print(pNode.getNodeValue());
pOut.print("");
pOut.print(pNode.getTagName());
pOut.println(">");
}
else {
pOut.println("/>");
}
}
private void writeDoctype(final PrintWriter pOut, final DocumentType pDoctype) {
// NOTE: The DOMImplementationLS LSSerializer actually inserts SYSTEM or
// PUBLIC identifiers even if they are empty strings. The result is, it
// will create invalid documents.
// Testing for empty strings seems to be more compatible.
if (pDoctype != null) {
pOut.print("");
}
}
public static void main(String[] pArgs) throws IOException, SAXException {
// Build XML tree (Document) and write
// Find the implementation
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
DocumentBuilder builder;
try {
builder = factory.newDocumentBuilder();
}
catch (ParserConfigurationException e) {
throw new IOException(e);
}
DOMImplementation dom = builder.getDOMImplementation();
Document document = dom.createDocument("http://www.twelvemonkeys.com/xml/test", "test", dom.createDocumentType("test", null, null));
Element root = document.getDocumentElement();
// This is probably not the correct way of setting a default namespace
//root.setAttribute("xmlns", "http://www.twelvemonkeys.com/xml/test");
// Create and insert the normal Properties headers as XML comments
document.insertBefore(document.createComment(new Date().toString()), root);
Element test = document.createElement("sub");
root.appendChild(test);
Element more = document.createElementNS("http://more.com/1999/namespace", "more:more");
more.setAttribute("foo", "test");
more.setAttribute("bar", "'really' \"legal\" & ok");
test.appendChild(more);
more.appendChild(document.createTextNode("Simply some text."));
more.appendChild(document.createCDATASection("&something escaped;"));
more.appendChild(document.createTextNode("More & !"));
more.appendChild(document.createTextNode("\"<<'&'>>\""));
Element another = document.createElement("another");
test.appendChild(another);
Element yet = document.createElement("yet-another");
yet.setAttribute("this-one", "with-params");
test.appendChild(yet);
Element pre = document.createElementNS("http://www.twelvemonkeys.com/xml/test", "pre");
pre.setAttributeNS("http://www.w3.org/XML/1998/namespace", "xml:space", "preserve");
pre.appendChild(document.createTextNode(" \t \n\r some text & white ' ' \n "));
test.appendChild(pre);
Element pre2 = document.createElementNS("http://www.twelvemonkeys.com/xml/test", "tight");
pre2.setAttributeNS("http://www.w3.org/XML/1998/namespace", "xml:space", "preserve");
pre2.appendChild(document.createTextNode("no-space-around-me"));
test.appendChild(pre2);
// Create serializer and output document
//XMLSerializer serializer = new XMLSerializer(pOutput, new OutputFormat(document, UTF_8_ENCODING, true));
System.out.println("XMLSerializer:");
XMLSerializer serializer = new XMLSerializer(System.out, "UTF-8");
serializer.serialize(document);
System.out.println();
System.out.println("DOMSerializer:");
DOMSerializer serializerD = new DOMSerializer(System.out, "UTF-8");
serializerD.setPrettyPrint(true);
serializerD.serialize(document);
System.out.println();
System.out.println("\n");
ByteArrayOutputStream out = new ByteArrayOutputStream();
XMLSerializer serializer2 = new XMLSerializer(out, "UTF-8");
serializer2.serialize(document);
ByteArrayOutputStream outD = new ByteArrayOutputStream();
DOMSerializer serializer2D = new DOMSerializer(outD, "UTF-8");
serializer2D.serialize(document);
Document document2 = builder.parse(new ByteArrayInputStream(out.toByteArray()));
System.out.println("XMLSerializer reparsed XMLSerializer:");
serializer.serialize(document2);
System.out.println();
System.out.println("DOMSerializer reparsed XMLSerializer:");
serializerD.serialize(document2);
System.out.println();
Document documentD = builder.parse(new ByteArrayInputStream(outD.toByteArray()));
System.out.println("XMLSerializer reparsed DOMSerializer:");
serializer.serialize(documentD);
System.out.println();
System.out.println("DOMSerializer reparsed DOMSerializer:");
serializerD.serialize(documentD);
System.out.println();
}
static class SerializationContext implements Cloneable {
String indent = "\t";
int level = 0;
boolean preserveSpace = false;
boolean stripComments = false;
String defaultNamespace;
public SerializationContext copy() {
try {
return (SerializationContext) clone();
}
catch (CloneNotSupportedException e) {
throw new Error(e);
}
}
public SerializationContext push() {
SerializationContext context = copy();
context.level++;
return context;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy