All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twelvemonkeys.xml.XMLSerializer Maven / Gradle / Ivy

There is a newer version: 2.3
Show newest version
/*
 * Copyright (c) 2008, Harald Kuhr
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name "TwelveMonkeys" nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package com.twelvemonkeys.xml;

import com.twelvemonkeys.lang.StringUtil;
import org.w3c.dom.*;
import org.xml.sax.SAXException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.*;
import java.nio.charset.Charset;
import java.util.Date;

/**
 * XMLSerializer
 *
 * @author Harald Kuhr
 * @author last modified by $Author: haku $
 * @version $Id: //depot/branches/personal/haraldk/twelvemonkeys/release-2/twelvemonkeys-core/src/main/java/com/twelvemonkeys/xml/XMLSerializer.java#1 $
 */
public class XMLSerializer {
    // TODO: Replace with DOMSerializer? Test performance, pretty printing etc...
    // Main problem: Sun's Java 5 does not have LS 3.0 support
    // This class has no dependencies, which probably makes it more useful

    // TODO: Support line breaking (at configurable width)
    // TODO: Support skipping XML declaration?
    // TODO: Support standalone?
    // TODO: Support more than version 1.0?
    // TODO: Consider using IOException to communicate trouble, rather than RTE,
    // to be more compatible...
    // TODO: Support not inserting line-breaks, to preserve space

    // TODO: Idea: Create a SerializationContext that stores attributes on
    // serialization, to keep the serialization thread-safe
    // Store preserveSpace attribute in this context, to avoid costly traversals
    // Store user options here too
    // TODO: Push/pop?

    private final OutputStream mOutput;
    private final Charset mEncoding;
    private final SerializationContext mContext;

    public XMLSerializer(final OutputStream pOutput, final String pEncoding) {
        mOutput = pOutput;
        mEncoding = Charset.forName(pEncoding);
        mContext = new SerializationContext();
    }

    public void setIndentation(String pIndent) {
        mContext.indent = pIndent != null ? pIndent : "  ";
    }

    public void setStripComments(boolean pStrip) {
        mContext.stripComments = pStrip;
    }

    public void serialize(final Document pDocument) {
        PrintWriter out = new PrintWriter(new OutputStreamWriter(mOutput, mEncoding));
        try {
            writeXMLDeclararion(out);
            writeXML(out, pDocument, mContext.copy());
        }
        finally {
            out.flush();
        }
    }

    private void writeXMLDeclararion(final PrintWriter pOut) {
        pOut.print("");
    }

    private void writeXML(final PrintWriter pOut, final Document pDocument, final SerializationContext pContext) {
        writeNodeRecursive(pOut, pDocument, pContext);
    }

    private void writeNodeRecursive(final PrintWriter pOut, final Node pNode, final SerializationContext pContext) {
        if (pNode.getNodeType() != Node.TEXT_NODE) {
            indentToLevel(pOut, pContext);
        }

        switch (pNode.getNodeType()) {
            case Node.DOCUMENT_NODE:
            case Node.DOCUMENT_FRAGMENT_NODE:
                writeDocument(pOut, pNode, pContext);
                break;
            case Node.DOCUMENT_TYPE_NODE:
                writeDoctype(pOut, (DocumentType) pNode);
                break;
            case Node.ELEMENT_NODE:
                boolean preserveSpace = pContext.preserveSpace;
                updatePreserveSpace(pNode, pContext);
                writeElement(pOut, (Element) pNode, pContext);
                pContext.preserveSpace = preserveSpace;
                break;
            case Node.CDATA_SECTION_NODE:
                writeCData(pOut, pNode);
                break;
            case Node.TEXT_NODE:
                writeText(pOut, pNode, pContext);
                break;
            case Node.COMMENT_NODE:
                writeComment(pOut, pNode, pContext);
                break;
            case Node.PROCESSING_INSTRUCTION_NODE:
                writeProcessingInstruction(pOut, pNode);
                break;
            case Node.ATTRIBUTE_NODE:
                throw new IllegalArgumentException("Malformed input Document: Attribute nodes should only occur inside Element nodes");
            case Node.ENTITY_NODE:
                // ''
            case Node.ENTITY_REFERENCE_NODE:
                // ( '&' | '%' ) + getNodeName + ';'
            case Node.NOTATION_NODE:
                // ''
            default:
                throw new InternalError("Lazy programmer never implemented serialization of " + pNode.getClass());
        }
    }

    private void writeProcessingInstruction(final PrintWriter pOut, final Node pNode) {
        pOut.print("\n");
    }

    private void writeText(final PrintWriter pOut, final Node pNode, final SerializationContext pContext) {
        // TODO: Is this really as specified?
        String value = pNode.getNodeValue();
        if (pContext.preserveSpace) {
            pOut.print(maybeEscapeElementValue(value));
        }
        else if (!StringUtil.isEmpty(value)) {
            indentToLevel(pOut, pContext);
            pOut.println(maybeEscapeElementValue(value.trim()));
        }
    }

    private void writeCData(final PrintWriter pOut, final Node pNode) {
        pOut.print("");
    }

    private static void updatePreserveSpace(final Node pNode, final SerializationContext pContext) {
        NamedNodeMap attributes = pNode.getAttributes();
        if (attributes != null) {
            Node space = attributes.getNamedItem("xml:space");
            if (space != null) {
                if ("preserve".equals(space.getNodeValue())) {
                    pContext.preserveSpace = true;
                }
                else if ("default".equals(space.getNodeValue())) {
                    pContext.preserveSpace = false;
                }
                // No other values are allowed per spec, ingore
            }
        }
    }

    private static void indentToLevel(final PrintWriter pOut, final SerializationContext pContext) {
        for (int i = 0; i < pContext.level; i++) {
            pOut.print(pContext.indent);
        }
    }

    private void writeComment(final PrintWriter pOut, final Node pNode, final SerializationContext pContext) {
        if (pContext.stripComments) {
            return;
        }

        String value = pNode.getNodeValue();
        validateCommenValue(value);

        if (value.startsWith(" ")) {
            pOut.print("");
        }
        else {
            pOut.println(" -->");
        }
    }

    /**
     * Returns an escaped version of the input string. The string is guaranteed
     * to not contain illegal XML characters ({@code &<>}).
     * If no escaping is needed, the input string is returned as is. 
     *
     * @param pValue the input string that might need escaping.
     * @return an escaped version of the input string.
     */
    static String maybeEscapeElementValue(final String pValue) {
        int startEscape = needsEscapeElement(pValue);

        if (startEscape < 0) {
            // If no escpaing is needed, simply return original
            return pValue;
        }
        else {
            // Otherwise, start replacing
            StringBuilder builder = new StringBuilder(pValue.substring(0, startEscape));
            builder.ensureCapacity(pValue.length() + 30);

            int pos = startEscape;
            for (int i = pos; i < pValue.length(); i++) {
                switch (pValue.charAt(i)) {
                    case '&':
                        pos = appendAndEscape(pValue, pos, i, builder, "&");
                        break;
                    case '<':
                        pos = appendAndEscape(pValue, pos, i, builder, "<");
                        break;
                    case '>':
                        pos = appendAndEscape(pValue, pos, i, builder, ">");
                        break;
                    //case '\'':
                    //    pos = appendAndEscape(pString, pos, i, builder, "'");
                    //    break;
                    //case '"':
                    //    pos = appendAndEscape(pString, pos, i, builder, """);
                    //    break;
                    default:
                        break;
                }
            }

            builder.append(pValue.substring(pos));
            return builder.toString();
        }
    }

    private static int appendAndEscape(final String pString, int pStart, final int pEnd, final StringBuilder pBuilder, final String pEntity) {
        pBuilder.append(pString.substring(pStart, pEnd));
        pBuilder.append(pEntity);
        return pEnd + 1;
    }

    /**
     * Returns an the first index from the input string that should be escaped
     * if escaping is needed, otherwise {@code -1}.
     *
     * @param pString the input string that might need escaping.
     * @return the first index from the input string that should be escaped,
     *         or {@code -1}.
     */
    private static int needsEscapeElement(final String pString) {
        for (int i = 0; i < pString.length(); i++) {
            switch (pString.charAt(i)) {
                case '&':
                case '<':
                case '>':
                //case '\'':
                //case '"':
                    return i;
                default:
            }
        }
        return -1;
    }

    private static String maybeEscapeAttributeValue(final String pValue) {
        int startEscape = needsEscapeAttribute(pValue);

        if (startEscape < 0) {
            return pValue;
        }
        else {
            StringBuilder builder = new StringBuilder(pValue.substring(0, startEscape));
            builder.ensureCapacity(pValue.length() + 16);

            int pos = startEscape;
            for (int i = pos; i < pValue.length(); i++) {
                switch (pValue.charAt(i)) {
                    case '&':
                        pos = appendAndEscape(pValue, pos, i, builder, "&");
                        break;
                    case '"':
                        pos = appendAndEscape(pValue, pos, i, builder, """);
                        break;
                    default:
                        break;
                }
            }

            //StringBuilder builder = new StringBuilder(pValue.length() + 30);
            //
            //int start = 0;
            //while (end >= 0) {
            //    builder.append(pValue.substring(start, end));
            //    builder.append(""");
            //    start = end + 1;
            //    end = pValue.indexOf('"', start);
            //}
            //builder.append(pValue.substring(start));

            builder.append(pValue.substring(pos));

            return builder.toString();
        }
    }

    /**
     * Returns an the first index from the input string that should be escaped
     * if escaping is needed, otherwise {@code -1}.
     *
     * @param pString the input string that might need escaping.
     * @return the first index from the input string that should be escaped,
     *         or {@code -1}.
     */
    private static int needsEscapeAttribute(final String pString) {
        for (int i = 0; i < pString.length(); i++) {
            switch (pString.charAt(i)) {
                case '&':
                //case '<':
                //case '>':
                //case '\'':
                case '"':
                    return i;
                default:
            }
        }
        return -1;
    }

    private static String validateCDataValue(final String pValue) {
        if (pValue.indexOf("]]>") >= 0) {
            throw new IllegalArgumentException("Malformed input document: CDATA block may not contain the string ']]>'");
        }
        return pValue;
    }

    private static String validateCommenValue(final String pValue) {
        if (pValue.indexOf("--") >= 0) {
            throw new IllegalArgumentException("Malformed input document: Comment may not contain the string '--'");
        }
        return pValue;
    }

    private void writeDocument(final PrintWriter pOut, final Node pNode, final SerializationContext pContext) {
        // Document fragments might not have child nodes...
        if (pNode.hasChildNodes()) {
            NodeList nodes = pNode.getChildNodes();
            for (int i = 0; i < nodes.getLength(); i++) {
                writeNodeRecursive(pOut, nodes.item(i), pContext);
            }
        }
    }

    private void writeElement(final PrintWriter pOut, final Element pNode, final SerializationContext pContext) {
        pOut.print("<");
        pOut.print(pNode.getTagName());

        // TODO: Attributes should probably include namespaces, so that it works
        // even if the document was created using attributes instead of namespaces...

        // Handle namespace
        String namespace = pNode.getNamespaceURI();
        if (namespace != null && !namespace.equals(pContext.defaultNamespace)) {
            String prefix = pNode.getPrefix();
            if (prefix == null) {
                pContext.defaultNamespace = namespace;
                pOut.print(" xmlns");
            }
            else {
                pOut.print(" xmlns:");
                pOut.print(prefix);
            }
            pOut.print("=\"");
            pOut.print(namespace);
            pOut.print("\"");
        }

        // Iterate attributes if any
        if (pNode.hasAttributes()) {
            NamedNodeMap attributes = pNode.getAttributes();
            for (int i = 0; i < attributes.getLength(); i++) {
                Attr attribute = (Attr) attributes.item(i);
                String name = attribute.getName();
                if (!(name.startsWith("xmlns") && (name.length() == 5 || name.charAt(5) == ':'))) {
                    pOut.print(" ");
                    pOut.print(name);
                    pOut.print("=\"");
                    pOut.print(maybeEscapeAttributeValue(attribute.getValue()));
                    pOut.print("\"");
                }
                //else {
                //    System.err.println("attribute.getName(): " + name);
                //}
            }
        }

        // Iterate children if any
        if (pNode.hasChildNodes()) {
            pOut.print(">");
            if (!pContext.preserveSpace) {
                pOut.println();
            }

            NodeList children = pNode.getChildNodes();
            //pContext.level++;
            for (int i = 0; i < children.getLength(); i++) {
                writeNodeRecursive(pOut, children.item(i), pContext.push());
            }
            //pContext.level--;

            if (!pContext.preserveSpace) {
                indentToLevel(pOut, pContext);
            }

            pOut.print("");
        }
        else {
            pOut.println("/>");
        }

    }

    private void writeDoctype(final PrintWriter pOut, final DocumentType pDoctype) {
        // NOTE: The DOMImplementationLS LSSerializer actually inserts SYSTEM or
        // PUBLIC identifiers even if they are empty strings. The result is, it
        // will create invalid documents.
        // Testing for empty strings seems to be more compatible.
        if (pDoctype != null) {
            pOut.print("");
        }
    }

    public static void main(String[] pArgs) throws IOException, SAXException {
        // Build XML tree (Document) and write
        // Find the implementation
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        DocumentBuilder builder;
        try {
            builder = factory.newDocumentBuilder();
        }
        catch (ParserConfigurationException e) {
            throw (IOException) new IOException(e.getMessage()).initCause(e);
        }
        DOMImplementation dom = builder.getDOMImplementation();

        Document document = dom.createDocument("http://www.twelvemonkeys.com/xml/test", "test", dom.createDocumentType("test", null, null));

        Element root = document.getDocumentElement();

        // This is probably not the correct way of setting a default namespace
        //root.setAttribute("xmlns", "http://www.twelvemonkeys.com/xml/test");

        // Create and insert the normal Properties headers as XML comments
        document.insertBefore(document.createComment(new Date().toString()), root);

        Element test = document.createElement("sub");
        root.appendChild(test);
        Element more = document.createElementNS("http://more.com/1999/namespace", "more:more");
        more.setAttribute("foo", "test");
        more.setAttribute("bar", "'really' \"legal\" & ok");
        test.appendChild(more);
        more.appendChild(document.createTextNode("Simply some text."));
        more.appendChild(document.createCDATASection("&something escaped;"));
        more.appendChild(document.createTextNode("More & !"));
        more.appendChild(document.createTextNode("\"<<'&'>>\""));
        Element another = document.createElement("another");
        test.appendChild(another);
        Element yet = document.createElement("yet-another");
        yet.setAttribute("this-one", "with-params");
        test.appendChild(yet);

        Element pre = document.createElementNS("http://www.twelvemonkeys.com/xml/test", "pre");
        pre.setAttributeNS("http://www.w3.org/XML/1998/namespace", "xml:space", "preserve");
        pre.appendChild(document.createTextNode(" \t \n\r some text & white ' '   \n   "));
        test.appendChild(pre);

        // Create serializer and output document
        //XMLSerializer serializer = new XMLSerializer(pOutput, new OutputFormat(document, UTF_8_ENCODING, true));
        System.out.println("XMLSerializer:");
        XMLSerializer serializer = new XMLSerializer(System.out, "UTF-8");
        serializer.serialize(document);
        System.out.println();

        System.out.println("DOMSerializer:");
        DOMSerializer serializerD = new DOMSerializer(System.out, "UTF-8");
        serializerD.setPrettyPrint(true);
        serializerD.serialize(document);
        System.out.println();
        
        System.out.println("\n");

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        XMLSerializer serializer2 = new XMLSerializer(out, "UTF-8");
        serializer2.serialize(document);

        ByteArrayOutputStream outD = new ByteArrayOutputStream();
        DOMSerializer serializer2D = new DOMSerializer(outD, "UTF-8");
        serializer2D.serialize(document);

        Document document2 = builder.parse(new ByteArrayInputStream(out.toByteArray()));
        System.out.println("XMLSerializer reparsed XMLSerializer:");
        serializer.serialize(document2);
        System.out.println();
        System.out.println("DOMSerializer reparsed XMLSerializer:");
        serializerD.serialize(document2);
        System.out.println();


        Document documentD = builder.parse(new ByteArrayInputStream(outD.toByteArray()));
        System.out.println("XMLSerializer reparsed DOMSerializer:");
        serializer.serialize(documentD);
        System.out.println();
        System.out.println("DOMSerializer reparsed DOMSerializer:");
        serializerD.serialize(documentD);
        System.out.println();
    }

    static class SerializationContext implements Cloneable {
        String indent = "  ";
        int level = 0;
        boolean preserveSpace = false;
        boolean stripComments = false;
        String defaultNamespace;

        public SerializationContext copy() {
            try {
                return (SerializationContext) clone();
            }
            catch (CloneNotSupportedException e) {
                throw new Error(e);
            }
        }

        public SerializationContext push() {
            SerializationContext context = copy();
            context.level++;
            return context;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy