org.jitsi.util.xml.DOMElementWriter Maven / Gradle / Ivy

Go to download
/*
 * Copyright @ 2015 Atlassian Pty Ltd
 * Copyright  2000-2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.jitsi.util.xml;

import java.io.*;

import org.w3c.dom.*;

/**
 * Writes a DOM tree to a given Writer.
 *
 * Utility class used by {@link XMLUtils}.
 *
 * @author Lubomir Marinov
 */
public class DOMElementWriter
{
    /**
     * The system-specific line separator as defined by the well-known system
     * property.
     */
    private static final String lSep = System.getProperty("line.separator");

    /**
     * Decodes an XML (element) name according to
     * http://www.w3.org/TR/xml/#NT-Name.
     *
     * @param name the XML (element) name to be decoded
     * @return a String which represents name decoded
     * according to http://www.w3.org/TR/xml/#NT-Name
     */
    public static String decodeName(String name)
    {
        int length = name.length();
        StringBuilder value = new StringBuilder(length);

        for (int i = 0; i < length;)
        {
            int start = name.indexOf('_', i);

            /*
             * If there's nothing else to decode, append whatever's left and
             * finish.
             */
            if (start == -1)
            {
                value.append(name, i, length);
                break;
            }

            /*
             * We may have to decode from start (inclusive). Append from i to
             * start (exclusive).
             */
            if (i != start)
                value.append(name, i, start);

            // Determine whether we'll actually decode.
            int end = start + 6 /* xHHHH_ */;

            if ((end < length)
                    && (name.charAt(start + 1) == 'x')
                    && (name.charAt(end) == '_')
                    && isHexDigit(name.charAt(start + 2))
                    && isHexDigit(name.charAt(start + 3))
                    && isHexDigit(name.charAt(start + 4))
                    && isHexDigit(name.charAt(start + 5)))
            {
                char c = (char) Integer.parseInt(name.substring(start + 2, end), 16);

                /*
                 * We've decoded a character. But is it really a character we'd
                 * have encoded in the first place? We don't want to
                 * accidentally decode a string just because it looked like an
                 * encoded character.
                 */
                if ((start == 0) ? !isNameStartChar(c) : !isNameChar(c))
                {
                    value.append(c);
                    i = end + 1;
                    continue;
                }
            }

            // We didn't really have to decode and the string was a literal.
            value.append(name.charAt(start));
            i = start + 1;
        }
        return value.toString();
    }

    /**
     * Encodes a specific String so that it is a valid XML (element)
     * name according to http://www.w3.org/TR/xml/#NT-Name.
     *
     * @param value the String to be encoded so that it is a valid XML
     * name
     * @return a String which represents value encoded so that
     * it is a valid XML (element) name
     */
    public static String encodeName(String value)
    {
        int length = value.length();
        StringBuilder name = new StringBuilder();

        for (int i = 0; i < length; i++)
        {
            char c = value.charAt(i);

            if (i == 0)
            {
                if (isNameStartChar(c))
                {
                    name.append(c);
                    continue;
                }
            }
            else if (isNameChar(c))
            {
                name.append(c);
                continue;
            }

            name.append("_x");
            if (c <= 0x000F)
                name.append("000");
            else if (c <= 0x00FF)
                name.append("00");
            else if (c <= 0x0FFF)
                name.append('0');
            name.append(Integer.toHexString(c).toUpperCase());
            name.append('_');
        }
        return name.toString();
    }

    /**
     * Determines whether a specific character represents a hex digit.
     *
     * @param c the character to be checked whether it represents a hex digit
     * @return true if the specified character represents a hex digit;
     * otherwise, false
     */
    private static boolean isHexDigit(char c)
    {
        return
            (('0' <= c) && (c <= '9'))
                || (('A' <= c) && (c <= 'F'))
                || (('a' <= c) && (c <= 'f'));
    }

    /**
     * Determines whether a specific characters is a NameChar as
     * defined by http://www.w3.org/TR/xml/#NT-Name.
     *
     * @param c the character which is to be determines whether it is a
     * NameChar
     * @return true if the specified character is a NameChar;
     * otherwise, false
     */
    private static boolean isNameChar(char c)
    {
        if (isNameStartChar(c))
            return true;
        else if ((c == '-') || (c == '.'))
            return true;
        else if (('0' <= c) && (c <= '9'))
            return true;
        else if (c == 0xB7)
            return true;
        else if (c < 0x0300)
            return false;
        else if (c <= 0x036F)
            return true;
        else if (c < 0x203F)
            return false;
        else if (c <= 0x2040)
            return true;
        else
            return false;
    }

    /**
     * Determines whether a specific characters is a NameStartChar as
     * defined by http://www.w3.org/TR/xml/#NT-Name.
     *
     * @param c the character to be determined whether it is a
     * NameStartChar
     * @return true if the specified character is a
     * NameStartChar; otherwise, false
     */
    private static boolean isNameStartChar(char c)
    {
        if ((c == ':') || (c == '_'))
            return true;
        else if (('A' <= c) && (c <= 'Z'))
            return true;
        else if (('a' <= c) && (c <= 'z'))
            return true;
        else if (c < 0xC0)
            return false;
        else if (c <= 0xD6)
            return true;
        else if (c < 0xD8)
            return false;
        else if (c <= 0xF6)
            return true;
        else if (c < 0xF8)
            return false;
        else if (c <= 0x2FF)
            return true;
        else if (c < 0x370)
            return false;
        else if (c <= 0x37D)
            return true;
        else if (c < 0x37F)
            return false;
        else if (c <= 0x1FFF)
            return true;
        else if (c < 0x200C)
            return false;
        else if (c <= 0x200D)
            return true;
        else if (c < 0x2070)
            return false;
        else if (c <= 0x218F)
            return true;
        else if (c < 0x2C00)
            return false;
        else if (c <= 0x2FEF)
            return true;
        else if (c < 0x3001)
            return false;
        else if (c <= 0xD7FF)
            return true;
        else if (c < 0xF900)
            return false;
        else if (c <= 0xFDCF)
            return true;
        else if (c < 0xFDF0)
            return false;
        else if (c <= 0xFFFD)
            return true;
//        else if (c < 0x10000)
//            return false;
//        else if (c <= 0xEFFFF)
//            return true;
        else
            return false;
    }

    /**
     * Don't try to be too smart but at least recognize the predefined
     * entities.
     */
    protected String[] knownEntities = {"gt", "amp", "lt", "apos", "quot"};


    /**
     * Writes a DOM tree to a stream in UTF8 encoding. Note that
     * it prepends the <?xml version='1.0' encoding='UTF-8'?>.
     * The indent number is set to 0 and a 2-space indent.
     * @param root the root element of the DOM tree.
     * @param out the outputstream to write to.
     * @throws IOException if an error happens while writing to the stream.
     */
    public void write(Element root, OutputStream out)
        throws IOException
    {
        Writer wri = new OutputStreamWriter(out, "UTF-8");
        wri.write(""+lSep);
        write(root, wri, 0, "  ");
        wri.flush();
    }

    /**
     * Writes a DOM tree to a stream.
     *
     * @param element the Root DOM element of the tree
     * @param out where to send the output
     * @param indent number of
     * @param indentWith string that should be used to indent the corresponding tag.
     * @throws IOException if an error happens while writing to the stream.
     */
    public void write(Node element, Writer out, int indent,
                      String indentWith)
        throws IOException
    {
        // Write indent characters
        for (int i = 0; i < indent; i++) {
            out.write(indentWith);
        }

        if(element.getNodeType() == Node.COMMENT_NODE)
        {
            out.write("");
        }
        else
        {
            // Write element
            out.write("<");
            out.write(((Element)element).getTagName());

            // Write attributes
            NamedNodeMap attrs = element.getAttributes();
            for (int i = 0; i < attrs.getLength(); i++)
            {
                Attr attr = (Attr) attrs.item(i);
                out.write(" ");
                out.write(attr.getName());
                out.write("=\"");
                out.write(encode(attr.getValue()));
                out.write("\"");
            }
            out.write(">");
        }
        // Write child elements and text
        boolean hasChildren = false;
        NodeList children = element.getChildNodes();
        for (int i = 0
             ; element.hasChildNodes()
               && i < children.getLength()
             ; i++)
        {
            Node child = children.item(i);

            switch (child.getNodeType()) {

            case Node.ELEMENT_NODE: case Node.COMMENT_NODE:
                if (!hasChildren) {
                    out.write(lSep);
                    hasChildren = true;
                }
                write(child, out, indent + 1, indentWith);
                break;

            case Node.TEXT_NODE:
                //if this is a new line don't print it as we print our own.
                if(child.getNodeValue() != null
                   && (   child.getNodeValue().indexOf("\n") == -1
                       || child.getNodeValue().trim().length() != 0))
                    out.write(encode(child.getNodeValue()));
                break;
            case Node.CDATA_SECTION_NODE:
                out.write("");
                break;

            case Node.ENTITY_REFERENCE_NODE:
                out.write('&');
                out.write(child.getNodeName());
                out.write(';');
                break;

            case Node.PROCESSING_INSTRUCTION_NODE:
                out.write(" 0) {
                    out.write(' ');
                    out.write(data);
                }
                out.write("?>");
                break;
            }
        }

        // If we had child elements, we need to indent before we close
        // the element, otherwise we're on the same line and don't need
        // to indent
        if (hasChildren) {
            for (int i = 0; i < indent; i++) {
                out.write(indentWith);
            }
        }

        // Write element close
        if(element.getNodeType() == Node.ELEMENT_NODE)
        {
            out.write("");
        }

        out.write(lSep);
        out.flush();
    }

    /**
     * Escape <, > & &apos;, " as their entities and
     * drop characters that are illegal in XML documents.
     *
     * @param value the value to encode
     *
     * @return a String containing the encoded element.
     */
    public String encode(String value) {
        StringBuilder sb = new StringBuilder();
        int len = value.length();
        for (int i = 0; i < len; i++)
        {
            char c = value.charAt(i);
            switch (c) {
            case '<':
                sb.append("<");
                break;
            case '>':
                sb.append(">");
                break;
            case '\'':
                sb.append("'");
                break;
            case '\"':
                sb.append(""");
                break;
            case '&':
                int nextSemi = value.indexOf(";", i);
                if ((nextSemi < 0)
                        || !isReference(value.substring(i, nextSemi + 1)))
                    sb.append("&");
                else
                    sb.append('&');
                break;
            default:
                if (isLegalCharacter(c))
                    sb.append(c);
                break;
            }
        }
        return sb.substring(0);
    }

    /**
     * Drop characters that are illegal in XML documents.
     *
     * 
Also ensure that we are not including an ]]>
     * marker by replacing that sequence with
     * &#x5d;&#x5d;&gt;.
     *
     * 
See XML 1.0 2.2 http://www.w3.org/TR/1998/REC-xml-19980210#charsets and
     * 2.7 http://www.w3.org/TR/1998/REC-xml-19980210#sec-cdata-sect.
     *
     * @param value the value to encode
     *
     * @return a String containing the encoded value.
     */
    public String encodedata(final String value) {
        StringBuffer sb = new StringBuffer();
        int len = value.length();
        for (int i = 0; i < len; ++i) {
            char c = value.charAt(i);
            if (isLegalCharacter(c)) {
                sb.append(c);
            }
        }

        String result = sb.substring(0);
        int cdEnd = result.indexOf("]]>");
        while (cdEnd != -1) {
            sb.setLength(cdEnd);
            sb.append("]]>")
                .append(result.substring(cdEnd + 3));
            result = sb.substring(0);
            cdEnd = result.indexOf("]]>");
        }

        return result;
    }

    /**
     * Is the given argument a character or entity reference?
     *
     * @param ent the string whose nature we need to determine.
     * @return true if ent is an entity reference and
     * false otherwise.
     */
    public boolean isReference(String ent) {
        if (!(ent.charAt(0) == '&') || !ent.endsWith(";"))
            return false;

        if (ent.charAt(1) == '#') {
            if (ent.charAt(2) == 'x') {
                try {
                    Integer.parseInt(ent.substring(3, ent.length() - 1), 16);
                    return true;
                } catch (NumberFormatException nfe) {
                    return false;
                }
            } else {
                try {
                    Integer.parseInt(ent.substring(2, ent.length() - 1));
                    return true;
                } catch (NumberFormatException nfe) {
                    return false;
                }
            }
        }

        String name = ent.substring(1, ent.length() - 1);
        for (int i = 0; i < knownEntities.length; i++) {
            if (name.equals(knownEntities[i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Is the given character allowed inside an XML document?
     *
     * See XML 1.0 2.2 
     * http://www.w3.org/TR/1998/REC-xml-19980210#charsets.
     *
     * @since 1.10, Ant 1.5
     *
     * @param c the character whose nature we'd like to determine.
     *
     * @return true if c is a legal character and false otherwise
     */
    public boolean isLegalCharacter(char c) {
        if (c == 0x9 || c == 0xA || c == 0xD) {
            return true;
        } else if (c < 0x20) {
            return false;
        } else if (c <= 0xD7FF) {
            return true;
        } else if (c < 0xE000) {
            return false;
        } else if (c <= 0xFFFD) {
            return true;
        }
        return false;
    }
}