// org.eigenbase.xom.XMLUtil, from the eigenbase-xom artifact ("XML object model for Java").
// Source listing of the newest published version (Maven / Gradle / Ivy).
/*
// Licensed to Julian Hyde under one or more contributor license
// agreements. See the NOTICE file distributed with this work for
// additional information regarding copyright ownership.
//
// Julian Hyde licenses this file to you under the Apache License,
// Version 2.0 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
package org.eigenbase.xom;

import java.io.IOException;
import java.io.PrintWriter;
import java.io.Reader;

/**
 * Utilities for dealing with XML data.  These methods must NOT depend upon any
 * XML parser or object model (MSXML, DOM, SAX, etc.)
 *
 * @author jhyde
 */
public class XMLUtil {

    /** Opening delimiter of an XML CDATA section. */
    private static final String CDATA_START = "<![CDATA[";

    /** Closing delimiter of an XML CDATA section. */
    private static final String CDATA_END = "]]>";

    /**
     * Returns whether this class treats a character as "special", that is,
     * whether {@link #stringEncodeXML} replaces it with a numeric character
     * reference.  The set is the XML markup characters plus tab, newline and
     * carriage return.
     *
     * @param c the character to classify.
     * @return true if the character is in the special set.
     */
    private static boolean isXMLSpecial(char c)
    {
        switch (c) {
        case '<':
        case '>':
        case '"':
        case '\'':
        case '&':
        case '\t':
        case '\n':
        case '\r':
            return true;
        default:
            return false;
        }
    }

    /**
     * Determine if a String contains any XML special characters, return true
     * if it does.  If this function returns true, the string will need to be
     * encoded either using the {@link #stringEncodeXML} function below or
     * using a CDATA section.  Note that MSXML has a nasty bug whereby
     * whitespace characters outside of a CDATA section are lost when parsing.
     * To avoid hitting this bug, this method treats tab, newline and carriage
     * return as "special".
     *
     * @param input the String to scan for XML special characters; must not be
     *   null.
     * @return true if the String contains any such characters.
     */
    public static boolean stringHasXMLSpecials(String input)
    {
        for (int i = 0; i < input.length(); i++) {
            if (isXMLSpecial(input.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Encode a String for XML output, writing it to a PrintWriter.
     * The String is written as is, except that each special character
     * (the same set that {@link #stringHasXMLSpecials} detects) is replaced
     * by a decimal numeric character reference, for example {@code &#60;}
     * for {@code <}.
     *
     * @param input a String to convert; must not be null.
     * @param out a PrintWriter to which to write the results.
     */
    public static void stringEncodeXML(String input, PrintWriter out)
    {
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            if (isXMLSpecial(c)) {
                out.print("&#");
                out.print((int) c);
                out.print(';');
            } else {
                out.print(c);
            }
        }
    }

    /**
     * Quote a string, and write to a {@link PrintWriter}.
     *
     * <p>For example, {@code a string} becomes
     * {@code <![CDATA[a string]]>}.  If the string contains {@code ]]>}
     * (which commonly occurs when wrapping other XML documents), it cannot be
     * placed inside a CDATA section, so we give up on CDATA and escape the
     * string with {@code StringEscaper.xmlEscaper} instead.  For example,
     * {@code A string with ]]> in it} is expected to become
     * {@code A string with ]]&gt; in it}.
     *
     * @param pw the writer to print to.
     * @param data the text to quote; must not be null.
     */
    public static void printPCDATA(PrintWriter pw, String data)
    {
        if (data.indexOf(CDATA_END) > -1) {
            String s = StringEscaper.xmlEscaper.escapeString(data);
            pw.print(s);
        } else {
            // Write the pieces separately to avoid building a temporary
            // concatenated string for potentially large data.
            pw.print(CDATA_START);
            pw.print(data);
            pw.print(CDATA_END);
        }
    }

    /**
     * Quote a string, returning the result rather than printing it.
     * Applies the same rule as {@link #printPCDATA(PrintWriter,String)}:
     * wrap in a CDATA section unless the data contains {@code ]]>}, in
     * which case the data is escaped instead.
     *
     * @param data the text to quote; must not be null.
     * @return the quoted text.
     * @see #printPCDATA(PrintWriter,String)
     */
    public static String quotePCDATA(String data)
    {
        if (data.indexOf(CDATA_END) > -1) {
            return StringEscaper.xmlEscaper.escapeString(data);
        } else {
            return CDATA_START + data + CDATA_END;
        }
    }

    /**
     * Quote a string in an element and a CDATA, and write to a {@link
     * PrintWriter}.  For example, if tag is {@code Value}, then
     * {@code a string} becomes
     * {@code <Value><![CDATA[a string]]></Value>}.
     * Nothing at all is written if data is null or empty.
     *
     * @param pw the writer to print to.
     * @param tag the element name; it is written verbatim, without escaping.
     * @param data the element content; may be null.
     * @param newline whether to print a newline after the element
     * @see #printPCDATA(PrintWriter,String)
     */
    public static void printPCDATA(
        PrintWriter pw, String tag, String data, boolean newline)
    {
        if (data == null || data.length() == 0) {
            return;
        }
        pw.print("<");
        pw.print(tag);
        pw.print(">");
        printPCDATA(pw, data);
        // Close the element that was opened above.
        pw.print("</");
        pw.print(tag);
        pw.print(">");
        if (newline) {
            pw.println();
        }
    }

    /**
     * Quote a string in an element and a CDATA, and write to a {@link
     * PrintWriter}, without a trailing newline.
     *
     * @param pw the writer to print to.
     * @param tag the element name.
     * @param data the element content; nothing is written if null or empty.
     * @see #printPCDATA(PrintWriter,String,String,boolean)
     */
    public static void printPCDATA(PrintWriter pw, String tag, String data)
    {
        printPCDATA(pw, tag, data, false);
    }

    /**
     * Escapes a value for use inside a double-quoted attribute, delegating
     * to {@code StringEscaper.xmlNumericEscaper}.
     */
    private static String escapeForQuoting(String val)
    {
        return StringEscaper.xmlNumericEscaper.escapeString(val);
    }

    /** Quote a string so that it can be included as an XML attribute value. */
    public static String quoteAtt(String val)
    {
        return "\"" + escapeForQuoting(val) + "\"";
    }

    /**
     * Return an XML attribute/value pair for String val, with a leading
     * space.  Returns the empty string if val is null or empty.
     */
    public static String quoteAtt(String name, String val)
    {
        if ((val == null) || val.equals("")) {
            return "";
        }
        return " " + name + "=" + quoteAtt(val);
    }

    /** Return an XML attribute/value pair for int val, with a leading space. */
    public static String quoteAtt(String name, int val)
    {
        return " " + name + "=\"" + val + "\"";
    }

    /**
     * Return an XML attribute/value pair for boolean val, with a leading
     * space.
     *
     * <p>NOTE: this method writes the value in upper case ({@code TRUE} /
     * {@code FALSE}) whereas {@link #printAtt(PrintWriter,String,boolean)}
     * writes it in lower case.  The difference is kept because existing
     * callers may depend on either form.
     */
    public static String quoteAtt(String name, boolean val)
    {
        return " " + name + "=\"" + (val ? "TRUE" : "FALSE") + "\"";
    }

    /**
     * Quote a string so that it can be included as an XML attribute value,
     * and print it (including the surrounding double quotes).
     */
    public static void printAtt(PrintWriter pw, String val)
    {
        pw.print("\"");
        pw.print(escapeForQuoting(val));
        pw.print("\"");
    }

    /**
     * Print an XML attribute name and value for string val, preceded by a
     * space.  Nothing is printed if val is null.  Unlike
     * {@link #quoteAtt(String,String)}, an empty string is still printed
     * (as {@code name=""}).
     */
    public static void printAtt(PrintWriter pw, String name, String val)
    {
        if (val != null) {
            pw.print(" ");
            pw.print(name);
            pw.print("=\"");
            pw.print(escapeForQuoting(val));
            pw.print("\"");
        }
    }

    /** Print an XML attribute name and value for int val */
    public static void printAtt(PrintWriter pw, String name, int val)
    {
        pw.print(" ");
        pw.print(name);
        pw.print("=\"");
        pw.print(val);
        pw.print("\"");
    }

    /**
     * Print an XML attribute name and value for boolean val.  The value is
     * written in lower case ({@code true} / {@code false}).
     */
    public static void printAtt(PrintWriter pw, String name, boolean val)
    {
        pw.print(" ");
        pw.print(name);
        pw.print(val ? "=\"true\"" : "=\"false\"");
    }

    /**
     * Retrieve the name of the first tag in the XML document specified by the
     * given Reader, without parsing the full file/string.  This function is
     * useful to identify the DocType of an XML document before parsing,
     * possibly to send the document off to different pieces of code.
     * For performance reasons, the function attempts to read as little of
     * the file or string as possible before making its decision about the
     * first tag.  Leading comments, processing instructions
     * ({@code <? ... >}) and declarations ({@code <! ... >}) are skipped.
     *
     * <p>Limitations: a skipped processing instruction or declaration is
     * considered to end at the first {@code >}, so constructs that contain a
     * {@code >} themselves (such as a DOCTYPE with an internal subset) are
     * not handled.  The reader is read one character at a time and is not
     * closed by this method.
     *
     * @param xml a Reader containing an XML document.
     * @return the first tag name, as a String, or null if no first tag
     * can be found (including when reading fails with an IOException).
     */
    public static String getFirstTagName(Reader xml)
    {
        final int OUTSIDE = 0;  // constant: identify outside state
        final int BRACKET = 1;  // constant: bracket, contents unknown
        final int COMMENT = 2;  // constant: identify a comment section
        final int IGNORE = 3;   // constant: identify an ignored section
        final int TAG = 4;      // constant: identify a tag section

        int state = OUTSIDE;
        // Remaining characters still to be matched: of "!--" while in the
        // BRACKET state, of "-->" while in the COMMENT state.
        String commentMatch = null;
        StringBuilder tagBuffer = null;
        boolean sawBang = false;

        try {
            // Invariant: c is the next character that has been read but not
            // yet processed by the state machine.
            int c = xml.read();
            for (;;) {
                // No tag found if we hit EOF first.
                if (c == -1) {
                    return null;
                }
                switch (state) {
                case OUTSIDE:
                    if (c == '<') {
                        // Start of any sort of tag
                        state = BRACKET;
                        commentMatch = "!--";
                        sawBang = false;
                        c = xml.read();
                    } else if (!Character.isWhitespace((char) c)) {
                        // Other non-whitespace characters outside of any tag
                        return null;
                    } else {
                        // Whitespace characters are ignored
                        c = xml.read();
                    }
                    break;

                case BRACKET:
                    // Check for the start of a comment.
                    if (commentMatch != null) {
                        if (c == commentMatch.charAt(0)) {
                            if (commentMatch.length() == 1) {
                                // Saw the whole of "<!--": this is a comment.
                                c = xml.read();
                                commentMatch = "-->";
                                state = COMMENT;
                                // The character just read belongs to the
                                // comment body; it must not be interpreted
                                // by the bracket logic below, which could
                                // otherwise overwrite the COMMENT state.
                                break;
                            } else {
                                // Remove the first character from commentMatch,
                                // then process the character as usual.
                                commentMatch = commentMatch.substring(1);
                            }
                        } else {
                            // No longer eligible for comment.
                            commentMatch = null;
                        }
                    }

                    // Hit whitespace; ignore the character.
                    if (Character.isWhitespace((char) c)) {
                        c = xml.read();
                        break;
                    }

                    switch (c) {
                    case '?':
                        c = xml.read();
                        state = IGNORE;
                        break;
                    case '!':
                        // Enter an ignored section unless eligible for comment.
                        c = xml.read();
                        sawBang = true;
                        if (commentMatch == null) {
                            state = IGNORE;
                        }
                        break;
                    case '-':
                        // Enter an ignored section unless eligible for comment.
                        c = xml.read();
                        if (commentMatch == null) {
                            state = IGNORE;
                        }
                        break;
                    case '>':
                        // Return to OUTSIDE state immediately
                        c = xml.read();
                        state = OUTSIDE;
                        break;
                    default:
                        // State depends on whether we saw a ! or not.  The
                        // character is deliberately not consumed here: the
                        // next state processes it.
                        if (sawBang) {
                            state = IGNORE;
                        } else {
                            state = TAG;
                            tagBuffer = new StringBuilder();
                        }
                    }
                    break;

                case COMMENT:
                    if (c == commentMatch.charAt(0)) {
                        // Matched the next expected end-of-comment character.
                        if (commentMatch.length() == 1) {
                            // Done with the comment
                            state = OUTSIDE;
                        } else {
                            commentMatch = commentMatch.substring(1);
                        }
                    } else if (c == '-' && commentMatch.length() == 1) {
                        // Already saw "--" and got another '-': the last two
                        // characters are still "--", so keep waiting for '>'.
                    } else {
                        // If not, restart our quest for the end-of-comment
                        // sequence.
                        commentMatch = "-->";
                    }
                    c = xml.read();
                    break;

                case IGNORE:
                    // Drop out on a close >.  Ignore all other characters.
                    if (c == '>') {
                        state = OUTSIDE;
                    }
                    c = xml.read();
                    break;

                case TAG:
                    // Store characters in the tag buffer until we hit
                    // whitespace, '>' or '/', then return the name of the tag.
                    if (Character.isWhitespace((char) c) || c == '>'
                        || c == '/') {
                        return tagBuffer.toString();
                    } else {
                        tagBuffer.append((char) c);
                        c = xml.read();
                    }
                    break;
                }
            }
        } catch (IOException ex) {
            // On exception, we can't determine the first tag, so return null.
            return null;
        }
    }
}


// End XMLUtil.java