// org.eigenbase.xom.XMLUtil (Maven / Gradle / Ivy)
// The newest version!
/*
// Licensed to Julian Hyde under one or more contributor license
// agreements. See the NOTICE file distributed with this work for
// additional information regarding copyright ownership.
//
// Julian Hyde licenses this file to you under the Apache License,
// Version 2.0 (the "License"); you may not use this file except in
// compliance with the License. You may obtain a copy of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/
package org.eigenbase.xom;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Reader;
/**
* Utilities for dealing with XML data. These methods must NOT depend upon any
* XML parser or object model (MSXML, DOM, SAX, etc.)
*
* @author jhyde
*/
public class XMLUtil {
    /**
     * Determines whether a character is treated as "special" by
     * {@link #stringHasXMLSpecials(String)} and
     * {@link #stringEncodeXML(String, PrintWriter)}.
     *
     * <p>The set is the five XML markup characters plus tab, newline and
     * carriage return.
     *
     * @param c the character to test.
     * @return true if the character is in the special set.
     */
    private static boolean isXMLSpecial(char c)
    {
        switch (c) {
        case '<':
        case '>':
        case '"':
        case '\'':
        case '&':
        case '\t':
        case '\n':
        case '\r':
            return true;
        default:
            return false;
        }
    }

    /**
     * Determine if a String contains any XML special characters, return true
     * if it does. If this function returns true, the string will need to be
     * encoded either using {@link #stringEncodeXML(String, PrintWriter)} or
     * using a CDATA section. Note that MSXML has a nasty bug whereby
     * whitespace characters outside of a CDATA section are lost when parsing.
     * To avoid hitting this bug, this method treats tab, newline and carriage
     * return as "special".
     *
     * @param input the String to scan for XML special characters; must not
     *     be null.
     * @return true if the String contains any such characters.
     */
    public static boolean stringHasXMLSpecials(String input)
    {
        for (int i = 0; i < input.length(); i++) {
            if (isXMLSpecial(input.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Encode a String for XML output, writing it to a PrintWriter.
     * The String is written as is, except that each special character
     * (see {@link #stringHasXMLSpecials(String)}) is replaced by a decimal
     * numeric character reference; for example {@code <} is written as
     * {@code &#60;}.
     *
     * @param input a String to convert; must not be null.
     * @param out a PrintWriter to which to write the results.
     */
    public static void stringEncodeXML(String input, PrintWriter out)
    {
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            if (isXMLSpecial(c)) {
                // Numeric character reference: "&#" + code point + ";".
                out.print("&#" + (int) c + ";");
            } else {
                out.print(c);
            }
        }
    }

    /**
     * Quote a string, and write to a {@link PrintWriter}.
     *
     * <p>For example, {@code "a string"} becomes
     * {@code <![CDATA[a string]]>}. If the string contains {@code ]]>}
     * (which commonly occurs when wrapping other XML documents), we give up
     * on using {@code <![CDATA[} ... {@code ]]>}, and just encode the string
     * with {@code StringEscaper.xmlEscaper}. For example,
     * {@code "A string with ]]> in it"} is written with its special
     * characters escaped (presumably {@code ]]&gt;}; the exact form is
     * decided by {@code StringEscaper}).
     *
     * @param pw the writer to print to.
     * @param data the text to quote; must not be null.
     * @see #quotePCDATA(String)
     */
    public static void printPCDATA(PrintWriter pw, String data)
    {
        pw.print(quotePCDATA(data));
    }

    /**
     * Quote a string.
     *
     * <p>Returns the data wrapped in a CDATA section, unless it contains
     * {@code ]]>} (which would terminate the section early), in which case
     * the data is escaped with {@code StringEscaper.xmlEscaper} instead.
     *
     * @param data the text to quote; must not be null.
     * @return the quoted text.
     * @see #printPCDATA(PrintWriter,String)
     */
    public static String quotePCDATA(String data)
    {
        if (data.indexOf("]]>") > -1) {
            return StringEscaper.xmlEscaper.escapeString(data);
        } else {
            return "<![CDATA[" + data + "]]>";
        }
    }

    /**
     * Quote a string in an element and a CDATA, and write to a {@link
     * PrintWriter}. For example, if {@code tag} is {@code "Value"}, then
     * {@code "a string"} becomes
     * {@code <Value><![CDATA[a string]]></Value>}.
     *
     * <p>Nothing at all is printed if {@code data} is null or empty.
     *
     * @param pw the writer to print to.
     * @param tag the element name.
     * @param data the element content; may be null.
     * @param newline whether to print a newline after the element
     * @see #printPCDATA(PrintWriter,String)
     */
    public static void printPCDATA(
        PrintWriter pw, String tag, String data, boolean newline)
    {
        if (data == null || data.length() == 0) {
            return;
        }
        pw.print("<");
        pw.print(tag);
        pw.print(">");
        printPCDATA(pw, data);
        pw.print("</");
        pw.print(tag);
        pw.print(">");
        if (newline) {
            pw.println();
        }
    }

    /**
     * Quote a string in an element and a CDATA, and write to a {@link
     * PrintWriter}, without a trailing newline.
     *
     * @param pw the writer to print to.
     * @param tag the element name.
     * @param data the element content; may be null, in which case nothing
     *     is printed.
     * @see #printPCDATA(PrintWriter,String,String,boolean)
     */
    public static void printPCDATA(PrintWriter pw, String tag, String data)
    {
        printPCDATA(pw, tag, data, false);
    }

    /**
     * Escapes a value for use inside a double-quoted attribute, delegating
     * to {@code StringEscaper.xmlNumericEscaper}.
     */
    private static String escapeForQuoting(String val)
    {
        return StringEscaper.xmlNumericEscaper.escapeString(val);
    }

    /**
     * Quote a string so that it can be included as an XML attribute value.
     *
     * @param val the raw attribute value.
     * @return the escaped value surrounded by double quotes.
     */
    public static String quoteAtt(String val)
    {
        return "\"" + escapeForQuoting(val) + "\"";
    }

    /**
     * Return an XML attribute/value pair for String val.
     *
     * <p>Unlike {@link #printAtt(PrintWriter, String, String)}, an empty
     * value is treated like null and yields an empty result.
     *
     * @param name the attribute name.
     * @param val the attribute value; may be null.
     * @return {@code " name=\"value\""}, or the empty string if val is null
     *     or empty.
     */
    public static String quoteAtt(String name, String val)
    {
        if ((val == null) || val.equals("")) {
            return "";
        }
        return " " + name + "=" + quoteAtt(val);
    }

    /**
     * Return an XML attribute/value pair for int val.
     *
     * @param name the attribute name.
     * @param val the attribute value.
     * @return {@code " name=\"val\""}.
     */
    public static String quoteAtt(String name, int val)
    {
        return " " + name + "=\"" + val + "\"";
    }

    /**
     * Return an XML attribute/value pair for boolean val.
     *
     * <p>The value is rendered in upper case ({@code TRUE} / {@code FALSE}),
     * whereas {@link #printAtt(PrintWriter, String, boolean)} prints lower
     * case. The difference is kept for backward compatibility.
     *
     * @param name the attribute name.
     * @param val the attribute value.
     * @return {@code " name=\"TRUE\""} or {@code " name=\"FALSE\""}.
     */
    public static String quoteAtt(String name, boolean val)
    {
        return " " + name + "=\"" + (val ? "TRUE" : "FALSE") + "\"";
    }

    /**
     * Print a string, escaped and double-quoted, so that it can be included
     * as an XML attribute value.
     *
     * @param pw the writer to print to.
     * @param val the raw attribute value.
     */
    public static void printAtt(PrintWriter pw, String val)
    {
        pw.print("\"");
        pw.print(escapeForQuoting(val));
        pw.print("\"");
    }

    /**
     * Print an XML attribute name and value for string val.
     *
     * <p>Nothing is printed if val is null. An empty string is printed as
     * {@code name=""}.
     *
     * @param pw the writer to print to.
     * @param name the attribute name.
     * @param val the attribute value; may be null.
     */
    public static void printAtt(PrintWriter pw, String name, String val)
    {
        if (val != null) {
            pw.print(" ");
            pw.print(name);
            pw.print("=\"");
            pw.print(escapeForQuoting(val));
            pw.print("\"");
        }
    }

    /**
     * Print an XML attribute name and value for int val.
     *
     * @param pw the writer to print to.
     * @param name the attribute name.
     * @param val the attribute value.
     */
    public static void printAtt(PrintWriter pw, String name, int val)
    {
        pw.print(" ");
        pw.print(name);
        pw.print("=\"");
        pw.print(val);
        pw.print("\"");
    }

    /**
     * Print an XML attribute name and value for boolean val, rendered in
     * lower case ({@code true} / {@code false}).
     *
     * @param pw the writer to print to.
     * @param name the attribute name.
     * @param val the attribute value.
     */
    public static void printAtt(PrintWriter pw, String name, boolean val)
    {
        pw.print(" ");
        pw.print(name);
        pw.print(val ? "=\"true\"" : "=\"false\"");
    }

    /**
     * Retrieve the name of the first tag in the XML document specified by the
     * given Reader, without parsing the full file/string. This function is
     * useful to identify the DocType of an XML document before parsing,
     * possibly to send the document off to different pieces of code.
     * For performance reasons, the function attempts to read as little of
     * the file or string as possible before making its decision about the
     * first tag. Leading comments are ignored, as are leading
     * {@code <?...>} and {@code <!...>} sections, which are skipped up to
     * their first {@code >}.
     *
     * <p>The reader is consumed one character at a time and is not closed.
     *
     * @param xml a Reader containing an XML document.
     * @return the first tag name, as a String, or null if no first tag
     *     can be found: end of input is reached first, non-whitespace text
     *     precedes the first tag, or the reader throws an IOException.
     */
    public static String getFirstTagName(Reader xml)
    {
        final int OUTSIDE = 0;  // constant: identify outside state
        final int BRACKET = 1;  // constant: bracket, contents unknown
        final int COMMENT = 2;  // constant: identify a comment section
        final int IGNORE = 3;   // constant: identify an ignored section
        final int TAG = 4;      // constant: identify a tag section

        int state = OUTSIDE;
        // Remaining characters still to be matched: of "!--" while in
        // BRACKET state, or of "-->" while in COMMENT state.
        String commentMatch = null;
        StringBuffer tagBuffer = null;
        boolean sawBang = false;

        try {
            int c = xml.read();
            for (;;) {
                // No tag found if we hit EOF first.
                if (c == -1) {
                    return null;
                }
                switch (state) {
                case OUTSIDE:
                    if (c == '<') {
                        // Start of any sort of tag
                        state = BRACKET;
                        commentMatch = "!--";
                        sawBang = false;
                        c = xml.read();
                    } else if (!Character.isWhitespace((char) c)) {
                        // Other non-whitespace characters outside of any tag
                        return null;
                    } else {
                        // Whitespace characters are ignored
                        c = xml.read();
                    }
                    break;

                case BRACKET:
                    // Check for the start of a comment.
                    if (commentMatch != null) {
                        if (c == commentMatch.charAt(0)) {
                            if (commentMatch.length() == 1) {
                                // This match completes "<!--": we are now
                                // inside a comment.
                                c = xml.read();
                                commentMatch = "-->";
                                state = COMMENT;
                                // Leave the BRACKET case right away. The
                                // character just read belongs to the comment
                                // body and must not be interpreted as a
                                // bracket character, which would override
                                // the COMMENT state.
                                break;
                            } else {
                                // Remove the first character from
                                // commentMatch, then process the character
                                // as usual.
                                commentMatch = commentMatch.substring(1);
                            }
                        } else {
                            // No longer eligible for comment.
                            commentMatch = null;
                        }
                    }

                    // Hit whitespace; ignore the character.
                    if (Character.isWhitespace((char) c)) {
                        c = xml.read();
                        break;
                    }

                    switch (c) {
                    case '?':
                        c = xml.read();
                        state = IGNORE;
                        break;
                    case '!':
                        // Enter an ignored section unless eligible for
                        // comment.
                        c = xml.read();
                        sawBang = true;
                        if (commentMatch == null) {
                            state = IGNORE;
                        }
                        break;
                    case '-':
                        // Enter an ignored section unless eligible for
                        // comment.
                        c = xml.read();
                        if (commentMatch == null) {
                            state = IGNORE;
                        }
                        break;
                    case '>':
                        // Return to OUTSIDE state immediately
                        c = xml.read();
                        state = OUTSIDE;
                        break;
                    default:
                        // State depends on whether we saw a ! or not.
                        // The character is not consumed here; it is
                        // re-examined in the new state.
                        if (sawBang) {
                            state = IGNORE;
                        } else {
                            state = TAG;
                        }
                        tagBuffer = new StringBuffer();
                    }
                    break;

                case COMMENT:
                    // Did we match the next expected end-of-comment
                    // character?
                    if (c == commentMatch.charAt(0)) {
                        c = xml.read();
                        if (commentMatch.length() == 1) {
                            // Done with the comment
                            state = OUTSIDE;
                        } else {
                            commentMatch = commentMatch.substring(1);
                        }
                    } else {
                        // If not, restart our quest for the end-of-comment
                        // sequence.
                        c = xml.read();
                        commentMatch = "-->";
                    }
                    break;

                case IGNORE:
                    // Drop out on a close >. Ignore all other characters.
                    if (c == '>') {
                        state = OUTSIDE;
                    }
                    c = xml.read();
                    break;

                case TAG:
                    // Store characters in the tag buffer until we hit
                    // whitespace, '>' or '/'; then return the tag name.
                    if (Character.isWhitespace((char) c) || c == '>'
                        || c == '/')
                    {
                        return tagBuffer.toString();
                    } else {
                        tagBuffer.append((char) c);
                        c = xml.read();
                    }
                    break;
                }
            }
        } catch (IOException ex) {
            // On exception, we can't determine the first tag, so return null.
            return null;
        }
    }
}
// End XMLUtil.java