![JAR search and dependency download from the Maven repository](/logo.png)
org.apache.juneau.xml.XmlUtils Maven / Gradle / Ivy
// ***************************************************************************************************************************
// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file *
// * distributed with this work for additional information regarding copyright ownership. The ASF licenses this file *
// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance *
// * with the License. You may obtain a copy of the License at *
// * *
// * http://www.apache.org/licenses/LICENSE-2.0 *
// * *
// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an *
// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the *
// * specific language governing permissions and limitations under the License. *
// ***************************************************************************************************************************
package org.apache.juneau.xml;
import static org.apache.juneau.common.internal.StringUtils.*;
import static org.apache.juneau.common.internal.ThrowableUtils.*;
import java.io.*;
import java.util.*;
import javax.xml.stream.*;
import org.apache.juneau.*;
import org.apache.juneau.common.internal.*;
import org.apache.juneau.internal.*;
import org.apache.juneau.xml.annotation.*;
/**
* XML utility methods.
*
* See Also:
* - XML Details
*
*/
public final class XmlUtils {
//-----------------------------------------------------------------------------------------------------------------
// XML element names
//-----------------------------------------------------------------------------------------------------------------
/**
 * Encodes any invalid XML element name characters to {@code _x####_} sequences and writes the result.
 *
 * <p>
 * A {@code null} value is written as {@code _x0000_}. An empty name is written as {@code _xE000_},
 * the same marker {@link #encodeElementName(Object)} returns for an empty name, so that both overloads
 * produce identical output and never emit an empty (and therefore invalid) element name.
 *
 * @param w The writer to send the output to.
 * @param value The object being encoded; its {@code toString()} value is used as the name.
 * @return The same writer passed in.
 */
public static Writer encodeElementName(Writer w, Object value) {
	try {
		if (value == null)
			return w.append("_x0000_");
		String s = value.toString();
		// An empty string cannot be used as an element name; emit the empty-string marker instead.
		if (s.isEmpty())
			return w.append("_xE000_");
		if (needsElementNameEncoding(s))
			return encodeElementNameInner(w, s);
		w.append(s);
	} catch (IOException e) {
		// Surface writer failures as unchecked exceptions; this method declares no checked exceptions.
		throw asRuntimeException(e);
	}
	return w;
}
/**
 * Returns the specified value as an XML element name, with invalid characters replaced by
 * {@code _x####_} sequences.
 *
 * <p>
 * {@code null} yields {@code _x0000_} and an empty string yields {@code _xE000_}.
 * Names that need no encoding are returned unchanged.
 *
 * @param value The object being encoded; its {@code toString()} value is used as the name.
 * @return The encoded element name string.
 */
public static String encodeElementName(Object value) {
	if (value == null)
		return "_x0000_";
	final String name = value.toString();
	if (name.isEmpty())
		return "_xE000_";
	// Fast path: nothing to rewrite.
	if (! needsElementNameEncoding(name))
		return name;
	try (Writer out = new StringBuilderWriter(name.length() * 2)) {
		return encodeElementNameInner(out, name).toString();
	} catch (IOException e) {
		throw asRuntimeException(e); // Never happens
	}
}
/*
 * Writes each character of s to w, either verbatim when it is acceptable at its position in an
 * element name, or as a padded-hex sequence otherwise.
 */
private static Writer encodeElementNameInner(Writer w, String s) throws IOException {
	final int n = s.length();
	for (int pos = 0; pos < n; pos++) {
		final char ch = s.charAt(pos);
		final boolean keep;
		if (ch == '_') {
			// An underscore is kept unless it starts what isEscapeSequence() recognizes as an escape.
			keep = ! isEscapeSequence(s, pos);
		} else if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {
			keep = true;
		} else if (pos != 0 && (
				ch == '-'
				|| ch == '.'
				|| (ch >= '0' && ch <= '9')
				|| ch == '\u00b7'
				|| (ch >= '\u0300' && ch <= '\u036f')
				|| (ch >= '\u203f' && ch <= '\u2040'))) {
			// These characters are accepted anywhere except the first position.
			keep = true;
		} else {
			// Remaining ranges are accepted at any position, including the first.
			keep = (ch >= '\u00c0' && ch <= '\u00d6')
				|| (ch >= '\u00d8' && ch <= '\u00f6')
				|| (ch >= '\u00f8' && ch <= '\u02ff')
				|| (ch >= '\u0370' && ch <= '\u037d')
				|| (ch >= '\u037f' && ch <= '\u1fff')
				|| (ch >= '\u200c' && ch <= '\u200d')
				|| (ch >= '\u2070' && ch <= '\u218f')
				|| (ch >= '\u2c00' && ch <= '\u2fef')
				|| (ch >= '\u3001' && ch <= '\ud7ff')
				|| (ch >= '\uf900' && ch <= '\ufdcf')
				|| (ch >= '\ufdf0' && ch <= '\ufffd');
		}
		if (keep)
			w.append(ch);
		else
			appendPaddedHexChar(w, ch);
	}
	return w;
}
/*
 * Quick pre-check: returns true unless the name consists solely of ASCII letters and digits
 * and does not start with a digit.
 */
private static boolean needsElementNameEncoding(String value) {
	// Note that this doesn't need to be perfect, just fast.
	final int n = value.length();
	for (int pos = 0; pos < n; pos++) {
		final char ch = value.charAt(pos);
		final boolean digit = ch >= '0' && ch <= '9';
		if (digit) {
			// A digit is only a problem in the first position.
			if (pos == 0)
				return true;
			continue;
		}
		final boolean letter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
		if (! letter)
			return true;
	}
	return false;
}
//-----------------------------------------------------------------------------------------------------------------
// XML element text
//-----------------------------------------------------------------------------------------------------------------
/**
 * Escapes invalid XML text characters to {@code _x####_} sequences.
 *
 * <p>
 * {@code null} yields {@code _x0000_}. Whitespace in the first or last position, underscores that
 * start an escape sequence, and characters rejected by {@code isValidXmlCharacter} are all escaped.
 * Text that needs no encoding is returned unchanged.
 *
 * @param value The object being encoded; its {@code toString()} value is used as the text.
 * @return The encoded string.
 */
public static String escapeText(Object value) {
	if (value == null)
		return "_x0000_";
	final String text = value.toString();
	if (! needsTextEncoding(text))
		return text;
	final int last = text.length() - 1;
	final StringWriter out = new StringWriter(text.length() * 2);
	try {
		for (int pos = 0; pos <= last; pos++) {
			final char ch = text.charAt(pos);
			final boolean edgeWhitespace = (pos == 0 || pos == last) && Character.isWhitespace(ch);
			// A character is copied verbatim only if none of the escape conditions apply.
			final boolean verbatim = ! edgeWhitespace
				&& ! (ch == '_' && isEscapeSequence(text, pos))
				&& isValidXmlCharacter(ch);
			if (verbatim)
				out.append(ch);
			else
				appendPaddedHexChar(out, ch);
		}
	} catch (IOException e) {
		throw asRuntimeException(e); // Never happens
	}
	return out.toString();
}
/**
 * Encodes the specified element text and sends the results to the specified writer.
 *
 * <ul>
 * 	<li>{@code null} is written as {@code _x0000_} and an empty string as {@code _xE000_}.
 * 	<li>Characters registered in {@code REPLACE_TEXT} are written as their replacement strings.
 * 	<li>Underscores that start an escape sequence and characters that are not valid XML characters
 * 		are written as {@code _x####_} sequences.
 * </ul>
 *
 * @param w The writer to send the output to.
 * @param value The object being encoded.
 * @param trim Trim the text before serializing it.
 * @param preserveWhitespace
 * 	Specifies whether we're in preserve-whitespace mode
 * 	(e.g. {@link XmlFormat#MIXED_PWS} or {@link XmlFormat#TEXT_PWS}).
 * 	If {@code false}, a whitespace character in the first or last position is written as a
 * 	{@code _x####_} sequence.
 * @return The same writer passed in.
 */
public static Writer encodeText(Writer w, Object value, boolean trim, boolean preserveWhitespace) {
	try {
		if (value == null)
			return w.append("_x0000_");
		String text = value.toString();
		if (text.isEmpty())
			return w.append("_xE000_");
		if (trim)
			text = text.trim();
		if (! needsTextEncoding(text)) {
			w.append(text);
			return w;
		}
		final int last = text.length() - 1;
		for (int pos = 0; pos <= last; pos++) {
			final char ch = text.charAt(pos);
			final boolean atEdge = pos == 0 || pos == last;
			if (atEdge && Character.isWhitespace(ch) && ! preserveWhitespace) {
				appendPaddedHexChar(w, ch);
			} else if (REPLACE_TEXT.contains(ch)) {
				w.append(REPLACE_TEXT.get(ch));
			} else if (ch == '_' && isEscapeSequence(text, pos)) {
				// Escape the underscore itself so the literal text is not mistaken for an escape on decode.
				appendPaddedHexChar(w, ch);
			} else if (isValidXmlCharacter(ch)) {
				w.append(ch);
			} else {
				appendPaddedHexChar(w, ch);
			}
		}
		return w;
	} catch (IOException e) {
		throw asRuntimeException(e);
	}
}
/*
 * Pre-check used to skip the character-by-character rewrite when the text can be written as-is.
 * Returns true as soon as any character is found that the text encoders would alter.
 */
private static boolean needsTextEncoding(String value) {
	final int last = value.length() - 1;
	for (int pos = 0; pos <= last; pos++) {
		final char ch = value.charAt(pos);
		final boolean atEdge = pos == 0 || pos == last;
		if (atEdge && Character.isWhitespace(ch))
			return true;
		if (REPLACE_TEXT.contains(ch))
			return true;
		if (! isValidXmlCharacter(ch))
			return true;
		if (ch == '_' && isEscapeSequence(value, pos))
			return true;
	}
	return false;
}
private static AsciiMap REPLACE_TEXT = new AsciiMap()
.append('&', "&")
.append('<', "<")
.append('>', ">")
.append((char)0x09, " ")
.append((char)0x0A, "
")
.append((char)0x0D, "
");
//-----------------------------------------------------------------------------------------------------------------
// XML attribute names
//-----------------------------------------------------------------------------------------------------------------
/**
 * Serializes and encodes the specified object as a valid XML attribute name.
 *
 * <p>
 * {@code null} is written as {@code _x0000_}. ASCII letters and {@code ':'} are written as-is,
 * digits are written as-is except in the first position, and underscores are written as-is unless
 * they start an escape sequence. Everything else is written as a {@code _x####_} sequence.
 *
 * @param w The writer to send the output to.
 * @param value The object being serialized.
 * @return The same writer passed in.
 * @throws IOException If a problem occurred.
 */
public static Writer encodeAttrName(Writer w, Object value) throws IOException {
	if (value == null)
		return w.append("_x0000_");
	final String name = value.toString();
	if (! needsAttrNameEncoding(name)) {
		w.append(name);
		return w;
	}
	final int n = name.length();
	for (int pos = 0; pos < n; pos++) {
		final char ch = name.charAt(pos);
		final boolean keep;
		if (ch == '_')
			keep = ! isEscapeSequence(name, pos);
		else if (ch >= '0' && ch <= '9')
			keep = pos != 0;  // Digits are not allowed as the first character.
		else
			keep = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == ':';
		if (keep)
			w.append(ch);
		else
			appendPaddedHexChar(w, ch);
	}
	return w;
}
/*
 * Quick pre-check: returns true unless the name consists solely of ASCII letters and digits
 * and starts with a letter.
 */
private static boolean needsAttrNameEncoding(String value) {
	// Note that this doesn't need to be perfect, just fast.
	final int n = value.length();
	for (int pos = 0; pos < n; pos++) {
		final char ch = value.charAt(pos);
		final boolean letter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
		if (letter)
			continue;
		final boolean digit = ch >= '0' && ch <= '9';
		// Non-alphanumerics always need encoding; a digit only when it is the first character.
		if (! digit || pos == 0)
			return true;
	}
	return false;
}
//-----------------------------------------------------------------------------------------------------------------
// XML attribute values
//-----------------------------------------------------------------------------------------------------------------
/**
 * Encodes the specified attribute value and sends the results to the specified writer.
 *
 * <ul>
 * 	<li>{@code null} is written as {@code _x0000_}; an empty string writes nothing.
 * 	<li>Characters registered in {@code REPLACE_ATTR_VAL} are written as their replacement strings.
 * 	<li>Whitespace in the first or last position, underscores that start an escape sequence, and
 * 		characters that are not valid XML characters are written as {@code _x####_} sequences.
 * </ul>
 *
 * @param w The writer to send the output to.
 * @param value The object being encoded.
 * @param trim
 * 	Trim the text before serializing it.
 * 	Any whitespace still present in the first or last position afterwards is encoded.
 * @return The same writer passed in.
 */
public static Writer encodeAttrValue(Writer w, Object value, boolean trim) {
	try {
		if (value == null)
			return w.append("_x0000_");
		String text = value.toString();
		if (text.isEmpty())
			return w;
		if (trim)
			text = text.trim();
		if (! needsAttrValueEncoding(text)) {
			w.append(text);
			return w;
		}
		final int last = text.length() - 1;
		for (int pos = 0; pos <= last; pos++) {
			final char ch = text.charAt(pos);
			final boolean atEdge = pos == 0 || pos == last;
			if (atEdge && Character.isWhitespace(ch)) {
				appendPaddedHexChar(w, ch);
			} else if (REPLACE_ATTR_VAL.contains(ch)) {
				w.append(REPLACE_ATTR_VAL.get(ch));
			} else if (ch == '_' && isEscapeSequence(text, pos)) {
				// Escape the underscore itself so the literal text is not mistaken for an escape on decode.
				appendPaddedHexChar(w, ch);
			} else if (isValidXmlCharacter(ch)) {
				w.append(ch);
			} else {
				appendPaddedHexChar(w, ch);
			}
		}
		return w;
	} catch (IOException e) {
		throw asRuntimeException(e);
	}
}
/*
 * Pre-check used to skip the character-by-character rewrite when the attribute value can be
 * written as-is. Returns true as soon as any character is found that encodeAttrValue would alter.
 */
private static boolean needsAttrValueEncoding(String value) {
	final int last = value.length() - 1;
	for (int pos = 0; pos <= last; pos++) {
		final char ch = value.charAt(pos);
		final boolean atEdge = pos == 0 || pos == last;
		if (atEdge && Character.isWhitespace(ch))
			return true;
		if (REPLACE_ATTR_VAL.contains(ch))
			return true;
		if (! isValidXmlCharacter(ch))
			return true;
		if (ch == '_' && isEscapeSequence(value, pos))
			return true;
	}
	return false;
}
private static AsciiMap REPLACE_ATTR_VAL = new AsciiMap()
.append('&', "&")
.append('<', "<")
.append('>', ">")
.append('"', """)
.append('\'', "'")
.append((char)0x09, " ")
.append((char)0x0A, "
")
.append((char)0x0D, "
");
//-----------------------------------------------------------------------------------------------------------------
// Decode XML text
//-----------------------------------------------------------------------------------------------------------------
/**
 * Translates any {@code _x####_} sequences (introduced by the various encode methods) back into their
 * original characters.
 *
 * <p>
 * A {@code _x0000_} sequence anywhere in the input makes the whole result {@code null}, and a
 * {@code _xE000_} sequence contributes no characters (it stands for an empty string).
 * Input that is empty or contains no underscore is returned unchanged.
 *
 * @param value The string being decoded. May be {@code null}.
 * @param sb
 * 	The string builder to use as a scratch pad, or {@code null} to have one allocated.
 * 	Any existing content is discarded before use.
 * @return The decoded string, or {@code null} if the input was {@code null} or encoded a null.
 */
public static String decode(String value, StringBuilder sb) {
	if (value == null)
		return null;
	if (value.isEmpty() || value.indexOf('_') == -1)
		return value;
	if (sb == null)
		sb = new StringBuilder(value.length());
	else
		sb.setLength(0);  // A reused scratch pad must not leak content from an earlier call.
	for (int i = 0; i < value.length(); i++) {
		char c = value.charAt(i);
		if (c == '_' && isEscapeSequence(value,i)) {
			// The four characters after "_x" are the hex code of the original character.
			int x = Integer.parseInt(value.substring(i+2, i+6), 16);
			// If we find _x0000_, then that means a null.
			// If we find _xE000_, then that means an empty string.
			if (x == 0)
				return null;
			if (x != 0xE000)
				sb.append((char)x);
			// Move to the closing underscore; the loop increment then steps past it.
			i+=6;
		} else {
			sb.append(c);
		}
	}
	return sb.toString();
}
/**
* Given a list of Strings and other Objects, combines Strings that are next to each other in the list.
*
* @param value The list of text nodes to collapse.
* @return The same list.
*/
public static LinkedList