// com.adobe.xfa.ut.StringUtils (Maven / Gradle / Ivy)
// Artifact: aem-sdk-api
/*
* ADOBE CONFIDENTIAL
*
* Copyright 2005 Adobe Systems Incorporated All Rights Reserved.
*
* NOTICE: All information contained herein is, and remains the property of
* Adobe Systems Incorporated and its suppliers, if any. The intellectual and
* technical concepts contained herein are proprietary to Adobe Systems
* Incorporated and its suppliers and may be covered by U.S. and Foreign
* Patents, patents in process, and are protected by trade secret or copyright
* law. Dissemination of this information or reproduction of this material
* is strictly forbidden unless prior written permission is obtained from
* Adobe Systems Incorporated.
*/
package com.adobe.xfa.ut;
import java.util.Comparator;
/**
* This class provides some utility methods that were available in jfString, but
* are not part of the Java String interface.
*
* @exclude from published api -- Mike Tardif, May 2006.
*/
public final class StringUtils {
/**
 * Publicly available case-insensitive comparator; this is
 * {@link String#CASE_INSENSITIVE_ORDER}. You can also use the
 * static method getCaseInsensitiveComparator() to get one.
 */
public static final Comparator<String> CASE_INSENSITIVE_COMPARATOR = String.CASE_INSENSITIVE_ORDER;
/**
 * Compares Strings based on UCS codepoint values.
 */
public static final Comparator<String> UCS_CODEPOINT_COMPARATOR = new CodePointComparator();

/**
 * Orders strings by the numeric value of their Unicode code points rather
 * than by UTF-16 code unit, so a supplementary character (encoded as a
 * surrogate pair) sorts after every BMP character.
 * <p>
 * An unmatched surrogate is treated as a code point of its own whose value
 * is the surrogate's char value. A string that is a proper prefix of the
 * other sorts first.
 */
private static class CodePointComparator implements Comparator<String>, java.io.Serializable {
    private static final long serialVersionUID = -8178230179838097500L;

    /**
     * @param s1 the first string; must not be null.
     * @param s2 the second string; must not be null.
     * @return zero if both strings hold the same code point sequence,
     *         otherwise the difference of the first pair of differing code
     *         points, or -1 / 1 when s1 / s2 is a proper prefix of the other.
     */
    @Override
    public int compare(String s1, String s2) {
        final int length1 = s1.length();
        final int length2 = s2.length();
        int i = 0;
        int j = 0;
        while (true) {
            if (i == length1)
                return j == length2 ? 0 : -1;
            if (j == length2)
                return 1;

            // codePointAt() combines a matched surrogate pair and returns a
            // lone surrogate unchanged, bounded by each string's OWN length.
            // (The previous hand-written decoding of s2 tested against
            // length1, which could read past the end of s2.)
            final int codePoint1 = s1.codePointAt(i);
            i += Character.charCount(codePoint1);
            final int codePoint2 = s2.codePointAt(j);
            j += Character.charCount(codePoint2);

            // Code points never exceed 0x10FFFF, so the subtraction cannot overflow.
            if (codePoint1 != codePoint2)
                return codePoint1 - codePoint2;
        }
    }
}
/**
 * Set of characters considered to be white space: space, tab, line feed,
 * carriage return, U+00A0, U+2000 through U+200B, U+3000 and U+FEFF.
 */
public final static String WHITE_SPACE = " \t\n\r\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u3000\uFEFF";
/**
 * Null-tolerant string equality test.
 * Two references are equal when they are the same reference (which
 * includes both being null) or when both are non-null and
 * {@link String#equals(Object)} says so.
 * @param s1 First string to compare; may be null.
 * @param s2 Second string to compare; may be null.
 * @return True if the strings are equal; false if not. Note that two
 * null references are considered equal.
 */
@FindBugsSuppress(code="ES")
public static boolean equalsWithNull (String s1, String s2) {
    // Identity covers "same instance" as well as "both null".
    if (s1 == s2)
        return true;
    // From here on a null on either side can never match.
    return s1 != null && s2 != null && s1.equals (s2);
}
/**
 * Finds the first occurrence of one character sequence inside another,
 * ignoring ASCII case.
 *
 * Only the ASCII letters A-Z / a-z are case folded. The comparison is done
 * char by char, i.e. on UTF-16 code units, not on Unicode code points.
 *
 * @param source
 *            the sequence being searched.
 * @param target
 *            the sequence to search for.
 * @param fromIndex
 *            the index to begin searching from; negative values are
 *            treated as 0.
 * @return the index in source where the match starts, or -1 if there is
 *         none. An empty target matches at fromIndex, or at
 *         source.length() when fromIndex lies at or beyond the end.
 */
public static int findNoCase(CharSequence source, CharSequence target, int fromIndex) {
    final int sourceLength = source.length();
    final int targetLength = target.length();

    if (fromIndex >= sourceLength)
        return targetLength == 0 ? sourceLength : -1;

    final int start = Math.max(fromIndex, 0);
    if (targetLength == 0)
        return start;

    // Last index at which the target could still fit into the source.
    final int lastCandidate = sourceLength - targetLength;
    for (int candidate = start; candidate <= lastCandidate; candidate++) {
        int matched = 0;
        while (matched < targetLength
                && asciiToLower(source.charAt(candidate + matched)) == asciiToLower(target.charAt(matched))) {
            matched++;
        }
        if (matched == targetLength)
            return candidate;
    }
    return -1;
}

/**
 * Folds an ASCII upper-case letter to lower case; every other char is
 * returned unchanged.
 */
private static char asciiToLower(char c) {
    return ('A' <= c && c <= 'Z') ? (char) (c + ('a' - 'A')) : c;
}
/**
 * Return a comparator that performs case-insensitive string
 * comparisons.
 * @return the shared CASE_INSENSITIVE_COMPARATOR instance.
 */
public static Comparator<String> getCaseInsensitiveComparator () {
    return CASE_INSENSITIVE_COMPARATOR;
}
/**
 * Tests whether a character sequence is absent or has no content.
 * @param s Sequence to be tested; may be null.
 * @return True if the given reference is null or its length is zero;
 * false otherwise.
 */
public static boolean isEmpty (CharSequence s) {
    if (s == null)
        return true;
    return s.length() == 0;
}
/**
 * Parses a decimal integer.
 * @param s the text to parse.
 * @return the parsed value, or null if s is not a valid decimal int.
 */
public static Integer number(String s) {
    return number(s, 10);
}

/**
 * Parses an integer in the given radix.
 * @param s the text to parse.
 * @param radix the radix to interpret s in.
 * @return the parsed value, or null if parsing fails with a
 * NumberFormatException.
 */
public static Integer number(String s, int radix) {
    Integer parsed;
    try {
        parsed = Integer.valueOf(s, radix);
    } catch (NumberFormatException notANumber) {
        // Failure is reported to the caller as null, not as an exception.
        parsed = null;
    }
    return parsed;
}
/**
 * Parses a decimal long.
 * @param s the text to parse.
 * @return the parsed value, or null if s is not a valid decimal long.
 */
public static Long longNumber(String s) {
    return longNumber(s, 10);
}

/**
 * Parses a long in the given radix.
 * @param s the text to parse.
 * @param radix the radix to interpret s in.
 * @return the parsed value, or null if parsing fails with a
 * NumberFormatException.
 */
public static Long longNumber(String s, int radix) {
    Long parsed;
    try {
        parsed = Long.valueOf(s, radix);
    } catch (NumberFormatException notANumber) {
        // Failure is reported to the caller as null, not as an exception.
        parsed = null;
    }
    return parsed;
}
/**
 * Removes the next token from the front of a buffer and returns it.
 *
 * Leading WHITE_SPACE characters are skipped. The token then runs up to
 * the next WHITE_SPACE character that is not inside double quotes. Double
 * quotes delimit quoted sections and are not part of the result. Inside
 * quotes a backslash followed by a double quote yields a literal double
 * quote; a backslash followed by an ordinary character is kept together
 * with that character. Outside quotes a backslash is an ordinary character.
 *
 * @param source the buffer to read from. Everything consumed (leading white
 * space and the token) is deleted from it. It is left untouched when
 * null is returned.
 * @return the token, or null if the buffer is empty or holds only
 * white space.
 */
public static String parseToken (StringBuilder source) {
    final int length = source.length();

    // Skip leading white space.
    int pos = 0;
    while (pos < length && WHITE_SPACE.indexOf (source.charAt (pos)) >= 0)
        pos++;
    if (pos == length)
        return null;

    final StringBuilder token = new StringBuilder();
    boolean inQuotes = false;
    boolean pendingBackslash = false;
    while (pos < length) {
        final char c = source.charAt (pos);
        if (!inQuotes && WHITE_SPACE.indexOf (c) >= 0)
            break;

        if (c == '"') {
            if (pendingBackslash) {
                // Escaped quote: emit the quote, drop the backslash.
                pendingBackslash = false;
                token.append (c);
            } else {
                inQuotes = !inQuotes;
            }
        } else if (c == '\\' && inQuotes) {
            // Hold the backslash back until we know what follows it.
            pendingBackslash = true;
        } else {
            if (pendingBackslash) {
                token.append ('\\');
                pendingBackslash = false;
            }
            token.append (c);
        }
        pos++;
    }

    source.delete (0, pos);
    return token.toString();
}
/**
 * Replaces every occurrence of a character in the whole buffer.
 * @param buf the buffer to modify in place.
 * @param find the character to look for.
 * @param rep the character to put in its place.
 */
public static void replace(StringBuilder buf, char find, char rep) {
    replace(buf, find, rep, 0, buf.length());
}

/**
 * Replaces every occurrence of a character within a range of the buffer.
 * @param buf the buffer to modify in place.
 * @param find the character to look for.
 * @param rep the character to put in its place.
 * @param start index of the first character to examine (inclusive).
 * @param limit index at which to stop (exclusive).
 */
public static void replace(StringBuilder buf, char find, char rep,
        int start, int limit) {
    int index = start;
    while (index < limit) {
        if (buf.charAt(index) == find)
            buf.setCharAt(index, rep);
        index++;
    }
}
/**
 * Parses a decimal integer, falling back to 0.
 * @param s the text to parse.
 * @return the parsed value, or 0 if s cannot be parsed.
 */
public static int safeNumber(String s) {
    return safeNumber(s, 10);
}

/**
 * Parses an integer in the given radix, falling back to 0.
 * @param s the text to parse.
 * @param radix the radix to interpret s in.
 * @return the parsed value, or 0 if number(s, radix) yields null.
 */
public static int safeNumber(String s, int radix) {
    final Integer parsed = number(s, radix);
    if (parsed == null)
        return 0;
    return parsed.intValue();
}
/**
 * Parses a decimal long, falling back to 0.
 * @param s the text to parse.
 * @return the parsed value, or 0 if s cannot be parsed.
 */
public static long safeLongNumber(String s) {
    return safeLongNumber(s, 10);
}

/**
 * Parses a long in the given radix, falling back to 0.
 * @param s the text to parse.
 * @param radix the radix to interpret s in.
 * @return the parsed value, or 0 if longNumber(s, radix) yields null.
 */
public static long safeLongNumber(String s, int radix) {
    final Long parsed = longNumber(s, radix);
    if (parsed == null)
        return 0L;
    return parsed.longValue();
}
/**
 * Counts how many consecutive characters, starting at an offset, belong to
 * a given set. Similar to strspn().
 *
 * @param src -
 *            the sequence to scan
 * @param sSkip -
 *            the characters to skip over
 * @param nOffset -
 *            the position where to start the scan
 * @return The position, relative to nOffset, of the first character not
 *         found in the given set; 0 if nOffset is at or beyond the end
 */
public static int skipOver(CharSequence src, String sSkip, int nOffset /* =0 */) {
    int pos = nOffset;
    // Advance while the current character is a member of the set.
    while (pos < src.length() && sSkip.indexOf(src.charAt(pos)) != -1)
        pos++;
    return pos - nOffset;
}
/**
 * Counts how many consecutive characters, starting at an offset, do not
 * belong to a given set. Similar to strcspn().
 *
 * @param src -
 *            the sequence to scan
 * @param sSkip -
 *            the characters to stop at
 * @param nOffset -
 *            the position where to start the scan
 * @return The position, relative to nOffset, of the first character found
 *         in the given set, or the distance to the end if there is none;
 *         0 if nOffset is at or beyond the end
 */
public static int skipUntil(CharSequence src, String sSkip, int nOffset /* =0 */) {
    int pos = nOffset;
    // Advance while the current character is NOT a member of the set.
    while (pos < src.length() && sSkip.indexOf(src.charAt(pos)) == -1)
        pos++;
    return pos - nOffset;
}
/**
 * Remove trailing white space, as defined by
 * {@link Character#isWhitespace(char)}, from a string buffer. Leading
 * white space is left alone.
 * @param buf - String buffer to be trimmed in place.
 */
public static void trim(StringBuilder buf) {
    int keep = buf.length();
    while (keep > 0 && Character.isWhitespace(buf.charAt(keep - 1)))
        keep--;
    buf.delete(keep, buf.length());
}
/**
 * Remove leading white space, as defined by
 * {@link Character#isWhitespace(char)}, from a string buffer. Trailing
 * white space is left alone.
 * @param buf - String buffer to be trimmed in place.
 */
public static void trimStart(StringBuilder buf) {
    final int length = buf.length();
    int lead = 0;
    while (lead < length && Character.isWhitespace(buf.charAt(lead)))
        lead++;
    buf.delete(0, lead);
}
/**
 * Returns a string with trailing white space, as defined by
 * {@link Character#isWhitespace(char)}, removed.
 * @param s the string to trim; must not be null.
 * @return s itself if it has no trailing white space, otherwise the
 * shortened copy.
 */
public static String trim(String s) {
    int end = s.length();
    while (end > 0 && Character.isWhitespace(s.charAt(end - 1)))
        end--;
    if (end == s.length())
        return s;
    return s.substring(0, end);
}
/**
 * Returns a string with leading white space, as defined by
 * {@link Character#isWhitespace(char)}, removed.
 * @param s the string to trim; must not be null.
 * @return s itself if it has no leading white space, otherwise the
 * shortened copy.
 */
public static String trimStart(String s) {
    final int length = s.length();
    int begin = 0;
    while (begin < length && Character.isWhitespace(s.charAt(begin)))
        begin++;
    if (begin == 0)
        return s;
    return s.substring(begin, length);
}
/**
* @exclude from public api.
*/
public static String toXML(String src, boolean isAttribute) {
final int length = src.length();
int needsEncoding = 0;
for (int i = 0; i < length; i++) {
final char c = src.charAt(i);
if (c == '&') {
if (isValidEntityReference(src, i)) {
i = src.indexOf(';', i);
}
else {
needsEncoding++;
}
}
else if (isAttribute) {
if (c == '<' || /* c == '\'' || */ c == '\"' || c == '\t' || c == '\n' || c == '\r')
needsEncoding++;
}
else {
if (c == '<' || c == '>' || c == '\r')
needsEncoding++;
}
}
if (needsEncoding == 0)
return src;
final StringBuilder result = new StringBuilder(length + needsEncoding * 5);
for (int i = 0; i < length; i++) {
final char c = src.charAt(i);
switch (c) {
case '&':
if (isValidEntityReference(src, i)) {
int semicolon = src.indexOf(';', i);
result.append(src, i, semicolon + 1);
i = semicolon;
}
else
result.append("&");
break;
case '<':
result.append("<");
break;
case '>':
if (isAttribute)
result.append(c);
else
result.append(">");
break;
case '"':
if (isAttribute)
result.append(""");
else
result.append(c);
break;
case '\r':
result.append("
");
break;
case '\n':
if (isAttribute)
result.append("
");
else
result.append(c);
break;
case '\t':
if (isAttribute)
result.append(" ");
else
result.append(c);
break;
// case '\'':
// if (doQuotes)
// result.append("'");
// else
// result.append(c);
// break;
default:
result.append(c);
break;
}
}
return result.toString();
}
/**
 * Target kind for the six-argument toXML(); it decides which characters
 * are encoded in addition to '&', '<' and carriage return.
 *
 * @exclude from published api.
 */
public enum ToXMLType {
// Text content: '>' is encoded as well; tab and line feed are not.
XMLTEXT,
// Attribute value: '"', tab and line feed are encoded as well.
XMLATTRIBUTE_WITH_DQUOTE,
// Attribute value: the apostrophe, tab and line feed are encoded as well.
XMLATTRIBUTE_WITH_QUOTE
}
/**
* Replaces specified characters in the current string with their entity references.
*
* '&' is replaced with "&"
* '<' is replaced with "<"
* if eTargetType = XMLTEXT
* '>' is replaced with ">"
* if eTargetType = XMLATTRIBUTE_WITH_DQUOTE
* '"' is replaced with """
* if eTargetType = XMLATTRIBUTE_WITH_QUOTE
* ''' is replaced with "'"
*
*
* Any characters found in sOthers are replaced with entity references.
* Any characters found inbetween and including the cRangeStart and cRangeEnd are
* replaced with entity references.
*
* @param sSrc the source string.
* @param eTargetType an enum indicating the target xml type.
* @param sOthers a list of characters to be encoded in this string.
* @param cRangeMin Any characters greater than or equal to this char are encoded
* with their entity references.
* @param cRangeMax Any characters less than or equal to this char are encoded
* with their entity references.
* @param sExclude a list of characters NOT to be encoded in this string.
* @return The converted string.
* @exclude from published api.
*/
public static String toXML(String sSrc, ToXMLType eTargetType /* = XMLTEXT */,
String sOthers /* = "" */,
char cRangeMin /* = '\0' */,
char cRangeMax /* = '\0' */,
String sExclude /* = "" */) {
int range = 0; // 1 = less than, 2 = greater than, 3 both
if (cRangeMin != 0 && cRangeMax != 0)
range = 3;
else if (cRangeMin != 0 && cRangeMax == 0)
range = 2;
else if (cRangeMin == 0 && cRangeMax != 0)
range = 1;
boolean bHasInvalidChar = false;
int nNeedsEncoding = 0;
int nLen = sSrc.length();
for (int i = 0; i < nLen; ) {
char chr = sSrc.charAt(i++);
if (chr == '&' || chr == '<' || chr == '\r')
nNeedsEncoding++;
else if (chr == '>' && eTargetType == ToXMLType.XMLTEXT)
nNeedsEncoding++;
else if ((chr == '\t' || chr == '\n') && eTargetType != ToXMLType.XMLTEXT) // any attr
nNeedsEncoding++;
else if (chr == '\"' && eTargetType == ToXMLType.XMLATTRIBUTE_WITH_DQUOTE)
nNeedsEncoding++;
else if (chr == '\'' && eTargetType == ToXMLType.XMLATTRIBUTE_WITH_QUOTE)
nNeedsEncoding++;
else if (range == 1 && chr <= cRangeMax)
nNeedsEncoding++;
else if (range == 2 && cRangeMin <= chr)
nNeedsEncoding++;
else if (range == 3 && cRangeMin <= chr && chr <= cRangeMax)
nNeedsEncoding++;
// JavaPort: beware that the surrogate ranges used below are wrong,
// as are the checks in Java for non-BMP characters!
else if ((chr < 0x20 && (chr != 0x09) && (chr != 0x0A) && (chr != 0x0D))
|| (0xD7FF < chr && chr < 0xE000)
|| (0xFFFD < chr && chr < 0x10000)
|| (chr > 0x10FFFF))
bHasInvalidChar = true;
else {
for (int j = 0; j < sOthers.length(); ) {
if (chr == sOthers.charAt(j++)) {
nNeedsEncoding++;
break;
}
}
}
}
if (nNeedsEncoding > 0 || bHasInvalidChar) {
StringBuilder retStr = new StringBuilder(nLen + (nNeedsEncoding * 12)); // See appendHex()
for (int i = 0; i < nLen; ) {
char chr = sSrc.charAt(i++);
boolean bExcluded = false;
for (int j = 0; j < sExclude.length(); ) {
if (chr == sExclude.charAt(j++)) {
retStr.append(chr);
bExcluded = true;
break;
}
}
if (bExcluded)
continue;
// JavaPort: beware that the surrogate ranges used below are wrong,
// as are the checks in Java for non-BMP characters!
if (bHasInvalidChar
&& ((chr < 0x20 && (chr != 0x09) && (chr != 0x0A) && (chr != 0x0D))
|| (0xD7FF < chr && chr < 0xE000)
|| (0xFFFD < chr && chr < 0x10000)
|| (chr > 0x10FFFF)))
continue;
if (chr == '<')
retStr.append("<");
else if (chr == '&')
retStr.append("&");
else if (chr == '\r')
retStr.append("
"); // writen with no leading 0's
else if (chr == '>' && eTargetType == ToXMLType.XMLTEXT)
retStr.append(">");
else if (chr == '\"' && eTargetType == ToXMLType.XMLATTRIBUTE_WITH_DQUOTE)
retStr.append(""");
else if (chr == '\'' && eTargetType == ToXMLType.XMLATTRIBUTE_WITH_QUOTE)
retStr.append("'");
else if (chr == '\n' && eTargetType != ToXMLType.XMLTEXT) // any attr
retStr.append("
"); // writen with no leading 0's
else if (chr == '\t' && eTargetType != ToXMLType.XMLTEXT) // any attr
retStr.append(" "); // writen with no leading 0's
else if (range == 1 && chr <= cRangeMax)
appendHex(retStr, chr);
else if (range == 2 && cRangeMin <= chr)
appendHex(retStr, chr);
else if (range == 3 && cRangeMin <= chr && chr <= cRangeMax)
appendHex(retStr, chr);
else {
boolean bFound = false;
for (int j = 0; j < sOthers.length(); ) {
if (chr == sOthers.charAt(j++)) {
appendHex(retStr, chr);
bFound = true;
break;
}
}
if (! bFound) // reaches here only all ifs fail.
retStr.append(chr);
}
}
return retStr.toString();
}
return sSrc;
}
private static final char hexdigit[] = {
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};
private static void appendHex(StringBuilder srcStr, char srcChr) {
srcStr.append("");
int nUCS4 = srcChr;
// JavaPort: chars greater than 0xFFFF aren't possible in Java.
/* if ((nUCS4 & 0xF00000) != 0) {
srcStr.append(hexdigit[nUCS4 >> 20 & 0xF]);
srcStr.append(hexdigit[nUCS4 >> 16 & 0xF]);
srcStr.append(hexdigit[nUCS4 >> 12 & 0xF]);
srcStr.append(hexdigit[nUCS4 >> 8 & 0xF]);
srcStr.append(hexdigit[nUCS4 >> 4 & 0xF]);
srcStr.append(hexdigit[nUCS4 & 0xF]);
}
else */ if ((nUCS4 & 0xF0000) != 0) {
srcStr.append(hexdigit[nUCS4 >> 16 & 0xF]);
srcStr.append(hexdigit[nUCS4 >> 12 & 0xF]);
srcStr.append(hexdigit[nUCS4 >> 8 & 0xF]);
srcStr.append(hexdigit[nUCS4 >> 4 & 0xF]);
srcStr.append(hexdigit[nUCS4 & 0xF]);
}
else if ((nUCS4 & 0xF000) != 0) {
srcStr.append(hexdigit[nUCS4 >> 12 & 0xF]);
srcStr.append(hexdigit[nUCS4 >> 8 & 0xF]);
srcStr.append(hexdigit[nUCS4 >> 4 & 0xF]);
srcStr.append(hexdigit[nUCS4 & 0xF]);
}
else if ((nUCS4 & 0x0F00) != 0) {
srcStr.append(hexdigit[nUCS4 >> 8 & 0xF]);
srcStr.append(hexdigit[nUCS4 >> 4 & 0xF]);
srcStr.append(hexdigit[nUCS4 & 0xF]);
}
else if ((nUCS4 & 0x00F0) != 0) {
srcStr.append(hexdigit[nUCS4 >> 4 & 0xF]);
srcStr.append(hexdigit[nUCS4 & 0xF]);
}
else {
srcStr.append(hexdigit[nUCS4 & 0xF]);
}
srcStr.append(';');
}
/**
* Replaces specified characters in the current string with their entity references.
* and wraps the text in and elements.
*
* '&' is replaced with "&"
* '<' is replaced with "<"
* '>' is replaced with ">"
*
* All line feeds are are replaced with
.
*
* @param sSrc the source string.
* @param bIncludePI true if the string should be prefixed with .
* @return The converted string.
* @exclude from published api.
*/
public static String toXHTML(String sSrc, boolean bIncludePI) {
boolean bHasInvalidChar = false;
int nNeedsEncoding = 0;
int nLen = sSrc.length();
for (int i = 0; i < nLen; ) {
char chr = sSrc.charAt(i++);
if (chr == '&' || chr == '<' || chr == 0x0A || chr == 0x0D || chr == '>')
nNeedsEncoding++;
// JavaPort: beware that the surrogate ranges used below are wrong,
// as are the checks in Java for non-BMP characters!
else if ((chr < 0x20 && (chr != 0x09) && (chr != 0x0A) && (chr != 0x0D))
|| (0xD7FF < chr && chr < 0xE000) || (0xFFFD < chr && chr < 0x10000)
|| (chr > 0x10FFFF))
bHasInvalidChar = true;
}
// new size is original size + max size of encoding * num Encodings + length of XHTML wrapper
int nNewSize = nLen + (nNeedsEncoding * 7) + 57;
if (bIncludePI)
nNewSize += 21;
StringBuilder sRet = new StringBuilder(nNewSize);
if (bIncludePI)
sRet.append("");
sRet.append("");
int nSpaceRun = 0;
for (int i = 0; i < nLen; ) {
char chr = sSrc.charAt(i++);
//close up any space run if we're not a space
if (nSpaceRun > 0 && chr != 0x20) {
sRet.append(' ') ;
if (nSpaceRun > 1)
sRet.append("");
nSpaceRun = 0;
}
// JavaPort: beware that the surrogate ranges used below are wrong,
// as are the checks in Java for non-BMP characters!
if (bHasInvalidChar &&
((chr < 0x20 && (chr != 0x09) && (chr != 0x0A) && (chr != 0x0D))
|| (0xD7FF < chr && chr < 0xE000)
|| (0xFFFD < chr && chr < 0x10000)
|| (chr > 0x10FFFF)))
continue;
if (chr == '<')
sRet.append("<");
else if (chr == '&')
sRet.append("&");
else if (chr == 0x0D)
sRet.append("
"); // writen with no leading 0's
else if (chr == 0x0A)
sRet.append("
"); // write out a
for any line feed
else if (chr == 0x09)
sRet.append(" "); //tabs
else if (chr == 0x20) {
nSpaceRun++;
if (nSpaceRun == 2) //starting a space run
sRet.append("");
if (nSpaceRun >=2)
sRet.append(" ");
}
else if (chr == '>')
sRet.append(">");
else
sRet.append(chr);
//if we're at the end, close up any space run
if (nSpaceRun > 0 && (i == nLen)) {
sRet.append(' ');
if (nSpaceRun > 1)
sRet.append("");
}
}
sRet.append("
");
return sRet.toString();
}
private static boolean isValidEntityReference(String src, int index) {
assert src.charAt(index) == '&';
int semicolon = src.indexOf(';', index);
if (semicolon - index >= 3) {
String ent = src.substring(index, semicolon);
String entList = "<>&'"";
if (entList.contains(ent)) {
return true;
}
else if (ent.startsWith("")) {
for (int j = 3; j < ent.length(); j++) {
char h = ent.charAt(j);
if ('0' <= h && h <= '9')
continue;
else if ('a' <= h && h <= 'f')
continue;
else if ('A' <= h && h <= 'F')
continue;
return false;
}
return true;
}
else if (ent.startsWith("")) {
for (int j = 2; j < ent.length(); j++) {
char n = ent.charAt(j);
if ('0' <= n && n <= '9')
continue;
return false;
}
return true;
}
}
return false;
}
/*
 * Disallow instances of this class; the private constructor is the only
 * constructor it declares.
 */
private StringUtils() {
}
}