All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.adobe.xfa.ut.StringUtils Maven / Gradle / Ivy

There is a newer version: 2024.11.18598.20241113T125352Z-241000
Show newest version
/*
 * ADOBE CONFIDENTIAL
 *
 * Copyright 2005 Adobe Systems Incorporated All Rights Reserved.
 *
 * NOTICE: All information contained herein is, and remains the property of
 * Adobe Systems Incorporated and its suppliers, if any. The intellectual and
 * technical concepts contained herein are proprietary to Adobe Systems
 * Incorporated and its suppliers and may be covered by U.S. and Foreign
 * Patents, patents in process, and are protected by trade secret or copyright
 * law. Dissemination of this information or reproduction of this material
 * is strictly forbidden unless prior written permission is obtained from
 * Adobe Systems Incorporated.
 */
package com.adobe.xfa.ut;

import java.util.Comparator;

/**
 * This class provides some utility methods that were available in jfString, but
 * are not part of the Java String interface.
 * 
 * @exclude from published api -- Mike Tardif, May 2006.
 */

public final class StringUtils {

	/**
	 * Publicly available case-insensitive comparator.  You can also use the
	 * static method getCaseInsensitiveComparator() to get one.
	 */
	public final static Comparator<String> CASE_INSENSITIVE_COMPARATOR = String.CASE_INSENSITIVE_ORDER;

	/**
	 * Compares Strings based on UCS codepoint values.
	 */
	public final static Comparator<String> UCS_CODEPOINT_COMPARATOR = new CodePointComparator();

	/**
	 * Compares Strings based on UCS codepoint values rather than UTF-16 code
	 * unit values.  A matched surrogate pair is compared as the single code
	 * point it encodes; an unmatched surrogate is compared by its own value.
	 */
	private static class CodePointComparator implements Comparator<String>, java.io.Serializable {

		private static final long serialVersionUID = -8178230179838097500L;

		/**
		 * @param s1 First string to compare; must not be null.
		 * @param s2 Second string to compare; must not be null.
		 * @return A negative value, zero, or a positive value as s1 sorts
		 * before, equal to, or after s2 in code point order.
		 */
		public int compare(String s1, String s2) {

			final int length1 = s1.length();
			final int length2 = s2.length();
			int i = 0;
			int j = 0;

			while (i < length1 && j < length2) {
				// codePointAt() combines a matched surrogate pair and returns an
				// unmatched surrogate unchanged.  Each string is bounds-checked
				// against its own length.
				final int codePoint1 = s1.codePointAt(i);
				final int codePoint2 = s2.codePointAt(j);

				if (codePoint1 != codePoint2) {
					// Code points never exceed 0x10FFFF, so this cannot overflow.
					return codePoint1 - codePoint2;
				}

				i += Character.charCount(codePoint1);
				j += Character.charCount(codePoint2);
			}

			// One string is a prefix of the other (or they are equal).
			if (i < length1) {
				return 1;
			}
			return (j < length2) ? -1 : 0;
		}
	}

	/**
	 * Set of characters considered to be white space.
	 */
	public final static String WHITE_SPACE = " \t\n\r\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u3000\uFEFF";

	/**
	 * The five predefined XML entity references, each including its
	 * terminating semicolon.
	 */
	private static final String PREDEFINED_ENTITIES = "&lt;&gt;&amp;&apos;&quot;";

	/**
	 * General-purpose string equality comparison for strings that may be
	 * null.
	 * This method compares two strings for equality, allowing for possible
	 * null references and non-interned strings.
	 * @param s1 First string to compare.
	 * @param s2 Second string to compare.
	 * @return True if the strings are equal; false if not.  Note that two
	 * null references would be considered equal.
	 */
	public static boolean equalsWithNull (String s1, String s2) {
		if (s1 == null) {
			return s2 == null;
		}
		// String.equals() handles both the identical-reference and the
		// null-argument cases.
		return s1.equals (s2);
	}

	/**
	 * Scan a string for a string, but ignore case.
	 * <p>
	 * Only ASCII case folding is considered.  Comparisons are done on a
	 * character by character basis (i.e., UTF-16 code units, not Unicode
	 * code points, are compared).
	 *
	 * @param source
	 *            the string being searched.
	 * @param target
	 *            the string to search for.
	 * @param fromIndex
	 *            the index to begin searching from.  Negative values are
	 *            treated as 0.
	 * @return the start position for where the string was found.  If
	 *         the string was not found, -1.
	 */
	public static int findNoCase(CharSequence source, CharSequence target, int fromIndex) {

		if (fromIndex >= source.length()) {
			return (target.length() == 0 ? source.length() : -1);
		}

		if (fromIndex < 0) {
			fromIndex = 0;
		}

		if (target.length() == 0) {
			return fromIndex;
		}

		final char first = asciiToLower(target.charAt(0));
		final int max = source.length() - target.length();

		for (int i = fromIndex; i <= max; i++) {

			// Match the first character
			for (; i <= max; i++) { // NOPMD - modifying an outer loop incrementer in an inner loop
				char c = source.charAt(i);
				if (c == first || asciiToLower(c) == first) {
					break;
				}
			}

			// If we matched the first character, try matching the rest of the string
			if (i <= max) {
				int j = i + 1;
				final int end = j + target.length() - 1;
				for (int k = 1; j < end; j++, k++) {
					char c1 = source.charAt(j);
					char c2 = target.charAt(k);
					if (c1 != c2 && asciiToLower(c1) != asciiToLower(c2)) {
						break;
					}
				}

				if (j == end) {
					// We matched the whole string
					return i;
				}
			}
		}

		return -1;
	}

	/**
	 * Folds an ASCII upper case letter (A-Z) to lower case; any other
	 * character is returned unchanged.
	 */
	private static char asciiToLower(char c) {
		if (0x40 < c && c < 0x5b) {
			c += 0x20;
		}
		return c;
	}

	/**
	 * Return a comparator that performs case-insensitive string
	 * comparisons.
	 * @return A comparator that performs case insensitive comparisons.
	 */
	public static Comparator<String> getCaseInsensitiveComparator () {
		return CASE_INSENSITIVE_COMPARATOR;
	}

	/**
	 * Empty string test.
	 * Short-hand to test a string for null pointer or empty condition.
	 * @param s String to be tested.
	 * @return True if the given string reference is null or empty; false otherwise.
	 */
	public static boolean isEmpty (CharSequence s) {
		return (s == null) || (s.length() == 0);
	}

	/**
	 * Parse a string as a decimal integer.
	 * @param s String to be parsed.
	 * @return The parsed value, or null if the string is not a valid integer.
	 */
	public static Integer number(String s) {
		return number(s, 10);
	}

	/**
	 * Parse a string as an integer in the given radix.
	 * @param s String to be parsed.
	 * @param radix Radix to use while parsing.
	 * @return The parsed value, or null if the string is not a valid integer.
	 */
	public static Integer number(String s, int radix) {
		try {
			return Integer.valueOf(Integer.parseInt(s, radix));
		}
		catch (NumberFormatException e) {
			// An unparsable string is reported as null by contract.
			return null;
		}
	}

	/**
	 * Parse a string as a decimal long.
	 * @param s String to be parsed.
	 * @return The parsed value, or null if the string is not a valid long.
	 */
	public static Long longNumber(String s) {
		return longNumber(s, 10);
	}

	/**
	 * Parse a string as a long in the given radix.
	 * @param s String to be parsed.
	 * @param radix Radix to use while parsing.
	 * @return The parsed value, or null if the string is not a valid long.
	 */
	public static Long longNumber(String s, int radix) {
		try {
			return Long.valueOf(Long.parseLong(s, radix));
		}
		catch (NumberFormatException e) {
			// An unparsable string is reported as null by contract.
			return null;
		}
	}

	/**
	 * Remove and return the next white space delimited token from the start
	 * of a buffer.  Leading white space (see WHITE_SPACE) is skipped.  Double
	 * quotes group text, including white space, into one token and are not
	 * part of the result.  Inside quotes, a backslash followed by a double
	 * quote yields a literal double quote; a backslash followed by any other
	 * character is kept along with that character.
	 * @param source Buffer to parse.  The consumed text is deleted from it.
	 * If the buffer holds only white space it is left untouched.
	 * @return The token, or null if the buffer holds no token.
	 */
	public static String parseToken (StringBuilder source) {
		int i;
		for (i = 0; i < source.length(); i++) {
			if (WHITE_SPACE.indexOf (source.charAt (i)) < 0) {
				break;
			}
		}

		if (i == source.length()) {
			return null;
		}

		StringBuilder result = new StringBuilder();
		boolean quoted = false;
		boolean backslashPending = false;

		for (; i < source.length(); i++) {
			char c = source.charAt (i);
			if ((! quoted) && (WHITE_SPACE.indexOf (c) >= 0)) {
				break;
			}

			boolean process = false;
			if ((c != '\\') && (c != '"')) {
				process = true;
			} else if (quoted && (c == '\\')) {
				// Hold the backslash until we know whether it escapes a quote.
				backslashPending = true;
			} else if (c == '"') {
				if (backslashPending) {
					backslashPending = false;
					process = true;
				} else {
					quoted = ! quoted;
				}
			} else {
				process = true;
			}

			if (process) {
				if (backslashPending) {
					result.append ('\\');
					backslashPending = false;
				}
				result.append (c);
			}
		}

		source.delete (0, i);
		return result.toString();
	}

	/**
	 * Replace every occurrence of a character in a buffer.
	 * @param buf Buffer to modify in place.
	 * @param find Character to look for.
	 * @param rep Replacement character.
	 */
	public static void replace(StringBuilder buf, char find, char rep) {
		replace(buf, find, rep, 0, buf.length());
	}

	/**
	 * Replace every occurrence of a character within a range of a buffer.
	 * @param buf Buffer to modify in place.
	 * @param find Character to look for.
	 * @param rep Replacement character.
	 * @param start Index of the first character to examine.
	 * @param limit Index one past the last character to examine.
	 */
	public static void replace(StringBuilder buf, char find, char rep, int start, int limit) {
		for (int i = start; i < limit; i++) {
			if (buf.charAt(i) == find) {
				buf.setCharAt(i, rep);
			}
		}
	}

	/**
	 * Parse a string as a decimal integer.
	 * @param s String to be parsed.
	 * @return The parsed value, or 0 if the string is not a valid integer.
	 */
	public static int safeNumber(String s) {
		return safeNumber(s, 10);
	}

	/**
	 * Parse a string as an integer in the given radix.
	 * @param s String to be parsed.
	 * @param radix Radix to use while parsing.
	 * @return The parsed value, or 0 if the string is not a valid integer.
	 */
	public static int safeNumber(String s, int radix) {
		Integer num = number(s, radix);
		return (num == null) ? 0 : num.intValue();
	}

	/**
	 * Parse a string as a decimal long.
	 * @param s String to be parsed.
	 * @return The parsed value, or 0 if the string is not a valid long.
	 */
	public static long safeLongNumber(String s) {
		return safeLongNumber(s, 10);
	}

	/**
	 * Parse a string as a long in the given radix.
	 * @param s String to be parsed.
	 * @param radix Radix to use while parsing.
	 * @return The parsed value, or 0 if the string is not a valid long.
	 */
	public static long safeLongNumber(String s, int radix) {
		Long num = longNumber(s, radix);
		return (num == null) ? 0L : num.longValue();
	}

	/**
	 * Scan this string for the first character not in the given set. Similar to
	 * strspn().
	 *
	 * @param src -
	 *            the string to scan
	 * @param sSkip -
	 *            the characters to scan for
	 * @param nOffset -
	 *            the position where to start the scan. Default = 0
	 * @return The position, relative to nOffset, for the first character not
	 *         found in the given set
	 */
	static public int skipOver(CharSequence src, String sSkip, int nOffset /* =0 */) {
		// starting at the offset position, scan the characters in this string
		// until it does not match any of the characters in the given set.
		int nPos = nOffset;
		while (nPos < src.length() && sSkip.indexOf(src.charAt(nPos)) != -1) {
			nPos++;
		}
		return nPos - nOffset;
	}

	/**
	 * Scan this string for the first character in the given set. Similar to
	 * strcspn().
	 *
	 * @param src -
	 *            the string to scan
	 * @param sSkip -
	 *            the characters to scan for
	 * @param nOffset -
	 *            the position where to start the scan. Default = 0.
	 * @return The position, relative to nOffset, for the first character found
	 *         in the given set
	 */
	static public int skipUntil(CharSequence src, String sSkip, int nOffset /* =0 */) {
		// starting at the offset position, scan the characters in this string
		// until it matches one of the characters in the given set.
		int nPos = nOffset;
		while (nPos < src.length() && sSkip.indexOf(src.charAt(nPos)) == -1) {
			nPos++;
		}
		return nPos - nOffset;
	}

	/**
	 * Remove white space from the end of a string buffer.
	 * @param buf - String buffer to be trimmed.
	 */
	public static void trim(StringBuilder buf) {
		int trunc;
		for (trunc = buf.length(); trunc > 0; trunc--) {
			if (!Character.isWhitespace(buf.charAt(trunc - 1))) {
				break;
			}
		}
		buf.delete(trunc, buf.length());
	}

	/**
	 * Remove white space from the start of a string buffer.
	 * @param buf - String buffer to be trimmed.
	 */
	public static void trimStart(StringBuilder buf) {
		int start;
		for (start = 0; start < buf.length(); start++) {
			if (!Character.isWhitespace(buf.charAt(start))) {
				break;
			}
		}
		buf.delete(0, start);
	}

	/**
	 * Remove white space from the end of a string.  Note that, unlike
	 * String.trim(), leading white space is kept.
	 * @param s String to be trimmed.
	 * @return The trimmed string; s itself if there was nothing to trim.
	 */
	public static String trim(String s) {
		final int length = s.length();
		int endIndex = length;
		while (endIndex > 0 && Character.isWhitespace(s.charAt(endIndex - 1))) {
			endIndex--;
		}
		return endIndex < length ? s.substring(0, endIndex) : s;
	}

	/**
	 * Remove white space from the start of a string.
	 * @param s String to be trimmed.
	 * @return The trimmed string; s itself if there was nothing to trim.
	 */
	public static String trimStart(String s) {
		final int length = s.length();
		int startIndex = 0;
		while (startIndex < length && Character.isWhitespace(s.charAt(startIndex))) {
			startIndex++;
		}
		return startIndex > 0 ? s.substring(startIndex, length) : s;
	}

	/**
	 * Escapes a string for use as XML text or as a double-quoted XML
	 * attribute value.  Entity and character references that are already
	 * present and well formed are copied through unchanged.
	 * <ul>
	 * <li>always: {@code &} becomes {@code &amp;}, {@code <} becomes
	 * {@code &lt;} and carriage return becomes {@code &#xD;}</li>
	 * <li>text only: {@code >} becomes {@code &gt;}</li>
	 * <li>attribute only: {@code "} becomes {@code &quot;}, line feed becomes
	 * {@code &#xA;} and tab becomes {@code &#x9;}</li>
	 * </ul>
	 * NOTE(review): the entity literals were restored from an entity-decoded
	 * listing; the upper case hex digits follow Canonical XML - confirm
	 * against the shipped source.
	 *
	 * @param src the source string.
	 * @param isAttribute true to escape for an attribute value, false to
	 * escape for element text.
	 * @return The escaped string; src itself if nothing needed escaping.
	 * @exclude from public api.
	 */
	public static String toXML(String src, boolean isAttribute) {

		final int length = src.length();

		// First pass: count the characters that need escaping so that the
		// common nothing-to-do case allocates nothing.
		int needsEncoding = 0;
		for (int i = 0; i < length; i++) {
			final char c = src.charAt(i);
			if (c == '&') {
				if (isValidEntityReference(src, i)) {
					i = src.indexOf(';', i);
				} else {
					needsEncoding++;
				}
			} else if (isAttribute) {
				if (c == '<' || /* c == '\'' || */ c == '\"' || c == '\t' || c == '\n' || c == '\r') {
					needsEncoding++;
				}
			} else {
				if (c == '<' || c == '>' || c == '\r') {
					needsEncoding++;
				}
			}
		}

		if (needsEncoding == 0) {
			return src;
		}

		// The longest replacement (&quot;) adds five characters.
		final StringBuilder result = new StringBuilder(length + needsEncoding * 5);

		for (int i = 0; i < length; i++) {
			final char c = src.charAt(i);
			switch (c) {
			case '&':
				if (isValidEntityReference(src, i)) {
					int semicolon = src.indexOf(';', i);
					result.append(src, i, semicolon + 1);
					i = semicolon;
				} else {
					result.append("&amp;");
				}
				break;

			case '<':
				result.append("&lt;");
				break;

			case '>':
				if (isAttribute) {
					result.append(c);
				} else {
					result.append("&gt;");
				}
				break;

			case '"':
				if (isAttribute) {
					result.append("&quot;");
				} else {
					result.append(c);
				}
				break;

			case '\r':
				result.append("&#xD;");
				break;

			case '\n':
				if (isAttribute) {
					result.append("&#xA;");
				} else {
					result.append(c);
				}
				break;

			case '\t':
				if (isAttribute) {
					result.append("&#x9;");
				} else {
					result.append(c);
				}
				break;

			default:
				result.append(c);
				break;
			}
		}

		return result.toString();
	}

	/**
	 * The kind of XML content a string is being escaped for.
	 * @exclude from published api.
	 */
	public enum ToXMLType {
		/** Element text. */
		XMLTEXT,
		/** Attribute value delimited by double quotes. */
		XMLATTRIBUTE_WITH_DQUOTE,
		/** Attribute value delimited by single quotes. */
		XMLATTRIBUTE_WITH_QUOTE
	}

	/**
	 * Replaces specified characters in the current string with their entity references.
	 * <pre>
	 *  '&amp;' is replaced with "&amp;amp;"
	 *  '&lt;' is replaced with "&amp;lt;"
	 *  if eTargetType = XMLTEXT
	 *      '&gt;' is replaced with "&amp;gt;"
	 *  if eTargetType = XMLATTRIBUTE_WITH_DQUOTE
	 *      '"' is replaced with "&amp;quot;"
	 *  if eTargetType = XMLATTRIBUTE_WITH_QUOTE
	 *      ''' is replaced with "&amp;apos;"
	 * </pre>
	 * Carriage returns are always written as a character reference; tabs and
	 * line feeds are written as character references for both attribute types.
	 * <p>
	 * Any characters found in sOthers are replaced with hexadecimal character
	 * references.  Any characters found in between and including cRangeMin and
	 * cRangeMax are replaced with hexadecimal character references.
	 * Characters that are not legal in XML are removed.  Beware that UTF-16
	 * surrogate code units are treated as illegal, so characters outside the
	 * Basic Multilingual Plane are dropped.
	 * <p>
	 * NOTE(review): the entity literals were restored from an entity-decoded
	 * listing; the upper case hex digits follow Canonical XML - confirm
	 * against the shipped source.
	 *
	 * @param sSrc the source string.
	 * @param eTargetType an enum indicating the target xml type.
	 * @param sOthers a list of characters to be encoded in this string.
	 * @param cRangeMin Any characters greater than or equal to this char are encoded
	 * with their entity references.  Zero means no lower bound.
	 * @param cRangeMax Any characters less than or equal to this char are encoded
	 * with their entity references.  Zero means no upper bound.
	 * @param sExclude a list of characters NOT to be encoded in this string.
	 * @return The converted string; sSrc itself if no character needed
	 * encoding or removal.
	 * @exclude from published api.
	 */
	public static String toXML(String sSrc,
								ToXMLType eTargetType /* = XMLTEXT */,
								String sOthers /* = "" */,
								char cRangeMin /* = '\0' */,
								char cRangeMax /* = '\0' */,
								String sExclude /* = "" */) {

		final boolean bIsAttribute = eTargetType != ToXMLType.XMLTEXT;
		final int nLen = sSrc.length();

		boolean bHasInvalidChar = false;
		int nNeedsEncoding = 0;

		// First pass: decide whether anything has to change at all.
		for (int i = 0; i < nLen; i++) {
			final char chr = sSrc.charAt(i);

			if (chr == '&' || chr == '<' || chr == '\r') {
				nNeedsEncoding++;
			} else if (chr == '>' && !bIsAttribute) {
				nNeedsEncoding++;
			} else if ((chr == '\t' || chr == '\n') && bIsAttribute) { // any attr
				nNeedsEncoding++;
			} else if (chr == '\"' && eTargetType == ToXMLType.XMLATTRIBUTE_WITH_DQUOTE) {
				nNeedsEncoding++;
			} else if (chr == '\'' && eTargetType == ToXMLType.XMLATTRIBUTE_WITH_QUOTE) {
				nNeedsEncoding++;
			} else if (isInEncodedRange(chr, cRangeMin, cRangeMax)) {
				nNeedsEncoding++;
			} else if (isInvalidXmlChar(chr)) {
				bHasInvalidChar = true;
			} else if (sOthers.indexOf(chr) >= 0) {
				nNeedsEncoding++;
			}
		}

		if (nNeedsEncoding == 0 && !bHasInvalidChar) {
			return sSrc;
		}

		// Worst case per encoded character is a full character reference; see appendHex().
		final StringBuilder retStr = new StringBuilder(nLen + (nNeedsEncoding * 12));

		for (int i = 0; i < nLen; i++) {
			final char chr = sSrc.charAt(i);

			// Excluded characters are always copied through verbatim.
			if (sExclude.indexOf(chr) >= 0) {
				retStr.append(chr);
				continue;
			}

			if (bHasInvalidChar && isInvalidXmlChar(chr)) {
				continue;
			}

			if (chr == '<') {
				retStr.append("&lt;");
			} else if (chr == '&') {
				retStr.append("&amp;");
			} else if (chr == '\r') {
				retStr.append("&#xD;"); // written with no leading 0's
			} else if (chr == '>' && !bIsAttribute) {
				retStr.append("&gt;");
			} else if (chr == '\"' && eTargetType == ToXMLType.XMLATTRIBUTE_WITH_DQUOTE) {
				retStr.append("&quot;");
			} else if (chr == '\'' && eTargetType == ToXMLType.XMLATTRIBUTE_WITH_QUOTE) {
				retStr.append("&apos;");
			} else if (chr == '\n' && bIsAttribute) { // any attr
				retStr.append("&#xA;"); // written with no leading 0's
			} else if (chr == '\t' && bIsAttribute) { // any attr
				retStr.append("&#x9;"); // written with no leading 0's
			} else if (isInEncodedRange(chr, cRangeMin, cRangeMax)) {
				appendHex(retStr, chr);
			} else if (sOthers.indexOf(chr) >= 0) {
				appendHex(retStr, chr);
			} else {
				// reaches here only if all the tests above fail.
				retStr.append(chr);
			}
		}

		return retStr.toString();
	}

	/**
	 * Tests a character against the caller supplied encoding range.  A bound
	 * of zero means that side of the range is open; two zero bounds mean no
	 * range was requested.
	 */
	private static boolean isInEncodedRange(char chr, char cRangeMin, char cRangeMax) {
		if (cRangeMin == 0 && cRangeMax == 0) {
			return false;
		}
		if (cRangeMin != 0 && chr < cRangeMin) {
			return false;
		}
		return cRangeMax == 0 || chr <= cRangeMax;
	}

	/**
	 * Tests whether a UTF-16 code unit is treated as illegal XML content:
	 * C0 controls other than tab, line feed and carriage return, surrogate
	 * code units, and U+FFFE / U+FFFF.
	 * <p>
	 * JavaPort: beware that the surrogate handling is wrong - both halves of a
	 * valid surrogate pair are reported as invalid.
	 */
	private static boolean isInvalidXmlChar(char chr) {
		return (chr < 0x20 && (chr != 0x09) && (chr != 0x0A) && (chr != 0x0D))
			|| (0xD7FF < chr && chr < 0xE000)
			|| (0xFFFD < chr);
	}

	/**
	 * Appends a hexadecimal character reference for a character, written in
	 * lower case with no leading zeros.
	 */
	private static void appendHex(StringBuilder srcStr, char srcChr) {
		srcStr.append("&#x").append(Integer.toHexString(srcChr)).append(';');
	}

	/**
	 * Replaces specified characters in the current string with their entity references
	 * and wraps the text in {@code <body>} and {@code <p>} elements.
	 * <pre>
	 *  '&amp;' is replaced with "&amp;amp;"
	 *  '&lt;' is replaced with "&amp;lt;"
	 *  '&gt;' is replaced with "&amp;gt;"
	 * </pre>
	 * Each line feed closes the current paragraph and opens a new one.  A run
	 * of two or more spaces is wrapped in an {@code xfa-spacerun} span in
	 * which all but the last space are written as non-breaking spaces.
	 * Characters that are not legal in XML are removed.  Beware that UTF-16
	 * surrogate code units are treated as illegal, so characters outside the
	 * Basic Multilingual Plane are dropped.
	 * <p>
	 * NOTE(review): the markup literals were restored from a listing that had
	 * its tags and entities stripped.  The wrapper, processing instruction
	 * and paragraph break agree with the +57, +21 and *7 size constants
	 * below; the tab replacement is the least certain - confirm against the
	 * shipped source.
	 *
	 * @param sSrc the source string.
	 * @param bIncludePI true if the string should be prefixed with an XML
	 * declaration.
	 * @return The converted string.
	 * @exclude from published api.
	 */
	public static String toXHTML(String sSrc, boolean bIncludePI) {

		final int nLen = sSrc.length();

		boolean bHasInvalidChar = false;
		int nNeedsEncoding = 0;

		for (int i = 0; i < nLen; i++) {
			final char chr = sSrc.charAt(i);
			if (chr == '&' || chr == '<' || chr == 0x0A || chr == 0x0D || chr == '>') {
				nNeedsEncoding++;
			} else if (isInvalidXmlChar(chr)) {
				bHasInvalidChar = true;
			}
		}

		// new size is original size + max size of encoding * num Encodings + length of XHTML wrapper
		int nNewSize = nLen + (nNeedsEncoding * 7) + 57;
		if (bIncludePI) {
			nNewSize += 21;
		}

		final StringBuilder sRet = new StringBuilder(nNewSize);

		if (bIncludePI) {
			sRet.append("<?xml version=\"1.0\"?>");
		}

		sRet.append("<body xmlns=\"http://www.w3.org/1999/xhtml\"><p>");

		int nSpaceRun = 0;
		for (int i = 0; i < nLen; i++) {
			final char chr = sSrc.charAt(i);

			// close up any space run if we're not a space
			if (nSpaceRun > 0 && chr != 0x20) {
				closeSpaceRun(sRet, nSpaceRun);
				nSpaceRun = 0;
			}

			if (bHasInvalidChar && isInvalidXmlChar(chr)) {
				continue;
			}

			if (chr == '<') {
				sRet.append("&lt;");
			} else if (chr == '&') {
				sRet.append("&amp;");
			} else if (chr == 0x0D) {
				sRet.append("&#xD;"); // written with no leading 0's
			} else if (chr == 0x0A) {
				sRet.append("</p><p>"); // start a new paragraph for any line feed
			} else if (chr == 0x09) {
				sRet.append("&#x9;"); // tabs
			} else if (chr == 0x20) {
				// The first space of a run is held back: it is written as the
				// plain trailing space when the run is closed.
				nSpaceRun++;
				if (nSpaceRun == 2) { // starting a space run
					sRet.append("<span style=\"xfa-spacerun:yes\">");
				}
				if (nSpaceRun >= 2) {
					sRet.append("&#160;");
				}
			} else if (chr == '>') {
				sRet.append("&gt;");
			} else {
				sRet.append(chr);
			}
		}

		// if we ended on a space, close up the pending space run
		if (nSpaceRun > 0) {
			closeSpaceRun(sRet, nSpaceRun);
		}

		sRet.append("</p></body>");

		return sRet.toString();
	}

	/**
	 * Writes the plain space that ends a run of spaces and, for a run of two
	 * or more, closes the space run span.
	 */
	private static void closeSpaceRun(StringBuilder sRet, int nSpaceRun) {
		sRet.append(' ');
		if (nSpaceRun > 1) {
			sRet.append("</span>");
		}
	}

	/**
	 * Tests whether the text starting at an ampersand is one of the five
	 * predefined entity references or a decimal / hexadecimal character
	 * reference.  The numeric value of a character reference is not checked.
	 * @param src the string being examined.
	 * @param index the position of the ampersand in src.
	 * @return True if a well formed reference starts at index.
	 */
	private static boolean isValidEntityReference(String src, int index) {

		assert src.charAt(index) == '&';

		final int semicolon = src.indexOf(';', index);
		if (semicolon - index < 3) {
			return false;
		}

		// The candidate reference, without its terminating semicolon.
		final String ent = src.substring(index, semicolon);

		// Match the complete name so that a prefix such as "&am" is rejected.
		if (PREDEFINED_ENTITIES.contains(ent + ";")) {
			return true;
		}

		if (ent.startsWith("&#x")) {
			if (ent.length() == 3) {
				return false; // "&#x;" carries no digits
			}
			for (int j = 3; j < ent.length(); j++) {
				final char h = ent.charAt(j);
				final boolean isHexDigit = ('0' <= h && h <= '9')
										|| ('a' <= h && h <= 'f')
										|| ('A' <= h && h <= 'F');
				if (!isHexDigit) {
					return false;
				}
			}
			return true;
		}

		if (ent.startsWith("&#")) {
			for (int j = 2; j < ent.length(); j++) {
				final char n = ent.charAt(j);
				if (n < '0' || '9' < n) {
					return false;
				}
			}
			return true;
		}

		return false;
	}

	/*
	 * Disallow instance of this class.
	 */
	private StringUtils() {
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy