// com.adobe.xfa.ut.StringUtils Maven / Gradle / Ivy
//
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of aem-sdk-api Show documentation
// The Adobe Experience Manager SDK
// There is a newer version: 2024.11.18598.20241113T125352Z-241000
/*
 * ADOBE CONFIDENTIAL
 *
 * Copyright 2005 Adobe Systems Incorporated All Rights Reserved.
 *
 * NOTICE: All information contained herein is, and remains the property of
 * Adobe Systems Incorporated and its suppliers, if any. The intellectual and
 * technical concepts contained herein are proprietary to Adobe Systems
 * Incorporated and its suppliers and may be covered by U.S. and Foreign
 * Patents, patents in process, and are protected by trade secret or copyright
 * law. Dissemination of this information or reproduction of this material
 * is strictly forbidden unless prior written permission is obtained from
 * Adobe Systems Incorporated.
 */
package com.adobe.xfa.ut;

import java.util.Comparator;

/**
 * This class provides some utility methods that were available in jfString, but
 * are not part of the Java String interface.
 *
 * <p>All members are static and the class cannot be instantiated.
 *
 * @exclude from published api -- Mike Tardif, May 2006.
 */

public final class StringUtils {

	/**
	 * Publicly available case-insensitive comparator.  You can also use the
	 * static method getCaseInsensitiveComparator() to get one.
	 */
	public final static Comparator<String> CASE_INSENSITIVE_COMPARATOR = String.CASE_INSENSITIVE_ORDER;

	/**
	 * Compares Strings based on UCS codepoint values.
	 */
	public final static Comparator<String> UCS_CODEPOINT_COMPARATOR = new CodePointComparator();

	/**
	 * Compares Strings based on UCS codepoint values rather than UTF-16 code
	 * unit values, so a supplementary character (encoded as a surrogate pair)
	 * sorts after every BMP character. An unmatched surrogate is compared
	 * using its own char value.
	 */
	private static class CodePointComparator implements Comparator<String>, java.io.Serializable {

		private static final long serialVersionUID = -8178230179838097500L;

		/**
		 * @param s1 first string; must not be null.
		 * @param s2 second string; must not be null.
		 * @return a negative value, zero, or a positive value as s1 is less
		 *         than, equal to, or greater than s2 in code point order.
		 */
		@Override
		public int compare(String s1, String s2) {

			final int length1 = s1.length();
			final int length2 = s2.length();
			int i = 0;
			int j = 0;

			while (i < length1 && j < length2) {
				// codePointAt() combines a matched surrogate pair and returns
				// an unmatched surrogate unchanged, bounds-checking against
				// the string it is called on.
				final int codePoint1 = s1.codePointAt(i);
				final int codePoint2 = s2.codePointAt(j);

				// Code points never exceed 0x10FFFF, so this cannot overflow.
				if (codePoint1 != codePoint2)
					return codePoint1 - codePoint2;

				i += Character.charCount(codePoint1);
				j += Character.charCount(codePoint2);
			}

			// One string is a prefix of the other: the shorter sorts first.
			if (i == length1)
				return j == length2 ? 0 : -1;

			return 1;
		}
	}

	/**
	 * Set of characters considered to be white space.
	 */
	public final static String WHITE_SPACE = " \t\n\r\u00A0\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u200B\u3000\uFEFF";

	// Entity and character references emitted by the toXML()/toXHTML() family.
	// NOTE(review): these literals were damaged when this file was extracted
	// from an HTML page (entities were decoded in place). The named entities
	// are restored from the Javadoc of toXML(); the numeric references use the
	// spelling of Canonical XML, whose escaping rules this code mirrors --
	// confirm against the upstream source.
	private static final String ENTITY_AMP = "&amp;";
	private static final String ENTITY_LT = "&lt;";
	private static final String ENTITY_GT = "&gt;";
	private static final String ENTITY_QUOT = "&quot;";
	private static final String ENTITY_APOS = "&apos;";
	private static final String CHAR_REF_CR = "&#xD;";	// written with no leading 0's
	private static final String CHAR_REF_LF = "&#xA;";	// written with no leading 0's
	private static final String CHAR_REF_TAB = "&#x9;";	// written with no leading 0's

	// Fixed markup emitted by toXHTML(). The lengths of these strings (21 and
	// 43 + 3 + 4 + 7 = 57) match the buffer-size constants of the original
	// implementation.
	private static final String XHTML_PI = "<?xml version=\"1.0\"?>";
	private static final String XHTML_OPEN = "<body xmlns=\"http://www.w3.org/1999/xhtml\"><p>";
	private static final String XHTML_CLOSE = "</p></body>";
	private static final String XHTML_LINE_BREAK = "<br/>";

	// NOTE(review): the four literals below reached this file with their
	// markup stripped by the HTML extraction and are kept exactly as found.
	// Presumably they were a space-run <span> start tag, a non-breaking-space
	// reference, the matching </span>, and a non-breaking-space replacement
	// for tabs -- TODO restore from the upstream source.
	private static final String XHTML_SPACE_RUN_OPEN = "";
	private static final String XHTML_SPACE_RUN_FILL = " ";
	private static final String XHTML_SPACE_RUN_CLOSE = "";
	private static final String XHTML_TAB = " ";

	/**
	 * General-purpose string equality comparison for strings that may be
	 * null.
	 * This method compares two strings for equality, allowing for possible
	 * null references and non-interned strings.
	 * @param s1 First string to compare.
	 * @param s2 Second string to compare.
	 * @return True if the strings are equal; false if not.  Note that two
	 * null references would be considered equal.
	 */
	public static boolean equalsWithNull (String s1, String s2) {
		// No reference comparison of two strings is needed: String.equals()
		// already short-circuits on identity.
		if (s1 == null)
			return s2 == null;

		return s1.equals (s2);
	}

	/**
	 * Scan a string for a string, but ignore case.
	 *
	 * Only ASCII case folding is considered. Comparisons are done on a
	 * character by character basis (i.e., UTF-16 code units, not Unicode
	 * code points).
	 *
	 * @param source
	 *            the string being searched.
	 * @param target
	 *            the string to search for.
	 * @param fromIndex
	 *            the index to begin searching from. A negative value is
	 *            treated as 0.
	 * @return the start position for where the string was found. If
	 *         the string was not found, -1. An empty target is found at
	 *         fromIndex, or at source.length() when fromIndex is at or
	 *         past the end of source.
	 */
	public static int findNoCase(CharSequence source, CharSequence target, int fromIndex) {

		final int sourceLength = source.length();
		final int targetLength = target.length();

		if (fromIndex >= sourceLength)
			return targetLength == 0 ? sourceLength : -1;

		if (fromIndex < 0)
			fromIndex = 0;

		if (targetLength == 0)
			return fromIndex;

		// Last position at which target still fits inside source.
		final int lastStart = sourceLength - targetLength;

		for (int start = fromIndex; start <= lastStart; start++) {
			if (matchesNoCase(source, start, target))
				return start;
		}

		return -1;
	}

	/**
	 * Tests whether target occurs in source at position start, ignoring ASCII
	 * case. The caller guarantees that target fits at that position.
	 */
	private static boolean matchesNoCase(CharSequence source, int start, CharSequence target) {
		final int targetLength = target.length();
		for (int k = 0; k < targetLength; k++) {
			final char c1 = source.charAt(start + k);
			final char c2 = target.charAt(k);

			if (c1 != c2 && asciiToLower(c1) != asciiToLower(c2))
				return false;
		}

		return true;
	}

	/**
	 * Folds the ASCII letters 'A'..'Z' to lower case; every other character
	 * is returned unchanged.
	 */
	private static char asciiToLower(char c) {
		if ('A' <= c && c <= 'Z')
			return (char) (c + ('a' - 'A'));

		return c;
	}

	/**
	 * Return a comparator that performs case-insensitive string
	 * comparisons.
	 * @return A comparator that performs case insensitive comparisons.
	 */
	public static Comparator<String> getCaseInsensitiveComparator () {
		return CASE_INSENSITIVE_COMPARATOR;
	}

	/**
	 * Empty string test.
	 * Short-hand to test a string for null pointer or empty condition.
	 * @param s String to be tested.
	 * @return True if the given string reference is null or empty; false otherwise.
	 */
	public static boolean isEmpty (CharSequence s) {
		return (s == null) || (s.length() == 0);
	}

	/**
	 * Parses a decimal integer.
	 * @param s the text to parse.
	 * @return the parsed value, or null if s is not a valid decimal integer.
	 */
	public static Integer number(String s) {
		return number(s, 10);
	}

	/**
	 * Parses an integer in the given radix.
	 * @param s the text to parse.
	 * @param radix the radix to use.
	 * @return the parsed value, or null if Integer.parseInt() rejects the input.
	 */
	public static Integer number(String s, int radix) {
		try {
			return Integer.valueOf(Integer.parseInt(s, radix));
		} catch (NumberFormatException e) {
			// Deliberate: a failed parse is reported to the caller as null.
			return null;
		}
	}

	/**
	 * Parses a decimal long integer.
	 * @param s the text to parse.
	 * @return the parsed value, or null if s is not a valid decimal long.
	 */
	public static Long longNumber(String s) {
		return longNumber(s, 10);
	}

	/**
	 * Parses a long integer in the given radix.
	 * @param s the text to parse.
	 * @param radix the radix to use.
	 * @return the parsed value, or null if Long.parseLong() rejects the input.
	 */
	public static Long longNumber(String s, int radix) {
		try {
			return Long.valueOf(Long.parseLong(s, radix));
		} catch (NumberFormatException e) {
			// Deliberate: a failed parse is reported to the caller as null.
			return null;
		}
	}

	/**
	 * Removes the next token from the front of a buffer and returns it.
	 *
	 * Leading white space (as defined by WHITE_SPACE) is skipped. The token
	 * then extends to the next white space character that is not inside
	 * double quotes. Quote characters delimit quoted sections and are not
	 * part of the token, except that inside a quoted section a backslash
	 * followed by a quote yields a literal quote. A backslash outside quotes
	 * is an ordinary character. Everything up to the end of the token is
	 * deleted from source.
	 *
	 * @param source the buffer to consume from; left unmodified when it
	 *            holds only white space.
	 * @return the token, or null if source is empty or holds only white space.
	 */
	public static String parseToken (StringBuilder source) {
		final int length = source.length();

		int pos = 0;
		while (pos < length && WHITE_SPACE.indexOf (source.charAt (pos)) >= 0)
			pos++;

		if (pos == length)
			return null;

		final StringBuilder token = new StringBuilder();
		boolean quoted = false;
		boolean backslashPending = false;
		for (; pos < length; pos++) {
			final char c = source.charAt (pos);
			if (!quoted && WHITE_SPACE.indexOf (c) >= 0)
				break;

			boolean literal;
			if (c == '"') {
				if (backslashPending) {
					// Escaped quote: emit the quote, drop the backslash.
					backslashPending = false;
					literal = true;
				} else {
					quoted = !quoted;
					literal = false;
				}
			} else if (c == '\\' && quoted) {
				// Hold the backslash until the next character is known.
				backslashPending = true;
				literal = false;
			} else {
				literal = true;
			}

			if (literal) {
				if (backslashPending) {
					// The backslash did not escape a quote, so it is kept.
					token.append ('\\');
					backslashPending = false;
				}
				token.append (c);
			}
		}
		source.delete (0, pos);
		return token.toString();
	}

	/**
	 * Replaces every occurrence of a character in a buffer.
	 * @param buf the buffer to modify in place.
	 * @param find the character to look for.
	 * @param rep the replacement character.
	 */
	public static void replace(StringBuilder buf, char find, char rep) {
		replace(buf, find, rep, 0, buf.length());
	}

	/**
	 * Replaces every occurrence of a character in part of a buffer.
	 * @param buf the buffer to modify in place.
	 * @param find the character to look for.
	 * @param rep the replacement character.
	 * @param start index of the first character to examine (inclusive).
	 * @param limit index at which to stop (exclusive).
	 */
	public static void replace(StringBuilder buf, char find, char rep,
			int start, int limit) {
		for (int i = start; i < limit; i++) {
			if (buf.charAt(i) == find)
				buf.setCharAt(i, rep);
		}
	}

	/**
	 * Parses a decimal integer, substituting 0 for invalid input.
	 * @param s the text to parse.
	 * @return the parsed value, or 0 if s could not be parsed.
	 */
	public static int safeNumber(String s) {
		return safeNumber(s, 10);
	}

	/**
	 * Parses an integer in the given radix, substituting 0 for invalid input.
	 * @param s the text to parse.
	 * @param radix the radix to use.
	 * @return the parsed value, or 0 if s could not be parsed.
	 */
	public static int safeNumber(String s, int radix) {
		final Integer num = number(s, radix);
		return (num == null) ? 0 : num.intValue();
	}

	/**
	 * Parses a decimal long integer, substituting 0 for invalid input.
	 * @param s the text to parse.
	 * @return the parsed value, or 0 if s could not be parsed.
	 */
	public static long safeLongNumber(String s) {
		return safeLongNumber(s, 10);
	}

	/**
	 * Parses a long integer in the given radix, substituting 0 for invalid input.
	 * @param s the text to parse.
	 * @param radix the radix to use.
	 * @return the parsed value, or 0 if s could not be parsed.
	 */
	public static long safeLongNumber(String s, int radix) {
		final Long num = longNumber(s, radix);
		return (num == null) ? 0L : num.longValue();
	}

	/**
	 * Scan a string for the first character not in the given set. Similar to
	 * strspn().
	 *
	 * @param src -
	 *            the string to scan
	 * @param sSkip -
	 *            the characters to skip over
	 * @param nOffset -
	 *            the position where to start the scan. Default = 0
	 * @return The position, relative to nOffset, for the first character not
	 *         found in the given set
	 */
	static public int skipOver(CharSequence src, String sSkip, int nOffset /* =0 */) {
		int pos = nOffset;
		while (pos < src.length() && sSkip.indexOf(src.charAt(pos)) != -1)
			pos++;

		return pos - nOffset;
	}

	/**
	 * Scan a string for the first character in the given set. Similar to
	 * strcspn().
	 *
	 * @param src -
	 *            the string to scan
	 * @param sSkip -
	 *            the characters to scan for
	 * @param nOffset -
	 *            the position where to start the scan. Default = 0.
	 * @return The position, relative to nOffset, for the first character found
	 *         in the given set
	 */
	static public int skipUntil(CharSequence src, String sSkip, int nOffset /* =0 */) {
		int pos = nOffset;
		while (pos < src.length() && sSkip.indexOf(src.charAt(pos)) == -1)
			pos++;

		return pos - nOffset;
	}

	/**
	 * Remove white space from the end of a string buffer. White space is
	 * whatever Character.isWhitespace() accepts, not the WHITE_SPACE set.
	 * @param buf - String buffer to be trimmed.
	 */
	public static void trim(StringBuilder buf) {
		int end = buf.length();
		while (end > 0 && Character.isWhitespace(buf.charAt(end - 1)))
			end--;

		buf.delete(end, buf.length());
	}

	/**
	 * Remove white space from the start of a string buffer. White space is
	 * whatever Character.isWhitespace() accepts, not the WHITE_SPACE set.
	 * @param buf - String buffer to be trimmed.
	 */
	public static void trimStart(StringBuilder buf) {
		final int length = buf.length();
		int start = 0;
		while (start < length && Character.isWhitespace(buf.charAt(start)))
			start++;

		buf.delete(0, start);
	}

	/**
	 * Remove white space (per Character.isWhitespace()) from the end of a string.
	 * @param s the string to trim.
	 * @return the trimmed string; s itself when there is nothing to remove.
	 */
	public static String trim(String s) {

		final int length = s.length();
		int endIndex = length;

		while (endIndex > 0 && Character.isWhitespace(s.charAt(endIndex - 1)))
			endIndex--;

		return endIndex < length ? s.substring(0, endIndex) : s;
	}

	/**
	 * Remove white space (per Character.isWhitespace()) from the start of a string.
	 * @param s the string to trim.
	 * @return the trimmed string; s itself when there is nothing to remove.
	 */
	public static String trimStart(String s) {

		final int length = s.length();
		int startIndex = 0;

		while (startIndex < length && Character.isWhitespace(s.charAt(startIndex)))
			startIndex++;

		return startIndex > 0 ? s.substring(startIndex, length) : s;
	}

	/**
	 * Escapes a string for use as XML text or as a double-quoted XML
	 * attribute value.
	 *
	 * An ampersand that already starts an entity or character reference
	 * accepted by isValidEntityReference() is copied through unchanged;
	 * any other ampersand is escaped. {@code <} and carriage return are
	 * always escaped. In text, {@code >} is escaped as well. In attribute
	 * values, the double quote, tab and line feed are escaped as well.
	 * Apostrophes are never escaped.
	 *
	 * @param src the string to escape.
	 * @param isAttribute true to escape for an attribute value, false for text.
	 * @return the escaped string; src itself when nothing needs escaping.
	 * @exclude from public api.
	 */
	public static String toXML(String src, boolean isAttribute) {

		final int length = src.length();

		// First pass: count the characters to escape so that an untouched
		// string can be returned as-is and the buffer can be sized once.
		int needsEncoding = 0;
		for (int i = 0; i < length; i++) {
			final char c = src.charAt(i);

			if (c == '&') {
				if (isValidEntityReference(src, i))
					i = src.indexOf(';', i);	// skip the rest of the reference
				else
					needsEncoding++;
			}
			else if (isAttribute) {
				if (c == '<' || c == '\"' || c == '\t' || c == '\n' || c == '\r')
					needsEncoding++;
			}
			else {
				if (c == '<' || c == '>' || c == '\r')
					needsEncoding++;
			}
		}

		if (needsEncoding == 0)
			return src;

		final StringBuilder result = new StringBuilder(length + needsEncoding * 5);
		for (int i = 0; i < length; i++) {
			final char c = src.charAt(i);
			switch (c) {

			case '&':
				if (isValidEntityReference(src, i)) {
					final int semicolon = src.indexOf(';', i);
					result.append(src, i, semicolon + 1);
					i = semicolon;
				}
				else {
					result.append(ENTITY_AMP);
				}
				break;

			case '<':
				result.append(ENTITY_LT);
				break;

			case '>':
				if (isAttribute)
					result.append(c);
				else
					result.append(ENTITY_GT);
				break;

			case '"':
				if (isAttribute)
					result.append(ENTITY_QUOT);
				else
					result.append(c);
				break;

			case '\r':
				result.append(CHAR_REF_CR);
				break;

			case '\n':
				if (isAttribute)
					result.append(CHAR_REF_LF);
				else
					result.append(c);
				break;

			case '\t':
				if (isAttribute)
					result.append(CHAR_REF_TAB);
				else
					result.append(c);
				break;

			default:
				result.append(c);
				break;
			}
		}

		return result.toString();
	}

	/**
	 * The kind of XML content a string is being escaped for.
	 *
	 * @exclude from published api.
	 */
	public enum ToXMLType {
		/** Element text content. */
		XMLTEXT,
		/** An attribute value delimited by double quotes. */
		XMLATTRIBUTE_WITH_DQUOTE,
		/** An attribute value delimited by single quotes. */
		XMLATTRIBUTE_WITH_QUOTE
	}

	/**
	 * Replaces specified characters in a string with their entity references.
	 *
	 * <pre>
	 *  '&amp;' is replaced with {@code &amp;}
	 *  '&lt;' is replaced with {@code &lt;}
	 *  carriage return is replaced with a character reference
	 *  if eTargetType = XMLTEXT
	 *      '&gt;' is replaced with {@code &gt;}
	 *  if eTargetType = XMLATTRIBUTE_WITH_DQUOTE
	 *      '"' is replaced with {@code &quot;}
	 *  if eTargetType = XMLATTRIBUTE_WITH_QUOTE
	 *      ''' is replaced with {@code &apos;}
	 *  for either attribute type
	 *      tab and line feed are replaced with character references
	 * </pre>
	 *
	 * Any characters found in sOthers are replaced with hexadecimal character
	 * references, as are any characters in the range selected by cRangeMin
	 * and cRangeMax. Characters found in sExclude are always copied through
	 * unchanged. If the string contains characters that are not legal in XML
	 * (control characters other than tab, line feed and carriage return,
	 * unpaired surrogates, U+FFFE and U+FFFF), such characters are removed
	 * unless excluded. The string is scanned by code point, so a valid
	 * surrogate pair is treated as one character.
	 *
	 * @param sSrc the source string.
	 * @param eTargetType an enum indicating the target xml type.
	 * @param sOthers a list of characters to be encoded in this string;
	 * null is treated as empty.
	 * @param cRangeMin Any characters greater than or equal to this char are encoded
	 * with their entity references; '\0' means no lower bound.
	 * @param cRangeMax Any characters less than or equal to this char are encoded
	 * with their entity references; '\0' means no upper bound. When both bounds
	 * are '\0' no range encoding is done.
	 * @param sExclude a list of characters NOT to be encoded in this string;
	 * null is treated as empty.
	 * @return The converted string; sSrc itself when no character needs
	 * encoding or removal.
	 * @exclude from published api.
	 */
	public static String toXML(String sSrc, ToXMLType eTargetType /* = XMLTEXT */,
	                           String sOthers /* = "" */,
	                           char cRangeMin /* = '\0' */,
	                           char cRangeMax /* = '\0' */,
	                           String sExclude /* = "" */) {
		final String others = (sOthers == null) ? "" : sOthers;
		final String exclude = (sExclude == null) ? "" : sExclude;
		final boolean isText = (eTargetType == ToXMLType.XMLTEXT);
		final boolean isDQuoteAttr = (eTargetType == ToXMLType.XMLATTRIBUTE_WITH_DQUOTE);
		final boolean isQuoteAttr = (eTargetType == ToXMLType.XMLATTRIBUTE_WITH_QUOTE);
		final int nLen = sSrc.length();

		// First pass: decide whether anything has to change and size the
		// output. The exclusion list is only consulted in the second pass.
		boolean bHasInvalidChar = false;
		int nNeedsEncoding = 0;
		for (int i = 0; i < nLen; ) {
			final int cp = sSrc.codePointAt(i);
			i += Character.charCount(cp);

			if (cp == '&' || cp == '<' || cp == '\r')
				nNeedsEncoding++;
			else if (cp == '>' && isText)
				nNeedsEncoding++;
			else if ((cp == '\t' || cp == '\n') && !isText) // any attr
				nNeedsEncoding++;
			else if (cp == '\"' && isDQuoteAttr)
				nNeedsEncoding++;
			else if (cp == '\'' && isQuoteAttr)
				nNeedsEncoding++;
			else if (inEncodeRange(cp, cRangeMin, cRangeMax))
				nNeedsEncoding++;
			else if (isInvalidXmlChar(cp))
				bHasInvalidChar = true;
			else if (others.indexOf(cp) >= 0)
				nNeedsEncoding++;
		}

		if (nNeedsEncoding == 0 && !bHasInvalidChar)
			return sSrc;

		final StringBuilder retStr = new StringBuilder(nLen + (nNeedsEncoding * 12)); // See appendHex()
		for (int i = 0; i < nLen; ) {
			final int cp = sSrc.codePointAt(i);
			i += Character.charCount(cp);

			// Excluded characters win over every other rule.
			if (exclude.indexOf(cp) >= 0) {
				retStr.appendCodePoint(cp);
				continue;
			}

			// Invalid characters are only stripped when the first pass saw
			// one that no encoding rule claimed.
			if (bHasInvalidChar && isInvalidXmlChar(cp))
				continue;

			if (cp == '<')
				retStr.append(ENTITY_LT);
			else if (cp == '&')
				retStr.append(ENTITY_AMP);
			else if (cp == '\r')
				retStr.append(CHAR_REF_CR);
			else if (cp == '>' && isText)
				retStr.append(ENTITY_GT);
			else if (cp == '\"' && isDQuoteAttr)
				retStr.append(ENTITY_QUOT);
			else if (cp == '\'' && isQuoteAttr)
				retStr.append(ENTITY_APOS);
			else if (cp == '\n' && !isText) // any attr
				retStr.append(CHAR_REF_LF);
			else if (cp == '\t' && !isText) // any attr
				retStr.append(CHAR_REF_TAB);
			else if (inEncodeRange(cp, cRangeMin, cRangeMax))
				appendHex(retStr, cp);
			else if (others.indexOf(cp) >= 0)
				appendHex(retStr, cp);
			else
				retStr.appendCodePoint(cp);
		}

		return retStr.toString();
	}

	/**
	 * Tests a code point against the encode range of toXML(). A bound of
	 * '\0' means "unbounded on that side"; with both bounds '\0' nothing is
	 * in range.
	 */
	private static boolean inEncodeRange(int codePoint, char cRangeMin, char cRangeMax) {
		if (cRangeMin == 0 && cRangeMax == 0)
			return false;

		if (cRangeMin == 0)
			return codePoint <= cRangeMax;

		if (cRangeMax == 0)
			return cRangeMin <= codePoint;

		return cRangeMin <= codePoint && codePoint <= cRangeMax;
	}

	/**
	 * Tests whether a code point may not appear in XML: a control character
	 * other than tab, line feed and carriage return, a surrogate (which, as a
	 * code point, can only be an unpaired one), U+FFFE, U+FFFF, or a value
	 * beyond U+10FFFF.
	 */
	private static boolean isInvalidXmlChar(int codePoint) {
		if (codePoint < 0x20)
			return codePoint != 0x09 && codePoint != 0x0A && codePoint != 0x0D;

		return (0xD7FF < codePoint && codePoint < 0xE000)
			|| codePoint == 0xFFFE
			|| codePoint == 0xFFFF
			|| codePoint > 0x10FFFF;
	}

	/**
	 * Appends a hexadecimal character reference for a code point, using
	 * lower-case digits and no leading zeros.
	 */
	private static void appendHex(StringBuilder srcStr, int codePoint) {
		srcStr.append("&#x").append(Integer.toHexString(codePoint)).append(';');
	}

	/**
	 * Replaces specified characters in a string with their entity references
	 * and wraps the text in {@code <body>} and {@code <p>} elements.
	 *
	 * <pre>
	 *  '&amp;' is replaced with {@code &amp;}
	 *  '&lt;' is replaced with {@code &lt;}
	 *  '&gt;' is replaced with {@code &gt;}
	 * </pre>
	 *
	 * All line feeds are replaced with {@code <br/>}. Carriage returns are
	 * replaced with a character reference. Characters that are not legal in
	 * XML are removed. Tabs and runs of two or more spaces are replaced with
	 * dedicated markup (see the NOTE(review) on the XHTML_SPACE_RUN_* and
	 * XHTML_TAB constants).
	 *
	 * @param sSrc the source string.
	 * @param bIncludePI true if the string should be prefixed with an XML
	 * declaration.
	 * @return The converted string.
	 * @exclude from published api.
	 */
	public static String toXHTML(String sSrc, boolean bIncludePI) {
		final int nLen = sSrc.length();

		// First pass: count the replacements to size the buffer.
		int nNeedsEncoding = 0;
		for (int i = 0; i < nLen; i++) {
			final char chr = sSrc.charAt(i);
			if (chr == '&' || chr == '<' || chr == 0x0A || chr == 0x0D || chr == '>')
				nNeedsEncoding++;
		}

		// new size is original size + max size of encoding * num Encodings + length of XHTML wrapper
		int nNewSize = nLen + (nNeedsEncoding * 7) + XHTML_OPEN.length() + XHTML_CLOSE.length();
		if (bIncludePI)
			nNewSize += XHTML_PI.length();

		final StringBuilder sRet = new StringBuilder(nNewSize);
		if (bIncludePI)
			sRet.append(XHTML_PI);
		sRet.append(XHTML_OPEN);

		int nSpaceRun = 0;
		for (int i = 0; i < nLen; ) {
			final int cp = sSrc.codePointAt(i);
			i += Character.charCount(cp);

			// close up any space run if we're not a space
			if (nSpaceRun > 0 && cp != 0x20) {
				closeSpaceRun(sRet, nSpaceRun);
				nSpaceRun = 0;
			}

			if (isInvalidXmlChar(cp))
				continue;

			switch (cp) {
			case '<':
				sRet.append(ENTITY_LT);
				break;
			case '&':
				sRet.append(ENTITY_AMP);
				break;
			case '>':
				sRet.append(ENTITY_GT);
				break;
			case 0x0D:
				sRet.append(CHAR_REF_CR);
				break;
			case 0x0A:
				sRet.append(XHTML_LINE_BREAK);	// write out a <br/> for any line feed
				break;
			case 0x09:
				sRet.append(XHTML_TAB);	// tabs
				break;
			case 0x20:
				// The first space of a run is held back; it is written as
				// the final, ordinary space when the run is closed.
				nSpaceRun++;
				if (nSpaceRun == 2)	// starting a space run
					sRet.append(XHTML_SPACE_RUN_OPEN);
				if (nSpaceRun >= 2)
					sRet.append(XHTML_SPACE_RUN_FILL);
				break;
			default:
				sRet.appendCodePoint(cp);
				break;
			}
		}

		// if we ended on a space, close up the pending space run
		if (nSpaceRun > 0)
			closeSpaceRun(sRet, nSpaceRun);

		sRet.append(XHTML_CLOSE);
		return sRet.toString();
	}

	/**
	 * Writes the held-back final space of a run of spaces and, for a run of
	 * two or more, the closing space-run markup.
	 */
	private static void closeSpaceRun(StringBuilder sRet, int nSpaceRun) {
		sRet.append(' ');
		if (nSpaceRun > 1)
			sRet.append(XHTML_SPACE_RUN_CLOSE);
	}

	/**
	 * Tests whether the ampersand at the given index starts a reference that
	 * must not be escaped again: one of the five predefined XML entities, or
	 * a decimal or hexadecimal character reference with at least one digit.
	 *
	 * @param src the string being escaped.
	 * @param index the index of an ampersand in src.
	 * @return true if a complete, well-formed reference starts at index.
	 */
	private static boolean isValidEntityReference(String src, int index) {
		assert src.charAt(index) == '&';

		final int semicolon = src.indexOf(';', index);
		if (semicolon - index < 3)
			return false;	// no semicolon, or too short to be a reference

		// The reference without its terminating semicolon, e.g. "&amp".
		final String ent = src.substring(index, semicolon);

		if (ent.startsWith("&#x")) {
			if (ent.length() == 3)
				return false;	// "&#x;" has no digits

			for (int j = 3; j < ent.length(); j++) {
				final char h = ent.charAt(j);
				final boolean isHexDigit = ('0' <= h && h <= '9')
						|| ('a' <= h && h <= 'f')
						|| ('A' <= h && h <= 'F');
				if (!isHexDigit)
					return false;
			}

			return true;
		}

		if (ent.startsWith("&#")) {
			// The length check above guarantees at least one character here.
			for (int j = 2; j < ent.length(); j++) {
				final char n = ent.charAt(j);
				if (n < '0' || '9' < n)
					return false;
			}

			return true;
		}

		// Exact match only: a fragment such as "&am" is not an entity name.
		return ent.equals("&lt") || ent.equals("&gt") || ent.equals("&amp")
				|| ent.equals("&apos") || ent.equals("&quot");
	}

	/*
	 * Disallow instance of this class.
	 */
	private StringUtils() {
	}

}