// com.fireflysource.common.string.StringUtils Maven / Gradle / Ivy
package com.fireflysource.common.string;
import com.fireflysource.common.collection.trie.ArrayTrie;
import com.fireflysource.common.collection.trie.Trie;
import java.nio.charset.StandardCharsets;
import java.util.*;
public class StringUtils {
/** The empty string {@code ""}. */
public static final String EMPTY = "";
/**
 * Shared zero-length {@code String} array. The split methods return it for
 * empty input and pass it to {@code toArray} as the result-type token.
 */
public static final String[] EMPTY_STRING_ARRAY = new String[0];
/** Separator searched for by {@code getFilename} and {@code getFilenameExtension}. */
private static final String FOLDER_SEPARATOR = "/";
/** Character that introduces a filename extension in {@code getFilenameExtension}. */
private static final char EXTENSION_SEPARATOR = '.';
/**
 * Lower-casing lookup table used by {@code asciiToLowerCase}, indexed by
 * ASCII code and written as octal character escapes. The slots for
 * {@code 'A'}-{@code 'Z'} (octal 101-132) hold {@code 'a'}-{@code 'z'}
 * (octal 141-172); every other slot holds its own index.
 */
private static final char[] LOWER_CASE = {'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', '\010',
'\011', '\012', '\013', '\014', '\015', '\016', '\017', '\020', '\021', '\022', '\023', '\024', '\025',
'\026', '\027', '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', '\040', '\041', '\042',
'\043', '\044', '\045', '\046', '\047', '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', '\070', '\071', '\072', '\073', '\074',
'\075', '\076', '\077', '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', '\150', '\151',
'\152', '\153', '\154', '\155', '\156', '\157', '\160', '\161', '\162', '\163', '\164', '\165', '\166',
'\167', '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', '\140', '\141', '\142', '\143',
'\144', '\145', '\146', '\147', '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', '\160',
'\161', '\162', '\163', '\164', '\165', '\166', '\167', '\170', '\171', '\172', '\173', '\174', '\175',
'\176', '\177'};
/**
 * Maps accepted charset spellings to their normalized name; consulted by the
 * {@code normalizeCharset} methods. The value type is declared explicitly so
 * lookups yield {@code String} without a cast.
 * NOTE(review): 256 is presumably the trie capacity - confirm against ArrayTrie.
 */
private static final Trie<String> CHARSETS = new ArrayTrie<>(256);
/** Normalized name for the ISO-8859-1 spellings. */
private static final String ISO_8859_1 = "iso-8859-1";
/** Normalized name for the UTF-8 spellings. */
private static final String UTF8 = "utf-8";
/** Normalized name for the UTF-16 spellings. */
private static final String __UTF16 = "utf-16";

static {
    // Each normalized name is registered under its hyphenated spelling
    // and one alternate spelling.
    CHARSETS.put("utf-8", UTF8);
    CHARSETS.put("utf8", UTF8);
    CHARSETS.put("utf-16", __UTF16);
    CHARSETS.put("utf16", __UTF16);
    CHARSETS.put("iso-8859-1", ISO_8859_1);
    CHARSETS.put("iso_8859_1", ISO_8859_1);
}
/**
 * Splits the provided text into an array, using whitespace as the separator.
 * Whitespace is defined by {@link Character#isWhitespace(char)}.
 * <p>
 * The separator is not included in the returned array, and adjacent
 * separators are treated as one separator.
 * <p>
 * A {@code null} input String returns {@code null}.
 *
 * <pre>
 * StringUtils.split(null)       = null
 * StringUtils.split("")         = []
 * StringUtils.split("abc def")  = ["abc", "def"]
 * StringUtils.split("abc  def") = ["abc", "def"]
 * StringUtils.split(" abc ")    = ["abc"]
 * </pre>
 *
 * @param str the String to parse, may be null
 * @return an array of parsed Strings, {@code null} if null String input
 */
public static String[] split(String str) {
    // A null separator selects whitespace splitting; -1 means "no limit".
    return splitWorker(str, null, -1, false);
}
/**
 * Splits the provided text into an array, treating every character of
 * {@code separatorChars} as a delimiter.
 * <p>
 * The separator is not included in the returned array, and adjacent
 * separators are treated as one separator.
 * <p>
 * A {@code null} input String returns {@code null}. A {@code null}
 * {@code separatorChars} splits on whitespace.
 *
 * <pre>
 * StringUtils.split(null, *)         = null
 * StringUtils.split("", *)           = []
 * StringUtils.split("abc def", null) = ["abc", "def"]
 * StringUtils.split("abc def", " ")  = ["abc", "def"]
 * StringUtils.split("abc  def", " ") = ["abc", "def"]
 * StringUtils.split("ab:cd:ef", ":") = ["ab", "cd", "ef"]
 * </pre>
 *
 * @param str            the String to parse, may be null
 * @param separatorChars the characters used as the delimiters, {@code null}
 *                       splits on whitespace
 * @return an array of parsed Strings, {@code null} if null String input
 */
public static String[] split(String str, String separatorChars) {
    // The limited overload with a negative max imposes no limit.
    return split(str, separatorChars, -1);
}
/**
 * Splits the provided text into an array around a single separator character.
 * <p>
 * The separator is not included in the returned array, and adjacent
 * separators are treated as one separator.
 * <p>
 * A {@code null} input String returns {@code null}.
 *
 * <pre>
 * StringUtils.split(null, *)       = null
 * StringUtils.split("", *)         = []
 * StringUtils.split("a.b.c", '.')  = ["a", "b", "c"]
 * StringUtils.split("a..b.c", '.') = ["a", "b", "c"]
 * StringUtils.split("a:b:c", '.')  = ["a:b:c"]
 * StringUtils.split("a b c", ' ')  = ["a", "b", "c"]
 * </pre>
 *
 * @param str           the String to parse, may be null
 * @param separatorChar the character used as the delimiter
 * @return an array of parsed Strings, {@code null} if null String input
 * @since 2.0
 */
public static String[] split(String str, char separatorChar) {
    // Empty tokens between adjacent separators are dropped.
    final boolean preserveAllTokens = false;
    return splitWorker(str, separatorChar, preserveAllTokens);
}
/**
 * Splits the provided text into an array with a maximum length, treating
 * every character of {@code separatorChars} as a delimiter.
 * <p>
 * The separator is not included in the returned array, and adjacent
 * separators are treated as one separator.
 * <p>
 * A {@code null} input String returns {@code null}. A {@code null}
 * {@code separatorChars} splits on whitespace.
 * <p>
 * If more than {@code max} delimited substrings are found, the last returned
 * string includes all characters after the first {@code max - 1} returned
 * strings (including separator characters).
 *
 * <pre>
 * StringUtils.split(null, *, *)            = null
 * StringUtils.split("", *, *)              = []
 * StringUtils.split("ab de fg", null, 0)   = ["ab", "de", "fg"]
 * StringUtils.split("ab   de fg", null, 0) = ["ab", "de", "fg"]
 * StringUtils.split("ab:cd:ef", ":", 0)    = ["ab", "cd", "ef"]
 * StringUtils.split("ab:cd:ef", ":", 2)    = ["ab", "cd:ef"]
 * </pre>
 *
 * @param str            the String to parse, may be null
 * @param separatorChars the characters used as the delimiters, {@code null}
 *                       splits on whitespace
 * @param max            the maximum number of elements to include in the
 *                       array; a zero or negative value implies no limit
 * @return an array of parsed Strings, {@code null} if null String input
 */
public static String[] split(String str, String separatorChars, int max) {
    // Empty tokens between adjacent separators are dropped.
    final boolean preserveAllTokens = false;
    return splitWorker(str, separatorChars, max, preserveAllTokens);
}
/**
 * Performs the logic for the {@code split} methods that take a set of
 * separator characters and a maximum array length.
 *
 * @param str               the String to parse, may be null
 * @param separatorChars    the characters treated as delimiters; {@code null}
 *                          means split on whitespace
 * @param max               the maximum number of elements to include in the
 *                          array; a zero or negative value implies no limit.
 *                          Once the limit is reached the final element holds
 *                          the whole remainder, separators included.
 * @param preserveAllTokens if {@code true}, adjacent separators are treated as
 *                          empty token separators; if {@code false}, adjacent
 *                          separators are treated as one separator
 * @return an array of parsed Strings, {@code null} if null String input
 */
private static String[] splitWorker(String str, String separatorChars, int max, boolean preserveAllTokens) {
    if (str == null) {
        return null;
    }
    int len = str.length();
    if (len == 0) {
        return EMPTY_STRING_ARRAY;
    }
    // Typed list: with a raw List, toArray(String[]) would be typed Object[].
    List<String> list = new ArrayList<>();
    int sizePlus1 = 1;
    int i = 0;
    int start = 0;
    boolean match = false;      // a token character was seen since the last separator
    boolean lastMatch = false;  // the previous character was a separator that closed a token
    while (i < len) {
        if (isSeparator(str.charAt(i), separatorChars)) {
            if (match || preserveAllTokens) {
                lastMatch = true;
                if (sizePlus1++ == max) {
                    // Limit reached: the final token swallows the rest of the input.
                    i = len;
                    lastMatch = false;
                }
                list.add(str.substring(start, i));
                match = false;
            }
            start = ++i;
            continue;
        }
        lastMatch = false;
        match = true;
        i++;
    }
    // Flush the trailing token (or the trailing empty token when preserving all).
    if (match || (preserveAllTokens && lastMatch)) {
        list.add(str.substring(start, i));
    }
    return list.toArray(EMPTY_STRING_ARRAY);
}

/** Tells whether {@code c} is a delimiter: whitespace when {@code separatorChars} is null, otherwise any of its characters. */
private static boolean isSeparator(char c, String separatorChars) {
    return separatorChars == null ? Character.isWhitespace(c) : separatorChars.indexOf(c) >= 0;
}
/**
 * Performs the logic for the {@code split} methods that take a single
 * separator character and do not limit the array length.
 *
 * @param str               the String to parse, may be null
 * @param separatorChar     the separator character
 * @param preserveAllTokens if {@code true}, adjacent separators are treated as
 *                          empty token separators; if {@code false}, adjacent
 *                          separators are treated as one separator
 * @return an array of parsed Strings, {@code null} if null String input
 */
private static String[] splitWorker(String str, char separatorChar, boolean preserveAllTokens) {
    if (str == null) {
        return null;
    }
    int len = str.length();
    if (len == 0) {
        return EMPTY_STRING_ARRAY;
    }
    // Typed list: with a raw List, toArray(String[]) would be typed Object[].
    List<String> list = new ArrayList<>();
    int i = 0;
    int start = 0;
    boolean match = false;      // a token character was seen since the last separator
    boolean lastMatch = false;  // the previous character was a separator that closed a token
    while (i < len) {
        if (str.charAt(i) == separatorChar) {
            if (match || preserveAllTokens) {
                list.add(str.substring(start, i));
                match = false;
                lastMatch = true;
            }
            start = ++i;
            continue;
        }
        lastMatch = false;
        match = true;
        i++;
    }
    // Flush the trailing token (or the trailing empty token when preserving all).
    if (match || (preserveAllTokens && lastMatch)) {
        list.add(str.substring(start, i));
    }
    return list.toArray(EMPTY_STRING_ARRAY);
}
/**
 * Splits the provided text into an array, using the whole {@code separator}
 * string as the delimiter.
 * <p>
 * The separator(s) will not be included in the returned array, and adjacent
 * separators are treated as one separator.
 * <p>
 * A {@code null} input String returns {@code null}. A {@code null}
 * separator splits on whitespace.
 *
 * <pre>
 * StringUtils.splitByWholeSeparator(null, *)               = null
 * StringUtils.splitByWholeSeparator("", *)                 = []
 * StringUtils.splitByWholeSeparator("ab de fg", null)      = ["ab", "de", "fg"]
 * StringUtils.splitByWholeSeparator("ab   de fg", null)    = ["ab", "de", "fg"]
 * StringUtils.splitByWholeSeparator("ab:cd:ef", ":")       = ["ab", "cd", "ef"]
 * StringUtils.splitByWholeSeparator("ab-!-cd-!-ef", "-!-") = ["ab", "cd", "ef"]
 * </pre>
 *
 * @param str       the String to parse, may be null
 * @param separator String containing the String to be used as a delimiter,
 *                  {@code null} splits on whitespace
 * @return an array of parsed Strings, {@code null} if null String was input
 */
public static String[] splitByWholeSeparator(String str, String separator) {
    // The limited overload with a negative max imposes no limit.
    return splitByWholeSeparator(str, separator, -1);
}
/**
 * Splits the provided text into an array, using the whole {@code separator}
 * string as the delimiter and returning at most {@code max} substrings.
 * <p>
 * The separator(s) will not be included in the returned array, and adjacent
 * separators are treated as one separator.
 * <p>
 * A {@code null} input String returns {@code null}. A {@code null}
 * separator splits on whitespace.
 *
 * <pre>
 * StringUtils.splitByWholeSeparator(null, *, *)               = null
 * StringUtils.splitByWholeSeparator("", *, *)                 = []
 * StringUtils.splitByWholeSeparator("ab de fg", null, 0)      = ["ab", "de", "fg"]
 * StringUtils.splitByWholeSeparator("ab   de fg", null, 0)    = ["ab", "de", "fg"]
 * StringUtils.splitByWholeSeparator("ab:cd:ef", ":", 2)       = ["ab", "cd:ef"]
 * StringUtils.splitByWholeSeparator("ab-!-cd-!-ef", "-!-", 5) = ["ab", "cd", "ef"]
 * StringUtils.splitByWholeSeparator("ab-!-cd-!-ef", "-!-", 2) = ["ab", "cd-!-ef"]
 * </pre>
 *
 * @param str       the String to parse, may be null
 * @param separator String containing the String to be used as a delimiter,
 *                  {@code null} splits on whitespace
 * @param max       the maximum number of elements to include in the returned
 *                  array; a zero or negative value implies no limit
 * @return an array of parsed Strings, {@code null} if null String was input
 */
public static String[] splitByWholeSeparator(String str, String separator, int max) {
    // Empty tokens between adjacent separators are dropped.
    final boolean preserveAllTokens = false;
    return splitByWholeSeparatorWorker(str, separator, max, preserveAllTokens);
}
/**
 * Performs the logic for the {@code splitByWholeSeparator} methods.
 *
 * @param str               the String to parse, may be null
 * @param separator         String containing the String to be used as a
 *                          delimiter; {@code null} or empty splits on whitespace
 * @param max               the maximum number of elements to include in the
 *                          returned array; a zero or negative value implies no
 *                          limit. Once the limit is reached the final element
 *                          holds the whole remainder, separators included.
 * @param preserveAllTokens if {@code true}, adjacent separators are treated as
 *                          empty token separators; if {@code false}, adjacent
 *                          separators are treated as one separator
 * @return an array of parsed Strings, {@code null} if null String input
 * @since 2.4
 */
private static String[] splitByWholeSeparatorWorker(String str, String separator, int max,
                                                    boolean preserveAllTokens) {
    if (str == null) {
        return null;
    }
    int len = str.length();
    if (len == 0) {
        return EMPTY_STRING_ARRAY;
    }
    if (separator == null || separator.isEmpty()) {
        // No usable separator: split on whitespace.
        return splitWorker(str, null, max, preserveAllTokens);
    }
    int separatorLength = separator.length();
    // Typed list: with a raw list, toArray(String[]) would be typed Object[].
    List<String> substrings = new ArrayList<>();
    int numberOfSubstrings = 0;
    int beg = 0;
    int end = 0;
    while (end < len) {
        end = str.indexOf(separator, beg);
        if (end < 0) {
            // No further separator: the remainder is the last token, unless it is empty.
            if (beg < len) {
                substrings.add(str.substring(beg));
            }
            end = len;
        } else if (end > beg) {
            numberOfSubstrings += 1;
            if (numberOfSubstrings == max) {
                // Limit reached: the final token is everything that is left.
                end = len;
                substrings.add(str.substring(beg));
            } else {
                substrings.add(str.substring(beg, end));
                // Resume the search just past the separator that was found.
                beg = end + separatorLength;
            }
        } else {
            // The separator sits exactly at 'beg': a consecutive occurrence.
            if (preserveAllTokens) {
                numberOfSubstrings += 1;
                if (numberOfSubstrings == max) {
                    end = len;
                    substrings.add(str.substring(beg));
                } else {
                    substrings.add(EMPTY);
                }
            }
            beg = end + separatorLength;
        }
    }
    return substrings.toArray(EMPTY_STRING_ARRAY);
}
/**
 * Tells whether the given String contains at least one non-whitespace
 * character. Delegates to {@link #hasText(CharSequence)}.
 *
 * @param str the String to check, may be null
 * @return {@code true} if {@code str} is not null, not empty and not
 *         whitespace-only
 */
public static boolean hasText(String str) {
    CharSequence sequence = str;
    return hasText(sequence);
}
/**
 * Tells whether the given character sequence contains at least one character
 * for which {@link Character#isWhitespace(char)} is false.
 *
 * @param str the sequence to check, may be null
 * @return {@code true} if {@code str} is not null, not empty and not
 *         whitespace-only
 */
public static boolean hasText(CharSequence str) {
    if (hasLength(str)) {
        int position = 0;
        final int total = str.length();
        while (position < total) {
            char current = str.charAt(position);
            if (!Character.isWhitespace(current)) {
                return true;
            }
            position++;
        }
    }
    return false;
}
/**
 * Tells whether the given character sequence is neither {@code null} nor
 * empty. Whitespace counts as content.
 *
 * @param str the sequence to check, may be null
 * @return {@code true} if {@code str} is not null and has a positive length
 */
public static boolean hasLength(CharSequence str) {
    if (str == null) {
        return false;
    }
    return str.length() > 0;
}
/**
 * Tells whether the given String is neither {@code null} nor empty.
 * Delegates to {@link #hasLength(CharSequence)}.
 *
 * @param str the String to check, may be null
 * @return {@code true} if {@code str} is not null and has a positive length
 */
public static boolean hasLength(String str) {
    CharSequence sequence = str;
    return hasLength(sequence);
}
/**
 * Substitutes <code>${key}</code> placeholders in a pattern with values
 * looked up in a map. For example, the pattern <code>"hello ${foo}"</code>
 * with the map <code>{"foo" : "world"}</code> yields <code>"hello world"</code>.
 * <p>
 * A key that is absent from the map (or mapped to {@code null}) is rendered
 * as the text {@code null}. A <code>${</code> with no closing <code>}</code>
 * ends the substitution; the rest of the pattern is copied verbatim.
 *
 * @param s   the pattern string, must not be null
 * @param map the key-value pairs; values are appended via their string form
 * @return the pattern with its placeholders replaced
 */
public static String replace(String s, Map<?, ?> map) {
    StringBuilder ret = new StringBuilder((int) (s.length() * 1.5));
    int cursor = 0;
    for (int start, end; (start = s.indexOf("${", cursor)) != -1 && (end = s.indexOf("}", start)) != -1; ) {
        // Copy the literal text before the placeholder, then the mapped value.
        ret.append(s, cursor, start).append(map.get(s.substring(start + 2, end)));
        cursor = end + 1;
    }
    ret.append(s, cursor, s.length());
    return ret.toString();
}
public static String replace(String s, Object... objs) {
if (objs == null || objs.length == 0)
return s;
if (!s.contains("{}"))
return s;
StringBuilder ret = new StringBuilder((int) (s.length() * 1.5));
int cursor = 0;
int index = 0;
for (int start; (start = s.indexOf("{}", cursor)) != -1; ) {
ret.append(s, cursor, start);
if (index < objs.length) {
Object obj = objs[index];
try {
if (obj != null) {
if (obj instanceof AbstractCollection) {
ret.append(Arrays.toString(((AbstractCollection>) obj).toArray()));
} else {
ret.append(obj);
}
} else {
ret.append("null");
}
} catch (Throwable ignored) {
}
} else {
ret.append("{}");
}
cursor = start + 2;
index++;
}
ret.append(s, cursor, s.length());
return ret.toString();
}
/**
 * Replaces every occurrence of {@code sub} in {@code s} with {@code with}.
 * <p>
 * A {@code null} or empty {@code sub} matches nothing, so {@code s} is
 * returned unchanged. The empty case must be rejected up front because
 * {@code indexOf("", c)} always succeeds without advancing the cursor.
 *
 * @param s    the string to search, may be null
 * @param sub  the substring to look for
 * @param with the replacement text; must not be null when {@code sub} occurs
 * @return the string with all occurrences replaced, the original string if
 *         nothing matched, or {@code null} if {@code s} is null
 */
public static String replaceStr(String s, String sub, String with) {
    if (s == null) {
        return null;
    }
    if (sub == null || sub.isEmpty()) {
        return s;
    }
    int c = 0;
    int i = s.indexOf(sub, c);
    if (i == -1) {
        return s;
    }
    StringBuilder buf = new StringBuilder(s.length() + with.length());
    do {
        buf.append(s, c, i);
        buf.append(with);
        c = i + sub.length();
    } while ((i = s.indexOf(sub, c)) != -1);
    // Copy whatever follows the last occurrence.
    if (c < s.length()) {
        buf.append(s, c, s.length());
    }
    return buf.toString();
}
public static String escapeXML(String str) {
if (str == null)
return "";
StringBuilder sb = new StringBuilder();
for (int i = 0; i < str.length(); ++i) {
char c = str.charAt(i);
switch (c) {
case '\u00FF':
case '\u0024':
break;
case '&':
sb.append("&");
break;
case '<':
sb.append("<");
break;
case '>':
sb.append(">");
break;
case '\"':
sb.append(""");
break;
case '\'':
sb.append("'");
break;
default:
if (c >= '\u0000' && c <= '\u001F')
break;
if (c >= '\uE000' && c <= '\uF8FF')
break;
if (c >= '\uFFF0' && c <= '\uFFFF')
break;
sb.append(c);
break;
}
}
return sb.toString();
}
/**
 * Converts a string containing unicode escapes into the characters they
 * denote, e.g. "\\u8001\\u9A6C" becomes the two characters U+8001 and U+9A6C.
 * <p>
 * An escape is a backslash, the letter {@code u} and one to four hexadecimal
 * digits (at most four are consumed). All other text, including a backslash
 * or a {@code u} that does not start such an escape, is copied unchanged.
 *
 * @param s the unicode form of a string, may be null
 * @return the decoded string, or {@code null} if {@code s} is null
 */
public static String unicodeToString(String s) {
    if (s == null) {
        return null;
    }
    int len = s.length();
    StringBuilder sb = new StringBuilder(len);
    int i = 0;
    while (i < len) {
        char c = s.charAt(i);
        if (c == '\\' && i + 1 < len && s.charAt(i + 1) == 'u') {
            // Gather up to four hex digits following the escape prefix.
            int pos = i + 2;
            int code = 0;
            int digits = 0;
            while (digits < 4 && pos < len) {
                int d = Character.digit(s.charAt(pos), 16);
                if (d < 0) {
                    break;
                }
                code = (code << 4) | d;
                digits++;
                pos++;
            }
            if (digits > 0) {
                sb.append((char) code);
                i = pos;
                continue;
            }
            // No digits after the prefix: fall through and copy it literally.
        }
        sb.append(c);
        i++;
    }
    return sb.toString();
}
/**
 * Extracts the filename extension from the given Java resource path,
 * e.g. {@code "mypath/myfile.txt" -> "txt"}.
 * <p>
 * A dot that occurs before the last folder separator belongs to a folder
 * name, not to the filename, and therefore yields no extension.
 *
 * @param path the file path (may be {@code null})
 * @return the extracted filename extension, or {@code null} if none
 */
public static String getFilenameExtension(String path) {
    if (path == null) {
        return null;
    }
    int dot = path.lastIndexOf(EXTENSION_SEPARATOR);
    boolean dotInFilename = dot != -1 && path.lastIndexOf(FOLDER_SEPARATOR) <= dot;
    return dotInFilename ? path.substring(dot + 1) : null;
}
/**
 * Extracts the filename from the given Java resource path,
 * e.g. {@code "mypath/myfile.txt" -> "myfile.txt"}.
 *
 * @param path the file path (may be {@code null})
 * @return the text after the last folder separator, the whole path if it
 *         contains no separator, or {@code null} if {@code path} is null
 */
public static String getFilename(String path) {
    if (path == null) {
        return null;
    }
    int lastSeparator = path.lastIndexOf(FOLDER_SEPARATOR);
    if (lastSeparator == -1) {
        return path;
    }
    return path.substring(lastSeparator + 1);
}
/**
 * Encodes the given string as UTF-8.
 *
 * @param string the string to encode, must not be null
 * @return the UTF-8 bytes of {@code string}
 */
public static byte[] getUtf8Bytes(String string) {
    byte[] encoded = string.getBytes(StandardCharsets.UTF_8);
    return encoded;
}
/**
 * Encodes the given string as ISO-8859-1.
 *
 * @param string the string to encode, must not be null
 * @return the ISO-8859-1 bytes of {@code string}
 */
public static byte[] getBytes(String string) {
    byte[] encoded = string.getBytes(StandardCharsets.ISO_8859_1);
    return encoded;
}
/**
 * Converts part of a String to an integer, parsing from {@code from} up to
 * the first character that cannot belong to the number.
 * <p>
 * Characters at or below the space character are skipped before the first
 * digit and end the number after it. A {@code '-'} seen before the first
 * digit makes the result negative. Overflow is not detected.
 *
 * @param string a String containing an integer
 * @param from   the index to start parsing from
 * @return the parsed int
 * @throws NumberFormatException if no digit is found
 */
public static int toInt(String string, int from) {
    int value = 0;
    boolean sawDigit = false;
    boolean negative = false;
    int pos = from;
    while (pos < string.length()) {
        char ch = string.charAt(pos++);
        if (ch <= ' ') {
            if (sawDigit) {
                break;
            }
            continue;
        }
        if (ch >= '0' && ch <= '9') {
            value = value * 10 + (ch - '0');
            sawDigit = true;
            continue;
        }
        if (ch == '-' && !sawDigit) {
            negative = true;
            continue;
        }
        break;
    }
    if (!sawDigit) {
        throw new NumberFormatException(string);
    }
    return negative ? -value : value;
}
/**
 * Appends a byte to the buffer as two digits in the given base, most
 * significant digit first. Digit values above 9 are written as lower-case
 * letters starting at {@code 'a'}.
 *
 * @param buf  the buffer to append to
 * @param b    the byte to append, treated as unsigned
 * @param base the base of the output (almost always 16)
 */
public static void append(StringBuilder buf, byte b, int base) {
    int unsigned = 0xff & b;
    int high = (unsigned / base) % base;
    int low = unsigned % base;
    // '0' + d lies above '9' exactly when d > 9; such digits continue at 'a'.
    buf.append((char) (high > 9 ? 'a' + (high - 10) : '0' + high));
    buf.append((char) (low > 9 ? 'a' + (low - 10) : '0' + low));
}
/**
 * Appends a value to the buffer as two decimal digits, zero padded.
 * Values of 100 or more are silently ignored and nothing is appended.
 *
 * @param buf the buffer to append to
 * @param i   the value to append
 */
public static void append2digits(StringBuilder buf, int i) {
    if (i >= 100) {
        return;
    }
    char tens = (char) (i / 10 + '0');
    char ones = (char) (i % 10 + '0');
    buf.append(tens);
    buf.append(ones);
}
/**
 * Fast lower-case conversion that only touches ASCII characters; characters
 * above code 127 are left as they are.
 * <p>
 * The input instance itself is returned when it contains nothing to convert,
 * so no copy is made in that case.
 *
 * @param s the string to convert
 * @return a lower-case version of {@code s}
 */
public static String asciiToLowerCase(String s) {
    // Scan backwards for the last character that needs converting.
    int last = s.length() - 1;
    while (last >= 0) {
        char original = s.charAt(last);
        if (original <= 127 && LOWER_CASE[original] != original) {
            break;
        }
        last--;
    }
    if (last < 0) {
        return s;
    }
    // Everything after 'last' is already lower case; convert from there down.
    char[] chars = s.toCharArray();
    for (int k = last; k >= 0; k--) {
        if (chars[k] <= 127) {
            chars[k] = LOWER_CASE[chars[k]];
        }
    }
    return new String(chars);
}
/**
 * Converts an alternate charset name (e.g. {@code utf8}) to the normalized
 * name registered for it (e.g. {@code utf-8}).
 *
 * @param s the charset name to normalize
 * @return the normalized charset name, or {@code s} itself if no normalized
 *         version is registered
 */
public static String normalizeCharset(String s) {
    String normalized = CHARSETS.get(s);
    if (normalized != null) {
        return normalized;
    }
    return s;
}
/**
 * Converts an alternate charset name (e.g. {@code utf8}) found inside a
 * larger string to the normalized name registered for it (e.g. {@code utf-8}).
 *
 * @param s      the string containing the charset name
 * @param offset the offset of the charset name within {@code s}
 * @param length the length of the charset name within {@code s}
 * @return the normalized charset name, or the addressed substring of
 *         {@code s} if no normalized version is registered
 */
public static String normalizeCharset(String s, int offset, int length) {
    String normalized = CHARSETS.get(s, offset, length);
    if (normalized != null) {
        return normalized;
    }
    return s.substring(offset, offset + length);
}
/**
 * Tells whether a region of a string consists solely of hexadecimal digits
 * ({@code 0-9}, {@code a-f}, {@code A-F}).
 *
 * @param str    the string to inspect
 * @param offset the index of the first character of the region
 * @param length the number of characters in the region
 * @return {@code false} if the region extends past the end of {@code str} or
 *         contains a non-hex character; {@code true} otherwise (including
 *         for an empty region)
 */
public static boolean isHex(String str, int offset, int length) {
    final int end = offset + length;
    if (end > str.length()) {
        return false;
    }
    int index = offset;
    while (index < end) {
        char ch = str.charAt(index++);
        boolean digit = ch >= '0' && ch <= '9';
        boolean lower = ch >= 'a' && ch <= 'f';
        boolean upper = ch >= 'A' && ch <= 'F';
        if (!digit && !lower && !upper) {
            return false;
        }
    }
    return true;
}
/**
 * Truncates a string to a maximum size.
 *
 * @param str     the string to possibly truncate
 * @param maxSize the maximum size of the string
 * @return {@code str} itself if it is null or no longer than
 *         {@code maxSize}; otherwise its first {@code maxSize} characters
 */
public static String truncate(String str, int maxSize) {
    boolean fits = str == null || str.length() <= maxSize;
    return fits ? str : str.substring(0, maxSize);
}
}