// org.nd4j.util.StringUtils (Maven / Gradle / Ivy)
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.nd4j.util; import java.io.PrintWriter; import java.io.StringWriter; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.StringTokenizer; import java.util.concurrent.TimeUnit; import java.util.regex.Matcher; import java.util.regex.Pattern; import com.google.common.net.InetAddresses; import org.nd4j.base.Preconditions; /** * General string utils - adapted from * Apache Hadoop StringUtils, with modifications * */ public class StringUtils { /** * Make a string representation of the exception. * @param e The exception to stringify * @return A string with exception name and call stack. */ public static String stringifyException(Throwable e) { StringWriter stm = new StringWriter(); PrintWriter wrt = new PrintWriter(stm); e.printStackTrace(wrt); wrt.close(); return stm.toString(); } /** * Given a full hostname, return the word upto the first dot. 
* @param fullHostname the full hostname * @return the hostname to the first dot */ public static String simpleHostname(String fullHostname) { if (InetAddresses.isInetAddress(fullHostname)) { return fullHostname; } int offset = fullHostname.indexOf('.'); if (offset != -1) { return fullHostname.substring(0, offset); } return fullHostname; } /** * Given an integer, return a string that is in an approximate, but human * readable format. * @param number the number to format * @return a human readable form of the integer * * @deprecated use {@link TraditionalBinaryPrefix#long2String(long, String, int)}. */ @Deprecated public static String humanReadableInt(long number) { return TraditionalBinaryPrefix.long2String(number, "", 1); } /** The same as String.format(Locale.ENGLISH, format, objects). */ public static String format(final String format, final Object... objects) { return String.format(Locale.ENGLISH, format, objects); } /** * Format a percentage for presentation to the user. * @param fraction the percentage as a fraction, e.g. 0.1 = 10% * @param decimalPlaces the number of decimal places * @return a string representation of the percentage */ public static String formatPercent(double fraction, int decimalPlaces) { return format("%." + decimalPlaces + "f%%", fraction*100); } /** * Given an array of strings, return a comma-separated list of its elements. 
* @param strs Array of strings * @return Empty string if strs.length is 0, comma separated list of strings * otherwise */ public static String arrayToString(String[] strs) { if (strs.length == 0) { return ""; } StringBuilder sbuf = new StringBuilder(); sbuf.append(strs[0]); for (int idx = 1; idx < strs.length; idx++) { sbuf.append(","); sbuf.append(strs[idx]); } return sbuf.toString(); } /** * Given an array of bytes it will convert the bytes to a hex string * representation of the bytes * @param bytes * @param start start index, inclusively * @param end end index, exclusively * @return hex string representation of the byte array */ public static String byteToHexString(byte[] bytes, int start, int end) { if (bytes == null) { throw new IllegalArgumentException("bytes == null"); } StringBuilder s = new StringBuilder(); for(int i = start; i < end; i++) { s.append(format("%02x", bytes[i])); } return s.toString(); } /** Same as byteToHexString(bytes, 0, bytes.length). */ public static String byteToHexString(byte bytes[]) { return byteToHexString(bytes, 0, bytes.length); } /** * Convert a byte to a hex string. * @see #byteToHexString(byte[]) * @see #byteToHexString(byte[], int, int) * @param b byte * @return byte's hex value as a String */ public static String byteToHexString(byte b) { return byteToHexString(new byte[] {b}); } /** * Given a hexstring this will return the byte array corresponding to the * string * @param hex the hex String array * @return a byte array that is a hex string representation of the given * string. 
The size of the byte array is therefore hex.length/2
     * (a trailing unpaired hex digit is ignored).
     */
    public static byte[] hexStringToByte(String hex) {
        byte[] bts = new byte[hex.length() / 2];
        for (int i = 0; i < bts.length; i++) {
            // every output byte is parsed from one two-character slice
            bts[i] = (byte) Integer.parseInt(hex.substring(2 * i, 2 * i + 2), 16);
        }
        return bts;
    }

    /**
     * Joins the string forms of the given URIs with commas.
     *
     * @param uris the URIs to join, may be null
     * @return null if uris is null, the empty string if uris is empty,
     *         otherwise the comma separated list of URIs
     */
    public static String uriToString(URI[] uris) {
        if (uris == null) {
            return null;
        }
        // Start from an empty builder so that an empty array yields ""
        // instead of failing on uris[0].
        StringBuilder ret = new StringBuilder();
        for (int i = 0; i < uris.length; i++) {
            if (i > 0) {
                ret.append(",");
            }
            ret.append(uris[i].toString());
        }
        return ret.toString();
    }

    /**
     * @param str
     *          The string array to be parsed into an URI array.
     * @return null if str is null, else the URI array
     *         equivalent to str.
     * @throws IllegalArgumentException
     *           If any string in str violates RFC 2396.
     */
    public static URI[] stringToURI(String[] str) {
        if (str == null) {
            return null;
        }
        URI[] uris = new URI[str.length];
        for (int i = 0; i < str.length; i++) {
            try {
                uris[i] = new URI(str[i]);
            } catch (URISyntaxException ur) {
                // keep the original exception as the cause
                throw new IllegalArgumentException(
                        "Failed to create uri for " + str[i], ur);
            }
        }
        return uris;
    }

    /**
     *
     * Given a finish and start time in long milliseconds, returns a
     * String in the format Xhrs, Ymins, Z sec, for the time difference between two times.
     * If finish time comes before start time then negative values of X, Y and Z will be returned.
     *
     * @param finishTime finish time
     * @param startTime start time
     */
    public static String formatTimeDiff(long finishTime, long startTime) {
        long timeDiff = finishTime - startTime;
        return formatTime(timeDiff);
    }

    /**
     *
     * Given the time in long milliseconds, returns a
     * String in the format Xhrs, Ymins, Z sec.
* * @param timeDiff The time difference to format */ public static String formatTime(long timeDiff){ StringBuilder buf = new StringBuilder(); long hours = timeDiff / (60*60*1000); long rem = (timeDiff % (60*60*1000)); long minutes = rem / (60*1000); rem = rem % (60*1000); long seconds = rem / 1000; if (hours != 0){ buf.append(hours); buf.append("hrs, "); } if (minutes != 0){ buf.append(minutes); buf.append("mins, "); } // return "0sec if no difference buf.append(seconds); buf.append("sec"); return buf.toString(); } /** * * Given the time in long milliseconds, returns a String in the sortable * format Xhrs, Ymins, Zsec. X, Y, and Z are always two-digit. If the time is * more than 100 hours ,it is displayed as 99hrs, 59mins, 59sec. * * @param timeDiff The time difference to format */ public static String formatTimeSortable(long timeDiff) { StringBuilder buf = new StringBuilder(); long hours = timeDiff / (60 * 60 * 1000); long rem = (timeDiff % (60 * 60 * 1000)); long minutes = rem / (60 * 1000); rem = rem % (60 * 1000); long seconds = rem / 1000; // if hours is more than 99 hours, it will be set a max value format if (hours > 99) { hours = 99; minutes = 59; seconds = 59; } buf.append(String.format("%02d", hours)); buf.append("hrs, "); buf.append(String.format("%02d", minutes)); buf.append("mins, "); buf.append(String.format("%02d", seconds)); buf.append("sec"); return buf.toString(); } /** * Formats time in ms and appends difference (finishTime - startTime) * as returned by formatTimeDiff(). * If finish time is 0, empty string is returned, if start time is 0 * then difference is not appended to return value. * @param formattedFinishTime formattedFinishTime to use * @param finishTime finish time * @param startTime start time * @return formatted value. 
*/
    public static String getFormattedTimeWithDiff(String formattedFinishTime,
                                                  long finishTime, long startTime) {
        StringBuilder buf = new StringBuilder();
        if (0 != finishTime) {
            buf.append(formattedFinishTime);
            if (0 != startTime) {
                buf.append(" (").append(formatTimeDiff(finishTime, startTime)).append(")");
            }
        }
        return buf.toString();
    }

    /**
     * Returns an array of strings.
     *
     * @param str the comma separated string values
     * @return the array of the comma separated string values, or null if
     *         there are none
     */
    public static String[] getStrings(String str) {
        String delim = ",";
        return getStrings(str, delim);
    }

    /**
     * Returns an array of strings.
     *
     * @param str the string values
     * @param delim delimiter to separate the values
     * @return the array of the separated string values, or null if
     *         there are none
     */
    public static String[] getStrings(String str, String delim) {
        Collection<String> values = getStringCollection(str, delim);
        // callers receive null (not an empty array) when nothing was parsed
        if (values.size() == 0) {
            return null;
        }
        return values.toArray(new String[values.size()]);
    }

    /**
     * Returns a collection of strings.
     * @param str comma separated string values
     * @return an ArrayList
of string values
     */
    public static Collection<String> getStringCollection(String str) {
        String delim = ",";
        return getStringCollection(str, delim);
    }

    /**
     * Returns a collection of strings.
     *
     * Tokens are produced by a {@link StringTokenizer}, so every character of
     * delim is a delimiter on its own and empty tokens are never returned.
     *
     * @param str
     *          String to parse, may be null
     * @param delim
     *          delimiter to separate the values
     * @return Collection of parsed elements; empty if str is null.
     */
    public static Collection<String> getStringCollection(String str, String delim) {
        List<String> values = new ArrayList<String>();
        if (str == null) {
            return values;
        }
        StringTokenizer tokenizer = new StringTokenizer(str, delim);
        while (tokenizer.hasMoreTokens()) {
            values.add(tokenizer.nextToken());
        }
        return values;
    }

    /**
     * Splits a comma separated value String
, trimming leading and * trailing whitespace on each value. Duplicate and empty values are removed. * * @param str a comma separatedwith values, may be null * @return a Collection
ofString
values, empty * Collection if null String input */ public static CollectiongetTrimmedStringCollection(String str){ Set set = new LinkedHashSet ( Arrays.asList(getTrimmedStrings(str))); set.remove(""); return set; } /** * Splits a comma or newline separated value String
, trimming * leading and trailing whitespace on each value. * * @param str a comma or newline separatedString
with values, * may be null * @return an array ofString
values, empty array if null String * input */ public static String[] getTrimmedStrings(String str){ if (null == str || str.trim().isEmpty()) { return emptyStringArray; } return str.trim().split("\\s*[,\n]\\s*"); } final public static String[] emptyStringArray = {}; final public static char COMMA = ','; final public static char ESCAPE_CHAR = '\\'; /** * Split a string using the default separator * @param str a string that may have escaped separator * @return an array of strings */ public static String[] split(String str) { return split(str, ESCAPE_CHAR, COMMA); } /** * Split a string using the given separator * @param str a string that may have escaped separator * @param escapeChar a char that be used to escape the separator * @param separator a separator char * @return an array of strings */ public static String[] split( String str, char escapeChar, char separator) { if (str==null) { return null; } ArrayListstrList = new ArrayList (); StringBuilder split = new StringBuilder(); int index = 0; while ((index = findNext(str, separator, escapeChar, index, split)) >= 0) { ++index; // move over the separator for next search strList.add(split.toString()); split.setLength(0); // reset the buffer } strList.add(split.toString()); // remove trailing empty split(s) int last = strList.size(); // last split while (--last>=0 && "".equals(strList.get(last))) { strList.remove(last); } return strList.toArray(new String[strList.size()]); } /** * Split a string using the given separator, with no escaping performed. * @param str a string to be split. Note that this may not be null. * @param separator a separator char * @return an array of strings */ public static String[] split( String str, char separator) { // String.split returns a single empty result for splitting the empty // string. 
if (str.isEmpty()) { return new String[]{""}; } ArrayList strList = new ArrayList (); int startIndex = 0; int nextIndex = 0; while ((nextIndex = str.indexOf(separator, startIndex)) != -1) { strList.add(str.substring(startIndex, nextIndex)); startIndex = nextIndex + 1; } strList.add(str.substring(startIndex)); // remove trailing empty split(s) int last = strList.size(); // last split while (--last>=0 && "".equals(strList.get(last))) { strList.remove(last); } return strList.toArray(new String[strList.size()]); } /** * Finds the first occurrence of the separator character ignoring the escaped * separators starting from the index. Note the substring between the index * and the position of the separator is passed. * @param str the source string * @param separator the character to find * @param escapeChar character used to escape * @param start from where to search * @param split used to pass back the extracted string */ public static int findNext(String str, char separator, char escapeChar, int start, StringBuilder split) { int numPreEscapes = 0; for (int i = start; i < str.length(); i++) { char curChar = str.charAt(i); if (numPreEscapes == 0 && curChar == separator) { // separator return i; } else { split.append(curChar); numPreEscapes = (curChar == escapeChar) ? (++numPreEscapes) % 2 : 0; } } return -1; } /** * This function splits the String s into multiple Strings using the * splitChar. However, it provides an quoting facility: it is possible to * quote strings with the quoteChar. 
* If the quoteChar occurs within the quotedExpression, it must be prefaced * by the escapeChar * * @param s The String to split * @param splitChar * @param quoteChar * @return An array of Strings that s is split into */ public static String[] splitOnCharWithQuoting(String s, char splitChar, char quoteChar, char escapeChar) { List result = new ArrayList<>(); int i = 0; int length = s.length(); StringBuilder b = new StringBuilder(); while (i < length) { char curr = s.charAt(i); if (curr == splitChar) { // add last buffer if (b.length() > 0) { result.add(b.toString()); b = new StringBuilder(); } i++; } else if (curr == quoteChar) { // find next instance of quoteChar i++; while (i < length) { curr = s.charAt(i); if (curr == escapeChar) { b.append(s.charAt(i + 1)); i += 2; } else if (curr == quoteChar) { i++; break; // break this loop } else { b.append(s.charAt(i)); i++; } } } else { b.append(curr); i++; } } if (b.length() > 0) { result.add(b.toString()); } return result.toArray(new String[0]); } /** * Escape commas in the string using the default escape char * @param str a string * @return an escaped string */ public static String escapeString(String str) { return escapeString(str, ESCAPE_CHAR, COMMA); } /** * Escape charToEscape
in the string * with the escape charescapeChar
* * @param str string * @param escapeChar escape char * @param charToEscape the char to be escaped * @return an escaped string */ public static String escapeString( String str, char escapeChar, char charToEscape) { return escapeString(str, escapeChar, new char[] {charToEscape}); } // check if the character array has the character private static boolean hasChar(char[] chars, char character) { for (char target : chars) { if (character == target) { return true; } } return false; } /** * @param charsToEscape array of characters to be escaped */ public static String escapeString(String str, char escapeChar, char[] charsToEscape) { if (str == null) { return null; } StringBuilder result = new StringBuilder(); for (int i=0; icharToEscape escapeChar
* * @param str string * @param escapeChar escape char * @param charToEscape the escaped char * @return an unescaped string */ public static String unEscapeString( String str, char escapeChar, char charToEscape) { return unEscapeString(str, escapeChar, new char[] {charToEscape}); } /** * @param charsToEscape array of characters to unescape */ public static String unEscapeString(String str, char escapeChar, char[] charsToEscape) { if (str == null) { return null; } StringBuilder result = new StringBuilder(str.length()); boolean hasPreEscape = false; for (int i=0; i(Long.MAX_VALUE/prefix) || num < (Long.MIN_VALUE/prefix)) { throw new IllegalArgumentException(s + " does not fit in a Long"); } return num * prefix; } } /** * Convert a long integer to a string with traditional binary prefix. * * @param n the value to be converted * @param unit The unit, e.g. "B" for bytes. * @param decimalPlaces The number of decimal places. * @return a string with traditional binary prefix. */ public static String long2String(long n, String unit, int decimalPlaces) { if (unit == null) { unit = ""; } //take care a special case if (n == Long.MIN_VALUE) { return "-8 " + EXA.symbol + unit; } final StringBuilder b = new StringBuilder(); //take care negative numbers if (n < 0) { b.append('-'); n = -n; } if (n < KILO.value) { //no prefix b.append(n); return (unit.isEmpty()? b: b.append(" ").append(unit)).toString(); } else { //find traditional binary prefix int i = 0; for(; i < values().length && n >= values()[i].value; i++); TraditionalBinaryPrefix prefix = values()[i - 1]; if ((n & prefix.bitMask) == 0) { //exact division b.append(n >> prefix.bitShift); } else { final String format = "%." 
+ decimalPlaces + "f"; String s = format(format, n/(double)prefix.value); //check a special rounding up case if (s.startsWith("1024")) { prefix = values()[i]; s = format(format, n/(double)prefix.value); } b.append(s); } return b.append(' ').append(prefix.symbol).append(unit).toString(); } } } /** * Escapes HTML Special characters present in the string. * @param string * @return HTML Escaped String representation */ public static String escapeHTML(String string) { if(string == null) { return null; } StringBuilder sb = new StringBuilder(); boolean lastCharacterWasSpace = false; char[] chars = string.toCharArray(); for(char c : chars) { if(c == ' ') { if(lastCharacterWasSpace){ lastCharacterWasSpace = false; sb.append(" "); }else { lastCharacterWasSpace=true; sb.append(" "); } }else { lastCharacterWasSpace = false; switch(c) { case '<': sb.append("<"); break; case '>': sb.append(">"); break; case '&': sb.append("&"); break; case '"': sb.append("""); break; default : sb.append(c);break; } } } return sb.toString(); } /** * @return a byte description of the given long interger value. */ public static String byteDesc(long len) { return TraditionalBinaryPrefix.long2String(len, "B", 2); } /** @deprecated use StringUtils.format("%.2f", d). */ @Deprecated public static String limitDecimalTo2(double d) { return format("%.2f", d); } /** * Concatenates strings, using a separator. * * @param separator Separator to join with. * @param strings Strings to join. */ public static String join(CharSequence separator, Iterable> strings) { Iterator> i = strings.iterator(); if (!i.hasNext()) { return ""; } StringBuilder sb = new StringBuilder(i.next().toString()); while (i.hasNext()) { sb.append(separator); sb.append(i.next().toString()); } return sb.toString(); } /** * Concatenates strings, using whitespaces as separator. * * @param strings Strings to join. 
*/ public static String join(Iterable strings){ return join(" ", strings); } /** * Concatenates strings, using whitespaces as separator. * * @param strings Strings to join. */ public static String join(char separator, Iterable> strings) { return join(separator + "", strings); } /** * Concatenates strings, using a separator. * * @param separator to join with * @param strings to join * @return the joined string */ public static String join(CharSequence separator, String[] strings) { // Ideally we don't have to duplicate the code here if array is iterable. StringBuilder sb = new StringBuilder(); boolean first = true; for (String s : strings) { if (first) { first = false; } else { sb.append(separator); } sb.append(s); } return sb.toString(); } public static String join(char separator, String[] strings) { return join(separator + "", strings); } /** * Convert SOME_STUFF to SomeStuff * * @param s input string * @return camelized string */ public static String camelize(String s) { StringBuilder sb = new StringBuilder(); String[] words = split(StringUtils.toLowerCase(s), ESCAPE_CHAR, '_'); for (String word : words) sb.append(org.apache.commons.lang3.StringUtils.capitalize(word)); return sb.toString(); } /** * Matches a template string against a pattern, replaces matched tokens with * the supplied replacements, and returns the result. The regular expression * must use a capturing group. The value of the first capturing group is used * to look up the replacement. If no replacement is found for the token, then * it is replaced with the empty string. * * For example, assume template is "%foo%_%bar%_%baz%", pattern is "%(.*?)%", * and replacements contains 2 entries, mapping "foo" to "zoo" and "baz" to * "zaz". The result returned would be "zoo__zaz". 
* * @param template String template to receive replacements * @param pattern Pattern to match for identifying tokens, must use a capturing * group * @param replacements Map mapping tokens identified by the * capturing group to their replacement values * @return String template with replacements */ public static String replaceTokens(String template, Pattern pattern, Map replacements) { StringBuffer sb = new StringBuffer(); Matcher matcher = pattern.matcher(template); while (matcher.find()) { String replacement = replacements.get(matcher.group(1)); if (replacement == null) { replacement = ""; } matcher.appendReplacement(sb, Matcher.quoteReplacement(replacement)); } matcher.appendTail(sb); return sb.toString(); } /** * Get stack trace for a given thread. */ public static String getStackTrace(Thread t) { final StackTraceElement[] stackTrace = t.getStackTrace(); StringBuilder str = new StringBuilder(); for (StackTraceElement e : stackTrace) { str.append(e.toString() + "\n"); } return str.toString(); } /** * Converts all of the characters in this String to lower case with * Locale.ENGLISH. * * @param str string to be converted * @return the str, converted to lowercase. */ public static String toLowerCase(String str) { return str.toLowerCase(Locale.ENGLISH); } /** * Converts all of the characters in this String to upper case with * Locale.ENGLISH. * * @param str string to be converted * @return the str, converted to uppercase. */ public static String toUpperCase(String str) { return str.toUpperCase(Locale.ENGLISH); } /** * Compare strings locale-freely by using String#equalsIgnoreCase. * * @param s1 Non-null string to be converted * @param s2 string to be converted * @return the str, converted to uppercase. 
*/ public static boolean equalsIgnoreCase(String s1, String s2) { Preconditions.checkNotNull(s1); // don't check non-null against s2 to make the semantics same as // s1.equals(s2) return s1.equalsIgnoreCase(s2); } /** * Checks if the String contains only unicode letters.
* ** *
null
will returnfalse
. * An empty String (length()=0) will returntrue
.* StringUtils.isAlpha(null) = false * StringUtils.isAlpha("") = true * StringUtils.isAlpha(" ") = false * StringUtils.isAlpha("abc") = true * StringUtils.isAlpha("ab2c") = false * StringUtils.isAlpha("ab-c") = false ** * @param str the String to check, may be null * @returntrue
if only contains letters, and is non-null */ public static boolean isAlpha(String str) { if (str == null) { return false; } int sz = str.length(); for (int i = 0; i < sz; i++) { if (!Character.isLetter(str.charAt(i))) { return false; } } return true; } public static String timeUnitToString(long time, TimeUnit unit) { String str = String.valueOf(time); switch (unit) { case MILLISECONDS: str += "Millisecond"; break; case SECONDS: str += "Second"; break; case MINUTES: str += "Minute"; break; case HOURS: str += "Hour"; break; case DAYS: str += "Day"; break; default: throw new RuntimeException(); } if (time == 1) return str; return str + "s"; } public static TimeUnit stringToTimeUnit(String str) { switch (str.toLowerCase()) { case "ms": case "millisecond": case "milliseconds": return TimeUnit.MILLISECONDS; case "s": case "sec": case "second": case "seconds": return TimeUnit.SECONDS; case "min": case "minute": case "minutes": return TimeUnit.MINUTES; case "h": case "hour": case "hours": return TimeUnit.HOURS; case "day": case "days": return TimeUnit.DAYS; default: throw new RuntimeException("Unknown time unit: \"" + str + "\""); } } }