All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.datavec.api.util.StringUtils Maven / Gradle / Ivy

There is a newer version: 1.0.0-M2.1
Show newest version
/*-
 *  * Copyright 2016 Skymind, Inc.
 *  *
 *  *    Licensed under the Apache License, Version 2.0 (the "License");
 *  *    you may not use this file except in compliance with the License.
 *  *    You may obtain a copy of the License at
 *  *
 *  *        http://www.apache.org/licenses/LICENSE-2.0
 *  *
 *  *    Unless required by applicable law or agreed to in writing, software
 *  *    distributed under the License is distributed on an "AS IS" BASIS,
 *  *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  *    See the License for the specific language governing permissions and
 *  *    limitations under the License.
 */

package org.datavec.api.util;

import java.io.PrintWriter;
import java.io.StringWriter;
import java.net.InetAddress;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.UnknownHostException;
import java.text.DateFormat;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.*;


/**
 * General string utils
 */
public class StringUtils {

    private static final DecimalFormat decimalFormat;
    static {
        NumberFormat numberFormat = NumberFormat.getNumberInstance(Locale.ENGLISH);
        decimalFormat = (DecimalFormat) numberFormat;
        decimalFormat.applyPattern("#.##");
    }

    /**
     * Make a string representation of the exception.
     * @param e The exception to stringify
     * @return A string with exception name and call stack.
     */
    public static String stringifyException(Throwable e) {
        StringWriter stm = new StringWriter();
        PrintWriter wrt = new PrintWriter(stm);
        e.printStackTrace(wrt);
        wrt.close();
        return stm.toString();
    }

    /**
     * Given a full hostname, return the word upto the first dot.
     * @param fullHostname the full hostname
     * @return the hostname to the first dot
     */
    public static String simpleHostname(String fullHostname) {
        int offset = fullHostname.indexOf('.');
        if (offset != -1) {
            return fullHostname.substring(0, offset);
        }
        return fullHostname;
    }

    private static DecimalFormat oneDecimal = new DecimalFormat("0.0");

    /**
     * Given an integer, return a string that is in an approximate, but human
     * readable format.
     * It uses the bases 'k', 'm', and 'g' for 1024, 1024**2, and 1024**3.
     * @param number the number to format
     * @return a human readable form of the integer
     */
    public static String humanReadableInt(long number) {
        long absNumber = Math.abs(number);
        double result = number;
        String suffix = "";
        if (absNumber < 1024) {
            // do nothing
        } else if (absNumber < 1024 * 1024) {
            result = number / 1024.0;
            suffix = "k";
        } else if (absNumber < 1024 * 1024 * 1024) {
            result = number / (1024.0 * 1024);
            suffix = "m";
        } else {
            result = number / (1024.0 * 1024 * 1024);
            suffix = "g";
        }
        return oneDecimal.format(result) + suffix;
    }

    /**
     * Format a percentage for presentation to the user.
     * @param done the percentage to format (0.0 to 1.0)
     * @param digits the number of digits past the decimal point
     * @return a string representation of the percentage
     */
    public static String formatPercent(double done, int digits) {
        DecimalFormat percentFormat = new DecimalFormat("0.00%");
        double scale = Math.pow(10.0, digits + 2);
        double rounded = Math.floor(done * scale);
        percentFormat.setDecimalSeparatorAlwaysShown(false);
        percentFormat.setMinimumFractionDigits(digits);
        percentFormat.setMaximumFractionDigits(digits);
        return percentFormat.format(rounded / scale);
    }

    /**
     * Given an array of strings, return a comma-separated list of its elements.
     * @param strs Array of strings
     * @return Empty string if strs.length is 0, comma separated list of strings
     * otherwise
     */

    public static String arrayToString(String[] strs) {
        if (strs.length == 0) {
            return "";
        }
        StringBuilder sbuf = new StringBuilder();
        sbuf.append(strs[0]);
        for (int idx = 1; idx < strs.length; idx++) {
            sbuf.append(",");
            sbuf.append(strs[idx]);
        }
        return sbuf.toString();
    }

    /**
     * Given an array of bytes it will convert the bytes to a hex string
     * representation of the bytes
     * @param bytes
     * @param start start index, inclusively
     * @param end end index, exclusively
     * @return hex string representation of the byte array
     */
    public static String byteToHexString(byte[] bytes, int start, int end) {
        if (bytes == null) {
            throw new IllegalArgumentException("bytes == null");
        }
        StringBuilder s = new StringBuilder();
        for (int i = start; i < end; i++) {
            s.append(String.format("%02x", bytes[i]));
        }
        return s.toString();
    }

    /** Same as byteToHexString(bytes, 0, bytes.length). */
    public static String byteToHexString(byte bytes[]) {
        return byteToHexString(bytes, 0, bytes.length);
    }

    /**
     * Given a hexstring this will return the byte array corresponding to the
     * string
     * @param hex the hex String array
     * @return a byte array that is a hex string representation of the given
     *         string. The size of the byte array is therefore hex.length/2
     */
    public static byte[] hexStringToByte(String hex) {
        byte[] bts = new byte[hex.length() / 2];
        for (int i = 0; i < bts.length; i++) {
            bts[i] = (byte) Integer.parseInt(hex.substring(2 * i, 2 * i + 2), 16);
        }
        return bts;
    }

    /**
     *
     * @param uris
     */
    public static String uriToString(URI[] uris) {
        if (uris == null) {
            return null;
        }
        StringBuilder ret = new StringBuilder(uris[0].toString());
        for (int i = 1; i < uris.length; i++) {
            ret.append(",");
            ret.append(uris[i].toString());
        }
        return ret.toString();
    }

    /**
     *
     * @param str
     */
    public static URI[] stringToURI(String[] str) {
        if (str == null)
            return null;
        URI[] uris = new URI[str.length];
        for (int i = 0; i < str.length; i++) {
            try {
                uris[i] = new URI(str[i]);
            } catch (URISyntaxException ur) {
                System.out.println("Exception in specified URI's " + StringUtils.stringifyException(ur));
                //making sure its assigned to null in case of an error
                uris[i] = null;
            }
        }
        return uris;
    }


    /**
     *
     * Given a finish and start time in long milliseconds, returns a
     * String in the format Xhrs, Ymins, Z sec, for the time difference between two times.
     * If finish time comes before start time then negative valeus of X, Y and Z wil return.
     *
     * @param finishTime finish time
     * @param startTime start time
     */
    public static String formatTimeDiff(long finishTime, long startTime) {
        long timeDiff = finishTime - startTime;
        return formatTime(timeDiff);
    }

    /**
     *
     * Given the time in long milliseconds, returns a
     * String in the format Xhrs, Ymins, Z sec.
     *
     * @param timeDiff The time difference to format
     */
    public static String formatTime(long timeDiff) {
        StringBuilder buf = new StringBuilder();
        long hours = timeDiff / (60 * 60 * 1000);
        long rem = (timeDiff % (60 * 60 * 1000));
        long minutes = rem / (60 * 1000);
        rem = rem % (60 * 1000);
        long seconds = rem / 1000;

        if (hours != 0) {
            buf.append(hours);
            buf.append("hrs, ");
        }
        if (minutes != 0) {
            buf.append(minutes);
            buf.append("mins, ");
        }
        // return "0sec if no difference
        buf.append(seconds);
        buf.append("sec");
        return buf.toString();
    }

    /**
     * Formats time in ms and appends difference (finishTime - startTime)
     * as returned by formatTimeDiff().
     * If finish time is 0, empty string is returned, if start time is 0
     * then difference is not appended to return value.
     * @param dateFormat date format to use
     * @param finishTime fnish time
     * @param startTime start time
     * @return formatted value.
     */
    public static String getFormattedTimeWithDiff(DateFormat dateFormat, long finishTime, long startTime) {
        StringBuilder buf = new StringBuilder();
        if (0 != finishTime) {
            buf.append(dateFormat.format(new Date(finishTime)));
            if (0 != startTime) {
                buf.append(" (" + formatTimeDiff(finishTime, startTime) + ")");
            }
        }
        return buf.toString();
    }

    /**
     * Returns an arraylist of strings.
     * @param str the comma seperated string values
     * @return the arraylist of the comma seperated string values
     */
    public static String[] getStrings(String str) {
        Collection values = getStringCollection(str);
        if (values.size() == 0) {
            return null;
        }
        return values.toArray(new String[values.size()]);
    }

    /**
     * Returns a collection of strings.
     * @param str comma seperated string values
     * @return an ArrayList of string values
     */
    public static Collection getStringCollection(String str) {
        List values = new ArrayList<>();
        if (str == null)
            return values;
        StringTokenizer tokenizer = new StringTokenizer(str, ",");
        values = new ArrayList<>();
        while (tokenizer.hasMoreTokens()) {
            values.add(tokenizer.nextToken());
        }
        return values;
    }

    /**
     * Splits a comma separated value String, trimming leading and trailing whitespace on each value.
     * @param str a comma separated  with values
     * @return a Collection of String values
     */
    public static Collection getTrimmedStringCollection(String str) {
        return Arrays.asList(getTrimmedStrings(str));
    }

    /**
     * Splits a comma separated value String, trimming leading and trailing whitespace on each value.
     * @param str a comma separated  with values
     * @return an array of String values
     */
    public static String[] getTrimmedStrings(String str) {
        if (null == str || "".equals(str.trim())) {
            return emptyStringArray;
        }

        return str.trim().split("\\s*,\\s*");
    }

    final public static String[] emptyStringArray = {};
    final public static char COMMA = ',';
    final public static char ESCAPE_CHAR = '\\';

    /**
     * Split a string using the default separator
     * @param str a string that may have escaped separator
     * @return an array of strings
     */
    public static String[] split(String str) {
        return split(str, ESCAPE_CHAR, COMMA);
    }

    /**
     * Split a string using the given separator
     * @param str a string that may have escaped separator
     * @param escapeChar a char that be used to escape the separator
     * @param separator a separator char
     * @return an array of strings
     */
    public static String[] split(String str, char escapeChar, char separator) {
        if (str == null) {
            return null;
        }
        List strList = new ArrayList<>();
        StringBuilder split = new StringBuilder();
        int index = 0;
        while ((index = findNext(str, separator, escapeChar, index, split)) >= 0) {
            ++index; // move over the separator for next search
            strList.add(split.toString());
            split.setLength(0); // reset the buffer
        }
        strList.add(split.toString());
        // remove trailing empty split(s)
        int last = strList.size(); // last split
        while (--last >= 0 && "".equals(strList.get(last))) {
            strList.remove(last);
        }
        return strList.toArray(new String[strList.size()]);
    }

    /**
     * Finds the first occurrence of the separator character ignoring the escaped
     * separators starting from the index. Note the substring between the index
     * and the position of the separator is passed.
     * @param str the source string
     * @param separator the character to find
     * @param escapeChar character used to escape
     * @param start from where to search
     * @param split used to pass back the extracted string
     */
    public static int findNext(String str, char separator, char escapeChar, int start, StringBuilder split) {
        int numPreEscapes = 0;
        for (int i = start; i < str.length(); i++) {
            char curChar = str.charAt(i);
            if (numPreEscapes == 0 && curChar == separator) { // separator
                return i;
            } else {
                split.append(curChar);
                numPreEscapes = (curChar == escapeChar) ? (++numPreEscapes) % 2 : 0;
            }
        }
        return -1;
    }

    /**
     * Escape commas in the string using the default escape char
     * @param str a string
     * @return an escaped string
     */
    public static String escapeString(String str) {
        return escapeString(str, ESCAPE_CHAR, COMMA);
    }

    /**
     * Escape charToEscape in the string
     * with the escape char escapeChar
     *
     * @param str string
     * @param escapeChar escape char
     * @param charToEscape the char to be escaped
     * @return an escaped string
     */
    public static String escapeString(String str, char escapeChar, char charToEscape) {
        return escapeString(str, escapeChar, new char[] {charToEscape});
    }

    // check if the character array has the character
    private static boolean hasChar(char[] chars, char character) {
        for (char target : chars) {
            if (character == target) {
                return true;
            }
        }
        return false;
    }

    /**
     * @param charsToEscape array of characters to be escaped
     */
    public static String escapeString(String str, char escapeChar, char[] charsToEscape) {
        if (str == null) {
            return null;
        }
        StringBuilder result = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            char curChar = str.charAt(i);
            if (curChar == escapeChar || hasChar(charsToEscape, curChar)) {
                // special char
                result.append(escapeChar);
            }
            result.append(curChar);
        }
        return result.toString();
    }

    /**
     * Unescape commas in the string using the default escape char
     * @param str a string
     * @return an unescaped string
     */
    public static String unEscapeString(String str) {
        return unEscapeString(str, ESCAPE_CHAR, COMMA);
    }

    /**
     * Unescape charToEscape in the string
     * with the escape char escapeChar
     *
     * @param str string
     * @param escapeChar escape char
     * @param charToEscape the escaped char
     * @return an unescaped string
     */
    public static String unEscapeString(String str, char escapeChar, char charToEscape) {
        return unEscapeString(str, escapeChar, new char[] {charToEscape});
    }

    /**
     * @param charsToEscape array of characters to unescape
     */
    public static String unEscapeString(String str, char escapeChar, char[] charsToEscape) {
        if (str == null) {
            return null;
        }
        StringBuilder result = new StringBuilder(str.length());
        boolean hasPreEscape = false;
        for (int i = 0; i < str.length(); i++) {
            char curChar = str.charAt(i);
            if (hasPreEscape) {
                if (curChar != escapeChar && !hasChar(charsToEscape, curChar)) {
                    // no special char
                    throw new IllegalArgumentException(
                                    "Illegal escaped string " + str + " unescaped " + escapeChar + " at " + (i - 1));
                }
                // otherwise discard the escape char
                result.append(curChar);
                hasPreEscape = false;
            } else {
                if (hasChar(charsToEscape, curChar)) {
                    throw new IllegalArgumentException(
                                    "Illegal escaped string " + str + " unescaped " + curChar + " at " + i);
                } else if (curChar == escapeChar) {
                    hasPreEscape = true;
                } else {
                    result.append(curChar);
                }
            }
        }
        if (hasPreEscape) {
            throw new IllegalArgumentException(
                            "Illegal escaped string " + str + ", not expecting " + escapeChar + " in the end.");
        }
        return result.toString();
    }

    /**
     * Return hostname without throwing exception.
     * @return hostname
     */
    public static String getHostname() {
        try {
            return "" + InetAddress.getLocalHost();
        } catch (UnknownHostException uhe) {
            return "" + uhe;
        }
    }

    /**
     * Return a message for logging.
     * @param prefix prefix keyword for the message
     * @param msg content of the message
     * @return a message for logging
     */
    private static String toStartupShutdownString(String prefix, String[] msg) {
        StringBuilder b = new StringBuilder(prefix);
        b.append("\n/************************************************************");
        for (String s : msg)
            b.append("\n" + prefix + s);
        b.append("\n************************************************************/");
        return b.toString();
    }



    /**
     * The traditional binary prefixes, kilo, mega, ..., exa,
     * which can be represented by a 64-bit integer.
     * TraditionalBinaryPrefix symbol are case insensitive.
     */
    public static enum TraditionalBinaryPrefix {
        KILO(1024), MEGA(KILO.value << 10), GIGA(MEGA.value << 10), TERA(GIGA.value << 10), PETA(TERA.value << 10), EXA(
                        PETA.value << 10);

        public final long value;
        public final char symbol;

        TraditionalBinaryPrefix(long value) {
            this.value = value;
            this.symbol = toString().charAt(0);
        }

        /**
         * @return The TraditionalBinaryPrefix object corresponding to the symbol.
         */
        public static TraditionalBinaryPrefix valueOf(char symbol) {
            symbol = Character.toUpperCase(symbol);
            for (TraditionalBinaryPrefix prefix : TraditionalBinaryPrefix.values()) {
                if (symbol == prefix.symbol) {
                    return prefix;
                }
            }
            throw new IllegalArgumentException("Unknown symbol '" + symbol + "'");
        }

        /**
         * Convert a string to long.
         * The input string is first be trimmed
         * and then it is parsed with traditional binary prefix.
         *
         * For example,
         * "-1230k" will be converted to -1230 * 1024 = -1259520;
         * "891g" will be converted to 891 * 1024^3 = 956703965184;
         *
         * @param s input string
         * @return a long value represented by the input string.
         */
        public static long string2long(String s) {
            s = s.trim();
            final int lastpos = s.length() - 1;
            final char lastchar = s.charAt(lastpos);
            if (Character.isDigit(lastchar))
                return Long.parseLong(s);
            else {
                long prefix = TraditionalBinaryPrefix.valueOf(lastchar).value;
                long num = Long.parseLong(s.substring(0, lastpos));
                if (num > (Long.MAX_VALUE / prefix) || num < (Long.MIN_VALUE / prefix)) {
                    throw new IllegalArgumentException(s + " does not fit in a Long");
                }
                return num * prefix;
            }
        }
    }

    /**
     * Escapes HTML Special characters present in the string.
     * @param string
     * @return HTML Escaped String representation
     */
    public static String escapeHTML(String string) {
        if (string == null) {
            return null;
        }
        StringBuilder sb = new StringBuilder();
        boolean lastCharacterWasSpace = false;
        char[] chars = string.toCharArray();
        for (char c : chars) {
            if (c == ' ') {
                if (lastCharacterWasSpace) {
                    lastCharacterWasSpace = false;
                    sb.append(" ");
                } else {
                    lastCharacterWasSpace = true;
                    sb.append(" ");
                }
            } else {
                lastCharacterWasSpace = false;
                switch (c) {
                    case '<':
                        sb.append("<");
                        break;
                    case '>':
                        sb.append(">");
                        break;
                    case '&':
                        sb.append("&");
                        break;
                    case '"':
                        sb.append(""");
                        break;
                    default:
                        sb.append(c);
                        break;
                }
            }
        }

        return sb.toString();
    }

    /**
     * Return an abbreviated English-language desc of the byte length
     */
    public static String byteDesc(long len) {
        double val;
        String ending;
        if (len < 1024 * 1024) {
            val = (1.0 * len) / 1024;
            ending = " KB";
        } else if (len < 1024 * 1024 * 1024) {
            val = (1.0 * len) / (1024 * 1024);
            ending = " MB";
        } else if (len < 1024L * 1024 * 1024 * 1024) {
            val = (1.0 * len) / (1024 * 1024 * 1024);
            ending = " GB";
        } else if (len < 1024L * 1024 * 1024 * 1024 * 1024) {
            val = (1.0 * len) / (1024L * 1024 * 1024 * 1024);
            ending = " TB";
        } else {
            val = (1.0 * len) / (1024L * 1024 * 1024 * 1024 * 1024);
            ending = " PB";
        }
        return limitDecimalTo2(val) + ending;
    }

    public static synchronized String limitDecimalTo2(double d) {
        return decimalFormat.format(d);
    }

    /**
     * Concatenates strings, using a separator.
     *
     * @param separator Separator to join with.
     * @param strings Strings to join.
     */
    public static String join(CharSequence separator, Iterable strings) {
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        for (String s : strings) {
            if (first) {
                first = false;
            } else {
                sb.append(separator);
            }
            sb.append(s);
        }
        return sb.toString();
    }

    /**
     * Concatenates stringified objects, using a separator.
     *
     * @param separator Separator to join with.
     * @param objects Objects to join.
     */
    public static String joinObjects(CharSequence separator, Iterable objects) {
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        for (Object o : objects) {
            if (first) {
                first = false;
            } else {
                sb.append(separator);
            }
            sb.append(String.valueOf(o));
        }
        return sb.toString();
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy