All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.pwall.util.Strings Maven / Gradle / Ivy

The newest version!
/*
 * @(#) Strings.java
 *
 * javautil Java Utility Library
 * Copyright (c) 2013, 2014, 2015, 2016, 2017, 2018 Peter Wall
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

package net.pwall.util;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Objects;
import java.util.function.IntPredicate;

/**
 * String utility functions.
 *
 * @author Peter Wall
 */
public class Strings {

    // English words for the numbers 0 to 19, indexed directly by the number.
    private static final String[] numberNamesEnglish = { "zero", "one", "two", "three", "four",
            "five", "six", "seven", "eight", "nine", "ten", "eleven", "twelve", "thirteen",
            "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen" };
    // English words for the multiples of ten from 20 to 90; index 0 is "twenty", so lookups
    // use (n / 10 - 2).
    private static final String[] tensNamesEnglish = { "twenty", "thirty", "forty", "fifty",
            "sixty", "seventy", "eighty", "ninety" };

    // Upper-case hexadecimal digit characters, indexed by digit value (0 to 15).
    // NOTE(review): not declared final, unlike the other tables - confirm it is never
    // reassigned and consider making it final.
    private static char[] hexDigits = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A',
            'B', 'C', 'D', 'E', 'F' };

    // Shared empty results, returned instead of allocating a new empty string or array.
    private static final String emptyString = "";
    private static final String[] emptyStringArray = {};

    /**
     * Private constructor to prevent instantiation; the class contains only static members.
     * The constructor body throws unconditionally, so it can never complete normally even if
     * it is reached (for example via reflection).
     *
     * @throws  IllegalAccessException in all cases
     */
    private Strings() throws IllegalAccessException {
        throw new IllegalAccessException("Attempt to instantiate Strings class");
    }

    /**
     * Express a number in English words, e.g. {@code 21} becomes {@code "twenty-one"}.
     *
     * @param   n       the number to be converted
     * @return          the number written out in words
     */
    public static String toEnglish(int n) {
        if (n < 0 || n >= 20) {
            StringBuilder words = new StringBuilder();
            try {
                appendEnglish(words, n);
            }
            catch (IOException ignored) {
                // unreachable - appending to a StringBuilder never throws IOException
            }
            return words.toString();
        }
        // 0 to 19 come straight from the lookup table, so no StringBuilder is allocated
        return numberNamesEnglish[n];
    }

    /**
     * Write a number, expressed in English words, to an {@link Appendable}.  Negative numbers
     * are prefixed with "minus"; groups are joined with ", " or " and " in the British style
     * (e.g. "one thousand and five", "one million, two hundred thousand").
     *
     * @param   a   the {@link Appendable} to receive the words
     * @param   n   the number to be converted
     * @return  the {@link Appendable} (for chaining)
     * @throws  IOException if thrown by the {@link Appendable}
     */
    public static Appendable appendEnglish(Appendable a, int n) throws IOException {
        // 0 to 19 (including zero itself) are a direct table lookup
        if (n >= 0 && n < 20)
            return a.append(numberNamesEnglish[n]);

        if (n < 0) {
            a.append("minus ");
            if (n != Integer.MIN_VALUE)
                n = -n;
            else {
                // MIN_VALUE has no positive int counterpart, so emit the leading billions here
                // and continue with the remainder computed in long arithmetic
                a.append("two billion, ");
                n = (int)(0x80000000L % 1_000_000_000);
            }
        }

        // billions (a signed 32-bit int holds at most two of them)
        if (n >= 1_000_000_000) {
            appendEnglish(a, n / 1_000_000_000).append(" billion");
            n %= 1_000_000_000;
            if (n == 0)
                return a;
            a.append(n >= 100 ? ", " : " and ");
        }

        // millions
        if (n >= 1_000_000) {
            appendEnglish(a, n / 1_000_000).append(" million");
            n %= 1_000_000;
            if (n == 0)
                return a;
            a.append(n >= 100 ? ", " : " and ");
        }

        // thousands
        if (n >= 1_000) {
            appendEnglish(a, n / 1_000).append(" thousand");
            n %= 1_000;
            if (n == 0)
                return a;
            a.append(n >= 100 ? ", " : " and ");
        }

        // hundreds
        if (n >= 100) {
            a.append(numberNamesEnglish[n / 100]).append(" hundred");
            n %= 100;
            if (n == 0)
                return a;
            a.append(" and ");
        }

        // tens, hyphenated to the units when both are present
        if (n >= 20) {
            a.append(tensNamesEnglish[n / 10 - 2]);
            n %= 10;
            if (n != 0)
                a.append('-');
        }

        // whatever is left is in the range 0 to 19; zero adds nothing
        if (n > 0)
            a.append(numberNamesEnglish[n]);
        return a;
    }

    /**
     * Return the string with its first character converted to upper case.  A string that is
     * empty, or whose first character is not a lower-case letter, is returned as is (the same
     * instance).
     *
     * @param   str     the input string
     * @return          the string, with the first letter capitalised
     */
    public static String capitalise(String str) {
        if (str.isEmpty())
            return str;
        char first = str.charAt(0);
        if (!Character.isLowerCase(first))
            return str;
        return Character.toUpperCase(first) + str.substring(1);
    }

    /**
     * Format a count followed by a space and a regular English noun, adding the letter "s" to
     * the noun unless the count is exactly 1.  For example:
     * <pre>
     *     Strings.plural("file", 23);
     * </pre>
     * will return:
     * <pre>
     *     "23 files"
     * </pre>
     *
     * @param   noun    the noun
     * @param   n       the number
     * @return          the string
     */
    public static String plural(String noun, int n) {
        String suffix = n == 1 ? "" : "s";
        return n + " " + noun + suffix;
    }

    /**
     * Format a count followed by a space and either the singular or the plural form of a noun
     * (for use with irregular plurals).  The singular form is used only when the count is
     * exactly 1.  For example:
     * <pre>
     *     Strings.plural("axis", "axes", 2);
     * </pre>
     * will return:
     * <pre>
     *     "2 axes"
     * </pre>
     *
     * @param   singularNoun    the singular noun
     * @param   pluralNoun      the plural noun
     * @param   n               the number
     * @return                  the string
     */
    public static String plural(String singularNoun, String pluralNoun, int n) {
        String noun = n == 1 ? singularNoun : pluralNoun;
        return n + " " + noun;
    }

    /**
     * Break a string into tokens delimited by white space, as determined by
     * {@link Character#isWhitespace(int)}.
     *
     * @param   s       the string to be split
     * @return          an array of tokens (possibly empty)
     * @throws  NullPointerException if the input string is {@code null}
     */
    public static String[] split(String s) {
        int length = s.length();
        return split(s, 0, length, Character::isWhitespace);
    }

    /**
     * Break a portion of a string into tokens delimited by white space, as determined by
     * {@link Character#isWhitespace(int)}.
     *
     * @param   s       the string to be split
     * @param   start   the start index of the portion to be examined
     * @param   end     the end index (exclusive) of the portion to be examined
     * @return          an array of tokens (possibly empty)
     * @throws  NullPointerException if the input string is {@code null}
     * @throws  IndexOutOfBoundsException if {@code start} or {@code end} is invalid
     */
    public static String[] split(String s, int start, int end) {
        IntPredicate whitespace = Character::isWhitespace;
        return split(s, start, end, whitespace);
    }

    /**
     * Split a string into white space delimited tokens, where white space is determined by a
     * supplied {@link IntPredicate} function.  In lambda notation, this may be called by:
     *
     *     String[] array = Strings.split("a b c d e", Character::isWhitespace);
     * 
* * @param s the string to be split * @param spaceTest the space test function * @return an array of tokens (possibly empty) * @throws NullPointerException if the input string is {@code null} */ public static String[] split(String s, IntPredicate spaceTest) { return split(s, 0, s.length(), spaceTest); } /** * Split a portion of a string into white space delimited tokens, where white space is * determined by a supplied {@link IntPredicate} function. * * @param s the string to be split * @param start the start index of the portion to be examined * @param end the end index (exclusive) of the portion to be examined * @param spaceTest the space test function * @return an array of tokens (possibly empty) * @throws NullPointerException if the input string is {@code null} * @throws IndexOutOfBoundsException if {@code start} or {@code end} is invalid */ public static String[] split(String s, int start, int end, IntPredicate spaceTest) { // first, trim spaces from the start of the string; if we hit end, return empty array for (;;) { if (start >= end) return emptyStringArray; if (!spaceTest.test(s.charAt(start))) break; start++; } // now trim spaces from end (by this stage, we know there's at least one non-space) while (spaceTest.test(s.charAt(--end))) ; // first pass through the string to count the number of internal groups of spaces int count = 0, i = start + 1; outer: for (;;) { for (;;) { if (i >= end) break outer; if (spaceTest.test(s.charAt(i++))) break; } count++; while (spaceTest.test(s.charAt(i++))) ; } // result array size is number of separators plus 1 String[] result = new String[count + 1]; // for each result entry prior to the last... 
i = start; for (int j = 0; j < count; j++) { // store start index and skip past non-space int k = i; do { i++; } while (!spaceTest.test(s.charAt(i))); // add result substring to array result[j] = s.substring(k, i); // and skip past spaces do { i++; } while (spaceTest.test(s.charAt(i))); } // the last entry consists of the remainder of the (trimmed) string result[count] = s.substring(i, end + 1); return result; } /** * Split a string on a given string separator. * * @param s1 the string to be split * @param s2 the separator * @return an array of items (possibly empty) * @throws NullPointerException if either string is {@code null} */ public static String[] split(String s1, String s2) { // first pass through the string to count the number of separators int count = 0; int i = 0; int n2 = s2.length(); int stopper = s1.length() - n2; while (i <= stopper) { if (s1.regionMatches(i, s2, 0, n2)) { count++; i += n2; } else i++; } // result array size is number of separators plus 1 String[] result = new String[count + 1]; // for each result entry prior to the last... i = 0; for (int j = 0; j < count; j++) { // store start index and skip to separator int k = i; while (!s1.regionMatches(i, s2, 0, n2)) i++; // add result substring to array result[j] = s1.substring(k, i); // and skip past separator i += n2; } // the last entry consists of the remainder of the string result[count] = s1.substring(i); return result; } /** * Split a string on a given separator. * * @param s the string to be split * @param separator the separator * @return an array of items (possibly empty) * @throws NullPointerException if the input string is {@code null} */ public static String[] split(String s, char separator) { return split(s, 0, s.length(), separator, true, Character::isWhitespace); } /** * Split a string on a given separator character. 
Spaces may optionally be trimmed from * both ends of the items using the supplied space test, and zero-length items (after * optional trimming) may optionally be dropped. * * @param s the string to be split * @param separator the separator * @param skipEmpty if {@code true}, ignore zero-length items (possibly after trimming) * @param spaceTest if not {@code null}, use to trim spaces off both ends of each item * @return an array of items (possibly empty) * @throws NullPointerException if the input string is {@code null} */ public static String[] split(String s, char separator, boolean skipEmpty, IntPredicate spaceTest) { return split(s, 0, s.length(), separator, skipEmpty, spaceTest); } /** * Split a portion of a string on a given separator character. Spaces may optionally be * trimmed from both ends of the items using the supplied space test, and zero-length items * (after optional trimming) may optionally be dropped. * * @param s the string to be split * @param start the start index of the portion to be examined * @param end the end index (exclusive) of the portion to be examined * @param separator the separator * @param skipEmpty if {@code true}, ignore zero-length items (possibly after trimming) * @param spaceTest if not {@code null}, use to trim spaces off both ends of each item * @return an array of items (possibly empty) * @throws NullPointerException if the input string is {@code null} * @throws IndexOutOfBoundsException if {@code start} or {@code end} is invalid */ public static String[] split(String s, int start, int end, char separator, boolean skipEmpty, IntPredicate spaceTest) { int count = 0; int i = start; if (skipEmpty) { if (spaceTest != null) { // count the number of items (ignoring zero-length or all space items) for (;;) { boolean nonSpaceSeen = false; while (i < end) { char ch = s.charAt(i); if (ch == separator) break; nonSpaceSeen = nonSpaceSeen || !spaceTest.test(ch); i++; } if (nonSpaceSeen) count++; if (i >= end) break; i++; } } else { // 
count the number of items (ignoring zero-length items) for (;;) { int itemStart = i; while (i < end && s.charAt(i) != separator) i++; if (i > itemStart) count++; if (i >= end) break; i++; } } if (count == 0) return emptyStringArray; } else { // otherwise, count is just (number of separators + 1) count = 1; while (i < end) { if (s.charAt(i++) == separator) count++; } } String[] result = new String[count]; i = start; int j = 0; for (;;) { int itemStart = i; while (i < end && s.charAt(i) != separator) i++; int itemEnd = i; if (spaceTest != null) { while (itemStart < itemEnd && spaceTest.test(s.charAt(itemStart))) itemStart++; while (itemStart < itemEnd && spaceTest.test(s.charAt(itemEnd - 1))) itemEnd--; } if (itemEnd > itemStart) result[j++] = s.substring(itemStart, itemEnd); else if (!skipEmpty) result[j++] = emptyString; if (i >= end) break; i++; } return result; } /** * Join the string representations of the members of a collection. * * @param class of collection item * @param collection the collection (strictly speaking, an {@link Iterable}) * @return the concatenation of the string representations of the members (an empty string * if the collection is empty) */ public static String join(Iterable collection) { return join(collection.iterator()); } /** * Join the string representations of the members of an {@link Iterator}. * * @param class of collection item * @param it the {@link Iterator} * @return the concatenation of the string representations of the members (an empty string * if the {@link Iterator} has no members) */ public static String join(Iterator it) { if (!it.hasNext()) return emptyString; StringBuilder sb = new StringBuilder(); do { sb.append(it.next()); } while (it.hasNext()); return sb.length() == 0 ? emptyString : sb.toString(); } /** * Join the string representations of the members of an {@link Enumeration}. 
* * @param class of collection item * @param e the {@link Enumeration} * @return the concatenation of the string representations of the members (an empty string * if the {@link Enumeration} has no members) */ public static String join(Enumeration e) { if (!e.hasMoreElements()) return emptyString; StringBuilder sb = new StringBuilder(); do { sb.append(e.nextElement()); } while (e.hasMoreElements()); return sb.length() == 0 ? emptyString : sb.toString(); } /** * Join the string representations of the members of an array. * * @param class of array item * @param array the array * @return the concatenation of the string representations of the members (an empty string * if the array is empty) */ public static String join(E[] array) { int n = array.length; if (n == 0) return emptyString; int i = 0; StringBuilder sb = new StringBuilder(); do { sb.append(array[i++]); } while (i < n); return sb.length() == 0 ? emptyString : sb.toString(); } /** * Join the string representations of the members of a collection, with the specified * character separator. * * @param class of collection item * @param collection the collection (strictly speaking, an {@link Iterable}) * @param separator the separator * @return the concatenation of the string representations of the members (an empty string * if the collection is empty) */ public static String join(Iterable collection, char separator) { return join(collection.iterator(), separator); } /** * Join the string representations of the members of an {@link Iterator}, with the specified * character separator. 
* * @param class of collection item * @param it the {@link Iterator} * @param separator the separator * @return the concatenation of the string representations of the members (an empty string * if the {@link Iterator} has no members) */ public static String join(Iterator it, char separator) { if (!it.hasNext()) return emptyString; StringBuilder sb = new StringBuilder(); for (;;) { sb.append(it.next()); if (!it.hasNext()) break; sb.append(separator); } return sb.length() == 0 ? emptyString : sb.toString(); } /** * Join the string representations of the members of an {@link Enumeration}, with the * specified character separator. * * @param class of collection item * @param e the {@link Enumeration} * @param separator the separator * @return the concatenation of the string representations of the members (an empty string * if the {@link Enumeration} has no members) */ public static String join(Enumeration e, char separator) { if (!e.hasMoreElements()) return emptyString; StringBuilder sb = new StringBuilder(); for (;;) { sb.append(e.nextElement()); if (!e.hasMoreElements()) break; sb.append(separator); } return sb.length() == 0 ? emptyString : sb.toString(); } /** * Join the string representations of the members of an array, with the specified character * separator. * * @param class of array item * @param array the array * @param separator the separator * @return the concatenation of the string representations of the members (an empty string * if the array is empty) */ public static String join(E[] array, char separator) { int n = array.length; if (n == 0) return emptyString; int i = 0; StringBuilder sb = new StringBuilder(); for (;;) { sb.append(array[i++]); if (i >= n) break; sb.append(separator); } return sb.length() == 0 ? emptyString : sb.toString(); } /** * Join the string representations of the members of a collection, with the specified string * separator. 
* * @param class of collection item * @param collection the collection (strictly speaking, an {@link Iterable}) * @param separator the separator * @return the concatenation of the string representations of the members (an empty string * if the collection is empty) */ public static String join(Iterable collection, String separator) { return join(collection.iterator(), separator); } /** * Join the string representations of the members of an {@link Iterator}, with the specified * string separator. * * @param class of collection item * @param it the {@link Iterator} * @param separator the separator * @return the concatenation of the string representations of the members (an empty string * if the {@link Iterator} has no members) */ public static String join(Iterator it, String separator) { if (!it.hasNext()) return emptyString; StringBuilder sb = new StringBuilder(); for (;;) { sb.append(it.next()); if (!it.hasNext()) break; sb.append(separator); } return sb.length() == 0 ? emptyString : sb.toString(); } /** * Join the string representations of the members of an {@link Enumeration}, with the * specified string separator. * * @param class of collection item * @param e the {@link Enumeration} * @param separator the separator * @return the concatenation of the string representations of the members (an empty string * if the {@link Enumeration} has no members) */ public static String join(Enumeration e, String separator) { if (!e.hasMoreElements()) return emptyString; StringBuilder sb = new StringBuilder(); for (;;) { sb.append(e.nextElement()); if (!e.hasMoreElements()) break; sb.append(separator); } return sb.length() == 0 ? emptyString : sb.toString(); } /** * Join the string representations of the members of an array, with the specified string * separator. 
* * @param class of array item * @param array the array * @param separator the separator * @return the concatenation of the string representations of the members (an empty string * if the array is empty) */ public static String join(E[] array, String separator) { int n = array.length; if (n == 0) return emptyString; int i = 0; StringBuilder sb = new StringBuilder(); for (;;) { sb.append(array[i++]); if (i >= n) break; sb.append(separator); } return sb.length() == 0 ? emptyString : sb.toString(); } /** * Replace certain characters in a string with their mapped equivalents, as specified in the * provided {@link CharMapper} instance. If the string contains no characters to be mapped, * the original string is returned unmodified. * * @param s the string to be converted * @param mapper the {@link CharMapper} instance * @return the string with characters mapped as required */ public static String escape(String s, CharMapper mapper) { for (int i = 0, n = s.length(); i < n; ) { String mapped = mapper.map(s.charAt(i++)); if (mapped != null) { StringBuilder sb = new StringBuilder(); sb.append(s, 0, i - 1); sb.append(mapped); try { appendEscaped(sb, s, i, n, mapper); } catch (IOException e) { // can't happen - StringBuilder.append() does not throw IOException } return sb.toString(); } } return s; } /** * Replace certain characters in a {@link CharSequence} with their mapped equivalents, as * specified in the provided {@link CharMapper} instance. If the sequence contains no * characters to be mapped, the original sequence is returned unmodified. 
* * @param s the {@link CharSequence} to be converted * @param mapper the {@link CharMapper} instance * @return the sequence with characters mapped as required */ public static CharSequence escape(CharSequence s, CharMapper mapper) { for (int i = 0, n = s.length(); i < n; ) { String mapped = mapper.map(s.charAt(i++)); if (mapped != null) { StringBuilder sb = new StringBuilder(); sb.append(s, 0, i - 1); sb.append(mapped); try { appendEscaped(sb, s, i, n, mapper); } catch (IOException e) { // can't happen - StringBuilder.append() does not throw IOException } return sb; } } return s; } /** * Append characters to an {@link Appendable}, mapping them to their "escaped" equivalents * specified in the provided {@link CharMapper} instance. * * @param a the {@link Appendable} (e.g. a {@link StringBuilder}) * @param s the source {@link CharSequence} * @param index the start index within the source * @param end the end index within the source * @param mapper the {@link CharMapper} * @throws IOException if thrown by the {@link Appendable} */ public static void appendEscaped(Appendable a, CharSequence s, int index, int end, CharMapper mapper) throws IOException { while (index < end) { char ch = s.charAt(index++); String mapped = mapper.map(ch); if (mapped != null) a.append(mapped); else a.append(ch); } } /** * Append characters to an {@link Appendable}, mapping them to their "escaped" equivalents * specified in the provided {@link CharMapper} instance. * * @param a the {@link Appendable} (e.g. a {@link StringBuilder}) * @param s the source {@link CharSequence} * @param mapper the {@link CharMapper} * @throws IOException if thrown by the {@link Appendable} */ public static void appendEscaped(Appendable a, CharSequence s, CharMapper mapper) throws IOException { appendEscaped(a, s, 0, s.length(), mapper); } /** * Replace certain characters in a string with their mapped equivalents, as specified in the * provided {@link CharMapper} instance. 
Surrogate sequences are converted to Unicode * code points before mapping. If the string contains no characters to be mapped, the * original string is returned unmodified. * * @param s the UTF16 string to be converted * @param mapper the {@link CharMapper} instance * @return the string with characters mapped as required */ public static String escapeUTF16(String s, CharMapper mapper) { for (int i = 0, n = s.length(); i < n; ) { int k = i; char ch1 = s.charAt(i++); String mapped; if (Character.isHighSurrogate(ch1)) { char ch2; if (i >= n || !Character.isLowSurrogate(ch2 = s.charAt(i++))) throw new IllegalArgumentException("Illegal surrogate sequence"); mapped = mapper.map(Character.toCodePoint(ch1, ch2)); } else mapped = mapper.map(ch1); if (mapped != null) { StringBuilder sb = new StringBuilder(); sb.append(s, 0, k); sb.append(mapped); try { appendEscapedUTF16(sb, s, i, n, mapper); } catch (IOException e) { // can't happen - StringBuilder.append() does not throw IOException } return sb.toString(); } } return s; } /** * Replace certain characters in a {@link CharSequence} with their mapped equivalents, as * specified in the provided {@link CharMapper} instance. Surrogate sequences are converted * to Unicode code points before mapping. If the sequence contains no characters to be * mapped, the original sequence is returned unmodified. 
* * @param s the {@link CharSequence} to be converted * @param mapper the {@link CharMapper} instance * @return the sequence with characters mapped as required */ public static CharSequence escapeUTF16(CharSequence s, CharMapper mapper) { for (int i = 0, n = s.length(); i < n; ) { int k = i; char ch1 = s.charAt(i++); String mapped; if (Character.isHighSurrogate(ch1)) { char ch2; if (i >= n || !Character.isLowSurrogate(ch2 = s.charAt(i++))) throw new IllegalArgumentException("Illegal surrogate sequence"); mapped = mapper.map(Character.toCodePoint(ch1, ch2)); } else mapped = mapper.map(ch1); if (mapped != null) { StringBuilder sb = new StringBuilder(); sb.append(s, 0, k); sb.append(mapped); try { appendEscapedUTF16(sb, s, i, n, mapper); } catch (IOException e) { // can't happen - StringBuilder.append() does not throw IOException } return sb; } } return s; } /** * Append characters to an {@link Appendable}, mapping them to their "escaped" equivalents * specified in the provided {@link CharMapper} instance. Surrogate sequences are converted * to Unicode code points before mapping. * * @param a the {@link Appendable} (e.g. 
a {@link StringBuilder}) * @param s the source {@link CharSequence} * @param index the start index within the source * @param end the end index within the source * @param mapper the {@link CharMapper} * @throws IOException if thrown by the {@link Appendable} */ public static void appendEscapedUTF16(Appendable a, CharSequence s, int index, int end, CharMapper mapper) throws IOException { String mapped; while (index < end) { char ch1 = s.charAt(index++); if (Character.isHighSurrogate(ch1)) { char ch2; if (index >= end || !Character.isLowSurrogate(ch2 = s.charAt(index++))) throw new IllegalArgumentException("Illegal surrogate sequence"); mapped = mapper.map(Character.toCodePoint(ch1, ch2)); if (mapped != null) a.append(mapped); else a.append(ch1).append(ch2); } else { mapped = mapper.map(ch1); if (mapped != null) a.append(mapped); else a.append(ch1); } } } /** * Append characters to an {@link Appendable}, mapping them to their "escaped" equivalents * specified in the provided {@link CharMapper} instance. Surrogate sequences are converted * to Unicode code points before mapping. * * @param a the {@link Appendable} (e.g. a {@link StringBuilder}) * @param s the source {@link CharSequence} * @param mapper the {@link CharMapper} * @throws IOException if thrown by the {@link Appendable} */ public static void appendEscapedUTF16(Appendable a, CharSequence s, CharMapper mapper) throws IOException { appendEscapedUTF16(a, s, 0, s.length(), mapper); } /** * Scan a string for escape sequences and replace them by the original characters. For * example, in Java code the backslash character indicates the start of an escape sequence * representing a character that may not appear in its raw form in a string. If the * string contains no escape sequences to be unmapped, the original sequence is returned * unmodified. 
* * @param s the string to be converted * @param unmapper an instance of the {@link CharUnmapper} class, which will perform * the actual escape sequence mapping * @return the "unescaped" string */ public static String unescape(String s, CharUnmapper unmapper) { for (int i = 0, n = s.length(); i < n; i++) { if (unmapper.isEscape(s, i)) { StringBuilder sb = new StringBuilder(s.length()); sb.append(s, 0, i); i += unmapper.unmap(sb, s, i); while (i < n) { if (unmapper.isEscape(s, i)) i += unmapper.unmap(sb, s, i); else sb.append(s.charAt(i++)); } return sb.toString(); } } return s; } /** * Scan a {@link CharSequence} for escape sequences and replace them by the original * characters. For example, in Java code the backslash character indicates the start of an * escape sequence representing a character that may not appear in its raw form in a string. * * @param s the {@link CharSequence} to be converted * @param unmapper an instance of the {@link CharUnmapper} class, which will perform * the actual escape sequence mapping * @return the "unescaped" {@link CharSequence} */ public static CharSequence unescape(CharSequence s, CharUnmapper unmapper) { for (int i = 0, n = s.length(); i < n; i++) { if (unmapper.isEscape(s, i)) { StringBuilder sb = new StringBuilder(s.length()); sb.append(s, 0, i); i += unmapper.unmap(sb, s, i); while (i < n) { if (unmapper.isEscape(s, i)) i += unmapper.unmap(sb, s, i); else sb.append(s.charAt(i++)); } return sb; } } return s; } /** * Convert a Unicode code point to a one- or two-character string. * * @param codePoint the Unicode code point * @return the string equivalent */ public static String toUTF16(int codePoint) { StringBuilder sb = new StringBuilder(2); try { appendUTF16(sb, codePoint); } catch (IOException e) { // can't happen - StringBuilder.append() does not throw IOException } return sb.toString(); } /** * Convert an array of Unicode code points to a string. 
* * @param codePoints the Unicode code points * @return the string equivalent */ public static String toUTF16(int[] codePoints) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < codePoints.length; i++) { try { appendUTF16(sb, codePoints[i]); } catch (IOException e) { // can't happen - StringBuilder.append() does not throw IOException } } return sb.toString(); } /** * Append a Unicode code point to an {@link Appendable}. Code points outside the BMP are * appended as surrogate sequences. * * @param a the {@link Appendable} (e.g. a {@link StringBuilder}) * @param codePoint the Unicode code point * @throws IOException if thrown by the {@link Appendable} */ public static void appendUTF16(Appendable a, int codePoint) throws IOException { if (Character.isSupplementaryCodePoint(codePoint)) { a.append(Character.highSurrogate(codePoint)); a.append(Character.lowSurrogate(codePoint)); } else if (Character.isBmpCodePoint(codePoint) && !Character.isSurrogate((char)codePoint)) a.append((char)codePoint); else throw new IllegalArgumentException("Illegal character for UTF-16"); } /** * Convert a string to UTF-8 encoding. * * @param str the string * @return the UTF-8 form of the string (as a byte array) */ public static byte[] toUTF8(String str) { // note - expects the string to be encoded in UTF-16 if (str == null) throw new IllegalArgumentException("String must not be null"); return toUTF8(str, 0, str.length()); } /** * Convert a byte array from UTF-8 encoding to a UTF-16 string. * * @param bytes the byte array * @return the decoded string * @throws IllegalArgumentException if the byte array is {@code null}, or if the * byte array contains an invalid UTF-8 sequence */ public static String fromUTF8(byte[] bytes) { if (bytes == null) throw new IllegalArgumentException("Byte array must not be null"); return fromUTF8(bytes, 0, bytes.length); } /** * Convert a portion of a string to UTF-8 encoding. 
* * @param str the string * @param start the start index * @param end the end index * @return the UTF-8 form of the string (as a byte array) * @throws IllegalArgumentException if the string is {@code null}, or if the string * contains an invalid UTF-16 sequence */ public static byte[] toUTF8(String str, int start, int end) { // note - expects the string to be encoded in UTF-16 if (str == null) throw new IllegalArgumentException("String must not be null"); ByteArrayBuilder bab = new ByteArrayBuilder((end - start) * 5 / 4); for (int i = start; i < end; i++) { char ch = str.charAt(i); int codePoint; if (Character.isHighSurrogate(ch)) { char lowSurrogate; if (++i >= end || !Character.isLowSurrogate(lowSurrogate = str.charAt(i))) throw new IllegalArgumentException("Invalid UTF-16 surrogate sequence"); codePoint = Character.toCodePoint(ch, lowSurrogate); } else codePoint = ch; appendUTF8(bab, codePoint); } return bab.toByteArray(); } /** * Append a codepoint to a {@link ByteArrayBuilder} as UTF-8. * * @param bab the {@link ByteArrayBuilder} * @param codepoint the codepoint */ public static void appendUTF8(ByteArrayBuilder bab, int codepoint) { if (codepoint <= 0x7F) bab.append(codepoint); else if (codepoint <= 0x7FF) { bab.append((codepoint >> 6) | 0xC0); bab.append((codepoint & 0x3F) | 0x80); } else if (codepoint <= 0xFFFF) { bab.append((codepoint >> 12) | 0xE0); bab.append(((codepoint >> 6) & 0x3F) | 0x80); bab.append((codepoint & 0x3F) | 0x80); } else { bab.append(((codepoint >> 18) & 0x7) | 0xF0); bab.append(((codepoint >> 12) & 0x3F) | 0x80); bab.append(((codepoint >> 6) & 0x3F) | 0x80); bab.append((codepoint & 0x3F) | 0x80); } } /** * Convert a sequence of bytes in a {@link ByteBuffer} from UTF-8 encoding to a UTF-16 string. 
* * @param byteBuffer the {@link ByteBuffer} * @return the decoded string * @throws IllegalArgumentException if the byte array is {@code null}, if the start * or end index is invalid, or if the byte array contains an invalid UTF-8 * sequence */ public static String fromUTF8(ByteBuffer byteBuffer) { if (byteBuffer == null) throw new IllegalArgumentException("ByteBuffer must not be null"); return fromUTF8(new Iterator() { @Override public boolean hasNext() { return byteBuffer.hasRemaining(); } @Override public Byte next() { return byteBuffer.get(); } }); } /** * Convert a sequence of bytes in an array of {@link ByteBuffer}s from UTF-8 encoding to a UTF-16 string. * * @param byteBuffers the {@link ByteBuffer} array * @return the decoded string * @throws IllegalArgumentException if the byte array is {@code null}, if the start * or end index is invalid, or if the byte array contains an invalid UTF-8 * sequence */ public static String fromUTF8(ByteBuffer[] byteBuffers) { if (byteBuffers == null) throw new IllegalArgumentException("ByteBuffer array must not be null"); return fromUTF8(new Iterator() { int i = 0; @Override public boolean hasNext() { while (i < byteBuffers.length) { if (byteBuffers[i].hasRemaining()) return true; i++; } return false; } @Override public Byte next() { return byteBuffers[i].get(); } }); } /** * Convert a sequence of bytes described by an {@link Iterator} from UTF-8 encoding to a UTF-16 string. 
* * @param byteIterator an {@link Iterator} over a sequence of bytes * @return the decoded string * @throws IllegalArgumentException if the byte array is {@code null}, if the start * or end index is invalid, or if the byte array contains an invalid UTF-8 * sequence */ public static String fromUTF8(Iterator byteIterator) { if (byteIterator == null) throw new IllegalArgumentException("Byte iterator must not be null"); StringBuilder sb = new StringBuilder(); while (byteIterator.hasNext()) { int b = byteIterator.next(); if ((b & 0x80) == 0) sb.append((char)b); else if ((b & 0x40) == 0) throw new IllegalArgumentException("Illegal character in UTF-8 bytes"); else if ((b & 0x20) == 0) { int codePoint = b & 0x1F; codePoint = addToCodePoint(codePoint, byteIterator); sb.append((char)codePoint); } else if ((b & 0x10) == 0) { int codePoint = b & 0x0F; codePoint = addToCodePoint(codePoint, byteIterator); codePoint = addToCodePoint(codePoint, byteIterator); sb.append((char)codePoint); } else { int codePoint = b & 0x07; codePoint = addToCodePoint(codePoint, byteIterator); codePoint = addToCodePoint(codePoint, byteIterator); codePoint = addToCodePoint(codePoint, byteIterator); try { appendUTF16(sb, codePoint); } catch (IOException ioe) { // can't happen - StringBuilder.append() does not throw IOException } } } return sb.toString(); } /** * Accumulate codepoint (UTF-8 decoding). * * @param codePoint the codepoint so far * @param byteIterator the {@link Iterator} * @return the updated codepoint * @throws IllegalArgumentException if the bytes are invalid */ private static int addToCodePoint(int codePoint, Iterator byteIterator) { if (!byteIterator.hasNext()) throw new IllegalArgumentException("Incomplete sequence in UTF-8 bytes"); int b = byteIterator.next(); if ((b & 0xC0) != 0x80) throw new IllegalArgumentException("Illegal character in UTF-8 bytes"); return (codePoint << 6) | (b & 0x3F); } /** * Convert a portion of a byte array from UTF-8 encoding to a UTF-16 string. 
* * @param bytes the byte array * @param start the start index * @param end the end index * @return the decoded string * @throws IllegalArgumentException if the byte array is {@code null}, if the start * or end index is invalid, or if the byte array contains an invalid UTF-8 * sequence */ public static String fromUTF8(byte[] bytes, int start, int end) { if (bytes == null) throw new IllegalArgumentException("Byte array must not be null"); if (start < 0 || start > bytes.length) throw new IllegalArgumentException("Start index invalid: " + start); if (end < start || end > bytes.length) throw new IllegalArgumentException("End index invalid: " + end); StringBuilder sb = new StringBuilder(); for (int i = start; i < end; i++) { int b = bytes[i]; if ((b & 0x80) == 0) sb.append((char)b); else if ((b & 0x40) == 0) throw new IllegalArgumentException("Illegal character in UTF-8 bytes"); else if ((b & 0x20) == 0) { int codePoint = b & 0x1F; codePoint = addToCodePoint(codePoint, bytes, ++i, end); sb.append((char)codePoint); } else if ((b & 0x10) == 0) { int codePoint = b & 0x0F; codePoint = addToCodePoint(codePoint, bytes, ++i, end); codePoint = addToCodePoint(codePoint, bytes, ++i, end); sb.append((char)codePoint); } else { int codePoint = b & 0x07; codePoint = addToCodePoint(codePoint, bytes, ++i, end); codePoint = addToCodePoint(codePoint, bytes, ++i, end); codePoint = addToCodePoint(codePoint, bytes, ++i, end); try { appendUTF16(sb, codePoint); } catch (IOException ioe) { // can't happen - StringBuilder.append() does not throw IOException } } } return sb.toString(); } /** * Accumulate codepoint (UTF-8 decoding). 
* * @param codePoint the codepoint so far * @param bytes the byte array * @param index the current index into the array * @param end the end index * @return the updated codepoint * @throws IllegalArgumentException if the bytes are invalid */ private static int addToCodePoint(int codePoint, byte[] bytes, int index, int end) { if (index >= end) throw new IllegalArgumentException("Incomplete sequence in UTF-8 bytes"); int b = bytes[index]; if ((b & 0xC0) != 0x80) throw new IllegalArgumentException("Illegal character in UTF-8 bytes"); return (codePoint << 6) | (b & 0x3F); } /** * Convert a {@link CharSequence} to hexadecimal. * * @param s the {@link CharSequence} * @return the converted string * @throws IllegalArgumentException if the {@link CharSequence} is {@code null} */ public static String toHex(CharSequence s) { if (s == null) throw new IllegalArgumentException("argument must not be null"); int n = s.length(); if (n == 0) return ""; StringBuilder sb = new StringBuilder(); for (int i = 0; i < n; i++) { try { appendHex(sb, s.charAt(i)); } catch (IOException e) { // can't happen - StringBuilder.append() does not throw IOException } } return sb.toString(); } /** * Convert a {@link CharSequence} to hexadecimal, with a separator between bytes for easier * reading. * * @param s the {@link CharSequence} * @param separator the separator * @return the converted string * @throws IllegalArgumentException if the {@link CharSequence} is {@code null} */ public static String toHex(CharSequence s, char separator) { if (s == null) throw new IllegalArgumentException("argument must not be null"); int n = s.length(); if (n == 0) return ""; StringBuilder sb = new StringBuilder(); int i = 0; for (;;) { try { appendHex(sb, s.charAt(i++)); } catch (IOException e) { // can't happen - StringBuilder.append() does not throw IOException } if (i >= n) break; sb.append(separator); } return sb.toString(); } /** * Convert a byte array to hexadecimal. 
* * @param bytes the byte array * @return the converted string * @throws IllegalArgumentException if the byte array is {@code null} */ public static String toHex(byte[] bytes) { if (bytes == null) throw new IllegalArgumentException("argument must not be null"); int n = bytes.length; if (n == 0) return ""; StringBuilder sb = new StringBuilder(); for (int i = 0; i < n; i++) { try { appendHex(sb, bytes[i]); } catch (IOException e) { // can't happen - StringBuilder.append() does not throw IOException } } return sb.toString(); } /** * Convert a byte array to hexadecimal, with a separator between bytes for easier reading. * * @param bytes the byte array * @param separator the separator * @return the converted string * @throws IllegalArgumentException if the byte array is {@code null} */ public static String toHex(byte[] bytes, char separator) { if (bytes == null) throw new IllegalArgumentException("argument must not be null"); int n = bytes.length; if (n == 0) return ""; StringBuilder sb = new StringBuilder(); int i = 0; for (;;) { try { appendHex(sb, bytes[i++]); } catch (IOException e) { // can't happen - StringBuilder.append() does not throw IOException } if (i >= n) break; sb.append(separator); } return sb.toString(); } /** * Convert a byte to hexadecimal. * * @param b the byte * @return the converted string */ public static String toHex(byte b) { StringBuilder sb = new StringBuilder(2); try { appendHex(sb, b); } catch (IOException e) { // can't happen - StringBuilder.append() does not throw IOException } return sb.toString(); } /** * Convert a character to hexadecimal. * * @param ch the character * @return the converted string */ public static String toHex(char ch) { StringBuilder sb = new StringBuilder(4); try { appendHex(sb, ch); } catch (IOException e) { // can't happen - StringBuilder.append() does not throw IOException } return sb.toString(); } /** * Convert an integer to hexadecimal. 
* * @param i the integer * @return the converted string */ public static String toHex(int i) { StringBuilder sb = new StringBuilder(8); try { appendHex(sb, i); } catch (IOException e) { // can't happen - StringBuilder.append() does not throw IOException } return sb.toString(); } /** * Convert a long to hexadecimal. * * @param n the number * @return the converted string */ public static String toHex(long n) { StringBuilder sb = new StringBuilder(16); try { appendHex(sb, n); } catch (IOException e) { // can't happen - StringBuilder.append() does not throw IOException } return sb.toString(); } /** * Append a byte value as hexadecimal to an {@link Appendable}. * * @param a the {@link Appendable} * @param b the byte * @throws IOException if thrown by the {@link Appendable} */ public static void appendHex(Appendable a, byte b) throws IOException { a.append(hexDigits[(b >>> 4) & 0xF]); a.append(hexDigits[b & 0xF]); } /** * Append a character value as hexadecimal to an {@link Appendable}. * * @param a the {@link Appendable} * @param ch the character * @throws IOException if thrown by the {@link Appendable} */ public static void appendHex(Appendable a, char ch) throws IOException { appendHex(a, (byte)(ch >>> 8)); appendHex(a, (byte)ch); } /** * Append an integer value as hexadecimal to an {@link Appendable}. * * @param a the {@link Appendable} * @param i the number * @throws IOException if thrown by the {@link Appendable} */ public static void appendHex(Appendable a, int i) throws IOException { appendHex(a, (char)(i >>> 16)); appendHex(a, (char)i); } /** * Append an integer value as hexadecimal to an {@link Appendable}, specifying the number of * output digits. 
* * @param a the {@link Appendable} * @param i the number * @param digits the number of digits * @throws IOException if thrown by the {@link Appendable} */ public static void appendHex(Appendable a, int i, int digits) throws IOException { if (digits > 0) { appendHex(a, i >>> 4, digits - 1); a.append(hexDigits[i & 0xF]); } } /** * Append a long value as hexadecimal to an {@link Appendable}. * * @param a the {@link Appendable} * @param n the number * @throws IOException if thrown by the {@link Appendable} */ public static void appendHex(Appendable a, long n) throws IOException { appendHex(a, (int)(n >>> 32)); appendHex(a, (int)n); } /** * Append a long value as hexadecimal to an {@link Appendable}, specifying the number of * output digits. * * @param a the {@link Appendable} * @param n the number * @param digits the number of digits * @throws IOException if thrown by the {@link Appendable} */ public static void appendHex(Appendable a, long n, int digits) throws IOException { if (digits > 0) { appendHex(a, n >>> 4, digits - 1); a.append(hexDigits[(int)(n & 0xF)]); } } private static final int MAX_INT_DIV_10 = Integer.MAX_VALUE / 10; private static final int MAX_INT_MOD_10 = Integer.MAX_VALUE % 10; /** * Convert a group of digits in a {@link CharSequence} to an {@code int}. 
* * @param text the {@link CharSequence} * @param start the start offset of the digits * @param end the end offset of the digits * @return the result as an {@code int} * @throws IndexOutOfBoundsException if start or end invalid * @throws NumberFormatException if any digit is invalid, or if the value is too big for an * {@code int} */ public static int convertToInt(CharSequence text, int start, int end) { if (start < 0 || end > text.length() || start >= end) throw new IndexOutOfBoundsException(); int result = 0; for (int i = start; i < end; i++) { int n = convertDecDigit(text.charAt(i)); if (result > MAX_INT_DIV_10 || result == MAX_INT_DIV_10 && n > MAX_INT_MOD_10) throw new NumberFormatException(); result = result * 10 + n; } return result; } private static final long MAX_LONG_DIV_10 = Long.MAX_VALUE / 10; private static final int MAX_LONG_MOD_10 = (int)(Long.MAX_VALUE % 10); /** * Convert a group of digits in a {@link CharSequence} to a {@code long}. * * @param text the {@link CharSequence} * @param start the start offset of the digits * @param end the end offset of the digits * @return the result as a {@code long} * @throws IndexOutOfBoundsException if start or end invalid * @throws NumberFormatException if any digit is invalid, or if the value is too big for a * {@code long} */ public static long convertToLong(CharSequence text, int start, int end) { if (start < 0 || end > text.length() || start >= end) throw new IndexOutOfBoundsException(); long result = 0; for (int i = start; i < end; i++) { int n = convertDecDigit(text.charAt(i)); if (result > MAX_LONG_DIV_10 || result == MAX_LONG_DIV_10 && n > MAX_LONG_MOD_10) throw new NumberFormatException(); result = result * 10 + n; } return result; } /** * Convert a decimal digit to the integer value of the digit. 
* * @param ch the decimal digit * @return the integer value (0 - 9) * @throws NumberFormatException if the digit is not valid */ public static int convertDecDigit(char ch) { if (ch >= '0' && ch <= '9') return ch - '0'; throw new NumberFormatException(); } /** * Convert a group of hexadecimal digits in a {@link CharSequence} to an {@code int}. * * @param text the {@link CharSequence} * @param start the start offset of the digits * @param end the end offset of the digits * @return the result as an {@code int} * @throws IndexOutOfBoundsException if start or end invalid * @throws NumberFormatException if any digit is invalid, or if the value is too big for an * {@code int} */ public static int convertHexToInt(CharSequence text, int start, int end) { if (start < 0 || end > text.length() || start >= end) throw new IndexOutOfBoundsException(); int result = 0; for (int i = start; i < end; i++) { if ((result & 0xF8000000) != 0) throw new NumberFormatException(); result = result << 4 | convertHexDigit(text.charAt(i)); } return result; } /** * Convert a group of hexadecimal digits in a {@link CharSequence} to a {@code long}. * * @param text the {@link CharSequence} * @param start the start offset of the digits * @param end the end offset of the digits * @return the result as a {@code long} * @throws IndexOutOfBoundsException if start or end invalid * @throws NumberFormatException if any digit is invalid, or if the value is too big for a * {@code long} */ public static long convertHexToLong(CharSequence text, int start, int end) { if (start < 0 || end > text.length() || start >= end) throw new IndexOutOfBoundsException(); long result = 0; for (int i = start; i < end; i++) { if ((result & 0xF800000000000000L) != 0) throw new NumberFormatException(); result = result << 4 | convertHexDigit(text.charAt(i)); } return result; } /** * Convert a hexadecimal digit to the integer value of the digit. 
* * @param ch the hexadecimal digit * @return the integer value (0 - 15) * @throws NumberFormatException if the digit is not valid */ public static int convertHexDigit(char ch) { if (ch >= '0' && ch <= '9') return ch - '0'; if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10; if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10; throw new NumberFormatException(); } /** * Perform multi-wildcard comparison. The pattern string consists of multiple wildcard * patterns (using ? for single character matches and * for * multiple character matches) separated by vertical bar (logical or) characters. The * comparison returns {@code true} if any of the patterns match the target. * * @param pattern the pattern string as described above * @param target the target {@link CharSequence} ({@link String}, {@link StringBuilder}, * {@link StringBuffer} etc.) * @return {@code true} if the target string matches the pattern * @see #wildcardCompare(String, int, int, CharSequence) */ public static boolean multiWildcardCompare(String pattern, CharSequence target) { int patIndex = 0; for (;;) { int i = pattern.indexOf('|', patIndex); if (i < 0) break; if (wildcardCompare(pattern, patIndex, i, target)) return true; patIndex = i + 1; } return wildcardCompare(pattern, patIndex, pattern.length(), target); } /** * Perform wildcard comparison. The method returns {@code true} if the target string * matches the pattern, according to the common wildcard rules: *
     * <dl>
     *   <dt>{@code *}</dt>
     *   <dd>matches zero or more variable characters</dd>
     *   <dt>{@code ?}</dt>
     *   <dd>matches a single variable character</dd>
     *   <dt>anything else</dt>
     *   <dd>matches the same character exactly</dd>
     * </dl>
* * @param pattern the pattern string, which may include wildcard characters as described * above * @param target the target {@link CharSequence} ({@link String}, {@link StringBuilder}, * {@link StringBuffer} etc.) * @return {@code true} if the target string matches the pattern */ public static boolean wildcardCompare(String pattern, CharSequence target) { return wildcardCompare(pattern, 0, pattern.length(), target); } /** * Perform wildcard comparison. The method returns {@code true} if the target string * matches the pattern, according to the common wildcard rules: *
     * <dl>
     *   <dt>{@code *}</dt>
     *   <dd>matches zero or more variable characters</dd>
     *   <dt>{@code ?}</dt>
     *   <dd>matches a single variable character</dd>
     *   <dt>anything else</dt>
     *   <dd>matches the same character exactly</dd>
     * </dl>
* This version allows a substring of the pattern string to be specified, removing the * necessity for a separate {@link String#substring(int, int)} operation. * * @param pattern the pattern string, which may include wildcard characters as * described above * @param patIndex the start index within the pattern string * @param patEnd the end index within the pattern string * @param target the target {@link CharSequence} ({@link String}, * {@link StringBuilder}, {@link StringBuffer} etc.) * @return {@code true} if the target string matches the pattern */ public static boolean wildcardCompare(String pattern, int patIndex, int patEnd, CharSequence target) { int tarLen = target.length(); int i = pattern.indexOf('*', patIndex); if (i < 0 || i >= patEnd) return tarLen == patEnd - patIndex && wildcardCompareSubstring(pattern, patIndex, patEnd, target, 0); if (i - patIndex > tarLen || !wildcardCompareSubstring(pattern, patIndex, i, target, 0)) return false; int tarIndex = i - patIndex; patIndex = i + 1; for (;;) { i = pattern.indexOf('*', patIndex); if (i < 0 || i >= patEnd) break; i -= patIndex; // i is now length of substring before next * for (;;) { if (tarIndex + i > tarLen) return false; if (wildcardCompareSubstring(pattern, patIndex, patIndex + i, target, tarIndex)) break; tarIndex++; } tarIndex += i; patIndex += i + 1; } i = tarLen - (patEnd - patIndex); // offset within str return tarIndex <= i && wildcardCompareSubstring(pattern, patIndex, patEnd, target, i); } /** * Compare a substring of a wildcard match. The substring may contain ? but * not * characters, so the pattern and target substrings must be the same * length. * * @param pattern the pattern string * @param patIndex the index of the substring within the pattern string * @param patEnd the end index of the substring within the pattern string * @param target the target {@link CharSequence} ({@link String}, * {@link StringBuilder}, {@link StringBuffer} etc.) 
* @param index the index of the substring within the target * @return {@code true} if the substrings match */ private static boolean wildcardCompareSubstring(String pattern, int patIndex, int patEnd, CharSequence target, int index) { while (patIndex < patEnd) { char ch = pattern.charAt(patIndex++); // don't re-order the comparison below - the auto-increment must always be done if (target.charAt(index++) != ch && ch != '?') return false; } return true; } /** * Trim leading and trailing characters from a string, where those characters match a * supplied {@link IntPredicate} function. * * @param s the string to be trimmed * @param test the test function * @return the trimmed string * @throws NullPointerException if either argument is {@code null} */ public static String trim(String s, IntPredicate test) { Objects.requireNonNull(test); int start = 0; int end = s.length(); for (;;) { if (start >= end) return emptyString; if (!test.test(s.charAt(start))) break; start++; } while (test.test(s.charAt(end - 1))) end--; return start == 0 && end == s.length() ? s : s.substring(start, end); } /** * Trim leading characters from a string, where those characters match a supplied {@link IntPredicate} function. * * @param s the string to be trimmed * @param test the test function * @return the trimmed string * @throws NullPointerException if either argument is {@code null} */ public static String trimLeading(String s, IntPredicate test) { Objects.requireNonNull(test); int start = 0; int end = s.length(); for (;;) { if (start >= end) return emptyString; if (!test.test(s.charAt(start))) break; start++; } return start == 0 ? s : s.substring(start); } /** * Trim trailing characters from a string, where those characters match a supplied {@link IntPredicate} function. 
* * @param s the string to be trimmed * @param test the test function * @return the trimmed string * @throws NullPointerException if either argument is {@code null} */ public static String trimTrailing(String s, IntPredicate test) { Objects.requireNonNull(test); int end = s.length(); for (;;) { if (end <= 0) return emptyString; if (!test.test(s.charAt(end - 1))) break; end--; } return end == s.length() ? s : s.substring(0, end); } /** * Trim leading and trailing characters from a {@link CharSequence}, where those characters * match a supplied {@link IntPredicate} function. * * @param cs the {@link CharSequence} to be trimmed * @param test the test function * @return the trimmed {@link CharSequence} * @throws NullPointerException if either argument is {@code null} */ public static CharSequence trim(CharSequence cs, IntPredicate test) { Objects.requireNonNull(test); int start = 0; int end = cs.length(); for (;;) { if (start >= end) return emptyString; if (!test.test(cs.charAt(start))) break; start++; } while (test.test(cs.charAt(end - 1))) end--; return start == 0 && end == cs.length() ? cs : new SubSequence(cs, start, end); } /** * Trim leading characters from a {@link CharSequence}, where those characters match a supplied {@link IntPredicate} * function. * * @param cs the {@link CharSequence} to be trimmed * @param test the test function * @return the trimmed {@link CharSequence} * @throws NullPointerException if either argument is {@code null} */ public static CharSequence trimLeading(CharSequence cs, IntPredicate test) { Objects.requireNonNull(test); int start = 0; int end = cs.length(); for (;;) { if (start >= end) return emptyString; if (!test.test(cs.charAt(start))) break; start++; } return start == 0 ? cs : new SubSequence(cs, start, end); } /** * Trim leading characters from a {@link CharSequence}, where those characters match a supplied {@link IntPredicate} * function. 
* * @param cs the {@link CharSequence} to be trimmed * @param test the test function * @return the trimmed {@link CharSequence} * @throws NullPointerException if either argument is {@code null} */ public static CharSequence trimTrailing(CharSequence cs, IntPredicate test) { Objects.requireNonNull(test); int end = cs.length(); for (;;) { if (end <= 0) return emptyString; if (!test.test(cs.charAt(end - 1))) break; end--; } return end == cs.length() ? cs : new SubSequence(cs, 0, end); } /** * Trim leading and trailing whitespace from a string, where white space is determined by * {@link Character#isWhitespace(char)}. * * @param s the string to be trimmed * @return the trimmed string * @throws NullPointerException if the input string is {@code null} */ public static String trim(String s) { return trim(s, Character::isWhitespace); } /** * Trim leading whitespace from a string, where white space is determined by {@link Character#isWhitespace(char)}. * * @param s the string to be trimmed * @return the trimmed string * @throws NullPointerException if the input string is {@code null} */ public static String trimLeading(String s) { return trimLeading(s, Character::isWhitespace); } /** * Trim trailing whitespace from a string, where white space is determined by {@link Character#isWhitespace(char)}. * * @param s the string to be trimmed * @return the trimmed string * @throws NullPointerException if the input string is {@code null} */ public static String trimTrailing(String s) { return trimTrailing(s, Character::isWhitespace); } /** * Trim leading and trailing whitespace from a {@link CharSequence}, where white space is determined by * {@link Character#isWhitespace(char)}. 
* * @param cs the {@link CharSequence} to be trimmed * @return the trimmed {@link CharSequence} * @throws NullPointerException if the input {@link CharSequence} is {@code null} */ public static CharSequence trim(CharSequence cs) { return trim(cs, Character::isWhitespace); } /** * Trim leading whitespace from a {@link CharSequence}, where white space is determined by * {@link Character#isWhitespace(char)}. * * @param cs the {@link CharSequence} to be trimmed * @return the trimmed {@link CharSequence} * @throws NullPointerException if the input {@link CharSequence} is {@code null} */ public static CharSequence trimLeading(CharSequence cs) { return trimLeading(cs, Character::isWhitespace); } /** * Trim trailing whitespace from a {@link CharSequence}, where white space is determined by * {@link Character#isWhitespace(char)}. * * @param cs the {@link CharSequence} to be trimmed * @return the trimmed {@link CharSequence} * @throws NullPointerException if the input {@link CharSequence} is {@code null} */ public static CharSequence trimTrailing(CharSequence cs) { return trimTrailing(cs, Character::isWhitespace); } /** * Trim leading and trailing code points from a UTF16 string, where those code points match * a supplied {@link IntPredicate} function. 
* * @param s the string to be trimmed * @param test the test function * @return the trimmed string * @throws NullPointerException if either argument is {@code null} */ public static String trimUTF16(String s, IntPredicate test) { Objects.requireNonNull(test); int start = 0; int end = s.length(); for (;;) { if (start >= end) return emptyString; char hi = s.charAt(start); if (Character.isHighSurrogate(hi) && start + 1 < end) { char lo = s.charAt(start + 1); if (Character.isLowSurrogate(lo)) { if (!test.test(Character.toCodePoint(hi, lo))) break; start += 2; continue; } } if (!test.test(hi)) break; start++; } while (end > start) { char lo = s.charAt(end - 1); if (Character.isLowSurrogate(lo) && end - 1 > start) { char hi = s.charAt(end - 2); if (Character.isHighSurrogate(hi)) { if (!test.test(Character.toCodePoint(hi, lo))) break; end -= 2; continue; } } if (!test.test(lo)) break; end--; } return start == 0 && end == s.length() ? s : s.substring(start, end); } /** * Strip characters from a string, where those characters match a supplied * {@link IntPredicate} function. * * @param s the string to be stripped * @param test the test function * @return the stripped string * @throws NullPointerException if either argument is {@code null} */ public static String strip(String s, IntPredicate test) { Objects.requireNonNull(test); for (int i = 0, n = s.length(); i < n; ) { if (test.test(s.charAt(i++))) { StringBuilder sb = new StringBuilder(); sb.append(s, 0, i - 1); while (i < n) { char ch = s.charAt(i++); if (!test.test(ch)) sb.append(ch); } return sb.toString(); } } return s; } /** * Strip characters from a {@link CharSequence}, where those characters match a supplied * {@link IntPredicate} function. 
* * @param cs the {@link CharSequence} to be stripped * @param test the test function * @return the stripped {@link CharSequence} * @throws NullPointerException if either argument is {@code null} */ public static CharSequence strip(CharSequence cs, IntPredicate test) { Objects.requireNonNull(test); for (int i = 0, n = cs.length(); i < n; ) { if (test.test(cs.charAt(i++))) { StringBuilder sb = new StringBuilder(); sb.append(cs, 0, i - 1); while (i < n) { char ch = cs.charAt(i++); if (!test.test(ch)) sb.append(ch); } return sb; } } return cs; } /** * Strip code points from a UTF16 string, where those code points match a supplied * {@link IntPredicate} function. * * @param s the string to be stripped * @param test the test function * @return the stripped string * @throws NullPointerException if either argument is {@code null} */ public static String stripUTF16(String s, IntPredicate test) { Objects.requireNonNull(test); for (int i = 0, n = s.length(); i < n; ) { int k = i; char hi = s.charAt(i++); boolean stripped; char lo; if (Character.isHighSurrogate(hi) && i < n && Character.isLowSurrogate(lo = s.charAt(i))) { stripped = test.test(Character.toCodePoint(hi, lo)); i++; } else stripped = test.test(hi); if (stripped) { StringBuilder sb = new StringBuilder(); sb.append(s, 0, k); while (i < n) { hi = s.charAt(i++); if (Character.isHighSurrogate(hi) && i < n && Character.isLowSurrogate(lo = s.charAt(i))) { if (!test.test(Character.toCodePoint(hi, lo))) { sb.append(hi); sb.append(lo); } i++; } else { if (!test.test(hi)) sb.append(hi); } } return sb.toString(); } } return s; } /** * Convenience method to create a {@link StringBuilder}. Supports the idiom: *
     * <pre>
     *     String str = Strings.build().append('(').append(n).append(')').toString();
     * </pre>
*
     * @return      the new, empty {@link StringBuilder}
     */
    public static StringBuilder build() {
        StringBuilder builder = new StringBuilder();
        return builder;
    }

    /**
     * Convenience method to create a {@link StringBuilder}.  Supports the idiom:
     * <pre>
     *     String str = Strings.build("(").append(n).append(')').toString();
     * </pre>
* * @param cs the initial contents * @return the new {@link StringBuilder} */ public static StringBuilder build(CharSequence cs) { return new StringBuilder(cs); } /** * Convert an integer to a spreadsheet-style column identifier ("A", "B", ... "Z", "AA" * etc.). * * @param i the number to convert * @return the identifier */ public static String toIdentifier(int i) { StringBuilder sb = new StringBuilder(); i = Math.abs(i); do { sb.insert(0, (char)(i % 26 + 'A')); i = i / 26 - 1; } while (i >= 0); return sb.toString(); } private static char[] digits = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }; private static char[] tensDigits = { '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '3', '3', '3', '3', '3', '3', '3', '3', '3', '3', '4', '4', '4', '4', '4', '4', '4', '4', '4', '4', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '7', '7', '7', '7', '7', '7', '7', '7', '7', '7', '8', '8', '8', '8', '8', '8', '8', '8', '8', '8', '9', '9', '9', '9', '9', '9', '9', '9', '9', '9' }; /** * Append an {@code int} to an {@link Appendable}. This method outputs the digits left to * right, avoiding the need to allocate a separate buffer. 
* * @param a the {@link Appendable} * @param i the {@code int} * @throws IOException if thrown by the {@link Appendable} */ public static void appendInt(Appendable a, int i) throws IOException { if (i < 0) { if (i == Integer.MIN_VALUE) { a.append("-2147483648"); return; } a.append('-'); appendPositiveInt(a, -i); } else appendPositiveInt(a, i); } /** * Append a positive {@code int} to an {@link Appendable}. This method outputs the digits * left to right, avoiding the need to allocate a separate buffer. * * @param a the {@link Appendable} * @param i the {@code int} * @throws IOException if thrown by the {@link Appendable} */ public static void appendPositiveInt(Appendable a, int i) throws IOException { if (i >= 100) { int n = i / 100; appendPositiveInt(a, n); i -= n * 100; a.append(tensDigits[i]); a.append(digits[i]); } else if (i >= 10) { a.append(tensDigits[i]); a.append(digits[i]); } else a.append(digits[i]); } /** * Append a {@code long} to an {@link Appendable}. This method outputs the digits left to * right, avoiding the need to allocate a separate buffer. * * @param a the {@link Appendable} * @param n the {@code long} * @throws IOException if thrown by the {@link Appendable} */ public static void appendLong(Appendable a, long n) throws IOException { if (n < 0) { if (n == Long.MIN_VALUE) { a.append("-9223372036854775808"); return; } a.append('-'); appendPositiveLong(a, -n); } else appendPositiveLong(a, n); } /** * Append a positive {@code long} to an {@link Appendable}. This method outputs the digits * left to right, avoiding the need to allocate a separate buffer. 
* * @param a the {@link Appendable} * @param n the {@code long} * @throws IOException if thrown by the {@link Appendable} */ public static void appendPositiveLong(Appendable a, long n) throws IOException { if (n >= 100) { long m = n / 100; appendPositiveLong(a, m); int i = (int)(n - m * 100); a.append(tensDigits[i]); a.append(digits[i]); } else if (n >= 10) { a.append(tensDigits[(int)n]); a.append(digits[(int)n]); } else a.append(digits[(int)n]); } /** * Append an {@code int} to an {@link Appendable} as two decimal digits. There is often a * requirement to output a number as 2 digits, for example the cents value in dollars and * cents, or hours, minutes and seconds in a time string. Note that there is no range check * on the input value; to use this method in cases where the value is not guaranteed to be * in the range 00-99, use: *
     *     Strings.append2Digits(a, Math.abs(i) % 100);
     * 
*
     * @param a the {@link Appendable}
     * @param i the {@code int}
     * @throws IOException if thrown by the {@link Appendable}
     */
    public static void append2Digits(Appendable a, int i) throws IOException {
        // both lookups happen before any output, so an out-of-range value appends nothing
        char tens = tensDigits[i];
        char units = digits[i];
        a.append(tens);
        a.append(units);
    }

    /**
     * Append an {@code int} to an {@link Appendable} as three decimal digits.  There is less
     * frequently a requirement to output a number as 3 digits, for example the milliseconds in
     * a time string.  Note that there is no range check on the input value; to use this method
     * in cases where the value is not guaranteed to be in the range 000-999, use:
     *
     *     Strings.append3Digits(a, Math.abs(i) % 1000);
     * 
*
     * @param a the {@link Appendable}
     * @param i the {@code int}
     * @throws IOException if thrown by the {@link Appendable}
     */
    public static void append3Digits(Appendable a, int i) throws IOException {
        int hundreds = i / 100;
        a.append(digits[hundreds]);
        // the remaining two digits are looked up together in the 0-99 tables
        int lastTwo = i % 100;
        a.append(tensDigits[lastTwo]);
        a.append(digits[lastTwo]);
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy