All Downloads are FREE. Search and download functionalities are using the official Maven repository.

tech.tablesaw.util.StringUtils Maven / Gradle / Ivy

There is a newer version: 0.43.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package tech.tablesaw.util;

import com.google.common.base.Strings;

import java.util.ArrayList;
import java.util.List;

/**
 * 

Operations on {@link java.lang.String} that are * {@code null} safe. * *

{@code StringUtils} handles {@code null} input Strings quietly. * That is to say that a {@code null} input will return {@code null}. * Where a {@code boolean} or {@code int} is being returned * details vary by method. * *

A side effect of the {@code null} handling is that a * {@code NullPointerException} should be considered a bug in * {@code StringUtils}. * *

Methods in this class give sample code to explain their operation. * The symbol {@code *} is used to indicate any input including {@code null}. * *

#ThreadSafe# * @see java.lang.String * @since 1.0 */ //@Immutable public class StringUtils { // Performance testing notes (JDK 1.4, Jul03, scolebourne) // Whitespace: // Character.isWhitespace() is faster than WHITESPACE.indexOf() // where WHITESPACE is a string of all whitespace characters // // Character access: // String.charAt(n) versus toCharArray(), then array[n] // String.charAt(n) is about 15% worse for a 10K string // They are about equal for a length 50 string // String.charAt(n) is about 4 times better for a length 3 string // String.charAt(n) is best bet overall // // Append: // String.concat about twice as fast as StringBuffer.append // (not sure who tested this) /** * The empty String {@code ""}. * @since 2.0 */ private static final String EMPTY = ""; /** *

The maximum size to which the padding constant(s) can expand. */ private static final int PAD_LIMIT = 8192; private StringUtils() {} // Empty checks //----------------------------------------------------------------------- /** *

Splits a String by Character type as returned by * {@code java.lang.Character.getType(char)}. Groups of contiguous * characters of the same type are returned as complete tokens, with the * following exception: if {@code camelCase} is {@code true}, * the character of type {@code Character.UPPERCASE_LETTER}, if any, * immediately preceding a token of type {@code Character.LOWERCASE_LETTER} * will belong to the following token rather than to the preceding, if any, * {@code Character.UPPERCASE_LETTER} token. * @param str the String to split, may be {@code null} * @return an array of parsed Strings, {@code null} if null String input * @since 2.4 */ public static String[] splitByCharacterTypeCamelCase(final String str) { if (str == null) { return null; } if (str.isEmpty()) { return new String[0]; } final char[] c = str.toCharArray(); final List list = new ArrayList<>(); int tokenStart = 0; int currentType = Character.getType(c[tokenStart]); for (int pos = tokenStart + 1; pos < c.length; pos++) { final int type = Character.getType(c[pos]); if (type == currentType) { continue; } if (type == Character.LOWERCASE_LETTER && currentType == Character.UPPERCASE_LETTER) { final int newTokenStart = pos - 1; if (newTokenStart != tokenStart) { list.add(new String(c, tokenStart, newTokenStart - tokenStart)); tokenStart = newTokenStart; } } else { list.add(new String(c, tokenStart, pos - tokenStart)); tokenStart = pos; } currentType = type; } list.add(new String(c, tokenStart, c.length - tokenStart)); return list.toArray(new String[0]); } // Joining //----------------------------------------------------------------------- /** *

Joins the elements of the provided array into a single String * containing the provided list of elements. * *

No delimiter is added before or after the list. * Null objects or empty strings within the array are represented by * empty strings. * *

     * StringUtils.join(null, *)               = null
     * StringUtils.join([], *)                 = ""
     * StringUtils.join([null], *)             = ""
     * StringUtils.join(["a", "b", "c"], ';')  = "a;b;c"
     * StringUtils.join(["a", "b", "c"], null) = "abc"
     * StringUtils.join([null, "", "a"], ';')  = ";;a"
     * 
* * @param array the array of values to join together, may be null * @param separator the separator character to use * @return the joined String, {@code null} if null array input * @since 2.0 */ public static String join(final Object[] array, final char separator) { if (array == null) { return null; } return join(array, separator, 0, array.length); } /** *

Joins the elements of the provided array into a single String * containing the provided list of elements. * *

No delimiter is added before or after the list. * Null objects or empty strings within the array are represented by * empty strings. * *

     * StringUtils.join(null, *)               = null
     * StringUtils.join([], *)                 = ""
     * StringUtils.join([null], *)             = ""
     * StringUtils.join(["a", "b", "c"], ';')  = "a;b;c"
     * StringUtils.join(["a", "b", "c"], null) = "abc"
     * StringUtils.join([null, "", "a"], ';')  = ";;a"
     * 
* * @param array the array of values to join together, may be null * @param separator the separator character to use * @param startIndex the first index to start joining from. It is * an error to pass in an end index past the end of the array * @param endIndex the index to stop joining from (exclusive). It is * an error to pass in an end index past the end of the array * @return the joined String, {@code null} if null array input * @since 2.0 */ private static String join(final Object[] array, final char separator, final int startIndex, final int endIndex) { if (array == null) { return null; } final int noOfItems = endIndex - startIndex; if (noOfItems <= 0) { return EMPTY; } final StringBuilder buf = new StringBuilder(noOfItems * 16); for (int i = startIndex; i < endIndex; i++) { if (i > startIndex) { buf.append(separator); } if (array[i] != null) { buf.append(array[i]); } } return buf.toString(); } // Padding //----------------------------------------------------------------------- /** *

Repeat a String {@code repeat} times to form a * new String. * *

     * StringUtils.repeat(null, 2) = null
     * StringUtils.repeat("", 0)   = ""
     * StringUtils.repeat("", 2)   = ""
     * StringUtils.repeat("a", 3)  = "aaa"
     * StringUtils.repeat("ab", 2) = "abab"
     * StringUtils.repeat("a", -2) = ""
     * 
* * @param str the String to repeat, may be null * @param repeat number of times to repeat str, negative treated as zero * @return a new String consisting of the original String repeated, * {@code null} if null String input */ public static String repeat(final String str, final int repeat) { // Performance tuned for 2.0 (JDK1.4) if (str == null) { return null; } if (repeat <= 0) { return EMPTY; } final int inputLength = str.length(); if (repeat == 1 || inputLength == 0) { return str; } if (inputLength == 1 && repeat <= PAD_LIMIT) { return repeat(str.charAt(0), repeat); } final int outputLength = inputLength * repeat; switch (inputLength) { case 1 : return repeat(str.charAt(0), repeat); case 2 : final char ch0 = str.charAt(0); final char ch1 = str.charAt(1); final char[] output2 = new char[outputLength]; for (int i = repeat * 2 - 2; i >= 0; i--, i--) { output2[i] = ch0; output2[i + 1] = ch1; } return new String(output2); default : final StringBuilder buf = new StringBuilder(outputLength); for (int i = 0; i < repeat; i++) { buf.append(str); } return buf.toString(); } } /** *

Returns padding using the specified delimiter repeated * to a given length. * *

     * StringUtils.repeat('e', 0)  = ""
     * StringUtils.repeat('e', 3)  = "eee"
     * StringUtils.repeat('e', -2) = ""
     * 
* *

Note: this method does not support padding with * Unicode Supplementary Characters * as they require a pair of {@code char}s to be represented. * If you are needing to support full I18N of your applications * consider using {@link #repeat(String, int)} instead. * * * @param ch character to repeat * @param repeat number of times to repeat char, negative treated as zero * @return String with repeated character * @see #repeat(String, int) */ private static String repeat(final char ch, final int repeat) { if (repeat <= 0) { return EMPTY; } final char[] buf = new char[repeat]; for (int i = repeat - 1; i >= 0; i--) { buf[i] = ch; } return new String(buf); } /** * Gets a CharSequence length or {@code 0} if the CharSequence is * {@code null}. * * @param cs * a CharSequence or {@code null} * @return CharSequence length or {@code 0} if the CharSequence is * {@code null}. * @since 2.4 * @since 3.0 Changed signature from length(String) to length(CharSequence) */ public static int length(final CharSequence cs) { return cs == null ? 0 : cs.length(); } // Case conversion //----------------------------------------------------------------------- /** *

Capitalizes a String changing the first character to title case as * per {@link Character#toTitleCase(int)}. No other characters are changed. * * A {@code null} input String returns {@code null}. * *

     * StringUtils.capitalize(null)  = null
     * StringUtils.capitalize("")    = ""
     * StringUtils.capitalize("cat") = "Cat"
     * StringUtils.capitalize("cAt") = "CAt"
     * StringUtils.capitalize("'cat'") = "'cat'"
     * 
* * @param str the String to capitalize, may be null * @return the capitalized String, {@code null} if null String input * @since 2.0 */ public static String capitalize(final String str) { int strLen; if (str == null || (strLen = str.length()) == 0) { return str; } final int firstCodepoint = str.codePointAt(0); final int newCodePoint = Character.toTitleCase(firstCodepoint); if (firstCodepoint == newCodePoint) { // already capitalized return str; } final int newCodePoints[] = new int[strLen]; // cannot be longer than the char array int outOffset = 0; newCodePoints[outOffset++] = newCodePoint; // copy the first codepoint for (int inOffset = Character.charCount(firstCodepoint); inOffset < strLen; ) { final int codepoint = str.codePointAt(inOffset); newCodePoints[outOffset++] = codepoint; // copy the remaining ones inOffset += Character.charCount(codepoint); } return new String(newCodePoints, 0, outOffset); } // Character Tests //----------------------------------------------------------------------- /** *

Checks if the CharSequence contains only Unicode letters. * *

{@code null} will return {@code false}. * An empty CharSequence (length()=0) will return {@code false}. * *

     * StringUtils.isAlpha(null)   = false
     * StringUtils.isAlpha("")     = false
     * StringUtils.isAlpha("  ")   = false
     * StringUtils.isAlpha("abc")  = true
     * StringUtils.isAlpha("ab2c") = false
     * StringUtils.isAlpha("ab-c") = false
     * 
* * @param cs the CharSequence to check, may be null * @return {@code true} if only contains letters, and is non-null * @since 3.0 Changed signature from isAlpha(String) to isAlpha(CharSequence) * @since 3.0 Changed "" to return false and not true */ public static boolean isAlpha(final String cs) { if (Strings.isNullOrEmpty(cs)) { return false; } final int sz = cs.length(); for (int i = 0; i < sz; i++) { if (!Character.isLetter(cs.charAt(i))) { return false; } } return true; } /** *

Checks if the CharSequence contains only Unicode letters or digits. * *

{@code null} will return {@code false}. * An empty CharSequence (length()=0) will return {@code false}. * *

     * StringUtils.isAlphanumeric(null)   = false
     * StringUtils.isAlphanumeric("")     = false
     * StringUtils.isAlphanumeric("  ")   = false
     * StringUtils.isAlphanumeric("abc")  = true
     * StringUtils.isAlphanumeric("ab c") = false
     * StringUtils.isAlphanumeric("ab2c") = true
     * StringUtils.isAlphanumeric("ab-c") = false
     * 
* * @param cs the CharSequence to check, may be null * @return {@code true} if only contains letters or digits, * and is non-null * @since 3.0 Changed signature from isAlphanumeric(String) to isAlphanumeric(CharSequence) * @since 3.0 Changed "" to return false and not true */ public static boolean isAlphanumeric(final String cs) { if (Strings.isNullOrEmpty(cs)) { return false; } final int sz = cs.length(); for (int i = 0; i < sz; i++) { if (!Character.isLetterOrDigit(cs.charAt(i))) { return false; } } return true; } /** *

Checks if the CharSequence contains only Unicode digits. * A decimal point is not a Unicode digit and returns false. * *

{@code null} will return {@code false}. * An empty CharSequence (length()=0) will return {@code false}. * *

Note that the method does not allow for a leading sign, either positive or negative. * Also, if a String passes the numeric test, it may still generate a NumberFormatException * when parsed by Integer.parseInt or Long.parseLong, e.g. if the value is outside the range * for int or long respectively. * *

     * StringUtils.isNumeric(null)   = false
     * StringUtils.isNumeric("")     = false
     * StringUtils.isNumeric("  ")   = false
     * StringUtils.isNumeric("123")  = true
     * StringUtils.isNumeric("\u0967\u0968\u0969")  = true
     * StringUtils.isNumeric("12 3") = false
     * StringUtils.isNumeric("ab2c") = false
     * StringUtils.isNumeric("12-3") = false
     * StringUtils.isNumeric("12.3") = false
     * StringUtils.isNumeric("-123") = false
     * StringUtils.isNumeric("+123") = false
     * 
* * @param cs the CharSequence to check, may be null * @return {@code true} if only contains digits, and is non-null * @since 3.0 Changed signature from isNumeric(String) to isNumeric(CharSequence) * @since 3.0 Changed "" to return false and not true */ public static boolean isNumeric(final String cs) { if (Strings.isNullOrEmpty(cs)) { return false; } final int sz = cs.length(); for (int i = 0; i < sz; i++) { if (!Character.isDigit(cs.charAt(i))) { return false; } } return true; } /** *

Checks if the CharSequence contains only lowercase characters. * *

{@code null} will return {@code false}. * An empty CharSequence (length()=0) will return {@code false}. * *

     * StringUtils.isAllLowerCase(null)   = false
     * StringUtils.isAllLowerCase("")     = false
     * StringUtils.isAllLowerCase("  ")   = false
     * StringUtils.isAllLowerCase("abc")  = true
     * StringUtils.isAllLowerCase("abC")  = false
     * StringUtils.isAllLowerCase("ab c") = false
     * StringUtils.isAllLowerCase("ab1c") = false
     * StringUtils.isAllLowerCase("ab/c") = false
     * 
* * @param cs the CharSequence to check, may be null * @return {@code true} if only contains lowercase characters, and is non-null * @since 2.5 * @since 3.0 Changed signature from isAllLowerCase(String) to isAllLowerCase(CharSequence) */ public static boolean isAllLowerCase(final String cs) { if (Strings.isNullOrEmpty(cs)) { return false; } final int sz = cs.length(); for (int i = 0; i < sz; i++) { if (!Character.isLowerCase(cs.charAt(i))) { return false; } } return true; } /** *

Checks if the CharSequence contains only uppercase characters. * *

{@code null} will return {@code false}. * An empty String (length()=0) will return {@code false}. * *

     * StringUtils.isAllUpperCase(null)   = false
     * StringUtils.isAllUpperCase("")     = false
     * StringUtils.isAllUpperCase("  ")   = false
     * StringUtils.isAllUpperCase("ABC")  = true
     * StringUtils.isAllUpperCase("aBC")  = false
     * StringUtils.isAllUpperCase("A C")  = false
     * StringUtils.isAllUpperCase("A1C")  = false
     * StringUtils.isAllUpperCase("A/C")  = false
     * 
* * @param cs the CharSequence to check, may be null * @return {@code true} if only contains uppercase characters, and is non-null * @since 2.5 * @since 3.0 Changed signature from isAllUpperCase(String) to isAllUpperCase(CharSequence) */ public static boolean isAllUpperCase(final String cs) { if (Strings.isNullOrEmpty(cs)) { return false; } final int sz = cs.length(); for (int i = 0; i < sz; i++) { if (!Character.isUpperCase(cs.charAt(i))) { return false; } } return true; } // Abbreviating //----------------------------------------------------------------------- /** *

Abbreviates a String using ellipses. This will turn * "Now is the time for all good men" into "Now is the time for..." * *

Specifically: *

    *
  • If the number of characters in {@code str} is less than or equal to * {@code maxWidth}, return {@code str}.
  • *
  • Else abbreviate it to {@code (substring(str, 0, max-3) + "...")}.
  • *
  • If {@code maxWidth} is less than {@code 4}, throw an * {@code IllegalArgumentException}.
  • *
  • In no case will it return a String of length greater than * {@code maxWidth}.
  • *
* *
     * StringUtils.abbreviate(null, *)      = null
     * StringUtils.abbreviate("", 4)        = ""
     * StringUtils.abbreviate("abcdefg", 6) = "abc..."
     * StringUtils.abbreviate("abcdefg", 7) = "abcdefg"
     * StringUtils.abbreviate("abcdefg", 8) = "abcdefg"
     * StringUtils.abbreviate("abcdefg", 4) = "a..."
     * StringUtils.abbreviate("abcdefg", 3) = IllegalArgumentException
     * 
* * @param str the String to check, may be null * @param abbrevMarker the String indicate abbreviation * @param maxWidth maximum length of result String, must be at least 4 * @return abbreviated String, {@code null} if null String input * @throws IllegalArgumentException if the width is too small * @since 2.0 */ public static String abbreviate(final String str, final String abbrevMarker, final int maxWidth) { return abbreviate(str, abbrevMarker, 0, maxWidth); } /** *

Abbreviates a String using a given replacement marker. This will turn * "Now is the time for all good men" into "...is the time for..." if "..." was defined * as the replacement marker. * *

Works like {@code abbreviate(String, String, int)}, but allows you to specify * a "left edge" offset. Note that this left edge is not necessarily going to * be the leftmost character in the result, or the first character following the * replacement marker, but it will appear somewhere in the result. * *

In no case will it return a String of length greater than {@code maxWidth}. * *

     * StringUtils.abbreviate(null, null, *, *)                 = null
     * StringUtils.abbreviate("abcdefghijklmno", null, *, *)    = "abcdefghijklmno"
     * StringUtils.abbreviate("", "...", 0, 4)                  = ""
     * StringUtils.abbreviate("abcdefghijklmno", "---", -1, 10) = "abcdefg---"
     * StringUtils.abbreviate("abcdefghijklmno", ",", 0, 10)    = "abcdefghi,"
     * StringUtils.abbreviate("abcdefghijklmno", ",", 1, 10)    = "abcdefghi,"
     * StringUtils.abbreviate("abcdefghijklmno", ",", 2, 10)    = "abcdefghi,"
     * StringUtils.abbreviate("abcdefghijklmno", "::", 4, 10)   = "::efghij::"
     * StringUtils.abbreviate("abcdefghijklmno", "...", 6, 10)  = "...ghij..."
     * StringUtils.abbreviate("abcdefghijklmno", "*", 9, 10)    = "*ghijklmno"
     * StringUtils.abbreviate("abcdefghijklmno", "'", 10, 10)   = "'ghijklmno"
     * StringUtils.abbreviate("abcdefghijklmno", "!", 12, 10)   = "!ghijklmno"
     * StringUtils.abbreviate("abcdefghij", "abra", 0, 4)       = IllegalArgumentException
     * StringUtils.abbreviate("abcdefghij", "...", 5, 6)        = IllegalArgumentException
     * 
* * @param str the String to check, may be null * @param abbrevMarker the String used as replacement marker * @param offset left edge of source String * @param maxWidth maximum length of result String, must be at least 4 * @return abbreviated String, {@code null} if null String input * @throws IllegalArgumentException if the width is too small * @since 3.6 */ private static String abbreviate(final String str, final String abbrevMarker, int offset, final int maxWidth) { if (Strings.isNullOrEmpty(str) || Strings.isNullOrEmpty(abbrevMarker)) { return str; } final int abbrevMarkerLength = abbrevMarker.length(); final int minAbbrevWidth = abbrevMarkerLength + 1; final int minAbbrevWidthOffset = abbrevMarkerLength + abbrevMarkerLength + 1; if (maxWidth < minAbbrevWidth) { throw new IllegalArgumentException(String.format("Minimum abbreviation width is %d", minAbbrevWidth)); } if (str.length() <= maxWidth) { return str; } if (offset > str.length()) { offset = str.length(); } if (str.length() - offset < maxWidth - abbrevMarkerLength) { offset = str.length() - (maxWidth - abbrevMarkerLength); } if (offset <= abbrevMarkerLength+1) { return str.substring(0, maxWidth - abbrevMarkerLength) + abbrevMarker; } if (maxWidth < minAbbrevWidthOffset) { throw new IllegalArgumentException(String.format("Minimum abbreviation width with offset is %d", minAbbrevWidthOffset)); } if (offset + maxWidth - abbrevMarkerLength < str.length()) { return abbrevMarker + abbreviate(str.substring(offset), abbrevMarker, maxWidth - abbrevMarkerLength); } return abbrevMarker + str.substring(str.length() - (maxWidth - abbrevMarkerLength)); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy