All Downloads are FREE. Search and download functionalities are using the official Maven repository.

tech.tablesaw.util.StringUtils Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package tech.tablesaw.util;

import com.google.common.base.Strings;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

/**
 * Operations on {@link java.lang.String} that are {@code null} safe.
 *
 * 

{@code StringUtils} handles {@code null} input Strings quietly. That is to say that a {@code * null} input will return {@code null}. Where a {@code boolean} or {@code int} is being returned * details vary by method. * *

A side effect of the {@code null} handling is that a {@code NullPointerException} should be * considered a bug in {@code StringUtils}. * *

Methods in this class give sample code to explain their operation. The symbol {@code *} is * used to indicate any input including {@code null}. * *

#ThreadSafe# * * @see java.lang.String * @since 1.0 */ // @Immutable public class StringUtils { // Performance testing notes (JDK 1.4, Jul03, scolebourne) // Whitespace: // Character.isWhitespace() is faster than WHITESPACE.indexOf() // where WHITESPACE is a string of all whitespace characters // // Character access: // String.charAt(n) versus toCharArray(), then array[n] // String.charAt(n) is about 15% worse for a 10K string // They are about equal for a length 50 string // String.charAt(n) is about 4 times better for a length 3 string // String.charAt(n) is best bet overall // // Append: // String.concat about twice as fast as StringBuffer.append // (not sure who tested this) /** * The empty String {@code ""}. * * @since 2.0 */ private static final String EMPTY = ""; /** The maximum size to which the padding constant(s) can expand. */ private static final int PAD_LIMIT = 8192; private static final Pattern ZERO_DECIMAL_PATTERN = Pattern.compile("\\.0+$"); private StringUtils() {} // Empty checks // ----------------------------------------------------------------------- /** * Splits a String by Character type as returned by {@code java.lang.Character.getType(char)}. * Groups of contiguous characters of the same type are returned as complete tokens, with the * following exception: if {@code camelCase} is {@code true}, the character of type {@code * Character.UPPERCASE_LETTER}, if any, immediately preceding a token of type {@code * Character.LOWERCASE_LETTER} will belong to the following token rather than to the preceding, if * any, {@code Character.UPPERCASE_LETTER} token. * * @param str the String to split, may be {@code null} * @return an array of parsed Strings, {@code null} if null String input * @since 2.4 */ public static String[] splitByCharacterTypeCamelCase(final String str) { if (str == null) { return null; } if (str.isEmpty()) { return new String[0]; } final char[] c = str.toCharArray(); final List list = new ArrayList<>(); int tokenStart = 0; int currentType = Character.getType(c[tokenStart]); for (int pos = tokenStart + 1; pos < c.length; pos++) { final int type = Character.getType(c[pos]); if (type == currentType) { continue; } if (type == Character.LOWERCASE_LETTER && currentType == Character.UPPERCASE_LETTER) { final int newTokenStart = pos - 1; if (newTokenStart != tokenStart) { list.add(new String(c, tokenStart, newTokenStart - tokenStart)); tokenStart = newTokenStart; } } else { list.add(new String(c, tokenStart, pos - tokenStart)); tokenStart = pos; } currentType = type; } list.add(new String(c, tokenStart, c.length - tokenStart)); return list.toArray(new String[0]); } // Joining // ----------------------------------------------------------------------- /** * Joins the elements of the provided array into a single String containing the provided list of * elements. * *

No delimiter is added before or after the list. Null objects or empty strings within the * array are represented by empty strings. * *

   * StringUtils.join(null, *)               = null
   * StringUtils.join([], *)                 = ""
   * StringUtils.join([null], *)             = ""
   * StringUtils.join(["a", "b", "c"], ';')  = "a;b;c"
   * StringUtils.join(["a", "b", "c"], null) = "abc"
   * StringUtils.join([null, "", "a"], ';')  = ";;a"
   * 
* * @param array the array of values to join together, may be null * @param separator the separator character to use * @return the joined String, {@code null} if null array input * @since 2.0 */ public static String join(final Object[] array, final char separator) { if (array == null) { return null; } return join(array, separator, 0, array.length); } /** * Joins the elements of the provided array into a single String containing the provided list of * elements. * *

No delimiter is added before or after the list. Null objects or empty strings within the * array are represented by empty strings. * *

   * StringUtils.join(null, *)               = null
   * StringUtils.join([], *)                 = ""
   * StringUtils.join([null], *)             = ""
   * StringUtils.join(["a", "b", "c"], ';')  = "a;b;c"
   * StringUtils.join(["a", "b", "c"], null) = "abc"
   * StringUtils.join([null, "", "a"], ';')  = ";;a"
   * 
* * @param array the array of values to join together, may be null * @param separator the separator character to use * @param startIndex the first index to start joining from. It is an error to pass in an end index * past the end of the array * @param endIndex the index to stop joining from (exclusive). It is an error to pass in an end * index past the end of the array * @return the joined String, {@code null} if null array input * @since 2.0 */ private static String join( final Object[] array, final char separator, final int startIndex, final int endIndex) { if (array == null) { return null; } final int noOfItems = endIndex - startIndex; if (noOfItems <= 0) { return EMPTY; } final StringBuilder buf = new StringBuilder(noOfItems * 16); for (int i = startIndex; i < endIndex; i++) { if (i > startIndex) { buf.append(separator); } if (array[i] != null) { buf.append(array[i]); } } return buf.toString(); } // Padding // ----------------------------------------------------------------------- /** * Repeat a String {@code repeat} times to form a new String. * *
   * StringUtils.repeat(null, 2) = null
   * StringUtils.repeat("", 0)   = ""
   * StringUtils.repeat("", 2)   = ""
   * StringUtils.repeat("a", 3)  = "aaa"
   * StringUtils.repeat("ab", 2) = "abab"
   * StringUtils.repeat("a", -2) = ""
   * 
* * @param str the String to repeat, may be null * @param repeat number of times to repeat str, negative treated as zero * @return a new String consisting of the original String repeated, {@code null} if null String * input */ public static String repeat(final String str, final int repeat) { // Performance tuned for 2.0 (JDK1.4) if (str == null) { return null; } if (repeat <= 0) { return EMPTY; } final int inputLength = str.length(); if (repeat == 1 || inputLength == 0) { return str; } if (inputLength == 1 && repeat <= PAD_LIMIT) { return repeat(str.charAt(0), repeat); } final int outputLength = inputLength * repeat; switch (inputLength) { case 1: return repeat(str.charAt(0), repeat); case 2: final char ch0 = str.charAt(0); final char ch1 = str.charAt(1); final char[] output2 = new char[outputLength]; for (int i = repeat * 2 - 2; i >= 0; i--, i--) { output2[i] = ch0; output2[i + 1] = ch1; } return new String(output2); default: final StringBuilder buf = new StringBuilder(outputLength); for (int i = 0; i < repeat; i++) { buf.append(str); } return buf.toString(); } } /** * Returns padding using the specified delimiter repeated to a given length. * *
   * StringUtils.repeat('e', 0)  = ""
   * StringUtils.repeat('e', 3)  = "eee"
   * StringUtils.repeat('e', -2) = ""
   * 
* *

Note: this method does not support padding with Unicode Supplementary * Characters as they require a pair of {@code char}s to be represented. If you are needing to * support full I18N of your applications consider using {@link #repeat(String, int)} instead. * * @param ch character to repeat * @param repeat number of times to repeat char, negative treated as zero * @return String with repeated character * @see #repeat(String, int) */ private static String repeat(final char ch, final int repeat) { if (repeat <= 0) { return EMPTY; } final char[] buf = new char[repeat]; for (int i = repeat - 1; i >= 0; i--) { buf[i] = ch; } return new String(buf); } /** * Gets a CharSequence length or {@code 0} if the CharSequence is {@code null}. * * @param cs a CharSequence or {@code null} * @return CharSequence length or {@code 0} if the CharSequence is {@code null}. * @since 2.4 * @since 3.0 Changed signature from length(String) to length(CharSequence) */ public static int length(final CharSequence cs) { return cs == null ? 0 : cs.length(); } // Case conversion // ----------------------------------------------------------------------- /** * Capitalizes a String changing the first character to title case as per {@link * Character#toTitleCase(int)}. No other characters are changed. * *

A {@code null} input String returns {@code null}. * *

   * StringUtils.capitalize(null)  = null
   * StringUtils.capitalize("")    = ""
   * StringUtils.capitalize("cat") = "Cat"
   * StringUtils.capitalize("cAt") = "CAt"
   * StringUtils.capitalize("'cat'") = "'cat'"
   * 
* * @param str the String to capitalize, may be null * @return the capitalized String, {@code null} if null String input * @since 2.0 */ public static String capitalize(final String str) { int strLen; if (str == null || (strLen = str.length()) == 0) { return str; } final int firstCodepoint = str.codePointAt(0); final int newCodePoint = Character.toTitleCase(firstCodepoint); if (firstCodepoint == newCodePoint) { // already capitalized return str; } final int newCodePoints[] = new int[strLen]; // cannot be longer than the char array int outOffset = 0; newCodePoints[outOffset++] = newCodePoint; // copy the first codepoint for (int inOffset = Character.charCount(firstCodepoint); inOffset < strLen; ) { final int codepoint = str.codePointAt(inOffset); newCodePoints[outOffset++] = codepoint; // copy the remaining ones inOffset += Character.charCount(codepoint); } return new String(newCodePoints, 0, outOffset); } // Character Tests // ----------------------------------------------------------------------- /** * Checks if the CharSequence contains only Unicode letters. * *

{@code null} will return {@code false}. An empty CharSequence (length()=0) will return * {@code false}. * *

   * StringUtils.isAlpha(null)   = false
   * StringUtils.isAlpha("")     = false
   * StringUtils.isAlpha("  ")   = false
   * StringUtils.isAlpha("abc")  = true
   * StringUtils.isAlpha("ab2c") = false
   * StringUtils.isAlpha("ab-c") = false
   * 
* * @param cs the CharSequence to check, may be null * @return {@code true} if only contains letters, and is non-null * @since 3.0 Changed signature from isAlpha(String) to isAlpha(CharSequence) * @since 3.0 Changed "" to return false and not true */ public static boolean isAlpha(final String cs) { if (Strings.isNullOrEmpty(cs)) { return false; } final int sz = cs.length(); for (int i = 0; i < sz; i++) { if (!Character.isLetter(cs.charAt(i))) { return false; } } return true; } /** * Checks if the CharSequence contains only Unicode letters or digits. * *

{@code null} will return {@code false}. An empty CharSequence (length()=0) will return * {@code false}. * *

   * StringUtils.isAlphanumeric(null)   = false
   * StringUtils.isAlphanumeric("")     = false
   * StringUtils.isAlphanumeric("  ")   = false
   * StringUtils.isAlphanumeric("abc")  = true
   * StringUtils.isAlphanumeric("ab c") = false
   * StringUtils.isAlphanumeric("ab2c") = true
   * StringUtils.isAlphanumeric("ab-c") = false
   * 
* * @param cs the CharSequence to check, may be null * @return {@code true} if only contains letters or digits, and is non-null * @since 3.0 Changed signature from isAlphanumeric(String) to isAlphanumeric(CharSequence) * @since 3.0 Changed "" to return false and not true */ public static boolean isAlphanumeric(final String cs) { if (Strings.isNullOrEmpty(cs)) { return false; } final int sz = cs.length(); for (int i = 0; i < sz; i++) { if (!Character.isLetterOrDigit(cs.charAt(i))) { return false; } } return true; } /** * Checks if the CharSequence contains only Unicode digits. A decimal point is not a Unicode digit * and returns false. * *

{@code null} will return {@code false}. An empty CharSequence (length()=0) will return * {@code false}. * *

Note that the method does not allow for a leading sign, either positive or negative. Also, * if a String passes the numeric test, it may still generate a NumberFormatException when parsed * by Integer.parseInt or Long.parseLong, e.g. if the value is outside the range for int or long * respectively. * *

   * StringUtils.isNumeric(null)   = false
   * StringUtils.isNumeric("")     = false
   * StringUtils.isNumeric("  ")   = false
   * StringUtils.isNumeric("123")  = true
   * StringUtils.isNumeric("\u0967\u0968\u0969")  = true
   * StringUtils.isNumeric("12 3") = false
   * StringUtils.isNumeric("ab2c") = false
   * StringUtils.isNumeric("12-3") = false
   * StringUtils.isNumeric("12.3") = false
   * StringUtils.isNumeric("-123") = false
   * StringUtils.isNumeric("+123") = false
   * 
* * @param cs the CharSequence to check, may be null * @return {@code true} if only contains digits, and is non-null * @since 3.0 Changed signature from isNumeric(String) to isNumeric(CharSequence) * @since 3.0 Changed "" to return false and not true */ public static boolean isNumeric(final String cs) { if (Strings.isNullOrEmpty(cs)) { return false; } final int sz = cs.length(); for (int i = 0; i < sz; i++) { if (!Character.isDigit(cs.charAt(i))) { return false; } } return true; } /** * Checks if the CharSequence contains only lowercase characters. * *

{@code null} will return {@code false}. An empty CharSequence (length()=0) will return * {@code false}. * *

   * StringUtils.isAllLowerCase(null)   = false
   * StringUtils.isAllLowerCase("")     = false
   * StringUtils.isAllLowerCase("  ")   = false
   * StringUtils.isAllLowerCase("abc")  = true
   * StringUtils.isAllLowerCase("abC")  = false
   * StringUtils.isAllLowerCase("ab c") = false
   * StringUtils.isAllLowerCase("ab1c") = false
   * StringUtils.isAllLowerCase("ab/c") = false
   * 
* * @param cs the CharSequence to check, may be null * @return {@code true} if only contains lowercase characters, and is non-null * @since 2.5 * @since 3.0 Changed signature from isAllLowerCase(String) to isAllLowerCase(CharSequence) */ public static boolean isAllLowerCase(final String cs) { if (Strings.isNullOrEmpty(cs)) { return false; } final int sz = cs.length(); for (int i = 0; i < sz; i++) { if (!Character.isLowerCase(cs.charAt(i))) { return false; } } return true; } /** * Checks if the CharSequence contains only uppercase characters. * *

{@code null} will return {@code false}. An empty String (length()=0) will return {@code * false}. * *

   * StringUtils.isAllUpperCase(null)   = false
   * StringUtils.isAllUpperCase("")     = false
   * StringUtils.isAllUpperCase("  ")   = false
   * StringUtils.isAllUpperCase("ABC")  = true
   * StringUtils.isAllUpperCase("aBC")  = false
   * StringUtils.isAllUpperCase("A C")  = false
   * StringUtils.isAllUpperCase("A1C")  = false
   * StringUtils.isAllUpperCase("A/C")  = false
   * 
* * @param cs the CharSequence to check, may be null * @return {@code true} if only contains uppercase characters, and is non-null * @since 2.5 * @since 3.0 Changed signature from isAllUpperCase(String) to isAllUpperCase(CharSequence) */ public static boolean isAllUpperCase(final String cs) { if (Strings.isNullOrEmpty(cs)) { return false; } final int sz = cs.length(); for (int i = 0; i < sz; i++) { if (!Character.isUpperCase(cs.charAt(i))) { return false; } } return true; } /** * Removes all trailing zero decimals from the given String, assuming all decimals are zero and * any zero decimals actually exist. * *

A {@code null} input String returns {@code null}. * * @param str the String to handle, may be null * @return string without trailing zero decimals */ public static String removeZeroDecimal(final String str) { if (Strings.isNullOrEmpty(str)) { return str; } return ZERO_DECIMAL_PATTERN.matcher(str).replaceFirst(EMPTY); } // Abbreviating // ----------------------------------------------------------------------- /** * Abbreviates a String using ellipses. This will turn "Now is the time for all good men" into * "Now is the time for..." * *

Specifically: * *

    *
  • If the number of characters in {@code str} is less than or equal to {@code maxWidth}, * return {@code str}. *
  • Else abbreviate it to {@code (substring(str, 0, max-3) + "...")}. *
  • If {@code maxWidth} is less than {@code 4}, throw an {@code IllegalArgumentException}. *
  • In no case will it return a String of length greater than {@code maxWidth}. *
* *
   * StringUtils.abbreviate(null, *)      = null
   * StringUtils.abbreviate("", 4)        = ""
   * StringUtils.abbreviate("abcdefg", 6) = "abc..."
   * StringUtils.abbreviate("abcdefg", 7) = "abcdefg"
   * StringUtils.abbreviate("abcdefg", 8) = "abcdefg"
   * StringUtils.abbreviate("abcdefg", 4) = "a..."
   * StringUtils.abbreviate("abcdefg", 3) = IllegalArgumentException
   * 
* * @param str the String to check, may be null * @param abbrevMarker the String indicate abbreviation * @param maxWidth maximum length of result String, must be at least 4 * @return abbreviated String, {@code null} if null String input * @throws IllegalArgumentException if the width is too small * @since 2.0 */ public static String abbreviate(final String str, final String abbrevMarker, final int maxWidth) { return abbreviate(str, abbrevMarker, 0, maxWidth); } /** * Abbreviates a String using a given replacement marker. This will turn "Now is the time for all * good men" into "...is the time for..." if "..." was defined as the replacement marker. * *

Works like {@code abbreviate(String, String, int)}, but allows you to specify a "left edge" * offset. Note that this left edge is not necessarily going to be the leftmost character in the * result, or the first character following the replacement marker, but it will appear somewhere * in the result. * *

In no case will it return a String of length greater than {@code maxWidth}. * *

   * StringUtils.abbreviate(null, null, *, *)                 = null
   * StringUtils.abbreviate("abcdefghijklmno", null, *, *)    = "abcdefghijklmno"
   * StringUtils.abbreviate("", "...", 0, 4)                  = ""
   * StringUtils.abbreviate("abcdefghijklmno", "---", -1, 10) = "abcdefg---"
   * StringUtils.abbreviate("abcdefghijklmno", ",", 0, 10)    = "abcdefghi,"
   * StringUtils.abbreviate("abcdefghijklmno", ",", 1, 10)    = "abcdefghi,"
   * StringUtils.abbreviate("abcdefghijklmno", ",", 2, 10)    = "abcdefghi,"
   * StringUtils.abbreviate("abcdefghijklmno", "::", 4, 10)   = "::efghij::"
   * StringUtils.abbreviate("abcdefghijklmno", "...", 6, 10)  = "...ghij..."
   * StringUtils.abbreviate("abcdefghijklmno", "*", 9, 10)    = "*ghijklmno"
   * StringUtils.abbreviate("abcdefghijklmno", "'", 10, 10)   = "'ghijklmno"
   * StringUtils.abbreviate("abcdefghijklmno", "!", 12, 10)   = "!ghijklmno"
   * StringUtils.abbreviate("abcdefghij", "abra", 0, 4)       = IllegalArgumentException
   * StringUtils.abbreviate("abcdefghij", "...", 5, 6)        = IllegalArgumentException
   * 
* * @param str the String to check, may be null * @param abbrevMarker the String used as replacement marker * @param offset left edge of source String * @param maxWidth maximum length of result String, must be at least 4 * @return abbreviated String, {@code null} if null String input * @throws IllegalArgumentException if the width is too small * @since 3.6 */ private static String abbreviate( final String str, final String abbrevMarker, int offset, final int maxWidth) { if (Strings.isNullOrEmpty(str) || Strings.isNullOrEmpty(abbrevMarker)) { return str; } final int abbrevMarkerLength = abbrevMarker.length(); final int minAbbrevWidth = abbrevMarkerLength + 1; final int minAbbrevWidthOffset = abbrevMarkerLength + abbrevMarkerLength + 1; if (maxWidth < minAbbrevWidth) { throw new IllegalArgumentException( String.format("Minimum abbreviation width is %d", minAbbrevWidth)); } if (str.length() <= maxWidth) { return str; } if (offset > str.length()) { offset = str.length(); } if (str.length() - offset < maxWidth - abbrevMarkerLength) { offset = str.length() - (maxWidth - abbrevMarkerLength); } if (offset <= abbrevMarkerLength + 1) { return str.substring(0, maxWidth - abbrevMarkerLength) + abbrevMarker; } if (maxWidth < minAbbrevWidthOffset) { throw new IllegalArgumentException( String.format("Minimum abbreviation width with offset is %d", minAbbrevWidthOffset)); } if (offset + maxWidth - abbrevMarkerLength < str.length()) { return abbrevMarker + abbreviate(str.substring(offset), abbrevMarker, maxWidth - abbrevMarkerLength); } return abbrevMarker + str.substring(str.length() - (maxWidth - abbrevMarkerLength)); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy