All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.conqat.lib.commons.string.StringUtils Maven / Gradle / Ivy

There is a newer version: 2024.7.2
Show newest version
/*
 * Copyright (c) CQSE GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.conqat.lib.commons.string;

import static org.conqat.lib.commons.string.LineSplitter.UNICODE_NEL;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Random;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collector;
import java.util.stream.Collectors;

import org.apache.commons.text.StringEscapeUtils;
import org.apache.commons.text.translate.CharSequenceTranslator;
import org.apache.commons.text.translate.EntityArrays;
import org.apache.commons.text.translate.LookupTranslator;
import org.checkerframework.checker.nullness.qual.NonNull;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.conqat.lib.commons.algo.Diff;
import org.conqat.lib.commons.assertion.CCSMAssert;
import org.conqat.lib.commons.collections.CollectionUtils;
import org.conqat.lib.commons.collections.Pair;
import org.conqat.lib.commons.collections.PairList;
import org.conqat.lib.commons.filesystem.EByteOrderMark;
import org.conqat.lib.commons.filesystem.FileSystemUtils;
import org.intellij.lang.annotations.Language;
import org.jetbrains.annotations.Contract;

/**
 * A utility class providing some advanced string functionality.
 */
public class StringUtils {

	/**
	 * Matches all whitespace at the beginning of each line.
	 * <p>
	 * We deliberately don't use "\\s" here because this also matches new lines. Instead we use
	 * "\\p{Zs}" which matches all unicode horizontal whitespace characters.
	 */
	private static final Pattern LEADING_WHITESPACE_PATTERN = Pattern.compile("^[\\t\\p{Zs}]+", Pattern.MULTILINE);

	/** Line separator of the current platform. */
	public static final String LINE_SEPARATOR = System.lineSeparator();

	/** Line feed ("\n"), platform independent. */
	public static final String LINE_FEED = "\n";

	/**
	 * The empty string. Use this instead of "" (our Java coding guidelines).
	 *
	 * <ol>
	 * <li>Use this instead of an empty string literal ({@code ""}) to make it clear that this is
	 * intentionally empty. Using "" may leave the reader in doubt whether this is something unfinished
	 * (e.g. "" used temporarily until something meaningful is filled in) or intentionally the empty
	 * String. Making it explicit removes this problem.</li>
	 * <li>On most JVMs, using "" allocates a new String object per class in which it used. Using
	 * EMPTY_STRING re-uses a single object and saves a few bytes.</li>
	 * <li>Makes " " and "" better distinguishable.</li>
	 * </ol>
	 */
	public static final String EMPTY_STRING = "";

	/** A space. */
	public static final String SPACE = " ";

	/** A space character. */
	public static final char SPACE_CHAR = ' ';

	/** A tab character. */
	public static final String TAB = "\t";

	/** Two spaces. Built from {@link #SPACE} so that whitespace normalization cannot collapse it. */
	public static final String TWO_SPACES = SPACE + SPACE;

	/** Dot. */
	public static final String DOT = ".";

	/** A String consisting of three dots. */
	public static final String THREE_DOTS = "...";

	/** The ellipsis character (U+2026), written as an escape to be independent of the file encoding. */
	public static final String ELLIPSIS = "\u2026";

	/** Number formatter. */
	private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance(Locale.US);

	/** Number formatter for percentages. */
	private static final NumberFormat PERCENTAGE_FORMAT = NumberFormat.getPercentInstance(Locale.US);

	/** Random number generator. */
	private static final Random RANDOM = new Random();

	/** Char strings used to convert bytes to a hex string */
	private static final char[] HEX_CHARACTERS = "0123456789ABCDEF".toCharArray();

	/**
	 * Characters that need to be escaped in markdown. Used in {@link #escapeMarkdownChars(String)} and
	 * {@link #unescapeMarkdownChars(String)}.
	 */
	private static final List<Character> MARKDOWN_ESCAPE_CHARACTERS = Arrays.asList('[', ']', '(', ')', '*', '#',
			'_', '~', '^', '+', '=', '>');

	/**
	 * Defines the escaping map for Markdown characters based on {@link #MARKDOWN_ESCAPE_CHARACTERS}.
	 * Characters to be escaped will be prefixed by a backslash. Escaping should be performed by using
	 * {@link #escapeMarkdownChars(String)}.
	 */
	private static final Map<String, String> MARKDOWN_ESCAPE_MAP = MARKDOWN_ESCAPE_CHARACTERS.stream()
			.collect(Collectors.toMap(String::valueOf, characterToBeEscaped -> "\\" + characterToBeEscaped));

	/**
	 * Defines how a newline and already escaped newlines should be escaped (\n->\\n and \r->\\r). We
	 * also escape the escape char (\->\\) to allow correct un-escaping.
	 * <p>

* Use it with {@link #escapeChars(String, Map)} and {@link #unescapeChars(String, Map)}. If * language-specific escaping should be performed, prefer using methods from * {@link StringEscapeUtils}. */ public static final Map ESCAPE_NEWLINE; static { Map initialMap = new HashMap<>(); initialMap.put("\n", "\\n"); initialMap.put("\r", "\\r"); initialMap.put("\\", "\\\\"); ESCAPE_NEWLINE = Collections.unmodifiableMap(initialMap); } /** * Create a string of the given length and center the given string within it. Left and right areas * are filled by the character provided. * * @param string * The input string. * @param length * The length of the string to be returned. * @param c * The character to surround the input string with. * @return the new string or, if the string is longer than the specified length, the original * string. * @see #flushLeft(String, int, char) * @see #flushRight(String, int, char) */ public static String center(String string, int length, char c) { if (string.length() >= length) { return string; } int strLen = string.length(); int fillLen = (length - strLen) / 2; String leftFiller = fillString(fillLen, c); if ((length - strLen) % 2 != 0) { fillLen++; } String rightFiller = fillString(fillLen, c); return leftFiller + string + rightFiller; } /** * Compares two strings both of which may be null. A string which is null * is always smaller than the other string, except for both strings being null. * * @param a * The string which is compared to the second string. * @param b * The string which is compared to the first string. * @return Returns 0 if both strings are null, -1 if only the first string is * null, and 1 if only the second string is null. If both strings * are not null, returns the result of the usual string comparison. */ public static int compare(@Nullable String a, @Nullable String b) { // noinspection StringEquality if (a == b) { // This is done on purpose, to ensure comparison of two nulls equals true, as // well as performance improvement. 
return 0; } if (a == null) { return -1; } if (b == null) { return 1; } return a.compareTo(b); } /** * Concatenates all elements of an iterable using the toString()-method. * * @param iterable * the iterable * @return a concatenation, separated by spaces */ public static String concat(Iterable iterable) { return concat(iterable, SPACE); } /** * Concatenates all elements of an iterable using the toString()-method, separating * them with the given separator. * * @param iterable * the iterable containing the strings * @param separator * the separator to place between the strings, may be null * @return a concatenation of the string in the iterable or null if iterable was * null. If the iterable is of size 0, the empty string is returned. */ @Contract("null, _ -> null; !null, _ -> !null") public static @Nullable String concat(@Nullable Iterable iterable, @Nullable String separator) { if (iterable == null) { return null; } return concat(iterable.iterator(), separator); } /** * Concatenates all elements of an iterator using the toString()-method, separating * them with the given separator. * * @param iterator * the {@link Iterator} containing the strings * @param separator * the separator to place between the strings, may be null * @return a concatenation of the string in the iterator or null if iterator was * null. If the iterator has no elements, the empty string is returned. */ public static @Nullable String concat(@Nullable Iterator iterator, @Nullable String separator) { if (iterator == null) { return null; } if (!iterator.hasNext()) { return EMPTY_STRING; } if (separator == null) { separator = EMPTY_STRING; } StringBuilder builder = new StringBuilder(); while (iterator.hasNext()) { builder.append(iterator.next()); if (iterator.hasNext()) { builder.append(separator); } } return builder.toString(); } /** * Concatenates all elements of an array using the toString() -method. 
* * @param array * the array containing the strings * @return a concatenation of the string separated by spaces */ public static String concat(Object[] array) { return concat(array, SPACE); } /** * Concatenates all elements of an array using the {@code toString()} -method, separating them with * the given {@code separator}. * * @param array * the array * @param separator * the separator to place between the strings, may be {@code null} * @return a concatenation of the string in the array or {@code null} if array was {@code null}. If * an array is of length 0, the empty string is returned. */ @Contract(value = "null, _ -> null; !null, _ -> !null", pure = true) public static @Nullable String concat(Object @Nullable [] array, @Nullable String separator) { if (array == null) { return null; } return concat(Arrays.asList(array), separator); } /** * Joins a list of strings with a delimiter and a different last delimiter. This could, for example, * be used if you want to create a String "a, b and c" from a list containing "a", "b" and "c". * * @param items * the list of strings to be joined * @param delimiter * the delimiter to use between the strings except the last two * @param lastDelimiter * the delimiter to use between the last two strings * @return the joined string with a different delimiter for the last two strings */ public static String joinDifferentLastDelimiter(@NonNull List items, String delimiter, String lastDelimiter) { if (items.isEmpty()) { return StringUtils.EMPTY_STRING; } if (items.size() == 1) { return items.get(0); } int last = items.size() - 1; return String.join(lastDelimiter, String.join(delimiter, items.subList(0, last)), items.get(last)); } /** * Returns a {@link Collector} that concatenates the input elements, separated by the specified * {@code delimiter}, in encounter order. Between the last two elements the {@code lastDelimiter} is * used instead of the {@code delimiter}. * *

	 * <pre>
	 * // returns: one, two and three
	 * Stream.of("one", "two", "three").collect(joiningDifferentLastDelimiter(", ", " and "));
	 * // returns: one and two
	 * Stream.of("one", "two").collect(joiningDifferentLastDelimiter(", ", " and "));
	 * // returns: one
	 * Stream.of("one").collect(joiningDifferentLastDelimiter(", ", " and "));
	 * // returns the empty string
	 * Stream.empty().collect(joiningDifferentLastDelimiter(", ", " and "));
	 * </pre>
	 * 
* * * @return A {@link Collector} similar to {@link Collectors#joining(CharSequence)}, but using a * different delimiter for the last two items. * * @see #joinDifferentLastDelimiter(List, String, String) * @see #joiningDifferentLastDelimiter(String, String, String, String) */ public static Collector joiningDifferentLastDelimiter(String delimiter, String lastDelimiter) { return joiningDifferentLastDelimiter(delimiter, lastDelimiter, StringUtils.EMPTY_STRING, StringUtils.EMPTY_STRING); } /** * Returns a {@link Collector} that concatenates the input elements, separated by the specified * {@code delimiter}, with the specified {@code prefix} and {@code suffix}, in encounter order. * Between the last two elements the {@code lastDelimiter} is used instead of the {@code delimiter}. * *
	 * <pre>
	 * // returns: [one, two and three]
	 * Stream.of("one", "two", "three").collect(joiningDifferentLastDelimiter(", ", " and ", "[", "]"));
	 * // returns: [one and two]
	 * Stream.of("one", "two").collect(joiningDifferentLastDelimiter(", ", " and ", "[", "]"));
	 * // returns: [one]
	 * Stream.of("one").collect(joiningDifferentLastDelimiter(", ", " and ", "[", "]"));
	 * // returns: []
	 * Stream.empty().collect(joiningDifferentLastDelimiter(", ", " and ", "[", "]"));
	 * </pre>
	 * 
* * * @return A {@link Collector} similar to * {@link Collectors#joining(CharSequence, CharSequence, CharSequence)}, but using a * different delimiter for the last two items. * * @see #joinDifferentLastDelimiter(List, String, String) * @see #joiningDifferentLastDelimiter(String, String) */ public static Collector joiningDifferentLastDelimiter(String delimiter, String lastDelimiter, String prefix, String suffix) { return Collector., String>of(ArrayList::new, List::add, (l1, l2) -> { l1.addAll(l2); return l1; }, l -> prefix + StringUtils.joinDifferentLastDelimiter(l, delimiter, lastDelimiter) + suffix); } /** Concatenate two string arrays. */ public static String[] concat(String[] array1, String[] array2) { String[] result = new String[array1.length + array2.length]; System.arraycopy(array1, 0, result, 0, array1.length); System.arraycopy(array2, 0, result, array1.length, array2.length); return result; } /** * Build a string with a specified length from a character. * * @param length * The length of the string. * @param c * The character. * @return The string. */ public static String fillString(int length, char c) { char[] characters = new char[length]; Arrays.fill(characters, c); return new String(characters); } /** * Create a sting of the given length starting with the provided string. Remaining characters are * filled with the provided character. * * @param string * The input string. * @param length * The length of the string to be returned. * @param c * The character to fill the string. * @return the new string or, if the string is longer than the specified length, the original * string. * @see #flushRight(String, int, char) * @see #center(String, int, char) */ public static String flushLeft(String string, int length, char c) { int gap = length - string.length(); if (gap <= 0) { return string; } return string + StringUtils.fillString(gap, c); } /** * Create a sting of the given length ending with the provided string. 
Remaining characters are * filled with the provided character. * * @param string * The input string. * @param length * The length of the string to be returned. * @param c * The character to fill the string. * @return the new string or, if the string is longer than the specified length, the original * string. * @see #flushLeft(String, int, char) * @see #center(String, int, char) */ public static String flushRight(String string, int length, char c) { int gap = length - string.length(); if (gap <= 0) { return string; } return StringUtils.fillString(gap, c) + string; } /** * Format number */ public static String format(Number number) { // Replacing the non-breaking space character (U+00A0) with a regular space // (U+0020) to avoid issues with HTTP requests to CCPs, similar to TS-29424. return NUMBER_FORMAT.format(number).replace(" ", " "); } /** * Format as percentage. */ public static String formatAsPercentage(Number number) { // Replacing the non-breaking space character (U+00A0) with a regular space // (U+0020) because it caused issues with HTTP requests to CCPs, c.f. TS-29424. return PERCENTAGE_FORMAT.format(number).replace(" ", " "); } /** * Returns the first line of a string. * * @return If the input string is empty string an empty string will be returned. If the input string * is null, the result will also be null. */ @Nullable public static String getFirstLine(@Nullable String string) { if (EMPTY_STRING.equals(string)) { return EMPTY_STRING; } LineSplitter lineSplitter = new LineSplitter(string); return lineSplitter.next(); } /** * Returns the first n part of a string, separated by the given character. *

* E.g., getStringParts("edu.tum.cs", 2, '.') gives: "edu.tum". * * @param string * the base string * @param partNumber * number of parts * @param separator * the separator character */ public static String getFirstParts(String string, int partNumber, char separator) { if (partNumber < 0 || string == null) { return string; } int idx = 0; for (int i = 0; i < partNumber; i++) { idx = string.indexOf(separator, idx + 1); if (idx == -1) { return string; } } return string.substring(0, idx); } /** * Returns the first part of a String whose parts are separated by the given character. *

* E.g., {@code getFirstPart("foo@bar@acme", '@')} gives "foo".
	 *
	 * @param string
	 *            the String
	 * @param separator
	 *            separation string
	 * @return the first part of the String, or the original String if the separator is not found.
	 */
	public static String getFirstPart(String string, String separator) {
		int separatorStart = string.indexOf(separator);
		return separatorStart < 0 ? string : string.substring(0, separatorStart);
	}

	/**
	 * Variant of {@link #getFirstPart(String, String)} which accepts a single char as the separator.
	 *
	 * @see #getFirstPart(String, String)
	 */
	public static String getFirstPart(String string, char separator) {
		int separatorStart = string.indexOf(separator);
		return separatorStart < 0 ? string : string.substring(0, separatorStart);
	}

	/**
	 * Returns the last part of a String whose parts are separated by the given String.
	 *

* E.g., {@code getLastPart("org.conqat##lib.commons.string##StringUtils", "##")} gives
	 * "StringUtils". If the separator is the empty string, this method returns the empty string.
	 *
	 * @param string
	 *            the String
	 * @param separator
	 *            separation String
	 * @return the last part of the String, or the original String if the separation String is not
	 *         found.
	 */
	public static String getLastPart(String string, String separator) {
		final int singlePart = 1;
		return getLastPart(string, separator, singlePart);
	}

	/**
	 * Variant of {@link #getLastPart(String, String)} which accepts a single char as the separator.
	 *
	 * @see #getLastPart(String, String)
	 */
	public static String getLastPart(String string, char separator) {
		String separatorString = String.valueOf(separator);
		return getLastPart(string, separatorString);
	}

	/**
	 * Returns the last {@code upToCount} part of a String whose parts are separated by the given
	 * {@code separator}, e.g.,
	 *

	 * <ul>
	 * <li>{@code getLastPart("org.conqat##lib.commons.string##StringUtils", "##", 1) -> "StringUtils"}</li>
	 * <li>{@code getLastPart("org.conqat##lib.commons.string##StringUtils", "##", 2) -> "lib.commons.string##StringUtils"}</li>
	 * <li>{@code getLastPart("org.conqat##lib.commons.string##StringUtils", "##", 3+) -> "org.conqat##lib.commons.string##StringUtils"}</li>
	 * </ul>
	 *
* If the separator is the empty string, this method returns the empty string. * * @param string * the String * @param separator * separation String * @param upToCount * The number of times the {@code separator} is matched from the end. If the separator is * present fewer than this value, the complete {@code string} is returned. * @return the last part of the String, or the original String if the separation String is not found * {@code upToCount} times. */ public static String getLastPart(String string, String separator, int upToCount) { if (upToCount <= 0) { throw new IllegalArgumentException( String.format("Expected \"%s\" (%d) to be positive", "upToCount", upToCount)); } int idx = string.length(); for (int i = 0; i < upToCount; i++) { idx = string.lastIndexOf(separator, idx); if (idx < 0) { return string; } // Move the index before the separator idx--; } return string.substring(idx + separator.length() + 1); } /** * Splits the string at the first occurrence of {@code separator}. If the separator does not occur, * the second string in the returned pair is empty. E.g. splitAtFirst(foo.bar.baz, '.') gives * ("foo", "bar.baz"). */ public static Pair splitAtFirst(String string, String separator) { int idx = string.indexOf(separator); if (idx == -1) { return new Pair<>(string, StringUtils.EMPTY_STRING); } return new Pair<>(string.substring(0, idx), string.substring(idx + separator.length())); } /** * Splits the string at the first occurrence of {@code separator}. If the separator does not occur, * the second string in the returned pair is empty. E.g. splitAtFirst(foo.bar.baz, '.') gives * ("foo", "bar.baz"). */ public static Pair splitAtFirst(String string, char separator) { int idx = string.indexOf(separator); if (idx == -1) { return new Pair<>(string, StringUtils.EMPTY_STRING); } return new Pair<>(string.substring(0, idx), string.substring(idx + 1)); } /** * Splits the string at the last occurrence of {@code separator}. 
If the separator does not occur, * the second string in the returned pair is empty. E.g. splitAtLast(foo.bar.baz, '.') gives * ("foo.bar", "baz). */ public static Pair splitAtLast(String string, char separator) { int idx = string.lastIndexOf(separator); if (idx == -1) { return new Pair<>(string, ""); } return new Pair<>(string.substring(0, idx), string.substring(idx + 1)); } /** * Searches the elements of a string array for a string. Strings are trimmed. * * @param array * the array to search * @param string * the search string * @return the index of the element where the string was found or -1 if string wasn't * found. */ public static int indexOf(String[] array, String string) { for (int i = 0; i < array.length; i++) { if (array[i].trim().equals(string.trim())) { return i; } } return -1; } /** * Returns the n-th occurrence of the character c in the string s or -1 if no such character exists. */ public static int indexOfNthOccurrence(String s, char c, int n) { if (StringUtils.isEmpty(s)) { return -1; } int pos = -1; while (n > 0 && pos < s.length()) { int index = s.indexOf(c, pos + 1); if (index == -1) { return -1; } pos = index; n--; } if (n == 0) { return pos; } return -1; } /** * Checks if a string is empty (after trimming). * * @param text * the string to check. * @return true if string is empty or null, false otherwise. */ public static boolean isEmpty(@Nullable String text) { return org.apache.commons.lang3.StringUtils.isBlank(text); } /** * Checks if the given string contains at least one letter (checked with * {@link Character#isLetter(char)}). */ public static boolean containsLetter(String s) { for (int i = 0; i < s.length(); i++) { if (Character.isLetter(s.charAt(i))) { return true; } } return false; } /** Returns whether s1 contains s2 ignoring case */ public static boolean containsIgnoreCase(String s1, String s2) { return s1.toLowerCase().contains(s2.toLowerCase()); } /** Returns whether s contains all the given substrings. 
*/
	public static boolean containsAll(String s, String... substrings) {
		return Arrays.stream(substrings).allMatch(substring -> s.contains(substring));
	}

	/**
	 * Generates a random string with a certain length. The string consists of characters with ASCII
	 * code between 33 and 125 (both inclusive).
	 *
	 * @param length
	 *            the length of the random string
	 * @return the random string
	 */
	public static String randomString(int length) {
		return randomString(length, RANDOM);
	}

	/**
	 * Performs the actual creation of the random string using the given randomizer. Each character is
	 * drawn from the 93 ASCII codes starting at 33.
	 */
	public static String randomString(int length, Random random) {
		char[] characters = new char[length];
		int position = 0;
		while (position < length) {
			characters[position] = (char) (random.nextInt(93) + 33);
			position++;
		}
		return new String(characters);
	}

	/**
	 * Generates an array of random strings.
	 *
	 * @param length
	 *            number of strings
	 * @param stringLength
	 *            length of each string
	 * @return the randomly generated array.
	 */
	public static String[] randomStringArray(int length, int stringLength) {
		String[] array = new String[length];
		// Arrays.setAll fills the slots in ascending index order
		Arrays.setAll(array, index -> randomString(stringLength));
		return array;
	}

	/**
	 * Generates a pseudo random string with a certain length in a deterministic, reproducible fashion.
	 *

*
	 * @param length
	 *            the length of the pseudo-random string
	 * @param seed
	 *            seed value for the random number generator used for the generation of the
	 *            pseudo-random string. If the same seed value is used, the same pseudo-random string is
	 *            generated.
	 */
	public static String generateString(int length, int seed) {
		return randomString(length, new Random(seed));
	}

	/**
	 * Generates an array of pseudo-random strings in a deterministic, reproducible fashion. The string
	 * at index {@code i} is generated with the seed {@code seed + i}.
	 *
	 * @param length
	 *            number of strings
	 * @param stringLength
	 *            length of each string
	 * @param seed
	 *            seed value for the random number generator used for the generation of the
	 *            pseudo-random string. If the same seed value is used, the same pseudo-random string
	 *            array is generated.
	 * @return the randomly generated array.
	 */
	public static String[] generateStringArray(int length, int stringLength, int seed) {
		String[] array = new String[length];
		Arrays.setAll(array, index -> generateString(stringLength, seed + index));
		return array;
	}

	/**
	 * Returns the beginning of a String, cutting off the last part which is separated by the given
	 * character.
	 *

* E.g., {@code removeLastPart("org.conqat.lib.commons.string.StringUtils", '.')} gives
	 * "org.conqat.lib.commons.string".
	 *
	 * @param string
	 *            the String
	 * @param separator
	 *            separation character
	 * @return the String without the last part, or the original string (i.e., the same object) if the
	 *         separation character is not found.
	 */
	public static String removeLastPart(String string, char separator) {
		int lastSeparator = string.lastIndexOf(separator);
		return lastSeparator < 0 ? string : string.substring(0, lastSeparator);
	}

	/**
	 * Replaces all occurrences of keys of the given map in the given string with the associated value
	 * in that map. The given map may be {@code null}, in which case the original string is
	 * returned unchanged.
	 *

* This method is semantically the same as calling * {@link String#replace(CharSequence, CharSequence)} for each of the entries in the map, but may be * significantly faster for many replacements performed on a short string, since * {@link String#replace(CharSequence, CharSequence)} uses regular expressions internally and * results in many String object allocations when applied iteratively. *

* The order in which replacements are applied depends on the order of the map's entry set. */ public static String replaceFromMap(String string, @Nullable Map replacements) { if (replacements == null) { return string; } StringBuilder sb = new StringBuilder(string); for (Entry entry : replacements.entrySet()) { String key = entry.getKey(); String value = entry.getValue(); int start = sb.indexOf(key, 0); while (start > -1) { int end = start + key.length(); int nextSearchStart = start + value.length(); sb.replace(start, end, value); start = sb.indexOf(key, nextSearchStart); } } return sb.toString(); } /** * Removes all occurrences of the specified strings from the given string. */ public static String removeAll(String string, String... stringsToRemove) { if (stringsToRemove == null || stringsToRemove.length == 0) { return string; } return removeAll(string, Arrays.asList(stringsToRemove)); } /** * Removes all occurrences of the specified strings from the given string. */ public static String removeAll(String string, Iterable stringsToRemove) { if (stringsToRemove == null) { return string; } StringBuilder sb = new StringBuilder(string); for (String key : stringsToRemove) { int start = sb.indexOf(key, 0); while (start > -1) { int end = start + key.length(); sb.delete(start, end); start = sb.indexOf(key, start); } } return sb.toString(); } /** * Replace all linebreaks in string with the platform-specific line separator * ({@link #LINE_SEPARATOR}). * * @return a string with linebreaks replaced by the line separator of the current platform. If the * given * string is {@code null}, then an empty string will be returned. */ public static @NonNull String normalizeLineSeparatorsPlatformSpecific(@Nullable String string) { return replaceLineBreaks(string, LINE_SEPARATOR); } /** * Replace all linebreaks in string with a line feed symbol (\n), platform independent. * * @return a string with linebreaks replaced by {@link #LINE_FEED}. 
If the given string is * {@code null}, then an empty string will be returned. */ public static @NonNull String normalizeLineSeparatorsPlatformIndependent(@Nullable String string) { return replaceLineBreaks(string, '\n'); } /** * Replace all linebreaks with the given character. * * @return a string with linebreaks replaced by the given replacementCharacter. If the given string * is {@code null}, then an empty string will be returned. */ public static @NonNull String replaceLineBreaks(@Nullable String string, char replacementCharacter) { return LineBreakReplacer.replaceLineBreaks(string, replacementCharacter); } /** * Replace all linebreaks in string by a specified symbol. * * @apiNote Use {@link #replaceLineBreaks(String, char)} whenever possible, as it is much faster. * * @return a string with line breaks replaced. Will return an empty string if the input string is * {@code null}. */ public static @NonNull String replaceLineBreaks(@Nullable String string, @NonNull String symbol) { StringBuilder builder = new StringBuilder(); LineSplitter lineSplitter = new LineSplitter(string); lineSplitter.setIncludeTrailingEmptyLine(true); for (String line : lineSplitter) { builder.append(line); if (lineSplitter.hasNext()) { builder.append(symbol); } } return builder.toString(); } /** * Split string in lines. For the empty string and null an array of length zero is * returned. * * @see #splitLinesAsList(String) */ public static String[] splitLines(@Nullable String content) { List lineList = splitLinesAsList(content); String[] result = new String[lineList.size()]; lineList.toArray(result); return result; } /** * Returns the number of occurrences of the given character in the given string. 
*/
	public static int countCharacter(String content, char character) {
		int occurrences = 0;
		int length = content.length();
		for (int index = 0; index < length; index++) {
			if (content.charAt(index) == character) {
				occurrences++;
			}
		}
		return occurrences;
	}

	/**
	 * Return number of lines in a string, by counting newline characters, except if it is the last
	 * character in the content (i.e. we ignore trailing new lines). Behaves consistently with
	 * {@link #splitLines(String)}.
	 *

* Will return 1 for a non-empty string that has no line breaks (i.e., the first line
	 * already counts).
	 */
	public static int countLines(String content) {
		final boolean includeTrailingNewline = false;
		return countLines(content, includeTrailingNewline);
	}

	/**
	 * Return number of lines in a string, by counting newline characters.
	 *

* We count characters to avoid object allocations at all costs. *

* Will return 1 for a non-empty string that has no line breaks (i.e., the first line * already counts). */ public static int countLines(@Nullable String content, boolean includeTrailingNewline) { if (content == null || content.isEmpty()) { return 0; } int lines = 1; int index = 0; int contentLength = content.length(); while (index < contentLength) { char c = content.charAt(index); index++; if (c == '\n' || c == UNICODE_NEL) { if (index < contentLength || includeTrailingNewline) { lines++; } } else if (c == '\r') { if (index < contentLength && content.charAt(index) == '\n') { // Skip the additional character index++; } if (index < contentLength || includeTrailingNewline) { lines++; } } } return lines; } /** * Split string in lines. For the empty string and null an empty list is returned. * * @see #splitLines(String) */ public static List splitLinesAsList(@Nullable String content) { return splitLinesAsList(content, false); } /** * Split string in lines. For the empty string and null an empty list is returned. * * @see #splitLines(String) */ public static List splitLinesAsList(@Nullable String content, boolean includeTrailingEmptyLine) { List result = new ArrayList<>(); LineSplitter lineSplitter = new LineSplitter(content); lineSplitter.setIncludeTrailingEmptyLine(includeTrailingEmptyLine); for (String line : lineSplitter) { result.add(line); } return result; } /** * Prefixes a string with a prefix and separator if the prefix is not empty. */ public static String addPrefix(String string, String separator, String prefix) { if (StringUtils.isEmpty(prefix)) { return string; } return prefix + separator + string; } /** Prefixes the given list of strings with the specified prefix */ public static List prefixAll(Collection strings, String prefix) { List result = new ArrayList<>(strings.size()); for (String value : strings) { result.add(prefix + value); } return result; } /** * Suffixes a string with a suffix and separator if the suffix is not empty. 
*/ public static String addSuffix(String string, String separator, String suffix) { if (StringUtils.isEmpty(suffix)) { return string; } return string + separator + suffix; } /** * Remove prefix from a string. * * @param string * the string * @param prefix * the prefix * * @return the string without the prefix or the original string if it does not start with the * prefix. */ public static String stripPrefix(String string, String prefix) { if (string.startsWith(prefix)) { return string.substring(prefix.length()); } return string; } /** * Remove prefix from a string. This ignores casing, i.e. * stripPrefixIgnoreCase("C:/Programs/", "c:/programs/notepad.exe") will return * "notepad.exe". * * @param string * the string * @param prefix * the prefix * * @return the string without the prefix or the original string if it does not start with the * prefix. */ public static String stripPrefixIgnoreCase(String string, String prefix) { if (startsWithIgnoreCase(string, prefix)) { return string.substring(prefix.length()); } return string; } /** * Remove suffix from a string. * * @param string * the string * @param suffix * the suffix * * @return the string without the suffix or the original string if it does not end with the suffix. */ public static String stripSuffix(String string, String suffix) { if (string.endsWith(suffix)) { return string.substring(0, string.length() - suffix.length()); } return string; } /** Strips all digits from the given String. */ public static String stripDigits(String string) { return string.replaceAll("[0-9]", EMPTY_STRING); } /** Strips all trailing digits from the end of the given String. */ public static String stripTrailingDigits(String string) { return string.replaceAll("\\d+$", EMPTY_STRING); } /** * Create string representation of a map. */ public static String toString(Map map) { return toString(map, EMPTY_STRING); } /** * Create string representation of a map. 
* * @param map * the map * @param indent * a line indent */ public static String toString(Map map, String indent) { StringBuilder result = new StringBuilder(); Iterator keyIterator = map.keySet().iterator(); while (keyIterator.hasNext()) { result.append(indent); Object key = keyIterator.next(); result.append(key); result.append(" = "); result.append(map.get(key)); if (keyIterator.hasNext()) { result.append(LINE_SEPARATOR); } } return result.toString(); } /** * Convert stack trace of a {@link Throwable} to a string. */ public static String obtainStackTrace(Throwable throwable) { StringWriter result = new StringWriter(); PrintWriter printWriter = new PrintWriter(result); throwable.printStackTrace(printWriter); FileSystemUtils.close(printWriter); FileSystemUtils.close(result); return result.toString(); } /** * Test if a string starts with one of the provided prefixes. Returns false if the list * of prefixes is empty. This should only be used for short lists of prefixes. */ public static boolean startsWithOneOf(String string, String... prefixes) { for (String prefix : prefixes) { if (string.startsWith(prefix)) { return true; } } return false; } /** * Test if a string starts with one of the provided prefixes. Returns false if the list * of prefixes is empty. This should only be used for short lists of prefixes. The given list must * not be null. */ public static boolean startsWithOneOf(String string, Iterable prefixes) { for (String prefix : prefixes) { if (string.startsWith(prefix)) { return true; } } return false; } /** * Returns whether the given string starts with the prefix ignoring case, i.e. * startsWithIgnoreCase("foobar", "Foo") will return true. */ public static boolean startsWithIgnoreCase(String string, String prefix) { return string.toLowerCase().startsWith(prefix.toLowerCase()); } /** * Test if a string contains of the provided strings. Returns false if the list of * strings is empty. This should only be used for short lists of strings. 
*/
    public static boolean containsOneOf(String text, String... strings) {
        return containsOneOf(text, Arrays.asList(strings));
    }

    /**
     * Test if a string contains one of the provided strings. Returns false if the list of
     * strings is empty. This should only be used for short lists of strings.
     */
    public static boolean containsOneOf(String text, Iterable<String> strings) {
        for (String candidate : strings) {
            if (text.contains(candidate)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns whether the given String ends with the specified suffix ignoring case. The comparison
     * is done character by character and does not depend on the default locale.
     */
    public static boolean endsWithIgnoreCase(String string, String suffix) {
        // A negative offset (suffix longer than string) makes regionMatches return false.
        int offset = string.length() - suffix.length();
        return string.regionMatches(true, offset, suffix, 0, suffix.length());
    }

    /**
     * Test if a string ends with one of the provided suffixes. Returns false if the list
     * of suffixes is empty. This should only be used for short lists of suffixes.
     */
    public static boolean endsWithOneOf(String string, String... suffixes) {
        for (String candidate : suffixes) {
            if (string.endsWith(candidate)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Prefix all lines of a string. This also replaces line breaks with the platform-specific
     * line-separator. The input is trimmed before it is split into lines.
     *
     * @param string
     *            the string to prefix
     * @param prefix
     *            the prefix to add
     * @param prefixFirstLine
     *            a flag that indicates if the first line should be prefixed or not.
     */
    public static String prefixLines(String string, String prefix, boolean prefixFirstLine) {
        String[] lines = StringUtils.splitLines(string.trim());
        StringBuilder prefixed = new StringBuilder();
        for (int index = 0; index < lines.length; index++) {
            if (index > 0) {
                prefixed.append(LINE_SEPARATOR);
            }
            if (index > 0 || prefixFirstLine) {
                prefixed.append(prefix);
            }
            prefixed.append(lines[index]);
        }
        return prefixed.toString();
    }

    /**
     * Splits the given string into an array of {@link Character}s. This is mostly used for testing
     * purposes, if an array of certain objects is needed.
*/
    public static Character[] splitChars(String s) {
        int length = s.length();
        Character[] chars = new Character[length];
        for (int index = 0; index < length; index++) {
            chars[index] = s.charAt(index);
        }
        return chars;
    }

    /** Capitalize string, i.e. upper-case its first character. Empty input is returned as is. */
    public static String capitalize(String string) {
        if (StringUtils.isEmpty(string)) {
            return string;
        }
        String head = string.substring(0, 1);
        String tail = string.substring(1);
        return head.toUpperCase() + tail;
    }

    /**
     * This method splits the input string into words (delimited by whitespace) and returns a string
     * whose words are separated by single spaces and whose lines are not longer than the given length
     * (unless a very long word occurs).
     */
    public static String wrapLongLines(String s, int maxLineLength) {
        StringBuilder wrapped = new StringBuilder();
        int currentLineLength = 0;
        for (String word : s.split("\\s+")) {
            if (word.isEmpty()) {
                continue;
            }
            if (currentLineLength > 0) {
                boolean fitsOnLine = currentLineLength + 1 + word.length() <= maxLineLength;
                if (fitsOnLine) {
                    wrapped.append(SPACE);
                    currentLineLength++;
                } else {
                    wrapped.append(LINE_SEPARATOR);
                    currentLineLength = 0;
                }
            }
            wrapped.append(word);
            currentLineLength += word.length();
        }
        return wrapped.toString();
    }

    /** Returns the longest common prefix of s and t */
    public static String longestCommonPrefix(String s, String t) {
        int limit = Math.min(s.length(), t.length());
        int common = 0;
        while (common < limit && s.charAt(common) == t.charAt(common)) {
            common++;
        }
        return s.substring(0, common);
    }

    /** Returns the longest common suffix of s and t */
    public static String longestCommonSuffix(String s, String t) {
        String reversedCommon = StringUtils.longestCommonPrefix(reverse(s), reverse(t));
        return reverse(reversedCommon);
    }

    /** Reverse a string */
    public static String reverse(String s) {
        StringBuilder builder = new StringBuilder(s);
        return builder.reverse().toString();
    }

    /**
     * Returns the longest common prefix of the strings in the list or the empty string if no common
     * prefix exists.
*/
    public static String longestCommonPrefix(Iterable<String> strings) {
        Iterator<String> iterator = strings.iterator();
        CCSMAssert.isTrue(iterator.hasNext(), "Expected are at least 2 strings");
        String commonPrefix = iterator.next();
        CCSMAssert.isTrue(iterator.hasNext(), "Expected are at least 2 strings");
        while (iterator.hasNext()) {
            commonPrefix = longestCommonPrefix(commonPrefix, iterator.next());
            if (commonPrefix.isEmpty()) {
                // cannot get any shorter, no need to look at the remaining strings
                break;
            }
        }
        return commonPrefix;
    }

    /** Removes whitespace (as defined by {@link Character#isWhitespace(char)}) from a string. */
    public static String removeWhitespace(String content) {
        StringBuilder stripped = new StringBuilder();
        for (char c : content.toCharArray()) {
            if (!Character.isWhitespace(c)) {
                stripped.append(c);
            }
        }
        return stripped.toString();
    }

    /**
     * Removes all whitespace at the beginning of each line in the given string.
     */
    public static String removeWhitespaceAtBeginningOfLine(String content) {
        Matcher leadingWhitespace = LEADING_WHITESPACE_PATTERN.matcher(content);
        return leadingWhitespace.replaceAll(StringUtils.EMPTY_STRING);
    }

    /**
     * Creates a unique name which is not contained in the given set of names. If possible, the given
     * base name is directly returned; otherwise it is extended by a number (starting with 2).
     */
    public static String createUniqueName(String baseName, Set<String> usedNames) {
        String candidate = baseName;
        int counter = 1;
        while (usedNames.contains(candidate)) {
            counter++;
            candidate = baseName + counter;
        }
        return candidate;
    }

    /**
     * Transforms a string from camel-case to upper-case with underscores.
*/ public static String camelCaseToUnderscored(String s) { return stripPrefix(s.replaceAll("([A-Z][a-z])", "_$1").toUpperCase(), "_"); } /** * Encodes a byte array as a hex string following the method described here: http * ://stackoverflow.com/questions/9655181/convert-from-byte-array-to-hex- string-in-java */ public static String encodeAsHex(byte[] data) { char[] hexChars = new char[data.length * 2]; for (int j = 0; j < data.length; j++) { int v = data[j] & 0xFF; hexChars[j * 2] = HEX_CHARACTERS[v >>> 4]; hexChars[j * 2 + 1] = HEX_CHARACTERS[v & 0x0F]; } return new String(hexChars); } /** Decodes a byte array from a hex string. */ public static byte[] decodeFromHex(String s) { byte[] result = new byte[s.length() / 2]; for (int i = 0; i < result.length; ++i) { result[i] = (byte) Integer.parseInt(s.substring(2 * i, 2 * i + 2), 16); } return result; } /** * Format number with number formatter, if number formatter is null, this uses * {@link String#valueOf(double)}. */ public static String format(double number, @Nullable NumberFormat numberFormat) { if (numberFormat == null) { return String.valueOf(number); } return numberFormat.format(number); } /** * Regex replacement methods like {@link Matcher#appendReplacement(StringBuffer, String)} or * {@link String#replaceAll(String, String)} treat dollar signs as group references. This method * escapes replacement strings so that dollar signs are treated as literals. */ public static String escapeRegexReplacementString(String replacement) { // this needs to be escape thrice as replaceAll also recognizes the // dollar sign return replacement.replaceAll("([$\\\\])", "\\\\$1"); } /** * Converts a string to a (UTF-8) byte representation. This returns null on a null input. */ @Contract("null -> null; !null -> !null") public static byte @Nullable [] stringToBytes(@Nullable String s) { if (s == null) { return null; } return s.getBytes(StandardCharsets.UTF_8); } /** * Converts a (UTF-8) byte array to a string. 
* This returns null on a null input.
     */
    @Contract("null -> null; !null -> !null")
    public static @Nullable String bytesToString(byte @Nullable [] b) {
        return bytesToString(b, 0);
    }

    /**
     * Converts a (UTF-8) byte array, starting from the provided {@code offset} to a string. This
     * returns null on a null input.
     */
    @Contract("null, _ -> null; !null, _ -> !null")
    public static @Nullable String bytesToString(byte @Nullable [] b, int offset) {
        // everything from offset up to the end of the array
        return b == null ? null : bytesToString(b, offset, b.length - offset);
    }

    /**
     * Converts a (UTF-8) byte array, starting from the provided {@code offset} and reading
     * {@code count} amount, to a string. This returns null on a null input.
     */
    @Contract("null, _, _ -> null; !null, _, _ -> !null")
    public static @Nullable String bytesToString(byte @Nullable [] b, int offset, int count) {
        return b == null ? null : new String(b, offset, count, StandardCharsets.UTF_8);
    }

    /**
     * Converts a byte array to a string, assuming the given encoding, unless a
     * byte-order mark included with the bytes implies that another encoding is actually used.
     *

* This method returns null on a null input.
     */
    public static @Nullable String bytesToString(byte @Nullable [] b, Charset encoding) {
        if (b == null) {
            return null;
        }
        Optional<EByteOrderMark> bom = EByteOrderMark.determineBOM(b);
        // A detected BOM overrides the caller's encoding and is not part of the text.
        int bomLength = bom.map(EByteOrderMark::getBOMLength).orElse(0);
        Charset effectiveEncoding = bom.map(EByteOrderMark::getEncoding).orElse(encoding);
        return new String(b, bomLength, b.length - bomLength, effectiveEncoding);
    }

    /**
     * Converts the given objects into a string list by invoking {@link Object#toString()} on each
     * non-null element. For null entries in the input, the output will contain a null entry as well.
     */
    public static <T> List<@Nullable String> toStrings(Collection<@Nullable T> objects) {
        List<@Nullable String> strings = new ArrayList<>();
        for (T object : objects) {
            strings.add(object == null ? null : object.toString());
        }
        return strings;
    }

    /**
     * Converts the given String to an {@link InputStream} with UTF-8 encoding.
     */
    public static InputStream toInputStream(String string) {
        return toInputStream(string, StandardCharsets.UTF_8);
    }

    /**
     * Converts the given String to an {@link InputStream} with the specified encoding.
     */
    public static InputStream toInputStream(String string, Charset charset) {
        byte[] encoded = string.getBytes(charset);
        return new ByteArrayInputStream(encoded);
    }

    /**
     * Converts the given {@link InputStream} to a String with UTF-8 encoding. The stream is read
     * until its end; it is not closed by this method.
     */
    public static String fromInputStream(InputStream inputStream) throws IOException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream();
        byte[] chunk = new byte[1024];
        for (int read = inputStream.read(chunk); read != -1; read = inputStream.read(chunk)) {
            collected.write(chunk, 0, read);
        }
        return collected.toString(StandardCharsets.UTF_8);
    }

    /**
     * Truncates the given string (if necessary) by removing characters from the end and attaching the
     * suffix such that the resulting string has at most length characters.
Length must be >= * suffix.length(); */ public static String truncate(String string, int length, String suffix) { CCSMAssert.isTrue(length >= suffix.length(), "Expected length >= suffix.length()"); if (string.length() <= length) { return string; } return string.substring(0, length - suffix.length()) + suffix; } /** * Truncates the given string (if necessary) by removing characters from the end. The length of the * resulting string is guaranteed not to exceed the given length parameter. */ public static String truncate(String string, int length) { return truncate(string, length, EMPTY_STRING); } /** * Truncates the given string (if necessary) by removing characters from the end and instead * attaching three dots. The length of the resulting string is guaranteed not to exceed the given * length parameter. */ public static String truncateWithThreeDots(String string, int length) { return truncate(string, length, THREE_DOTS); } /** * Returns the beginning of the given String, retaining at most numberOfChars characters. In case * the String is shorter than or equals to numberOfChars, the supplied String is returned unchanged. * Otherwise, the String is truncated and suffixed with ... (where the overall length will be * numberOfChars). Please note that numberOfChars needs to be >= 2. */ public static String truncateWithEllipsis(String s, int numberOfChars) { return truncate(s, numberOfChars, ELLIPSIS); } /** * Calculates the edit distance (aka Levenshtein distance) for two strings, i.e. the number of * insert, delete or replace operations required to transform one string into the other. The running * time is O(n*m) and the space complexity is O(n+m), where n/m are the lengths of the strings. Note * that due to the high running time, for long strings the {@link Diff} class should be used, that * has a more efficient algorithm, but only for insert/delete (not replace operation). *

* Although this is a clean reimplementation, the basic algorithm is explained here: Wikipedia */ public static int editDistance(String s, String t) { char[] sChars = s.toCharArray(); char[] tChars = t.toCharArray(); int m = s.length(); int n = t.length(); int[] distance = new int[m + 1]; for (int i = 0; i <= m; ++i) { distance[i] = i; } int[] oldDistance = new int[m + 1]; for (int j = 1; j <= n; ++j) { // swap distance and oldDistance int[] tmp = oldDistance; oldDistance = distance; distance = tmp; distance[0] = j; for (int i = 1; i <= m; ++i) { int cost = 1 + Math.min(distance[i - 1], oldDistance[i]); if (sChars[i - 1] == tChars[j - 1]) { cost = Math.min(cost, oldDistance[i - 1]); } else { cost = Math.min(cost, 1 + oldDistance[i - 1]); } distance[i] = cost; } } return distance[m]; } /** * Returns whether the edit distance as calculated by {@link #editDistance(String, String)}, is 0 or * 1. This implementation is significantly more efficient compared to actually calculating the edit * distance and runs in O(n+m). *

* The idea is that with at most one change, the start and end of both strings must be the same, to * traverse from start and end to the first difference. If the distance between both pointers is at * most one, the edit distance is at most one as well. */ public static boolean isEditDistanceAtMost1(String s, String t) { int m = s.length(); int n = t.length(); if (Math.abs(n - m) > 1) { return false; } // advance to first characters that differ int sStart = 0; int tStart = 0; while (sStart < m && tStart < n && s.charAt(sStart) == t.charAt(tStart)) { sStart += 1; tStart += 1; } // reverse advance to first characters that differ int sEnd = m - 1; int tEnd = n - 1; while (sEnd >= sStart && tEnd >= tStart && s.charAt(sEnd) == t.charAt(tEnd)) { sEnd -= 1; tEnd -= 1; } // as both are exclusive indexes (i.e. we have a difference at the // index), the indexes must be the same or even overlap to have an edit // distance of 1 or less. return sEnd <= sStart && tEnd <= tStart; } /** * Returns a list that contains all entries of the original list as lowercase strings. Does not * operate in-place! */ public static List lowercaseList(Collection strings) { List lowercaseList = new ArrayList<>(); for (String string : strings) { lowercaseList.add(string.toLowerCase()); } return lowercaseList; } /** * Returns the input string. Returns the provided default value in case the input is null. */ public static String defaultIfNull(String input, String defaultValue) { if (input == null) { return defaultValue; } return input; } /** * Returns the input string. Returns the provided default value in case the input is null or the * empty string. */ public static String defaultIfNullOrEmpty(@Nullable String input, String defaultValue) { if (isEmpty(input)) { return defaultValue; } return input; } /** * Returns the input string. Returns {@link #EMPTY_STRING} in case the input is null. 
*/
    public static @NonNull String emptyIfNull(@Nullable String input) {
        return input == null ? EMPTY_STRING : input;
    }

    /**
     * Returns {@code trueAlternative} if {@code condition} is {@code true} and {@code falseAlternative}
     * otherwise.
     */
    public static String alternativeOnCondition(boolean condition, String trueAlternative,
            String falseAlternative) {
        return condition ? trueAlternative : falseAlternative;
    }

    /**
     * Splits a string at every top-level occurrence of the separator character. This can be useful,
     * e.g. for splitting type parameter lists.
     * {@code "String,Map<String,Integer>,Map<String,Map<String,Integer>>"} split
     * at ',' with levelStart = '<' and levelEnd = '>' would result in
     * {@code ["String","Map<String,Integer>","Map<String,Map<String,Integer>>"]}
     *

* If there is no separator char, use the levelStart char. In this case, separator chars are * included in the output. * splitTopLevel("((a))(b)", '(', '(', ')' ) = ["", "((a))", "(b)"] *

* Can also be used to split columns from a CSV line where values can be quoted * splitTopLevel("\"a\";\";\";c", ';', '"', '"' ) = ["\"a\"", "\";\"", "c"] * * @param input * The input string. * @param separator * The separator character. * @param levelStart * The character that starts a new level. * @param levelEnd * The character that ends a level. * @return The input string split at every top-level separator. * @throws AssertionError * If the numbers for opening and closing characters in the input string differ. */ public static List splitTopLevel(String input, char separator, char levelStart, char levelEnd) { int currentLevel = 0; int currentStartIndex = 0; List result = new ArrayList<>(); for (int i = 0; i < input.length(); i++) { char currentChar = input.charAt(i); if (currentChar == separator && currentLevel == 0) { result.add(input.substring(currentStartIndex, i)); if (separator == levelStart) { currentStartIndex = i; } else { currentStartIndex = i + 1; } } if (currentChar == levelEnd && currentLevel > 0) { currentLevel--; } else if (currentChar == levelStart) { currentLevel++; } } CCSMAssert.isTrue(currentLevel == 0, "String is imbalanced: " + input); result.add(input.substring(currentStartIndex)); return result; } /** * Ensure that the given string ends with the given suffix, i.e. if it does not have the given * suffix, the returned string is s + suffix. */ public static String ensureEndsWith(String s, String suffix) { if (!s.endsWith(suffix)) { return s + suffix; } return s; } /** * Removes the given affix from the start and end of a string. */ public static String strip(String s, String affix) { return StringUtils.stripPrefix(StringUtils.stripSuffix(s, affix), affix); } /** * Ensure that the given string starts with the given prefix, i.e. if it does not have the given * prefix, it is prepended to the string. 
*/
    public static String ensureStartsWith(String s, String prefix) {
        return s.startsWith(prefix) ? s : prefix + s;
    }

    /**
     * Concatenates the list of strings with the delimiter and adds an escape character if needed.
     * For example, the list { "asd,rtz", "rrr", "rrr" } with delimiter comma(,) will produce the
     * following comma(,) delimited string "asd\\,rtz,rrr,rrr"
     */
    public static String concatWithEscapeCharacter(List<String> data, Character delimiter) {
        String plainDelimiter = delimiter.toString();
        String escapedDelimiter = "\\" + plainDelimiter;
        return data.stream().map(entry -> entry.replace(plainDelimiter, escapedDelimiter))
                .collect(Collectors.joining(plainDelimiter));
    }

    /**
     * Splits the delimited string with considering escaped delimiters. For example following comma(,)
     * delimited string "asd\\,rtz,rrr,rrr" will produce the list { "asd,rtz", "rrr", "rrr" }. Returns
     * {@link Collections#EMPTY_LIST} if the data or delimiter are {@code null}. Consecutive delimiters
     * will lead to empty strings, e.g. ",,asd, bar," should produce {"", "", "asd", "bar", ""}.
     *

* * @implNote We work really hard to make this code fast and efficient, by avoiding the use of * Regexes whenever possible. */ public static List splitWithEscapeCharacter(@Nullable String data, @Nullable Character delimiter) { if (isEmpty(data) || delimiter == null) { return Collections.emptyList(); } int nextInsertedStringStartIndex = 0; List result = new ArrayList<>(); Pattern delimiterPattern = null; boolean hadEscapedCharacter = false; for (int lastDelimiterIndex = data.indexOf(delimiter); lastDelimiterIndex != -1; lastDelimiterIndex = data .indexOf(delimiter, lastDelimiterIndex + 1)) { if (lastDelimiterIndex == nextInsertedStringStartIndex) { // found consecutive delimiter, create empty string for it. result.add(StringUtils.EMPTY_STRING); nextInsertedStringStartIndex++; } else if (data.charAt(lastDelimiterIndex - 1) == '\\') { hadEscapedCharacter = true; if (delimiterPattern == null) { // IntelliJ reports the \\ as error, but it is not, as this uses // Pattern.LITERAL. See also https://youtrack.jetbrains.com/issue/IDEA-180708 delimiterPattern = Pattern.compile("\\" + delimiter, Pattern.LITERAL); } } else { String substring = data.substring(nextInsertedStringStartIndex, lastDelimiterIndex).trim(); if (hadEscapedCharacter) { substring = delimiterPattern.matcher(substring).replaceAll(delimiter.toString()); } result.add(substring); nextInsertedStringStartIndex = lastDelimiterIndex + 1; hadEscapedCharacter = false; } } String substring = data.substring(nextInsertedStringStartIndex).trim(); if (hadEscapedCharacter) { substring = delimiterPattern.matcher(substring).replaceAll(delimiter.toString()); } result.add(substring); return result; } /** *

* Splits the given string at the given separators, but includes the separators in the returned
     * list. Empty strings will not be included in the result, unless the input is already an empty
     * string. This implementation is roughly 6x faster than using a corresponding regex pattern with
     * lookaheads.
     *
     * The passed separators are checked in their given order (only relevant if separators are prefixes
     * of each other). After a separator was matched, scanning continues behind it, i.e. matches never
     * overlap.
     *
     * If no separator is found, the input is returned as a list with one element.
     *
     * The returned list is mutable.
     */
    public static List<String> splitKeepingSeparators(String input, String... separators) {
        List<String> result = new ArrayList<>();
        int lastSplitIndex = 0;
        int index = 0;
        while (index < input.length()) {
            String matched = null;
            for (String separator : separators) {
                if (input.startsWith(separator, index)) {
                    matched = separator;
                    break;
                }
            }
            if (matched == null) {
                index++;
                continue;
            }
            if (lastSplitIndex != index) {
                result.add(input.substring(lastSplitIndex, index));
            }
            result.add(matched);
            lastSplitIndex = index + matched.length();
            // Continue behind the separator so that no match can start inside of it; an
            // empty separator still has to advance by one character.
            index = Math.max(index + 1, lastSplitIndex);
        }
        if (lastSplitIndex != input.length() || input.isEmpty()) {
            result.add(input.substring(lastSplitIndex));
        }
        return result;
    }

    /**
     * Splits the string at the given regex separator and trims the parts. Keeps empty strings after
     * the separator. For example: string "aaa;rrr, qqq; ; " split at ";" will produce the list
     * { "aaa", "rrr, qqq", "", "" }.
     */
    public static List<String> splitToList(String data, @Language("RegExp") String regexSeparator) {
        if (isEmpty(data) || isEmpty(regexSeparator)) {
            return Collections.emptyList();
        }
        List<String> parts = Arrays.asList(data.split(regexSeparator));
        return CollectionUtils.map(parts, String::trim);
    }

    /**
     * Returns the result of applying all pattern replacements in order, each as often as possible
     * (globally).
     */
    public static String applyAllReplacements(String s, PairList<Pattern, String> replacements) {
        String current = s;
        for (int i = 0; i < replacements.size(); ++i) {
            current = applyReplacement(current, replacements.getFirst(i), replacements.getSecond(i));
        }
        return current;
    }

    /**
     * Applies the replacement as often as possible. This is the equivalent of
     * {@link String#replaceAll(String, String)}, but accepting a {@link Pattern} instead of a regex
     * string.
     */
    public static String applyReplacement(String s, Pattern pattern, String replacement) {
        return pattern.matcher(s).replaceAll(replacement);
    }

    /**
     * Returns {@code null} if the input string is empty (after trimming) or {@code null}. Otherwise,
     * the input is returned unaltered.
*/ public static String nullIfEmpty(String input) { if (isEmpty(input)) { return null; } return input; } /** * Checks whether the parameter contains only number literals and (optionally) starts with a '-' * char. Returns false if the string is null or empty. */ public static boolean isInteger(@Nullable String string) { if (string == null || string.isEmpty()) { return false; } if (string.startsWith("-") && string.length() > 1) { string = string.substring(1); } for (char c : string.toCharArray()) { if (c < '0' || c > '9') { return false; } } return true; } /** * Returns the index of the first character in the given string that matches the pattern. The * pattern is applied to single characters, so it makes no sense to supply patterns that would match * on longer character sequences. */ public static int indexOfMatch(String string, Pattern pattern) { for (int i = 0; i < string.length(); i++) { char c = string.charAt(i); if (pattern.matcher(String.valueOf(c)).matches()) { return i; } } return -1; // no match at all } /** * Returns the index of the first found occurrence, together with the matched string. Example: * {@code Input ("abc", "b", "c") will return [1, "b"]} * * The candidates are checked in their given order (relevant only if the are prefixes of one * another). If none of the candidates is found, [-1, null] is returned. *

* Special case: If one of the candidates is the empty string, we will always return {@code [0, ""]} * (even when input is also empty) */ public static Pair indexOfAndFirstMatch(String input, int startIndex, String... candidates) { if (input.isEmpty()) { for (String separator : candidates) { if (separator.isEmpty()) { return Pair.createPair(0, separator); } } } for (int i = startIndex; i < input.length(); i++) { for (String separator : candidates) { if (input.startsWith(separator, i)) { return Pair.createPair(i, separator); } } } return Pair.createPair(-1, null); } /** * Returns {@link #indexOfAndFirstMatch(String, int, String...)} from the start of the input (index * 0) */ public static Pair indexOfAndFirstMatch(String input, String... candidates) { return indexOfAndFirstMatch(input, 0, candidates); } /** * Escapes the given chars according to the given map. If a char different from * {@code \b \n \t \f \r} should be escaped, e.g. an {@code a}, and also an escaping rule "\\" -> * "\\\\" exists, automatically an escape rule "\\a" -> "\\\\a" is added to prevent escaping of both * letter and backslash, if the char is already escaped in the origin string. */ public static String escapeChars(String content, Map escapeMap) { escapeMap = addEscapingForAlreadyEscapedNonSpecialChars(escapeMap); CharSequenceTranslator translator = new LookupTranslator(escapeMap); return StringEscapeUtils.builder(translator).escape(content).toString(); } /** * Performs unescaping according to the given translation map. This is the "reverse" method to * {@link #escapeChars(String, Map)}. * unescapeChars(escapeChars(someString, escapeMap), escapeMap) will yield * someString again. 
*/ public static String unescapeChars(String content, Map escapeMap) { escapeMap = addEscapingForAlreadyEscapedNonSpecialChars(escapeMap); CharSequenceTranslator translator = new LookupTranslator(EntityArrays.invert(escapeMap)); return StringEscapeUtils.builder(translator).escape(content).toString(); } /** * If an escapeMap contains a rule "\\" -> "\\\\" and a char different from {@code \b \n \t \f \r}, * e.g. an {@code a}, should be escaped by prefixing with a backslash (e.g "a" -> "\\a"), escaping * of an already escaped "a" in the origin string (e.g. "\\a") would result in "\\\\\\a" (as the * letter is escaped as well as the already existing backslash). However, the desired escaping is * "\\\\a". To obtain this, an additional escaping rule "\\a" -> "\\\\a" is added to the escaping * map to correctly handle this case. */ private static Map addEscapingForAlreadyEscapedNonSpecialChars( Map escapeMap) { if (!escapeMap.containsKey("\\")) { return escapeMap; } Map extendedEscapeMap = new HashMap<>(); for (Map.Entry escapePair : escapeMap.entrySet()) { if (escapePair.getKey().equals("\\")) { continue; } if (EntityArrays.JAVA_CTRL_CHARS_ESCAPE.containsKey(escapePair.getKey())) { continue; } if (escapePair.getValue().equals("\\" + escapePair.getKey())) { extendedEscapeMap.put("\\" + escapePair.getKey(), "\\" + escapePair.getValue()); } } extendedEscapeMap.putAll(escapeMap); return extendedEscapeMap; } /** * Escapes the characters {@code [ ] ( ) * # _ ~ ^ + = >} by prefixing with a backslash. Use to * escape Markdown sequences (check if according to markdown dialect more/other characters have to * be escaped). */ public static String escapeMarkdownChars(String content) { return escapeChars(content, MARKDOWN_ESCAPE_MAP); } /** * Unescapes the characters {@code [ ] ( ) * # _ ~ ^ + = >} by removing a leading backslash. Use to * unescape Markdown sequences that were escaped using {@link #escapeMarkdownChars(String)}. 
*/
    public static String unescapeMarkdownChars(String content) {
        return unescapeChars(content, MARKDOWN_ESCAPE_MAP);
    }

    /**
     * Returns the first N characters of the given String, retaining at most numberOfChars characters.
     * In case the String is shorter than or equals to numberOfChars, the supplied String is returned
     * unchanged.
     */
    public static String getFirstCharacters(String s, int numberOfChars) {
        return s.length() <= numberOfChars ? s : s.substring(0, numberOfChars);
    }

    /**
     * Returns the last N characters of the given String, retaining at most numberOfChars characters. In
     * case the String is shorter than or equals to numberOfChars, the supplied String is returned
     * unchanged.
     */
    public static String getLastCharacters(String s, int numberOfChars) {
        int length = s.length();
        return length <= numberOfChars ? s : s.substring(length - numberOfChars);
    }

    /**
     * Converts the given String to a String where the first character is in upper case and all other
     * characters are in lower case
     */
    public static String toFirstUpper(String s) {
        if (isEmpty(s)) {
            return s;
        }
        String lowerCasedRest = s.substring(1).toLowerCase();
        return Character.toUpperCase(s.charAt(0)) + lowerCasedRest;
    }

    /**
     * Returns the string itself, if count is 1. Otherwise, returns the string with appended "s".
     */
    public static String pluralize(String string, int count) {
        return count == 1 ? string : string + "s";
    }

    /** Adds a prefix and a suffix to the given string. */
    public static String surroundWith(String s, String prefix, String suffix) {
        return prefix + s + suffix;
    }

    /** Surrounds the given string with single quotes. */
    public static String surroundWithSingleQuotes(String s) {
        String quote = "'";
        return surroundWith(s, quote, quote);
    }

    /**
     * Compares the given value to all given strings.
     *

* This loops through the supplied string array. If the array is larger, or you already have the * Strings in a Collection, use Collection.contains(..). Consider putting the arguments * into a Collection constant. * * @return if one string equals the value. */ public static boolean equalsOneOf(String value, String... strings) { for (String compareValue : strings) { if (value.equals(compareValue)) { return true; } } return false; } /** * Removes double quotes from beginning and end (if present) and returns the new string. */ public static String removeDoubleQuotes(String string) { return stripPrefix(stripSuffix(string, "\""), "\""); } /** * Removes single quotes from beginning and end (if present) and returns the new string. */ public static String removeSingleQuotes(String string) { return stripPrefix(stripSuffix(string, "'"), "'"); } /** * Repeats a {@link String} * * @param s * the {@link String} to repeat * @param times * number of times the string gets repeated * @return the repeated {@link String} */ public static String repeat(String s, int times) { return new String(new char[times]).replace("\0", s); } /** * {@link #toString()} with null check. * * @param value * object to stringify * @return string representation or {@link StringUtils#EMPTY_STRING} if value is null. */ @NonNull public static String safeToString(@Nullable Object value) { if (value == null) { return StringUtils.EMPTY_STRING; } return value.toString(); } /** * Returns a truncated string that contains only the first x lines of the given text. 
*/ public static String retainHeadLines(String text, int numberOfLines) { if (text.isEmpty() || numberOfLines <= 1) { return ""; } if (!text.contains("\n")) { // text contains only one line, and we know that numberOfLines>1, so return all return text + "\n"; } int charsBeforeCutLine = 0; for (int i = 0; i < numberOfLines; i++) { if (charsBeforeCutLine >= text.length()) { // numberOfLines is >= lines in text return text; } charsBeforeCutLine = text.indexOf("\n", charsBeforeCutLine) + 1; } return text.substring(0, charsBeforeCutLine - 1) + "\n"; } /** * Remove the common prefix from all lines in a multi-line string. * * @param lines * The multi-line string. * @param removeLeadingWhitespace * Indicates if whitespace characters at the beginning of lines should be removed prior * to prefix processing. * @param includeTrailingNewline * Indicates if a trailing newline in the input string should be kept, which leads to an * empty last line. Note that an empty last line also means that no prefix across all * lines can exist. * @param allowedPrefixPattern * A regex pattern that defines how a prefix must look like for it to be removed. * {@code null} means that any prefix found is removed. * @return A new string without the line prefix or the same string (identical object) as the input * string if no prefix was removed. 
*/ @Contract("null, _, _, _ -> null; !null, _, _, _ -> !null") public static @Nullable String stripCommonLinePrefix(@Nullable String lines, boolean removeLeadingWhitespace, boolean includeTrailingNewline, @Nullable String allowedPrefixPattern) { if (StringUtils.countLines(lines, includeTrailingNewline) < 2) { // there can only be a common prefix with at least 2 lines return lines; } String linesNoSpace = lines; if (removeLeadingWhitespace) { linesNoSpace = StringUtils.removeWhitespaceAtBeginningOfLine(lines); } List splitLines = splitLinesAsList(linesNoSpace, includeTrailingNewline); String prefix = longestCommonPrefix(splitLines); if (prefix.isEmpty()) { return lines; } if (allowedPrefixPattern != null && !Pattern.compile(allowedPrefixPattern).matcher(prefix).matches()) { return lines; } return splitLines.stream().map(line -> stripPrefix(line, prefix)).collect(Collectors.joining(LINE_FEED)); } /** * Returns the string between the given startTerm and endTerm (where endTerm needs occur after the * startTerm). * * @return {@code null} if the startTerm or endTerm are not contained in the input, or if the * endTerm occurs *before* the startTerm. Otherwise, the phrase between the two terms, * excluding the terms itself. *

* Example: With input "ABC-DEF-GHI-GHI", startTerm "ABC" and endTerm "GHI", the result is
     * "-DEF-"
     */
    public static @Nullable String between(String input, String startTerm, String endTerm) {
        int startTermIndex = input.indexOf(startTerm);
        if (startTermIndex < 0) {
            return null;
        }

        // The end term is only searched behind the first occurrence of the start term.
        int contentStart = startTermIndex + startTerm.length();
        int contentEnd = input.indexOf(endTerm, contentStart);
        return contentEnd < 0 ? null : input.substring(contentStart, contentEnd);
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy