// org.conqat.lib.commons.string.StringUtils Maven / Gradle / Ivy
// Show all versions of teamscale-lib-commons Show documentation
/*
* Copyright (c) CQSE GmbH
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.conqat.lib.commons.string;
import static org.conqat.lib.commons.string.LineSplitter.UNICODE_NEL;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Random;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collector;
import java.util.stream.Collectors;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.commons.text.translate.CharSequenceTranslator;
import org.apache.commons.text.translate.EntityArrays;
import org.apache.commons.text.translate.LookupTranslator;
import org.checkerframework.checker.nullness.qual.NonNull;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.conqat.lib.commons.algo.Diff;
import org.conqat.lib.commons.assertion.CCSMAssert;
import org.conqat.lib.commons.collections.CollectionUtils;
import org.conqat.lib.commons.collections.Pair;
import org.conqat.lib.commons.collections.PairList;
import org.conqat.lib.commons.filesystem.EByteOrderMark;
import org.conqat.lib.commons.filesystem.FileSystemUtils;
import org.intellij.lang.annotations.Language;
import org.jetbrains.annotations.Contract;
/**
* A utility class providing some advanced string functionality.
*/
public class StringUtils {
/**
 * Matches all whitespace at the beginning of each line.
 *
 * We deliberately don't use "\\s" here because this also matches new lines. Instead we use
 * "\\p{Zs}" which matches all unicode horizontal whitespace characters (space separators). The tab
 * character is listed explicitly in the character class in addition to that category.
 * {@link Pattern#MULTILINE} makes "^" match at the start of every line instead of only at the start
 * of the input.
 */
private static final Pattern LEADING_WHITESPACE_PATTERN = Pattern.compile("^[\\t\\p{Zs}]+", Pattern.MULTILINE);
/** Line separator of the current platform, as reported by {@link System#lineSeparator()}. */
public static final String LINE_SEPARATOR = System.lineSeparator();
/** Line feed ("\n"), platform independent. */
public static final String LINE_FEED = "\n";
/**
 * The empty string. Use this instead of "" (our Java coding guidelines).
 *
 * <ul>
 * <li>Use this instead of an empty string literal ({@code ""}) to make it clear that this is
 * intentionally empty. Using "" may leave the reader in doubt whether this is something unfinished
 * (e.g. "" used temporarily until something meaningful is filled in) or intentionally the empty
 * String. Making it explicit removes this problem.</li>
 * <li>On most JVMs, using "" allocates a new String object per class in which it is used. Using
 * EMPTY_STRING re-uses a single object and saves a few bytes.</li>
 * <li>Makes " " and "" better distinguishable.</li>
 * </ul>
 */
public static final String EMPTY_STRING = "";
/** A single space, as a string. */
public static final String SPACE = " ";
/** A single space, as a character. */
public static final char SPACE_CHAR = ' ';
/** A tab character, as a string. */
public static final String TAB = "\t";
/**
 * Two spaces. Composed from {@link #SPACE} so that the width is explicit and cannot be lost when
 * whitespace inside the literal is collapsed by tooling.
 */
public static final String TWO_SPACES = SPACE + SPACE;
/** Dot. */
public static final String DOT = ".";
/** A String consisting of three dots. */
public static final String THREE_DOTS = "...";
/** The ellipsis character (a single character, unlike {@link #THREE_DOTS}). */
public static final String ELLIPSIS = "…";
/** Number formatter using {@link Locale#US} conventions. */
private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance(Locale.US);
/** Number formatter for percentages using {@link Locale#US} conventions. */
private static final NumberFormat PERCENTAGE_FORMAT = NumberFormat.getPercentInstance(Locale.US);
/** Shared random number generator, used by {@link #randomString(int)}. */
private static final Random RANDOM = new Random();
/** Upper-case hexadecimal digits in ascending order; used to convert bytes to a hex string. */
private static final char[] HEX_CHARACTERS = "0123456789ABCDEF".toCharArray();
/**
 * Characters that need to be escaped in markdown. Used in {@link #escapeMarkdownChars(String)} and
 * {@link #unescapeMarkdownChars(String)}.
 */
private static final List<Character> MARKDOWN_ESCAPE_CHARACTERS = Arrays.asList('[', ']', '(', ')', '*', '#', '_',
        '~', '^', '+', '=', '>');
/**
 * Defines the escaping map for Markdown characters based on {@link #MARKDOWN_ESCAPE_CHARACTERS}.
 * Each key is the one-character string to escape, each value is that character prefixed by a
 * backslash. Escaping should be performed by using {@link #escapeMarkdownChars(String)}.
 */
private static final Map<String, String> MARKDOWN_ESCAPE_MAP = MARKDOWN_ESCAPE_CHARACTERS.stream()
        .collect(Collectors.toMap(String::valueOf, characterToBeEscaped -> "\\" + characterToBeEscaped));
/**
 * Defines how a newline and already escaped newlines should be escaped (\n->\\n and \r->\\r) . We
 * also escape the escape char (\->\\) to allow correct un-escaping. The map is unmodifiable.
 *
 * Use it with {@link #escapeChars(String, Map)} and {@link #unescapeChars(String, Map)}. If
 * language-specific escaping should be performed, prefer using methods from
 * {@link StringEscapeUtils}.
 */
public static final Map<String, String> ESCAPE_NEWLINE;
static {
    Map<String, String> initialMap = new HashMap<>();
    initialMap.put("\n", "\\n");
    initialMap.put("\r", "\\r");
    initialMap.put("\\", "\\\\");
    ESCAPE_NEWLINE = Collections.unmodifiableMap(initialMap);
}
/**
 * Centers the given string within a string of the requested length, padding both sides with the
 * provided character. If the padding cannot be split evenly, the right side receives the extra
 * character.
 *
 * @param string
 *            The input string.
 * @param length
 *            The length of the string to be returned.
 * @param c
 *            The character to surround the input string with.
 * @return the padded string or, if the input is already at least {@code length} characters long,
 *         the original string.
 * @see #flushLeft(String, int, char)
 * @see #flushRight(String, int, char)
 */
public static String center(String string, int length, char c) {
    if (string.length() >= length) {
        return string;
    }
    int totalPadding = length - string.length();
    int leftPadding = totalPadding / 2;
    // Integer division rounds down, so an odd remainder ends up on the right.
    int rightPadding = totalPadding - leftPadding;
    return fillString(leftPadding, c) + string + fillString(rightPadding, c);
}
/**
 * Compares two strings, both of which may be {@code null}. A string which is {@code null} is always
 * smaller than the other string, except for both strings being {@code null}.
 *
 * @param a
 *            The string which is compared to the second string.
 * @param b
 *            The string which is compared to the first string.
 * @return 0 if both strings are {@code null} (or the same object), -1 if only the first string is
 *         {@code null}, and 1 if only the second string is {@code null}. If both strings are not
 *         {@code null}, returns the result of {@link String#compareTo(String)}.
 */
public static int compare(@Nullable String a, @Nullable String b) {
    // noinspection StringEquality
    if (a == b) {
        // Identity check on purpose: it covers the "both null" case and short-cuts the
        // comparison of a string with itself.
        return 0;
    }
    if (a == null || b == null) {
        // Exactly one side is null here; null sorts first.
        return a == null ? -1 : 1;
    }
    return a.compareTo(b);
}
/**
 * Concatenates all elements of an iterable using the {@code toString()}-method, separated by a
 * single space. Delegates to {@link #concat(Iterable, String)}.
 *
 * @param iterable
 *            the iterable
 * @return a concatenation, separated by spaces
 */
public static String concat(Iterable<?> iterable) {
    return concat(iterable, SPACE);
}
/**
 * Concatenates all elements of an iterable using the {@code toString()}-method, separating them
 * with the given {@code separator}.
 *
 * @param iterable
 *            the iterable containing the strings
 * @param separator
 *            the separator to place between the strings, may be {@code null}
 * @return a concatenation of the string in the iterable or {@code null} if iterable was
 *         {@code null}. If the iterable is of size 0, the empty string is returned.
 */
@Contract("null, _ -> null; !null, _ -> !null")
public static @Nullable String concat(@Nullable Iterable<?> iterable, @Nullable String separator) {
    if (iterable == null) {
        return null;
    }
    return concat(iterable.iterator(), separator);
}
/**
 * Concatenates all elements of an iterator using the {@code toString()}-method, separating them
 * with the given {@code separator}.
 *
 * @param iterator
 *            the {@link Iterator} containing the strings
 * @param separator
 *            the separator to place between the strings, may be {@code null} (treated like the
 *            empty string)
 * @return a concatenation of the string in the iterator or {@code null} if iterator was
 *         {@code null}. If the iterator has no elements, the empty string is returned.
 */
public static @Nullable String concat(@Nullable Iterator<?> iterator, @Nullable String separator) {
    if (iterator == null) {
        return null;
    }
    if (!iterator.hasNext()) {
        return EMPTY_STRING;
    }
    String effectiveSeparator = separator == null ? EMPTY_STRING : separator;
    StringBuilder builder = new StringBuilder();
    // The first element is written without a leading separator; every following element is
    // preceded by one, so no trailing separator is ever emitted.
    builder.append(iterator.next());
    while (iterator.hasNext()) {
        builder.append(effectiveSeparator).append(iterator.next());
    }
    return builder.toString();
}
/**
 * Concatenates all elements of an array using the {@code toString()}-method, separated by a single
 * space. Delegates to {@link #concat(Object[], String)}.
 *
 * @param array
 *            the array containing the strings
 * @return a concatenation of the string separated by spaces
 */
public static String concat(Object[] array) {
    String spaceSeparated = concat(array, SPACE);
    return spaceSeparated;
}
/**
 * Joins the {@code toString()} representations of all array elements, placing the given
 * {@code separator} between neighbouring elements.
 *
 * @param array
 *            the array
 * @param separator
 *            the separator to place between the strings, may be {@code null}
 * @return the joined string, or {@code null} if the array was {@code null}. For an array of length
 *         0, the empty string is returned.
 */
@Contract(value = "null, _ -> null; !null, _ -> !null", pure = true)
public static @Nullable String concat(Object @Nullable [] array, @Nullable String separator) {
    // View the array as a list and reuse the Iterable-based variant.
    return array == null ? null : concat(Arrays.asList(array), separator);
}
/**
 * Joins a list of strings with a delimiter and a different last delimiter. This could, for example,
 * be used if you want to create a String "a, b and c" from a list containing "a", "b" and "c".
 *
 * @param items
 *            the list of strings to be joined
 * @param delimiter
 *            the delimiter to use between the strings except the last two
 * @param lastDelimiter
 *            the delimiter to use between the last two strings
 * @return the joined string with a different delimiter for the last two strings. An empty list
 *         yields the empty string, a single-element list yields that element.
 */
public static String joinDifferentLastDelimiter(@NonNull List<String> items, String delimiter,
        String lastDelimiter) {
    if (items.isEmpty()) {
        return StringUtils.EMPTY_STRING;
    }
    if (items.size() == 1) {
        return items.get(0);
    }
    int last = items.size() - 1;
    // Join everything but the last item with the regular delimiter, then attach the last item
    // with the special delimiter.
    String allButLast = String.join(delimiter, items.subList(0, last));
    return String.join(lastDelimiter, allButLast, items.get(last));
}
/**
 * Returns a {@link Collector} that concatenates the input elements, separated by the specified
 * {@code delimiter}, in encounter order. Between the last two elements the {@code lastDelimiter} is
 * used instead of the {@code delimiter}.
 *
 * <pre>{@code
 * // returns: one, two and three
 * Stream.of("one", "two", "three").collect(joiningDifferentLastDelimiter(", ", " and "));
 * // returns: one and two
 * Stream.of("one", "two").collect(joiningDifferentLastDelimiter(", ", " and "));
 * // returns: one
 * Stream.of("one").collect(joiningDifferentLastDelimiter(", ", " and "));
 * // returns the empty string
 * Stream.<String>empty().collect(joiningDifferentLastDelimiter(", ", " and "));
 * }</pre>
 *
 * @return A {@link Collector} similar to {@link Collectors#joining(CharSequence)}, but using a
 *         different delimiter for the last two items.
 *
 * @see #joinDifferentLastDelimiter(List, String, String)
 * @see #joiningDifferentLastDelimiter(String, String, String, String)
 */
public static Collector<String, ?, String> joiningDifferentLastDelimiter(String delimiter, String lastDelimiter) {
    return joiningDifferentLastDelimiter(delimiter, lastDelimiter, StringUtils.EMPTY_STRING,
            StringUtils.EMPTY_STRING);
}
/**
* Returns a {@link Collector} that concatenates the input elements, separated by the specified
* {@code delimiter}, with the specified {@code prefix} and {@code suffix}, in encounter order.
* Between the last two elements the {@code lastDelimiter} is used instead of the {@code delimiter}.
*
*
*
* // returns: [one, two and three]
* Stream.of("one", "two", "three").collect(joiningDifferentLastDelimiter(", ", " and ", "[", "]");
* // returns: [one and two]
* Stream.of("one", "two").collect(joiningDifferentLastDelimiter(", ", " and ", "[", "]");
* // returns: [one]
* Stream.of("one").collect(joiningDifferentLastDelimiter(", ", " and ", "[", "]");
* // returns: []
* Stream.empty().collect(joiningDifferentLastDelimiter(", ", " and ", "[", "]");
*
*
*
*
* @return A {@link Collector} similar to
* {@link Collectors#joining(CharSequence, CharSequence, CharSequence)}, but using a
* different delimiter for the last two items.
*
* @see #joinDifferentLastDelimiter(List, String, String)
* @see #joiningDifferentLastDelimiter(String, String)
*/
public static Collector joiningDifferentLastDelimiter(String delimiter, String lastDelimiter,
String prefix, String suffix) {
return Collector., String>of(ArrayList::new, List::add, (l1, l2) -> {
l1.addAll(l2);
return l1;
}, l -> prefix + StringUtils.joinDifferentLastDelimiter(l, delimiter, lastDelimiter) + suffix);
}
/**
 * Concatenates two string arrays into a newly allocated array that holds the elements of
 * {@code array1} followed by the elements of {@code array2}.
 */
public static String[] concat(String[] array1, String[] array2) {
    // Copy the first array into a result that already has room for the second one.
    String[] joined = Arrays.copyOf(array1, array1.length + array2.length);
    System.arraycopy(array2, 0, joined, array1.length, array2.length);
    return joined;
}
/**
 * Builds a string that consists of {@code length} repetitions of a single character.
 *
 * @param length
 *            The length of the string.
 * @param c
 *            The character.
 * @return The string.
 */
public static String fillString(int length, char c) {
    char[] filled = new char[length];
    for (int i = 0; i < filled.length; i++) {
        filled[i] = c;
    }
    return String.valueOf(filled);
}
/**
 * Creates a string of the given length starting with the provided string. Remaining characters are
 * filled with the provided character.
 *
 * @param string
 *            The input string.
 * @param length
 *            The length of the string to be returned.
 * @param c
 *            The character to fill the string.
 * @return the new string or, if the input is already at least {@code length} characters long, the
 *         original string.
 * @see #flushRight(String, int, char)
 * @see #center(String, int, char)
 */
public static String flushLeft(String string, int length, char c) {
    int missing = length - string.length();
    // Padding is appended on the right, so the text itself stays flush left.
    return missing > 0 ? string + StringUtils.fillString(missing, c) : string;
}
/**
 * Creates a string of the given length ending with the provided string. Remaining characters are
 * filled with the provided character.
 *
 * @param string
 *            The input string.
 * @param length
 *            The length of the string to be returned.
 * @param c
 *            The character to fill the string.
 * @return the new string or, if the input is already at least {@code length} characters long, the
 *         original string.
 * @see #flushLeft(String, int, char)
 * @see #center(String, int, char)
 */
public static String flushRight(String string, int length, char c) {
    int missing = length - string.length();
    // Padding is prepended on the left, so the text itself ends up flush right.
    return missing > 0 ? StringUtils.fillString(missing, c) + string : string;
}
/**
 * Formats the given number with the {@link Locale#US} number formatter of this class.
 *
 * @return the formatted number, with non-breaking spaces (U+00A0) replaced by regular spaces
 */
public static String format(Number number) {
    // Replacing the non-breaking space character (U+00A0) with a regular space
    // (U+0020) to avoid issues with HTTP requests to CCPs, similar to TS-29424.
    // The character is spelled as a unicode escape because a literal non-breaking space
    // cannot be told apart from a regular space in source code and is easily normalized away.
    return NUMBER_FORMAT.format(number).replace('\u00A0', ' ');
}
/**
 * Formats the given number as a percentage with the {@link Locale#US} percentage formatter of this
 * class.
 *
 * @return the formatted percentage, with non-breaking spaces (U+00A0) replaced by regular spaces
 */
public static String formatAsPercentage(Number number) {
    // Replacing the non-breaking space character (U+00A0) with a regular space
    // (U+0020) because it caused issues with HTTP requests to CCPs, c.f. TS-29424.
    // The character is spelled as a unicode escape because a literal non-breaking space
    // cannot be told apart from a regular space in source code and is easily normalized away.
    return PERCENTAGE_FORMAT.format(number).replace('\u00A0', ' ');
}
/**
 * Returns the first line of a string, as produced by a {@link LineSplitter} over the input.
 *
 * @return If the input string is the empty string, an empty string will be returned. If the input
 *         string is {@code null}, the result is whatever {@link LineSplitter#next()} yields for a
 *         {@code null} input (documented as {@code null}).
 */
@Nullable
public static String getFirstLine(@Nullable String string) {
    if (EMPTY_STRING.equals(string)) {
        return EMPTY_STRING;
    }
    return new LineSplitter(string).next();
}
/**
 * Returns the first n parts of a string, separated by the given character.
 *
 * E.g., getFirstParts("edu.tum.cs", 2, '.') gives: "edu.tum".
 *
 * @param string
 *            the base string
 * @param partNumber
 *            number of parts
 * @param separator
 *            the separator character
 * @return the leading parts without the trailing separator. The input is returned unchanged if it
 *         is {@code null}, if {@code partNumber} is negative, or if it contains fewer separators
 *         than requested.
 */
public static String getFirstParts(String string, int partNumber, char separator) {
    if (string == null || partNumber < 0) {
        return string;
    }
    int end = 0;
    int remaining = partNumber;
    while (remaining-- > 0) {
        // The search always starts one position behind the previous hit (or behind index 0).
        end = string.indexOf(separator, end + 1);
        if (end < 0) {
            return string;
        }
    }
    return string.substring(0, end);
}
/**
 * Returns the first part of a String whose parts are separated by the given separator.
 *
 * E.g., getFirstPart("foo@bar@acme", "@") gives "foo".
 *
 * @param string
 *            the String
 * @param separator
 *            separation string
 * @return the first part of the String, or the original String if the separator is not found.
 */
public static String getFirstPart(String string, String separator) {
    int separatorStart = string.indexOf(separator);
    return separatorStart < 0 ? string : string.substring(0, separatorStart);
}
/**
 * Variant of {@link #getFirstPart(String, String)} which accepts a single char as the separator.
 * The char is converted to a one-character string and the call is delegated.
 *
 * @see #getFirstPart(String, String)
 */
public static String getFirstPart(String string, char separator) {
    String separatorAsString = String.valueOf(separator);
    return getFirstPart(string, separatorAsString);
}
/**
 * Returns the last part of a String whose parts are separated by the given String. Equivalent to
 * {@link #getLastPart(String, String, int)} with a count of 1.
 *
 * E.g., getLastPart("org.conqat##lib.commons.string##StringUtils", "##") gives "StringUtils". If
 * the separator is the empty string, this method returns the empty string.
 *
 * @param string
 *            the String
 * @param separator
 *            separation String
 * @return the last part of the String, or the original String if the separation String is not
 *         found.
 */
public static String getLastPart(String string, String separator) {
    final int singlePart = 1;
    return getLastPart(string, separator, singlePart);
}
/**
 * Variant of {@link #getLastPart(String, String)} which accepts a single char as the separator.
 * The char is converted to a one-character string and the call is delegated.
 *
 * @see #getLastPart(String, String)
 */
public static String getLastPart(String string, char separator) {
    String separatorAsString = String.valueOf(separator);
    return getLastPart(string, separatorAsString);
}
/**
 * Returns the last {@code upToCount} parts of a String whose parts are separated by the given
 * {@code separator}, e.g.,
 * <ul>
 * <li>{@code getLastPart("org.conqat##lib.commons.string##StringUtils", "##", 1) -> "StringUtils"}</li>
 * <li>{@code getLastPart("org.conqat##lib.commons.string##StringUtils", "##", 2) -> "lib.commons.string##StringUtils"}</li>
 * <li>{@code getLastPart("org.conqat##lib.commons.string##StringUtils", "##", 3+) -> "org.conqat##lib.commons.string##StringUtils"}</li>
 * </ul>
 * If the separator is the empty string, this method returns the empty string.
 *
 * @param string
 *            the String
 * @param separator
 *            separation String
 * @param upToCount
 *            The number of times the {@code separator} is matched from the end. If the separator is
 *            present fewer than this value, the complete {@code string} is returned.
 * @return the last part of the String, or the original String if the separation String is not found
 *         {@code upToCount} times.
 * @throws IllegalArgumentException
 *             if {@code upToCount} is not positive
 */
public static String getLastPart(String string, String separator, int upToCount) {
    if (upToCount <= 0) {
        throw new IllegalArgumentException(
                String.format("Expected \"%s\" (%d) to be positive", "upToCount", upToCount));
    }
    int searchFrom = string.length();
    for (int matched = 0; matched < upToCount; matched++) {
        int separatorStart = string.lastIndexOf(separator, searchFrom);
        if (separatorStart < 0) {
            return string;
        }
        // The next backwards search has to begin before the separator just found.
        searchFrom = separatorStart - 1;
    }
    // searchFrom + 1 is the start of the earliest matched separator; skip over it.
    int resultStart = searchFrom + 1 + separator.length();
    return string.substring(resultStart);
}
/**
 * Splits the string at the first occurrence of {@code separator}. If the separator does not occur,
 * the second string in the returned pair is empty. E.g. splitAtFirst("foo.bar.baz", ".") gives
 * ("foo", "bar.baz").
 *
 * @return a pair of the text before and the text after the first separator; the separator itself
 *         is part of neither.
 */
public static Pair<String, String> splitAtFirst(String string, String separator) {
    int separatorStart = string.indexOf(separator);
    if (separatorStart < 0) {
        return new Pair<>(string, StringUtils.EMPTY_STRING);
    }
    String head = string.substring(0, separatorStart);
    String tail = string.substring(separatorStart + separator.length());
    return new Pair<>(head, tail);
}
/**
 * Splits the string at the first occurrence of {@code separator}. If the separator does not occur,
 * the second string in the returned pair is empty. E.g. splitAtFirst("foo.bar.baz", '.') gives
 * ("foo", "bar.baz").
 *
 * @return a pair of the text before and the text after the first separator; the separator itself
 *         is part of neither.
 */
public static Pair<String, String> splitAtFirst(String string, char separator) {
    int separatorIndex = string.indexOf(separator);
    if (separatorIndex < 0) {
        return new Pair<>(string, StringUtils.EMPTY_STRING);
    }
    String head = string.substring(0, separatorIndex);
    String tail = string.substring(separatorIndex + 1);
    return new Pair<>(head, tail);
}
/**
 * Splits the string at the last occurrence of {@code separator}. If the separator does not occur,
 * the second string in the returned pair is empty. E.g. splitAtLast("foo.bar.baz", '.') gives
 * ("foo.bar", "baz").
 *
 * @return a pair of the text before and the text after the last separator; the separator itself is
 *         part of neither.
 */
public static Pair<String, String> splitAtLast(String string, char separator) {
    int separatorIndex = string.lastIndexOf(separator);
    if (separatorIndex < 0) {
        // Use the shared constant like the splitAtFirst variants do.
        return new Pair<>(string, StringUtils.EMPTY_STRING);
    }
    String head = string.substring(0, separatorIndex);
    String tail = string.substring(separatorIndex + 1);
    return new Pair<>(head, tail);
}
/**
 * Searches the elements of a string array for a string. Both the array elements and the search
 * string are trimmed before they are compared.
 *
 * @param array
 *            the array to search
 * @param string
 *            the search string
 * @return the index of the first element where the string was found or -1 if string wasn't found.
 */
public static int indexOf(String[] array, String string) {
    int position = 0;
    for (String element : array) {
        if (element.trim().equals(string.trim())) {
            return position;
        }
        position++;
    }
    return -1;
}
/**
 * Returns the index of the n-th occurrence of the character c in the string s or -1 if no such
 * occurrence exists.
 *
 * @param s
 *            the string to search; {@code null} and the empty string yield -1
 * @param c
 *            the character to look for; whitespace characters are searched like any other
 *            character
 * @param n
 *            the 1-based number of the occurrence; values smaller than 1 yield -1
 * @return the index of the n-th occurrence or -1
 */
public static int indexOfNthOccurrence(String s, char c, int n) {
    // Only null/empty input is rejected up front. A "blank" check would wrongly report -1 for
    // whitespace-only strings even when the searched character is a whitespace character.
    if (s == null || s.isEmpty() || n <= 0) {
        return -1;
    }
    int pos = -1;
    for (int found = 0; found < n; found++) {
        pos = s.indexOf(c, pos + 1);
        if (pos == -1) {
            return -1;
        }
    }
    return pos;
}
/**
 * Checks if a string is empty (after trimming). Delegates to
 * {@link org.apache.commons.lang3.StringUtils#isBlank(CharSequence)}, so whitespace-only strings
 * count as empty as well.
 *
 * @param text
 *            the string to check.
 * @return {@code true} if the string is {@code null}, empty or blank, {@code false} otherwise.
 */
public static boolean isEmpty(@Nullable String text) {
    boolean blank = org.apache.commons.lang3.StringUtils.isBlank(text);
    return blank;
}
/**
 * Checks if the given string contains at least one letter (checked with
 * {@link Character#isLetter(char)} on each char of the string).
 */
public static boolean containsLetter(String s) {
    return s.chars().anyMatch(charValue -> Character.isLetter((char) charValue));
}
/**
 * Returns whether s1 contains s2 ignoring case. Both strings are lower-cased with
 * {@link Locale#ROOT}, so the result does not depend on the default locale of the JVM (e.g. the
 * Turkish dotless i).
 */
public static boolean containsIgnoreCase(String s1, String s2) {
    return s1.toLowerCase(Locale.ROOT).contains(s2.toLowerCase(Locale.ROOT));
}
/**
 * Returns whether s contains all the given substrings. If no substrings are given, the result is
 * {@code true}.
 */
public static boolean containsAll(String s, String... substrings) {
    // A lambda (not a bound method reference) keeps s from being dereferenced when there is
    // nothing to check.
    return Arrays.stream(substrings).allMatch(substring -> s.contains(substring));
}
/**
 * Generates a random string with a certain length, using the shared random number generator of
 * this class. See {@link #randomString(int, Random)} for the characters that are used.
 *
 * @param length
 *            the length of the random string
 * @return the random string
 */
public static String randomString(int length) {
    String generated = randomString(length, RANDOM);
    return generated;
}
/**
 * Performs the actual creation of the random string using the given randomizer. Each character is
 * drawn independently and has a char code from 33 (inclusive) to 126 (exclusive).
 */
public static String randomString(int length, Random random) {
    char[] characters = new char[length];
    int index = 0;
    while (index < characters.length) {
        // nextInt(93) yields 0..92, shifted to the char codes 33..125.
        characters[index++] = (char) (33 + random.nextInt(93));
    }
    return String.valueOf(characters);
}
/**
 * Generates an array of random strings, each created with {@link #randomString(int)}.
 *
 * @param length
 *            number of strings
 * @param stringLength
 *            length of each string
 * @return the randomly generated array.
 */
public static String[] randomStringArray(int length, int stringLength) {
    String[] randomStrings = new String[length];
    // setAll fills the slots in ascending index order.
    Arrays.setAll(randomStrings, index -> randomString(stringLength));
    return randomStrings;
}
/**
 * Generates a pseudo random string with a certain length in a deterministic, reproducible fashion.
 *
 * @param length
 *            the length of the pseudo-random string
 * @param seed
 *            seed value for the random number generator used for the generation of the
 *            pseudo-random string. If the same seed value is used, the same pseudo-random string is
 *            generated.
 * @return the pseudo-random string
 */
public static String generateString(int length, int seed) {
    // A fresh generator per call makes the result depend on the seed only.
    return randomString(length, new Random(seed));
}
/**
 * Generates an array of pseudo-random strings in a deterministic, reproducible fashion. The string
 * at index {@code i} is generated with the seed {@code seed + i}.
 *
 * @param length
 *            number of strings
 * @param stringLength
 *            length of each string
 * @param seed
 *            seed value for the random number generator used for the generation of the
 *            pseudo-random string. If the same seed value is used, the same pseudo-random string
 *            array is generated.
 * @return the randomly generated array.
 */
public static String[] generateStringArray(int length, int stringLength, int seed) {
    String[] generated = new String[length];
    Arrays.setAll(generated, index -> generateString(stringLength, seed + index));
    return generated;
}
/**
 * Returns the beginning of a String, cutting off the last part which is separated by the given
 * character.
 *
 * E.g., removeLastPart("org.conqat.lib.commons.string.StringUtils", '.') gives
 * "org.conqat.lib.commons.string".
 *
 * @param string
 *            the String
 * @param separator
 *            separation character
 * @return the String without the last part, or the original string (i.e., the same object) if the
 *         separation character is not found.
 */
public static String removeLastPart(String string, char separator) {
    int lastSeparator = string.lastIndexOf(separator);
    return lastSeparator == -1 ? string : string.substring(0, lastSeparator);
}
/**
 * Replaces all occurrences of keys of the given map in the given string with the associated value
 * in that map. The given map may be {@code null}, in which case the original string is returned
 * unchanged.
 *
 * The replacements are applied one map entry after the other on a single {@link StringBuilder},
 * which avoids allocating a new String per replacement as iterated calls to
 * {@link String#replace(CharSequence, CharSequence)} would. Text inserted for a key is not scanned
 * again for the same key, but may be matched by keys of later entries.
 *
 * Entries with an empty key are skipped: an empty key matches at every position, so replacing it
 * would never terminate.
 *
 * The order in which replacements are applied depends on the order of the map's entry set.
 */
public static String replaceFromMap(String string, @Nullable Map<String, String> replacements) {
    if (replacements == null) {
        return string;
    }
    StringBuilder sb = new StringBuilder(string);
    for (Entry<String, String> entry : replacements.entrySet()) {
        String key = entry.getKey();
        String value = entry.getValue();
        if (key.isEmpty()) {
            continue;
        }
        int start = sb.indexOf(key);
        while (start > -1) {
            sb.replace(start, start + key.length(), value);
            // Continue behind the inserted value so that it is never matched again by this key.
            start = sb.indexOf(key, start + value.length());
        }
    }
    return sb.toString();
}
/**
 * Removes all occurrences of the specified strings from the given string. If no strings to remove
 * are given, the input is returned unchanged.
 */
public static String removeAll(String string, String... stringsToRemove) {
    boolean nothingToRemove = stringsToRemove == null || stringsToRemove.length == 0;
    return nothingToRemove ? string : removeAll(string, Arrays.asList(stringsToRemove));
}
/**
 * Removes all occurrences of the specified strings from the given string. The strings are removed
 * one after the other, in iteration order.
 *
 * @param string
 *            the string to remove from
 * @param stringsToRemove
 *            the strings to remove; may be {@code null}, in which case the input is returned
 *            unchanged. Empty strings are skipped, because an empty string matches at every
 *            position and removing it would never terminate.
 * @return the string without any occurrence of the given strings
 */
public static String removeAll(String string, Iterable<String> stringsToRemove) {
    if (stringsToRemove == null) {
        return string;
    }
    StringBuilder sb = new StringBuilder(string);
    for (String key : stringsToRemove) {
        if (key.isEmpty()) {
            continue;
        }
        int start = sb.indexOf(key);
        while (start > -1) {
            sb.delete(start, start + key.length());
            // Search again from the same position: the text behind the removed part moved here.
            start = sb.indexOf(key, start);
        }
    }
    return sb.toString();
}
/**
 * Replaces all linebreaks in the string with the platform-specific line separator
 * ({@link #LINE_SEPARATOR}).
 *
 * @return a string with linebreaks replaced by the line separator of the current platform. If the
 *         given string is {@code null}, then an empty string will be returned.
 */
public static @NonNull String normalizeLineSeparatorsPlatformSpecific(@Nullable String string) {
    String normalized = replaceLineBreaks(string, LINE_SEPARATOR);
    return normalized;
}
/**
 * Replaces all linebreaks in the string with a line feed symbol (\n), platform independent.
 *
 * @return a string with linebreaks replaced by {@link #LINE_FEED}. If the given string is
 *         {@code null}, then an empty string will be returned.
 */
public static @NonNull String normalizeLineSeparatorsPlatformIndependent(@Nullable String string) {
    // Uses the char-based variant of replaceLineBreaks.
    String normalized = replaceLineBreaks(string, '\n');
    return normalized;
}
/**
 * Replaces all linebreaks with the given character. The work is delegated to
 * {@link LineBreakReplacer}.
 *
 * @return a string with linebreaks replaced by the given replacementCharacter. If the given string
 *         is {@code null}, then an empty string will be returned.
 */
public static @NonNull String replaceLineBreaks(@Nullable String string, char replacementCharacter) {
    String replaced = LineBreakReplacer.replaceLineBreaks(string, replacementCharacter);
    return replaced;
}
/**
 * Replace all linebreaks in string by a specified symbol. The input is split into lines with a
 * {@link LineSplitter} (configured to include a trailing empty line) and the lines are re-joined
 * with {@code symbol} in between.
 *
 * @apiNote Use {@link #replaceLineBreaks(String, char)} whenever possible, as it is much faster.
 *
 * @param string
 *            the text whose line breaks are replaced
 * @param symbol
 *            the text inserted in place of every line break
 * @return a string with line breaks replaced. Will return an empty string if the input string is
 *         {@code null}.
 */
public static @NonNull String replaceLineBreaks(@Nullable String string, @NonNull String symbol) {
StringBuilder builder = new StringBuilder();
LineSplitter lineSplitter = new LineSplitter(string);
// Keep a trailing empty line so that a line break at the very end of the input is replaced, too.
lineSplitter.setIncludeTrailingEmptyLine(true);
// NOTE(review): the loop iterates over the splitter and also asks the same object hasNext();
// this presumably relies on LineSplitter.iterator() returning the splitter itself - confirm
// against LineSplitter before restructuring this loop.
for (String line : lineSplitter) {
builder.append(line);
// Only put the symbol between lines, never after the last one.
if (lineSplitter.hasNext()) {
builder.append(symbol);
}
}
return builder.toString();
}
/**
 * Splits a string into its lines and returns them as an array. For the empty string and
 * {@code null} an array of length zero is returned.
 *
 * @see #splitLinesAsList(String)
 */
public static String[] splitLines(@Nullable String content) {
    List<String> lines = splitLinesAsList(content);
    return lines.toArray(new String[0]);
}
/**
 * Returns the number of occurrences of the given character in the given string.
 */
public static int countCharacter(String content, char character) {
    long occurrences = content.chars().filter(charValue -> charValue == character).count();
    // A string cannot hold more than Integer.MAX_VALUE chars, so the cast is lossless.
    return (int) occurrences;
}
/**
 * Returns the number of lines in a string, by counting newline characters, except if it is the last
 * character in the content (i.e. we ignore trailing new lines). Behaves consistently with
 * {@link #splitLines(String)}.
 *
 * Will return {@code 1} for a non-empty string that has no line breaks (i.e., the first line
 * already counts).
 */
public static int countLines(String content) {
    final boolean includeTrailingNewline = false;
    return countLines(content, includeTrailingNewline);
}
/**
 * Returns the number of lines in a string, by counting line break characters. "\n", "\r", "\r\n"
 * and the Unicode next-line character each count as one line break.
 *
 * We count characters to avoid object allocations at all costs.
 *
 * Will return {@code 1} for a non-empty string that has no line breaks (i.e., the first line
 * already counts), and {@code 0} for {@code null} or the empty string.
 *
 * @param includeTrailingNewline
 *            whether a line break at the very end of the content starts another (empty) line
 */
public static int countLines(@Nullable String content, boolean includeTrailingNewline) {
    if (content == null || content.isEmpty()) {
        return 0;
    }
    int length = content.length();
    int lines = 1;
    for (int i = 0; i < length; i++) {
        char current = content.charAt(i);
        boolean isLineBreak = current == '\n' || current == UNICODE_NEL || current == '\r';
        if (!isLineBreak) {
            continue;
        }
        if (current == '\r' && i + 1 < length && content.charAt(i + 1) == '\n') {
            // "\r\n" is a single line break: step over the "\n" as well.
            i++;
        }
        boolean moreContentFollows = i + 1 < length;
        if (moreContentFollows || includeTrailingNewline) {
            lines++;
        }
    }
    return lines;
}
/**
 * Splits a string into its lines, without a trailing empty line. For the empty string and
 * {@code null} an empty list is returned.
 *
 * @see #splitLines(String)
 */
public static List<String> splitLinesAsList(@Nullable String content) {
    return splitLinesAsList(content, false);
}
/**
 * Splits a string into its lines using a {@link LineSplitter}. For the empty string and
 * {@code null} an empty list is returned.
 *
 * @param includeTrailingEmptyLine
 *            passed on to {@link LineSplitter#setIncludeTrailingEmptyLine(boolean)}
 * @return a new, modifiable list of the lines in their original order
 *
 * @see #splitLines(String)
 */
public static List<String> splitLinesAsList(@Nullable String content, boolean includeTrailingEmptyLine) {
    List<String> result = new ArrayList<>();
    LineSplitter lineSplitter = new LineSplitter(content);
    lineSplitter.setIncludeTrailingEmptyLine(includeTrailingEmptyLine);
    for (String line : lineSplitter) {
        result.add(line);
    }
    return result;
}
/**
 * Prepends {@code prefix} and {@code separator} to the string, unless the prefix is empty, in
 * which case the string is returned unchanged.
 *
 * @param string
 *            the string to prefix
 * @param separator
 *            the text placed between prefix and string
 * @param prefix
 *            the prefix to add
 * @return {@code prefix + separator + string}, or {@code string} for an empty prefix
 */
public static String addPrefix(String string, String separator, String prefix) {
    if (!StringUtils.isEmpty(prefix)) {
        return prefix + separator + string;
    }
    return string;
}
/**
 * Prefixes each of the given strings with the specified prefix.
 *
 * @param strings
 *            the strings to prefix
 * @param prefix
 *            the prefix to put in front of every string
 * @return a new mutable list with the prefixed strings, in iteration order of the input
 */
public static List<String> prefixAll(Collection<String> strings, String prefix) {
    List<String> result = new ArrayList<>(strings.size());
    for (String value : strings) {
        result.add(prefix + value);
    }
    return result;
}
/**
 * Appends {@code separator} and {@code suffix} to the string, unless the suffix is empty, in
 * which case the string is returned unchanged.
 *
 * @param string
 *            the string to suffix
 * @param separator
 *            the text placed between string and suffix
 * @param suffix
 *            the suffix to add
 * @return {@code string + separator + suffix}, or {@code string} for an empty suffix
 */
public static String addSuffix(String string, String separator, String suffix) {
    if (!StringUtils.isEmpty(suffix)) {
        return string + separator + suffix;
    }
    return string;
}
/**
 * Removes a prefix from a string.
 *
 * @param string
 *            the string
 * @param prefix
 *            the prefix
 *
 * @return the string without the prefix, or the original string if it does not start with the
 *         prefix.
 */
public static String stripPrefix(String string, String prefix) {
    boolean hasPrefix = string.startsWith(prefix);
    return hasPrefix ? string.substring(prefix.length()) : string;
}
/**
 * Removes a prefix from a string, ignoring case, i.e.
 * {@code stripPrefixIgnoreCase("c:/programs/notepad.exe", "C:/Programs/")} will return
 * {@code "notepad.exe"}.
 *
 * The comparison is done character by character via
 * {@link String#regionMatches(boolean, int, String, int, int)}, so it does not depend on the
 * default locale and the number of characters removed is always {@code prefix.length()}.
 *
 * @param string
 *            the string
 * @param prefix
 *            the prefix
 *
 * @return the string without the prefix, or the original string if it does not start with the
 *         prefix.
 */
public static String stripPrefixIgnoreCase(String string, String prefix) {
    if (string.regionMatches(true, 0, prefix, 0, prefix.length())) {
        return string.substring(prefix.length());
    }
    return string;
}
/**
 * Removes a suffix from a string.
 *
 * @param string
 *            the string
 * @param suffix
 *            the suffix
 *
 * @return the string without the suffix, or the original string if it does not end with the
 *         suffix.
 */
public static String stripSuffix(String string, String suffix) {
    if (!string.endsWith(suffix)) {
        return string;
    }
    int remainingLength = string.length() - suffix.length();
    return string.substring(0, remainingLength);
}
/**
 * Strips all ASCII digits ({@code 0-9}) from the given String.
 *
 * @param string
 *            the string to process
 * @return the string with every digit replaced by {@code EMPTY_STRING}
 */
public static String stripDigits(String string) {
    String digitRegex = "[0-9]";
    return string.replaceAll(digitRegex, EMPTY_STRING);
}
/**
 * Strips all trailing digits from the end of the given String.
 *
 * @param string
 *            the string to process
 * @return the string with the digit run matching the regex {@code \d+$} replaced by
 *         {@code EMPTY_STRING}
 */
public static String stripTrailingDigits(String string) {
    String trailingDigitsRegex = "\\d+$";
    return string.replaceAll(trailingDigitsRegex, EMPTY_STRING);
}
/**
 * Creates a string representation of a map without line indentation.
 *
 * @param map
 *            the map
 * @return the result of {@link #toString(Map, String)} with {@code EMPTY_STRING} as indent
 */
public static String toString(Map<?, ?> map) {
    return toString(map, EMPTY_STRING);
}
/**
 * Creates a string representation of a map. Each entry is rendered on its own line as
 * {@code indent + key + " = " + value}; lines are joined with {@code LINE_SEPARATOR} and there is
 * no trailing separator.
 *
 * @param map
 *            the map
 * @param indent
 *            a line indent
 * @return the rendered map, or an empty string for an empty map
 */
public static String toString(Map<?, ?> map, String indent) {
    StringBuilder result = new StringBuilder();
    boolean first = true;
    // iterate the entries directly instead of looking each key up again
    for (Map.Entry<?, ?> entry : map.entrySet()) {
        if (!first) {
            result.append(LINE_SEPARATOR);
        }
        first = false;
        result.append(indent);
        result.append(entry.getKey());
        result.append(" = ");
        result.append(entry.getValue());
    }
    return result.toString();
}
/**
 * Converts the stack trace of a {@link Throwable} to a string.
 *
 * @param throwable
 *            the throwable whose stack trace is rendered
 * @return the text written by {@link Throwable#printStackTrace(PrintWriter)}
 */
public static String obtainStackTrace(Throwable throwable) {
    StringWriter result = new StringWriter();
    // try-with-resources closes (and thereby flushes) the writer even if printing fails;
    // the StringWriter itself holds no resource that needs closing
    try (PrintWriter printWriter = new PrintWriter(result)) {
        throwable.printStackTrace(printWriter);
    }
    return result.toString();
}
/**
 * Tests whether a string starts with one of the provided prefixes. Returns {@code false} if no
 * prefixes are given. This should only be used for short lists of prefixes.
 *
 * @param string
 *            the string to test
 * @param prefixes
 *            the candidate prefixes
 * @return {@code true} if at least one prefix matches
 */
public static boolean startsWithOneOf(String string, String... prefixes) {
    return Arrays.stream(prefixes).anyMatch(prefix -> string.startsWith(prefix));
}
/**
 * Tests whether a string starts with one of the provided prefixes. Returns {@code false} if the
 * list of prefixes is empty. This should only be used for short lists of prefixes. The given
 * list must not be null.
 *
 * @param string
 *            the string to test
 * @param prefixes
 *            the candidate prefixes
 * @return {@code true} if at least one prefix matches
 */
public static boolean startsWithOneOf(String string, Iterable<String> prefixes) {
    for (String prefix : prefixes) {
        if (string.startsWith(prefix)) {
            return true;
        }
    }
    return false;
}
/**
 * Returns whether the given string starts with the prefix ignoring case, i.e.
 * {@code startsWithIgnoreCase("foobar", "Foo")} will return {@code true}.
 *
 * The comparison uses {@link String#regionMatches(boolean, int, String, int, int)}, which
 * compares character by character, independent of the default locale and without creating
 * lower-cased copies of both strings.
 *
 * @param string
 *            the string to test
 * @param prefix
 *            the expected prefix
 * @return {@code true} if the string starts with the prefix, ignoring case
 */
public static boolean startsWithIgnoreCase(String string, String prefix) {
    return string.regionMatches(true, 0, prefix, 0, prefix.length());
}
/**
 * Tests whether a string contains one of the provided strings. Returns {@code false} if no
 * strings are given. This should only be used for short lists of strings.
 *
 * @param text
 *            the text to search in
 * @param strings
 *            the candidate substrings
 * @return {@code true} if at least one candidate is contained in the text
 */
public static boolean containsOneOf(String text, String... strings) {
    List<String> candidates = Arrays.asList(strings);
    return containsOneOf(text, candidates);
}
/**
 * Tests whether a string contains one of the provided strings. Returns {@code false} if the list
 * of strings is empty. This should only be used for short lists of strings.
 *
 * @param text
 *            the text to search in
 * @param strings
 *            the candidate substrings
 * @return {@code true} if at least one candidate is contained in the text
 */
public static boolean containsOneOf(String text, Iterable<String> strings) {
    for (String substring : strings) {
        if (text.contains(substring)) {
            return true;
        }
    }
    return false;
}
/**
 * Returns whether the given String ends with the specified suffix ignoring case.
 *
 * The comparison uses {@link String#regionMatches(boolean, int, String, int, int)}, which
 * compares character by character, independent of the default locale and without creating
 * lower-cased copies of both strings.
 *
 * @param string
 *            the string to test
 * @param suffix
 *            the expected suffix
 * @return {@code true} if the string ends with the suffix, ignoring case
 */
public static boolean endsWithIgnoreCase(String string, String suffix) {
    // a negative offset (suffix longer than string) makes regionMatches return false
    int suffixStart = string.length() - suffix.length();
    return string.regionMatches(true, suffixStart, suffix, 0, suffix.length());
}
/**
 * Tests whether a string ends with one of the provided suffixes. Returns {@code false} if no
 * suffixes are given. This should only be used for short lists of suffixes.
 *
 * @param string
 *            the string to test
 * @param suffixes
 *            the candidate suffixes
 * @return {@code true} if at least one suffix matches
 */
public static boolean endsWithOneOf(String string, String... suffixes) {
    return Arrays.stream(suffixes).anyMatch(suffix -> string.endsWith(suffix));
}
/**
 * Prefixes all lines of a string. The input is trimmed before it is split into lines, and the
 * lines are re-joined with the platform-specific line separator.
 *
 * @param string
 *            the string to prefix
 * @param prefix
 *            the prefix to add
 * @param prefixFirstLine
 *            a flag that indicates if the first line should be prefixed or not.
 * @return the prefixed lines joined by {@code LINE_SEPARATOR}, without a trailing separator
 */
public static String prefixLines(String string, String prefix, boolean prefixFirstLine) {
    String[] lines = StringUtils.splitLines(string.trim());
    int lastIndex = lines.length - 1;
    StringBuilder prefixed = new StringBuilder();
    for (int lineIndex = 0; lineIndex <= lastIndex; lineIndex++) {
        boolean isFirstLine = lineIndex == 0;
        if (!isFirstLine || prefixFirstLine) {
            prefixed.append(prefix);
        }
        prefixed.append(lines[lineIndex]);
        if (lineIndex != lastIndex) {
            prefixed.append(LINE_SEPARATOR);
        }
    }
    return prefixed.toString();
}
/**
 * Splits the given string into an array of {@link Character}s. This is mostly used for testing
 * purposes, if an array of certain objects is needed.
 *
 * @param s
 *            the string to split
 * @return an array with one boxed character per {@code char} of the string
 */
public static Character[] splitChars(String s) {
    int length = s.length();
    Character[] characters = new Character[length];
    int index = 0;
    while (index < length) {
        characters[index] = Character.valueOf(s.charAt(index));
        index++;
    }
    return characters;
}
/**
 * Capitalizes a string by upper-casing its first character; the rest is left untouched.
 *
 * @param string
 *            the string to capitalize
 * @return the capitalized string, or the input itself if {@code StringUtils.isEmpty} reports it
 *         as empty
 */
public static String capitalize(String string) {
    if (StringUtils.isEmpty(string)) {
        return string;
    }
    String head = string.substring(0, 1).toUpperCase();
    String tail = string.substring(1);
    return head + tail;
}
/**
 * Splits the input string into words (delimited by whitespace) and returns a string whose words
 * are separated by single spaces and whose lines are not longer than the given length (unless a
 * single word is longer than that).
 *
 * @param s
 *            the text to wrap
 * @param maxLineLength
 *            the maximal number of characters per line
 * @return the re-wrapped text with lines joined by {@code LINE_SEPARATOR}
 */
public static String wrapLongLines(String s, int maxLineLength) {
    StringBuilder wrapped = new StringBuilder();
    int currentLineLength = 0;
    for (String word : s.split("\\s+")) {
        // leading whitespace in the input yields one empty first token
        if (word.isEmpty()) {
            continue;
        }
        if (currentLineLength > 0) {
            boolean fitsOnLine = currentLineLength + 1 + word.length() <= maxLineLength;
            if (fitsOnLine) {
                wrapped.append(SPACE);
                currentLineLength += 1;
            } else {
                wrapped.append(LINE_SEPARATOR);
                currentLineLength = 0;
            }
        }
        wrapped.append(word);
        currentLineLength += word.length();
    }
    return wrapped.toString();
}
/**
 * Returns the longest common prefix of s and t.
 *
 * @param s
 *            the first string
 * @param t
 *            the second string
 * @return the longest string that both {@code s} and {@code t} start with (possibly empty)
 */
public static String longestCommonPrefix(String s, String t) {
    int limit = Math.min(s.length(), t.length());
    int commonLength = 0;
    while (commonLength < limit && s.charAt(commonLength) == t.charAt(commonLength)) {
        commonLength++;
    }
    return s.substring(0, commonLength);
}
/**
 * Returns the longest common suffix of s and t. Computed as the reversed longest common prefix
 * of both reversed strings.
 *
 * @param s
 *            the first string
 * @param t
 *            the second string
 * @return the longest string that both {@code s} and {@code t} end with (possibly empty)
 */
public static String longestCommonSuffix(String s, String t) {
    String reversedS = reverse(s);
    String reversedT = reverse(t);
    String reversedSuffix = StringUtils.longestCommonPrefix(reversedS, reversedT);
    return reverse(reversedSuffix);
}
/**
 * Reverses a string using {@link StringBuilder#reverse()}.
 *
 * @param s
 *            the string to reverse
 * @return the reversed string
 */
public static String reverse(String s) {
    StringBuilder builder = new StringBuilder(s);
    builder.reverse();
    return builder.toString();
}
/**
 * Returns the longest common prefix of the strings in the list or the empty string if no common
 * prefix exists. At least two strings must be provided; this is checked with
 * {@link CCSMAssert#isTrue(boolean, String)}.
 *
 * @param strings
 *            the strings to compare, at least two
 * @return the longest prefix shared by all strings (possibly empty)
 */
public static String longestCommonPrefix(Iterable<String> strings) {
    Iterator<String> iterator = strings.iterator();
    CCSMAssert.isTrue(iterator.hasNext(), "Expected are at least 2 strings");
    String commonPrefix = iterator.next();
    CCSMAssert.isTrue(iterator.hasNext(), "Expected are at least 2 strings");
    while (iterator.hasNext()) {
        commonPrefix = longestCommonPrefix(commonPrefix, iterator.next());
        // nothing can extend an empty prefix again, so stop early
        if (commonPrefix.isEmpty()) {
            break;
        }
    }
    return commonPrefix;
}
/**
 * Removes all whitespace from a string, where whitespace is every character for which
 * {@link Character#isWhitespace(char)} returns {@code true}.
 *
 * @param content
 *            the string to process
 * @return the string without whitespace characters
 */
public static String removeWhitespace(String content) {
    StringBuilder withoutWhitespace = new StringBuilder();
    for (char current : content.toCharArray()) {
        if (Character.isWhitespace(current)) {
            continue;
        }
        withoutWhitespace.append(current);
    }
    return withoutWhitespace.toString();
}
/**
 * Removes all whitespace at the beginning of each line in the given string, i.e. every match of
 * {@code LEADING_WHITESPACE_PATTERN} is replaced by the empty string.
 *
 * @param content
 *            the text to process
 * @return the text without leading whitespace on its lines
 */
public static String removeWhitespaceAtBeginningOfLine(String content) {
    Matcher leadingWhitespace = LEADING_WHITESPACE_PATTERN.matcher(content);
    return leadingWhitespace.replaceAll(StringUtils.EMPTY_STRING);
}
/**
 * Creates a unique name which is not contained in the given set of names. If possible, the given
 * base name is directly returned; otherwise it is extended by a number. Numbering starts at 2,
 * i.e. the candidates tried are {@code baseName}, {@code baseName2}, {@code baseName3}, ...
 *
 * @param baseName
 *            the preferred name
 * @param usedNames
 *            the names that are already taken; not modified by this method
 * @return the first candidate that is not contained in {@code usedNames}
 */
public static String createUniqueName(String baseName, Set<String> usedNames) {
    String name = baseName;
    int nextNumber = 2;
    while (usedNames.contains(name)) {
        name = baseName + nextNumber;
        nextNumber++;
    }
    return name;
}
/**
 * Transforms a string from camel-case to upper-case with underscores. An underscore is inserted
 * before every upper-case letter that is followed by a lower-case letter, the result is
 * upper-cased and a single leading underscore is removed.
 *
 * @param s
 *            the camel-case string
 * @return the upper-case, underscore-separated string
 */
public static String camelCaseToUnderscored(String s) {
    String withUnderscores = s.replaceAll("([A-Z][a-z])", "_$1");
    String upperCased = withUnderscores.toUpperCase();
    return stripPrefix(upperCased, "_");
}
/**
 * Encodes a byte array as a hex string following the method described here:
 * http://stackoverflow.com/questions/9655181/convert-from-byte-array-to-hex-string-in-java
 *
 * @param data
 *            the bytes to encode
 * @return a string with two characters from {@code HEX_CHARACTERS} per input byte, high nibble
 *         first
 */
public static String encodeAsHex(byte[] data) {
    char[] hexChars = new char[data.length * 2];
    int writeIndex = 0;
    for (byte value : data) {
        // mask to treat the byte as unsigned
        int unsigned = value & 0xFF;
        hexChars[writeIndex++] = HEX_CHARACTERS[unsigned >>> 4];
        hexChars[writeIndex++] = HEX_CHARACTERS[unsigned & 0x0F];
    }
    return new String(hexChars);
}
/**
 * Decodes a byte array from a hex string. Every pair of characters is parsed as one byte with
 * {@link Integer#parseInt(String, int)} (radix 16); for a string of odd length the last
 * character is ignored.
 *
 * @param s
 *            the hex string
 * @return the decoded bytes ({@code s.length() / 2} of them)
 * @throws NumberFormatException
 *             if a character pair is not a valid hex number
 */
public static byte[] decodeFromHex(String s) {
    int byteCount = s.length() / 2;
    byte[] decoded = new byte[byteCount];
    for (int byteIndex = 0; byteIndex < byteCount; byteIndex++) {
        int pairStart = 2 * byteIndex;
        String hexPair = s.substring(pairStart, pairStart + 2);
        decoded[byteIndex] = (byte) Integer.parseInt(hexPair, 16);
    }
    return decoded;
}
/**
 * Formats a number with the given number format. If the number format is {@code null}, this
 * uses {@link String#valueOf(double)}.
 *
 * @param number
 *            the number to format
 * @param numberFormat
 *            the format to apply, may be {@code null}
 * @return the formatted number
 */
public static String format(double number, @Nullable NumberFormat numberFormat) {
    return numberFormat != null ? numberFormat.format(number) : String.valueOf(number);
}
/**
 * Regex replacement methods like {@link Matcher#appendReplacement(StringBuffer, String)} or
 * {@link String#replaceAll(String, String)} treat dollar signs as group references and
 * backslashes as escape characters. This method escapes replacement strings so that both are
 * treated as literals.
 *
 * @param replacement
 *            the text that should be inserted literally
 * @return the replacement with every {@code $} and {@code \} prefixed by a backslash
 */
public static String escapeRegexReplacementString(String replacement) {
    // quoteReplacement prefixes exactly the characters '$' and '\' with a backslash
    return Matcher.quoteReplacement(replacement);
}
/**
 * Converts a string to its UTF-8 byte representation. Returns {@code null} on a {@code null}
 * input.
 *
 * @param s
 *            the string to encode, may be {@code null}
 * @return the UTF-8 bytes, or {@code null}
 */
@Contract("null -> null; !null -> !null")
public static byte @Nullable [] stringToBytes(@Nullable String s) {
    return s == null ? null : s.getBytes(StandardCharsets.UTF_8);
}
/**
 * Converts a UTF-8 byte array to a string. Returns {@code null} on a {@code null} input.
 *
 * @param b
 *            the UTF-8 bytes, may be {@code null}
 * @return the decoded string, or {@code null}
 */
@Contract("null -> null; !null -> !null")
public static @Nullable String bytesToString(byte @Nullable [] b) {
    int offset = 0;
    return bytesToString(b, offset);
}
/**
 * Converts the part of a UTF-8 byte array that starts at the provided {@code offset} and runs to
 * the end of the array to a string. Returns {@code null} on a {@code null} input.
 *
 * @param b
 *            the UTF-8 bytes, may be {@code null}
 * @param offset
 *            index of the first byte to decode
 * @return the decoded string, or {@code null}
 */
@Contract("null, _ -> null; !null, _ -> !null")
public static @Nullable String bytesToString(byte @Nullable [] b, int offset) {
    return b == null ? null : bytesToString(b, offset, b.length - offset);
}
/**
 * Converts {@code count} bytes of a UTF-8 byte array, starting at the provided {@code offset},
 * to a string. Returns {@code null} on a {@code null} input.
 *
 * @param b
 *            the UTF-8 bytes, may be {@code null}
 * @param offset
 *            index of the first byte to decode
 * @param count
 *            number of bytes to decode
 * @return the decoded string, or {@code null}
 */
@Contract("null, _, _ -> null; !null, _, _ -> !null")
public static @Nullable String bytesToString(byte @Nullable [] b, int offset, int count) {
    return b == null ? null : new String(b, offset, count, StandardCharsets.UTF_8);
}
/**
 * Converts a byte array to a string, assuming the given encoding, unless a byte-order mark
 * included with the bytes implies that another encoding is actually used. A detected byte-order
 * mark is not part of the returned string.
 *
 * This method returns {@code null} on a {@code null} input.
 *
 * @param b
 *            the bytes to decode, may be {@code null}
 * @param encoding
 *            the encoding to use if no byte-order mark is found
 * @return the decoded string, or {@code null}
 */
public static @Nullable String bytesToString(byte @Nullable [] b, Charset encoding) {
    if (b == null) {
        return null;
    }
    Optional<EByteOrderMark> bom = EByteOrderMark.determineBOM(b);
    Charset detectedEncoding = bom.map(EByteOrderMark::getEncoding).orElse(encoding);
    // skip the bytes of the byte-order mark itself
    int bytesToSkip = bom.map(EByteOrderMark::getBOMLength).orElse(0);
    return new String(b, bytesToSkip, b.length - bytesToSkip, detectedEncoding);
}
/**
 * Converts the given objects into a string list by invoking {@link Object#toString()} on each
 * non-null element. For null entries in the input, the output will contain a null entry as well.
 *
 * @param <T>
 *            the element type of the input collection
 * @param objects
 *            the objects to convert
 * @return a new mutable list with one entry per input element, in iteration order
 */
public static <T> List<@Nullable String> toStrings(Collection<@Nullable T> objects) {
    List<@Nullable String> strings = new ArrayList<>(objects.size());
    for (T t : objects) {
        strings.add(t == null ? null : t.toString());
    }
    return strings;
}
/**
 * Converts the given String to an {@link InputStream} with UTF-8 encoding.
 *
 * @param string
 *            the text to expose as a stream
 * @return a stream over the UTF-8 bytes of the string
 */
public static InputStream toInputStream(String string) {
    Charset utf8 = StandardCharsets.UTF_8;
    return toInputStream(string, utf8);
}
/**
 * Converts the given String to an {@link InputStream} with the specified encoding.
 *
 * @param string
 *            the text to expose as a stream
 * @param charset
 *            the encoding used to turn the text into bytes
 * @return an in-memory stream over the encoded bytes
 */
public static InputStream toInputStream(String string, Charset charset) {
    byte[] encoded = string.getBytes(charset);
    return new ByteArrayInputStream(encoded);
}
/**
 * Converts the given {@link InputStream} to a String with UTF-8 encoding. The stream is read
 * until its end; it is not closed by this method.
 *
 * @param inputStream
 *            the stream to read
 * @return the stream content decoded as UTF-8
 * @throws IOException
 *             if reading from the stream fails
 */
public static String fromInputStream(InputStream inputStream) throws IOException {
    ByteArrayOutputStream collected = new ByteArrayOutputStream();
    inputStream.transferTo(collected);
    return collected.toString(StandardCharsets.UTF_8);
}
/**
 * Truncates the given string (if necessary) by removing characters from the end and attaching
 * the suffix such that the resulting string has at most {@code length} characters. Length must
 * be &gt;= {@code suffix.length()}; this is checked with {@code CCSMAssert}.
 *
 * @param string
 *            the string to truncate
 * @param length
 *            the maximal length of the result
 * @param suffix
 *            the marker appended to a truncated string
 * @return the input if it is short enough, otherwise its truncated start followed by the suffix
 */
public static String truncate(String string, int length, String suffix) {
    CCSMAssert.isTrue(length >= suffix.length(), "Expected length >= suffix.length()");
    boolean needsTruncation = string.length() > length;
    if (!needsTruncation) {
        return string;
    }
    int keptCharacters = length - suffix.length();
    return string.substring(0, keptCharacters) + suffix;
}
/**
 * Truncates the given string (if necessary) by removing characters from the end. The length of
 * the resulting string is guaranteed not to exceed the given length parameter.
 *
 * @param string
 *            the string to truncate
 * @param length
 *            the maximal length of the result
 * @return the result of {@link #truncate(String, int, String)} with {@code EMPTY_STRING} as
 *         suffix
 */
public static String truncate(String string, int length) {
    String noSuffix = EMPTY_STRING;
    return truncate(string, length, noSuffix);
}
/**
 * Truncates the given string (if necessary) by removing characters from the end and instead
 * attaching three dots. The length of the resulting string is guaranteed not to exceed the given
 * length parameter.
 *
 * @param string
 *            the string to truncate
 * @param length
 *            the maximal length of the result
 * @return the result of {@link #truncate(String, int, String)} with {@code THREE_DOTS} as suffix
 */
public static String truncateWithThreeDots(String string, int length) {
    String suffix = THREE_DOTS;
    return truncate(string, length, suffix);
}
/**
 * Returns the beginning of the given String, retaining at most numberOfChars characters. In case
 * the String is shorter than or equal to numberOfChars, the supplied String is returned
 * unchanged. Otherwise, the String is truncated and suffixed with {@code ELLIPSIS} (where the
 * overall length will be numberOfChars). numberOfChars must not be smaller than the length of
 * {@code ELLIPSIS}, as required by {@link #truncate(String, int, String)}.
 *
 * @param s
 *            the string to truncate
 * @param numberOfChars
 *            the maximal length of the result
 * @return the possibly truncated string
 */
public static String truncateWithEllipsis(String s, int numberOfChars) {
    String suffix = ELLIPSIS;
    return truncate(s, numberOfChars, suffix);
}
/**
 * Calculates the edit distance (aka Levenshtein distance) for two strings, i.e. the number of
 * insert, delete or replace operations required to transform one string into the other. The
 * running time is O(n*m), where n/m are the lengths of the strings; only two rows of the
 * distance table (each of length {@code s.length() + 1}) are kept at any time. Note that due to
 * the high running time, for long strings the {@link Diff} class should be used, that has a more
 * efficient algorithm, but only for insert/delete (not replace operation).
 *
 * This is a clean reimplementation of the standard dynamic-programming algorithm for the
 * Levenshtein distance.
 *
 * @param s
 *            the first string
 * @param t
 *            the second string
 * @return the minimal number of single-character edits turning {@code s} into {@code t}
 */
public static int editDistance(String s, String t) {
    int sLength = s.length();
    int tLength = t.length();

    // currentRow[i] = distance between the first i chars of s and the processed prefix of t
    int[] currentRow = new int[sLength + 1];
    int[] previousRow = new int[sLength + 1];
    for (int row = 0; row <= sLength; row++) {
        currentRow[row] = row;
    }

    for (int column = 1; column <= tLength; column++) {
        // reuse the two arrays by swapping their roles
        int[] swap = previousRow;
        previousRow = currentRow;
        currentRow = swap;

        currentRow[0] = column;
        char tChar = t.charAt(column - 1);
        for (int row = 1; row <= sLength; row++) {
            int insertOrDelete = 1 + Math.min(currentRow[row - 1], previousRow[row]);
            int replaceCost = s.charAt(row - 1) == tChar ? 0 : 1;
            int replace = previousRow[row - 1] + replaceCost;
            currentRow[row] = Math.min(insertOrDelete, replace);
        }
    }
    return currentRow[sLength];
}
/**
 * Returns whether the edit distance as calculated by {@link #editDistance(String, String)}, is 0
 * or 1. This implementation is significantly more efficient compared to actually calculating the
 * edit distance and runs in O(n+m).
 *
 * The idea is that with at most one change, the start and end of both strings must be the same.
 * So we skip the common prefix and the common suffix; if what remains between both positions is
 * at most one character in each string, the edit distance is at most one as well.
 *
 * @param s
 *            the first string
 * @param t
 *            the second string
 * @return {@code true} if the strings differ by at most one insert, delete or replace
 */
public static boolean isEditDistanceAtMost1(String s, String t) {
    int sLength = s.length();
    int tLength = t.length();
    if (Math.abs(tLength - sLength) > 1) {
        return false;
    }

    // length of the common prefix = index of the first differing character in both strings
    int firstDifference = 0;
    while (firstDifference < sLength && firstDifference < tLength
            && s.charAt(firstDifference) == t.charAt(firstDifference)) {
        firstDifference++;
    }

    // walk backwards over the common suffix, never crossing the first difference
    int sLast = sLength - 1;
    int tLast = tLength - 1;
    while (sLast >= firstDifference && tLast >= firstDifference && s.charAt(sLast) == t.charAt(tLast)) {
        sLast--;
        tLast--;
    }

    // the differing region may span at most one character in each string
    return sLast <= firstDifference && tLast <= firstDifference;
}
/**
 * Returns a list that contains all entries of the original collection as lowercase strings. Does
 * not operate in-place!
 *
 * @param strings
 *            the strings to convert; must not contain {@code null}
 * @return a new mutable list with the lower-cased strings, in iteration order of the input
 */
public static List<String> lowercaseList(Collection<String> strings) {
    List<String> lowercaseList = new ArrayList<>(strings.size());
    for (String string : strings) {
        lowercaseList.add(string.toLowerCase());
    }
    return lowercaseList;
}
/**
 * Returns the input string, or the provided default value in case the input is {@code null}.
 *
 * @param input
 *            the preferred value
 * @param defaultValue
 *            the fallback value
 * @return {@code input} if it is not {@code null}, otherwise {@code defaultValue}
 */
public static String defaultIfNull(String input, String defaultValue) {
    return input != null ? input : defaultValue;
}
/**
 * Returns the input string, or the provided default value in case the input is {@code null} or
 * the empty string (as decided by {@code isEmpty}).
 *
 * @param input
 *            the preferred value, may be {@code null}
 * @param defaultValue
 *            the fallback value
 * @return {@code input} unless {@code isEmpty(input)} holds, otherwise {@code defaultValue}
 */
public static String defaultIfNullOrEmpty(@Nullable String input, String defaultValue) {
    return isEmpty(input) ? defaultValue : input;
}
/**
 * Returns the input string, or {@link #EMPTY_STRING} in case the input is {@code null}.
 *
 * @param input
 *            the string, may be {@code null}
 * @return the result of {@link #defaultIfNull(String, String)} with {@code EMPTY_STRING} as
 *         default
 */
public static @NonNull String emptyIfNull(@Nullable String input) {
    String fallback = EMPTY_STRING;
    return defaultIfNull(input, fallback);
}
/**
 * Returns {@code trueAlternative} if {@code condition} is {@code true} and
 * {@code falseAlternative} otherwise.
 *
 * @param condition
 *            selects the alternative
 * @param trueAlternative
 *            returned for a {@code true} condition
 * @param falseAlternative
 *            returned for a {@code false} condition
 * @return the selected alternative
 */
public static String alternativeOnCondition(boolean condition, String trueAlternative, String falseAlternative) {
    return condition ? trueAlternative : falseAlternative;
}
/**
 * Splits a string at every top-level occurrence of the separator character. This can be useful,
 * e.g. for splitting type parameter lists.
 * {@code "String,Map<String,Integer>,Map<String,Map<String,Integer>>"} split at {@code ','} with
 * levelStart = {@code '<'} and levelEnd = {@code '>'} would result in
 * {@code ["String","Map<String,Integer>","Map<String,Map<String,Integer>>"]}
 *
 * If there is no separator char, use the levelStart char. In this case, separator chars are
 * included in the output.
 * {@code splitTopLevel("((a))(b)", '(', '(', ')' ) = ["", "((a))", "(b)"]}
 *
 * Can also be used to split columns from a CSV line where values can be quoted
 * {@code splitTopLevel("\"a\";\";\";c", ';', '"', '"' ) = ["\"a\"", "\";\"", "c"]}
 *
 * @param input
 *            The input string.
 * @param separator
 *            The separator character.
 * @param levelStart
 *            The character that starts a new level.
 * @param levelEnd
 *            The character that ends a level.
 * @return The input string split at every top-level separator, as a new mutable list.
 * @throws AssertionError
 *             If a level is still open at the end of the input (checked with {@code CCSMAssert}).
 *             A levelEnd character at top level is not treated as an error.
 */
public static List<String> splitTopLevel(String input, char separator, char levelStart, char levelEnd) {
    int currentLevel = 0;
    int currentStartIndex = 0;
    List<String> result = new ArrayList<>();
    for (int i = 0; i < input.length(); i++) {
        char currentChar = input.charAt(i);
        if (currentChar == separator && currentLevel == 0) {
            result.add(input.substring(currentStartIndex, i));
            if (separator == levelStart) {
                // the separator also opens a level, so it belongs to the next part
                currentStartIndex = i;
            } else {
                currentStartIndex = i + 1;
            }
        }
        // levelEnd is checked first so that identical start/end characters (e.g. quotes) toggle
        if (currentChar == levelEnd && currentLevel > 0) {
            currentLevel--;
        } else if (currentChar == levelStart) {
            currentLevel++;
        }
    }
    CCSMAssert.isTrue(currentLevel == 0, "String is imbalanced: " + input);
    result.add(input.substring(currentStartIndex));
    return result;
}
/**
 * Ensures that the given string ends with the given suffix, i.e. if it does not have the given
 * suffix, the returned string is {@code s + suffix}.
 *
 * @param s
 *            the string
 * @param suffix
 *            the required suffix
 * @return {@code s} if it already ends with the suffix, otherwise {@code s + suffix}
 */
public static String ensureEndsWith(String s, String suffix) {
    return s.endsWith(suffix) ? s : s + suffix;
}
/**
 * Removes the given affix from the start and end of a string. The suffix is removed first, then
 * the prefix is removed from the remainder.
 *
 * @param s
 *            the string
 * @param affix
 *            the text to remove from both ends
 * @return the string without leading and trailing affix
 */
public static String strip(String s, String affix) {
    String withoutSuffix = StringUtils.stripSuffix(s, affix);
    return StringUtils.stripPrefix(withoutSuffix, affix);
}
/**
 * Ensures that the given string starts with the given prefix, i.e. if it does not have the given
 * prefix, it is prepended to the string.
 *
 * @param s
 *            the string
 * @param prefix
 *            the required prefix
 * @return {@code s} if it already starts with the prefix, otherwise {@code prefix + s}
 */
public static String ensureStartsWith(String s, String prefix) {
    return s.startsWith(prefix) ? s : prefix + s;
}
/**
 * Concatenates the list of strings with the delimiter, prefixing every occurrence of the
 * delimiter inside an element with a backslash. For example, the list
 * { "asd,rtz", "rrr", "rrr" } with comma (,) as delimiter produces the comma delimited string
 * "asd\\,rtz,rrr,rrr". Backslashes in the elements are not escaped themselves.
 *
 * @param data
 *            the strings to join
 * @param delimiter
 *            the delimiter placed between the strings
 * @return the joined string
 */
public static String concatWithEscapeCharacter(List<String> data, Character delimiter) {
    // compute the conversions once instead of once per element
    String delimiterString = delimiter.toString();
    String escapedDelimiter = "\\" + delimiterString;
    return data.stream().map(element -> element.replace(delimiterString, escapedDelimiter))
            .collect(Collectors.joining(delimiterString));
}
/**
 * Splits the delimited string while considering escaped delimiters. For example the comma (,)
 * delimited string "asd\\,rtz,rrr,rrr" will produce the list { "asd,rtz", "rrr", "rrr" }.
 * Returns an empty list if the data is empty (as decided by {@code isEmpty}) or the delimiter is
 * {@code null}. Consecutive delimiters will lead to empty strings, e.g. ",,asd, bar," should
 * produce {"", "", "asd", "bar", ""}. Parts are trimmed.
 *
 * @implNote We work really hard to make this code fast and efficient, by avoiding the use of
 *           regexes. Unescaping uses the literal {@link String#replace(CharSequence, CharSequence)}
 *           so that delimiters with a special meaning in regex replacement strings ({@code $}
 *           and {@code \}) are handled correctly.
 *
 * @param data
 *            the delimited string, may be {@code null}
 * @param delimiter
 *            the delimiter character, may be {@code null}
 * @return the unescaped, trimmed parts
 */
public static List<String> splitWithEscapeCharacter(@Nullable String data, @Nullable Character delimiter) {
    if (isEmpty(data) || delimiter == null) {
        return Collections.emptyList();
    }
    String delimiterString = delimiter.toString();
    String escapedDelimiter = "\\" + delimiterString;
    int nextInsertedStringStartIndex = 0;
    List<String> result = new ArrayList<>();
    // remembers whether the part currently being scanned contains an escaped delimiter
    boolean hadEscapedCharacter = false;
    for (int lastDelimiterIndex = data.indexOf(delimiter); lastDelimiterIndex != -1; lastDelimiterIndex = data
            .indexOf(delimiter, lastDelimiterIndex + 1)) {
        if (lastDelimiterIndex == nextInsertedStringStartIndex) {
            // found consecutive delimiter, create empty string for it.
            result.add(StringUtils.EMPTY_STRING);
            nextInsertedStringStartIndex++;
        } else if (data.charAt(lastDelimiterIndex - 1) == '\\') {
            // escaped delimiter: it belongs to the current part, unescape it later
            hadEscapedCharacter = true;
        } else {
            String substring = data.substring(nextInsertedStringStartIndex, lastDelimiterIndex).trim();
            if (hadEscapedCharacter) {
                substring = substring.replace(escapedDelimiter, delimiterString);
            }
            result.add(substring);
            nextInsertedStringStartIndex = lastDelimiterIndex + 1;
            hadEscapedCharacter = false;
        }
    }
    // the remainder after the last unescaped delimiter is the final part
    String substring = data.substring(nextInsertedStringStartIndex).trim();
    if (hadEscapedCharacter) {
        substring = substring.replace(escapedDelimiter, delimiterString);
    }
    result.add(substring);
    return result;
}
/**
 * Splits the given string at the given separators, but includes the separators in the returned
 * list. Empty strings will not be included in the result, unless the input is already an empty
 * string (or an empty separator is passed). This implementation avoids regex patterns with
 * lookaheads.
 *
 * The passed separators are checked in their given order (only relevant if separators are
 * prefixes of each other). After a separator matched, scanning continues behind it, so
 * separators never overlap in the result.
 *
 * If no separator is found, the input is returned as a list with one element.
 *
 * The returned list is mutable.
 *
 * @param input
 *            the string to split
 * @param separators
 *            the separators to split at
 * @return the parts and separators in the order in which they occur in the input
 */
public static List<String> splitKeepingSeparators(String input, String... separators) {
    List<String> result = new ArrayList<>();
    int lastSplitIndex = 0;
    for (int i = 0; i < input.length(); i++) {
        for (String separator : separators) {
            if (!input.startsWith(separator, i)) {
                continue;
            }
            if (lastSplitIndex != i) {
                result.add(input.substring(lastSplitIndex, i));
            }
            result.add(separator);
            lastSplitIndex = i + separator.length();
            // continue behind the matched separator; the loop increment adds the final +1.
            // Math.max keeps the position unchanged for an empty separator.
            i = Math.max(i, lastSplitIndex - 1);
            break;
        }
    }
    if (lastSplitIndex != input.length() || input.isEmpty()) {
        result.add(input.substring(lastSplitIndex));
    }
    return result;
}
/**
 * Splits the string at the given regex separator and trims every part. Parts that only consist
 * of whitespace are kept as empty strings. For example: string "aaa;rrr, qqq; ; " split at ";"
 * will produce the list { "aaa", "rrr, qqq", "", "" }.
 *
 * @param data
 *            the string to split
 * @param regexSeparator
 *            the regular expression to split at, see {@link String#split(String)}
 * @return the trimmed parts, or an empty list if {@code isEmpty} holds for the data or the
 *         separator
 */
public static List<String> splitToList(String data, @Language("RegExp") String regexSeparator) {
    if (isEmpty(data) || isEmpty(regexSeparator)) {
        return Collections.emptyList();
    }
    return CollectionUtils.map(Arrays.asList(data.split(regexSeparator)), String::trim);
}
/**
 * Returns the result of applying all pattern replacements in order, each as often as possible
 * (globally). Every replacement works on the output of the previous one.
 *
 * @param s
 *            the string to process
 * @param replacements
 *            pairs of pattern and replacement string, see
 *            {@link #applyReplacement(String, Pattern, String)}
 * @return the string after all replacements
 */
public static String applyAllReplacements(String s, PairList<Pattern, String> replacements) {
    String result = s;
    for (int i = 0; i < replacements.size(); ++i) {
        result = applyReplacement(result, replacements.getFirst(i), replacements.getSecond(i));
    }
    return result;
}
/**
 * Replaces every match of the pattern by the replacement. This is the equivalent of
 * {@link String#replaceAll(String, String)}, but accepting a {@link Pattern} instead of a regex
 * string. The replacement may contain group references such as {@code $1}.
 *
 * @param s
 *            the string to process
 * @param pattern
 *            the pattern to search for
 * @param replacement
 *            the replacement string in regex replacement syntax
 * @return the string with all matches replaced
 */
public static String applyReplacement(String s, Pattern pattern, String replacement) {
    Matcher matcher = pattern.matcher(s);
    return matcher.replaceAll(replacement);
}
/**
 * Returns {@code null} if {@code isEmpty} holds for the input string (the original documentation
 * describes this as: empty after trimming, or {@code null}). Otherwise, the input is returned
 * unaltered.
 *
 * @param input
 *            the string to check
 * @return {@code null} for an empty input, otherwise the input itself
 */
public static String nullIfEmpty(String input) {
    return isEmpty(input) ? null : input;
}
/**
 * Checks whether the parameter contains only the digits {@code 0-9} and (optionally) starts with
 * a '-' char. Returns false if the string is null or empty, and also for a lone "-".
 *
 * @param string
 *            the string to check, may be {@code null}
 * @return {@code true} if the string consists of an optional minus sign followed by digits only
 */
public static boolean isInteger(@Nullable String string) {
    if (string == null || string.isEmpty()) {
        return false;
    }
    int length = string.length();
    // skip a leading minus, but only if something follows it
    int firstDigitIndex = (length > 1 && string.charAt(0) == '-') ? 1 : 0;
    for (int index = firstDigitIndex; index < length; index++) {
        char current = string.charAt(index);
        boolean isDigit = current >= '0' && current <= '9';
        if (!isDigit) {
            return false;
        }
    }
    return true;
}
/**
 * Returns the index of the first character in the given string that matches the pattern. The
 * pattern is applied to single characters, so it makes no sense to supply patterns that would
 * match on longer character sequences.
 *
 * @param string
 *            the string to search in
 * @param pattern
 *            the pattern each single character is matched against
 * @return the index of the first matching character, or {@code -1} if no character matches
 */
public static int indexOfMatch(String string, Pattern pattern) {
    int index = 0;
    while (index < string.length()) {
        String singleCharacter = String.valueOf(string.charAt(index));
        Matcher matcher = pattern.matcher(singleCharacter);
        if (matcher.matches()) {
            return index;
        }
        index++;
    }
    // no match at all
    return -1;
}
/**
 * Returns the index of the first found occurrence, together with the matched string. Example:
 * {@code Input ("abc", "b", "c") will return [1, "b"]}
 *
 * The candidates are checked in their given order (relevant only if they are prefixes of one
 * another). If none of the candidates is found, [-1, null] is returned.
 *
 * Special case: If one of the candidates is the empty string and the input is empty as well,
 * {@code [0, ""]} is returned. For a non-empty input an empty candidate matches at the first
 * inspected position unless an earlier candidate matches there.
 *
 * @param input
 *            the string to search in
 * @param startIndex
 *            the index at which the search starts
 * @param candidates
 *            the strings to search for
 * @return a pair of the match index and the matched candidate, or [-1, null]
 */
public static Pair<Integer, String> indexOfAndFirstMatch(String input, int startIndex,
        String... candidates) {
    if (input.isEmpty()) {
        // the loop below never runs for an empty input, so handle the empty candidate here
        for (String separator : candidates) {
            if (separator.isEmpty()) {
                return Pair.createPair(0, separator);
            }
        }
    }
    for (int i = startIndex; i < input.length(); i++) {
        for (String separator : candidates) {
            if (input.startsWith(separator, i)) {
                return Pair.createPair(i, separator);
            }
        }
    }
    return Pair.createPair(-1, null);
}
/**
 * Returns {@link #indexOfAndFirstMatch(String, int, String...)} from the start of the input
 * (index 0).
 *
 * @param input
 *            the string to search in
 * @param candidates
 *            the strings to search for
 * @return a pair of the match index and the matched candidate, or [-1, null]
 */
public static Pair<Integer, String> indexOfAndFirstMatch(String input, String... candidates) {
    return indexOfAndFirstMatch(input, 0, candidates);
}
/**
 * Escapes the given chars according to the given map. If a char different from
 * {@code \b \n \t \f \r} should be escaped, e.g. an {@code a}, and also an escaping rule "\\" ->
 * "\\\\" exists, automatically an escape rule "\\a" -> "\\\\a" is added to prevent escaping of
 * both letter and backslash, if the char is already escaped in the origin string.
 *
 * @param content
 *            the text to escape
 * @param escapeMap
 *            maps each sequence to escape to its escaped form; not modified by this method
 * @return the escaped text
 */
public static String escapeChars(String content, Map<CharSequence, CharSequence> escapeMap) {
    Map<CharSequence, CharSequence> extendedEscapeMap = addEscapingForAlreadyEscapedNonSpecialChars(escapeMap);
    CharSequenceTranslator translator = new LookupTranslator(extendedEscapeMap);
    return StringEscapeUtils.builder(translator).escape(content).toString();
}
/**
 * Performs unescaping according to the given translation map. This is the "reverse" method to
 * {@link #escapeChars(String, Map)}:
 * {@code unescapeChars(escapeChars(someString, escapeMap), escapeMap)} will yield
 * {@code someString} again.
 *
 * @param content
 *            the text to unescape
 * @param escapeMap
 *            the map that was used for escaping; it is inverted internally and not modified
 * @return the unescaped text
 */
public static String unescapeChars(String content, Map<CharSequence, CharSequence> escapeMap) {
    Map<CharSequence, CharSequence> extendedEscapeMap = addEscapingForAlreadyEscapedNonSpecialChars(escapeMap);
    CharSequenceTranslator translator = new LookupTranslator(EntityArrays.invert(extendedEscapeMap));
    return StringEscapeUtils.builder(translator).escape(content).toString();
}
/**
 * If an escapeMap contains a rule "\\" -> "\\\\" and a char different from
 * {@code \b \n \t \f \r}, e.g. an {@code a}, should be escaped by prefixing with a backslash
 * (e.g "a" -> "\\a"), escaping of an already escaped "a" in the origin string (e.g. "\\a") would
 * result in "\\\\\\a" (as the letter is escaped as well as the already existing backslash).
 * However, the desired escaping is "\\\\a". To obtain this, an additional escaping rule "\\a" ->
 * "\\\\a" is added to the escaping map to correctly handle this case.
 *
 * @param escapeMap
 *            the original escape rules; not modified
 * @return the given map itself if it has no rule for the backslash, otherwise a new map with the
 *         original and the additional rules (original rules win on key clashes)
 */
private static Map<CharSequence, CharSequence> addEscapingForAlreadyEscapedNonSpecialChars(
        Map<CharSequence, CharSequence> escapeMap) {
    if (!escapeMap.containsKey("\\")) {
        return escapeMap;
    }
    Map<CharSequence, CharSequence> extendedEscapeMap = new HashMap<>();
    for (Map.Entry<CharSequence, CharSequence> escapePair : escapeMap.entrySet()) {
        if (escapePair.getKey().equals("\\")) {
            continue;
        }
        if (EntityArrays.JAVA_CTRL_CHARS_ESCAPE.containsKey(escapePair.getKey())) {
            continue;
        }
        // only rules of the form "x" -> "\x" need the extra "\x" -> "\\x" rule
        if (escapePair.getValue().equals("\\" + escapePair.getKey())) {
            extendedEscapeMap.put("\\" + escapePair.getKey(), "\\" + escapePair.getValue());
        }
    }
    // added last so that explicitly given rules override generated ones
    extendedEscapeMap.putAll(escapeMap);
    return extendedEscapeMap;
}
/**
 * Escapes the characters {@code [ ] ( ) * # _ ~ ^ + = >} by prefixing with a backslash. Use to
 * escape Markdown sequences (check if according to markdown dialect more/other characters have
 * to be escaped). The rules are taken from {@code MARKDOWN_ESCAPE_MAP}.
 *
 * @param content
 *            the text to escape
 * @return the result of {@link #escapeChars(String, Map)} with {@code MARKDOWN_ESCAPE_MAP}
 */
public static String escapeMarkdownChars(String content) {
    String escaped = escapeChars(content, MARKDOWN_ESCAPE_MAP);
    return escaped;
}
/**
 * Counterpart of {@link #escapeMarkdownChars(String)}: removes the leading backslash from each
 * escaped occurrence of the characters {@code [ ] ( ) * # _ ~ ^ + = >}.
 */
public static String unescapeMarkdownChars(String content) {
    String unescapedContent = unescapeChars(content, MARKDOWN_ESCAPE_MAP);
    return unescapedContent;
}
/**
 * Cuts the given String down to its first {@code numberOfChars} characters.
 *
 * @param s
 *            the String to shorten
 * @param numberOfChars
 *            the maximum number of characters to retain
 * @return the supplied String itself if its length is at most {@code numberOfChars}, otherwise its
 *         leading {@code numberOfChars} characters
 */
public static String getFirstCharacters(String s, int numberOfChars) {
    boolean fitsCompletely = s.length() <= numberOfChars;
    return fitsCompletely ? s : s.substring(0, numberOfChars);
}
/**
 * Cuts the given String down to its last {@code numberOfChars} characters.
 *
 * @param s
 *            the String to shorten
 * @param numberOfChars
 *            the maximum number of characters to retain
 * @return the supplied String itself if its length is at most {@code numberOfChars}, otherwise its
 *         trailing {@code numberOfChars} characters
 */
public static String getLastCharacters(String s, int numberOfChars) {
    int length = s.length();
    return length <= numberOfChars ? s : s.substring(length - numberOfChars);
}
/**
 * Converts the given String to a String where the first character is in upper case and all other
 * characters are in lower case.
 * <p>
 * The conversion does not depend on the default locale of the JVM, so the result is the same on
 * every machine (e.g. "TITLE" becomes "Title" even under a Turkish default locale).
 *
 * @param s
 *            the String to convert
 * @return the converted String; a String for which {@link #isEmpty(String)} holds is returned
 *         unchanged
 */
public static String toFirstUpper(String s) {
    if (isEmpty(s)) {
        return s;
    }
    char first = s.charAt(0);
    // Character.toUpperCase(char) ignores the locale, so the remainder must not depend on it either
    return Character.toUpperCase(first) + s.substring(1).toLowerCase(Locale.ROOT);
}
/**
 * Builds a simple plural form by appending "s" unless exactly one item is counted.
 *
 * @param string
 *            the singular form
 * @param count
 *            the number of items the word refers to
 * @return the string itself if count is 1, otherwise the string with an appended "s"
 */
public static String pluralize(String string, int count) {
    return count == 1 ? string : string + "s";
}
/** Returns the given string with the prefix put in front of it and the suffix appended to it. */
public static String surroundWith(String s, String prefix, String suffix) {
    StringBuilder surrounded = new StringBuilder();
    surrounded.append(prefix).append(s).append(suffix);
    return surrounded.toString();
}
/** Wraps the given string in single quotes, i.e. {@code abc} becomes {@code 'abc'}. */
public static String surroundWithSingleQuotes(String s) {
    String singleQuote = "'";
    return surroundWith(s, singleQuote, singleQuote);
}
/**
 * Checks whether the given value equals at least one of the given strings.
 * <p>
 * The supplied strings are compared one after the other. If there are many of them, or you
 * already have the Strings in a Collection, use {@code Collection.contains(..)}. Consider putting
 * the arguments into a Collection constant.
 *
 * @param value
 *            the value to look for
 * @param strings
 *            the candidates the value is compared to
 * @return {@code true} if one string equals the value, {@code false} otherwise.
 */
public static boolean equalsOneOf(String value, String... strings) {
    // a lambda (not value::equals) so that value is only dereferenced when a comparison happens
    return Arrays.stream(strings).anyMatch(candidate -> value.equals(candidate));
}
/**
 * Strips one double quote from the end and one from the beginning of the given string (each only
 * if present) and returns the resulting string.
 */
public static String removeDoubleQuotes(String string) {
    String doubleQuote = "\"";
    String withoutClosingQuote = stripSuffix(string, doubleQuote);
    return stripPrefix(withoutClosingQuote, doubleQuote);
}
/**
 * Strips one single quote from the end and one from the beginning of the given string (each only
 * if present) and returns the resulting string.
 */
public static String removeSingleQuotes(String string) {
    String singleQuote = "'";
    String withoutClosingQuote = stripSuffix(string, singleQuote);
    return stripPrefix(withoutClosingQuote, singleQuote);
}
/**
 * Repeats a {@link String}.
 *
 * @param s
 *            the {@link String} to repeat
 * @param times
 *            number of times the string gets repeated; must not be negative
 * @return the repeated {@link String}, i.e. {@code s} concatenated {@code times} times. The result
 *         is empty if {@code times} is 0 or {@code s} is empty.
 * @throws IllegalArgumentException
 *             if {@code times} is negative
 * @throws NullPointerException
 *             if {@code s} is {@code null}
 */
public static String repeat(String s, int times) {
    if (times < 0) {
        throw new IllegalArgumentException("times must not be negative, but was " + times);
    }
    // s is dereferenced first so that null input fails fast even for times == 0
    if (s.isEmpty() || times == 0) {
        return "";
    }
    StringBuilder repeated = new StringBuilder();
    for (int i = 0; i < times; i++) {
        repeated.append(s);
    }
    return repeated.toString();
}
/**
 * Null-safe variant of {@link Object#toString()}.
 *
 * @param value
 *            object to stringify
 * @return string representation or {@link StringUtils#EMPTY_STRING} if value is null.
 */
@NonNull
public static String safeToString(@Nullable Object value) {
    return value == null ? StringUtils.EMPTY_STRING : value.toString();
}
/**
 * Returns a truncated string that contains only the first {@code numberOfLines} lines of the given
 * text. Only {@code "\n"} is treated as line terminator.
 * <p>
 * If the text is cut, or if its last line is not terminated, the result ends with a line feed. A
 * text that ends with a line feed and has at most {@code numberOfLines} lines is returned
 * unchanged.
 * <p>
 * NOTE(review): for {@code numberOfLines <= 1} the result is always the empty string, i.e. asking
 * for exactly one line yields nothing. This is kept for compatibility; confirm whether it is
 * intended.
 *
 * @param text
 *            the text to truncate
 * @param numberOfLines
 *            the number of lines to retain
 * @return the head of the text as described above, or the empty string if the text is empty or
 *         {@code numberOfLines <= 1}
 */
public static String retainHeadLines(String text, int numberOfLines) {
    if (text.isEmpty() || numberOfLines <= 1) {
        return "";
    }
    // offset of the first character after the lines retained so far
    int cutOffset = 0;
    for (int line = 0; line < numberOfLines; line++) {
        if (cutOffset >= text.length()) {
            // numberOfLines is >= lines in text
            return text;
        }
        int lineFeedIndex = text.indexOf('\n', cutOffset);
        if (lineFeedIndex < 0) {
            // The remainder is an unterminated last line, so the whole text is retained. Without
            // this check the offset would wrap around to 0 and produce a wrong cut or an exception.
            return text + "\n";
        }
        cutOffset = lineFeedIndex + 1;
    }
    // the character before cutOffset is the line feed of the last retained line
    return text.substring(0, cutOffset);
}
/**
 * Remove the common prefix from all lines in a multi-line string.
 * <p>
 * If a prefix is removed, the resulting lines are joined with {@code LINE_FEED}.
 *
 * @param lines
 *            The multi-line string.
 * @param removeLeadingWhitespace
 *            Indicates if whitespace characters at the beginning of lines should be removed prior
 *            to prefix processing.
 * @param includeTrailingNewline
 *            Indicates if a trailing newline in the input string should be kept, which leads to an
 *            empty last line. Note that an empty last line also means that no prefix across all
 *            lines can exist.
 * @param allowedPrefixPattern
 *            A regex pattern that the complete common prefix must match for it to be removed.
 *            {@code null} means that any prefix found is removed.
 * @return A new string without the line prefix or the same string (identical object) as the input
 *         string if no prefix was removed. In the latter case leading whitespace is not removed
 *         either.
 */
@Contract("null, _, _, _ -> null; !null, _, _, _ -> !null")
public static @Nullable String stripCommonLinePrefix(@Nullable String lines, boolean removeLeadingWhitespace,
        boolean includeTrailingNewline, @Nullable String allowedPrefixPattern) {
    if (StringUtils.countLines(lines, includeTrailingNewline) < 2) {
        // there can only be a common prefix with at least 2 lines
        return lines;
    }
    String linesNoSpace = lines;
    if (removeLeadingWhitespace) {
        linesNoSpace = StringUtils.removeWhitespaceAtBeginningOfLine(lines);
    }
    // typed list: with a raw List the lambda parameter below would only be an Object
    List<String> splitLines = splitLinesAsList(linesNoSpace, includeTrailingNewline);
    String prefix = longestCommonPrefix(splitLines);
    if (prefix.isEmpty()) {
        return lines;
    }
    // the whole prefix has to match the pattern, not just a part of it
    if (allowedPrefixPattern != null && !Pattern.matches(allowedPrefixPattern, prefix)) {
        return lines;
    }
    return splitLines.stream().map(line -> stripPrefix(line, prefix)).collect(Collectors.joining(LINE_FEED));
}
/**
 * Extracts the phrase that is enclosed by the first occurrence of the startTerm and the first
 * occurrence of the endTerm after it.
 * <p>
 * Example: With input "ABC-DEF-GHI-GHI", startTerm "ABC" and endTerm "GHI", the result is "-DEF-".
 *
 * @param input
 *            the text to search in
 * @param startTerm
 *            the term that precedes the phrase
 * @param endTerm
 *            the term that follows the phrase
 * @return {@code null} if the startTerm or endTerm are not contained in the input, or if the
 *         endTerm occurs only <em>before</em> the startTerm. Otherwise, the phrase between the two
 *         terms, excluding the terms themselves.
 */
public static @Nullable String between(String input, String startTerm, String endTerm) {
    int startTermIndex = input.indexOf(startTerm);
    if (startTermIndex < 0) {
        return null;
    }
    // the end term is only searched behind the start term
    int phraseStart = startTermIndex + startTerm.length();
    int phraseEnd = input.indexOf(endTerm, phraseStart);
    return phraseEnd < 0 ? null : input.substring(phraseStart, phraseEnd);
}
}