
// org.kiwiproject.beta.base.KiwiStrings2 (Maven / Gradle / Ivy)
package org.kiwiproject.beta.base;
import static java.util.stream.Collectors.joining;
import static java.util.stream.Collectors.toUnmodifiableSet;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.kiwiproject.base.KiwiPreconditions.checkArgumentNotBlank;
import static org.kiwiproject.base.KiwiPreconditions.checkArgumentNotNull;
import static org.kiwiproject.base.KiwiPreconditions.checkPositive;

import com.google.common.annotations.Beta;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.CaseFormat;
import com.google.common.collect.Sets;
import lombok.Synchronized;
import lombok.experimental.UtilityClass;
import org.checkerframework.checker.nullness.qual.NonNull;
import org.checkerframework.checker.nullness.qual.Nullable;

import java.util.Locale;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import java.util.stream.Stream;
/**
 * Utilities related to strings.
 * <p>
 * These utilities could be considered for kiwi's {@link org.kiwiproject.base.KiwiStrings} class.
 * Or somewhere else.
 * Or nowhere.
 */
@Beta
@UtilityClass
public class KiwiStrings2 {

    /**
     * Matches a single null character (Unicode U+0000).
     */
    private static final Pattern NULL_CHAR_PATTERN = Pattern.compile("\u0000");

    /**
     * The largest exponent {@code n} for which {@code 2^n} still fits in an {@code int}
     * (2^30 = 1,073,741,824, whereas 2^31 is one more than {@link Integer#MAX_VALUE}).
     */
    private static final int MAX_INT_POWER_OF_TWO_EXPONENT = 30;

    /**
     * The default maximum number of strings that will be generated by {@link #randomCaseVariants(String, int)}
     * and {@link #randomCaseVariants(String, int, Locale)}.
     */
    public static final int DEFAULT_RANDOM_CASE_GENERATION_LIMIT = 10_000;

    // Guarded by the lock that Lombok's @Synchronized generates for the static accessor methods below.
    @VisibleForTesting
    static int randomCaseGenerationLimit = DEFAULT_RANDOM_CASE_GENERATION_LIMIT;

    /**
     * Convert a camelCase value to snake_case.
     *
     * @param value the camelCase value; must not be blank
     * @return the converted snake_case value
     * @throws IllegalArgumentException if value is blank
     */
    public static String camelToSnakeCase(String value) {
        return camelToSnakeCaseOrEmpty(value)
                .orElseThrow(() -> new IllegalArgumentException("value must not be blank"));
    }

    /**
     * Convert a camelCase value to snake_case.
     *
     * @param value the camelCase value
     * @return Optional containing the converted snake_case value, or an empty Optional if the input value is blank
     */
    public static Optional<String> camelToSnakeCaseOrEmpty(@Nullable String value) {
        return Optional.ofNullable(camelToSnakeCaseOrNull(value));
    }

    /**
     * Convert a camelCase value to snake_case.
     *
     * @param value the camelCase value
     * @return the converted snake_case value, or null if the input value is blank
     */
    public static @Nullable String camelToSnakeCaseOrNull(@Nullable String value) {
        if (isBlank(value)) {
            return null;
        }

        return CaseFormat.LOWER_CAMEL.converterTo(CaseFormat.LOWER_UNDERSCORE).convert(value);
    }

    /**
     * Replace null characters (Unicode U+0000) in {@code str} with an empty string.
     *
     * @param str the string to replace within
     * @return a string with null characters removed, the original string if no null characters exist in it,
     * or null if {@code str} is null
     */
    public static @Nullable String replaceNullCharactersWithEmpty(@Nullable String str) {
        return replaceNullCharacters(str, "", null);
    }

    /**
     * Replace null characters (Unicode U+0000) in {@code str} with the given replacement string. If the input
     * string is null, the default value is returned.
     * <p>
     * The replacement is always inserted literally; characters such as {@code $} and backslash have no
     * special meaning in it.
     *
     * @param str          the string to replace within
     * @param replacement  the literal replacement string; must not be null when {@code str} is not null
     * @param defaultValue the value to return if {@code str} is null
     * @return a string with null characters replaced, the original string if no null characters exist in it,
     * or {@code defaultValue} if {@code str} is null
     */
    public static @Nullable String replaceNullCharacters(@Nullable String str,
                                                         String replacement,
                                                         @Nullable String defaultValue) {
        if (str == null) {
            return defaultValue;
        }

        // Matcher#replaceAll interprets '$' and '\' in the replacement as group references and escapes,
        // so quote it to make sure that arbitrary replacement text is inserted verbatim.
        return NULL_CHAR_PATTERN.matcher(str).replaceAll(Matcher.quoteReplacement(replacement));
    }

    /**
     * Generate a set of up to three strings whose characters match the input string but are random upper and
     * lower case using the default {@link Locale}. Fewer than three strings are returned when the input does
     * not permit three distinct variants, e.g., two for a single letter, or one when the input contains no
     * characters that have distinct upper and lower case forms.
     *
     * @param input the input string to generate random case variants for; must not be blank
     * @return an unmodifiable set containing up to three random case variants of the input string
     */
    public static Set<String> randomCaseVariants(@NonNull String input) {
        return randomCaseVariants(input, Locale.getDefault());
    }

    /**
     * Generate a set of up to three strings whose characters match the input string but are random upper and
     * lower case using the given {@link Locale}. Fewer than three strings are returned when the input does
     * not permit three distinct variants, e.g., two for a single letter, or one when the input contains no
     * characters that have distinct upper and lower case forms.
     *
     * @param input  the input string to generate random case variants for; must not be blank
     * @param locale the Locale to use during conversion to upper and lower cases
     * @return an unmodifiable set containing up to three random case variants of the input string
     */
    public static Set<String> randomCaseVariants(@NonNull String input, @NonNull Locale locale) {
        return randomCaseVariants(input, 3, locale);
    }

    /**
     * Get the current maximum number of random strings that can be generated using
     * {@link #randomCaseVariants(String, int)} and {@link #randomCaseVariants(String, int, Locale)}.
     *
     * @return the maximum number of random strings that can be generated
     */
    @Synchronized
    public static int randomCaseGenerationLimit() {
        return randomCaseGenerationLimit;
    }

    /**
     * Set the maximum number of random strings that can be generated using {@link #randomCaseVariants(String, int)}
     * and {@link #randomCaseVariants(String, int, Locale)} to the given limit.
     * <p>
     * Setting this too high (e.g., to {@link Integer#MAX_VALUE}) will probably result in bad things happening
     * such as extremely long run times, out of memory errors, etc. Why? Because if the input string is very
     * long, and the strings are generated using random capitalization, it could take a prohibitively long
     * time (i.e., near infinity) to generate the desired number of unique variants.
     *
     * @param limit the maximum number of random strings; must not be negative
     * @throws IllegalArgumentException if limit is negative
     * @implNote This is expected to be called rarely, but it still uses an internal lock to
     * provide thread-safe access.
     */
    @Synchronized
    public static void setRandomCaseGenerationLimit(int limit) {
        // Fail here rather than later inside Stream#limit, which rejects negative sizes.
        if (limit < 0) {
            throw new IllegalArgumentException("limit must not be negative");
        }

        randomCaseGenerationLimit = limit;
    }

    /**
     * Reset the maximum number of random strings that can be generated using {@link #randomCaseVariants(String, int)}
     * to the default value ({@link #DEFAULT_RANDOM_CASE_GENERATION_LIMIT}).
     *
     * @implNote This is expected to be called rarely, but it still uses an internal lock to
     * provide thread-safe access.
     */
    @Synchronized
    public static void resetRandomCaseGenerationLimit() {
        randomCaseGenerationLimit = DEFAULT_RANDOM_CASE_GENERATION_LIMIT;
    }

    /**
     * Generate a set of strings whose characters match the input string but are randomly upper and lower case
     * using the default {@link Locale}.
     * <p>
     * The maximum number of distinct strings that can be generated is 2^N where N is the number of
     * characters in the input string that have distinct upper and lower case forms (digits, punctuation,
     * and similar characters do not count). If {@code desiredSize} is greater than this maximum, only the
     * maximum will be generated. In addition, there is a hard maximum limit for the number of strings that
     * will be generated. By default, this is {@link #DEFAULT_RANDOM_CASE_GENERATION_LIMIT}, but its value can
     * be changed using {@link #setRandomCaseGenerationLimit(int)}.
     *
     * @param input       the input string to generate random case variants for; must not be blank
     * @param desiredSize the number of desired strings in the returned set; must be positive
     * @return an unmodifiable set containing random case variants of the input string
     */
    public static Set<String> randomCaseVariants(@NonNull String input, int desiredSize) {
        return randomCaseVariants(input, desiredSize, Locale.getDefault());
    }

    /**
     * Generate a set of strings whose characters match the input string but are random upper and lower case
     * using the given {@link Locale}.
     * <p>
     * See additional notes in {@link #randomCaseVariants(String, int)} regarding the maximum number of strings
     * that can be generated based on the input string as well as a limit imposed by this class.
     *
     * @param input       the input string to generate random case variants for; must not be blank
     * @param desiredSize the number of desired strings in the returned set; must be positive
     * @param locale      the Locale to use during conversion to upper and lower cases
     * @return an unmodifiable set containing random case variants of the input string
     */
    public static Set<String> randomCaseVariants(@NonNull String input, int desiredSize, @NonNull Locale locale) {
        checkArgumentNotBlank(input);
        checkPositive(desiredSize);
        checkArgumentNotNull(locale);

        // The stream below only terminates once it has produced "hardLimit" distinct strings, so the limit
        // must never exceed the number of distinct variants that actually exist for this input.
        var maxNumberOfVariants = maxVariantsOf(input, locale);
        var mathematicalLimit = Math.min(desiredSize, maxNumberOfVariants);
        var hardLimit = Math.min(mathematicalLimit, randomCaseGenerationLimit());

        return Stream.generate(() -> randomlyCapitalize(input, locale))
                .distinct()
                .limit(hardLimit)
                .collect(toUnmodifiableSet());
    }

    /**
     * Returns the maximum number of variants containing unique capitalization for the given input string.
     * This is 2 raised to the power of the number of code points in the input whose upper and lower case
     * forms (in the given locale) differ. For example, "hello" has five such characters, so the maximum
     * is 2^5 = 32, while "a1" has only one, so the maximum is 2^1 = 2.
     *
     * @implNote 2^31 is already greater than {@link Integer#MAX_VALUE}, so when more than 30 characters
     * can change case, {@link Integer#MAX_VALUE} is returned explicitly instead of relying on overflow
     * or on the saturating behavior of a floating-point cast.
     */
    private static int maxVariantsOf(@NonNull String input, @NonNull Locale locale) {
        var caseSensitiveCount = codePointStrings(input)
                .filter(ch -> !ch.toUpperCase(locale).equals(ch.toLowerCase(locale)))
                .count();

        return caseSensitiveCount > MAX_INT_POWER_OF_TWO_EXPONENT
                ? Integer.MAX_VALUE
                : 1 << (int) caseSensitiveCount;
    }

    /**
     * For the given input string, return a string whose characters match the input string but are random
     * upper and lower case using the given {@link Locale}.
     *
     * @param input  the input string to randomly capitalize; must not be blank
     * @param locale the Locale to use during conversion to upper and lower cases
     * @return a string matching the input string but with random capitalization
     */
    public static String randomlyCapitalize(@NonNull String input, @NonNull Locale locale) {
        checkArgumentNotBlank(input);
        checkArgumentNotNull(locale);

        var rand = ThreadLocalRandom.current();
        return codePointStrings(input)
                .map(ch -> rand.nextBoolean() ? ch.toUpperCase(locale) : ch.toLowerCase(locale))
                .collect(joining());
    }

    /**
     * Splits the input into one string per Unicode code point, so that a character encoded as a surrogate
     * pair stays intact and can be case-converted as a whole.
     */
    private static Stream<String> codePointStrings(@NonNull String input) {
        return input.codePoints().mapToObj(codePoint -> new String(Character.toChars(codePoint)));
    }

    /**
     * Generate a set of the three "standard" case variants: all uppercase, all lowercase, and sentence
     * case (only the first letter is capitalized, and the rest are lowercase). If some of the variants
     * are identical, e.g., when the input string has only one character, the returned set contains
     * fewer than three strings.
     * <p>
     * Uses the default {@link Locale} to perform case conversions.
     *
     * @param input the input string to generate "standard" case variants for; must not be blank
     * @return an unmodifiable set containing the distinct "standard" case variants of the input string
     */
    public static Set<String> standardCaseVariants(@NonNull String input) {
        return standardCaseVariants(input, Locale.getDefault());
    }

    /**
     * Generate a set of the three "standard" case variants: all uppercase, all lowercase, and sentence
     * case (only the first letter is capitalized, and the rest are lowercase). If some of the variants
     * are identical, e.g., when the input string has only one character, the returned set contains
     * fewer than three strings.
     * <p>
     * Uses the given {@link Locale} to perform case conversions.
     *
     * @param input  the input string to generate "standard" case variants for; must not be blank
     * @param locale the Locale to use during conversion to upper and lower cases
     * @return an unmodifiable set containing the distinct "standard" case variants of the input string
     */
    public static Set<String> standardCaseVariants(@NonNull String input, @NonNull Locale locale) {
        checkArgumentNotBlank(input);
        checkArgumentNotNull(locale);

        var upperCase = input.toUpperCase(locale);
        var lowerCase = input.toLowerCase(locale);
        var sentenceCase = sentenceCase(input, locale);

        // Collect into a mutable set first because identical variants are expected (Set.of rejects duplicates).
        var variants = Sets.newHashSet(upperCase, lowerCase, sentenceCase);
        return Set.copyOf(variants);
    }

    /**
     * Upper-cases the first code point of the (non-empty) input and lower-cases everything after it.
     */
    private static String sentenceCase(@NonNull String input, @NonNull Locale locale) {
        // Split after the first code point, not the first char, so a leading surrogate pair is not cut in half.
        var endOfFirstCodePoint = input.offsetByCodePoints(0, 1);
        var upperCaseFirstChar = input.substring(0, endOfFirstCodePoint).toUpperCase(locale);
        var lowerCaseRemainingChars = input.substring(endOfFirstCodePoint).toLowerCase(locale);
        return upperCaseFirstChar + lowerCaseRemainingChars;
    }
}