All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.kiwiproject.beta.base.KiwiStrings2 Maven / Gradle / Ivy

package org.kiwiproject.beta.base;

import static java.util.stream.Collectors.joining;
import static java.util.stream.Collectors.toUnmodifiableSet;
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.kiwiproject.base.KiwiPreconditions.checkArgumentNotBlank;
import static org.kiwiproject.base.KiwiPreconditions.checkArgumentNotNull;
import static org.kiwiproject.base.KiwiPreconditions.checkPositive;

import com.google.common.annotations.Beta;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.CaseFormat;
import com.google.common.collect.Sets;
import lombok.Synchronized;
import lombok.experimental.UtilityClass;
import org.checkerframework.checker.nullness.qual.NonNull;
import org.checkerframework.checker.nullness.qual.Nullable;

import java.util.Locale;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.IntStream;
import java.util.stream.Stream;

/**
 * Utilities related to strings.
 * 

* These utilities could be considered for kiwi's {@link org.kiwiproject.base.KiwiStrings} class. * Or somewhere else. * Or nowhere. */ @Beta @UtilityClass public class KiwiStrings2 { private static final Pattern NULL_CHAR_PATTERN = Pattern.compile("\u0000"); /** * The default maximum number of strings that will be generated by {@link #randomCaseVariants(String, int)} * and {@link #randomCaseVariants(String, int, Locale)}. */ public static final int DEFAULT_RANDOM_CASE_GENERATION_LIMIT = 10_000; @VisibleForTesting static int randomCaseGenerationLimit = DEFAULT_RANDOM_CASE_GENERATION_LIMIT; /** * Convert a camelCase value to snake_case. * * @param value the camelCase value; must not be blank * @return the converted snake_case value * @throws IllegalArgumentException if value is blank */ public static String camelToSnakeCase(String value) { return camelToSnakeCaseOrEmpty(value) .orElseThrow(() -> new IllegalArgumentException("value must not be blank")); } /** * Convert a camelCase value to snake_case. * * @param value the camelCase value * @return Optional containing the converted snake_case value, or an empty Optional if the input value is blank */ public static Optional camelToSnakeCaseOrEmpty(@Nullable String value) { return Optional.ofNullable(camelToSnakeCaseOrNull(value)); } /** * Convert a camelCase value to snake_case. * * @param value the camelCase value * @return the converted snake_case value, or null if the input value is blank */ public static String camelToSnakeCaseOrNull(@Nullable String value) { if (isBlank(value)) { return null; } return CaseFormat.LOWER_CAMEL.converterTo(CaseFormat.LOWER_UNDERSCORE).convert(value); } /** * Replace null characters (Unicode U+0000) in {@code str} with an empty string. 
* * @param str the string to replace within * @return a string with null characters replaced, or the original string if no null characters exist in it */ public static String replaceNullCharactersWithEmpty(@Nullable String str) { return replaceNullCharacters(str, "", null); } /** * Replace null characters (Unicode U+0000) in {@code str} with the given replacement string. If the input * string is null, the default value is returned. * * @param str the string to replace within * @param replacement the replacement string * @param defaultValue the value to return if {@code str} is null * @return a string with null characters replaced, or the original string if no null characters exist in it */ public static String replaceNullCharacters(@Nullable String str, String replacement, @Nullable String defaultValue) { return Optional.ofNullable(str) .map(s -> NULL_CHAR_PATTERN.matcher(s).replaceAll(replacement)) .orElse(defaultValue); } /** * Generate a set of three strings (or two when the input is only one character) whose characters match the input * string but are random upper and lower case using the default {@link Locale}. * * @param input the input string to generate random case variants for * @return an unmodifiable set containing three random case variants of the input string */ public static Set randomCaseVariants(@NonNull String input) { return randomCaseVariants(input, Locale.getDefault()); } /** * Generate a set of three strings (or two when the input is only one character) whose characters match the input * string but are random upper and lower case using the given {@link Locale}. 
* * @param input the input string to generate random case variants for * @param locale the Locale to use to conversion to upper and lower cases * @return an unmodifiable set containing three random case variants of the input string */ public static Set randomCaseVariants(@NonNull String input, @NonNull Locale locale) { return randomCaseVariants(input, 3, locale); } /** * Get the current maximum number of random strings that can be generated using * {@link #randomCaseVariants(String, int)} and {@link #randomCaseVariants(String, int, Locale)}. * * @return the maximum number of random strings that can be generated */ @Synchronized public static int randomCaseGenerationLimit() { return randomCaseGenerationLimit; } /** * Set the maximum number of random strings that can be generated using {@link #randomCaseVariants(String, int)} * and {@link #randomCaseVariants(String, int, Locale)} to the given limit. *

* Setting this too high (e.g., to {@link Integer#MAX_VALUE}) will probably result in bad things happening * such as extremely long run times, out of memory errors, etc. Why? Because if the input string is very * long, and the strings are generated using random capitalization, it could take a prohibitively long * time (i.e., near infinity) to generate the desired number of unique variants. * * @param limit the maximum number of random strings * @implNote This is expected to be called rarely, but it still uses an internal lock to * provide thread-safe access. */ @Synchronized public static void setRandomCaseGenerationLimit(int limit) { randomCaseGenerationLimit = limit; } /** * Reset the maximum number of random strings that can be generated using {@link #randomCaseVariants(String, int)} * to the default value ({@link #DEFAULT_RANDOM_CASE_GENERATION_LIMIT}). * * @implNote This is expected to be called rarely, but it still uses an internal lock to * provide thread-safe access. */ @Synchronized public static void resetRandomCaseGenerationLimit() { randomCaseGenerationLimit = DEFAULT_RANDOM_CASE_GENERATION_LIMIT; } /** * Generate a set of strings whose characters match the input string but are randomly upper and lower case * using the default {@link Locale}. *

* The maximum number of distinct strings that can be generated is 2^N where N is the length of the * input string. If {@code desiredSize} is greater than this maximum, only the maximum will be generated. * In addition, there is a hard maximum limit for the number of strings that will be generated. By default, * this is {@link #DEFAULT_RANDOM_CASE_GENERATION_LIMIT}, but its value can be changed using * {@link #setRandomCaseGenerationLimit(int)}. * * @param input the input string to generate random case variants for * @param desiredSize the number of desired strings in the returned set * @return an unmodifiable set containing random case variants of the input string */ public static Set randomCaseVariants(@NonNull String input, int desiredSize) { return randomCaseVariants(input, desiredSize, Locale.getDefault()); } /** * Generate a set of strings whose characters match the input string but are random upper and lower case * using the given {@link Locale}. *

* See additional notes in {@link #randomCaseVariants(String, int)} regarding the maximum number of strings * that can be generated based on the input string length as well as a limit imposed by this class. * * @param input the input string to generate random case variants for * @param desiredSize the number of desired strings in the returned set * @param locale the Locale to use during conversion to upper and lower cases * @return an unmodifiable set containing random case variants of the input string */ public static Set randomCaseVariants(@NonNull String input, int desiredSize, @NonNull Locale locale) { checkArgumentNotBlank(input); checkPositive(desiredSize); checkArgumentNotNull(locale); var maxNumberOfVariants = maxVariantsOf(input); var mathematicalLimit = Math.min(desiredSize, maxNumberOfVariants); var hardLimit = Math.min(mathematicalLimit, randomCaseGenerationLimit()); return Stream.generate(() -> randomlyCapitalize(input, locale)) .distinct() .limit(hardLimit) .collect(toUnmodifiableSet()); } /** * Returns the maximum number of variants containing unique capitalization for the given input string. * This is 2 raised to the power of the length of the input string. For example, if the input string * is 5 characters long, then the maximum unique variants are 2^5 = 32, while if the input string is * 10 characters long, the maximum unique variants are 2^10 = 1024. * * @implNote If input is longer than 32 characters, 2^32 is greater than {@link Integer#MAX_VALUE}, but * casting the result of {@link Math#pow(double, double)} to {@code int} results in {@link Integer#MAX_VALUE}. * Rather than rely on this behavior, explicitly check the input length and return the maximum value of * an int directly when the input string length is more than 32, which (hopefully) makes it clear in the code * that 32 is the maximum value that 2 can be raised without exceeding the maximum value of an int. 
*/ private static int maxVariantsOf(@NonNull String input) { var length = input.length(); return length > 32 ? Integer.MAX_VALUE : (int) Math.pow(2, length); } /** * For the given input string, return a string whose characters match the input string but are random * upper and lower case using the given {@link Locale}. * * @param input the input string to randomly capitalize * @param locale the Locale to use during conversion to upper and lower cases * @return a string marching the input string but with random capitalization */ public static String randomlyCapitalize(@NonNull String input, @NonNull Locale locale) { checkArgumentNotBlank(input); checkArgumentNotNull(locale); var rand = ThreadLocalRandom.current(); return IntStream.range(0, input.length()) .mapToObj(idx -> { var chStr = String.valueOf(input.charAt(idx)); return rand.nextBoolean() ? chStr.toUpperCase(locale) : chStr.toLowerCase(locale); }) .collect(joining()); } /** * Generate a set of the three "standard" case variants: all uppercase, all lowercase, and sentence * case (only the first letter is capitalized, and the rest are lowercase). *

* Uses the default {@link Locale} to perform case conversions. * * @param input the input string to generate "standard" case variants for * @return an unmodifiable set containing the three "standard" case variants of the input string */ public static Set standardCaseVariants(@NonNull String input) { return standardCaseVariants(input, Locale.getDefault()); } /** * Generate a set of the three "standard" case variants: all uppercase, all lowercase, and sentence * case (only the first letter is capitalized, and the rest are lowercase). If the input string has * only one character, then the returned set contains only two strings: uppercase and lowercase. *

* Uses the given {@link Locale} to perform case conversions. * * @param input the input string to generate "standard" case variants for * @param locale the Locale to use to conversion during upper and lower cases * @return an unmodifiable set containing the three "standard" case variants of the input string */ public static Set standardCaseVariants(@NonNull String input, @NonNull Locale locale) { checkArgumentNotBlank(input); checkArgumentNotNull(locale); var upperCase = input.toUpperCase(locale); var lowerCase = input.toLowerCase(locale); var sentenceCase = sentenceCase(input, locale); var variants = Sets.newHashSet(upperCase, lowerCase, sentenceCase); return Set.copyOf(variants); } private static String sentenceCase(@NonNull String input, @NonNull Locale locale) { var upperCaseFirstChar = input.substring(0, 1).toUpperCase(locale); var lowerCaseRemainingChars = input.substring(1).toLowerCase(locale); return upperCaseFirstChar + lowerCaseRemainingChars; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy