io.github.bhowell2.apilib.checks.StringChecks Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of api-lib Show documentation
Show all versions of api-lib Show documentation
API verification library.
The newest version!
package io.github.bhowell2.apilib.checks;
import io.github.bhowell2.apilib.checks.utils.CodePointUtils;
import io.github.bhowell2.apilib.checks.utils.CollectionUtils;
import io.github.bhowell2.apilib.checks.utils.IntegerUtils;
import io.github.bhowell2.apilib.checks.utils.StringUtils;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
* Great care must be taken when using string checks in Java (and probably any language..).
* There are a few articles (see below) the user may want to read to brush up on unicode and
* how "characters" are represented in the various unicode encodings (e.g., UTF-8, UTF-16).
* Generally speaking, a String in Java should be considered to be in UTF-16 (which uses 2
* bytes, 16 bits, to represent a character), but as of Java 9 strings may be in an optimized
* format (called compact. encoding: ISO-8859-1/Latin-1) if they ONLY contain characters that
* can be represented in 1 byte - this is not too relevant to the string checks here, but is
* worth mentioning.
*
* This class will provide checks that help the user handle unicode encodings when checking a
* string for certain conditions.
*
* Unicode Articles:
* https://www.joelonsoftware.com/2003/10/08/the-absolute-minimum-every-software-developer-absolutely-positively-must-know-about-unicode-and-character-sets-no-excuses/
* http://kunststube.net/encoding/
* https://kishuagarwal.github.io/unicode.html
* http://utf8everywhere.org/ - java doesn't use this, but still provides information for a solid understanding
* http://www.unicode.org/versions/latest/ - of course
*
*
* Overview of terms (taken from: https://stackoverflow.com/questions/27331819/whats-the-difference-between-a-character-a-code-point-a-glyph-and-a-grapheme)
*
* Character - is an overloaded term than can mean many things.
*
* Code point - is the atomic unit of information. Text is a sequence of code points. Each code
* point is a number which is given meaning by the Unicode standard.
*
* Code unit - is the unit of storage of a part of an encoded code point. In UTF-8 this means 8-bits,
* in UTF-16 this means 16-bits. A single code unit may represent a full code point, or
* part of a code point. For example, the snowman glyph (☃) is a single code point but 3
* UTF-8 code units, and 1 UTF-16 code unit.
*
* Grapheme - is a sequence of one or more code points that are displayed as a single, graphical unit
* that a reader recognizes as a single element of the writing system. For example, both a
* and ä are graphemes, but they may consist of multiple code points (e.g. ä may be two code
* points, one for the base character a followed by one for the diaresis; but there's also
* an alternative, legacy, single code point representing this grapheme). Some code points
* are never part of any grapheme (e.g. the zero-width non-joiner, or directional overrides).
*
* Glyph - is an image, usually stored in a font (which is a collection of glyphs), used to represent
* graphemes or parts thereof. Fonts may compose multiple glyphs into a single representation,
* for example, if the above ä is a single code point, a font may chose to render that as two
* separate, spatially overlaid glyphs. For OTF, the font's GSUB and GPOS tables contain
* substitution and positioning information to make this work. A font may contain multiple
* alternative glyphs for the same grapheme, too.
*
* As mentioned in the terms above, a Grapheme may appear to be one character, but could consists of
* multiple code points, so even the string checks provided below may not work exactly as expected.
* Generally the user should prefer to use the "codePoint*" functions over non-codePoint functions to
* handle unicode input better.
*
* E.g., {@code "🤓".length() = 2}, because it consists of 2 characters.
* The method {@link String#charAt(int)} will return the individual char at the specified position, but
* this would not be the actual item that was displayed at what is visibly position 0 (array index
* notation) of the text sequence in the example, because it actually consists of 2 chars. While 2 chars
* (32 bits) is plenty to represent all of the codepoints in the unicode standard, some "characters" at
* a given position in a text sequence are actually a combination of codepoints (grapheme) and are thus
* more than even 2 chars (data type, 16 bits)..
*
* @author Blake Howell
*/
public final class StringChecks {
private StringChecks() {} // no instantiation
/**
* Check to ensure that the parameter is a String.
*/
public static final Check IS_STRING = Check.alwaysPass(String.class);
/**
* Check to ensure that the string is empty (i.e., length = 0).
*/
public static final Check IS_EMPTY = s -> s.isEmpty()
?
Check.Result.success()
:
Check.Result.failure("Must be empty.");
public static final Check IS_NOT_EMPTY = s -> !s.isEmpty()
?
Check.Result.success()
:
Check.Result.failure("Cannot be empty.");
/**
* Check to ensure the string is empty or only contains whitespace.
* This supports supplementary unicode characters.
*/
public static final Check IS_EMPTY_OR_ONLY_WHITESPACE = s -> {
if (s.length() == 0) {
return Check.Result.success();
}
boolean isOnlyWhitespace = true;
for (int i = 0; i < s.length(); i++) {
/*
* This should not be run for the second part of the surrogate pair, but even if the position is a low-end
* surrogate pair it will not be whitespace due to the low-end being U+DC00 to U+DFFF, which is not whitespace.
* */
int codePoint = s.codePointAt(i);
if (!Character.isWhitespace(codePoint)) {
isOnlyWhitespace = false;
break;
}
}
return isOnlyWhitespace
?
Check.Result.success()
:
Check.Result.failure("Must be empty or contain only whitespace.");
};
/**
* Check to ensure the string is NOT empty or contains only whitespace.
*/
public static final Check IS_NOT_EMPTY_OR_ONLY_WHITESPACE = s ->
IS_EMPTY_OR_ONLY_WHITESPACE.check(s).successful()
?
Check.Result.failure("Cannot be empty or only contain whitespace.")
:
Check.Result.success();
/**
* Only permits unreserved URL characters in the string (i.e., a-z, 0-9, '-', '.', '_', '~').
* Successful if the string only contains unreserved URL characters, fails otherwise.
*/
public static final Check ONLY_ALLOW_UNRESERVED_URL_CHARS = s -> {
for (int i = 0; i < s.length(); i++) {
/*
* Can just use a character here, because even if the string provided contains 2-character codepoints
* they will be outside of the range of unreserved url characters (U+D8000 to U+DFFFF in each position).
* */
char c = s.charAt(i);
if (
(c < 'A' || c > 'Z') &&
(c < 'a' || c > 'z') &&
(c < '0' || c > '9') &&
(c != '-' && c != '.' && c != '_' && c != '~')
) {
return Check.Result.failure("Contains reserved URL characters. May only contain A-Z, " +
"a-z, 0-9, '-', '.', '_', and '~'");
}
}
return Check.Result.success();
};
private static final Pattern BASIC_EMAIL_PATTERN = Pattern.compile(".+@.+\\..{2,}$");
/**
* A very basic email check. This only limits the email string to the form [email protected],
* where X is at least 1 character, Y is at least 1 character, and Z is 2 or more
* characters (this is because public top level domains are a minimum of 2 chars).
*
* Warning: this does not limit the unicode at any position, so emojis could
* be submitted. The developer should always validate the email by actually
* sending a verification email.
*/
public static final Check MATCHES_BASIC_EMAIL_PATTERN = s ->
BASIC_EMAIL_PATTERN.matcher(s).matches()
?
Check.Result.success()
:
Check.Result.failure("Is not a valid email address.");
/**
* Creates check which ensures the length of the string is greater than min.
* This uses {@link String#length()} which is based on how many java characters
* (each char is 16 bits) are in the string, which may not actually be desired
* since a visual position in the text sequence may take 2 java characters to
* represent it. Generally, using {@link #codePointCountGreaterThan(int)} is
* preferable and more aligned with expectations.
*
* E.g.,
* {@code "🤓".length() = 2} because the emoji takes 2 java characters to represent it,
* but {@code "🤓".codePointCount(..) = 1} because the emoji is one code point.
*/
public static Check lengthGreaterThan(int min) {
IntegerUtils.requireIntGreaterThanOrEqualTo(0, min);
return s -> {
if (s.length() > min) {
return Check.Result.success();
} else {
return Check.Result.failure("Length must be greater than " + min + ".");
}
};
}
/**
* Creates check which ensures the code point count of the string is greater than min.
* This uses {@link String#codePointCount(int, int)} rather than {@link String#length()}.
*
* E.g.,
* {@code "🤓".length() = 2} because the emoji takes 2 java characters to represent it,
* but {@code "🤓".codePointCount(..) = 1} because the emoji is one code point.
*/
public static Check codePointCountGreaterThan(int min) {
IntegerUtils.requireIntGreaterThanOrEqualTo(0, min);
return s -> {
if (s.codePointCount(0, s.length()) > min) {
return Check.Result.success();
} else {
return Check.Result.failure("Length must be greater than " + min + ".");
}
};
}
/**
* Creates check which ensures the length of the string is greater than or equal to min.
* This uses {@link String#length()} which is based on how many java characters
* (each char is 16 bits) are in the string, which may not actually be desired
* since a visual position in the text sequence may take 2 java characters to
* represent it. Generally, using {@link #codePointCountGreaterThanOrEqualTo(int)} is
* preferable and more aligned with expectations.
*
* E.g.,
* {@code "🤓".length() = 2} because the emoji takes 2 java characters to represent it,
* but {@code "🤓".codePointCount(..) = 1} because the emoji is one code point.
*/
public static Check lengthGreaterThanOrEqualTo(int min) {
IntegerUtils.requireIntGreaterThanOrEqualTo(0, min);
return s -> {
if (s.length() >= min) {
return Check.Result.success();
} else {
return Check.Result.failure("Length must be greater than or equal to " + min + ".");
}
};
}
/**
* Creates check which ensures the code point count of the string is greater than
* or equal to min. This uses {@link String#codePointCount(int, int)} rather than
* {@link String#length()}.
*
* E.g.,
* {@code "🤓".length() = 2} because the emoji takes 2 java characters to represent it,
* but {@code "🤓".codePointCount(..) = 1} because the emoji is one code point.
*/
public static Check codePointCountGreaterThanOrEqualTo(int min) {
IntegerUtils.requireIntGreaterThanOrEqualTo(0, min);
return s -> {
if (s.codePointCount(0, s.length()) >= min) {
return Check.Result.success();
} else {
return Check.Result.failure("Length must be greater than or equal to " + min + ".");
}
};
}
/**
* Creates check which ensures the length of the string is less than max.
* This uses {@link String#length()} which is based on how many java characters
* (each char is 16 bits) are in the string, which may not actually be desired
* since a visual position in the text sequence may take 2 java characters to
* represent it. Generally, using {@link #codePointCountLessThan(int)} is
* preferable and more aligned with expectations.
*
* E.g.,
* {@code "🤓".length() = 2} because the emoji takes 2 java characters to represent it,
* but {@code "🤓".codePointCount(..) = 1} because the emoji is one code point.
*/
public static Check lengthLessThan(int max) {
IntegerUtils.requireIntGreaterThanOrEqualTo(1, max, "String length cannot be less than 0.");
return s -> {
if (s.length() < max) {
return Check.Result.success();
} else {
return Check.Result.failure("Length must be less than " + max + ".");
}
};
}
/**
* Creates check which ensures the code point count of the string is less than max.
* This uses {@link String#codePointCount(int, int)} rather than {@link String#length()}.
*
* E.g.,
* {@code "🤓".length() = 2} because the emoji takes 2 java characters to represent it,
* but {@code "🤓".codePointCount(..) = 1} because the emoji is one code point.
*/
public static Check codePointCountLessThan(int max) {
IntegerUtils.requireIntGreaterThanOrEqualTo(1, max, "String length cannot be less than 0.");
return s -> {
if (s.codePointCount(0, s.length()) < max) {
return Check.Result.success();
} else {
return Check.Result.failure("Length must be less than " + max + ".");
}
};
}
/**
* Creates check which ensures the length of the string is less than or equal to max.
* This uses {@link String#length()} which is based on how many java characters
* (each char is 16 bits) are in the string, which may not actually be desired
* since a visual position in the text sequence may take 2 java characters to
* represent it. Generally, using {@link #codePointCountLessThan(int)} is
* preferable and more aligned with expectations.
*
* E.g.,
* {@code "🤓".length() = 2} because the emoji takes 2 java characters to represent it,
* but {@code "🤓".codePointCount(..) = 1} because the emoji is one code point.
*/
public static Check lengthLessThanOrEqualTo(int max) {
IntegerUtils.requireIntGreaterThanOrEqualTo(0, max);
return s -> {
if (s.length() <= max) {
return Check.Result.success();
} else {
return Check.Result.failure("Length must be less than of equal to " + max + ".");
}
};
}
/**
* Creates check which ensures the code point count of the string is less than or
* equal to max. This uses {@link String#codePointCount(int, int)} rather than
* {@link String#length()}.
*
* E.g.,
* {@code "🤓".length() = 2} because the emoji takes 2 java characters to represent it,
* but {@code "🤓".codePointCount(..) = 1} because the emoji is one code point.
*/
public static Check codePointCountLessThanOrEqualTo(int max) {
IntegerUtils.requireIntGreaterThanOrEqualTo(0, max);
return s -> {
if (s.codePointCount(0, s.length()) <= max) {
return Check.Result.success();
} else {
return Check.Result.failure("Length must be less than of equal to " + max + ".");
}
};
}
/**
* Creates check which ensures the length of the string is equal to length.
* This uses {@link String#length()} which is based on how many java characters
* (each char is 16 bits) are in the string, which may not actually be desired
* since a visual position in the text sequence may take 2 java characters to
* represent it. Generally, using {@link #codePointCountEqualTo(int)} is
* preferable and more aligned with expectations.
*
* E.g.,
* {@code "🤓".length() = 2} because the emoji takes 2 java characters to represent it,
* but {@code "🤓".codePointCount(..) = 1} because the emoji is one code point.
*/
public static Check lengthEqualTo(int length) {
IntegerUtils.requireIntGreaterThanOrEqualTo(0, length);
return s -> {
if (s.length() == length) {
return Check.Result.success();
} else {
return Check.Result.failure("Length must be equal to " + length + ".");
}
};
}
/**
* Creates check which ensures the code point count of the string is equal to length.
* This uses {@link String#codePointCount(int, int)} rather than {@link String#length()}.
*
* E.g.,
* {@code "🤓".length() = 2} because the emoji takes 2 java characters to represent it,
* but {@code "🤓".codePointCount(..) = 1} because the emoji is one code point.
*/
public static Check codePointCountEqualTo(int length) {
IntegerUtils.requireIntGreaterThanOrEqualTo(0, length);
return s -> {
if (s.codePointCount(0, s.length()) == length) {
return Check.Result.success();
} else {
return Check.Result.failure("Length must be equal to " + length + ".");
}
};
}
/**
* Creates check which ensures the param string matches the pattern or returns a failure.
* The failure message is generic as it would be unwise to return the pattern that was
* not matched as that could aid user exploitation.
*/
public static Check matchesRegex(Pattern pattern) {
if (pattern.pattern().equals("")) {
throw new IllegalArgumentException(
"Cannot create check with empty regex. If an empty string is desired use "
+ "StringChecks.lengthEqualTo(0) or StringChecks.IS_EMPTY.");
}
return s -> pattern.matcher(s).matches()
?
Check.Result.success()
:
/*
* Regexes are fickle so not returning the regex pattern here to avoid the developer
* accidentally sending it back to the user, leading to easier exploitation.
* */
Check.Result.failure("Is not of correct form.");
}
/**
* Creates check that ensures the first "character" in the string begins with one
* of the codepoints in the supplied string.
*
* Fails on empty string.
*
* @param codePoints string where each code point is treated individually
*/
public static Check beginsWithCodePoints(String codePoints) {
Objects.requireNonNull(codePoints);
StringUtils.requireNonEmptyString(codePoints);
StringUtils.requireUniqueCodePoints(codePoints);
StringUtils.requireCodePointCountGreaterThanOrEqualTo(1, codePoints);
return s -> {
if (s.isEmpty()) {
return Check.Result.failure("Must begin with one of the following characters: '" + codePoints + "'.");
}
for (int i = 0; i < codePoints.length(); i++) {
int shouldBeginWithCodepoint = codePoints.codePointAt(i);
if (shouldBeginWithCodepoint == s.codePointAt(0)) {
return Check.Result.success();
} else if (Character.isSupplementaryCodePoint(shouldBeginWithCodepoint)) {
// skip next position, because it is 2nd char making up the single code point
i++;
}
}
return Check.Result.failure("Must begin with one of the following characters: '" + codePoints + "'.");
};
}
/**
* Creates check that ensures the string begins with a code point in the range (inclusive).
*
* Fails on empty string.
*
* @param minCodePoint single code point string that contains beginning code point of the range (inclusive)
* @param maxCodePoint single code point string that contains ending code point of the range (inclusive)
*/
public static Check beginsWithCodePointsInRange(String minCodePoint, String maxCodePoint) {
StringUtils.requireCodePointCountEqualTo(1, minCodePoint);
StringUtils.requireCodePointCountEqualTo(1, maxCodePoint);
return beginsWithCodePointsInRange(minCodePoint.codePointAt(0), maxCodePoint.codePointAt(0));
}
/**
* Creates check that ensures the string begins with a code point in the range (inclusive).
*
* Fails on empty string.
*
* @param minCodePoint inclusive minimum code point
* @param maxCodePoint inclusive maximum code point
*/
public static Check beginsWithCodePointsInRange(int minCodePoint, int maxCodePoint) {
IntegerUtils.requireIntGreaterThanOrEqualTo(0, minCodePoint);
IntegerUtils.requireIntGreaterThanOrEqualTo(minCodePoint, maxCodePoint,
"max code point must be greater than or equal to min code point.");
return s -> {
if (s.isEmpty()) {
return Check.Result.failure("First character must be within range '" +
String.valueOf(Character.toChars(minCodePoint)) +
"' to '" +
String.valueOf(Character.toChars(maxCodePoint)) +
"'.");
}
int codePoint = s.codePointAt(0);
if (codePoint < minCodePoint || codePoint > maxCodePoint) {
return Check.Result.failure("First character must be within range '" +
String.valueOf(Character.toChars(minCodePoint)) +
"' to '" +
String.valueOf(Character.toChars(maxCodePoint)) +
"'.");
}
return Check.Result.success();
};
}
/**
* Creates check that ensures the string does not begin with any code points in the supplied string.
*
* Does not fail on empty string.
*
* @param codePoints each code point in the string is treated individually
*/
public static Check doesNotBeginWithCodePoints(String codePoints) {
StringUtils.requireNonEmptyString(codePoints);
StringUtils.requireCodePointCountGreaterThanOrEqualTo(1, codePoints);
StringUtils.requireUniqueCodePoints(codePoints);
return s -> {
if (s.length() > 0) {
int beginningCodePoint = s.codePointAt(0);
for (int i = 0; i < codePoints.length(); i++) {
int shouldNotBeginWithCodepoint = codePoints.codePointAt(i);
if (shouldNotBeginWithCodepoint == beginningCodePoint) {
return Check.Result.failure("Must begin with one of the following characters: '" + codePoints + "'.");
} else if (Character.isSupplementaryCodePoint(shouldNotBeginWithCodepoint)) {
// skip next position, because it is 2nd char making up the single code point
i++;
}
}
}
return Check.Result.success();
};
}
/**
* Creates check that ensures the string does not begin with any code point
* (inclusive) in the range supplied.
*
* Does not fail on empty string.
*
* @param minCodePoint single code point string that contains beginning code point of the range (inclusive)
* @param maxCodePoint single code point string that contains ending code point of the range (inclusive)
*/
public static Check doesNotBeginWithCodePointsInRange(String minCodePoint, String maxCodePoint) {
StringUtils.requireCodePointCountEqualTo(1, minCodePoint);
StringUtils.requireCodePointCountEqualTo(1, maxCodePoint);
return doesNotBeginWithCodePointsInRange(minCodePoint.codePointAt(0), maxCodePoint.codePointAt(0));
}
/**
* Creates check that ensures the string does not begin with any code point
* (inclusive) in the range supplied.
*
* Does not fail on empty string.
*
* @param minCodePoint inclusive minimum code point
* @param maxCodePoint inclusive maximum code point
*/
public static Check doesNotBeginWithCodePointsInRange(int minCodePoint, int maxCodePoint) {
IntegerUtils.requireIntGreaterThanOrEqualTo(0, minCodePoint);
IntegerUtils.requireIntGreaterThanOrEqualTo(minCodePoint, maxCodePoint,
"max code point must be greater than or equal to min code point.");
return s -> {
if (s.length() > 0) {
int beginCodePoint = s.codePointAt(0);
if (beginCodePoint >= minCodePoint && beginCodePoint <= maxCodePoint) {
return Check.Result.failure("First character cannot be within range '" +
String.valueOf(Character.toChars(minCodePoint)) +
"' to '" +
String.valueOf(Character.toChars(maxCodePoint)) +
"'.");
}
}
return Check.Result.success();
};
}
/**
* Creates check that ensures the string begins with one of the strings supplied.
*
* Fails on empty string.
*
* @param strings list of strings that the supplied string must begin with
*/
public static Check beginsWithStrings(String... strings) {
Objects.requireNonNull(strings);
CollectionUtils.requireNonNullEntries(strings);
return s -> {
for (int i = 0; i < strings.length; i++) {
String beginWithString = strings[i];
if (s.length() < beginWithString.length()) {
continue; // cannot begin with this string if it is longer than param string
}
for (int j = 0; j < beginWithString.length(); j++) {
if (beginWithString.charAt(j) != s.charAt(j)) {
break; // does not equal this current beginWithString, break to go to next.
}
if (j == beginWithString.length() - 1) {
// all characters checked and matched
return Check.Result.success();
}
}
}
String joinedStringsErrMsg = Arrays.stream(strings)
.map(str -> "\"" + str + "\"")
.collect(Collectors.joining(","));
return Check.Result.failure("Must begin with one of the following strings: " + joinedStringsErrMsg + ".");
};
}
/**
* Creates check that ensures the string does not begin with one of the strings supplied.
*
* Does not fail on empty string.
*
* @param strings list of strings that the param string must NOT begin with
*/
public static Check doesNotBeginWithStrings(String... strings) {
Objects.requireNonNull(strings);
CollectionUtils.requireNonNullEntries(strings);
return s -> {
// checking this here, because loop below iterates over strings to check against and not the param string supplied
if (s.isEmpty()) {
return Check.Result.success();
}
for (int i = 0; i < strings.length; i++) {
String beginWithString = strings[i];
if (s.length() < beginWithString.length()) {
continue;
}
for (int j = 0; j < beginWithString.length(); j++) {
if (beginWithString.charAt(j) != s.charAt(j)) {
break;
}
if (j == beginWithString.length() - 1) {
// all characters checked and matched. should not have. fail.
String joinedStringsErrMsg = Arrays.stream(strings)
.map(str -> "\"" + str + "\"")
.collect(Collectors.joining(","));
return Check.Result.failure("Must begin with one of the following strings: " + joinedStringsErrMsg + ".");
}
}
}
// nothing matched
return Check.Result.success();
};
}
/**
* Creates check that ensures ALL code points in the param string are within the
* range (inclusive) of code points specified. This will fail if ANY code point
* in the param string is outside of the range.
*
* Fails on empty string.
*
* @param minCodePoint single code point string that contains beginning code point of the range (inclusive)
* @param maxCodePoint single code point string that contains ending code point of the range (inclusive)
*/
public static Check limitCodePointsToRange(String minCodePoint, String maxCodePoint, boolean allowWhitespace) {
StringUtils.requireCodePointCountEqualTo(1, minCodePoint);
StringUtils.requireCodePointCountEqualTo(1, maxCodePoint);
return limitCodePointsToRange(minCodePoint.codePointAt(0), maxCodePoint.codePointAt(0), allowWhitespace);
}
/**
* Creates check that ensures all code points in the string are within the specified
* range (inclusive) of code points.
*
* Fails on empty string.
*
* @param minCodePoint minimum code point (inclusive)
* @param maxCodePoint maximum code point (inclusive)
* @param allowWhitespace whether or not whitespace should be checked in range
*/
public static Check limitCodePointsToRange(int minCodePoint, int maxCodePoint, boolean allowWhitespace) {
IntegerUtils.requireIntGreaterThanOrEqualTo(0, minCodePoint);
IntegerUtils.requireIntGreaterThanOrEqualTo(minCodePoint, maxCodePoint,
"max code point must be greater than or equal to min code point.");
return s -> {
if (s.isEmpty()) {
return Check.Result.failure("Characters must be within range '" +
String.valueOf(Character.toChars(minCodePoint)) +
"' to '" +
String.valueOf(Character.toChars(maxCodePoint)) +
"'.");
}
for (int i = 0; i < s.length(); i++) {
int codePoint = s.codePointAt(i);
if (!(allowWhitespace && Character.isWhitespace(codePoint)) &&
(codePoint < minCodePoint || codePoint > maxCodePoint)) {
return Check.Result.failure("Characters must be within range '" +
String.valueOf(Character.toChars(minCodePoint)) +
"' to '" +
String.valueOf(Character.toChars(maxCodePoint)) +
"'.");
}
if (Character.isSupplementaryCodePoint(codePoint)) {
// skip next position, because it is 2nd char making up the single code point
i++;
}
}
return Check.Result.success();
};
}
/**
* Creates check that ensures the string contains the (single) code point a
* minimum number of times.
*
* Fails on empty string.
*
* @param min minimum number of times code point must be supplied in string
* @param codePoint string with a single code point (i.e., {@code String.codePointCount(..) == 1})
*/
public static Check containsCodePoint(int min, String codePoint) {
Objects.requireNonNull(codePoint);
IntegerUtils.requireIntGreaterThan(0, min);
StringUtils.requireCodePointCountEqualTo(1, codePoint);
return containsCodePoint(min, codePoint.codePointAt(0));
}
/**
* Creates check that ensures the string contains the (single) code point a
* minimum number of times.
*
* Fails on empty string.
*
* @param min minimum number of times code point must be supplied in string
* @param codePoint int of required code point
*/
public static Check containsCodePoint(int min, int codePoint) {
return containsCodePointsInRange(min, codePoint, codePoint, false);
}
/**
* Creates check that ensures the string contains code points within the specified
* range a minimum number of times. This does NOT fail if code points are outside of
* the range specified, only fails if (at least) the minimum number is not present
* in the string.
*
* Fails on empty string.
*
* @param min minimum number of times the code points in the range must appear in the string
* @param minCodePoint inclusive minimum code point in range
* @param maxCodePoint inclusive maximum code point in range. must be greater than or equal to minCodePoint
* @param unique true if a code point should only be counted once, false otherwise
* @param min
* @param minCodePoint inclusive
* @param maxCodePoint
* @param unique
* @return
*/
public static Check containsCodePointsInRange(int min,
String minCodePoint,
String maxCodePoint,
boolean unique) {
StringUtils.requireCodePointCountEqualTo(1, minCodePoint);
StringUtils.requireCodePointCountEqualTo(1, maxCodePoint);
return containsCodePointsInRange(min, minCodePoint.codePointAt(0), maxCodePoint.codePointAt(0), unique);
}
/**
* Creates check that ensures the string contains code points within the specified
* range a minimum number of times. This does NOT fail if code points are outside of
* the range specified, only fails if (at least) the minimum number is not present
* in the string. {@code unique = true} will only count a code point once.
*
* Fails on empty string.
*
* @param min minimum number of times the code points in the range must appear in the string
* @param minCodePoint inclusive minimum code point in range
* @param maxCodePoint inclusive maximum code point in range. must be greater than or equal to minCodePoint
* @param unique true if a code point should only be counted once, false otherwise
*/
public static Check containsCodePointsInRange(int min,
int minCodePoint,
int maxCodePoint,
boolean unique) {
IntegerUtils.requireIntGreaterThanOrEqualTo(1, min);
IntegerUtils.requireIntGreaterThanOrEqualTo(0, minCodePoint);
IntegerUtils.requireIntGreaterThanOrEqualTo(minCodePoint, maxCodePoint,
"Maximum code point must be greater than or equal to the minimum.");
if (unique && min > maxCodePoint - minCodePoint + 1) { // add 1 because range is inclusive
throw new IllegalArgumentException(
"N (" + min + ") cannot be greater than difference between min and max "
+ "code points if the count must be unique."
);
}
return s -> {
if (s.length() < min) {
return minCodePoint == maxCodePoint
?
Check.Result.failure("Must contain '" + String.valueOf(Character.toChars(minCodePoint)) + "' " +
"at least " + min + " times in the provided string.")
:
Check.Result.failure("Must contain at least " + min + " characters within the range '" +
CodePointUtils.codePointToString(minCodePoint) +
"' to '" +
CodePointUtils.codePointToString(maxCodePoint) + "'.");
}
Set uniqueSet = null;
if (unique) {
uniqueSet = new HashSet<>();
}
int counter = 0;
for (int i = 0; i < s.length(); i++) {
int curCodePoint = s.codePointAt(i);
if (curCodePoint >= minCodePoint && curCodePoint <= maxCodePoint) {
if (unique && uniqueSet.add(curCodePoint)) {
// increment if unique and was successfully added (returns true if it was not already in set)
counter++;
} else if (!unique) {
// always increment if not unique
counter++;
}
if (counter >= min) {
return Check.Result.success();
} else if (Character.isSupplementaryCodePoint(curCodePoint)) {
// skip next position, because it is 2nd char making up the single code point
i++;
}
}
}
return minCodePoint == maxCodePoint
?
Check.Result.failure("Must contain '" + String.valueOf(Character.toChars(minCodePoint)) + "' " +
"at least " + min + " times in the provided string.")
:
Check.Result.failure("Must contain at least " + min + " characters within the range '" +
CodePointUtils.codePointToString(minCodePoint) +
"' to '" +
CodePointUtils.codePointToString(maxCodePoint) + "'.");
};
}
/**
* Creates check that ensures the string contains code points in the 'mustContainCodePoints'
* argument (at least) a minimum number of times. This does NOT fail if code points in the
* string are not in the 'mustContainCodePoints' argument, only if the string does not contain
* the minimum number of code points. {@code unique = true} will only count a code point once.
*
* Fails on empty string.
*
* @param min minimum number of times the code points in the 'mustContainCodePoints' argument must appear in the string
* @param mustContainCodePoints string with chars/code points to be matched. each code point in the string is
* treated separately
* @param unique true if a code point should only be counted once, false otherwise
*/
public static Check containsCodePoints(int min, String mustContainCodePoints, boolean unique) {
IntegerUtils.requireIntGreaterThanOrEqualTo(1, min);
StringUtils.requireNonEmptyString(mustContainCodePoints);
StringUtils.requireUniqueCodePoints(mustContainCodePoints);
if (unique) {
int codePointCount = mustContainCodePoints.codePointCount(0, mustContainCodePoints.length());
if (min > codePointCount) {
throw new IllegalArgumentException(
"Cannot have N greater than the maximum number of code points when the "
+ "code points must be unique as the check would always fail.");
}
}
return s -> {
int counter = 0;
for (int i = 0; i < mustContainCodePoints.length(); i++) {
int codePoint = mustContainCodePoints.codePointAt(i);
if (Character.isSupplementaryCodePoint(codePoint)) {
// skip next position, because it is 2nd char making up the single code point
i++;
}
for (int k = 0; k < s.length(); k++) {
int inputCodePoint = s.codePointAt(k);
if (Character.isSupplementaryCodePoint(inputCodePoint)) {
// skip next position, because it is 2nd char making up the single code point
k++;
}
if (codePoint == inputCodePoint) {
counter++;
if (counter >= min) {
return Check.Result.success();
} else if (unique) {
// since code point has been matched and count has not been met yet move on to next code point
break;
}
}
}
}
return Check.Result.failure("Must contain at least " + min + " of the following: '" +
mustContainCodePoints + "'.");
};
}
/**
* Creates check that ensures the string contains the provided string.
*/
public static Check containsString(String s) {
return containsStrings(1, s);
}
/**
* Creates check that ensures the string contains (using {@link String#contains(CharSequence)})
* the strings in the set (at least) a minimum number of times.
*
* E.g.,
* min = 1, mustContainStrings = ["abc", "bb", "🤓"]
* "ab" will fail
* "a b c" will fail
* "abc" will pass
* "def🤓" will pass
* "zyx\uD83E\uDD13" will pass, because it contains the unicode for the nerd face
*/
public static Check containsStrings(int min, String... mustContainStrings) {
// use set, makes sure there are no duplicates
return containsStrings(min, new HashSet<>(Arrays.asList(mustContainStrings)));
}
/**
* Creates check that ensures the string contains (using {@link String#contains(CharSequence)})
* the strings in the set (at least) a minimum number of times.
*
* E.g.,
* min = 1, mustContainStrings = ["abc", "bb", "🤓"]
* "ab" will fail
* "a b c" will fail
* "abc" will pass
* "def🤓" will pass
* "zyx\uD83E\uDD13" will pass, because it contains the unicode for the nerd face
*/
public static Check containsStrings(int min, Set mustContainStrings) {
IntegerUtils.requireIntGreaterThanOrEqualTo(1, min);
Objects.requireNonNull(mustContainStrings);
CollectionUtils.requireNonNullEntries(mustContainStrings);
CollectionUtils.requireSizeGreaterThan(0, mustContainStrings);
CollectionUtils.requireNonEmptyStrings(mustContainStrings);
String[] mustContainStringsArray = mustContainStrings.toArray(new String[0]);
return s -> {
int counter = 0;
for (int i = 0; i < mustContainStringsArray.length; i++) {
if (s.contains(mustContainStringsArray[i])) {
counter++;
}
if (counter >= min) {
return Check.Result.success();
}
}
String errorMsgChars = String.join(", ", mustContainStringsArray);
return Check.Result.failure("Must contain at least " + min + " of the following strings: " +
errorMsgChars + ".");
};
}
/**
* Creates check which ensures a code point cannot be repeated more than N times consecutively.
*
* Does not fail on empty string.
*
* E.g.,
* Assuming max = 2
* "aa" will pass
* "a a a " will pass (contains the space character between each 'a')
* "aaa" will fail
*
* @param max maximum number of times a code point can be repeated consecutively
*/
public static Check limitConsecutiveCodePoints(int max) {
IntegerUtils.requireIntGreaterThanOrEqualTo(1, max);
return s -> {
// always start consecutive counter at 1. whatever is at the current position has occurred 1 time..!
int consecutiveCounter = 1;
int strLength = s.length() - 1;
for (int i = 0; i < strLength; i++) {
int codePoint = s.codePointAt(i);
if (Character.isSupplementaryCodePoint(codePoint)) {
// skip next pos, because is part of current code point
i++;
/*
* Because a code point can take up to 2 char positions in the string, need to check here to make sure
* that the length (minus 1) of the string has not been exceeded.
*
* E.g., say the string is "🤓" - the length is 2 and the codepoint is supplementary, thus the index would be
* incremented here and then if a break was not made s would be accessed at (effectively) i + 2 (due to the
* increment above and the +1 below when getting the codepoint)
* */
if (i >= strLength) {
break;
}
}
int codePointNext = s.codePointAt(i + 1);
if (codePoint == codePointNext) {
consecutiveCounter++;
if (consecutiveCounter > max) {
return Check.Result.failure("Cannot contain a character that repeats (consecutively) more than " +
max + " times.");
}
} else {
consecutiveCounter = 1;
}
}
return Check.Result.success();
};
}
/**
* Creates check that ensures the string is empty or meets the provided checks.
* This creates an OR condition between empty and all of the checks, either the
* string is empty or ALL checks provided must be successful.
* @param checks all checks should pass for check to return successful (or string should be empty)
*/
@SafeVarargs
@SuppressWarnings("unchecked")
public static Check emptyOrMeetsChecks(Check... checks) {
return s -> {
if (s.length() == 0) {
return Check.Result.success();
} else {
for (Check Check : checks) {
Check.Result checkResult = Check.check(s);
if (checkResult.failed()) {
return checkResult; // return error
}
}
}
// all checks passed
return Check.Result.success();
};
}
/**
* Similar to empty, but length can be 0 or all white space characters.
*
* @param checks
* @return
*/
@SafeVarargs
@SuppressWarnings("unchecked")
public static Check emptyOrWhitespaceOrMeetsChecks(Check... checks) {
return s -> {
if (IS_EMPTY_OR_ONLY_WHITESPACE.check(s).successful()) {
return Check.Result.success();
} else {
for (Check Check : checks) {
Check.Result checkResult = Check.check(s);
if (checkResult.failed()) {
return checkResult; // return unsuccessful check
}
}
}
// all checks passed
return Check.Result.success();
};
}
/**
* Checks that the param string equals a string in the provided list.
*/
public static Check equalsString(String... strings) {
return equalsString(new HashSet<>(Arrays.asList(strings)));
}
/**
* Checks that the param string equals a string in the provided set.
*
* @param strings
* @return
*/
public static Check equalsString(Set strings) {
Objects.requireNonNull(strings, "Cannot create check for empty set.");
CollectionUtils.requireSizeGreaterThan(0, strings);
CollectionUtils.requireNonNullEntries(strings);
String acceptableListForFailureMsg = strings.stream()
.map(setStr -> "'" + setStr + "'")
.collect(Collectors.joining(", "));
return s -> strings.contains(s)
?
Check.Result.success()
:
Check.Result.failure("Must equal on of the following strings: " + acceptableListForFailureMsg + ".");
}
public static Check equalsStringIgnoreCase(String... strings) {
return equalsStringIgnoreCase(new HashSet<>(Arrays.asList(strings)));
}
public static Check equalsStringIgnoreCase(Set strings) {
Objects.requireNonNull(strings);
CollectionUtils.requireSizeGreaterThan(0, strings);
CollectionUtils.requireNonNullEntries(strings);
Set lowerCasedStrings = strings.stream().map(String::toLowerCase).collect(Collectors.toSet());
String acceptableListForFailureMsg = lowerCasedStrings.stream()
.map(setStr -> "'" + setStr + "'")
.collect(Collectors.joining(", "));
return s -> lowerCasedStrings.contains(s.toLowerCase())
?
Check.Result.success()
:
Check.Result.failure("Must equal one of the following strings: " + acceptableListForFailureMsg + ".");
}
/**
* Checks that the param string does not equal any strings provided in the list.
*/
public static Check doesNotEqualStrings(String... notEqualsList) {
return doesNotEqualStrings(new HashSet<>(Arrays.asList(notEqualsList)));
}
/**
* Checks that the param string does not equal any strings in the provided set.
*/
public static Check doesNotEqualStrings(Set notEqualsSet) {
Objects.requireNonNull(notEqualsSet);
CollectionUtils.requireSizeGreaterThan(0, notEqualsSet);
CollectionUtils.requireNonNullEntries(notEqualsSet);
String unacceptableListForFailureMsg = notEqualsSet.stream().map(setStr -> "'" + setStr + "'")
.collect(Collectors.joining(", "));
return s ->
notEqualsSet.contains(s)
?
Check.Result.failure("Cannot be one of the following strings: " + unacceptableListForFailureMsg + ".")
:
Check.Result.success();
}
public static Check doesNotEqualStringsIgnoreCase(String... notEquals) {
return doesNotEqualStringsIgnoreCase(new HashSet<>(Arrays.asList(notEquals)));
}
public static Check doesNotEqualStringsIgnoreCase(Set notEqualsSet) {
Objects.requireNonNull(notEqualsSet);
CollectionUtils.requireSizeGreaterThan(0, notEqualsSet);
CollectionUtils.requireNonNullEntries(notEqualsSet);
Set lowerCasedNotEqualsSet = notEqualsSet.stream().map(String::toLowerCase).collect(Collectors.toSet());
String unacceptableListForFailureMsg = lowerCasedNotEqualsSet.stream()
.map(setStr -> "'" + setStr + "'")
.collect(Collectors.joining(", "));
return s ->
lowerCasedNotEqualsSet.contains(s.toLowerCase())
?
Check.Result.failure("Cannot be one of the following strings: " + unacceptableListForFailureMsg + ".")
:
Check.Result.success();
}
}