org.memoeslink.UnicodeCharCategory Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of helper Show documentation
Show all versions of helper Show documentation
A Java library that provides utility to several classes.
The newest version!
package org.memoeslink;
/**
 * Catalogue of single-character regular-expression fragments for {@link java.util.regex.Pattern}.
 *
 * <p>Every constant is a pattern that matches exactly one character of the named class. The
 * constants come in pairs: {@code X_REGEX} matches the class, {@code EXCEPT_X_REGEX} matches its
 * complement. Each value is a self-contained character class, so it can be quantified directly
 * (for example {@code LETTER_REGEX + "+"}).
 *
 * <p>The POSIX-style classes ({@code \p{Alnum}}, {@code \p{Lower}}, ...) follow the ASCII
 * definitions shown in their comments; the one- and two-letter classes ({@code \p{L}},
 * {@code \p{Lu}}, ...) are Unicode general categories.
 *
 * <p>This class only holds constants and cannot be instantiated.
 */
public final class UnicodeCharCategory {

    // ---------------------------------------------------------------------------------------
    // POSIX-style character classes
    // ---------------------------------------------------------------------------------------
    public static final String ALNUM_REGEX = "\\p{Alnum}"; // Alphanumeric character: [\p{Alpha}\p{Digit}]
    public static final String ALPHA_REGEX = "\\p{Alpha}"; // Alphabetic character: [\p{Lower}\p{Upper}]
    public static final String ASCII_REGEX = "\\p{ASCII}"; // Any ASCII character: [\x00-\x7F]
    public static final String BLANK_REGEX = "\\p{Blank}"; // Space or tab: [ \t]
    public static final String CNTRL_REGEX = "\\p{Cntrl}"; // Control character: [\x00-\x1F\x7F]
    public static final String DIGIT_REGEX = "\\p{Digit}"; // Decimal digit: [0-9]
    public static final String LOWER_REGEX = "\\p{Lower}"; // Lowercase letter: [a-z]
    public static final String UPPER_REGEX = "\\p{Upper}"; // Uppercase letter: [A-Z]
    public static final String PUNCT_REGEX = "\\p{Punct}"; // Punctuation or symbol: [!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~]
    public static final String GRAPH_REGEX = "\\p{Graph}"; // Visible character: [\p{Alnum}\p{Punct}]
    public static final String PRINT_REGEX = "\\p{Print}"; // Printable character: [\p{Graph}\x20]
    public static final String SPACE_REGEX = "\\p{Space}"; // Whitespace character: [ \t\n\x0B\f\r]
    public static final String XDIGIT_REGEX = "\\p{XDigit}"; // Hexadecimal digit: [0-9a-fA-F]

    // ---------------------------------------------------------------------------------------
    // Unicode general categories: letters (L*)
    // ---------------------------------------------------------------------------------------
    public static final String LETTER_REGEX = "\\p{L}"; // Any kind of letter from any language.
    public static final String LOWERCASE_LETTER_REGEX = "\\p{Ll}"; // Lowercase letter that has an uppercase variant.
    public static final String UPPERCASE_LETTER_REGEX = "\\p{Lu}"; // Uppercase letter that has a lowercase variant.
    public static final String TITLECASE_LETTER_REGEX = "\\p{Lt}"; // Letter used at the start of a word when only the first letter is capitalized.
    public static final String CASED_LETTER_REGEX = "[\\p{Lu}\\p{Ll}\\p{Lt}]"; // Letter that exists in case variants: union of Lu, Ll and Lt.
    public static final String MODIFIER_LETTER_REGEX = "\\p{Lm}"; // Special character that is used like a letter.
    public static final String OTHER_LETTER_REGEX = "\\p{Lo}"; // Letter or ideograph without lowercase and uppercase variants.

    // ---------------------------------------------------------------------------------------
    // Unicode general categories: marks (M*)
    // ---------------------------------------------------------------------------------------
    public static final String MARK_REGEX = "\\p{M}"; // Character intended to be combined with another character.
    public static final String NON_SPACING_MARK_REGEX = "\\p{Mn}"; // Combining character that takes up no extra space.
    public static final String SPACING_COMBINING_MARK_REGEX = "\\p{Mc}"; // Combining character that takes up extra space.
    public static final String ENCLOSING_MARK_REGEX = "\\p{Me}"; // Character that encloses the character it is combined with.

    // ---------------------------------------------------------------------------------------
    // Unicode general categories: separators (Z*)
    // ---------------------------------------------------------------------------------------
    public static final String SEPARATOR_REGEX = "\\p{Z}"; // Any kind of whitespace or invisible separator.
    public static final String SPACE_SEPARATOR_REGEX = "\\p{Zs}"; // Whitespace character that is invisible but takes up space.
    public static final String LINE_SEPARATOR_REGEX = "\\p{Zl}"; // Line separator character U+2028.
    public static final String PARAGRAPH_SEPARATOR_REGEX = "\\p{Zp}"; // Paragraph separator character U+2029.

    // ---------------------------------------------------------------------------------------
    // Unicode general categories: symbols (S*)
    // ---------------------------------------------------------------------------------------
    public static final String SYMBOL_REGEX = "\\p{S}"; // Math symbols, currency signs, dingbats, box-drawing characters, etc.
    public static final String MATH_SYMBOL_REGEX = "\\p{Sm}"; // Any mathematical symbol.
    public static final String CURRENCY_SYMBOL_REGEX = "\\p{Sc}"; // Any currency sign.
    public static final String MODIFIER_SYMBOL_REGEX = "\\p{Sk}"; // Combining character (mark) as a full character on its own.
    public static final String OTHER_SYMBOL_REGEX = "\\p{So}"; // Symbol that is not a math symbol, currency sign, or modifier symbol.

    // ---------------------------------------------------------------------------------------
    // Unicode general categories: numbers (N*)
    // ---------------------------------------------------------------------------------------
    public static final String NUMBER_REGEX = "\\p{N}"; // Any kind of numeric character in any script.
    public static final String DECIMAL_DIGIT_NUMBER_REGEX = "\\p{Nd}"; // Digit zero through nine in any script except ideographic scripts.
    public static final String LETTER_NUMBER_REGEX = "\\p{Nl}"; // Number that looks like a letter, such as a Roman numeral.
    public static final String OTHER_NUMBER_REGEX = "\\p{No}"; // Superscript or subscript digit, or a number that is not a digit (0-9).

    // ---------------------------------------------------------------------------------------
    // Unicode general categories: punctuation (P*)
    // ---------------------------------------------------------------------------------------
    public static final String PUNCTUATION_REGEX = "\\p{P}"; // Any kind of punctuation character.
    public static final String DASH_PUNCTUATION_REGEX = "\\p{Pd}"; // Any kind of hyphen or dash.
    public static final String OPEN_PUNCTUATION_REGEX = "\\p{Ps}"; // Any kind of opening bracket.
    public static final String CLOSE_PUNCTUATION_REGEX = "\\p{Pe}"; // Any kind of closing bracket.
    public static final String INITIAL_PUNCTUATION_REGEX = "\\p{Pi}"; // Any kind of opening quote.
    public static final String FINAL_PUNCTUATION_REGEX = "\\p{Pf}"; // Any kind of closing quote.
    public static final String CONNECTOR_PUNCTUATION_REGEX = "\\p{Pc}"; // Punctuation such as an underscore that connects words.
    public static final String OTHER_PUNCTUATION_REGEX = "\\p{Po}"; // Punctuation that is not a dash, bracket, quote, or connector.

    // ---------------------------------------------------------------------------------------
    // Unicode general categories: other (C*)
    // ---------------------------------------------------------------------------------------
    public static final String OTHER_REGEX = "\\p{C}"; // Invisible control characters and unused code points.
    public static final String CONTROL_REGEX = "\\p{Cc}"; // ASCII or Latin-1 control character.
    public static final String FORMAT_REGEX = "\\p{Cf}"; // Invisible formatting indicator.
    public static final String PRIVATE_USE_REGEX = "\\p{Co}"; // Code point reserved for private use.
    public static final String SURROGATE_REGEX = "\\p{Cs}"; // One half of a surrogate pair in UTF-16 encoding.
    public static final String UNASSIGNED_REGEX = "\\p{Cn}"; // Code point to which no character has been assigned.

    // ---------------------------------------------------------------------------------------
    // Custom classes
    // ---------------------------------------------------------------------------------------
    // The Unicode escapes in this literal are resolved by the compiler, so the string holds the raw
    // characters U+200B, U+200C, U+200D, U+2060 and U+FEFF inside the brackets.
    public static final String ZERO_WIDTH_CHAR_REGEX = "[\u200B\u200C\u200D\u2060\uFEFF]"; // One of the listed zero-width characters.
    // (letter or mark) intersected with non-ASCII: accented letters and combining marks, and also any
    // other non-ASCII letter (the pattern does not check that a diacritic is actually present).
    public static final String DIACRITICAL_LETTER_REGEX = "[\\p{L}\\p{M}&&[^\\p{ASCII}]]"; // Any non-ASCII letter or mark.
    public static final String DIACRITIC_REGEX = "\\p{InCombiningDiacriticalMarks}"; // Character of the Combining Diacritical Marks block.

    // ---------------------------------------------------------------------------------------
    // Complements of the POSIX-style character classes
    // ---------------------------------------------------------------------------------------
    public static final String EXCEPT_ALNUM_REGEX = "\\P{Alnum}"; // Anything but an alphanumeric character: [^\p{Alpha}\p{Digit}]
    public static final String EXCEPT_ALPHA_REGEX = "\\P{Alpha}"; // Anything but an alphabetic character: [^\p{Lower}\p{Upper}]
    public static final String EXCEPT_ASCII_REGEX = "\\P{ASCII}"; // Anything but an ASCII character: [^\x00-\x7F]
    public static final String EXCEPT_BLANK_REGEX = "\\P{Blank}"; // Anything but a space or a tab: [^ \t]
    public static final String EXCEPT_CNTRL_REGEX = "\\P{Cntrl}"; // Anything but a control character: [^\x00-\x1F\x7F]
    public static final String EXCEPT_DIGIT_REGEX = "\\P{Digit}"; // Anything but a decimal digit: [^0-9]
    public static final String EXCEPT_LOWER_REGEX = "\\P{Lower}"; // Anything but a lowercase letter: [^a-z]
    public static final String EXCEPT_UPPER_REGEX = "\\P{Upper}"; // Anything but an uppercase letter: [^A-Z]
    public static final String EXCEPT_PUNCT_REGEX = "\\P{Punct}"; // Anything but punctuation or a symbol: [^!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~]
    public static final String EXCEPT_GRAPH_REGEX = "\\P{Graph}"; // Anything but a visible character: [^\p{Alnum}\p{Punct}]
    public static final String EXCEPT_PRINT_REGEX = "\\P{Print}"; // Anything but a printable character: [^\p{Graph}\x20]
    public static final String EXCEPT_SPACE_REGEX = "\\P{Space}"; // Anything but a whitespace character: [^ \t\n\x0B\f\r]
    public static final String EXCEPT_XDIGIT_REGEX = "\\P{XDigit}"; // Anything but a hexadecimal digit: [^0-9a-fA-F]

    // ---------------------------------------------------------------------------------------
    // Complements of the letter categories
    // ---------------------------------------------------------------------------------------
    public static final String EXCEPT_LETTER_REGEX = "\\P{L}"; // Any character that is not a letter.
    public static final String EXCEPT_LOWERCASE_LETTER_REGEX = "\\P{Ll}"; // Any character that is not a lowercase letter.
    public static final String EXCEPT_UPPERCASE_LETTER_REGEX = "\\P{Lu}"; // Any character that is not an uppercase letter.
    public static final String EXCEPT_TITLECASE_LETTER_REGEX = "\\P{Lt}"; // Any character that is not a titlecase letter.
    public static final String EXCEPT_CASED_LETTER_REGEX = "[^\\p{Lu}\\p{Ll}\\p{Lt}]"; // Any character that is not in Lu, Ll or Lt.
    public static final String EXCEPT_MODIFIER_LETTER_REGEX = "\\P{Lm}"; // Any character that is not a modifier letter.
    public static final String EXCEPT_OTHER_LETTER_REGEX = "\\P{Lo}"; // Any character that is not an uncased letter or ideograph.

    // ---------------------------------------------------------------------------------------
    // Complements of the mark categories
    // ---------------------------------------------------------------------------------------
    public static final String EXCEPT_MARK_REGEX = "\\P{M}"; // Any character that is not a combining mark.
    public static final String EXCEPT_NON_SPACING_MARK_REGEX = "\\P{Mn}"; // Any character that is not a non-spacing mark.
    public static final String EXCEPT_SPACING_COMBINING_MARK_REGEX = "\\P{Mc}"; // Any character that is not a spacing combining mark.
    public static final String EXCEPT_ENCLOSING_MARK_REGEX = "\\P{Me}"; // Any character that is not an enclosing mark.

    // ---------------------------------------------------------------------------------------
    // Complements of the separator categories
    // ---------------------------------------------------------------------------------------
    public static final String EXCEPT_SEPARATOR_REGEX = "\\P{Z}"; // Any character that is not a separator.
    public static final String EXCEPT_SPACE_SEPARATOR_REGEX = "\\P{Zs}"; // Any character that is not a space separator.
    public static final String EXCEPT_LINE_SEPARATOR_REGEX = "\\P{Zl}"; // Any character other than the line separator U+2028.
    public static final String EXCEPT_PARAGRAPH_SEPARATOR_REGEX = "\\P{Zp}"; // Any character other than the paragraph separator U+2029.

    // ---------------------------------------------------------------------------------------
    // Complements of the symbol categories
    // ---------------------------------------------------------------------------------------
    public static final String EXCEPT_SYMBOL_REGEX = "\\P{S}"; // Any character that is not a symbol (math, currency, dingbat, etc.).
    public static final String EXCEPT_MATH_SYMBOL_REGEX = "\\P{Sm}"; // Any character that is not a mathematical symbol.
    public static final String EXCEPT_CURRENCY_SYMBOL_REGEX = "\\P{Sc}"; // Any character that is not a currency sign.
    public static final String EXCEPT_MODIFIER_SYMBOL_REGEX = "\\P{Sk}"; // Any character that is not a modifier symbol.
    public static final String EXCEPT_OTHER_SYMBOL_REGEX = "\\P{So}"; // Any character that is not in the "other symbol" category.

    // ---------------------------------------------------------------------------------------
    // Complements of the number categories
    // ---------------------------------------------------------------------------------------
    public static final String EXCEPT_NUMBER_REGEX = "\\P{N}"; // Any character that is not numeric in any script.
    public static final String EXCEPT_DECIMAL_DIGIT_NUMBER_REGEX = "\\P{Nd}"; // Any character that is not a decimal digit of any script.
    public static final String EXCEPT_LETTER_NUMBER_REGEX = "\\P{Nl}"; // Any character that is not a letter-like number such as a Roman numeral.
    public static final String EXCEPT_OTHER_NUMBER_REGEX = "\\P{No}"; // Any character that is not in the "other number" category.

    // ---------------------------------------------------------------------------------------
    // Complements of the punctuation categories
    // ---------------------------------------------------------------------------------------
    public static final String EXCEPT_PUNCTUATION_REGEX = "\\P{P}"; // Any character that is not punctuation.
    public static final String EXCEPT_DASH_PUNCTUATION_REGEX = "\\P{Pd}"; // Any character that is not a hyphen or dash.
    public static final String EXCEPT_OPEN_PUNCTUATION_REGEX = "\\P{Ps}"; // Any character that is not an opening bracket.
    public static final String EXCEPT_CLOSE_PUNCTUATION_REGEX = "\\P{Pe}"; // Any character that is not a closing bracket.
    public static final String EXCEPT_INITIAL_PUNCTUATION_REGEX = "\\P{Pi}"; // Any character that is not an opening quote.
    public static final String EXCEPT_FINAL_PUNCTUATION_REGEX = "\\P{Pf}"; // Any character that is not a closing quote.
    public static final String EXCEPT_CONNECTOR_PUNCTUATION_REGEX = "\\P{Pc}"; // Any character that is not connector punctuation such as an underscore.
    public static final String EXCEPT_OTHER_PUNCTUATION_REGEX = "\\P{Po}"; // Any character that is not in the "other punctuation" category.

    // ---------------------------------------------------------------------------------------
    // Complements of the "other" categories
    // ---------------------------------------------------------------------------------------
    public static final String EXCEPT_OTHER_REGEX = "\\P{C}"; // Any character that is not a control character or unused code point.
    public static final String EXCEPT_CONTROL_REGEX = "\\P{Cc}"; // Any character that is not an ASCII or Latin-1 control character.
    public static final String EXCEPT_FORMAT_REGEX = "\\P{Cf}"; // Any character that is not an invisible formatting indicator.
    public static final String EXCEPT_PRIVATE_USE_REGEX = "\\P{Co}"; // Any code point that is not reserved for private use.
    public static final String EXCEPT_SURROGATE_REGEX = "\\P{Cs}"; // Any character that is not half of a UTF-16 surrogate pair.
    public static final String EXCEPT_UNASSIGNED_REGEX = "\\P{Cn}"; // Any code point that has a character assigned.

    // ---------------------------------------------------------------------------------------
    // Complements of the custom classes
    // ---------------------------------------------------------------------------------------
    public static final String EXCEPT_ZERO_WIDTH_CHAR_REGEX = "[^\u200B\u200C\u200D\u2060\uFEFF]"; // Any character other than the listed zero-width characters.
    // Complement of DIACRITICAL_LETTER_REGEX, spelled via De Morgan as "ASCII, or neither letter nor
    // mark". A leading ^ combined with && is deliberately avoided: regex engines before JDK 9 applied
    // the ^ to the individual items instead of to the whole intersection, which made plain ASCII
    // letters fail to match.
    public static final String EXCEPT_DIACRITICAL_LETTER_REGEX = "[\\p{ASCII}[^\\p{L}\\p{M}]]"; // Any ASCII character or any non-letter, non-mark character.
    public static final String EXCEPT_DIACRITIC_REGEX = "\\P{InCombiningDiacriticalMarks}"; // Any character outside the Combining Diacritical Marks block.

    /** Not instantiable: this class is a holder for constants only. */
    private UnicodeCharCategory() {
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy