io.github.mmm.base.text.CaseSyntax Maven / Gradle / Ivy
/* Copyright (c) The m-m-m Team, Licensed under the Apache License, Version 2.0
* http://www.apache.org/licenses/LICENSE-2.0 */
package io.github.mmm.base.text;
import static io.github.mmm.base.text.CaseConversion.LOWER_CASE;
import static io.github.mmm.base.text.CaseConversion.ORIGINAL_CASE;
import static io.github.mmm.base.text.CaseConversion.UPPER_CASE;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.regex.Pattern;
/**
* Defines the different styles of lower-/upper-case usage such as {@link #CAML_CASE camlCase}, {@link #TRAIN_CASE
* train-case}, etc. For further examples see the constants defined here in {@link CaseSyntax}.
* The main idea is that you "parse" a given variable name syntax via {@link #ofExample(String, boolean)} and then you
* can transform the value of such variable via {@link #convert(String)}. As a small teaser and motivation here are some
* examples assuming that the value for "VaraibleName" would be "MyValue":
*
*
* VariableSyntax
* Result
*
*
* VariableName
* MyValue
*
*
* variablename
* myvalue
*
*
* variable-name
* my-value
*
*
* VARIABLE_NAME
* MY_VALUE
*
*
* variableName
* myValue
*
*
*
* @since 1.0.0
*/
public class CaseSyntax {
private static final List CONSTANTS = new ArrayList<>();
private static final String REGEX_SPECIAL_CHARS = "[^\\p{IsAlphabetic}^\\p{IsDigit}]+";
private static final Pattern PATTERN_SPECIAL_CHARS = Pattern.compile(REGEX_SPECIAL_CHARS);
/**
* The {@link #getWordSeparator() word separator} used to keep any special (separator) characters untouched by
* {@link #convert(String) convert}.
*/
public static final Character KEEP_SPECIAL_CHARS = Character.valueOf('\0');
/** {@link CaseSyntax} for {@code lowercase} (e.g. for Java package segments). */
public static final CaseSyntax LOWERCASE = new CaseSyntax(null, LOWER_CASE, "lowercase");
/** {@link CaseSyntax} for {@code UPPERCASE}. */
public static final CaseSyntax UPPERCASE = new CaseSyntax(null, UPPER_CASE, "UPPERCASE");
/** {@link CaseSyntax} for {@code camlCase} (e.g. for Java fields). */
public static final CaseSyntax CAML_CASE = new CaseSyntax(null, LOWER_CASE, UPPER_CASE, LOWER_CASE, "camlCase");
/** {@link CaseSyntax} for {@code PascalCase} (e.g. for Java types). */
public static final CaseSyntax PASCAL_CASE = new CaseSyntax(null, UPPER_CASE, UPPER_CASE, LOWER_CASE, "PascalCase");
/** {@link CaseSyntax} for {@code train-case} (e.g. for URLs or Angular). */
public static final CaseSyntax TRAIN_CASE = new CaseSyntax(Character.valueOf('-'), LOWER_CASE, "train-case");
/** {@link CaseSyntax} for {@code UPPER-TRAIN-CASE}. */
public static final CaseSyntax UPPER_TRAIN_CASE = new CaseSyntax(Character.valueOf('-'), UPPER_CASE,
"UPPER-TRAIN-CASE");
/** {@link CaseSyntax} for {@code Pascal-Train-Case}. */
public static final CaseSyntax PASCAL_TRAIN_CASE = new CaseSyntax(Character.valueOf('-'), UPPER_CASE, UPPER_CASE,
LOWER_CASE, "Pascal-Train-Case");
/** {@link CaseSyntax} for {@code caml-Train-Case}. */
public static final CaseSyntax CAML_TRAIN_CASE = new CaseSyntax(Character.valueOf('-'), LOWER_CASE, UPPER_CASE,
LOWER_CASE, "caml-Train-Case");
/** {@link CaseSyntax} for {@code lower_snake_case} (e.g. for Ruby). */
public static final CaseSyntax LOWER_SNAKE_CASE = new CaseSyntax(Character.valueOf('_'), LOWER_CASE,
"lower_snake_case");
/** {@link CaseSyntax} for {@code UPPER_SNAKE_CASE} (e.g. for Java constants). */
public static final CaseSyntax UPPER_SNAKE_CASE = new CaseSyntax(Character.valueOf('_'), UPPER_CASE,
"UPPER_SNAKE_CASE");
/** {@link CaseSyntax} for {@code Pascal_Snake_Case}. */
public static final CaseSyntax PASCAL_SNAKE_CASE = new CaseSyntax(Character.valueOf('_'), UPPER_CASE, UPPER_CASE,
LOWER_CASE, "Pascal_Snake_Case");
/** {@link CaseSyntax} for {@code caml_Snake_Case}. */
public static final CaseSyntax CAML_SNAKE_CASE = new CaseSyntax(Character.valueOf('_'), LOWER_CASE, UPPER_CASE,
LOWER_CASE, "caml_Snake_Case");
/** {@link CaseSyntax} for {@code lower space case}. */
public static final CaseSyntax LOWER_SPACE_CASE = new CaseSyntax(Character.valueOf(' '), LOWER_CASE,
"lower space case");
/** {@link CaseSyntax} for {@code UPPER SPACE CASE}. */
public static final CaseSyntax UPPER_SPACE_CASE = new CaseSyntax(Character.valueOf(' '), UPPER_CASE,
"UPPER SPACE CASE");
/** {@link CaseSyntax} for {@code Pascal Space Case}. */
public static final CaseSyntax PASCAL_SPACE_CASE = new CaseSyntax(Character.valueOf(' '), UPPER_CASE, UPPER_CASE,
LOWER_CASE, "Pascal Space Case");
/** {@link CaseSyntax} for {@code caml Space Case}. */
public static final CaseSyntax CAML_SPACE_CASE = new CaseSyntax(Character.valueOf(' '), LOWER_CASE, UPPER_CASE,
LOWER_CASE, "caml Space Case");
/**
* {@link CaseSyntax} for {@code $$$unmodified} (keep all characters untouched).
*/
public static final CaseSyntax UNMODIFIED = new CaseSyntax(null, ORIGINAL_CASE, "$$$unmodified");
/**
* {@link CaseSyntax} for {@code C$$apitalized} (only convert first char to upper case, keep other case untouched).
*/
public static final CaseSyntax CAPITALIZED = new CaseSyntax(null, UPPER_CASE, ORIGINAL_CASE, "C$$apitalized");
/**
* {@link CaseSyntax} for {@code u$$capitalized} (only convert first char to lower case, keep other case untouched).
*/
public static final CaseSyntax UNCAPITALIZED = new CaseSyntax(null, LOWER_CASE, ORIGINAL_CASE, "u$$capitalized");
/** {@link CaseSyntax} for {@code Capitalizedlower}. */
public static final CaseSyntax CAPITALIZED_LOWER = new CaseSyntax(null, UPPER_CASE, LOWER_CASE, LOWER_CASE,
"Capitalizedlower");
/** {@link CaseSyntax} for {@code Capitalizedlower}. */
public static final CaseSyntax UNCAPITALIZED_UPPER = new CaseSyntax(null, LOWER_CASE, UPPER_CASE, UPPER_CASE,
"uNCAPITALIZEDUPPER");
private final Character wordSeparator;
private final CaseConversion firstCase;
private final CaseConversion wordStartCase;
private final CaseConversion otherCase;
private final String example;
private CaseSyntax(Character separator, CaseConversion allCharCase, String example) {
this(separator, allCharCase, allCharCase, allCharCase, example, true);
}
private CaseSyntax(Character separator, CaseConversion firstCharCase, CaseConversion wordStartCharCase,
String example) {
this(separator, firstCharCase, wordStartCharCase, ORIGINAL_CASE, example, true);
}
private CaseSyntax(Character separator, CaseConversion firstCharCase, CaseConversion wordStartCharCase,
CaseConversion otherCharCase, String example) {
this(separator, firstCharCase, wordStartCharCase, otherCharCase, example, true);
}
private CaseSyntax(Character separator, CaseConversion firstCharCase, CaseConversion wordStartCharCase,
CaseConversion otherCharCase, String example, boolean register) {
super();
this.wordSeparator = separator;
assert (otherCharCase != null);
this.otherCase = otherCharCase;
assert (firstCharCase != null);
this.firstCase = firstCharCase;
assert (wordStartCharCase != null);
this.wordStartCase = wordStartCharCase;
if (example == null) {
assert (!register);
this.example = createExample();
} else {
this.example = example;
}
if (register) {
CONSTANTS.add(this);
}
}
/**
* @return the word separator char (e.g. {@code '_','-' or '.'}) or {@code null} for no word separator. The value
* {@link #KEEP_SPECIAL_CHARS} can be used to keep special characters untouched.
*/
public Character getWordSeparator() {
return this.wordSeparator;
}
/**
* @return {@code true} if the {@link #getWordSeparator() word separator} is {@code null} or
*/
public boolean hasWordSeparator() {
return (this.wordSeparator != null) && (!KEEP_SPECIAL_CHARS.equals(this.wordSeparator));
}
/**
* @return the {@link CaseConversion} used by {@link #convert(String)} for the first character.
*/
public CaseConversion getFirstCase() {
return this.firstCase;
}
/**
* @return the {@link CaseConversion} used by {@link #convert(String)} for word start characters (the first character
* of a new word except for the {@link #getFirstCase() first} character of the entire {@link String}).
*/
public CaseConversion getWordStartCase() {
return this.wordStartCase;
}
/**
* @return the {@link CaseConversion} used by {@link #convert(String)} for all other characters (except
* {@link #getFirstCase() first} and {@link #getWordStartCase() word start} characters).
*/
public CaseConversion getOtherCase() {
return this.otherCase;
}
/**
* Same as {@link #convert(String, Locale)} with a standard {@link Locale} to prevent accidental conversion.
*
* @param string the {@link String} to convert.
* @return the String converted to this {@link CaseSyntax}.
*/
public String convert(String string) {
return convert(string, null);
}
/**
* Converts to this {@link CaseSyntax}. The first character will be converted using {@link #getFirstCase()}.
* Characters other than {@link Character#isLetterOrDigit(char) letters or digits} are considered as word separator.
* In the most cases, they will be
*
* @param string the {@link String} to convert.
* @param locale the explicit {@link Locale} to use. In most cases you want to use {@link #convert(String)} instead.
* @return the String converted to this {@link CaseSyntax}.
*/
public String convert(String string, Locale locale) {
if ((string == null) || (string.isEmpty())) {
return string;
}
// fast and simple cases first...
if ((!hasWordSeparator()) && (this.wordStartCase == this.otherCase)) {
String s = string;
if (this.wordSeparator == null) {
s = removeSpecialCharacters(s);
}
if (this.firstCase == this.wordStartCase) {
s = this.firstCase.convert(s, locale);
} else {
String first = this.firstCase.convert(s.substring(0, 1), locale);
String rest = this.otherCase.convert(s.substring(1), locale);
s = first + rest;
}
return s;
}
StringBuilder buffer = new StringBuilder(string.length() + 4);
CharacterIterator charIterator = new StringCharacterIterator(string);
char c = charIterator.first();
CharClass previousClass = CharClass.of(c);
while (previousClass.isSeparatorOrDollar()) {
if ((buffer.length() == 0) && hasWordSeparator()) {
buffer.append(this.wordSeparator);
}
c = charIterator.next();
if (c == CharacterIterator.DONE) {
return buffer.toString();
}
previousClass = CharClass.of(c);
}
appendCasedChar(buffer, c, this.firstCase);
CaseConversion previousCase = CaseConversion.ofExample(c, false);
int start = 1;
int end = start;
while (true) {
c = charIterator.next();
if (c == CharacterIterator.DONE) {
break;
}
CharClass currentClass = CharClass.of(c);
CaseConversion currentCase = CaseConversion.ofExample(c, false);
switch (currentClass) {
case LETTER:
if (previousClass.isSeparatorOrDollar()) {
appendCasedChar(buffer, c, this.wordStartCase);
start++;
} else if (currentCase != previousCase) {
if ((currentCase == UPPER_CASE) && (end > 1)) {
assert (previousCase == LOWER_CASE);
start = appendOthers(string, buffer, start, end);
if (hasWordSeparator()) {
buffer.append(this.wordSeparator);
}
appendCasedChar(buffer, c, this.wordStartCase);
}
}
break;
case SEPARATOR:
case DOLLAR:
if (!previousClass.isSeparatorOrDollar()) {
start = appendOthers(string, buffer, start, end);
if (KEEP_SPECIAL_CHARS.equals(this.wordSeparator)) {
buffer.append(c);
} else if (this.wordSeparator != null) {
buffer.append(this.wordSeparator);
}
} else {
start++;
}
break;
default:
break;
}
if (currentClass != CharClass.DIGIT) {
previousClass = currentClass;
previousCase = currentCase;
}
end++;
}
if (start < end) {
appendOthers(string, buffer, start, end);
}
return buffer.toString();
}
private int appendOthers(String string, StringBuilder buffer, int start, int end) {
buffer.append(this.otherCase.convert(string.substring(start, end)));
return end + 1;
}
private void appendCasedChar(StringBuilder buffer, char c, CaseConversion targetCase) {
if (targetCase == ORIGINAL_CASE) {
buffer.append(c);
} else {
CaseConversion currentCase = CaseConversion.ofExample(c, false);
if (currentCase == targetCase) {
buffer.append(c);
} else {
String string = targetCase.convert(Character.toString(c));
if ((this.otherCase != targetCase) && (string.length() > 1)) {
buffer.append(string.charAt(0));
CaseConversion restCase;
if (this.otherCase == ORIGINAL_CASE) {
restCase = currentCase;
} else {
restCase = this.otherCase;
}
buffer.append(restCase.convert(string.charAt(0)));
} else {
buffer.append(string);
}
}
}
}
@Override
public boolean equals(Object obj) {
if (obj == this) {
return true;
}
if ((obj == null) || (obj.getClass() != getClass())) {
return false;
}
CaseSyntax other = (CaseSyntax) obj;
if (!Objects.equals(this.wordSeparator, other.wordSeparator)) {
return false;
}
if (this.firstCase != other.firstCase) {
return false;
}
if (this.wordStartCase != other.wordStartCase) {
return false;
}
if (this.otherCase != other.otherCase) {
return false;
}
return true;
}
@Override
public int hashCode() {
return Objects.hash(this.wordSeparator, this.firstCase, this.wordStartCase, this.otherCase);
}
@Override
public String toString() {
return this.example;
}
/**
* The constructor.
*
* @param separator - see {@link #getWordSeparator()}.
* @param allCharCase the {@link CaseConversion} used for {@link #getFirstCase() first}, {@link #getWordStartCase()
* word-start}, and all {@link #getOtherCase() other} alphabetic characters.
* @return the requested {@link CaseSyntax}.
*/
public static CaseSyntax of(Character separator, CaseConversion allCharCase) {
return of(separator, allCharCase, allCharCase, allCharCase);
}
/**
* The constructor. Will use {@link CaseConversion#LOWER_CASE} for {@link #getOtherCase() other} characters.
*
* @param separator - see {@link #getWordSeparator()}.
* @param firstCharCase - see {@link #getFirstCase()}.
* @param wordStartCharCase - see {@link #getWordStartCase()}.
* @return the requested {@link CaseSyntax}.
*/
public static CaseSyntax of(Character separator, CaseConversion firstCharCase, CaseConversion wordStartCharCase) {
return of(separator, firstCharCase, wordStartCharCase, LOWER_CASE);
}
/**
* @param separator - see {@link #getWordSeparator()}.
* @param firstCharCase - see {@link #getFirstCase()}.
* @param wordStartCharCase - see {@link #getWordStartCase()}.
* @param otherCharCase - see {@link #getOtherCase()}.
* @return the requested {@link CaseSyntax}.
*/
public static CaseSyntax of(Character separator, CaseConversion firstCharCase, CaseConversion wordStartCharCase,
CaseConversion otherCharCase) {
CaseSyntax syntax = ofStandardized(separator, firstCharCase, wordStartCharCase, otherCharCase);
if (syntax == null) {
syntax = new CaseSyntax(separator, firstCharCase, wordStartCharCase, otherCharCase, null, false);
}
return syntax;
}
private String createExample() {
// "CustomCase"
StringBuilder buffer = new StringBuilder(11);
buffer.append(this.firstCase.convert('C'));
int i = 0;
if (this.firstCase == ORIGINAL_CASE) {
buffer.insert(i, CaseConversion.EXAMPLE_CHAR_FOR_ORIGINAL_CASE);
} else {
i++;
}
if (this.wordStartCase == ORIGINAL_CASE) {
buffer.insert(i, CaseConversion.EXAMPLE_CHAR_FOR_ORIGINAL_CASE);
}
if (this.otherCase == ORIGINAL_CASE) {
buffer.insert(i, CaseConversion.EXAMPLE_CHAR_FOR_ORIGINAL_CASE);
}
buffer.append(this.otherCase.convert("ustom"));
if (this.wordSeparator != null) {
buffer.append(this.wordSeparator);
}
buffer.append(this.wordStartCase.convert('C'));
buffer.append(this.otherCase.convert("ase"));
String string = buffer.toString();
return string;
}
private static CaseSyntax ofStandardized(Character separator, CaseConversion first, CaseConversion wordStart,
CaseConversion other) {
for (CaseSyntax syntax : CONSTANTS) {
if (Objects.equals(separator, syntax.wordSeparator) && (first == syntax.firstCase)
&& (wordStart == syntax.wordStartCase) && (other == syntax.otherCase)) {
return syntax;
}
}
return null;
}
/**
* @param example the {@link #ofExample(String, boolean) example} to normalize.
* @return the normalized {@code example} (in lower case with all special characters removed).
*/
public static String normalizeExample(String example) {
if (example == null) {
return null;
}
return LOWER_CASE.convert(removeSpecialCharacters(example));
}
private static String removeSpecialCharacters(String string) {
return replaceSpecialCharacters(string, "");
}
private static String replaceSpecialCharacters(String string, String replacement) {
return PATTERN_SPECIAL_CHARS.matcher(string).replaceAll(replacement);
}
/**
* @param example an example for the requested {@link CaseSyntax} such as {@code PascalCase} or {@code MyVariableName}
* for {@link #PASCAL_CASE}. For further details see {@link #ofExample(String, boolean)}.
* @return the {@link CaseSyntax} for the given {@code example} as described above.
* @see #ofExample(String, boolean)
*/
public static CaseSyntax ofExample(String example) {
return ofExample(example, false);
}
/**
* @param example an example for the requested {@link CaseSyntax} such as {@code PascalCase} or {@code MyVariableName}
* for {@link #PASCAL_CASE} and {@code train-case} or {@code my-variable-name} for {@link #TRAIN_CASE}. See
* other {@link CaseSyntax} constants for more examples. The given {@code example} {@link String} has to start
* with a {@link Character#isLetter(char) letter} with an underscore for {@link #KEEP_SPECIAL_CHARS}, or with
* {@link CaseConversion#EXAMPLE_CHAR_FOR_ORIGINAL_CASE $-Sign}) and contain at least two more such characters
* (letter or $-sign) next to each other. Digits are more or less ignored. If you use variable names as example
* consider naming such variables with more than one word and no characters other than
* {@link Character#isLetterOrDigit(char) letters or digits}. Otherwise you would have to write things such as
* {@code ComPonent} or {@code com-ponent} to indicate PascalCase or train-case for the variable
* {@code component}. Also when using variables as {@link CaseSyntax} example you should
* {@link #normalizeExample(String)} normalize} them before resolving.
* @param standardize - {@code true} if existing constants such as {@link #PASCAL_CASE} or {@link #TRAIN_CASE} shall
* be returned if {@link #equals(Object) equal} to the requested {@link CaseSyntax}, {@code false} otherwise.
* @return the {@link CaseSyntax} for the given {@code example} as described above.
*/
public static CaseSyntax ofExample(String example, boolean standardize) {
Objects.requireNonNull(example, "example");
CharacterIterator charIterator = new StringCharacterIterator(example);
Character separator = null;
CaseConversion other = null;
CaseConversion wordStart = null;
char c = charIterator.first();
if (c == '_') {
separator = KEEP_SPECIAL_CHARS;
c = charIterator.next();
}
CaseConversion first = CaseConversion.ofExample(c, false);
CharClass previousClass = CharClass.of(c);
CaseConversion previousCase = first;
char previousChar = c;
int index = 0;
int letterOrDollarCount = previousClass.isLetterOrDollar() ? 1 : 0;
while (true) {
c = charIterator.next();
if (c == CharacterIterator.DONE) {
break;
}
index++;
CharClass currentClass = CharClass.of(c);
CaseConversion currentCase = CaseConversion.ofExample(c, false);
switch (currentClass) {
case DOLLAR:
letterOrDollarCount++;
if ((other == null) && (index < 3)) { // $$ or $A$
other = ORIGINAL_CASE;
} else if ((wordStart == null) && (index < 3)) { // $$$ or $A$$ or AB$
wordStart = ORIGINAL_CASE;
} else {
throw new IllegalArgumentException(
example + ": the special character '" + CaseConversion.EXAMPLE_CHAR_FOR_ORIGINAL_CASE
+ "' is only allowed twice within the first three characters or three times at the beginning!");
}
break;
case LETTER:
letterOrDollarCount++;
boolean wordStarted = false;
if (previousClass == CharClass.SEPARATOR) {
if (wordStart == null) {
wordStart = currentCase;
}
wordStarted = true;
} else if ((other == null) && (wordStart == null)) {
other = currentCase;
} else if ((currentCase != previousCase) && (currentCase != other)) { // Case change (Aa or aA) from other
if (wordStart == null) {
wordStart = currentCase;
}
wordStarted = true;
} else if (CaseConversion.areIncompatible(currentCase, other)) {
throw new IllegalArgumentException(example + ": the two characters '" + previousChar + c
+ "' both have case " + currentCase + " but other case was detected as " + other + "!");
}
if (wordStarted && CaseConversion.areIncompatible(currentCase, wordStart)) {
throw new IllegalArgumentException(
example + ": different word start cases " + wordStart + " and " + currentCase + "!");
}
break;
case SEPARATOR:
if (separator == null) {
if (wordStart != null) {
throw new IllegalArgumentException(example + ": word separator '" + c
+ "' detected but word start was already detected via case change!");
}
separator = Character.valueOf(c);
} else if (c != separator.charValue()) {
String sep;
if (separator == KEEP_SPECIAL_CHARS) {
sep = "'\0' (_)";
} else {
sep = "'" + separator + "'";
}
throw new IllegalArgumentException(example + ": different word separators " + sep + " and '" + c + "'!");
} else if (previousClass == CharClass.SEPARATOR) {
throw new IllegalArgumentException(
example + ": duplicate word separators '" + separator + separator + "'!");
}
break;
case DIGIT:
default:
break;
}
if (currentClass != CharClass.DIGIT) {
previousCase = currentCase;
previousClass = currentClass;
}
previousChar = c;
}
if (letterOrDollarCount < 3) {
throw new IllegalArgumentException(example + ": at least 3 letters are required!");
}
if (wordStart == null) {
wordStart = other;
} else if (other == null) {
other = wordStart;
}
CaseSyntax syntax = null;
if (standardize) {
syntax = ofStandardized(separator, first, wordStart, other);
}
if (syntax == null) {
syntax = new CaseSyntax(separator, first, wordStart, other, example, false);
}
return syntax;
}
private enum CharClass {
LETTER,
DIGIT,
DOLLAR,
SEPARATOR;
private boolean isSeparatorOrDollar() {
return (this == SEPARATOR) || (this == DOLLAR);
}
private boolean isLetterOrDollar() {
return (this == CharClass.LETTER) || (this == DOLLAR);
}
private static CharClass of(char c) {
if (Character.isLetter(c)) {
return LETTER;
} else if (Character.isDigit(c)) {
return DIGIT;
} else if (c == CaseConversion.EXAMPLE_CHAR_FOR_ORIGINAL_CASE) {
return DOLLAR;
} else {
return SEPARATOR;
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy