// cdc.applic.expressions.literals.EscapingUtils
// Part of the cdc-applic-expressions artifact.
package cdc.applic.expressions.literals;
import cdc.applic.expressions.parsing.OneCharEscapes;
import cdc.util.lang.Checks;
/**
 * Escape handling.
 * <ul>
 * <li>Two characters are forbidden in literals by S1000D: '~' and '|'.
 * <li>Integers and Reals must be escaped when used as string values (but this is not recommended).
 * </ul>
 *
 * @author Damien Carbonne
 */
public final class EscapingUtils {
    private EscapingUtils() {
    }

    /**
     * Returns {@code true} when a character is legal (S1000D rules) in a literal.
     * <ul>
     * <li>It MUST NOT be '~'.
     * <li>It MUST NOT be '|'.
     * </ul>
     *
     * @param c The character.
     * @return {@code true} when {@code c} is a legal literal character.
     */
    public static boolean isLegal(char c) {
        return c != '~' && c != '|';
    }

    /**
     * Returns {@code true} when a string is a legal literal.
     * <p>
     * <b>WARNING:</b> A legal literal is not necessarily valid.
     * <ul>
     * <li>It MUST NOT be null.
     * <li>It MUST NOT be empty.
     * <li>It MUST NOT contain illegal characters ('~' and '|').
     * </ul>
     *
     * @param s The string.
     * @return {@code true} when {@code s} is legal.
     */
    public static boolean isLegalLiteral(String s) {
        if (s == null || s.isEmpty()) {
            return false;
        }
        for (int index = 0; index < s.length(); index++) {
            if (!isLegal(s.charAt(index))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns {@code true} when a character needs to be escaped.
     * <p>
     * Presence of certain characters in a literal necessitates escaping whatever the context is.
     *
     * @param c The character to test
     * @return {@code true} when {@code c} needs to be escaped.
     */
    private static boolean needsEscape(char c) {
        return OneCharEscapes.BEST_MATCHER.test(c);
    }

    /**
     * Returns {@code true} when a string needs to be escaped.
     * <p>
     * This is the case when the string contains at least one character that needs escape,
     * or when {@link LiteralUtils} reports it as a boolean literal, a special string literal,
     * or as starting like a number.
     * <p>
     * Result is independent of the legality of the string as a literal.
     *
     * @param s The tested string (may be {@code null}).
     * @return {@code true} when {@code s} needs to be escaped,
     *         {@code false} otherwise or when {@code s} is {@code null}.
     */
    public static boolean needsEscape(String s) {
        // FIXME this does not handle all cases
        if (s != null) {
            for (int index = 0; index < s.length(); index++) {
                if (needsEscape(s.charAt(index))) {
                    return true;
                }
            }
            return LiteralUtils.isBooleanLiteral(s)
                    || LiteralUtils.isSpecialStringLiteral(s)
                    || LiteralUtils.startsAsNumber(s); // TODO is number
        }
        return false;
    }

    /**
     * Escapes a string, whether it is needed or not.
     * <p>
     * The escaped version of s is built by replacing all occurrences of '"' by '""'
     * and by prepending and appending '"'.
     *
     * <pre>
     * escape("abc") = "\"abc\""
     * escape("ab\"c") = "\"ab\"\"c\""
     * </pre>
     *
     * @param s The string to escape. It must not be {@code null}.
     * @return The escaped version of {@code s}.
     * @throws NullPointerException When {@code s} is {@code null}.
     */
    public static String escape(String s) {
        // Result holds at least all characters of s plus the 2 surrounding '"'.
        final StringBuilder builder = new StringBuilder(s.length() + 2);
        builder.append('"');
        for (int index = 0; index < s.length(); index++) {
            final char c = s.charAt(index);
            if (c == '"') {
                builder.append("\"\"");
            } else {
                builder.append(c);
            }
        }
        builder.append('"');
        return builder.toString();
    }

    /**
     * Escapes a string only if it is needed.
     *
     * @param s The string to possibly escape.
     * @return {@code s} or the escaped version of {@code s}.
     */
    public static String escapeIfNeeded(String s) {
        return needsEscape(s) ? escape(s) : s;
    }

    /**
     * Returns the unescaped version of an escaped string.
     * <p>
     * This removes first and last character (supposedly '"') and all characters that
     * follow '"' (supposedly another '"').
     * <p>
     * <b>WARNING:</b> The argument is not validated. Passing a string that is not
     * a valid escaped text does not raise an exception, it produces a meaningless result.
     * Use {@link #isValidEscapedText(String)} first when the input is not trusted.
     *
     * <pre>
     * unescape("abc") = "b" (Invalid call)
     * unescape("\"ab\"\"c\"") = "ab\"c" (Valid call)
     * </pre>
     *
     * @param s The escaped string. It must not be {@code null}.
     * @return The unescaped version of s.
     * @throws NullPointerException When s is {@code null}.
     */
    public static String unescape(String s) {
        // No precondition checks are performed here (they were present but disabled);
        // toNonEscaped validates with isValidEscapedText before calling this method.
        final StringBuilder builder = new StringBuilder();
        // Skip the first and last characters (the surrounding '"').
        int index = 1;
        while (index < s.length() - 1) {
            final char c = s.charAt(index);
            builder.append(c);
            if (c == '"') {
                // Skip the character that follows a '"' (second half of an escaped '""').
                index++;
            }
            index++;
        }
        return builder.toString();
    }

    /**
     * Finds the index of the last char of an escaped section.
     *
     * @param chars The characters.
     * @param from The index of the beginning of the escape section. It must contain '"'.
     * @return The index of the last char of the escaped section (the closing '"'), or -1
     *         when no closing '"' is found.
     * @throws IllegalArgumentException When the character at {@code from} is not '"'.
     */
    public static int getEndIndexOfEscapeSection(char[] chars,
                                                 int from) {
        Checks.isTrue(chars[from] == '"', "Unexpected char at {} in '{}'", from, chars);
        final int len = chars.length;
        int pos = from + 1;
        while (pos < len) {
            if (chars[pos] == '"') {
                pos++;
                if (pos < len && chars[pos] == '"') {
                    // We found an escaped '"'
                    // One char after '""'
                    pos++;
                    // continue exploration
                } else {
                    // pos is one char after closing '"'
                    return pos - 1;
                }
            } else {
                pos++;
            }
        }
        // We didn't find a matching closing '"'
        return -1;
    }

    /**
     * Finds the index of the last char of an escaped section.
     * <p>
     * This converts {@code s} to a char array and delegates to
     * {@link #getEndIndexOfEscapeSection(char[], int)}.
     *
     * @param s The string.
     * @param from The index of the beginning of the escape section. It must contain '"'.
     * @return The index of the last char of the escaped section, or -1.
     * @throws IllegalArgumentException When the character at {@code from} is not '"'.
     */
    public static int getEndIndexOfEscapeSection(String s,
                                                 int from) {
        return getEndIndexOfEscapeSection(s.toCharArray(), from);
    }

    /**
     * Finds the index of the first path separator that is located outside of any escaped section.
     * <p>
     * Characters are scanned starting at {@code from}. Each time a '"' is met, the whole
     * escaped section it opens is skipped, so path separators inside it are ignored.
     *
     * @param chars The characters.
     * @param from The index at which search starts.
     * @return The index of the first {@link LiteralUtils#PATH_SEPARATOR} found at or after
     *         {@code from} outside of escaped sections, or -1 when there is none.
     * @throws IllegalArgumentException When an escaped section is opened but never closed.
     */
    public static int getPathSeparatorIndex(char[] chars,
                                            int from) {
        final int len = chars.length;
        int pos = from;
        while (pos < len) {
            if (chars[pos] == '"') {
                // The escaped section starts at the current position (pos), not at
                // the initial search position (from).
                final int end = getEndIndexOfEscapeSection(chars, pos);
                if (end == -1) {
                    throw new IllegalArgumentException("Non-closed escaped section.");
                }
                // Resume scanning just after the closing '"'.
                pos = end + 1;
            } else if (chars[pos] == LiteralUtils.PATH_SEPARATOR) {
                return pos;
            } else {
                pos++;
            }
        }
        return -1;
    }

    /**
     * Finds the index of the first path separator that is located outside of any escaped section.
     * <p>
     * This converts {@code s} to a char array and delegates to
     * {@link #getPathSeparatorIndex(char[], int)}.
     *
     * @param s The string.
     * @param from The index at which search starts.
     * @return The index of the first path separator found at or after {@code from}
     *         outside of escaped sections, or -1 when there is none.
     * @throws IllegalArgumentException When an escaped section is opened but never closed.
     */
    public static int getPathSeparatorIndex(String s,
                                            int from) {
        return getPathSeparatorIndex(s.toCharArray(), from);
    }

    /**
     * Returns {@code true} when a string is a valid escaped or unescaped text.
     * <p>
     * <b>WARNING:</b> this does not mean it is a valid literal.
     *
     * @param s The string to test.
     * @return {@code true} when {@code s} is a valid escaped or unescaped text.
     */
    public static boolean isValidText(String s) {
        return isValidUnescapedText(s) || isValidEscapedText(s);
    }

    /**
     * Returns {@code true} when a string is a valid unescaped text.
     * <p>
     * <b>WARNING:</b> This does not mean it is a valid literal.<br>
     * Such a text must:
     * <ul>
     * <li>not be {@code null},
     * <li>not be empty,
     * <li>not contain illegal characters ('~' and '|'),
     * <li>not use any character that needs escape.
     * </ul>
     *
     * @param s The string to test.
     * @return {@code true} when {@code s} is a valid unescaped text.
     */
    public static boolean isValidUnescapedText(String s) {
        return isLegalLiteral(s) && !needsEscape(s);
    }

    /**
     * Returns {@code true} when a string is a valid escaped text.
     * <p>
     * <b>WARNING:</b> This does not mean it is a valid literal.<br>
     * Such a text must:
     * <ul>
     * <li>not be {@code null},
     * <li>not contain illegal characters ('~' and '|'),
     * <li>start and finish with '"',
     * <li>not be empty (excluding surrounding '"'),
     * <li>contain pairs of successive '"' (excluding surrounding '"').
     * </ul>
     *
     * @param s The string to test.
     * @return {@code true} when {@code s} is a valid escaped text.
     */
    public static boolean isValidEscapedText(String s) {
        if (!isLegalLiteral(s) || s.length() < 3 || s.charAt(0) != '"' || s.charAt(s.length() - 1) != '"') {
            return false;
        }
        // Length of the current run of consecutive '"' (surrounding '"' excluded).
        // Each run must have an even length to be made of '""' pairs only.
        int count = 0;
        for (int index = 1; index < s.length() - 1; index++) {
            final char c = s.charAt(index);
            if (c == '"') {
                count++;
            } else {
                if ((count % 2) != 0) {
                    return false;
                }
                count = 0;
            }
        }
        // Check the run that possibly ends just before the closing '"'.
        return (count % 2) == 0;
    }

    /**
     * Returns {@code true} when a string starts and ends with '"'.
     * <p>
     * This is a cheap test that does not check the content between the first and last
     * characters. Note that a string reduced to a single '"' also yields {@code true}.
     * Use {@link #isValidEscapedText(String)} for a full check.
     *
     * @param s The string to test (may be {@code null}).
     * @return {@code true} when {@code s} is neither {@code null} nor empty,
     *         and its first and last characters are '"'.
     */
    public static boolean isProbablyEscaped(String s) {
        return s != null
                && !s.isEmpty()
                && s.charAt(0) == '"'
                && s.charAt(s.length() - 1) == '"';
    }

    /**
     * Returns the non-escaped view of an escaped or non-escaped literal.
     *
     * @param literal The literal.
     * @param escaped {@code true} if {@code literal} is escaped.
     * @return The non-escaped view of {@code literal}.
     * @throws IllegalArgumentException When {@code literal} and {@code escaped} don't match.
     */
    public static String toNonEscaped(String literal,
                                      boolean escaped) {
        if (escaped) {
            if (isValidEscapedText(literal)) {
                return unescape(literal);
            } else {
                throw new IllegalArgumentException("Invalid escaped text: '" + literal + "'");
            }
        } else {
            if (isLegalLiteral(literal)) {
                return literal;
            } else {
                throw new IllegalArgumentException("Invalid non-escaped text: '" + literal + "'");
            }
        }
    }
}