
org.apache.commons.jexl3.parser.StringParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of commons-jexl3 Show documentation
Show all versions of commons-jexl3 Show documentation
The Apache Commons JEXL library is an implementation of the JSTL Expression Language with extensions.
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.jexl3.parser;
import java.util.Objects;
/**
* Common constant strings utilities.
*
* The methods of this class read JEXL string literals and handle escaping through the
* 'backslash' (ie: \) character. Escaping is used to neutralize string delimiters (the single
* and double quotes) and to read Unicode hexadecimal encoded characters.
*
*
* The only escapable characters are the single and double quotes - ''' and '"' -,
* a Unicode sequence starting with 'u' followed by 4 hexadecimals,
* the control character letters 'b', 't', 'n', 'f' and 'r' (when escapes are interpreted) and
* the backslash character - '\' - itself.
*
*
* A sequence where '\' occurs before any non-escapable character or sequence has no effect, the
* sequence output being the same as the input.
*
*/
public class StringParser {
    /** The length of an escaped unicode sequence. */
    private static final int UCHAR_LEN = 4;
    /** Initial shift value for composing a Unicode char from 4 nibbles (16 - 4). */
    private static final int SHIFT = 12;
    /** The base 10 offset used to convert hexa characters to decimal. */
    private static final int BASE10 = 10;
    /** The last 7bits ASCII character. */
    private static final char LAST_ASCII = 127;
    /** The first printable 7bits ASCII character. */
    private static final char FIRST_ASCII = 32;

    /**
     * Builds a regex pattern string, handles escaping '/' through '\/' syntax.
     * <p>The first character of the input is skipped; the character that follows it is
     * used as the pattern delimiter and the last character is dropped.</p>
     * @param str the string to build from
     * @return the built string
     */
    public static String buildRegex(final CharSequence str) {
        return buildString(str.subSequence(1, str.length()), true);
    }

    /**
     * Builds a string, handles escaping through '\' syntax.
     * @param str the string to build from
     * @param eatsep whether the separator, the first character, should be considered
     * @return the built string
     */
    public static String buildString(final CharSequence str, final boolean eatsep) {
        return buildString(str, eatsep, true);
    }

    /**
     * Builds a string, handles escaping through '\' syntax.
     * <p>When {@code eatsep} is true, the first character is taken as the separator and
     * both the first and last characters are excluded from the result.</p>
     * @param str the string to build from
     * @param eatsep whether the separator, the first character, should be considered
     * @param esc whether escape characters are interpreted or escaped
     * @return the built string
     */
    private static String buildString(final CharSequence str, final boolean eatsep, final boolean esc) {
        final StringBuilder strb = new StringBuilder(str.length());
        final char sep = eatsep ? str.charAt(0) : 0;
        final int end = str.length() - (eatsep ? 1 : 0);
        final int begin = eatsep ? 1 : 0;
        read(strb, str, begin, end, sep, esc);
        return strb.toString();
    }

    /**
     * Builds a template, does not interpret control character escapes.
     * @param str the string to build from
     * @param eatsep whether the separator, the first character, should be considered
     * @return the built string
     */
    public static String buildTemplate(final CharSequence str, final boolean eatsep) {
        return buildString(str, eatsep, false);
    }

    /**
     * Adds an escape char ('\') where needed in the string form of an identifier.
     * <p>The input instance itself is returned when nothing needs escaping.</p>
     * @param str the identifier un-escaped string
     * @return the string with added backslash character before space, quote, double-quote and backslash;
     * null if str is null
     */
    public static String escapeIdentifier(final String str) {
        StringBuilder strb = null;
        if (str != null) {
            final int last = str.length();
            for (int n = 0; n < last; ++n) {
                final char c = str.charAt(n);
                switch (c) {
                    case ' ':
                    case '\'':
                    case '"':
                    case '\\':
                        // lazily create the buffer on the first character that needs escaping
                        if (strb == null) {
                            strb = new StringBuilder(last);
                            strb.append(str, 0, n);
                        }
                        strb.append('\\');
                        strb.append(c);
                        break;
                    default:
                        if (strb != null) {
                            strb.append(c);
                        }
                        break;
                }
            }
        }
        return Objects.toString(strb, str);
    }

    /**
     * Escapes a String representation, expand non-ASCII characters as Unicode escape sequence.
     * <p>The result is enclosed in the delimiter. NUL characters (code 0) are dropped.</p>
     * @param str the string to escape
     * @param delim the delimiter character
     * @return the escaped representation, null if str is null
     */
    public static String escapeString(final String str, final char delim) {
        if (str == null) {
            return null;
        }
        final int length = str.length();
        final StringBuilder strb = new StringBuilder(length + 2);
        strb.append(delim);
        for (int i = 0; i < length; ++i) {
            final char c = str.charAt(i);
            switch (c) {
                case 0:
                    // NUL characters are skipped
                    continue;
                case '\b':
                    strb.append('\\');
                    strb.append('b');
                    break;
                case '\t':
                    strb.append('\\');
                    strb.append('t');
                    break;
                case '\n':
                    strb.append('\\');
                    strb.append('n');
                    break;
                case '\f':
                    strb.append('\\');
                    strb.append('f');
                    break;
                case '\r':
                    strb.append('\\');
                    strb.append('r');
                    break;
                case '\\':
                    strb.append('\\');
                    strb.append('\\');
                    break;
                default:
                    if (c == delim) {
                        strb.append('\\');
                        strb.append(delim);
                    } else if (c >= FIRST_ASCII && c <= LAST_ASCII) {
                        strb.append(c);
                    } else {
                        // convert to Unicode escape sequence, left-padded with zeros to 4 digits
                        strb.append('\\');
                        strb.append('u');
                        final String hex = Integer.toHexString(c);
                        for (int h = hex.length(); h < UCHAR_LEN; ++h) {
                            strb.append('0');
                        }
                        strb.append(hex);
                    }
                    break;
            }
        }
        strb.append(delim);
        return strb.toString();
    }

    /**
     * Read the remainder of a string till a given separator,
     * handles escaping through '\' syntax.
     * <p>An un-escaped separator is copied to the buffer before reading stops.
     * A backslash that is the very last character read is not copied.</p>
     * @param strb the destination buffer to copy characters into
     * @param str the origin
     * @param begin the relative offset in str to begin reading
     * @param end the relative offset in str to end reading
     * @param sep the separator, single or double quote, marking end of string
     * @param esc whether escape characters are interpreted or escaped
     * @return the last character offset handled in origin
     */
    private static int read(final StringBuilder strb, final CharSequence str, final int begin, final int end, final char sep, final boolean esc) {
        boolean escape = false;
        int index = begin;
        for (; index < end; ++index) {
            final char c = str.charAt(index);
            if (escape) {
                // the 4 hexadecimal digits must all lie before 'end'
                if (c == 'u' && index + UCHAR_LEN < end && readUnicodeChar(strb, str, index + 1) > 0) {
                    index += UCHAR_LEN;
                } else {
                    // if c is not an escapable character, re-emit the backslash before it
                    final boolean notSeparator = sep == 0 ? c != '\'' && c != '"' : c != sep;
                    if (notSeparator && c != '\\') {
                        if (!esc) {
                            strb.append('\\').append(c);
                        } else {
                            switch (c) {
                                // https://es5.github.io/x7.html#x7.8.4
                                case 'b':
                                    strb.append('\b'); // backspace
                                    break;
                                case 't':
                                    strb.append('\t'); // horizontal tab
                                    break;
                                case 'n':
                                    strb.append('\n'); // line feed
                                    break;
                                // vertical tab is not supported; its unicode escape should be used instead
                                case 'f':
                                    strb.append('\f'); // form feed
                                    break;
                                case 'r':
                                    strb.append('\r'); // carriage return
                                    break;
                                default:
                                    strb.append('\\').append(c);
                                    break;
                            }
                        }
                    } else {
                        // escaped separator (or quote) or escaped backslash: keep the character only
                        strb.append(c);
                    }
                }
                escape = false;
                continue;
            }
            if (c == '\\') {
                escape = true;
                continue;
            }
            strb.append(c);
            if (c == sep) {
                break;
            }
        }
        return index;
    }

    /**
     * Read the remainder of a string till a given separator,
     * handles escaping through '\' syntax.
     * <p>The separator, when found, is copied to the buffer.</p>
     * @param strb the destination buffer to copy characters into
     * @param str the origin
     * @param index the offset into the origin
     * @param sep the separator, single or double quote, marking end of string
     * @return the offset in origin
     */
    public static int readString(final StringBuilder strb, final CharSequence str, final int index, final char sep) {
        return read(strb, str, index, str.length(), sep, true);
    }

    /**
     * Converts one hexadecimal digit to its numeric value.
     * @param c the character to convert
     * @return the value between 0 and 15, or -1 if c is not one of 0-9, a-f, A-F
     */
    private static int hexValue(final char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + BASE10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + BASE10;
        }
        return -1;
    }

    /**
     * Reads a Unicode escape character.
     * <p>Nothing is written to the builder unless all 4 characters are hexadecimal digits.</p>
     * @param strb the builder to write the character to
     * @param str the sequence
     * @param begin the begin offset in sequence (after the backslash and 'u')
     * @return 0 if char could not be read, 4 otherwise
     */
    private static int readUnicodeChar(final StringBuilder strb, final CharSequence str, final int begin) {
        char xc = 0;
        int bits = SHIFT;
        for (int offset = 0; offset < UCHAR_LEN; ++offset) {
            final int value = hexValue(str.charAt(begin + offset));
            if (value < 0) {
                return 0;
            }
            // most significant nibble first
            xc |= value << bits;
            bits -= UCHAR_LEN;
        }
        strb.append(xc);
        return UCHAR_LEN;
    }

    /**
     * Remove escape char ('\') from an identifier.
     * <p>Every backslash is removed, including one that follows another backslash.
     * The input instance itself is returned when it contains no backslash.</p>
     * @param str the identifier escaped string, ie with a backslash before space, quote, double-quote and backslash
     * @return the string with no '\\' character, null if str is null
     */
    public static String unescapeIdentifier(final String str) {
        StringBuilder strb = null;
        if (str != null) {
            final int last = str.length();
            for (int n = 0; n < last; ++n) {
                final char c = str.charAt(n);
                if (c == '\\') {
                    // lazily create the buffer on the first backslash; the backslash is not copied
                    if (strb == null) {
                        strb = new StringBuilder(last);
                        strb.append(str, 0, n);
                    }
                } else if (strb != null) {
                    strb.append(c);
                }
            }
        }
        return Objects.toString(strb, str);
    }

    /** Default constructor. */
    protected StringParser() {
        // nothing to initialize
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy