
org.thymeleaf.testing.templateengine.util.EscapeUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of thymeleaf-testing Show documentation
Show all versions of thymeleaf-testing Show documentation
XML/XHTML/HTML5 template engine for Java
/*
* =============================================================================
*
* Copyright (c) 2011-2016, The THYMELEAF team (http://www.thymeleaf.org)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
package org.thymeleaf.testing.templateengine.util;
/**
* Utility methods for unescaping unicode (UHEXA) escapes in Strings.
*
* @author Daniel Fernández
*
* @since 2.1.3
*
*/
public final class EscapeUtils {

    /*
     * Prefixes defined for use in escape and unescape operations
     */
    private static final char ESCAPE_PREFIX = '\\';
    private static final char ESCAPE_UHEXA_PREFIX2 = 'u';

    /*
     * Number of hexadecimal digits that must follow the backslash + 'u' prefix.
     */
    private static final int UHEXA_DIGITS = 4;

    /*
     * Small utility char arrays for hexadecimal conversion. The index of a char
     * in either array is its numeric value.
     */
    private static final char[] HEXA_CHARS_UPPER = "0123456789ABCDEF".toCharArray();
    private static final char[] HEXA_CHARS_LOWER = "0123456789abcdef".toCharArray();


    /**
     * Parses the digits in {@code text[start, end)} as an integer in the given radix.
     * <p>
     * This method is used instead of {@code Integer.parseInt(str, radix)} in order to avoid
     * the need to create substrings of the text being unescaped.
     * </p>
     * <p>
     * No validation is performed: a char that is not a hexadecimal digit contributes
     * {@code -1}, and digits are not checked against {@code radix}. Callers are expected to
     * have validated the range already.
     * </p>
     *
     * @param text the text containing the digits
     * @param start index of the first digit (inclusive)
     * @param end index after the last digit (exclusive)
     * @param radix the radix each position is multiplied by
     * @return the parsed value ({@code 0} if the range is empty)
     */
    static int parseIntFromReference(final String text, final int start, final int end, final int radix) {
        int result = 0;
        for (int i = start; i < end; i++) {
            final char c = text.charAt(i);
            int n = -1;
            for (int j = 0; j < HEXA_CHARS_UPPER.length; j++) {
                if (c == HEXA_CHARS_UPPER[j] || c == HEXA_CHARS_LOWER[j]) {
                    n = j;
                    break;
                }
            }
            result = (radix * result) + n;
        }
        return result;
    }


    /**
     * Unescapes unicode (UHEXA) escapes in a String, i.e. a backslash followed by one or
     * more {@code 'u'} characters and exactly four hexadecimal digits.
     * <p>
     * A backslash pair immediately followed by {@code 'u'} is treated as an escaped unicode
     * escape: the pair is merged into a single backslash and the rest is left untouched.
     * Any other escape sequence, and any backslash + {@code 'u'} not followed by four
     * hexadecimal digits, is copied through unchanged.
     * </p>
     *
     * @param text the String to be processed
     * @return the processed String, {@code null} if input is {@code null}. The same
     *         {@code String} instance is returned when nothing needed to be changed.
     */
    public static String unescapeUnicode(final String text) {

        if (text == null) {
            return null;
        }

        final int max = text.length();

        // Created lazily, so that inputs needing no change are returned as-is
        StringBuilder strBuilder = null;
        // Index of the first char of 'text' not yet copied into strBuilder
        int readOffset = 0;

        for (int i = 0; i < max; i++) {

            // Only a backslash followed by at least one more char can start an escape
            if (text.charAt(i) != ESCAPE_PREFIX || (i + 1) >= max) {
                continue;
            }

            final char c1 = text.charAt(i + 1);

            if (c1 == ESCAPE_UHEXA_PREFIX2) {

                // This can be a uhexa escape. First, discard any additional 'u' chars, which are allowed
                int f = i + 2;
                while (f < max && text.charAt(f) == ESCAPE_UHEXA_PREFIX2) {
                    f++;
                }

                // Consume up to four hexadecimal digits
                final int s = f;
                while (f < (s + UHEXA_DIGITS) && f < max && isHexaDigit(text.charAt(f))) {
                    f++;
                }

                if ((f - s) < UHEXA_DIGITS) {
                    // We weren't able to consume the required four hexa chars, leave it as slash+'u',
                    // which is invalid, and let the corresponding Java parser fail.
                    i++;
                    continue;
                }

                // Exactly four hexadecimal digits: the value always fits in a single char
                final int codepoint = parseIntFromReference(text, s, f, 16);

                if (strBuilder == null) {
                    strBuilder = new StringBuilder(max + 5);
                }
                if (i > readOffset) {
                    strBuilder.append(text, readOffset, i);
                }
                strBuilder.append((char) codepoint);

                // Fast-forward: the loop increment will move to the first char after the escape
                i = f - 1;
                readOffset = f;

            } else if (c1 == ESCAPE_PREFIX && (i + 2) < max && text.charAt(i + 2) == ESCAPE_UHEXA_PREFIX2) {

                // This unicode escape is actually escaped itself, so we don't need to perform the
                // real unescaping, but we need to merge the "\\" into "\"
                if (strBuilder == null) {
                    strBuilder = new StringBuilder(max + 5);
                }
                if (i > readOffset) {
                    strBuilder.append(text, readOffset, i);
                }
                strBuilder.append(ESCAPE_PREFIX);
                readOffset = i + 2;
                i++;

            } else {

                // Other escape sequences will not be processed in this unescape step:
                // skip the escaped char so that it is not examined as a prefix itself.
                i++;

            }

        }

        /*
         * Final cleaning: return the original String object if no unescape was actually needed.
         * Otherwise append the remaining text to the string builder and return.
         */
        if (strBuilder == null) {
            return text;
        }
        if (max > readOffset) {
            strBuilder.append(text, readOffset, max);
        }
        return strBuilder.toString();

    }


    /*
     * Checks whether the char is an ASCII hexadecimal digit (0-9, A-F, a-f).
     */
    private static boolean isHexaDigit(final char c) {
        return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
    }


    private EscapeUtils() {
        super();
    }

}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy