org.thymeleaf.testing.templateengine.util.EscapeUtils Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of thymeleaf-testing Show documentation
XML/XHTML/HTML5 template engine for Java
There is a newer version: 3.1.0.M2
/*
 * =============================================================================
 * 
 *   Copyright (c) 2011-2016, The THYMELEAF team (http://www.thymeleaf.org)
 * 
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 * 
 *       http://www.apache.org/licenses/LICENSE-2.0
 * 
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 * 
 * =============================================================================
 */
package org.thymeleaf.testing.templateengine.util;

/**
 * Utility class for unescaping Java-style unicode (UHEXA) escape sequences in Strings.
 *
 * @author Daniel Fernández
 *
 * @since 2.1.3
 *
 */
public final class EscapeUtils {


    /*
     * Prefixes defined for use in escape and unescape operations
     */
    private static final char ESCAPE_PREFIX = '\\';
    private static final char ESCAPE_UHEXA_PREFIX2 = 'u';

    /*
     * Small utility char arrays for hexadecimal conversion. The index of a char in either
     * array is the numeric value of that hexadecimal digit. Declared final so the lookup
     * tables cannot be reassigned.
     */
    private static final char[] HEXA_CHARS_UPPER = "0123456789ABCDEF".toCharArray();
    private static final char[] HEXA_CHARS_LOWER = "0123456789abcdef".toCharArray();

    /*
     * Number of hexadecimal digits that make up a unicode (UHEXA) escape.
     */
    private static final int UHEXA_DIGITS = 4;


    /**
     * Parses the digits found in {@code text[start, end)} into an int.
     * <p>
     * This method is used instead of {@code Integer.parseInt(str,radix)} in order to avoid the need
     * to create substrings of the text being unescaped to feed such method.
     * </p>
     * <p>
     * No validation is performed here: the reference parsing code calling this method is expected
     * to have checked the digits already. Any char that is not one of {@code 0-9}, {@code A-F} or
     * {@code a-f} contributes {@code -1} to the result, and {@code radix} is used only as the
     * positional multiplier (digits are not checked against it).
     * </p>
     *
     * @param text the text containing the digits
     * @param start index of the first digit (inclusive)
     * @param end index after the last digit (exclusive)
     * @param radix the positional base applied to each digit
     * @return the parsed value, 0 if the range is empty
     */
    static int parseIntFromReference(final String text, final int start, final int end, final int radix) {
        int result = 0;
        for (int i = start; i < end; i++) {
            result = (radix * result) + hexaValue(text.charAt(i));
        }
        return result;
    }


    /*
     * Returns the numeric value (0-15) of an hexadecimal digit, or -1 if the char is not one.
     */
    private static int hexaValue(final char c) {
        for (int j = 0; j < HEXA_CHARS_UPPER.length; j++) {
            if (c == HEXA_CHARS_UPPER[j] || c == HEXA_CHARS_LOWER[j]) {
                return j;
            }
        }
        return -1;
    }


    /*
     * Checks whether a char is an ASCII hexadecimal digit (0-9, A-F, a-f).
     */
    private static boolean isHexaDigit(final char c) {
        return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
    }


    /**
     * Unescapes unicode (UHEXA) escapes in a String, like \u00E1.
     * <p>
     * The following rules are applied while scanning the text from left to right:
     * </p>
     * <ul>
     *   <li>A backslash, followed by one or more 'u' chars, followed by exactly four hexadecimal
     *       digits is replaced by the char with that code.</li>
     *   <li>Two backslashes followed by a 'u' are an escaped escape: the two backslashes are merged
     *       into one and the rest is left as is.</li>
     *   <li>A backslash + 'u' not followed by four hexadecimal digits is left untouched.</li>
     *   <li>Any other backslash sequence is left untouched, and the char following the backslash
     *       is skipped so that it cannot start a sequence itself.</li>
     * </ul>
     * <p>
     * If nothing needs to be changed, the very same String instance is returned.
     * </p>
     *
     * @param text the String to be processed
     * @return the processed String, null if input is null
     */
    public static String unescapeUnicode(final String text) {

        if (text == null) {
            return null;
        }

        final int max = text.length();

        // Created lazily, only once we know the text needs to be modified
        StringBuilder strBuilder = null;

        // Index of the first char of the input not yet copied into the builder
        int readOffset = 0;

        for (int i = 0; i < max; i++) {

            /*
             * Check the need for an unescape operation at this point: only a backslash followed
             * by at least one more char can start a sequence we care about.
             */
            if (text.charAt(i) != ESCAPE_PREFIX || (i + 1) >= max) {
                continue;
            }

            final char c1 = text.charAt(i + 1);

            if (c1 == ESCAPE_UHEXA_PREFIX2) {
                // This can be a uhexa escape, we need exactly four hexadecimal chars

                // First, discard any additional 'u' characters, which are allowed
                int digitsStart = i + 2;
                while (digitsStart < max && text.charAt(digitsStart) == ESCAPE_UHEXA_PREFIX2) {
                    digitsStart++;
                }

                // Then consume up to four hexadecimal digits
                int digitsEnd = digitsStart;
                while (digitsEnd < (digitsStart + UHEXA_DIGITS) && digitsEnd < max
                        && isHexaDigit(text.charAt(digitsEnd))) {
                    digitsEnd++;
                }

                if ((digitsEnd - digitsStart) < UHEXA_DIGITS) {
                    // We weren't able to consume the required four hexa chars, leave it as
                    // backslash + 'u', which is invalid, and let the corresponding Java parser fail.
                    i++;
                    continue;
                }

                // Four hexadecimal digits can never exceed 0xFFFF, so the result always fits a char
                final int codepoint = parseIntFromReference(text, digitsStart, digitsEnd, 16);

                if (strBuilder == null) {
                    strBuilder = new StringBuilder(max + 5);
                }
                if (i - readOffset > 0) {
                    strBuilder.append(text, readOffset, i);
                }
                strBuilder.append((char) codepoint);

                // Fast-forward to the last char of the escape (the loop increment moves past it)
                i = digitsEnd - 1;
                readOffset = digitsEnd;
                continue;

            }

            if (c1 == ESCAPE_PREFIX && (i + 2) < max && text.charAt(i + 2) == ESCAPE_UHEXA_PREFIX2) {
                // This unicode escape is actually escaped itself, so we don't need to perform the
                // real unescaping, but we need to merge the two backslashes into one

                if (strBuilder == null) {
                    strBuilder = new StringBuilder(max + 5);
                }
                if (i - readOffset > 0) {
                    strBuilder.append(text, readOffset, i);
                }
                strBuilder.append(ESCAPE_PREFIX);

                // Resume copying at the 'u', which is output verbatim
                readOffset = i + 2;
                i++;
                continue;

            }

            // Other escape sequences will not be processed in this unescape step: skip the
            // escaped char so that it is not considered the start of another sequence.
            i++;

        }


        /*
         * -----------------------------------------------------------------------------------------------
         * Final cleaning: return the original String object if no unescape was actually needed. Otherwise
         *                 append the remaining text to the string builder and return.
         * -----------------------------------------------------------------------------------------------
         */

        if (strBuilder == null) {
            return text;
        }

        if (max - readOffset > 0) {
            strBuilder.append(text, readOffset, max);
        }

        return strBuilder.toString();

    }



    /*
     * Utility class: not meant to be instantiated.
     */
    private EscapeUtils() {
        super();
    }


}