/*
 * Source listing: io.muserver.rest.SafeHtml, from the mu-server artifact
 * ("A simple but powerful web server framework"), as published for
 * Maven / Gradle / Ivy. The listing page notes that a newer version (2.0.3)
 * is available.
 */
package io.muserver.rest;

/*
 * This is free and unencumbered software released into the public domain.
 *
 * Thanks to https://gist.github.com/phansson/ac586b2b7594ec5c3cc6275ad078ba2b
 */

/**
 * HTML string utility: escapes text so that it can be embedded in HTML
 * element content or in a double-quoted HTML attribute value.
 */
final class SafeHtml {

    private SafeHtml() {
    }

    /**
     * Escapes a string for use in HTML element content or in an HTML
     * attribute value.
     *
     * <p>
     * The returned value is always suitable for HTML element content but only
     * suitable for an HTML attribute if the attribute value is inside
     * double quotes. So for attribute values be sure to do like this:
     * <pre>
     *    &lt;div title="value-from-this-method" &gt; ....
     * </pre>
     * Putting attribute values in double quotes is always a good idea
     * anyway.
     *
     * <p>
     * The following characters will be escaped:
     * <ul>
     *   <li>{@code &} (ampersand) -- replaced with {@code &amp;}</li>
     *   <li>{@code <} (less than) -- replaced with {@code &lt;}</li>
     *   <li>{@code >} (greater than) -- replaced with {@code &gt;}</li>
     *   <li>{@code "} (double quote) -- replaced with {@code &quot;}</li>
     *   <li>{@code '} (single quote) -- replaced with {@code &#39;}</li>
     *   <li>{@code /} (forward slash) -- replaced with {@code &#47;}</li>
     * </ul>
     * Justification: It is not necessary to escape more than this as long as
     * the HTML page uses a Unicode encoding. (Most web pages use UTF-8, which
     * is also the HTML5 recommendation.) Escaping more than this makes the
     * HTML much less readable.
     *
     * @param str the string to make HTML safe; {@code null} and the empty
     * string are returned unchanged
     * @param avoidDoubleEscape avoid double escaping, which means for example
     * not escaping {@code &lt;} one more time. Any sequence {@code &....;}
     * that is accepted by
     * {@link #isHtmlCharEntityRef(java.lang.String, int) isHtmlCharEntityRef()}
     * will be copied through without escaping its ampersand.
     *
     * @return a HTML safe string
     */
    public static String htmlEscape(String str, boolean avoidDoubleEscape) {
        if (str == null || str.length() == 0) {
            return str;
        }
        // Implementation: most likely this can be further optimized by
        // lazily instantiating the StringBuilder, because most often there is
        // nothing to escape at all and in that case it would be faster not to
        // do an unnecessary copy of the string.
        StringBuilder sb = new StringBuilder(str.length() + 16);
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            switch (c) {
                case '&':
                    // Leave the ampersand alone if it already starts a
                    // recognised character/entity reference.
                    if (avoidDoubleEscape && isHtmlCharEntityRef(str, i)) {
                        sb.append(c);
                    } else {
                        sb.append("&amp;");
                    }
                    break;
                case '<':
                    sb.append("&lt;");
                    break;
                case '>':
                    sb.append("&gt;");
                    break;
                case '"':
                    sb.append("&quot;");
                    break;
                case '\'':
                    // Numeric form: it is also the form that
                    // isHtmlCharEntityRef() recognises, so output of this
                    // method is never escaped a second time.
                    sb.append("&#39;");
                    break;
                case '/':
                    sb.append("&#47;");
                    break;
                default:
                    sb.append(c);
            }
        }
        return sb.toString();
    }


    /**
     * Escapes a string for use in HTML element content or in an HTML
     * attribute value. Double escaping is avoided, meaning this method is
     * equivalent to calling {@code htmlEscape(str, true)}.
     *
     * @see #htmlEscape(java.lang.String, boolean)
     *
     * @param str the string to make HTML safe
     * @return a HTML safe string
     */
    public static String htmlEscape(String str) {
        return htmlEscape(str, true);
    }

    /**
     * Checks if the value at {@code index} in {@code str} is a HTML entity
     * reference. This means any of:
     * <ul>
     *   <li>{@code &amp;} or {@code &lt;} or {@code &gt;} or {@code &quot;}</li>
     *   <li>A value of the form {@code &#dddd;} where {@code dddd} is a
     *       decimal value</li>
     *   <li>A value of the form {@code &#xhhhh;} where {@code hhhh} is a
     *       hexadecimal value</li>
     * </ul>
     * Other named references (for example {@code &nbsp;}) are not recognised.
     *
     * @param str the string to test for HTML entity reference.
     * @param index position of the {@code '&'} in {@code str}
     * @return {@code true} if there's a HTML entity reference at the
     *      index position, otherwise false.
     * @throws NullPointerException if {@code str} is {@code null}
     * @throws StringIndexOutOfBoundsException if {@code index} is not a valid
     *      position in {@code str}
     */
    public static boolean isHtmlCharEntityRef(String str, int index) {
        if (str.charAt(index) != '&') {
            return false;
        }
        int indexOfSemicolon = str.indexOf(';', index + 1);
        if (indexOfSemicolon == -1) { // no terminating semicolon at all
            return false;
        }
        if (indexOfSemicolon <= index + 2) { // fewer than two chars between '&' and ';'
            return false;
        }
        if (followingCharsAre(str, index, "amp;")
            || followingCharsAre(str, index, "lt;")
            || followingCharsAre(str, index, "gt;")
            || followingCharsAre(str, index, "quot;")) {
            return true;
        }
        if (str.charAt(index + 1) != '#') {
            return false;
        }
        // The length check above guarantees that index + 2 is before the semicolon.
        char marker = str.charAt(index + 2);
        if (marker == 'x' || marker == 'X') {
            // It's presumably a hex value
            int firstDigit = index + 3;
            if (firstDigit == indexOfSemicolon) { // "&#x;" has no digits
                return false;
            }
            for (int i = firstDigit; i < indexOfSemicolon; i++) {
                if (!isHexDigit(str.charAt(i))) {
                    return false;
                }
            }
            return true;
        }
        // It's presumably a decimal value
        for (int i = index + 2; i < indexOfSemicolon; i++) {
            if (!isDecimalDigit(str.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests if the chars following position {@code startIndex} in string
     * {@code str} are that of {@code nextChars}.
     *
     * @param str the string to look in
     * @param startIndex position just before the expected text
     * @param nextChars the text expected to begin at {@code startIndex + 1}
     * @return {@code true} if {@code nextChars} occurs in {@code str}
     *      starting exactly at {@code startIndex + 1}
     */
    private static boolean followingCharsAre(String str, int startIndex, String nextChars) {
        // startsWith() inspects only the candidate position instead of
        // searching the remainder of the string.
        return str.startsWith(nextChars, startIndex + 1);
    }

    /** Tells whether {@code c} is an ASCII decimal digit, 0 -- 9. */
    private static boolean isDecimalDigit(char c) {
        return c >= '0' && c <= '9';
    }

    /** Tells whether {@code c} is an ASCII hexadecimal digit, 0 -- 9, A -- F or a -- f. */
    private static boolean isHexDigit(char c) {
        return isDecimalDigit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
    }
}