// io.muserver.rest.SafeHtml -- part of mu-server ("A simple but powerful web server framework").
package io.muserver.rest;
/*
* This is free and unencumbered software released into the public domain.
*
* Thanks to https://gist.github.com/phansson/ac586b2b7594ec5c3cc6275ad078ba2b
*/
/**
 * HTML string utility: escapes text so that it can be embedded safely in
 * HTML element content or in double-quoted HTML attribute values.
 */
class SafeHtml {

    private SafeHtml() {
    }

    /**
     * Escapes a string for use in an HTML entity or HTML attribute.
     *
     * <p>
     * The returned value is always suitable for an HTML entity but only
     * suitable for an HTML attribute if the attribute value is inside
     * double quotes. So for attribute values be sure to do like this:
     *
     * <pre>{@code <div title="value-from-this-method" > ....}</pre>
     *
     * Putting attribute values in double quotes is always a good idea
     * anyway.
     *
     * <p>
     * The following characters will be escaped:
     * <ul>
     * <li>{@code &} (ampersand) -- replaced with {@code &amp;}</li>
     * <li>{@code <} (less than) -- replaced with {@code &lt;}</li>
     * <li>{@code >} (greater than) -- replaced with {@code &gt;}</li>
     * <li>{@code "} (double quote) -- replaced with {@code &quot;}</li>
     * <li>{@code '} (single quote) -- replaced with {@code &#x27;}</li>
     * <li>{@code /} (forward slash) -- replaced with {@code &#x2F;}</li>
     * </ul>
     *
     * <p>
     * Justification: It is not necessary to escape more than this as long as
     * the HTML page uses a Unicode encoding. (Most web pages use UTF-8, which
     * is also the HTML5 recommendation.) Escaping more than this makes the
     * HTML much less readable.
     *
     * @param str the string to make HTML safe; may be {@code null}
     * @param avoidDoubleEscape avoid double escaping, which means for example
     * not escaping {@code &lt;} one more time. Any sequence {@code &....;}, as
     * explained in
     * {@link #isHtmlCharEntityRef(java.lang.String, int) isHtmlCharEntityRef()},
     * will not be escaped.
     *
     * @return a HTML safe string; {@code str} itself when it is {@code null},
     * empty, or contains nothing that needs escaping
     */
    public static String htmlEscape(String str, boolean avoidDoubleEscape) {
        if (str == null || str.length() == 0) {
            return str;
        }
        final int length = str.length();
        // Allocated lazily: most inputs contain nothing to escape, and in that
        // case the input can be returned without copying it.
        StringBuilder sb = null;
        for (int i = 0; i < length; i++) {
            final char c = str.charAt(i);
            final String replacement;
            switch (c) {
                case '&':
                    // Avoid double escaping if already escaped
                    replacement = (avoidDoubleEscape && isHtmlCharEntityRef(str, i)) ? null : "&amp;";
                    break;
                case '<':
                    replacement = "&lt;";
                    break;
                case '>':
                    replacement = "&gt;";
                    break;
                case '"':
                    replacement = "&quot;";
                    break;
                case '\'':
                    replacement = "&#x27;";
                    break;
                case '/':
                    replacement = "&#x2F;";
                    break;
                default:
                    replacement = null;
            }
            if (replacement == null) {
                if (sb != null) {
                    sb.append(c);
                }
                continue;
            }
            if (sb == null) {
                // First character that needs escaping: copy the untouched prefix.
                sb = new StringBuilder(length + 16);
                sb.append(str, 0, i);
            }
            sb.append(replacement);
        }
        return (sb == null) ? str : sb.toString();
    }

    /**
     * Escapes a string for use in an HTML entity or HTML attribute.
     * Double escaping is avoided, meaning this method is equivalent to
     * calling {@code htmlEscape(str, true)}.
     *
     * @see #htmlEscape(java.lang.String, boolean)
     *
     * @param str the string to make HTML safe
     * @return a HTML safe string
     */
    public static String htmlEscape(String str) {
        return htmlEscape(str, true);
    }

    /**
     * Checks if the value at {@code index} in {@code str} is a HTML entity
     * reference. This means any of :
     * <ul>
     * <li>{@code &amp;} or {@code &lt;} or {@code &gt;} or {@code &quot;}</li>
     * <li>A value of the form {@code &#dddd;} where {@code dddd} is a decimal
     * value</li>
     * <li>A value of the form {@code &#xhhhh;} where {@code hhhh} is a
     * hexadecimal value</li>
     * </ul>
     *
     * @param str the string to test for HTML entity reference.
     * @param index position of the {@code '&'} in {@code str}
     * @return {@code true} if there's a HTML entity reference at the
     * index position, otherwise {@code false}.
     * @throws NullPointerException if {@code str} is {@code null}
     * @throws IndexOutOfBoundsException if {@code index} is not a valid
     * position in {@code str}
     */
    public static boolean isHtmlCharEntityRef(String str, int index) {
        if (str.charAt(index) != '&') {
            return false;
        }
        if (followingCharsAre(str, index, "amp;")
                || followingCharsAre(str, index, "lt;")
                || followingCharsAre(str, index, "gt;")
                || followingCharsAre(str, index, "quot;")) {
            return true;
        }

        // Numeric reference: "&#" [xX]? digits+ ";"
        final int length = str.length();
        int pos = index + 1;
        if (pos >= length || str.charAt(pos) != '#') {
            return false;
        }
        pos++;
        final boolean hex = pos < length && (str.charAt(pos) == 'x' || str.charAt(pos) == 'X');
        if (hex) {
            pos++;
        }
        final int firstDigit = pos;
        // Scan only as far as the digits go, so a stray '&' never costs more
        // than the length of the candidate reference itself.
        while (pos < length && (hex ? isHexDigit(str.charAt(pos)) : isDecimalDigit(str.charAt(pos)))) {
            pos++;
        }
        // At least one digit, immediately terminated by a semicolon.
        return pos > firstDigit && pos < length && str.charAt(pos) == ';';
    }

    /**
     * Tests if the chars following position {@code startIndex} in string
     * {@code str} are that of {@code nextChars}.
     *
     * @param str the string to look in
     * @param startIndex position just before the first character to compare
     * @param nextChars the characters expected at {@code startIndex + 1}
     * @return {@code true} if {@code str} contains {@code nextChars} starting
     * exactly at {@code startIndex + 1}
     */
    private static boolean followingCharsAre(String str, int startIndex, String nextChars) {
        return str.startsWith(nextChars, startIndex + 1);
    }

    /** Tests if {@code c} is an ASCII decimal digit, {@code 0} to {@code 9}. */
    private static boolean isDecimalDigit(char c) {
        return c >= '0' && c <= '9';
    }

    /** Tests if {@code c} is an ASCII hexadecimal digit, in either letter case. */
    private static boolean isHexDigit(char c) {
        return isDecimalDigit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
    }
}