All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.coverity.security.Escape Maven / Gradle / Ivy

/**
 *   Copyright (c) 2012, Coverity, Inc. 
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without modification, 
 *   are permitted provided that the following conditions are met:
 *   - Redistributions of source code must retain the above copyright notice, this 
 *   list of conditions and the following disclaimer.
 *   - Redistributions in binary form must reproduce the above copyright notice, this
 *   list of conditions and the following disclaimer in the documentation and/or other
 *   materials provided with the distribution.
 *   - Neither the name of Coverity, Inc. nor the names of its contributors may be used
 *   to endorse or promote products derived from this software without specific prior 
 *   written permission from Coverity, Inc.
 *   
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
 *   EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *   OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND INFRINGEMENT ARE DISCLAIMED.
 *   IN NO EVENT SHALL THE COPYRIGHT HOLDER OR  CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 *   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *   NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
 *   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
 *   WHETHER IN CONTRACT,  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
 *   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY 
 *   OF SUCH DAMAGE.
 */
package com.coverity.security;

/**
 * Escape is a small set of methods for escaping tainted data. These escaping
 * methods are useful in transforming user-controlled ("tainted") data into
 * forms that are safe from being interpreted as something other than data, such
 * as JavaScript.
 * 

* At this time most of these escaping routines focus on cross-site scripting * mitigations. Each method is good for a different HTML context. For a primer * on HTML contexts, see OWASP's XSS Prevention Cheat Sheet (note however that * the escaping routines are not implemented exactly according to OWASP's * recommendations) or the Coverity Security Advisor documentation. * Also see the Coverity Security Research Laboratory blog on * how to properly use each function. *

* While Coverity's static analysis product references these escaping routines * as exemplars and understands their behavior, there is no dependency on * Coverity products and these routines are completely standalone. Feel free to * use them! Just make sure you use them correctly. * * @author Romain Gaucher * @author Andy Chou * @author Jon Passki * */ public class Escape { /** * HTML entity escaping for text content and attributes. *

* HTML entity escaping that is appropriate for the most common HTML contexts: * PCDATA and "normal" attributes (non-URI, non-event, and non-CSS attributes).
* Note that we do not recommend using non-quoted HTML attributes since * the security obligations vary more between web browser. We recommend * to always quote (single or double quotes) HTML attributes.
* This method is generic to HTML entity escaping, and therefore escapes more * characters than usually necessary -- mostly to handle non-quoted attribute values. * If this method is somehow too slow, such as you output megabytes of text with spaces, * please use the {@link #htmlText(String)} method which only escape HTML text specific * characters. * *

* The following characters are escaped: *

    *
  • * HTML characters: ' (U+0022), " (U+0027), * \ (U+005C), / (U+002F), * < (U+003C), > (U+003E), * & (U+0026) *
  • *
  • * Control characters: \t (U+0009), \n (U+000A), * \f (U+000C), \r (U+000D), * SPACE (U+0020) *
  • *
  • * Unicode newlines: LS (U+2028), PS (U+2029) *
  • *
* * @param input the string to be escaped * @return the HTML escaped string or null if input is null * @since 1.0 */ public static String html(String input) { if (input == null) return null; int length = input.length(); StringBuilder output = allocateStringBuilder(input, length); for (int i = 0; i < length; i++) { char c = input.charAt(i); switch (c) { // Control chars case '\t': output.append(" "); break; case '\n': output.append(" "); break; case '\f': output.append(" "); break; case '\r': output.append(" "); break; // Chars that have a meaning for HTML case '\'': output.append("'"); break; case '\\': output.append("\"); break; case ' ': output.append(" "); break; case '/': output.append("/"); break; case '"': output.append("""); break; case '<': output.append("<"); break; case '>': output.append(">"); break; case '&': output.append("&"); break; // Unicode new lines case '\u2028': output.append("
"); break; case '\u2029': output.append("
"); break; default: output.append(c); break; } } return output.toString(); } /** * Faster HTML entity escaping for tag content or quoted attributes values only. *

* HTML entity escaping that is specific to text elements such as the content of * a typical HTML tag (div, p, etc.).
* This method is not appropriate in all cases, and especially when appending data * in a non-quoted context (e.g., an HTML attribute value that is not surrounded by * single or double quotes). Note that we however, highly discourage the use * of non-quoted attributes. * *

* The following characters are escaped: *

    *
  • * HTML characters: ' (U+0022), " (U+0027), * < (U+003C), > (U+003E), * & (U+0026) *
  • *
* * @param input the string to be escaped * @return the HTML escaped string or null if input is null * @since 1.0 */ public static String htmlText(String input) { if (input == null) return null; int length = input.length(); StringBuilder output = allocateStringBuilder(input, length); for (int i = 0; i < length; i++) { char c = input.charAt(i); switch (c) { case '\'': output.append("'"); break; case '"': output.append("""); break; case '<': output.append("<"); break; case '>': output.append(">"); break; case '&': output.append("&"); break; default: output.append(c); break; } } return output.toString(); } /** * URI encoder. *

* URI encoding for query string values of the URI: * /example/?name=URI_ENCODED_VALUE_HERE
* Note that this method is not sufficient to protect for cross-site scripting * in a generic URI context, but only for query string values. If you * need to escape a URI in an href attribute (for example), * ensure that: *

    *
  • The scheme is allowed (restrict to http, https, or mailto)
  • *
  • Use the HTML escaper {@link #html(String)} on the entire URI
  • *
*

* This URI encoder processes the following characters: *

    *
  • * URI characters: ' (U+0022), " (U+0027), * \ (U+005C), / (U+002F), * < (U+003C), > (U+003E), * & (U+0026), * < (U+003C), > (U+003E), * ! (U+0021), # (U+0023), * $ (U+0024), % (U+0025), * ( (U+0028), ) (U+0029), * * (U+002A), + (U+002B), * , (U+002C), . (U+002E), * : (U+003A), ; (U+003B), * = (U+003D), ? (U+003F), * @ (U+0040), [ (U+005B), * ] (U+005D) *
  • *
  • * Control characters: \t (U+0009), \n (U+000A), * \f (U+000C), \r (U+000D), * SPACE (U+0020) *
  • *
* * @param input the string to be escaped * @return the URI encoded string or null if input is null * @since 1.0 */ public static String uriParam(String input) { if (input == null) return null; int length = input.length(); StringBuilder output = allocateStringBuilder(input, length); for (int i = 0; i < length; i++) { char c = input.charAt(i); switch (c) { // Control chars case '\t': output.append("%09"); break; case '\n': output.append("%0A"); break; case '\f': output.append("%0C"); break; case '\r': output.append("%0D"); break; // RFC chars to encode, plus % ' " < and >, and space case ' ': output.append("%20"); break; case '!': output.append("%21"); break; case '"': output.append("%22"); break; case '#': output.append("%23"); break; case '$': output.append("%24"); break; case '%': output.append("%25"); break; case '&': output.append("%26"); break; case '\'': output.append("%27"); break; case '(': output.append("%28"); break; case ')': output.append("%29"); break; case '*': output.append("%2A"); break; case '+': output.append("%2B"); break; case ',': output.append("%2C"); break; case '.': output.append("%2E"); break; case '/': output.append("%2F"); break; case ':': output.append("%3A"); break; case ';': output.append("%3B"); break; case '<': output.append("%3C"); break; case '=': output.append("%3D"); break; case '>': output.append("%3E"); break; case '?': output.append("%3F"); break; case '@': output.append("%40"); break; case '[': output.append("%5B"); break; case ']': output.append("%5D"); break; default: output.append(c); break; } } return output.toString(); } /** * Same as {@link #uriParam(String)} for now. *

* Eventually, this method will evolve into filtering the URI so that * it is safely considered as a URL by a web browser, and does not contain * malicious payloads (data:text/html..., javascript:, etc.). */ public static String uri(String input) { return uriParam(input); } /** * JavaScript String Unicode escaper. *

* JavaScript String Unicode escaping (\UXXXX) to be used in single or double quoted * JavaScript strings: *

     * <script type="text/javascript">
     *   window.myString = 'JS_STRING_ESCAPE_HERE';
     *   window.yourString = "JS_STRING_ESCAPE_HERE";
     * </script>
     * 
*

* This JavaScript string escaper processes the following characters: *

    *
  • * JS String characters: ' (U+0022), " (U+0027), * \ (U+005C) *
  • *
  • * HTML characters: / (U+002F), * < (U+003C), > (U+003E), * & (U+0026) *
  • *
  • * Control characters: \b (U+0008), \t (U+0009), * \n (U+000A), 0x0b (U+000B), * \f (U+000C), \r (U+000D) *
  • *
  • * Unicode newlines: LS (U+2028), PS (U+2029) *
  • *
* * @param input the string to be escaped * @return the JavaScript string Unicode escaped string or null if input is null * @since 1.0 */ public static String jsString(String input) { if (input == null) return null; int length = input.length(); StringBuilder output = allocateStringBuilder(input, length); for (int i = 0; i < length; i++) { char c = input.charAt(i); switch (c) { // Control chars case '\b': output.append("\\u0008"); break; case '\t': output.append("\\u0009"); break; case '\n': output.append("\\u000A"); break; case '\u000b': output.append("\\u000B"); break; case '\f': output.append("\\u000C"); break; case '\r': output.append("\\u000D"); break; // JavaScript String chars case '\'': output.append("\\u0027"); break; case '"': output.append("\\u0022"); break; case '\\': output.append("\\u005C"); break; // HTML chars for closing the parent context case '&': output.append("\\u0026"); break; case '/': output.append("\\u002F"); break; case '<': output.append("\\u003C"); break; case '>': output.append("\\u003E"); break; // Unicode case '\u2028': output.append("\\u2028"); break; case '\u2029': output.append("\\u2029"); break; default: output.append(c); break; } } return output.toString(); } /** * JavaScript regex content escaper. *

* Escape for a JavaScript regular expression: *

     * <script type="text/javascript">
     *   var b = /^JS_REGEX_ESCAPE_HERE/.test(document.location);
     * </script>
     * 
*

* Note that when using a regular expression inside a JavaScript string such as: *

<script type="text/javascript">
     *   var b = (new RegExp('^CONTENT_HERE')).test(document.location);
     * </script>
* You should first escape using the {@link #jsRegex(String)} escaper, and make sure * that the JavaScript string itself is properly rendered using the {@link #jsString(String)} * escaper. This is a nested context scenario in which we have a JavaScript regex * inside a JavaScript string, for which we need to first escape the inner most context * and walking back the stack of context to the outer most one. *

*

* This JavaScript regex escaper processes the following characters: *

    *
  • * Regex characters: \ (U+005C), / (U+002F), * ( (U+0028), [ (U+005B), * { (U+007B), ] (U+005D), * } (U+007D), ) (U+0029), * * (U+002A), + (U+002B), * - (U+002D), . (U+002E), * ? (U+003F), ! (U+0021), * ^ (U+005E), $ (U+0024), * | (U+007C) *
  • *
  • * Control characters: \t (U+0009), \n (U+000A), * \v (U+000B), * \f (U+000C), \r (U+000D) *
  • *
* * @param input the string to be escaped * @return the escaped JavaScript regex or null if input is null * @since 1.0 */ public static String jsRegex(String input) { if (input == null) return null; int length = input.length(); StringBuilder output = allocateStringBuilder(input, length); for (int i = 0; i < length; i++) { char c = input.charAt(i); switch (c) { // Control chars case '\t': output.append("\\t"); break; case '\n': output.append("\\n"); break; case '\u000b': output.append("\\v"); break; case '\f': output.append("\\f"); break; case '\r': output.append("\\r"); break; // Escape sequence, and regexp terminator case '\\': output.append("\\\\"); break; case '/': output.append("\\/"); break; // Regexp specific characters case '(': output.append("\\("); break; case '[': output.append("\\["); break; case '{': output.append("\\{"); break; case ']': output.append("\\]"); break; case ')': output.append("\\)"); break; case '}': output.append("\\}"); break; case '*': output.append("\\*"); break; case '+': output.append("\\+"); break; case '-': output.append("\\-"); break; case '.': output.append("\\."); break; case '?': output.append("\\?"); break; case '!': output.append("\\!"); break; case '^': output.append("\\^"); break; case '$': output.append("\\$"); break; case '|': output.append("\\|"); break; default: output.append(c); break; } } return output.toString(); } /** * CSS String escaper. *

* CSS escaper for strings such as CSS selector or quoted URI: *

     * <style">
     *  a[href *= "DATA_HERE"] {...}
     *  li { background: url('DATA_HERE'); }
     * </style>
     * 
*

* This CSS string escaper processes the following characters: *

    *
  • * CSS string characters: ' (U+0022), " (U+0027), * \ (U+005C) *
  • *
  • * HTML characters: / (U+002F), * < (U+003C), > (U+003E), * & (U+0026) *
  • *
  • * Control characters: \b (U+0008), * \t (U+0009), \n (U+000A), * \f (U+000C), \r (U+000D) *
  • *
  • * Unicode newlines: LS (U+2028), PS (U+2029) *
  • *
* * @param input the string to be escaped * @return the CSS string escaped or null if input is null * @since 1.0 */ public static String cssString(String input) { if (input == null) return null; int length = input.length(); StringBuilder output = allocateStringBuilder(input, length); for (int i = 0; i < length; i++) { char c = input.charAt(i); switch (c) { // Control chars case '\b': output.append("\\08 "); break; case '\t': output.append("\\09 "); break; case '\n': output.append("\\0A "); break; case '\f': output.append("\\0C "); break; case '\r': output.append("\\0D "); break; // String chars case '\'': output.append("\\27 "); break; case '"': output.append("\\22 "); break; case '\\': output.append("\\5C "); break; // HTML chars for closing the parent context case '&': output.append("\\26 "); break; case '/': output.append("\\2F "); break; case '<': output.append("\\3C "); break; case '>': output.append("\\3E "); break; // Unicode case '\u2028': output.append("\\002028 "); break; case '\u2029': output.append("\\002029 "); break; default: output.append(c); break; } } return output.toString(); } /** * SQL LIKE clause escaper. *

* This SQL LIKE clause escaper does not protect against SQL injection, but ensure * that the string to be consumed in SQL LIKE clause does not alter the current * LIKE query by inserting % or _: *

     * entityManager.createQuery("FROM MyEntity e WHERE e.content LIKE :like_query ESCAPE '@'")
     *              .setParameter("like_query", "%" + Escape.sqlLikeClause(USER_DATA_HERE))
     *              .getResultList();
     * 
* This escaper has to be used with a safe SQL query construct such as the JPQL * named parameterized query in the previous example. *

* This escaper uses by default the @ as escape character. The other method * {@link #sqlLikeClause(String,char)} allows for using a different escape character such as * \. * *

* This SQL LIKE escaper processes the following characters: *

    *
  • * SQL LIKE characters: _ (U+005F), % (U+0025), * @ (U+0040) *
  • *
* * @param input the string to be escaped * @return the SQL LIKE escaped string or null if input is null * @since 1.0 */ public static String sqlLikeClause(String input) { return sqlLikeClause(input, '@'); } /** * SQL LIKE clause escaper. *

* Similar to {@link #sqlLikeClause(String)}, but allows to specify the escape character * to be used. When a character different than @ is used, @ will * not be escaped by the escaper, and the specified escape character will be. * * @param input the string to be escaped * @param escape the escape character to be used * @return the SQL LIKE escaped string or null if input is null * @since 1.0 */ public static String sqlLikeClause(String input, char escape) { if (input == null) return null; int length = input.length(); StringBuilder output = allocateStringBuilder(input, length); for (int i = 0; i < length; i++) { char c = input.charAt(i); if (c == escape || c == '_' || c == '%') { output.append(escape); } output.append(c); } return output.toString(); } /** * Compute the allocation size of the StringBuilder based on the input and its * length. */ private static StringBuilder allocateStringBuilder(String input, int length) { // Allocate enough temporary buffer space to avoid reallocation in most // cases. If you believe you will output large amount of data at once // you might need to change the factor. int buflen = length; if (length * 2 > 0) buflen = length * 2; return new StringBuilder(buflen); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy