// com.coverity.security.Escape Maven / Gradle / Ivy
//
// Go to download
/**
 *   Copyright (c) 2012, Coverity, Inc. 
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without modification, 
 *   are permitted provided that the following conditions are met:
 *   - Redistributions of source code must retain the above copyright notice, this 
 *   list of conditions and the following disclaimer.
 *   - Redistributions in binary form must reproduce the above copyright notice, this
 *   list of conditions and the following disclaimer in the documentation and/or other
 *   materials provided with the distribution.
 *   - Neither the name of Coverity, Inc. nor the names of its contributors may be used
 *   to endorse or promote products derived from this software without specific prior 
 *   written permission from Coverity, Inc.
 *   
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
 *   EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 *   OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND INFRINGEMENT ARE DISCLAIMED.
 *   IN NO EVENT SHALL THE COPYRIGHT HOLDER OR  CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 *   INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *   NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
 *   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
 *   WHETHER IN CONTRACT,  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
 *   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY 
 *   OF SUCH DAMAGE.
 */
package com.coverity.security;

/**
 * Escape is a small set of methods for escaping tainted data. These escaping
 * methods are useful in transforming user-controlled ("tainted") data into
 * forms that are safe from being interpreted as something other than data, such
 * as JavaScript.
 *
 * <p>At this time most of these escaping routines focus on cross-site scripting
 * mitigations. Each method is good for a different HTML context. For a primer
 * on HTML contexts, see OWASP's XSS Prevention Cheat Sheet (note however that
 * the escaping routines are not implemented exactly according to OWASP's
 * recommendations) or the Coverity Security Advisor documentation.
 * Also see the Coverity Security Research Laboratory blog on
 * how to properly use each function.
 *
 * <p>While Coverity's static analysis product references these escaping routines
 * as exemplars and understands their behavior, there is no dependency on
 * Coverity products and these routines are completely standalone. Feel free to
 * use them! Just make sure you use them correctly.
 *
 * <p>Every public method is static, returns {@code null} for a {@code null}
 * input, and works one UTF-16 {@code char} at a time: a character is either
 * replaced by a fixed escape sequence or copied to the output unchanged.
 *
 * @author Romain Gaucher
 * @author Andy Chou
 * @author Jon Passki
 */
public class Escape {

    /**
     * HTML entity escaping for text content and attributes.
     *
     * <p>HTML entity escaping that is appropriate for the most common HTML contexts:
     * PCDATA and "normal" attributes (non-URI, non-event, and non-CSS attributes).
     *
     * <p>Note that we do not recommend using non-quoted HTML attributes since
     * the security obligations vary more between web browsers. We recommend
     * to always quote (single or double quotes) HTML attributes.
     *
     * <p>This method is generic to HTML entity escaping, and therefore escapes more
     * characters than usually necessary -- mostly to handle non-quoted attribute values.
     * If this method is somehow too slow, such as when you output megabytes of text with
     * spaces, please use the {@link #htmlText(String)} method which only escapes HTML
     * text specific characters.
     *
     * <p>The following characters are escaped:
     * <ul>
     *   <li>HTML characters: {@code '} (U+0027), {@code "} (U+0022),
     *       {@code \} (U+005C), {@code /} (U+002F),
     *       {@code <} (U+003C), {@code >} (U+003E),
     *       {@code &} (U+0026)</li>
     *   <li>Control characters: {@code \t} (U+0009), {@code \n} (U+000A),
     *       {@code \f} (U+000C), {@code \r} (U+000D),
     *       SPACE (U+0020)</li>
     *   <li>Unicode newlines: LS (U+2028), PS (U+2029)</li>
     * </ul>
     *
     * @param  input the string to be escaped
     * @return       the HTML escaped string or null if input is null
     * @since  1.0
     */
    public static String html(String input) {
        if (input == null) {
            return null;
        }

        int length = input.length();
        StringBuilder output = allocateStringBuilder(length);

        for (int i = 0; i < length; i++) {
            char c = input.charAt(i);
            switch (c) {
            // Control chars
            case '\t':
                output.append("&#x09;");
                break;
            case '\n':
                output.append("&#x0A;");
                break;
            case '\f':
                output.append("&#x0C;");
                break;
            case '\r':
                output.append("&#x0D;");
                break;
            // Chars that have a meaning for HTML
            case '\'':
                output.append("&#39;");
                break;
            case '\\':
                output.append("&#x5C;");
                break;
            case ' ':
                // Escaped so the value cannot break out of a non-quoted attribute.
                output.append("&#x20;");
                break;
            case '/':
                output.append("&#x2F;");
                break;
            case '"':
                output.append("&quot;");
                break;
            case '<':
                output.append("&lt;");
                break;
            case '>':
                output.append("&gt;");
                break;
            case '&':
                output.append("&amp;");
                break;
            // Unicode new lines
            case '\u2028':
                output.append("&#x2028;");
                break;
            case '\u2029':
                output.append("&#x2029;");
                break;

            default:
                output.append(c);
                break;
            }
        }
        return output.toString();
    }


    /**
     * Faster HTML entity escaping for tag content or quoted attributes values only.
     *
     * <p>HTML entity escaping that is specific to text elements such as the content of
     * a typical HTML tag (div, p, etc.).
     *
     * <p>This method is not appropriate in all cases, and especially when appending data
     * in a non-quoted context (e.g., an HTML attribute value that is not surrounded by
     * single or double quotes). Note however that we highly discourage the use
     * of non-quoted attributes.
     *
     * <p>The following characters are escaped:
     * <ul>
     *   <li>HTML characters: {@code '} (U+0027), {@code "} (U+0022),
     *       {@code <} (U+003C), {@code >} (U+003E),
     *       {@code &} (U+0026)</li>
     * </ul>
     *
     * @param  input the string to be escaped
     * @return       the HTML escaped string or null if input is null
     * @since  1.0
     */
    public static String htmlText(String input) {
        if (input == null) {
            return null;
        }

        int length = input.length();
        StringBuilder output = allocateStringBuilder(length);

        for (int i = 0; i < length; i++) {
            char c = input.charAt(i);
            switch (c) {
            case '\'':
                output.append("&#39;");
                break;
            case '"':
                output.append("&quot;");
                break;
            case '<':
                output.append("&lt;");
                break;
            case '>':
                output.append("&gt;");
                break;
            case '&':
                output.append("&amp;");
                break;
            default:
                output.append(c);
                break;
            }
        }
        return output.toString();
    }


    /**
     * URI encoder.
     *
     * <p>URI encoding for query string values of the URI:
     * {@code /example/?name=URI_ENCODED_VALUE_HERE}
     *
     * <p>Note that this method is not sufficient to protect for cross-site scripting
     * in a generic URI context, but only for query string values. If you
     * need to escape a URI in an href attribute (for example),
     * ensure that:
     * <ul>
     *   <li>The scheme is allowed (restrict to http, https, or mailto)</li>
     *   <li>Use the HTML escaper {@link #html(String)} on the entire URI</li>
     * </ul>
     *
     * <p>This URI encoder processes the following characters; every other
     * character (including non-ASCII characters) is copied unchanged:
     * <ul>
     *   <li>URI characters: {@code '} (U+0027), {@code "} (U+0022),
     *       {@code /} (U+002F),
     *       {@code <} (U+003C), {@code >} (U+003E),
     *       {@code &} (U+0026),
     *       {@code !} (U+0021), {@code #} (U+0023),
     *       {@code $} (U+0024), {@code %} (U+0025),
     *       {@code (} (U+0028), {@code )} (U+0029),
     *       {@code *} (U+002A), {@code +} (U+002B),
     *       {@code ,} (U+002C), {@code .} (U+002E),
     *       {@code :} (U+003A), {@code ;} (U+003B),
     *       {@code =} (U+003D), {@code ?} (U+003F),
     *       {@code @} (U+0040), {@code [} (U+005B),
     *       {@code ]} (U+005D)</li>
     *   <li>Control characters: {@code \t} (U+0009), {@code \n} (U+000A),
     *       {@code \f} (U+000C), {@code \r} (U+000D),
     *       SPACE (U+0020)</li>
     * </ul>
     *
     * @param  input the string to be escaped
     * @return       the URI encoded string or null if input is null
     * @since  1.0
     */
    public static String uriParam(String input) {
        if (input == null) {
            return null;
        }

        int length = input.length();
        StringBuilder output = allocateStringBuilder(length);

        for (int i = 0; i < length; i++) {
            char c = input.charAt(i);
            switch (c) {
            // Control chars
            case '\t':
                output.append("%09");
                break;
            case '\n':
                output.append("%0A");
                break;
            case '\f':
                output.append("%0C");
                break;
            case '\r':
                output.append("%0D");
                break;
            // RFC chars to encode, plus % ' " < and >, and space
            case ' ':
                output.append("%20");
                break;
            case '!':
                output.append("%21");
                break;
            case '"':
                output.append("%22");
                break;
            case '#':
                output.append("%23");
                break;
            case '$':
                output.append("%24");
                break;
            case '%':
                output.append("%25");
                break;
            case '&':
                output.append("%26");
                break;
            case '\'':
                output.append("%27");
                break;
            case '(':
                output.append("%28");
                break;
            case ')':
                output.append("%29");
                break;
            case '*':
                output.append("%2A");
                break;
            case '+':
                output.append("%2B");
                break;
            case ',':
                output.append("%2C");
                break;
            case '.':
                output.append("%2E");
                break;
            case '/':
                output.append("%2F");
                break;
            case ':':
                output.append("%3A");
                break;
            case ';':
                output.append("%3B");
                break;
            case '<':
                output.append("%3C");
                break;
            case '=':
                output.append("%3D");
                break;
            case '>':
                output.append("%3E");
                break;
            case '?':
                output.append("%3F");
                break;
            case '@':
                output.append("%40");
                break;
            case '[':
                output.append("%5B");
                break;
            case ']':
                output.append("%5D");
                break;

            default:
                output.append(c);
                break;
            }
        }
        return output.toString();
    }


    /**
     * Same as {@link #uriParam(String)} for now.
     *
     * <p>Eventually, this method will evolve into filtering the URI so that
     * it is safely considered as a URL by a web browser, and does not contain
     * malicious payloads (data:text/html..., javascript:, etc.).
     *
     * @param  input the string to be escaped
     * @return       the URI encoded string or null if input is null
     */
    public static String uri(String input) {
        return uriParam(input);
    }


    /**
     * JavaScript String Unicode escaper.
     *
     * <p>JavaScript String Unicode escaping (&#92;uXXXX) to be used in single or double
     * quoted JavaScript strings:
     * <pre>
     * &lt;script type="text/javascript"&gt;
     *   window.myString = 'JS_STRING_ESCAPE_HERE';
     *   window.yourString = "JS_STRING_ESCAPE_HERE";
     * &lt;/script&gt;
     * </pre>
     *
     * <p>This JavaScript string escaper processes the following characters:
     * <ul>
     *   <li>JS String characters: {@code '} (U+0027), {@code "} (U+0022),
     *       {@code \} (U+005C)</li>
     *   <li>URI encoding characters: {@code %} (U+0025)</li>
     *   <li>HTML characters: {@code /} (U+002F),
     *       {@code <} (U+003C), {@code >} (U+003E),
     *       {@code &} (U+0026)</li>
     *   <li>Control characters: {@code \b} (U+0008), {@code \t} (U+0009),
     *       {@code \n} (U+000A), 0x0b (U+000B),
     *       {@code \f} (U+000C), {@code \r} (U+000D)</li>
     *   <li>Unicode newlines: LS (U+2028), PS (U+2029)</li>
     * </ul>
     *
     * @param  input the string to be escaped
     * @return       the JavaScript string Unicode escaped string or null if input is null
     * @since  1.0
     */
    public static String jsString(String input) {
        if (input == null) {
            return null;
        }

        int length = input.length();
        StringBuilder output = allocateStringBuilder(length);

        for (int i = 0; i < length; i++) {
            char c = input.charAt(i);
            switch (c) {
            // Control chars
            case '\b':
                output.append("\\u0008");
                break;
            case '\t':
                output.append("\\u0009");
                break;
            case '\n':
                output.append("\\u000A");
                break;
            case '\u000b':
                output.append("\\u000B");
                break;
            case '\f':
                output.append("\\u000C");
                break;
            case '\r':
                output.append("\\u000D");
                break;
            // JavaScript String chars
            case '\'':
                output.append("\\u0027");
                break;
            case '"':
                output.append("\\u0022");
                break;
            case '\\':
                output.append("\\u005C");
                break;
            // URI encoding char
            case '%':
                output.append("\\u0025");
                break;
            // HTML chars for closing the parent context
            case '&':
                output.append("\\u0026");
                break;
            case '/':
                output.append("\\u002F");
                break;
            case '<':
                output.append("\\u003C");
                break;
            case '>':
                output.append("\\u003E");
                break;
            // Unicode
            case '\u2028':
                output.append("\\u2028");
                break;
            case '\u2029':
                output.append("\\u2029");
                break;

            default:
                output.append(c);
                break;
            }
        }
        return output.toString();
    }


    /**
     * JavaScript regex content escaper.
     *
     * <p>Escape for a JavaScript regular expression:
     * <pre>
     * &lt;script type="text/javascript"&gt;
     *   var b = /^JS_REGEX_ESCAPE_HERE/.test(document.location);
     * &lt;/script&gt;
     * </pre>
     *
     * <p>Note that when using a regular expression inside a JavaScript string such as:
     * <pre>
     * &lt;script type="text/javascript"&gt;
     *   var b = (new RegExp('^CONTENT_HERE')).test(document.location);
     * &lt;/script&gt;
     * </pre>
     * You should first escape using the {@link #jsRegex(String)} escaper, and make sure
     * that the JavaScript string itself is properly rendered using the
     * {@link #jsString(String)} escaper. This is a nested context scenario in which we
     * have a JavaScript regex inside a JavaScript string, for which we need to first
     * escape the innermost context and then walk back the stack of contexts to the
     * outermost one.
     *
     * <p>This JavaScript regex escaper processes the following characters:
     * <ul>
     *   <li>Regex characters: {@code \} (U+005C), {@code /} (U+002F),
     *       {@code (} (U+0028), {@code [} (U+005B),
     *       <code>&#123;</code> (U+007B), {@code ]} (U+005D),
     *       <code>&#125;</code> (U+007D), {@code )} (U+0029),
     *       {@code *} (U+002A), {@code +} (U+002B),
     *       {@code -} (U+002D), {@code .} (U+002E),
     *       {@code ?} (U+003F), {@code !} (U+0021),
     *       {@code ^} (U+005E), {@code $} (U+0024),
     *       {@code |} (U+007C)</li>
     *   <li>Control characters: {@code \t} (U+0009), {@code \n} (U+000A),
     *       {@code \v} (U+000B),
     *       {@code \f} (U+000C), {@code \r} (U+000D)</li>
     * </ul>
     *
     * @param  input the string to be escaped
     * @return       the escaped JavaScript regex or null if input is null
     * @since  1.0
     */
    public static String jsRegex(String input) {
        if (input == null) {
            return null;
        }

        int length = input.length();
        StringBuilder output = allocateStringBuilder(length);

        for (int i = 0; i < length; i++) {
            char c = input.charAt(i);
            switch (c) {
            // Control chars
            case '\t':
                output.append("\\t");
                break;
            case '\n':
                output.append("\\n");
                break;
            case '\u000b':
                output.append("\\v");
                break;
            case '\f':
                output.append("\\f");
                break;
            case '\r':
                output.append("\\r");
                break;
            // Escape sequence, and regexp terminator
            case '\\':
                output.append("\\\\");
                break;
            case '/':
                output.append("\\/");
                break;
            // Regexp specific characters
            case '(':
                output.append("\\(");
                break;
            case '[':
                output.append("\\[");
                break;
            case '{':
                output.append("\\{");
                break;
            case ']':
                output.append("\\]");
                break;
            case ')':
                output.append("\\)");
                break;
            case '}':
                output.append("\\}");
                break;
            case '*':
                output.append("\\*");
                break;
            case '+':
                output.append("\\+");
                break;
            case '-':
                output.append("\\-");
                break;
            case '.':
                output.append("\\.");
                break;
            case '?':
                output.append("\\?");
                break;
            case '!':
                output.append("\\!");
                break;
            case '^':
                output.append("\\^");
                break;
            case '$':
                output.append("\\$");
                break;
            case '|':
                output.append("\\|");
                break;

            default:
                output.append(c);
                break;
            }
        }
        return output.toString();
    }


    /**
     * CSS String escaper.
     *
     * <p>CSS escaper for strings such as CSS selector or quoted URI:
     * <pre>
     * &lt;style&gt;
     *  a[href *= "DATA_HERE"] {...}
     *  li { background: url('DATA_HERE'); }
     * &lt;/style&gt;
     * </pre>
     *
     * <p>Each escaped character is written as a backslash, its hexadecimal code
     * point, and a trailing space that terminates the CSS escape sequence.
     *
     * <p>This CSS string escaper processes the following characters:
     * <ul>
     *   <li>CSS string characters: {@code '} (U+0027), {@code "} (U+0022),
     *       {@code \} (U+005C)</li>
     *   <li>HTML characters: {@code /} (U+002F),
     *       {@code <} (U+003C), {@code >} (U+003E),
     *       {@code &} (U+0026)</li>
     *   <li>Control characters: {@code \b} (U+0008),
     *       {@code \t} (U+0009), {@code \n} (U+000A),
     *       {@code \f} (U+000C), {@code \r} (U+000D)</li>
     *   <li>Unicode newlines: LS (U+2028), PS (U+2029)</li>
     * </ul>
     *
     * @param  input the string to be escaped
     * @return       the CSS string escaped or null if input is null
     * @since  1.0
     */
    public static String cssString(String input) {
        if (input == null) {
            return null;
        }

        int length = input.length();
        StringBuilder output = allocateStringBuilder(length);

        for (int i = 0; i < length; i++) {
            char c = input.charAt(i);
            switch (c) {
            // Control chars
            case '\b':
                output.append("\\08 ");
                break;
            case '\t':
                output.append("\\09 ");
                break;
            case '\n':
                output.append("\\0A ");
                break;
            case '\f':
                output.append("\\0C ");
                break;
            case '\r':
                output.append("\\0D ");
                break;
            // String chars
            case '\'':
                output.append("\\27 ");
                break;
            case '"':
                output.append("\\22 ");
                break;
            case '\\':
                output.append("\\5C ");
                break;
            // HTML chars for closing the parent context
            case '&':
                output.append("\\26 ");
                break;
            case '/':
                output.append("\\2F ");
                break;
            case '<':
                output.append("\\3C ");
                break;
            case '>':
                output.append("\\3E ");
                break;
            // Unicode
            case '\u2028':
                output.append("\\002028 ");
                break;
            case '\u2029':
                output.append("\\002029 ");
                break;

            default:
                output.append(c);
                break;
            }
        }
        return output.toString();
    }


    /**
     * SQL LIKE clause escaper.
     *
     * <p>This SQL LIKE clause escaper does not protect against SQL injection, but ensures
     * that the string to be consumed in a SQL LIKE clause does not alter the current
     * LIKE query by inserting {@code %} or {@code _}:
     * <pre>
     * entityManager.createQuery("FROM MyEntity e WHERE e.content LIKE :like_query ESCAPE '@'")
     *              .setParameter("like_query", "%" + Escape.sqlLikeClause(USER_DATA_HERE))
     *              .getResultList();
     * </pre>
     * This escaper has to be used with a safe SQL query construct such as the JPQL
     * named parameterized query in the previous example.
     *
     * <p>This escaper uses by default the {@code @} as escape character. The other method
     * {@link #sqlLikeClause(String,char)} allows for using a different escape character
     * such as {@code \}.
     *
     * <p>This SQL LIKE escaper processes the following characters:
     * <ul>
     *   <li>SQL LIKE characters: {@code _} (U+005F), {@code %} (U+0025),
     *       {@code @} (U+0040)</li>
     * </ul>
     *
     * @param  input the string to be escaped
     * @return       the SQL LIKE escaped string or null if input is null
     * @since  1.0
     */
    public static String sqlLikeClause(String input) {
        return sqlLikeClause(input, '@');
    }


    /**
     * SQL LIKE clause escaper.
     *
     * <p>Similar to {@link #sqlLikeClause(String)}, but allows to specify the escape
     * character to be used. When a character different than {@code @} is used,
     * {@code @} will not be escaped by the escaper, and the specified escape character
     * will be.
     *
     * @param  input  the string to be escaped
     * @param  escape the escape character to be used
     * @return        the SQL LIKE escaped string or null if input is null
     * @since  1.0
     */
    public static String sqlLikeClause(String input, char escape) {
        if (input == null) {
            return null;
        }

        int length = input.length();
        StringBuilder output = allocateStringBuilder(length);

        for (int i = 0; i < length; i++) {
            char c = input.charAt(i);
            // Prefix the two LIKE wildcards, and the escape character itself,
            // with the escape character; the character is then always copied.
            if (c == escape || c == '_' || c == '%') {
                output.append(escape);
            }
            output.append(c);
        }
        return output.toString();
    }


    /**
     * Compute the allocation size of the StringBuilder based on the length.
     *
     * @param  length the length of the input string
     * @return        an empty StringBuilder whose initial capacity is twice
     *                {@code length}, or {@code length} when doubling does not
     *                yield a positive value
     */
    private static StringBuilder allocateStringBuilder(int length) {
        // Allocate enough temporary buffer space to avoid reallocation in most
        // cases. If you believe you will output large amount of data at once
        // you might need to change the factor.
        int buflen = length;
        // The "> 0" test rejects an int overflow of length * 2 (which would
        // give a negative capacity) as well as the zero-length case.
        if (length * 2 > 0) {
            buflen = length * 2;
        }
        return new StringBuilder(buflen);
    }

}