All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.owasp.encoder.Encode Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
// Copyright (c) 2012 Jeff Ichnowski
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
//     * Redistributions of source code must retain the above
//       copyright notice, this list of conditions and the following
//       disclaimer.
//
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials
//       provided with the distribution.
//
//     * Neither the name of the OWASP nor the names of its
//       contributors may be used to endorse or promote products
//       derived from this software without specific prior written
//       permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
// OF THE POSSIBILITY OF SUCH DAMAGE.

package org.owasp.encoder;

import java.io.IOException;
import java.io.Writer;
import java.nio.CharBuffer;
import java.nio.charset.CoderResult;

/**
 * Encode -- fluent interface for contextual encoding.  Example usage in a JSP:
 *
 * 
 *     <input value="<%=Encode.forHtml(value)%>" />
 * 
* *

There are two versions of each contextual encoding method. The first * takes a {@code String} argument and returns the encoded version as a * {@code String}. The second version writes the encoded version directly * to a {@code Writer}.

* *

Please make sure to read and understand the context that the method encodes * for. Encoding for the incorrect context will likely lead to exposing a * cross-site scripting vulnerability.

*
 * @author Jeff Ichnowski
 */
public final class Encode {
    /** No instances: the class is final and its only constructor is private. */
    private Encode() {}

    /**
     *

Encodes for (X)HTML text content and text attributes. Since * this method encodes for both contexts, it may be slightly less * efficient to use this method over the methods targeted towards * the specific contexts ({@link #forHtmlAttribute(String)} and * {@link #forHtmlContent(String)}. In general this method should * be preferred unless you are really concerned with saving a few * bytes or are writing a framework that utilizes this * package.

* * Example JSP Usage *
     *     <div><%=Encode.forHtml(unsafeData)%></div>
     *
     *     <input value="<%=Encode.forHtml(unsafeData)%>" />
     * 
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
Encoding Table
Input | Result
{@code &} | {@code &amp;}
{@code <} | {@code &lt;}
{@code >} | {@code &gt;}
{@code "} | {@code &#34;}
{@code '} | {@code &#39;}
* *

Additional Notes

*
    *
  • The encoding of the greater-than sign ({@code >}) is not * strictly required, but is included for maximum * compatibility.
  • * *
  • Numeric encoding is used for the double-quote character ({@code &#34;}) as it is shorter than the also valid {@code &quot;}.
  • * *
  • Carriage return (U+0D), line-feed (U+0A), horizontal tab * (U+09) and space (U+20) are valid in quoted attributes and in * block in an unescaped form.
  • * *
  • Surrogate pairs are passed through only if valid.
  • * *
  • Characters that are not valid according * to the XML specification are replaced by a space character * as they could lead to parsing errors. In particular only {@code #x9 * | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | * [#x10000-#x10FFFF]} are considered valid.
  • *
*
     * @param input the data to encode
     * @return the data encoded for html.
     */
    public static String forHtml(String input) {
        // HTML text/attribute encoding is delegated to the XML encoder.
        return forXml(input);
    }

    /**
     * See {@link #forHtml(String)} for description of encoding.  This
     * version writes directly to a Writer without an intervening string.
     *
     * @param out where to write encoded output
     * @param input the input string to encode
     * @throws IOException if thrown by writer
     */
    public static void forHtml(Writer out, String input) throws IOException {
        forXml(out, input);
    }

    /**
     *

This method encodes for HTML text content. It does not escape * quotation characters and is thus unsafe for use with * HTML attributes. Use either forHtml or forHtmlAttribute for those * methods.

* * Example JSP Usage *
     *     <div><%=Encode.forHtmlContent(unsafeData)%></div>
     * 
* * * * * * * * * * * * * * * * * * * * * * *
Encoding Table
Input | Result
{@code &} | {@code &amp;}
{@code <} | {@code &lt;}
{@code >} | {@code &gt;}
* *

Additional Notes

*
    *
  • Single-quote character ({@code '}) and double-quote * character ({@code "}) do not require encoding in HTML * blocks, unlike other HTML contexts.
  • * *
  • The encoding of the greater-than sign ({@code >}) is not * strictly required, but is included for maximum * compatibility.
  • * *
  • Carriage return (U+0D), line-feed (U+0A), horizontal tab * (U+09) and space (U+20) are valid in quoted attributes and in * block in an unescaped form.
  • * *
  • Surrogate pairs are passed through only if valid.
  • * *
  • Characters that are not valid according * to the XML specification are replaced by a space character * as they could lead to parsing errors. In particular only {@code #x9 * | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | * [#x10000-#x10FFFF]} are considered valid.
  • *
*
     * @param input the input to encode
     * @return the encoded result
     */
    public static String forHtmlContent(String input) {
        // HTML content encoding is delegated to the XML content encoder.
        return forXmlContent(input);
    }

    /**
     * See {@link #forHtmlContent(String)} for description of encoding.  This
     * version writes directly to a Writer without an intervening string.
     *
     * @param out where to write encoded output
     * @param input the input string to encode
     * @throws IOException if thrown by writer
     */
    public static void forHtmlContent(Writer out, String input) throws IOException {
        forXmlContent(out, input);
    }

    /**
     *

This method encodes for HTML text attributes.

* * Example JSP Usage *
     *     <input value="<%=Encode.forHtmlAttribute(unsafeData)%>" />
     * 
* * * * * * * * * * * * * * * * * * * * * * * * * * * *
Encoding Table
Input | Result
{@code &} | {@code &amp;}
{@code <} | {@code &lt;}
{@code "} | {@code &#34;}
{@code '} | {@code &#39;}
* *

Additional Notes

*
    *
  • Both the single-quote character ({@code '}) and the * double-quote character ({@code "}) are encoded so this is safe * for HTML attributes with either enclosing character.
  • * *
  • The encoding of the greater-than sign ({@code >}) is not * required for attributes.
  • * *
  • Numeric encoding is used for the double-quote character ({@code &#34;}) as it is shorter than the also valid {@code &quot;}.
  • * *
  • Carriage return (U+0D), line-feed (U+0A), horizontal tab * (U+09) and space (U+20) are valid in quoted attributes and in * block in an unescaped form.
  • * *
  • Surrogate pairs are passed through only if valid.
  • * *
  • Characters that are not valid according * to the XML specification are replaced by a space character * as they could lead to parsing errors. In particular only {@code #x9 * | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | * [#x10000-#x10FFFF]} are considered valid.
  • *
*
     * @param input the input to encode
     * @return the encoded result
     */
    public static String forHtmlAttribute(String input) {
        // HTML attribute encoding is delegated to the XML attribute encoder.
        return forXmlAttribute(input);
    }

    /**
     * See {@link #forHtmlAttribute(String)} for description of encoding.  This
     * version writes directly to a Writer without an intervening string.
     *
     * @param out where to write encoded output
     * @param input the input string to encode
     * @throws IOException if thrown by writer
     */
    public static void forHtmlAttribute(Writer out, String input) throws IOException {
        forXmlAttribute(out, input);
    }

    /**
     *

Encodes for unquoted HTML attribute values. {@link * #forHtml(String)} or {@link #forHtmlAttribute(String)} should * usually be preferred over this method as quoted attributes are * XHTML compliant.

* *

When using this method, the caller is not required to * provide quotes around the attribute (since it is encoded for * such context). The caller should make sure that the attribute * value does not abut unsafe characters--and thus should usually * err on the side of including a space character after the * value.

* *

Use of this method is discouraged as quoted attributes are * generally more compatible and safer. Also note, that no * attempt has been made to optimize this encoding, though it is * still probably faster than other encoding libraries.

* * Example JSP Usage *
     *     <input value=<%=Encode.forHtmlUnquotedAttribute(input)%> >
     * 
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
Encoding Table
Input | Result
{@code U+0009} (horizontal tab) | {@code &#9;}
{@code U+000A} (line feed) | {@code &#10;}
{@code U+000C} (form feed) | {@code &#12;}
{@code U+000D} (carriage return) | {@code &#13;}
{@code U+0020} (space) | {@code &#32;}
{@code &} | {@code &amp;}
{@code <} | {@code &lt;}
{@code >} | {@code &gt;}
{@code "} | {@code &#34;}
{@code '} | {@code &#39;}
{@code /} | {@code &#47;}
{@code =} | {@code &#61;}
{@code `} | {@code &#96;}
{@code U+0085} (next line) | {@code &#133;}
{@code U+2028} (line separator) | {@code &#8232;}
{@code U+2029} (paragraph separator) | {@code &#8233;}
* *

Additional Notes

*
    *
  • The following characters are not encoded: * {@code 0-9, a-z, A-Z}, {@code !}, {@code * #}, {@code $}, {@code %}, * {@code (}, {@code )}, {@code * *}, {@code +}, {@code ,}, * {@code -}, {@code .}, {@code * [}, {@code \}, {@code ]}, * {@code ^}, {@code _}, {@code * }}.
  • * *
  • Surrogate pairs are passed through only if valid. Invalid * surrogate pairs are replaced by a hyphen (-).
  • * *
  • Characters in the C0 and C1 control blocks and not * otherwise listed above are considered invalid and replaced by a * hyphen (-) character.
  • * *
  • Unicode "non-characters" are replaced by hyphens (-).
  • *
*
     * @param input the attribute value to be encoded.
     * @return the attribute value encoded for unquoted attribute
     * context.
     */
    public static String forHtmlUnquotedAttribute(String input) {
        return encode(Encoders.HTML_UNQUOTED_ATTRIBUTE_ENCODER, input);
    }

    /**
     * See {@link #forHtmlUnquotedAttribute(String)} for description of encoding.  This
     * version writes directly to a Writer without an intervening string.
     *
     * @param out where to write encoded output
     * @param input the input string to encode
     * @throws IOException if thrown by writer
     */
    public static void forHtmlUnquotedAttribute(Writer out, String input) throws IOException {
        encode(Encoders.HTML_UNQUOTED_ATTRIBUTE_ENCODER, out, input);
    }

    // HTML comment encoding is not currently supported because
    // of the number of vendor-specific sequences that would need
    // to be handled (e.g. IE conditional comments).

    // public static String forHtmlComment(String input) {
    //     // only alphanumeric and space, everything else becomes a space
    //
    //     // HTML comment context needs to avoid browser extensions
    //     // such as IE conditional comments
    //     throw new UnsupportedOperationException();
    // }

    /**
     * Encodes for CSS strings.  The context must be surrounded by quotation
     * characters.  It is safe for use in both style blocks and attributes in
     * HTML.
     *
     * Example JSP Usage
     *
     *     <div style="background: url('<%=Encode.forCssString(...)%>');">
     *
     *     <style type="text/css">
     *         background: url('<%=Encode.forCssString(...)%>');
     *     </style>
     * 
* * Encoding Notes *
    * *
  • The following characters are encoded using hexadecimal encodings: {@code U+0000} - {@code U+001f}, {@code "}, {@code '}, {@code \}, {@code <}, {@code &}, {@code (}, {@code )}, {@code /}, {@code >}, {@code U+007f}, line separator ({@code U+2028}), paragraph separator ({@code U+2029}).
  • * *
  • Any character requiring encoding is encoded as {@code \xxx} where {@code xxx} is the shortest hexadecimal representation of its Unicode code point (after decoding surrogate pairs if necessary). This encoding is never zero padded. Thus, for example, the tab character is encoded as {@code \9}, not {@code \0009}.
  • * *
  • The encoder looks ahead 1 character in the input and appends a space to an encoding to avoid the next character becoming part of the hexadecimal encoded sequence. Thus “{@code '1}” is encoded as “{@code \27 1}”, and not as “{@code \271}”. If a space is not necessary, it is not included, thus “{@code 'x}” is encoded as “{@code \27x}”, and not as “{@code \27 x}”.
  • * *
  • Surrogate pairs are passed through only if valid. Invalid * surrogate pairs are replaced by an underscore (_).
  • * *
  • Unicode "non-characters" are replaced by underscores (_).
  • * *
*
     * @param input the input to encode
     * @return the encoded result
     */
    public static String forCssString(String input) {
        // need to watch out for CSS expressions
        return encode(Encoders.CSS_STRING_ENCODER, input);
    }

    /**
     * See {@link #forCssString(String)} for description of encoding.  This
     * version writes directly to a Writer without an intervening string.
     *
     * @param out where to write encoded output
     * @param input the input string to encode
     * @throws IOException if thrown by writer
     */
    public static void forCssString(Writer out, String input) throws IOException {
        encode(Encoders.CSS_STRING_ENCODER, out, input);
    }

    /**
     * Encodes for CSS URL contexts.  The context must be surrounded by {@code "url("}
     * and {@code ")"}.  It is safe for use in both style blocks and attributes in HTML.
     * Note: this does not do any checking on the quality or safety of the URL
     * itself.  The caller should ensure that the URL is safe for embedding
     * (e.g. input validation) by other means.
     *
     * Example JSP Usage
     *
     *     <div style="background:url(<%=Encode.forCssUrl(...)%>);">
     *
     *     <style type="text/css">
     *         background: url(<%=Encode.forCssUrl(...)%>);
     *     </style>
     * 
* Encoding Notes *
    * *
  • The following characters are encoded using hexadecimal encodings: {@code U+0000} - {@code U+001f}, {@code "}, {@code '}, {@code \}, {@code <}, {@code &}, {@code /}, {@code >}, {@code U+007f}, line separator ({@code U+2028}), paragraph separator ({@code U+2029}).
  • * *
  • Any character requiring encoding is encoded as {@code \xxx} where {@code xxx} is the shortest hexadecimal representation of its Unicode code point (after decoding surrogate pairs if necessary). This encoding is never zero padded. Thus, for example, the tab character is encoded as {@code \9}, not {@code \0009}.
  • * *
  • The encoder looks ahead 1 character in the input and appends a space to an encoding to avoid the next character becoming part of the hexadecimal encoded sequence. Thus “{@code '1}” is encoded as “{@code \27 1}”, and not as “{@code \271}”. If a space is not necessary, it is not included, thus “{@code 'x}” is encoded as “{@code \27x}”, and not as “{@code \27 x}”.
  • * *
  • Surrogate pairs are passed through only if valid. Invalid * surrogate pairs are replaced by an underscore (_).
  • * *
  • Unicode "non-characters" are replaced by underscores (_).
  • * *
*
     * @param input the input to encode
     * @return the encoded result
     */
    public static String forCssUrl(String input) {
        return encode(Encoders.CSS_URL_ENCODER, input);
    }

    /**
     * See {@link #forCssUrl(String)} for description of encoding.  This
     * version writes directly to a Writer without an intervening string.
     *
     * @param out where to write encoded output
     * @param input the input string to encode
     * @throws IOException if thrown by writer
     */
    public static void forCssUrl(Writer out, String input) throws IOException {
        encode(Encoders.CSS_URL_ENCODER, out, input);
    }

    /**
     *

Performs percent-encoding of a URL according to RFC 3986. The provided URL is assumed to be a valid URL. This method does not do any checking on the quality or safety of the URL itself. In many applications it may be better to use {@link java.net.URI} instead. Note: this is a particularly dangerous context to put untrusted content in, as for example a "javascript:" URL provided by a malicious user would be "properly" escaped, and still execute.

* * Encoding Table *

The following characters are not encoded:

*
     * U+20:   !   # $   & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ;   =   ?
     * U+40: @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [   ]   _
     * U+60:   a b c d e f g h i j k l m n o p q r s t u v w x y z       ~
     * 
* * Encoding Notes *
    * *
  • The single-quote character({@code '}) is not encoded.
  • * *
  • This encoding is not intended to be used standalone. The * output should be encoded to the target context. For example: * {@code ...}. * (Note, the single-quote character ({@code '}) is not * encoded.)
  • * *
  • URL encoding is an encoding for bytes, not unicode. The input string is thus first encoded as a sequence of UTF-8 bytes. The bytes are then encoded as {@code %xx} where {@code xx} is the two-digit hexadecimal representation of the byte. (The implementation does this as one step for performance.)
  • * *
  • Surrogate pairs are first decoded to a Unicode code point * before encoding as UTF-8.
  • * *
  • Invalid characters (e.g. partial or invalid surrogate * pairs), are replaced with a hyphen ({@code -}) character.
  • * *
*
     * @param input the input to encode
     * @return the encoded result
     *
     * @deprecated There is never a need to encode a complete URI with this form of encoding.
     */
    @Deprecated public static String forUri(String input) {
        return encode(Encoders.URI_ENCODER, input);
    }

    /**
     * See {@link #forUri(String)} for description of encoding.  This
     * version writes directly to a Writer without an intervening string.
     *
     * @param out where to write encoded output
     * @param input the input string to encode
     * @throws IOException if thrown by writer
     *
     * @deprecated There is never a need to encode a complete URI with this form of encoding.
     */
    @Deprecated public static void forUri(Writer out, String input) throws IOException {
        encode(Encoders.URI_ENCODER, out, input);
    }

    /**
     * Performs percent-encoding for a component of a URI, such as a query
     * parameter name or value, path or query-string.  In particular this
     * method ensures that special characters in the component do not get
     * interpreted as part of another component.
     *
     *
     *     <a href="http://www.owasp.org/<%=Encode.forUriComponent(...)%>?query#fragment">
     *
     *     <a href="/search?value=<%=Encode.forUriComponent(...)%>&order=1#top">
     * 
* * Encoding Table *

The following characters are not encoded:

*
     * U+20:                           - .   0 1 2 3 4 5 6 7 8 9
     * U+40: @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z         _
     * U+60:   a b c d e f g h i j k l m n o p q r s t u v w x y z       ~
     * 
* * Encoding Notes *
    * *
  • Unlike {@link #forUri(String)} this method is safe to be * used in most containing contexts, including: HTML/XML, CSS, * and JavaScript contexts.
  • * *
  • URL encoding is an encoding for bytes, not unicode. The input string is thus first encoded as a sequence of UTF-8 bytes. The bytes are then encoded as {@code %xx} where {@code xx} is the two-digit hexadecimal representation of the byte. (The implementation does this as one step for performance.)
  • * *
  • Surrogate pairs are first decoded to a Unicode code point * before encoding as UTF-8.
  • * *
  • Invalid characters (e.g. partial or invalid surrogate * pairs), are replaced with a hyphen ({@code -}) character.
  • * *
*
     * @param input the input to encode
     * @return the encoded result
     */
    public static String forUriComponent(String input) {
        return encode(Encoders.URI_COMPONENT_ENCODER, input);
    }

    /**
     * See {@link #forUriComponent(String)} for description of encoding.  This
     * version writes directly to a Writer without an intervening string.
     *
     * @param out where to write encoded output
     * @param input the input string to encode
     * @throws IOException if thrown by writer
     */
    public static void forUriComponent(Writer out, String input) throws IOException {
        encode(Encoders.URI_COMPONENT_ENCODER, out, input);
    }

    /**
     * Encoder for XML and XHTML.  See {@link #forHtml(String)} for a
     * description of the encoding and context.
     *
     * @see #forHtml(String)
     * @param input the input to encode
     * @return the encoded result
     */
    public static String forXml(String input) {
        return encode(Encoders.XML_ENCODER, input);
    }

    /**
     * See {@link #forXml(String)} for description of encoding.  This
     * version writes directly to a Writer without an intervening string.
     *
     * @param out where to write encoded output
     * @param input the input string to encode
     * @throws IOException if thrown by writer
     */
    public static void forXml(Writer out, String input) throws IOException {
        encode(Encoders.XML_ENCODER, out, input);
    }

    /**
     * Encoder for XML and XHTML text content.  See {@link
     * #forHtmlContent(String)} for description of encoding and
     * context.
     *
     * @see #forHtmlContent(String)
     * @param input the input to encode
     * @return the encoded result
     */
    public static String forXmlContent(String input) {
        return encode(Encoders.XML_CONTENT_ENCODER, input);
    }

    /**
     * See {@link #forXmlContent(String)} for description of encoding.  This
     * version writes directly to a Writer without an intervening string.
     *
     * @param out where to write encoded output
     * @param input the input string to encode
     * @throws IOException if thrown by writer
     */
    public static void forXmlContent(Writer out, String input) throws IOException {
        encode(Encoders.XML_CONTENT_ENCODER, out, input);
    }

    /**
     * Encoder for XML and XHTML attribute content.  See {@link
     * #forHtmlAttribute(String)} for description of encoding and
     * context.
     *
     * @see #forHtmlAttribute(String)
     * @param input the input to encode
     * @return the encoded result
     */
    public static String forXmlAttribute(String input) {
        return encode(Encoders.XML_ATTRIBUTE_ENCODER, input);
    }

    /**
     * See {@link #forXmlAttribute(String)} for description of encoding.  This
     * version writes directly to a Writer without an intervening string.
     *
     * @param out where to write encoded output
     * @param input the input string to encode
     * @throws IOException if thrown by writer
     */
    public static void forXmlAttribute(Writer out, String input) throws IOException {
        encode(Encoders.XML_ATTRIBUTE_ENCODER, out, input);
    }

    /**
     * Encoder for XML comments.  NOT FOR USE WITH
     * (X)HTML CONTEXTS.  (X)HTML comments may be interpreted by
     * browsers as something other than a comment, typically in vendor
     * specific extensions (e.g. {@code <--if[IE]-->}).
     * For (X)HTML it is recommended that unsafe content never be included
     * in a comment.
     *
     *

The caller must provide the comment start and end sequences.

* *

This method replaces all invalid XML characters with spaces, * and replaces the "--" sequence (which is invalid in XML comments) * with "-~" (hyphen-tilde). This encoding behavior may change * in future releases. If the comments need to be decoded, the * caller will need to come up with their own encode/decode system.

* *
     *     out.println("<?xml version='1.0'?>");
     *     out.println("<data>");
     *     out.println("<!-- "+Encode.forXmlComment(comment)+" -->");
     *     out.println("</data>");
     * 
*
     * @param input the input to encode
     * @return the encoded result
     */
    public static String forXmlComment(String input) {
        return encode(Encoders.XML_COMMENT_ENCODER, input);
    }

    /**
     * See {@link #forXmlComment(String)} for description of encoding.  This
     * version writes directly to a Writer without an intervening string.
     *
     * @param out where to write encoded output
     * @param input the input string to encode
     * @throws IOException if thrown by writer
     */
    public static void forXmlComment(Writer out, String input) throws IOException {
        encode(Encoders.XML_COMMENT_ENCODER, out, input);
    }

    /**
     * Encodes data for an XML CDATA section.  On the chance that the input
     * contains a terminating {@code "]]>"}, it will be replaced by
     * {@code "]]>]]<![CDATA[>"}.
     * As with all XML contexts, characters that are invalid according to the
     * XML specification will be replaced by a space character.   Caller must
     * provide the CDATA section boundaries.
     *
     *
     *     <xml-data><![CDATA[<%=Encode.forCDATA(...)%>]]></xml-data>
     * 
*
     * @param input the input to encode
     * @return the encoded result
     */
    public static String forCDATA(String input) {
        return encode(Encoders.CDATA_ENCODER, input);
    }

    /**
     * See {@link #forCDATA(String)} for description of encoding.  This
     * version writes directly to a Writer without an intervening string.
     *
     * @param out where to write encoded output
     * @param input the input string to encode
     * @throws IOException if thrown by writer
     */
    public static void forCDATA(Writer out, String input) throws IOException {
        encode(Encoders.CDATA_ENCODER, out, input);
    }

    /**
     * Encodes for a Java string.  This method will use "\b", "\t", "\r", "\f",
     * "\n", "\"", "\'", "\\", octal and unicode escapes.  Valid surrogate
     * pairing is not checked.   The caller must provide the enclosing quotation
     * characters.  This method is useful for when writing code generators and
     * outputting debug messages.
     *
     *
     *     out.println("public class Hello {");
     *     out.println("    public static void main(String[] args) {");
     *     out.println("        System.out.println(\"" + Encode.forJava(message) + "\");");
     *     out.println("    }");
     *     out.println("}");
     * 
*
     * @param input the input to encode
     * @return the input encoded for java strings.
     */
    public static String forJava(String input) {
        return encode(Encoders.JAVA_ENCODER, input);
    }

    /**
     * See {@link #forJava(String)} for description of encoding.  This
     * version writes directly to a Writer without an intervening string.
     *
     * @param out where to write encoded output
     * @param input the input string to encode
     * @throws IOException if thrown by writer
     */
    public static void forJava(Writer out, String input) throws IOException {
        encode(Encoders.JAVA_ENCODER, out, input);
    }

    /**
     *

Encodes for a JavaScript string. It is safe for use in HTML script attributes (such as {@code onclick}), script blocks, JSON files, and JavaScript source. The caller MUST provide the surrounding quotation characters for the string. Since this performs additional encoding so it can work in all of the JavaScript contexts listed, it may be slightly less efficient than using one of the methods targeted to a specific JavaScript context ({@link #forJavaScriptAttribute(String)}, {@link #forJavaScriptBlock}, {@link #forJavaScriptSource}). Unless you are interested in saving a few bytes of output or are writing a framework on top of this library, it is recommended that you use this method over the others.

* * Example JSP Usage: *
     *    <button onclick="alert('<%=Encode.forJavaScript(data)%>');">
     *    <script type="text/javascript">
     *        var data = "<%=Encode.forJavaScript(data)%>";
     *    </script>
     * 
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
Encoding Description
Input Character | Encoded Result | Notes
U+0008 BS | \b | Backspace character
U+0009 HT | \t | Horizontal tab character
U+000A LF | \n | Line feed character
U+000C FF | \f | Form feed character
U+000D CR | \r | Carriage return character
U+0022 " | \x22 | The encoding \" is not used here because it is not safe for use in HTML attributes. (In HTML attributes, it would also be correct to use "\&quot;".)
U+0026 & | \x26 | Ampersand character
U+0027 ' | \x27 | The encoding \' is not used here because it is not safe for use in HTML attributes. (In HTML attributes, it would also be correct to use "\&#39;".)
U+002F / | \/ | This encoding is used to avoid an input sequence "</" from prematurely terminating a </script> block.
U+005C \ | \\ |
U+0000 to U+001F | \x## | Hexadecimal encoding is used for characters in this range that were not already mentioned above.
*
     * @param input the input string to encode
     * @return the input encoded for JavaScript
     * @see #forJavaScriptAttribute(String)
     * @see #forJavaScriptBlock(String)
     */
    public static String forJavaScript(String input) {
        return encode(Encoders.JAVASCRIPT_ENCODER, input);
    }

    /**
     * See {@link #forJavaScript(String)} for description of encoding.  This
     * version writes directly to a Writer without an intervening string.
     *
     * @param out where to write encoded output
     * @param input the input string to encode
     * @throws IOException if thrown by writer
     */
    public static void forJavaScript(Writer out, String input) throws IOException {
        encode(Encoders.JAVASCRIPT_ENCODER, out, input);
    }

    /**
     *

This method encodes for JavaScript strings contained within * HTML script attributes (such as {@code onclick}). It is * NOT safe for use in script blocks. The caller MUST provide the * surrounding quotation characters. This method performs the * same encode as {@link #forJavaScript(String)} with the * exception that / is not escaped.

* *

Unless you are interested in saving a few bytes of output or are writing a framework on top of this library, it is recommended that you use {@link #forJavaScript(String)} over this method.

* * Example JSP Usage: *
     *    <button onclick="alert('<%=Encode.forJavaScriptAttribute(data)%>');">
     * 
*
     * @param input the input string to encode
     * @return the input encoded for JavaScript
     * @see #forJavaScript(String)
     * @see #forJavaScriptBlock(String)
     */
    public static String forJavaScriptAttribute(String input) {
        return encode(Encoders.JAVASCRIPT_ATTRIBUTE_ENCODER, input);
    }

    /**
     * See {@link #forJavaScriptAttribute(String)} for description of encoding.  This
     * version writes directly to a Writer without an intervening string.
     *
     * @param out where to write encoded output
     * @param input the input string to encode
     * @throws IOException if thrown by writer
     */
    public static void forJavaScriptAttribute(Writer out, String input) throws IOException {
        encode(Encoders.JAVASCRIPT_ATTRIBUTE_ENCODER, out, input);
    }

    /**
     *

This method encodes for JavaScript strings contained within * HTML script blocks. It is NOT safe for use in script * attributes (such as onclick). The caller must * provide the surrounding quotation characters. This method * performs the same encode as {@link #forJavaScript(String)} with * the exception that " and ' are * encoded as \" and \' * respectively.

* *

Unless you are interested in saving a few bytes of output or are writing a framework on top of this library, it is recommended that you use {@link #forJavaScript(String)} over this method.

* * Example JSP Usage: *
     *    <script type="text/javascript">
     *        var data = "<%=Encode.forJavaScriptBlock(data)%>";
     *    </script>
     * 
*
     * @param input the input string to encode
     * @return the input encoded for JavaScript
     * @see #forJavaScript(String)
     * @see #forJavaScriptAttribute(String)
     */
    public static String forJavaScriptBlock(String input) {
        return encode(Encoders.JAVASCRIPT_BLOCK_ENCODER, input);
    }

    /**
     * See {@link #forJavaScriptBlock(String)} for description of encoding.  This
     * version writes directly to a Writer without an intervening string.
     *
     * @param out where to write encoded output
     * @param input the input string to encode
     * @throws IOException if thrown by writer
     */
    public static void forJavaScriptBlock(Writer out, String input) throws IOException {
        encode(Encoders.JAVASCRIPT_BLOCK_ENCODER, out, input);
    }

    /**
     *

This method encodes for JavaScript strings contained within * a JavaScript or JSON file. This method is NOT safe for * use in ANY context embedded in HTML. The caller must * provide the surrounding quotation characters. This method * performs the same encode as {@link #forJavaScript(String)} with * the exception that / and & are not * escaped and " and ' are encoded as * \" and \' respectively.

* *

     * <p><b>Unless you are interested in saving a few bytes of
     * output or are writing a framework on top of this library, it is
     * recommended that you use {@link #forJavaScript(String)} over this
     * method.</b></p>

* * Example JSP Usage: * This example is serving up JavaScript source directly: *
     *    <%@page contentType="text/javascript; charset=UTF-8"%>
     *    var data = "<%=Encode.forJavaScriptSource(data)%>";
     * 
* * This example is serving up JSON data (users of this use-case * are encouraged to read up on "JSON Hijacking"): *
     *    <%@page contentType="application/json; charset=UTF-8"%>
     *    <% myapp.jsonHijackingPreventionMeasure(); %>
     *    {"data":"<%=Encode.forJavaScriptSource(data)%>"}
     * 
* * @param input the input string to encode * @return the input encoded for JavaScript * @see #forJavaScript(String) * @see #forJavaScriptAttribute(String) * @see #forJavaScriptBlock(String) */ public static String forJavaScriptSource(String input) { return encode(Encoders.JAVASCRIPT_SOURCE_ENCODER, input); } /** * See {@link #forJavaScriptSource(String)} for description of encoding. This * version writes directly to a Writer without an intervening string. * * @param out where to write encoded output * @param input the input string to encode * @throws IOException if thrown by writer */ public static void forJavaScriptSource(Writer out, String input) throws IOException { encode(Encoders.JAVASCRIPT_SOURCE_ENCODER, out, input); } // Additional? // MySQL // PostreSQL // Oracle // ... /** * Core encoding loop shared by public methods. It first uses the * encoder to scan the input for characters that need encoding. If * no characters require encoding, the input string is returned. * Otherwise a buffer is used to encode the remainder * of the input. * * @param encoder the encoder to use * @param str the string to encode * @return the input string encoded with the provided encoder. */ static String encode(Encoder encoder, String str) { if (str == null) { // consistent with String.valueOf(...) use "null" for null. str = "null"; } // quick pass--see if we need to actually encode anything, if not // return the value unchanged. final int n = str.length(); int j = encoder.firstEncodedOffset(str, 0, n); if (j == n) { return str; } // otherwise, we need to encode. We use a buffer to avoid // excessive memory allocation for these calls. Note: this means that // an encoder implementation must NEVER call this method internally. return new Buffer().encode(encoder, str, j); } /** * Core encoding loop shared by public methods. It first uses the * encoder to scan the input for characters that need encoding. 
If no * characters require encoding, the input string is written directly to * the writer. Otherwise a buffer is used to encode the * remainder of the input to the buffers. This version saves a wrapping * in an String. * * @param encoder the encoder to use * @param out the writer for the encoded output * @param str the string to encode * @throws IOException if thrown by the writer */ static void encode(Encoder encoder, Writer out, String str) throws IOException { if (str == null) { // consistent with String.valueOf(...) use "null" for null. str = "null"; } // quick pass--see if we need to actually encode anything, if not // return the value unchanged. final int n = str.length(); int j = encoder.firstEncodedOffset(str, 0, n); if (j == n) { out.write(str); return; } // otherwise, we need to encode. We use a buffer to avoid // excessive memory allocation for these calls. Note: this means that // an encoder implementation must NEVER call this method internally. new Buffer().encode(encoder, out, str, j); } /** * A buffer used for encoding. */ static class Buffer { /** * Input buffer size, used to extract a copy of the input * from a string and then send to the encoder. */ static final int INPUT_BUFFER_SIZE = 1024; /** * Output buffer size used to store the encoded output before * wrapping in a string. */ static final int OUTPUT_BUFFER_SIZE = INPUT_BUFFER_SIZE * 2; /** * The input buffer. A heap-allocated, array-backed buffer of * INPUT_BUFFER_SIZE used for holding the characters to encode. */ final CharBuffer _input = CharBuffer.allocate(INPUT_BUFFER_SIZE); /** * The output buffer. A heap-allocated, array-backed buffer of * OUTPUT_BUFFER_SIZE used for holding the encoded output. */ final CharBuffer _output = CharBuffer.allocate(OUTPUT_BUFFER_SIZE); /** * The core String encoding routine of this class. It uses the input * and output buffers to allow the encoders to work in reuse arrays. 
* When the input and/or output exceeds the capacity of the reused * arrays, temporary ones are allocated and then discarded after * the encode is done. * * @param encoder the encoder to use * @param str the string to encode * @param j the offset in {@code str} to start encoding * @return the encoded result */ String encode(Encoder encoder, String str, int j) { final int n = str.length(); final int remaining = n - j; if (remaining <= INPUT_BUFFER_SIZE && j <= OUTPUT_BUFFER_SIZE) { // the remaining input to encode fits completely in the pre- // allocated buffer. str.getChars(0, j, _output.array(), 0); str.getChars(j, n, _input.array(), 0); _input.limit(remaining).position(0); _output.clear().position(j); CoderResult cr = encoder.encodeArrays(_input, _output, true); if (cr.isUnderflow()) { return new String(_output.array(), 0, _output.position()); } // else, it's an overflow, we need to use a new output buffer // we'll allocate this buffer to be the exact size of the worst // case, guaranteeing a second overflow would not be possible. 
CharBuffer tmp = CharBuffer.allocate(_output.position() + encoder.maxEncodedLength(_input.remaining())); // copy over everything that has been encoded so far tmp.put(_output.array(), 0, _output.position()); cr = encoder.encodeArrays(_input, tmp, true); if (cr.isOverflow()) { throw new AssertionError("unexpected result from encoder"); } return new String(tmp.array(), 0, tmp.position()); } else { // the input it too large for our pre-allocated buffers // we'll use a temporary direct heap allocation final int m = j + encoder.maxEncodedLength(remaining); CharBuffer buffer = CharBuffer.allocate(m); str.getChars(0, j, buffer.array(), 0); str.getChars(j, n, buffer.array(), m - remaining); CharBuffer input = buffer.duplicate(); input.limit(m).position(m-remaining); buffer.position(j); CoderResult cr = encoder.encodeArrays(input, buffer, true); if (cr.isOverflow()) { throw new AssertionError("unexpected result from encoder"); } return new String(buffer.array(), 0, buffer.position()); } } /** * The core Writer encoding routing of this class. It uses the * input and output buffers to allow the encoders to reuse arrays. * Unlike the string version, this method will never allocate more * memory, instead encoding is done in batches and flushed to the * writer in batches as large as possible. * * @param encoder the encoder to use * @param out where to write the encoded output * @param str the string to encode * @param j the position in the string at which the first character * needs encoding. * @throws IOException if thrown by the writer. 
*/ void encode(Encoder encoder, Writer out, String str, int j) throws IOException { out.write(str, 0, j); final int n = str.length(); _input.clear(); _output.clear(); final char[] inputArray = _input.array(); final char[] outputArray = _output.array(); for (;;) { final int remainingInput = n - j; final int startPosition = _input.position(); final int batchSize = Math.min(remainingInput, _input.remaining()); str.getChars(j, j+batchSize, inputArray, startPosition); _input.limit(startPosition + batchSize); for (;;) { CoderResult cr = encoder.encodeArrays( _input, _output, batchSize == remainingInput); if (cr.isUnderflow()) { // get next input batch break; } // else, output buffer full, flush and continue. out.write(outputArray, 0, _output.position()); _output.clear(); } j += _input.position() - startPosition; if (j == n) { // done. flush remaining output buffer and return out.write(outputArray, 0, _output.position()); return; } _input.compact(); } } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy