// org.unbescape.javascript.JavaScriptEscape - Maven / Gradle / Ivy
// Show all versions of unbescape - Show documentation
/*
* =============================================================================
*
* Copyright (c) 2014, The UNBESCAPE team (http://www.unbescape.org)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* =============================================================================
*/
package org.unbescape.javascript;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
/**
*
* Utility class for performing JavaScript escape/unescape operations.
*
*
* Configuration of escape/unescape operations
*
*
* Escape operations can be (optionally) configured by means of:
*
*
* - Level, which defines how deep the escape operation must be (what
* chars are to be considered eligible for escaping, depending on the specific
* needs of the scenario). Its values are defined by the {@link org.unbescape.javascript.JavaScriptEscapeLevel}
* enum.
* - Type, which defines whether escaping should be performed by means of SECs
* (Single Escape Characters like \n) or additionally by means of x-based or u-based
* hexadecimal escapes (\xE1 or \u00E1).
* Its values are defined by the {@link org.unbescape.javascript.JavaScriptEscapeType} enum.
*
*
* Unescape operations need no configuration parameters. Unescape operations
* will always perform complete unescape of SECs (\n), x-based (\xE1)
* and u-based (\u00E1) hexadecimal escapes, and even octal escapes (\057, which
* are deprecated since ECMAScript v5 and therefore not used for escaping).
*
*
* Features
*
*
* Specific features of the JavaScript escape/unescape operations performed by means of this class:
*
*
* - The JavaScript basic escape set is supported. This basic set consists of:
*
* - The Single Escape Characters:
* \0 (U+0000),
* \b (U+0008),
* \t (U+0009),
* \n (U+000A),
* \v (U+000B),
* \f (U+000C),
* \r (U+000D),
* \" (U+0022),
* \' (U+0027),
* \\ (U+005C) and
* \/ (U+002F).
* Note that \/ is optional, and will only be used when the /
* symbol appears after <, as in </. This is to avoid accidentally
* closing <script> tags in HTML. Also, note that \v
* (U+000B) is actually included as a Single Escape
* Character in the JavaScript (ECMAScript) specification, but will not be used as it
* is not supported by Microsoft Internet Explorer versions < 9.
*
* -
* Two ranges of non-displayable, control characters (some of which are already part of the
* single escape characters list): U+0001 to U+001F and
* U+007F to U+009F.
*
*
*
* - X-based hexadecimal escapes (a.k.a. hexadecimal escapes) are supported both in escape
* and unescape operations: \xE1.
* - U-based hexadecimal escapes (a.k.a. unicode escapes) are supported both in escape
* and unescape operations: \u00E1.
* - Octal escapes are supported, though only in unescape operations: \071. These are not supported
* in escape operations because octal escapes were deprecated in version 5 of the ECMAScript
* specification.
* - Support for the whole Unicode character set: \u0000 to \u10FFFF, including
* characters not representable by only one char in Java (>\uFFFF).
*
*
* Input/Output
*
*
* There are four different input/output modes that can be used in escape/unescape operations:
*
*
* - String input, String output: Input is specified as a String object
* and output is returned as another. In order to improve memory performance, all escape and unescape
* operations will return the exact same input object as output if no escape/unescape modifications
* are required.
* - String input, java.io.Writer output: Input will be read from a String
* and output will be written into the specified java.io.Writer.
* - java.io.Reader input, java.io.Writer output: Input will be read from a Reader
* and output will be written into the specified java.io.Writer.
* - char[] input, java.io.Writer output: Input will be read from a char array
* (char[]) and output will be written into the specified java.io.Writer.
* Two int arguments called offset and len will be
* used for specifying the part of the char[] that should be escaped/unescaped. These methods
* should be called with offset = 0 and len = text.length in order to process
* the whole char[].
*
*
* Glossary
*
*
* - SEC
* - Single Escape Character:
* \0 (U+0000),
* \b (U+0008),
* \t (U+0009),
* \n (U+000A),
* \v (U+000B),
* \f (U+000C),
* \r (U+000D),
* \" (U+0022),
* \' (U+0027),
* \\ (U+005C) and
* \/ (U+002F) (optional, only in </).
*
* - XHEXA escapes
* - Also called x-based hexadecimal escapes or simply hexadecimal escapes:
* compact representation of unicode codepoints up to U+00FF, with \x
* followed by exactly two hexadecimal figures: \xE1. XHEXA is many times used
* instead of UHEXA (when possible) in order to obtain shorter escaped strings.
* - UHEXA escapes
* - Also called u-based hexadecimal escapes or simply unicode escapes:
* complete representation of unicode codepoints up to U+FFFF, with &#92;u
* followed by exactly four hexadecimal figures: &#92;u00E1. Unicode codepoints >
* U+FFFF can be represented in JavaScript by means of two UHEXA escapes (a
* surrogate pair).
* - Octal escapes
* - Octal representation of unicode codepoints up to U+00FF, with \
* followed by up to three octal figures: \071. Though up to three octal figures
* are allowed, octal numbers > 377 (0xFF) are not supported. Note
* octal escapes have been deprecated as of version 5 of the ECMAScript specification.
* - Unicode Codepoint
* - Each of the int values conforming the Unicode code space.
* Normally corresponding to a Java char primitive value (codepoint <= \uFFFF),
* but might be two chars for codepoints \u10000 to \u10FFFF if the
* first char is a high surrogate (\uD800 to \uDBFF) and the
* second is a low surrogate (\uDC00 to \uDFFF).
*
*
* References
*
*
* The following references apply:
*
*
* - The ECMAScript Specification
* [ecmascript.org]
* - JavaScript
* character escape sequences [mathiasbynens.be]
*
*
*
* @author Daniel Fernández
*
* @since 1.0.0
*
*/
public final class JavaScriptEscape {
/**
*
* Perform a JavaScript level 1 (only basic set) escape operation
* on a String input.
*
*
* Level 1 means this method will only escape the JavaScript basic escape set:
*
*
* - The Single Escape Characters:
* \0 (U+0000),
* \b (U+0008),
* \t (U+0009),
* \n (U+000A),
* \v (U+000B),
* \f (U+000C),
* \r (U+000D),
* \" (U+0022),
* \' (U+0027),
* \\ (U+005C) and
* \/ (U+002F).
* Note that \/ is optional, and will only be used when the /
* symbol appears after <, as in </. This is to avoid accidentally
* closing <script> tags in HTML. Also, note that \v
* (U+000B) is actually included as a Single Escape
* Character in the JavaScript (ECMAScript) specification, but will not be used as it
* is not supported by Microsoft Internet Explorer versions < 9.
*
* -
* Two ranges of non-displayable, control characters (some of which are already part of the
* single escape characters list): U+0001 to U+001F and
* U+007F to U+009F.
*
*
*
* This method calls {@link #escapeJavaScript(String, JavaScriptEscapeType, JavaScriptEscapeLevel)}
* with the following preconfigured values:
*
*
* - type:
* {@link org.unbescape.javascript.JavaScriptEscapeType#SINGLE_ESCAPE_CHARS_DEFAULT_TO_XHEXA_AND_UHEXA}
* - level:
* {@link org.unbescape.javascript.JavaScriptEscapeLevel#LEVEL_1_BASIC_ESCAPE_SET}
*
*
* This method is thread-safe.
*
*
* @param text the String to be escaped.
* @return The escaped result String. As a memory-performance improvement, will return the exact
* same object as the text input argument if no escaping modifications were required (and
* no additional String objects will be created during processing). Will
* return null if input is null.
*/
public static String escapeJavaScriptMinimal(final String text) {
    // Preconfigured variant: SECs with x-based/u-based hexadecimal fallback, level 1 (basic escape set).
    final JavaScriptEscapeType type =
            JavaScriptEscapeType.SINGLE_ESCAPE_CHARS_DEFAULT_TO_XHEXA_AND_UHEXA;
    final JavaScriptEscapeLevel level =
            JavaScriptEscapeLevel.LEVEL_1_BASIC_ESCAPE_SET;
    // All the work is done by the fully-configurable String overload.
    return escapeJavaScript(text, type, level);
}
/**
*
* Perform a JavaScript level 2 (basic set and all non-ASCII chars) escape operation
* on a String input.
*
*
* Level 2 means this method will escape:
*
*
* - The JavaScript basic escape set:
*
* - The Single Escape Characters:
* \0 (U+0000),
* \b (U+0008),
* \t (U+0009),
* \n (U+000A),
* \v (U+000B),
* \f (U+000C),
* \r (U+000D),
* \" (U+0022),
* \' (U+0027),
* \\ (U+005C) and
* \/ (U+002F).
* Note that \/ is optional, and will only be used when the /
* symbol appears after <, as in </. This is to avoid accidentally
* closing <script> tags in HTML. Also, note that \v
* (U+000B) is actually included as a Single Escape
* Character in the JavaScript (ECMAScript) specification, but will not be used as it
* is not supported by Microsoft Internet Explorer versions < 9.
*
* -
* Two ranges of non-displayable, control characters (some of which are already part of the
* single escape characters list): U+0001 to U+001F and
* U+007F to U+009F.
*
*
*
* - All non ASCII characters.
*
*
* This escape will be performed by using the Single Escape Chars whenever possible. For escaped
* characters that do not have an associated SEC, default to using \xFF Hexadecimal Escapes
* if possible (characters <= U+00FF), then default to \uFFFF
* Hexadecimal Escapes. This type of escape produces the smallest escaped string possible.
*
*
* This method calls {@link #escapeJavaScript(String, JavaScriptEscapeType, JavaScriptEscapeLevel)}
* with the following preconfigured values:
*
*
* - type:
* {@link org.unbescape.javascript.JavaScriptEscapeType#SINGLE_ESCAPE_CHARS_DEFAULT_TO_XHEXA_AND_UHEXA}
* - level:
* {@link org.unbescape.javascript.JavaScriptEscapeLevel#LEVEL_2_ALL_NON_ASCII_PLUS_BASIC_ESCAPE_SET}
*
*
* This method is thread-safe.
*
*
* @param text the String to be escaped.
* @return The escaped result String. As a memory-performance improvement, will return the exact
* same object as the text input argument if no escaping modifications were required (and
* no additional String objects will be created during processing). Will
* return null if input is null.
*/
public static String escapeJavaScript(final String text) {
    // Preconfigured variant: SECs with x-based/u-based hexadecimal fallback,
    // level 2 (all non-ASCII characters plus the basic escape set).
    final JavaScriptEscapeType type =
            JavaScriptEscapeType.SINGLE_ESCAPE_CHARS_DEFAULT_TO_XHEXA_AND_UHEXA;
    final JavaScriptEscapeLevel level =
            JavaScriptEscapeLevel.LEVEL_2_ALL_NON_ASCII_PLUS_BASIC_ESCAPE_SET;
    // All the work is done by the fully-configurable String overload.
    return escapeJavaScript(text, type, level);
}
/**
*
* Perform a (configurable) JavaScript escape operation on a String input.
*
*
* This method will perform an escape operation according to the specified
* {@link org.unbescape.javascript.JavaScriptEscapeType} and
* {@link org.unbescape.javascript.JavaScriptEscapeLevel} argument values.
*
*
* All other String-based escapeJavaScript*(...) methods call this one with preconfigured
* type and level values.
*
*
* This method is thread-safe.
*
*
* @param text the String to be escaped.
* @param type the type of escape operation to be performed, see
* {@link org.unbescape.javascript.JavaScriptEscapeType}.
* @param level the escape level to be applied, see {@link org.unbescape.javascript.JavaScriptEscapeLevel}.
* @return The escaped result String. As a memory-performance improvement, will return the exact
* same object as the text input argument if no escaping modifications were required (and
* no additional String objects will be created during processing). Will
* return null if input is null.
*/
public static String escapeJavaScript(final String text,
        final JavaScriptEscapeType type, final JavaScriptEscapeLevel level) {
    // Happy path first: both configuration arguments are present.
    if (type != null && level != null) {
        return JavaScriptEscapeUtil.escape(text, type, level);
    }
    // At least one of them is missing; 'type' is reported in preference to 'level'.
    throw new IllegalArgumentException(
            type == null
                    ? "The 'type' argument cannot be null"
                    : "The 'level' argument cannot be null");
}
/**
*
* Perform a JavaScript level 1 (only basic set) escape operation
* on a String input, writing results to a Writer.
*
*
* Level 1 means this method will only escape the JavaScript basic escape set:
*
*
* - The Single Escape Characters:
* \0 (U+0000),
* \b (U+0008),
* \t (U+0009),
* \n (U+000A),
* \v (U+000B),
* \f (U+000C),
* \r (U+000D),
* \" (U+0022),
* \' (U+0027),
* \\ (U+005C) and
* \/ (U+002F).
* Note that \/ is optional, and will only be used when the /
* symbol appears after <, as in </. This is to avoid accidentally
* closing <script> tags in HTML. Also, note that \v
* (U+000B) is actually included as a Single Escape
* Character in the JavaScript (ECMAScript) specification, but will not be used as it
* is not supported by Microsoft Internet Explorer versions < 9.
*
* -
* Two ranges of non-displayable, control characters (some of which are already part of the
* single escape characters list): U+0001 to U+001F and
* U+007F to U+009F.
*
*
*
* This method calls {@link #escapeJavaScript(String, Writer, JavaScriptEscapeType, JavaScriptEscapeLevel)}
* with the following preconfigured values:
*
*
* - type:
* {@link org.unbescape.javascript.JavaScriptEscapeType#SINGLE_ESCAPE_CHARS_DEFAULT_TO_XHEXA_AND_UHEXA}
* - level:
* {@link org.unbescape.javascript.JavaScriptEscapeLevel#LEVEL_1_BASIC_ESCAPE_SET}
*
*
* This method is thread-safe.
*
*
* @param text the String to be escaped.
* @param writer the java.io.Writer to which the escaped result will be written. Nothing will
* be written at all to this writer if input is null.
* @throws IOException if an input/output exception occurs
*
* @since 1.1.2
*/
public static void escapeJavaScriptMinimal(final String text, final Writer writer)
        throws IOException {
    // Preconfigured variant: SECs with x-based/u-based hexadecimal fallback, level 1 (basic escape set).
    final JavaScriptEscapeType type =
            JavaScriptEscapeType.SINGLE_ESCAPE_CHARS_DEFAULT_TO_XHEXA_AND_UHEXA;
    final JavaScriptEscapeLevel level =
            JavaScriptEscapeLevel.LEVEL_1_BASIC_ESCAPE_SET;
    // Argument validation and the actual escaping happen in the configurable String/Writer overload.
    escapeJavaScript(text, writer, type, level);
}
/**
*
* Perform a JavaScript level 2 (basic set and all non-ASCII chars) escape operation
* on a String input, writing results to a Writer.
*
*
* Level 2 means this method will escape:
*
*
* - The JavaScript basic escape set:
*
* - The Single Escape Characters:
* \0 (U+0000),
* \b (U+0008),
* \t (U+0009),
* \n (U+000A),
* \v (U+000B),
* \f (U+000C),
* \r (U+000D),
* \" (U+0022),
* \' (U+0027),
* \\ (U+005C) and
* \/ (U+002F).
* Note that \/ is optional, and will only be used when the /
* symbol appears after <, as in </. This is to avoid accidentally
* closing <script> tags in HTML. Also, note that \v
* (U+000B) is actually included as a Single Escape
* Character in the JavaScript (ECMAScript) specification, but will not be used as it
* is not supported by Microsoft Internet Explorer versions < 9.
*
* -
* Two ranges of non-displayable, control characters (some of which are already part of the
* single escape characters list): U+0001 to U+001F and
* U+007F to U+009F.
*
*
*
* - All non ASCII characters.
*
*
* This escape will be performed by using the Single Escape Chars whenever possible. For escaped
* characters that do not have an associated SEC, default to using \xFF Hexadecimal Escapes
* if possible (characters <= U+00FF), then default to \uFFFF
* Hexadecimal Escapes. This type of escape produces the smallest escaped string possible.
*
*
* This method calls {@link #escapeJavaScript(String, Writer, JavaScriptEscapeType, JavaScriptEscapeLevel)}
* with the following preconfigured values:
*
*
* - type:
* {@link org.unbescape.javascript.JavaScriptEscapeType#SINGLE_ESCAPE_CHARS_DEFAULT_TO_XHEXA_AND_UHEXA}
* - level:
* {@link org.unbescape.javascript.JavaScriptEscapeLevel#LEVEL_2_ALL_NON_ASCII_PLUS_BASIC_ESCAPE_SET}
*
*
* This method is thread-safe.
*
*
* @param text the String to be escaped.
* @param writer the java.io.Writer to which the escaped result will be written. Nothing will
* be written at all to this writer if input is null.
* @throws IOException if an input/output exception occurs
*
* @since 1.1.2
*/
public static void escapeJavaScript(final String text, final Writer writer)
        throws IOException {
    // Preconfigured variant: SECs with x-based/u-based hexadecimal fallback,
    // level 2 (all non-ASCII characters plus the basic escape set).
    final JavaScriptEscapeType type =
            JavaScriptEscapeType.SINGLE_ESCAPE_CHARS_DEFAULT_TO_XHEXA_AND_UHEXA;
    final JavaScriptEscapeLevel level =
            JavaScriptEscapeLevel.LEVEL_2_ALL_NON_ASCII_PLUS_BASIC_ESCAPE_SET;
    // Argument validation and the actual escaping happen in the configurable String/Writer overload.
    escapeJavaScript(text, writer, type, level);
}
/**
*
* Perform a (configurable) JavaScript escape operation on a String input,
* writing results to a Writer.
*
*
* This method will perform an escape operation according to the specified
* {@link org.unbescape.javascript.JavaScriptEscapeType} and
* {@link org.unbescape.javascript.JavaScriptEscapeLevel} argument values.
*
*
* All other String/Writer-based escapeJavaScript*(...) methods call this one with preconfigured
* type and level values.
*
*
* This method is thread-safe.
*
*
* @param text the String to be escaped.
* @param writer the java.io.Writer to which the escaped result will be written. Nothing will
* be written at all to this writer if input is null.
* @param type the type of escape operation to be performed, see
* {@link org.unbescape.javascript.JavaScriptEscapeType}.
* @param level the escape level to be applied, see {@link org.unbescape.javascript.JavaScriptEscapeLevel}.
* @throws IOException if an input/output exception occurs
*
* @since 1.1.2
*/
public static void escapeJavaScript(final String text, final Writer writer,
        final JavaScriptEscapeType type, final JavaScriptEscapeLevel level)
        throws IOException {
    // Mandatory arguments are validated first, so that misuse is reported even for null input.
    if (writer == null) {
        throw new IllegalArgumentException("Argument 'writer' cannot be null");
    }
    if (type == null) {
        throw new IllegalArgumentException("The 'type' argument cannot be null");
    }
    if (level == null) {
        throw new IllegalArgumentException("The 'level' argument cannot be null");
    }
    if (text == null) {
        // Nothing to escape: honour the documented contract (null input writes nothing)
        // instead of attempting to wrap a null String in a Reader.
        return;
    }
    // The String is exposed as a Reader so that the stream-based escape implementation can be reused.
    JavaScriptEscapeUtil.escape(new InternalStringReader(text), writer, type, level);
}
/**
*
* Perform a JavaScript level 1 (only basic set) escape operation
* on a Reader input, writing results to a Writer.
*
*
* Level 1 means this method will only escape the JavaScript basic escape set:
*
*
* - The Single Escape Characters:
* \0 (U+0000),
* \b (U+0008),
* \t (U+0009),
* \n (U+000A),
* \v (U+000B),
* \f (U+000C),
* \r (U+000D),
* \" (U+0022),
* \' (U+0027),
* \\ (U+005C) and
* \/ (U+002F).
* Note that \/ is optional, and will only be used when the /
* symbol appears after <, as in </. This is to avoid accidentally
* closing <script> tags in HTML. Also, note that \v
* (U+000B) is actually included as a Single Escape
* Character in the JavaScript (ECMAScript) specification, but will not be used as it
* is not supported by Microsoft Internet Explorer versions < 9.
*
* -
* Two ranges of non-displayable, control characters (some of which are already part of the
* single escape characters list): U+0001 to U+001F and
* U+007F to U+009F.
*
*
*
* This method calls {@link #escapeJavaScript(Reader, Writer, JavaScriptEscapeType, JavaScriptEscapeLevel)}
* with the following preconfigured values:
*
*
* - type:
* {@link org.unbescape.javascript.JavaScriptEscapeType#SINGLE_ESCAPE_CHARS_DEFAULT_TO_XHEXA_AND_UHEXA}
* - level:
* {@link org.unbescape.javascript.JavaScriptEscapeLevel#LEVEL_1_BASIC_ESCAPE_SET}
*
*
* This method is thread-safe.
*
*
* @param reader the Reader reading the text to be escaped.
* @param writer the java.io.Writer to which the escaped result will be written. Nothing will
* be written at all to this writer if input is null.
* @throws IOException if an input/output exception occurs
*
* @since 1.1.2
*/
public static void escapeJavaScriptMinimal(final Reader reader, final Writer writer)
        throws IOException {
    // Preconfigured variant: SECs with x-based/u-based hexadecimal fallback, level 1 (basic escape set).
    final JavaScriptEscapeType type =
            JavaScriptEscapeType.SINGLE_ESCAPE_CHARS_DEFAULT_TO_XHEXA_AND_UHEXA;
    final JavaScriptEscapeLevel level =
            JavaScriptEscapeLevel.LEVEL_1_BASIC_ESCAPE_SET;
    // Argument validation and the actual escaping happen in the configurable Reader/Writer overload.
    escapeJavaScript(reader, writer, type, level);
}
/**
*
* Perform a JavaScript level 2 (basic set and all non-ASCII chars) escape operation
* on a Reader input, writing results to a Writer.
*
*
* Level 2 means this method will escape:
*
*
* - The JavaScript basic escape set:
*
* - The Single Escape Characters:
* \0 (U+0000),
* \b (U+0008),
* \t (U+0009),
* \n (U+000A),
* \v (U+000B),
* \f (U+000C),
* \r (U+000D),
* \" (U+0022),
* \' (U+0027),
* \\ (U+005C) and
* \/ (U+002F).
* Note that \/ is optional, and will only be used when the /
* symbol appears after <, as in </. This is to avoid accidentally
* closing <script> tags in HTML. Also, note that \v
* (U+000B) is actually included as a Single Escape
* Character in the JavaScript (ECMAScript) specification, but will not be used as it
* is not supported by Microsoft Internet Explorer versions < 9.
*
* -
* Two ranges of non-displayable, control characters (some of which are already part of the
* single escape characters list): U+0001 to U+001F and
* U+007F to U+009F.
*
*
*
* - All non ASCII characters.
*
*
* This escape will be performed by using the Single Escape Chars whenever possible. For escaped
* characters that do not have an associated SEC, default to using \xFF Hexadecimal Escapes
* if possible (characters <= U+00FF), then default to \uFFFF
* Hexadecimal Escapes. This type of escape produces the smallest escaped string possible.
*
*
* This method calls {@link #escapeJavaScript(Reader, Writer, JavaScriptEscapeType, JavaScriptEscapeLevel)}
* with the following preconfigured values:
*
*
* - type:
* {@link org.unbescape.javascript.JavaScriptEscapeType#SINGLE_ESCAPE_CHARS_DEFAULT_TO_XHEXA_AND_UHEXA}
* - level:
* {@link org.unbescape.javascript.JavaScriptEscapeLevel#LEVEL_2_ALL_NON_ASCII_PLUS_BASIC_ESCAPE_SET}
*
*
* This method is thread-safe.
*
*
* @param reader the Reader reading the text to be escaped.
* @param writer the java.io.Writer to which the escaped result will be written. Nothing will
* be written at all to this writer if input is null.
* @throws IOException if an input/output exception occurs
*
* @since 1.1.2
*/
public static void escapeJavaScript(final Reader reader, final Writer writer)
        throws IOException {
    // Preconfigured variant: SECs with x-based/u-based hexadecimal fallback,
    // level 2 (all non-ASCII characters plus the basic escape set).
    final JavaScriptEscapeType type =
            JavaScriptEscapeType.SINGLE_ESCAPE_CHARS_DEFAULT_TO_XHEXA_AND_UHEXA;
    final JavaScriptEscapeLevel level =
            JavaScriptEscapeLevel.LEVEL_2_ALL_NON_ASCII_PLUS_BASIC_ESCAPE_SET;
    // Argument validation and the actual escaping happen in the configurable Reader/Writer overload.
    escapeJavaScript(reader, writer, type, level);
}
/**
*
* Perform a (configurable) JavaScript escape operation on a Reader input,
* writing results to a Writer.
*
*
* This method will perform an escape operation according to the specified
* {@link org.unbescape.javascript.JavaScriptEscapeType} and
* {@link org.unbescape.javascript.JavaScriptEscapeLevel} argument values.
*
*
* All other Reader/Writer-based escapeJavaScript*(...) methods call this one with preconfigured
* type and level values.
*
*
* This method is thread-safe.
*
*
* @param reader the Reader reading the text to be escaped.
* @param writer the java.io.Writer to which the escaped result will be written. Nothing will
* be written at all to this writer if input is null.
* @param type the type of escape operation to be performed, see
* {@link org.unbescape.javascript.JavaScriptEscapeType}.
* @param level the escape level to be applied, see {@link org.unbescape.javascript.JavaScriptEscapeLevel}.
* @throws IOException if an input/output exception occurs
*
* @since 1.1.2
*/
public static void escapeJavaScript(final Reader reader, final Writer writer,
        final JavaScriptEscapeType type, final JavaScriptEscapeLevel level)
        throws IOException {
    // The output target is checked before the configuration arguments.
    if (writer == null) {
        throw new IllegalArgumentException("Argument 'writer' cannot be null");
    }
    // A missing 'type' is reported in preference to a missing 'level'.
    if (type == null || level == null) {
        throw new IllegalArgumentException(
                type == null
                        ? "The 'type' argument cannot be null"
                        : "The 'level' argument cannot be null");
    }
    // The reader is handed over as-is to the stream-based escape implementation.
    JavaScriptEscapeUtil.escape(reader, writer, type, level);
}
/**
*
* Perform a JavaScript level 1 (only basic set) escape operation
* on a char[] input.
*
*
* Level 1 means this method will only escape the JavaScript basic escape set:
*
*
* - The Single Escape Characters:
* \0 (U+0000),
* \b (U+0008),
* \t (U+0009),
* \n (U+000A),
* \v (U+000B),
* \f (U+000C),
* \r (U+000D),
* \" (U+0022),
* \' (U+0027),
* \\ (U+005C) and
* \/ (U+002F).
* Note that \/ is optional, and will only be used when the /
* symbol appears after <, as in </. This is to avoid accidentally
* closing <script> tags in HTML. Also, note that \v
* (U+000B) is actually included as a Single Escape
* Character in the JavaScript (ECMAScript) specification, but will not be used as it
* is not supported by Microsoft Internet Explorer versions < 9.
*
* -
* Two ranges of non-displayable, control characters (some of which are already part of the
* single escape characters list): U+0001 to U+001F and
* U+007F to U+009F.
*
*
*
* This method calls
* {@link #escapeJavaScript(char[], int, int, java.io.Writer, JavaScriptEscapeType, JavaScriptEscapeLevel)}
* with the following preconfigured values:
*
*
* - type:
* {@link org.unbescape.javascript.JavaScriptEscapeType#SINGLE_ESCAPE_CHARS_DEFAULT_TO_XHEXA_AND_UHEXA}
* - level:
* {@link org.unbescape.javascript.JavaScriptEscapeLevel#LEVEL_1_BASIC_ESCAPE_SET}
*
*
* This method is thread-safe.
*
*
* @param text the char[] to be escaped.
* @param offset the position in text at which the escape operation should start.
* @param len the number of characters in text that should be escaped.
* @param writer the java.io.Writer to which the escaped result will be written. Nothing will
* be written at all to this writer if input is null.
* @throws IOException if an input/output exception occurs
*/
public static void escapeJavaScriptMinimal(final char[] text, final int offset, final int len, final Writer writer)
        throws IOException {
    // Preconfigured variant: SECs with x-based/u-based hexadecimal fallback, level 1 (basic escape set).
    final JavaScriptEscapeType type =
            JavaScriptEscapeType.SINGLE_ESCAPE_CHARS_DEFAULT_TO_XHEXA_AND_UHEXA;
    final JavaScriptEscapeLevel level =
            JavaScriptEscapeLevel.LEVEL_1_BASIC_ESCAPE_SET;
    // Argument/range validation and the actual escaping happen in the configurable char[] overload.
    escapeJavaScript(text, offset, len, writer, type, level);
}
/**
*
* Perform a JavaScript level 2 (basic set and all non-ASCII chars) escape operation
* on a char[] input.
*
*
* Level 2 means this method will escape:
*
*
* - The JavaScript basic escape set:
*
* - The Single Escape Characters:
* \0 (U+0000),
* \b (U+0008),
* \t (U+0009),
* \n (U+000A),
* \v (U+000B),
* \f (U+000C),
* \r (U+000D),
* \" (U+0022),
* \' (U+0027),
* \\ (U+005C) and
* \/ (U+002F).
* Note that \/ is optional, and will only be used when the /
* symbol appears after <, as in </. This is to avoid accidentally
* closing <script> tags in HTML. Also, note that \v
* (U+000B) is actually included as a Single Escape
* Character in the JavaScript (ECMAScript) specification, but will not be used as it
* is not supported by Microsoft Internet Explorer versions < 9.
*
* -
* Two ranges of non-displayable, control characters (some of which are already part of the
* single escape characters list): U+0001 to U+001F and
* U+007F to U+009F.
*
*
*
* - All non ASCII characters.
*
*
* This escape will be performed by using the Single Escape Chars whenever possible. For escaped
* characters that do not have an associated SEC, default to using \xFF Hexadecimal Escapes
* if possible (characters <= U+00FF), then default to \uFFFF
* Hexadecimal Escapes. This type of escape produces the smallest escaped string possible.
*
*
* This method calls
* {@link #escapeJavaScript(char[], int, int, java.io.Writer, JavaScriptEscapeType, JavaScriptEscapeLevel)}
* with the following preconfigured values:
*
*
* - type:
* {@link org.unbescape.javascript.JavaScriptEscapeType#SINGLE_ESCAPE_CHARS_DEFAULT_TO_XHEXA_AND_UHEXA}
* - level:
* {@link org.unbescape.javascript.JavaScriptEscapeLevel#LEVEL_2_ALL_NON_ASCII_PLUS_BASIC_ESCAPE_SET}
*
*
* This method is thread-safe.
*
*
* @param text the char[] to be escaped.
* @param offset the position in text at which the escape operation should start.
* @param len the number of characters in text that should be escaped.
* @param writer the java.io.Writer to which the escaped result will be written. Nothing will
* be written at all to this writer if input is null.
* @throws IOException if an input/output exception occurs
*/
public static void escapeJavaScript(final char[] text, final int offset, final int len, final Writer writer)
        throws IOException {
    // Preconfigured variant: SECs with x-based/u-based hexadecimal fallback,
    // level 2 (all non-ASCII characters plus the basic escape set).
    final JavaScriptEscapeType type =
            JavaScriptEscapeType.SINGLE_ESCAPE_CHARS_DEFAULT_TO_XHEXA_AND_UHEXA;
    final JavaScriptEscapeLevel level =
            JavaScriptEscapeLevel.LEVEL_2_ALL_NON_ASCII_PLUS_BASIC_ESCAPE_SET;
    // Argument/range validation and the actual escaping happen in the configurable char[] overload.
    escapeJavaScript(text, offset, len, writer, type, level);
}
/**
*
* Perform a (configurable) JavaScript escape operation on a char[] input.
*
*
* This method will perform an escape operation according to the specified
* {@link org.unbescape.javascript.JavaScriptEscapeType} and
* {@link org.unbescape.javascript.JavaScriptEscapeLevel} argument values.
*
*
* All other char[]-based escapeJavaScript*(...) methods call this one with preconfigured
* type and level values.
*
*
* This method is thread-safe.
*
*
* @param text the char[] to be escaped.
* @param offset the position in text at which the escape operation should start.
* @param len the number of characters in text that should be escaped.
* @param writer the java.io.Writer to which the escaped result will be written. Nothing will
* be written at all to this writer if input is null.
* @param type the type of escape operation to be performed, see
* {@link org.unbescape.javascript.JavaScriptEscapeType}.
* @param level the escape level to be applied, see {@link org.unbescape.javascript.JavaScriptEscapeLevel}.
* @throws IOException if an input/output exception occurs
*/
public static void escapeJavaScript(final char[] text, final int offset, final int len, final Writer writer,
        final JavaScriptEscapeType type, final JavaScriptEscapeLevel level)
        throws IOException {
    if (writer == null) {
        throw new IllegalArgumentException("Argument 'writer' cannot be null");
    }
    if (type == null) {
        throw new IllegalArgumentException("The 'type' argument cannot be null");
    }
    if (level == null) {
        throw new IllegalArgumentException("The 'level' argument cannot be null");
    }
    // For range validation, a null array counts as an empty one (only offset=0, len=0 is accepted).
    final int textLen = (text == null ? 0 : text.length);
    // 'len' is compared against the remaining room (textLen - offset) instead of testing
    // (offset + len) > textLen: that sum wraps around to a negative int for very large 'len'
    // values, which would let an out-of-bounds range pass validation. The subtraction cannot
    // overflow because it is only evaluated once 0 <= offset <= textLen has been established.
    if (offset < 0 || offset > textLen || len < 0 || len > (textLen - offset)) {
        throw new IllegalArgumentException(
                "Invalid (offset, len). offset=" + offset + ", len=" + len + ", text.length=" + textLen);
    }
    JavaScriptEscapeUtil.escape(text, offset, len, writer, type, level);
}
/**
*
* Perform a JavaScript unescape operation on a String input.
*
*
* No additional configuration arguments are required. Unescape operations
* will always perform complete JavaScript unescape of SECs, x-based, u-based
* and octal escapes.
*
*
* This method is thread-safe.
*
*
* @param text the String to be unescaped.
* @return The unescaped result String. As a memory-performance improvement, will return the exact
* same object as the text input argument if no unescaping modifications were required (and
* no additional String objects will be created during processing). Will
* return null if input is null.
*/
public static String unescapeJavaScript(final String text) {
    // Unescaping takes no configuration: the input is passed straight to the utility implementation.
    final String unescaped = JavaScriptEscapeUtil.unescape(text);
    return unescaped;
}
/**
*
* Perform a JavaScript unescape operation on a String input,
* writing results to a Writer.
*
*
* No additional configuration arguments are required. Unescape operations
* will always perform complete JavaScript unescape of SECs, x-based, u-based
* and octal escapes.
*
*
* This method is thread-safe.
*
*
* @param text the String to be unescaped.
* @param writer the java.io.Writer to which the unescaped result will be written. Nothing will
* be written at all to this writer if input is null.
* @throws IOException if an input/output exception occurs
*
* @since 1.1.2
*/
public static void unescapeJavaScript(final String text, final Writer writer)
        throws IOException {
    // The output target is validated first, so that misuse is reported even for null input.
    if (writer == null) {
        throw new IllegalArgumentException("Argument 'writer' cannot be null");
    }
    if (text == null) {
        // Nothing to unescape: honour the documented contract (null input writes nothing)
        // instead of attempting to wrap a null String in a Reader.
        return;
    }
    // The String is exposed as a Reader so that the stream-based unescape implementation can be reused.
    JavaScriptEscapeUtil.unescape(new InternalStringReader(text), writer);
}
/**
*
* Perform a JavaScript unescape operation on a Reader input,
* writing results to a Writer.
*
*
* No additional configuration arguments are required. Unescape operations
* will always perform complete JavaScript unescape of SECs, x-based, u-based
* and octal escapes.
*
*
* This method is thread-safe.
*
*
* @param reader the Reader reading the text to be unescaped.
* @param writer the java.io.Writer to which the unescaped result will be written. Nothing will
* be written at all to this writer if input is null.
* @throws IOException if an input/output exception occurs
*
* @since 1.1.2
*/
public static void unescapeJavaScript(final Reader reader, final Writer writer)
        throws IOException {
    // Only the output target is validated here; the reader is handed over as-is.
    if (writer != null) {
        JavaScriptEscapeUtil.unescape(reader, writer);
        return;
    }
    throw new IllegalArgumentException("Argument 'writer' cannot be null");
}
/**
*
* Perform a JavaScript unescape operation on a char[] input.
*
*
* No additional configuration arguments are required. Unescape operations
* will always perform complete JavaScript unescape of SECs, x-based, u-based
* and octal escapes.
*
*
* This method is thread-safe.
*
*
* @param text the char[] to be unescaped.
* @param offset the position in text at which the unescape operation should start.
* @param len the number of characters in text that should be unescaped.
* @param writer the java.io.Writer to which the unescaped result will be written. Nothing will
* be written at all to this writer if input is null.
* @throws IOException if an input/output exception occurs
*/
public static void unescapeJavaScript(final char[] text, final int offset, final int len, final Writer writer)
        throws IOException {
    if (writer == null) {
        throw new IllegalArgumentException("Argument 'writer' cannot be null");
    }
    // For range validation, a null array counts as an empty one (only offset=0, len=0 is accepted).
    final int textLen = (text == null ? 0 : text.length);
    // 'len' is compared against the remaining room (textLen - offset) instead of testing
    // (offset + len) > textLen: that sum wraps around to a negative int for very large 'len'
    // values, which would let an out-of-bounds range pass validation. The subtraction cannot
    // overflow because it is only evaluated once 0 <= offset <= textLen has been established.
    if (offset < 0 || offset > textLen || len < 0 || len > (textLen - offset)) {
        throw new IllegalArgumentException(
                "Invalid (offset, len). offset=" + offset + ", len=" + len + ", text.length=" + textLen);
    }
    JavaScriptEscapeUtil.unescape(text, offset, len, writer);
}
private JavaScriptEscape() {
super();
}
/*
* This is basically a very simplified, thread-unsafe version of StringReader that should
* perform better than the original StringReader by removing all synchronization structures.
*
* Note the only implemented methods are those that we know are really used from within the
* stream-based escape/unescape operations.
*/
private static final class InternalStringReader extends Reader {

    // Source of characters; set to null by close().
    private String str;
    // Number of chars in the source, captured at construction time (0 for a null source).
    private final int length;
    // Index of the next char to be returned.
    private int next = 0;

    /**
     * Creates a reader over the given String.
     *
     * @param s the String to read from. A null value is treated as an empty source:
     *          every read operation reports end-of-stream right away.
     */
    public InternalStringReader(final String s) {
        super();
        this.str = s;
        // Guard against null so that wrapping a null String does not fail on s.length();
        // with length == 0 no read path ever dereferences 'str'.
        this.length = (s == null ? 0 : s.length());
    }

    /**
     * Reads a single char.
     *
     * @return the char read, or -1 if the end of the source has been reached.
     */
    @Override
    public int read() throws IOException {
        if (this.next >= this.length) {
            return -1;
        }
        return this.str.charAt(this.next++);
    }

    /**
     * Copies up to len chars from the source into cbuf, starting at position off.
     *
     * @return the number of chars copied, 0 if len is 0, or -1 if the end of the
     *         source has been reached.
     * @throws IndexOutOfBoundsException if (off, len) does not denote a valid range of cbuf.
     */
    @Override
    public int read(final char[] cbuf, final int off, final int len) throws IOException {
        // The (off + len) < 0 test catches int overflow of the sum.
        if ((off < 0) || (off > cbuf.length) || (len < 0) ||
                ((off + len) > cbuf.length) || ((off + len) < 0)) {
            throw new IndexOutOfBoundsException();
        } else if (len == 0) {
            return 0;
        }
        if (this.next >= this.length) {
            return -1;
        }
        // Never copy more than what is left in the source.
        final int n = Math.min(this.length - this.next, len);
        this.str.getChars(this.next, this.next + n, cbuf, off);
        this.next += n;
        return n;
    }

    @Override
    public void close() throws IOException {
        this.str = null; // Just set the reference to null, help the GC
    }

}
}