All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.commons.lang.StringEscapeUtils Maven / Gradle / Ivy

There is a newer version: 0.40.13
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.lang;

import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Locale;

import org.apache.commons.lang.exception.NestableRuntimeException;
import org.apache.commons.lang.text.StrBuilder;

/**
 * 

Escapes and unescapes Strings for * Java, Java Script, HTML, XML, and SQL.

* *

#ThreadSafe#

* @author Apache Software Foundation * @author Apache Jakarta Turbine * @author Purple Technology * @author Alexander Day Chaffee * @author Antony Riley * @author Helge Tesgaard * @author Sean Brown * @author Gary Gregory * @author Phil Steitz * @author Pete Gieser * @since 2.0 * @version $Id: StringEscapeUtils.java 1057072 2011-01-10 01:55:57Z niallp $ */ public class StringEscapeUtils { private static final char CSV_DELIMITER = ','; private static final char CSV_QUOTE = '"'; private static final String CSV_QUOTE_STR = String.valueOf(CSV_QUOTE); private static final char[] CSV_SEARCH_CHARS = new char[] {CSV_DELIMITER, CSV_QUOTE, CharUtils.CR, CharUtils.LF}; /** *

StringEscapeUtils instances should NOT be constructed in * standard programming.

* *

Instead, the class should be used as: *

StringEscapeUtils.escapeJava("foo");

* *

This constructor is public to permit tools that require a JavaBean * instance to operate.

*/ public StringEscapeUtils() { super(); } // Java and JavaScript //-------------------------------------------------------------------------- /** *

Escapes the characters in a String using Java String rules.

* *

Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)

* *

So a tab becomes the characters '\\' and * 't'.

* *

The only difference between Java strings and JavaScript strings * is that in JavaScript, a single quote must be escaped.

* *

Example: *

     * input string: He didn't say, "Stop!"
     * output string: He didn't say, \"Stop!\"
     * 
*

* * @param str String to escape values in, may be null * @return String with escaped values, null if null string input */ public static String escapeJava(String str) { return escapeJavaStyleString(str, false, false); } /** *

Escapes the characters in a String using Java String rules to * a Writer.

* *

A null string input has no effect.

* * @see #escapeJava(java.lang.String) * @param out Writer to write escaped string into * @param str String to escape values in, may be null * @throws IllegalArgumentException if the Writer is null * @throws IOException if error occurs on underlying Writer */ public static void escapeJava(Writer out, String str) throws IOException { escapeJavaStyleString(out, str, false, false); } /** *

Escapes the characters in a String using JavaScript String rules.

*

Escapes any values it finds into their JavaScript String form. * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)

* *

So a tab becomes the characters '\\' and * 't'.

* *

The only difference between Java strings and JavaScript strings * is that in JavaScript, a single quote must be escaped.

* *

Example: *

     * input string: He didn't say, "Stop!"
     * output string: He didn\'t say, \"Stop!\"
     * 
*

* * @param str String to escape values in, may be null * @return String with escaped values, null if null string input */ public static String escapeJavaScript(String str) { return escapeJavaStyleString(str, true, true); } /** *

Escapes the characters in a String using JavaScript String rules * to a Writer.

* *

A null string input has no effect.

* * @see #escapeJavaScript(java.lang.String) * @param out Writer to write escaped string into * @param str String to escape values in, may be null * @throws IllegalArgumentException if the Writer is null * @throws IOException if error occurs on underlying Writer **/ public static void escapeJavaScript(Writer out, String str) throws IOException { escapeJavaStyleString(out, str, true, true); } /** *

Worker method for the {@link #escapeJavaScript(String)} method.

* * @param str String to escape values in, may be null * @param escapeSingleQuotes escapes single quotes if true * @param escapeForwardSlash TODO * @return the escaped string */ private static String escapeJavaStyleString(String str, boolean escapeSingleQuotes, boolean escapeForwardSlash) { if (str == null) { return null; } try { StringWriter writer = new StringWriter(str.length() * 2); escapeJavaStyleString(writer, str, escapeSingleQuotes, escapeForwardSlash); return writer.toString(); } catch (IOException ioe) { // this should never ever happen while writing to a StringWriter throw new UnhandledException(ioe); } } /** *

Worker method for the {@link #escapeJavaScript(String)} method.

* * @param out write to receieve the escaped string * @param str String to escape values in, may be null * @param escapeSingleQuote escapes single quotes if true * @param escapeForwardSlash TODO * @throws IOException if an IOException occurs */ private static void escapeJavaStyleString(Writer out, String str, boolean escapeSingleQuote, boolean escapeForwardSlash) throws IOException { if (out == null) { throw new IllegalArgumentException("The Writer must not be null"); } if (str == null) { return; } int sz; sz = str.length(); for (int i = 0; i < sz; i++) { char ch = str.charAt(i); // handle unicode if (ch > 0xfff) { out.write("\\u" + hex(ch)); } else if (ch > 0xff) { out.write("\\u0" + hex(ch)); } else if (ch > 0x7f) { out.write("\\u00" + hex(ch)); } else if (ch < 32) { switch (ch) { case '\b' : out.write('\\'); out.write('b'); break; case '\n' : out.write('\\'); out.write('n'); break; case '\t' : out.write('\\'); out.write('t'); break; case '\f' : out.write('\\'); out.write('f'); break; case '\r' : out.write('\\'); out.write('r'); break; default : if (ch > 0xf) { out.write("\\u00" + hex(ch)); } else { out.write("\\u000" + hex(ch)); } break; } } else { switch (ch) { case '\'' : if (escapeSingleQuote) { out.write('\\'); } out.write('\''); break; case '"' : out.write('\\'); out.write('"'); break; case '\\' : out.write('\\'); out.write('\\'); break; case '/' : if (escapeForwardSlash) { out.write('\\'); } out.write('/'); break; default : out.write(ch); break; } } } } /** *

Returns an upper case hexadecimal String for the given * character.

* * @param ch The character to convert. * @return An upper case hexadecimal String */ private static String hex(char ch) { return Integer.toHexString(ch).toUpperCase(Locale.ENGLISH); } /** *

Unescapes any Java literals found in the String. * For example, it will turn a sequence of '\' and * 'n' into a newline character, unless the '\' * is preceded by another '\'.

* * @param str the String to unescape, may be null * @return a new unescaped String, null if null string input */ public static String unescapeJava(String str) { if (str == null) { return null; } try { StringWriter writer = new StringWriter(str.length()); unescapeJava(writer, str); return writer.toString(); } catch (IOException ioe) { // this should never ever happen while writing to a StringWriter throw new UnhandledException(ioe); } } /** *

Unescapes any Java literals found in the String to a * Writer.

* *

For example, it will turn a sequence of '\' and * 'n' into a newline character, unless the '\' * is preceded by another '\'.

* *

A null string input has no effect.

* * @param out the Writer used to output unescaped characters * @param str the String to unescape, may be null * @throws IllegalArgumentException if the Writer is null * @throws IOException if error occurs on underlying Writer */ public static void unescapeJava(Writer out, String str) throws IOException { if (out == null) { throw new IllegalArgumentException("The Writer must not be null"); } if (str == null) { return; } int sz = str.length(); StrBuilder unicode = new StrBuilder(4); boolean hadSlash = false; boolean inUnicode = false; for (int i = 0; i < sz; i++) { char ch = str.charAt(i); if (inUnicode) { // if in unicode, then we're reading unicode // values in somehow unicode.append(ch); if (unicode.length() == 4) { // unicode now contains the four hex digits // which represents our unicode character try { int value = Integer.parseInt(unicode.toString(), 16); out.write((char) value); unicode.setLength(0); inUnicode = false; hadSlash = false; } catch (NumberFormatException nfe) { throw new NestableRuntimeException("Unable to parse unicode value: " + unicode, nfe); } } continue; } if (hadSlash) { // handle an escaped value hadSlash = false; switch (ch) { case '\\': out.write('\\'); break; case '\'': out.write('\''); break; case '\"': out.write('"'); break; case 'r': out.write('\r'); break; case 'f': out.write('\f'); break; case 't': out.write('\t'); break; case 'n': out.write('\n'); break; case 'b': out.write('\b'); break; case 'u': { // uh-oh, we're in unicode country.... inUnicode = true; break; } default : out.write(ch); break; } continue; } else if (ch == '\\') { hadSlash = true; continue; } out.write(ch); } if (hadSlash) { // then we're in the weird case of a \ at the end of the // string, let's output it anyway. out.write('\\'); } } /** *

Unescapes any JavaScript literals found in the String.

* *

For example, it will turn a sequence of '\' and 'n' * into a newline character, unless the '\' is preceded by another * '\'.

* * @see #unescapeJava(String) * @param str the String to unescape, may be null * @return A new unescaped String, null if null string input */ public static String unescapeJavaScript(String str) { return unescapeJava(str); } /** *

Unescapes any JavaScript literals found in the String to a * Writer.

* *

For example, it will turn a sequence of '\' and 'n' * into a newline character, unless the '\' is preceded by another * '\'.

* *

A null string input has no effect.

* * @see #unescapeJava(Writer,String) * @param out the Writer used to output unescaped characters * @param str the String to unescape, may be null * @throws IllegalArgumentException if the Writer is null * @throws IOException if error occurs on underlying Writer */ public static void unescapeJavaScript(Writer out, String str) throws IOException { unescapeJava(out, str); } // HTML and XML //-------------------------------------------------------------------------- /** *

Escapes the characters in a String using HTML entities.

* *

* For example: *

*

"bread" & "butter"

* becomes: *

* &quot;bread&quot; &amp; &quot;butter&quot;. *

* *

Supports all known HTML 4.0 entities, including funky accents. * Note that the commonly used apostrophe escape character (&apos;) * is not a legal entity and so is not supported).

* * @param str the String to escape, may be null * @return a new escaped String, null if null string input * * @see #unescapeHtml(String) * @see ISO Entities * @see HTML 3.2 Character Entities for ISO Latin-1 * @see HTML 4.0 Character entity references * @see HTML 4.01 Character References * @see HTML 4.01 Code positions */ public static String escapeHtml(String str) { if (str == null) { return null; } try { StringWriter writer = new StringWriter ((int)(str.length() * 1.5)); escapeHtml(writer, str); return writer.toString(); } catch (IOException ioe) { //should be impossible throw new UnhandledException(ioe); } } /** *

Escapes the characters in a String using HTML entities and writes * them to a Writer.

* *

* For example: *

* "bread" & "butter" *

becomes:

* &quot;bread&quot; &amp; &quot;butter&quot;. * *

Supports all known HTML 4.0 entities, including funky accents. * Note that the commonly used apostrophe escape character (&apos;) * is not a legal entity and so is not supported).

* * @param writer the writer receiving the escaped string, not null * @param string the String to escape, may be null * @throws IllegalArgumentException if the writer is null * @throws IOException when Writer passed throws the exception from * calls to the {@link Writer#write(int)} methods. * * @see #escapeHtml(String) * @see #unescapeHtml(String) * @see ISO Entities * @see HTML 3.2 Character Entities for ISO Latin-1 * @see HTML 4.0 Character entity references * @see HTML 4.01 Character References * @see HTML 4.01 Code positions */ public static void escapeHtml(Writer writer, String string) throws IOException { if (writer == null ) { throw new IllegalArgumentException ("The Writer must not be null."); } if (string == null) { return; } Entities.HTML40.escape(writer, string); } //----------------------------------------------------------------------- /** *

Unescapes a string containing entity escapes to a string * containing the actual Unicode characters corresponding to the * escapes. Supports HTML 4.0 entities.

* *

For example, the string "&lt;Fran&ccedil;ais&gt;" * will become "<Français>"

* *

If an entity is unrecognized, it is left alone, and inserted * verbatim into the result string. e.g. "&gt;&zzzz;x" will * become ">&zzzz;x".

* * @param str the String to unescape, may be null * @return a new unescaped String, null if null string input * @see #escapeHtml(Writer, String) */ public static String unescapeHtml(String str) { if (str == null) { return null; } try { StringWriter writer = new StringWriter ((int)(str.length() * 1.5)); unescapeHtml(writer, str); return writer.toString(); } catch (IOException ioe) { //should be impossible throw new UnhandledException(ioe); } } /** *

Unescapes a string containing entity escapes to a string * containing the actual Unicode characters corresponding to the * escapes. Supports HTML 4.0 entities.

* *

For example, the string "&lt;Fran&ccedil;ais&gt;" * will become "<Français>"

* *

If an entity is unrecognized, it is left alone, and inserted * verbatim into the result string. e.g. "&gt;&zzzz;x" will * become ">&zzzz;x".

* * @param writer the writer receiving the unescaped string, not null * @param string the String to unescape, may be null * @throws IllegalArgumentException if the writer is null * @throws IOException if an IOException occurs * @see #escapeHtml(String) */ public static void unescapeHtml(Writer writer, String string) throws IOException { if (writer == null ) { throw new IllegalArgumentException ("The Writer must not be null."); } if (string == null) { return; } Entities.HTML40.unescape(writer, string); } //----------------------------------------------------------------------- /** *

Escapes the characters in a String using XML entities.

* *

For example: "bread" & "butter" => * &quot;bread&quot; &amp; &quot;butter&quot;. *

* *

Supports only the five basic XML entities (gt, lt, quot, amp, apos). * Does not support DTDs or external entities.

* *

Note that unicode characters greater than 0x7f are currently escaped to * their numerical \\u equivalent. This may change in future releases.

* * @param writer the writer receiving the unescaped string, not null * @param str the String to escape, may be null * @throws IllegalArgumentException if the writer is null * @throws IOException if there is a problem writing * @see #unescapeXml(java.lang.String) */ public static void escapeXml(Writer writer, String str) throws IOException { if (writer == null ) { throw new IllegalArgumentException ("The Writer must not be null."); } if (str == null) { return; } Entities.XML.escape(writer, str); } /** *

Escapes the characters in a String using XML entities.

* *

For example: "bread" & "butter" => * &quot;bread&quot; &amp; &quot;butter&quot;. *

* *

Supports only the five basic XML entities (gt, lt, quot, amp, apos). * Does not support DTDs or external entities.

* *

Note that unicode characters greater than 0x7f are currently escaped to * their numerical \\u equivalent. This may change in future releases.

* * @param str the String to escape, may be null * @return a new escaped String, null if null string input * @see #unescapeXml(java.lang.String) */ public static String escapeXml(String str) { if (str == null) { return null; } return Entities.XML.escape(str); } //----------------------------------------------------------------------- /** *

Unescapes a string containing XML entity escapes to a string * containing the actual Unicode characters corresponding to the * escapes.

* *

Supports only the five basic XML entities (gt, lt, quot, amp, apos). * Does not support DTDs or external entities.

* *

Note that numerical \\u unicode codes are unescaped to their respective * unicode characters. This may change in future releases.

* * @param writer the writer receiving the unescaped string, not null * @param str the String to unescape, may be null * @throws IllegalArgumentException if the writer is null * @throws IOException if there is a problem writing * @see #escapeXml(String) */ public static void unescapeXml(Writer writer, String str) throws IOException { if (writer == null ) { throw new IllegalArgumentException ("The Writer must not be null."); } if (str == null) { return; } Entities.XML.unescape(writer, str); } /** *

Unescapes a string containing XML entity escapes to a string * containing the actual Unicode characters corresponding to the * escapes.

* *

Supports only the five basic XML entities (gt, lt, quot, amp, apos). * Does not support DTDs or external entities.

* *

Note that numerical \\u unicode codes are unescaped to their respective * unicode characters. This may change in future releases.

* * @param str the String to unescape, may be null * @return a new unescaped String, null if null string input * @see #escapeXml(String) */ public static String unescapeXml(String str) { if (str == null) { return null; } return Entities.XML.unescape(str); } //----------------------------------------------------------------------- /** *

Escapes the characters in a String to be suitable to pass to * an SQL query.

* *

For example, *

statement.executeQuery("SELECT * FROM MOVIES WHERE TITLE='" + 
     *   StringEscapeUtils.escapeSql("McHale's Navy") + 
     *   "'");
*

* *

At present, this method only turns single-quotes into doubled single-quotes * ("McHale's Navy" => "McHale''s Navy"). It does not * handle the cases of percent (%) or underscore (_) for use in LIKE clauses.

* * see http://www.jguru.com/faq/view.jsp?EID=8881 * @param str the string to escape, may be null * @return a new String, escaped for SQL, null if null string input */ public static String escapeSql(String str) { if (str == null) { return null; } return StringUtils.replace(str, "'", "''"); } //----------------------------------------------------------------------- /** *

Returns a String value for a CSV column enclosed in double quotes, * if required.

* *

If the value contains a comma, newline or double quote, then the * String value is returned enclosed in double quotes.

*

* *

Any double quote characters in the value are escaped with another double quote.

* *

If the value does not contain a comma, newline or double quote, then the * String value is returned unchanged.

*

* * see Wikipedia and * RFC 4180. * * @param str the input CSV column String, may be null * @return the input String, enclosed in double quotes if the value contains a comma, * newline or double quote, null if null string input * @since 2.4 */ public static String escapeCsv(String str) { if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) { return str; } try { StringWriter writer = new StringWriter(); escapeCsv(writer, str); return writer.toString(); } catch (IOException ioe) { // this should never ever happen while writing to a StringWriter throw new UnhandledException(ioe); } } /** *

Writes a String value for a CSV column enclosed in double quotes, * if required.

* *

If the value contains a comma, newline or double quote, then the * String value is written enclosed in double quotes.

*

* *

Any double quote characters in the value are escaped with another double quote.

* *

If the value does not contain a comma, newline or double quote, then the * String value is written unchanged (null values are ignored).

*

* * see Wikipedia and * RFC 4180. * * @param str the input CSV column String, may be null * @param out Writer to write input string to, enclosed in double quotes if it contains * a comma, newline or double quote * @throws IOException if error occurs on underlying Writer * @since 2.4 */ public static void escapeCsv(Writer out, String str) throws IOException { if (StringUtils.containsNone(str, CSV_SEARCH_CHARS)) { if (str != null) { out.write(str); } return; } out.write(CSV_QUOTE); for (int i = 0; i < str.length(); i++) { char c = str.charAt(i); if (c == CSV_QUOTE) { out.write(CSV_QUOTE); // escape double quote } out.write(c); } out.write(CSV_QUOTE); } /** *

Returns a String value for an unescaped CSV column.

* *

If the value is enclosed in double quotes, and contains a comma, newline * or double quote, then quotes are removed. *

* *

Any double quote escaped characters (a pair of double quotes) are unescaped * to just one double quote.

* *

If the value is not enclosed in double quotes, or is and does not contain a * comma, newline or double quote, then the String value is returned unchanged.

*

* * see Wikipedia and * RFC 4180. * * @param str the input CSV column String, may be null * @return the input String, with enclosing double quotes removed and embedded double * quotes unescaped, null if null string input * @since 2.4 */ public static String unescapeCsv(String str) { if (str == null) { return null; } try { StringWriter writer = new StringWriter(); unescapeCsv(writer, str); return writer.toString(); } catch (IOException ioe) { // this should never ever happen while writing to a StringWriter throw new UnhandledException(ioe); } } /** *

Returns a String value for an unescaped CSV column.

* *

If the value is enclosed in double quotes, and contains a comma, newline * or double quote, then quotes are removed. *

* *

Any double quote escaped characters (a pair of double quotes) are unescaped * to just one double quote.

* *

If the value is not enclosed in double quotes, or is and does not contain a * comma, newline or double quote, then the String value is returned unchanged.

*

* * see Wikipedia and * RFC 4180. * * @param str the input CSV column String, may be null * @param out Writer to write the input String to, with enclosing double quotes * removed and embedded double quotes unescaped, null if null string input * @throws IOException if error occurs on underlying Writer * @since 2.4 */ public static void unescapeCsv(Writer out, String str) throws IOException { if (str == null) { return; } if (str.length() < 2) { out.write(str); return; } if ( str.charAt(0) != CSV_QUOTE || str.charAt(str.length() - 1) != CSV_QUOTE ) { out.write(str); return; } // strip quotes String quoteless = str.substring(1, str.length() - 1); if ( StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS) ) { // deal with escaped quotes; ie) "" str = StringUtils.replace(quoteless, CSV_QUOTE_STR + CSV_QUOTE_STR, CSV_QUOTE_STR); } out.write(str); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy