All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.commons.text.StringEscapeUtils Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import java.io.IOException;
import java.io.Writer;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.translate.AggregateTranslator;
import org.apache.commons.text.translate.CharSequenceTranslator;
import org.apache.commons.text.translate.CsvTranslators;
import org.apache.commons.text.translate.EntityArrays;
import org.apache.commons.text.translate.JavaUnicodeEscaper;
import org.apache.commons.text.translate.LookupTranslator;
import org.apache.commons.text.translate.NumericEntityEscaper;
import org.apache.commons.text.translate.NumericEntityUnescaper;
import org.apache.commons.text.translate.OctalUnescaper;
import org.apache.commons.text.translate.UnicodeUnescaper;
import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover;

/**
 * <p>
 * Escapes and unescapes {@code String}s for Java, JavaScript, HTML and XML.
 * </p>
 *
 * <p>
 * #ThreadSafe#
 * </p>
 *
 * <p>
 * This code has been adapted from Apache Commons Lang 3.5.
 * </p>
* * @since 1.0 */ public class StringEscapeUtils { /* ESCAPE TRANSLATORS */ /** * Convenience wrapper for {@link java.lang.StringBuilder} providing escape methods. * *

     * <p>Example:</p>
     * <pre>
     * new Builder(ESCAPE_HTML4)
     *      .append("&lt;p&gt;")
     *      .escape("This is paragraph 1 and special chars like &amp; get escaped.")
     *      .append("&lt;/p&gt;&lt;p&gt;")
     *      .escape("This is paragraph 2 &amp; more...")
     *      .append("&lt;/p&gt;")
     *      .toString()
     * </pre>
*/
    public static final class Builder {

        /**
         * Accumulates literal and escaped fragments in the order they are added.
         */
        private final StringBuilder sb = new StringBuilder();

        /**
         * Translator applied to every string handed to {@link #escape(String)}.
         */
        private final CharSequenceTranslator translator;

        /**
         * Creates an empty builder bound to {@code translator}.
         *
         * @param translator the translator used by {@link #escape(String)}
         */
        private Builder(final CharSequenceTranslator translator) {
            this.translator = translator;
        }

        /**
         * Appends {@code input} verbatim, without running it through the translator.
         *
         * @param input the String to append
         * @return {@code this}, to enable chaining
         */
        public Builder append(final String input) {
            sb.append(input);
            return this;
        }

        /**
         * Runs {@code input} through this builder's {@link CharSequenceTranslator}
         * and appends the translated text.
         *
         * @param input the String to escape
         * @return {@code this}, to enable chaining
         */
        public Builder escape(final String input) {
            final String escaped = translator.translate(input);
            sb.append(escaped);
            return this;
        }

        /**
         * Returns everything appended so far as a single string.
         *
         * @return The escaped string
         */
        @Override
        public String toString() {
            return sb.toString();
        }
    }

    /**
     * Translator that strips escaping backslashes from its input.
     */
    static class XsiUnescaper extends CharSequenceTranslator {

        /**
         * The escape character that is removed.
         */
        private static final char BACKSLASH = '\\';

        /**
         * Writes {@code input} to {@code writer} with each escaping backslash dropped.
         * The character directly after a removed backslash is never itself treated
         * as an escape, so a doubled backslash yields one backslash.
         *
         * @param input the text to unescape
         * @param index the position to translate from; must be {@code 0}
         * @param writer the destination for the unescaped text
         * @return the number of code points in {@code input}, i.e. the whole input is consumed
         * @throws IOException if writing to {@code writer} fails
         * @throws IllegalStateException if {@code index} is not {@code 0}
         */
        @Override
        public int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
            if (index != 0) {
                throw new IllegalStateException("XsiUnescaper should never reach the [1] index");
            }

            final String text = input.toString();
            int copyFrom = 0;
            int scanFrom = 0;
            int slash;
            while ((slash = text.indexOf(BACKSLASH, scanFrom)) != -1) {
                if (slash > copyFrom) {
                    writer.write(text.substring(copyFrom, slash));
                }
                // Skip the backslash itself; resume searching after the character it escapes.
                copyFrom = slash + 1;
                scanFrom = slash + 2;
            }
            if (copyFrom < text.length()) {
                writer.write(text.substring(copyFrom));
            }
            return Character.codePointCount(input, 0, input.length());
        }
    }

    /**
     * Translator object for escaping Java.
* * While {@link #escapeJava(String)} is the expected method of use, this * object allows the Java escaping functionality to be used * as the foundation for a custom translator. */ public static final CharSequenceTranslator ESCAPE_JAVA; static { final Map escapeJavaMap = new HashMap<>(); escapeJavaMap.put("\"", "\\\""); escapeJavaMap.put("\\", "\\\\"); ESCAPE_JAVA = new AggregateTranslator( new LookupTranslator(Collections.unmodifiableMap(escapeJavaMap)), new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE), JavaUnicodeEscaper.outsideOf(32, 0x7f) ); } /** * Translator object for escaping EcmaScript/JavaScript. * * While {@link #escapeEcmaScript(String)} is the expected method of use, this * object allows the EcmaScript escaping functionality to be used * as the foundation for a custom translator. */ public static final CharSequenceTranslator ESCAPE_ECMASCRIPT; static { final Map escapeEcmaScriptMap = new HashMap<>(); escapeEcmaScriptMap.put("'", "\\'"); escapeEcmaScriptMap.put("\"", "\\\""); escapeEcmaScriptMap.put("\\", "\\\\"); escapeEcmaScriptMap.put("/", "\\/"); ESCAPE_ECMASCRIPT = new AggregateTranslator( new LookupTranslator(Collections.unmodifiableMap(escapeEcmaScriptMap)), new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE), JavaUnicodeEscaper.outsideOf(32, 0x7f) ); } /** * Translator object for escaping Json. * * While {@link #escapeJson(String)} is the expected method of use, this * object allows the Json escaping functionality to be used * as the foundation for a custom translator. */ public static final CharSequenceTranslator ESCAPE_JSON; static { final Map escapeJsonMap = new HashMap<>(); escapeJsonMap.put("\"", "\\\""); escapeJsonMap.put("\\", "\\\\"); escapeJsonMap.put("/", "\\/"); ESCAPE_JSON = new AggregateTranslator( new LookupTranslator(Collections.unmodifiableMap(escapeJsonMap)), new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE), JavaUnicodeEscaper.outsideOf(32, 0x7e) ); } /** * Translator object for escaping XML 1.0. 
* * While {@link #escapeXml10(String)} is the expected method of use, this * object allows the XML escaping functionality to be used * as the foundation for a custom translator. */ public static final CharSequenceTranslator ESCAPE_XML10; static { final Map escapeXml10Map = new HashMap<>(); escapeXml10Map.put("\u0000", StringUtils.EMPTY); escapeXml10Map.put("\u0001", StringUtils.EMPTY); escapeXml10Map.put("\u0002", StringUtils.EMPTY); escapeXml10Map.put("\u0003", StringUtils.EMPTY); escapeXml10Map.put("\u0004", StringUtils.EMPTY); escapeXml10Map.put("\u0005", StringUtils.EMPTY); escapeXml10Map.put("\u0006", StringUtils.EMPTY); escapeXml10Map.put("\u0007", StringUtils.EMPTY); escapeXml10Map.put("\u0008", StringUtils.EMPTY); escapeXml10Map.put("\u000b", StringUtils.EMPTY); escapeXml10Map.put("\u000c", StringUtils.EMPTY); escapeXml10Map.put("\u000e", StringUtils.EMPTY); escapeXml10Map.put("\u000f", StringUtils.EMPTY); escapeXml10Map.put("\u0010", StringUtils.EMPTY); escapeXml10Map.put("\u0011", StringUtils.EMPTY); escapeXml10Map.put("\u0012", StringUtils.EMPTY); escapeXml10Map.put("\u0013", StringUtils.EMPTY); escapeXml10Map.put("\u0014", StringUtils.EMPTY); escapeXml10Map.put("\u0015", StringUtils.EMPTY); escapeXml10Map.put("\u0016", StringUtils.EMPTY); escapeXml10Map.put("\u0017", StringUtils.EMPTY); escapeXml10Map.put("\u0018", StringUtils.EMPTY); escapeXml10Map.put("\u0019", StringUtils.EMPTY); escapeXml10Map.put("\u001a", StringUtils.EMPTY); escapeXml10Map.put("\u001b", StringUtils.EMPTY); escapeXml10Map.put("\u001c", StringUtils.EMPTY); escapeXml10Map.put("\u001d", StringUtils.EMPTY); escapeXml10Map.put("\u001e", StringUtils.EMPTY); escapeXml10Map.put("\u001f", StringUtils.EMPTY); escapeXml10Map.put("\ufffe", StringUtils.EMPTY); escapeXml10Map.put("\uffff", StringUtils.EMPTY); ESCAPE_XML10 = new AggregateTranslator( new LookupTranslator(EntityArrays.BASIC_ESCAPE), new LookupTranslator(EntityArrays.APOS_ESCAPE), new 
LookupTranslator(Collections.unmodifiableMap(escapeXml10Map)), NumericEntityEscaper.between(0x7f, 0x84), NumericEntityEscaper.between(0x86, 0x9f), new UnicodeUnpairedSurrogateRemover() ); } /** * Translator object for escaping XML 1.1. * * While {@link #escapeXml11(String)} is the expected method of use, this * object allows the XML escaping functionality to be used * as the foundation for a custom translator. */ public static final CharSequenceTranslator ESCAPE_XML11; static { final Map escapeXml11Map = new HashMap<>(); escapeXml11Map.put("\u0000", StringUtils.EMPTY); escapeXml11Map.put("\u000b", " "); escapeXml11Map.put("\u000c", " "); escapeXml11Map.put("\ufffe", StringUtils.EMPTY); escapeXml11Map.put("\uffff", StringUtils.EMPTY); ESCAPE_XML11 = new AggregateTranslator( new LookupTranslator(EntityArrays.BASIC_ESCAPE), new LookupTranslator(EntityArrays.APOS_ESCAPE), new LookupTranslator(Collections.unmodifiableMap(escapeXml11Map)), NumericEntityEscaper.between(0x1, 0x8), NumericEntityEscaper.between(0xe, 0x1f), NumericEntityEscaper.between(0x7f, 0x84), NumericEntityEscaper.between(0x86, 0x9f), new UnicodeUnpairedSurrogateRemover() ); } /** * Translator object for escaping HTML version 3.0. * * While {@link #escapeHtml3(String)} is the expected method of use, this * object allows the HTML escaping functionality to be used * as the foundation for a custom translator. */ public static final CharSequenceTranslator ESCAPE_HTML3 = new AggregateTranslator( new LookupTranslator(EntityArrays.BASIC_ESCAPE), new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE) ); /** * Translator object for escaping HTML version 4.0. * * While {@link #escapeHtml4(String)} is the expected method of use, this * object allows the HTML escaping functionality to be used * as the foundation for a custom translator. 
*/ public static final CharSequenceTranslator ESCAPE_HTML4 = new AggregateTranslator( new LookupTranslator(EntityArrays.BASIC_ESCAPE), new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE), new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE) ); /** * Translator object for escaping individual Comma Separated Values. * * While {@link #escapeCsv(String)} is the expected method of use, this * object allows the CSV escaping functionality to be used * as the foundation for a custom translator. */ public static final CharSequenceTranslator ESCAPE_CSV = new CsvTranslators.CsvEscaper(); /* UNESCAPE TRANSLATORS */ /** * Translator object for escaping Shell command language. * * @see Shell Command Language */ public static final CharSequenceTranslator ESCAPE_XSI; static { final Map escapeXsiMap = new HashMap<>(); escapeXsiMap.put("|", "\\|"); escapeXsiMap.put("&", "\\&"); escapeXsiMap.put(";", "\\;"); escapeXsiMap.put("<", "\\<"); escapeXsiMap.put(">", "\\>"); escapeXsiMap.put("(", "\\("); escapeXsiMap.put(")", "\\)"); escapeXsiMap.put("$", "\\$"); escapeXsiMap.put("`", "\\`"); escapeXsiMap.put("\\", "\\\\"); escapeXsiMap.put("\"", "\\\""); escapeXsiMap.put("'", "\\'"); escapeXsiMap.put(" ", "\\ "); escapeXsiMap.put("\t", "\\\t"); escapeXsiMap.put("\r\n", StringUtils.EMPTY); escapeXsiMap.put("\n", StringUtils.EMPTY); escapeXsiMap.put("*", "\\*"); escapeXsiMap.put("?", "\\?"); escapeXsiMap.put("[", "\\["); escapeXsiMap.put("#", "\\#"); escapeXsiMap.put("~", "\\~"); escapeXsiMap.put("=", "\\="); escapeXsiMap.put("%", "\\%"); ESCAPE_XSI = new LookupTranslator( Collections.unmodifiableMap(escapeXsiMap) ); } /** * Translator object for unescaping escaped Java. * * While {@link #unescapeJava(String)} is the expected method of use, this * object allows the Java unescaping functionality to be used * as the foundation for a custom translator. 
*/ public static final CharSequenceTranslator UNESCAPE_JAVA; static { final Map unescapeJavaMap = new HashMap<>(); unescapeJavaMap.put("\\\\", "\\"); unescapeJavaMap.put("\\\"", "\""); unescapeJavaMap.put("\\'", "'"); unescapeJavaMap.put("\\", StringUtils.EMPTY); UNESCAPE_JAVA = new AggregateTranslator( new OctalUnescaper(), // .between('\1', '\377'), new UnicodeUnescaper(), new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE), new LookupTranslator(Collections.unmodifiableMap(unescapeJavaMap)) ); } /** * Translator object for unescaping escaped EcmaScript. * * While {@link #unescapeEcmaScript(String)} is the expected method of use, this * object allows the EcmaScript unescaping functionality to be used * as the foundation for a custom translator. */ public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA; /** * Translator object for unescaping escaped Json. * * While {@link #unescapeJson(String)} is the expected method of use, this * object allows the Json unescaping functionality to be used * as the foundation for a custom translator. */ public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA; /** * Translator object for unescaping escaped HTML 3.0. * * While {@link #unescapeHtml3(String)} is the expected method of use, this * object allows the HTML unescaping functionality to be used * as the foundation for a custom translator. */ public static final CharSequenceTranslator UNESCAPE_HTML3 = new AggregateTranslator( new LookupTranslator(EntityArrays.BASIC_UNESCAPE), new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE), new NumericEntityUnescaper() ); /** * Translator object for unescaping escaped HTML 4.0. * * While {@link #unescapeHtml4(String)} is the expected method of use, this * object allows the HTML unescaping functionality to be used * as the foundation for a custom translator. 
*/
    public static final CharSequenceTranslator UNESCAPE_HTML4 =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
                    new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
                    new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE),
                    new NumericEntityUnescaper()
            );

    /**
     * Translator that reverses XML escaping.
     *
     * Callers normally go through {@link #unescapeXml(String)}; the constant is
     * exposed so the XML unescaping rules can be reused when composing a
     * custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_XML =
            new AggregateTranslator(
                    new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
                    new LookupTranslator(EntityArrays.APOS_UNESCAPE),
                    new NumericEntityUnescaper()
            );

    /**
     * Translator that reverses escaping of Comma Separated Value entries.
     *
     * Callers normally go through {@link #unescapeCsv(String)}; the constant is
     * exposed so the CSV unescaping rules can be reused when composing a
     * custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_CSV = new CsvTranslators.CsvUnescaper();

    /* Helper functions */

    /**
     * Translator that reverses escaping of XSI Value entries.
     *
     * Callers normally go through {@link #unescapeXSI(String)}; the constant is
     * exposed so the XSI unescaping rules can be reused when composing a
     * custom translator.
     */
    public static final CharSequenceTranslator UNESCAPE_XSI = new XsiUnescaper();

    /**
     * Creates a new {@link Builder} that escapes with the given translator.
     *
     * @param translator the text translator
     * @return {@link Builder}
     */
    public static StringEscapeUtils.Builder builder(final CharSequenceTranslator translator) {
        return new Builder(translator);
    }

    /**
     * Returns a {@code String} value for a CSV column enclosed in double quotes,
     * if required.
     *

     * <p>If the value contains a comma, newline or double quote, then the
     *    String value is returned enclosed in double quotes.</p>
     *
     * <p>Any double quote characters in the value are escaped with another double quote.</p>
     *
     * <p>If the value does not contain a comma, newline or double quote, then the
     *    String value is returned unchanged.</p>

*
     * see <a href="https://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
     * <a href="https://tools.ietf.org/html/rfc4180">RFC 4180</a>.
     *
     * @param input the input CSV column String, may be null
     * @return The input String, enclosed in double quotes if the value contains a comma,
     * newline or double quote, {@code null} if null string input
     */
    public static final String escapeCsv(final String input) {
        // All of the quoting logic lives in the shared CSV translator.
        return ESCAPE_CSV.translate(input);
    }

    /**
     * Escapes the characters in a {@code String} using EcmaScript String rules.
     *

     * <p>Escapes any values it finds into their EcmaScript String form.
     * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.)</p>
     *
     * <p>So a tab becomes the characters {@code '\\'} and
     * {@code 't'}.</p>
     *
     * <p>The only difference between Java strings and EcmaScript strings
     * is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
     *
     * <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p>
     *
     * <p>Example:</p>
     * <pre>
     * input string: He didn't say, "Stop!"
     * output string: He didn\'t say, \"Stop!\"
     * </pre>
     *
* * Security Note. We only provide backslash escaping in this method. For example, {@code '\"'} has the output * {@code '\\\"'} which could result in potential issues in the case where the string being escaped is being used * in an HTML tag like {@code