All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.aspectran.web.support.util.HtmlUtils Maven / Gradle / Ivy

There is a newer version: 8.1.5
Show newest version
/*
 * Copyright (c) 2008-2023 The Aspectran Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.aspectran.web.support.util;

import com.aspectran.core.util.Assert;
import jakarta.servlet.ServletRequest;

/**
 * <p>This class is a clone of org.springframework.web.util.HtmlUtils</p>
 *
 * Utility class for HTML escaping.
 *
 * <p>Escapes and unescapes based on the W3C HTML 4.01 recommendation, handling
 * character entity references.
 *
 * <p>Reference:
 * <a href="https://www.w3.org/TR/html4/charset.html">https://www.w3.org/TR/html4/charset.html</a>
 *
 * <p>For a comprehensive set of String escaping utilities, consider
 * Apache Commons Text and its {@code StringEscapeUtils} class. We do not use
 * that class here in order to avoid a runtime dependency on Commons Text just
 * for HTML escaping. Furthermore, Spring's HTML escaping is more flexible and
 * 100% HTML 4.0 compliant.
 */
public abstract class HtmlUtils {

    /**
     * Default character encoding to use when {@code request.getCharacterEncoding}
     * returns {@code null}, according to the Servlet spec.
     * @see ServletRequest#getCharacterEncoding
     */
    public static final String DEFAULT_CHARACTER_ENCODING = "ISO-8859-1";

    /**
     * Shared instance of pre-parsed HTML character entity references.
     */
    private static final HtmlCharacterEntityReferences characterEntityReferences =
            new HtmlCharacterEntityReferences();

    /**
     * Turn special characters into HTML character references.
     * <p>Handles complete character set defined in HTML 4.01 recommendation.
     * <p>Escapes all special characters to their corresponding
     * entity reference (e.g. {@code &lt;}).
     * <p>Delegates to {@link #htmlEscape(String, String)} with
     * {@link #DEFAULT_CHARACTER_ENCODING}.
     * <p>Reference:
     * <a href="https://www.w3.org/TR/html4/sgml/entities.html">
     * https://www.w3.org/TR/html4/sgml/entities.html</a>
     * @param input the (unescaped) input string
     * @return the escaped string
     */
    public static String htmlEscape(String input) {
        return htmlEscape(input, DEFAULT_CHARACTER_ENCODING);
    }

    /**
     * Turn special characters into HTML character references.
     * <p>Handles complete character set defined in HTML 4.01 recommendation.
     * <p>Escapes all special characters to their corresponding
     * entity reference (e.g. {@code &lt;}) at least as required by the
     * specified encoding. In other words, if a special character does
     * not have to be escaped for the given encoding, it may not be.
     * <p>Reference:
     * <a href="https://www.w3.org/TR/html4/sgml/entities.html">
     * https://www.w3.org/TR/html4/sgml/entities.html</a>
     * @param input the (unescaped) input string; checked with {@code Assert.notNull}
     * @param encoding the name of a supported {@link java.nio.charset.Charset charset};
     * checked with {@code Assert.notNull}
     * @return the escaped string
     * @since 4.1.2
     */
    public static String htmlEscape(String input, String encoding) {
        Assert.notNull(input, "Input is required");
        Assert.notNull(encoding, "Encoding is required");
        StringBuilder escaped = new StringBuilder(input.length() * 2);
        for (int i = 0; i < input.length(); i++) {
            char character = input.charAt(i);
            String reference = characterEntityReferences.convertToReference(character, encoding);
            if (reference != null) {
                escaped.append(reference);
            } else {
                // No reference for this character: copy it through unchanged.
                escaped.append(character);
            }
        }
        return escaped.toString();
    }

    /**
     * Turn special characters into HTML character references.
     * <p>Handles complete character set defined in HTML 4.01 recommendation.
     * <p>Escapes all special characters to their corresponding numeric
     * reference in decimal format (&amp;#<i>Decimal</i>;).
     * <p>Delegates to {@link #htmlEscapeDecimal(String, String)} with
     * {@link #DEFAULT_CHARACTER_ENCODING}.
     * <p>Reference:
     * <a href="https://www.w3.org/TR/html4/sgml/entities.html">
     * https://www.w3.org/TR/html4/sgml/entities.html</a>
     * @param input the (unescaped) input string
     * @return the escaped string
     */
    public static String htmlEscapeDecimal(String input) {
        return htmlEscapeDecimal(input, DEFAULT_CHARACTER_ENCODING);
    }

    /**
     * Turn special characters into HTML character references.
     * <p>Handles complete character set defined in HTML 4.01 recommendation.
     * <p>Escapes all special characters to their corresponding numeric
     * reference in decimal format (&amp;#<i>Decimal</i>;) at least as required by the
     * specified encoding. In other words, if a special character does
     * not have to be escaped for the given encoding, it may not be.
     * <p>Reference:
     * <a href="https://www.w3.org/TR/html4/sgml/entities.html">
     * https://www.w3.org/TR/html4/sgml/entities.html</a>
     * @param input the (unescaped) input string; checked with {@code Assert.notNull}
     * @param encoding the name of a supported {@link java.nio.charset.Charset charset};
     * checked with {@code Assert.notNull}
     * @return the escaped string
     * @since 4.1.2
     */
    public static String htmlEscapeDecimal(String input, String encoding) {
        return htmlEscapeNumeric(input, encoding, false);
    }

    /**
     * Turn special characters into HTML character references.
     * <p>Handles complete character set defined in HTML 4.01 recommendation.
     * <p>Escapes all special characters to their corresponding numeric
     * reference in hex format (&amp;#x<i>Hex</i>;).
     * <p>Delegates to {@link #htmlEscapeHex(String, String)} with
     * {@link #DEFAULT_CHARACTER_ENCODING}.
     * <p>Reference:
     * <a href="https://www.w3.org/TR/html4/sgml/entities.html">
     * https://www.w3.org/TR/html4/sgml/entities.html</a>
     * @param input the (unescaped) input string
     * @return the escaped string
     */
    public static String htmlEscapeHex(String input) {
        return htmlEscapeHex(input, DEFAULT_CHARACTER_ENCODING);
    }

    /**
     * Turn special characters into HTML character references.
     * <p>Handles complete character set defined in HTML 4.01 recommendation.
     * <p>Escapes all special characters to their corresponding numeric
     * reference in hex format (&amp;#x<i>Hex</i>;) at least as required by the
     * specified encoding. In other words, if a special character does
     * not have to be escaped for the given encoding, it may not be.
     * <p>Reference:
     * <a href="https://www.w3.org/TR/html4/sgml/entities.html">
     * https://www.w3.org/TR/html4/sgml/entities.html</a>
     * @param input the (unescaped) input string; checked with {@code Assert.notNull}
     * @param encoding the name of a supported {@link java.nio.charset.Charset charset};
     * checked with {@code Assert.notNull}
     * @return the escaped string
     * @since 4.1.2
     */
    public static String htmlEscapeHex(String input, String encoding) {
        return htmlEscapeNumeric(input, encoding, true);
    }

    /**
     * Turn HTML character references into their plain text UNICODE equivalent.
     * <p>Handles complete character set defined in HTML 4.01 recommendation
     * and all reference types (decimal, hex, and entity).
     * <p>Correctly converts the following formats:
     * <blockquote>
     * &amp;<i>Entity</i>; - <i>(Example: &amp;amp;) case sensitive</i><br>
     * &amp;#<i>Decimal</i>; - <i>(Example: &amp;#68;)</i><br>
     * &amp;#x<i>Hex</i>; - <i>(Example: &amp;#xE5;) case insensitive</i><br>
     * </blockquote>
     * <p>Gracefully handles malformed character references by copying original
     * characters as is when encountered.
     * <p>Reference:
     * <a href="https://www.w3.org/TR/html4/sgml/entities.html">
     * https://www.w3.org/TR/html4/sgml/entities.html</a>
     * @param input the (escaped) input string
     * @return the unescaped string
     */
    public static String htmlUnescape(String input) {
        return new HtmlCharacterEntityDecoder(characterEntityReferences, input).decode();
    }

    /**
     * Shared implementation of the decimal and hex numeric escaping variants.
     * <p>Every character that the shared entity table reports as mapped to a
     * reference for the given encoding is replaced by a numeric reference built
     * from the character's code; every other character is copied unchanged.
     * @param input the (unescaped) input string; checked with {@code Assert.notNull}
     * @param encoding the charset name passed through to the entity table;
     * checked with {@code Assert.notNull}
     * @param hex {@code true} to emit {@code HEX_REFERENCE_START} followed by the
     * character code in base 16, {@code false} to emit
     * {@code DECIMAL_REFERENCE_START} followed by the character code in base 10
     * @return the escaped string
     */
    private static String htmlEscapeNumeric(String input, String encoding, boolean hex) {
        Assert.notNull(input, "Input is required");
        Assert.notNull(encoding, "Encoding is required");
        StringBuilder escaped = new StringBuilder(input.length() * 2);
        for (int i = 0; i < input.length(); i++) {
            char character = input.charAt(i);
            if (characterEntityReferences.isMappedToReference(character, encoding)) {
                if (hex) {
                    escaped.append(HtmlCharacterEntityReferences.HEX_REFERENCE_START);
                    escaped.append(Integer.toString(character, 16));
                } else {
                    escaped.append(HtmlCharacterEntityReferences.DECIMAL_REFERENCE_START);
                    escaped.append((int) character);
                }
                escaped.append(HtmlCharacterEntityReferences.REFERENCE_END);
            } else {
                escaped.append(character);
            }
        }
        return escaped.toString();
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy