All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.liferay.portal.util.HtmlImpl Maven / Gradle / Ivy

/**
 * Copyright (c) 2000-present Liferay, Inc. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation; either version 2.1 of the License, or (at your option)
 * any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 */

package com.liferay.portal.util;

import com.liferay.petra.string.CharPool;
import com.liferay.petra.string.StringBundler;
import com.liferay.petra.string.StringPool;
import com.liferay.portal.kernel.util.HashMapBuilder;
import com.liferay.portal.kernel.util.Html;
import com.liferay.portal.kernel.util.StringUtil;
import com.liferay.portal.kernel.util.URLCodec;
import com.liferay.portal.kernel.util.Validator;

import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Provides the implementation of the HTML utility interface for escaping,
 * replacing, and stripping HTML text. This class uses XSS
 * recommendations from http://www.owasp.org/index.php/Cross_Site_Scripting#How_to_Protect_Yourself
 * when escaping HTML text.
 *
 * @author Brian Wing Shun Chan
 * @author Clarence Shen
 * @author Harry Mark
 * @author Samuel Kong
 * @author Connor McKay
 * @author Shuyang Zhou
 */
public class HtmlImpl implements Html {

	/**
	 * Generates a string with the data-* attributes generated from the keys and
	 * values of a map. For example, a map containing
	 * {key1=value1;key2=value2} is returned as the string
	 * data-key1=value1 data-key2=value2.
	 *
	 * @param  data the map of values to convert to data-* attributes
	 * @return a string with the data attributes, or null if the
	 *         map is null
	 */
	@Override
	public String buildData(Map data) {
		if ((data == null) || data.isEmpty()) {
			return StringPool.BLANK;
		}

		StringBundler sb = new StringBundler(data.size() * 5);

		for (Map.Entry entry : data.entrySet()) {
			sb.append("data-");
			sb.append(entry.getKey());
			sb.append("=\"");
			sb.append(escapeAttribute(String.valueOf(entry.getValue())));
			sb.append("\" ");
		}

		return sb.toString();
	}

	/**
	 * Escapes the text so that it is safe to use in an HTML context.
	 *
	 * @param  text the text to escape
	 * @return the escaped HTML text, or null if the text is
	 *         null
	 */
	@Override
	public String escape(String text) {
		if (text == null) {
			return null;
		}

		if (text.length() == 0) {
			return StringPool.BLANK;
		}

		// Escape using XSS recommendations from
		// http://www.owasp.org/index.php/Cross_Site_Scripting
		// #How_to_Protect_Yourself

		StringBundler sb = null;

		int lastReplacementIndex = 0;

		for (int i = 0; i < text.length(); i++) {
			char c = text.charAt(i);

			if ((c < 256) && ((c >= 128) || _VALID_CHARS[c])) {
				continue;
			}

			String replacement = null;

			if (c == '<') {
				replacement = "<";
			}
			else if (c == '>') {
				replacement = ">";
			}
			else if (c == '&') {
				replacement = "&";
			}
			else if (c == '"') {
				replacement = """;
			}
			else if (c == '\'') {
				replacement = "'";
			}
			else if (c == '\u00bb') {
				replacement = "»";
			}
			else if (c == '\u2013') {
				replacement = "–";
			}
			else if (c == '\u2014') {
				replacement = "—";
			}
			else if (c == '\u2028') {
				replacement = "
";
			}
			else if (!_isValidXmlCharacter(c) ||
					 _isUnicodeCompatibilityCharacter(c)) {

				replacement = StringPool.SPACE;
			}
			else {
				continue;
			}

			if (sb == null) {
				sb = new StringBundler();
			}

			if (i > lastReplacementIndex) {
				sb.append(text.substring(lastReplacementIndex, i));
			}

			sb.append(replacement);

			lastReplacementIndex = i + 1;
		}

		if (sb == null) {
			return text;
		}

		if (lastReplacementIndex < text.length()) {
			sb.append(text.substring(lastReplacementIndex));
		}

		return sb.toString();
	}

	/**
	 * Escapes the attribute value so that it is safe to use within a quoted
	 * attribute.
	 *
	 * @param  attribute the attribute to escape
	 * @return the escaped attribute value, or null if the
	 *         attribute value is null
	 */
	@Override
	public String escapeAttribute(String attribute) {
		if (attribute == null) {
			return null;
		}

		if (attribute.length() == 0) {
			return StringPool.BLANK;
		}

		StringBuilder sb = null;
		int lastReplacementIndex = 0;

		for (int i = 0; i < attribute.length(); i++) {
			char c = attribute.charAt(i);

			if (c < _ATTRIBUTE_ESCAPES.length) {
				String replacement = _ATTRIBUTE_ESCAPES[c];

				if (replacement == null) {
					continue;
				}

				if (sb == null) {
					sb = new StringBuilder(attribute.length() + 64);
				}

				if (i > lastReplacementIndex) {
					sb.append(attribute, lastReplacementIndex, i);
				}

				sb.append(replacement);

				lastReplacementIndex = i + 1;
			}
			else if (!_isValidXmlCharacter(c) ||
					 _isUnicodeCompatibilityCharacter(c)) {

				if (sb == null) {
					sb = new StringBuilder(attribute.length() + 64);
				}

				if (i > lastReplacementIndex) {
					sb.append(attribute, lastReplacementIndex, i);
				}

				sb.append(CharPool.SPACE);

				lastReplacementIndex = i + 1;
			}
		}

		if (sb == null) {
			return attribute;
		}

		if (lastReplacementIndex < attribute.length()) {
			sb.append(attribute, lastReplacementIndex, attribute.length());
		}

		return sb.toString();
	}

	/**
	 * Escapes the CSS value so that it is safe to use in a CSS context.
	 *
	 * @param  css the CSS value to escape
	 * @return the escaped CSS value, or null if the CSS value is
	 *         null
	 */
	@Override
	public String escapeCSS(String css) {
		if (css == null) {
			return null;
		}

		if (css.length() == 0) {
			return StringPool.BLANK;
		}

		String prefix = StringPool.BACK_SLASH;

		StringBuilder sb = null;
		char[] hexBuffer = new char[4];
		int lastReplacementIndex = 0;

		for (int i = 0; i < css.length(); i++) {
			char c = css.charAt(i);

			if ((c < _VALID_CHARS.length) && !_VALID_CHARS[c]) {
				if (sb == null) {
					sb = new StringBuilder(css.length() + 64);
				}

				if (i > lastReplacementIndex) {
					sb.append(css, lastReplacementIndex, i);
				}

				sb.append(prefix);

				_appendHexChars(sb, hexBuffer, c);

				if (i < (css.length() - 1)) {
					char nextChar = css.charAt(i + 1);

					if ((nextChar >= CharPool.NUMBER_0) &&
						(nextChar <= CharPool.NUMBER_9)) {

						sb.append(CharPool.SPACE);
					}
				}

				lastReplacementIndex = i + 1;
			}
		}

		if (sb == null) {
			return css;
		}

		if (lastReplacementIndex < css.length()) {
			sb.append(css, lastReplacementIndex, css.length());
		}

		return sb.toString();
	}

	/**
	 * Escapes the HREF attribute so that it is safe to use as an HREF
	 * attribute.
	 *
	 * @param  href the HREF attribute to escape
	 * @return the escaped HREF attribute, or null if the HREF
	 *         attribute is null
	 */
	@Override
	public String escapeHREF(String href) {
		if (href == null) {
			return null;
		}

		if (href.length() == 0) {
			return StringPool.BLANK;
		}

		char c = href.charAt(0);

		if ((c == CharPool.BACK_SLASH) || (c == CharPool.SLASH)) {
			return escapeAttribute(href);
		}

		c = Character.toLowerCase(c);

		if ((c >= CharPool.LOWER_CASE_A) && (c <= CharPool.LOWER_CASE_Z) &&
			(c != CharPool.LOWER_CASE_D) && (c != CharPool.LOWER_CASE_J)) {

			return escapeAttribute(href);
		}

		int index = href.indexOf(StringPool.COLON);

		if (index > -1) {
			href = StringUtil.replaceFirst(
				href, StringPool.COLON, "%3a", index);
		}

		return escapeAttribute(href);
	}

	/**
	 * Escapes the JavaScript value so that it is safe to use in a JavaScript
	 * context.
	 *
	 * @param  js the JavaScript value to escape
	 * @return the escaped JavaScript value, or null if the
	 *         JavaScript value is null
	 */
	@Override
	public String escapeJS(String js) {
		if (js == null) {
			return null;
		}

		if (js.length() == 0) {
			return StringPool.BLANK;
		}

		String prefix = "\\x";

		StringBuilder sb = null;
		char[] hexBuffer = new char[4];
		int lastReplacementIndex = 0;

		for (int i = 0; i < js.length(); i++) {
			char c = js.charAt(i);

			if (c < _VALID_CHARS.length) {
				if (!_VALID_CHARS[c]) {
					if (sb == null) {
						sb = new StringBuilder(js.length() + 64);
					}

					if (i > lastReplacementIndex) {
						sb.append(js, lastReplacementIndex, i);
					}

					sb.append(prefix);

					_appendHexChars(sb, hexBuffer, c);

					lastReplacementIndex = i + 1;
				}
			}
			else if ((c == '\u2028') || (c == '\u2029')) {
				if (sb == null) {
					sb = new StringBuilder(js.length() + 64);
				}

				if (i > lastReplacementIndex) {
					sb.append(js, lastReplacementIndex, i);
				}

				sb.append("\\u");

				_appendHexChars(sb, hexBuffer, c);

				lastReplacementIndex = i + 1;
			}
		}

		if (sb == null) {
			return js;
		}

		if (lastReplacementIndex < js.length()) {
			sb.append(js, lastReplacementIndex, js.length());
		}

		return sb.toString();
	}

	@Override
	public String escapeJSLink(String link) {
		if (Validator.isNull(link)) {
			return StringPool.BLANK;
		}

		link = StringUtil.trim(link);

		if (link.indexOf(StringPool.COLON) == 10) {
			String protocol = StringUtil.toLowerCase(link.substring(0, 10));

			if (protocol.equals("javascript")) {
				link = StringUtil.replaceFirst(link, CharPool.COLON, "%3a");
			}
		}

		return link;
	}

	/**
	 * Escapes the URL value so that it is safe to use as a URL.
	 *
	 * @param  url the URL value to escape
	 * @return the escaped URL value, or null if the URL value is
	 *         null
	 */
	@Override
	public String escapeURL(String url) {
		return URLCodec.encodeURL(url, true);
	}

	@Override
	public String escapeXPath(String xPath) {
		if (Validator.isNull(xPath)) {
			return xPath;
		}

		StringBundler sb = new StringBundler(xPath.length());

		for (int i = 0; i < xPath.length(); i++) {
			char c = xPath.charAt(i);

			boolean hasToken = false;

			for (char xPathToken : _XPATH_TOKENS) {
				if (c == xPathToken) {
					hasToken = true;

					break;
				}
			}

			if (hasToken) {
				sb.append(CharPool.UNDERLINE);
			}
			else {
				sb.append(c);
			}
		}

		return sb.toString();
	}

	@Override
	public String escapeXPathAttribute(String xPathAttribute) {
		boolean hasApostrophe = xPathAttribute.contains(StringPool.APOSTROPHE);
		boolean hasQuote = xPathAttribute.contains(StringPool.QUOTE);

		if (hasQuote && hasApostrophe) {
			String[] parts = xPathAttribute.split(StringPool.APOSTROPHE);

			return StringBundler.concat(
				"concat('", StringUtil.merge(parts, "', \"'\", '"), "')");
		}

		if (hasQuote) {
			return StringBundler.concat(
				StringPool.APOSTROPHE, xPathAttribute, StringPool.APOSTROPHE);
		}

		return StringBundler.concat(
			StringPool.QUOTE, xPathAttribute, StringPool.QUOTE);
	}

	@Override
	public String fromInputSafe(String text) {
		return StringUtil.replace(
			text, new String[] {"&", """}, new String[] {"&", "\""});
	}

	@Override
	public String getAUICompatibleId(String text) {
		if (Validator.isNull(text)) {
			return text;
		}

		StringBundler sb = null;

		int lastReplacementIndex = 0;

		for (int i = 0; i < text.length(); i++) {
			char c = text.charAt(i);

			if (((c <= 127) && (Validator.isChar(c) || Validator.isDigit(c))) ||
				((c > 127) && (c != CharPool.FIGURE_SPACE) &&
				 (c != CharPool.NARROW_NO_BREAK_SPACE) &&
				 (c != CharPool.NO_BREAK_SPACE))) {

				continue;
			}

			if (sb == null) {
				sb = new StringBundler();
			}

			if (i > lastReplacementIndex) {
				sb.append(text.substring(lastReplacementIndex, i));
			}

			sb.append(StringPool.UNDERLINE);

			if (c != CharPool.UNDERLINE) {
				sb.append(StringUtil.toHexString(c));
			}

			sb.append(StringPool.UNDERLINE);

			lastReplacementIndex = i + 1;
		}

		if (sb == null) {
			return text;
		}

		if (lastReplacementIndex < text.length()) {
			sb.append(text.substring(lastReplacementIndex));
		}

		return sb.toString();
	}

	/**
	 * Replaces all new lines or carriage returns with the 
* HTML tag. * * @param html the text * @return the converted text, or null if the text is * null */ @Override public String replaceNewLine(String html) { if (html == null) { return null; } html = StringUtil.replace(html, StringPool.RETURN_NEW_LINE, "
"); return StringUtil.replace(html, CharPool.NEW_LINE, "
"); } /** * Strips all content delimited by the tag out of the text. * *

* If the tag appears multiple times, all occurrences (including the tag) * are stripped. The tag may have attributes. In order for this method to * recognize the tag, it must consist of a separate opening and closing tag. * Self-closing tags remain in the result. *

* * @param text the text * @param tag the tag used for delimiting, which should only be the tag's * name (e.g. no <) * @return the text, without the stripped tag and its contents, or * null if the text is null */ @Override public String stripBetween(String text, String tag) { return StringUtil.stripBetween(text, "<" + tag, ""); } /** * Strips all XML comments out of the text. * * @param text the text * @return the text, without the stripped XML comments, or null * if the text is null */ @Override public String stripComments(String text) { return StringUtil.stripBetween(text, ""); } @Override public String stripHtml(String text) { if (text == null) { return null; } text = stripComments(text); StringBundler sb = new StringBundler(text.length()); int x = 0; int y = text.indexOf("<"); while (y != -1) { sb.append(text.substring(x, y)); // Look for text enclosed by if (isTag(_TAG_NOSCRIPT, text, y + 1)) { y = stripTag(_TAG_NOSCRIPT, text, y); } else if (isTag(_TAG_SCRIPT, text, y + 1)) { y = stripTag(_TAG_SCRIPT, text, y); } else if (isTag(_TAG_STYLE, text, y + 1)) { y = stripTag(_TAG_STYLE, text, y); } x = text.indexOf(">", y); if (x == -1) { break; } x++; if (x < y) { // Hello * For example, the & character is replaced by * &amp;. *

* * @param text the text * @return the encoded text that is safe to use as an HTML input field * value, or null if the text is null */ @Override public String toInputSafe(String text) { return StringUtil.replace( text, new char[] {'&', '\"'}, new String[] {"&", """}); } @Override public String unescape(String text) { return StringUtil.replace(text, "&", ";", _unescapeMap); } @Override public String unescapeCDATA(String text) { if (text == null) { return null; } if (text.length() == 0) { return StringPool.BLANK; } text = StringUtil.replace(text, "<![CDATA[", ""); return text; } @Override public String wordBreak(String text, int columns) { StringBundler sb = new StringBundler(); int length = 0; int lastWrite = 0; int pos = 0; Matcher matcher = _pattern.matcher(text); while (matcher.find()) { if (matcher.start() < pos) { continue; } while ((length + matcher.start() - pos) >= columns) { pos += columns - length; sb.append(text.substring(lastWrite, pos)); sb.append("­"); length = 0; lastWrite = pos; } length += matcher.start() - pos; String group = matcher.group(); if (group.equals(StringPool.AMPERSAND)) { int x = text.indexOf(StringPool.SEMICOLON, matcher.start()); if (x != -1) { length++; pos = x + 1; } continue; } if (group.equals(StringPool.LESS_THAN)) { int x = text.indexOf(StringPool.GREATER_THAN, matcher.start()); if (x != -1) { pos = x + 1; } continue; } if (group.equals(StringPool.SPACE) || group.equals(StringPool.NEW_LINE)) { length = 0; pos = matcher.start() + 1; } } sb.append(text.substring(lastWrite)); return sb.toString(); } protected boolean isTag(char[] tag, String text, int pos) { if ((pos + tag.length + 1) <= text.length()) { char item = '\0'; for (char c : tag) { item = text.charAt(pos++); if (Character.toLowerCase(item) != c) { return false; } } item = text.charAt(pos); // Check that char after tag is not a letter (i.e. another tag) return !Character.isLetter(item); } return false; } protected int stripTag(char[] tag, String text, int pos) { int x = pos + tag.length; // Find end of the tag x = text.indexOf(">", x); if (x < 0) { return pos; } // Check if preceding character is / (i.e. is this instance of ) if (text.charAt(x - 1) == '/') { return pos; } // Search for the ending tag while (true) { x = text.indexOf("= 0) { if (isTag(tag, text, x + 2)) { pos = x; break; } // Skip past "= CharPool.SPACE) && (c <= '\ud7ff')) || ((c >= '\ue000') && (c <= '\ufffd')) || Character.isSurrogate(c) || (c == CharPool.TAB) || (c == CharPool.NEW_LINE) || (c == CharPool.RETURN)) { return true; } return false; } private void _appendHexChars(StringBuilder sb, char[] buffer, char c) { int index = buffer.length; do { buffer[--index] = _HEX_DIGITS[c & 15]; c >>>= 4; } while (c != 0); if (index == (buffer.length - 1)) { sb.append(CharPool.NUMBER_0); sb.append(buffer[index]); return; } sb.append(buffer, index, buffer.length - index); } private boolean _isUnicodeCompatibilityCharacter(char c) { if (((c >= '\u007f') && (c <= '\u0084')) || ((c >= '\u0086') && (c <= '\u009f')) || ((c >= '\ufdd0') && (c <= '\ufdef'))) { return true; } return false; } private static final String[] _ATTRIBUTE_ESCAPES = new String[256]; private static final char[] _HEX_DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; private static final char[] _TAG_NOSCRIPT = { 'n', 'o', 's', 'c', 'r', 'i', 'p', 't' }; private static final char[] _TAG_SCRIPT = {'s', 'c', 'r', 'i', 'p', 't'}; private static final char[] _TAG_STYLE = {'s', 't', 'y', 'l', 'e'}; private static final boolean[] _VALID_CHARS = new boolean[256]; // See http://www.w3.org/TR/xpath20/#lexical-structure private static final char[] _XPATH_TOKENS = { '(', ')', '[', ']', '.', '@', ',', ':', '/', '|', '+', '-', '=', '!', '<', '>', '*', '$', '"', '"', ' ', 9, 10, 13, 133, 8232 }; private static final Pattern _pattern = Pattern.compile("([\\s<&]|$)"); private static final Map _unescapeMap = HashMapBuilder.put( "#34", "\"" ).put( "#35", "#" ).put( "#37", "%" ).put( "#39", "'" ).put( "#40", "(" ).put( "#41", ")" ).put( "#43", "+" ).put( "#44", "," ).put( "#45", "-" ).put( "#59", ";" ).put( "#61", "=" ).put( "amp", "&" ).put( "gt", ">" ).put( "lt", "<" ).put( "nbsp", " " ).put( "rsquo", "\u2019" ).build(); static { for (int i = 0; i < 256; i++) { char c = (char)i; if (!_isValidXmlCharacter(c)) { _ATTRIBUTE_ESCAPES[i] = StringPool.SPACE; } _ATTRIBUTE_ESCAPES[CharPool.AMPERSAND] = StringPool.AMPERSAND_ENCODED; _ATTRIBUTE_ESCAPES[CharPool.APOSTROPHE] = "'"; _ATTRIBUTE_ESCAPES[CharPool.GREATER_THAN] = ">"; _ATTRIBUTE_ESCAPES[CharPool.LESS_THAN] = "<"; _ATTRIBUTE_ESCAPES[CharPool.QUOTE] = """; if (Character.isLetterOrDigit(c)) { _VALID_CHARS[i] = true; } } _VALID_CHARS['-'] = true; _VALID_CHARS['_'] = true; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy