All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.xwiki.xml.internal.html.SecureHTMLElementSanitizer Maven / Gradle / Ivy

/*
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */
/*
 * Alternatively, at your choice, the contents of this file may be used under the terms of the Mozilla Public License,
 * v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */
package org.xwiki.xml.internal.html;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;

import javax.inject.Inject;
import javax.inject.Named;
import javax.inject.Singleton;

import org.apache.commons.lang3.StringUtils;
import org.xwiki.component.annotation.Component;
import org.xwiki.component.phase.Initializable;
import org.xwiki.component.phase.InitializationException;
import org.xwiki.xml.html.HTMLElementSanitizer;

/**
 * Secure default implementation of {@link HTMLElementSanitizer} based on a definition of allowed elements and
 * attributes.
 * 

* This is heavily inspired by DOMPurify by Cure53 and other contributors | Released under the Apache license 2.0 and * Mozilla Public License 2.0 - LICENSE. * * @version $Id: b1052fa4338932982ce55eb9a6283fb92e443e29 $ * @since 14.6RC1 */ @Component @Named(SecureHTMLElementSanitizer.HINT) @Singleton public class SecureHTMLElementSanitizer implements HTMLElementSanitizer, Initializable { /** * The hint of this component. */ public static final String HINT = "secure"; static final Pattern IS_SCRIPT_OR_DATA = Pattern.compile("^(?:\\w+script|data):", Pattern.CASE_INSENSITIVE); static final Pattern ATTR_WHITESPACE = Pattern.compile("[\\u0000-\\u0020\\u00A0\\u1680\\u180E\\u2000-\\u2029\\u205F\\u3000]"); /** * Pattern that matches valid data-attributes. *

* Following the HTML standard * this means that the name starts with "data-", has at least one character after the hyphen and is * XML-compatible, * i.e., matches the Name production without ":". */ static final Pattern DATA_ATTR = Pattern.compile("^data-[A-Z_a-z\\u00C0-\\u00D6\\u00D8-\\u00F6" + "\\u00F8-\\u02ff\\u0370-\\u037d\\u037f-\\u1fff\\u200c\\u200d\\u2070-\\u218f\\u2c00-\\u2fef\\u3001-\\ud7ff" + "\\uf900-\\ufdcf\\ufdf0-\\ufffd\\x{10000}-\\x{EFFFF}\\-.0-9\\u00b7\\u0300-\\u036f\\u203f-\\u2040]+$"); static final Pattern ARIA_ATTR = Pattern.compile("^aria-[\\-\\w]+$"); static final Pattern IS_ALLOWED_URI = Pattern.compile("^(?:(?:f|ht)tps?|mailto|tel|callto|cid|xmpp):", Pattern.CASE_INSENSITIVE); static final Pattern IS_NO_URI = Pattern.compile("^(?:[^a-z]|[a-z+.\\-]+(?:[^a-z+.\\-:]|$))", Pattern.CASE_INSENSITIVE); private static final String XLINK_HREF = "xlink:href"; @Inject private HTMLElementSanitizerConfiguration htmlElementSanitizerConfiguration; @Inject private HTMLDefinitions htmlDefinitions; @Inject private SVGDefinitions svgDefinitions; @Inject private MathMLDefinitions mathMLDefinitions; /** * Additionally allowed elements. */ private final Set extraAllowedTags; /** * Additionally allowed attributes. */ private final Set extraAllowedAttributes; /** * XML attributes that should be allowed. */ private final Set xmlAttributes; /** * Tags that are safe for data: URIs. */ private final Set dataUriTags; /** * Attributes safe for values like "javascript:". */ private final Set uriSafeAttributes; private final Set forbidTags; private final Set forbidAttributes; private boolean allowUnknownProtocols; private Pattern allowedUriPattern; /** * Default constructor. 
*/ public SecureHTMLElementSanitizer() { this.dataUriTags = new HashSet<>(Arrays.asList("audio", "video", "img", "source", "image", "track")); this.uriSafeAttributes = new HashSet<>( Arrays.asList("alt", "class", "for", "id", "label", "name", "pattern", "placeholder", "role", "summary", "title", "value", "style", "xmlns")); this.xmlAttributes = new HashSet<>(Arrays.asList(XLINK_HREF, "xml:id", "xlink:title", "xml:space", "xmlns:xlink")); this.extraAllowedTags = new HashSet<>(); this.extraAllowedAttributes = new HashSet<>(); this.forbidTags = new HashSet<>(); this.forbidAttributes = new HashSet<>(); this.allowedUriPattern = IS_ALLOWED_URI; } @Override public void initialize() throws InitializationException { this.extraAllowedTags.addAll(this.htmlElementSanitizerConfiguration.getExtraAllowedTags()); this.extraAllowedAttributes.addAll(this.htmlElementSanitizerConfiguration.getExtraAllowedAttributes()); this.uriSafeAttributes.addAll(this.htmlElementSanitizerConfiguration.getExtraUriSafeAttributes()); this.dataUriTags.addAll(this.htmlElementSanitizerConfiguration.getExtraDataUriTags()); this.allowUnknownProtocols = this.htmlElementSanitizerConfiguration.isAllowUnknownProtocols(); this.forbidTags.addAll(this.htmlElementSanitizerConfiguration.getForbidTags()); this.forbidAttributes.addAll(this.htmlElementSanitizerConfiguration.getForbidAttributes()); String configuredRegexp = this.htmlElementSanitizerConfiguration.getAllowedUriRegexp(); if (StringUtils.isNotBlank(configuredRegexp)) { this.allowedUriPattern = Pattern.compile(configuredRegexp, Pattern.CASE_INSENSITIVE); } } @Override public boolean isElementAllowed(String elementName) { return !this.forbidTags.contains(elementName) && (this.extraAllowedTags.contains(elementName) || isElementSafe(elementName)); } private boolean isElementSafe(String elementName) { return this.htmlDefinitions.isSafeTag(elementName) || this.svgDefinitions.isSafeTag(elementName) || this.mathMLDefinitions.isSafeTag(elementName); } @Override 
public boolean isAttributeAllowed(String elementName, String attributeName, String attributeValue) { boolean result = false; String lowerElement = elementName.toLowerCase(); String lowerAttribute = attributeName.toLowerCase(); if ((DATA_ATTR.matcher(lowerAttribute).matches() || ARIA_ATTR.matcher(lowerAttribute).matches()) && !this.forbidAttributes.contains(lowerAttribute)) { result = true; } else if (isAttributeAllowed(lowerAttribute) && !this.forbidAttributes.contains(lowerAttribute)) { result = isAllowedValue(lowerElement, lowerAttribute, attributeValue); } return result; } private boolean isAllowedValue(String lowercaseElementName, String lowercaseAttributeName, String attributeValue) { // Break into several statements to avoid too long boolean expression. boolean result = StringUtils.isBlank(attributeValue); if (!result) { String valueNoWhitespace = ATTR_WHITESPACE.matcher(attributeValue).replaceAll(""); result = this.uriSafeAttributes.contains(lowercaseAttributeName); result = result || IS_NO_URI.matcher(valueNoWhitespace).find(); result = result || this.allowedUriPattern.matcher(valueNoWhitespace).find(); result = result || isAllowedDataValue(lowercaseElementName, lowercaseAttributeName, attributeValue); result = result || (this.allowUnknownProtocols && !isScriptOrData(attributeValue)); } return result; } private boolean isAttributeAllowed(String attributeName) { boolean result = this.extraAllowedAttributes.contains(attributeName); result = result || this.htmlDefinitions.isAllowedAttribute(attributeName); result = result || this.svgDefinitions.isAllowedAttribute(attributeName); result = result || this.mathMLDefinitions.isAllowedAttribute(attributeName); result = result || this.xmlAttributes.contains(attributeName); return result; } private boolean isScriptOrData(String attributeValue) { return IS_SCRIPT_OR_DATA.matcher(ATTR_WHITESPACE.matcher(attributeValue).replaceAll("")).find(); } private boolean isAllowedDataValue(String elementName, String attributeName, 
String attributeValue) { boolean attributeAllowsData = "src".equals(attributeName) || XLINK_HREF.equals(attributeName) || "href".equals(attributeName); return attributeAllowsData && !"script".equals(elementName) && attributeValue.startsWith("data:") && this.dataUriTags.contains(elementName); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy