// org.owasp.validator.css.CssScanner, from the antisamy artifact (Maven / Gradle / Ivy).
// AntiSamy: a library for performing fast, configurable cleansing of HTML coming from untrusted
// sources.
/*
* Copyright (c) 2007-2022, Arshan Dabirsiaghi, Jason Li
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of OWASP nor the names of its contributors may be used to
* endorse or promote products derived from this software without specific
* prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.owasp.validator.css;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URI;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.ResourceBundle;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.batik.css.parser.ParseException;
import org.apache.batik.css.parser.Parser;
import org.apache.hc.client5.http.ClientProtocolException;
import org.apache.hc.client5.http.classic.HttpClient;
import org.apache.hc.client5.http.classic.methods.HttpGet;
import org.apache.hc.client5.http.config.RequestConfig;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.HttpClientBuilder;
import org.apache.hc.core5.http.ClassicHttpResponse;
import org.apache.hc.core5.http.HttpEntity;
import org.apache.hc.core5.http.HttpStatus;
import org.apache.hc.core5.http.io.HttpClientResponseHandler;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.util.Timeout;
import org.owasp.validator.html.CleanResults;
import org.owasp.validator.html.InternalPolicy;
import org.owasp.validator.html.Policy;
import org.owasp.validator.html.ScanException;
import org.owasp.validator.html.util.ErrorMessageUtil;
import org.owasp.validator.html.util.HTMLEntityEncoder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.css.sac.InputSource;
/**
* Encapsulates the parsing and validation of a CSS stylesheet or inline declaration. To make use of
* this class, instantiate the scanner with the desired policy and call either
* scanInlineSheet()
or scanStyleSheet
as appropriate.
*
* @see #scanInlineStyle(String, String, int)
* @see #scanStyleSheet(String, int)
* @author Jason Li
*/
public class CssScanner {
protected static final Logger logger = LoggerFactory.getLogger(CssScanner.class);
protected static final Timeout DEFAULT_TIMEOUT = Timeout.ofMilliseconds(1000);
private static final String CDATA = "^\\s*\\s*$";
/** The parser to be used in any scanning */
private final Parser parser = new CssParser();
/** The policy file to be used in any scanning */
private final InternalPolicy policy;
/** The message bundled to pull error messages from. */
private final ResourceBundle messages;
/** The message bundled to pull error messages from. */
private final boolean shouldParseImportedStyles;
private static final Pattern cdataMatchPattern = Pattern.compile(CDATA, Pattern.DOTALL);
/**
* Constructs a scanner based on the given AntiSamy policy. This version of the constructor
* defaults shouldParseImportedStyles to false. Look at the other constructor for a description of
* that parameter.
*
* @param policy the policy to follow when scanning
* @param messages the error message bundle to pull from
*/
public CssScanner(InternalPolicy policy, ResourceBundle messages) {
this(policy, messages, false);
}
/**
* Constructs a scanner based on the given AntiSamy policy.
*
* @param policy the policy to follow when scanning
* @param messages the error message bundle to pull from
* @param shouldParseImportedStyles Flag to indicate if styles within @import directives should be
* imported and parsed in the resulting style sheet. This boolean determines if URLs should be
* recognized when parsing styles (i.e., to fetch them or ignore them).
* @deprecated Support for remote import of styles will be removed as that is a dangerous
* practice. The simpler constructor should be used which defaults to disallow such imports.
*/
@Deprecated
public CssScanner(
InternalPolicy policy, ResourceBundle messages, boolean shouldParseImportedStyles) {
this.policy = policy;
this.messages = messages;
this.shouldParseImportedStyles = shouldParseImportedStyles;
if (shouldParseImportedStyles) {
logger.warn(
"Allowing CSS imports from external URLs is a dangerous practice. It is recommended you "
+ "disable this feature. Support for this feature in AntiSamy is deprecated and will "
+ "be removed in a future release.");
}
}
/**
* Scans the contents of a full stylesheet (ex. a file based stylesheet or the complete stylesheet
* contents as declared within <style> tags)
*
* @param taintedCss a String
containing the contents of the CSS stylesheet to
* validate
* @param sizeLimit the limit on the total size in bytes of any imported stylesheets
* @return a CleanResuts
object containing the results of the scan
* @throws ScanException if an error occurs during scanning
*/
public CleanResults scanStyleSheet(String taintedCss, int sizeLimit) throws ScanException {
long startOfScan = System.currentTimeMillis();
List errorMessages = new ArrayList();
/* Check to see if the text starts with (\s)*(\s)*.
*/
Matcher m = cdataMatchPattern.matcher(taintedCss);
boolean isCdata = m.matches();
if (isCdata) {
taintedCss = m.group(1);
}
CssHandler handler = new CssHandler(policy, errorMessages, messages);
// parse the stylesheet
parser.setDocumentHandler(handler);
try {
// parse the style declaration
// note this does not count against the size limit because it
// should already have been counted by the caller since it was
// embedded in the HTML
parser.parseStyleSheet(new InputSource(new StringReader(taintedCss)));
} catch (IOException | ParseException e) {
/*
* ParseException, from batik, is unfortunately a RuntimeException.
*/
throw new ScanException(e);
}
String cleaned = getCleanStylesheetWithImports(sizeLimit, errorMessages, handler);
if (isCdata) {
cleaned = "";
}
return new CleanResults(startOfScan, cleaned, null, errorMessages);
}
/**
* Scans the contents of an inline style declaration (ex. in the style attribute of an HTML tag)
* and validates the style sheet according to this CssScanner
's policy file.
*
* @param taintedCss a String
containing the contents of the CSS stylesheet to
* validate
* @param tagName the name of the tag for which this inline style was declared
* @param sizeLimit the limit on the total size in bites of any imported stylesheets
* @return a CleanResuts
object containing the results of the scan
* @throws ScanException if an error occurs during scanning
*/
public CleanResults scanInlineStyle(String taintedCss, String tagName, int sizeLimit)
throws ScanException {
long startOfScan = System.currentTimeMillis();
List errorMessages = new ArrayList();
CssHandler handler = new CssHandler(policy, errorMessages, messages, tagName);
parser.setDocumentHandler(handler);
try {
// parse the inline style declaration
// note this does not count against the size limit because it
// should already have been counted by the caller since it was
// embedded in the HTML
parser.parseStyleDeclaration(taintedCss);
} catch (IOException ioe) {
throw new ScanException(ioe);
}
String cleaned = getCleanStylesheetWithImports(sizeLimit, errorMessages, handler);
return new CleanResults(startOfScan, cleaned, null, errorMessages);
}
private String getCleanStylesheetWithImports(
int sizeLimit, List errorMessages, CssHandler handler) throws ScanException {
String cleaned = handler.getCleanStylesheet();
if (shouldParseImportedStyles) {
handler.emptyStyleSheet();
parseImportedStylesheets(handler.getImportedStylesheetsURIList(), errorMessages, sizeLimit);
// If there are styles to import they must be added to the beginning
cleaned = handler.getCleanStylesheet() + cleaned;
}
return cleaned;
}
/**
* Parses through a LinkedList
of imported stylesheet URIs, this method parses
* through those stylesheets and validates them
*
* @param stylesheets the LinkedList
of stylesheet URIs to parse
* @param errorMessages the list of error messages to append to
* @param sizeLimit the limit on the total size in bites of any imported stylesheets
* @throws ScanException if an error occurs during scanning
* @deprecated Support for remote import of styles will be removed as that is dangerous.
*/
@Deprecated
private void parseImportedStylesheets(
LinkedList stylesheets, List errorMessages, int sizeLimit) throws ScanException {
// if stylesheets were imported by the inline style declaration,
// continue parsing the nested styles. Note this only happens
// if CSS importing was enabled in the policy file
if (!stylesheets.isEmpty()) {
int importedStylesheets = 0;
// Ensure that we have appropriate timeout values so we don't
// get DoSed waiting for returns
Timeout timeout = DEFAULT_TIMEOUT;
try {
timeout =
Timeout.ofMilliseconds(Long.parseLong(policy.getDirective(Policy.CONNECTION_TIMEOUT)));
} catch (NumberFormatException nfe) {
// Use default if can't parse policy specified value
}
RequestConfig requestConfig =
RequestConfig.custom()
.setConnectTimeout(timeout)
.setResponseTimeout(timeout)
.setConnectionRequestTimeout(timeout)
.build();
HttpClient httpClient =
HttpClientBuilder.create()
.disableAutomaticRetries()
.disableConnectionState()
.disableCookieManagement()
.setDefaultRequestConfig(requestConfig)
.build();
int allowedImports = Policy.DEFAULT_MAX_STYLESHEET_IMPORTS;
try {
allowedImports = Integer.parseInt(policy.getDirective("maxStyleSheetImports"));
} catch (NumberFormatException nfe) {
// Use default if can't parse policy specified value
}
while (!stylesheets.isEmpty()) {
URI stylesheetUri = stylesheets.removeFirst();
if (++importedStylesheets > allowedImports) {
errorMessages.add(
ErrorMessageUtil.getMessage(
messages,
ErrorMessageUtil.ERROR_CSS_IMPORT_EXCEEDED,
new Object[] {
HTMLEntityEncoder.htmlEntityEncode(stylesheetUri.toString()),
String.valueOf(allowedImports)
}));
continue;
}
// Pulled directly from:
// https://github.com/apache/httpcomponents-client/blob/5.1.x/httpclient5/src/test/java/org/apache/hc/client5/http/examples/ClientWithResponseHandler.java
// Create a custom response handler to read in the stylesheet
final HttpClientResponseHandler responseHandler =
new HttpClientResponseHandler() {
@Override
public String handleResponse(final ClassicHttpResponse response) throws IOException {
final int status = response.getCode();
if (status >= HttpStatus.SC_SUCCESS && status < HttpStatus.SC_REDIRECTION) {
final HttpEntity entity = response.getEntity();
try {
return entity != null ? EntityUtils.toString(entity) : null;
} catch (final ParseException | org.apache.hc.core5.http.ParseException ex) {
throw new ClientProtocolException(ex);
}
} else {
throw new ClientProtocolException("Unexpected response status: " + status);
}
}
};
byte[] stylesheet = null;
try {
String responseBody = httpClient.execute(new HttpGet(stylesheetUri), responseHandler);
// pull down stylesheet, observing size limit.
// Note: There is a SpotBugs warning on the next line: "Found reliance on default encoding
// in org.owasp.validator.css.CssScanner.parseImportedStylesheets(LinkedList, List, int):
// String.getBytes()" but since this method is deprecated, not going to address it as it
// will 'go away' eventually.
stylesheet = responseBody.getBytes();
if (stylesheet != null && stylesheet.length > sizeLimit) {
errorMessages.add(
ErrorMessageUtil.getMessage(
messages,
ErrorMessageUtil.ERROR_CSS_IMPORT_INPUT_SIZE,
new Object[] {
HTMLEntityEncoder.htmlEntityEncode(stylesheetUri.toString()),
String.valueOf(policy.getMaxInputSize())
}));
stylesheet = null;
}
} catch (IOException ioe) {
errorMessages.add(
ErrorMessageUtil.getMessage(
messages,
ErrorMessageUtil.ERROR_CSS_IMPORT_FAILURE,
new Object[] {HTMLEntityEncoder.htmlEntityEncode(stylesheetUri.toString())}));
}
if (stylesheet != null) {
// decrease the size limit based on the
sizeLimit -= stylesheet.length;
try {
InputSource nextStyleSheet =
new InputSource(
new InputStreamReader(
new ByteArrayInputStream(stylesheet), Charset.forName("UTF8")));
parser.parseStyleSheet(nextStyleSheet);
} catch (IOException ioe) {
throw new ScanException(ioe);
}
}
} // end while
} // end if
} // end parseImportedStylesheets()
}