
org.owasp.validator.html.Policy Maven / Gradle / Ivy
The newest version!
/*
* Copyright (c) 2007-2008, Arshan Dabirsiaghi, Jason Li
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
* Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.owasp.validator.html;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.regex.Pattern;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Source;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
import org.owasp.validator.html.model.AntiSamyPattern;
import org.owasp.validator.html.model.Attribute;
import org.owasp.validator.html.model.Property;
import org.owasp.validator.html.model.Tag;
import org.owasp.validator.html.util.XMLUtil;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
* Policy.java
*
* This file holds the model for our policy engine.
*
* @author Arshan Dabirsiaghi
*
*/
public class Policy {
public static final Pattern ANYTHING_REGEXP = Pattern.compile(".*");
private static final String DEFAULT_POLICY_URI = "resources/antisamy.xml";
private static final String DEFAULT_ONINVALID = "removeAttribute";
public static final int DEFAULT_MAX_INPUT_SIZE = 100000;
public static final int DEFAULT_MAX_STYLESHEET_IMPORTS = 1;
public static final String OMIT_XML_DECLARATION = "omitXmlDeclaration";
public static final String OMIT_DOCTYPE_DECLARATION = "omitDoctypeDeclaration";
public static final String MAX_INPUT_SIZE = "maxInputSize";
public static final String USE_XHTML = "useXHTML";
public static final String FORMAT_OUTPUT = "formatOutput";
public static final String EMBED_STYLESHEETS = "embedStyleSheets";
public static final String CONNECTION_TIMEOUT = "connectionTimeout";
public static final String ANCHORS_NOFOLLOW = "nofollowAnchors";
public static final String VALIDATE_PARAM_AS_EMBED = "validateParamAsEmbed";
public static final String PRESERVE_SPACE = "preserveSpace";
public static final String PRESERVE_COMMENTS = "preserveComments";
public static final String ENCODE_TAGS = "onUnknownTag";
public static final String ACTION_VALIDATE = "validate";
public static final String ACTION_FILTER = "filter";
public static final String ACTION_TRUNCATE = "truncate";
private static char REGEXP_BEGIN = '^';
private static char REGEXP_END = '$';
private HashMap commonRegularExpressions;
private HashMap commonAttributes;
private HashMap tagRules;
private HashMap cssRules;
private HashMap directives;
private HashMap globalAttributes;
private ArrayList encodeTags;
private ArrayList tagNames;
public boolean isTagInListToEncode(String s) {
for(int i=0;i section.
*/
Element commonRegularExpressionListNode = (Element)topLevelElement.getElementsByTagName("common-regexps").item(0);
this.commonRegularExpressions = parseCommonRegExps(commonRegularExpressionListNode);
/**
* Next we read in the directives.
*/
Element directiveListNode = (Element)topLevelElement.getElementsByTagName("directives").item(0);
this.directives = parseDirectives(directiveListNode);
/**
* Next we read in the common attributes.
*/
Element commonAttributeListNode = (Element)topLevelElement.getElementsByTagName("common-attributes").item(0);
this.commonAttributes = parseCommonAttributes(commonAttributeListNode);
/**
* Next we need the global tag attributes (id, style, etc.)
*/
Element globalAttributeListNode = (Element)topLevelElement.getElementsByTagName("global-tag-attributes").item(0);
this.globalAttributes = parseGlobalAttributes(globalAttributeListNode);
/**
* Next we read in the tags that should be encoded when they're encountered like .
*/
NodeList tagsToEncodeList = topLevelElement.getElementsByTagName("tags-to-encode");
if ( tagsToEncodeList != null && tagsToEncodeList.getLength() != 0 ) {
this.encodeTags = parseTagsToEncode((Element)tagsToEncodeList.item(0));
} else {
this.encodeTags = new ArrayList();
}
/**
* Next, we read in the tag restrictions.
*/
Element tagListNode = (Element)topLevelElement.getElementsByTagName("tag-rules").item(0);
this.tagRules = parseTagRules(tagListNode);
/**
* Finally, we read in the CSS rules.
*/
Element cssListNode = (Element)topLevelElement.getElementsByTagName("css-rules").item(0);
this.cssRules = parseCSSRules(cssListNode);
} catch (SAXException e) {
throw new PolicyException(e);
} catch (ParserConfigurationException e) {
throw new PolicyException(e);
} catch (IOException e) {
throw new PolicyException(e);
}
}
/**
* Go through the &lt;directives&gt; section of the policy file.
* @param root Top level of &lt;directives&gt;
* @return A HashMap of directives for validation behavior.
*/
private HashMap parseDirectives(Element root) {
HashMap directives = new HashMap();
NodeList directiveNodes = root.getElementsByTagName("directive");
for(int i=0;i section of the policy file.
* @param root Top level of
* @return A HashMap of String tags that are to be encoded when they're encountered.
* @throws PolicyException
*/
private ArrayList parseTagsToEncode(Element root) throws PolicyException {
ArrayList tagsToEncode = new ArrayList();
NodeList tagsToEncodeNodes = root.getElementsByTagName("tag");
if ( tagsToEncodeNodes != null ) {
for(int i=0;i section of the policy file.
* @param globalAttributeListNode Top level of
* @return A HashMap of global Attributes that need validation for every tag.
* @throws PolicyException
*/
private HashMap parseGlobalAttributes(Element root) throws PolicyException {
HashMap globalAttributes = new HashMap();
NodeList globalAttributeNodes = root.getElementsByTagName("attribute");
/*
* Loop through the list of regular expressions and add them to the collection.
*/
for(int i=0;i");
}
}
return globalAttributes;
}
/**
* Go through the &lt;common-regexps&gt; section of the policy file.
* @param root Top level of &lt;common-regexps&gt;
* @return A HashMap of the common regular expressions (AntiSamyPattern objects), looked up by name.
*/
private HashMap parseCommonRegExps(Element root) {
HashMap commonRegularExpressions = new HashMap();
NodeList commonRegExpPatternNodes = root.getElementsByTagName("regexp");
/*
* Loop through the list of regular expressions and add them to the collection.
*/
for(int i=0;i section of the policy file.
* @param root Top level of
* @return An ArrayList of Attribute objects.
*/
private HashMap parseCommonAttributes(Element root) {
HashMap commonAttributes = new HashMap();
NodeList commonAttributesNodes = root.getElementsByTagName("attribute");
/*
* Loop through the list of attributes and add them to the collection.
*/
for(int i=0;i 0 ) {
attribute.setOnInvalid(onInvalid);
} else {
attribute.setOnInvalid(DEFAULT_ONINVALID);
}
Element regExpListNode = (Element)ele.getElementsByTagName("regexp-list").item(0);
if ( regExpListNode != null ) {
NodeList regExpList = regExpListNode.getElementsByTagName("regexp");
/*
* First go through the allowed regular expressions.
*/
for(int j=0;j 0 ) {
/*
* Get the common regular expression.
*/
attribute.addAllowedRegExp(getRegularExpression(regExpName).getPattern());
} else {
attribute.addAllowedRegExp(Pattern.compile(REGEXP_BEGIN+value+REGEXP_END)) ;
}
}
}
Element literalListNode = (Element)ele.getElementsByTagName("literal-list").item(0);
if ( literalListNode != null ) {
NodeList literalList = literalListNode.getElementsByTagName("literal");
/*
* Then go through the allowed constants.
*/
for(int j=0;j 0 ) {
attribute.addAllowedValue(value);
} else if ( literalNode.getNodeValue() != null ) {
attribute.addAllowedValue(literalNode.getNodeValue());
}
}
}
commonAttributes.put(name.toLowerCase(),attribute);
}
return commonAttributes;
}
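/**
* Private method for parsing the &lt;tag-rules&gt; section from the XML file.
* @param root The root element for &lt;tag-rules&gt;
* @return A HashMap mapping each lower-cased tag name to its Tag rule.
* @throws PolicyException if a tag references a common regular expression that does not exist.
*/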
private HashMap parseTagRules(Element root) throws PolicyException {
HashMap tags = new HashMap();
NodeList tagList = root.getElementsByTagName("tag");
/*
* Go through tags.
*/
for(int i=0;i");
}
} else {
/*
* Custom attribute for this tag.
*/
Attribute attribute = new Attribute(XMLUtil.getAttributeValue(attributeNode,"name"));
attribute.setOnInvalid(XMLUtil.getAttributeValue(attributeNode,"onInvalid"));
attribute.setDescription(XMLUtil.getAttributeValue(attributeNode,"description"));
/*
* Get the list of regexps for the attribute.
*/
Element regExpListNode = (Element)attributeNode.getElementsByTagName("regexp-list").item(0);
if ( regExpListNode != null ) {
NodeList regExpList = regExpListNode.getElementsByTagName("regexp");
for(int k=0;k 0 ) {
AntiSamyPattern pattern = getRegularExpression(regExpName);
if ( pattern != null ) {
attribute.addAllowedRegExp(pattern.getPattern());
} else {
throw new PolicyException("Regular expression '"+regExpName+"' was referenced as a common regexp in definition of '"+tag.getName()+"', but does not exist in ");
}
} else if ( value != null && value.length() > 0 ) {
attribute.addAllowedRegExp(Pattern.compile(REGEXP_BEGIN+value+REGEXP_END));
}
}
}
/*
* Get the list of constant values for the attribute.
*/
Element literalListNode = (Element)attributeNode.getElementsByTagName("literal-list").item(0);
if ( literalListNode != null ) {
NodeList literalList = literalListNode.getElementsByTagName("literal");
for(int k=0;k 0 ) {
attribute.addAllowedValue(value);
} else if ( literalNode.getNodeValue() != null ) {
attribute.addAllowedValue(literalNode.getNodeValue());
}
}
}
/*
* Add fully built attribute.
*/
tag.addAttribute(attribute);
}
}
tags.put(name.toLowerCase(),tag);
}
return tags;
}
/**
* Go through the &lt;css-rules&gt; section of the policy file.
* @param root Top level of &lt;css-rules&gt;
* @return A HashMap of the Property objects parsed from the section.
* @throws PolicyException
*/
private HashMap parseCSSRules(Element root) throws PolicyException {
HashMap properties = new HashMap();
NodeList propertyNodes = root.getElementsByTagName("property");
/*
* Loop through the list of attributes and add them to the collection.
*/
for(int i=0;i 0 ) {
property.setOnInvalid(onInvalid);
} else {
property.setOnInvalid(DEFAULT_ONINVALID);
}
Element regExpListNode = (Element)ele.getElementsByTagName("regexp-list").item(0);
if ( regExpListNode != null ) {
NodeList regExpList = regExpListNode.getElementsByTagName("regexp");
/*
* First go through the allowed regular expressions.
*/
for(int j=0;j");
}
}
}
Element literalListNode = (Element)ele.getElementsByTagName("literal-list").item(0);
if ( literalListNode != null ) {
NodeList literalList = literalListNode.getElementsByTagName("literal");
/*
* Then go through the allowed constants.
*/
for(int j=0;j entries by
* name.
*
* @param name The name of the common regexp we want to look up.
* @return An AntiSamyPattern associated with the lookup name specified.
*/
public AntiSamyPattern getRegularExpression(String name) {
    // The common-regexp map is untyped, so the stored value is cast on the way out.
    // A name with no entry yields null.
    final Object match = commonRegularExpressions.get(name);
    return (AntiSamyPattern) match;
}
/**
 * Looks up one of the global attribute entries by name. The name is
 * lower-cased before it is used as the lookup key.
 *
 * @param name The name of the global attribute we want to look up.
 * @return The Attribute stored under the lower-cased name, or null if there is none.
 */
public Attribute getGlobalAttributeByName(String name) {
    final String key = name.toLowerCase();
    final Object found = globalAttributes.get(key);
    return (Attribute) found;
}
/**
 * Looks up one of the common attribute entries by name. The name is
 * lower-cased before it is used as the lookup key.
 *
 * @param attributeName The name of the common attribute we want to look up.
 * @return The Attribute stored under the lower-cased name, or null if there is none.
 */
private Attribute getCommonAttributeByName(String attributeName) {
    final String key = attributeName.toLowerCase();
    final Object found = commonAttributes.get(key);
    return (Attribute) found;
}
/**
 * Return all the tags accepted by the Policy object.
 *
 * @return A String array of all the tag names accepted by the current Policy.
 *         The array is empty (never null, and never padded with a null
 *         element) when no tag names have been recorded.
 */
public String[] getTags() {
    // NOTE(review): no assignment to tagNames is visible in this file view;
    // guard against it being unset rather than failing with a NullPointerException.
    if (tagNames == null) {
        return new String[0];
    }
    // A zero-length seed array makes toArray allocate an array of exactly
    // tagNames.size() elements. The previous one-element seed was handed back
    // unchanged for an empty list, producing a bogus array containing null.
    return (String[]) tagNames.toArray(new String[0]);
}
/**
 * Fetches the value of a directive by its lookup name.
 *
 * @param name The lookup name of the directive.
 * @return A String object containing the directive associated with the lookup name, or null if none is found.
 */
public String getDirective(String name) {
    final Object value = directives.get(name);
    return (String) value;
}
/**
 * Stores a directive value under the given name, replacing any value
 * previously stored under that name.
 *
 * @param name A directive to set a value for.
 * @param value The new value for the directive.
 */
public void setDirective(String name, String value) {
    this.directives.put(name, value);
}
/**
 * Returns the maximum input size. If this value is not specified by the
 * policy, or is not a valid integer, DEFAULT_MAX_INPUT_SIZE is used.
 * Surrounding whitespace in the configured value is ignored.
 *
 * @return the maximum input size.
 */
public int getMaxInputSize() {
    // Use the shared directive-name constant instead of repeating the literal.
    String configured = getDirective(MAX_INPUT_SIZE);
    if (configured == null) {
        // Directive absent: this is the normal case, not an error path.
        return DEFAULT_MAX_INPUT_SIZE;
    }
    try {
        return Integer.parseInt(configured.trim());
    } catch (NumberFormatException ignored) {
        // A malformed value is deliberately treated the same as a missing one.
        return DEFAULT_MAX_INPUT_SIZE;
    }
}
/**
 * Main test unit. Parses resources/antisamy.xml into a DOM tree and validates
 * it against the W3C XML Schema loaded from resources/antisamy.xsd. Prints
 * "made it through!" when validation succeeds; when the validator reports a
 * SAXException, its stack trace is printed instead.
 *
 * @param args not used
 * @throws Exception if the parser cannot be built, the policy cannot be parsed,
 *         or the schema cannot be loaded
 */
public static void main(String[] args) throws Exception {
    // Step 1: read the policy document into a DOM tree.
    DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
    DocumentBuilder builder = builderFactory.newDocumentBuilder();
    File policyFile = new File("resources/antisamy.xml");
    Document policyDom = builder.parse(policyFile);

    // Step 2: compile the WXS schema the policy is expected to conform to.
    SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
    File schemaLocation = new File("resources/antisamy.xsd");
    Source schemaSource = new StreamSource(schemaLocation);
    Schema policySchema = schemaFactory.newSchema(schemaSource);

    // Step 3: validate the DOM tree against the compiled schema.
    Validator checker = policySchema.newValidator();
    DOMSource policySource = new DOMSource(policyDom);
    try {
        checker.validate(policySource);
        System.out.println("made it through!");
    } catch (SAXException invalid) {
        // The instance document does not conform to the schema.
        invalid.printStackTrace();
    }
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy