All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.owasp.validator.html.Policy Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2007-2008, Arshan Dabirsiaghi, Jason Li
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.owasp.validator.html;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.regex.Pattern;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Source;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;

import org.owasp.validator.html.model.AntiSamyPattern;
import org.owasp.validator.html.model.Attribute;
import org.owasp.validator.html.model.Property;
import org.owasp.validator.html.model.Tag;
import org.owasp.validator.html.util.XMLUtil;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Policy.java
 *
 * This file holds the model for our policy engine.
 *
 * @author Arshan Dabirsiaghi
 *
 */

public class Policy {

	/** Compiled form of the regular expression {@code .*}. */
	public static final Pattern ANYTHING_REGEXP = Pattern.compile(".*");

	// Location of the default policy file.
	private static final String DEFAULT_POLICY_URI = "resources/antisamy.xml";
	// Fallback onInvalid action, used by parseCommonAttributes() and parseCSSRules()
	// in the else-branch of their onInvalid handling.
	private static final String DEFAULT_ONINVALID = "removeAttribute";

	/** Value returned by getMaxInputSize() when the directive is missing or not a number. */
	public static final int DEFAULT_MAX_INPUT_SIZE = 100000;
	// NOTE(review): not referenced in the visible part of this file; presumably a limit on
	// imported stylesheets enforced elsewhere - confirm.
	public static final int DEFAULT_MAX_STYLESHEET_IMPORTS = 1;

	// The constants below look like directive names for getDirective()/setDirective().
	// Only "maxInputSize" is read in the visible part of this file (by getMaxInputSize());
	// the rest are presumably consumed by other classes - confirm.
	public static final String OMIT_XML_DECLARATION = "omitXmlDeclaration";
	public static final String OMIT_DOCTYPE_DECLARATION = "omitDoctypeDeclaration";
	public static final String MAX_INPUT_SIZE = "maxInputSize";
	public static final String USE_XHTML = "useXHTML";
	public static final String FORMAT_OUTPUT = "formatOutput";
	public static final String EMBED_STYLESHEETS = "embedStyleSheets";
	public static final String CONNECTION_TIMEOUT = "connectionTimeout";
	public static final String ANCHORS_NOFOLLOW = "nofollowAnchors";
	public static final String VALIDATE_PARAM_AS_EMBED = "validateParamAsEmbed";
	public static final String PRESERVE_SPACE = "preserveSpace";
	public static final String PRESERVE_COMMENTS = "preserveComments";
	
	// NOTE(review): presumably the directive that selects the handling of unknown tags - confirm.
	public static final String ENCODE_TAGS = "onUnknownTag";
	
	// NOTE(review): presumably the action names a tag rule may carry - confirm against Tag.
	public static final String ACTION_VALIDATE	= "validate";
	public static final String ACTION_FILTER	= "filter";
	public static final String ACTION_TRUNCATE	= "truncate";

	// Anchors wrapped around an inline regexp value before it is compiled
	// (REGEXP_BEGIN+value+REGEXP_END).
	private static char REGEXP_BEGIN = '^';
	private static char REGEXP_END = '$';

	// Result of parseCommonRegExps(); getRegularExpression() reads its values as AntiSamyPattern.
	private HashMap commonRegularExpressions;
	// Result of parseCommonAttributes(); keyed by lower-cased attribute name.
	private HashMap commonAttributes;
	// Result of parseTagRules(); keyed by lower-cased tag name.
	private HashMap tagRules;
	// Result of parseCSSRules().
	private HashMap cssRules;
	// Result of parseDirectives(); read and written through getDirective()/setDirective().
	private HashMap directives;
	// Result of parseGlobalAttributes(); looked up by lower-cased name.
	private HashMap globalAttributes;
	// Result of parseTagsToEncode(), or an empty list when the policy has no "tags-to-encode" element.
	private ArrayList encodeTags;

	// Read by getTags(). NOTE(review): no assignment is visible in this part of the file.
	private ArrayList tagNames;

	public boolean isTagInListToEncode(String s) {
		for(int i=0;i section.
			 */

			Element commonRegularExpressionListNode = (Element)topLevelElement.getElementsByTagName("common-regexps").item(0);

			this.commonRegularExpressions = parseCommonRegExps(commonRegularExpressionListNode);


			/**
			 * Next we read in the directives.
			 */

			Element directiveListNode = (Element)topLevelElement.getElementsByTagName("directives").item(0);
			this.directives = parseDirectives(directiveListNode);


			/**
			 * Next we read in the common attributes.
			 */
			Element commonAttributeListNode = (Element)topLevelElement.getElementsByTagName("common-attributes").item(0);

			this.commonAttributes = parseCommonAttributes(commonAttributeListNode);

			/**
			 * Next we need the global tag attributes (id, style, etc.)
			 */

			Element globalAttributeListNode = (Element)topLevelElement.getElementsByTagName("global-tag-attributes").item(0);

			this.globalAttributes = parseGlobalAttributes(globalAttributeListNode);

			/**
			 * Next we read in the tags that should be encoded when they're encountered (the "tags-to-encode" section, if present).
			 */

			NodeList tagsToEncodeList = topLevelElement.getElementsByTagName("tags-to-encode");
			if ( tagsToEncodeList != null && tagsToEncodeList.getLength() != 0 ) {
				this.encodeTags = parseTagsToEncode((Element)tagsToEncodeList.item(0));
			} else {
				this.encodeTags = new ArrayList();
			}

			/**
			 * Next, we read in the tag restrictions.
			 */
			Element tagListNode = (Element)topLevelElement.getElementsByTagName("tag-rules").item(0);

			this.tagRules = parseTagRules(tagListNode);

			/**
			 * Finally, we read in the CSS rules.
			 */
			Element cssListNode = (Element)topLevelElement.getElementsByTagName("css-rules").item(0);

			this.cssRules = parseCSSRules(cssListNode);


		} catch (SAXException e) {
			throw new PolicyException(e);
		} catch (ParserConfigurationException e) {
			throw new PolicyException(e);
		} catch (IOException e) {
			throw new PolicyException(e);
		}
	}



	/**
	 * Go through the {@code <directives>} section of the policy file.
	 * @param root Top level of {@code <directives>}
	 * @return A HashMap of directives for validation behavior.
	 */
	private HashMap parseDirectives(Element root) {

		HashMap directives = new HashMap();

		NodeList directiveNodes = root.getElementsByTagName("directive");

		for(int i=0;i section of the policy file.
	 * @param root Top level of 
	 * @return A HashMap of String tags that are to be encoded when they're encountered.
	 * @throws PolicyException
	 */
	private ArrayList parseTagsToEncode(Element root) throws PolicyException {

		ArrayList tagsToEncode = new ArrayList();

		NodeList tagsToEncodeNodes = root.getElementsByTagName("tag");

		if ( tagsToEncodeNodes != null ) {

			for(int i=0;i section of the policy file.
	 * @param globalAttributeListNode Top level of 
	 * @return A HashMap of global Attributes that need validation for every tag.
	 * @throws PolicyException
	 */
	private HashMap parseGlobalAttributes(Element root) throws PolicyException {

		HashMap globalAttributes = new HashMap();

		NodeList globalAttributeNodes = root.getElementsByTagName("attribute");

		/*
		 * Loop through the list of regular expressions and add them to the collection.
		 */
		for(int i=0;i");
			}
		}

		return globalAttributes;
	}

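	/**
	 * Go through the {@code <common-regexps>} section of the policy file.
	 * @param root Top level of {@code <common-regexps>}
	 * @return A HashMap of the common regular expressions; getRegularExpression() reads its values as AntiSamyPattern objects.
	 */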
	private HashMap parseCommonRegExps(Element root) {

		HashMap commonRegularExpressions = new HashMap();

		NodeList commonRegExpPatternNodes = root.getElementsByTagName("regexp");

		/*
		 * Loop through the list of regular expressions and add them to the collection.
		 */
		for(int i=0;i section of the policy file.
	 * @param root Top level of 
	 * @return An ArrayList of Attribute objects.
	 */
	private HashMap parseCommonAttributes(Element root) {

		HashMap commonAttributes = new HashMap();

		NodeList commonAttributesNodes = root.getElementsByTagName("attribute");

		/*
		 * Loop through the list of attributes and add them to the collection.
		 */
		for(int i=0;i 0 ) {
				attribute.setOnInvalid(onInvalid);
			} else {
				attribute.setOnInvalid(DEFAULT_ONINVALID);
			}

			Element regExpListNode = (Element)ele.getElementsByTagName("regexp-list").item(0);


			if ( regExpListNode != null ) {
				NodeList regExpList = regExpListNode.getElementsByTagName("regexp");

				/*
				 * First go through the allowed regular expressions.
				 */
				for(int j=0;j 0 ) {
						/*
						 * Get the common regular expression.
						 */
						attribute.addAllowedRegExp(getRegularExpression(regExpName).getPattern());
					} else {
						attribute.addAllowedRegExp(Pattern.compile(REGEXP_BEGIN+value+REGEXP_END)) ;
					}
				}
			}

			Element literalListNode = (Element)ele.getElementsByTagName("literal-list").item(0);

			if ( literalListNode != null ) {

				NodeList literalList = literalListNode.getElementsByTagName("literal");
				/*
				 * Then go through the allowed constants.
				 */
				for(int j=0;j 0 ) {
						attribute.addAllowedValue(value);
					} else if ( literalNode.getNodeValue() != null ) {
						attribute.addAllowedValue(literalNode.getNodeValue());
					}

				}

			}

			commonAttributes.put(name.toLowerCase(),attribute);

		}

		return commonAttributes;
	}


	/**
	 * Private method for parsing the {@code <tag-rules>} section from the XML file.
	 * @param root The root element for {@code <tag-rules>}
	 * @return A HashMap containing the rules, keyed by lower-cased tag name.
	 * @throws PolicyException
	 */
	private HashMap parseTagRules(Element root) throws PolicyException {

		HashMap tags = new HashMap();

		NodeList tagList = root.getElementsByTagName("tag");

		/*
		 * Go through tags.
		 */
		for(int i=0;i");

					}

				} else {
					/*
					 * Custom attribute for this tag.
					 */
					Attribute attribute = new Attribute(XMLUtil.getAttributeValue(attributeNode,"name"));
					attribute.setOnInvalid(XMLUtil.getAttributeValue(attributeNode,"onInvalid"));
					attribute.setDescription(XMLUtil.getAttributeValue(attributeNode,"description"));

					/*
					 * Get the list of regexps for the attribute.
					 */
					Element regExpListNode = (Element)attributeNode.getElementsByTagName("regexp-list").item(0);

					if ( regExpListNode != null ) {
						NodeList regExpList = regExpListNode.getElementsByTagName("regexp");

						for(int k=0;k 0 ) {

								AntiSamyPattern pattern = getRegularExpression(regExpName);

								if ( pattern != null ) {

									attribute.addAllowedRegExp(pattern.getPattern());
								} else {

									throw new PolicyException("Regular expression '"+regExpName+"' was referenced as a common regexp in definition of '"+tag.getName()+"', but does not exist in ");
								}

							} else if ( value != null && value.length() > 0 ) {
								attribute.addAllowedRegExp(Pattern.compile(REGEXP_BEGIN+value+REGEXP_END));
							}
						}
					}

					/*
					 * Get the list of constant values for the attribute.
					 */
					Element literalListNode = (Element)attributeNode.getElementsByTagName("literal-list").item(0);

					if ( literalListNode != null ) {
						NodeList literalList = literalListNode.getElementsByTagName("literal");

						for(int k=0;k 0 ) {
								attribute.addAllowedValue(value);
							} else if ( literalNode.getNodeValue() != null ) {
								attribute.addAllowedValue(literalNode.getNodeValue());
							}

						}
					}
					/*
					 * Add fully built attribute.
					 */
					tag.addAttribute(attribute);
				}

			}

			tags.put(name.toLowerCase(),tag);
		}

		return tags;
	}

	/**
	 * Go through the {@code <css-rules>} section of the policy file.
	 * @param root Top level of {@code <css-rules>}
	 * @return A HashMap built from the {@code <property>} entries.
	 * @throws PolicyException
	 */
	private HashMap parseCSSRules(Element root) throws PolicyException {

		HashMap properties = new HashMap();

		NodeList propertyNodes = root.getElementsByTagName("property");

		/*
		 * Loop through the list of attributes and add them to the collection.
		 */
		for(int i=0;i 0 ) {
				property.setOnInvalid(onInvalid);
			} else {
				property.setOnInvalid(DEFAULT_ONINVALID);
			}

			Element regExpListNode = (Element)ele.getElementsByTagName("regexp-list").item(0);


			if ( regExpListNode != null ) {
				NodeList regExpList = regExpListNode.getElementsByTagName("regexp");

				/*
				 * First go through the allowed regular expressions.
				 */
				for(int j=0;j");
					}

				}
			}

			Element literalListNode = (Element)ele.getElementsByTagName("literal-list").item(0);

			if ( literalListNode != null ) {

				NodeList literalList = literalListNode.getElementsByTagName("literal");
				/*
				 * Then go through the allowed constants.
				 */
				for(int j=0;j entries by
	 * name.
	 *
	 * @param name The name of the common regexp we want to look up.
	 * @return An AntiSamyPattern associated with the lookup name specified.
	 */
	public AntiSamyPattern getRegularExpression(String name) {
		// The map is untyped, so the stored value (null when the name is unknown)
		// has to be cast back to AntiSamyPattern.
		Object match = commonRegularExpressions.get(name);
		return (AntiSamyPattern) match;
	}

	/**
	 * Looks up one of the {@code <global-tag-attributes>} entries by name.
	 * The name is lower-cased before the lookup.
	 *
	 * @param name The name of the global attribute we want to look up.
	 * @return The Attribute stored under the lower-cased name, or null if there is none.
	 */
	public Attribute getGlobalAttributeByName(String name) {
		String key = name.toLowerCase();
		Object hit = globalAttributes.get(key);
		return (Attribute) hit;
	}

	/**
	 * Looks up one of the {@code <common-attributes>} entries by name.
	 * The name is lower-cased before the lookup, matching the lower-cased
	 * keys that parseCommonAttributes() stores.
	 *
	 * @param attributeName The name of the common attribute we want to look up.
	 * @return The Attribute stored under the lower-cased name, or null if there is none.
	 */
	private Attribute getCommonAttributeByName(String attributeName) {
		String key = attributeName.toLowerCase();
		Object hit = commonAttributes.get(key);
		return (Attribute) hit;
	}


	/**
	 * Return all the tags accepted by the Policy object.
	 *
	 * @return A String array holding every entry of tagNames. The array is
	 *         empty (length 0) when there are no tag names; it never contains
	 *         a padding null element.
	 */
	public String[] getTags() {
		// NOTE(review): no assignment to tagNames is visible in this part of the
		// file, so guard against it being unset instead of throwing a
		// NullPointerException from a plain getter.
		if ( tagNames == null ) {
			return new String[0];
		}

		// A zero-length seed array makes toArray() size the result exactly.
		// The previous one-element seed came back as { null } for an empty list.
		return (String[]) tagNames.toArray(new String[0]);
	}

	/**
	 * Looks up the value of a directive.
	 *
	 * @param name The lookup name of the directive.
	 * @return The String stored for that name, or null if none is found.
	 */
	public String getDirective(String name) {
		Object stored = directives.get(name);
		return (String) stored;
	}

	/**
	 * Stores a directive value under the given name, replacing any value
	 * already stored under that name.
	 *
	 * @param name The directive to set a value for.
	 * @param value The new value for the directive.
	 */
	public void setDirective(String name, String value) {
		this.directives.put(name, value);
	}

	/**
	 * Returns the maximum input size. If the {@link #MAX_INPUT_SIZE} directive
	 * is not specified by the policy, or its value is not a valid integer,
	 * {@link #DEFAULT_MAX_INPUT_SIZE} is used.
	 *
	 * @return the maximum input size.
	 */
	public int getMaxInputSize() {
		// Use the declared directive-name constant rather than repeating the literal.
		String configured = getDirective(MAX_INPUT_SIZE);

		// A missing directive is handled explicitly instead of relying on
		// Integer.parseInt(null) throwing NumberFormatException.
		if ( configured == null ) {
			return DEFAULT_MAX_INPUT_SIZE;
		}

		try {
			return Integer.parseInt(configured);
		} catch (NumberFormatException ignored) {
			// A value that is not an integer is treated the same as a missing directive.
			return DEFAULT_MAX_INPUT_SIZE;
		}
	}

	/**
	 * Manual check: parses resources/antisamy.xml into a DOM tree and validates
	 * it against resources/antisamy.xsd. Prints "made it through!" when the
	 * document is valid, and a stack trace when validation reports a SAXException.
	 *
	 * @param args not used
	 * @throws Exception if the policy or schema cannot be read or parsed
	 */
	public static void main(String[] args) throws Exception {
		// Step 1: read the policy file into a DOM tree.
		DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
		DocumentBuilder domBuilder = builderFactory.newDocumentBuilder();
		Document policyDom = domBuilder.parse(new File("resources/antisamy.xml"));

		// Step 2: compile the W3C XML Schema that describes a policy file.
		SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
		Source xsdSource = new StreamSource(new File("resources/antisamy.xsd"));
		Schema policySchema = schemaFactory.newSchema(xsdSource);

		// Step 3: validate the DOM tree against the compiled schema.
		Validator policyValidator = policySchema.newValidator();
		DOMSource policySource = new DOMSource(policyDom);

		try {
			policyValidator.validate(policySource);
			System.out.println("made it through!");
		} catch (SAXException invalidDocument) {
			// The instance document does not conform to the schema.
			invalidDocument.printStackTrace();
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy