All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.owasp.validator.html.AntiSamy Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2007-2008, Arshan Dabirsiaghi, Jason Li
 * 
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
 * 
 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
 * Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.owasp.validator.html;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;

import org.owasp.validator.html.scan.AntiSamyDOMScanner;


/**
 * 
 * This is the only class that outside callers should use. The scan() method holds
 * the meat and potatoes of AntiSamy. This class offers several scan() overloads, which differ
 * in how the policy is supplied (a Policy object, a policy file name, or a policy File).
 * 
 * @author Arshan Dabirsiaghi
 *
 */

public class AntiSamy {
		
	// Encoding passed to the scanner as the input encoding by scan(String, Policy);
	// initialised to the DOM scanner's default encoding constant.
	private String inputEncoding = AntiSamyDOMScanner.DEFAULT_ENCODING_ALGORITHM;
	// Encoding passed to the scanner as the output encoding by scan(String, Policy);
	// initialised to the DOM scanner's default encoding constant.
	private String outputEncoding = AntiSamyDOMScanner.DEFAULT_ENCODING_ALGORITHM;
	
	// Policy used by scan(String). Stays null unless assigned by the AntiSamy(Policy)
	// constructor (no other assignment is visible in this part of the file); while it is
	// null, scan(String) throws a PolicyException.
	private Policy policy = null;
	
	/**
	 * Creates an instance with no policy set. In that state scan(String) throws a
	 * PolicyException; the scan overloads that accept a Policy, a policy file name,
	 * or a policy File take their policy from the argument instead.
	 */
	public AntiSamy () {}
	
	/**
	 * Creates an instance that keeps the given policy for later use by scan(String).
	 *
	 * @param policy The policy to store. It is not validated here, so passing null
	 *               leaves the instance without a policy.
	 */
	public AntiSamy (Policy policy) {
		this.policy = policy;
	}
	
	/**
	 * Scans untrusted HTML against the policy held by this instance. The scan() family
	 * of methods is the intended entry point into AntiSamy for outside callers; this
	 * overload delegates to scan(String, Policy) with the stored policy.
	 *
	 * @param taintedHTML Untrusted HTML which may contain malicious code.
	 * @return A CleanResults object which contains information about the scan (including the results).
	 * @throws ScanException When there is a problem encountered while scanning the HTML.
	 * @throws PolicyException When no policy has been set on this instance, or when the
	 *                         delegated scan reports a policy problem.
	 */
	public CleanResults scan(String taintedHTML) throws ScanException, PolicyException {
		// Happy path first: a stored policy means we can hand off straight away.
		if ( this.policy != null ) {
			return this.scan(taintedHTML, this.policy);
		}

		// No stored policy: this overload has nothing to scan against.
		throw new PolicyException("No policy loaded");
	}
	
	
	/**
	 * Scans untrusted HTML against the supplied policy. A new AntiSamyDOMScanner is
	 * created for every call and is given this instance's input and output encodings.
	 *
	 * @param taintedHTML Untrusted HTML to scan.
	 * @param policy The policy handed to the scanner; it is not checked for null here.
	 * @return The CleanResults returned by the scanner.
	 * @throws ScanException Declared by this method; presumably raised by the scanner when
	 *                       scanning fails (the scanner's source is not shown here).
	 * @throws PolicyException Declared by this method; which call raises it is not visible from here.
	 */
	public CleanResults scan(String taintedHTML, Policy policy) throws ScanException, PolicyException {
		AntiSamyDOMScanner domScanner = new AntiSamyDOMScanner(policy);
		return domScanner.scan(taintedHTML, this.inputEncoding, this.outputEncoding);
	}
	
	/**
	 * Scans untrusted HTML against the policy identified by a file name. The policy is
	 * obtained through Policy.getInstance(filename) and the work is then delegated to
	 * scan(String, Policy).
	 *
	 * @param taintedHTML Untrusted HTML to scan.
	 * @param filename Name of the policy document (for example antisamy.xml) passed to Policy.getInstance.
	 * @return The CleanResults produced by the delegated scan.
	 * @throws ScanException When there is a problem encountered while scanning the HTML.
	 * @throws PolicyException When there is a problem reading the policy file.
	 */
	public CleanResults scan(String taintedHTML, String filename) throws ScanException, PolicyException {
		// The scanner needs the policy document to know what to look for, so resolve it first.
		final Policy loadedPolicy = Policy.getInstance(filename);
		return this.scan(taintedHTML, loadedPolicy);
	}
	
	/**
	 * Scans untrusted HTML against the policy stored in the given File. The policy is
	 * obtained through Policy.getInstance(policyFile) and the work is then delegated to
	 * scan(String, Policy).
	 *
	 * @param taintedHTML Untrusted HTML to scan.
	 * @param policyFile File holding the policy document (for example antisamy.xml).
	 * @return The CleanResults produced by the delegated scan.
	 * @throws ScanException When there is a problem encountered while scanning the HTML.
	 * @throws PolicyException When there is a problem reading the policy file.
	 */
	public CleanResults scan(String taintedHTML, File policyFile) throws ScanException, PolicyException {
		// The scanner needs the policy document to know what to look for, so resolve it first.
		final Policy loadedPolicy = Policy.getInstance(policyFile);
		return this.scan(taintedHTML, loadedPolicy);
	}
	
	
	
	/**
	 * Main method for testing AntiSamy.
	 * @param args Command line arguments. Only 1 argument is processed, and it should be a URL or filename to run through AntiSamy using the default policy location.
	 */	
	public static void main(String[] args) {

		if ( args.length == 0 ) {
			System.err.println("Please specify a URL or file name to filter - thanks!");	
			return;
		}
		
		try {
			
			StringBuffer buff = new StringBuffer();

			URL httpUrl = null;
			FileReader fileUrl = null;
			BufferedReader in = null;			
			
			try {
			
				httpUrl = new URL(args[0]);
				in = new BufferedReader( new InputStreamReader(httpUrl.openStream()));
				
			} catch (MalformedURLException e) {
				
				try {
					fileUrl = new FileReader(new File(args[0]));
				} catch (FileNotFoundException e1) {
					System.err.println("Please specify a URL or file name to filter - thanks!");
					return;
				}
				
				in = new BufferedReader(fileUrl);
				
			} catch (IOException e) {

				System.err.println("Encountered an IOException while reading URL: ");
				e.printStackTrace();
			}
			
			String inputLine;

			while ((inputLine = in.readLine()) != null)
				buff.append(inputLine);

			in.close();

			AntiSamy as = new AntiSamy();
			
			CleanResults test = as.scan(buff.toString(), Policy.getInstance("C:/Users/adabirsiaghi/workspace/AntiSamy Current Branch/resources/antisamy.xml"));
			
			System.out.println("[1] Finished scan [" + test.getCleanHTML().length() + " bytes] in " + test.getScanTime() + " seconds\n");
	        System.out.println("[2] Clean HTML fragment:\n" +  test.getCleanHTML());
	        System.out.println("[3] Error Messages ("+test.getNumberOfErrors() +"):");
	        
			for(int i=0;i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy