
org.owasp.validator.html.AntiSamy Maven / Gradle / Ivy
The newest version!
/*
* Copyright (c) 2007-2008, Arshan Dabirsiaghi, Jason Li
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
*
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
* Neither the name of OWASP nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.owasp.validator.html;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import org.owasp.validator.html.scan.AntiSamyDOMScanner;
/**
 * This is the only class the outside world should be calling. The {@code scan()} method holds
 * the meat and potatoes of AntiSamy. The file contains a number of ways of {@code scan()}'ing,
 * depending on the accessibility of the policy file.
 *
 * @author Arshan Dabirsiaghi
 */
public class AntiSamy {
// Encodings passed to the scanner by scan(String, Policy); both start out as the scanner's default.
private String inputEncoding = AntiSamyDOMScanner.DEFAULT_ENCODING_ALGORITHM;
private String outputEncoding = AntiSamyDOMScanner.DEFAULT_ENCODING_ALGORITHM;
// Policy used by scan(String); left at the implicit field default (null) unless a constructor sets it.
private Policy policy;
/**
 * Creates an instance without a policy. {@link #scan(String)} throws a
 * {@code PolicyException} while the policy field is still null.
 */
public AntiSamy() {
    // Intentionally empty: every field keeps its declared initial value.
}
/**
 * Creates an instance that uses the given policy for {@link #scan(String)}.
 *
 * @param policy the policy to keep; it is stored as-is, without a null check
 */
public AntiSamy(Policy policy) {
    this.policy = policy;
}
/**
 * The meat and potatoes. The {@code scan()} family of methods are the only methods the outside
 * world should be calling to invoke AntiSamy. This overload scans with the policy held by this
 * instance; the encoding of the input and the encoding the output should be in are taken from
 * this instance's {@code inputEncoding} and {@code outputEncoding} fields.
 *
 * @param taintedHTML Untrusted HTML which may contain malicious code.
 * @return A {@code CleanResults} object which contains information about the scan (including the results).
 * @throws ScanException When there is a problem encountered while scanning the HTML.
 * @throws PolicyException When no policy is loaded or there is a problem reading the policy file.
 */
public CleanResults scan(String taintedHTML) throws ScanException, PolicyException {
    // Happy path first: delegate to the Policy-taking overload with this instance's policy.
    if (this.policy != null) {
        return scan(taintedHTML, this.policy);
    }
    // Without a policy there is nothing to scan against.
    throw new PolicyException("No policy loaded");
}
/**
 * This method wraps {@code scan()} using the Policy object passed in.
 */
public CleanResults scan(String taintedHTML, Policy policy) throws ScanException, PolicyException {
    // Reject a missing policy up front, with the same exception type and message that
    // scan(String) uses, rather than handing null to the scanner.
    if (policy == null) {
        throw new PolicyException("No policy loaded");
    }
    // A new scanner is built for each call; the encodings come from this instance's fields.
    AntiSamyDOMScanner scanner = new AntiSamyDOMScanner(policy);
    return scanner.scan(taintedHTML, inputEncoding, outputEncoding);
}
/**
 * This method wraps {@code scan()} using the policy file name passed in.
 */
public CleanResults scan(String taintedHTML, String filename) throws ScanException, PolicyException {
    // Obtain the policy for the named policy document (antisamy.xml); the scanner
    // needs it to know what to look for.
    final Policy policyFromName = Policy.getInstance(filename);
    return scan(taintedHTML, policyFromName);
}
/**
 * This method wraps {@code scan()} using the policy File object passed in.
 */
public CleanResults scan(String taintedHTML, File policyFile) throws ScanException, PolicyException {
    // Obtain the policy for the given policy document (antisamy.xml); the scanner
    // needs it to know what to look for.
    final Policy policyFromFile = Policy.getInstance(policyFile);
    return scan(taintedHTML, policyFromFile);
}
/**
* Main method for testing AntiSamy.
* @param args Command line arguments. Only 1 argument is processed, and it should be a URL or filename to run through AntiSamy using the default policy location.
*/
public static void main(String[] args) {
if ( args.length == 0 ) {
System.err.println("Please specify a URL or file name to filter - thanks!");
return;
}
try {
StringBuffer buff = new StringBuffer();
URL httpUrl = null;
FileReader fileUrl = null;
BufferedReader in = null;
try {
httpUrl = new URL(args[0]);
in = new BufferedReader( new InputStreamReader(httpUrl.openStream()));
} catch (MalformedURLException e) {
try {
fileUrl = new FileReader(new File(args[0]));
} catch (FileNotFoundException e1) {
System.err.println("Please specify a URL or file name to filter - thanks!");
return;
}
in = new BufferedReader(fileUrl);
} catch (IOException e) {
System.err.println("Encountered an IOException while reading URL: ");
e.printStackTrace();
}
String inputLine;
while ((inputLine = in.readLine()) != null)
buff.append(inputLine);
in.close();
AntiSamy as = new AntiSamy();
CleanResults test = as.scan(buff.toString(), Policy.getInstance("C:/Users/adabirsiaghi/workspace/AntiSamy Current Branch/resources/antisamy.xml"));
System.out.println("[1] Finished scan [" + test.getCleanHTML().length() + " bytes] in " + test.getScanTime() + " seconds\n");
System.out.println("[2] Clean HTML fragment:\n" + test.getCleanHTML());
System.out.println("[3] Error Messages ("+test.getNumberOfErrors() +"):");
for(int i=0;i
© 2015 - 2025 Weber Informatics LLC | Privacy Policy