samples.src.JSPTest Maven / Gradle / Ivy
Go to download
Jericho HTML Parser is a simple but powerful Java library allowing analysis and manipulation of
parts of an HTML document, including some common server-side tags, while reproducing verbatim any
unrecognised or invalid HTML. It also provides high-level HTML form manipulation functions.
import au.id.jericho.lib.html.*;
import java.util.*;
import java.io.*;
import java.net.*;
/**
 * Sample program that prints every element found in a document twice: once parsed
 * as-is, and once after the JSP tags (and any taglib tags they declare) have been
 * registered with the source to be ignored when parsing.
 */
public class JSPTest {
	/**
	 * Loads the document named by the first argument, or "data/jsp_test.html" when no
	 * argument is supplied, and displays its elements before and after ignoring JSP tags.
	 *
	 * @param args optional; args[0] is the location of the document to load
	 * @throws Exception if the location cannot be turned into a URL or read
	 */
	public static void main(String[] args) throws Exception {
		String location="data/jsp_test.html";
		if (args.length>0) {
			location=args[0];
		} else {
			System.err.println("Using default argument of \""+location+"\"");
		}
		// a location containing no colon is prefixed with "file:" before building the URL
		if (location.indexOf(':')<0) {
			location="file:"+location;
		}
		InputStreamReader reader=new InputStreamReader(new URL(location).openStream());
		String htmlText=Util.getString(reader);

		// first pass: JSP tags are left in place
		Source untouchedSource=newLoggedSource(htmlText);
		System.out.println("The following elements are found without first ignoring JSP tags:\n");
		System.out.println("(Notice the errors encountered by the parser in some of the HTML elements)\n");
		displayAllElements(untouchedSource);

		System.out.println("*******************************************************************************\n\n\n");

		// second pass: a fresh source object is required, otherwise the cached results will be used
		System.out.println("The following elements are found with first ignoring JSP tags:\n");
		Source filteredSource=newLoggedSource(htmlText);
		ignoreJSPTags(filteredSource);
		displayAllElements(filteredSource);
	}

	/**
	 * Creates a new source over the given text whose log messages are sent to stderr.
	 */
	private static Source newLoggedSource(String htmlText) {
		Source created=new Source(htmlText);
		created.setLogWriter(new OutputStreamWriter(System.err));
		return created;
	}

	/**
	 * Registers with the source, as segments to ignore when parsing, every start tag of
	 * type Tag.SERVER_COMMON, plus the start tags found for each prefix declared by a
	 * taglib declaration among them.
	 */
	private static void ignoreJSPTags(Source source) {
		List serverTags=source.findAllStartTags(Tag.SERVER_COMMON);
		// taglib declarations come first, so the tags in their namespaces get ignored too
		Iterator tagIterator=serverTags.iterator();
		while (tagIterator.hasNext()) {
			StartTag serverTag=(StartTag)tagIterator.next();
			Attributes taglibAttributes=findTaglibAttributes(source,serverTag);
			if (taglibAttributes==null) {
				continue;
			}
			Attribute prefixAttribute=taglibAttributes.get("prefix");
			String prefix=(prefixAttribute==null) ? null : prefixAttribute.getValue();
			if (prefix!=null) {
				// every tag in the namespace named by the "prefix" attribute is ignored
				List namespaceTags=source.findAllStartTags(prefix+':');
				source.ignoreWhenParsing(namespaceTags);
			}
		}
		// finally the ordinary jsp tags themselves
		source.ignoreWhenParsing(serverTags);
	}

	/**
	 * Returns the parsed attributes of the given tag when it is a taglib declaration,
	 * otherwise null. A tag qualifies when the character at index 2 of its text is '@',
	 * its attributes can be parsed, and the source text starts with "taglib" at the
	 * position where the attributes begin.
	 */
	private static Attributes findTaglibAttributes(Source source, StartTag serverTag) {
		String tagText=serverTag.toString();
		if (tagText.charAt(2)!='@') {
			return null;
		}
		Attributes parsed=serverTag.parseAttributes();
		if (parsed==null) {
			return null;
		}
		boolean isTaglib=source.toString().startsWith("taglib",parsed.getBegin());
		return isTaglib ? parsed : null;
	}

	/**
	 * Prints a separator line, the debug info and the text of every element found in the source.
	 */
	private static void displayAllElements(Source source) {
		Iterator elementIterator=source.findAllElements().iterator();
		while (elementIterator.hasNext()) {
			Element current=(Element)elementIterator.next();
			System.out.println("-------------------------------------------------------------------------------");
			System.out.println(current.getDebugInfo());
			System.out.println(current);
		}
	}
}