samples.src.JSPTest Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of jericho-html

Jericho HTML Parser is a simple but powerful Java library allowing analysis and manipulation of parts of an HTML document, including some common server-side tags, while reproducing verbatim any unrecognised or invalid HTML. It also provides high-level HTML form manipulation functions.

There is a newer version: 2.3

Show newest version

import au.id.jericho.lib.html.*;
import java.util.*;
import java.io.*;
import java.net.*;

/**
 * Sample program that parses one HTML document twice and prints every
 * element found on each pass: first with the JSP markup left in place, then
 * after instructing the parser to ignore the JSP tags and any tags belonging
 * to declared tag libraries.
 */
public class JSPTest {
	/**
	 * Loads the document, then runs and prints both parsing passes.
	 *
	 * @param args optional; <code>args[0]</code> is the URL (or plain file
	 *             path) of the document. When no argument is given,
	 *             <code>data/jsp_test.html</code> is used and a notice is
	 *             written to stderr.
	 * @throws Exception propagated from building the URL, opening it, or
	 *                   reading its content
	 */
	public static void main(String[] args) throws Exception {
		String location;
		if (args.length>0) {
			location=args[0];
		} else {
			location="data/jsp_test.html";
			System.err.println("Using default argument of \""+location+'"');
		}
		// A location that contains no ':' is treated as a local file path.
		if (location.indexOf(':')<0) {
			location="file:"+location;
		}
		URL documentUrl=new URL(location);
		Reader documentReader=new InputStreamReader(documentUrl.openStream());
		String markup=Util.getString(documentReader);

		// Pass 1: JSP tags are left visible to the parser.
		Source rawSource=new Source(markup);
		rawSource.setLogWriter(new OutputStreamWriter(System.err)); // parser log goes to stderr
		System.out.println("The following elements are found without first ignoring JSP tags:\n");
		System.out.println("(Notice the errors encountered by the parser in some of the HTML elements)\n");
		displayAllElements(rawSource);

		System.out.println("*******************************************************************************\n\n\n");

		// Pass 2: a brand-new Source is required, otherwise the results
		// cached during the first pass would be reused.
		System.out.println("The following elements are found with first ignoring JSP tags:\n");
		Source filteredSource=new Source(markup);
		filteredSource.setLogWriter(new OutputStreamWriter(System.err)); // parser log goes to stderr
		ignoreJSPTags(filteredSource);
		displayAllElements(filteredSource);
	}

	/**
	 * Marks JSP-related tags in <code>source</code> as ignored when parsing.
	 * For every taglib declaration found among the
	 * <code>Tag.SERVER_COMMON</code> start tags, all start tags whose name
	 * begins with the declared prefix followed by ':' are ignored; finally
	 * the <code>Tag.SERVER_COMMON</code> tags themselves are ignored.
	 *
	 * @param source the document whose JSP tags should be ignored
	 */
	private static void ignoreJSPTags(Source source) {
		List serverTags=source.findAllStartTags(Tag.SERVER_COMMON);
		Iterator tagCursor=serverTags.iterator();
		while (tagCursor.hasNext()) {
			StartTag serverTag=(StartTag)tagCursor.next();
			String prefix=taglibPrefix(source,serverTag);
			if (prefix!=null) {
				// Ignore every tag living in the namespace named by this prefix.
				source.ignoreWhenParsing(source.findAllStartTags(prefix+':'));
			}
		}
		// The ordinary JSP tags are ignored as well.
		source.ignoreWhenParsing(serverTags);
	}

	/**
	 * Extracts the value of the "prefix" attribute from a taglib declaration.
	 *
	 * @param source    the document that <code>candidate</code> was found in
	 * @param candidate a start tag obtained via <code>Tag.SERVER_COMMON</code>
	 * @return the prefix value, or <code>null</code> when the tag is not a
	 *         taglib declaration or carries no usable "prefix" attribute
	 */
	private static String taglibPrefix(Source source, StartTag candidate) {
		// Only tags whose third character is '@' are considered
		// (presumably the "<%@ ... %>" directive form).
		if (candidate.toString().charAt(2)!='@') {
			return null;
		}
		Attributes attributes=candidate.parseAttributes();
		if (attributes==null) {
			return null;
		}
		// The text at the position where the attributes begin must read "taglib".
		boolean declaresTaglib=source.toString().startsWith("taglib",attributes.getBegin());
		if (!declaresTaglib) {
			return null;
		}
		Attribute prefixAttribute=attributes.get("prefix");
		return prefixAttribute==null ? null : prefixAttribute.getValue();
	}

	/**
	 * Prints each element of <code>source</code> to stdout: a separator
	 * line, the element's debug info, then the element itself.
	 *
	 * @param source the document whose elements are listed
	 */
	private static void displayAllElements(Source source) {
		Iterator elementCursor=source.findAllElements().iterator();
		while (elementCursor.hasNext()) {
			Element current=(Element)elementCursor.next();
			System.out.println("-------------------------------------------------------------------------------");
			System.out.println(current.getDebugInfo());
			System.out.println(current);
		}
	}
}