samples.src.DisplayAllElements Maven / Gradle / Ivy
Go to download
Jericho HTML Parser is a simple but powerful Java library that allows analysis and manipulation of
parts of an HTML document, including some common server-side tags, while reproducing verbatim any
unrecognised or invalid HTML. It also provides high-level HTML form manipulation functions.
import au.id.jericho.lib.html.*;
import java.util.*;
import java.io.*;
import java.net.*;
/**
 * Sample program that parses an HTML document and prints every element found in it.
 *
 * <p>For each element returned by <code>Source.findAllElements()</code> it writes a separator
 * line, the element's debug info and the element's string form to standard output.
 * Parser log messages and the "default argument" notice are written to standard error.
 */
public class DisplayAllElements {
    /**
     * Entry point.
     *
     * @param args optional single argument naming the document to parse, either as a URL or as a
     *             local file path; when absent, <code>data/test.html</code> is used
     * @throws Exception if the URL is malformed, the document cannot be read, or parsing fails
     */
    public static void main(String[] args) throws Exception {
        String sourceUrlString="data/test.html";
        if (args.length==0) {
            System.err.println("Using default argument of \""+sourceUrlString+'"');
        } else {
            sourceUrlString=args[0];
        }
        // An argument without a ':' carries no URL scheme, so it is treated as a local file path.
        // NOTE(review): a path that itself contains ':' (e.g. a Windows drive path) is passed to
        // URL unchanged - confirm whether that case needs handling.
        if (sourceUrlString.indexOf(':')==-1) {
            sourceUrlString="file:"+sourceUrlString;
        }
        URL sourceUrl=new URL(sourceUrlString);

        // Read the whole document into memory, making sure the underlying stream is released
        // even if reading fails. Closing again is harmless should Util.getString already close it.
        // NOTE(review): the reader uses the platform default charset, as in the original.
        String htmlText;
        InputStreamReader reader=new InputStreamReader(sourceUrl.openStream());
        try {
            htmlText=Util.getString(reader);
        } finally {
            reader.close();
        }

        Source source=new Source(htmlText);
        OutputStreamWriter logWriter=new OutputStreamWriter(System.err);
        source.setLogWriter(logWriter); // send log messages to stderr
        try {
            for (Iterator i=source.findAllElements().iterator(); i.hasNext();) {
                Element element=(Element)i.next();
                System.out.println("-------------------------------------------------------------------------------");
                System.out.println(element.getDebugInfo());
                System.out.println(element);
            }
        } finally {
            // OutputStreamWriter buffers its output; flush so no log message is lost.
            // System.err itself is deliberately left open.
            logWriter.flush();
        }
    }
}