All Downloads are FREE. Search and download functionalities are using the official Maven repository.

samples.src.DisplayAllElements Maven / Gradle / Ivy

Go to download

Jericho HTML Parser is a simple but powerful Java library allowing analysis and manipulation of parts of an HTML document, including some common server-side tags, while reproducing verbatim any unrecognised or invalid HTML. It also provides high-level HTML form manipulation functions.

There is a newer version: 2.3
Show newest version
import au.id.jericho.lib.html.*;
import java.util.*;
import java.io.*;
import java.net.*;

/**
 * Sample program that prints every element found in an HTML document.
 *
 * <p>Usage: <code>java DisplayAllElements [url-or-path]</code>. When no
 * argument is given, <code>data/test.html</code> is used and a notice is
 * written to standard error.
 */
public class DisplayAllElements {
	/**
	 * Loads the document named by the first argument and, for each element
	 * returned by <code>Source.findAllElements()</code>, prints a separator
	 * line, the element's debug info and the element itself to standard output.
	 *
	 * @param args optional single argument: a URL, or a path that is turned
	 *             into a <code>file:</code> URL when it contains no ':'
	 * @throws Exception if the URL is malformed or the document cannot be read
	 */
	public static void main(String[] args) throws Exception {
		String sourceUrlString="data/test.html";
		if (args.length==0) {
			System.err.println("Using default argument of \""+sourceUrlString+'"');
		} else {
			sourceUrlString=args[0];
		}
		// An argument without ':' carries no URL scheme, so treat it as a local file path.
		if (sourceUrlString.indexOf(':')==-1) sourceUrlString="file:"+sourceUrlString;
		URL sourceUrl=new URL(sourceUrlString);

		// Close the stream ourselves instead of depending on Util.getString to do it;
		// closing an already-closed stream is harmless.
		String htmlText;
		InputStream sourceStream=sourceUrl.openStream();
		try {
			htmlText=Util.getString(new InputStreamReader(sourceStream));
		} finally {
			sourceStream.close();
		}

		Source source=new Source(htmlText);
		// Send log messages to stderr. Keep a reference to the writer so it can be
		// flushed: OutputStreamWriter buffers, and unflushed messages would be lost at exit.
		Writer logWriter=new OutputStreamWriter(System.err);
		source.setLogWriter(logWriter);
		try {
			for (Iterator i=source.findAllElements().iterator(); i.hasNext();) {
				Element element=(Element)i.next();
				System.out.println("-------------------------------------------------------------------------------");
				System.out.println(element.getDebugInfo());
				System.out.println(element);
			}
		} finally {
			// Flush only; closing the writer would also close System.err.
			logWriter.flush();
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy