samples.src.FormFieldCSVOutput Maven / Gradle / Ivy
Go to download
Jericho HTML Parser is a simple but powerful Java library allowing analysis and manipulation of
parts of an HTML document, including some common server-side tags, while reproducing verbatim any
unrecognised or invalid HTML. It also provides high-level HTML form manipulation functions.
import au.id.jericho.lib.html.*;
import java.util.*;
import java.io.*;
import java.net.*;
/**
 * Sample program that loads an HTML document, finds its form fields and writes
 * their column headings and column values as two lines of the file
 * <code>FormData.csv</code> in the current working directory.
 */
public class FormFieldCSVOutput {
    /**
     * Reads the HTML document and writes its form data to <code>FormData.csv</code>.
     *
     * @param args optional; <code>args[0]</code> is the URL or file path of the HTML
     *             document. When no argument is given, <code>data/form.html</code> is used.
     * @throws Exception if the location cannot be turned into a URL, or an I/O error
     *                   occurs while reading the document or writing the CSV file
     */
    public static void main(String[] args) throws Exception {
        String sourceUrlString="data/form.html";
        if (args.length==0)
            System.err.println("Using default argument of \""+sourceUrlString+'"');
        else
            sourceUrlString=args[0];
        // A location without ':' carries no URL scheme, so it is treated as a local file path.
        if (sourceUrlString.indexOf(':')==-1) sourceUrlString="file:"+sourceUrlString;
        URL sourceUrl=new URL(sourceUrlString);

        // NOTE(review): the reader uses the platform default charset, as the original did.
        String htmlText;
        InputStreamReader in=new InputStreamReader(sourceUrl.openStream());
        try {
            htmlText=Util.getString(in);
        } finally {
            // Util.getString may already close the reader; a second close has no effect.
            in.close();
        }

        Source source=new Source(htmlText);
        source.setLogWriter(new OutputStreamWriter(System.err)); // send log messages to stderr
        FormFields formFields=source.findFormFields();

        Writer out=new FileWriter("FormData.csv");
        try {
            Util.outputCSVLine(out,formFields.getColumnHeadings());
            Util.outputCSVLine(out,formFields.getColumnValues());
        } finally {
            // Always release the file handle, even if writing a CSV line fails part-way.
            out.close();
        }
        System.err.println("\nThe data in the form has been output to the CSV file FormData.csv");
        System.err.println("This will open automatically after you press a key.");
    }
}