// hex.genmodel.tools.MungeCsv (Maven / Gradle / Ivy)
package hex.genmodel.tools;
import hex.genmodel.GenMunger;
import hex.genmodel.easy.RowData;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;
/**
* Simple driver program for reading a CSV file and munging it.
*
* This driver program is used as a test harness by several tests in the testdir_javamunge directory.
*
* See the top-of-tree master version of this file here on github.
*/
public class MungeCsv {
// Command-line configuration, populated by parseArgs().

// Munger (assembly) class name; taken from the value following --munger.
private static String assemblyClassName;
// Input CSV file name; taken from the value following --input.
private static String inputCSVFileName;
// Output CSV file name; taken from the value following --output.
private static String outputCSVFileName;
// Header flag: starts at -1 and is set to 1 when --header is seen.
// parseArgs() treats any value other than 1 as "header not specified".
private static int haveHeaders = -1;
/**
 * Prints the command-line help text to standard output and terminates the JVM
 * with exit status 1.
 *
 * The option names shown here must match the ones accepted by parseArgs();
 * the munger class is selected with --munger.
 */
private static void usage() {
  System.out.println("");
  // Advertise --munger (the flag parseArgs actually recognizes), not --model.
  System.out.println("usage: java [...java args...] hex.genmodel.tools.MungeCsv --header --munger assemblyClassName --input inputCSVFileName --output outputCSVFileName");
  System.out.println("");
  System.out.println(" assembly class name is something like AssemblyPojo_bleehbleehbleeh.");
  System.out.println("");
  System.out.println(" inputCSVFileName is the test data set.");
  System.out.println(" Specifying --header is required for h2o-3.");
  System.out.println("");
  System.out.println(" outputCSVFileName is the munged data set (one row per data set row).");
  System.out.println("");
  // Never returns: callers rely on this to abort after a bad command line.
  System.exit(1);
}
/**
 * Parses the command line into the static configuration fields.
 *
 * Recognized options:
 *   --munger NAME   stored in assemblyClassName
 *   --input  FILE   stored in inputCSVFileName
 *   --output FILE   stored in outputCSVFileName
 *   --header        sets haveHeaders to 1
 *
 * Any other argument is reported on standard output and skipped. If a
 * value-taking option is the last argument, or if --header, --munger,
 * --input or --output was not supplied, usage() is called, which prints the
 * help text and exits with status 1.
 *
 * @param args the raw command-line arguments
 */
private static void parseArgs(String[] args) {
  for (int i = 0; i < args.length; i++) {
    String s = args[i];
    switch (s) {
      case "--munger":
        i++; // advance to the option's value
        if (i >= args.length) usage(); // value missing; usage() exits
        assemblyClassName = args[i];
        break;
      case "--input":
        i++;
        if (i >= args.length) usage();
        inputCSVFileName = args[i];
        break;
      case "--output":
        i++;
        if (i >= args.length) usage();
        outputCSVFileName = args[i];
        break;
      case "--header":
        haveHeaders = 1;
        break;
      default:
        // Unknown arguments are tolerated, but name the offender so typos are easy to spot.
        System.out.println("bad param '" + s + "'... skipping.");
        break;
    }
  }

  // All four settings are mandatory; each usage() call below terminates the JVM.
  if (haveHeaders != 1) {
    System.out.println("ERROR: header not specified");
    usage();
  }
  if (assemblyClassName == null) {
    // Message names the actual flag (--munger) rather than a non-existent "model" option.
    System.out.println("ERROR: munger not specified");
    usage();
  }
  if (inputCSVFileName == null) {
    System.out.println("ERROR: input not specified");
    usage();
  }
  if (outputCSVFileName == null) {
    System.out.println("ERROR: output not specified");
    usage();
  }
}
/**
* This CSV parser is as bare bones as it gets.
* Our test data doesn't have funny quoting, spacing, or other issues.
* Can't handle cases where the number of data columns is less than the number of header columns.
*/
private static RowData parseDataRow(String line, GenMunger munger) {
if( line.isEmpty() || line.equals("") )
return null;
String[] inputData = line.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)|(,)", -1);
for(int i=0;i