All downloads are free. Search and download functionality uses the official Maven repository.

hex.genmodel.tools.MungeCsv Maven / Gradle / Ivy

There is a newer version: 3.46.0.6
Show newest version
package hex.genmodel.tools;

import hex.genmodel.GenMunger;
import hex.genmodel.easy.RowData;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileReader;
import java.io.FileWriter;

/**
 * Simple driver program for reading a CSV file and munging it.
 *
 * This driver program is used as a test harness by several tests in the testdir_javamunge directory.
 * 

* See the top-of-tree master version of this file here on github. */ public class MungeCsv { private static String assemblyClassName; private static String inputCSVFileName; private static String outputCSVFileName; private static int haveHeaders = -1; private static void usage() { System.out.println(""); System.out.println("usage: java [...java args...] hex.genmodel.tools.MungeCsv --header --model modelClassName --input inputCSVFileName --output outputCSVFileName"); System.out.println(""); System.out.println(" assembly class name is something like AssemblyPojo_bleehbleehbleeh."); System.out.println(""); System.out.println(" inputCSVFileName is the test data set."); System.out.println(" Specifying --header is required for h2o-3."); System.out.println(""); System.out.println(" outputCSVFileName is the munged data set (one row per data set row)."); System.out.println(""); System.exit(1); } private static void parseArgs(String[] args) { for (int i = 0; i < args.length; i++) { String s = args[i]; switch( s ) { case "--munger": i++; if (i >= args.length) usage(); assemblyClassName = args[i]; break; case "--input": i++; if (i >= args.length) usage(); inputCSVFileName = args[i]; break; case "--output": i++; if (i >= args.length) usage(); outputCSVFileName = args[i]; break; case "--header": haveHeaders = 1; break; default: // skip System.out.println("bad param... skipping."); } } if (haveHeaders != 1) { System.out.println("ERROR: header not specified"); usage(); } if (assemblyClassName == null) { System.out.println("ERROR: model not specified"); usage(); } if (inputCSVFileName == null) { System.out.println("ERROR: input not specified"); usage(); } if (outputCSVFileName == null) { System.out.println("ERROR: output not specified"); usage(); } } /** * This CSV parser is as bare bones as it gets. * Our test data doesn't have funny quoting, spacing, or other issues. * Can't handle cases where the number of data columns is less than the number of header columns. 
*/ private static RowData parseDataRow(String line, GenMunger munger) { if( line.isEmpty() || line.equals("") ) return null; String[] inputData = line.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)|(,)", -1); for(int i=0;i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy