www.flow.packs.test-large.billionRow_twoCol_gz.flow Maven / Gradle / Ivy
The newest version!
{
"version": "1.0.0",
"cells": [
{
"type": "cs",
"input": "importFiles"
},
{
"type": "cs",
"input": "importFiles [ \"hdfs://mr-0x6.0xdata.loc/datasets/billion_rows.csv.gz\" ]"
},
{
"type": "cs",
"input": "setupParse paths: [ \"hdfs://mr-0x6.0xdata.loc/datasets/billion_rows.csv.gz\" ]"
},
{
"type": "cs",
"input": "parseFiles\n paths: [\"hdfs://mr-0x6.0xdata.loc/datasets/billion_rows.csv.gz\"]\n destination_frame: \"billion_rows.hex\"\n parse_type: \"CSV\"\n separator: 44\n number_columns: 2\n single_quotes: false\n column_types: [\"Numeric\",\"Numeric\"]\n delete_on_done: true\n check_header: -1\n chunk_size: 4194304"
},
{
"type": "cs",
"input": "getFrameSummary \"billion_rows.hex\""
},
{
"type": "cs",
"input": "assist splitFrame, \"billion_rows.hex\", 123456"
},
{
"type": "cs",
"input": "splitFrame \"billion_rows.hex\", [0.2], [\"billion_rows.hex_0.20\",\"billion_rows.hex_0.80\"], 123456"
},
{
"type": "cs",
"input": "getFrameSummary \"billion_rows.hex_0.80\""
},
{
"type": "cs",
"input": "assist buildModel, null, training_frame: \"billion_rows.hex_0.80\""
},
{
"type": "cs",
"input": "buildModel 'glm', {\"model_id\":\"glm-d6f3fce4-308f-47d5-9373-8beecad3ea68\",\"training_frame\":\"billion_rows.hex_0.80\",\"validation_frame\":\"billion_rows.hex_0.20\",\"ignore_const_cols\":true,\"response_column\":\"C1\",\"family\":\"gaussian\",\"solver\":\"IRLSM\",\"alpha\":[],\"lambda\":[],\"lambda_search\":false,\"standardize\":true,\"non_negative\":false,\"score_each_iteration\":false,\"max_iterations\":-1,\"link\":\"family_default\",\"intercept\":true,\"objective_epsilon\":0.00001,\"beta_epsilon\":0.0001,\"gradient_epsilon\":0.0001,\"prior\":-1,\"max_active_predictors\":-1}"
}
]
}