Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
package ai.h2o.automl;
import ai.h2o.automl.StepDefinition.Step;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import static ai.h2o.automl.ModelingStep.GridStep.DEFAULT_GRID_GROUP;
import static ai.h2o.automl.ModelingStep.GridStep.DEFAULT_GRID_TRAINING_WEIGHT;
import static ai.h2o.automl.ModelingStep.ModelStep.DEFAULT_MODEL_GROUP;
import static ai.h2o.automl.ModelingStep.ModelStep.DEFAULT_MODEL_TRAINING_WEIGHT;
final class ModelingPlans {
/**
* Plan reflecting the behaviour of H2O AutoML prior v3.34 as close as possible.
*
* Keeping it mainly for reference and for tests.
*/
final static StepDefinition[] ONE_LAYERED = {
new StepDefinition(Algo.XGBoost.name(), StepDefinition.Alias.defaults),
new StepDefinition(Algo.GLM.name(), StepDefinition.Alias.defaults),
new StepDefinition(Algo.DRF.name(), "def_1"),
new StepDefinition(Algo.GBM.name(), StepDefinition.Alias.defaults),
new StepDefinition(Algo.DeepLearning.name(), StepDefinition.Alias.defaults),
new StepDefinition(Algo.DRF.name(), "XRT"),
new StepDefinition(Algo.XGBoost.name(),
new Step("grid_1", DEFAULT_MODEL_GROUP, 3*DEFAULT_GRID_TRAINING_WEIGHT)),
new StepDefinition(Algo.GBM.name(),
new Step("grid_1", DEFAULT_MODEL_GROUP, 2*DEFAULT_GRID_TRAINING_WEIGHT)),
new StepDefinition(Algo.DeepLearning.name(),
new Step("grid_1", DEFAULT_MODEL_GROUP, DEFAULT_GRID_TRAINING_WEIGHT/2),
new Step("grid_2", DEFAULT_MODEL_GROUP, DEFAULT_GRID_TRAINING_WEIGHT/2),
new Step("grid_3", DEFAULT_MODEL_GROUP, DEFAULT_GRID_TRAINING_WEIGHT/2)),
new StepDefinition(Algo.GBM.name(),
new Step("lr_annealing", DEFAULT_MODEL_GROUP, DEFAULT_MODEL_TRAINING_WEIGHT)),
new StepDefinition(Algo.XGBoost.name(),
new Step("lr_search", DEFAULT_MODEL_GROUP, DEFAULT_GRID_TRAINING_WEIGHT)),
new StepDefinition(Algo.StackedEnsemble.name(),
new Step("best_of_family", DEFAULT_MODEL_GROUP, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("all", DEFAULT_MODEL_GROUP, DEFAULT_MODEL_TRAINING_WEIGHT)),
};
/**
* A simple improvement on the one-layered version using mainly default settings:
*
*
the first layer attempts to train all the base models.
*
if this layer completes, a second layer trains all the grids.
*
an optional 3rd layer is trained if exploitation ratio is on.
*
2 SEs are trained at the end of each layer
*
*
* Keeping this as an example of simple plan, mainly used for tests.
*/
final static StepDefinition[] TWO_LAYERED = {
new StepDefinition(Algo.XGBoost.name(), StepDefinition.Alias.defaults),
new StepDefinition(Algo.GLM.name(), StepDefinition.Alias.defaults),
new StepDefinition(Algo.DRF.name(), StepDefinition.Alias.defaults),
new StepDefinition(Algo.GBM.name(), StepDefinition.Alias.defaults),
new StepDefinition(Algo.DeepLearning.name(), StepDefinition.Alias.defaults),
new StepDefinition(Algo.XGBoost.name(), StepDefinition.Alias.grids),
new StepDefinition(Algo.GBM.name(), StepDefinition.Alias.grids),
new StepDefinition(Algo.DeepLearning.name(), StepDefinition.Alias.grids),
new StepDefinition(Algo.GBM.name(),
new Step("lr_annealing", DEFAULT_GRID_GROUP+1, Step.DEFAULT_WEIGHT)),
new StepDefinition(Algo.XGBoost.name(),
new Step("lr_search", DEFAULT_GRID_GROUP+1, Step.DEFAULT_WEIGHT)),
new StepDefinition(Algo.StackedEnsemble.name(), StepDefinition.Alias.defaults),
};
/**
* a multi-layered plan:
*
*
a short first layer with only 3 base models to be able to produce at least a few decent models
* on larger datasets if time budget is small,
* followed by an SE of those models.
*
another layer with more base models if all the models in the first layer were able to converge
* + SEs for that layer/li>
*
another layer with the remaining base models + SEs
*
another layer with the usually fast and best performing grids + SEs
*
another layer with the remaining grids + SEs
*
another layer doing a learning_rate search on the best GBM+XGB and adding some optional SEs
*
another layer with more optional SEs
*
a final layer resuming the best 2 grids so far, this time running without grid early stopping,
* and followed by 2 final SEs
*
*
*/
final static StepDefinition[] TEN_LAYERED = {
// order of step definitions and steps defines the order of steps in the same priority group.
new StepDefinition(Algo.XGBoost.name(),
new Step("def_2", 1, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("def_1", 2, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("def_3", 3, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("grid_1", 4, 3*DEFAULT_GRID_TRAINING_WEIGHT),
new Step("lr_search", 6, DEFAULT_GRID_TRAINING_WEIGHT)
),
new StepDefinition(Algo.GLM.name(),
new Step("def_1", 1, DEFAULT_MODEL_TRAINING_WEIGHT)
),
new StepDefinition(Algo.DRF.name(),
new Step("def_1", 2, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("XRT", 3, DEFAULT_MODEL_TRAINING_WEIGHT)
),
new StepDefinition(Algo.GBM.name(),
new Step("def_5", 1, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("def_2", 2, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("def_3", 2, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("def_4", 2, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("def_1", 3, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("grid_1", 4, 2*DEFAULT_GRID_TRAINING_WEIGHT),
new Step("lr_annealing", 6, DEFAULT_MODEL_TRAINING_WEIGHT)
),
new StepDefinition(Algo.DeepLearning.name(),
new Step("def_1", 3, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("grid_1", 4, DEFAULT_GRID_TRAINING_WEIGHT),
new Step("grid_2", 5, DEFAULT_GRID_TRAINING_WEIGHT),
new Step("grid_3", 5, DEFAULT_GRID_TRAINING_WEIGHT)
),
new StepDefinition("completion",
new Step("resume_best_grids", 10, 2*DEFAULT_GRID_TRAINING_WEIGHT)
),
// generates BoF and All SE for each group, but we prefer to customize instances and weights below
// new StepDefinition(Algo.StackedEnsemble.name(), StepDefinition.Alias.defaults),
new StepDefinition(Algo.StackedEnsemble.name(), Stream.of(new Step[][] {
IntStream.rangeClosed(1, 5).mapToObj(group -> // BoF should be fast, giving it half-budget for optimization.
new Step("best_of_family_"+group, group, DEFAULT_MODEL_TRAINING_WEIGHT/2)
).toArray(Step[]::new),
IntStream.rangeClosed(2, 5).mapToObj(group -> // starts at 2 as we don't need an ALL SE for first group.
new Step("all_"+group, group, DEFAULT_MODEL_TRAINING_WEIGHT)
).toArray(Step[]::new),
{
new Step("monotonic", 6, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("best_of_family_xgboost", 6, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("best_of_family_gbm", 6, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("all_xgboost", 7, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("all_gbm", 7, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("best_of_family_xglm", 8, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("all_xglm", 8, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("best_of_family", 10, DEFAULT_MODEL_TRAINING_WEIGHT),
new Step("best_N", 10, DEFAULT_MODEL_TRAINING_WEIGHT),
}
}).flatMap(Stream::of).toArray(Step[]::new)),
};
public static StepDefinition[] defaultPlan() {
return TEN_LAYERED;
}
private ModelingPlans() {}
}