/*
 * ai.h2o.automl.ModelingStep
 * Source listing taken from the h2o-automl artifact (H2O AutoML), newest published version.
 */
package ai.h2o.automl;

import ai.h2o.automl.AutoMLBuildSpec.AutoMLCustomParameters;
import ai.h2o.automl.ModelSelectionStrategies.LeaderboardHolder;
import ai.h2o.automl.ModelSelectionStrategy.Selection;
import ai.h2o.automl.StepResultState.ResultStatus;
import ai.h2o.automl.WorkAllocations.JobType;
import ai.h2o.automl.WorkAllocations.Work;
import ai.h2o.automl.events.EventLog;
import ai.h2o.automl.events.EventLogEntry;
import ai.h2o.automl.events.EventLogEntry.Stage;
import ai.h2o.automl.preprocessing.PreprocessingConfig;
import ai.h2o.automl.preprocessing.PreprocessingStep;
import hex.Model;
import hex.Model.Parameters.FoldAssignmentScheme;
import hex.ModelBuilder;
import hex.ModelContainer;
import hex.ScoreKeeper.StoppingMetric;
import hex.genmodel.utils.DistributionFamily;
import hex.grid.Grid;
import hex.grid.GridSearch;
import hex.grid.HyperSpaceSearchCriteria;
import hex.grid.HyperSpaceSearchCriteria.RandomDiscreteValueSearchCriteria;
import hex.grid.HyperSpaceWalker;
import hex.leaderboard.Leaderboard;
import jsr166y.CountedCompleter;
import org.apache.commons.lang.builder.ToStringBuilder;
import water.*;
import water.exceptions.H2OIllegalArgumentException;
import water.util.ArrayUtils;
import water.util.Countdown;
import water.util.EnumUtils;
import water.util.Log;

import java.util.*;
import java.util.function.Consumer;
import java.util.function.Predicate;

/**
 * Parent class defining common properties and common logic for actual {@link AutoML} training steps.
 */
public abstract class ModelingStep extends Iced {

    /**
     * Policy telling {@code setSeed} which seed to assign to model parameters
     * whose seed is still equal to the algorithm's default seed.
     */
    protected enum SeedPolicy {
        /** No seed will be used (= random): the parameters' seed is left untouched. */
        None,
        /** The global AutoML seed will be used. */
        Global,
        /** The seed is incremented for each model, starting from the global seed if there is one. */
        Incremental
    }

    /** Matches work items of type {@link JobType#ModelBuild} (single model training). */
    static Predicate<Work> isDefaultModel = w -> w._type == JobType.ModelBuild;
    /** Matches work items that train new models: single model builds and hyperparameter searches. */
    static Predicate<Work> isExplorationWork = w -> w._type == JobType.ModelBuild || w._type == JobType.HyperparamSearch;
    /** Matches work items of type {@link JobType#Selection}. */
    static Predicate<Work> isExploitationWork = w -> w._type == JobType.Selection;

    /**
     * Starts a sequential grid search over the given hyperparameter space.
     * <p>
     * Preprocessing steps are applied to {@code baseParams} first, then the start of the search
     * is recorded in the AutoML event log before the grid search job is created and started.
     *
     * @param resultKey      key under which the resulting grid is stored (must not be null).
     * @param baseParams     model parameters shared by all models of the grid (must not be null).
     * @param hyperParams    the hyperparameter space: parameter name mapped to its candidate values (must not be empty).
     * @param searchCriteria criteria driving and bounding the search (must not be null).
     * @param <MP>           the concrete type of model parameters.
     * @return the started grid search job.
     */
    protected <MP extends Model.Parameters> Job<Grid> startSearch(
            final Key<Grid> resultKey,
            final MP baseParams,
            final Map<String, Object[]> hyperParams,
            final HyperSpaceSearchCriteria searchCriteria)
    {
        assert resultKey != null;
        assert baseParams != null;
        assert hyperParams.size() > 0;
        assert searchCriteria != null;
        applyPreprocessing(baseParams);
        aml().eventLog().info(Stage.ModelTraining, "AutoML: starting "+resultKey+" hyperparameter search")
                .setNamedValue("start_"+_provider+"_"+_id, new Date(), EventLogEntry.epochFormat.get());
        return GridSearch.create(
                resultKey,
                HyperSpaceWalker.BaseWalker.WalkerFactory.create(
                        baseParams,
                        hyperParams,
                        new GridSearch.SimpleParametersBuilderFactory<>(),
                        searchCriteria
                ))
                // models of the grid are built one after the other
                .withParallelism(GridSearch.SEQUENTIAL_MODEL_BUILDING)
                .withMaxConsecutiveFailures(aml()._maxConsecutiveModelFailures)
                .start();
    }

    /**
     * Starts the training of a single model.
     * <p>
     * Preprocessing steps are applied to {@code params}, the start of the training is recorded in the
     * AutoML event log, and the parameters are validated: validation warnings and errors are forwarded
     * to the event log before the training job is launched.
     *
     * @param resultKey key under which the trained model is stored (must not be null).
     * @param params    the parameters of the model to train (must not be null).
     * @param <MP>      the concrete type of model parameters.
     * @return the started training job.
     */
    @SuppressWarnings("unchecked")
    protected <MP extends Model.Parameters> Job startModel(
            final Key resultKey,
            final MP params
    ) {
        assert resultKey != null;
        assert params != null;
        Job job = new Job<>(resultKey, ModelBuilder.javaName(_algo.urlName()), _description);
        applyPreprocessing(params);
        ModelBuilder builder = ModelBuilder.make(_algo.urlName(), job, (Key) resultKey);
        builder._parms = params;
        aml().eventLog().info(Stage.ModelTraining, "AutoML: starting "+resultKey+" model training")
                .setNamedValue("start_"+_provider+"_"+_id, new Date(), EventLogEntry.epochFormat.get());
        builder.init(false);          // validate parameters
        // only warnings and errors are reported to the event log, other severities are ignored
        for (ModelBuilder.ValidationMessage vm : builder._messages) {
            if (vm.log_level() == Log.WARN) {
                aml().eventLog().warn(Stage.ModelTraining, vm.field()+" param, "+vm.message());
            } else if (vm.log_level() == Log.ERRR) {
                aml().eventLog().error(Stage.ModelTraining, vm.field()+" param, "+vm.message());
            }
        }
        return builder.trainModelOnH2ONode();
    }

    /**
     * Checks that a model builder initialized with a copy of the given parameters
     * reports no error-level validation message for any of the given fields.
     *
     * @param parms  the parameters to validate (they are cloned, never modified).
     * @param fields the names of the parameter fields to check.
     * @return false if any field has an error message or if validation throws
     *         an {@link H2OIllegalArgumentException}, true otherwise.
     */
    private boolean validParameters(Model.Parameters parms, String[] fields) {
        try {
            Model.Parameters copy = parms.clone();
            // some algos check if distribution has proper _nclass(es) so we need to set training frame and response etc
            setCommonModelBuilderParams(copy);
            ModelBuilder validator = ModelBuilder.make(copy);
            validator.init(false);
            for (String field : fields) {
                if (validator.getMessagesByFieldAndSeverity(field, Log.ERRR).length != 0) {
                    return false;
                }
            }
            return true;
        } catch (H2OIllegalArgumentException e) {
            return false;
        }
    }

    /**
     * Applies the distribution family requested for this AutoML run to the given parameters,
     * together with the family-specific setting (custom function, huber alpha, tweedie power or quantile alpha).
     * <p>
     * If the parameters reject the family, or if validation reports an error on the
     * {@code _distribution} or {@code _family} fields, the distribution falls back to
     * {@link DistributionFamily#AUTO} and an info message is added to the event log.
     *
     * @param parms the model parameters to configure.
     */
    protected void setDistributionParameters(Model.Parameters parms) {
        final AutoMLBuildSpec spec = aml().getBuildSpec();
        final DistributionFamily requested = aml().getDistributionFamily();

        // family-specific tuning parameter, if any
        switch (requested) {
            case custom:
                parms._custom_distribution_func = spec.build_control.custom_distribution_func;
                break;
            case huber:
                parms._huber_alpha = spec.build_control.huber_alpha;
                break;
            case tweedie:
                parms._tweedie_power = spec.build_control.tweedie_power;
                break;
            case quantile:
                parms._quantile_alpha = spec.build_control.quantile_alpha;
                break;
        }

        try {
            parms.setDistributionFamily(requested);
        } catch (H2OIllegalArgumentException e) {
            parms.setDistributionFamily(DistributionFamily.AUTO);
        }

        final boolean accepted = validParameters(parms, new String[]{"_distribution", "_family"});
        if (!accepted) {
            parms.setDistributionFamily(DistributionFamily.AUTO);
        }

        final boolean fellBack = !requested.equals(parms.getDistributionFamily());
        if (fellBack) {
            aml().eventLog().info(Stage.ModelTraining,"Algo " + parms.algoName() +
                    " doesn't support " + requested.name() + " distribution. Using AUTO distribution instead.");
        }
    }

    private final transient AutoML _aml;

    protected final IAlgo _algo;
    protected final String _provider;
    protected final String _id;
    protected int _weight;
    protected int _priorityGroup;
    protected AutoML.Constraint[] _ignoredConstraints = new AutoML.Constraint[0];  // whether or not to ignore the max_models/max_runtime constraints
    protected String _description;
    protected Work _work;
    private final transient List> _onDone = new ArrayList<>();

    StepDefinition _fromDef;
    transient final Predicate _isSamePriorityGroup = w -> w._priorityGroup == _priorityGroup;

    /**
     * Creates a new modeling step.
     *
     * @param provider      name of the provider defining this step.
     * @param algo          the algorithm used by this step.
     * @param id            identifier of the step inside its provider.
     * @param priorityGroup priority group of the step, must be non-negative.
     * @param weight        weight of the step.
     * @param autoML        the AutoML instance the step belongs to.
     */
    protected ModelingStep(String provider, IAlgo algo, String id, int priorityGroup, int weight, AutoML autoML) {
        assert priorityGroup >= 0;
        // owning AutoML instance
        _aml = autoML;
        // identity of the step
        _algo = algo;
        _provider = provider;
        _id = id;
        _description = provider+" "+id;
        // scheduling properties
        _weight = weight;
        _priorityGroup = priorityGroup;
    }

    /**
     * Each provider (usually one class) defining a collection of steps must have a unique name.
     * @return the name of the provider (usually simply the name of an algo) defining this step.
     */
    public String getProvider() {
        return _provider;
    }

    /**
     * @return the step identifier, as given at construction: it should be unique inside its provider.
     */
    public String getId() {
        return _id;
    }

    /**
     * @return a string that identifies the step uniquely among all steps defined by all providers,
     *         built as {@code provider:id}.
     */
    public String getGlobalId() {
        return _provider+":"+_id;
    }
    
    /**
     * @return the algorithm this step was created with.
     */
    public IAlgo getAlgo() {
        return _algo;
    }
    
    /**
     * @return the current weight of this step.
     */
    public int getWeight() {
        return _weight;
    }
    
    /**
     * @return the current priority group of this step.
     */
    public int getPriorityGroup() {
        return _priorityGroup;
    }

    /**
     * @return true if the result key of the job started by {@link #run()} should be added to the
     *         session's resumable keys. False by default: subclasses override this to opt in.
     */
    public boolean isResumable() {
        return false;
    }
    
    /**
     * @param constraint an AutoML constraint.
     * @return true iff the given constraint is listed in {@code _ignoredConstraints} for this step.
     */
    public boolean ignores(AutoML.Constraint constraint) {
        return ArrayUtils.contains(_ignoredConstraints, constraint);
    }
    
    /**
     * @return true iff the training time of this step should be limited: the step must not ignore
     *         the {@code TIMEOUT} constraint and {@code max_models} must not be used (value 0).
     */
    public boolean limitModelTrainingTime() {
        if (ignores(AutoML.Constraint.TIMEOUT)) {
            return false;
        }
        // if max_models is used, then the global time limit should have no impact on model training budget due to reproducibility concerns.
        final int maxModels = aml().getBuildSpec().build_control.stopping_criteria.max_models();
        return maxModels == 0;
    }

    /**
     * A step can run only if some work with a strictly positive weight has been allocated to it.
     *
     * @return true iff we can call {@link #run()} on this modeling step to start a new job.
     */
    public boolean canRun() {
        final Work allocated = getAllocatedWork();
        if (allocated == null) {
            return false;
        }
        return allocated._weight > 0;
    }

    /**
     * Executes this modeling step.
     * <p>
     * When the started job has a result key, this step is registered as the source of that key,
     * and the key is added to the session's resumable keys if the step is resumable.
     *
     * @return the job started by {@link #startJob()}, possibly null.
     */
    public Job run() {
        final Job started = startJob();
        if (started == null || started._result == null) {
            return started; // nothing to register
        }
        register(started._result);
        if (isResumable()) {
            aml().session().addResumableKey(started._result);
        }
        return started;
    }

    /**
     * @return an {@link Iterator} for the potential sub-steps provided by this modeling step.
     *         The default implementation has no sub-steps and returns an empty iterator.
     */
    public Iterator iterateSubSteps() {
        return Collections.emptyIterator();
    }
    
    /**
     * @param id the identifier of the sub-step to look for.
     * @return the sub-step (if any) with the given identifier, or an empty {@link Optional} if there's no such sub-step.
     *         The default implementation has no sub-steps and always returns an empty {@link Optional}.
     */
    protected Optional getSubStep(String id) {
        return Optional.empty();
    }


    /**
     * @return the type of job started by this step; it is used as the type of the {@link Work} created by {@link #makeWork()}.
     */
    protected abstract JobType getJobType();
    
    /**
     * Starts a new {@link Job} as part of this step.
     * Called by {@link #run()}, which handles a null job or a job without result key.
     * @return the newly started job.
     */
    protected abstract Job startJob();

    /**
     * Executes all the callbacks registered for this step, passing them the given job,
     * then forgets those callbacks so that they're executed only once.
     *
     * @param job the job passed to each callback.
     */
    protected void onDone(Job job) {
        for (Consumer exec : _onDone) {
            exec.accept(job);
        }
        // callbacks are one-shot: they're dropped once executed
        _onDone.clear();
    }

    /**
     * Registers this step in the AutoML session as the source of the given key.
     *
     * @param key the key produced by this step.
     */
    protected void register(Key key) {
        aml().session().registerKeySource(key, this);
    }

    /**
     * @return the AutoML instance this step was created with.
     */
    protected AutoML aml() {
        return _aml;
    }

    /**
     * Looks up the allocation once from the work allocations (by step id and algo) and caches it
     * for subsequent calls; a null allocation is looked up again on the next call.
     *
     * @return the total work allocated for this step.
     */
    protected Work getAllocatedWork() {
        if (_work != null) {
            return _work;
        }
        _work = getWorkAllocations().getAllocation(_id, _algo);
        return _work;
    }

    /**
     * Creates the {@link Work} instance representing the total work handled by this step.
     * @return a new {@link Work} built from the id, algo, job type, priority group and weight of this step.
     */
    protected Work makeWork() {
        return new Work(getId(), getAlgo(), getJobType(), getPriorityGroup(), getWeight());
    }
    
    /**
     * Creates a key by delegating to {@code aml().makeKey(name, null, withCounter)}.
     *
     * @param name        the name passed to the AutoML key factory.
     * @param withCounter passed as is to the AutoML key factory (presumably appends a counter to the key name — confirm in {@code AutoML#makeKey}).
     * @return the new key.
     */
    protected Key makeKey(String name, boolean withCounter) {
        return aml().makeKey(name, null, withCounter);
    }

    /**
     * @return the work allocations held by the AutoML instance.
     */
    protected WorkAllocations getWorkAllocations() {
        return aml()._workAllocations;
    }

    /**
     * @return the models trained until now, sorted by the default leaderboard metric:
     *         those are the models currently held by the AutoML leaderboard.
     */
    protected Model[] getTrainedModels() {
        return aml().leaderboard().getModels();
    }

    /**
     * @return the keys of the models currently held by the AutoML leaderboard.
     */
    protected Key[] getTrainedModelsKeys() {
        return aml().leaderboard().getModelKeys();
    }
    
    /**
     * @return true iff cross-validation is enabled, as reported by the AutoML instance.
     */
    protected boolean isCVEnabled() {
        return aml().isCVEnabled();
    }
    
    /**
     * Two steps are equal iff they are instances of the same class with the same provider and the same id.
     * The comparison is null-safe, consistently with {@link #hashCode()}.
     *
     * @param o the object to compare with.
     * @return true iff the given object is equal to this step.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        ModelingStep that = (ModelingStep) o;
        // Objects.equals tolerates a null provider/id, exactly like Objects.hash in hashCode()
        return Objects.equals(_provider, that._provider) && Objects.equals(_id, that._id);
    }

    /**
     * @return a hash computed from the provider and the id, i.e. the fields compared by {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return Objects.hash(_provider, _id);
    }

    /**
     * Assigns the parameters common to all models before building a model or a set of models.
     * This includes:
     * <ul>
     *   <li>data-related parameters: training/validation frames, response and ignored columns, weights, class balancing.</li>
     *   <li>cross-validation parameters.</li>
     *   <li>memory-optimization: whether objects built during cross-validation should be kept after training or not.</li>
     *   <li>model management: checkpoints export directory.</li>
     * </ul>
     * @param params the model parameters to which the common parameters will be added.
     */
    protected void setCommonModelBuilderParams(Model.Parameters params) {
        final AutoMLBuildSpec spec = aml().getBuildSpec();

        // frames and columns
        params._train = aml()._trainingFrame._key;
        if (aml()._validationFrame != null) {
            params._valid = aml()._validationFrame._key;
        }
        params._response_column = spec.input_spec.response_column;
        params._ignored_columns = spec.input_spec.ignored_columns;

        // hooks that subclasses can override
        setCrossValidationParams(params);
        setWeightingParams(params);
        setClassBalancingParams(params);
        params._custom_metric_func = spec.build_control.custom_metric_func;

        // what is kept after training: fold assignment can be kept only if nfolds is not 0
        final boolean nfoldsSet = spec.build_control.nfolds != 0;
        params._keep_cross_validation_models = spec.build_control.keep_cross_validation_models;
        params._keep_cross_validation_fold_assignment = nfoldsSet && spec.build_control.keep_cross_validation_fold_assignment;
        params._export_checkpoints_dir = spec.build_control.export_checkpoints_dir;

        // Using _main_model_time_budget_factor to determine if and how we should restrict the time for the main model.
        // Value 0 means do not use time constraint for the main model.
        // More details in ModelBuilder#setMaxRuntimeSecsForMainModel().
        params._main_model_time_budget_factor = 2;
    }
    
    /**
     * Sets the cross-validation related parameters.
     * <p>
     * Cross-validation predictions are kept when there's no blending frame or when the user asked for them.
     * Without fold column, {@code nfolds} is taken from the build spec
     * and the {@code Modulo} fold assignment is used as soon as there's more than one fold.
     *
     * @param params the model parameters to configure.
     */
    protected void setCrossValidationParams(Model.Parameters params) {
        final AutoMLBuildSpec spec = aml().getBuildSpec();
        params._keep_cross_validation_predictions = aml().getBlendingFrame() == null || spec.build_control.keep_cross_validation_predictions;
        params._fold_column = spec.input_spec.fold_column;

        if (spec.input_spec.fold_column != null) {
            return; // folds are fully defined by the fold column
        }
        final int nfolds = spec.build_control.nfolds;
        params._nfolds = nfolds;
        if (nfolds > 1) {
            // TODO: below allow the user to specify this (vs Modulo)
            params._fold_assignment = FoldAssignmentScheme.Modulo;
        }
    }
    
    /**
     * Sets the weights column from the build spec's input specification.
     *
     * @param params the model parameters to configure.
     */
    protected void setWeightingParams(Model.Parameters params) {
        AutoMLBuildSpec buildSpec = aml().getBuildSpec();
        params._weights_column = buildSpec.input_spec.weights_column;
    }
    
    /**
     * Copies the class balancing settings from the build spec, but only if class balancing
     * is enabled there: otherwise the parameters are left untouched.
     *
     * @param params the model parameters to configure.
     */
    protected void setClassBalancingParams(Model.Parameters params) {
        final AutoMLBuildSpec spec = aml().getBuildSpec();
        if (!spec.build_control.balance_classes) {
            return;
        }
        params._balance_classes = spec.build_control.balance_classes;
        params._class_sampling_factors = spec.build_control.class_sampling_factors;
        params._max_after_balance_size = spec.build_control.max_after_balance_size;
    }

    /**
     * Applies the custom algo parameters defined in the build spec (if any) to the given parameters.
     *
     * @param params the model parameters to configure.
     */
    protected void setCustomParams(Model.Parameters params) {
        final AutoMLCustomParameters custom = aml().getBuildSpec().build_models.algo_parameters;
        if (custom != null) {
            custom.applyCustomParameters(_algo, params);
        }
    }
    
    /**
     * Applies each preprocessing step of the AutoML instance (if any) to the given parameters.
     * The completer returned by each preprocessing step is registered as a callback
     * that is run when {@link #onDone(Job)} is called.
     *
     * @param params the model parameters to preprocess.
     */
    protected void applyPreprocessing(Model.Parameters params) {
        if (aml().getPreprocessing() != null) {
            for (PreprocessingStep step : aml().getPreprocessing()) {
                final PreprocessingStep.Completer completer = step.apply(params, getPreprocessingConfig());
                // the job passed to the callback is not needed to complete the preprocessing
                _onDone.add(j -> completer.run());
            }
        }
    }
    
    /**
     * @return the configuration passed to each preprocessing step by {@link #applyPreprocessing(Model.Parameters)}:
     *         a new default {@link PreprocessingConfig} unless overridden.
     */
    protected PreprocessingConfig getPreprocessingConfig() {
        return new PreprocessingConfig();
    }

    /**
     * Configures early-stopping for the model or set of models to be built.
     * <p>
     * Each stopping property (metric, rounds, tolerance) still equal to the algo default is replaced
     * by the value from the AutoML build spec. A metric that is still {@code AUTO} afterwards is resolved
     * to {@code deviance} when the response column has cardinality -1, and to {@code logloss} otherwise.
     *
     * @param parms the model parameters to which the stopping criteria will be added.
     * @param defaults the default parameters for the corresponding {@link ModelBuilder}.
     */
    protected void setStoppingCriteria(Model.Parameters parms, Model.Parameters defaults) {
        // If the caller hasn't set ModelBuilder stopping criteria, set it from our global criteria.
        final AutoMLBuildSpec spec = aml().getBuildSpec();

        //FIXME: Do we really need to compare with defaults before setting the buildSpec value instead?
        // This can create subtle bugs: e.g. if dev wanted to enforce a stopping criteria for a specific algo/model,
        // he wouldn't be able to enforce the default value, that would always be overridden by buildSpec.
        // We should instead provide hooks and ensure that properties are always set in the following order:
        //  1. defaults, 2. user defined, 3. internal logic/algo specific based on the previous state (esp. handling of AUTO properties).

        final boolean defaultMetric = parms._stopping_metric == defaults._stopping_metric;
        if (defaultMetric) {
            parms._stopping_metric = spec.build_control.stopping_criteria.stopping_metric();
        }

        if (parms._stopping_metric == StoppingMetric.AUTO) {
            // NOTE(review): cardinality -1 presumably denotes a non-categorical response — confirm
            if (aml().getResponseColumn().cardinality() == -1) {
                parms._stopping_metric = StoppingMetric.deviance;
            } else {
                parms._stopping_metric = StoppingMetric.logloss;
            }
        }

        final boolean defaultRounds = parms._stopping_rounds == defaults._stopping_rounds;
        if (defaultRounds) {
            parms._stopping_rounds = spec.build_control.stopping_criteria.stopping_rounds();
        }

        final boolean defaultTolerance = parms._stopping_tolerance == defaults._stopping_tolerance;
        if (defaultTolerance) {
            parms._stopping_tolerance = spec.build_control.stopping_criteria.stopping_tolerance();
        }
    }

    /**
     * Assigns a seed to the model parameters according to the given policy.
     * Nothing is changed if the parameters' seed already differs from the algo default.
     *
     * @param parms the model parameters to which the seed will be assigned.
     * @param defaults the default parameters for the corresponding {@link ModelBuilder}.
     * @param seedPolicy the policy defining how the seed will be assigned to the model parameters.
     */
    protected void setSeed(Model.Parameters parms, Model.Parameters defaults, SeedPolicy seedPolicy) {
        final AutoMLBuildSpec spec = aml().getBuildSpec();
        if (parms._seed != defaults._seed) {
            return; // a specific seed was already provided
        }
        // Don't use the same exact seed so that, e.g., if we build two GBMs they don't do the same row and column sampling.
        switch (seedPolicy) {
            case Global:
                parms._seed = spec.build_control.stopping_criteria.seed();
                break;
            case Incremental:
                // the incremental seed is consumed only if it differs from the default seed
                if (_aml._incrementalSeed.get() == defaults._seed) {
                    parms._seed = defaults._seed;
                } else {
                    parms._seed = _aml._incrementalSeed.getAndIncrement();
                }
                break;
            default:
                break;
        }
    }
    
    /**
     * Initializes the maximum training time of the given parameters, unless it was already set (value different from 0).
     * <p>
     * The limit is the per-model limit from the build spec, capped by {@code upperLimit} when the latter is positive.
     * A value of 0 means "no limit" for both: if no per-model limit is defined, then a positive
     * {@code upperLimit} is used on its own instead of being discarded.
     *
     * @param parms      the model parameters to configure.
     * @param upperLimit the maximum time (in seconds) that can be assigned, ignored if {@code <= 0}.
     */
    protected void initTimeConstraints(Model.Parameters parms, double upperLimit) {
        AutoMLBuildSpec buildSpec = aml().getBuildSpec();
        if (parms._max_runtime_secs == 0) {
            double maxPerModel = buildSpec.build_control.stopping_criteria.max_runtime_secs_per_model();
            if (upperLimit <= 0) {
                parms._max_runtime_secs = maxPerModel;
            } else if (maxPerModel == 0) {
                // no per-model limit: Math.min(0, upperLimit) would wrongly translate into "no limit at all"
                parms._max_runtime_secs = upperLimit;
            } else {
                parms._max_runtime_secs = Math.min(maxPerModel, upperLimit);
            }
        }
    }

    /**
     * @return the sort metric of the AutoML leaderboard, or null if there's no leaderboard yet.
     */
    private String getSortMetric() {
        //ensures that the sort metric is always updated according to the defaults set by leaderboard
        final Leaderboard lb = aml().leaderboard();
        if (lb == null) {
            return null;
        }
        return lb.getSortMetric();
    }

    /**
     * Converts a metric name to the corresponding stopping metric.
     *
     * @param name the name of a metric, possibly null.
     * @return {@code deviance} for "mean_residual_deviance", the stopping metric matching the name if there's one,
     *         and {@code AUTO} if the name is null or doesn't match any stopping metric.
     */
    private static StoppingMetric metricValueOf(String name) {
        if (name == null) {
            return StoppingMetric.AUTO;
        }
        if ("mean_residual_deviance".equals(name)) {
            return StoppingMetric.deviance;
        }
        try {
            return EnumUtils.valueOf(StoppingMetric.class, name);
        } catch (IllegalArgumentException ignored) {
            // unknown metric name: fall back to AUTO
            return StoppingMetric.AUTO;
        }
    }

    /**
     * Step designed to build a single/default model.
     * Subclasses only need to provide the model parameters through {@link #prepareModelParameters()}.
     */
    public static abstract class ModelStep extends ModelingStep {

        /** Weight used when none is given at construction. */
        public static final int DEFAULT_MODEL_TRAINING_WEIGHT = 10;
        /** Priority group used when none is given at construction. */
        public static final int DEFAULT_MODEL_GROUP = 1;

        /**
         * Creates a step with the default priority group and the default weight.
         */
        public ModelStep(String provider, IAlgo algo, String id, AutoML autoML) {
            this(provider, algo, id, DEFAULT_MODEL_GROUP, DEFAULT_MODEL_TRAINING_WEIGHT, autoML);
        }

        public ModelStep(String provider, IAlgo algo, String id, int priorityGroup, int weight, AutoML autoML) {
            super(provider, algo, id, priorityGroup, weight, autoML);
        }

        @Override
        protected JobType getJobType() {
            return JobType.ModelBuild;
        }

        /**
         * @return the parameters of the model trained by this step.
         */
        public abstract Model.Parameters prepareModelParameters();

        @Override
        protected Job startJob() {
            final Model.Parameters parms = prepareModelParameters();
            return trainModel(parms);
        }

        /**
         * Trains a model under a generated key.
         *
         * @param parms the model builder params.
         * @return a started training model.
         */
        protected Job trainModel(Model.Parameters parms) {
            return trainModel(null, parms);
        }

        /**
         * Completes the given parameters with the AutoML settings (time constraints, common parameters,
         * seed, stopping criteria, custom parameters, distribution), then starts the model training.
         *
         * @param key (optional) model key: if null, a key is generated from the algo name.
         * @param parms the model builder params.
         * @return a started training model.
         */
        protected Job trainModel(Key key, Model.Parameters parms) {
            final String algoName = ModelBuilder.algoName(_algo.urlName());
            if (key == null) {
                key = makeKey(algoName, true);
            }

            final Model.Parameters defaults = ModelBuilder.make(_algo.urlName(), null, null)._parms;
            initTimeConstraints(parms, 0);
            setCommonModelBuilderParams(parms);
            setSeed(parms, defaults, SeedPolicy.Incremental);
            setStoppingCriteria(parms, defaults);
            setCustomParams(parms);
            setDistributionParameters(parms);

            // override model's max_runtime_secs to ensure that the total max_runtime doesn't exceed expectations
            if (!limitModelTrainingTime()) {
                parms._max_runtime_secs = 0;
            } else {
                final Work work = getAllocatedWork();
                // the remaining time is shared between the remaining work items of the same priority group
                final double budgetSecs = aml().timeRemainingMs() * getWorkAllocations().remainingWorkRatio(work, _isSamePriorityGroup) / 1e3;
                if (parms._max_runtime_secs == 0) {
                    parms._max_runtime_secs = budgetSecs;
                } else {
                    parms._max_runtime_secs = Math.min(parms._max_runtime_secs, budgetSecs);
                }
            }

            Log.debug("Training model: " + algoName + ", time remaining (ms): " + aml().timeRemainingMs());
            final String timeMessage;
            if (parms._max_runtime_secs == 0) {
                timeMessage = "No time limitation for "+key;
            } else {
                timeMessage = "Time assigned for "+key+": "+parms._max_runtime_secs+"s";
            }
            aml().eventLog().debug(Stage.ModelTraining, timeMessage);
            return startModel(key, parms);
        }
    }

    /**
     * Step designed to build multiple models using a (random) grid search.
     * Subclasses provide the base model parameters and the hyperparameter space.
     */
    public static abstract class GridStep extends ModelingStep {

        /** Weight used when none is given at construction. */
        public static final int DEFAULT_GRID_TRAINING_WEIGHT = 30;
        /** Priority group used when none is given at construction. */
        public static final int DEFAULT_GRID_GROUP = 2;
        /** The grid's stopping rounds are the base model's stopping rounds multiplied by this factor. */
        protected static final int GRID_STOPPING_ROUND_FACTOR = 2;

        /**
         * Creates a step with the default priority group and the default weight.
         */
        public GridStep(String provider, IAlgo algo, String id, AutoML autoML) {
            this(provider, algo, id, DEFAULT_GRID_GROUP, DEFAULT_GRID_TRAINING_WEIGHT, autoML);
        }

        public GridStep(String provider, IAlgo algo, String id, int priorityGroup, int weight, AutoML autoML) {
            super(provider, algo, id, priorityGroup, weight, autoML);
        }

        @Override
        protected JobType getJobType() {
            return JobType.HyperparamSearch;
        }

        @Override
        public boolean isResumable() {
            return true;
        }

        /**
         * @return the model parameters common to all the models of the grid.
         */
        public abstract Model.Parameters prepareModelParameters();

        /**
         * @return the hyperparameter search space.
         */
        public abstract Map prepareSearchParameters();

        @Override
        protected Job startJob() {
            final Model.Parameters baseParms = prepareModelParameters();
            final Map searchParms = prepareSearchParameters();
            return hyperparameterSearch(baseParms, searchParms);
        }

        @Override
        @SuppressWarnings("unchecked")
        protected Key makeKey(String name, boolean withCounter) {
            return aml().makeKey(name, "grid", withCounter);
        }

        /**
         * Starts a hyperparameter search under a generated grid key.
         *
         * @param baseParms ModelBuilder parameter values that are common across all models in the search.
         * @param searchParms hyperparameter search space,
         * @return the started hyperparameter search job.
         */
        protected Job hyperparameterSearch(Model.Parameters baseParms, Map searchParms) {
            return hyperparameterSearch(null, baseParms, searchParms);
        }

        /**
         * Completes the base parameters and the search criteria with the AutoML settings, then starts the search.
         *
         * @param key optional grid key: if null, a key is generated from the provider name.
         * @param baseParms ModelBuilder parameter values that are common across all models in the search.
         * @param searchParms hyperparameter search space,
         * @return the started hyperparameter search job.
         */
        protected Job hyperparameterSearch(Key key, Model.Parameters baseParms, Map searchParms) {
            final Model.Parameters defaults = newDefaultParameters(baseParms);

            initTimeConstraints(baseParms, 0);
            setCommonModelBuilderParams(baseParms);
            // grid seed is provided later through the searchCriteria
            setStoppingCriteria(baseParms, defaults);
            setCustomParams(baseParms);
            setDistributionParameters(baseParms);

            // work on a copy of the global search criteria, adjusted to the budget of this step
            final AutoMLBuildSpec spec = aml().getBuildSpec();
            final RandomDiscreteValueSearchCriteria criteria = (RandomDiscreteValueSearchCriteria) spec.build_control.stopping_criteria.getSearchCriteria().clone();
            setSearchCriteria(criteria, baseParms);

            if (key == null) {
                key = makeKey(_provider, true);
            }
            aml().trackKeys(key);

            Log.debug("Hyperparameter search: " + _provider + ", time remaining (ms): " + aml().timeRemainingMs());
            final String timeMessage;
            if (criteria.max_runtime_secs() == 0) {
                timeMessage = "No time limitation for " + key;
            } else {
                timeMessage = "Time assigned for " + key + ": " + criteria.max_runtime_secs() + "s";
            }
            aml().eventLog().debug(Stage.ModelTraining, timeMessage);
            return startSearch(key, baseParms, searchParms, criteria);
        }

        /**
         * Creates a fresh instance of the same parameters class as {@code baseParms}, holding the default values.
         * A failure is reported to the event log and rethrown as {@link H2OIllegalArgumentException}.
         */
        private Model.Parameters newDefaultParameters(Model.Parameters baseParms) {
            try {
                return baseParms.getClass().newInstance();
            } catch (Exception e) {
                aml().eventLog().error(Stage.ModelTraining, "Internal error doing hyperparameter search");
                throw new H2OIllegalArgumentException("Hyperparameter search can't create a new instance of Model.Parameters subclass: " + baseParms.getClass());
            }
        }

        /**
         * Bounds the given search criteria by the time and the number of models that can be assigned to this step,
         * and sets the grid's stopping rounds.
         *
         * @param searchCriteria the criteria to adjust.
         * @param baseParms the base model parameters, providing the stopping rounds.
         */
        protected void setSearchCriteria(RandomDiscreteValueSearchCriteria searchCriteria, Model.Parameters baseParms) {
            final Work work = getAllocatedWork();

            // for time limit, this is allocated in proportion of the entire work budget.
            double timeBudgetSecs = 0;
            if (limitModelTrainingTime()) {
                timeBudgetSecs = aml().timeRemainingMs() * getWorkAllocations().remainingWorkRatio(work, _isSamePriorityGroup) / 1e3;
            }

            // SE predicate can be removed if/when we decide to include SEs in the max_models limit
            // for models limit, this is not assigned in the same proportion as for time,
            // as the exploitation phase is not supposed to "add" models but just to replace some by better ones,
            // instead, allocation is done in proportion of the entire exploration budget.
            final int modelsBudget = (int) Math.ceil(aml().remainingModels() * getWorkAllocations().remainingWorkRatio(work, isExplorationWork.and(w -> w._algo != Algo.StackedEnsemble)));

            // a value of 0 in the criteria means that no limit was set yet
            if (searchCriteria.max_runtime_secs() == 0) {
                searchCriteria.set_max_runtime_secs(timeBudgetSecs);
            } else {
                searchCriteria.set_max_runtime_secs(Math.min(searchCriteria.max_runtime_secs(), timeBudgetSecs));
            }

            if (searchCriteria.max_models() == 0) {
                searchCriteria.set_max_models(modelsBudget);
            } else {
                searchCriteria.set_max_models(Math.min(searchCriteria.max_models(), modelsBudget));
            }

            searchCriteria.set_stopping_rounds(baseParms._stopping_rounds * GRID_STOPPING_ROUND_FACTOR);
        }

    }

    /**
     * Step designed to train some models (or not) and then deciding to make a selection
     * and add and/or remove models to/from the current leaderboard.
     */
    public static abstract class SelectionStep extends ModelingStep {

        /** Weight used by the constructor that takes no explicit weight. */
        public static final int DEFAULT_SELECTION_TRAINING_WEIGHT = 20;
        /** Priority group used by the constructor that takes no explicit priority group. */
        public static final int DEFAULT_SELECTION_GROUP = 3;

        /**
         * Creates a selection step with {@link #DEFAULT_SELECTION_GROUP} as priority group
         * and {@link #DEFAULT_SELECTION_TRAINING_WEIGHT} as weight.
         */
        public SelectionStep(String provider, IAlgo algo, String id, AutoML autoML) {
            this(provider, algo, id, DEFAULT_SELECTION_GROUP, DEFAULT_SELECTION_TRAINING_WEIGHT, autoML);
        }
        
        /**
         * Creates a selection step with an explicit priority group and weight;
         * all arguments are passed as is to the parent constructor.
         */
        public SelectionStep(String provider, IAlgo algo, String id, int priorityGroup, int weight, AutoML autoML) {
            super(provider, algo, id, priorityGroup, weight, autoML);
        }

        /**
         * @return {@link JobType#Selection}, the job type of all selection steps.
         */
        @Override
        protected JobType getJobType() {
            return JobType.Selection;
        }

        /**
         * Creates a key by delegating to {@code aml().makeKey}, passing "selection" as second argument.
         *
         * @param name        the name passed to the AutoML key factory.
         * @param withCounter passed as is to the AutoML key factory.
         * @return the new key.
         */
        @Override
        @SuppressWarnings("unchecked")
        protected Key makeKey(String name, boolean withCounter) {
            return aml().makeKey(name, "selection", withCounter);
        }

        /**
         * Gets or creates a leaderboard with the given name, using the same leaderboard frame
         * and the same sort metric as the AutoML leaderboard.
         *
         * @param name     the name of the leaderboard.
         * @param eventLog the event log used by the leaderboard: if null, an event log is obtained from a key
         *                 made from the leaderboard name, and it is removed on cleanup.
         * @return a holder giving access to the leaderboard and able to clean it up.
         */
        private LeaderboardHolder makeLeaderboard(String name, EventLog eventLog) {
            final Leaderboard reference = aml().leaderboard();
            final boolean ownsEventLog = eventLog == null;
            final EventLog log = ownsEventLog ? EventLog.getOrMake(Key.make(name)) : eventLog;
            final Leaderboard created = Leaderboard.getOrMake(
                    name,
                    log.asLogger(Stage.ModelTraining),
                    reference.leaderboardFrame(),
                    reference.getSortMetric()
            );
            return new LeaderboardHolder() {
                @Override
                public Leaderboard get() {
                    return created;
                }

                @Override
                public void cleanup() {
                    //by default, just empty the leaderboard and remove the container without touching anything model-related.
                    created.removeModels(created.getModelKeys(), false);
                    created.remove(false);
                    // an event log provided by the caller is left untouched
                    if (ownsEventLog) {
                        log.remove();
                    }
                }
            };
        }

        /**
         * Creates a leaderboard named {@code "tmp_" + name}, with no event log provided.
         *
         * @param name the suffix of the leaderboard name.
         * @return a holder giving access to the leaderboard and able to clean it up.
         */
        protected LeaderboardHolder makeTmpLeaderboard(String name) {
            return makeLeaderboard("tmp_"+name, null);
        }

        /**
         * Starts the selection job for this step.
         * <p>
         * The job trains new models through {@link #startTraining(Key, double)} into a dedicated selection leaderboard,
         * asks the {@link ModelSelectionStrategy} to compare them with the models already trained,
         * and finally applies the resulting selection to the main AutoML leaderboard.
         * The models added by the selection are also stored in the {@code Models} result container of the job.
         *
         * @return the started job, whose result is a {@code Models} container.
         */
        @Override
        protected Job startJob() {
            Key[] trainedModelKeys = getTrainedModelsKeys();
            Key key = makeKey(_provider+"_"+_id, false);
            aml().trackKeys(key);
            Job job = new Job<>(key, Models.class.getName(), _description);
            Work work = getAllocatedWork();

            // 0 is used as the "no time limit" marker (see the debug message below).
            double maxAssignedTimeSecs = limitModelTrainingTime()
                    ? aml().timeRemainingMs() * getWorkAllocations().remainingWorkRatio(work) / 1e3
                    : 0;

            aml().eventLog().debug(Stage.ModelTraining, maxAssignedTimeSecs == 0
                    ? "No time limitation for "+key
                    : "Time assigned for "+key+": "+maxAssignedTimeSecs+"s");

            return job.start(new H2O.H2OCountedCompleter() {

                final Models result = new Models(key, Model.class, job);
                final Key selectionKey = Key.make(key+"_select");
                final EventLog selectionEventLog = EventLog.getOrMake(selectionKey);
                final LeaderboardHolder selectionLeaderboard = makeLeaderboard(selectionKey.toString(), selectionEventLog);

                {
                    result.delete_and_lock(job);
                }

                @Override
                public void compute2() {
                    Countdown countdown = Countdown.fromSeconds(maxAssignedTimeSecs);
                    Selection selection = null;
                    try {
                        ModelingStepsExecutor localExecutor = new ModelingStepsExecutor(selectionLeaderboard.get(), selectionEventLog, countdown);
                        localExecutor.start();
                        Job innerTraining = startTraining(selectionKey, maxAssignedTimeSecs);
                        StepResultState state = localExecutor.monitor(innerTraining, SelectionStep.this, job);

                        if (state.is(ResultStatus.success)) {
                            Log.debug("Selection leaderboard "+selectionLeaderboard.get()._key, selectionLeaderboard.get().toLogString());
                            selection = getSelectionStrategy().select(trainedModelKeys, selectionLeaderboard.get().getModelKeys());
                            Leaderboard lb = aml().leaderboard();
                            Log.debug("Selection result for job "+key, ToStringBuilder.reflectionToString(selection));
                            lb.removeModels(selection._remove, false); // do remove the model immediately from DKV: if it were part of a grid, it prevents the grid from being resumed.
                            aml().trackKeys(selection._remove);
                            lb.addModels(selection._add);
                        } else if (state.is(ResultStatus.failed)) {
                            // The recorded failure is not guaranteed to be a RuntimeException:
                            // rethrow unchecked throwables as they are and wrap anything else,
                            // instead of failing with a ClassCastException that would hide the real cause.
                            Throwable failure = state.error();
                            if (failure instanceof RuntimeException) throw (RuntimeException) failure;
                            if (failure instanceof Error) throw (Error) failure;
                            throw new RuntimeException(failure);
                        } else if (state.is(ResultStatus.cancelled)) {
                            throw new Job.JobCancelledException(innerTraining);
                        }
                    } finally {
                        // always release the result container, then publish the selected models if a selection was made.
                        result.unlock(job);
                        if (selection != null) {
                            result.addModels(selection._add);
                        }
                    }
                    tryComplete();
                }

                @Override
                public void onCompletion(CountedCompleter caller) {
                    Keyed.remove(selectionKey, new Futures(), false); // don't cascade: tmp models removal is done using the logic below.
                    selectionLeaderboard.get().removeModels(trainedModelKeys, false); // if original models were added to selection leaderboard, just remove them.
                    selectionLeaderboard.get().removeModels( // for newly trained models, fully remove those that don't appear in the result container.
                            Arrays.stream(selectionLeaderboard.get().getModelKeys()).filter(k -> !ArrayUtils.contains(result.getModelKeys(), k)).toArray(Key[]::new),
                            true
                    );
                    selectionLeaderboard.cleanup();
                    // only remove the selection event log if it is not the main AutoML event log.
                    if (!aml().eventLog()._key.equals(selectionEventLog._key)) selectionEventLog.remove();
                    super.onCompletion(caller);
                }

                @Override
                public boolean onExceptionalCompletion(Throwable ex, CountedCompleter caller) {
                    // compute2() already unlocks the result in its finally block.
                    // NOTE(review): the `false` flag presumably makes this second unlock lenient - confirm against the lock API.
                    result.unlock(job._key, false);
                    Keyed.remove(selectionKey);
                    selectionLeaderboard.get().remove();
                    // only remove the selection event log if it is not the main AutoML event log.
                    if (!aml().eventLog()._key.equals(selectionEventLog._key)) selectionEventLog.remove();
                    return super.onExceptionalCompletion(ex, caller);
                }

            }, work._weight, maxAssignedTimeSecs);
        }

        /**
         * Starts the training whose models are submitted to the selection.
         * <p>
         * Called by {@link #startJob()}, which monitors the returned job.
         *
         * @param result the key provided by {@link #startJob()} (it passes its temporary selection key).
         * @param maxRuntimeSecs the time assigned to this step in seconds; {@link #startJob()} passes 0 when no time limitation applies.
         * @return the training job.
         */
        protected abstract Job startTraining(Key result, double maxRuntimeSecs);

        /**
         * Provides the strategy used by {@link #startJob()} to compare the previously trained models
         * with the models from the selection leaderboard.
         * The {@code Selection} it produces lists the models to add to
         * and to remove from the main AutoML leaderboard.
         *
         * @return the selection strategy for this step.
         */
        protected abstract ModelSelectionStrategy getSelectionStrategy();

        /**
         * Wraps the given job into a new job producing a {@code Models} container.
         * <p>
         * The wrapper waits for the original job's result and converts it:
         * a single {@link Model} is added to the container,
         * the models of a {@link ModelContainer} are copied to the container before that original container is removed,
         * and a missing result is tolerated only if a stop was requested on the wrapper job.
         *
         * @param job the original job, producing a {@link Model} or a {@link ModelContainer}.
         * @param result the key of the {@code Models} container produced by the wrapper job.
         * @return the started wrapper job.
         * @throws H2OIllegalArgumentException (raised inside the wrapper task) if the original job produced anything else.
         */
        protected Job asModelsJob(Job job, Key result){
            // the result key can be the same as the one of the original job, as the latter is dropped once its result is read
            final Job modelsJob = new Job<>(result, Models.class.getName(), job._description);
            final H2O.H2OCountedCompleter conversion = new H2O.H2OCountedCompleter() {

                final Models container = new Models(result, Model.class, modelsJob);
                {
                    container.delete_and_lock(modelsJob);
                }

                @Override
                public void compute2() {
                    ModelingStepsExecutor.ensureStopRequestPropagated(job, modelsJob, ModelingStepsExecutor.DEFAULT_POLLING_INTERVAL_IN_MILLIS);
                    final Keyed produced = job.get();
                    container.unlock(modelsJob);
                    if (produced instanceof Model) {
                        container.addModel(produced.getKey());
                    } else if (produced instanceof ModelContainer) {
                        final ModelContainer source = (ModelContainer) produced;
                        container.addModels(source.getModelKeys());
                        produced.remove(false);
                    } else {
                        // a missing result is only acceptable when a stop was requested before any model could be trained.
                        final boolean stoppedBeforeAnyModel = produced == null && modelsJob.stop_requested();
                        if (!stoppedBeforeAnyModel) {
                            throw new H2OIllegalArgumentException("Can only convert jobs producing a single Model or ModelContainer.");
                        }
                    }
                    tryComplete();
                }
            };
            // NOTE(review): startJob() passes a duration in seconds as third argument of Job.start,
            // whereas a field named _max_runtime_msecs is passed here - confirm the expected unit.
            return modelsJob.start(conversion, job._work, job._max_runtime_msecs);
        }
    }


    /**
     * Step designed to dynamically choose to train a model or another, a grid or anything else,
     * based on the current automl workflow history.
     * <p>
     * The step itself has nothing to run: {@link #canRun()} returns {@code false} and {@link #startJob()} returns {@code null}.
     * Its work is delegated to the sub-steps provided by {@link #prepareModelingSteps()},
     * which are computed on first access and then cached.
     */
    public static abstract class DynamicStep extends ModelingStep {

        public static final int DEFAULT_DYNAMIC_TRAINING_WEIGHT = 20;
        public static final int DEFAULT_DYNAMIC_GROUP = 100;

        /** Placeholder algo given to the parent constructor, as a dynamic step is not bound to an actual algo. */
        public static class VirtualAlgo implements IAlgo {

            public VirtualAlgo() {}

            @Override
            public String name() {
                return "virtual";
            }
        }

        /** Sub-steps cache, filled on first access by {@link #initSubSteps()}. */
        private transient Collection<ModelingStep> _subSteps;

        /**
         * Creates a dynamic step using {@link #DEFAULT_DYNAMIC_GROUP} and {@link #DEFAULT_DYNAMIC_TRAINING_WEIGHT}.
         */
        public DynamicStep(String provider, String id, AutoML autoML) {
            this(provider, id, DEFAULT_DYNAMIC_GROUP, DEFAULT_DYNAMIC_TRAINING_WEIGHT, autoML);
        }

        /**
         * Creates a dynamic step with an explicit priority group and weight; the algo is always a {@link VirtualAlgo}.
         */
        public DynamicStep(String provider, String id, int priorityGroup, int weight, AutoML autoML) {
            super(provider, new VirtualAlgo(), id, priorityGroup, weight, autoML);
        }

        @Override
        public boolean canRun() {
            // this step is designed to delegate its work to sub-steps by default,
            // so the parent step itself has nothing to run.
            return false;
        }

        @Override
        protected Job startJob() {
            // see comment in canRun().
            return null;
        }

        @Override
        protected JobType getJobType() {
            return JobType.Dynamic;
        }

        @Override
        @SuppressWarnings("unchecked")
        protected Key makeKey(String name, boolean withCounter) {
            return aml().makeKey(name, "decision", withCounter);
        }

        /** Computes the sub-steps once, on first use. */
        private void initSubSteps() {
            if (_subSteps == null) {
                _subSteps = prepareModelingSteps();
            }
        }

        /**
         * @return an iterator over the sub-steps, which are prepared on first call.
         */
        @Override
        public Iterator<ModelingStep> iterateSubSteps() {
            initSubSteps();
            return _subSteps.iterator();
        }

        /**
         * @param id the id of the sub-step to look for.
         * @return the first sub-step with the given id, or an empty optional if there is none.
         */
        @Override
        protected Optional<ModelingStep> getSubStep(String id) {
            initSubSteps();
            // the element type must be ModelingStep for `_id` to be accessible in the predicate.
            return _subSteps.stream()
                    .filter(step -> step._id.equals(id))
                    .findFirst();
        }

        /**
         * Builds the sub-steps this dynamic step delegates its work to.
         * Only called while the sub-steps cache is still {@code null}.
         *
         * @return the collection of sub-steps.
         */
        protected abstract Collection<ModelingStep> prepareModelingSteps();
    }

}