hex.schemas.NaiveBayesV3 Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of h2o-algos Show documentation
H2O Algorithms
There is a newer version: 3.46.0.6
package hex.schemas;

import hex.naivebayes.NaiveBayes;
import hex.naivebayes.NaiveBayesModel.NaiveBayesParameters;
import water.api.API;
import water.api.FrameV3.ColSpecifierV3;
import water.api.ModelParametersSchema;

public class NaiveBayesV3 extends ModelBuilderSchema {
  public static final class NaiveBayesParametersV3 extends ModelParametersSchema {
    static public String[] own_fields = new String[]{
      "response_column",
      "balance_classes",
      "class_sampling_factors",
      "max_after_balance_size",
      "max_confusion_matrix_size",
      "max_hit_ratio_k",
      "laplace", "min_sdev", "eps_sdev", "min_prob", "eps_prob", "compute_metrics"};

    // supervised Schema

    // TODO: pass these as a new helper class that contains frame and vec; right now we have no automagic way to
    // know which frame a Vec name corresponds to, so there's hardwired logic in the adaptor which knows that these
    // column names are related to training_frame.
    @API(help = "Response column", is_member_of_frames = {"training_frame", "validation_frame"}, is_mutually_exclusive_with = {"ignored_columns"}, direction = API.Direction.INOUT)
    public ColSpecifierV3 response_column;

  /*Imbalanced Classes*/
    /**
     * For imbalanced data, balance training data class counts via
     * over/under-sampling. This can result in improved predictive accuracy.
     */
    @API(help = "Balance training data class counts via over/under-sampling (for imbalanced data).", level = API.Level.secondary, direction = API.Direction.INOUT)
    public boolean balance_classes;

    /**
     * Desired over/under-sampling ratios per class (lexicographic order).
     * Only when balance_classes is enabled.
     * If not specified, they will be automatically computed to obtain class balance during training.
     */
    @API(help = "Desired over/under-sampling ratios per class (in lexicographic order). If not specified, sampling factors will be automatically computed to obtain class balance during training. Requires balance_classes.", level = API.Level.expert, direction = API.Direction.INOUT)
    public float[] class_sampling_factors;

    /**
     * When classes are balanced, limit the resulting dataset size to the
     * specified multiple of the original dataset size.
     */
    @API(help = "Maximum relative size of the training data after balancing class counts (can be less than 1.0). Requires balance_classes.", /* dmin=1e-3, */ level = API.Level.expert, direction = API.Direction.INOUT)
    public float max_after_balance_size;

    /** For classification models, the maximum size (in terms of classes) of
     *  the confusion matrix for it to be printed. This option is meant to
     *  avoid printing extremely large confusion matrices.  */
    @API(help = "Maximum size (# classes) for confusion matrices to be printed in the Logs", level = API.Level.secondary, direction = API.Direction.INOUT)
    public int max_confusion_matrix_size;

    /**
     * The maximum number (top K) of predictions to use for hit ratio computation (for multi-class only, 0 to disable)
     */
    @API(help = "Max. number (top K) of predictions to use for hit ratio computation (for multi-class only, 0 to disable)", level = API.Level.secondary, direction=API.Direction.INOUT)
    public int max_hit_ratio_k;

    //

    @API(help = "Laplace smoothing parameter")
    public double laplace;

    @API(help = "Min. standard deviation to use for observations with not enough data")
    public double min_sdev;

    @API(help = "Cutoff below which standard deviation is replaced with min_sdev")
    public double eps_sdev;

    @API(help = "Min. probability to use for observations with not enough data")
    public double min_prob;

    @API(help = "Cutoff below which probability is replaced with min_prob")
    public double eps_prob;

    @API(help = "Compute metrics on training data")
    public boolean compute_metrics;
  }
}