All Downloads are FREE. Search and download functionalities are using the official Maven repository.

hex.schemas.SharedTreeV3 Maven / Gradle / Ivy

There is a newer version: 3.46.0.6
Show newest version
package hex.schemas;

import hex.tree.SharedTree;
import hex.tree.SharedTreeModel.SharedTreeParameters;
import water.api.*;
import water.api.FrameV3.ColSpecifierV3;

public class SharedTreeV3, P extends SharedTreeV3.SharedTreeParametersV3> extends ModelBuilderSchema {

  public static class SharedTreeParametersV3

> extends ModelParametersSchema { static public String[] own_fields = new String[] { "response_column", "balance_classes", "class_sampling_factors", "max_after_balance_size", "max_confusion_matrix_size", "max_hit_ratio_k", "ntrees", "max_depth", "min_rows", "nbins", "nbins_cats", "r2_stopping", "seed" }; // supervised Schema // TODO: pass these as a new helper class that contains frame and vec; right now we have no automagic way to // know which frame a Vec name corresponds to, so there's hardwired logic in the adaptor which knows that these // column names are related to training_frame. @API(help = "Response column", is_member_of_frames = {"training_frame", "validation_frame"}, is_mutually_exclusive_with = {"ignored_columns"}, direction = API.Direction.INOUT) public ColSpecifierV3 response_column; /*Imbalanced Classes*/ /** * For imbalanced data, balance training data class counts via * over/under-sampling. This can result in improved predictive accuracy. */ @API(help = "Balance training data class counts via over/under-sampling (for imbalanced data).", level = API.Level.secondary, direction = API.Direction.INOUT) public boolean balance_classes; /** * Desired over/under-sampling ratios per class (lexicographic order). * Only when balance_classes is enabled. * If not specified, they will be automatically computed to obtain class balance during training. */ @API(help = "Desired over/under-sampling ratios per class (in lexicographic order). If not specified, sampling factors will be automatically computed to obtain class balance during training. Requires balance_classes.", level = API.Level.expert, direction = API.Direction.INOUT) public float[] class_sampling_factors; /** * When classes are balanced, limit the resulting dataset size to the * specified multiple of the original dataset size. */ @API(help = "Maximum relative size of the training data after balancing class counts (can be less than 1.0). Requires balance_classes.", /* dmin=1e-3, */ level = API.Level.expert, direction = API.Direction.INOUT) public float max_after_balance_size; /** For classification models, the maximum size (in terms of classes) of * the confusion matrix for it to be printed. This option is meant to * avoid printing extremely large confusion matrices. */ @API(help = "Maximum size (# classes) for confusion matrices to be printed in the Logs", level = API.Level.secondary, direction = API.Direction.INOUT) public int max_confusion_matrix_size; /** * The maximum number (top K) of predictions to use for hit ratio computation (for multi-class only, 0 to disable) */ @API(help = "Max. number (top K) of predictions to use for hit ratio computation (for multi-class only, 0 to disable)", level = API.Level.secondary, direction=API.Direction.INOUT) public int max_hit_ratio_k; // @API(help="Number of trees.", gridable = true) public int ntrees; @API(help="Maximum tree depth.", gridable = true) public int max_depth; @API(help="Fewest allowed observations in a leaf (in R called 'nodesize').", gridable = true) public int min_rows; @API(help="For numerical columns (real/int), build a histogram of this many bins, then split at the best point", gridable = true) public int nbins; @API(help="For categorical columns (enum), build a histogram of this many bins, then split at the best point. Higher values can lead to more overfitting.", gridable = true) public int nbins_cats; @API(help="Stop making trees when the R^2 metric equals or exceeds this", level = API.Level.secondary) public double r2_stopping; @API(help = "Seed for pseudo random number generator (if applicable)") public long seed; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy