/*
 * This file is auto-generated by h2o-3/h2o-bindings/bin/gen_java.py
 * Copyright 2016 H2O.ai;  Apache License Version 2.0 (see LICENSE for details)
 */
package water.bindings.pojos;

import com.google.gson.Gson;
import com.google.gson.annotations.*;


public class GAMParametersV3 extends ModelParametersSchemaV3 {

    /**
     * Seed for pseudo random number generator (if applicable)
     */
    public long seed;

    /**
     * Family. Use binomial for classification with logistic regression; the others are for regression problems.
     */
    public GLMFamily family;

    /**
     * Tweedie variance power
     */
    @SerializedName("tweedie_variance_power")
    public double tweedieVariancePower;

    /**
     * Tweedie link power
     */
    @SerializedName("tweedie_link_power")
    public double tweedieLinkPower;

    /**
     * Theta
     */
    public double theta;

    /**
     * AUTO will set the solver based on the given data and the other parameters. IRLSM is fast on problems with a small
     * number of predictors and for lambda-search with L1 penalty; L_BFGS scales better for datasets with many columns.
     */
    public GLMSolver solver;

    /**
     * Distribution of regularization between the L1 (Lasso) and L2 (Ridge) penalties. A value of 1 for alpha represents
     * Lasso regression, a value of 0 produces Ridge regression, and anything in between specifies the amount of mixing
     * between the two. Default value of alpha is 0 when SOLVER = 'L-BFGS'; 0.5 otherwise.
     */
    public double[] alpha;

    /**
     * Regularization strength
     */
    public double[] lambda;
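    // Illustration only (assumed usage, not part of the generated file): alpha and lambda
    // are arrays, so several elastic-net mixes and regularization strengths can be supplied
    // together (see cold_start below for the multi-alpha/lambda case), e.g.
    //   params.alpha  = new double[]{0.0, 0.5, 1.0};
    //   params.lambda = new double[]{0.1, 0.01};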

    /**
     * double array to initialize coefficients for GAM.
     */
    public double[] startval;

    /**
     * Use lambda search starting at lambda max; the given lambda is then interpreted as lambda min
     */
    @SerializedName("lambda_search")
    public boolean lambdaSearch;

    /**
     * Stop early when there is no more relative improvement on train or validation (if provided)
     */
    @SerializedName("early_stopping")
    public boolean earlyStopping;

    /**
     * Number of lambdas to be used in a search. Default indicates: If alpha is zero, with lambda search set to True,
     * the value of nlambdas is set to 30 (fewer lambdas are needed for ridge regression), otherwise it is set to 100.
     */
    public int nlambdas;

    /**
     * Standardize numeric columns to have zero mean and unit variance
     */
    public boolean standardize;

    /**
     * Handling of missing values. Either MeanImputation, Skip or PlugValues.
     */
    @SerializedName("missing_values_handling")
    public GLMMissingValuesHandling missingValuesHandling;

    /**
     * Plug Values (a single row frame containing values that will be used to impute missing values of the
     * training/validation frame, use in conjunction with missing_values_handling = PlugValues)
     */
    @SerializedName("plug_values")
    public FrameKeyV3 plugValues;

    /**
     * Restrict coefficients (not intercept) to be non-negative
     */
    @SerializedName("non_negative")
    public boolean nonNegative;

    /**
     * Maximum number of iterations
     */
    @SerializedName("max_iterations")
    public int maxIterations;

    /**
     * Converge if beta changes less (using L-infinity norm) than beta epsilon; ONLY applies to the IRLSM solver
     */
    @SerializedName("beta_epsilon")
    public double betaEpsilon;

    /**
     * Converge if the objective value changes less than this. Default indicates: If lambda_search is set to True, the
     * value of objective_epsilon is set to .0001. If lambda_search is set to False and lambda is equal to zero, the
     * value of objective_epsilon is set to .000001; for any other value of lambda, the default value of
     * objective_epsilon is set to .0001.
     */
    @SerializedName("objective_epsilon")
    public double objectiveEpsilon;

    /**
     * Converge if the objective changes less (using L-infinity norm) than this; ONLY applies to the L-BFGS solver.
     * Default indicates: If lambda_search is set to False and lambda is equal to zero, the default value of
     * gradient_epsilon is equal to .000001, otherwise the default value is .0001. If lambda_search is set to True, the
     * conditional values above are 1E-8 and 1E-6 respectively.
     */
    @SerializedName("gradient_epsilon")
    public double gradientEpsilon;

    /**
     * Likelihood divider in objective value computation, default is 1/nobs
     */
    @SerializedName("obj_reg")
    public double objReg;

    /**
     * Link function.
     */
    public GLMLink link;

    /**
     * Include constant term in the model
     */
    public boolean intercept;

    /**
     * Prior probability for y==1. To be used only for logistic regression when the data has been sampled and the mean
     * of the response does not reflect reality.
     */
    public double prior;

    /**
     * Only applicable to multiple alpha/lambda values when calling GLM from GAM.  If false, build the next model for
     * the next set of alpha/lambda values starting from the values provided by the current model.  If true, the GLM
     * model will be started from scratch.
     */
    @SerializedName("cold_start")
    public boolean coldStart;

    /**
     * Minimum lambda used in lambda search, specified as a ratio of lambda_max (the smallest lambda that drives all
     * coefficients to zero). Default indicates: if the number of observations is greater than the number of variables,
     * then lambda_min_ratio is set to 0.0001; if the number of observations is less than the number of variables, then
     * lambda_min_ratio is set to 0.01.
     */
    @SerializedName("lambda_min_ratio")
    public double lambdaMinRatio;

    /**
     * Beta constraints
     */
    @SerializedName("beta_constraints")
    public FrameKeyV3 betaConstraints;

    /**
     * Maximum number of active predictors during computation. Use as a stopping criterion to prevent expensive model
     * building with many predictors. Default indicates: If the IRLSM solver is used, the value of max_active_predictors
     * is set to 5000 otherwise it is set to 100000000.
     */
    @SerializedName("max_active_predictors")
    public int maxActivePredictors;

    /**
     * A list of predictor column indices to interact. All pairwise combinations will be computed for the list.
     */
    public String[] interactions;

    /**
     * A list of pairwise (first order) column interactions.
     */
    @SerializedName("interaction_pairs")
    public StringPairV3[] interactionPairs;

    /**
     * Balance training data class counts via over/under-sampling (for imbalanced data).
     */
    @SerializedName("balance_classes")
    public boolean balanceClasses;

    /**
     * Desired over/under-sampling ratios per class (in lexicographic order). If not specified, sampling factors will be
     * automatically computed to obtain class balance during training. Requires balance_classes.
     */
    @SerializedName("class_sampling_factors")
    public float[] classSamplingFactors;

    /**
     * Maximum relative size of the training data after balancing class counts (can be less than 1.0). Requires
     * balance_classes.
     */
    @SerializedName("max_after_balance_size")
    public float maxAfterBalanceSize;

    /**
     * [Deprecated] Maximum size (# classes) for confusion matrices to be printed in the Logs
     */
    @SerializedName("max_confusion_matrix_size")
    public int maxConfusionMatrixSize;

    /**
     * Request p-values computation; p-values work only with the IRLSM solver and no regularization
     */
    @SerializedName("compute_p_values")
    public boolean computePValues;

    /**
     * In case of linearly dependent columns, remove some of the dependent columns
     */
    @SerializedName("remove_collinear_columns")
    public boolean removeCollinearColumns;

    /**
     * If set to true, will return knot locations as a double[][] array for the gam column names found in
     * knots_for_gam.  Defaults to false.
     */
    @SerializedName("store_knot_locations")
    public boolean storeKnotLocations;

    /**
     * Number of knots for gam predictors.  If specified, must specify one for each gam predictor.  For monotone
     * I-splines, minimum = 2, for cs spline, minimum = 3.  For thin plate, minimum is size of polynomial basis + 2.
     */
    @SerializedName("num_knots")
    public int[] numKnots;

    /**
     * Order of I-splines or NBSplineTypeI M-splines used for gam predictors. If specified, must be the same size as
     * gam_columns.  For I-splines, the spline order is the same as the order of the polynomials used to generate the
     * splines.  For M-splines, the order of the polynomials used to generate the splines will be spline_order-1.
     * Values for bs=0 or 1 will be ignored.
     */
    @SerializedName("spline_orders")
    public int[] splineOrders;

    /**
     * Valid for I-spline (bs=2) only.  True if the I-splines are monotonically increasing (and monotonically non-
     * decreasing) and False if the I-splines are monotonically decreasing (and monotonically non-increasing).  If
     * specified, must be the same size as gam_columns.  Values for other spline types will be ignored.  Defaults to
     * true.
     */
    @SerializedName("splines_non_negative")
    public boolean[] splinesNonNegative;

    /**
     * Arrays of predictor column names for gam smoothers using single or multiple predictors, e.g.
     * {{'c1'},{'c2','c3'},{'c4'},...}
     */
    @SerializedName("gam_columns")
    public String[][] gamColumns;
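    // Illustration only (hypothetical column names 'c1'..'c3'): a smoother on 'c1' alone
    // plus a two-predictor smoother on 'c2' and 'c3' would be declared as
    //   gamColumns = new String[][]{{"c1"}, {"c2", "c3"}};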

    /**
     * Smoothing parameter for gam predictors.  If specified, must be of the same length as gam_columns
     */
    public double[] scale;

    /**
     * Basis function type for each gam predictor: 0 for cr, 1 for thin plate regression with knots, 2 for monotone
     * I-splines, 3 for NBSplineTypeI M-splines (refer to doc here: https://github.com/h2oai/h2o-3/issues/6926).  If
     * specified, must be the same size as gam_columns
     */
    public int[] bs;

    /**
     * Save keys of model matrix
     */
    @SerializedName("keep_gam_cols")
    public boolean keepGamCols;

    /**
     * Standardize tp (thin plate) predictor columns
     */
    @SerializedName("standardize_tp_gam_cols")
    public boolean standardizeTpGamCols;

    /**
     * Scale penalty matrix for tp (thin plate) smoothers as in R
     */
    @SerializedName("scale_tp_penalty_mat")
    public boolean scaleTpPenaltyMat;

    /**
     * Array storing frame keys of knots.  One for each gam column set specified in gam_columns
     */
    @SerializedName("knot_ids")
    public String[] knotIds;


    /*------------------------------------------------------------------------------------------------------------------
    //                                                  INHERITED
    //------------------------------------------------------------------------------------------------------------------

    // Destination id for this model; auto-generated if not specified.
    public ModelKeyV3 modelId;

    // Id of the training data frame.
    public FrameKeyV3 trainingFrame;

    // Id of the validation data frame.
    public FrameKeyV3 validationFrame;

    // Number of folds for K-fold cross-validation (0 to disable or >= 2).
    public int nfolds;

    // Whether to keep the cross-validation models.
    public boolean keepCrossValidationModels;

    // Whether to keep the predictions of the cross-validation models.
    public boolean keepCrossValidationPredictions;

    // Whether to keep the cross-validation fold assignment.
    public boolean keepCrossValidationFoldAssignment;

    // Allow parallel training of cross-validation models
    public boolean parallelizeCrossValidation;

    // Distribution function
    public GenmodelutilsDistributionFamily distribution;

    // Tweedie power for Tweedie regression, must be between 1 and 2.
    public double tweediePower;

    // Desired quantile for Quantile regression, must be between 0 and 1.
    public double quantileAlpha;

    // Desired quantile for Huber/M-regression (threshold between quadratic and linear loss, must be between 0 and 1).
    public double huberAlpha;

    // Response variable column.
    public ColSpecifierV3 responseColumn;

    // Column with observation weights. Giving some observation a weight of zero is equivalent to excluding it from the
    // dataset; giving an observation a relative weight of 2 is equivalent to repeating that row twice. Negative weights
    // are not allowed. Note: Weights are per-row observation weights and do not increase the size of the data frame.
    // This is typically the number of times a row is repeated, but non-integer values are supported as well. During
    // training, rows with higher weights matter more, due to the larger loss function pre-factor. If you set weight = 0
    // for a row, the returned prediction frame at that row is zero and this is incorrect. To get an accurate
    // prediction, remove all rows with weight == 0.
    public ColSpecifierV3 weightsColumn;

    // Offset column. This will be added to the combination of columns before applying the link function.
    public ColSpecifierV3 offsetColumn;

    // Column with cross-validation fold index assignment per observation.
    public ColSpecifierV3 foldColumn;

    // Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will stratify
    // the folds based on the response variable, for classification problems.
    public ModelParametersFoldAssignmentScheme foldAssignment;

    // Encoding scheme for categorical features
    public ModelParametersCategoricalEncodingScheme categoricalEncoding;

    // For every categorical feature, only use this many most frequent categorical levels for model training. Only used
    // for categorical_encoding == EnumLimited.
    public int maxCategoricalLevels;

    // Names of columns to ignore for training.
    public String[] ignoredColumns;

    // Ignore constant columns.
    public boolean ignoreConstCols;

    // Whether to score during each iteration of model training.
    public boolean scoreEachIteration;

    // Model checkpoint to resume training with.
    public ModelKeyV3 checkpoint;

    // Early stopping based on convergence of stopping_metric. Stop if simple moving average of length k of the
    // stopping_metric does not improve for k:=stopping_rounds scoring events (0 to disable)
    public int stoppingRounds;

    // Maximum allowed runtime in seconds for model training. Use 0 to disable.
    public double maxRuntimeSecs;

    // Metric to use for early stopping (AUTO: logloss for classification, deviance for regression and anomaly_score for
    // Isolation Forest). Note that custom and custom_increasing can only be used in GBM and DRF with the Python client.
    public ScoreKeeperStoppingMetric stoppingMetric;

    // Relative tolerance for metric-based stopping criterion (stop if relative improvement is not at least this much)
    public double stoppingTolerance;

    // Gains/Lift table number of bins. 0 means disabled. Default value -1 means automatic binning.
    public int gainsliftBins;

    // Reference to custom evaluation function, format: `language:keyName=funcName`
    public String customMetricFunc;

    // Reference to custom distribution, format: `language:keyName=funcName`
    public String customDistributionFunc;

    // Automatically export generated models to this directory.
    public String exportCheckpointsDir;

    // Set default multinomial AUC type.
    public MultinomialAucType aucType;

    */

    /**
     * Public constructor
     */
    public GAMParametersV3() {
        seed = -1L;
        family = GLMFamily.AUTO;
        tweedieVariancePower = 0.0;
        tweedieLinkPower = 0.0;
        theta = 0.0;
        solver = GLMSolver.AUTO;
        lambdaSearch = false;
        earlyStopping = true;
        nlambdas = -1;
        standardize = false;
        missingValuesHandling = GLMMissingValuesHandling.MeanImputation;
        nonNegative = false;
        maxIterations = -1;
        betaEpsilon = 0.0001;
        objectiveEpsilon = -1.0;
        gradientEpsilon = -1.0;
        objReg = -1.0;
        link = GLMLink.family_default;
        intercept = true;
        prior = -1.0;
        coldStart = false;
        lambdaMinRatio = -1.0;
        maxActivePredictors = -1;
        balanceClasses = false;
        maxAfterBalanceSize = 5.0f;
        maxConfusionMatrixSize = 20;
        computePValues = false;
        removeCollinearColumns = false;
        storeKnotLocations = false;
        keepGamCols = false;
        standardizeTpGamCols = false;
        scaleTpPenaltyMat = false;
        nfolds = 0;
        keepCrossValidationModels = true;
        keepCrossValidationPredictions = false;
        keepCrossValidationFoldAssignment = false;
        parallelizeCrossValidation = true;
        distribution = GenmodelutilsDistributionFamily.AUTO;
        tweediePower = 1.5;
        quantileAlpha = 0.5;
        huberAlpha = 0.9;
        foldAssignment = ModelParametersFoldAssignmentScheme.AUTO;
        categoricalEncoding = ModelParametersCategoricalEncodingScheme.AUTO;
        maxCategoricalLevels = 10;
        ignoreConstCols = true;
        scoreEachIteration = false;
        stoppingRounds = 0;
        maxRuntimeSecs = 0.0;
        stoppingMetric = ScoreKeeperStoppingMetric.AUTO;
        stoppingTolerance = 0.001;
        gainsliftBins = -1;
        customMetricFunc = "";
        customDistributionFunc = "";
        exportCheckpointsDir = "";
        aucType = MultinomialAucType.AUTO;
    }

    /**
     * Return the contents of this object as a JSON String.
     */
    @Override
    public String toString() {
        return new Gson().toJson(this);
    }

}
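
A minimal usage sketch, assuming the sibling water.bindings.pojos classes (GLMFamily, etc.) are on the classpath; the wrapper class GamParamsExample and the column names are hypothetical and not part of the generated bindings. It populates a few of the fields above and uses the Gson-backed toString() to print the object's JSON representation.

import water.bindings.pojos.GAMParametersV3;
import water.bindings.pojos.GLMFamily;

public class GamParamsExample {
    public static void main(String[] args) {
        GAMParametersV3 params = new GAMParametersV3();            // defaults from the constructor above
        params.family = GLMFamily.AUTO;                            // keep the AUTO family default explicit
        params.gamColumns = new String[][]{{"c1"}, {"c2", "c3"}};  // one smoother per inner array
        params.numKnots = new int[]{5, 10};                        // one entry per gam column set
        params.bs = new int[]{0, 1};                               // 0 = cr, 1 = thin plate regression with knots
        params.lambdaSearch = true;                                // the given lambda is then treated as lambda min
        System.out.println(params);                                // toString() serializes the POJO to JSON via Gson
    }
}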



