// water.bindings.pojos.PCAParametersV3 Maven / Gradle / Ivy
/*
* This file is auto-generated by h2o-3/h2o-bindings/bin/gen_java.py
* Copyright 2016 H2O.ai; Apache License Version 2.0 (see LICENSE for details)
*/
package water.bindings.pojos;
import com.google.gson.Gson;
import com.google.gson.annotations.*;
public class PCAParametersV3 extends ModelParametersSchemaV3 {

    /** Transformation applied to the training data. */
    public DataInfoTransformType transform;

    /**
     * Algorithm used to compute the principal components:
     * <ul>
     *   <li>GramSVD - distributed computation of the Gram matrix, followed by a local SVD;</li>
     *   <li>Power - computes the SVD using the power iteration method (experimental);</li>
     *   <li>Randomized - uses the randomized subspace iteration method;</li>
     *   <li>GLRM - fits a generalized low-rank model with L2 loss function and no regularization, and solves for
     *       the SVD using local matrix algebra (experimental).</li>
     * </ul>
     */
    @SerializedName("pca_method")
    public PCAMethod pcaMethod;

    /**
     * Implementation used to compute PCA (via SVD or EVD):
     * <ul>
     *   <li>MTJ_EVD_DENSEMATRIX - eigenvalue decompositions for dense matrix using MTJ;</li>
     *   <li>MTJ_EVD_SYMMMATRIX - eigenvalue decompositions for symmetric matrix using MTJ;</li>
     *   <li>MTJ_SVD_DENSEMATRIX - singular-value decompositions for dense matrix using MTJ;</li>
     *   <li>JAMA - eigenvalue decompositions for dense matrix using JAMA.</li>
     * </ul>
     * References: JAMA - http://math.nist.gov/javanumerics/jama/; MTJ -
     * https://github.com/fommil/matrix-toolkits-java/
     */
    @SerializedName("pca_impl")
    public PCAImplementation pcaImpl;

    /** Rank of the matrix approximation. */
    public int k;

    /** Maximum number of training iterations. */
    @SerializedName("max_iterations")
    public int maxIterations;

    /** RNG seed used for initialization. */
    public long seed;

    /** Whether the first factor level is included in each categorical expansion. */
    @SerializedName("use_all_factor_levels")
    public boolean useAllFactorLevels;

    /** Whether metrics are computed on the training data. */
    @SerializedName("compute_metrics")
    public boolean computeMetrics;

    /** Whether missing entries are imputed with the column mean. */
    @SerializedName("impute_missing")
    public boolean imputeMissing;

    /*
     * ----------------------------------------------------------------------------------------------------------------
     * INHERITED
     * ----------------------------------------------------------------------------------------------------------------
     * The members below are inherited; they are listed here for reference only and are not redeclared.
     *
     * public ModelKeyV3 modelId;
     *     Destination id for this model; auto-generated if not specified.
     * public FrameKeyV3 trainingFrame;
     *     Id of the training data frame.
     * public FrameKeyV3 validationFrame;
     *     Id of the validation data frame.
     * public int nfolds;
     *     Number of folds for K-fold cross-validation (0 to disable or >= 2).
     * public boolean keepCrossValidationModels;
     *     Whether to keep the cross-validation models.
     * public boolean keepCrossValidationPredictions;
     *     Whether to keep the predictions of the cross-validation models.
     * public boolean keepCrossValidationFoldAssignment;
     *     Whether to keep the cross-validation fold assignment.
     * public boolean parallelizeCrossValidation;
     *     Allow parallel training of cross-validation models.
     * public GenmodelutilsDistributionFamily distribution;
     *     Distribution function.
     * public double tweediePower;
     *     Tweedie power for Tweedie regression, must be between 1 and 2.
     * public double quantileAlpha;
     *     Desired quantile for Quantile regression, must be between 0 and 1.
     * public double huberAlpha;
     *     Desired quantile for Huber/M-regression (threshold between quadratic and linear loss, must be between
     *     0 and 1).
     * public ColSpecifierV3 responseColumn;
     *     Response variable column.
     * public ColSpecifierV3 weightsColumn;
     *     Column with observation weights. Giving some observation a weight of zero is equivalent to excluding it
     *     from the dataset; giving an observation a relative weight of 2 is equivalent to repeating that row twice.
     *     Negative weights are not allowed. Note: Weights are per-row observation weights and do not increase the
     *     size of the data frame. This is typically the number of times a row is repeated, but non-integer values
     *     are supported as well. During training, rows with higher weights matter more, due to the larger loss
     *     function pre-factor. If you set weight = 0 for a row, the returned prediction frame at that row is zero
     *     and this is incorrect. To get an accurate prediction, remove all rows with weight == 0.
     * public ColSpecifierV3 offsetColumn;
     *     Offset column. This will be added to the combination of columns before applying the link function.
     * public ColSpecifierV3 foldColumn;
     *     Column with cross-validation fold index assignment per observation.
     * public ModelParametersFoldAssignmentScheme foldAssignment;
     *     Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will
     *     stratify the folds based on the response variable, for classification problems.
     * public ModelParametersCategoricalEncodingScheme categoricalEncoding;
     *     Encoding scheme for categorical features.
     * public int maxCategoricalLevels;
     *     For every categorical feature, only use this many most frequent categorical levels for model training.
     *     Only used for categorical_encoding == EnumLimited.
     * public String[] ignoredColumns;
     *     Names of columns to ignore for training.
     * public boolean ignoreConstCols;
     *     Ignore constant columns.
     * public boolean scoreEachIteration;
     *     Whether to score during each iteration of model training.
     * public ModelKeyV3 checkpoint;
     *     Model checkpoint to resume training with.
     * public int stoppingRounds;
     *     Early stopping based on convergence of stopping_metric. Stop if simple moving average of length k of the
     *     stopping_metric does not improve for k:=stopping_rounds scoring events (0 to disable).
     * public double maxRuntimeSecs;
     *     Maximum allowed runtime in seconds for model training. Use 0 to disable.
     * public ScoreKeeperStoppingMetric stoppingMetric;
     *     Metric to use for early stopping (AUTO: logloss for classification, deviance for regression and
     *     anomaly_score for Isolation Forest). Note that custom and custom_increasing can only be used in GBM and
     *     DRF with the Python client.
     * public double stoppingTolerance;
     *     Relative tolerance for metric-based stopping criterion (stop if relative improvement is not at least
     *     this much).
     * public int gainsliftBins;
     *     Gains/Lift table number of bins. 0 means disabled. Default value -1 means automatic binning.
     * public String customMetricFunc;
     *     Reference to custom evaluation function, format: `language:keyName=funcName`
     * public String customDistributionFunc;
     *     Reference to custom distribution, format: `language:keyName=funcName`
     * public String exportCheckpointsDir;
     *     Automatically export generated models to this directory.
     * public MultinomialAucType aucType;
     *     Set default multinomial AUC type.
     */

    /**
     * Creates a parameter object with the defaults assigned below, first for the PCA-specific fields and then
     * for the inherited model-building fields.
     *
     * <p>{@code pcaImpl} is not assigned here, so it is {@code null} right after construction.
     */
    public PCAParametersV3() {
        // PCA-specific defaults.
        this.transform = DataInfoTransformType.NONE;
        this.pcaMethod = PCAMethod.GramSVD;
        this.k = 1;
        this.maxIterations = 1000;
        this.seed = -1L;
        this.useAllFactorLevels = false;
        this.computeMetrics = true;
        this.imputeMissing = false;

        // Inherited defaults: cross-validation.
        this.nfolds = 0;
        this.keepCrossValidationModels = true;
        this.keepCrossValidationPredictions = false;
        this.keepCrossValidationFoldAssignment = false;
        this.parallelizeCrossValidation = true;

        // Inherited defaults: distribution settings.
        this.distribution = GenmodelutilsDistributionFamily.AUTO;
        this.tweediePower = 1.5;
        this.quantileAlpha = 0.5;
        this.huberAlpha = 0.9;

        // Inherited defaults: fold assignment, categorical handling and column filtering.
        this.foldAssignment = ModelParametersFoldAssignmentScheme.AUTO;
        this.categoricalEncoding = ModelParametersCategoricalEncodingScheme.AUTO;
        this.maxCategoricalLevels = 10;
        this.ignoreConstCols = true;

        // Inherited defaults: scoring and early stopping.
        this.scoreEachIteration = false;
        this.stoppingRounds = 0;
        this.maxRuntimeSecs = 0.0;
        this.stoppingMetric = ScoreKeeperStoppingMetric.AUTO;
        this.stoppingTolerance = 0.001;
        this.gainsliftBins = -1;

        // Inherited defaults: custom functions, checkpoint export and AUC type.
        this.customMetricFunc = "";
        this.customDistributionFunc = "";
        this.exportCheckpointsDir = "";
        this.aucType = MultinomialAucType.AUTO;
    }

    /**
     * Serializes this object with a freshly created {@link Gson} instance.
     *
     * @return the contents of this object as a JSON String
     */
    @Override
    public String toString() {
        final Gson serializer = new Gson();
        return serializer.toJson(this);
    }
}