// ai.h2o.sparkling.ml.params.H2ODeepLearningParams.scala Maven / Gradle / Ivy
// The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ai.h2o.sparkling.ml.params
import hex.deeplearning.DeepLearningModel.DeepLearningParameters
import ai.h2o.sparkling.H2OFrame
import hex.deeplearning.DeepLearningModel.DeepLearningParameters.Activation
import hex.deeplearning.DeepLearningModel.DeepLearningParameters.InitialWeightDistribution
import hex.deeplearning.DeepLearningModel.DeepLearningParameters.Loss
import hex.deeplearning.DeepLearningModel.DeepLearningParameters.ClassSamplingMethod
import hex.deeplearning.DeepLearningModel.DeepLearningParameters.MissingValuesHandling
import hex.genmodel.utils.DistributionFamily
import hex.Model.Parameters.FoldAssignmentScheme
import hex.Model.Parameters.CategoricalEncodingScheme
import hex.ScoreKeeper.StoppingMetric
import hex.MultinomialAucType
trait H2ODeepLearningParams
extends H2OAlgoParamsBase
with HasInitialBiases
with HasInitialWeights
with HasIgnoredCols {
/** Supplies the runtime class tag of H2O's `DeepLearningParameters`. */
protected def paramTag: reflect.ClassTag[DeepLearningParameters] =
  reflect.classTag[DeepLearningParameters]
//
// Parameter definitions
//
// Every parameter below is created by a typed factory (booleanParam, doubleParam, ...) that
// receives the parameter's name and its user-facing description.

// Class balancing.
protected val balanceClasses = booleanParam(name = "balanceClasses",
  doc = """Balance training data class counts via over/under-sampling (for imbalanced data).""")
protected val classSamplingFactors = nullableFloatArrayParam(name = "classSamplingFactors",
  doc = """Desired over/under-sampling ratios per class (in lexicographic order). If not specified, sampling factors will be automatically computed to obtain class balance during training. Requires balance_classes.""")
protected val maxAfterBalanceSize = floatParam(name = "maxAfterBalanceSize",
  doc = """Maximum relative size of the training data after balancing class counts (can be less than 1.0). Requires balance_classes.""")

// Network shape and amount of training.
protected val activation = stringParam(name = "activation",
  doc = """Activation function. Possible values are ``"Tanh"``, ``"TanhWithDropout"``, ``"Rectifier"``, ``"RectifierWithDropout"``, ``"Maxout"``, ``"MaxoutWithDropout"``, ``"ExpRectifier"``, ``"ExpRectifierWithDropout"``.""")
protected val hidden = intArrayParam(name = "hidden",
  doc = """Hidden layer sizes (e.g. [100, 100]).""")
protected val epochs = doubleParam(name = "epochs",
  doc = """How many times the dataset should be iterated (streamed), can be fractional.""")
protected val trainSamplesPerIteration = longParam(name = "trainSamplesPerIteration",
  doc = """Number of training samples (globally) per MapReduce iteration. Special values are 0: one epoch, -1: all available data (e.g., replicated training data), -2: automatic.""")
protected val targetRatioCommToComp = doubleParam(name = "targetRatioCommToComp",
  doc = """Target ratio of communication overhead to computation. Only for multi-node operation and train_samples_per_iteration = -2 (auto-tuning).""")
protected val seed = longParam(name = "seed",
  doc = """Seed for random numbers (affects sampling) - Note: only reproducible when running single threaded.""")

// Learning rate.
protected val adaptiveRate = booleanParam(name = "adaptiveRate",
  doc = """Adaptive learning rate.""")
protected val rho = doubleParam(name = "rho",
  doc = """Adaptive learning rate time decay factor (similarity to prior updates).""")
protected val epsilon = doubleParam(name = "epsilon",
  doc = """Adaptive learning rate smoothing factor (to avoid divisions by zero and allow progress).""")
protected val rate = doubleParam(name = "rate",
  doc = """Learning rate (higher => less stable, lower => slower convergence).""")
protected val rateAnnealing = doubleParam(name = "rateAnnealing",
  doc = """Learning rate annealing: rate / (1 + rate_annealing * samples).""")
// Learning-rate decay applied from layer to layer; the description is shown to users as the parameter's doc.
protected val rateDecay = doubleParam(
  name = "rateDecay",
  doc = """Learning rate decay factor between layers (N-th layer: rate * rate_decay ^ (n - 1)).""")
// Momentum.
protected val momentumStart = doubleParam(name = "momentumStart",
  doc = """Initial momentum at the beginning of training (try 0.5).""")
protected val momentumRamp = doubleParam(name = "momentumRamp",
  doc = """Number of training samples for which momentum increases.""")
protected val momentumStable = doubleParam(name = "momentumStable",
  doc = """Final momentum after the ramp is over (try 0.99).""")
protected val nesterovAcceleratedGradient = booleanParam(name = "nesterovAcceleratedGradient",
  doc = """Use Nesterov accelerated gradient (recommended).""")

// Dropout and L1 regularization.
protected val inputDropoutRatio = doubleParam(name = "inputDropoutRatio",
  doc = """Input layer dropout ratio (can improve generalization, try 0.1 or 0.2).""")
protected val hiddenDropoutRatios = nullableDoubleArrayParam(name = "hiddenDropoutRatios",
  doc = """Hidden layer dropout ratios (can improve generalization), specify one value per hidden layer, defaults to 0.5.""")
protected val l1 = doubleParam(name = "l1",
  doc = """L1 regularization (can add stability and improve generalization, causes many weights to become 0).""")
// L2 regularization strength; the description is shown to users as the parameter's doc.
protected val l2 = doubleParam(
  name = "l2",
  doc = """L2 regularization (can add stability and improve generalization, causes many weights to be small).""")
// Weight constraints, weight initialization and loss.
protected val maxW2 = floatParam(name = "maxW2",
  doc = """Constraint for squared sum of incoming weights per unit (e.g. for Rectifier).""")
protected val initialWeightDistribution = stringParam(name = "initialWeightDistribution",
  doc = """Initial weight distribution. Possible values are ``"UniformAdaptive"``, ``"Uniform"``, ``"Normal"``.""")
protected val initialWeightScale = doubleParam(name = "initialWeightScale",
  doc = """Uniform: -value...value, Normal: stddev.""")
protected val loss = stringParam(name = "loss",
  doc = """Loss function. Possible values are ``"Automatic"``, ``"Quadratic"``, ``"CrossEntropy"``, ``"ModifiedHuber"``, ``"Huber"``, ``"Absolute"``, ``"Quantile"``.""")

// Scoring cadence and training-error stopping criteria.
protected val scoreInterval = doubleParam(name = "scoreInterval",
  doc = """Shortest time interval (in seconds) between model scoring.""")
protected val scoreTrainingSamples = longParam(name = "scoreTrainingSamples",
  doc = """Number of training set samples for scoring (0 for all).""")
protected val scoreValidationSamples = longParam(name = "scoreValidationSamples",
  doc = """Number of validation set samples for scoring (0 for all).""")
protected val scoreDutyCycle = doubleParam(name = "scoreDutyCycle",
  doc = """Maximum duty cycle fraction for scoring (lower: more training, higher: more scoring).""")
protected val classificationStop = doubleParam(name = "classificationStop",
  doc = """Stopping criterion for classification error fraction on training data (-1 to disable).""")
protected val regressionStop = doubleParam(name = "regressionStop",
  doc = """Stopping criterion for regression error (MSE) on training data (-1 to disable).""")
protected val quietMode = booleanParam(name = "quietMode",
  doc = """Enable quiet mode for less output to standard output.""")
protected val scoreValidationSampling = stringParam(name = "scoreValidationSampling",
  doc = """Method used to sample validation dataset for scoring. Possible values are ``"Uniform"``, ``"Stratified"``.""")
protected val overwriteWithBestModel = booleanParam(name = "overwriteWithBestModel",
  doc = """If enabled, override the final model with the best model found during training.""")

// Input preparation and diagnostics.
protected val useAllFactorLevels = booleanParam(name = "useAllFactorLevels",
  doc = """Use all factor levels of categorical variables. Otherwise, the first factor level is omitted (without loss of accuracy). Useful for variable importances and auto-enabled for autoencoder.""")
protected val standardize = booleanParam(name = "standardize",
  doc = """If enabled, automatically standardize the data. If disabled, the user must provide properly scaled input data.""")
protected val diagnostics = booleanParam(name = "diagnostics",
  doc = """Enable diagnostics for hidden layers.""")
protected val calculateFeatureImportances = booleanParam(name = "calculateFeatureImportances",
  doc = """Compute variable importances for input features (Gedeon method) - can be slow for large networks.""")

// Execution strategy.
protected val fastMode = booleanParam(name = "fastMode",
  doc = """Enable fast mode (minor approximation in back-propagation).""")
protected val forceLoadBalance = booleanParam(name = "forceLoadBalance",
  doc = """Force extra load balancing to increase training speed for small datasets (to keep all cores busy).""")
protected val replicateTrainingData = booleanParam(name = "replicateTrainingData",
  doc = """Replicate the entire training dataset onto every node for faster training on small datasets.""")
protected val singleNodeMode = booleanParam(name = "singleNodeMode",
  doc = """Run on a single node for fine-tuning of model parameters.""")
// Toggle for shuffling the training data; the description is shown to users as the parameter's doc.
protected val shuffleTrainingData = booleanParam(
  name = "shuffleTrainingData",
  doc = """Enable shuffling of training data (recommended if training data is replicated and train_samples_per_iteration is close to #nodes x #rows, or if using balance_classes).""")
// Missing values and sparsity.
protected val missingValuesHandling = stringParam(name = "missingValuesHandling",
  doc = """Handling of missing values. Either MeanImputation or Skip. Possible values are ``"MeanImputation"``, ``"Skip"``.""")
protected val sparse = booleanParam(name = "sparse",
  doc = """Sparse data handling (more efficient for data with lots of 0 values).""")
protected val averageActivation = doubleParam(name = "averageActivation",
  doc = """Average activation for sparse auto-encoder. #Experimental.""")
protected val sparsityBeta = doubleParam(name = "sparsityBeta",
  doc = """Sparsity regularization. #Experimental.""")
protected val maxCategoricalFeatures = intParam(name = "maxCategoricalFeatures",
  doc = """Max. number of categorical features, enforced via hashing. #Experimental.""")

// Reproducibility, export and mini-batching.
protected val reproducible = booleanParam(name = "reproducible",
  doc = """Force reproducibility on small data (will be slow - only uses 1 thread).""")
protected val exportWeightsAndBiases = booleanParam(name = "exportWeightsAndBiases",
  doc = """Whether to export Neural Network weights and biases to H2O Frames.""")
protected val miniBatchSize = intParam(name = "miniBatchSize",
  doc = """Mini-batch size (smaller leads to better fit, larger can speed up and generalize better).""")

// Elastic averaging.
protected val elasticAveraging = booleanParam(name = "elasticAveraging",
  doc = """Elastic averaging between compute nodes can improve distributed model convergence. #Experimental.""")
protected val elasticAveragingMovingRate = doubleParam(name = "elasticAveragingMovingRate",
  doc = """Elastic averaging moving rate (only if elastic averaging is enabled).""")
protected val elasticAveragingRegularization = doubleParam(name = "elasticAveragingRegularization",
  doc = """Elastic averaging regularization strength (only if elastic averaging is enabled).""")
// Model identity and cross-validation.
protected val modelId = nullableStringParam(name = "modelId",
  doc = """Destination id for this model; auto-generated if not specified.""")
protected val nfolds = intParam(name = "nfolds",
  doc = """Number of folds for K-fold cross-validation (0 to disable or >= 2).""")
protected val keepCrossValidationModels = booleanParam(name = "keepCrossValidationModels",
  doc = """Whether to keep the cross-validation models.""")
protected val keepCrossValidationPredictions = booleanParam(name = "keepCrossValidationPredictions",
  doc = """Whether to keep the predictions of the cross-validation models.""")
protected val keepCrossValidationFoldAssignment = booleanParam(name = "keepCrossValidationFoldAssignment",
  doc = """Whether to keep the cross-validation fold assignment.""")

// Response distribution.
protected val distribution = stringParam(name = "distribution",
  doc = """Distribution function. Possible values are ``"AUTO"``, ``"bernoulli"``, ``"quasibinomial"``, ``"modified_huber"``, ``"multinomial"``, ``"ordinal"``, ``"gaussian"``, ``"poisson"``, ``"gamma"``, ``"tweedie"``, ``"huber"``, ``"laplace"``, ``"quantile"``, ``"fractionalbinomial"``, ``"negativebinomial"``, ``"custom"``.""")
protected val tweediePower = doubleParam(name = "tweediePower",
  doc = """Tweedie power for Tweedie regression, must be between 1 and 2.""")
protected val quantileAlpha = doubleParam(name = "quantileAlpha",
  doc = """Desired quantile for Quantile regression, must be between 0 and 1.""")
protected val huberAlpha = doubleParam(name = "huberAlpha",
  doc = """Desired quantile for Huber/M-regression (threshold between quadratic and linear loss, must be between 0 and 1).""")

// Column roles.
protected val labelCol = stringParam(name = "labelCol",
  doc = """Response variable column.""")
protected val weightCol = nullableStringParam(name = "weightCol",
  doc = """Column with observation weights. Giving some observation a weight of zero is equivalent to excluding it from the dataset; giving an observation a relative weight of 2 is equivalent to repeating that row twice. Negative weights are not allowed. Note: Weights are per-row observation weights and do not increase the size of the data frame. This is typically the number of times a row is repeated, but non-integer values are supported as well. During training, rows with higher weights matter more, due to the larger loss function pre-factor. If you set weight = 0 for a row, the returned prediction frame at that row is zero and this is incorrect. To get an accurate prediction, remove all rows with weight == 0.""")
protected val offsetCol = nullableStringParam(name = "offsetCol",
  doc = """Offset column. This will be added to the combination of columns before applying the link function.""")
protected val foldCol = nullableStringParam(name = "foldCol",
  doc = """Column with cross-validation fold index assignment per observation.""")
protected val foldAssignment = stringParam(name = "foldAssignment",
  doc = """Cross-validation fold assignment scheme, if fold_column is not specified. The 'Stratified' option will stratify the folds based on the response variable, for classification problems. Possible values are ``"AUTO"``, ``"Random"``, ``"Modulo"``, ``"Stratified"``.""")
protected val categoricalEncoding = stringParam(name = "categoricalEncoding",
  doc = """Encoding scheme for categorical features. Possible values are ``"AUTO"``, ``"OneHotInternal"``, ``"OneHotExplicit"``, ``"Enum"``, ``"Binary"``, ``"Eigen"``, ``"LabelEncoder"``, ``"SortByResponse"``, ``"EnumLimited"``.""")
protected val ignoreConstCols = booleanParam(name = "ignoreConstCols",
  doc = """Ignore constant columns.""")

// Scoring and early stopping.
protected val scoreEachIteration = booleanParam(name = "scoreEachIteration",
  doc = """Whether to score during each iteration of model training.""")
protected val stoppingRounds = intParam(name = "stoppingRounds",
  doc = """Early stopping based on convergence of stopping_metric. Stop if simple moving average of length k of the stopping_metric does not improve for k:=stopping_rounds scoring events (0 to disable).""")
protected val maxRuntimeSecs = doubleParam(name = "maxRuntimeSecs",
  doc = """Maximum allowed runtime in seconds for model training. Use 0 to disable.""")
protected val stoppingMetric = stringParam(name = "stoppingMetric",
  doc = """Metric to use for early stopping (AUTO: logloss for classification, deviance for regression and anomaly_score for Isolation Forest). Note that custom and custom_increasing can only be used in GBM and DRF with the Python client. Possible values are ``"AUTO"``, ``"deviance"``, ``"logloss"``, ``"MSE"``, ``"RMSE"``, ``"MAE"``, ``"RMSLE"``, ``"AUC"``, ``"AUCPR"``, ``"lift_top_group"``, ``"misclassification"``, ``"mean_per_class_error"``, ``"anomaly_score"``, ``"AUUC"``, ``"ATE"``, ``"ATT"``, ``"ATC"``, ``"qini"``, ``"custom"``, ``"custom_increasing"``.""")
protected val stoppingTolerance = doubleParam(name = "stoppingTolerance",
  doc = """Relative tolerance for metric-based stopping criterion (stop if relative improvement is not at least this much).""")
// Bin count of the Gains/Lift table; the description is shown to users as the parameter's doc.
protected val gainsliftBins = intParam(
  name = "gainsliftBins",
  doc = """Gains/Lift table number of bins. 0 means disabled. Default value -1 means automatic binning.""")
// Custom metric, checkpoint export and multinomial AUC.
protected val customMetricFunc = nullableStringParam(name = "customMetricFunc",
  doc = """Reference to custom evaluation function, format: `language:keyName=funcName`.""")
protected val exportCheckpointsDir = nullableStringParam(name = "exportCheckpointsDir",
  doc = """Automatically export generated models to this directory.""")
protected val aucType = stringParam(name = "aucType",
  doc = """Set default multinomial AUC type. Possible values are ``"AUTO"``, ``"NONE"``, ``"MACRO_OVR"``, ``"WEIGHTED_OVR"``, ``"MACRO_OVO"``, ``"WEIGHTED_OVO"``.""")
//
// Default values
//
// Assigns a default to every parameter declared by this trait in one setDefault call.
// Enum-backed string parameters take the constant's name as their default.
setDefault(
  // Class balancing
  balanceClasses -> false,
  classSamplingFactors -> null,
  maxAfterBalanceSize -> 5.0f,
  // Network shape and amount of training
  activation -> Activation.Rectifier.name,
  hidden -> Array(200, 200),
  epochs -> 10.0,
  trainSamplesPerIteration -> -2L,
  targetRatioCommToComp -> 0.05,
  seed -> -1L,
  // Learning rate and momentum
  adaptiveRate -> true,
  rho -> 0.99,
  epsilon -> 1e-8,
  rate -> 0.005,
  rateAnnealing -> 1e-6,
  rateDecay -> 1.0,
  momentumStart -> 0.0,
  momentumRamp -> 1e6,
  momentumStable -> 0.0,
  nesterovAcceleratedGradient -> true,
  // Regularization and initialization
  inputDropoutRatio -> 0.0,
  hiddenDropoutRatios -> null,
  l1 -> 0.0,
  l2 -> 0.0,
  // NOTE(review): close to, but not the same literal as, Float.MaxValue (3.4028235e38f); kept as written.
  maxW2 -> 3.402823e38f,
  initialWeightDistribution -> InitialWeightDistribution.UniformAdaptive.name,
  initialWeightScale -> 1.0,
  loss -> Loss.Automatic.name,
  // Scoring
  scoreInterval -> 5.0,
  scoreTrainingSamples -> 10000L,
  scoreValidationSamples -> 0L,
  scoreDutyCycle -> 0.1,
  classificationStop -> 0.0,
  regressionStop -> 1e-6,
  quietMode -> false,
  scoreValidationSampling -> ClassSamplingMethod.Uniform.name,
  overwriteWithBestModel -> true,
  // Input preparation, diagnostics and execution strategy
  useAllFactorLevels -> true,
  standardize -> true,
  diagnostics -> true,
  calculateFeatureImportances -> true,
  fastMode -> true,
  forceLoadBalance -> true,
  replicateTrainingData -> true,
  singleNodeMode -> false,
  shuffleTrainingData -> false,
  missingValuesHandling -> MissingValuesHandling.MeanImputation.name,
  sparse -> false,
  averageActivation -> 0.0,
  sparsityBeta -> 0.0,
  maxCategoricalFeatures -> Int.MaxValue,
  reproducible -> false,
  exportWeightsAndBiases -> false,
  miniBatchSize -> 1,
  elasticAveraging -> false,
  elasticAveragingMovingRate -> 0.9,
  elasticAveragingRegularization -> 0.001,
  // Model identity and cross-validation
  modelId -> null,
  nfolds -> 0,
  keepCrossValidationModels -> true,
  keepCrossValidationPredictions -> false,
  keepCrossValidationFoldAssignment -> false,
  // Response distribution
  distribution -> DistributionFamily.AUTO.name,
  tweediePower -> 1.5,
  quantileAlpha -> 0.5,
  huberAlpha -> 0.9,
  // Column roles
  labelCol -> "label",
  weightCol -> null,
  offsetCol -> null,
  foldCol -> null,
  foldAssignment -> FoldAssignmentScheme.AUTO.name,
  categoricalEncoding -> CategoricalEncodingScheme.AUTO.name,
  ignoreConstCols -> true,
  // Scoring cadence, early stopping and remaining options
  scoreEachIteration -> false,
  stoppingRounds -> 5,
  maxRuntimeSecs -> 0.0,
  stoppingMetric -> StoppingMetric.AUTO.name,
  stoppingTolerance -> 0.0,
  gainsliftBins -> -1,
  customMetricFunc -> null,
  exportCheckpointsDir -> null,
  aucType -> MultinomialAucType.AUTO.name)
//
// Getters
//
// Each getter reads its parameter through `$`. Getters of parameters whose default is null
// (classSamplingFactors, hiddenDropoutRatios) can therefore return null.
def getBalanceClasses(): Boolean =
  $(balanceClasses)
def getClassSamplingFactors(): Array[Float] =
  $(classSamplingFactors)
def getMaxAfterBalanceSize(): Float =
  $(maxAfterBalanceSize)
def getActivation(): String =
  $(activation)
def getHidden(): Array[Int] =
  $(hidden)
def getEpochs(): Double =
  $(epochs)
def getTrainSamplesPerIteration(): Long =
  $(trainSamplesPerIteration)
def getTargetRatioCommToComp(): Double =
  $(targetRatioCommToComp)
def getSeed(): Long =
  $(seed)
def getAdaptiveRate(): Boolean =
  $(adaptiveRate)
def getRho(): Double =
  $(rho)
def getEpsilon(): Double =
  $(epsilon)
def getRate(): Double =
  $(rate)
def getRateAnnealing(): Double =
  $(rateAnnealing)
def getRateDecay(): Double =
  $(rateDecay)
def getMomentumStart(): Double =
  $(momentumStart)
def getMomentumRamp(): Double =
  $(momentumRamp)
def getMomentumStable(): Double =
  $(momentumStable)
def getNesterovAcceleratedGradient(): Boolean =
  $(nesterovAcceleratedGradient)
def getInputDropoutRatio(): Double =
  $(inputDropoutRatio)
def getHiddenDropoutRatios(): Array[Double] =
  $(hiddenDropoutRatios)
def getL1(): Double =
  $(l1)
def getL2(): Double =
  $(l2)
def getMaxW2(): Float =
  $(maxW2)
def getInitialWeightDistribution(): String =
  $(initialWeightDistribution)
def getInitialWeightScale(): Double =
  $(initialWeightScale)
def getLoss(): String =
  $(loss)
// Getters for scoring, data handling and execution options; each reads its parameter through `$`.
def getScoreInterval(): Double =
  $(scoreInterval)
def getScoreTrainingSamples(): Long =
  $(scoreTrainingSamples)
def getScoreValidationSamples(): Long =
  $(scoreValidationSamples)
def getScoreDutyCycle(): Double =
  $(scoreDutyCycle)
def getClassificationStop(): Double =
  $(classificationStop)
def getRegressionStop(): Double =
  $(regressionStop)
def getQuietMode(): Boolean =
  $(quietMode)
def getScoreValidationSampling(): String =
  $(scoreValidationSampling)
def getOverwriteWithBestModel(): Boolean =
  $(overwriteWithBestModel)
def getUseAllFactorLevels(): Boolean =
  $(useAllFactorLevels)
def getStandardize(): Boolean =
  $(standardize)
def getDiagnostics(): Boolean =
  $(diagnostics)
def getCalculateFeatureImportances(): Boolean =
  $(calculateFeatureImportances)
def getFastMode(): Boolean =
  $(fastMode)
def getForceLoadBalance(): Boolean =
  $(forceLoadBalance)
def getReplicateTrainingData(): Boolean =
  $(replicateTrainingData)
def getSingleNodeMode(): Boolean =
  $(singleNodeMode)
def getShuffleTrainingData(): Boolean =
  $(shuffleTrainingData)
def getMissingValuesHandling(): String =
  $(missingValuesHandling)
def getSparse(): Boolean =
  $(sparse)
def getAverageActivation(): Double =
  $(averageActivation)
def getSparsityBeta(): Double =
  $(sparsityBeta)
def getMaxCategoricalFeatures(): Int =
  $(maxCategoricalFeatures)
def getReproducible(): Boolean =
  $(reproducible)
def getExportWeightsAndBiases(): Boolean =
  $(exportWeightsAndBiases)
def getMiniBatchSize(): Int =
  $(miniBatchSize)
def getElasticAveraging(): Boolean =
  $(elasticAveraging)
def getElasticAveragingMovingRate(): Double =
  $(elasticAveragingMovingRate)
def getElasticAveragingRegularization(): Double =
  $(elasticAveragingRegularization)
// Getters for model identity, cross-validation, columns and stopping options; each reads its
// parameter through `$`. Those whose default is null (modelId, weightCol, offsetCol, foldCol,
// customMetricFunc, exportCheckpointsDir) can return null.
def getModelId(): String =
  $(modelId)
def getNfolds(): Int =
  $(nfolds)
def getKeepCrossValidationModels(): Boolean =
  $(keepCrossValidationModels)
def getKeepCrossValidationPredictions(): Boolean =
  $(keepCrossValidationPredictions)
def getKeepCrossValidationFoldAssignment(): Boolean =
  $(keepCrossValidationFoldAssignment)
def getDistribution(): String =
  $(distribution)
def getTweediePower(): Double =
  $(tweediePower)
def getQuantileAlpha(): Double =
  $(quantileAlpha)
def getHuberAlpha(): Double =
  $(huberAlpha)
def getLabelCol(): String =
  $(labelCol)
def getWeightCol(): String =
  $(weightCol)
def getOffsetCol(): String =
  $(offsetCol)
def getFoldCol(): String =
  $(foldCol)
def getFoldAssignment(): String =
  $(foldAssignment)
def getCategoricalEncoding(): String =
  $(categoricalEncoding)
def getIgnoreConstCols(): Boolean =
  $(ignoreConstCols)
def getScoreEachIteration(): Boolean =
  $(scoreEachIteration)
def getStoppingRounds(): Int =
  $(stoppingRounds)
def getMaxRuntimeSecs(): Double =
  $(maxRuntimeSecs)
def getStoppingMetric(): String =
  $(stoppingMetric)
def getStoppingTolerance(): Double =
  $(stoppingTolerance)
def getGainsliftBins(): Int =
  $(gainsliftBins)
def getCustomMetricFunc(): String =
  $(customMetricFunc)
def getExportCheckpointsDir(): String =
  $(exportCheckpointsDir)
def getAucType(): String =
  $(aucType)
//
// Setters
//
// Plain setters: hand `value` to `set` for the matching parameter and return its result.
def setBalanceClasses(value: Boolean): this.type = set(balanceClasses, value)

def setClassSamplingFactors(value: Array[Float]): this.type = set(classSamplingFactors, value)

def setMaxAfterBalanceSize(value: Float): this.type = set(maxAfterBalanceSize, value)
/**
 * Stores the activation function name after passing it through
 * `EnumParamValidator.getValidatedEnumValue` for the `Activation` enum.
 * NOTE(review): presumably rejects names that are not `Activation` constants; confirm in the validator.
 */
def setActivation(value: String): this.type =
  set(activation, EnumParamValidator.getValidatedEnumValue[Activation](value))
// Plain setters: hand `value` to `set` for the matching parameter and return its result.
def setHidden(value: Array[Int]): this.type = set(hidden, value)

def setEpochs(value: Double): this.type = set(epochs, value)

def setTrainSamplesPerIteration(value: Long): this.type = set(trainSamplesPerIteration, value)

def setTargetRatioCommToComp(value: Double): this.type = set(targetRatioCommToComp, value)

def setSeed(value: Long): this.type = set(seed, value)

def setAdaptiveRate(value: Boolean): this.type = set(adaptiveRate, value)

def setRho(value: Double): this.type = set(rho, value)

def setEpsilon(value: Double): this.type = set(epsilon, value)

def setRate(value: Double): this.type = set(rate, value)

def setRateAnnealing(value: Double): this.type = set(rateAnnealing, value)

def setRateDecay(value: Double): this.type = set(rateDecay, value)

def setMomentumStart(value: Double): this.type = set(momentumStart, value)

def setMomentumRamp(value: Double): this.type = set(momentumRamp, value)

def setMomentumStable(value: Double): this.type = set(momentumStable, value)

def setNesterovAcceleratedGradient(value: Boolean): this.type = set(nesterovAcceleratedGradient, value)

def setInputDropoutRatio(value: Double): this.type = set(inputDropoutRatio, value)

def setHiddenDropoutRatios(value: Array[Double]): this.type = set(hiddenDropoutRatios, value)

def setL1(value: Double): this.type = set(l1, value)

def setL2(value: Double): this.type = set(l2, value)

def setMaxW2(value: Float): this.type = set(maxW2, value)
/**
 * Stores the initial weight distribution name after passing it through
 * `EnumParamValidator.getValidatedEnumValue` for the `InitialWeightDistribution` enum.
 * NOTE(review): presumably rejects names that are not constants of that enum; confirm in the validator.
 */
def setInitialWeightDistribution(value: String): this.type =
  set(initialWeightDistribution, EnumParamValidator.getValidatedEnumValue[InitialWeightDistribution](value))
/** Hands `value` to `set` for the `initialWeightScale` parameter and returns its result. */
def setInitialWeightScale(value: Double): this.type = set(initialWeightScale, value)
/**
 * Stores the loss function name after passing it through
 * `EnumParamValidator.getValidatedEnumValue` for the `Loss` enum.
 * NOTE(review): presumably rejects names that are not `Loss` constants; confirm in the validator.
 */
def setLoss(value: String): this.type =
  set(loss, EnumParamValidator.getValidatedEnumValue[Loss](value))
// Plain setters: hand `value` to `set` for the matching parameter and return its result.
def setScoreInterval(value: Double): this.type = set(scoreInterval, value)

def setScoreTrainingSamples(value: Long): this.type = set(scoreTrainingSamples, value)

def setScoreValidationSamples(value: Long): this.type = set(scoreValidationSamples, value)

def setScoreDutyCycle(value: Double): this.type = set(scoreDutyCycle, value)

def setClassificationStop(value: Double): this.type = set(classificationStop, value)

def setRegressionStop(value: Double): this.type = set(regressionStop, value)

def setQuietMode(value: Boolean): this.type = set(quietMode, value)
/**
 * Stores the validation sampling method name after passing it through
 * `EnumParamValidator.getValidatedEnumValue` for the `ClassSamplingMethod` enum.
 * NOTE(review): presumably rejects names that are not constants of that enum; confirm in the validator.
 */
def setScoreValidationSampling(value: String): this.type =
  set(scoreValidationSampling, EnumParamValidator.getValidatedEnumValue[ClassSamplingMethod](value))
// Plain setters: hand `value` to `set` for the matching parameter and return its result.
def setOverwriteWithBestModel(value: Boolean): this.type = set(overwriteWithBestModel, value)

def setUseAllFactorLevels(value: Boolean): this.type = set(useAllFactorLevels, value)

def setStandardize(value: Boolean): this.type = set(standardize, value)

def setDiagnostics(value: Boolean): this.type = set(diagnostics, value)

def setCalculateFeatureImportances(value: Boolean): this.type = set(calculateFeatureImportances, value)

def setFastMode(value: Boolean): this.type = set(fastMode, value)

def setForceLoadBalance(value: Boolean): this.type = set(forceLoadBalance, value)

def setReplicateTrainingData(value: Boolean): this.type = set(replicateTrainingData, value)

def setSingleNodeMode(value: Boolean): this.type = set(singleNodeMode, value)

def setShuffleTrainingData(value: Boolean): this.type = set(shuffleTrainingData, value)
/**
 * Stores the missing-values handling name after passing it through
 * `EnumParamValidator.getValidatedEnumValue` for the `MissingValuesHandling` enum.
 * NOTE(review): presumably rejects names that are not constants of that enum; confirm in the validator.
 */
def setMissingValuesHandling(value: String): this.type =
  set(missingValuesHandling, EnumParamValidator.getValidatedEnumValue[MissingValuesHandling](value))
// Plain setters: hand `value` to `set` for the matching parameter and return its result.
def setSparse(value: Boolean): this.type = set(sparse, value)

def setAverageActivation(value: Double): this.type = set(averageActivation, value)

def setSparsityBeta(value: Double): this.type = set(sparsityBeta, value)

def setMaxCategoricalFeatures(value: Int): this.type = set(maxCategoricalFeatures, value)

def setReproducible(value: Boolean): this.type = set(reproducible, value)

def setExportWeightsAndBiases(value: Boolean): this.type = set(exportWeightsAndBiases, value)

def setMiniBatchSize(value: Int): this.type = set(miniBatchSize, value)

def setElasticAveraging(value: Boolean): this.type = set(elasticAveraging, value)

def setElasticAveragingMovingRate(value: Double): this.type = set(elasticAveragingMovingRate, value)

def setElasticAveragingRegularization(value: Double): this.type = set(elasticAveragingRegularization, value)

def setModelId(value: String): this.type = set(modelId, value)

def setNfolds(value: Int): this.type = set(nfolds, value)

def setKeepCrossValidationModels(value: Boolean): this.type = set(keepCrossValidationModels, value)

def setKeepCrossValidationPredictions(value: Boolean): this.type = set(keepCrossValidationPredictions, value)

def setKeepCrossValidationFoldAssignment(value: Boolean): this.type = set(keepCrossValidationFoldAssignment, value)
/**
 * Stores the distribution name after passing it through
 * `EnumParamValidator.getValidatedEnumValue` for the `DistributionFamily` enum.
 * NOTE(review): presumably rejects names that are not constants of that enum; confirm in the validator.
 */
def setDistribution(value: String): this.type =
  set(distribution, EnumParamValidator.getValidatedEnumValue[DistributionFamily](value))
// Plain setters: hand `value` to `set` for the matching parameter and return its result.
def setTweediePower(value: Double): this.type = set(tweediePower, value)

def setQuantileAlpha(value: Double): this.type = set(quantileAlpha, value)

def setHuberAlpha(value: Double): this.type = set(huberAlpha, value)

def setLabelCol(value: String): this.type = set(labelCol, value)

def setWeightCol(value: String): this.type = set(weightCol, value)

def setOffsetCol(value: String): this.type = set(offsetCol, value)

def setFoldCol(value: String): this.type = set(foldCol, value)
/**
 * Stores the fold assignment scheme name after passing it through
 * `EnumParamValidator.getValidatedEnumValue` for the `FoldAssignmentScheme` enum.
 * NOTE(review): presumably rejects names that are not constants of that enum; confirm in the validator.
 */
def setFoldAssignment(value: String): this.type =
  set(foldAssignment, EnumParamValidator.getValidatedEnumValue[FoldAssignmentScheme](value))
/**
 * Stores the categorical encoding scheme name after passing it through
 * `EnumParamValidator.getValidatedEnumValue` for the `CategoricalEncodingScheme` enum.
 * NOTE(review): presumably rejects names that are not constants of that enum; confirm in the validator.
 */
def setCategoricalEncoding(value: String): this.type =
  set(categoricalEncoding, EnumParamValidator.getValidatedEnumValue[CategoricalEncodingScheme](value))
// Plain setters: hand `value` to `set` for the matching parameter and return its result.
def setIgnoreConstCols(value: Boolean): this.type = set(ignoreConstCols, value)

def setScoreEachIteration(value: Boolean): this.type = set(scoreEachIteration, value)

def setStoppingRounds(value: Int): this.type = set(stoppingRounds, value)

def setMaxRuntimeSecs(value: Double): this.type = set(maxRuntimeSecs, value)
/**
 * Stores the stopping metric name after passing it through
 * `EnumParamValidator.getValidatedEnumValue` for the `StoppingMetric` enum.
 * NOTE(review): presumably rejects names that are not constants of that enum; confirm in the validator.
 */
def setStoppingMetric(value: String): this.type =
  set(stoppingMetric, EnumParamValidator.getValidatedEnumValue[StoppingMetric](value))
// Plain setters: hand `value` to `set` for the matching parameter and return its result.
def setStoppingTolerance(value: Double): this.type = set(stoppingTolerance, value)

def setGainsliftBins(value: Int): this.type = set(gainsliftBins, value)

def setCustomMetricFunc(value: String): this.type = set(customMetricFunc, value)

def setExportCheckpointsDir(value: String): this.type = set(exportCheckpointsDir, value)
/**
  * Sets the `aucType` param from a string.
  *
  * The string is first passed through `EnumParamValidator.getValidatedEnumValue`
  * parameterized with `MultinomialAucType`; whatever that call returns is the value
  * stored in the param.
  *
  * @param value string expected to identify a `MultinomialAucType` value
  *              (the exact matching rules live in the validator)
  * @return this instance, so that setter calls can be chained
  */
def setAucType(value: String): this.type =
  set(aucType, EnumParamValidator.getValidatedEnumValue[MultinomialAucType](value))
/**
  * Returns the parameters contributed by the parent implementation combined (`++`)
  * with the entries produced by `getH2ODeepLearningParams`. On a key clash the
  * right-hand (deep learning) entry wins, as is standard for `Map ++ Map`.
  *
  * @param trainingFrame frame handed unchanged to both the parent implementation and
  *                      `getH2ODeepLearningParams`
  * @return the combined parameter map
  */
override private[sparkling] def getH2OAlgorithmParams(trainingFrame: H2OFrame): Map[String, Any] = {
  // The parent is queried first, exactly as in a left-to-right `a ++ b` expression.
  val inheritedParams = super.getH2OAlgorithmParams(trainingFrame)
  val deepLearningParams = getH2ODeepLearningParams(trainingFrame)
  inheritedParams ++ deepLearningParams
}
/**
  * Collects the current values of the params declared by this trait into a map keyed
  * by the snake_case names listed below, then merges in the entries returned by
  * `getInitialBiasesParam`, `getInitialWeightsParam` and `getIgnoredColsParam`.
  *
  * NOTE(review): `+++` is defined outside this block; it is presumably a map-merge
  * operator, but its exact semantics (e.g. behaviour on duplicate keys) should be
  * confirmed at its definition.
  *
  * @param trainingFrame frame forwarded to the three frame-dependent helper calls
  * @return map from parameter name to the value currently returned by its getter,
  *         extended with the helper-provided entries
  */
private[sparkling] def getH2ODeepLearningParams(trainingFrame: H2OFrame): Map[String, Any] = {
  // Getter-backed entries; the getters run in the order they are listed here.
  val getterBackedParams = Map(
    "balance_classes" -> getBalanceClasses(),
    "class_sampling_factors" -> getClassSamplingFactors(),
    "max_after_balance_size" -> getMaxAfterBalanceSize(),
    "activation" -> getActivation(),
    "hidden" -> getHidden(),
    "epochs" -> getEpochs(),
    "train_samples_per_iteration" -> getTrainSamplesPerIteration(),
    "target_ratio_comm_to_comp" -> getTargetRatioCommToComp(),
    "seed" -> getSeed(),
    "adaptive_rate" -> getAdaptiveRate(),
    "rho" -> getRho(),
    "epsilon" -> getEpsilon(),
    "rate" -> getRate(),
    "rate_annealing" -> getRateAnnealing(),
    "rate_decay" -> getRateDecay(),
    "momentum_start" -> getMomentumStart(),
    "momentum_ramp" -> getMomentumRamp(),
    "momentum_stable" -> getMomentumStable(),
    "nesterov_accelerated_gradient" -> getNesterovAcceleratedGradient(),
    "input_dropout_ratio" -> getInputDropoutRatio(),
    "hidden_dropout_ratios" -> getHiddenDropoutRatios(),
    "l1" -> getL1(),
    "l2" -> getL2(),
    "max_w2" -> getMaxW2(),
    "initial_weight_distribution" -> getInitialWeightDistribution(),
    "initial_weight_scale" -> getInitialWeightScale(),
    "loss" -> getLoss(),
    "score_interval" -> getScoreInterval(),
    "score_training_samples" -> getScoreTrainingSamples(),
    "score_validation_samples" -> getScoreValidationSamples(),
    "score_duty_cycle" -> getScoreDutyCycle(),
    "classification_stop" -> getClassificationStop(),
    "regression_stop" -> getRegressionStop(),
    "quiet_mode" -> getQuietMode(),
    "score_validation_sampling" -> getScoreValidationSampling(),
    "overwrite_with_best_model" -> getOverwriteWithBestModel(),
    "use_all_factor_levels" -> getUseAllFactorLevels(),
    "standardize" -> getStandardize(),
    "diagnostics" -> getDiagnostics(),
    "variable_importances" -> getCalculateFeatureImportances(),
    "fast_mode" -> getFastMode(),
    "force_load_balance" -> getForceLoadBalance(),
    "replicate_training_data" -> getReplicateTrainingData(),
    "single_node_mode" -> getSingleNodeMode(),
    "shuffle_training_data" -> getShuffleTrainingData(),
    "missing_values_handling" -> getMissingValuesHandling(),
    "sparse" -> getSparse(),
    "average_activation" -> getAverageActivation(),
    "sparsity_beta" -> getSparsityBeta(),
    "max_categorical_features" -> getMaxCategoricalFeatures(),
    "reproducible" -> getReproducible(),
    "export_weights_and_biases" -> getExportWeightsAndBiases(),
    "mini_batch_size" -> getMiniBatchSize(),
    "elastic_averaging" -> getElasticAveraging(),
    "elastic_averaging_moving_rate" -> getElasticAveragingMovingRate(),
    "elastic_averaging_regularization" -> getElasticAveragingRegularization(),
    "model_id" -> getModelId(),
    "nfolds" -> getNfolds(),
    "keep_cross_validation_models" -> getKeepCrossValidationModels(),
    "keep_cross_validation_predictions" -> getKeepCrossValidationPredictions(),
    "keep_cross_validation_fold_assignment" -> getKeepCrossValidationFoldAssignment(),
    "distribution" -> getDistribution(),
    "tweedie_power" -> getTweediePower(),
    "quantile_alpha" -> getQuantileAlpha(),
    "huber_alpha" -> getHuberAlpha(),
    "response_column" -> getLabelCol(),
    "weights_column" -> getWeightCol(),
    "offset_column" -> getOffsetCol(),
    "fold_column" -> getFoldCol(),
    "fold_assignment" -> getFoldAssignment(),
    "categorical_encoding" -> getCategoricalEncoding(),
    "ignore_const_cols" -> getIgnoreConstCols(),
    "score_each_iteration" -> getScoreEachIteration(),
    "stopping_rounds" -> getStoppingRounds(),
    "max_runtime_secs" -> getMaxRuntimeSecs(),
    "stopping_metric" -> getStoppingMetric(),
    "stopping_tolerance" -> getStoppingTolerance(),
    "gainslift_bins" -> getGainsliftBins(),
    "custom_metric_func" -> getCustomMetricFunc(),
    "export_checkpoints_dir" -> getExportCheckpointsDir(),
    "auc_type" -> getAucType())

  // Each helper is invoked only after the preceding merge has completed, which keeps
  // the call sequence identical to a single left-associative `a +++ b +++ c +++ d`.
  val withInitialBiases = getterBackedParams +++ getInitialBiasesParam(trainingFrame)
  val withInitialWeights = withInitialBiases +++ getInitialWeightsParam(trainingFrame)
  withInitialWeights +++ getIgnoredColsParam(trainingFrame)
}
/**
  * Returns the parent's name mapping extended (`++`) with the camelCase-to-snake_case
  * name pairs listed in this method. On a key clash the pair listed here wins, as is
  * standard for `Map ++ Map`.
  *
  * @return map whose keys are the camelCase names and whose values are the
  *         corresponding snake_case names
  */
override private[sparkling] def getSWtoH2OParamNameMap(): Map[String, String] = {
  // Query the parent first so the evaluation order matches `super...() ++ Map(...)`.
  val inheritedNames = super.getSWtoH2OParamNameMap()
  val deepLearningNames = Map(
    "balanceClasses" -> "balance_classes",
    "classSamplingFactors" -> "class_sampling_factors",
    "maxAfterBalanceSize" -> "max_after_balance_size",
    "activation" -> "activation",
    "hidden" -> "hidden",
    "epochs" -> "epochs",
    "trainSamplesPerIteration" -> "train_samples_per_iteration",
    "targetRatioCommToComp" -> "target_ratio_comm_to_comp",
    "seed" -> "seed",
    "adaptiveRate" -> "adaptive_rate",
    "rho" -> "rho",
    "epsilon" -> "epsilon",
    "rate" -> "rate",
    "rateAnnealing" -> "rate_annealing",
    "rateDecay" -> "rate_decay",
    "momentumStart" -> "momentum_start",
    "momentumRamp" -> "momentum_ramp",
    "momentumStable" -> "momentum_stable",
    "nesterovAcceleratedGradient" -> "nesterov_accelerated_gradient",
    "inputDropoutRatio" -> "input_dropout_ratio",
    "hiddenDropoutRatios" -> "hidden_dropout_ratios",
    "l1" -> "l1",
    "l2" -> "l2",
    "maxW2" -> "max_w2",
    "initialWeightDistribution" -> "initial_weight_distribution",
    "initialWeightScale" -> "initial_weight_scale",
    "loss" -> "loss",
    "scoreInterval" -> "score_interval",
    "scoreTrainingSamples" -> "score_training_samples",
    "scoreValidationSamples" -> "score_validation_samples",
    "scoreDutyCycle" -> "score_duty_cycle",
    "classificationStop" -> "classification_stop",
    "regressionStop" -> "regression_stop",
    "quietMode" -> "quiet_mode",
    "scoreValidationSampling" -> "score_validation_sampling",
    "overwriteWithBestModel" -> "overwrite_with_best_model",
    "useAllFactorLevels" -> "use_all_factor_levels",
    "standardize" -> "standardize",
    "diagnostics" -> "diagnostics",
    "calculateFeatureImportances" -> "variable_importances",
    "fastMode" -> "fast_mode",
    "forceLoadBalance" -> "force_load_balance",
    "replicateTrainingData" -> "replicate_training_data",
    "singleNodeMode" -> "single_node_mode",
    "shuffleTrainingData" -> "shuffle_training_data",
    "missingValuesHandling" -> "missing_values_handling",
    "sparse" -> "sparse",
    "averageActivation" -> "average_activation",
    "sparsityBeta" -> "sparsity_beta",
    "maxCategoricalFeatures" -> "max_categorical_features",
    "reproducible" -> "reproducible",
    "exportWeightsAndBiases" -> "export_weights_and_biases",
    "miniBatchSize" -> "mini_batch_size",
    "elasticAveraging" -> "elastic_averaging",
    "elasticAveragingMovingRate" -> "elastic_averaging_moving_rate",
    "elasticAveragingRegularization" -> "elastic_averaging_regularization",
    "modelId" -> "model_id",
    "nfolds" -> "nfolds",
    "keepCrossValidationModels" -> "keep_cross_validation_models",
    "keepCrossValidationPredictions" -> "keep_cross_validation_predictions",
    "keepCrossValidationFoldAssignment" -> "keep_cross_validation_fold_assignment",
    "distribution" -> "distribution",
    "tweediePower" -> "tweedie_power",
    "quantileAlpha" -> "quantile_alpha",
    "huberAlpha" -> "huber_alpha",
    "labelCol" -> "response_column",
    "weightCol" -> "weights_column",
    "offsetCol" -> "offset_column",
    "foldCol" -> "fold_column",
    "foldAssignment" -> "fold_assignment",
    "categoricalEncoding" -> "categorical_encoding",
    "ignoreConstCols" -> "ignore_const_cols",
    "scoreEachIteration" -> "score_each_iteration",
    "stoppingRounds" -> "stopping_rounds",
    "maxRuntimeSecs" -> "max_runtime_secs",
    "stoppingMetric" -> "stopping_metric",
    "stoppingTolerance" -> "stopping_tolerance",
    "gainsliftBins" -> "gainslift_bins",
    "customMetricFunc" -> "custom_metric_func",
    "exportCheckpointsDir" -> "export_checkpoints_dir",
    "aucType" -> "auc_type")
  inheritedNames ++ deepLearningNames
}
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy