All downloads are free. Search and download functionality uses the official Maven repository.

ai.h2o.sparkling.ml.params.H2OAutoEncoderParams.scala Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package ai.h2o.sparkling.ml.params

import hex.deeplearning.DeepLearningModel.DeepLearningParameters
import ai.h2o.sparkling.H2OFrame
import hex.deeplearning.DeepLearningModel.DeepLearningParameters.Activation
import hex.deeplearning.DeepLearningModel.DeepLearningParameters.InitialWeightDistribution
import hex.deeplearning.DeepLearningModel.DeepLearningParameters.Loss
import hex.deeplearning.DeepLearningModel.DeepLearningParameters.ClassSamplingMethod
import hex.deeplearning.DeepLearningModel.DeepLearningParameters.MissingValuesHandling
import hex.Model.Parameters.CategoricalEncodingScheme
import hex.ScoreKeeper.StoppingMetric

/**
  * Parameter definitions, default values, accessors and H2O name mappings for the auto-encoder.
  *
  * The trait declares one Spark-side parameter per backend option, registers the defaults via
  * `setDefault`, exposes a getter/setter pair for each parameter and translates the current
  * values into a map keyed by the H2O (snake_case) parameter names. The backend parameter
  * class is `DeepLearningParameters` (see `paramTag`).
  *
  * NOTE(review): the shape of this file suggests it is generated; confirm before hand-editing,
  * since manual changes (including the doc-string corrections) may need to be applied upstream.
  */
trait H2OAutoEncoderParams
  extends H2OAlgoParamsBase
  with HasInitialBiases
  with HasInitialWeights
  with HasIgnoredCols {

  /** Class tag of the H2O parameter class backing this set of parameters. */
  protected def paramTag = reflect.classTag[DeepLearningParameters]

  //
  // Parameter definitions
  //
  protected val activation = stringParam(
    name = "activation",
    doc = """Activation function. Possible values are ``"Tanh"``, ``"TanhWithDropout"``, ``"Rectifier"``, ``"RectifierWithDropout"``, ``"Maxout"``, ``"MaxoutWithDropout"``, ``"ExpRectifier"``, ``"ExpRectifierWithDropout"``.""")

  protected val hidden = intArrayParam(
    name = "hidden",
    doc = """Hidden layer sizes (e.g. [100, 100]).""")

  protected val epochs = doubleParam(
    name = "epochs",
    doc = """How many times the dataset should be iterated (streamed), can be fractional.""")

  protected val trainSamplesPerIteration = longParam(
    name = "trainSamplesPerIteration",
    doc = """Number of training samples (globally) per MapReduce iteration. Special values are 0: one epoch, -1: all available data (e.g., replicated training data), -2: automatic.""")

  protected val targetRatioCommToComp = doubleParam(
    name = "targetRatioCommToComp",
    doc = """Target ratio of communication overhead to computation. Only for multi-node operation and train_samples_per_iteration = -2 (auto-tuning).""")

  protected val seed = longParam(
    name = "seed",
    doc = """Seed for random numbers (affects sampling) - Note: only reproducible when running single threaded.""")

  protected val adaptiveRate = booleanParam(
    name = "adaptiveRate",
    doc = """Adaptive learning rate.""")

  protected val rho = doubleParam(
    name = "rho",
    doc = """Adaptive learning rate time decay factor (similarity to prior updates).""")

  protected val epsilon = doubleParam(
    name = "epsilon",
    doc = """Adaptive learning rate smoothing factor (to avoid divisions by zero and allow progress).""")

  protected val rate = doubleParam(
    name = "rate",
    doc = """Learning rate (higher => less stable, lower => slower convergence).""")

  protected val rateAnnealing = doubleParam(
    name = "rateAnnealing",
    doc = """Learning rate annealing: rate / (1 + rate_annealing * samples).""")

  // Doc fix: the closing parenthesis of the "(N-th layer: ...)" remark was missing.
  protected val rateDecay = doubleParam(
    name = "rateDecay",
    doc = """Learning rate decay factor between layers (N-th layer: rate * rate_decay ^ (n - 1)).""")

  protected val momentumStart = doubleParam(
    name = "momentumStart",
    doc = """Initial momentum at the beginning of training (try 0.5).""")

  protected val momentumRamp = doubleParam(
    name = "momentumRamp",
    doc = """Number of training samples for which momentum increases.""")

  protected val momentumStable = doubleParam(
    name = "momentumStable",
    doc = """Final momentum after the ramp is over (try 0.99).""")

  protected val nesterovAcceleratedGradient = booleanParam(
    name = "nesterovAcceleratedGradient",
    doc = """Use Nesterov accelerated gradient (recommended).""")

  protected val inputDropoutRatio = doubleParam(
    name = "inputDropoutRatio",
    doc = """Input layer dropout ratio (can improve generalization, try 0.1 or 0.2).""")

  protected val hiddenDropoutRatios = nullableDoubleArrayParam(
    name = "hiddenDropoutRatios",
    doc = """Hidden layer dropout ratios (can improve generalization), specify one value per hidden layer, defaults to 0.5.""")

  protected val l1 = doubleParam(
    name = "l1",
    doc = """L1 regularization (can add stability and improve generalization, causes many weights to become 0).""")

  // Doc fix: the closing parenthesis was missing (compare with the l1 description).
  protected val l2 = doubleParam(
    name = "l2",
    doc = """L2 regularization (can add stability and improve generalization, causes many weights to be small).""")

  protected val maxW2 = floatParam(
    name = "maxW2",
    doc = """Constraint for squared sum of incoming weights per unit (e.g. for Rectifier).""")

  protected val initialWeightDistribution = stringParam(
    name = "initialWeightDistribution",
    doc = """Initial weight distribution. Possible values are ``"UniformAdaptive"``, ``"Uniform"``, ``"Normal"``.""")

  protected val initialWeightScale = doubleParam(
    name = "initialWeightScale",
    doc = """Uniform: -value...value, Normal: stddev.""")

  protected val loss = stringParam(
    name = "loss",
    doc = """Loss function. Possible values are ``"Automatic"``, ``"Quadratic"``, ``"CrossEntropy"``, ``"ModifiedHuber"``, ``"Huber"``, ``"Absolute"``, ``"Quantile"``.""")

  protected val scoreInterval = doubleParam(
    name = "scoreInterval",
    doc = """Shortest time interval (in seconds) between model scoring.""")

  protected val scoreTrainingSamples = longParam(
    name = "scoreTrainingSamples",
    doc = """Number of training set samples for scoring (0 for all).""")

  protected val scoreValidationSamples = longParam(
    name = "scoreValidationSamples",
    doc = """Number of validation set samples for scoring (0 for all).""")

  protected val scoreDutyCycle = doubleParam(
    name = "scoreDutyCycle",
    doc = """Maximum duty cycle fraction for scoring (lower: more training, higher: more scoring).""")

  protected val quietMode = booleanParam(
    name = "quietMode",
    doc = """Enable quiet mode for less output to standard output.""")

  protected val scoreValidationSampling = stringParam(
    name = "scoreValidationSampling",
    doc = """Method used to sample validation dataset for scoring. Possible values are ``"Uniform"``, ``"Stratified"``.""")

  protected val overwriteWithBestModel = booleanParam(
    name = "overwriteWithBestModel",
    doc = """If enabled, override the final model with the best model found during training.""")

  protected val useAllFactorLevels = booleanParam(
    name = "useAllFactorLevels",
    doc = """Use all factor levels of categorical variables. Otherwise, the first factor level is omitted (without loss of accuracy). Useful for variable importances and auto-enabled for autoencoder.""")

  protected val standardize = booleanParam(
    name = "standardize",
    doc = """If enabled, automatically standardize the data. If disabled, the user must provide properly scaled input data.""")

  protected val diagnostics = booleanParam(
    name = "diagnostics",
    doc = """Enable diagnostics for hidden layers.""")

  protected val calculateFeatureImportances = booleanParam(
    name = "calculateFeatureImportances",
    doc = """Compute variable importances for input features (Gedeon method) - can be slow for large networks.""")

  protected val fastMode = booleanParam(
    name = "fastMode",
    doc = """Enable fast mode (minor approximation in back-propagation).""")

  protected val forceLoadBalance = booleanParam(
    name = "forceLoadBalance",
    doc = """Force extra load balancing to increase training speed for small datasets (to keep all cores busy).""")

  protected val replicateTrainingData = booleanParam(
    name = "replicateTrainingData",
    doc = """Replicate the entire training dataset onto every node for faster training on small datasets.""")

  protected val singleNodeMode = booleanParam(
    name = "singleNodeMode",
    doc = """Run on a single node for fine-tuning of model parameters.""")

  // Doc fix: "of if using" was a typo for "or if using".
  protected val shuffleTrainingData = booleanParam(
    name = "shuffleTrainingData",
    doc = """Enable shuffling of training data (recommended if training data is replicated and train_samples_per_iteration is close to #nodes x #rows, or if using balance_classes).""")

  protected val missingValuesHandling = stringParam(
    name = "missingValuesHandling",
    doc = """Handling of missing values. Either MeanImputation or Skip. Possible values are ``"MeanImputation"``, ``"Skip"``.""")

  protected val sparse = booleanParam(
    name = "sparse",
    doc = """Sparse data handling (more efficient for data with lots of 0 values).""")

  protected val averageActivation = doubleParam(
    name = "averageActivation",
    doc = """Average activation for sparse auto-encoder. #Experimental.""")

  protected val sparsityBeta = doubleParam(
    name = "sparsityBeta",
    doc = """Sparsity regularization. #Experimental.""")

  protected val maxCategoricalFeatures = intParam(
    name = "maxCategoricalFeatures",
    doc = """Max. number of categorical features, enforced via hashing. #Experimental.""")

  protected val reproducible = booleanParam(
    name = "reproducible",
    doc = """Force reproducibility on small data (will be slow - only uses 1 thread).""")

  protected val exportWeightsAndBiases = booleanParam(
    name = "exportWeightsAndBiases",
    doc = """Whether to export Neural Network weights and biases to H2O Frames.""")

  protected val miniBatchSize = intParam(
    name = "miniBatchSize",
    doc = """Mini-batch size (smaller leads to better fit, larger can speed up and generalize better).""")

  protected val elasticAveraging = booleanParam(
    name = "elasticAveraging",
    doc = """Elastic averaging between compute nodes can improve distributed model convergence. #Experimental.""")

  protected val elasticAveragingMovingRate = doubleParam(
    name = "elasticAveragingMovingRate",
    doc = """Elastic averaging moving rate (only if elastic averaging is enabled).""")

  protected val elasticAveragingRegularization = doubleParam(
    name = "elasticAveragingRegularization",
    doc = """Elastic averaging regularization strength (only if elastic averaging is enabled).""")

  protected val modelId = nullableStringParam(
    name = "modelId",
    doc = """Destination id for this model; auto-generated if not specified.""")

  protected val weightCol = nullableStringParam(
    name = "weightCol",
    doc = """Column with observation weights. Giving some observation a weight of zero is equivalent to excluding it from the dataset; giving an observation a relative weight of 2 is equivalent to repeating that row twice. Negative weights are not allowed. Note: Weights are per-row observation weights and do not increase the size of the data frame. This is typically the number of times a row is repeated, but non-integer values are supported as well. During training, rows with higher weights matter more, due to the larger loss function pre-factor. If you set weight = 0 for a row, the returned prediction frame at that row is zero and this is incorrect. To get an accurate prediction, remove all rows with weight == 0.""")

  protected val categoricalEncoding = stringParam(
    name = "categoricalEncoding",
    doc = """Encoding scheme for categorical features. Possible values are ``"AUTO"``, ``"OneHotInternal"``, ``"OneHotExplicit"``, ``"Enum"``, ``"Binary"``, ``"Eigen"``, ``"LabelEncoder"``, ``"SortByResponse"``, ``"EnumLimited"``.""")

  protected val ignoreConstCols = booleanParam(
    name = "ignoreConstCols",
    doc = """Ignore constant columns.""")

  protected val scoreEachIteration = booleanParam(
    name = "scoreEachIteration",
    doc = """Whether to score during each iteration of model training.""")

  protected val stoppingRounds = intParam(
    name = "stoppingRounds",
    doc = """Early stopping based on convergence of stopping_metric. Stop if simple moving average of length k of the stopping_metric does not improve for k:=stopping_rounds scoring events (0 to disable).""")

  protected val maxRuntimeSecs = doubleParam(
    name = "maxRuntimeSecs",
    doc = """Maximum allowed runtime in seconds for model training. Use 0 to disable.""")

  protected val stoppingMetric = stringParam(
    name = "stoppingMetric",
    doc = """Metric to use for early stopping (AUTO: logloss for classification, deviance for regression and anomaly_score for Isolation Forest). Note that custom and custom_increasing can only be used in GBM and DRF with the Python client. Possible values are ``"AUTO"``, ``"deviance"``, ``"logloss"``, ``"MSE"``, ``"RMSE"``, ``"MAE"``, ``"RMSLE"``, ``"AUC"``, ``"AUCPR"``, ``"lift_top_group"``, ``"misclassification"``, ``"mean_per_class_error"``, ``"anomaly_score"``, ``"AUUC"``, ``"ATE"``, ``"ATT"``, ``"ATC"``, ``"qini"``, ``"custom"``, ``"custom_increasing"``.""")

  protected val stoppingTolerance = doubleParam(
    name = "stoppingTolerance",
    doc = """Relative tolerance for metric-based stopping criterion (stop if relative improvement is not at least this much).""")

  // Doc fix: removed the doubled period after "disabled".
  protected val gainsliftBins = intParam(
    name = "gainsliftBins",
    doc = """Gains/Lift table number of bins. 0 means disabled. Default value -1 means automatic binning.""")

  protected val customMetricFunc = nullableStringParam(
    name = "customMetricFunc",
    doc = """Reference to custom evaluation function, format: `language:keyName=funcName`.""")

  protected val exportCheckpointsDir = nullableStringParam(
    name = "exportCheckpointsDir",
    doc = """Automatically export generated models to this directory.""")

  //
  // Default values
  //
  // Must stay below the parameter definitions: the vals above have to be
  // initialised before they are referenced here.
  setDefault(
    activation -> Activation.Rectifier.name(),
    hidden -> Array(200, 200),
    epochs -> 10.0,
    trainSamplesPerIteration -> -2L,
    targetRatioCommToComp -> 0.05,
    seed -> -1L,
    adaptiveRate -> true,
    rho -> 0.99,
    epsilon -> 1.0e-8,
    rate -> 0.005,
    rateAnnealing -> 1.0e-6,
    rateDecay -> 1.0,
    momentumStart -> 0.0,
    momentumRamp -> 1000000.0,
    momentumStable -> 0.0,
    nesterovAcceleratedGradient -> true,
    inputDropoutRatio -> 0.0,
    hiddenDropoutRatios -> null,
    l1 -> 0.0,
    l2 -> 0.0,
    maxW2 -> 3.402823e38f,
    initialWeightDistribution -> InitialWeightDistribution.UniformAdaptive.name(),
    initialWeightScale -> 1.0,
    loss -> Loss.Automatic.name(),
    scoreInterval -> 5.0,
    scoreTrainingSamples -> 10000L,
    scoreValidationSamples -> 0L,
    scoreDutyCycle -> 0.1,
    quietMode -> false,
    scoreValidationSampling -> ClassSamplingMethod.Uniform.name(),
    overwriteWithBestModel -> true,
    useAllFactorLevels -> true,
    standardize -> true,
    diagnostics -> true,
    calculateFeatureImportances -> true,
    fastMode -> true,
    forceLoadBalance -> true,
    replicateTrainingData -> true,
    singleNodeMode -> false,
    shuffleTrainingData -> false,
    missingValuesHandling -> MissingValuesHandling.MeanImputation.name(),
    sparse -> false,
    averageActivation -> 0.0,
    sparsityBeta -> 0.0,
    maxCategoricalFeatures -> 2147483647,
    reproducible -> false,
    exportWeightsAndBiases -> false,
    miniBatchSize -> 1,
    elasticAveraging -> false,
    elasticAveragingMovingRate -> 0.9,
    elasticAveragingRegularization -> 0.001,
    modelId -> null,
    weightCol -> null,
    categoricalEncoding -> CategoricalEncodingScheme.AUTO.name(),
    ignoreConstCols -> true,
    scoreEachIteration -> false,
    stoppingRounds -> 5,
    maxRuntimeSecs -> 0.0,
    stoppingMetric -> StoppingMetric.AUTO.name(),
    stoppingTolerance -> 0.0,
    gainsliftBins -> -1,
    customMetricFunc -> null,
    exportCheckpointsDir -> null)

  //
  // Getters
  //
  def getActivation(): String = $(activation)

  def getHidden(): Array[Int] = $(hidden)

  def getEpochs(): Double = $(epochs)

  def getTrainSamplesPerIteration(): Long = $(trainSamplesPerIteration)

  def getTargetRatioCommToComp(): Double = $(targetRatioCommToComp)

  def getSeed(): Long = $(seed)

  def getAdaptiveRate(): Boolean = $(adaptiveRate)

  def getRho(): Double = $(rho)

  def getEpsilon(): Double = $(epsilon)

  def getRate(): Double = $(rate)

  def getRateAnnealing(): Double = $(rateAnnealing)

  def getRateDecay(): Double = $(rateDecay)

  def getMomentumStart(): Double = $(momentumStart)

  def getMomentumRamp(): Double = $(momentumRamp)

  def getMomentumStable(): Double = $(momentumStable)

  def getNesterovAcceleratedGradient(): Boolean = $(nesterovAcceleratedGradient)

  def getInputDropoutRatio(): Double = $(inputDropoutRatio)

  /** The default registered for this parameter is `null`, so callers must be prepared for it. */
  def getHiddenDropoutRatios(): Array[Double] = $(hiddenDropoutRatios)

  def getL1(): Double = $(l1)

  def getL2(): Double = $(l2)

  def getMaxW2(): Float = $(maxW2)

  def getInitialWeightDistribution(): String = $(initialWeightDistribution)

  def getInitialWeightScale(): Double = $(initialWeightScale)

  def getLoss(): String = $(loss)

  def getScoreInterval(): Double = $(scoreInterval)

  def getScoreTrainingSamples(): Long = $(scoreTrainingSamples)

  def getScoreValidationSamples(): Long = $(scoreValidationSamples)

  def getScoreDutyCycle(): Double = $(scoreDutyCycle)

  def getQuietMode(): Boolean = $(quietMode)

  def getScoreValidationSampling(): String = $(scoreValidationSampling)

  def getOverwriteWithBestModel(): Boolean = $(overwriteWithBestModel)

  def getUseAllFactorLevels(): Boolean = $(useAllFactorLevels)

  def getStandardize(): Boolean = $(standardize)

  def getDiagnostics(): Boolean = $(diagnostics)

  def getCalculateFeatureImportances(): Boolean = $(calculateFeatureImportances)

  def getFastMode(): Boolean = $(fastMode)

  def getForceLoadBalance(): Boolean = $(forceLoadBalance)

  def getReplicateTrainingData(): Boolean = $(replicateTrainingData)

  def getSingleNodeMode(): Boolean = $(singleNodeMode)

  def getShuffleTrainingData(): Boolean = $(shuffleTrainingData)

  def getMissingValuesHandling(): String = $(missingValuesHandling)

  def getSparse(): Boolean = $(sparse)

  def getAverageActivation(): Double = $(averageActivation)

  def getSparsityBeta(): Double = $(sparsityBeta)

  def getMaxCategoricalFeatures(): Int = $(maxCategoricalFeatures)

  def getReproducible(): Boolean = $(reproducible)

  def getExportWeightsAndBiases(): Boolean = $(exportWeightsAndBiases)

  def getMiniBatchSize(): Int = $(miniBatchSize)

  def getElasticAveraging(): Boolean = $(elasticAveraging)

  def getElasticAveragingMovingRate(): Double = $(elasticAveragingMovingRate)

  def getElasticAveragingRegularization(): Double = $(elasticAveragingRegularization)

  /** The default registered for this parameter is `null`. */
  def getModelId(): String = $(modelId)

  /** The default registered for this parameter is `null`. */
  def getWeightCol(): String = $(weightCol)

  def getCategoricalEncoding(): String = $(categoricalEncoding)

  def getIgnoreConstCols(): Boolean = $(ignoreConstCols)

  def getScoreEachIteration(): Boolean = $(scoreEachIteration)

  def getStoppingRounds(): Int = $(stoppingRounds)

  def getMaxRuntimeSecs(): Double = $(maxRuntimeSecs)

  def getStoppingMetric(): String = $(stoppingMetric)

  def getStoppingTolerance(): Double = $(stoppingTolerance)

  def getGainsliftBins(): Int = $(gainsliftBins)

  /** The default registered for this parameter is `null`. */
  def getCustomMetricFunc(): String = $(customMetricFunc)

  /** The default registered for this parameter is `null`. */
  def getExportCheckpointsDir(): String = $(exportCheckpointsDir)

  //
  // Setters
  //
  // Enum-backed string parameters are passed through EnumParamValidator before
  // being stored; all other setters store the value as given.
  //
  def setActivation(value: String): this.type =
    set(activation, EnumParamValidator.getValidatedEnumValue[Activation](value))

  def setHidden(value: Array[Int]): this.type = set(hidden, value)

  def setEpochs(value: Double): this.type = set(epochs, value)

  def setTrainSamplesPerIteration(value: Long): this.type = set(trainSamplesPerIteration, value)

  def setTargetRatioCommToComp(value: Double): this.type = set(targetRatioCommToComp, value)

  def setSeed(value: Long): this.type = set(seed, value)

  def setAdaptiveRate(value: Boolean): this.type = set(adaptiveRate, value)

  def setRho(value: Double): this.type = set(rho, value)

  def setEpsilon(value: Double): this.type = set(epsilon, value)

  def setRate(value: Double): this.type = set(rate, value)

  def setRateAnnealing(value: Double): this.type = set(rateAnnealing, value)

  def setRateDecay(value: Double): this.type = set(rateDecay, value)

  def setMomentumStart(value: Double): this.type = set(momentumStart, value)

  def setMomentumRamp(value: Double): this.type = set(momentumRamp, value)

  def setMomentumStable(value: Double): this.type = set(momentumStable, value)

  def setNesterovAcceleratedGradient(value: Boolean): this.type = set(nesterovAcceleratedGradient, value)

  def setInputDropoutRatio(value: Double): this.type = set(inputDropoutRatio, value)

  def setHiddenDropoutRatios(value: Array[Double]): this.type = set(hiddenDropoutRatios, value)

  def setL1(value: Double): this.type = set(l1, value)

  def setL2(value: Double): this.type = set(l2, value)

  def setMaxW2(value: Float): this.type = set(maxW2, value)

  def setInitialWeightDistribution(value: String): this.type =
    set(initialWeightDistribution, EnumParamValidator.getValidatedEnumValue[InitialWeightDistribution](value))

  def setInitialWeightScale(value: Double): this.type = set(initialWeightScale, value)

  def setLoss(value: String): this.type =
    set(loss, EnumParamValidator.getValidatedEnumValue[Loss](value))

  def setScoreInterval(value: Double): this.type = set(scoreInterval, value)

  def setScoreTrainingSamples(value: Long): this.type = set(scoreTrainingSamples, value)

  def setScoreValidationSamples(value: Long): this.type = set(scoreValidationSamples, value)

  def setScoreDutyCycle(value: Double): this.type = set(scoreDutyCycle, value)

  def setQuietMode(value: Boolean): this.type = set(quietMode, value)

  def setScoreValidationSampling(value: String): this.type =
    set(scoreValidationSampling, EnumParamValidator.getValidatedEnumValue[ClassSamplingMethod](value))

  def setOverwriteWithBestModel(value: Boolean): this.type = set(overwriteWithBestModel, value)

  def setUseAllFactorLevels(value: Boolean): this.type = set(useAllFactorLevels, value)

  def setStandardize(value: Boolean): this.type = set(standardize, value)

  def setDiagnostics(value: Boolean): this.type = set(diagnostics, value)

  def setCalculateFeatureImportances(value: Boolean): this.type = set(calculateFeatureImportances, value)

  def setFastMode(value: Boolean): this.type = set(fastMode, value)

  def setForceLoadBalance(value: Boolean): this.type = set(forceLoadBalance, value)

  def setReplicateTrainingData(value: Boolean): this.type = set(replicateTrainingData, value)

  def setSingleNodeMode(value: Boolean): this.type = set(singleNodeMode, value)

  def setShuffleTrainingData(value: Boolean): this.type = set(shuffleTrainingData, value)

  def setMissingValuesHandling(value: String): this.type =
    set(missingValuesHandling, EnumParamValidator.getValidatedEnumValue[MissingValuesHandling](value))

  def setSparse(value: Boolean): this.type = set(sparse, value)

  def setAverageActivation(value: Double): this.type = set(averageActivation, value)

  def setSparsityBeta(value: Double): this.type = set(sparsityBeta, value)

  def setMaxCategoricalFeatures(value: Int): this.type = set(maxCategoricalFeatures, value)

  def setReproducible(value: Boolean): this.type = set(reproducible, value)

  def setExportWeightsAndBiases(value: Boolean): this.type = set(exportWeightsAndBiases, value)

  def setMiniBatchSize(value: Int): this.type = set(miniBatchSize, value)

  def setElasticAveraging(value: Boolean): this.type = set(elasticAveraging, value)

  def setElasticAveragingMovingRate(value: Double): this.type = set(elasticAveragingMovingRate, value)

  def setElasticAveragingRegularization(value: Double): this.type = set(elasticAveragingRegularization, value)

  def setModelId(value: String): this.type = set(modelId, value)

  def setWeightCol(value: String): this.type = set(weightCol, value)

  def setCategoricalEncoding(value: String): this.type =
    set(categoricalEncoding, EnumParamValidator.getValidatedEnumValue[CategoricalEncodingScheme](value))

  def setIgnoreConstCols(value: Boolean): this.type = set(ignoreConstCols, value)

  def setScoreEachIteration(value: Boolean): this.type = set(scoreEachIteration, value)

  def setStoppingRounds(value: Int): this.type = set(stoppingRounds, value)

  def setMaxRuntimeSecs(value: Double): this.type = set(maxRuntimeSecs, value)

  def setStoppingMetric(value: String): this.type =
    set(stoppingMetric, EnumParamValidator.getValidatedEnumValue[StoppingMetric](value))

  def setStoppingTolerance(value: Double): this.type = set(stoppingTolerance, value)

  def setGainsliftBins(value: Int): this.type = set(gainsliftBins, value)

  def setCustomMetricFunc(value: String): this.type = set(customMetricFunc, value)

  def setExportCheckpointsDir(value: String): this.type = set(exportCheckpointsDir, value)

  /**
    * Returns the parameters inherited from the parent combined (`++`) with the
    * auto-encoder parameters built by [[getH2OAutoEncoderParams]].
    *
    * @param trainingFrame frame forwarded to the parent and to the auto-encoder parameter builder
    */
  override private[sparkling] def getH2OAlgorithmParams(trainingFrame: H2OFrame): Map[String, Any] = {
    super.getH2OAlgorithmParams(trainingFrame) ++ getH2OAutoEncoderParams(trainingFrame)
  }

  /**
    * Builds the map from H2O parameter names to the currently configured values.
    *
    * The entries declared here are combined via `+++` with the results of
    * `getInitialBiasesParam`, `getInitialWeightsParam` and `getIgnoredColsParam`
    * (presumably supplied by the mixed-in `Has*` traits; confirm).
    *
    * @param trainingFrame frame forwarded to the three helper calls above
    */
  private[sparkling] def getH2OAutoEncoderParams(trainingFrame: H2OFrame): Map[String, Any] = {
    Map(
      "activation" -> getActivation(),
      "hidden" -> getHidden(),
      "epochs" -> getEpochs(),
      "train_samples_per_iteration" -> getTrainSamplesPerIteration(),
      "target_ratio_comm_to_comp" -> getTargetRatioCommToComp(),
      "seed" -> getSeed(),
      "adaptive_rate" -> getAdaptiveRate(),
      "rho" -> getRho(),
      "epsilon" -> getEpsilon(),
      "rate" -> getRate(),
      "rate_annealing" -> getRateAnnealing(),
      "rate_decay" -> getRateDecay(),
      "momentum_start" -> getMomentumStart(),
      "momentum_ramp" -> getMomentumRamp(),
      "momentum_stable" -> getMomentumStable(),
      "nesterov_accelerated_gradient" -> getNesterovAcceleratedGradient(),
      "input_dropout_ratio" -> getInputDropoutRatio(),
      "hidden_dropout_ratios" -> getHiddenDropoutRatios(),
      "l1" -> getL1(),
      "l2" -> getL2(),
      "max_w2" -> getMaxW2(),
      "initial_weight_distribution" -> getInitialWeightDistribution(),
      "initial_weight_scale" -> getInitialWeightScale(),
      "loss" -> getLoss(),
      "score_interval" -> getScoreInterval(),
      "score_training_samples" -> getScoreTrainingSamples(),
      "score_validation_samples" -> getScoreValidationSamples(),
      "score_duty_cycle" -> getScoreDutyCycle(),
      "quiet_mode" -> getQuietMode(),
      "score_validation_sampling" -> getScoreValidationSampling(),
      "overwrite_with_best_model" -> getOverwriteWithBestModel(),
      "use_all_factor_levels" -> getUseAllFactorLevels(),
      "standardize" -> getStandardize(),
      "diagnostics" -> getDiagnostics(),
      "variable_importances" -> getCalculateFeatureImportances(),
      "fast_mode" -> getFastMode(),
      "force_load_balance" -> getForceLoadBalance(),
      "replicate_training_data" -> getReplicateTrainingData(),
      "single_node_mode" -> getSingleNodeMode(),
      "shuffle_training_data" -> getShuffleTrainingData(),
      "missing_values_handling" -> getMissingValuesHandling(),
      "sparse" -> getSparse(),
      "average_activation" -> getAverageActivation(),
      "sparsity_beta" -> getSparsityBeta(),
      "max_categorical_features" -> getMaxCategoricalFeatures(),
      "reproducible" -> getReproducible(),
      "export_weights_and_biases" -> getExportWeightsAndBiases(),
      "mini_batch_size" -> getMiniBatchSize(),
      "elastic_averaging" -> getElasticAveraging(),
      "elastic_averaging_moving_rate" -> getElasticAveragingMovingRate(),
      "elastic_averaging_regularization" -> getElasticAveragingRegularization(),
      "model_id" -> getModelId(),
      "weights_column" -> getWeightCol(),
      "categorical_encoding" -> getCategoricalEncoding(),
      "ignore_const_cols" -> getIgnoreConstCols(),
      "score_each_iteration" -> getScoreEachIteration(),
      "stopping_rounds" -> getStoppingRounds(),
      "max_runtime_secs" -> getMaxRuntimeSecs(),
      "stopping_metric" -> getStoppingMetric(),
      "stopping_tolerance" -> getStoppingTolerance(),
      "gainslift_bins" -> getGainsliftBins(),
      "custom_metric_func" -> getCustomMetricFunc(),
      "export_checkpoints_dir" -> getExportCheckpointsDir()) +++
      getInitialBiasesParam(trainingFrame) +++
      getInitialWeightsParam(trainingFrame) +++
      getIgnoredColsParam(trainingFrame)
  }

  /**
    * Extends the inherited mapping from Sparkling Water (camelCase) parameter
    * names to H2O (snake_case) parameter names.
    *
    * Two entries are not a mechanical case conversion:
    * `calculateFeatureImportances` maps to `variable_importances` and
    * `weightCol` maps to `weights_column`.
    */
  override private[sparkling] def getSWtoH2OParamNameMap(): Map[String, String] = {
    super.getSWtoH2OParamNameMap() ++
      Map(
        "activation" -> "activation",
        "hidden" -> "hidden",
        "epochs" -> "epochs",
        "trainSamplesPerIteration" -> "train_samples_per_iteration",
        "targetRatioCommToComp" -> "target_ratio_comm_to_comp",
        "seed" -> "seed",
        "adaptiveRate" -> "adaptive_rate",
        "rho" -> "rho",
        "epsilon" -> "epsilon",
        "rate" -> "rate",
        "rateAnnealing" -> "rate_annealing",
        "rateDecay" -> "rate_decay",
        "momentumStart" -> "momentum_start",
        "momentumRamp" -> "momentum_ramp",
        "momentumStable" -> "momentum_stable",
        "nesterovAcceleratedGradient" -> "nesterov_accelerated_gradient",
        "inputDropoutRatio" -> "input_dropout_ratio",
        "hiddenDropoutRatios" -> "hidden_dropout_ratios",
        "l1" -> "l1",
        "l2" -> "l2",
        "maxW2" -> "max_w2",
        "initialWeightDistribution" -> "initial_weight_distribution",
        "initialWeightScale" -> "initial_weight_scale",
        "loss" -> "loss",
        "scoreInterval" -> "score_interval",
        "scoreTrainingSamples" -> "score_training_samples",
        "scoreValidationSamples" -> "score_validation_samples",
        "scoreDutyCycle" -> "score_duty_cycle",
        "quietMode" -> "quiet_mode",
        "scoreValidationSampling" -> "score_validation_sampling",
        "overwriteWithBestModel" -> "overwrite_with_best_model",
        "useAllFactorLevels" -> "use_all_factor_levels",
        "standardize" -> "standardize",
        "diagnostics" -> "diagnostics",
        "calculateFeatureImportances" -> "variable_importances",
        "fastMode" -> "fast_mode",
        "forceLoadBalance" -> "force_load_balance",
        "replicateTrainingData" -> "replicate_training_data",
        "singleNodeMode" -> "single_node_mode",
        "shuffleTrainingData" -> "shuffle_training_data",
        "missingValuesHandling" -> "missing_values_handling",
        "sparse" -> "sparse",
        "averageActivation" -> "average_activation",
        "sparsityBeta" -> "sparsity_beta",
        "maxCategoricalFeatures" -> "max_categorical_features",
        "reproducible" -> "reproducible",
        "exportWeightsAndBiases" -> "export_weights_and_biases",
        "miniBatchSize" -> "mini_batch_size",
        "elasticAveraging" -> "elastic_averaging",
        "elasticAveragingMovingRate" -> "elastic_averaging_moving_rate",
        "elasticAveragingRegularization" -> "elastic_averaging_regularization",
        "modelId" -> "model_id",
        "weightCol" -> "weights_column",
        "categoricalEncoding" -> "categorical_encoding",
        "ignoreConstCols" -> "ignore_const_cols",
        "scoreEachIteration" -> "score_each_iteration",
        "stoppingRounds" -> "stopping_rounds",
        "maxRuntimeSecs" -> "max_runtime_secs",
        "stoppingMetric" -> "stopping_metric",
        "stoppingTolerance" -> "stopping_tolerance",
        "gainsliftBins" -> "gainslift_bins",
        "customMetricFunc" -> "custom_metric_func",
        "exportCheckpointsDir" -> "export_checkpoints_dir")
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy