// Source file: com.databricks.labs.automl.executor.AutomationConfig.scala
package com.databricks.labs.automl.executor

import com.databricks.labs.automl.params._
import com.databricks.labs.automl.sanitize.SanitizerDefaults

trait AutomationConfig extends Defaults with SanitizerDefaults {

  // General pipeline settings. Every field starts at a default value that is declared outside this
  // trait body (presumably in the mixed-in Defaults / SanitizerDefaults traits -- confirm there).
  var _modelingFamily: String = _defaultModelingFamily

  var _labelCol: String = _defaultLabelCol

  var _featuresCol: String = _defaultFeaturesCol

  // Boolean switches for individual data-prep stages; flipped by the paired *On() / *Off() methods.
  var _naFillFlag: Boolean = _defaultNAFillFlag

  var _varianceFilterFlag: Boolean = _defaultVarianceFilterFlag

  var _outlierFilterFlag: Boolean = _defaultOutlierFilterFlag

  var _pearsonFilterFlag: Boolean = _defaultPearsonFilterFlag

  var _covarianceFilterFlag: Boolean = _defaultCovarianceFilterFlag

  var _oneHotEncodeFlag: Boolean = _defaultOneHotEncodeFlag

  var _scalingFlag: Boolean = _defaultScalingFlag

  var _featureInteractionFlag: Boolean = _defaultFeatureInteractionFlag

  var _dataPrepCachingFlag: Boolean = _defaultDataPrepCachingFlag

  var _dataPrepParallelism: Int = _defaultDataPrepParallelism

  // Search-space boundaries. They start at the RandomForest defaults and are replaced by
  // setModelingFamily whenever a different family is selected.
  var _numericBoundaries: Map[String, (Double, Double)] =
    _rfDefaultNumBoundaries

  var _stringBoundaries: Map[String, List[String]] = _rfDefaultStringBoundaries

  // Scoring starts at the classifier defaults.
  var _scoringMetric: String = _scoringDefaultClassifier

  var _scoringOptimizationStrategy: String =
    _scoringOptimizationStrategyClassifier

  // These two fill statistics are copied into _fillConfig by setFillConfig().
  var _numericFillStat: String = _fillConfigDefaults.numericFillStat

  var _characterFillStat: String = _fillConfigDefaults.characterFillStat

  var _dateTimeConversionType: String = _defaultDateTimeConversionType

  var _fieldsToIgnoreInVector: Array[String] = _defaultFieldsToIgnoreInVector

  // NA-fill and cardinality settings, all seeded from `_fillConfigDefaults`. Each individual field is
  // copied into the aggregate `_fillConfig` whenever setFillConfig() runs.
  var _naFillFilterPrecision: Double = _fillConfigDefaults.filterPrecision

  var _categoricalNAFillMap: Map[String, String] =
    _fillConfigDefaults.categoricalNAFillMap

  var _numericNAFillMap: Map[String, AnyVal] =
    _fillConfigDefaults.numericNAFillMap

  var _characterNABlanketFillValue: String =
    _fillConfigDefaults.characterNABlanketFillValue

  var _numericNABlanketFillValue: Double =
    _fillConfigDefaults.numericNABlanketFillValue

  var _naFillMode: String = _fillConfigDefaults.naFillMode

  var _cardinalitySwitchFlag: Boolean = _fillConfigDefaults.cardinalitySwitch

  var _cardinalityType: String = _fillConfigDefaults.cardinalityType

  var _cardinalityLimit: Int = _fillConfigDefaults.cardinalityLimit

  var _cardinalityPrecision: Double = _fillConfigDefaults.cardinalityPrecision

  var _cardinalityCheckMode: String = _fillConfigDefaults.cardinalityCheckMode

  var _modelSelectionDistinctThreshold: Int =
    _fillConfigDefaults.modelSelectionDistinctThreshold

  // Aggregate view of the fields above (plus the two fill statistics); rebuilt by setFillConfig().
  var _fillConfig: FillConfig = _fillConfigDefaults

  // Outlier-filter settings seeded from `_outlierConfigDefaults`; copied into `_outlierConfig`
  // by setOutlierConfig().
  var _filterBounds: String = _outlierConfigDefaults.filterBounds

  var _lowerFilterNTile: Double = _outlierConfigDefaults.lowerFilterNTile

  var _upperFilterNTile: Double = _outlierConfigDefaults.upperFilterNTile

  var _filterPrecision: Double = _outlierConfigDefaults.filterPrecision

  var _continuousDataThreshold: Int =
    _outlierConfigDefaults.continuousDataThreshold

  var _fieldsToIgnore: Array[String] = _outlierConfigDefaults.fieldsToIgnore

  // Aggregate outlier configuration; rebuilt by setOutlierConfig().
  var _outlierConfig: OutlierConfig = _outlierConfigDefaults

  // Pearson-filter settings seeded from `_pearsonConfigDefaults`; copied into `_pearsonConfig`
  // by setPearsonConfig().
  var _pearsonFilterStatistic: String = _pearsonConfigDefaults.filterStatistic

  var _pearsonFilterDirection: String = _pearsonConfigDefaults.filterDirection

  var _pearsonFilterManualValue: Double =
    _pearsonConfigDefaults.filterManualValue

  var _pearsonFilterMode: String = _pearsonConfigDefaults.filterMode

  var _pearsonAutoFilterNTile: Double = _pearsonConfigDefaults.autoFilterNTile

  // Aggregate Pearson configuration; rebuilt by setPearsonConfig().
  var _pearsonConfig: PearsonConfig = _pearsonConfigDefaults

  // Covariance-filter cutoffs seeded from `_covarianceConfigDefaults`; copied into
  // `_covarianceConfig` by setCovarianceConfig().
  var _correlationCutoffLow: Double =
    _covarianceConfigDefaults.correlationCutoffLow

  var _correlationCutoffHigh: Double =
    _covarianceConfigDefaults.correlationCutoffHigh

  // Aggregate covariance configuration; rebuilt by setCovarianceConfig().
  var _covarianceConfig: CovarianceConfig = _covarianceConfigDefaults

  // Scaling settings. Note that the individual fields are seeded from the `default*` members
  // (no leading underscore), while the aggregate is seeded from `_scalingConfigDefaults`.
  var _scalerType: String = defaultScalerType

  var _scalerMin: Double = defaultScalerMin

  var _scalerMax: Double = defaultScalerMax

  var _standardScalerMeanFlag: Boolean = defaultStandardScalerMeanFlag

  var _standardScalerStdDevFlag: Boolean = defaultStandardScalerStdDevFlag

  var _pNorm: Double = defaultPNorm

  // Aggregate scaling configuration; rebuilt from the six fields above by setScalerConfig().
  var _scalingConfig: ScalingConfig = _scalingConfigDefaults

  // Aggregate feature-interaction configuration, seeded from `_defaultFeatureInteractionConfig`.
  var _featureInteractionConfig: FeatureInteractionConfig =
    _defaultFeatureInteractionConfig

  // Tuner parallelism and train/test split settings, seeded from `_geneticTunerDefaults`.
  var _parallelism: Int = _geneticTunerDefaults.parallelism

  var _kFold: Int = _geneticTunerDefaults.kFold

  var _trainPortion: Double = _geneticTunerDefaults.trainPortion

  var _trainSplitMethod: String = _geneticTunerDefaults.trainSplitMethod

  // Aggregate kSample configuration; its individual members are mirrored in the fields that follow.
  var _kSampleConfig: KSampleConfig = _geneticTunerDefaults.kSampleConfig

  // Individual kSample settings, each seeded from the matching member of
  // `_geneticTunerDefaults.kSampleConfig`.
  var _syntheticCol: String = _geneticTunerDefaults.kSampleConfig.syntheticCol

  var _kGroups: Int = _geneticTunerDefaults.kSampleConfig.kGroups

  var _kMeansMaxIter: Int = _geneticTunerDefaults.kSampleConfig.kMeansMaxIter

  var _kMeansTolerance: Double =
    _geneticTunerDefaults.kSampleConfig.kMeansTolerance

  var _kMeansDistanceMeasurement: String =
    _geneticTunerDefaults.kSampleConfig.kMeansDistanceMeasurement

  var _kMeansSeed: Long = _geneticTunerDefaults.kSampleConfig.kMeansSeed

  var _kMeansPredictionCol: String =
    _geneticTunerDefaults.kSampleConfig.kMeansPredictionCol

  var _lshHashTables: Int = _geneticTunerDefaults.kSampleConfig.lshHashTables

  var _lshSeed: Long = _geneticTunerDefaults.kSampleConfig.lshSeed

  var _lshOutputCol: String = _geneticTunerDefaults.kSampleConfig.lshOutputCol

  var _quorumCount: Int = _geneticTunerDefaults.kSampleConfig.quorumCount

  var _minimumVectorCountToMutate: Int =
    _geneticTunerDefaults.kSampleConfig.minimumVectorCountToMutate

  var _vectorMutationMethod: String =
    _geneticTunerDefaults.kSampleConfig.vectorMutationMethod

  var _mutationMode: String = _geneticTunerDefaults.kSampleConfig.mutationMode

  var _mutationValue: Double = _geneticTunerDefaults.kSampleConfig.mutationValue

  var _labelBalanceMode: String =
    _geneticTunerDefaults.kSampleConfig.labelBalanceMode

  var _cardinalityThreshold: Int =
    _geneticTunerDefaults.kSampleConfig.cardinalityThreshold

  var _numericRatio: Double = _geneticTunerDefaults.kSampleConfig.numericRatio

  var _numericTarget: Int = _geneticTunerDefaults.kSampleConfig.numericTarget

  var _outputDfRepartitionScaleFactor: Int =
    _geneticTunerDefaults.kSampleConfig.outputDfRepartitionScaleFactor

  // Chronological split settings, seeded from `_geneticTunerDefaults`.
  var _trainSplitChronologicalColumn: String =
    _geneticTunerDefaults.trainSplitChronologicalColumn

  var _trainSplitChronologicalRandomPercentage: Double =
    _geneticTunerDefaults.trainSplitChronologicalRandomPercentage

  // When true, setFieldsToIgnoreInVector appends the chronological column to the ignore list.
  var _trainSplitColumnSet: Boolean = false

  // Genetic algorithm settings, seeded from `_geneticTunerDefaults`.
  var _seed: Long = _geneticTunerDefaults.seed

  var _firstGenerationGenePool: Int =
    _geneticTunerDefaults.firstGenerationGenePool

  var _numberOfGenerations: Int = _geneticTunerDefaults.numberOfGenerations

  var _numberOfParentsToRetain: Int =
    _geneticTunerDefaults.numberOfParentsToRetain

  var _numberOfMutationsPerGeneration: Int =
    _geneticTunerDefaults.numberOfMutationsPerGeneration

  var _geneticMixing: Double = _geneticTunerDefaults.geneticMixing

  var _generationalMutationStrategy: String =
    _geneticTunerDefaults.generationalMutationStrategy

  var _fixedMutationValue: Int = _geneticTunerDefaults.fixedMutationValue

  var _mutationMagnitudeMode: String =
    _geneticTunerDefaults.mutationMagnitudeMode

  // Model seed map starts empty, with its "set" status flag false.
  var _modelSeedMap: Map[String, Any] = Map.empty

  var _modelSeedSetStatus: Boolean = false

  // First-generation settings: the aggregate comes from `_defaultFirstGenerationConfig`, the
  // individual members from `_geneticTunerDefaults.initialGenerationConfig`.
  var _firstGenerationConfig: FirstGenerationConfig =
    _defaultFirstGenerationConfig

  var _firstGenerationPermutationCount: Int =
    _geneticTunerDefaults.initialGenerationConfig.permutationCount

  var _firstGenerationIndexMixingMode: String =
    _geneticTunerDefaults.initialGenerationConfig.indexMixingMode

  var _firstGenerationArraySeed: Long =
    _geneticTunerDefaults.initialGenerationConfig.arraySeed

  // Hyper-space inference settings, seeded from the `_defaultHyperSpace*` members.
  var _hyperSpaceInference: Boolean = _defaultHyperSpaceInference

  var _hyperSpaceInferenceCount: Int = _defaultHyperSpaceInferenceCount

  var _hyperSpaceModelType: String = _defaultHyperSpaceModelType

  var _hyperSpaceModelCount: Int = _defaultHyperSpaceModelCount

  var _firstGenerationMode: String = _defaultInitialGenerationMode

  // Split caching settings, seeded from `_geneticTunerDefaults`.
  var _deltaCacheBackingDirectory: String =
    _geneticTunerDefaults.deltaCacheBackingDirectory

  var _splitCachingStrategy: String = _geneticTunerDefaults.splitCachingStrategy

  var _deltaCacheBackingDirectoryRemovalFlag: Boolean =
    _geneticTunerDefaults.deltaCacheBackingDirectoryRemovalFlag

  // Aggregate genetic tuner configuration.
  var _geneticConfig: GeneticConfig = _geneticTunerDefaults

  // Three MainConfig instances, each seeded from its own defaults object.
  var _mainConfig: MainConfig = _mainConfigDefaults

  var _featureImportancesConfig: MainConfig = _featureImportancesDefaults

  var _treeSplitsConfig: MainConfig = _treeSplitDefaults

  // MLflow settings: the aggregate and its individual members come from `_mlFlowConfigDefaults`;
  // the two flags come from their own `_defaultMlFlow*` members.
  var _mlFlowConfig: MLFlowConfig = _mlFlowConfigDefaults

  var _mlFlowLoggingFlag: Boolean = _defaultMlFlowLoggingFlag

  var _mlFlowArtifactsFlag: Boolean = _defaultMlFlowArtifactsFlag

  var _mlFlowTrackingURI: String = _mlFlowConfigDefaults.mlFlowTrackingURI

  var _mlFlowExperimentName: String = _mlFlowConfigDefaults.mlFlowExperimentName

  var _mlFlowAPIToken: String = _mlFlowConfigDefaults.mlFlowAPIToken

  var _mlFlowModelSaveDirectory: String =
    _mlFlowConfigDefaults.mlFlowModelSaveDirectory

  var _mlFlowLoggingMode: String = _mlFlowConfigDefaults.mlFlowLoggingMode

  var _mlFlowBestSuffix: String = _mlFlowConfigDefaults.mlFlowBestSuffix

  var _mlFlowCustomRunTags: Map[String, String] =
    _mlFlowConfigDefaults.mlFlowCustomRunTags

  // Auto-stopping and feature-importance cutoff settings, seeded from their `_default*` members.
  var _autoStoppingFlag: Boolean = _defaultAutoStoppingFlag

  var _autoStoppingScore: Double = _defaultAutoStoppingScore

  var _featureImportanceCutoffType: String = _defaultFeatureImportanceCutoffType

  var _featureImportanceCutoffValue: Double =
    _defaultFeatureImportanceCutoffValue

  // Evolution strategy, MBO and continuous-evolution settings, seeded from `_geneticTunerDefaults`.
  var _evolutionStrategy: String = _geneticTunerDefaults.evolutionStrategy

  var _continuousEvolutionImprovementThreshold: Int =
    _geneticTunerDefaults.continuousEvolutionImprovementThreshold

  var _geneticMBORegressorType: String =
    _geneticTunerDefaults.geneticMBORegressorType

  var _geneticMBOCandidateFactor: Int =
    _geneticTunerDefaults.geneticMBOCandidateFactor

  var _continuousEvolutionMaxIterations: Int =
    _geneticTunerDefaults.continuousEvolutionMaxIterations

  var _continuousEvolutionStoppingScore: Double =
    _geneticTunerDefaults.continuousEvolutionStoppingScore

  var _continuousEvolutionParallelism: Int =
    _geneticTunerDefaults.continuousEvolutionParallelism

  var _continuousEvolutionMutationAggressiveness: Int =
    _geneticTunerDefaults.continuousEvolutionMutationAggressiveness

  var _continuousEvolutionGeneticMixing: Double =
    _geneticTunerDefaults.continuousEvolutionGeneticMixing

  var _continuousEvolutionRollingImprovementCount: Int =
    _geneticTunerDefaults.continuousEvolutionRollingImprovementCount

  // Remaining standalone settings.
  var _inferenceConfigSaveLocation: String = _inferenceConfigSaveLocationDefault

  var _dataReductionFactor: Double = _defaultDataReductionFactor

  var _pipelineDebugFlag: Boolean = _defaultPipelineDebugFlag

  // Individual feature-interaction settings, each seeded from the matching member of
  // `_defaultFeatureInteractionConfig`.
  var _featureInteractionRetentionMode: String =
    _defaultFeatureInteractionConfig.retentionMode
  var _featureInteractionContinuousDiscretizerBucketCount: Int =
    _defaultFeatureInteractionConfig.continuousDiscretizerBucketCount
  var _featureInteractionParallelism: Int =
    _defaultFeatureInteractionConfig.parallelism
  var _featureInteractionTargetInteractionPercentage: Double =
    _defaultFeatureInteractionConfig.targetInteractionPercentage

  // Pipeline identifier, seeded from `_defaultPipelineId`; overwritten by setPipelineId.
  var _pipelineId: String = _defaultPipelineId

  /**
    * Setter for the pipeline identifier. Unlike most setters in this trait, it does not trigger
    * `setConfigs()`.
    *
    * @param value String: the identifier to store in `_pipelineId`
    * @return this instance, for call chaining
    */
  def setPipelineId(value: String): this.type = {
    this._pipelineId = value
    this
  }

  /**
    * Single refresh hook called by the public setters after they change a field.
    * Currently it only delegates to `setMainConfig()`.
    *
    * @return whatever `setMainConfig()` returns (typed as this instance)
    */
  private def setConfigs(): this.type = setMainConfig()

  /**
    * Setter for the modeling family. Selecting a family also resets the numeric and string search
    * boundaries to the defaults for that family and then triggers `setConfigs()`.
    *
    * The family name is resolved before any field is written, so an unsupported value leaves
    * the previously configured family and boundaries untouched.
    *
    * @param value String: one of "RandomForest", "MLPC", "Trees", "GBT", "LinearRegression",
    *              "LogisticRegression", "SVM", "XGBoost", or a LightGBM type ("gbmBinary", "gbmMulti",
    *              "gbmMultiOVA", "gbmHuber", "gbmFair", "gbmLasso", "gbmRidge", "gbmPoisson",
    *              "gbmQuantile", "gbmMape", "gbmTweedie", "gbmGamma")
    * @return this instance, for call chaining
    * @throws IllegalArgumentException if the supplied value is not a supported model type
    */
  def setModelingFamily(value: String): this.type = {
    // Resolve both boundary maps in one pass so the two lookups cannot drift apart.
    val familyDefaults
      : (Map[String, (Double, Double)], Map[String, List[String]]) =
      value match {
        case "RandomForest" =>
          (_rfDefaultNumBoundaries, _rfDefaultStringBoundaries)
        case "MLPC" =>
          (_mlpcDefaultNumBoundaries, _mlpcDefaultStringBoundaries)
        case "Trees" =>
          (_treesDefaultNumBoundaries, _treesDefaultStringBoundaries)
        case "GBT" =>
          (_gbtDefaultNumBoundaries, _gbtDefaultStringBoundaries)
        case "LinearRegression" =>
          (
            _linearRegressionDefaultNumBoundaries,
            _linearRegressionDefaultStringBoundaries
          )
        case "LogisticRegression" =>
          (
            _logisticRegressionDefaultNumBoundaries,
            _logisticRegressionDefaultStringBoundaries
          )
        case "SVM" =>
          (_svmDefaultNumBoundaries, _svmDefaultStringBoundaries)
        case "XGBoost" =>
          // XGBoost has no string boundaries; it uses an empty map.
          (_xgboostDefaultNumBoundaries, Map.empty[String, List[String]])
        case "gbmBinary" | "gbmMulti" | "gbmMultiOVA" | "gbmHuber" | "gbmFair" |
            "gbmLasso" | "gbmRidge" | "gbmPoisson" | "gbmQuantile" | "gbmMape" |
            "gbmTweedie" | "gbmGamma" =>
          (_lightGBMDefaultNumBoundaries, _lightGBMDefaultStringBoundaries)
        case _ =>
          throw new IllegalArgumentException(
            s"$value is an unsupported Model Type"
          )
      }
    // Only mutate state once the family is known to be valid.
    _modelingFamily = value
    _numericBoundaries = familyDefaults._1
    _stringBoundaries = familyDefaults._2
    setConfigs()
    this
  }

  /**
    * Setter for the label column name; triggers `setConfigs()` afterwards.
    *
    * @param value String: stored in `_labelCol`
    * @return this instance, for call chaining
    */
  def setLabelCol(value: String): this.type = {
    _labelCol = value
    // setConfigs() is typed this.type, so its result doubles as the return value.
    setConfigs()
  }

  /**
    * Setter for the features column name; triggers `setConfigs()` afterwards.
    *
    * @param value String: stored in `_featuresCol`
    * @return this instance, for call chaining
    */
  def setFeaturesCol(value: String): this.type = {
    _featuresCol = value
    // setConfigs() is typed this.type, so its result doubles as the return value.
    setConfigs()
  }

  /** Sets `_naFillFlag` to true and triggers `setConfigs()`. Returns this instance. */
  def naFillOn(): this.type = {
    _naFillFlag = true
    setConfigs() // typed this.type, so it is also the return value
  }

  /** Sets `_naFillFlag` to false and triggers `setConfigs()`. Returns this instance. */
  def naFillOff(): this.type = {
    _naFillFlag = false
    setConfigs() // typed this.type, so it is also the return value
  }

  /** Sets `_varianceFilterFlag` to true and triggers `setConfigs()`. Returns this instance. */
  def varianceFilterOn(): this.type = {
    _varianceFilterFlag = true
    setConfigs() // typed this.type, so it is also the return value
  }

  /** Sets `_varianceFilterFlag` to false and triggers `setConfigs()`. Returns this instance. */
  def varianceFilterOff(): this.type = {
    _varianceFilterFlag = false
    setConfigs() // typed this.type, so it is also the return value
  }

  /** Sets `_outlierFilterFlag` to true and triggers `setConfigs()`. Returns this instance. */
  def outlierFilterOn(): this.type = {
    _outlierFilterFlag = true
    setConfigs() // typed this.type, so it is also the return value
  }

  /** Sets `_outlierFilterFlag` to false and triggers `setConfigs()`. Returns this instance. */
  def outlierFilterOff(): this.type = {
    _outlierFilterFlag = false
    setConfigs() // typed this.type, so it is also the return value
  }

  /** Sets `_pearsonFilterFlag` to true and triggers `setConfigs()`. Returns this instance. */
  def pearsonFilterOn(): this.type = {
    _pearsonFilterFlag = true
    setConfigs() // typed this.type, so it is also the return value
  }

  /** Sets `_pearsonFilterFlag` to false and triggers `setConfigs()`. Returns this instance. */
  def pearsonFilterOff(): this.type = {
    _pearsonFilterFlag = false
    setConfigs() // typed this.type, so it is also the return value
  }

  /** Sets `_covarianceFilterFlag` to true and triggers `setConfigs()`. Returns this instance. */
  def covarianceFilterOn(): this.type = {
    _covarianceFilterFlag = true
    setConfigs() // typed this.type, so it is also the return value
  }

  /** Sets `_covarianceFilterFlag` to false and triggers `setConfigs()`. Returns this instance. */
  def covarianceFilterOff(): this.type = {
    _covarianceFilterFlag = false
    setConfigs() // typed this.type, so it is also the return value
  }

  /** Sets `_oneHotEncodeFlag` to true and triggers `setConfigs()`. Returns this instance. */
  def oneHotEncodingOn(): this.type = {
    _oneHotEncodeFlag = true
    setConfigs() // typed this.type, so it is also the return value
  }

  /** Sets `_oneHotEncodeFlag` to false and triggers `setConfigs()`. Returns this instance. */
  def oneHotEncodingOff(): this.type = {
    _oneHotEncodeFlag = false
    setConfigs() // typed this.type, so it is also the return value
  }

  /** Sets `_scalingFlag` to true and triggers `setConfigs()`. Returns this instance. */
  def scalingOn(): this.type = {
    _scalingFlag = true
    setConfigs() // typed this.type, so it is also the return value
  }

  /** Sets `_scalingFlag` to false and triggers `setConfigs()`. Returns this instance. */
  def scalingOff(): this.type = {
    _scalingFlag = false
    setConfigs() // typed this.type, so it is also the return value
  }

  /** Sets `_dataPrepCachingFlag` to true and triggers `setConfigs()`. Returns this instance. */
  def dataPrepCachingOn(): this.type = {
    _dataPrepCachingFlag = true
    setConfigs() // typed this.type, so it is also the return value
  }

  /** Sets `_dataPrepCachingFlag` to false and triggers `setConfigs()`. Returns this instance. */
  def dataPrepCachingOff(): this.type = {
    _dataPrepCachingFlag = false
    setConfigs() // typed this.type, so it is also the return value
  }

  /** Sets `_featureInteractionFlag` to true and triggers `setConfigs()`. Returns this instance. */
  def featureInteractionOn(): this.type = {
    _featureInteractionFlag = true
    setConfigs() // typed this.type, so it is also the return value
  }

  /** Sets `_featureInteractionFlag` to false and triggers `setConfigs()`. Returns this instance. */
  def featureInteractionOff(): this.type = {
    _featureInteractionFlag = false
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the number of concurrent threads used for asynchronous data prep tasks in the
    * feature engineering part of this application.
    *
    * @param value Int: must be greater than zero.
    * @note The useful upper limit depends on driver size. Setting this too high may cause
    *       cluster instability.
    * @return this instance, for call chaining
    * @author Ben Wilson, Databricks
    * @since 0.6.0
    * @throws IllegalArgumentException if a value less than or equal to zero is supplied.
    */
  @throws(classOf[IllegalArgumentException])
  def setDataPrepParallelism(value: Int): this.type = {
    require(value > 0, "DataPrepParallelism must be greater than zero.")
    _dataPrepParallelism = value
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter that replaces the numeric search boundaries wholesale; triggers `setConfigs()`.
    *
    * @param value Map[String, (Double, Double)]: stored in `_numericBoundaries` without validation
    * @return this instance, for call chaining
    */
  def setNumericBoundaries(value: Map[String, (Double, Double)]): this.type = {
    _numericBoundaries = value
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter that replaces the string search boundaries wholesale; triggers `setConfigs()`.
    *
    * @param value Map[String, List[String]]: stored in `_stringBoundaries` without validation
    * @return this instance, for call chaining
    */
  def setStringBoundaries(value: Map[String, List[String]]): this.type = {
    _stringBoundaries = value
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the scoring metric. Matching is case-insensitive; the stored value is the
    * canonical camel-case spelling (e.g. "AREAUNDERROC" is stored as "areaUnderROC").
    * Triggers `setConfigs()` afterwards.
    *
    * @param value String: one of f1, weightedPrecision, weightedRecall, accuracy, areaUnderPR,
    *              areaUnderROC, rmse, mse, r2, mae (any letter case)
    * @return this instance, for call chaining
    * @throws IllegalArgumentException if the supplied metric is not supported
    */
  def setScoringMetric(value: String): this.type = {
    // Locale.ROOT keeps the lower-casing independent of the JVM default locale (under a Turkish
    // locale, for example, 'I' would not map to 'i' and "WEIGHTEDPRECISION" would be rejected).
    val canonicalMetric = value.toLowerCase(java.util.Locale.ROOT) match {
      case "f1"                => "f1"
      case "weightedprecision" => "weightedPrecision"
      case "weightedrecall"    => "weightedRecall"
      case "accuracy"          => "accuracy"
      case "areaunderpr"       => "areaUnderPR"
      case "areaunderroc"      => "areaUnderROC"
      case "rmse"              => "rmse"
      case "mse"               => "mse"
      case "r2"                => "r2"
      case "mae"               => "mae"
      case _ =>
        throw new IllegalArgumentException(
          s"Supplied Scoring Metric '$value' is not supported. " +
            "Must be one of: f1, weightedPrecision, weightedRecall, accuracy, areaUnderPR, " +
            "areaUnderROC, rmse, mse, r2, mae."
        )
    }
    _scoringMetric = canonicalMetric
    setConfigs()
    this
  }

  /**
    * Setter for the scoring optimization direction; triggers `setConfigs()`.
    *
    * @param value String: exactly "minimize" or "maximize" (case-sensitive)
    * @return this instance, for call chaining
    * @throws IllegalArgumentException if any other value is supplied
    */
  def setScoringOptimizationStrategy(value: String): this.type = {
    val isAllowed = value == "minimize" || value == "maximize"
    require(
      isAllowed,
      s"$value is not a member of allowed scoring optimizations: 'minimize' or 'maximize'"
    )
    _scoringOptimizationStrategy = value
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the numeric fill statistic. Rebuilds `_fillConfig` and triggers `setConfigs()`.
    *
    * @param value String: stored in `_numericFillStat` without validation
    * @return this instance, for call chaining
    */
  def setNumericFillStat(value: String): this.type = {
    _numericFillStat = value
    setFillConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the character fill statistic. Rebuilds `_fillConfig` and triggers `setConfigs()`.
    *
    * @param value String: stored in `_characterFillStat` without validation
    * @return this instance, for call chaining
    */
  def setCharacterFillStat(value: String): this.type = {
    _characterFillStat = value
    setFillConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the date/time conversion type; triggers `setConfigs()`.
    *
    * @param value String: stored in `_dateTimeConversionType` without validation
    * @return this instance, for call chaining
    */
  def setDateTimeConversionType(value: String): this.type = {
    _dateTimeConversionType = value
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the fields that are excluded from the feature vector; triggers `setConfigs()`.
    * When `_trainSplitColumnSet` is true, `_trainSplitChronologicalColumn` is appended to the
    * supplied array (no de-duplication is performed).
    *
    * @param value Array[String]: field names to ignore
    * @return this instance, for call chaining
    */
  def setFieldsToIgnoreInVector(value: Array[String]): this.type = {
    _fieldsToIgnoreInVector =
      if (_trainSplitColumnSet) value :+ _trainSplitChronologicalColumn
      else value
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the NA fill filter precision (stored as `filterPrecision` in the fill configuration).
    * Rebuilds `_fillConfig` and triggers `setConfigs()`.
    *
    * @note setting this value to zero (0) for a large regression problem will incur a long processing time and
    *       an expensive shuffle.
    * @param value Double: Precision accuracy for approximate distinct calculation, within [0, 1].
    * @return this instance, for call chaining
    * @throws IllegalArgumentException if the value is outside of the allowable range [0, 1]
    * @since 0.5.2
    * @author Ben Wilson, Databricks
    */
  // `require` raises IllegalArgumentException (not AssertionError), so that is what is declared.
  @throws(classOf[IllegalArgumentException])
  def setNAFillFilterPrecision(value: Double): this.type = {
    require(
      value >= 0,
      s"Filter Precision for NA Fill must be greater than or equal to 0."
    )
    require(
      value <= 1,
      s"Filter Precision for NA Fill must be less than or equal to 1."
    )
    _naFillFilterPrecision = value
    setFillConfig()
    setConfigs()
    this
  }

  /**
    * Setter for manual per-column NA fill overrides on non-numeric columns, given as
    * [Column Name -> String Fill Value]. Columns not listed here fall back to the "auto"
    * statistics-based fill. Rebuilds `_fillConfig` and triggers `setConfigs()`.
    *
    * @note When naFillMode uses one of the Map Fill modes, either this map or the numeric NA fill
    *       map MUST be set.
    * @note Naming a field that is not in the DataFrame's schema will raise an exception.
    * @param value Map[String, String]: Column Name as String -> Fill Value as String
    * @return this instance, for call chaining
    * @since 0.5.2
    * @author Ben Wilson, Databricks
    */
  def setCategoricalNAFillMap(value: Map[String, String]): this.type = {
    _categoricalNAFillMap = value
    setFillConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for manual per-column NA fill overrides on numeric columns, given as
    * [Column Name -> AnyVal Fill Value] (the value must be numeric). Columns not listed here fall
    * back to the "auto" statistics-based fill. Rebuilds `_fillConfig` and triggers `setConfigs()`.
    *
    * @note When naFillMode uses one of the Map Fill modes, either this map or the categorical NA
    *       fill map MUST be set.
    * @note Naming a field that is not in the DataFrame's schema will raise an exception.
    * @param value Map[String, AnyVal]: Column Name as String -> Fill Numeric Type Value
    * @return this instance, for call chaining
    * @since 0.5.2
    * @author Ben Wilson, Databricks
    */
  def setNumericNAFillMap(value: Map[String, AnyVal]): this.type = {
    _numericNAFillMap = value
    setFillConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the 'blanket override' value that fills missing values in every categorical
    * column. Rebuilds `_fillConfig` and triggers `setConfigs()`.
    *
    * @param value String: the value used to fill all categorical NA values in the DataFrame.
    * @return this instance, for call chaining
    * @since 0.5.2
    * @author Ben Wilson, Databricks
    */
  def setCharacterNABlanketFillValue(value: String): this.type = {
    _characterNABlanketFillValue = value
    setFillConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the 'blanket override' value that fills missing values in every numeric column.
    * Rebuilds `_fillConfig` and triggers `setConfigs()`.
    *
    * @param value Double: the value used to fill all numeric NA values in the DataFrame.
    * @return this instance, for call chaining
    * @since 0.5.2
    * @author Ben Wilson, Databricks
    */
  def setNumericNABlanketFillValue(value: Double): this.type = {
    _numericNABlanketFillValue = value
    setFillConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the NA fill mode. Rebuilds `_fillConfig` and triggers `setConfigs()`.
    *
    * Available modes:
    *   - auto : Stats-based na fill for fields.  Usage of .setNumericFillStat and
    *     .setCharacterFillStat will inform the type of statistics that will be used to fill.
    *   - mapFill : Custom by-column overrides to 'blanket fill' na values on a per-column
    *     basis.  The categorical (string) fields are set via .setCategoricalNAFillMap while the
    *     numeric fields are set via .setNumericNAFillMap.
    *   - blanketFillAll : Fills all fields based on the values specified by
    *     .setCharacterNABlanketFillValue and .setNumericNABlanketFillValue.  All NA's for the
    *     appropriate types will be filled in accordingly throughout all columns.
    *   - blanketFillCharOnly : Will use statistics to fill in numeric fields, but will replace
    *     all categorical character fields na values with a blanket fill value.
    *   - blanketFillNumOnly : Will use statistics to fill in character fields, but will replace
    *     all numeric fields na values with a blanket value.
    *
    * @throws IllegalArgumentException if the mode specified is not contained in `_allowableNAFillModes`.
    * @param value String: Mode for NA Fill
    * @return this instance, for call chaining
    * @since 0.5.2
    * @author Ben Wilson, Databricks
    */
  @throws(classOf[IllegalArgumentException])
  def setNAFillMode(value: String): this.type = {
    require(
      _allowableNAFillModes.contains(value),
      // A space after the colon keeps the first allowed mode from running into "one of:".
      s"NA Fill Mode '$value' is not a supported mode.  Must be one of: " +
        s"${_allowableNAFillModes.mkString(", ")}"
    )
    _naFillMode = value
    setFillConfig()
    setConfigs()
    this
  }

  /**
    * Setter for the model selection distinct threshold. Rebuilds `_fillConfig` and triggers
    * `setConfigs()`.
    *
    * @param value Int: stored in `_modelSelectionDistinctThreshold` without validation
    * @return this instance, for call chaining
    */
  def setModelSelectionDistinctThreshold(value: Int): this.type = {
    _modelSelectionDistinctThreshold = value
    setFillConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Sets `_cardinalitySwitchFlag` to true, rebuilds `_fillConfig` and triggers `setConfigs()`.
    * Returns this instance.
    */
  def cardinalitySwitchOn(): this.type = {
    _cardinalitySwitchFlag = true
    setFillConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Sets `_cardinalitySwitchFlag` to false, rebuilds `_fillConfig` and triggers `setConfigs()`.
    * Returns this instance.
    */
  def cardinalitySwitchOff(): this.type = {
    _cardinalitySwitchFlag = false
    setFillConfig()
    setConfigs() // typed this.type, so it is also the return value
  }
  /**
    * Boolean form of cardinalitySwitchOn / cardinalitySwitchOff. Rebuilds `_fillConfig` and
    * triggers `setConfigs()`.
    *
    * @param value Boolean: stored in `_cardinalitySwitchFlag`
    * @return this instance, for call chaining
    */
  def setCardinalitySwitch(value: Boolean): this.type = {
    _cardinalitySwitchFlag = value
    setFillConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the cardinality type. Rebuilds `_fillConfig` and triggers `setConfigs()`.
    *
    * The value is checked before it is stored, so a rejected value does not overwrite the
    * previously configured type.
    *
    * @param value String: must be a member of `allowableCardinalilties`
    * @return this instance, for call chaining
    * @throws AssertionError if the supplied value is not an allowed cardinality type
    */
  @throws(classOf[AssertionError])
  def setCardinalityType(value: String): this.type = {
    // `assert` is kept (rather than `require`) so that callers catching AssertionError keep working.
    assert(
      allowableCardinalilties.contains(value),
      s"Supplied CardinalityType '$value' is not in: " +
        s"${allowableCardinalilties.mkString(", ")}"
    )
    _cardinalityType = value
    setFillConfig()
    setConfigs()
    this
  }

  /**
    * Setter for the cardinality limit. Rebuilds `_fillConfig` and triggers `setConfigs()`.
    *
    * @param value Int: must be greater than 0
    * @return this instance, for call chaining
    * @throws IllegalArgumentException if the value is less than or equal to 0
    */
  @throws(classOf[IllegalArgumentException])
  def setCardinalityLimit(value: Int): this.type = {
    require(value > 0, "Cardinality limit must be greater than 0")
    _cardinalityLimit = value
    setFillConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the cardinality precision. Rebuilds `_fillConfig` and triggers `setConfigs()`.
    *
    * @param value Double: must lie within [0.0, 1.0]
    * @return this instance, for call chaining
    * @throws IllegalArgumentException if the value is below 0.0 or above 1.0
    */
  @throws(classOf[IllegalArgumentException])
  def setCardinalityPrecision(value: Double): this.type = {
    // The lower bound is checked first so its message wins when both bounds would fail (e.g. NaN).
    require(value >= 0.0, "Precision must be greater than or equal to 0.")
    require(value <= 1.0, "Precision must be less than or equal to 1.")
    _cardinalityPrecision = value
    setFillConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the cardinality check mode. Rebuilds `_fillConfig` and triggers `setConfigs()`.
    *
    * @param value String: must be a member of `allowableCategoricalFilterModes`
    * @return this instance, for call chaining
    * @throws AssertionError if the supplied value is not an allowed mode
    */
  @throws(classOf[AssertionError])
  def setCardinalityCheckMode(value: String): this.type = {
    assert(
      allowableCategoricalFilterModes.contains(value),
      s"Supplied CardinalityCheckMode $value is not in: " +
        s"${allowableCategoricalFilterModes.mkString(", ")}"
    )
    _cardinalityCheckMode = value
    setFillConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Rebuilds the aggregate `_fillConfig` from the current values of the individual fill and
    * cardinality fields. Called by every setter that changes one of those fields.
    *
    * @return this instance
    */
  private def setFillConfig(): this.type = {
    val rebuilt = FillConfig(
      numericFillStat = _numericFillStat,
      characterFillStat = _characterFillStat,
      modelSelectionDistinctThreshold = _modelSelectionDistinctThreshold,
      cardinalitySwitch = _cardinalitySwitchFlag,
      cardinalityType = _cardinalityType,
      cardinalityLimit = _cardinalityLimit,
      cardinalityPrecision = _cardinalityPrecision,
      cardinalityCheckMode = _cardinalityCheckMode,
      filterPrecision = _naFillFilterPrecision,
      categoricalNAFillMap = _categoricalNAFillMap,
      numericNAFillMap = _numericNAFillMap,
      characterNABlanketFillValue = _characterNABlanketFillValue,
      numericNABlanketFillValue = _numericNABlanketFillValue,
      naFillMode = _naFillMode
    )
    _fillConfig = rebuilt
    this
  }

  /**
    * Setter for the outlier filter bounds. Rebuilds `_outlierConfig` and triggers `setConfigs()`.
    *
    * @param value String: stored in `_filterBounds` without validation
    * @return this instance, for call chaining
    */
  def setFilterBounds(value: String): this.type = {
    _filterBounds = value
    setOutlierConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the lower outlier filter n-tile. Rebuilds `_outlierConfig` and triggers
    * `setConfigs()`.
    *
    * @param value Double: stored in `_lowerFilterNTile` without validation
    * @return this instance, for call chaining
    */
  def setLowerFilterNTile(value: Double): this.type = {
    _lowerFilterNTile = value
    setOutlierConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the upper outlier filter n-tile. Rebuilds `_outlierConfig` and triggers
    * `setConfigs()`.
    *
    * @param value Double: stored in `_upperFilterNTile` without validation
    * @return this instance, for call chaining
    */
  def setUpperFilterNTile(value: Double): this.type = {
    _upperFilterNTile = value
    setOutlierConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the outlier filter precision. Rebuilds `_outlierConfig` and triggers
    * `setConfigs()`.
    *
    * @param value Double: stored in `_filterPrecision` without validation
    * @return this instance, for call chaining
    */
  def setFilterPrecision(value: Double): this.type = {
    _filterPrecision = value
    setOutlierConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the continuous data threshold of the outlier filter. Rebuilds `_outlierConfig`
    * and triggers `setConfigs()`.
    *
    * @param value Int: stored in `_continuousDataThreshold` without validation
    * @return this instance, for call chaining
    */
  def setContinuousDataThreshold(value: Int): this.type = {
    _continuousDataThreshold = value
    setOutlierConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the fields the outlier filter ignores. Rebuilds `_outlierConfig` and triggers
    * `setConfigs()`.
    *
    * @param value Array[String]: stored in `_fieldsToIgnore` without validation
    * @return this instance, for call chaining
    */
  def setFieldsToIgnore(value: Array[String]): this.type = {
    _fieldsToIgnore = value
    setOutlierConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Rebuilds the aggregate `_outlierConfig` from the current values of the individual outlier
    * fields. Called by every outlier setter.
    *
    * @return this instance
    */
  private def setOutlierConfig(): this.type = {
    val rebuilt = OutlierConfig(
      filterBounds = _filterBounds,
      lowerFilterNTile = _lowerFilterNTile,
      upperFilterNTile = _upperFilterNTile,
      filterPrecision = _filterPrecision,
      continuousDataThreshold = _continuousDataThreshold,
      fieldsToIgnore = _fieldsToIgnore
    )
    _outlierConfig = rebuilt
    this
  }

  /**
    * Setter for the Pearson filter statistic. Rebuilds `_pearsonConfig` and triggers
    * `setConfigs()`.
    *
    * @param value String: stored in `_pearsonFilterStatistic` without validation
    * @return this instance, for call chaining
    */
  def setPearsonFilterStatistic(value: String): this.type = {
    _pearsonFilterStatistic = value
    setPearsonConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the Pearson filter direction. Rebuilds `_pearsonConfig` and triggers
    * `setConfigs()`.
    *
    * @param value String: stored in `_pearsonFilterDirection` without validation
    * @return this instance, for call chaining
    */
  def setPearsonFilterDirection(value: String): this.type = {
    _pearsonFilterDirection = value
    setPearsonConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the manual Pearson filter value. Rebuilds `_pearsonConfig` and triggers
    * `setConfigs()`.
    *
    * @param value Double: stored in `_pearsonFilterManualValue` without validation
    * @return this instance, for call chaining
    */
  def setPearsonFilterManualValue(value: Double): this.type = {
    _pearsonFilterManualValue = value
    setPearsonConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the Pearson filter mode. Rebuilds `_pearsonConfig` and triggers `setConfigs()`.
    *
    * @param value String: stored in `_pearsonFilterMode` without validation
    * @return this instance, for call chaining
    */
  def setPearsonFilterMode(value: String): this.type = {
    _pearsonFilterMode = value
    setPearsonConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the Pearson auto-filter n-tile. Rebuilds `_pearsonConfig` and triggers
    * `setConfigs()`.
    *
    * @param value Double: stored in `_pearsonAutoFilterNTile` without validation
    * @return this instance, for call chaining
    */
  def setPearsonAutoFilterNTile(value: Double): this.type = {
    _pearsonAutoFilterNTile = value
    setPearsonConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Rebuilds the aggregate `_pearsonConfig` from the current values of the individual Pearson
    * fields. Called by every Pearson setter.
    *
    * @return this instance
    */
  private def setPearsonConfig(): this.type = {
    val rebuilt = PearsonConfig(
      filterStatistic = _pearsonFilterStatistic,
      filterDirection = _pearsonFilterDirection,
      filterManualValue = _pearsonFilterManualValue,
      filterMode = _pearsonFilterMode,
      autoFilterNTile = _pearsonAutoFilterNTile
    )
    _pearsonConfig = rebuilt
    this
  }

  /**
    * Setter for the lower correlation cutoff. Rebuilds `_covarianceConfig` and triggers
    * `setConfigs()`.
    *
    * @param value Double: stored in `_correlationCutoffLow` without validation
    * @return this instance, for call chaining
    */
  def setCorrelationCutoffLow(value: Double): this.type = {
    _correlationCutoffLow = value
    setCovarianceConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the upper correlation cutoff. Rebuilds `_covarianceConfig` and triggers
    * `setConfigs()`.
    *
    * @param value Double: stored in `_correlationCutoffHigh` without validation
    * @return this instance, for call chaining
    */
  def setCorrelationCutoffHigh(value: Double): this.type = {
    _correlationCutoffHigh = value
    setCovarianceConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Rebuilds the aggregate `_covarianceConfig` from the two correlation cutoff fields.
    * Called by both cutoff setters.
    *
    * @return this instance
    */
  private def setCovarianceConfig(): this.type = {
    val rebuilt = CovarianceConfig(
      correlationCutoffLow = _correlationCutoffLow,
      correlationCutoffHigh = _correlationCutoffHigh
    )
    _covarianceConfig = rebuilt
    this
  }

  /**
    * Setter for the scaler type. Rebuilds `_scalingConfig` and triggers `setConfigs()`.
    *
    * @param value String: stored in `_scalerType` without validation
    * @return this instance, for call chaining
    */
  def setScalerType(value: String): this.type = {
    _scalerType = value
    setScalerConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the scaler minimum. Rebuilds `_scalingConfig` and triggers `setConfigs()`.
    *
    * @param value Double: stored in `_scalerMin` without validation
    * @return this instance, for call chaining
    */
  def setScalerMin(value: Double): this.type = {
    _scalerMin = value
    setScalerConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the scaler maximum. Rebuilds `_scalingConfig` and triggers `setConfigs()`.
    *
    * @param value Double: stored in `_scalerMax` without validation
    * @return this instance, for call chaining
    */
  def setScalerMax(value: Double): this.type = {
    _scalerMax = value
    setScalerConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Sets `_standardScalerMeanFlag` to true, rebuilds `_scalingConfig` and triggers
    * `setConfigs()`. Returns this instance.
    */
  def setStandardScalerMeanFlagOn(): this.type = {
    _standardScalerMeanFlag = true
    setScalerConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Sets `_standardScalerMeanFlag` to false, rebuilds `_scalingConfig` and triggers
    * `setConfigs()`. Returns this instance.
    */
  def setStandardScalerMeanFlagOff(): this.type = {
    _standardScalerMeanFlag = false
    setScalerConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Sets `_standardScalerStdDevFlag` to true, rebuilds `_scalingConfig` and triggers
    * `setConfigs()`. Returns this instance.
    */
  def setStandardScalerStdDevFlagOn(): this.type = {
    _standardScalerStdDevFlag = true
    setScalerConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Sets `_standardScalerStdDevFlag` to false, rebuilds `_scalingConfig` and triggers
    * `setConfigs()`. Returns this instance.
    */
  def setStandardScalerStdDevFlagOff(): this.type = {
    _standardScalerStdDevFlag = false
    setScalerConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the p-norm value of the scaling configuration. Rebuilds `_scalingConfig` and
    * triggers `setConfigs()`.
    *
    * @param value Double: stored in `_pNorm` without validation
    * @return this instance, for call chaining
    */
  def setPNorm(value: Double): this.type = {
    _pNorm = value
    setScalerConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Rebuilds the aggregate `_scalingConfig` from the current values of the individual scaler
    * fields. Called by every scaler setter.
    *
    * @return this instance
    */
  private def setScalerConfig(): this.type = {
    val rebuilt = ScalingConfig(
      scalerType = _scalerType,
      scalerMin = _scalerMin,
      scalerMax = _scalerMax,
      standardScalerMeanFlag = _standardScalerMeanFlag,
      standardScalerStdDevFlag = _standardScalerStdDevFlag,
      pNorm = _pNorm
    )
    _scalingConfig = rebuilt
    this
  }

  /**
    * Setter for the mode that decides which interacted features are kept. Calls
    * `setFeatureInteractionConfig()` and then `setConfigs()`.
    *
    * Modes are:
    *   - all -> Keeps every interaction between all features (after string indexing of categorical values)
    *   - optimistic -> Keeps an interaction when its Information Gain / Variance, compared to at least ONE
    *       of its parents, is above the threshold set by featureInteractionTargetInteractionPercentage
    *         (e.g. if IG of left parent is 0.5 and right parent is 0.9, with threshold set at 10, if the interaction
    *         between these two parents has an IG of 0.42, it would be rejected, but if it was 0.46, it would be kept)
    *   - strict -> the threshold percentage must be met for BOTH parents.
    *         (in the above example, the IG for the interaction would have to be > 0.81 in order to be included in
    *         the feature vector).
    * @param value String -> one of: 'all', 'optimistic', or 'strict'
    * @return this instance, for call chaining
    * @throws IllegalArgumentException if the value is not contained in `allowableFeatureInteractionModes`
    * @since 0.6.2
    * @author Ben Wilson, Databricks
    */
  @throws(classOf[IllegalArgumentException])
  def setFeatureInteractionRetentionMode(value: String): this.type = {
    val isPermitted = allowableFeatureInteractionModes.contains(value)
    require(
      isPermitted,
      "FeatureInteractionRetentionMode is invalid.  Must be one of: " +
        allowableFeatureInteractionModes.mkString(", ")
    )
    _featureInteractionRetentionMode = value
    setFeatureInteractionConfig()
    setConfigs() // typed this.type, so it is also the return value
  }

  /**
    * Setter for the number of nominal buckets that a continuous feature column is discretized into.
    * Entropy for a continuous variable can only be estimated once its distribution has been converted to
    * nominal values for per-split information gain; this setting defines how many nominal categorical
    * values are created out of a continuously distributed feature for that calculation.
    * @param value Int -> must be greater than 1
    * @throws IllegalArgumentException if the value specified is <= 1
    * @since 0.6.2
    * @author Ben Wilson, Databricks
    */
  def setFeatureInteractionContinuousDiscretizerBucketCount(
    value: Int
  ): this.type = {
    require(
      1 < value,
      "FeatureInteractionContinuousDiscretizerBucketCount must be greater than 1."
    )
    _featureInteractionContinuousDiscretizerBucketCount = value
    // Rebuild the feature interaction config, then refresh via setConfigs().
    setFeatureInteractionConfig().setConfigs()
    this
  }

  /**
    * Setter for the concurrency level used when scoring feature interaction candidates.
    * Due to the nature of these operations, this may need to be configured differently from the modeling
    * and general feature engineering phases of the toolkit, and is highly dependent on the row count of
    * the data set being submitted.
    * @param value Int -> must be greater than 0
    * @since 0.6.2
    * @author Ben Wilson, Databricks
    * @throws IllegalArgumentException if the value is < 1
    */
  @throws(classOf[IllegalArgumentException])
  def setFeatureInteractionParallelism(value: Int): this.type = {
    require(
      value > 0,
      "FeatureInteractionParallelism must be set to a value >= 1."
    )
    _featureInteractionParallelism = value
    // Rebuild the feature interaction config, then refresh via setConfigs().
    setFeatureInteractionConfig().setConfigs()
    this
  }

  /**
    * Setter for the minimum acceptable InformationGain or Variance of an interaction candidate, expressed
    * relative to the scores of its parents.  No range validation is applied.
    * @param value Double in range of -inf -> inf
    * @since 0.6.2
    * @author Ben Wilson, Databricks
    */
  def setFeatureInteractionTargetInteractionPercentage(
    value: Double
  ): this.type = {
    _featureInteractionTargetInteractionPercentage = value
    // Rebuild the feature interaction config, then refresh via setConfigs().
    setFeatureInteractionConfig().setConfigs()
    this
  }

  /**
    * Private setter that rebuilds `_featureInteractionConfig` from the current feature interaction fields.
    * @since 0.6.2
    * @author Ben Wilson, Databricks
    */
  private def setFeatureInteractionConfig(): this.type = {
    val refreshedInteraction = FeatureInteractionConfig(
      retentionMode = _featureInteractionRetentionMode,
      continuousDiscretizerBucketCount =
        _featureInteractionContinuousDiscretizerBucketCount,
      parallelism = _featureInteractionParallelism,
      targetInteractionPercentage =
        _featureInteractionTargetInteractionPercentage
    )
    _featureInteractionConfig = refreshedInteraction
    this
  }

  /**
    * Setter for the parallelism value carried by the genetic configuration.
    *
    * @param value Int: requested parallelism; values above 100 are rejected
    * @return this
    * @throws IllegalArgumentException if the requested value is above 100
    */
  def setParallelism(value: Int): this.type = {
    // Validate the incoming value, not the currently stored field: checking the
    // stored field let an out-of-range value through and only failed on the
    // following call.
    require(
      value <= 100,
      s"Parallelism above 100 will result in cluster instability."
    )
    _parallelism = value
    setGeneticConfig()
    setConfigs()
    this
  }

  /**
    * Stores the k-fold value and rebuilds the genetic configuration that carries it.
    *
    * @param value Int: the k-fold value to store
    * @return this
    */
  def setKFold(value: Int): this.type = {
    _kFold = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Stores the train portion and rebuilds the genetic configuration that carries it.
    *
    * @param value Double: the train portion to store (no range validation is applied here)
    * @return this
    */
  def setTrainPortion(value: Double): this.type = {
    _trainPortion = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the train/test split method.  Prints a warning to stdout when 'chronological' is chosen.
    *
    * @param value String: must be a member of `trainSplitMethods`
    * @return this
    * @throws IllegalArgumentException if the value is not a member of `trainSplitMethods`
    */
  def setTrainSplitMethod(value: String): this.type = {
    val permittedMethods = trainSplitMethods
    require(
      permittedMethods.contains(value),
      s"TrainSplitMethod $value must be one of: ${permittedMethods.mkString(", ")}"
    )
    _trainSplitMethod = value
    if (value == "chronological") {
      println(
        "[WARNING] setTrainSplitMethod() -> Chronological splits is shuffle-intensive and will increase " +
          "runtime significantly.  Only use if necessary for modeling scenario!"
      )
    }
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Rebuilds `_kSampleConfig` from the current KSample-related fields.
    *
    * @return this
    */
  def setKSampleConfig(): this.type = {
    val refreshedKSample = KSampleConfig(
      syntheticCol = _syntheticCol,
      kGroups = _kGroups,
      kMeansMaxIter = _kMeansMaxIter,
      kMeansTolerance = _kMeansTolerance,
      kMeansDistanceMeasurement = _kMeansDistanceMeasurement,
      kMeansSeed = _kMeansSeed,
      kMeansPredictionCol = _kMeansPredictionCol,
      lshHashTables = _lshHashTables,
      lshSeed = _lshSeed,
      lshOutputCol = _lshOutputCol,
      quorumCount = _quorumCount,
      minimumVectorCountToMutate = _minimumVectorCountToMutate,
      vectorMutationMethod = _vectorMutationMethod,
      mutationMode = _mutationMode,
      mutationValue = _mutationValue,
      labelBalanceMode = _labelBalanceMode,
      cardinalityThreshold = _cardinalityThreshold,
      numericRatio = _numericRatio,
      numericTarget = _numericTarget,
      outputDfRepartitionScaleFactor = _outputDfRepartitionScaleFactor
    )
    _kSampleConfig = refreshedKSample
    this
  }

  /**
    * Setter for the name of the Synthetic column.
    *
    * @param value String: A column name that is uniquely not part of the main DataFrame
    * @since 0.5.1
    * @author Ben Wilson
    */
  def setSyntheticCol(value: String): this.type = {
    _syntheticCol = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig().setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the number of K-Groups that the KMeans model generates.
    *
    * @param value Int: number of k groups to generate
    * @return this
    */
  def setKGroups(value: Int): this.type = {
    _kGroups = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig().setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the maximum number of iterations the KMeans model may use to converge.
    *
    * @param value Int: Maximum limit on iterations
    * @return this
    */
  def setKMeansMaxIter(value: Int): this.type = {
    _kMeansMaxIter = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig().setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the KMeans tolerance (must be > 0).
    *
    * @param value The tolerance value setting for KMeans
    * @see reference: [[http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.ml.clustering.KMeans]]
    *      for further details.
    * @return this
    * @throws IllegalArgumentException() if the value is not strictly greater than 0
    */
  @throws(classOf[IllegalArgumentException])
  def setKMeansTolerance(value: Double): this.type = {
    require(
      value > 0,
      s"KMeans tolerance value $value is out of range.  Must be > 0."
    )
    _kMeansTolerance = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig().setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the distance measurement used to calculate the nearness of vectors to a centroid.
    *
    * @param value String: Options -> "euclidean" or "cosine" Default: "euclidean"
    * @return this
    * @throws IllegalArgumentException() if an invalid value is entered
    */
  @throws(classOf[IllegalArgumentException])
  def setKMeansDistanceMeasurement(value: String): this.type = {
    val permittedMeasurements = allowableKMeansDistanceMeasurements
    require(
      permittedMeasurements.contains(value),
      s"Kmeans Distance Measurement $value is not " +
        s"a valid mode of operation.  Must be one of: ${permittedMeasurements.mkString(", ")}"
    )
    _kMeansDistanceMeasurement = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig().setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the seed of the KMeans clustering algorithm.
    *
    * @param value Long: Seed value
    * @return this
    */
  def setKMeansSeed(value: Long): this.type = {
    _kMeansSeed = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig().setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the internal KMeans column that holds cluster membership attribution.
    *
    * @param value String: column name for internal algorithm column for group membership
    * @return this
    */
  def setKMeansPredictionCol(value: String): this.type = {
    _kMeansPredictionCol = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig().setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the number of Hash Tables used by MinHashLSH.
    *
    * @param value Int: Count of hash tables to use
    * @see [[http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.ml.feature.MinHashLSH]]
    *     for more information
    * @return this
    */
  def setLSHHashTables(value: Int): this.type = {
    _lshHashTables = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig().setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the Seed value of the LSH MinHash model.
    *
    * @param value Long: A Seed value
    * @since 0.5.1
    * @author Ben Wilson
    */
  def setLSHSeed(value: Long): this.type = {
    _lshSeed = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig().setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the internal column that holds the LSH output hash information.
    *
    * @param value String: column name for the internal MinHashLSH Model transformation value
    * @return this
    */
  def setLSHOutputCol(value: String): this.type = {
    _lshOutputCol = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig().setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for how many vectors adjacent to the centroid are found for generation of synthetic data.
    *
    * @note the higher the value set here, the higher the variance in synthetic data generation
    * @param value Int: Number of vectors to find nearest each centroid within the class
    * @return this
    */
  def setQuorumCount(value: Int): this.type = {
    _quorumCount = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig().setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the minimum threshold of vector indexes to mutate within the feature vector.
    *
    * @note With vectorMutationMethod "fixed" this is the fixed count of vector positions to mutate.
    *       With vectorMutationMethod "random" this is the lower threshold, i.e. 'at least this many
    *       indexes will be mutated'
    * @param value The minimum (or fixed) number of indexes to mutate.
    * @return this
    */
  def setMinimumVectorCountToMutate(value: Int): this.type = {
    _minimumVectorCountToMutate = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig().setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the Vector Mutation Method.
    *
    * @note Options:
    *       "fixed" - will use the value of minimumVectorCountToMutate to select random indexes of this number of indexes.
    *       "random" - will use this number as a lower bound on a random selection of indexes between this and the vector length.
    *       "all" - will mutate all of the vectors.
    * @param value String - the mode to use.
    * @return this
    * @throws IllegalArgumentException() if the mode is not supported.
    */
  @throws(classOf[IllegalArgumentException])
  def setVectorMutationMethod(value: String): this.type = {
    val permittedMethods = allowableVectorMutationMethods
    require(
      permittedMethods.contains(value),
      s"Vector Mutation Mode $value is not supported.  " +
        s"Must be one of: ${permittedMethods.mkString(", ")} "
    )
    _vectorMutationMethod = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig().setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the Mutation Mode applied to the individual values of the feature vector.
    *
    * @note Options:
    *       "weighted" - uses weighted averaging to scale the euclidean distance between the centroid vector and mutation candidate vectors
    *       "random" - randomly selects a position on the euclidean vector between the centroid vector and the candidate mutation vectors
    *       "ratio" - uses a ratio between the values of the centroid vector and the mutation vector
    * @param value String: the mode to use.
    * @return this
    * @throws IllegalArgumentException() if the mode is not supported.
    */
  @throws(classOf[IllegalArgumentException])
  def setMutationMode(value: String): this.type = {
    val permittedModes = allowableMutationModes
    require(
      permittedModes.contains(value),
      s"Mutation Mode $value is not a valid mode of operation.  " +
        s"Must be one of: ${permittedModes.mkString(", ")}"
    )
    _mutationMode = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig().setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the mutation magnitude used by the 'weighted' and 'ratio' modes of mutationMode.
    *
    * @param value Double: value between 0 and 1 (both exclusive) for mutation magnitude adjustment.
    * @note the higher this value, the closer to the centroid vector vs. the candidate mutation vector the synthetic row data will be.
    * @return this
    * @throws IllegalArgumentException() if the value specified is outside of the range (0, 1)
    */
  @throws(classOf[IllegalArgumentException])
  def setMutationValue(value: Double): this.type = {
    val withinOpenUnitInterval = 0 < value && value < 1
    require(
      withinOpenUnitInterval,
      s"Mutation Value must be between 0 and 1. Value $value is not permitted."
    )
    _mutationValue = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig().setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter - for determining the label balance approach mode.
    *
    * @note Available modes:
    *         'match': Will match all smaller class counts to largest class count.  [WARNING] - May significantly increase memory pressure!
    *         'percentage' Will adjust smaller classes to a percentage value of the largest class count.
    *         'target' Will increase smaller class counts to a fixed numeric target of rows.
    * @param value String: one of: 'match', 'percentage' or 'target'
    * @note Default: "percentage"
    * @since 0.5.1
    * @author Ben Wilson
    * @throws IllegalArgumentException if the provided mode is not supported (raised by `require`).
    */
  @throws(classOf[IllegalArgumentException])
  def setLabelBalanceMode(value: String): this.type = {
    require(
      allowableLabelBalanceModes.contains(value),
      // Separator added between the two sentences; they previously ran together.
      s"Label Balance Mode $value is not supported.  " +
        s"Must be one of: ${allowableLabelBalanceModes.mkString(", ")}"
    )
    _labelBalanceMode = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig()
    setGeneticConfig()
    setConfigs()
    this
  }

  /**
    * Setter - for overriding the cardinality limit above which an exception is raised.  [WARNING] increasing
    * this value on a sufficiently large data set could incur, during runtime, excessive memory and cpu
    * pressure on the cluster.
    *
    * @param value Int: the limit above which an exception will be thrown for a classification problem wherein the
    *              label distinct count is too large to successfully generate synthetic data.
    * @note Default: 20
    * @since 0.5.1
    * @author Ben Wilson
    */
  def setCardinalityThreshold(value: Int): this.type = {
    _cardinalityThreshold = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig().setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter - for specifying the percentage ratio for the mode 'percentage' in setLabelBalanceMode()
    *
    * @param value Double: A fractional double greater than 0.0 and no larger than 1.0.
    * @note Setting this value to 1.0 is equivalent to setting the label balance mode to 'match'
    * @note Default: 0.2
    * @since 0.5.1
    * @author Ben Wilson
    * @throws IllegalArgumentException if the provided value is outside of the range (0.0, 1.0]
    *                                  (raised by `require`).
    */
  @throws(classOf[IllegalArgumentException])
  def setNumericRatio(value: Double): this.type = {
    require(
      value > 0.0 && value <= 1.0,
      // Separator added so the rejected value no longer fuses with "...0 and 1.".
      s"Invalid Numeric Ratio entered!  Must be between 0 and 1.  " +
        s"$value is not valid."
    )
    _numericRatio = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig()
    setGeneticConfig()
    setConfigs()
    this
  }

  /**
    * Setter - for the target row count generated by the 'target' mode in setLabelBalanceMode()
    *
    * @param value Int: The desired final number of rows per minority class label
    * @note [WARNING] Setting this value to too high of a number will greatly increase runtime and memory pressure.
    * @since 0.5.1
    * @author Ben Wilson
    */
  def setNumericTarget(value: Int): this.type = {
    _numericTarget = value
    // KSample config is rebuilt first because the genetic config embeds it.
    setKSampleConfig().setGeneticConfig().setConfigs()
    this
  }

  /**
    * Stores the chronological split column, appends it to the fields ignored in the feature vector, marks
    * the split column as set and rebuilds the genetic configuration.
    *
    * NOTE(review): each call appends `value` to the ignored fields again; confirm that
    * setFieldsToIgnoreInVector tolerates duplicates if this setter is called repeatedly.
    *
    * @param value String: name of the column used for chronological splitting
    * @return this
    */
  def setTrainSplitChronologicalColumn(value: String): this.type = {
    _trainSplitChronologicalColumn = value
    setFieldsToIgnoreInVector(_fieldsToIgnoreInVector :+ value)
    _trainSplitColumnSet = true
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Stores the chronological random percentage and rebuilds the genetic configuration.  A warning is
    * printed to stdout when the value is above 10; the value is still accepted.
    *
    * @param value Double: the percentage to store
    * @return this
    */
  def setTrainSplitChronologicalRandomPercentage(value: Double): this.type = {
    _trainSplitChronologicalRandomPercentage = value
    if (value > 10) {
      println(
        "[WARNING] setTrainSplitChronologicalRandomPercentage() setting this value above 10 " +
          "percent will cause significant per-run train/test skew and variability in row counts during training.  " +
          "Use higher values only if this is desired."
      )
    }
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Stores the seed and rebuilds the genetic configuration that carries it.
    *
    * @param value Long: the seed value
    * @return this
    */
  def setSeed(value: Long): this.type = {
    _seed = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Stores the first generation gene pool value and rebuilds the genetic configuration that carries it.
    *
    * @param value Int: the first generation gene pool value
    * @return this
    */
  def setFirstGenerationGenePool(value: Int): this.type = {
    _firstGenerationGenePool = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Stores the number of generations and rebuilds the genetic configuration that carries it.
    *
    * @param value Int: the number of generations
    * @return this
    */
  def setNumberOfGenerations(value: Int): this.type = {
    _numberOfGenerations = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Stores the number of parents to retain and rebuilds the genetic configuration that carries it.
    *
    * @param value Int: the number of parents to retain
    * @return this
    */
  def setNumberOfParentsToRetain(value: Int): this.type = {
    _numberOfParentsToRetain = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Stores the number of mutations per generation and rebuilds the genetic configuration.
    *
    * @param value Int: the number of mutations per generation
    * @return this
    */
  def setNumberOfMutationsPerGeneration(value: Int): this.type = {
    _numberOfMutationsPerGeneration = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Stores the genetic mixing value and rebuilds the genetic configuration.
    *
    * @param value Double: the genetic mixing value (no range validation is applied here)
    * @return this
    */
  def setGeneticMixing(value: Double): this.type = {
    _geneticMixing = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Stores the generational mutation strategy and rebuilds the genetic configuration.
    *
    * @param value String: the strategy name (not validated by this setter)
    * @return this
    */
  def setGenerationalMutationStrategy(value: String): this.type = {
    _generationalMutationStrategy = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Stores the fixed mutation value and rebuilds the genetic configuration.
    *
    * @param value Int: the fixed mutation value
    * @return this
    */
  def setFixedMutationValue(value: Int): this.type = {
    _fixedMutationValue = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Stores the mutation magnitude mode and rebuilds the genetic configuration.
    *
    * @param value String: the mode name (not validated by this setter)
    * @return this
    */
  def setMutationMagnitudeMode(value: String): this.type = {
    _mutationMagnitudeMode = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Converts the supplied string into a model seed map via `extractGenericModelReturnMap`, stores it,
    * marks the model seed as set and rebuilds the genetic configuration.
    *
    * @param value String: textual model seed handed to `extractGenericModelReturnMap`
    * @return this
    */
  def setModelSeedString(value: String): this.type = {
    val parsedSeed = extractGenericModelReturnMap(value)
    _modelSeedMap = parsedSeed
    _modelSeedSetStatus = true
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Stores the supplied model seed map, marks the model seed as set and rebuilds the genetic
    * configuration.
    *
    * @param value Map[String, Any]: the model seed map
    * @return this
    */
  def setModelSeedMap(value: Map[String, Any]): this.type = {
    _modelSeedMap = value
    _modelSeedSetStatus = true
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Private setter that rebuilds `_firstGenerationConfig` from the current first-generation fields and
    * then rebuilds the genetic configuration.
    *
    * @return this
    */
  private def setFirstGenerationConfig(): this.type = {
    val refreshedFirstGeneration = FirstGenerationConfig(
      permutationCount = _firstGenerationPermutationCount,
      indexMixingMode = _firstGenerationIndexMixingMode,
      arraySeed = _firstGenerationArraySeed
    )
    _firstGenerationConfig = refreshedFirstGeneration
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Stores the first generation permutation count and rebuilds the first generation configuration.
    *
    * @param value Int: the permutation count
    * @return this
    */
  def setFirstGenerationPermutationCount(value: Int): this.type = {
    _firstGenerationPermutationCount = value
    // setFirstGenerationConfig() is typed this.type, so its result is this.
    setFirstGenerationConfig()
  }

  /**
    * Setter for the first generation index mixing mode.
    *
    * @param value String: must be a member of `_allowableInitialGenerationIndexMixingModes`
    * @return this
    * @throws IllegalArgumentException if the mode is not one of the allowable modes
    */
  def setFirstGenerationIndexMixingMode(value: String): this.type = {
    val permittedModes = _allowableInitialGenerationIndexMixingModes
    require(
      permittedModes.contains(value),
      s"Invalid First Generation Index Mixing " +
        s"Mode: $value .  First Generation Index Mixing Mode must be one of: " +
        s"${permittedModes.mkString(", ")}"
    )
    _firstGenerationIndexMixingMode = value
    // setFirstGenerationConfig() is typed this.type, so its result is this.
    setFirstGenerationConfig()
  }

  /**
    * Stores the first generation array seed and rebuilds the first generation configuration.
    *
    * @param value Long: the array seed
    * @return this
    */
  def setFirstGenerationArraySeed(value: Long): this.type = {
    _firstGenerationArraySeed = value
    // setFirstGenerationConfig() is typed this.type, so its result is this.
    setFirstGenerationConfig()
  }

  /**
    * Switches the hyper space inference flag on and rebuilds the genetic configuration.
    *
    * @return this
    */
  def hyperSpaceInferenceOn(): this.type = {
    _hyperSpaceInference = true
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Switches the hyper space inference flag off and rebuilds the genetic configuration.
    *
    * @return this
    */
  def hyperSpaceInferenceOff(): this.type = {
    _hyperSpaceInference = false
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the hyper space inference count.  Values above 500,000 print a warning to stdout; values
    * above 1,000,000 print that warning and are then rejected.
    *
    * @param value Int: the permutation count to store
    * @return this
    * @throws UnsupportedOperationException if the value is above 1,000,000
    */
  def setHyperSpaceInferenceCount(value: Int): this.type = {
    if (value > 500000) {
      println(
        "WARNING! Setting permutation counts above 500,000 will put stress on the driver."
      )
    }
    if (value > 1000000) {
      throw new UnsupportedOperationException(
        s"Setting permutation above 1,000,000 is not supported" +
          s" due to runtime considerations.  $value is too large of a value."
      )
    }
    _hyperSpaceInferenceCount = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the model type used for post modeling hyper space optimization.
    *
    * @param value String: one of "RandomForest", "LinearRegression" or "XGBoost"
    * @return this
    * @throws IllegalArgumentException if the model type is not one of the supported values
    */
  def setHyperSpaceModelType(value: String): this.type = {
    // Single source of truth for both the check and the message, so the message
    // cannot drift from the accepted values (it previously omitted XGBoost).
    val supportedModelTypes =
      Array("RandomForest", "LinearRegression", "XGBoost")
    require(
      supportedModelTypes.contains(value),
      s"Model type $value is not supported for post " +
        s"modeling hyper space optimization!  Please choose one of: ${supportedModelTypes.mkString(", ")}"
    )
    _hyperSpaceModelType = value
    setGeneticConfig()
    setConfigs()
    this
  }

  /**
    * Stores the hyper space model count and rebuilds the genetic configuration.  A warning is printed to
    * stdout when the value is above 50; the value is still accepted.
    *
    * @param value Int: the hyper space model count
    * @return this
    */
  def setHyperSpaceModelCount(value: Int): this.type = {
    if (value > 50)
      println(
        // Separator added: the concatenation previously printed "Proceedonly".
        "WARNING! Setting this value above 50 will incur 50 additional models to be built.  Proceed " +
          "only if this is intended."
      )
    _hyperSpaceModelCount = value
    setGeneticConfig()
    setConfigs()
    this
  }

  /**
    * Setter for the first generation mode.
    *
    * @param value String: must be a member of `_allowableInitialGenerationModes`
    * @return this
    * @throws IllegalArgumentException if the mode is not one of the allowable modes
    */
  def setFirstGenerationMode(value: String): this.type = {
    val permittedModes = _allowableInitialGenerationModes
    require(
      permittedModes.contains(value),
      s"Invalid First Generation Mode: $value . " +
        s"First Generation Mode must be one of : ${permittedModes.mkString(", ")}"
    )
    _firstGenerationMode = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Replaces the stored MLflow configuration with the supplied instance and then calls `setConfigs()`.
    *
    * NOTE(review): only `_mlFlowConfig` is assigned here.  The individual `_mlFlow*` fields that the
    * no-argument `setMlFlowConfig()` rebuilds the configuration from are left untouched, so a later call
    * to one of the single-field MLflow setters rebuilds from those fields -- confirm this is intended.
    *
    * @param value MLFlowConfig: the complete configuration to store
    * @return this
    */
  def setMlFlowConfig(value: MLFlowConfig): this.type = {
    _mlFlowConfig = value
    setConfigs()
    this
  }

  /**
    * Switches the MLflow logging flag on and then calls `setConfigs()`.
    *
    * @return this
    */
  def mlFlowLoggingOn(): this.type = {
    _mlFlowLoggingFlag = true
    setConfigs()
    this
  }

  /**
    * Switches the MLflow logging flag off and then calls `setConfigs()`.
    *
    * @return this
    */
  def mlFlowLoggingOff(): this.type = {
    _mlFlowLoggingFlag = false
    setConfigs()
    this
  }

  /**
    * Switches the MLflow artifacts flag on and then calls `setConfigs()`.
    *
    * @return this
    */
  def mlFlowLogArtifactsOn(): this.type = {
    _mlFlowArtifactsFlag = true
    setConfigs()
    this
  }

  /**
    * Switches the MLflow artifacts flag off and then calls `setConfigs()`.
    *
    * @return this
    */
  def mlFlowLogArtifactsOff(): this.type = {
    _mlFlowArtifactsFlag = false
    setConfigs()
    this
  }

  /**
    * Stores the MLflow tracking URI and rebuilds the MLflow configuration.
    *
    * @param value String: the tracking URI
    * @return this
    */
  def setMlFlowTrackingURI(value: String): this.type = {
    _mlFlowTrackingURI = value
    // The no-argument setMlFlowConfig() returns this, so the refresh calls are chained.
    setMlFlowConfig().setConfigs()
    this
  }

  /**
    * Stores the MLflow experiment name and rebuilds the MLflow configuration.
    *
    * @param value String: the experiment name
    * @return this
    */
  def setMlFlowExperimentName(value: String): this.type = {
    _mlFlowExperimentName = value
    // The no-argument setMlFlowConfig() returns this, so the refresh calls are chained.
    setMlFlowConfig().setConfigs()
    this
  }

  /**
    * Stores the MLflow API token and rebuilds the MLflow configuration.
    *
    * @param value String: the API token
    * @return this
    */
  def setMlFlowAPIToken(value: String): this.type = {
    _mlFlowAPIToken = value
    // The no-argument setMlFlowConfig() returns this, so the refresh calls are chained.
    setMlFlowConfig().setConfigs()
    this
  }

  /**
    * Setter for the MLflow model save directory.
    *
    * @param value String: a path that starts with "dbfs:/"
    * @return this
    * @throws IllegalArgumentException if the path does not start with "dbfs:/"
    */
  @throws(classOf[IllegalArgumentException])
  def setMlFlowModelSaveDirectory(value: String): this.type = {
    require(
      value.startsWith("dbfs:/"),
      "Model save directory must be written to dbfs:/."
    )
    _mlFlowModelSaveDirectory = value
    // The no-argument setMlFlowConfig() returns this, so the refresh calls are chained.
    setMlFlowConfig().setConfigs()
    this
  }

  /**
    * Setter for the MLflow logging mode.
    *
    * @param value String: must be a member of `_allowableMlFlowLoggingModes`
    * @return this
    * @throws IllegalArgumentException if the mode is not one of the allowable modes
    */
  def setMlFlowLoggingMode(value: String): this.type = {
    val permittedModes = _allowableMlFlowLoggingModes
    require(
      permittedModes.contains(value),
      s"MlFlow logging mode $value is not permitted.  Must be " +
        s"one of: ${permittedModes.mkString(",")}"
    )
    _mlFlowLoggingMode = value
    // The no-argument setMlFlowConfig() returns this, so the refresh calls are chained.
    setMlFlowConfig().setConfigs()
    this
  }

  /**
    * Stores the MLflow best suffix and rebuilds the MLflow configuration.
    *
    * @param value String: the suffix to store
    * @return this
    */
  def setMlFlowBestSuffix(value: String): this.type = {
    _mlFlowBestSuffix = value
    // The no-argument setMlFlowConfig() returns this, so the refresh calls are chained.
    setMlFlowConfig().setConfigs()
    this
  }

  /**
    * Stores the MLflow custom run tags and rebuilds the MLflow configuration.
    *
    * @param value Map[String, String]: the custom run tags
    * @return this
    */
  def setMlFlowCustomRunTags(value: Map[String, String]): this.type = {
    _mlFlowCustomRunTags = value
    // The no-argument setMlFlowConfig() returns this, so the refresh calls are chained.
    setMlFlowConfig().setConfigs()
    this
  }

  /**
    * Private setter that rebuilds `_mlFlowConfig` from the current individual MLflow fields.
    *
    * @return this
    */
  private def setMlFlowConfig(): this.type = {
    val refreshedMlFlow = MLFlowConfig(
      mlFlowTrackingURI = _mlFlowTrackingURI,
      mlFlowExperimentName = _mlFlowExperimentName,
      mlFlowAPIToken = _mlFlowAPIToken,
      mlFlowModelSaveDirectory = _mlFlowModelSaveDirectory,
      mlFlowLoggingMode = _mlFlowLoggingMode,
      mlFlowBestSuffix = _mlFlowBestSuffix,
      mlFlowCustomRunTags = _mlFlowCustomRunTags
    )
    _mlFlowConfig = refreshedMlFlow
    this
  }

  /**
    * Switches the auto stopping flag on and then calls `setConfigs()`.
    *
    * @return this
    */
  def autoStoppingOn(): this.type = {
    _autoStoppingFlag = true
    setConfigs()
    this
  }

  /**
    * Switches the auto stopping flag off and then calls `setConfigs()`.
    *
    * @return this
    */
  def autoStoppingOff(): this.type = {
    _autoStoppingFlag = false
    setConfigs()
    this
  }

  /**
    * Stores the auto stopping score and then calls `setConfigs()`.
    *
    * @param value Double: the score to store (no validation is applied here)
    * @return this
    */
  def setAutoStoppingScore(value: Double): this.type = {
    _autoStoppingScore = value
    setConfigs()
    this
  }

  /**
    * Setter for defining the secondary stopping criteria for continuous training mode (the number of
    * consistently not-improving runs after which the learning algorithm is terminated due to diminishing
    * returns).
    * @param value Negative Integer (an improvement to a priori will reset the counter and subsequent non-improvements
    *              will decrement a mutable counter.  If the counter hits this limit specified in value, the continuous
    *              mode algorithm will stop).
    * @author Ben Wilson, Databricks
    * @since 0.6.0
    * @throws IllegalArgumentException if the value is not less than zero.
    */
  @throws(classOf[IllegalArgumentException])
  def setContinuousEvolutionImprovementThreshold(value: Int): this.type = {
    require(
      value < 0,
      "ContinuousEvolutionImprovementThreshold must be less than zero.  It is " +
        "recommended to set this value to less than -4."
    )
    _continuousEvolutionImprovementThreshold = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the type of Regressor used for the within-epoch generation MBO of candidates
    * @param value String - one of "XGBoost", "LinearRegression" or "RandomForest"
    * @author Ben Wilson, Databricks
    * @since 0.6.0
    * @throws IllegalArgumentException if the value is not supported
    */
  @throws(classOf[IllegalArgumentException])
  def setGeneticMBORegressorType(value: String): this.type = {
    val permittedRegressors = allowableMBORegressorTypes
    require(
      permittedRegressors.contains(value),
      s"GeneticRegressorType $value is not a supported Regressor " +
        s"Type.  Must be one of: ${permittedRegressors.mkString(", ")}"
    )
    _geneticMBORegressorType = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the factor applied to the candidate listing of hyperparameters that is generated through
    * mutation for each generation other than the initial and post-modeling optimization phases.  The larger
    * this value (default: 10), the more potential space can be searched.  There is not a large performance
    * hit to this, and as such, values in excess of 100 are viable.
    * @param value Int - a factor to multiply the numberOfMutationsPerGeneration by to generate a count of potential
    *              candidates.
    * @author Ben Wilson, Databricks
    * @since 0.6.0
    * @throws IllegalArgumentException if the value is not greater than zero.
    */
  @throws(classOf[IllegalArgumentException])
  def setGeneticMBOCandidateFactor(value: Int): this.type = {
    require(
      0 < value,
      "GeneticMBOCandidateFactor must be greater than zero."
    )
    _geneticMBOCandidateFactor = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the feature importance cutoff type.
    *
    * @param value String: must be a member of `_supportedFeatureImportanceCutoffTypes`
    * @return this
    * @throws IllegalArgumentException if the cutoff type is not one of the supported values
    */
  def setFeatureImportanceCutoffType(value: String): this.type = {
    require(
      _supportedFeatureImportanceCutoffTypes.contains(value),
      // Separator is ", " -- it was " ," which rendered the list as "a ,b ,c".
      s"Feature Importance Cutoff Type '$value' is not supported.  Allowable values: " +
        s"${_supportedFeatureImportanceCutoffTypes.mkString(", ")}"
    )
    _featureImportanceCutoffType = value
    setConfigs()
    this
  }

  /**
    * Stores the feature importance cutoff value and then calls `setConfigs()`.
    *
    * @param value Double: the cutoff value to store (no validation is applied here)
    * @return this
    */
  def setFeatureImportanceCutoffValue(value: Double): this.type = {
    _featureImportanceCutoffValue = value
    setConfigs()
    this
  }

  /**
    * Setter for the evolution strategy.
    *
    * @param value String: must be a member of `_allowableEvolutionStrategies`
    * @return this
    * @throws IllegalArgumentException if the strategy is not one of the allowable strategies
    */
  def setEvolutionStrategy(value: String): this.type = {
    val permittedStrategies = _allowableEvolutionStrategies
    require(
      permittedStrategies.contains(value),
      s"Evolution Strategy '$value' is not a supported mode.  Must be one of: ${permittedStrategies.mkString(", ")}"
    )
    _evolutionStrategy = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Stores the continuous evolution maximum iteration count and rebuilds the genetic configuration.
    * A warning is printed to stdout when the value is above 500; the value is still accepted.
    *
    * @param value Int: the maximum number of iterations
    * @return this
    */
  def setContinuousEvolutionMaxIterations(value: Int): this.type = {
    if (value > 500) {
      println(
        s"[WARNING] Total Modeling count $value is higher than recommended limit of 500.  " +
          "This tuning will take a long time to run."
      )
    }
    _continuousEvolutionMaxIterations = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Stores the continuous evolution stopping score and rebuilds the genetic configuration.
    *
    * @param value Double: the stopping score (no validation is applied here)
    * @return this
    */
  def setContinuousEvolutionStoppingScore(value: Double): this.type = {
    _continuousEvolutionStoppingScore = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Stores the continuous evolution parallelism and rebuilds the genetic configuration.
    * A warning is printed to stdout when the value is above 10; the value is still accepted.
    *
    * @param value Int: the parallelism to store
    * @return this
    */
  def setContinuousEvolutionParallelism(value: Int): this.type = {
    if (value > 10) {
      println(
        s"[WARNING] ContinuousEvolutionParallelism -> $value is higher than recommended " +
          "concurrency for efficient optimization for convergence." +
          "\n  Setting this value below 11 will converge faster in most cases."
      )
    }
    _continuousEvolutionParallelism = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Stores the continuous evolution mutation aggressiveness and rebuilds the genetic configuration.
    * A warning is printed to stdout when the value is above 4; the value is still accepted.
    *
    * @param value Int: the mutation aggressiveness to store
    * @return this
    */
  def setContinuousEvolutionMutationAggressiveness(value: Int): this.type = {
    if (value > 4) {
      println(
        s"[WARNING] ContinuousEvolutionMutationAggressiveness -> $value. " +
          "\n  Setting this higher than 4 will result in extensive random search and will take longer to converge " +
          "to optimal hyperparameters."
      )
    }
    _continuousEvolutionMutationAggressiveness = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the continuous evolution genetic mixing value.
    *
    * @param value Double: must lie strictly between 0 and 1
    * @return this
    * @throws IllegalArgumentException if the value is outside of the range (0, 1)
    */
  def setContinuousEvolutionGeneticMixing(value: Double): this.type = {
    require(
      value > 0.0 && value < 1.0,
      // The message names this setting; it previously said "Mutation Aggressiveness".
      s"ContinuousEvolutionGeneticMixing must be in range (0,1). Current Setting of $value is not permitted."
    )
    _continuousEvolutionGeneticMixing = value
    setGeneticConfig()
    setConfigs()
    this
  }

  /**
    * Setter for the continuous evolution rolling improvement count.  Values below 10 are accepted but
    * print a warning to stdout.
    *
    * @param value Int: must be greater than 0
    * @return this
    * @throws IllegalArgumentException if the value is not greater than 0
    */
  def setContinuousEvolutionRollingImprovementCount(value: Int): this.type = {
    require(
      0 < value,
      s"ContinuousEvolutionRollingImprovementCount must be > 0. $value is invalid."
    )
    if (value < 10) {
      println(
        s"[WARNING] ContinuousEvolutionRollingImprovementCount -> $value setting is low.  " +
          "Optimal Convergence may not occur due to early stopping."
      )
    }
    _continuousEvolutionRollingImprovementCount = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for the inference configuration save location.
    *
    * @param value String: a path that starts with "dbfs:/"
    * @return this
    * @throws IllegalArgumentException if the path does not start with "dbfs:/"
    */
  @throws(classOf[IllegalArgumentException])
  def setInferenceConfigSaveLocation(value: String): this.type = {
    val onDbfs = value.startsWith("dbfs:/")
    require(onDbfs, "Inference save location must be on dbfs:/.")
    _inferenceConfigSaveLocation = value
    setConfigs()
    this
  }

  /**
    * Setter for the data reduction factor.
    *
    * @param value Double: must lie strictly between 0 and 1
    * @return this
    * @throws IllegalArgumentException if the value is not strictly between 0 and 1
    */
  def setDataReductionFactor(value: Double): this.type = {
    // Both bounds report the same message, so they are checked in one require.
    require(
      value > 0 && value < 1,
      "Data Reduction Factor must be between 0 and 1"
    )
    _dataReductionFactor = value
    setConfigs()
    this
  }

  /**
    * Setter for the path that the kfold train/test splits are written to as Delta data sets (useful for
    * extremely large data sets or a situation where using local disk storage might be prohibitively
    * expensive).  An empty string is accepted without a location check.
    * @param value String path to a dbfs location for creating the temporary (or persisted)
    * @since 0.7.1
    * @author Ben Wilson, Databricks
    * @throws IllegalArgumentException if a non-empty value does not start with "dbfs:/"
    */
  def setDeltaCacheBackingDirectory(value: String): this.type = {
    require(
      value == "" || value.startsWith("dbfs:/"),
      "Delta backing location must be written to dbfs."
    )
    _deltaCacheBackingDirectory = value
    // setGeneticConfig() returns this, so the refresh calls are chained.
    setGeneticConfig().setConfigs()
    this
  }

  /**
    * Setter for determining the split caching strategy (persist to disk for each kfold split, cache, or
    * backing to Delta).  The value is matched case-insensitively and stored in lower case.
    * @param value Configuration string, one of 'persist', 'cache' or 'delta'
    * @since 0.7.1
    * @author Ben Wilson, Databricks
    * @throws IllegalArgumentException if the value is not one of the supported strategies
    */
  def setSplitCachingStrategy(value: String): this.type = {
    // Locale.ROOT keeps the lower-casing independent of the JVM default locale
    // (e.g. a Turkish locale maps 'I' to a dotless i, which would reject "PERSIST").
    val valueSet = value.toLowerCase(java.util.Locale.ROOT)
    require(
      valueSet == "persist" || valueSet == "delta" || valueSet == "cache",
      // Report the value the caller supplied; the message previously interpolated an empty block.
      s"SplitCachingStrategy '$value' is invalid.  Must be either 'delta', 'cache', or 'persist'"
    )
    _splitCachingStrategy = valueSet
    setGeneticConfig()
    setConfigs()
    this
  }

  /**
    * Setter for the flag controlling removal of the train/test splits written to Delta for the run.
    * Per the original author's notes, the default is true, meaning the job deletes the data on Object store
    * after the run completes when the splitCachingStrategy is set to 'delta'.
    * @param value Boolean - true => delete, false => leave on Object Store
    * @since 0.7.1
    * @author Ben Wilson, Databricks
    */
  def setDeltaCacheBackingDirectoryRemovalFlag(value: Boolean): this.type = {
    _deltaCacheBackingDirectoryRemovalFlag = value

    // Propagate the new flag into _geneticConfig, then refresh via setConfigs().
    setGeneticConfig()
    setConfigs()

    this
  }

  /**
    * Convenience switch: sets the Delta cache backing directory removal flag to true.
    *
    * @return this instance, for chaining
    */
  def deltaCheckBackingDirectoryRemovalOn(): this.type =
    setDeltaCacheBackingDirectoryRemovalFlag(true)

  /**
    * Convenience switch: sets the Delta cache backing directory removal flag to false.
    *
    * @return this instance, for chaining
    */
  def deltaCheckBackingDirectoryRemovalOff(): this.type =
    setDeltaCacheBackingDirectoryRemovalFlag(false)

  /**
    * Rebuilds `_geneticConfig` from the current values of the individual genetic fields held on this trait.
    *
    * @return this instance, for chaining
    */
  private def setGeneticConfig(): this.type = {
    val rebuilt = GeneticConfig(
      // parallelism, k-fold and train/test split fields
      parallelism = _parallelism,
      kFold = _kFold,
      trainPortion = _trainPortion,
      trainSplitMethod = _trainSplitMethod,
      kSampleConfig = _kSampleConfig,
      trainSplitChronologicalColumn = _trainSplitChronologicalColumn,
      trainSplitChronologicalRandomPercentage =
        _trainSplitChronologicalRandomPercentage,
      seed = _seed,
      // generation / mutation fields
      firstGenerationGenePool = _firstGenerationGenePool,
      numberOfGenerations = _numberOfGenerations,
      numberOfParentsToRetain = _numberOfParentsToRetain,
      numberOfMutationsPerGeneration = _numberOfMutationsPerGeneration,
      geneticMixing = _geneticMixing,
      generationalMutationStrategy = _generationalMutationStrategy,
      fixedMutationValue = _fixedMutationValue,
      mutationMagnitudeMode = _mutationMagnitudeMode,
      evolutionStrategy = _evolutionStrategy,
      geneticMBORegressorType = _geneticMBORegressorType,
      geneticMBOCandidateFactor = _geneticMBOCandidateFactor,
      // continuous evolution fields
      continuousEvolutionMaxIterations = _continuousEvolutionMaxIterations,
      continuousEvolutionStoppingScore = _continuousEvolutionStoppingScore,
      continuousEvolutionImprovementThreshold =
        _continuousEvolutionImprovementThreshold,
      continuousEvolutionParallelism = _continuousEvolutionParallelism,
      continuousEvolutionMutationAggressiveness =
        _continuousEvolutionMutationAggressiveness,
      continuousEvolutionGeneticMixing = _continuousEvolutionGeneticMixing,
      continuousEvolutionRollingImprovementCount =
        _continuousEvolutionRollingImprovementCount,
      // model seed, hyper space and first generation fields
      modelSeed = _modelSeedMap,
      hyperSpaceInference = _hyperSpaceInference,
      hyperSpaceInferenceCount = _hyperSpaceInferenceCount,
      hyperSpaceModelType = _hyperSpaceModelType,
      hyperSpaceModelCount = _hyperSpaceModelCount,
      initialGenerationMode = _firstGenerationMode,
      initialGenerationConfig = _firstGenerationConfig,
      // split caching fields
      deltaCacheBackingDirectory = _deltaCacheBackingDirectory,
      splitCachingStrategy = _splitCachingStrategy,
      deltaCacheBackingDirectoryRemovalFlag =
        _deltaCacheBackingDirectoryRemovalFlag
    )
    _geneticConfig = rebuilt
    this
  }

  /**
    * Rebuilds `_mainConfig` from the current values of the individual fields and nested configs
    * held on this trait. The model family is taken from `_modelingFamily`.
    *
    * @return this instance, for chaining
    */
  def setMainConfig(): this.type = {
    val rebuilt = MainConfig(
      // identity and feature-engineering switches
      modelFamily = _modelingFamily,
      labelCol = _labelCol,
      featuresCol = _featuresCol,
      naFillFlag = _naFillFlag,
      varianceFilterFlag = _varianceFilterFlag,
      outlierFilterFlag = _outlierFilterFlag,
      pearsonFilteringFlag = _pearsonFilterFlag,
      covarianceFilteringFlag = _covarianceFilterFlag,
      oneHotEncodeFlag = _oneHotEncodeFlag,
      scalingFlag = _scalingFlag,
      featureInteractionFlag = _featureInteractionFlag,
      dataPrepCachingFlag = _dataPrepCachingFlag,
      dataPrepParallelism = _dataPrepParallelism,
      // stopping, feature importance, boundaries and scoring
      autoStoppingFlag = _autoStoppingFlag,
      autoStoppingScore = _autoStoppingScore,
      featureImportanceCutoffType = _featureImportanceCutoffType,
      featureImportanceCutoffValue = _featureImportanceCutoffValue,
      dateTimeConversionType = _dateTimeConversionType,
      fieldsToIgnoreInVector = _fieldsToIgnoreInVector,
      numericBoundaries = _numericBoundaries,
      stringBoundaries = _stringBoundaries,
      scoringMetric = _scoringMetric,
      scoringOptimizationStrategy = _scoringOptimizationStrategy,
      // nested configs
      fillConfig = _fillConfig,
      outlierConfig = _outlierConfig,
      pearsonConfig = _pearsonConfig,
      covarianceConfig = _covarianceConfig,
      scalingConfig = _scalingConfig,
      featureInteractionConfig = _featureInteractionConfig,
      geneticConfig = _geneticConfig,
      // mlflow, inference and pipeline fields
      mlFlowLoggingFlag = _mlFlowLoggingFlag,
      mlFlowLogArtifactsFlag = _mlFlowArtifactsFlag,
      mlFlowConfig = _mlFlowConfig,
      inferenceConfigSaveLocation = _inferenceConfigSaveLocation,
      dataReductionFactor = _dataReductionFactor,
      pipelineDebugFlag = _pipelineDebugFlag,
      pipelineId = _pipelineId
    )
    _mainConfig = rebuilt
    this
  }

  /**
    * Stores the supplied FillConfig and copies each of its members into the matching field on this trait.
    *
    * @param config the FillConfig to apply
    * @return this instance, for chaining
    */
  private def setFillConfig(config: FillConfig): this.type = {
    _fillConfig = config

    // fill statistics and fill mode
    _numericFillStat = config.numericFillStat
    _characterFillStat = config.characterFillStat
    _naFillMode = config.naFillMode
    _naFillFilterPrecision = config.filterPrecision

    // explicit fill maps and blanket fill values
    _categoricalNAFillMap = config.categoricalNAFillMap
    _numericNAFillMap = config.numericNAFillMap
    _characterNABlanketFillValue = config.characterNABlanketFillValue
    _numericNABlanketFillValue = config.numericNABlanketFillValue

    // cardinality and model selection fields
    _modelSelectionDistinctThreshold = config.modelSelectionDistinctThreshold
    _cardinalitySwitchFlag = config.cardinalitySwitch
    _cardinalityType = config.cardinalityType
    _cardinalityLimit = config.cardinalityLimit
    _cardinalityPrecision = config.cardinalityPrecision
    _cardinalityCheckMode = config.cardinalityCheckMode

    this
  }

  /**
    * Stores the supplied OutlierConfig and copies each of its members into the matching field on this trait.
    *
    * @param config the OutlierConfig to apply
    * @return this instance, for chaining
    */
  private def setOutlierConfig(config: OutlierConfig): this.type = {
    _outlierConfig = config

    // n-tile bounds
    _lowerFilterNTile = config.lowerFilterNTile
    _upperFilterNTile = config.upperFilterNTile

    // remaining filter fields
    _filterBounds = config.filterBounds
    _filterPrecision = config.filterPrecision
    _continuousDataThreshold = config.continuousDataThreshold
    _fieldsToIgnore = config.fieldsToIgnore

    this
  }

  /**
    * Stores the supplied PearsonConfig and copies each of its members into the matching field on this trait.
    *
    * @param config the PearsonConfig to apply
    * @return this instance, for chaining
    */
  private def setPearsonConfig(config: PearsonConfig): this.type = {
    _pearsonConfig = config

    // filter mode and its manual / automatic values
    _pearsonFilterMode = config.filterMode
    _pearsonFilterManualValue = config.filterManualValue
    _pearsonAutoFilterNTile = config.autoFilterNTile

    // statistic and direction
    _pearsonFilterStatistic = config.filterStatistic
    _pearsonFilterDirection = config.filterDirection

    this
  }

  /**
    * Stores the supplied CovarianceConfig and copies its low and high correlation cutoffs
    * into the matching fields on this trait.
    *
    * @param config the CovarianceConfig to apply
    * @return this instance, for chaining
    */
  private def setCovarianceConfig(config: CovarianceConfig): this.type = {
    _covarianceConfig = config

    _correlationCutoffHigh = config.correlationCutoffHigh
    _correlationCutoffLow = config.correlationCutoffLow

    this
  }

  /**
    * Stores the supplied ScalingConfig and copies each of its members into the matching field on this trait.
    *
    * @param config the ScalingConfig to apply
    * @return this instance, for chaining
    */
  private def setScalerConfig(config: ScalingConfig): this.type = {
    _scalingConfig = config

    // scaler type and its min / max range
    _scalerType = config.scalerType
    _scalerMax = config.scalerMax
    _scalerMin = config.scalerMin

    // standard scaler flags and p-norm
    _standardScalerStdDevFlag = config.standardScalerStdDevFlag
    _standardScalerMeanFlag = config.standardScalerMeanFlag
    _pNorm = config.pNorm

    this
  }

  /**
    * Stores the supplied FeatureInteractionConfig and copies each of its members
    * into the matching field on this trait.
    *
    * @param config the FeatureInteractionConfig to apply
    * @return this instance, for chaining
    */
  private def setFeatureInteractionConfig(
    config: FeatureInteractionConfig
  ): this.type = {
    _featureInteractionConfig = config

    _featureInteractionParallelism = config.parallelism
    _featureInteractionRetentionMode = config.retentionMode
    _featureInteractionTargetInteractionPercentage =
      config.targetInteractionPercentage
    _featureInteractionContinuousDiscretizerBucketCount =
      config.continuousDiscretizerBucketCount

    this
  }

  /**
    * Stores the supplied KSampleConfig and copies each of its members into the matching field on this trait.
    *
    * @param config the KSampleConfig to apply
    * @return this instance, for chaining
    */
  private def setKSampleConfig(config: KSampleConfig): this.type = {
    _kSampleConfig = config

    // k-means fields
    _kGroups = config.kGroups
    _kMeansMaxIter = config.kMeansMaxIter
    _kMeansTolerance = config.kMeansTolerance
    _kMeansDistanceMeasurement = config.kMeansDistanceMeasurement
    _kMeansSeed = config.kMeansSeed
    _kMeansPredictionCol = config.kMeansPredictionCol

    // LSH fields
    _lshHashTables = config.lshHashTables
    _lshSeed = config.lshSeed
    _lshOutputCol = config.lshOutputCol

    // vector mutation fields
    _quorumCount = config.quorumCount
    _minimumVectorCountToMutate = config.minimumVectorCountToMutate
    _vectorMutationMethod = config.vectorMutationMethod
    _mutationMode = config.mutationMode
    _mutationValue = config.mutationValue

    // synthetic column, label balancing and output fields
    _syntheticCol = config.syntheticCol
    _labelBalanceMode = config.labelBalanceMode
    _cardinalityThreshold = config.cardinalityThreshold
    _numericRatio = config.numericRatio
    _numericTarget = config.numericTarget
    _outputDfRepartitionScaleFactor = config.outputDfRepartitionScaleFactor

    this
  }

  /**
    * Stores the supplied FirstGenerationConfig and copies its permutation count, index mixing mode
    * and array seed into the matching fields on this trait.
    *
    * @param config the FirstGenerationConfig to apply
    * @return this instance, for chaining
    */
  private def setFirstGenerationConfig(
    config: FirstGenerationConfig
  ): this.type = {
    _firstGenerationConfig = config

    _firstGenerationArraySeed = config.arraySeed
    _firstGenerationIndexMixingMode = config.indexMixingMode
    _firstGenerationPermutationCount = config.permutationCount

    this
  }

  /**
    * Stores the supplied GeneticConfig and copies each of its members into the matching field on this trait.
    * The nested KSampleConfig and FirstGenerationConfig are unpacked through their own setters.
    *
    * @param config the GeneticConfig to apply
    * @return this instance, for chaining
    */
  private def setGeneticConfig(config: GeneticConfig): this.type = {
    _geneticConfig = config

    // parallelism, k-fold and train/test split fields
    _parallelism = config.parallelism
    _kFold = config.kFold
    _trainPortion = config.trainPortion
    _trainSplitMethod = config.trainSplitMethod
    _trainSplitChronologicalColumn = config.trainSplitChronologicalColumn
    _trainSplitChronologicalRandomPercentage =
      config.trainSplitChronologicalRandomPercentage
    _seed = config.seed

    // generation / mutation fields
    _firstGenerationGenePool = config.firstGenerationGenePool
    _numberOfGenerations = config.numberOfGenerations
    _numberOfParentsToRetain = config.numberOfParentsToRetain
    _numberOfMutationsPerGeneration = config.numberOfMutationsPerGeneration
    _geneticMixing = config.geneticMixing
    _generationalMutationStrategy = config.generationalMutationStrategy
    _fixedMutationValue = config.fixedMutationValue
    _mutationMagnitudeMode = config.mutationMagnitudeMode
    _evolutionStrategy = config.evolutionStrategy
    _geneticMBORegressorType = config.geneticMBORegressorType
    _geneticMBOCandidateFactor = config.geneticMBOCandidateFactor

    // continuous evolution fields
    _continuousEvolutionMaxIterations = config.continuousEvolutionMaxIterations
    _continuousEvolutionStoppingScore = config.continuousEvolutionStoppingScore
    _continuousEvolutionImprovementThreshold =
      config.continuousEvolutionImprovementThreshold
    _continuousEvolutionParallelism = config.continuousEvolutionParallelism
    _continuousEvolutionMutationAggressiveness =
      config.continuousEvolutionMutationAggressiveness
    _continuousEvolutionGeneticMixing = config.continuousEvolutionGeneticMixing
    _continuousEvolutionRollingImprovementCount =
      config.continuousEvolutionRollingImprovementCount

    // model seed, hyper space and first generation mode fields
    _modelSeedMap = config.modelSeed
    _hyperSpaceInference = config.hyperSpaceInference
    _hyperSpaceInferenceCount = config.hyperSpaceInferenceCount
    _hyperSpaceModelType = config.hyperSpaceModelType
    _hyperSpaceModelCount = config.hyperSpaceModelCount
    _firstGenerationMode = config.initialGenerationMode

    // split caching fields
    _deltaCacheBackingDirectory = config.deltaCacheBackingDirectory
    _splitCachingStrategy = config.splitCachingStrategy
    _deltaCacheBackingDirectoryRemovalFlag =
      config.deltaCacheBackingDirectoryRemovalFlag

    // nested configs, unpacked through their dedicated setters
    setKSampleConfig(config.kSampleConfig)
    setFirstGenerationConfig(config.initialGenerationConfig)
    this
  }

  /**
    * Stores the supplied MLFlowConfig and copies each of its members into the matching field on this trait.
    *
    * @param config the MLFlowConfig to apply
    * @return this instance, for chaining
    */
  private def resetMlFlowConfig(config: MLFlowConfig): this.type = {
    _mlFlowConfig = config

    // connection fields
    _mlFlowTrackingURI = config.mlFlowTrackingURI
    _mlFlowAPIToken = config.mlFlowAPIToken
    _mlFlowExperimentName = config.mlFlowExperimentName

    // logging and persistence fields
    _mlFlowModelSaveDirectory = config.mlFlowModelSaveDirectory
    _mlFlowLoggingMode = config.mlFlowLoggingMode
    _mlFlowBestSuffix = config.mlFlowBestSuffix
    _mlFlowCustomRunTags = config.mlFlowCustomRunTags

    this
  }

  /**
    * Stores the supplied MainConfig and resets every local field from it, so that setters can be
    * used in a chained manner afterwards without reverting to defaults.
    *
    * @param value the MainConfig to apply
    * @return this instance, for chaining
    */
  def setMainConfig(value: MainConfig): this.type = {
    _mainConfig = value

    // identity and feature-engineering switches
    _modelingFamily = value.modelFamily
    _labelCol = value.labelCol
    _featuresCol = value.featuresCol
    _naFillFlag = value.naFillFlag
    _varianceFilterFlag = value.varianceFilterFlag
    _outlierFilterFlag = value.outlierFilterFlag
    _pearsonFilterFlag = value.pearsonFilteringFlag
    _covarianceFilterFlag = value.covarianceFilteringFlag
    _oneHotEncodeFlag = value.oneHotEncodeFlag
    _scalingFlag = value.scalingFlag
    _featureInteractionFlag = value.featureInteractionFlag
    _dataPrepCachingFlag = value.dataPrepCachingFlag
    _dataPrepParallelism = value.dataPrepParallelism

    // stopping, feature importance, boundaries and scoring
    _autoStoppingFlag = value.autoStoppingFlag
    _autoStoppingScore = value.autoStoppingScore
    _featureImportanceCutoffType = value.featureImportanceCutoffType
    _featureImportanceCutoffValue = value.featureImportanceCutoffValue
    _dateTimeConversionType = value.dateTimeConversionType
    _fieldsToIgnoreInVector = value.fieldsToIgnoreInVector
    _numericBoundaries = value.numericBoundaries
    _stringBoundaries = value.stringBoundaries
    _scoringMetric = value.scoringMetric
    _scoringOptimizationStrategy = value.scoringOptimizationStrategy

    // mlflow flags, inference and pipeline fields
    _mlFlowLoggingFlag = value.mlFlowLoggingFlag
    _mlFlowArtifactsFlag = value.mlFlowLogArtifactsFlag
    _inferenceConfigSaveLocation = value.inferenceConfigSaveLocation
    _dataReductionFactor = value.dataReductionFactor
    _pipelineDebugFlag = value.pipelineDebugFlag
    _pipelineId = value.pipelineId

    // nested configs, each unpacked through its dedicated setter
    setFillConfig(value.fillConfig)
    setOutlierConfig(value.outlierConfig)
    setPearsonConfig(value.pearsonConfig)
    setCovarianceConfig(value.covarianceConfig)
    setScalerConfig(value.scalingConfig)
    setFeatureInteractionConfig(value.featureInteractionConfig)
    setGeneticConfig(value.geneticConfig)
    resetMlFlowConfig(value.mlFlowConfig)
    this
  }

  /**
    * Rebuilds `_featureImportancesConfig` from the current values of the individual fields and nested
    * configs held on this trait. The model family is fixed to "RandomForest" rather than read from
    * `_modelingFamily`.
    *
    * @return this instance, for chaining
    */
  def setFeatConfig(): this.type = {
    val rebuilt = MainConfig(
      // feature importances always use the RandomForest family
      modelFamily = "RandomForest",
      // identity and feature-engineering switches
      labelCol = _labelCol,
      featuresCol = _featuresCol,
      naFillFlag = _naFillFlag,
      varianceFilterFlag = _varianceFilterFlag,
      outlierFilterFlag = _outlierFilterFlag,
      pearsonFilteringFlag = _pearsonFilterFlag,
      covarianceFilteringFlag = _covarianceFilterFlag,
      oneHotEncodeFlag = _oneHotEncodeFlag,
      scalingFlag = _scalingFlag,
      featureInteractionFlag = _featureInteractionFlag,
      dataPrepCachingFlag = _dataPrepCachingFlag,
      dataPrepParallelism = _dataPrepParallelism,
      // stopping, feature importance, boundaries and scoring
      autoStoppingFlag = _autoStoppingFlag,
      autoStoppingScore = _autoStoppingScore,
      featureImportanceCutoffType = _featureImportanceCutoffType,
      featureImportanceCutoffValue = _featureImportanceCutoffValue,
      dateTimeConversionType = _dateTimeConversionType,
      fieldsToIgnoreInVector = _fieldsToIgnoreInVector,
      numericBoundaries = _numericBoundaries,
      stringBoundaries = _stringBoundaries,
      scoringMetric = _scoringMetric,
      scoringOptimizationStrategy = _scoringOptimizationStrategy,
      // nested configs
      fillConfig = _fillConfig,
      outlierConfig = _outlierConfig,
      pearsonConfig = _pearsonConfig,
      covarianceConfig = _covarianceConfig,
      scalingConfig = _scalingConfig,
      featureInteractionConfig = _featureInteractionConfig,
      geneticConfig = _geneticConfig,
      // mlflow, inference and pipeline fields
      mlFlowLoggingFlag = _mlFlowLoggingFlag,
      mlFlowLogArtifactsFlag = _mlFlowArtifactsFlag,
      mlFlowConfig = _mlFlowConfig,
      inferenceConfigSaveLocation = _inferenceConfigSaveLocation,
      dataReductionFactor = _dataReductionFactor,
      pipelineDebugFlag = _pipelineDebugFlag,
      pipelineId = _pipelineId
    )
    _featureImportancesConfig = rebuilt
    this
  }

  /**
    * Setter for supplying a complete feature importances configuration.
    *
    * @param value MainConfig whose modelFamily must be "RandomForest"
    * @throws IllegalArgumentException if value.modelFamily is not "RandomForest"
    * @return this instance, for chaining
    */
  def setFeatConfig(value: MainConfig): this.type = {
    // Validate before storing so a rejected config never replaces the current one.
    require(
      value.modelFamily == "RandomForest",
      s"Model Family for Feature Importances must be 'RandomForest'. ${value.modelFamily} is not supported."
    )
    _featureImportancesConfig = value
    setConfigs()
    this
  }

  /**
    * Rebuilds `_treeSplitsConfig` from the current values of the individual fields and nested
    * configs held on this trait. The model family is fixed to "Trees" rather than read from
    * `_modelingFamily`.
    *
    * @return this instance, for chaining
    */
  def setTreeSplitsConfig(): this.type = {
    val rebuilt = MainConfig(
      // tree splits always use the Trees family
      modelFamily = "Trees",
      // identity and feature-engineering switches
      labelCol = _labelCol,
      featuresCol = _featuresCol,
      naFillFlag = _naFillFlag,
      varianceFilterFlag = _varianceFilterFlag,
      outlierFilterFlag = _outlierFilterFlag,
      pearsonFilteringFlag = _pearsonFilterFlag,
      covarianceFilteringFlag = _covarianceFilterFlag,
      oneHotEncodeFlag = _oneHotEncodeFlag,
      scalingFlag = _scalingFlag,
      featureInteractionFlag = _featureInteractionFlag,
      dataPrepCachingFlag = _dataPrepCachingFlag,
      dataPrepParallelism = _dataPrepParallelism,
      // stopping, feature importance, boundaries and scoring
      autoStoppingFlag = _autoStoppingFlag,
      autoStoppingScore = _autoStoppingScore,
      featureImportanceCutoffType = _featureImportanceCutoffType,
      featureImportanceCutoffValue = _featureImportanceCutoffValue,
      dateTimeConversionType = _dateTimeConversionType,
      fieldsToIgnoreInVector = _fieldsToIgnoreInVector,
      numericBoundaries = _numericBoundaries,
      stringBoundaries = _stringBoundaries,
      scoringMetric = _scoringMetric,
      scoringOptimizationStrategy = _scoringOptimizationStrategy,
      // nested configs
      fillConfig = _fillConfig,
      outlierConfig = _outlierConfig,
      pearsonConfig = _pearsonConfig,
      covarianceConfig = _covarianceConfig,
      scalingConfig = _scalingConfig,
      featureInteractionConfig = _featureInteractionConfig,
      geneticConfig = _geneticConfig,
      // mlflow, inference and pipeline fields
      mlFlowLoggingFlag = _mlFlowLoggingFlag,
      mlFlowLogArtifactsFlag = _mlFlowArtifactsFlag,
      mlFlowConfig = _mlFlowConfig,
      inferenceConfigSaveLocation = _inferenceConfigSaveLocation,
      dataReductionFactor = _dataReductionFactor,
      pipelineDebugFlag = _pipelineDebugFlag,
      pipelineId = _pipelineId
    )
    _treeSplitsConfig = rebuilt
    this
  }

  /**
    * Setter for supplying a complete tree splits configuration.
    *
    * @param value MainConfig whose modelFamily must be "Trees"
    * @throws IllegalArgumentException if value.modelFamily is not "Trees"
    * @return this instance, for chaining
    */
  def setTreeSplitsConfig(value: MainConfig): this.type = {
    // Validate before storing so a rejected config never replaces the current one.
    require(
      value.modelFamily == "Trees",
      s"Model Family for Trees Splits must be 'Trees'. ${value.modelFamily} is not supported."
    )
    _treeSplitsConfig = value
    setConfigs()
    this
  }

  // Accessors for the top-level pipeline fields. Each one returns the current value of the
  // like-named mutable field on this trait.

  // NOTE: unlike the other accessors, this reads from the aggregate _mainConfig rather than
  // from the _pipelineId field, so it reflects the MainConfig most recently built or applied.
  def getPipelineId: String = _mainConfig.pipelineId

  def getModelingFamily: String = _modelingFamily

  def getLabelCol: String = _labelCol

  def getFeaturesCol: String = _featuresCol

  // The "...Status" accessors below return the corresponding Boolean "..Flag" fields.
  def getNaFillStatus: Boolean = _naFillFlag

  def getVarianceFilterStatus: Boolean = _varianceFilterFlag

  def getOutlierFilterStatus: Boolean = _outlierFilterFlag

  def getPearsonFilterStatus: Boolean = _pearsonFilterFlag

  def getCovarianceFilterStatus: Boolean = _covarianceFilterFlag

  def getOneHotEncodingStatus: Boolean = _oneHotEncodeFlag

  def getScalingStatus: Boolean = _scalingFlag

  def getFeatureInteractionStatus: Boolean = _featureInteractionFlag

  def getDataPrepCachingStatus: Boolean = _dataPrepCachingFlag

  def getDataPrepParallelism: Int = _dataPrepParallelism

  def getNumericBoundaries: Map[String, (Double, Double)] = _numericBoundaries

  def getStringBoundaries: Map[String, List[String]] = _stringBoundaries

  def getScoringMetric: String = _scoringMetric

  def getScoringOptimizationStrategy: String = _scoringOptimizationStrategy

  // Accessors for the fill / cardinality fields (the fields that setFillConfig(config) repopulates),
  // plus the date-time conversion type and the vector ignore list.

  def getNumericFillStat: String = _numericFillStat

  def getCharacterFillStat: String = _characterFillStat

  def getDateTimeConversionType: String = _dateTimeConversionType

  def getFieldsToIgnoreInVector: Array[String] = _fieldsToIgnoreInVector

  def getNAFillFilterPrecision: Double = _naFillFilterPrecision

  def getCategoricalNAFillMap: Map[String, String] = _categoricalNAFillMap

  def getNumericNAFillMap: Map[String, AnyVal] = _numericNAFillMap

  def getCharacterNABlanketFillValue: String = _characterNABlanketFillValue

  def getNumericNABlanketFillValue: Double = _numericNABlanketFillValue

  def getNAFillMode: String = _naFillMode

  // Returns the _cardinalitySwitchFlag field.
  def getCardinalitySwitch: Boolean = _cardinalitySwitchFlag

  def getCardinalityType: String = _cardinalityType

  def getCardinalityLimit: Int = _cardinalityLimit

  def getCardinalityPrecision: Double = _cardinalityPrecision

  def getCardinalityCheckMode: String = _cardinalityCheckMode

  def getModelSelectionDistinctThreshold: Int = _modelSelectionDistinctThreshold

  // Returns the aggregate FillConfig object currently stored on this trait.
  def getFillConfig: FillConfig = _fillConfig

  // Accessors for the outlier filtering fields (the fields that setOutlierConfig(config) repopulates).

  def getFilterBounds: String = _filterBounds

  def getLowerFilterNTile: Double = _lowerFilterNTile

  def getUpperFilterNTile: Double = _upperFilterNTile

  def getFilterPrecision: Double = _filterPrecision

  def getContinuousDataThreshold: Int = _continuousDataThreshold

  def getFieldsToIgnore: Array[String] = _fieldsToIgnore

  // Returns the aggregate OutlierConfig object currently stored on this trait.
  def getOutlierConfig: OutlierConfig = _outlierConfig

  // Accessors for the pearson filtering fields (repopulated by setPearsonConfig(config)) and the
  // covariance cutoff fields (repopulated by setCovarianceConfig(config)).

  def getPearsonFilterStatistic: String = _pearsonFilterStatistic

  def getPearsonFilterDirection: String = _pearsonFilterDirection

  def getPearsonFilterManualValue: Double = _pearsonFilterManualValue

  def getPearsonFilterMode: String = _pearsonFilterMode

  def getPearsonAutoFilterNTile: Double = _pearsonAutoFilterNTile

  // Returns the aggregate PearsonConfig object currently stored on this trait.
  def getPearsonConfig: PearsonConfig = _pearsonConfig

  def getCorrelationCutoffLow: Double = _correlationCutoffLow

  def getCorrelationCutoffHigh: Double = _correlationCutoffHigh

  // Returns the aggregate CovarianceConfig object currently stored on this trait.
  def getCovarianceConfig: CovarianceConfig = _covarianceConfig

  // Accessors for the scaling fields (the fields that setScalerConfig(config) repopulates).

  def getScalerType: String = _scalerType

  def getScalerMin: Double = _scalerMin

  def getScalerMax: Double = _scalerMax

  // The two "StandardScaling" accessors return the _standardScaler*Flag fields.
  def getStandardScalingMeanFlag: Boolean = _standardScalerMeanFlag

  def getStandardScalingStdDevFlag: Boolean = _standardScalerStdDevFlag

  def getPNorm: Double = _pNorm

  // Returns the aggregate ScalingConfig object currently stored on this trait.
  def getScalingConfig: ScalingConfig = _scalingConfig

  // Accessors for the feature interaction fields (the fields that
  // setFeatureInteractionConfig(config) repopulates).

  // Returns the aggregate FeatureInteractionConfig object currently stored on this trait.
  def getFeatureInteractionConfig: FeatureInteractionConfig =
    _featureInteractionConfig

  def getFeatureInteractionRetentionMode: String =
    _featureInteractionRetentionMode

  def getFeatureInteractionContinuousDiscretizerBucketCount: Int =
    _featureInteractionContinuousDiscretizerBucketCount

  def getFeatureInteractionParallelism: Int = _featureInteractionParallelism

  def getFeatureInteractionTargetInteractionPercentage: Double =
    _featureInteractionTargetInteractionPercentage

  // Accessors for the parallelism / train-test split fields of the genetic configuration and for
  // the KSample fields (the fields that setKSampleConfig(config) repopulates).

  def getParallelism: Int = _parallelism

  def getKFold: Int = _kFold

  def getTrainPortion: Double = _trainPortion

  def getTrainSplitMethod: String = _trainSplitMethod

  // Returns the aggregate KSampleConfig object currently stored on this trait.
  def getKSampleConfig: KSampleConfig = _kSampleConfig

  def getSyntheticCol: String = _syntheticCol

  def getKGroups: Int = _kGroups

  def getKMeansMaxIter: Int = _kMeansMaxIter

  def getKMeansTolerance: Double = _kMeansTolerance

  def getKMeansDistanceMeasurement: String = _kMeansDistanceMeasurement

  def getKMeansSeed: Long = _kMeansSeed

  def getKMeansPredictionCol: String = _kMeansPredictionCol

  def getLSHHashTables: Int = _lshHashTables

  def getLSHOutputCol: String = _lshOutputCol

  def getQuorumCount: Int = _quorumCount

  def getMinimumVectorCountToMutate: Int = _minimumVectorCountToMutate

  def getVectorMutationMethod: String = _vectorMutationMethod

  def getMutationMode: String = _mutationMode

  def getMutationValue: Double = _mutationValue

  def getTrainSplitChronologicalColumn: String = _trainSplitChronologicalColumn

  def getTrainSplitChronologicalRandomPercentage: Double =
    _trainSplitChronologicalRandomPercentage

  // Accessors for the genetic algorithm, model seed, first generation and hyper space fields.

  def getSeed: Long = _seed

  def getFirstGenerationGenePool: Int = _firstGenerationGenePool

  def getNumberOfGenerations: Int = _numberOfGenerations

  def getNumberOfParentsToRetain: Int = _numberOfParentsToRetain

  def getNumberOfMutationsPerGeneration: Int = _numberOfMutationsPerGeneration

  def getGeneticMixing: Double = _geneticMixing

  def getGenerationalMutationStrategy: String = _generationalMutationStrategy

  def getFixedMutationValue: Int = _fixedMutationValue

  def getMutationMagnitudeMode: String = _mutationMagnitudeMode

  def getModelSeedSetStatus: Boolean = _modelSeedSetStatus

  def getModelSeedMap: Map[String, Any] = _modelSeedMap

  def getFirstGenerationPermutationCount: Int = _firstGenerationPermutationCount

  def getFirstGenerationIndexMixingMode: String =
    _firstGenerationIndexMixingMode

  def getFirstGenerationArraySeed: Long = _firstGenerationArraySeed

  // Returns the Boolean _hyperSpaceInference field.
  def getHyperSpaceInferenceStatus: Boolean = _hyperSpaceInference

  def getHyperSpaceInferenceCount: Int = _hyperSpaceInferenceCount

  def getHyperSpaceModelType: String = _hyperSpaceModelType

  def getHyperSpaceModelCount: Int = _hyperSpaceModelCount

  // Returns the aggregate FirstGenerationConfig object currently stored on this trait.
  def getFirstGenerationConfig: FirstGenerationConfig = _firstGenerationConfig

  def getFirstGenerationMode: String = _firstGenerationMode

  // Accessors for the MLflow fields (the string/map fields are repopulated by
  // resetMlFlowConfig(config)) and for the aggregate config objects held on this trait.

  def getMlFlowLoggingFlag: Boolean = _mlFlowLoggingFlag

  // Returns the _mlFlowArtifactsFlag field.
  def getMlFlowLogArtifactsFlag: Boolean = _mlFlowArtifactsFlag

  def getMlFlowTrackingURI: String = _mlFlowTrackingURI

  def getMlFlowExperimentName: String = _mlFlowExperimentName

  def getMlFlowModelSaveDirectory: String = _mlFlowModelSaveDirectory

  def getMlFlowLoggingMode: String = _mlFlowLoggingMode

  def getMlFlowBestSuffix: String = _mlFlowBestSuffix

  def getMlFlowCustomRunTags: Map[String, String] = _mlFlowCustomRunTags

  def getMlFlowConfig: MLFlowConfig = _mlFlowConfig

  def getGeneticConfig: GeneticConfig = _geneticConfig

  def getMainConfig: MainConfig = _mainConfig

  // Returns the feature importances MainConfig (_featureImportancesConfig).
  def getFeatConfig: MainConfig = _featureImportancesConfig

  def getTreeSplitsConfig: MainConfig = _treeSplitsConfig

  // Accessors for the auto-stopping, feature importance cutoff, continuous evolution,
  // inference, data reduction and split caching fields.

  def getAutoStoppingFlag: Boolean = _autoStoppingFlag

  def getAutoStoppingScore: Double = _autoStoppingScore

  def getFeatureImportanceCutoffType: String = _featureImportanceCutoffType

  def getFeatureImportanceCutoffValue: Double = _featureImportanceCutoffValue

  def getEvolutionStrategy: String = _evolutionStrategy

  def getContinuousEvolutionMaxIterations: Int =
    _continuousEvolutionMaxIterations

  def getContinuousEvolutionStoppingScore: Double =
    _continuousEvolutionStoppingScore

  def getContinuousEvolutionParallelism: Int = _continuousEvolutionParallelism

  def getContinuousEvolutionMutationAggressiveness: Int =
    _continuousEvolutionMutationAggressiveness

  def getContinuousEvolutionGeneticMixing: Double =
    _continuousEvolutionGeneticMixing

  // NOTE: the method name is misspelled ("Imporvement"); it returns the
  // _continuousEvolutionRollingImprovementCount field. Renaming would change the public API.
  def getContinuousEvolutionRollingImporvementCount: Int =
    _continuousEvolutionRollingImprovementCount

  def getInferenceConfigSaveLocation: String = _inferenceConfigSaveLocation

  def getDataReductionFactor: Double = _dataReductionFactor

  def getDeltaCacheBackingDirectory: String = _deltaCacheBackingDirectory

  def getDeltaCacheBackingDirectoryRemovalFlag: Boolean =
    _deltaCacheBackingDirectoryRemovalFlag

  def getSplitCachingStrategy: String = _splitCachingStrategy

  /**
    * Helper method for extracting the config from a run's GenericModelReturn payload
    * This is designed to handle "lazy" copy/paste from either stdout or the mlflow ui.
    * The alternative (preferred method of seeding a run start) is to submit a Map() for the run configuration seed.
    *
    * The second parenthesised group of the payload is split on ',' and each element is parsed as
    * 'key -> value'. Blank elements (e.g. from an empty 'Map()') are skipped, and only the first
    * ' -> ' of an element separates key from value, so values containing ' -> ' are kept whole.
    *
    * @param fullModelReturn: String The Generic Model Config of a run, to be used as a starting point for further
    *                       tuning or refinement.
    * @return A Map Object that can be parsed into the requisite case class definition to set a seed for a particular
    *         type of model run. All values are returned as Strings.
    * @throws IllegalArgumentException if the payload has fewer than two parenthesised groups or an element is
    *                                  not in 'key -> value' form
    */
  private def extractGenericModelReturnMap(
    fullModelReturn: String
  ): Map[String, Any] = {

    // Matches the text following each '(' up to the next parenthesis.
    val patternToMatch = "(?<=\\()[^()]*".r

    val parenGroups = patternToMatch.findAllIn(fullModelReturn).toList

    // Index 1 is the group that holds the config entries; presumably the body of the payload's
    // first Map(...). Fail with a descriptive message instead of a bare index error.
    require(
      parenGroups.length > 1,
      s"Unable to locate a configuration Map in the supplied model return: '$fullModelReturn'"
    )

    val configEntries: Array[(String, Any)] = parenGroups(1)
      .split(",")
      .map(_.trim)
      .filter(_.nonEmpty)
      .map { element =>
        // Limit of 2 keeps any further ' -> ' occurrences inside the value.
        element.split(" -> ", 2) match {
          case Array(key, setting) => key -> (setting: Any)
          case _ =>
            throw new IllegalArgumentException(
              s"Configuration element '$element' is not in the expected 'key -> value' form."
            )
        }
      }

    configEntries.toMap
  }

}