All downloads are free. Search and download functionality uses the official Maven repository.

com.databricks.labs.automl.AutomationRunner.scala Maven / Gradle / Ivy

The newest version!
package com.databricks.labs.automl

import com.databricks.labs.automl.executor.DataPrep
import com.databricks.labs.automl.inference.{
  InferenceConfig,
  InferenceModelConfig,
  InferenceTools
}
import com.databricks.labs.automl.model._
import com.databricks.labs.automl.model.tools.split.{
  DataSplitCustodial,
  DataSplitUtility
}
import com.databricks.labs.automl.model.tools.{PostModelingOptimization}
import com.databricks.labs.automl.params._
import com.databricks.labs.automl.reports.{
  DecisionTreeSplits,
  RandomForestFeatureImportance
}
import com.databricks.labs.automl.tracking.{
  MLFlowReportStructure,
  MLFlowReturn,
  MLFlowTracker
}
import com.microsoft.ml.spark.lightgbm.{
  LightGBMClassificationModel,
  LightGBMRegressionModel
}
import ml.dmlc.xgboost4j.scala.spark.{
  XGBoostClassificationModel,
  XGBoostRegressionModel
}
import com.databricks.labs.automl.utils.AutoMlPipelineMlFlowUtils
import org.apache.log4j.{Level, Logger}
import org.apache.spark.ml.classification._
import org.apache.spark.ml.regression.{
  DecisionTreeRegressionModel,
  GBTRegressionModel,
  LinearRegressionModel,
  RandomForestRegressionModel
}
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
import org.apache.spark.storage.StorageLevel
import org.json4s.jackson.Serialization
import org.json4s.jackson.Serialization.writePretty
import org.json4s.{Formats, NoTypeHints}

import scala.collection.mutable.ArrayBuffer

class AutomationRunner(df: DataFrame) extends DataPrep(df) with InferenceTools {

  // Log4j logger named after the runtime class of this instance.
  private val logger: Logger = Logger.getLogger(this.getClass)

  /**
    * Tunes a Random Forest model against the prepared data set.
    *
    * Steps, in order:
    *  1. When `_mainConfig.dataPrepCachingFlag` is set, persist `payload.data`
    *     at `MEMORY_AND_DISK` and run a no-op action over it.
    *  2. Build the data splits with `DataSplitUtility.split`.
    *  3. Configure a `RandomForestTuner` from `_mainConfig` and call
    *     `evolveWithScoringDF()`.
    *  4. When `geneticConfig.hyperSpaceInference` is set, hand the tuning
    *     results to `PostModelingOptimization.randomForestPrediction` and
    *     evaluate the returned candidates with `postRunModeledHyperParams`.
    *  5. Release the cached split instances through
    *     `DataSplitCustodial.cleanCachedInstances`.
    *
    * Step 5 runs in a `finally` block so that the cached splits are released
    * even when tuning or the post-tuning inference run throws. If the cleanup
    * itself throws while another exception is in flight, the cleanup exception
    * is the one that propagates.
    *
    * @param payload    source of the data (`payload.data`) and of the model
    *                   type string (`payload.modelType`)
    * @param isPipeline forwarded unchanged to the `RandomForestTuner`
    *                   constructor
    * @return a tuple of: all model results (tuning run first, followed by any
    *         post-tuning inference results); the union of the scoring
    *         DataFrames produced by those runs; `payload.modelType`; and the
    *         (possibly persisted) input data
    */
  private def runRandomForest(
    payload: DataGeneration,
    isPipeline: Boolean = false
  ): (Array[RandomForestModelsWithResults], DataFrame, String, DataFrame) = {

    // Local aliases for the nested configuration sections read below.
    val genetic = _mainConfig.geneticConfig
    val kSample = genetic.kSampleConfig
    val firstGen = genetic.initialGenerationConfig

    val cachedData =
      if (_mainConfig.dataPrepCachingFlag) {
        val persisted = payload.data.persist(StorageLevel.MEMORY_AND_DISK)
        // persist() is lazy; this no-op action triggers the computation.
        persisted.foreach(_ => ())
        persisted
      } else {
        payload.data
      }

    // NOTE(review): the split receives the raw trainSplitMethod while the
    // tuner below receives the result of trainSplitValidation - confirm that
    // this difference is intended.
    val splitData = DataSplitUtility.split(
      cachedData,
      genetic.kFold,
      genetic.trainSplitMethod,
      _mainConfig.labelCol,
      genetic.deltaCacheBackingDirectory,
      genetic.splitCachingStrategy,
      _mainConfig.modelFamily,
      genetic.parallelism,
      genetic.trainPortion,
      kSample.syntheticCol,
      genetic.trainSplitChronologicalColumn,
      genetic.trainSplitChronologicalRandomPercentage,
      _mainConfig.dataReductionFactor
    )

    val (allModels, allStats) = try {

      // Setter order is kept exactly as it was.
      val tuner = new RandomForestTuner(
        cachedData,
        splitData,
        payload.modelType,
        isPipeline
      ).setLabelCol(_mainConfig.labelCol)
        .setFeaturesCol(_mainConfig.featuresCol)
        .setFieldsToIgnore(_mainConfig.fieldsToIgnoreInVector)
        .setRandomForestNumericBoundaries(_mainConfig.numericBoundaries)
        .setRandomForestStringBoundaries(_mainConfig.stringBoundaries)
        .setScoringMetric(_mainConfig.scoringMetric)
        .setTrainPortion(genetic.trainPortion)
        .setTrainSplitMethod(
          trainSplitValidation(genetic.trainSplitMethod, payload.modelType)
        )
        .setSyntheticCol(kSample.syntheticCol)
        .setKGroups(kSample.kGroups)
        .setKMeansMaxIter(kSample.kMeansMaxIter)
        .setKMeansTolerance(kSample.kMeansTolerance)
        .setKMeansDistanceMeasurement(kSample.kMeansDistanceMeasurement)
        .setKMeansSeed(kSample.kMeansSeed)
        .setKMeansPredictionCol(kSample.kMeansPredictionCol)
        .setLSHHashTables(kSample.lshHashTables)
        .setLSHSeed(kSample.lshSeed)
        .setLSHOutputCol(kSample.lshOutputCol)
        .setQuorumCount(kSample.quorumCount)
        .setMinimumVectorCountToMutate(kSample.minimumVectorCountToMutate)
        .setVectorMutationMethod(kSample.vectorMutationMethod)
        .setMutationMode(kSample.mutationMode)
        .setMutationValue(kSample.mutationValue)
        .setLabelBalanceMode(kSample.labelBalanceMode)
        .setCardinalityThreshold(kSample.cardinalityThreshold)
        .setNumericRatio(kSample.numericRatio)
        .setNumericTarget(kSample.numericTarget)
        .setTrainSplitChronologicalColumn(
          genetic.trainSplitChronologicalColumn
        )
        .setTrainSplitChronologicalRandomPercentage(
          genetic.trainSplitChronologicalRandomPercentage
        )
        .setParallelism(genetic.parallelism)
        .setKFold(genetic.kFold)
        .setSeed(genetic.seed)
        .setOptimizationStrategy(_mainConfig.scoringOptimizationStrategy)
        .setFirstGenerationGenePool(genetic.firstGenerationGenePool)
        .setNumberOfMutationGenerations(genetic.numberOfGenerations)
        .setNumberOfMutationsPerGeneration(
          genetic.numberOfMutationsPerGeneration
        )
        .setNumberOfParentsToRetain(genetic.numberOfParentsToRetain)
        .setGeneticMixing(genetic.geneticMixing)
        .setGenerationalMutationStrategy(genetic.generationalMutationStrategy)
        .setMutationMagnitudeMode(genetic.mutationMagnitudeMode)
        .setFixedMutationValue(genetic.fixedMutationValue)
        .setEarlyStoppingFlag(_mainConfig.autoStoppingFlag)
        .setEarlyStoppingScore(_mainConfig.autoStoppingScore)
        .setEvolutionStrategy(genetic.evolutionStrategy)
        .setContinuousEvolutionImprovementThreshold(
          genetic.continuousEvolutionImprovementThreshold
        )
        .setGeneticMBORegressorType(genetic.geneticMBORegressorType)
        .setGeneticMBOCandidateFactor(genetic.geneticMBOCandidateFactor)
        .setContinuousEvolutionMaxIterations(
          genetic.continuousEvolutionMaxIterations
        )
        .setContinuousEvolutionStoppingScore(
          genetic.continuousEvolutionStoppingScore
        )
        .setContinuousEvolutionParallelism(
          genetic.continuousEvolutionParallelism
        )
        .setContinuousEvolutionMutationAggressiveness(
          genetic.continuousEvolutionMutationAggressiveness
        )
        .setContinuousEvolutionGeneticMixing(
          genetic.continuousEvolutionGeneticMixing
        )
        .setContinuousEvolutionRollingImporvementCount(
          genetic.continuousEvolutionRollingImprovementCount
        )
        .setDataReductionFactor(_mainConfig.dataReductionFactor)
        .setFirstGenMode(genetic.initialGenerationMode)
        .setFirstGenPermutations(firstGen.permutationCount)
        .setFirstGenIndexMixingMode(firstGen.indexMixingMode)
        .setFirstGenArraySeed(firstGen.arraySeed)
        .setHyperSpaceModelCount(genetic.hyperSpaceModelCount)

      // The model seed is only applied when it was explicitly set.
      if (_modelSeedSetStatus) tuner.setModelSeed(genetic.modelSeed)

      val (tunedModels, tuningStats) = tuner.evolveWithScoringDF()

      if (genetic.hyperSpaceInference) {

        println("\n\t\tStarting Post Tuning Inference Run.\n")

        // Re-express the tuning results in the generic form consumed by the
        // post-modeling optimizer.
        val tunedAsGeneric = tunedModels.map { result =>
          GenericModelReturn(
            hyperParams = extractPayload(result.modelHyperParams),
            model = result.model,
            score = result.score,
            metrics = result.evalMetrics,
            generation = result.generation
          )
        }

        val hyperSpaceRunCandidates = new PostModelingOptimization()
          .setModelFamily("RandomForest")
          .setModelType(payload.modelType)
          .setHyperParameterSpaceCount(genetic.hyperSpaceInferenceCount)
          .setNumericBoundaries(tuner.getRandomForestNumericBoundaries)
          .setStringBoundaries(tuner.getRandomForestStringBoundaries)
          .setSeed(genetic.seed)
          .setOptimizationStrategy(_mainConfig.scoringOptimizationStrategy)
          .randomForestPrediction(
            tunedAsGeneric.toArray,
            genetic.hyperSpaceModelType,
            genetic.hyperSpaceModelCount
          )

        val (hyperModels, hyperStats) =
          tuner.postRunModeledHyperParams(hyperSpaceRunCandidates)

        (tunedModels ++ hyperModels, Seq(tuningStats, hyperStats))

      } else {
        (tunedModels, Seq(tuningStats))
      }

    } finally {
      // Always release the cached splits, including on the failure path.
      DataSplitCustodial.cleanCachedInstances(splitData, _mainConfig)
    }

    (
      allModels.toArray,
      // allStats always holds at least the tuning run's DataFrame.
      allStats.reduce(_ union _),
      payload.modelType,
      cachedData
    )

  }

  /**
    * Tunes a LightGBM model against the prepared data set.
    *
    * Steps, in order:
    *  1. When `_mainConfig.dataPrepCachingFlag` is set, persist `payload.data`
    *     at `MEMORY_AND_DISK` and run a no-op action over it.
    *  2. Build the data splits with `DataSplitUtility.split`.
    *  3. Configure a `LightGBMTuner` from `_mainConfig` and call
    *     `evolveWithScoringDF()`.
    *  4. When `geneticConfig.hyperSpaceInference` is set, hand the tuning
    *     results to `PostModelingOptimization.lightGBMPrediction` and evaluate
    *     the returned candidates with `postRunModeledHyperParams`.
    *  5. Release the cached split instances through
    *     `DataSplitCustodial.cleanCachedInstances`.
    *
    * Step 5 runs in a `finally` block so that the cached splits are released
    * even when tuning or the post-tuning inference run throws. If the cleanup
    * itself throws while another exception is in flight, the cleanup exception
    * is the one that propagates.
    *
    * @param lightGBMType passed to the `LightGBMTuner` constructor and used as
    *                     the model family of the post-tuning optimizer
    * @param payload      source of the data (`payload.data`) and of the model
    *                     type string (`payload.modelType`)
    * @param isPipeline   forwarded unchanged to the `LightGBMTuner`
    *                     constructor
    * @return a tuple of: all model results (tuning run first, followed by any
    *         post-tuning inference results); the union of the scoring
    *         DataFrames produced by those runs; `payload.modelType`; and the
    *         (possibly persisted) input data
    */
  private def runLightGBM(
    lightGBMType: String,
    payload: DataGeneration,
    isPipeline: Boolean = false
  ): (Array[LightGBMModelsWithResults], DataFrame, String, DataFrame) = {

    // Local aliases for the nested configuration sections read below.
    val genetic = _mainConfig.geneticConfig
    val kSample = genetic.kSampleConfig
    val firstGen = genetic.initialGenerationConfig

    val cachedData =
      if (_mainConfig.dataPrepCachingFlag) {
        val persisted = payload.data.persist(StorageLevel.MEMORY_AND_DISK)
        // persist() is lazy; this no-op action triggers the computation.
        persisted.foreach(_ => ())
        persisted
      } else {
        payload.data
      }

    // NOTE(review): the split receives the raw trainSplitMethod while the
    // tuner below receives the result of trainSplitValidation - confirm that
    // this difference is intended.
    val splitData = DataSplitUtility.split(
      cachedData,
      genetic.kFold,
      genetic.trainSplitMethod,
      _mainConfig.labelCol,
      genetic.deltaCacheBackingDirectory,
      genetic.splitCachingStrategy,
      _mainConfig.modelFamily,
      genetic.parallelism,
      genetic.trainPortion,
      kSample.syntheticCol,
      genetic.trainSplitChronologicalColumn,
      genetic.trainSplitChronologicalRandomPercentage,
      _mainConfig.dataReductionFactor
    )

    val (allModels, allStats) = try {

      // Setter order is kept exactly as it was.
      val tuner = new LightGBMTuner(
        cachedData,
        splitData,
        payload.modelType,
        lightGBMType,
        isPipeline
      ).setLabelCol(_mainConfig.labelCol)
        .setFeaturesCol(_mainConfig.featuresCol)
        .setFieldsToIgnore(_mainConfig.fieldsToIgnoreInVector)
        .setLGBMNumericBoundaries(_mainConfig.numericBoundaries)
        .setLGBMStringBoundaries(_mainConfig.stringBoundaries)
        .setScoringMetric(_mainConfig.scoringMetric)
        .setTrainPortion(genetic.trainPortion)
        .setTrainSplitMethod(
          trainSplitValidation(genetic.trainSplitMethod, payload.modelType)
        )
        .setSyntheticCol(kSample.syntheticCol)
        .setKGroups(kSample.kGroups)
        .setKMeansMaxIter(kSample.kMeansMaxIter)
        .setKMeansTolerance(kSample.kMeansTolerance)
        .setKMeansDistanceMeasurement(kSample.kMeansDistanceMeasurement)
        .setKMeansSeed(kSample.kMeansSeed)
        .setKMeansPredictionCol(kSample.kMeansPredictionCol)
        .setLSHHashTables(kSample.lshHashTables)
        .setLSHSeed(kSample.lshSeed)
        .setLSHOutputCol(kSample.lshOutputCol)
        .setQuorumCount(kSample.quorumCount)
        .setMinimumVectorCountToMutate(kSample.minimumVectorCountToMutate)
        .setVectorMutationMethod(kSample.vectorMutationMethod)
        .setMutationMode(kSample.mutationMode)
        .setMutationValue(kSample.mutationValue)
        .setLabelBalanceMode(kSample.labelBalanceMode)
        .setCardinalityThreshold(kSample.cardinalityThreshold)
        .setNumericRatio(kSample.numericRatio)
        .setNumericTarget(kSample.numericTarget)
        .setTrainSplitChronologicalColumn(
          genetic.trainSplitChronologicalColumn
        )
        .setTrainSplitChronologicalRandomPercentage(
          genetic.trainSplitChronologicalRandomPercentage
        )
        .setParallelism(genetic.parallelism)
        .setKFold(genetic.kFold)
        .setSeed(genetic.seed)
        .setOptimizationStrategy(_mainConfig.scoringOptimizationStrategy)
        .setFirstGenerationGenePool(genetic.firstGenerationGenePool)
        .setNumberOfMutationGenerations(genetic.numberOfGenerations)
        .setNumberOfMutationsPerGeneration(
          genetic.numberOfMutationsPerGeneration
        )
        .setNumberOfParentsToRetain(genetic.numberOfParentsToRetain)
        .setGeneticMixing(genetic.geneticMixing)
        .setGenerationalMutationStrategy(genetic.generationalMutationStrategy)
        .setMutationMagnitudeMode(genetic.mutationMagnitudeMode)
        .setFixedMutationValue(genetic.fixedMutationValue)
        .setEarlyStoppingFlag(_mainConfig.autoStoppingFlag)
        .setEarlyStoppingScore(_mainConfig.autoStoppingScore)
        .setEvolutionStrategy(genetic.evolutionStrategy)
        .setContinuousEvolutionImprovementThreshold(
          genetic.continuousEvolutionImprovementThreshold
        )
        .setGeneticMBORegressorType(genetic.geneticMBORegressorType)
        .setGeneticMBOCandidateFactor(genetic.geneticMBOCandidateFactor)
        .setContinuousEvolutionMaxIterations(
          genetic.continuousEvolutionMaxIterations
        )
        .setContinuousEvolutionStoppingScore(
          genetic.continuousEvolutionStoppingScore
        )
        .setContinuousEvolutionParallelism(
          genetic.continuousEvolutionParallelism
        )
        .setContinuousEvolutionMutationAggressiveness(
          genetic.continuousEvolutionMutationAggressiveness
        )
        .setContinuousEvolutionGeneticMixing(
          genetic.continuousEvolutionGeneticMixing
        )
        .setContinuousEvolutionRollingImporvementCount(
          genetic.continuousEvolutionRollingImprovementCount
        )
        .setDataReductionFactor(_mainConfig.dataReductionFactor)
        .setFirstGenMode(genetic.initialGenerationMode)
        .setFirstGenPermutations(firstGen.permutationCount)
        .setFirstGenIndexMixingMode(firstGen.indexMixingMode)
        .setFirstGenArraySeed(firstGen.arraySeed)
        .setHyperSpaceModelCount(genetic.hyperSpaceModelCount)

      // The model seed is only applied when it was explicitly set.
      if (_modelSeedSetStatus) tuner.setModelSeed(genetic.modelSeed)

      val (tunedModels, tuningStats) = tuner.evolveWithScoringDF()

      if (genetic.hyperSpaceInference) {

        println("\n\t\tStarting Post Tuning Inference Run.\n")

        // Re-express the tuning results in the generic form consumed by the
        // post-modeling optimizer.
        val tunedAsGeneric = tunedModels.map { result =>
          GenericModelReturn(
            hyperParams = extractPayload(result.modelHyperParams),
            model = result.model,
            score = result.score,
            metrics = result.evalMetrics,
            generation = result.generation
          )
        }

        val hyperSpaceRunCandidates = new PostModelingOptimization()
          .setModelFamily(lightGBMType)
          .setModelType(payload.modelType)
          .setHyperParameterSpaceCount(genetic.hyperSpaceInferenceCount)
          .setNumericBoundaries(tuner.getLightGBMNumericBoundaries)
          .setStringBoundaries(tuner.getLightGBMStringBoundaries)
          .setSeed(genetic.seed)
          .setOptimizationStrategy(_mainConfig.scoringOptimizationStrategy)
          .lightGBMPrediction(
            tunedAsGeneric.toArray,
            genetic.hyperSpaceModelType,
            genetic.hyperSpaceModelCount
          )

        val (hyperModels, hyperStats) =
          tuner.postRunModeledHyperParams(hyperSpaceRunCandidates)

        (tunedModels ++ hyperModels, Seq(tuningStats, hyperStats))

      } else {
        (tunedModels, Seq(tuningStats))
      }

    } finally {
      // Always release the cached splits, including on the failure path.
      DataSplitCustodial.cleanCachedInstances(splitData, _mainConfig)
    }

    (
      allModels.toArray,
      // allStats always holds at least the tuning run's DataFrame.
      allStats.reduce(_ union _),
      payload.modelType,
      cachedData
    )

  }

  /**
    * Tunes an XGBoost model against the prepared data set.
    *
    * Steps, in order:
    *  1. When `_mainConfig.dataPrepCachingFlag` is set, persist `payload.data`
    *     at `MEMORY_AND_DISK` and run a no-op action over it.
    *  2. Build the data splits with `DataSplitUtility.split`.
    *  3. Configure an `XGBoostTuner` from `_mainConfig` and call
    *     `evolveWithScoringDF()`.
    *  4. When `geneticConfig.hyperSpaceInference` is set, hand the tuning
    *     results to `PostModelingOptimization.xgBoostPrediction` and evaluate
    *     the returned candidates with `postRunModeledHyperParams`.
    *  5. Release the cached split instances through
    *     `DataSplitCustodial.cleanCachedInstances`.
    *
    * Step 5 runs in a `finally` block so that the cached splits are released
    * even when tuning or the post-tuning inference run throws. If the cleanup
    * itself throws while another exception is in flight, the cleanup exception
    * is the one that propagates.
    *
    * @param payload    source of the data (`payload.data`) and of the model
    *                   type string (`payload.modelType`)
    * @param isPipeline forwarded unchanged to the `XGBoostTuner` constructor
    * @return a tuple of: all model results (tuning run first, followed by any
    *         post-tuning inference results); the union of the scoring
    *         DataFrames produced by those runs; `payload.modelType`; and the
    *         (possibly persisted) input data
    */
  private def runXGBoost(
    payload: DataGeneration,
    isPipeline: Boolean = false
  ): (Array[XGBoostModelsWithResults], DataFrame, String, DataFrame) = {

    // Local aliases for the nested configuration sections read below.
    val genetic = _mainConfig.geneticConfig
    val kSample = genetic.kSampleConfig
    val firstGen = genetic.initialGenerationConfig

    val cachedData =
      if (_mainConfig.dataPrepCachingFlag) {
        val persisted = payload.data.persist(StorageLevel.MEMORY_AND_DISK)
        // persist() is lazy; this no-op action triggers the computation.
        persisted.foreach(_ => ())
        persisted
      } else {
        payload.data
      }

    // NOTE(review): the split receives the raw trainSplitMethod while the
    // tuner below receives the result of trainSplitValidation - confirm that
    // this difference is intended.
    val splitData = DataSplitUtility.split(
      cachedData,
      genetic.kFold,
      genetic.trainSplitMethod,
      _mainConfig.labelCol,
      genetic.deltaCacheBackingDirectory,
      genetic.splitCachingStrategy,
      _mainConfig.modelFamily,
      genetic.parallelism,
      genetic.trainPortion,
      kSample.syntheticCol,
      genetic.trainSplitChronologicalColumn,
      genetic.trainSplitChronologicalRandomPercentage,
      _mainConfig.dataReductionFactor
    )

    val (allModels, allStats) = try {

      // Setter order is kept exactly as it was. Only numeric boundaries are
      // set on this tuner; no string-boundary setter is called.
      val tuner = new XGBoostTuner(
        cachedData,
        splitData,
        payload.modelType,
        isPipeline
      ).setLabelCol(_mainConfig.labelCol)
        .setFeaturesCol(_mainConfig.featuresCol)
        .setFieldsToIgnore(_mainConfig.fieldsToIgnoreInVector)
        .setXGBoostNumericBoundaries(_mainConfig.numericBoundaries)
        .setScoringMetric(_mainConfig.scoringMetric)
        .setTrainPortion(genetic.trainPortion)
        .setTrainSplitMethod(
          trainSplitValidation(genetic.trainSplitMethod, payload.modelType)
        )
        .setSyntheticCol(kSample.syntheticCol)
        .setKGroups(kSample.kGroups)
        .setKMeansMaxIter(kSample.kMeansMaxIter)
        .setKMeansTolerance(kSample.kMeansTolerance)
        .setKMeansDistanceMeasurement(kSample.kMeansDistanceMeasurement)
        .setKMeansSeed(kSample.kMeansSeed)
        .setKMeansPredictionCol(kSample.kMeansPredictionCol)
        .setLSHHashTables(kSample.lshHashTables)
        .setLSHSeed(kSample.lshSeed)
        .setLSHOutputCol(kSample.lshOutputCol)
        .setQuorumCount(kSample.quorumCount)
        .setMinimumVectorCountToMutate(kSample.minimumVectorCountToMutate)
        .setVectorMutationMethod(kSample.vectorMutationMethod)
        .setMutationMode(kSample.mutationMode)
        .setMutationValue(kSample.mutationValue)
        .setLabelBalanceMode(kSample.labelBalanceMode)
        .setCardinalityThreshold(kSample.cardinalityThreshold)
        .setNumericRatio(kSample.numericRatio)
        .setNumericTarget(kSample.numericTarget)
        .setTrainSplitChronologicalColumn(
          genetic.trainSplitChronologicalColumn
        )
        .setTrainSplitChronologicalRandomPercentage(
          genetic.trainSplitChronologicalRandomPercentage
        )
        .setParallelism(genetic.parallelism)
        .setKFold(genetic.kFold)
        .setSeed(genetic.seed)
        .setOptimizationStrategy(_mainConfig.scoringOptimizationStrategy)
        .setFirstGenerationGenePool(genetic.firstGenerationGenePool)
        .setNumberOfMutationGenerations(genetic.numberOfGenerations)
        .setNumberOfMutationsPerGeneration(
          genetic.numberOfMutationsPerGeneration
        )
        .setNumberOfParentsToRetain(genetic.numberOfParentsToRetain)
        .setGeneticMixing(genetic.geneticMixing)
        .setGenerationalMutationStrategy(genetic.generationalMutationStrategy)
        .setMutationMagnitudeMode(genetic.mutationMagnitudeMode)
        .setFixedMutationValue(genetic.fixedMutationValue)
        .setEarlyStoppingFlag(_mainConfig.autoStoppingFlag)
        .setEarlyStoppingScore(_mainConfig.autoStoppingScore)
        .setEvolutionStrategy(genetic.evolutionStrategy)
        .setContinuousEvolutionImprovementThreshold(
          genetic.continuousEvolutionImprovementThreshold
        )
        .setGeneticMBORegressorType(genetic.geneticMBORegressorType)
        .setGeneticMBOCandidateFactor(genetic.geneticMBOCandidateFactor)
        .setContinuousEvolutionMaxIterations(
          genetic.continuousEvolutionMaxIterations
        )
        .setContinuousEvolutionStoppingScore(
          genetic.continuousEvolutionStoppingScore
        )
        .setContinuousEvolutionParallelism(
          genetic.continuousEvolutionParallelism
        )
        .setContinuousEvolutionMutationAggressiveness(
          genetic.continuousEvolutionMutationAggressiveness
        )
        .setContinuousEvolutionGeneticMixing(
          genetic.continuousEvolutionGeneticMixing
        )
        .setContinuousEvolutionRollingImporvementCount(
          genetic.continuousEvolutionRollingImprovementCount
        )
        .setDataReductionFactor(_mainConfig.dataReductionFactor)
        .setFirstGenMode(genetic.initialGenerationMode)
        .setFirstGenPermutations(firstGen.permutationCount)
        .setFirstGenIndexMixingMode(firstGen.indexMixingMode)
        .setFirstGenArraySeed(firstGen.arraySeed)
        .setHyperSpaceModelCount(genetic.hyperSpaceModelCount)

      // The model seed is only applied when it was explicitly set.
      if (_modelSeedSetStatus) tuner.setModelSeed(genetic.modelSeed)

      val (tunedModels, tuningStats) = tuner.evolveWithScoringDF()

      if (genetic.hyperSpaceInference) {

        println("\n\t\tStarting Post Tuning Inference Run.\n")

        // Re-express the tuning results in the generic form consumed by the
        // post-modeling optimizer.
        val tunedAsGeneric = tunedModels.map { result =>
          GenericModelReturn(
            hyperParams = extractPayload(result.modelHyperParams),
            model = result.model,
            score = result.score,
            metrics = result.evalMetrics,
            generation = result.generation
          )
        }

        // String boundaries come straight from the main config here, while
        // numeric boundaries are read back from the tuner.
        val hyperSpaceRunCandidates = new PostModelingOptimization()
          .setModelFamily("XGBoost")
          .setModelType(payload.modelType)
          .setHyperParameterSpaceCount(genetic.hyperSpaceInferenceCount)
          .setNumericBoundaries(tuner.getXGBoostNumericBoundaries)
          .setStringBoundaries(_mainConfig.stringBoundaries)
          .setSeed(genetic.seed)
          .setOptimizationStrategy(_mainConfig.scoringOptimizationStrategy)
          .xgBoostPrediction(
            tunedAsGeneric.toArray,
            genetic.hyperSpaceModelType,
            genetic.hyperSpaceModelCount
          )

        val (hyperModels, hyperStats) =
          tuner.postRunModeledHyperParams(hyperSpaceRunCandidates)

        (tunedModels ++ hyperModels, Seq(tuningStats, hyperStats))

      } else {
        (tunedModels, Seq(tuningStats))
      }

    } finally {
      // Always release the cached splits, including on the failure path.
      DataSplitCustodial.cleanCachedInstances(splitData, _mainConfig)
    }

    (
      allModels.toArray,
      // allStats always holds at least the tuning run's DataFrame.
      allStats.reduce(_ union _),
      payload.modelType,
      cachedData
    )

  }

  private def runMLPC(
    payload: DataGeneration,
    isPipeline: Boolean = false
  ): (Array[MLPCModelsWithResults], DataFrame, String, DataFrame) = {

    val cachedData = if (_mainConfig.dataPrepCachingFlag) {
      val data = payload.data.persist(StorageLevel.MEMORY_AND_DISK)
      data.foreach(_ => ())
      data
    } else {
      payload.data
    }

    val splitData = DataSplitUtility.split(
      cachedData,
      _mainConfig.geneticConfig.kFold,
      _mainConfig.geneticConfig.trainSplitMethod,
      _mainConfig.labelCol,
      _mainConfig.geneticConfig.deltaCacheBackingDirectory,
      _mainConfig.geneticConfig.splitCachingStrategy,
      _mainConfig.modelFamily,
      _mainConfig.geneticConfig.parallelism,
      _mainConfig.geneticConfig.trainPortion,
      _mainConfig.geneticConfig.kSampleConfig.syntheticCol,
      _mainConfig.geneticConfig.trainSplitChronologicalColumn,
      _mainConfig.geneticConfig.trainSplitChronologicalRandomPercentage,
      _mainConfig.dataReductionFactor
    )

    payload.modelType match {
      case "classifier" =>
        val initialize = new MLPCTuner(cachedData, splitData, isPipeline)
          .setLabelCol(_mainConfig.labelCol)
          .setFeaturesCol(_mainConfig.featuresCol)
          .setFieldsToIgnore(_mainConfig.fieldsToIgnoreInVector)
          .setMlpcNumericBoundaries(_mainConfig.numericBoundaries)
          .setMlpcStringBoundaries(_mainConfig.stringBoundaries)
          .setScoringMetric(_mainConfig.scoringMetric)
          .setTrainPortion(_mainConfig.geneticConfig.trainPortion)
          .setTrainSplitMethod(
            trainSplitValidation(
              _mainConfig.geneticConfig.trainSplitMethod,
              payload.modelType
            )
          )
          .setSyntheticCol(_mainConfig.geneticConfig.kSampleConfig.syntheticCol)
          .setKGroups(_mainConfig.geneticConfig.kSampleConfig.kGroups)
          .setKMeansMaxIter(
            _mainConfig.geneticConfig.kSampleConfig.kMeansMaxIter
          )
          .setKMeansTolerance(
            _mainConfig.geneticConfig.kSampleConfig.kMeansTolerance
          )
          .setKMeansDistanceMeasurement(
            _mainConfig.geneticConfig.kSampleConfig.kMeansDistanceMeasurement
          )
          .setKMeansSeed(_mainConfig.geneticConfig.kSampleConfig.kMeansSeed)
          .setKMeansPredictionCol(
            _mainConfig.geneticConfig.kSampleConfig.kMeansPredictionCol
          )
          .setLSHHashTables(
            _mainConfig.geneticConfig.kSampleConfig.lshHashTables
          )
          .setLSHSeed(_mainConfig.geneticConfig.kSampleConfig.lshSeed)
          .setLSHOutputCol(_mainConfig.geneticConfig.kSampleConfig.lshOutputCol)
          .setQuorumCount(_mainConfig.geneticConfig.kSampleConfig.quorumCount)
          .setMinimumVectorCountToMutate(
            _mainConfig.geneticConfig.kSampleConfig.minimumVectorCountToMutate
          )
          .setVectorMutationMethod(
            _mainConfig.geneticConfig.kSampleConfig.vectorMutationMethod
          )
          .setMutationMode(_mainConfig.geneticConfig.kSampleConfig.mutationMode)
          .setMutationValue(
            _mainConfig.geneticConfig.kSampleConfig.mutationValue
          )
          .setLabelBalanceMode(
            _mainConfig.geneticConfig.kSampleConfig.labelBalanceMode
          )
          .setCardinalityThreshold(
            _mainConfig.geneticConfig.kSampleConfig.cardinalityThreshold
          )
          .setNumericRatio(_mainConfig.geneticConfig.kSampleConfig.numericRatio)
          .setNumericTarget(
            _mainConfig.geneticConfig.kSampleConfig.numericTarget
          )
          .setTrainSplitChronologicalColumn(
            _mainConfig.geneticConfig.trainSplitChronologicalColumn
          )
          .setTrainSplitChronologicalRandomPercentage(
            _mainConfig.geneticConfig.trainSplitChronologicalRandomPercentage
          )
          .setParallelism(_mainConfig.geneticConfig.parallelism)
          .setKFold(_mainConfig.geneticConfig.kFold)
          .setSeed(_mainConfig.geneticConfig.seed)
          .setOptimizationStrategy(_mainConfig.scoringOptimizationStrategy)
          .setFirstGenerationGenePool(
            _mainConfig.geneticConfig.firstGenerationGenePool
          )
          .setNumberOfMutationGenerations(
            _mainConfig.geneticConfig.numberOfGenerations
          )
          .setNumberOfMutationsPerGeneration(
            _mainConfig.geneticConfig.numberOfMutationsPerGeneration
          )
          .setNumberOfParentsToRetain(
            _mainConfig.geneticConfig.numberOfParentsToRetain
          )
          .setNumberOfMutationsPerGeneration(
            _mainConfig.geneticConfig.numberOfMutationsPerGeneration
          )
          .setGeneticMixing(_mainConfig.geneticConfig.geneticMixing)
          .setGenerationalMutationStrategy(
            _mainConfig.geneticConfig.generationalMutationStrategy
          )
          .setMutationMagnitudeMode(
            _mainConfig.geneticConfig.mutationMagnitudeMode
          )
          .setFixedMutationValue(_mainConfig.geneticConfig.fixedMutationValue)
          .setEarlyStoppingFlag(_mainConfig.autoStoppingFlag)
          .setEarlyStoppingScore(_mainConfig.autoStoppingScore)
          .setEvolutionStrategy(_mainConfig.geneticConfig.evolutionStrategy)
          .setContinuousEvolutionImprovementThreshold(
            _mainConfig.geneticConfig.continuousEvolutionImprovementThreshold
          )
          .setGeneticMBORegressorType(
            _mainConfig.geneticConfig.geneticMBORegressorType
          )
          .setGeneticMBOCandidateFactor(
            _mainConfig.geneticConfig.geneticMBOCandidateFactor
          )
          .setContinuousEvolutionMaxIterations(
            _mainConfig.geneticConfig.continuousEvolutionMaxIterations
          )
          .setContinuousEvolutionStoppingScore(
            _mainConfig.geneticConfig.continuousEvolutionStoppingScore
          )
          .setContinuousEvolutionParallelism(
            _mainConfig.geneticConfig.continuousEvolutionParallelism
          )
          .setContinuousEvolutionMutationAggressiveness(
            _mainConfig.geneticConfig.continuousEvolutionMutationAggressiveness
          )
          .setContinuousEvolutionGeneticMixing(
            _mainConfig.geneticConfig.continuousEvolutionGeneticMixing
          )
          .setContinuousEvolutionRollingImporvementCount(
            _mainConfig.geneticConfig.continuousEvolutionRollingImprovementCount
          )
          .setDataReductionFactor(_mainConfig.dataReductionFactor)
          .setFirstGenMode(_mainConfig.geneticConfig.initialGenerationMode)
          .setFirstGenPermutations(
            _mainConfig.geneticConfig.initialGenerationConfig.permutationCount
          )
          .setFirstGenIndexMixingMode(
            _mainConfig.geneticConfig.initialGenerationConfig.indexMixingMode
          )
          .setFirstGenArraySeed(
            _mainConfig.geneticConfig.initialGenerationConfig.arraySeed
          )
          .setHyperSpaceModelCount(
            _mainConfig.geneticConfig.hyperSpaceModelCount
          )

        if (_modelSeedSetStatus)
          initialize.setModelSeed(_mainConfig.geneticConfig.modelSeed)

        val (modelResultsRaw, modelStatsRaw) = initialize.evolveWithScoringDF()

        val resultBuffer = modelResultsRaw.toBuffer
        val statsBuffer = new ArrayBuffer[DataFrame]()
        statsBuffer += modelStatsRaw

        if (_mainConfig.geneticConfig.hyperSpaceInference) {

          println("\n\t\tStarting Post Tuning Inference Run.\n")

          val genericResults = new ArrayBuffer[GenericModelReturn]

          modelResultsRaw.foreach { x =>
            genericResults += GenericModelReturn(
              hyperParams = extractPayload(x.modelHyperParams),
              model = x.model,
              score = x.score,
              metrics = x.evalMetrics,
              generation = x.generation
            )
          }

          val hyperSpaceRunCandidates = new PostModelingOptimization()
            .setModelFamily("MLPC")
            .setModelType(payload.modelType)
            .setHyperParameterSpaceCount(
              _mainConfig.geneticConfig.hyperSpaceInferenceCount
            )
            .setNumericBoundaries(initialize.getMlpcNumericBoundaries)
            .setStringBoundaries(initialize.getMlpcStringBoundaries)
            .setSeed(_mainConfig.geneticConfig.seed)
            .setOptimizationStrategy(_mainConfig.scoringOptimizationStrategy)
            .mlpcPrediction(
              genericResults.result.toArray,
              _mainConfig.geneticConfig.hyperSpaceModelType,
              _mainConfig.geneticConfig.hyperSpaceModelCount,
              initialize.getFeatureInputSize,
              initialize.getClassDistinctCount
            )

          val (hyperResults, hyperDataFrame) =
            initialize.postRunModeledHyperParams(hyperSpaceRunCandidates)

          hyperResults.foreach { x =>
            resultBuffer += x
          }
          statsBuffer += hyperDataFrame

        }

        DataSplitCustodial.cleanCachedInstances(splitData, _mainConfig)

        (
          resultBuffer.toArray,
          statsBuffer.reduce(_ union _),
          payload.modelType,
          cachedData
        )

      case _ =>
        throw new UnsupportedOperationException(
          s"Detected Model Type ${payload.modelType} is not supported by MultiLayer Perceptron Classifier"
        )
    }
  }

  /**
    * Runs the Gradient Boosted Trees tuner against the supplied payload.
    *
    * Steps, in order: optionally persist the payload data, build the train/test splits,
    * configure a `GBTreesTuner` from `_mainConfig`, call `evolveWithScoringDF`, and, when
    * `hyperSpaceInference` is enabled, run the post-tuning inference pass and append its
    * results to the tuner's output. The split caches are cleaned before returning.
    *
    * @param payload    prepared data together with the detected model type
    * @param isPipeline passed through unchanged to the tuner constructor
    * @return a 4-tuple of (all model results, the scoring DataFrames unioned together,
    *         `payload.modelType`, the data the tuner was given)
    */
  private def runGBT(
    payload: DataGeneration,
    isPipeline: Boolean = false
  ): (Array[GBTModelsWithResults], DataFrame, String, DataFrame) = {

    // Short handles for the nested configuration sections read repeatedly below.
    val genetic = _mainConfig.geneticConfig
    val kSample = genetic.kSampleConfig
    val firstGen = genetic.initialGenerationConfig

    // When caching is requested, persist and run a no-op action so the data is computed now.
    val tuningData =
      if (!_mainConfig.dataPrepCachingFlag) payload.data
      else {
        val persisted = payload.data.persist(StorageLevel.MEMORY_AND_DISK)
        persisted.foreach(_ => ())
        persisted
      }

    val trainTestSplits = DataSplitUtility.split(
      tuningData,
      genetic.kFold,
      genetic.trainSplitMethod,
      _mainConfig.labelCol,
      genetic.deltaCacheBackingDirectory,
      genetic.splitCachingStrategy,
      _mainConfig.modelFamily,
      genetic.parallelism,
      genetic.trainPortion,
      kSample.syntheticCol,
      genetic.trainSplitChronologicalColumn,
      genetic.trainSplitChronologicalRandomPercentage,
      _mainConfig.dataReductionFactor
    )

    val tuner =
      new GBTreesTuner(tuningData, trainTestSplits, payload.modelType, isPipeline)
        .setLabelCol(_mainConfig.labelCol)
        .setFeaturesCol(_mainConfig.featuresCol)
        .setFieldsToIgnore(_mainConfig.fieldsToIgnoreInVector)
        .setGBTNumericBoundaries(_mainConfig.numericBoundaries)
        .setGBTStringBoundaries(_mainConfig.stringBoundaries)
        .setScoringMetric(_mainConfig.scoringMetric)
        .setTrainPortion(genetic.trainPortion)
        .setTrainSplitMethod(
          trainSplitValidation(genetic.trainSplitMethod, payload.modelType)
        )
        // Values read from geneticConfig.kSampleConfig
        .setSyntheticCol(kSample.syntheticCol)
        .setKGroups(kSample.kGroups)
        .setKMeansMaxIter(kSample.kMeansMaxIter)
        .setKMeansTolerance(kSample.kMeansTolerance)
        .setKMeansDistanceMeasurement(kSample.kMeansDistanceMeasurement)
        .setKMeansSeed(kSample.kMeansSeed)
        .setKMeansPredictionCol(kSample.kMeansPredictionCol)
        .setLSHHashTables(kSample.lshHashTables)
        .setLSHSeed(kSample.lshSeed)
        .setLSHOutputCol(kSample.lshOutputCol)
        .setQuorumCount(kSample.quorumCount)
        .setMinimumVectorCountToMutate(kSample.minimumVectorCountToMutate)
        .setVectorMutationMethod(kSample.vectorMutationMethod)
        .setMutationMode(kSample.mutationMode)
        .setMutationValue(kSample.mutationValue)
        .setLabelBalanceMode(kSample.labelBalanceMode)
        .setCardinalityThreshold(kSample.cardinalityThreshold)
        .setNumericRatio(kSample.numericRatio)
        .setNumericTarget(kSample.numericTarget)
        // Values read from geneticConfig and the top-level config
        .setTrainSplitChronologicalColumn(genetic.trainSplitChronologicalColumn)
        .setTrainSplitChronologicalRandomPercentage(
          genetic.trainSplitChronologicalRandomPercentage
        )
        .setParallelism(genetic.parallelism)
        .setKFold(genetic.kFold)
        .setSeed(genetic.seed)
        .setOptimizationStrategy(_mainConfig.scoringOptimizationStrategy)
        .setFirstGenerationGenePool(genetic.firstGenerationGenePool)
        .setNumberOfMutationGenerations(genetic.numberOfGenerations)
        .setNumberOfMutationsPerGeneration(genetic.numberOfMutationsPerGeneration)
        .setNumberOfParentsToRetain(genetic.numberOfParentsToRetain)
        // NOTE(review): same setter and value as two calls above; looks redundant, but it is
        // kept until the setter is confirmed to have no order-dependent behaviour.
        .setNumberOfMutationsPerGeneration(genetic.numberOfMutationsPerGeneration)
        .setGeneticMixing(genetic.geneticMixing)
        .setGenerationalMutationStrategy(genetic.generationalMutationStrategy)
        .setMutationMagnitudeMode(genetic.mutationMagnitudeMode)
        .setFixedMutationValue(genetic.fixedMutationValue)
        .setEarlyStoppingFlag(_mainConfig.autoStoppingFlag)
        .setEarlyStoppingScore(_mainConfig.autoStoppingScore)
        .setEvolutionStrategy(genetic.evolutionStrategy)
        .setContinuousEvolutionImprovementThreshold(
          genetic.continuousEvolutionImprovementThreshold
        )
        .setGeneticMBORegressorType(genetic.geneticMBORegressorType)
        .setGeneticMBOCandidateFactor(genetic.geneticMBOCandidateFactor)
        .setContinuousEvolutionMaxIterations(genetic.continuousEvolutionMaxIterations)
        .setContinuousEvolutionStoppingScore(genetic.continuousEvolutionStoppingScore)
        .setContinuousEvolutionParallelism(genetic.continuousEvolutionParallelism)
        .setContinuousEvolutionMutationAggressiveness(
          genetic.continuousEvolutionMutationAggressiveness
        )
        .setContinuousEvolutionGeneticMixing(genetic.continuousEvolutionGeneticMixing)
        .setContinuousEvolutionRollingImporvementCount(
          genetic.continuousEvolutionRollingImprovementCount
        )
        .setDataReductionFactor(_mainConfig.dataReductionFactor)
        .setFirstGenMode(genetic.initialGenerationMode)
        .setFirstGenPermutations(firstGen.permutationCount)
        .setFirstGenIndexMixingMode(firstGen.indexMixingMode)
        .setFirstGenArraySeed(firstGen.arraySeed)
        .setHyperSpaceModelCount(genetic.hyperSpaceModelCount)

    // The model seed is only forwarded when it has been explicitly set.
    if (_modelSeedSetStatus) tuner.setModelSeed(genetic.modelSeed)

    val (tunedModels, tunedStats) = tuner.evolveWithScoringDF()

    val collectedModels = tunedModels.toBuffer
    val collectedStats = ArrayBuffer[DataFrame](tunedStats)

    if (genetic.hyperSpaceInference) {

      println("\n\t\tStarting Post Tuning Inference Run.\n")

      // Re-shape the tuner output into the generic form the post-modeling step consumes.
      val genericResults = tunedModels.map { tuned =>
        GenericModelReturn(
          hyperParams = extractPayload(tuned.modelHyperParams),
          model = tuned.model,
          score = tuned.score,
          metrics = tuned.evalMetrics,
          generation = tuned.generation
        )
      }

      val hyperSpaceRunCandidates = new PostModelingOptimization()
        .setModelFamily("GBT")
        .setModelType(payload.modelType)
        .setHyperParameterSpaceCount(genetic.hyperSpaceInferenceCount)
        .setNumericBoundaries(tuner.getGBTNumericBoundaries)
        .setStringBoundaries(tuner.getGBTStringBoundaries)
        .setSeed(genetic.seed)
        .setOptimizationStrategy(_mainConfig.scoringOptimizationStrategy)
        .gbtPrediction(
          genericResults.toArray,
          genetic.hyperSpaceModelType,
          genetic.hyperSpaceModelCount
        )

      val (hyperModels, hyperStats) =
        tuner.postRunModeledHyperParams(hyperSpaceRunCandidates)

      collectedModels ++= hyperModels
      collectedStats += hyperStats
    }

    DataSplitCustodial.cleanCachedInstances(trainTestSplits, _mainConfig)

    (
      collectedModels.toArray,
      collectedStats.reduce((merged, next) => merged.union(next)),
      payload.modelType,
      tuningData
    )

  }

  /**
    * Runs the Linear Regression tuner against the supplied payload.
    *
    * The model type is validated before any work is done, so an unsupported type fails
    * immediately instead of after the data has been persisted, materialised and split
    * (work that previously ran first and was never cleaned up on the failure path).
    *
    * For a supported payload the steps are: optionally persist the payload data, build the
    * train/test splits, configure a `LinearRegressionTuner` from `_mainConfig`, call
    * `evolveWithScoringDF`, and, when `hyperSpaceInference` is enabled, run the post-tuning
    * inference pass and append its results. The split caches are cleaned before returning.
    *
    * @param payload    prepared data together with the detected model type
    * @param isPipeline passed through unchanged to the tuner constructor
    * @return a 4-tuple of (all model results, the scoring DataFrames unioned together,
    *         `payload.modelType`, the data the tuner was given)
    * @throws UnsupportedOperationException when `payload.modelType` is not "regressor"
    */
  private def runLinearRegression(
    payload: DataGeneration,
    isPipeline: Boolean = false
  ): (Array[LinearRegressionModelsWithResults], DataFrame, String, DataFrame) = {

    // Fail fast: reject unsupported model types before persisting or splitting anything.
    if (payload.modelType != "regressor")
      throw new UnsupportedOperationException(
        s"Detected Model Type ${payload.modelType} is not supported by Linear Regression"
      )

    // Short handles for the nested configuration sections read repeatedly below.
    val genetic = _mainConfig.geneticConfig
    val kSample = genetic.kSampleConfig
    val firstGen = genetic.initialGenerationConfig

    // When caching is requested, persist and run a no-op action so the data is computed now.
    val cachedData = if (_mainConfig.dataPrepCachingFlag) {
      val data = payload.data.persist(StorageLevel.MEMORY_AND_DISK)
      data.foreach(_ => ())
      data
    } else {
      payload.data
    }

    val splitData = DataSplitUtility.split(
      cachedData,
      genetic.kFold,
      genetic.trainSplitMethod,
      _mainConfig.labelCol,
      genetic.deltaCacheBackingDirectory,
      genetic.splitCachingStrategy,
      _mainConfig.modelFamily,
      genetic.parallelism,
      genetic.trainPortion,
      kSample.syntheticCol,
      genetic.trainSplitChronologicalColumn,
      genetic.trainSplitChronologicalRandomPercentage,
      _mainConfig.dataReductionFactor
    )

    val initialize = new LinearRegressionTuner(cachedData, splitData, isPipeline)
      .setLabelCol(_mainConfig.labelCol)
      .setFeaturesCol(_mainConfig.featuresCol)
      .setFieldsToIgnore(_mainConfig.fieldsToIgnoreInVector)
      .setLinearRegressionNumericBoundaries(_mainConfig.numericBoundaries)
      .setLinearRegressionStringBoundaries(_mainConfig.stringBoundaries)
      .setScoringMetric(_mainConfig.scoringMetric)
      .setTrainPortion(genetic.trainPortion)
      .setTrainSplitMethod(
        trainSplitValidation(genetic.trainSplitMethod, payload.modelType)
      )
      // Values read from geneticConfig.kSampleConfig
      .setSyntheticCol(kSample.syntheticCol)
      .setKGroups(kSample.kGroups)
      .setKMeansMaxIter(kSample.kMeansMaxIter)
      .setKMeansTolerance(kSample.kMeansTolerance)
      .setKMeansDistanceMeasurement(kSample.kMeansDistanceMeasurement)
      .setKMeansSeed(kSample.kMeansSeed)
      .setKMeansPredictionCol(kSample.kMeansPredictionCol)
      .setLSHHashTables(kSample.lshHashTables)
      .setLSHSeed(kSample.lshSeed)
      .setLSHOutputCol(kSample.lshOutputCol)
      .setQuorumCount(kSample.quorumCount)
      .setMinimumVectorCountToMutate(kSample.minimumVectorCountToMutate)
      .setVectorMutationMethod(kSample.vectorMutationMethod)
      .setMutationMode(kSample.mutationMode)
      .setMutationValue(kSample.mutationValue)
      .setLabelBalanceMode(kSample.labelBalanceMode)
      .setCardinalityThreshold(kSample.cardinalityThreshold)
      .setNumericRatio(kSample.numericRatio)
      .setNumericTarget(kSample.numericTarget)
      // Values read from geneticConfig and the top-level config
      .setTrainSplitChronologicalColumn(genetic.trainSplitChronologicalColumn)
      .setTrainSplitChronologicalRandomPercentage(
        genetic.trainSplitChronologicalRandomPercentage
      )
      .setParallelism(genetic.parallelism)
      .setKFold(genetic.kFold)
      .setSeed(genetic.seed)
      .setOptimizationStrategy(_mainConfig.scoringOptimizationStrategy)
      .setFirstGenerationGenePool(genetic.firstGenerationGenePool)
      .setNumberOfMutationGenerations(genetic.numberOfGenerations)
      .setNumberOfMutationsPerGeneration(genetic.numberOfMutationsPerGeneration)
      .setNumberOfParentsToRetain(genetic.numberOfParentsToRetain)
      // NOTE(review): same setter and value as two calls above; looks redundant, but it is
      // kept until the setter is confirmed to have no order-dependent behaviour.
      .setNumberOfMutationsPerGeneration(genetic.numberOfMutationsPerGeneration)
      .setGeneticMixing(genetic.geneticMixing)
      .setGenerationalMutationStrategy(genetic.generationalMutationStrategy)
      .setMutationMagnitudeMode(genetic.mutationMagnitudeMode)
      .setFixedMutationValue(genetic.fixedMutationValue)
      .setEarlyStoppingFlag(_mainConfig.autoStoppingFlag)
      .setEarlyStoppingScore(_mainConfig.autoStoppingScore)
      .setEvolutionStrategy(genetic.evolutionStrategy)
      .setContinuousEvolutionImprovementThreshold(
        genetic.continuousEvolutionImprovementThreshold
      )
      .setGeneticMBORegressorType(genetic.geneticMBORegressorType)
      .setGeneticMBOCandidateFactor(genetic.geneticMBOCandidateFactor)
      .setContinuousEvolutionMaxIterations(genetic.continuousEvolutionMaxIterations)
      .setContinuousEvolutionStoppingScore(genetic.continuousEvolutionStoppingScore)
      .setContinuousEvolutionParallelism(genetic.continuousEvolutionParallelism)
      .setContinuousEvolutionMutationAggressiveness(
        genetic.continuousEvolutionMutationAggressiveness
      )
      .setContinuousEvolutionGeneticMixing(genetic.continuousEvolutionGeneticMixing)
      .setContinuousEvolutionRollingImporvementCount(
        genetic.continuousEvolutionRollingImprovementCount
      )
      .setDataReductionFactor(_mainConfig.dataReductionFactor)
      .setFirstGenMode(genetic.initialGenerationMode)
      .setFirstGenPermutations(firstGen.permutationCount)
      .setFirstGenIndexMixingMode(firstGen.indexMixingMode)
      .setFirstGenArraySeed(firstGen.arraySeed)
      .setHyperSpaceModelCount(genetic.hyperSpaceModelCount)

    // The model seed is only forwarded when it has been explicitly set.
    if (_modelSeedSetStatus)
      initialize.setModelSeed(genetic.modelSeed)

    val (modelResultsRaw, modelStatsRaw) = initialize.evolveWithScoringDF()

    val resultBuffer = modelResultsRaw.toBuffer
    val statsBuffer = ArrayBuffer[DataFrame](modelStatsRaw)

    if (genetic.hyperSpaceInference) {

      println("\n\t\tStarting Post Tuning Inference Run.\n")

      // Re-shape the tuner output into the generic form the post-modeling step consumes.
      val genericResults = modelResultsRaw.map { x =>
        GenericModelReturn(
          hyperParams = extractPayload(x.modelHyperParams),
          model = x.model,
          score = x.score,
          metrics = x.evalMetrics,
          generation = x.generation
        )
      }

      val hyperSpaceRunCandidates = new PostModelingOptimization()
        .setModelFamily("LinearRegression")
        .setModelType(payload.modelType)
        .setHyperParameterSpaceCount(genetic.hyperSpaceInferenceCount)
        .setNumericBoundaries(initialize.getLinearRegressionNumericBoundaries)
        .setStringBoundaries(initialize.getLinearRegressionStringBoundaries)
        .setSeed(genetic.seed)
        .setOptimizationStrategy(_mainConfig.scoringOptimizationStrategy)
        .linearRegressionPrediction(
          genericResults.toArray,
          genetic.hyperSpaceModelType,
          genetic.hyperSpaceModelCount
        )

      val (hyperResults, hyperDataFrame) =
        initialize.postRunModeledHyperParams(hyperSpaceRunCandidates)

      resultBuffer ++= hyperResults
      statsBuffer += hyperDataFrame

    }

    DataSplitCustodial.cleanCachedInstances(splitData, _mainConfig)

    (
      resultBuffer.toArray,
      statsBuffer.reduce(_ union _),
      payload.modelType,
      cachedData
    )

  }

  /**
    * Runs the Logistic Regression tuner against the supplied payload.
    *
    * The model type is validated before any work is done, so an unsupported type fails
    * immediately instead of after the data has been persisted, materialised and split
    * (work that previously ran first and was never cleaned up on the failure path).
    *
    * For a supported payload the steps are: optionally persist the payload data, build the
    * train/test splits, configure a `LogisticRegressionTuner` from `_mainConfig`, call
    * `evolveWithScoringDF`, and, when `hyperSpaceInference` is enabled, run the post-tuning
    * inference pass and append its results. The split caches are cleaned before returning.
    *
    * @param payload    prepared data together with the detected model type
    * @param isPipeline passed through unchanged to the tuner constructor
    * @return a 4-tuple of (all model results, the scoring DataFrames unioned together,
    *         `payload.modelType`, the data the tuner was given)
    * @throws UnsupportedOperationException when `payload.modelType` is not "classifier"
    */
  private def runLogisticRegression(
    payload: DataGeneration,
    isPipeline: Boolean = false
  ): (Array[LogisticRegressionModelsWithResults], DataFrame, String, DataFrame) = {

    // Fail fast: reject unsupported model types before persisting or splitting anything.
    if (payload.modelType != "classifier")
      throw new UnsupportedOperationException(
        s"Detected Model Type ${payload.modelType} is not supported by Logistic Regression"
      )

    // Short handles for the nested configuration sections read repeatedly below.
    val genetic = _mainConfig.geneticConfig
    val kSample = genetic.kSampleConfig
    val firstGen = genetic.initialGenerationConfig

    // When caching is requested, persist and run a no-op action so the data is computed now.
    val cachedData = if (_mainConfig.dataPrepCachingFlag) {
      val data = payload.data.persist(StorageLevel.MEMORY_AND_DISK)
      data.foreach(_ => ())
      data
    } else {
      payload.data
    }

    val splitData = DataSplitUtility.split(
      cachedData,
      genetic.kFold,
      genetic.trainSplitMethod,
      _mainConfig.labelCol,
      genetic.deltaCacheBackingDirectory,
      genetic.splitCachingStrategy,
      _mainConfig.modelFamily,
      genetic.parallelism,
      genetic.trainPortion,
      kSample.syntheticCol,
      genetic.trainSplitChronologicalColumn,
      genetic.trainSplitChronologicalRandomPercentage,
      _mainConfig.dataReductionFactor
    )

    val initialize = new LogisticRegressionTuner(cachedData, splitData, isPipeline)
      .setLabelCol(_mainConfig.labelCol)
      .setFeaturesCol(_mainConfig.featuresCol)
      .setFieldsToIgnore(_mainConfig.fieldsToIgnoreInVector)
      .setLogisticRegressionNumericBoundaries(_mainConfig.numericBoundaries)
      .setScoringMetric(_mainConfig.scoringMetric)
      .setTrainPortion(genetic.trainPortion)
      .setTrainSplitMethod(
        trainSplitValidation(genetic.trainSplitMethod, payload.modelType)
      )
      // Values read from geneticConfig.kSampleConfig
      .setSyntheticCol(kSample.syntheticCol)
      .setKGroups(kSample.kGroups)
      .setKMeansMaxIter(kSample.kMeansMaxIter)
      .setKMeansTolerance(kSample.kMeansTolerance)
      .setKMeansDistanceMeasurement(kSample.kMeansDistanceMeasurement)
      .setKMeansSeed(kSample.kMeansSeed)
      .setKMeansPredictionCol(kSample.kMeansPredictionCol)
      .setLSHHashTables(kSample.lshHashTables)
      .setLSHSeed(kSample.lshSeed)
      .setLSHOutputCol(kSample.lshOutputCol)
      .setQuorumCount(kSample.quorumCount)
      .setMinimumVectorCountToMutate(kSample.minimumVectorCountToMutate)
      .setVectorMutationMethod(kSample.vectorMutationMethod)
      .setMutationMode(kSample.mutationMode)
      .setMutationValue(kSample.mutationValue)
      .setLabelBalanceMode(kSample.labelBalanceMode)
      .setCardinalityThreshold(kSample.cardinalityThreshold)
      .setNumericRatio(kSample.numericRatio)
      .setNumericTarget(kSample.numericTarget)
      // Values read from geneticConfig and the top-level config
      .setTrainSplitChronologicalColumn(genetic.trainSplitChronologicalColumn)
      .setTrainSplitChronologicalRandomPercentage(
        genetic.trainSplitChronologicalRandomPercentage
      )
      .setParallelism(genetic.parallelism)
      .setKFold(genetic.kFold)
      .setSeed(genetic.seed)
      .setOptimizationStrategy(_mainConfig.scoringOptimizationStrategy)
      .setFirstGenerationGenePool(genetic.firstGenerationGenePool)
      .setNumberOfMutationGenerations(genetic.numberOfGenerations)
      .setNumberOfMutationsPerGeneration(genetic.numberOfMutationsPerGeneration)
      .setNumberOfParentsToRetain(genetic.numberOfParentsToRetain)
      // NOTE(review): same setter and value as two calls above; looks redundant, but it is
      // kept until the setter is confirmed to have no order-dependent behaviour.
      .setNumberOfMutationsPerGeneration(genetic.numberOfMutationsPerGeneration)
      .setGeneticMixing(genetic.geneticMixing)
      .setGenerationalMutationStrategy(genetic.generationalMutationStrategy)
      .setMutationMagnitudeMode(genetic.mutationMagnitudeMode)
      .setFixedMutationValue(genetic.fixedMutationValue)
      .setEarlyStoppingFlag(_mainConfig.autoStoppingFlag)
      .setEarlyStoppingScore(_mainConfig.autoStoppingScore)
      .setEvolutionStrategy(genetic.evolutionStrategy)
      .setContinuousEvolutionImprovementThreshold(
        genetic.continuousEvolutionImprovementThreshold
      )
      .setGeneticMBORegressorType(genetic.geneticMBORegressorType)
      .setGeneticMBOCandidateFactor(genetic.geneticMBOCandidateFactor)
      .setContinuousEvolutionMaxIterations(genetic.continuousEvolutionMaxIterations)
      .setContinuousEvolutionStoppingScore(genetic.continuousEvolutionStoppingScore)
      .setContinuousEvolutionParallelism(genetic.continuousEvolutionParallelism)
      .setContinuousEvolutionMutationAggressiveness(
        genetic.continuousEvolutionMutationAggressiveness
      )
      .setContinuousEvolutionGeneticMixing(genetic.continuousEvolutionGeneticMixing)
      .setContinuousEvolutionRollingImporvementCount(
        genetic.continuousEvolutionRollingImprovementCount
      )
      .setDataReductionFactor(_mainConfig.dataReductionFactor)
      .setFirstGenMode(genetic.initialGenerationMode)
      .setFirstGenPermutations(firstGen.permutationCount)
      .setFirstGenIndexMixingMode(firstGen.indexMixingMode)
      .setFirstGenArraySeed(firstGen.arraySeed)
      .setHyperSpaceModelCount(genetic.hyperSpaceModelCount)

    // The model seed is only forwarded when it has been explicitly set.
    if (_modelSeedSetStatus)
      initialize.setModelSeed(genetic.modelSeed)

    val (modelResultsRaw, modelStatsRaw) = initialize.evolveWithScoringDF()

    val resultBuffer = modelResultsRaw.toBuffer
    val statsBuffer = ArrayBuffer[DataFrame](modelStatsRaw)

    if (genetic.hyperSpaceInference) {

      println("\n\t\tStarting Post Tuning Inference Run.\n")

      // Re-shape the tuner output into the generic form the post-modeling step consumes.
      val genericResults = modelResultsRaw.map { x =>
        GenericModelReturn(
          hyperParams = extractPayload(x.modelHyperParams),
          model = x.model,
          score = x.score,
          metrics = x.evalMetrics,
          generation = x.generation
        )
      }

      val hyperSpaceRunCandidates = new PostModelingOptimization()
        .setModelFamily("LogisticRegression")
        .setModelType(payload.modelType)
        .setHyperParameterSpaceCount(genetic.hyperSpaceInferenceCount)
        .setNumericBoundaries(initialize.getLogisticRegressionNumericBoundaries)
        // String boundaries are taken from the main config here, not from the tuner.
        .setStringBoundaries(_mainConfig.stringBoundaries)
        .setSeed(genetic.seed)
        .setOptimizationStrategy(_mainConfig.scoringOptimizationStrategy)
        .logisticRegressionPrediction(
          genericResults.toArray,
          genetic.hyperSpaceModelType,
          genetic.hyperSpaceModelCount
        )

      val (hyperResults, hyperDataFrame) =
        initialize.postRunModeledHyperParams(hyperSpaceRunCandidates)

      resultBuffer ++= hyperResults
      statsBuffer += hyperDataFrame

    }

    DataSplitCustodial.cleanCachedInstances(splitData, _mainConfig)

    (
      resultBuffer.toArray,
      statsBuffer.reduce(_ union _),
      payload.modelType,
      cachedData
    )

  }

  /**
    * Runs the tuning workflow for the "SVM" model family.
    *
    * The payload data is optionally persisted (when `_mainConfig.dataPrepCachingFlag`
    * is set), split through [[DataSplitUtility]], and handed to an [[SVMTuner]]
    * configured from `_mainConfig`. When
    * `_mainConfig.geneticConfig.hyperSpaceInference` is enabled, a post-tuning
    * inference run is executed and its models / scoring data are appended to the
    * tuning results.
    *
    * @param payload    prepared data together with the detected model type
    * @param isPipeline forwarded unchanged to the [[SVMTuner]] constructor
    * @return a tuple of (all models with their results, the union of all scoring
    *         DataFrames, the payload's model type, the data the tuner was run on)
    * @throws UnsupportedOperationException if `payload.modelType` is not "classifier"
    */
  private def runSVM(
    payload: DataGeneration,
    isPipeline: Boolean = false
  ): (Array[SVMModelsWithResults], DataFrame, String, DataFrame) = {

    val cachedData = if (_mainConfig.dataPrepCachingFlag) {
      val data = payload.data.persist(StorageLevel.MEMORY_AND_DISK)
      // Run an action over every row so the persisted data is materialized now.
      data.foreach(_ => ())
      data
    } else {
      payload.data
    }

    val splitData = DataSplitUtility.split(
      cachedData,
      _mainConfig.geneticConfig.kFold,
      _mainConfig.geneticConfig.trainSplitMethod,
      _mainConfig.labelCol,
      _mainConfig.geneticConfig.deltaCacheBackingDirectory,
      _mainConfig.geneticConfig.splitCachingStrategy,
      _mainConfig.modelFamily,
      _mainConfig.geneticConfig.parallelism,
      _mainConfig.geneticConfig.trainPortion,
      _mainConfig.geneticConfig.kSampleConfig.syntheticCol,
      _mainConfig.geneticConfig.trainSplitChronologicalColumn,
      _mainConfig.geneticConfig.trainSplitChronologicalRandomPercentage,
      _mainConfig.dataReductionFactor
    )

    payload.modelType match {
      case "classifier" =>
        val initialize = new SVMTuner(cachedData, splitData, isPipeline)
          .setLabelCol(_mainConfig.labelCol)
          .setFeaturesCol(_mainConfig.featuresCol)
          .setFieldsToIgnore(_mainConfig.fieldsToIgnoreInVector)
          .setSvmNumericBoundaries(_mainConfig.numericBoundaries)
          .setScoringMetric(_mainConfig.scoringMetric)
          .setTrainPortion(_mainConfig.geneticConfig.trainPortion)
          .setTrainSplitMethod(
            trainSplitValidation(
              _mainConfig.geneticConfig.trainSplitMethod,
              payload.modelType
            )
          )
          .setSyntheticCol(_mainConfig.geneticConfig.kSampleConfig.syntheticCol)
          .setKGroups(_mainConfig.geneticConfig.kSampleConfig.kGroups)
          .setKMeansMaxIter(
            _mainConfig.geneticConfig.kSampleConfig.kMeansMaxIter
          )
          .setKMeansTolerance(
            _mainConfig.geneticConfig.kSampleConfig.kMeansTolerance
          )
          .setKMeansDistanceMeasurement(
            _mainConfig.geneticConfig.kSampleConfig.kMeansDistanceMeasurement
          )
          .setKMeansSeed(_mainConfig.geneticConfig.kSampleConfig.kMeansSeed)
          .setKMeansPredictionCol(
            _mainConfig.geneticConfig.kSampleConfig.kMeansPredictionCol
          )
          .setLSHHashTables(
            _mainConfig.geneticConfig.kSampleConfig.lshHashTables
          )
          .setLSHSeed(_mainConfig.geneticConfig.kSampleConfig.lshSeed)
          .setLSHOutputCol(_mainConfig.geneticConfig.kSampleConfig.lshOutputCol)
          .setQuorumCount(_mainConfig.geneticConfig.kSampleConfig.quorumCount)
          .setMinimumVectorCountToMutate(
            _mainConfig.geneticConfig.kSampleConfig.minimumVectorCountToMutate
          )
          .setVectorMutationMethod(
            _mainConfig.geneticConfig.kSampleConfig.vectorMutationMethod
          )
          .setMutationMode(_mainConfig.geneticConfig.kSampleConfig.mutationMode)
          .setMutationValue(
            _mainConfig.geneticConfig.kSampleConfig.mutationValue
          )
          .setLabelBalanceMode(
            _mainConfig.geneticConfig.kSampleConfig.labelBalanceMode
          )
          .setCardinalityThreshold(
            _mainConfig.geneticConfig.kSampleConfig.cardinalityThreshold
          )
          .setNumericRatio(_mainConfig.geneticConfig.kSampleConfig.numericRatio)
          .setNumericTarget(
            _mainConfig.geneticConfig.kSampleConfig.numericTarget
          )
          .setTrainSplitChronologicalColumn(
            _mainConfig.geneticConfig.trainSplitChronologicalColumn
          )
          .setTrainSplitChronologicalRandomPercentage(
            _mainConfig.geneticConfig.trainSplitChronologicalRandomPercentage
          )
          .setParallelism(_mainConfig.geneticConfig.parallelism)
          .setKFold(_mainConfig.geneticConfig.kFold)
          .setSeed(_mainConfig.geneticConfig.seed)
          .setOptimizationStrategy(_mainConfig.scoringOptimizationStrategy)
          .setFirstGenerationGenePool(
            _mainConfig.geneticConfig.firstGenerationGenePool
          )
          .setNumberOfMutationGenerations(
            _mainConfig.geneticConfig.numberOfGenerations
          )
          .setNumberOfMutationsPerGeneration(
            _mainConfig.geneticConfig.numberOfMutationsPerGeneration
          )
          .setNumberOfParentsToRetain(
            _mainConfig.geneticConfig.numberOfParentsToRetain
          )
          .setGeneticMixing(_mainConfig.geneticConfig.geneticMixing)
          .setGenerationalMutationStrategy(
            _mainConfig.geneticConfig.generationalMutationStrategy
          )
          .setMutationMagnitudeMode(
            _mainConfig.geneticConfig.mutationMagnitudeMode
          )
          .setFixedMutationValue(_mainConfig.geneticConfig.fixedMutationValue)
          .setEarlyStoppingFlag(_mainConfig.autoStoppingFlag)
          .setEarlyStoppingScore(_mainConfig.autoStoppingScore)
          .setEvolutionStrategy(_mainConfig.geneticConfig.evolutionStrategy)
          .setContinuousEvolutionImprovementThreshold(
            _mainConfig.geneticConfig.continuousEvolutionImprovementThreshold
          )
          .setGeneticMBORegressorType(
            _mainConfig.geneticConfig.geneticMBORegressorType
          )
          .setGeneticMBOCandidateFactor(
            _mainConfig.geneticConfig.geneticMBOCandidateFactor
          )
          .setContinuousEvolutionMaxIterations(
            _mainConfig.geneticConfig.continuousEvolutionMaxIterations
          )
          .setContinuousEvolutionStoppingScore(
            _mainConfig.geneticConfig.continuousEvolutionStoppingScore
          )
          .setContinuousEvolutionParallelism(
            _mainConfig.geneticConfig.continuousEvolutionParallelism
          )
          .setContinuousEvolutionMutationAggressiveness(
            _mainConfig.geneticConfig.continuousEvolutionMutationAggressiveness
          )
          .setContinuousEvolutionGeneticMixing(
            _mainConfig.geneticConfig.continuousEvolutionGeneticMixing
          )
          .setContinuousEvolutionRollingImporvementCount(
            _mainConfig.geneticConfig.continuousEvolutionRollingImprovementCount
          )
          .setDataReductionFactor(_mainConfig.dataReductionFactor)
          .setFirstGenMode(_mainConfig.geneticConfig.initialGenerationMode)
          .setFirstGenPermutations(
            _mainConfig.geneticConfig.initialGenerationConfig.permutationCount
          )
          .setFirstGenIndexMixingMode(
            _mainConfig.geneticConfig.initialGenerationConfig.indexMixingMode
          )
          .setFirstGenArraySeed(
            _mainConfig.geneticConfig.initialGenerationConfig.arraySeed
          )
          .setHyperSpaceModelCount(
            _mainConfig.geneticConfig.hyperSpaceModelCount
          )

        // The model seed is only applied when it has been explicitly set.
        if (_modelSeedSetStatus)
          initialize.setModelSeed(_mainConfig.geneticConfig.modelSeed)

        val (modelResultsRaw, modelStatsRaw) = initialize.evolveWithScoringDF()

        val resultBuffer = modelResultsRaw.toBuffer
        val statsBuffer = ArrayBuffer[DataFrame](modelStatsRaw)

        if (_mainConfig.geneticConfig.hyperSpaceInference) {

          println("\n\t\tStarting Post Tuning Inference Run.\n")

          // Generic view of the tuned models, used as input for the
          // post-modeling optimization.
          val genericResults = new ArrayBuffer[GenericModelReturn]
          modelResultsRaw.foreach { x =>
            genericResults += GenericModelReturn(
              hyperParams = extractPayload(x.modelHyperParams),
              model = x.model,
              score = x.score,
              metrics = x.evalMetrics,
              generation = x.generation
            )
          }

          val hyperSpaceRunCandidates = new PostModelingOptimization()
            .setModelFamily("SVM")
            .setModelType(payload.modelType)
            .setHyperParameterSpaceCount(
              _mainConfig.geneticConfig.hyperSpaceInferenceCount
            )
            .setNumericBoundaries(initialize.getSvmNumericBoundaries)
            .setStringBoundaries(_mainConfig.stringBoundaries)
            .setSeed(_mainConfig.geneticConfig.seed)
            .setOptimizationStrategy(_mainConfig.scoringOptimizationStrategy)
            .svmPrediction(
              genericResults.toArray,
              _mainConfig.geneticConfig.hyperSpaceModelType,
              _mainConfig.geneticConfig.hyperSpaceModelCount
            )

          val (hyperResults, hyperDataFrame) =
            initialize.postRunModeledHyperParams(hyperSpaceRunCandidates)

          resultBuffer ++= hyperResults
          statsBuffer += hyperDataFrame

        }

        DataSplitCustodial.cleanCachedInstances(splitData, _mainConfig)

        (
          resultBuffer.toArray,
          statsBuffer.reduce(_ union _),
          payload.modelType,
          cachedData
        )

      case _ =>
        throw new UnsupportedOperationException(
          s"Detected Model Type ${payload.modelType} is not supported by Support Vector Machines"
        )
    }
  }

  /**
    * Runs the tuning workflow for the "Trees" model family.
    *
    * The payload data is optionally persisted, split through
    * [[DataSplitUtility]], and tuned by a [[DecisionTreeTuner]] that is
    * configured from `_mainConfig` and receives `payload.modelType`. If
    * `_mainConfig.geneticConfig.hyperSpaceInference` is enabled, the results of
    * a post-tuning inference run are appended to the tuning results.
    *
    * @param payload    prepared data together with the detected model type
    * @param isPipeline forwarded unchanged to the [[DecisionTreeTuner]] constructor
    * @return a tuple of (all models with their results, the union of all scoring
    *         DataFrames, the payload's model type, the data the tuner was run on)
    */
  private def runTrees(
    payload: DataGeneration,
    isPipeline: Boolean = false
  ): (Array[TreesModelsWithResults], DataFrame, String, DataFrame) = {

    val geneticConfig = _mainConfig.geneticConfig
    val kSampleConfig = geneticConfig.kSampleConfig

    val cachedData =
      if (_mainConfig.dataPrepCachingFlag) {
        val persisted = payload.data.persist(StorageLevel.MEMORY_AND_DISK)
        // Run an action over every row so the persisted data is materialized now.
        persisted.foreach(_ => ())
        persisted
      } else {
        payload.data
      }

    val dataSplits = DataSplitUtility.split(
      cachedData,
      geneticConfig.kFold,
      geneticConfig.trainSplitMethod,
      _mainConfig.labelCol,
      geneticConfig.deltaCacheBackingDirectory,
      geneticConfig.splitCachingStrategy,
      _mainConfig.modelFamily,
      geneticConfig.parallelism,
      geneticConfig.trainPortion,
      kSampleConfig.syntheticCol,
      geneticConfig.trainSplitChronologicalColumn,
      geneticConfig.trainSplitChronologicalRandomPercentage,
      _mainConfig.dataReductionFactor
    )

    // Setter order is kept exactly as before; only the config look-ups are shortened.
    val tuner = new DecisionTreeTuner(
      cachedData,
      dataSplits,
      payload.modelType,
      isPipeline
    ).setLabelCol(_mainConfig.labelCol)
      .setFeaturesCol(_mainConfig.featuresCol)
      .setFieldsToIgnore(_mainConfig.fieldsToIgnoreInVector)
      .setTreesNumericBoundaries(_mainConfig.numericBoundaries)
      .setTreesStringBoundaries(_mainConfig.stringBoundaries)
      .setScoringMetric(_mainConfig.scoringMetric)
      .setTrainPortion(geneticConfig.trainPortion)
      .setTrainSplitMethod(
        trainSplitValidation(geneticConfig.trainSplitMethod, payload.modelType)
      )
      .setSyntheticCol(kSampleConfig.syntheticCol)
      .setKGroups(kSampleConfig.kGroups)
      .setKMeansMaxIter(kSampleConfig.kMeansMaxIter)
      .setKMeansTolerance(kSampleConfig.kMeansTolerance)
      .setKMeansDistanceMeasurement(kSampleConfig.kMeansDistanceMeasurement)
      .setKMeansSeed(kSampleConfig.kMeansSeed)
      .setKMeansPredictionCol(kSampleConfig.kMeansPredictionCol)
      .setLSHHashTables(kSampleConfig.lshHashTables)
      .setLSHSeed(kSampleConfig.lshSeed)
      .setLSHOutputCol(kSampleConfig.lshOutputCol)
      .setQuorumCount(kSampleConfig.quorumCount)
      .setMinimumVectorCountToMutate(kSampleConfig.minimumVectorCountToMutate)
      .setVectorMutationMethod(kSampleConfig.vectorMutationMethod)
      .setMutationMode(kSampleConfig.mutationMode)
      .setMutationValue(kSampleConfig.mutationValue)
      .setLabelBalanceMode(kSampleConfig.labelBalanceMode)
      .setCardinalityThreshold(kSampleConfig.cardinalityThreshold)
      .setNumericRatio(kSampleConfig.numericRatio)
      .setNumericTarget(kSampleConfig.numericTarget)
      .setTrainSplitChronologicalColumn(
        geneticConfig.trainSplitChronologicalColumn
      )
      .setTrainSplitChronologicalRandomPercentage(
        geneticConfig.trainSplitChronologicalRandomPercentage
      )
      .setParallelism(geneticConfig.parallelism)
      .setKFold(geneticConfig.kFold)
      .setSeed(geneticConfig.seed)
      .setOptimizationStrategy(_mainConfig.scoringOptimizationStrategy)
      .setFirstGenerationGenePool(geneticConfig.firstGenerationGenePool)
      .setNumberOfMutationGenerations(geneticConfig.numberOfGenerations)
      .setNumberOfMutationsPerGeneration(
        geneticConfig.numberOfMutationsPerGeneration
      )
      .setNumberOfParentsToRetain(geneticConfig.numberOfParentsToRetain)
      .setGeneticMixing(geneticConfig.geneticMixing)
      .setGenerationalMutationStrategy(
        geneticConfig.generationalMutationStrategy
      )
      .setMutationMagnitudeMode(geneticConfig.mutationMagnitudeMode)
      .setFixedMutationValue(geneticConfig.fixedMutationValue)
      .setEarlyStoppingFlag(_mainConfig.autoStoppingFlag)
      .setEarlyStoppingScore(_mainConfig.autoStoppingScore)
      .setEvolutionStrategy(geneticConfig.evolutionStrategy)
      .setContinuousEvolutionImprovementThreshold(
        geneticConfig.continuousEvolutionImprovementThreshold
      )
      .setGeneticMBORegressorType(geneticConfig.geneticMBORegressorType)
      .setGeneticMBOCandidateFactor(geneticConfig.geneticMBOCandidateFactor)
      .setContinuousEvolutionMaxIterations(
        geneticConfig.continuousEvolutionMaxIterations
      )
      .setContinuousEvolutionStoppingScore(
        geneticConfig.continuousEvolutionStoppingScore
      )
      .setContinuousEvolutionParallelism(
        geneticConfig.continuousEvolutionParallelism
      )
      .setContinuousEvolutionMutationAggressiveness(
        geneticConfig.continuousEvolutionMutationAggressiveness
      )
      .setContinuousEvolutionGeneticMixing(
        geneticConfig.continuousEvolutionGeneticMixing
      )
      .setContinuousEvolutionRollingImporvementCount(
        geneticConfig.continuousEvolutionRollingImprovementCount
      )
      .setDataReductionFactor(_mainConfig.dataReductionFactor)
      .setFirstGenMode(geneticConfig.initialGenerationMode)
      .setFirstGenPermutations(
        geneticConfig.initialGenerationConfig.permutationCount
      )
      .setFirstGenIndexMixingMode(
        geneticConfig.initialGenerationConfig.indexMixingMode
      )
      .setFirstGenArraySeed(geneticConfig.initialGenerationConfig.arraySeed)
      .setHyperSpaceModelCount(geneticConfig.hyperSpaceModelCount)

    // The model seed is only applied when it has been explicitly set.
    if (_modelSeedSetStatus) {
      tuner.setModelSeed(geneticConfig.modelSeed)
    }

    val (tunedModels, tunedStats) = tuner.evolveWithScoringDF()

    val collectedModels = tunedModels.toBuffer
    val collectedStats = ArrayBuffer[DataFrame](tunedStats)

    if (geneticConfig.hyperSpaceInference) {

      println("\n\t\tStarting Post Tuning Inference Run.\n")

      // Generic view of the tuned models, used as input for the
      // post-modeling optimization.
      val tunedAsGeneric = new ArrayBuffer[GenericModelReturn]
      for (tuned <- tunedModels) {
        tunedAsGeneric += GenericModelReturn(
          hyperParams = extractPayload(tuned.modelHyperParams),
          model = tuned.model,
          score = tuned.score,
          metrics = tuned.evalMetrics,
          generation = tuned.generation
        )
      }

      val inferenceCandidates = new PostModelingOptimization()
        .setModelFamily("Trees")
        .setModelType(payload.modelType)
        .setHyperParameterSpaceCount(geneticConfig.hyperSpaceInferenceCount)
        .setNumericBoundaries(tuner.getTreesNumericBoundaries)
        .setStringBoundaries(tuner.getTreesStringBoundaries)
        .setSeed(geneticConfig.seed)
        .setOptimizationStrategy(_mainConfig.scoringOptimizationStrategy)
        .treesPrediction(
          tunedAsGeneric.toArray,
          geneticConfig.hyperSpaceModelType,
          geneticConfig.hyperSpaceModelCount
        )

      val (inferredModels, inferredStats) =
        tuner.postRunModeledHyperParams(inferenceCandidates)

      collectedModels ++= inferredModels
      collectedStats += inferredStats
    }

    DataSplitCustodial.cleanCachedInstances(dataSplits, _mainConfig)

    val combinedStats = collectedStats.reduce(_ union _)

    (collectedModels.toArray, combinedStats, payload.modelType, cachedData)
  }

  /**
    * Logs the supplied model results through an [[MLFlowTracker]] built from
    * `_mainConfig`.
    *
    * Artifact logging on the tracker is switched on or off according to
    * `_mainConfig.mlFlowLogArtifactsFlag` before the results are logged.
    *
    * @param runData     generic model results to log
    * @param modelFamily model family passed through to the tracker
    * @param modelType   model type passed through to the tracker
    * @return the report structure produced by the tracker
    */
  private def logResultsToMlFlow(runData: Array[GenericModelReturn],
                                 modelFamily: String,
                                 modelType: String): MLFlowReportStructure = {

    val tracker = MLFlowTracker(_mainConfig)

    if (!_mainConfig.mlFlowLogArtifactsFlag) {
      tracker.logArtifactsOff()
    } else {
      tracker.logArtifactsOn()
    }

    val inferenceSaveLocation = _mainConfig.inferenceConfigSaveLocation
    val optimizationStrategy = _mainConfig.scoringOptimizationStrategy

    tracker.logMlFlowDataAndModels(
      runData,
      modelFamily,
      modelType,
      inferenceSaveLocation,
      optimizationStrategy
    )
  }

  /**
    * Logs the supplied model results for a pipeline run through an
    * [[MLFlowTracker]] built from `_mainConfig`.
    *
    * The MLflow run id is read from the main config that
    * [[AutoMlPipelineMlFlowUtils]] returns for `_mainConfig.pipelineId`.
    *
    * @param runData     generic model results to log
    * @param modelFamily model family passed through to the tracker
    * @param modelType   model type passed through to the tracker
    * @return the report structure produced by the tracker
    */
  private def logPipelineResultsToMlFlow(
    runData: Array[GenericModelReturn],
    modelFamily: String,
    modelType: String
  ): MLFlowReportStructure = {

    // The tracker is created before the run id is looked up, as before.
    val tracker = MLFlowTracker(_mainConfig)

    val pipelineRunId = AutoMlPipelineMlFlowUtils
      .getMainConfigByPipelineId(_mainConfig.pipelineId)
      .mlFlowRunId

    tracker.logMlFlowForPipeline(
      pipelineRunId,
      runData,
      modelFamily,
      modelType,
      _mainConfig.scoringOptimizationStrategy
    )
  }

  /**
    * Runs the tuner that matches `_mainConfig.modelFamily`, optionally logs the
    * results to MLflow, and packages everything into a [[TunerOutput]].
    *
    * MLflow handling:
    *  - logging enabled and not a pipeline run: the inference configuration is
    *    set and printed, then results are logged via `logResultsToMlFlow`; a
    *    failure there is reported and replaced by a dummy "error" return.
    *  - logging enabled and a pipeline run: results are logged via
    *    `logPipelineResultsToMlFlow`.
    *  - logging disabled: a dummy "undefined" return is used.
    *
    * @param payload    prepared data together with the detected model type
    * @param isPipeline whether this is a pipeline run; forwarded to the tuner
    * @return the tuning output (raw data, model selection, MLflow output and reports)
    * @throws UnsupportedOperationException if `_mainConfig.modelFamily` is not one
    *                                       of the families handled here
    */
  protected[automl] def executeTuning(
    payload: DataGeneration,
    isPipeline: Boolean = false
  ): TunerOutput = {

    val genericResults = new ArrayBuffer[GenericModelReturn]
    logger.log(Level.INFO, convertMainConfigToJson(_mainConfig))

    // Each branch appends its family-specific results to `genericResults` in
    // their generic form and yields (scoring stats, model type, data).
    val (modelStats, modelSelection, dataframe) =
      _mainConfig.modelFamily match {
        case "RandomForest" =>
          val (results, stats, selection, data) =
            runRandomForest(payload, isPipeline)
          results.foreach { x =>
            genericResults += GenericModelReturn(
              hyperParams = extractPayload(x.modelHyperParams),
              model = x.model,
              score = x.score,
              metrics = x.evalMetrics,
              generation = x.generation
            )
          }
          (stats, selection, data)
        case "XGBoost" =>
          val (results, stats, selection, data) =
            runXGBoost(payload, isPipeline)
          results.foreach { x =>
            genericResults += GenericModelReturn(
              hyperParams = extractPayload(x.modelHyperParams),
              model = x.model,
              score = x.score,
              metrics = x.evalMetrics,
              generation = x.generation
            )
          }
          (stats, selection, data)
        case "gbmBinary" | "gbmMulti" | "gbmMultiOVA" | "gbmHuber" | "gbmFair" |
            "gbmLasso" | "gbmRidge" | "gbmPoisson" | "gbmQuantile" | "gbmMape" |
            "gbmTweedie" | "gbmGamma" =>
          val (results, stats, selection, data) =
            runLightGBM(_mainConfig.modelFamily, payload, isPipeline)
          results.foreach { x =>
            genericResults += GenericModelReturn(
              hyperParams = extractPayload(x.modelHyperParams),
              model = x.model,
              score = x.score,
              metrics = x.evalMetrics,
              generation = x.generation
            )
          }
          (stats, selection, data)
        case "GBT" =>
          val (results, stats, selection, data) = runGBT(payload, isPipeline)
          results.foreach { x =>
            genericResults += GenericModelReturn(
              hyperParams = extractPayload(x.modelHyperParams),
              model = x.model,
              score = x.score,
              metrics = x.evalMetrics,
              generation = x.generation
            )
          }
          (stats, selection, data)
        case "MLPC" =>
          val (results, stats, selection, data) = runMLPC(payload, isPipeline)
          results.foreach { x =>
            genericResults += GenericModelReturn(
              // MLPC hyper parameters go through their own extractor.
              hyperParams = extractMLPCPayload(x.modelHyperParams),
              model = x.model,
              score = x.score,
              metrics = x.evalMetrics,
              generation = x.generation
            )
          }
          (stats, selection, data)
        case "LinearRegression" =>
          val (results, stats, selection, data) =
            runLinearRegression(payload, isPipeline)
          results.foreach { x =>
            genericResults += GenericModelReturn(
              hyperParams = extractPayload(x.modelHyperParams),
              model = x.model,
              score = x.score,
              metrics = x.evalMetrics,
              generation = x.generation
            )
          }
          (stats, selection, data)
        case "LogisticRegression" =>
          val (results, stats, selection, data) =
            runLogisticRegression(payload, isPipeline)
          results.foreach { x =>
            genericResults += GenericModelReturn(
              hyperParams = extractPayload(x.modelHyperParams),
              model = x.model,
              score = x.score,
              metrics = x.evalMetrics,
              generation = x.generation
            )
          }
          (stats, selection, data)
        case "SVM" =>
          val (results, stats, selection, data) = runSVM(payload, isPipeline)
          results.foreach { x =>
            genericResults += GenericModelReturn(
              hyperParams = extractPayload(x.modelHyperParams),
              model = x.model,
              score = x.score,
              metrics = x.evalMetrics,
              generation = x.generation
            )
          }
          (stats, selection, data)
        case "Trees" =>
          val (results, stats, selection, data) = runTrees(payload, isPipeline)
          results.foreach { x =>
            genericResults += GenericModelReturn(
              hyperParams = extractPayload(x.modelHyperParams),
              model = x.model,
              score = x.score,
              metrics = x.evalMetrics,
              generation = x.generation
            )
          }
          (stats, selection, data)
        case unsupportedFamily =>
          // Fail with a descriptive error instead of a bare MatchError.
          throw new UnsupportedOperationException(
            s"Model Family $unsupportedFamily is not supported for tuning"
          )
      }

    val genericResultData = genericResults.toArray

    val mlFlow = if (_mainConfig.mlFlowLoggingFlag && !isPipeline) {

      // set the Inference details in general for the run
      // TODO - Remove this - It's here and in the tracker but the values are different and should be set equal
      val inferenceModelConfig = InferenceModelConfig(
        modelFamily = _mainConfig.modelFamily,
        modelType = modelSelection,
        modelLoadMethod = "path",
        mlFlowConfig = _mainConfig.mlFlowConfig,
        mlFlowRunId = "none",
        modelPathLocation = "notDefined"
      )

      // Set the Inference Config
      InferenceConfig.setInferenceModelConfig(inferenceModelConfig)
      InferenceConfig.setInferenceConfigStorageLocation(
        _mainConfig.inferenceConfigSaveLocation
      )

      // Write the Inference Payload out to the specified location
      val outputInferencePayload = InferenceConfig.getInferenceConfig

      val inferenceConfigReadable = convertInferenceConfigToJson(
        outputInferencePayload
      )
      val inferenceLog =
        s"Inference Configuration: \n${inferenceConfigReadable.prettyJson}"
      println(inferenceLog)

      logger.log(Level.INFO, inferenceLog)

      val mlFlowResult = try {
        logResultsToMlFlow(
          genericResultData,
          _mainConfig.modelFamily,
          modelSelection
        )
      } catch {
        case e: Exception =>
          // Report the exception itself plus its frames. Passing `e` to the
          // logger also records any cause chain.
          val stackTrace = e.getStackTrace.mkString("\n")
          val failureMessage =
            s"Failed to log to mlflow.  Check configuration. \n $e \n $stackTrace"
          println(failureMessage)
          logger.log(Level.FATAL, failureMessage, e)
          generateDummyMLFlowReturn("error").get
      }

      implicit val formats: Formats = Serialization.formats(hints = NoTypeHints)
      val pretty = writePretty(mlFlowResult)

      logger.log(Level.INFO, pretty)
      mlFlowResult
    } else if (isPipeline && _mainConfig.mlFlowLoggingFlag) {
      logPipelineResultsToMlFlow(
        genericResultData,
        _mainConfig.modelFamily,
        modelSelection
      )
    } else {
      generateDummyMLFlowReturn("undefined").get
    }

    val generationalData = extractGenerationalScores(
      genericResultData,
      _mainConfig.scoringOptimizationStrategy,
      _mainConfig.modelFamily,
      modelSelection
    )

    new TunerOutput(
      rawData = dataframe,
      modelSelection = modelSelection,
      mlFlowOutput = mlFlow
    ) {
      override def modelReport: Array[GenericModelReturn] = genericResultData
      override def generationReport: Array[GenerationalReport] =
        generationalData
      override def modelReportDataFrame: DataFrame = modelStats
      override def generationReportDataFrame: DataFrame =
        generationDataFrameReport(
          generationalData,
          _mainConfig.scoringOptimizationStrategy
        )
    }

  }

  /**
    * Builds a placeholder [[MLFlowReportStructure]] for runs where no real
    * MLflow result is available.
    *
    * Both slots of the structure hold the same [[MLFlowReturn]], created with
    * the client of a tracker built from `_mainConfig`, `msg`, and a single
    * `(msg, 0.0)` entry. If building it fails, the failure is logged at DEBUG
    * level and a structure holding two `null` entries is returned instead.
    *
    * @param msg marker text stored in the placeholder (callers in this file
    *            pass "error" or "undefined")
    * @return always a `Some`; the wrapped structure may hold `null` entries
    */
  private def generateDummyMLFlowReturn(
    msg: String
  ): Option[MLFlowReportStructure] = {
    try {
      val genTracker = MLFlowTracker(_mainConfig)
      val dummyLog = MLFlowReturn(
        genTracker.getMLFlowClient,
        msg,
        Array((msg, 0.0))
      )
      Some(MLFlowReportStructure(dummyLog, dummyLog))
    } catch {
      case ex: Exception =>
        // Best-effort fallback: keep returning the empty structure, but leave a
        // trace of why the placeholder could not be built.
        logger.log(
          Level.DEBUG,
          s"Unable to build placeholder MLFlow return for '$msg'; using an empty structure.",
          ex
        )
        Some(MLFlowReportStructure(null, null))
    }
  }

  /**
    * Applies the first model of `resultPayload` to `rawData`.
    *
    * The stored model is cast to the concrete model class that corresponds to
    * `_mainConfig.modelFamily` (and, for families that support both, to
    * `modelSelection`) before `transform` is called.
    *
    * @param resultPayload  model results; the element at index 0 is used
    * @param rawData        data to run the model against
    * @param modelSelection "regressor" or "classifier"; only consulted for the
    *                       RandomForest, XGBoost, GBT and Trees families
    * @return the DataFrame produced by the model's `transform`
    */
  protected[automl] def predictFromBestModel(
    resultPayload: Array[GenericModelReturn],
    rawData: DataFrame,
    modelSelection: String
  ): DataFrame = {

    val topModel = resultPayload(0).model

    _mainConfig.modelFamily match {
      case "RandomForest" =>
        modelSelection match {
          case "regressor" =>
            topModel
              .asInstanceOf[RandomForestRegressionModel]
              .transform(rawData)
          case "classifier" =>
            topModel
              .asInstanceOf[RandomForestClassificationModel]
              .transform(rawData)
        }
      case "XGBoost" =>
        modelSelection match {
          case "regressor" =>
            topModel.asInstanceOf[XGBoostRegressionModel].transform(rawData)
          case "classifier" =>
            topModel.asInstanceOf[XGBoostClassificationModel].transform(rawData)
        }
      case "gbmBinary" | "gbmMulti" | "gbmMultiOVA" =>
        topModel.asInstanceOf[LightGBMClassificationModel].transform(rawData)
      case "gbmHuber" | "gbmFair" | "gbmLasso" | "gbmRidge" | "gbmPoisson" |
          "gbmQuantile" | "gbmMape" | "gbmTweedie" | "gbmGamma" =>
        topModel.asInstanceOf[LightGBMRegressionModel].transform(rawData)
      case "GBT" =>
        modelSelection match {
          case "regressor" =>
            topModel.asInstanceOf[GBTRegressionModel].transform(rawData)
          case "classifier" =>
            topModel.asInstanceOf[GBTClassificationModel].transform(rawData)
        }
      case "MLPC" =>
        topModel
          .asInstanceOf[MultilayerPerceptronClassificationModel]
          .transform(rawData)
      case "LinearRegression" =>
        topModel.asInstanceOf[LinearRegressionModel].transform(rawData)
      case "LogisticRegression" =>
        topModel.asInstanceOf[LogisticRegressionModel].transform(rawData)
      case "SVM" =>
        topModel.asInstanceOf[LinearSVCModel].transform(rawData)
      case "Trees" =>
        modelSelection match {
          case "classifier" =>
            topModel
              .asInstanceOf[DecisionTreeClassificationModel]
              .transform(rawData)
          case "regressor" =>
            topModel
              .asInstanceOf[DecisionTreeRegressionModel]
              .transform(rawData)
        }
    }

  }

  /**
    * Prepares the data and runs [[RandomForestFeatureImportance]] on it using
    * `_featureImportancesConfig`.
    *
    * When `_featureImportancesConfig.dataPrepCachingFlag` is set, the prepared
    * data is persisted and counted once before the run and unpersisted
    * afterwards, including when the run fails.
    *
    * @return the three results of the feature importance run together with the
    *         model type detected during data preparation
    */
  @deprecated(
    "This method will be removed and replaced with the standalone version in " +
      "com.databricks.labs.automl.exploration.FeatureImportances in a future release."
  )
  def exploreFeatureImportances(): FeatureImportanceReturn = {

    println(
      "[DEPRECATION WARNING] - .exploreFeatureImportances() has been replaced by " +
        "com.databricks.labs.automl.exploration.FeatureImportances .  This method will be removed in the next release."
    )

    val payload = prepData()

    val cachingEnabled = _featureImportancesConfig.dataPrepCachingFlag

    val cachedData = if (cachingEnabled) {
      val persisted = payload.data.persist(StorageLevel.MEMORY_AND_DISK)
      // A single count is enough to materialize the persisted data.
      persisted.count()
      persisted
    } else {
      payload.data
    }

    try {
      val featureResults = new RandomForestFeatureImportance(
        cachedData,
        _featureImportancesConfig,
        payload.modelType
      ).setCutoffType(_featureImportancesConfig.featureImportanceCutoffType)
        .setCutoffValue(_featureImportancesConfig.featureImportanceCutoffValue)
        .runFeatureImportances(payload.fields)

      FeatureImportanceReturn(
        featureResults._1,
        featureResults._2,
        featureResults._3,
        payload.modelType
      )
    } finally {
      // Release the cache whether or not the run succeeded.
      if (cachingEnabled) cachedData.unpersist()
    }
  }

  /**
    * Runs feature importances, restricts the source DataFrame to the selected
    * fields plus `_featureImportancesConfig.labelCol`, and executes a full
    * [[AutomationRunner]] run (configured with `_mainConfig`) on that subset.
    *
    * When `_featureImportancesConfig.dataPrepCachingFlag` is set, the subset is
    * persisted before the run and unpersisted afterwards, including when the
    * run fails.
    *
    * @return the feature importance data combined with the reports and MLflow
    *         output of the run on the reduced data
    */
  @deprecated(
    "This method will be removed and replaced with the standalone version in " +
      "com.databricks.labs.automl.exploration.FeatureImportances in a future release."
  )
  def runWithFeatureCulling(): FeatureImportanceOutput = {

    println(
      "[DEPRECATION WARNING] - .runWithFeatureCulling() has been replaced by " +
        "com.databricks.labs.automl.exploration.FeatureImportances .  This method will be removed in the next release."
    )

    // Get the Feature Importances

    val featureImportanceResults = exploreFeatureImportances()

    val selectableFields = featureImportanceResults.fields :+ _featureImportancesConfig.labelCol

    println(
      s"Feature Selected: ${featureImportanceResults.fields.mkString(", ")}"
    )

    val dataSubset = df.select(selectableFields.map(col): _*)

    // One flag guards both persist and unpersist, so the subset is released
    // exactly when it was cached.
    val cacheSubset = _featureImportancesConfig.dataPrepCachingFlag

    if (cacheSubset) {
      dataSubset.persist(StorageLevel.MEMORY_AND_DISK)
      dataSubset.count()
    }

    val runResults = try {
      new AutomationRunner(dataSubset).setMainConfig(_mainConfig).run()
    } finally {
      if (cacheSubset) dataSubset.unpersist()
    }

    new FeatureImportanceOutput(
      featureImportanceResults.data,
      mlFlowOutput = runResults.mlFlowOutput
    ) {
      override def modelReport: Array[GenericModelReturn] =
        runResults.modelReport
      override def generationReport: Array[GenerationalReport] =
        runResults.generationReport
      override def modelReportDataFrame: DataFrame =
        runResults.modelReportDataFrame
      override def generationReportDataFrame: DataFrame =
        runResults.generationReportDataFrame
    }

  }

  /**
    * Runs the feature-importance exploration, narrows the source DataFrame to the
    * selected feature fields plus the main config's label column, tunes models on
    * that subset and finally scores the data with the best model.
    *
    * When the train split method is "kSample", rows flagged in the configured
    * synthetic column are filtered out (and the column dropped) before prediction.
    *
    * @return a [[FeatureImportancePredictionOutput]] holding the feature importance
    *         data, the predicted data and the MLflow output; its report accessors
    *         delegate to the tuning results.
    */
  @deprecated(
    "This method will be removed and replaced with the standalone version in " +
      "com.databricks.labs.automl.exploration.FeatureImportances in a future release."
  )
  def runFeatureCullingWithPrediction(): FeatureImportancePredictionOutput = {

    // Consulted at each guarded step; reads the main config every time it is called.
    def cachingEnabled: Boolean = _mainConfig.dataPrepCachingFlag

    println(
      "[DEPRECATION WARNING] - .runFeatureCullingWithPrediction() has been replaced by " +
        "com.databricks.labs.automl.exploration.FeatureImportances .  This method will be removed in the next release."
    )

    val importances = exploreFeatureImportances()
    val keptColumns = importances.fields :+ _mainConfig.labelCol

    println(s"Features Selected: ${importances.fields.mkString(", ")}")

    val culledData = df.select(keptColumns.map(col): _*)

    if (cachingEnabled) {
      culledData.persist(StorageLevel.MEMORY_AND_DISK)
      culledData.count
    }

    // Prepare and tune on the culled data with a dedicated runner.
    val culledRunner = new AutomationRunner(culledData).setMainConfig(_mainConfig)
    val tuningOutcome = culledRunner.executeTuning(culledRunner.prepData())

    if (cachingEnabled) culledData.unpersist()

    // Strip synthetic rows (and their marker column) when kSample splitting was used.
    val scoringInput =
      if (_mainConfig.geneticConfig.trainSplitMethod == "kSample") {
        val syntheticMarker = _mainConfig.geneticConfig.kSampleConfig.syntheticCol
        tuningOutcome.rawData
          .filter(col(syntheticMarker) === false)
          .drop(syntheticMarker)
      } else {
        tuningOutcome.rawData
      }

    val scoredData = predictFromBestModel(
      tuningOutcome.modelReport,
      scoringInput,
      tuningOutcome.modelSelection
    )

    if (cachingEnabled) tuningOutcome.rawData.unpersist()

    new FeatureImportancePredictionOutput(
      featureImportances = importances.data,
      predictionData = scoredData,
      mlFlowOutput = tuningOutcome.mlFlowOutput
    ) {
      override def modelReport: Array[GenericModelReturn] =
        tuningOutcome.modelReport
      override def modelReportDataFrame: DataFrame =
        tuningOutcome.modelReportDataFrame
      override def generationReport: Array[GenerationalReport] =
        tuningOutcome.generationReport
      override def generationReportDataFrame: DataFrame =
        tuningOutcome.generationReportDataFrame
    }

  }

  /**
    * Prepares the data and runs a decision tree split analysis over the prepared
    * fields, using the tree splits configuration of this runner.
    *
    * @return the [[TreeSplitReport]] produced by the split analysis
    */
  def generateDecisionSplits(): TreeSplitReport = {
    val prepared = prepData()
    val splitAnalyzer =
      new DecisionTreeSplits(prepared.data, _treeSplitsConfig, prepared.modelType)
    splitAnalyzer.runTreeSplitAnalysis(prepared.fields)
  }

  /**
    * Prepares the data, executes model tuning on it and packages the resulting
    * reports. The tuner's raw data is unpersisted afterwards when the main config's
    * data prep caching flag is set.
    *
    * @return an [[AutomationOutput]] carrying the MLflow output; its report accessors
    *         delegate to the tuning results.
    */
  def run(): AutomationOutput = {
    val preparedPayload = prepData()
    val tuningOutcome = executeTuning(preparedPayload)

    if (_mainConfig.dataPrepCachingFlag) {
      tuningOutcome.rawData.unpersist()
    }

    new AutomationOutput(mlFlowOutput = tuningOutcome.mlFlowOutput) {
      override def modelReport: Array[GenericModelReturn] =
        tuningOutcome.modelReport
      override def modelReportDataFrame: DataFrame =
        tuningOutcome.modelReportDataFrame
      override def generationReport: Array[GenerationalReport] =
        tuningOutcome.generationReport
      override def generationReportDataFrame: DataFrame =
        tuningOutcome.generationReportDataFrame
    }
  }

  /**
    * Prepares the data, executes model tuning and scores the tuner's raw data with
    * the best model.
    *
    * When the train split method is "kSample", rows flagged in the configured
    * synthetic column are filtered out (and the column dropped) before prediction.
    * The tuner's raw data is unpersisted afterwards when the main config's data prep
    * caching flag is set.
    *
    * @return a [[PredictionOutput]] holding the predicted data and the MLflow output;
    *         its report accessors delegate to the tuning results.
    */
  def runWithPrediction(): PredictionOutput = {

    val tuningOutcome = executeTuning(prepData())

    // Strip synthetic rows (and their marker column) when kSample splitting was used.
    val scoringInput =
      if (_mainConfig.geneticConfig.trainSplitMethod == "kSample") {
        val syntheticMarker = _mainConfig.geneticConfig.kSampleConfig.syntheticCol
        tuningOutcome.rawData
          .filter(col(syntheticMarker) === false)
          .drop(syntheticMarker)
      } else {
        tuningOutcome.rawData
      }

    val scoredData = predictFromBestModel(
      tuningOutcome.modelReport,
      scoringInput,
      tuningOutcome.modelSelection
    )

    if (_mainConfig.dataPrepCachingFlag) {
      tuningOutcome.rawData.unpersist()
    }

    new PredictionOutput(
      dataWithPredictions = scoredData,
      mlFlowOutput = tuningOutcome.mlFlowOutput
    ) {
      override def modelReport: Array[GenericModelReturn] =
        tuningOutcome.modelReport
      override def modelReportDataFrame: DataFrame =
        tuningOutcome.modelReportDataFrame
      override def generationReport: Array[GenerationalReport] =
        tuningOutcome.generationReport
      override def generationReportDataFrame: DataFrame =
        tuningOutcome.generationReportDataFrame
    }

  }

  /**
    * Runs the prediction workflow and tallies how often each combination of the
    * "prediction" column and the main config's label column occurs in the
    * predicted data.
    *
    * @return a [[ConfusionOutput]] holding the predicted data, the per-combination
    *         counts (column "count") and the MLflow output; its report accessors
    *         delegate to the prediction run.
    */
  def runWithConfusionReport(): ConfusionOutput = {
    val predictionRun = runWithPrediction()
    val labelColumn = _mainConfig.labelCol
    val scoredData = predictionRun.dataWithPredictions

    // One row per (prediction, label) pair with the number of matching records.
    val pairCounts = scoredData
      .select("prediction", labelColumn)
      .groupBy("prediction", labelColumn)
      .agg(count("*").alias("count"))

    new ConfusionOutput(
      predictionData = scoredData,
      confusionData = pairCounts,
      mlFlowOutput = predictionRun.mlFlowOutput
    ) {
      override def modelReport: Array[GenericModelReturn] =
        predictionRun.modelReport
      override def modelReportDataFrame: DataFrame =
        predictionRun.modelReportDataFrame
      override def generationReport: Array[GenerationalReport] =
        predictionRun.generationReport
      override def generationReportDataFrame: DataFrame =
        predictionRun.generationReportDataFrame
    }

  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy