All downloads are free. Search and download functionality uses the official Maven repository.

com.databricks.labs.automl.params.Configuration.scala Maven / Gradle / Ivy

package com.databricks.labs.automl.params

/**
  * Top-level configuration record with 37 constructor parameters: scalar
  * settings (strings, flags, numbers), two boundary maps, and nested
  * configuration objects ([[FillConfig]], [[OutlierConfig]], [[PearsonConfig]],
  * [[CovarianceConfig]], [[FeatureInteractionConfig]], [[ScalingConfig]],
  * [[GeneticConfig]], [[MLFlowConfig]]).
  *
  * No defaults are declared, so every parameter must be supplied on
  * construction.
  *
  * NOTE: `fieldsToIgnoreInVector` is an `Array[String]`. The compiler-generated
  * `equals` / `hashCode` of a case class compare array fields by reference, so
  * two instances holding distinct arrays with the same contents are not equal.
  *
  * NOTE(review): the meaning of individual fields is not visible in this file;
  * the grouping comments below are based on names and types only. Confirm
  * against the code that consumes this configuration.
  */
case class MainConfig(
  // Model and column selection (names only; semantics defined by consumers).
  modelFamily: String,
  labelCol: String,
  featuresCol: String,
  // Boolean switches.
  naFillFlag: Boolean,
  varianceFilterFlag: Boolean,
  outlierFilterFlag: Boolean,
  pearsonFilteringFlag: Boolean,
  covarianceFilteringFlag: Boolean,
  oneHotEncodeFlag: Boolean,
  scalingFlag: Boolean,
  featureInteractionFlag: Boolean,
  dataPrepCachingFlag: Boolean,
  dataPrepParallelism: Int,
  autoStoppingFlag: Boolean,
  autoStoppingScore: Double,
  featureImportanceCutoffType: String,
  featureImportanceCutoffValue: Double,
  dateTimeConversionType: String,
  fieldsToIgnoreInVector: Array[String],
  // Boundary maps keyed by String.
  numericBoundaries: Map[String, (Double, Double)],
  stringBoundaries: Map[String, List[String]],
  scoringMetric: String,
  scoringOptimizationStrategy: String,
  // Nested configuration objects (types declared in this file).
  fillConfig: FillConfig,
  outlierConfig: OutlierConfig,
  pearsonConfig: PearsonConfig,
  covarianceConfig: CovarianceConfig,
  featureInteractionConfig: FeatureInteractionConfig,
  scalingConfig: ScalingConfig,
  geneticConfig: GeneticConfig,
  // MLflow-related settings.
  mlFlowLoggingFlag: Boolean,
  mlFlowLogArtifactsFlag: Boolean,
  mlFlowConfig: MLFlowConfig,
  inferenceConfigSaveLocation: String,
  dataReductionFactor: Double,
  pipelineDebugFlag: Boolean,
  pipelineId: String
)

/**
  * Record of six Boolean switches. All parameters are required (no defaults).
  *
  * Two names differ slightly from their counterparts on [[MainConfig]]:
  * this class uses `pearsonFilterFlag` / `covarianceFilterFlag`, whereas
  * `MainConfig` uses `pearsonFilteringFlag` / `covarianceFilteringFlag`.
  *
  * NOTE(review): presumably each flag enables the matching data-preparation
  * stage; that behaviour is not visible in this file — confirm with callers.
  */
case class DataPrepConfig(
  naFillFlag: Boolean,
  varianceFilterFlag: Boolean,
  outlierFilterFlag: Boolean,
  pearsonFilterFlag: Boolean,
  covarianceFilterFlag: Boolean,
  scalingFlag: Boolean
)

/**
  * MLflow-related settings: six `String` values plus a `Map[String, String]`
  * of custom run tags. It is the type of `MainConfig.mlFlowConfig`. All
  * parameters are required (no defaults).
  *
  * NOTE: as with any case class, the generated `toString` prints every field,
  * including `mlFlowAPIToken`. Avoid logging instances directly.
  *
  * NOTE(review): accepted values (e.g. for `mlFlowLoggingMode`) are not
  * visible in this file — confirm against the consuming code.
  */
case class MLFlowConfig(
  mlFlowTrackingURI: String,
  mlFlowExperimentName: String,
  mlFlowAPIToken: String,
  mlFlowModelSaveDirectory: String,
  mlFlowLoggingMode: String,
  mlFlowBestSuffix: String,
  mlFlowCustomRunTags: Map[String, String]
)

/**
  * Settings record with 14 required parameters; it is the type of
  * `MainConfig.fillConfig`.
  *
  * `numericNAFillMap` is typed `Map[String, AnyVal]`, so its values may be any
  * primitive-like value (`Int`, `Double`, `Boolean`, ...); the compiler does
  * not restrict them to numbers.
  *
  * NOTE(review): field semantics and the accepted strings for
  * `numericFillStat`, `characterFillStat`, `cardinalityType`,
  * `cardinalityCheckMode` and `naFillMode` are not visible in this file —
  * confirm against the consuming code.
  */
case class FillConfig(
  numericFillStat: String,
  characterFillStat: String,
  modelSelectionDistinctThreshold: Int,
  // Cardinality-related settings.
  cardinalitySwitch: Boolean,
  cardinalityType: String,
  cardinalityLimit: Int,
  cardinalityPrecision: Double,
  cardinalityCheckMode: String,
  filterPrecision: Double,
  // Per-key and blanket fill values.
  categoricalNAFillMap: Map[String, String],
  numericNAFillMap: Map[String, AnyVal],
  characterNABlanketFillValue: String,
  numericNABlanketFillValue: Double,
  naFillMode: String
)

/**
  * Settings record with six required parameters; it is the type of
  * `MainConfig.outlierConfig`.
  *
  * Equality and hashing are defined explicitly so that `fieldsToIgnore` is
  * compared by content. The compiler-generated `equals` / `hashCode` of a case
  * class compare an `Array` field by reference, which would make two configs
  * built from identical values unequal and give them different hash codes
  * (breaking use in `Set`s, as `Map` keys, and in test assertions).
  *
  * A `null` `fieldsToIgnore` is tolerated by `equals` / `hashCode` and is
  * only equal to another `null`.
  *
  * NOTE(review): field semantics and the accepted strings for `filterBounds`
  * are not visible in this file — confirm against the consuming code.
  */
case class OutlierConfig(
  filterBounds: String,
  lowerFilterNTile: Double,
  upperFilterNTile: Double,
  filterPrecision: Double,
  continuousDataThreshold: Int,
  fieldsToIgnore: Array[String]
) {

  // Null-safe view of the array with structural equality and hashing.
  private def ignoredFields: Option[Seq[String]] =
    Option(fieldsToIgnore).map(_.toSeq)

  /** Field-by-field equality; `fieldsToIgnore` is compared element-wise. */
  override def equals(other: Any): Boolean = other match {
    case that: OutlierConfig =>
      that.canEqual(this) &&
        filterBounds == that.filterBounds &&
        lowerFilterNTile == that.lowerFilterNTile &&
        upperFilterNTile == that.upperFilterNTile &&
        filterPrecision == that.filterPrecision &&
        continuousDataThreshold == that.continuousDataThreshold &&
        ignoredFields == that.ignoredFields
    case _ => false
  }

  /** Hash consistent with [[equals]]: derived from the same six values. */
  override def hashCode(): Int =
    (
      filterBounds,
      lowerFilterNTile,
      upperFilterNTile,
      filterPrecision,
      continuousDataThreshold,
      ignoredFields
    ).hashCode()
}

/**
  * Settings record with five required parameters (three `String`, two
  * `Double`); it is the type of `MainConfig.pearsonConfig`.
  *
  * NOTE(review): the accepted strings for `filterStatistic`,
  * `filterDirection` and `filterMode`, and how `filterManualValue` and
  * `autoFilterNTile` interact, are not visible in this file — confirm
  * against the consuming code.
  */
case class PearsonConfig(
  filterStatistic: String,
  filterDirection: String,
  filterManualValue: Double,
  filterMode: String,
  autoFilterNTile: Double
)

/**
  * Pair of required `Double` cutoffs, a low and a high one; it is the type of
  * `MainConfig.covarianceConfig`.
  *
  * NOTE(review): valid ranges and whether the bounds are inclusive are not
  * visible in this file — confirm against the consuming code.
  */
case class CovarianceConfig(
  correlationCutoffLow: Double,
  correlationCutoffHigh: Double
)

/**
  * Settings record with three required parameters; it is the type of
  * `GeneticConfig.initialGenerationConfig`.
  *
  * NOTE(review): the accepted strings for `indexMixingMode` and how
  * `arraySeed` is used are not visible in this file — confirm against the
  * consuming code.
  */
case class FirstGenerationConfig(
  permutationCount: Int,
  indexMixingMode: String,
  arraySeed: Long
)

/**
  * Settings record with 20 required parameters; it is the type of
  * `GeneticConfig.kSampleConfig`.
  *
  * NOTE(review): the grouping comments below follow the field-name prefixes
  * (`kMeans*`, `lsh*`) only. Field semantics and accepted string values are
  * not visible in this file — confirm against the consuming code.
  */
case class KSampleConfig(
  syntheticCol: String,
  kGroups: Int,
  // Fields prefixed `kMeans`.
  kMeansMaxIter: Int,
  kMeansTolerance: Double,
  kMeansDistanceMeasurement: String,
  kMeansSeed: Long,
  kMeansPredictionCol: String,
  // Fields prefixed `lsh`.
  lshHashTables: Int,
  lshSeed: Long,
  lshOutputCol: String,
  quorumCount: Int,
  // Mutation-related settings.
  minimumVectorCountToMutate: Int,
  vectorMutationMethod: String,
  mutationMode: String,
  mutationValue: Double,
  labelBalanceMode: String,
  cardinalityThreshold: Int,
  numericRatio: Double,
  numericTarget: Int,
  outputDfRepartitionScaleFactor: Int
)

/**
  * Settings record with 36 required parameters; it is the type of
  * `MainConfig.geneticConfig`. It nests two other records declared in this
  * file: [[KSampleConfig]] (`kSampleConfig`) and [[FirstGenerationConfig]]
  * (`initialGenerationConfig`).
  *
  * `modelSeed` is typed `Map[String, Any]`, so its values are unchecked by
  * the compiler; consumers must validate or pattern-match them.
  *
  * NOTE(review): the grouping comments below follow field-name prefixes only.
  * Field semantics and accepted string values are not visible in this file —
  * confirm against the consuming code.
  */
case class GeneticConfig(
  parallelism: Int,
  kFold: Int,
  // Train/test split settings.
  trainPortion: Double,
  trainSplitMethod: String,
  kSampleConfig: KSampleConfig,
  trainSplitChronologicalColumn: String,
  trainSplitChronologicalRandomPercentage: Double,
  seed: Long,
  // Generation and mutation sizing.
  firstGenerationGenePool: Int,
  numberOfGenerations: Int,
  numberOfParentsToRetain: Int,
  numberOfMutationsPerGeneration: Int,
  geneticMixing: Double,
  generationalMutationStrategy: String,
  fixedMutationValue: Int,
  mutationMagnitudeMode: String,
  evolutionStrategy: String,
  // Fields prefixed `geneticMBO`.
  geneticMBORegressorType: String,
  geneticMBOCandidateFactor: Int,
  // Fields prefixed `continuousEvolution`.
  continuousEvolutionMaxIterations: Int,
  continuousEvolutionStoppingScore: Double,
  continuousEvolutionImprovementThreshold: Int,
  continuousEvolutionParallelism: Int,
  continuousEvolutionMutationAggressiveness: Int,
  continuousEvolutionGeneticMixing: Double,
  continuousEvolutionRollingImprovementCount: Int,
  modelSeed: Map[String, Any],
  // Fields prefixed `hyperSpace`.
  hyperSpaceInference: Boolean,
  hyperSpaceInferenceCount: Int,
  hyperSpaceModelType: String,
  hyperSpaceModelCount: Int,
  // Initial-generation settings.
  initialGenerationMode: String,
  initialGenerationConfig: FirstGenerationConfig,
  // Caching settings.
  deltaCacheBackingDirectory: String,
  splitCachingStrategy: String,
  deltaCacheBackingDirectoryRemovalFlag: Boolean
)

/**
  * Settings record with six required parameters; it is the type of
  * `MainConfig.scalingConfig`.
  *
  * NOTE(review): the accepted strings for `scalerType`, and which of the
  * remaining fields apply to which scaler, are not visible in this file —
  * confirm against the consuming code.
  */
case class ScalingConfig(
  scalerType: String,
  scalerMin: Double,
  scalerMax: Double,
  standardScalerMeanFlag: Boolean,
  standardScalerStdDevFlag: Boolean,
  pNorm: Double
)

/**
  * Settings record with four required parameters; it is the type of
  * `MainConfig.featureInteractionConfig`.
  *
  * NOTE(review): the accepted strings for `retentionMode` and the scale of
  * `targetInteractionPercentage` (fraction vs. percent) are not visible in
  * this file — confirm against the consuming code.
  */
case class FeatureInteractionConfig(
  retentionMode: String,
  continuousDiscretizerBucketCount: Int,
  parallelism: Int,
  targetInteractionPercentage: Double
)




© 2015 - 2025 Weber Informatics LLC | Privacy Policy