
// com.databricks.labs.automl.params.Configuration.scala (Maven / Gradle / Ivy)
package com.databricks.labs.automl.params
/**
 * Top-level immutable parameter bundle declared in this file.
 *
 * It combines scalar settings (strings, booleans, numbers, maps) with the
 * nested configuration objects [[FillConfig]], [[OutlierConfig]],
 * [[PearsonConfig]], [[CovarianceConfig]], [[FeatureInteractionConfig]],
 * [[ScalingConfig]], [[GeneticConfig]] and [[MLFlowConfig]].
 *
 * This declaration only fixes field names, order and types; how each value is
 * interpreted is decided by the code that consumes it.
 *
 * NOTE(review): `fieldsToIgnoreInVector` is an `Array`, so the generated
 * `equals`/`hashCode` compare that field by reference rather than by content.
 *
 * NOTE(review): the flags here are spelled `pearsonFilteringFlag` /
 * `covarianceFilteringFlag`, whereas [[DataPrepConfig]] spells the similarly
 * named flags `pearsonFilterFlag` / `covarianceFilterFlag`.
 */
case class MainConfig(
  // Model family plus the label / features column settings.
  modelFamily: String,
  labelCol: String,
  featuresCol: String,
  // Boolean switches and the data-prep settings declared alongside them.
  naFillFlag: Boolean,
  varianceFilterFlag: Boolean,
  outlierFilterFlag: Boolean,
  pearsonFilteringFlag: Boolean,
  covarianceFilteringFlag: Boolean,
  oneHotEncodeFlag: Boolean,
  scalingFlag: Boolean,
  featureInteractionFlag: Boolean,
  dataPrepCachingFlag: Boolean,
  dataPrepParallelism: Int,
  // Auto-stopping switch and score.
  autoStoppingFlag: Boolean,
  autoStoppingScore: Double,
  // Feature-importance cutoff and date/time conversion settings.
  featureImportanceCutoffType: String,
  featureImportanceCutoffValue: Double,
  dateTimeConversionType: String,
  fieldsToIgnoreInVector: Array[String],
  // Boundaries keyed by name: numeric (Double, Double) pairs and string lists.
  // NOTE(review): tuple order is presumably (lower, upper) -- confirm with consumers.
  numericBoundaries: Map[String, (Double, Double)],
  stringBoundaries: Map[String, List[String]],
  // Scoring settings.
  scoringMetric: String,
  scoringOptimizationStrategy: String,
  // Nested configuration objects.
  fillConfig: FillConfig,
  outlierConfig: OutlierConfig,
  pearsonConfig: PearsonConfig,
  covarianceConfig: CovarianceConfig,
  featureInteractionConfig: FeatureInteractionConfig,
  scalingConfig: ScalingConfig,
  geneticConfig: GeneticConfig,
  // MLflow switches and nested MLflow settings.
  mlFlowLoggingFlag: Boolean,
  mlFlowLogArtifactsFlag: Boolean,
  mlFlowConfig: MLFlowConfig,
  // Remaining pipeline-level settings.
  inferenceConfigSaveLocation: String,
  dataReductionFactor: Double,
  pipelineDebugFlag: Boolean,
  pipelineId: String
)
/**
 * Immutable bundle of six boolean switches.
 *
 * NOTE(review): the per-field notes below are inferred from the field names
 * only; this file does not show how the flags are consumed -- confirm against
 * the calling code.
 *
 * @param naFillFlag           presumably toggles the NA-fill stage
 * @param varianceFilterFlag   presumably toggles the variance filter
 * @param outlierFilterFlag    presumably toggles the outlier filter
 * @param pearsonFilterFlag    presumably toggles the Pearson filter
 * @param covarianceFilterFlag presumably toggles the covariance filter
 * @param scalingFlag          presumably toggles feature scaling
 */
case class DataPrepConfig(
  naFillFlag: Boolean,
  varianceFilterFlag: Boolean,
  outlierFilterFlag: Boolean,
  pearsonFilterFlag: Boolean,
  covarianceFilterFlag: Boolean,
  scalingFlag: Boolean
)
/**
 * Immutable bundle of MLflow-related settings: six strings and one
 * string-to-string map.
 *
 * NOTE(review): the per-field notes below are inferred from the field names
 * only; confirm against the code that consumes this class.
 *
 * @param mlFlowTrackingURI        presumably the tracking server URI
 * @param mlFlowExperimentName     presumably the experiment name
 * @param mlFlowAPIToken           presumably an API token; treat as a secret -- TODO confirm
 * @param mlFlowModelSaveDirectory presumably the directory models are saved to
 * @param mlFlowLoggingMode        logging mode selector; accepted values are not shown here
 * @param mlFlowBestSuffix         suffix string; its use is not shown here
 * @param mlFlowCustomRunTags      custom run tags as key/value strings
 */
case class MLFlowConfig(
  mlFlowTrackingURI: String,
  mlFlowExperimentName: String,
  mlFlowAPIToken: String,
  mlFlowModelSaveDirectory: String,
  mlFlowLoggingMode: String,
  mlFlowBestSuffix: String,
  mlFlowCustomRunTags: Map[String, String]
)
/**
 * Immutable bundle of fill-related settings.
 *
 * This declaration only fixes field names, order and types; accepted string
 * values and numeric ranges are decided by the consuming code and are not
 * visible in this file.
 *
 * NOTE(review): `numericNAFillMap` is typed `Map[String, AnyVal]`, so its
 * values are not restricted to a single numeric type at compile time.
 *
 * NOTE(review): [[OutlierConfig]] also declares a field named
 * `filterPrecision`; whether the two are related is not shown here.
 */
case class FillConfig(
  // Fill statistics for numeric and character data.
  numericFillStat: String,
  characterFillStat: String,
  modelSelectionDistinctThreshold: Int,
  // Cardinality settings.
  cardinalitySwitch: Boolean,
  cardinalityType: String,
  cardinalityLimit: Int,
  cardinalityPrecision: Double,
  cardinalityCheckMode: String,
  filterPrecision: Double,
  // Fill-value maps keyed by name (presumably column names -- TODO confirm).
  categoricalNAFillMap: Map[String, String],
  numericNAFillMap: Map[String, AnyVal],
  // Blanket fill values and the fill mode selector.
  characterNABlanketFillValue: String,
  numericNABlanketFillValue: Double,
  naFillMode: String
)
/**
 * Immutable bundle of outlier-filter settings.
 *
 * NOTE(review): the per-field notes below are inferred from the field names
 * only; confirm against the code that consumes this class.
 *
 * NOTE(review): `fieldsToIgnore` is an `Array`, so the generated
 * `equals`/`hashCode` compare that field by reference: two instances built
 * from distinct arrays with the same contents are not equal.
 *
 * @param filterBounds            bounds selector; accepted values are not shown here
 * @param lowerFilterNTile        presumably the lower n-tile cutoff
 * @param upperFilterNTile        presumably the upper n-tile cutoff
 * @param filterPrecision         precision value; its use is not shown here
 * @param continuousDataThreshold integer threshold; its use is not shown here
 * @param fieldsToIgnore          presumably field names excluded from filtering
 */
case class OutlierConfig(
  filterBounds: String,
  lowerFilterNTile: Double,
  upperFilterNTile: Double,
  filterPrecision: Double,
  continuousDataThreshold: Int,
  fieldsToIgnore: Array[String]
)
/**
 * Immutable bundle of Pearson-filter settings.
 *
 * NOTE(review): the per-field notes below are inferred from the field names
 * only; accepted string values are not visible in this file -- confirm
 * against the consuming code.
 *
 * @param filterStatistic   statistic selector
 * @param filterDirection   direction selector
 * @param filterManualValue presumably the cutoff used when filtering manually
 * @param filterMode        mode selector
 * @param autoFilterNTile   presumably the n-tile used when filtering automatically
 */
case class PearsonConfig(
  filterStatistic: String,
  filterDirection: String,
  filterManualValue: Double,
  filterMode: String,
  autoFilterNTile: Double
)
/**
 * Immutable pair of correlation cutoff values.
 *
 * NOTE(review): no ordering or range constraint between the two cutoffs is
 * enforced by this declaration; confirm expectations with the consuming code.
 *
 * @param correlationCutoffLow  the low cutoff
 * @param correlationCutoffHigh the high cutoff
 */
case class CovarianceConfig(
  correlationCutoffLow: Double,
  correlationCutoffHigh: Double
)
/**
 * Immutable bundle of first-generation settings.
 *
 * NOTE(review): the per-field notes below are inferred from the field names
 * only; confirm against the code that consumes this class.
 *
 * @param permutationCount presumably the number of permutations to generate
 * @param indexMixingMode  mixing-mode selector; accepted values are not shown here
 * @param arraySeed        presumably a random seed
 */
case class FirstGenerationConfig(
  permutationCount: Int,
  indexMixingMode: String,
  arraySeed: Long
)
/**
 * Immutable bundle of settings whose names reference k-means, LSH and
 * vector mutation.
 *
 * This declaration only fixes field names, order and types; accepted string
 * values and numeric ranges are decided by the consuming code and are not
 * visible in this file.
 */
case class KSampleConfig(
  syntheticCol: String,
  kGroups: Int,
  // k-means settings.
  kMeansMaxIter: Int,
  kMeansTolerance: Double,
  kMeansDistanceMeasurement: String,
  kMeansSeed: Long,
  kMeansPredictionCol: String,
  // LSH settings.
  lshHashTables: Int,
  lshSeed: Long,
  lshOutputCol: String,
  // Quorum and mutation settings.
  quorumCount: Int,
  minimumVectorCountToMutate: Int,
  vectorMutationMethod: String,
  mutationMode: String,
  mutationValue: Double,
  // Label-balance settings.
  labelBalanceMode: String,
  cardinalityThreshold: Int,
  numericRatio: Double,
  numericTarget: Int,
  outputDfRepartitionScaleFactor: Int
)
/**
 * Immutable bundle of settings for the genetic / evolutionary part of the
 * configuration, including the nested [[KSampleConfig]] and
 * [[FirstGenerationConfig]] objects.
 *
 * This declaration only fixes field names, order and types; accepted string
 * values and numeric ranges are decided by the consuming code and are not
 * visible in this file.
 *
 * NOTE(review): `modelSeed` is typed `Map[String, Any]`, so its values carry
 * no compile-time type information; consumers must check or cast them.
 */
case class GeneticConfig(
  // Parallelism, folds and train-split settings.
  parallelism: Int,
  kFold: Int,
  trainPortion: Double,
  trainSplitMethod: String,
  kSampleConfig: KSampleConfig,
  trainSplitChronologicalColumn: String,
  trainSplitChronologicalRandomPercentage: Double,
  seed: Long,
  // Generation and mutation settings.
  firstGenerationGenePool: Int,
  numberOfGenerations: Int,
  numberOfParentsToRetain: Int,
  numberOfMutationsPerGeneration: Int,
  geneticMixing: Double,
  generationalMutationStrategy: String,
  fixedMutationValue: Int,
  mutationMagnitudeMode: String,
  evolutionStrategy: String,
  // MBO settings.
  geneticMBORegressorType: String,
  geneticMBOCandidateFactor: Int,
  // Continuous-evolution settings.
  continuousEvolutionMaxIterations: Int,
  continuousEvolutionStoppingScore: Double,
  continuousEvolutionImprovementThreshold: Int,
  continuousEvolutionParallelism: Int,
  continuousEvolutionMutationAggressiveness: Int,
  continuousEvolutionGeneticMixing: Double,
  continuousEvolutionRollingImprovementCount: Int,
  modelSeed: Map[String, Any],
  // Hyper-space inference settings.
  hyperSpaceInference: Boolean,
  hyperSpaceInferenceCount: Int,
  hyperSpaceModelType: String,
  hyperSpaceModelCount: Int,
  // Initial-generation settings.
  initialGenerationMode: String,
  initialGenerationConfig: FirstGenerationConfig,
  // Cache-backing and split-caching settings.
  deltaCacheBackingDirectory: String,
  splitCachingStrategy: String,
  deltaCacheBackingDirectoryRemovalFlag: Boolean
)
/**
 * Immutable bundle of feature-scaling settings.
 *
 * NOTE(review): the per-field notes below are inferred from the field names
 * only; which fields apply to which `scalerType` is not shown in this file --
 * confirm against the consuming code.
 *
 * @param scalerType               scaler selector; accepted values are not shown here
 * @param scalerMin                presumably the lower bound of the scaled range
 * @param scalerMax                presumably the upper bound of the scaled range
 * @param standardScalerMeanFlag   presumably toggles mean centering
 * @param standardScalerStdDevFlag presumably toggles scaling by standard deviation
 * @param pNorm                    presumably the p value of a p-norm
 */
case class ScalingConfig(
  scalerType: String,
  scalerMin: Double,
  scalerMax: Double,
  standardScalerMeanFlag: Boolean,
  standardScalerStdDevFlag: Boolean,
  pNorm: Double
)
/**
 * Immutable bundle of feature-interaction settings.
 *
 * NOTE(review): the per-field notes below are inferred from the field names
 * only; confirm against the code that consumes this class.
 *
 * @param retentionMode                    retention-mode selector; accepted values are not shown here
 * @param continuousDiscretizerBucketCount presumably the bucket count used to discretize continuous data
 * @param parallelism                      presumably the degree of parallelism
 * @param targetInteractionPercentage      percentage value; its scale is not shown here
 */
case class FeatureInteractionConfig(
  retentionMode: String,
  continuousDiscretizerBucketCount: Int,
  parallelism: Int,
  targetInteractionPercentage: Double
)
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy