
com.databricks.labs.automl.model.tools.HyperParameterFullSearch.scala Maven / Gradle / Ivy
package com.databricks.labs.automl.model.tools
import com.databricks.labs.automl.model.tools.structures._
import com.databricks.labs.automl.params._
import scala.collection.mutable.ArrayBuffer
class HyperParameterFullSearch extends Defaults with ModelConfigGenerators {
var _modelFamily = ""
var _modelType = ""
var _permutationCount = 10
var _indexMixingMode = "linear"
var _arraySeed = 42L
private val allowableMixingModes = List("linear", "random")
def setModelFamily(value: String): this.type = {
require(
_supportedModels.contains(value),
s"${this.getClass.toString} error! Model Family $value is not supported." +
s"\n\t Supported families: ${_supportedModels.mkString(", ")}"
)
_modelFamily = value
this
}
def setModelType(value: String): this.type = {
value match {
case "classifier" => _modelType = value
case "regressor" => _modelType = value
case _ =>
throw new UnsupportedOperationException(
s"Model type $value is not supported."
)
}
this
}
def setPermutationCount(value: Int): this.type = {
_permutationCount = value
this
}
def setIndexMixingMode(value: String): this.type = {
require(
allowableMixingModes.contains(value),
s"Index Mixing mode $value is not supported. Allowable modes are: " +
s"${allowableMixingModes.mkString(", ")}"
)
_indexMixingMode = value
this
}
def setArraySeed(value: Long): this.type = {
_arraySeed = value
this
}
def getModelFamily: String = _modelFamily
def getModelType: String = _modelType
def getPermutationCount: Int = _permutationCount
def getIndexMixingMode: String = _indexMixingMode
def getArraySeed: Long = _arraySeed
/**
* Method for generating a geometric space search for a first-generation hyper parameter generation for RandomForest
* @param numericBoundaries The allowable restrictive space for the numeric hyper parameters
* @param stringBoundaries The allowable values for string-based hyper parameters
* @return An Array of Hyperparameter settings for RandomForest algorithms.
*/
def initialGenerationSeedRandomForest(
numericBoundaries: Map[String, (Double, Double)],
stringBoundaries: Map[String, List[String]]
): Array[RandomForestConfig] = {
var outputPayload = new ArrayBuffer[RandomForestConfig]()
val impurityValues = _modelType match {
case "regressor" => List("variance")
case _ => stringBoundaries("impurity")
}
// Set the config object
val rfConfig = PermutationConfiguration(
modelType = _modelType,
permutationTarget = _permutationCount,
numericBoundaries = numericBoundaries,
stringBoundaries = stringBoundaries
)
// Generate the permutation collections
val generatedArrays = randomForestNumericArrayGenerator(rfConfig)
// Create some index values
var _impurityIdx = 0
var _featureSubsetStrategyIdx = 0
var numericArrays = Array(
generatedArrays.numTreesArray,
generatedArrays.maxBinsArray,
generatedArrays.maxDepthArray,
generatedArrays.minInfoGainArray,
generatedArrays.subSamplingRateArray
)
// Main builder loop
for (i <- 1 to _permutationCount) {
val selectedIndeces = _indexMixingMode match {
case "linear" => staticIndexSelection(numericArrays)
case "random" => randomIndexSelection(numericArrays)
case _ =>
throw new UnsupportedOperationException(
s"index mixing mode ${_indexMixingMode} is not supported."
)
}
numericArrays = selectedIndeces.remainingPayload
// Handle the string value selections
val impurityLoop = selectStringIndex(impurityValues, _impurityIdx)
_impurityIdx = impurityLoop.IndexCounterStatus
val featureSubsetStrategyLoop = selectStringIndex(
stringBoundaries("featureSubsetStrategy"),
_featureSubsetStrategyIdx
)
_featureSubsetStrategyIdx = featureSubsetStrategyLoop.IndexCounterStatus
outputPayload += RandomForestConfig(
numTrees = selectedIndeces.selectedPayload(0).toInt,
impurity = impurityLoop.selectedStringValue,
maxBins = selectedIndeces.selectedPayload(1).toInt,
maxDepth = selectedIndeces.selectedPayload(2).toInt,
minInfoGain = selectedIndeces.selectedPayload(3),
subSamplingRate = selectedIndeces.selectedPayload(4),
featureSubsetStrategy = featureSubsetStrategyLoop.selectedStringValue
)
_impurityIdx += 1
_featureSubsetStrategyIdx += 1
}
outputPayload.result.toArray
}
/**
* Method for generating a geometric search space for a first-generation hyper parameter generation for LightGBM
* @param numericBoundaries LightGBM numeric search space boundaries
* @param stringBoundaries LightGBM string search space boundaries
* @return An array of LightGBM configs
* @since 0.6.1
* @author Ben Wilson, Databricks
* @throws UnsupportedOperationException if the index mixing mode supplied is invalid.
*/
@throws(classOf[UnsupportedOperationException])
def initialGenerationSeedLightGBM(
numericBoundaries: Map[String, (Double, Double)],
stringBoundaries: Map[String, List[String]]
): Array[LightGBMConfig] = {
var outputPayload = new ArrayBuffer[LightGBMConfig]()
val lightGBMConfig = PermutationConfiguration(
modelType = _modelType,
permutationTarget = _permutationCount,
numericBoundaries = numericBoundaries,
stringBoundaries = stringBoundaries
)
val generatedArrays = lightGBMNumericArrayGenerator(lightGBMConfig)
var _boostFromAverageIdx = 0
var _boostingTypeIdx = 0
var numericArrays = Array(
generatedArrays.baggingFractionArray,
generatedArrays.baggingFreqArray,
generatedArrays.featureFractionArray,
generatedArrays.learningRateArray,
generatedArrays.maxBinArray,
generatedArrays.maxDepthArray,
generatedArrays.minSumHessianInLeafArray,
generatedArrays.numIterationsArray,
generatedArrays.numLeavesArray,
generatedArrays.lambdaL1Array,
generatedArrays.lambdaL2Array,
generatedArrays.alphaArray
)
for (i <- 1 to _permutationCount) {
val selectedIndeces = _indexMixingMode match {
case "linear" => staticIndexSelection(numericArrays)
case "random" => randomIndexSelection(numericArrays)
case _ =>
throw new UnsupportedOperationException(
s" Index mixing mode ${_indexMixingMode} is not supported."
)
}
numericArrays = selectedIndeces.remainingPayload
val boostFromAverageLoop = selectCoinFlip(_boostFromAverageIdx)
val boostingTypeLoop =
selectStringIndex(stringBoundaries("boostingType"), _boostingTypeIdx)
_boostingTypeIdx = boostingTypeLoop.IndexCounterStatus
outputPayload += LightGBMConfig(
baggingFraction = selectedIndeces.selectedPayload(0),
baggingFreq = selectedIndeces.selectedPayload(1).toInt,
featureFraction = selectedIndeces.selectedPayload(2),
learningRate = selectedIndeces.selectedPayload(3),
maxBin = selectedIndeces.selectedPayload(4).toInt,
maxDepth = selectedIndeces.selectedPayload(5).toInt,
minSumHessianInLeaf = selectedIndeces.selectedPayload(6),
numIterations = selectedIndeces.selectedPayload(7).toInt,
numLeaves = selectedIndeces.selectedPayload(8).toInt,
boostFromAverage = boostFromAverageLoop,
lambdaL1 = selectedIndeces.selectedPayload(9),
lambdaL2 = selectedIndeces.selectedPayload(10),
alpha = selectedIndeces.selectedPayload(11),
boostingType = boostingTypeLoop.selectedStringValue
)
_boostFromAverageIdx += 1
_boostingTypeIdx += 1
}
outputPayload.result.toArray
}
/**
* Method for generating a geometric search space for a first-generation hyper parameter generation for DecisionTrees
* @param numericBoundaries numeric bounds restrictions
* @param stringBoundaries string value restrictions
* @return An Array of Hyperparameter settings for DecisionTrees algorithms
*/
def initialGenerationSeedTrees(
numericBoundaries: Map[String, (Double, Double)],
stringBoundaries: Map[String, List[String]]
): Array[TreesConfig] = {
var outputPayload = new ArrayBuffer[TreesConfig]()
val impurityValues = _modelType match {
case "regressor" => List("variance")
case _ => stringBoundaries("impurity")
}
val treesConfig = PermutationConfiguration(
modelType = _modelType,
permutationTarget = _permutationCount,
numericBoundaries = numericBoundaries,
stringBoundaries = stringBoundaries
)
val generatedArrays = treesNumericArrayGenerator(treesConfig)
var _impurityIdx = 0
var numericArrays = Array(
generatedArrays.maxBinsArray,
generatedArrays.maxDepthArray,
generatedArrays.minInfoGainArray,
generatedArrays.minInstancesPerNodeArray
)
for (i <- 1 to _permutationCount) {
val selectedIndeces = _indexMixingMode match {
case "linear" => staticIndexSelection(numericArrays)
case "random" => randomIndexSelection(numericArrays)
case _ =>
throw new UnsupportedOperationException(
s"Index mixing mode ${_indexMixingMode} is not supported."
)
}
numericArrays = selectedIndeces.remainingPayload
val impurityLoop = selectStringIndex(impurityValues, _impurityIdx)
_impurityIdx = impurityLoop.IndexCounterStatus
outputPayload += TreesConfig(
impurity = impurityLoop.selectedStringValue,
maxBins = selectedIndeces.selectedPayload(0).toInt,
maxDepth = selectedIndeces.selectedPayload(1).toInt,
minInfoGain = selectedIndeces.selectedPayload(2),
minInstancesPerNode = selectedIndeces.selectedPayload(3).toInt
)
_impurityIdx += 1
}
outputPayload.result.toArray
}
def initialGenerationSeedGBT(
numericBoundaries: Map[String, (Double, Double)],
stringBoundaries: Map[String, List[String]]
): Array[GBTConfig] = {
var outputPayload = new ArrayBuffer[GBTConfig]()
val impurityValues = _modelType match {
case "regressor" => List("variance")
case _ => stringBoundaries("impurity")
}
val lossTypeValues = _modelType match {
case "regressor" => List("squared", "absolute")
case _ => stringBoundaries("lossType")
}
val gbtConfig = PermutationConfiguration(
modelType = _modelType,
permutationTarget = _permutationCount,
numericBoundaries = numericBoundaries,
stringBoundaries = stringBoundaries
)
val generatedArrays = gbtNumericArrayGenerator(gbtConfig)
var _impurityIdx = 0
var _lossTypeIdx = 0
var numericArrays = Array(
generatedArrays.maxBinsArray,
generatedArrays.maxDepthArray,
generatedArrays.maxIterArray,
generatedArrays.minInfoGainArray,
generatedArrays.minInstancesPerNodeArray,
generatedArrays.stepSizeArray
)
for (i <- 1 to _permutationCount) {
val selectedIndeces = _indexMixingMode match {
case "linear" => staticIndexSelection(numericArrays)
case "random" => randomIndexSelection(numericArrays)
case _ =>
throw new UnsupportedOperationException(
s"Index mixing mode ${_indexMixingMode} is not supported."
)
}
numericArrays = selectedIndeces.remainingPayload
val impurityLoop = selectStringIndex(impurityValues, _impurityIdx)
val lossTypeLoop = selectStringIndex(lossTypeValues, _lossTypeIdx)
_impurityIdx = impurityLoop.IndexCounterStatus
_lossTypeIdx = lossTypeLoop.IndexCounterStatus
outputPayload += GBTConfig(
impurity = impurityLoop.selectedStringValue,
lossType = lossTypeLoop.selectedStringValue,
maxBins = selectedIndeces.selectedPayload(0).toInt,
maxDepth = selectedIndeces.selectedPayload(1).toInt,
maxIter = selectedIndeces.selectedPayload(2).toInt,
minInfoGain = selectedIndeces.selectedPayload(3),
minInstancesPerNode = selectedIndeces.selectedPayload(4).toInt,
stepSize = selectedIndeces.selectedPayload(5)
)
_impurityIdx += 1
_lossTypeIdx += 1
}
outputPayload.result.toArray
}
def initialGenerationSeedLinearRegression(
numericBoundaries: Map[String, (Double, Double)],
stringBoundaries: Map[String, List[String]]
): Array[LinearRegressionConfig] = {
var outputPayload = new ArrayBuffer[LinearRegressionConfig]()
val linearRegressionConfig = PermutationConfiguration(
modelType = _modelType,
permutationTarget = _permutationCount,
numericBoundaries = numericBoundaries,
stringBoundaries = stringBoundaries
)
val generatedArrays = linearRegressionNumericArrayGenerator(
linearRegressionConfig
)
var _fitInterceptIdx = 0
var _standardizationIdx = 0
var _lossIdx = 0
var numericArrays = Array(
generatedArrays.elasticNetParamsArray,
generatedArrays.maxIterArray,
generatedArrays.regParamArray,
generatedArrays.toleranceArray
)
for (i <- 1 to _permutationCount) {
val selectedIndeces = _indexMixingMode match {
case "linear" => staticIndexSelection(numericArrays)
case "random" => randomIndexSelection(numericArrays)
case _ =>
throw new UnsupportedOperationException(
s"Index mixing mode ${_indexMixingMode} is not supported."
)
}
numericArrays = selectedIndeces.remainingPayload
val fitInterceptLoop = selectCoinFlip(_fitInterceptIdx)
val standardizationLoop = selectCoinFlip(_standardizationIdx)
val lossLoop = selectStringIndex(stringBoundaries("loss"), _lossIdx)
_lossIdx = lossLoop.IndexCounterStatus
/**
* For Linear Regression, the loss setting of 'huber' does not permit regularization of elasticnet or L1.
* It must be set to L2 regularization (elasticNetParams == 0.0) to function.
*/
val loss = lossLoop.selectedStringValue
val elasticNetParams = loss match {
case "huber" => 0.0
case _ => selectedIndeces.selectedPayload(0)
}
outputPayload += LinearRegressionConfig(
loss = loss,
elasticNetParams = elasticNetParams,
fitIntercept = fitInterceptLoop,
maxIter = selectedIndeces.selectedPayload(1).toInt,
regParam = selectedIndeces.selectedPayload(2),
standardization = standardizationLoop,
tolerance = selectedIndeces.selectedPayload(3)
)
_lossIdx += 1
_standardizationIdx += 1
_fitInterceptIdx += 1
}
outputPayload.result.toArray
}
def initialGenerationSeedLogisticRegression(
numericBoundaries: Map[String, (Double, Double)]
): Array[LogisticRegressionConfig] = {
var outputPayload = new ArrayBuffer[LogisticRegressionConfig]()
val logisticRegressionConfig = PermutationConfiguration(
modelType = _modelType,
permutationTarget = _permutationCount,
numericBoundaries = numericBoundaries,
stringBoundaries = Map[String, List[String]]()
)
val generatedArrays = logisticRegressionNumericArrayGenerator(
logisticRegressionConfig
)
var _fitInterceptIdx = 0
var _standardizationIdx = 0
var numericArrays = Array(
generatedArrays.elasticNetParamsArray,
generatedArrays.maxIterArray,
generatedArrays.regParamArray,
generatedArrays.toleranceArray
)
for (i <- 1 to _permutationCount) {
val selectedIndeces = _indexMixingMode match {
case "linear" => staticIndexSelection(numericArrays)
case "random" => randomIndexSelection(numericArrays)
case _ =>
throw new UnsupportedOperationException(
s"Index mixing mode ${_indexMixingMode} is not supported."
)
}
numericArrays = selectedIndeces.remainingPayload
val fitInterceptLoop = selectCoinFlip(_fitInterceptIdx)
val standardizationLoop = selectCoinFlip(_standardizationIdx)
outputPayload += LogisticRegressionConfig(
elasticNetParams = selectedIndeces.selectedPayload(0),
fitIntercept = fitInterceptLoop,
maxIter = selectedIndeces.selectedPayload(1).toInt,
regParam = selectedIndeces.selectedPayload(2),
standardization = standardizationLoop,
tolerance = selectedIndeces.selectedPayload(3)
)
_standardizationIdx += 1
_fitInterceptIdx += 1
}
outputPayload.result.toArray
}
def initialGenerationSeedSVM(
numericBoundaries: Map[String, (Double, Double)]
): Array[SVMConfig] = {
var outputPayload = new ArrayBuffer[SVMConfig]()
val svmConfig = PermutationConfiguration(
modelType = _modelType,
permutationTarget = _permutationCount,
numericBoundaries = numericBoundaries,
stringBoundaries = Map[String, List[String]]()
)
val generatedArrays = svmNumericArrayGenerator(svmConfig)
var _fitInterceptIdx = 0
var _standardizationIdx = 0
var numericArrays = Array(
generatedArrays.maxIterArray,
generatedArrays.regParamArray,
generatedArrays.toleranceArray
)
for (i <- 1 to _permutationCount) {
val selectedIndeces = _indexMixingMode match {
case "linear" => staticIndexSelection(numericArrays)
case "random" => randomIndexSelection(numericArrays)
case _ =>
throw new UnsupportedOperationException(
s"Index mixing mode ${_indexMixingMode} is not supported."
)
}
numericArrays = selectedIndeces.remainingPayload
val fitInterceptLoop = selectCoinFlip(_fitInterceptIdx)
val standardizationLoop = selectCoinFlip(_standardizationIdx)
outputPayload += SVMConfig(
fitIntercept = fitInterceptLoop,
maxIter = selectedIndeces.selectedPayload(0).toInt,
regParam = selectedIndeces.selectedPayload(1),
standardization = standardizationLoop,
tolerance = selectedIndeces.selectedPayload(2)
)
_standardizationIdx += 1
_fitInterceptIdx += 1
}
outputPayload.result.toArray
}
def initialGenerationSeedXGBoost(
numericBoundaries: Map[String, (Double, Double)]
): Array[XGBoostConfig] = {
var outputPayload = new ArrayBuffer[XGBoostConfig]()
val xgboostConfig = PermutationConfiguration(
modelType = _modelType,
permutationTarget = _permutationCount,
numericBoundaries = numericBoundaries,
stringBoundaries = Map[String, List[String]]()
)
val generatedArrays = xgboostNumericArrayGenerator(xgboostConfig)
var numericArrays = Array(
generatedArrays.alphaArray,
generatedArrays.etaArray,
generatedArrays.gammaArray,
generatedArrays.lambdaArray,
generatedArrays.maxDepthArray,
generatedArrays.subSampleArray,
generatedArrays.minChildWeightArray,
generatedArrays.numRoundArray,
generatedArrays.maxBinsArray,
generatedArrays.trainTestRatioArray
)
for (i <- 1 to _permutationCount) {
val selectedIndeces = _indexMixingMode match {
case "linear" => staticIndexSelection(numericArrays)
case "random" => randomIndexSelection(numericArrays)
case _ =>
throw new UnsupportedOperationException(
s"Index mixing mode ${_indexMixingMode} is not supported."
)
}
numericArrays = selectedIndeces.remainingPayload
outputPayload += XGBoostConfig(
alpha = selectedIndeces.selectedPayload(0),
eta = selectedIndeces.selectedPayload(1),
gamma = selectedIndeces.selectedPayload(2),
lambda = selectedIndeces.selectedPayload(3),
maxDepth = selectedIndeces.selectedPayload(4).toInt,
subSample = selectedIndeces.selectedPayload(5),
minChildWeight = selectedIndeces.selectedPayload(6),
numRound = selectedIndeces.selectedPayload(7).toInt,
maxBins = selectedIndeces.selectedPayload(8).toInt,
trainTestRatio = selectedIndeces.selectedPayload(9)
)
}
outputPayload.result.toArray
}
def initialGenerationSeedMLPC(
numericBoundaries: Map[String, (Double, Double)],
stringBoundaries: Map[String, List[String]],
inputFeatureSize: Int,
distinctClasses: Int
): Array[MLPCConfig] = {
var outputPayload = new ArrayBuffer[MLPCConfig]()
val mlpcConfig = MLPCPermutationConfiguration(
permutationTarget = _permutationCount,
numericBoundaries = numericBoundaries,
stringBoundaries = stringBoundaries,
inputFeatureSize = inputFeatureSize,
distinctClasses = distinctClasses
)
var generatedArrays = mlpcNumericArrayGenerator(mlpcConfig)
var _solverIdx = 0
for (i <- 1 to _permutationCount) {
val selectedIndeces = _indexMixingMode match {
case "linear" => mlpcStaticIndexSelection(generatedArrays)
case "random" => mlpcRandomIndexSelection(generatedArrays)
case _ =>
throw new UnsupportedOperationException(
s"Index mixing mode ${_indexMixingMode} is not supported."
)
}
generatedArrays = selectedIndeces.remainingPayloads
val solverLoop = selectStringIndex(stringBoundaries("solver"), _solverIdx)
_solverIdx = solverLoop.IndexCounterStatus
outputPayload += MLPCConfig(
layers = selectedIndeces.selectedPayload.layers,
maxIter = selectedIndeces.selectedPayload.maxIter,
solver = solverLoop.selectedStringValue,
stepSize = selectedIndeces.selectedPayload.stepSize,
tolerance = selectedIndeces.selectedPayload.tolerance
)
_solverIdx += 1
}
outputPayload.result.toArray
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy