All Downloads are FREE. The search and download functionality uses the official Maven repository.

com.databricks.labs.automl.model.tools.structures.SeedGenerator.scala Maven / Gradle / Ivy

package com.databricks.labs.automl.model.tools.structures

import com.databricks.labs.automl.params.MLPCConfig

import scala.collection.mutable.ArrayBuffer
import scala.math._
import scala.reflect.ClassTag
import scala.util.Random

trait SeedGenerator {

  /**
    * Builds an evenly spaced search space between the supplied boundaries and rounds every point to the
    * nearest whole number.
    *
    * @param boundaries lower and upper limits of the space
    * @param generatorCount number of points requested from the linear generator
    * @return the rounded points, still typed as Double
    */
  def generateLinearIntSpace(boundaries: NumericBoundaries,
                             generatorCount: Int): Array[Double] =
    generateLinearSpace(boundaries, generatorCount)
      .map(point => point.round.toDouble)

  /**
    * Builds an evenly spaced sequence of points running from the minimum boundary to the maximum boundary.
    *
    * The final point is the maximum boundary itself rather than a computed value, so the space always ends
    * exactly on the upper limit. When generatorCount is smaller than 2 the result holds only the maximum.
    *
    * @param boundaries lower and upper limits of the space
    * @param generatorCount number of points requested
    * @return the generated points in ascending step order
    */
  def generateLinearSpace(boundaries: NumericBoundaries,
                          generatorCount: Int): Array[Double] = {

    val lower = boundaries.minimum
    val upper = boundaries.maximum

    // Distance between two neighbouring points.
    val stepSize = (upper - lower) / (generatorCount.toDouble - 1.0)

    // Every point except the last one is derived from its step index.
    val leadingPoints =
      (0 until generatorCount - 1).map(step => lower + step * stepSize)

    (leadingPoints :+ upper).toArray
  }

  /**
    * Maps a value onto the exponential curve `a * exp(b * value)` that passes through the lower bound at
    * `value == minScale` and through `maxScale` at `value == maxScale`.
    *
    * @param minScale lower bound of the range; a value of exactly 0.0 is replaced by 1.0E-10
    * @param maxScale upper bound of the range
    * @param value the point to convert
    * @return the converted value
    */
  def convertToLog(minScale: Double,
                   maxScale: Double,
                   value: Double): Double = {

    // A zero lower bound would put a zero in the denominator of the ratio below.
    val minVal = if (minScale == 0.0) 1.0E-10 else minScale

    val span = maxScale - minVal

    // Growth rate of the curve. A zero-width range would evaluate 0.0 / 0.0 (NaN), so the limit of the
    // expression as the bounds converge (1 / minVal) is used instead.
    val b = if (span == 0.0) 1.0 / minVal else log(maxScale / minVal) / span

    // Scale factor chosen so that the curve hits maxScale at value == maxScale.
    val a = maxScale / exp(b * maxScale)

    a * exp(b * value)
  }

  /**
    * Builds a search space by generating a linear space between the boundaries and passing every point
    * through convertToLog with those same boundaries.
    *
    * @param boundaries lower and upper limits of the space
    * @param generatorCount number of points requested from the linear generator
    * @return the converted points, in the order of the underlying linear space
    */
  def generateLogSpace(boundaries: NumericBoundaries,
                       generatorCount: Int): Array[Double] = {

    val linearPoints = generateLinearSpace(boundaries, generatorCount)

    linearPoints.map { point =>
      convertToLog(boundaries.minimum, boundaries.maximum, point)
    }
  }

  /**
    * Assembles a layer-size array: the input size first, then `layerCount` hidden layers, then the number of
    * distinct classes.
    *
    * The hidden layer at 1-based position p has size `inputFeatureSize + layerCount - p + sizeAdjustment`,
    * so each hidden layer is one unit smaller than the previous one.
    *
    * @param inputFeatureSize size of the first entry
    * @param distinctClasses size of the last entry
    * @param layerCount number of hidden layers to insert; values below 1 insert none
    * @param sizeAdjustment constant added to every hidden layer size
    * @return the assembled layer sizes
    */
  private[tools] def constructLayerArray(inputFeatureSize: Int,
                                         distinctClasses: Int,
                                         layerCount: Int,
                                         sizeAdjustment: Int): Array[Int] = {

    val hiddenLayers = (1 to layerCount).map { position =>
      inputFeatureSize + layerCount - position + sizeAdjustment
    }

    (inputFeatureSize +: hiddenLayers :+ distinctClasses).toArray
  }

  /**
    * Generates `generatorCount` layer-size arrays by pairing an integer linear space of hidden layer counts
    * with an integer linear space of size adjustments, index by index.
    *
    * @param layerBoundaryLow lower limit for the hidden layer count
    * @param layerBoundaryHigh upper limit for the hidden layer count
    * @param hiddenBoundaryLow lower limit for the hidden layer size adjustment
    * @param hiddenBoundaryHigh upper limit for the hidden layer size adjustment
    * @param inputFeatureSize first entry of every generated array
    * @param distinctClasses last entry of every generated array
    * @param generatorCount number of arrays to generate
    * @return one layer-size array per generated index
    */
  def generateArraySpace(layerBoundaryLow: Int,
                         layerBoundaryHigh: Int,
                         hiddenBoundaryLow: Int,
                         hiddenBoundaryHigh: Int,
                         inputFeatureSize: Int,
                         distinctClasses: Int,
                         generatorCount: Int): Array[Array[Int]] = {

    // Candidate hidden layer counts, one per generated configuration.
    val layerCounts = generateLinearIntSpace(
      NumericBoundaries(layerBoundaryLow, layerBoundaryHigh),
      generatorCount
    )

    // Candidate size adjustments, one per generated configuration.
    val sizeAdjustments = generateLinearIntSpace(
      NumericBoundaries(hiddenBoundaryLow, hiddenBoundaryHigh),
      generatorCount
    )

    val generatedLayers = (0 until generatorCount).map { idx =>
      constructLayerArray(
        inputFeatureSize,
        distinctClasses,
        layerCounts(idx).toInt,
        sizeAdjustments(idx).toInt
      )
    }

    generatedLayers.toArray
  }

  /**
    * Computes the `root`-th root of `n`, evaluated as `exp(1 / root)` raised to the power `log(n)`.
    *
    * @param n the value to take the root of
    * @param root the degree of the root
    * @return the computed root
    */
  private[SeedGenerator] def getNthRoot(n: Double, root: Double): Double = {
    val base = exp(1.0 / root)
    val exponent = log(n)
    pow(base, exponent)
  }

  /**
    * Counts the entries in a numeric boundary map.
    *
    * @param numericBoundaries boundary pairs keyed by name
    * @return the number of keys in the map
    */
  def getNumberOfElements(
    numericBoundaries: Map[String, (Double, Double)]
  ): Int = numericBoundaries.size

  /**
    * Takes the `numberOfElements`-th root of `targetIterations` and rounds it up to a whole number.
    *
    * @param targetIterations the value whose root is taken
    * @param numberOfElements the degree of the root
    * @return the root, rounded up to the next integer
    */
  def getPermutationCounts(targetIterations: Int,
                           numberOfElements: Int): Int = {

    val perElementCount =
      getNthRoot(targetIterations.toDouble, numberOfElements.toDouble)

    perElementCount.ceil.toInt
  }

  /**
    * Draws `sampleCount` elements from the supplied array, with replacement, using a generator seeded with
    * `seed` so that the same arguments always produce the same sample.
    *
    * @param hyperParameterArray pool of candidate values to draw from
    * @param sampleCount number of draws to make
    * @param seed seed for the random generator
    * @tparam T element type of the pool
    * @return the drawn elements, in draw order
    */
  protected[tools] def randomSampleArray[T: ClassTag](
    hyperParameterArray: Array[T],
    sampleCount: Int,
    seed: Long = 42L
  ): Array[T] = {

    val generator = new Random(seed)

    val draws = for (_ <- 0 until sampleCount)
      yield hyperParameterArray(generator.nextInt(hyperParameterArray.length))

    draws.toArray
  }

  /**
    * Wraps a pair of doubles in a NumericBoundaries instance, passing the first element as the first
    * argument and the second element as the second argument.
    *
    * @param parameter the pair of boundary values
    * @return the boundaries built from the pair
    */
  protected[tools] def extractContinuousBoundaries(
    parameter: Tuple2[Double, Double]
  ): NumericBoundaries = {
    val (first, second) = parameter
    NumericBoundaries(first, second)
  }

  /**
    * Picks the string at position `currentIterator`, falling back to the first entry when the iterator has
    * run past the end of the list.
    *
    * @param availableParams candidate string values
    * @param currentIterator position to select
    * @return the selected string together with the index that was actually used
    */
  protected[tools] def selectStringIndex(
    availableParams: List[String],
    currentIterator: Int
  ): StringSelectionReturn = {

    // Restart from the head once the iterator reaches or exceeds the list length.
    val chosenIndex =
      if (currentIterator < availableParams.length) currentIterator else 0

    StringSelectionReturn(availableParams(chosenIndex), chosenIndex)
  }

  /**
    * Derives a boolean from the iterator's parity.
    *
    * @param currentIterator the current position
    * @return true when `currentIterator` is even, false when it is odd
    */
  protected[tools] def selectCoinFlip(currentIterator: Int): Boolean =
    currentIterator % 2 == 0

  /**
    * Takes the first element of every array as the selected payload and returns it alongside the arrays
    * with that first element removed.
    *
    * @param numericArrays one array of candidate values per entry
    * @return the selected first elements and the remaining candidates
    */
  protected[tools] def staticIndexSelection(
    numericArrays: Array[Array[Double]]
  ): NumericArrayCollection = {

    val firstElements = for (candidates <- numericArrays) yield candidates(0)

    val leftovers = for (candidates <- numericArrays) yield candidates.drop(1)

    NumericArrayCollection(firstElements, leftovers)
  }

  /**
    * Shuffles every array independently, then takes the first element of each shuffled array as the
    * selected payload and returns it alongside the shuffled arrays with that element removed.
    *
    * @param numericArrays one array of candidate values per entry
    * @return the randomly selected elements and the remaining (shuffled) candidates
    */
  protected[tools] def randomIndexSelection(
    numericArrays: Array[Array[Double]]
  ): NumericArrayCollection = {

    val shuffledArrays = numericArrays.map { candidates =>
      Random.shuffle(candidates.toList).toArray
    }

    val pickedValues = shuffledArrays.map(shuffled => shuffled(0))

    val leftovers = shuffledArrays.map(shuffled => shuffled.drop(1))

    NumericArrayCollection(pickedValues, leftovers)
  }

  /**
    * Builds an MLPC configuration from the first element of each candidate array and returns it together
    * with the candidate arrays minus those first elements.
    *
    * The solver field is filled with the literal "placeholder".
    *
    * @param numericArrays candidate values for layers, max iterations, step size and tolerance
    * @return the selected configuration and the remaining candidates
    */
  protected[tools] def mlpcStaticIndexSelection(
    numericArrays: MLPCNumericArrays
  ): MLPCArrayCollection = {

    // The head of every candidate array becomes the selected configuration.
    val selectedConfig = MLPCConfig(
      layers = numericArrays.layersArray(0),
      maxIter = numericArrays.maxIterArray(0).toInt,
      solver = "placeholder",
      stepSize = numericArrays.stepSizeArray(0),
      tolerance = numericArrays.toleranceArray(0)
    )

    // Everything after the head stays available for later selections.
    val leftovers = MLPCNumericArrays(
      layersArray = numericArrays.layersArray.drop(1),
      maxIterArray = numericArrays.maxIterArray.drop(1),
      stepSizeArray = numericArrays.stepSizeArray.drop(1),
      toleranceArray = numericArrays.toleranceArray.drop(1)
    )

    MLPCArrayCollection(selectedConfig, leftovers)
  }

  /**
    * Derives the hidden layer count and a size adjustment from a layer-size array.
    *
    * The count is the array length minus 2. The adjustment is the difference between the second and first
    * entries when at least one hidden layer is present, otherwise 0.
    *
    * NOTE(review): constructLayerArray sizes the first hidden layer as
    * `inputFeatureSize + layerCount - 1 + sizeAdjustment`, so this difference matches the original
    * sizeAdjustment only when there is exactly one hidden layer - confirm whether that is intended.
    *
    * @param layers layer sizes
    * @return a pair of (hidden layer count, size adjustment)
    */
  protected[tools] def mlpcLayersExtractor(layers: Array[Int]): (Int, Int) = {

    val hiddenLayerCount = layers.length - 2

    val sizeAdjustment =
      if (hiddenLayerCount > 0) layers(1) - layers(0) else 0

    (hiddenLayerCount, sizeAdjustment)
  }

  /**
    * Shuffles each of the four candidate arrays independently and then delegates to
    * mlpcStaticIndexSelection, so the configuration is built from a random element of every array.
    *
    * @param numericArrays candidate values for layers, max iterations, step size and tolerance
    * @return the selected configuration and the remaining (shuffled) candidates
    */
  protected[tools] def mlpcRandomIndexSelection(
    numericArrays: MLPCNumericArrays
  ): MLPCArrayCollection = {

    val shuffledLayers = Random.shuffle(numericArrays.layersArray.toList).toArray
    val shuffledMaxIter = Random.shuffle(numericArrays.maxIterArray.toList).toArray
    val shuffledStepSize = Random.shuffle(numericArrays.stepSizeArray.toList).toArray
    val shuffledTolerance = Random.shuffle(numericArrays.toleranceArray.toList).toArray

    mlpcStaticIndexSelection(
      MLPCNumericArrays(
        layersArray = shuffledLayers,
        maxIterArray = shuffledMaxIter,
        stepSizeArray = shuffledStepSize,
        toleranceArray = shuffledTolerance
      )
    )
  }

  /**
    * Calculates the number of additional permutations that the string values contribute to the search space.
    * Each entry contributes the length of its value list minus one, and the contributions are summed.
    * @param stringBoundaries The string boundary payload for a modeling family
    * @return Int representing any additional permutations on the numeric body that will need to be generated in order
    *         to attempt to reach the target unique hyperparameter search space
    */
  protected[tools] def stringBoundaryPermutationCalculator(
    stringBoundaries: Map[String, List[String]]
  ): Int =
    stringBoundaries.foldLeft(0) {
      case (runningTotal, (_, options)) => runningTotal + options.length - 1
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy