// Many resources are needed to download a project, so please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download or modify it as often as you want.
import{ModelWithSummary, SummarizableEstimator}
import org.apache.spark.sql.types._
import org.apache.spark.sql.{DataFrame, Row, SQLContext, functions}
import scala.util.Try
/**
  * Common trait for all hyper-parameter optimizers.
  */
trait HyperparametersOptimizer[M <: ModelWithSummary[M]] extends SummarizableEstimator[M] with HasConfigurations {
/**
  * Friendly names of parameters, used for the column names under which configurations are stored.
  * Parameters without a registered name fall back to their `toString()`.
  */
val paramNames: Param[Map[Param[_], String]] = new Param[Map[Param[_], String]](
  this, "paramsFriendlyNames", "Names of the parameters to use in column names to store configs"
)
/** Name of the summary block the metrics are read from. */
val metricsBlock: Param[String] = new Param[String](
  this,
  "metricsBlock",
  "Name of the block with metrics to get results from.")

/** SQL expression yielding the single metric value; `__THIS__` stands for the metrics table. */
val metricsExpression: Param[String] = new Param[String](
  this,
  "metricsExpression",
  "Expression used to extract single metric value from the metrics table. __THIS__ shoud be used as a table alias.")

/** Name of the column holding the identifier of a configuration. */
val configurationIndexColumn: Param[String] = new Param[String](
  this,
  "configurationIndexColumn",
  "Name of the column to store id of config for further analysis.")

/** Name of the column holding the metric achieved by a configuration. */
val resultingMetricColumn: Param[String] = new Param[String](
  this,
  "resultingMetricColumn",
  "Name of the column to store resulting metrics for further analysis.")

/** Name of the column holding the error text of a failed configuration. */
val errorColumn: Param[String] = new Param[String](
  this,
  "errorColumn",
  "Name of the column to store text of the error if occurs.")
// Default names for the metrics block and the service columns of the configurations block.
// No default is set here for metricsExpression or paramNames.
setDefault(
  metricsBlock -> "metrics",
  configurationIndexColumn -> "configurationIndex",
  resultingMetricColumn -> "resultingMetric",
  errorColumn -> "error"
)
/**
  * Registers friendly names for parameters.
  *
  * @param value pairs of parameter and the name to use for it
  * @return this instance
  */
def setParamNames(value: (Param[_], String)*): this.type = {
  val friendlyNames: Map[Param[_], String] = value.toMap
  set(paramNames, friendlyNames)
}
/**
  * Resolves the name used for a parameter in the columns of the configurations block.
  *
  * @param param parameter to resolve the name for
  * @return the friendly name registered in `paramNames`, or `param.toString()` when `paramNames`
  *         is not set or has no entry for the parameter
  */
def resolveParamName(param: Param[_]) : String =
  get(paramNames).flatMap(_.get(param)).getOrElse(param.toString())
/** @return name of the summary block the metrics are read from */
def getMetricsBlock: String = getOrDefault(metricsBlock)
/** Sets the name of the summary block the metrics are read from and returns this instance. */
def setMetricsBlock(value: String): this.type = {
  set(metricsBlock, value)
}
/** @return expression used to extract the single metric value from the metrics table */
def getMetricsExpression: String = getOrDefault(metricsExpression)
/** Sets the expression used to extract the single metric value and returns this instance. */
def setMetricsExpression(value: String): this.type = {
  set(metricsExpression, value)
}
/** @return name of the column storing the configuration id */
def getConfigurationIndexColumn: String = getOrDefault(configurationIndexColumn)
/** Sets the name of the column storing the configuration id and returns this instance. */
def setConfigurationIndexColumn(value: String): this.type = {
  set(configurationIndexColumn, value)
}
/** @return name of the column storing the resulting metric */
def getResultingMetricColumn: String = getOrDefault(resultingMetricColumn)
/** Sets the name of the column storing the resulting metric and returns this instance. */
def setResultingMetricColumn(value: String): this.type = {
  set(resultingMetricColumn, value)
}
/**
  * Extracts the resulting metric value from the summary of a trained model.
  *
  * The metrics block of the model is exposed to SQL as a temporary view and the
  * `metricsExpression` query is evaluated against it, with `__THIS__` replaced by the view name.
  *
  * @param params configuration the model was trained with, passed through unchanged
  * @param model  trained model whose summary holds the metrics block
  * @return the configuration, the model and the sum of the first column of the query result
  */
protected def extractParamsAndQuality(params: ParamMap, model: M): (ParamMap, M, Double) = {
  val metrics = model.summary.blocks(Block($(metricsBlock)))

  val tableName = model.uid + "_metrics"
  // Literal replacement: the view name must not be interpreted as a regex replacement pattern.
  val query = $(metricsExpression).replace("__THIS__", tableName)

  // The query refers to the metrics by name, so they have to be registered as a view first.
  metrics.createOrReplaceTempView(tableName)
  val quality = try {
    metrics.sqlContext.sql(query).collect().map(_.getAs[Number](0).doubleValue()).sum
  } finally {
    // Do not leak the temporary view, even when the query fails.
    metrics.sqlContext.dropTempTable(tableName)
  }

  (params, model, quality)
}
/**
  * Given the whole history of the optimization, creates the resulting model enriched with the
  * configurations summary block.
  */
protected def extractBestModel(sqlContext: SQLContext, failedModels: Seq[(ParamMap, Try[M])], rankedModels: Seq[(ParamMap, M, Double)]): M = {
// NOTE(review): this definition is damaged in this copy of the file: the right-hand side of
// nestedBlocks is cut, and the result expression and the closing brace are missing.
// Restore it from version control before relying on it.
// Summary block describing every investigated configuration, built from both the ranked and the failed models.
val configurationBlock: DataFrame = createConfigurationsBlock(sqlContext, failedModels, rankedModels)
// Now get the best model and enrich its summary
// The first ranked model is taken as the best one (rankedModels is presumably ordered best
// first — TODO confirm); `head` throws if rankedModels is empty.
val bestModel = rankedModels.head._2
// Visible fragments: for each summary block, the per-model summaries get a configuration index
// column and are unioned; the configurations block is then added under `configurations`.
// NOTE(review): the collections being mapped over are missing from the lines below; x._2 is
// presumably the position of the model in rankedModels — TODO confirm.
val nestedBlocks: Map[Block, DataFrame] =
block => block ->
x => x._1._2.summary(block).withColumn($(configurationIndexColumn), functions.lit(x._2))
).reduce(_ union _)).toMap ++ Map(configurations -> configurationBlock)
/**
  * Creates the summary block with the investigated configurations.
  */
protected def createConfigurationsBlock(sqlContext: SQLContext, failedModels: Seq[(ParamMap, Try[M])], rankedModels: Seq[(ParamMap, M, Double)]): DataFrame = {
// NOTE(review): this definition is damaged in this copy of the file: several expressions are cut
// mid-line (keys, the schema mapping, extractParams, rows), closing braces are missing and the
// createDataFrame call is not terminated. Restore it from version control before relying on it.
// Extract parameters to build config for
val keys: Seq[Param[_]] =[Param[Any]]).sortBy(
// Infer dataset schema
// Three fixed columns come first (configuration index, resulting metric, error text), followed
// by one nullable column per parameter.
val schema = StructType(
StructField($(configurationIndexColumn), IntegerType),
StructField($(resultingMetricColumn), DoubleType),
StructField($(errorColumn), StringType)) ++ => {
// Numeric, boolean and array parameters map to native Spark SQL types; every other parameter
// is stored in a string column.
val dataType = x match {
case _: IntParam => IntegerType
case _: DoubleParam => DoubleType
case _: LongParam => LongType
case _: BooleanParam => BooleanType
case _: FloatParam => FloatType
case _: StringArrayParam => ArrayType(StringType, true)
case _: DoubleArrayParam => ArrayType(DoubleType, true)
case _: IntArrayParam => ArrayType(IntegerType, true)
case _ => StringType
// The column is named by the friendly name from paramNames when one is registered, otherwise by
// the parameter's toString().
StructField(get(paramNames).map(_.getOrElse(x, x.toString())).getOrElse(x.toString()), dataType, true)
// Values of primitive and array parameters are kept as they are; values of any other parameter
// are JSON-encoded with the parameter's own jsonEncode, matching the string column above.
def extractParams(params: ParamMap) = { => params.get(key).map(value => key match {
case _: IntParam | _: DoubleParam | _: LongParam | _: BooleanParam | _: FloatParam => value
case _: StringArrayParam | _: DoubleArrayParam | _: IntArrayParam => value
case _ => key.asInstanceOf[Param[Any]].jsonEncode(value)
// Construct resulting block with variable part of configuration
// Rows of successful configurations carry their index, their metric and an empty error string.
val rows = => {
val index: Int = x._2
val params: ParamMap = x._1._1
val metric: Double = x._1._3
Row.fromSeq(Seq[Any](index, metric, "") ++ extractParams(params))
// Rows of failed configurations carry Int.MaxValue as the index, NaN as the metric and the
// failure's toString as the error text.
}) ++ failedModels.filter(_._2.isFailure).map(x => {
val params = x._1
val error = x._2.failed.get.toString
Row.fromSeq(Seq[Any](Int.MaxValue, Double.NaN, error) ++ extractParams(params))
// The rows are parallelized into a single partition before the DataFrame is created.
val configurationBlock = sqlContext.createDataFrame(
sqlContext.sparkContext.parallelize(rows, 1),
/**
  * Restores the configuration of a model from the first row of its configurations summary block.
  *
  * @param model model whose summary contains the configurations block
  * @return the pair decoded from that row by the row-based `extractConfig`
  * @throws NoSuchElementException if the configurations block of the model is empty
  */
def extractConfig(model : M) : (Double, ParamMap) = {
  val row = model.summary(configurations).collect().headOption.getOrElse(
    throw new NoSuchElementException(s"Configurations block of model ${model.uid} is empty"))
  // Decoding of the row is delegated to the optimizer-specific overload.
  extractConfig(row)
}
/**
  * In order to support correct restoration from the temporary model storage and grouped optimization
  * we need a way to restore the model configuration from its summary row of the configurations block.
  */
protected def extractConfig(row: Row): (Double, ParamMap) // abstract: decodes one row of the configurations block; the Double is presumably the resulting metric — TODO confirm
override def copy(extra: ParamMap): HyperparametersOptimizer[M] // abstract override: declares the result of copy as a HyperparametersOptimizer[M]