All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ai.starlake.extract.TemplateParams.scala Maven / Gradle / Ivy

package ai.starlake.extract

import ai.starlake.config.Settings
import ai.starlake.schema.model.WriteMode.OVERWRITE
import ai.starlake.schema.model.{Domain, Schema, TransformInput}

/** Parameters fed to the extraction script's Mustache template for a single table.
  *
  * @param domainToExport
  *   Domain name
  * @param tableToExport
  *   Table to export
  * @param columnsToExport
  *   Columns to export, as `(name, type, ignore, privacy level)` tuples
  * @param fullExport
  *   Whether the table is fully exported (`true`) or delta exported (`false`)
  * @param deltaColumn
  *   If delta exported, the column holding the date of last update
  * @param dsvDelimiter
  *   Delimiter of the exported DSV result
  * @param auditDB
  *   Schema name where the audit table is created. Same as domainToExport by default
  * @param activeEnv
  *   Extra key/value pairs merged into the template parameters; on key collision they override
  *   the entries built from the other fields
  */
case class TemplateParams(
  domainToExport: String,
  tableToExport: String,
  columnsToExport: List[(String, String, Boolean, TransformInput)],
  fullExport: Boolean,
  deltaColumn: Option[String],
  dsvDelimiter: String,
  auditDB: Option[String],
  activeEnv: Map[String, String]
) {

  /** The key/value model handed to the template.
    *
    * Keys: `domain`, `table`, `domain_table`, `table_name`, `delimiter`, `columns`, `full_export`,
    * `audit_schema`, plus `delta_column` (upper-cased) when a delta column is defined, plus every
    * entry of `activeEnv`.
    *
    * Each element of `columns` is a map with the keys `name`, `type`, `ignore`, `privacyLevel`
    * (always rendered as a string) and `trailing_col_char`.
    */
  val paramMap: Map[String, Any] = {

    // A Mustache template cannot tell by itself which element of a list is the last one, so every
    // column carries its own separator: "," for all columns but the last one, "" for the last one.
    val lastColumnIndex = columnsToExport.size - 1
    val columnsParam: List[Map[String, Any]] =
      columnsToExport.zipWithIndex.map { case ((name, tpe, ignore, privacyLevel), index) =>
        Map[String, Any](
          "name"   -> name,
          "type"   -> tpe,
          "ignore" -> ignore,
          // Always a String, whatever the position of the column in the list
          "privacyLevel"      -> privacyLevel.toString,
          "trailing_col_char" -> (if (index == lastColumnIndex) "" else ",")
        )
      }

    val baseParams: Map[String, Any] = Map(
      "domain"       -> domainToExport,
      "table"        -> tableToExport,
      "domain_table" -> domainToExport, // For compatibility
      "table_name"   -> tableToExport, // For compatibility
      "delimiter"    -> dsvDelimiter,
      "columns"      -> columnsParam,
      "full_export"  -> fullExport,
      "audit_schema" -> auditDB.getOrElse(domainToExport)
    )

    // Only present when a delta column is defined
    val deltaParam: List[(String, Any)] =
      deltaColumn.toList.map(deltaCol => "delta_column" -> deltaCol.toUpperCase)

    // activeEnv is merged last so that its entries take precedence
    baseParams ++ deltaParam ++ activeEnv
  }
}

object TemplateParams {

  /** Builds one [[TemplateParams]] per table of the given domain.
    *
    * @param domain
    *   The domain whose tables are turned into template parameters
    * @param defaultDeltaColumn
    *   The delta column used for tables that have no entry in `deltaColumns`
    * @param deltaColumns
    *   A table name -> delta column mapping (if needing a special delta column for a given
    *   table). Has precedence over `defaultDeltaColumn`.
    * @param auditDB
    *   Passed unchanged to every generated [[TemplateParams]]
    * @param activeEnv
    *   Passed unchanged to every generated [[TemplateParams]]
    * @return
    *   The template parameters, in the order of `domain.tables`
    */
  def fromDomain(
    domain: Domain,
    defaultDeltaColumn: Option[String],
    deltaColumns: Map[String, String],
    auditDB: Option[String],
    activeEnv: Map[String, String]
  )(implicit settings: Settings): List[TemplateParams] =
    for (table <- domain.tables) yield {
      // A table-specific delta column wins over the default one
      val tableDeltaColumn = deltaColumns.get(table.name).orElse(defaultDeltaColumn)
      fromSchema(domain.name, table, tableDeltaColumn, auditDB, activeEnv)
    }

  /** Builds the template parameters of a single table from its schema.
    *
    * @param domainName
    *   Name of the domain the table belongs to
    * @param schema
    *   The schema describing the table and its columns
    * @param deltaColumn
    *   The delta column to use for that table; dropped when the table is fully exported
    * @param auditDB
    *   Passed unchanged to the generated [[TemplateParams]]
    * @param activeEnv
    *   Passed unchanged to the generated [[TemplateParams]]
    * @return
    *   The corresponding TemplateParams
    */
  def fromSchema(
    domainName: String,
    schema: Schema,
    deltaColumn: Option[String],
    auditDB: Option[String],
    activeEnv: Map[String, String]
  )(implicit settings: Settings): TemplateParams = {
    // The table is fully exported when its write strategy resolves to OVERWRITE
    val resolvedWriteMode = for {
      metadata <- schema.metadata
      strategy <- metadata.writeStrategy
    } yield strategy.toWriteMode()
    val overwrites = resolvedWriteMode.contains(OVERWRITE)

    val tableName = schema.name

    // Attributes defining a script are left out of the export
    val plainAttributes = schema.attributes.filter(attribute => attribute.script.isEmpty)
    val exportedColumns = plainAttributes.map { attribute =>
      (
        attribute.name,
        attribute.`type`,
        attribute.resolveIgnore(),
        attribute.resolvePrivacy()
      )
    }

    // A delta column is meaningless for a full export
    val effectiveDeltaColumn = if (overwrites) None else deltaColumn

    val separator = schema.metadata.flatMap(metadata => metadata.separator).getOrElse(",")

    new TemplateParams(
      domainToExport = domainName,
      tableToExport = tableName,
      columnsToExport = exportedColumns,
      fullExport = overwrites,
      deltaColumn = effectiveDeltaColumn,
      dsvDelimiter = separator,
      auditDB = auditDB,
      activeEnv = activeEnv
    )
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy