no.nrk.bigquery.codegen.CodeGen.scala Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of bigquery-codegen_3 Show documentation
bigquery-codegen
The newest version!
/*
 * Copyright 2020 NRK
 *
 * SPDX-License-Identifier: MIT
 */

package no.nrk.bigquery
package codegen

import org.apache.commons.text.translate.LookupTranslator

import java.nio.charset.StandardCharsets
import java.nio.file.*
import scala.collection.immutable
import scala.jdk.CollectionConverters.*

/** Renders BigQuery definitions (schemas, partition types, views and tables) as Scala source text. */
object Generators {
  // Escapes text that is embedded in a generated `bqsql` interpolated string. `$` is doubled so the
  // generated interpolator does not treat it as the start of an interpolation.
  private val escaper = new LookupTranslator(
    Map[CharSequence, CharSequence]("\"" -> "\\\"", "\\" -> "\\\\", "$" -> "$$").asJava)

  /** Renders `v` as a plain (non-interpolated) Scala string literal.
    *
    * Quotes, backslashes and control characters are escaped so that arbitrary input (for example a
    * field description) cannot break out of the generated literal. `$` is left alone because the
    * generated literal is not interpolated.
    */
  private def lit(v: String): String = {
    val escaped = v.iterator.map {
      case '"' => "\\\""
      case '\\' => "\\\\"
      case '\n' => "\\n"
      case '\r' => "\\r"
      case '\t' => "\\t"
      case c if c.isControl => "\\u%04x".format(c.toInt)
      case c => c.toString
    }.mkString
    "\"" + escaped + "\""
  }

  /** Renders an optional description as `Some("...")` or `None`. */
  private def description(desc: Option[String]): String =
    desc.map(s => s"Some(${lit(s)})").getOrElse("None")

  /** Renders label key/value pairs as a `TableLabels(...)` expression, or `TableLabels.Empty` when
    * there are none.
    */
  private def genLabels(values: Iterable[(String, String)]): String =
    if (values.isEmpty) "TableLabels.Empty"
    else values.map { case (k, v) => s"${lit(k)} -> ${lit(v)}" }.mkString("TableLabels(", ", ", ")")

  /** Renders `schema` as a `BQSchema.of(...)` expression, one field per line.
    *
    * STRUCT fields are rendered recursively through `BQField.struct`, with their sub-fields indented
    * two extra spaces per nesting level.
    */
  def genSchema(schema: BQSchema): String = {
    val outerIndent = repeat(4)(" ")

    def genField(indent: Int, field: BQField): String = {
      val level = repeat(indent)(" ")
      val desc = description(field.description)
      val gen =
        if (field.tpe == BQField.Type.STRUCT) {
          val subfields = field.subFields.map(f => genField(indent + 2, f)).mkString(",\n")
          s"BQField.struct(${lit(field.name)}, BQField.Mode.${field.mode.name}, ${desc})(\n$subfields\n$level)"
        } else {
          s"BQField(${lit(field.name)}, BQField.Type.${field.tpe.name}, BQField.Mode.${field.mode.name}, ${desc})"
        }
      level + gen
    }

    s"""BQSchema.of(
       |${schema.fields.map(genField(6, _)).mkString(",\n")}
       |$outerIndent)""".stripMargin
  }

  /** Concatenates `rep` with itself `n` times; yields the empty string when `n` is not positive. */
  def repeat(n: Int)(rep: String): String = rep * n

  /** Renders `sql` as a triple-quoted `bqsql` interpolated string followed by `.stripMargin`.
    *
    * Multi-line queries get a `|` margin on every line; continuation lines are indented by a fixed
    * 22 spaces. Every line, including a single-line query, is passed through the escaper so that
    * `"`, `\` and `$` in the SQL do not alter the generated code.
    */
  def genQuery(sql: String): String = {
    val indent = repeat(14 + 8)(" ")

    def escape(line: String): String =
      escaper.translate(line)

    val query2 = {
      val split = sql.split("\n")
      if (split.length > 1) {
        val head = "|" + escape(split.head) + "\n"
        head + split.tail.map(line => indent + "|" + escape(line)).mkString("\n")
      } else escape(sql.trim) // single-line queries need the same escaping as multi-line ones
    }

    val quoted = "\"\"\""
    s"bqsql$quoted$query2$quoted.stripMargin"
  }

  /** Renders `partitionType` as the Scala expression that constructs it. */
  def genPartitionType(partitionType: BQPartitionType[Any]): String =
    partitionType match {
      case BQPartitionType.DatePartitioned(Ident(field)) =>
        s"BQPartitionType.DatePartitioned(Ident(${lit(field)}))"
      case BQPartitionType.HourPartitioned(Ident(field)) =>
        s"BQPartitionType.HourPartitioned(Ident(${lit(field)}))"
      case BQPartitionType.MonthPartitioned(Ident(field)) =>
        s"BQPartitionType.MonthPartitioned(Ident(${lit(field)}))"
      case BQPartitionType.Sharded =>
        "BQPartitionType.Sharded"
      case BQPartitionType.IntegerRangePartitioned(Ident(field), range) =>
        s"BQPartitionType.IntegerRangePartitioned(Ident(${lit(field)}), BQIntegerRange(start = ${range.start}, end = ${range.end}, interval = ${range.interval}))"
      case _: BQPartitionType.NotPartitioned =>
        "BQPartitionType.NotPartitioned"
    }

  // todo: can we use pprint for this?
  /** Renders `view` as a `BQTableDef.View(...)` expression. */
  def genViewDef(view: BQTableDef.View[Any]): String = {
    val partitionType = genPartitionType(view.partitionType)
    val labels = genLabels(view.labels.values)
    val outerLevel = repeat(2)(" ")
    val level = repeat(4)(" ")
    // The margin character is '§' rather than '|': the embedded query already carries '|' margins
    // (see genQuery) that must survive this stripMargin and be stripped by the generated code instead.
    s"""§BQTableDef.View(
        §${level}tableId = BQTableId.unsafeFromString(${lit(view.tableId.asString)}),
        §${level}partitionType = ${partitionType},
        §${level}query = ${genQuery(view.query.asString)},
        §${level}schema = ${genSchema(view.schema)},
        §${level}description = ${description(view.description)},
        §${level}labels = ${labels},
        §$outerLevel)""".stripMargin('§')
  }

  // todo: can we use pprint for this?
  /** Renders `table` as a `BQTableDef.Table(...)` expression. */
  def genTableDef(table: BQTableDef.Table[Any]): String = {
    val partitionType = genPartitionType(table.partitionType)
    val labels = genLabels(table.labels.values)
    val clustering = table.clustering.map(ident => s"Ident(${lit(ident.value)})").mkString("List(", ", ", ")")
    val options =
      if (table.tableOptions == TableOptions.Empty) "TableOptions.Empty"
      else s"TableOptions(partitionFilterRequired = ${table.tableOptions.partitionFilterRequired})"

    val outerLevel = repeat(2)(" ")
    val level = repeat(4)(" ")
    s"""|BQTableDef.Table(
        |${level}tableId = BQTableId.unsafeFromString(${lit(table.tableId.asString)}),
        |${level}schema = ${genSchema(table.schema)},
        |${level}partitionType = ${partitionType},
        |${level}description = ${description(table.description)},
        |${level}clustering = $clustering,
        |${level}labels = $labels,
        |${level}tableOptions = $options
        |$outerLevel)""".stripMargin
  }
}

/** Entry point that turns table and view definitions into generated [[Source]] files. */
object CodeGen {

  /** Produces one [[Source]] per table or view definition in `tables`.
    *
    * Definitions that are neither a `BQTableDef.Table` nor a `BQTableDef.View` are dropped.
    *
    * @param tables      the definitions to generate code for
    * @param basePackage package segments under which the generated objects are placed
    */
  def generate(tables: immutable.Seq[BQTableDef[Any]], basePackage: List[String]) = {
    val toSource: PartialFunction[BQTableDef[Any], Source] = {
      case tableDef: BQTableDef.Table[?] =>
        val location = SourceLocation(tableDef.tableId, basePackage, "Table")
        Source(location, Generators.genTableDef(tableDef))
      case viewDef: BQTableDef.View[?] =>
        val location = SourceLocation(viewDef.tableId, basePackage, "View")
        Source(location, Generators.genViewDef(viewDef))
    }
    tables.collect(toSource)
  }
}

/** A generated Scala source file: where it belongs and the expression it defines.
  *
  * @param loc        package and object name of the generated file
  * @param definition Scala expression assigned to the generated object's `definition` value
  */
case class Source(loc: SourceLocation, definition: String) {

  /** The complete file content: package clause, imports and an object wrapping `definition`. */
  def asObject: String =
    s"""|package ${loc.pkgNames.mkString(".")}
        |
        |import no.nrk.bigquery._
        |import no.nrk.bigquery.syntax._
        |
        |object ${loc.typeName} {
        |  val definition = $definition
        |}
        |""".stripMargin

  /** Writes [[asObject]] as UTF-8 to the file that `loc` resolves to under `basedir`.
    *
    * Missing parent directories are created first, since the destination sits in a nested package
    * directory that need not exist yet. An existing file is overwritten.
    */
  def writeTo(basedir: Path): Unit = {
    val destination = loc.destinationFile(basedir)
    // getParent may be null for a path without a parent, hence the Option wrapper.
    Option(destination.getParent).foreach(Files.createDirectories(_))
    Files.write(destination, asObject.getBytes(StandardCharsets.UTF_8))
    ()
  }
}

/** Package and object name of a generated source file.
  *
  * @param pkgNames package segments, outermost first
  * @param typeName name of the generated object, also used as the file name
  */
case class SourceLocation(pkgNames: List[String], typeName: String) {

  /** Resolves the file for this location below `basedir`, under `src/main/scala`. */
  def destinationFile(basedir: Path): Path = {
    val packageDir = pkgNames.mkString("/")
    val relativeFile = "src/main/scala/" + packageDir + "/" + typeName + ".scala"
    basedir.resolve(relativeFile)
  }
}

object SourceLocation {

  /** Derives the location of the generated object for `tableId`.
    *
    * The package is `packages` followed by the cleaned project and dataset ids; the object name is
    * the cleaned, capitalized table name followed by `suffix`.
    *
    * @param tableId  id of the table or view the source is generated for
    * @param packages base package segments
    * @param suffix   appended to the object name (for example `"Table"` or `"View"`)
    */
  def apply(tableId: BQTableId, packages: List[String], suffix: String): SourceLocation = {
    // Turns an id into an identifier: camel-cases on '-', '_' and ' ', drops every remaining
    // character that is not a letter or digit, and prefixes "x" when the result would start with a
    // digit. An id with no letters or digits at all yields "x" instead of failing.
    def clean(str: String): String = {
      val camelCase = str
        .split("[-_ ]")
        .zipWithIndex
        .map {
          case (frag, 0) => frag
          case (frag, _) => frag.capitalize
        }
        .mkString

      val alphanumeric = camelCase.filter(_.isLetterOrDigit)
      alphanumeric.headOption match {
        case None => "x"
        case Some(first) if first.isDigit => "x" + alphanumeric
        case Some(_) => alphanumeric
      }
    }

    val pkgNames =
      packages ::: List(clean(tableId.dataset.project.value), clean(tableId.dataset.id))

    val objectName = clean(tableId.tableName).capitalize
    SourceLocation(pkgNames, objectName + suffix)
  }
}