com.ebiznext.comet.job.index.connectionload.ConnectionLoadConfig.scala
package com.ebiznext.comet.job.index.connectionload
import java.sql.{DriverManager, SQLException}
import com.ebiznext.comet.config.Settings
import com.ebiznext.comet.schema.model.RowLevelSecurity
import com.ebiznext.comet.utils.CliConfig
import com.google.cloud.bigquery.JobInfo.{CreateDisposition, WriteDisposition}
import org.apache.spark.sql.DataFrame
import scopt.OParser
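
/** Parameters for loading a source (a file path or an in-memory DataFrame) into an
  * output table over a connection ("jdbc" by default).
  *
  * @param sourceFile        path to the source file, or an already-loaded DataFrame
  * @param outputTable       name of the target table
  * @param createDisposition BigQuery-style create disposition (CREATE_IF_NEEDED, CREATE_NEVER)
  * @param writeDisposition  BigQuery-style write disposition (WRITE_APPEND, WRITE_TRUNCATE, WRITE_EMPTY)
  * @param format            output format, "jdbc" by default
  * @param mode              optional save mode
  * @param options           connection options (driver, url, user, password, ...)
  * @param rls               optional row level security policies for the target table
  */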
case class ConnectionLoadConfig(
sourceFile: Either[String, DataFrame] = Left(""),
outputTable: String = "",
createDisposition: CreateDisposition = CreateDisposition.CREATE_IF_NEEDED,
writeDisposition: WriteDisposition = WriteDisposition.WRITE_APPEND,
format: String = "jdbc",
mode: Option[String] = None,
options: Map[String, String] = Map.empty,
rls: Option[List[RowLevelSecurity]] = None
)
object ConnectionLoadConfig extends CliConfig[ConnectionLoadConfig] {
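
  /** Verifies that the output table exists on the given JDBC connection by running the
    * engine's ping SQL for that table; if the query fails with a SQLException, the table
    * is assumed absent and is created with the engine's create SQL.
    */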
def checkTablePresent(
jdbcOptions: Settings.Connection,
jdbcEngine: Settings.JdbcEngine,
outputTable: String
): Unit = {
assert(jdbcOptions.format == "jdbc")
val table = jdbcEngine.tables(outputTable)
val conn = DriverManager.getConnection(
jdbcOptions.options("url"),
jdbcOptions.options("user"),
jdbcOptions.options("password")
)
try {
val stmt = conn.createStatement
try {
val pingSql = table.effectivePingSql(outputTable)
val rs = stmt.executeQuery(pingSql)
rs.close() // we don't need to fetch the result, it should be empty anyway.
} catch {
case _: SQLException =>
stmt.executeUpdate(table.createSql)
conn.commit() // some databases are transactional wrt schema updates 🥰
} finally {
stmt.close()
}
} finally {
conn.close()
}
}
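
  /** Builds a ConnectionLoadConfig from a connection declared in the Comet settings.
    * When the connection uses the "jdbc" format and createTableIfAbsent is true, the
    * target table is created beforehand if it does not already exist.
    */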
def fromComet(
jdbcName: String,
comet: Settings.Comet,
sourceFile: Either[String, DataFrame],
outputTable: String,
createDisposition: CreateDisposition = CreateDisposition.CREATE_IF_NEEDED,
writeDisposition: WriteDisposition = WriteDisposition.WRITE_APPEND,
partitions: Int = 1,
batchSize: Int = 1000,
createTableIfAbsent: Boolean = true
): ConnectionLoadConfig = {
// TODO: wanted to just call this "apply" but I'd need to get rid of the defaults in the ctor above
val jdbcOptions = comet.connections(jdbcName)
val isJDBC = jdbcOptions.format == "jdbc"
if (createTableIfAbsent && isJDBC) {
val jdbcEngine = comet.jdbcEngines(jdbcOptions.engine)
checkTablePresent(jdbcOptions, jdbcEngine, outputTable)
}
ConnectionLoadConfig(
sourceFile = sourceFile,
outputTable = outputTable,
createDisposition = createDisposition,
writeDisposition = writeDisposition,
      format = jdbcOptions.format,
      mode = jdbcOptions.mode,
      options = jdbcOptions.options
)
}
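
  /** scopt parser for the `comet cnxload` command line (see the sample invocation below). */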
val parser: OParser[Unit, ConnectionLoadConfig] = {
val builder = OParser.builder[ConnectionLoadConfig]
import builder._
OParser.sequence(
programName("comet cnxload"),
head("comet", "cnxload", "[options]"),
note(""),
opt[String]("source_file")
.action((x, c) => c.copy(sourceFile = Left(x)))
.text("Full Path to source file")
.required(),
opt[String]("output_table")
.action((x, c) => c.copy(outputTable = x))
.text("JDBC Output Table")
.required(),
opt[Map[String, String]]("options")
.action((x, c) => c.copy(options = x))
.text(
"Connection options eq for jdbc : driver, user, password, url, partitions, batchSize"
),
opt[String]("create_disposition")
.action((x, c) => c.copy(createDisposition = CreateDisposition.valueOf(x)))
.text(
"Big Query Create disposition https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/CreateDisposition"
),
opt[String]("write_disposition")
.action((x, c) => c.copy(writeDisposition = WriteDisposition.valueOf(x)))
.text(
"Big Query Write disposition https://cloud.google.com/bigquery/docs/reference/auditlogs/rest/Shared.Types/WriteDisposition"
)
)
}
  // Example: comet cnxload --source_file xxx --output_table schema --create_disposition CREATE_IF_NEEDED --write_disposition WRITE_TRUNCATE
  //   --options "url=jdbcUrl,user=username,password=pwd,partitions=1,batchSize=1000"
def parse(args: Seq[String]): Option[ConnectionLoadConfig] =
OParser.parse(parser, args, ConnectionLoadConfig())
}
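
For reference, a minimal usage sketch (not part of the original file) showing how command-line arguments might be parsed into a ConnectionLoadConfig through the parser above. The file path, table name, JDBC URL and credentials below are placeholder values, not settings from the project.

import com.ebiznext.comet.job.index.connectionload.ConnectionLoadConfig

object ConnectionLoadConfigExample {
  def main(args: Array[String]): Unit = {
    // Placeholder arguments; --options uses scopt's k1=v1,k2=v2 map syntax
    val sampleArgs = Seq(
      "--source_file", "/tmp/accepted/sales",
      "--output_table", "sales",
      "--options", "url=jdbc:postgresql://localhost:5432/db,user=scott,password=secret",
      "--write_disposition", "WRITE_TRUNCATE"
    )
    ConnectionLoadConfig.parse(sampleArgs) match {
      case Some(config) => println(s"Loading ${config.sourceFile} into ${config.outputTable}")
      case None         => println("Invalid arguments") // scopt reports the parse error itself
    }
  }
}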