
com.alibaba.hologres.spark.config.HologresConfigs.scala

package com.alibaba.hologres.spark.config

import com.alibaba.hologres.client.HoloConfig
import com.alibaba.hologres.client.copy.CopyMode
import com.alibaba.hologres.client.model.{WriteFailStrategy, WriteMode}
import com.alibaba.hologres.spark.utils.JDBCUtil
import com.alibaba.hologres.spark.utils.JDBCUtil._
import org.apache.spark.SparkContext

/** Parses Hologres configuration parameters from the Spark data source options. */
class HologresConfigs(sourceOptions: Map[String, String]) extends Serializable {
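  // Underlying holo-client configuration object; the options parsed below are forwarded to it.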
  val holoConfig = new HoloConfig

  val username: String = sourceOptions.getOrElse("username",
    throw new IllegalArgumentException("Missing necessary parameter 'username'."))
  holoConfig.setUsername(username)
  val password: String = sourceOptions.getOrElse("password",
    throw new IllegalArgumentException("Missing necessary parameter 'password'."))
  holoConfig.setPassword(password)

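  // The connection may be specified either as a full 'jdbcurl' or as 'endpoint' plus 'database',
  // from which getDbUrl builds a URL; either way the result is passed through formatUrlWithHologres.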
  lazy val database: String = sourceOptions.getOrElse("database",
    throw new IllegalArgumentException("If jdbcUrl is not provided, please provide parameter 'database'."))
  lazy val endpoint: String = sourceOptions.getOrElse("endpoint",
    throw new IllegalArgumentException("If jdbcUrl is not provided, please provide parameter 'endpoint'."))
  var jdbcUrl: String = JDBCUtil.formatUrlWithHologres(sourceOptions.getOrElse("jdbcurl", getDbUrl(endpoint, database)))
  holoConfig.setJdbcUrl(jdbcUrl)

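  /** Replaces the JDBC URL on both this config and the underlying HoloConfig,
    * e.g. once a direct-connect endpoint has been resolved (see 'direct_connect' below). */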
  def resetJdbcUrl(url: String): Unit = {
    jdbcUrl = url
    holoConfig.setJdbcUrl(jdbcUrl)
  }

  // When reading from Hologres, exactly one of 'query' or 'table' must be provided.
  val query: String = sourceOptions.getOrElse("query", "")
  var table: String = sourceOptions.getOrElse("table", "")
  lazy val isTableSource: Boolean = {
    if ((query == null || query.isEmpty) && (table == null || table.isEmpty)) {
      throw new IllegalArgumentException("Missing necessary parameter 'table'. If table is not provided, please provide parameter 'query' for read.")
    }
    if ((query != null && query.nonEmpty) && (table != null && table.nonEmpty)) {
      throw new IllegalArgumentException("If query is provided, please do not provide parameter 'table'.")
    }
    table != null && table.nonEmpty
  }

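  // 'write_mode' accepts camelCase or snake_case spellings (e.g. "insertOrReplace" / "insert_or_replace");
  // matching is case-insensitive because the value is lowercased before the match.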
  private val writeModeStr: String = sourceOptions.getOrElse("write_mode", "insertOrIgnore").toLowerCase
  val writeMode: WriteMode = writeModeStr match {
    case "insertorignore" | "insert_or_ignore" => WriteMode.INSERT_OR_IGNORE
    case "insertorreplace" | "insert_or_replace" => WriteMode.INSERT_OR_REPLACE
    case "insertorupdate" | "insert_or_update" => WriteMode.INSERT_OR_UPDATE
    case _ =>
      throw new IllegalArgumentException("Could not recognize writeMode " + writeModeStr)
  }
  holoConfig.setWriteMode(writeMode)

  val writeFailStrategy: String = sourceOptions.getOrElse("write_fail_strategy", "tryOneByOne").toLowerCase
  val wFailStrategy: WriteFailStrategy = writeFailStrategy match {
    case "tryonebyone" | "try_one_by_one" => WriteFailStrategy.TRY_ONE_BY_ONE
    case "none" => WriteFailStrategy.NONE
    case _ => throw new IllegalArgumentException("Could not recognize WriteFailStrategy " + writeFailStrategy)
  }
  holoConfig.setWriteFailStrategy(wFailStrategy)

  val enableServerlessComputing: Boolean = sourceOptions.getOrElse("enable_serverless_computing", "false").toBoolean
  val serverlessComputingQueryPriority: Int = sourceOptions.getOrElse("serverless_computing_query_priority", "3").toInt
  val statementTimeout: Int = sourceOptions.getOrElse("statement_timeout", "28800000").toInt
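  // Optional holo-client tuning parameters: each is forwarded only when explicitly provided.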
  sourceOptions.get("dynamic_partition").map(v => holoConfig.setDynamicPartition(v.toBoolean))
  sourceOptions.get("write_batch_size").map(v => holoConfig.setWriteBatchSize(v.toInt))
  sourceOptions.get("write_batch_byte_size").map(v => holoConfig.setWriteBatchByteSize(v.toLong))
  sourceOptions.get("write_max_interval_ms").map(v => holoConfig.setWriteMaxIntervalMs(v.toLong))
  sourceOptions.get("write_thread_size").map(v => holoConfig.setWriteThreadSize(v.toInt))
  sourceOptions.get("use_legacy_put_handler").map(v => holoConfig.setUseLegacyPutHandler(v.toBoolean))
  sourceOptions.get("retry_count").map(v => holoConfig.setRetryCount(v.toInt))
  sourceOptions.get("retry_sleep_init_ms").map(v => holoConfig.setRetrySleepInitMs(v.toLong))
  sourceOptions.get("retry_sleep_step_ms").map(v => holoConfig.setRetrySleepStepMs(v.toLong))
  sourceOptions.get("connection_max_idle_ms").map(v => holoConfig.setConnectionMaxIdleMs(v.toLong))
  sourceOptions.get("fixed_connection_mode").map(v => holoConfig.setUseFixedFe(v.toBoolean))
  val removeU0000: Boolean = sourceOptions.getOrElse("remove_u0000", "true").toBoolean
  holoConfig.setRemoveU0000InTextColumnValue(removeU0000)
  val scan_batch_size: Int = sourceOptions.getOrElse("scan_batch_size", "256").toInt
  val scan_timeout_seconds: Int = sourceOptions.getOrElse("scan_timeout_seconds", "28800").toInt
  val max_partition_count: Int = sourceOptions.getOrElse("max_partition_count", "80").toInt
  val pushDownPredicate: Boolean = sourceOptions.getOrElse("push_down_predicate", "true").toBoolean
  val pushDownLimit: Boolean = sourceOptions.getOrElse("push_down_limit", "true").toBoolean

  // The two former boolean parameters 'copy_write_mode' and 'bulk_load' have been consolidated into this single enum-style parameter.
  private val copyModeStr: String = sourceOptions.getOrElse("copy_write_mode", "stream").toLowerCase
  var copyMode: CopyMode = copyModeStr match {
    case "stream" => CopyMode.STREAM
    case "bulk_load" => CopyMode.BULK_LOAD
    case "bulk_load_on_conflict" => CopyMode.BULK_LOAD_ON_CONFLICT
    case "disable" => null
    case _ =>
      throw new IllegalArgumentException("Could not recognize copy_write_mode " + copyModeStr)
  }
  val copy_write_format: String = sourceOptions.getOrElse("copy_write_format", "binary")
  val copy_write_dirty_data_check: Boolean = sourceOptions.getOrElse("copy_write_dirty_data_check", "false").toBoolean
  var direct_connect: Boolean = sourceOptions.getOrElse("direct_connect", "true").toBoolean
  val max_cell_buffer_size: Int = sourceOptions.getOrElse("max_cell_buffer_size", "20971520").toInt
  val reshuffleByHoloDistributionKey: Boolean = sourceOptions.getOrElse("reshuffle_by_holo_distribution_key", "false").toBoolean

  val bulkRead: Boolean = sourceOptions.getOrElse("bulk_read", "true").toBoolean
  // 'Overwrite' comes from the user's SaveMode setting: at the start of the write a temporary table is created and written to,
  // and the original table's data is cleaned up once the write succeeds.
  var tempTableForOverwrite: String = _

  holoConfig.setInputNumberAsEpochMsForDatetimeColumn(true)
  var sparkAppName: String = SparkContext.getOrCreate().appName
  if (sparkAppName == null || sparkAppName.isEmpty) {
    sparkAppName = "default"
  }
  holoConfig.setAppName("hologres-connector-spark-" + sparkAppName)

  override def clone(): HologresConfigs = new HologresConfigs(sourceOptions)
}
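
// Usage sketch (illustrative only): shows how the option keys parsed above are typically passed
// through the Spark DataSource API. The format short name "hologres" and all placeholder values
// are assumptions, not taken from this file.
object HologresConfigsUsageExample {
  import org.apache.spark.sql.{DataFrame, SaveMode}

  def writeExample(df: DataFrame): Unit = {
    df.write
      .format("hologres") // assumed data source short name for this connector
      .option("username", "<access-id>")
      .option("password", "<access-key>")
      .option("endpoint", "<host>:<port>")
      .option("database", "<database>")
      .option("table", "<schema.table>")
      .option("write_mode", "insertOrReplace")
      .mode(SaveMode.Append)
      .save()
  }
}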



