All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.spark.sql.hudi.HoodieOptionConfig.scala Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.hudi

import org.apache.hudi.DataSourceWriteOptions
import org.apache.hudi.common.model.{DefaultHoodieRecordPayload, HoodieTableType}
import org.apache.hudi.common.table.HoodieTableConfig
import org.apache.hudi.common.util.ValidationUtils
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.StructType


/**
 * The HoodieOptionConfig defines some short name for the hoodie
 * option key and value.
 */
object HoodieOptionConfig {

  /**
   * The short name for the value of COW_TABLE_TYPE_OPT_VAL.
   */
  val SQL_VALUE_TABLE_TYPE_COW = "cow"

  /**
   * The short name for the value of MOR_TABLE_TYPE_OPT_VAL.
   */
  val SQL_VALUE_TABLE_TYPE_MOR = "mor"

  /**
   * SQL option `primaryKey`: maps to the record key field of the write options and to the
   * record key fields of the table config.
   */
  val SQL_KEY_TABLE_PRIMARY_KEY: HoodieSQLOption[String] = buildConf()
    .withSqlKey("primaryKey")
    .withHoodieKey(DataSourceWriteOptions.RECORDKEY_FIELD.key)
    .withTableConfigKey(HoodieTableConfig.RECORDKEY_FIELDS.key)
    .defaultValue(DataSourceWriteOptions.RECORDKEY_FIELD.defaultValue())
    .build()

  /**
   * SQL option `type`: the table type, defaulting to the short name [[SQL_VALUE_TABLE_TYPE_COW]].
   */
  val SQL_KEY_TABLE_TYPE: HoodieSQLOption[String] = buildConf()
    .withSqlKey("type")
    .withHoodieKey(DataSourceWriteOptions.TABLE_TYPE.key)
    .withTableConfigKey(HoodieTableConfig.TYPE.key)
    .defaultValue(SQL_VALUE_TABLE_TYPE_COW)
    .build()

  /**
   * SQL option `preCombineField`. It has no default value.
   */
  val SQL_KEY_PRECOMBINE_FIELD: HoodieSQLOption[String] = buildConf()
    .withSqlKey("preCombineField")
    .withHoodieKey(DataSourceWriteOptions.PRECOMBINE_FIELD.key)
    .withTableConfigKey(HoodieTableConfig.PRECOMBINE_FIELD.key)
    .build()

  /**
   * SQL option `payloadClass`, defaulting to the class name of DefaultHoodieRecordPayload.
   */
  val SQL_PAYLOAD_CLASS: HoodieSQLOption[String] = buildConf()
    .withSqlKey("payloadClass")
    .withHoodieKey(DataSourceWriteOptions.PAYLOAD_CLASS_NAME.key)
    .withTableConfigKey(HoodieTableConfig.PAYLOAD_CLASS_NAME.key)
    .defaultValue(classOf[DefaultHoodieRecordPayload].getName)
    .build()

  /**
   * All [[HoodieSQLOption]] values held in fields of this object, found through reflection so
   * that a newly declared option is picked up by every mapping below without extra wiring.
   *
   * Only fields whose declared type is exactly [[HoodieSQLOption]] are considered. A field that
   * has not been initialized yet would be read as null, so this must only be evaluated after
   * the option vals above have been assigned.
   */
  private def declaredSqlOptions: Seq[HoodieSQLOption[_]] = {
    getClass.getDeclaredFields
      .filter(_.getType == classOf[HoodieSQLOption[_]])
      .map { field =>
        field.setAccessible(true)
        field.get(this).asInstanceOf[HoodieSQLOption[_]]
      }
      .toSeq
  }

  /**
   * The mapping of the sql short name key to the hoodie's config key.
   */
  private lazy val keyMapping: Map[String, String] =
    declaredSqlOptions.map(option => option.sqlKeyName -> option.hoodieKeyName).toMap

  /**
   * The mapping of the sql short name key to the hoodie table config key
   * defined in HoodieTableConfig. Options without a table config key are left out.
   */
  private lazy val keyTableConfigMapping: Map[String, String] = {
    val pairs = for {
      option <- declaredSqlOptions
      tableConfigKey <- option.tableConfigKey
    } yield option.sqlKeyName -> tableConfigKey
    pairs.toMap
  }

  /**
   * Inverse of [[keyTableConfigMapping]]: hoodie table config key to sql short name key.
   */
  private lazy val tableConfigKeyToSqlKey: Map[String, String] =
    keyTableConfigMapping.map(_.swap)

  /**
   * Mapping of the short sql value to the hoodie's config value
   */
  private val valueMapping: Map[String, String] = Map(
    SQL_VALUE_TABLE_TYPE_COW -> DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL,
    SQL_VALUE_TABLE_TYPE_MOR -> DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL
  )

  /**
   * Inverse of [[valueMapping]]: hoodie's config value to the short sql value.
   */
  private lazy val reverseValueMapping: Map[String, String] = valueMapping.map(_.swap)

  /**
   * The default sql options: the sql short name key and the stringified default value of every
   * declared option that has both a table config key and a default value.
   */
  val defaultSqlOptions: Map[String, String] = {
    val defaults = for {
      option <- declaredSqlOptions
      if option.tableConfigKey.isDefined
      default <- option.defaultValue
    } yield option.sqlKeyName -> default.toString
    defaults.toMap
  }

  /**
   * Fill the given options with [[defaultSqlOptions]]; entries of `options` win on key clashes.
   * @param options the user supplied options
   * @return the defaults overlaid with `options`
   */
  def withDefaultSqlOptions(options: Map[String, String]): Map[String, String] = defaultSqlOptions ++ options

  /**
   * Mapping the sql's short name key/value in the options to the hoodie's config key/value.
   * Keys and values without a mapping are kept unchanged. The value mapping is applied to the
   * value of every entry, whatever its key is.
   * @param options the options keyed by sql short names and/or hoodie config keys
   * @return the options keyed by hoodie config keys
   */
  def mappingSqlOptionToHoodieParam(options: Map[String, String]): Map[String, String] = {
    options.map { case (key, value) =>
      keyMapping.getOrElse(key, key) -> valueMapping.getOrElse(value, value)
    }
  }

  /**
   * Mapping the sql options to the hoodie table config which used to store to the hoodie
   * .properties when create the table. Entries whose key has no table config mapping are
   * passed through untouched (including their value).
   * @param options the options keyed by sql short names
   * @return the options keyed by hoodie table config keys
   */
  def mappingSqlOptionToTableConfig(options: Map[String, String]): Map[String, String] = {
    options.map { case (key, value) =>
      keyTableConfigMapping.get(key) match {
        case Some(tableConfigKey) => tableConfigKey -> valueMapping.getOrElse(value, value)
        case None => key -> value
      }
    }
  }

  /**
   * Mapping the table config (loaded from the hoodie.properties) to the sql options.
   * Keys and values without a reverse mapping are kept unchanged.
   * @param options the options keyed by hoodie table config keys
   * @return the options keyed by sql short names
   */
  def mappingTableConfigToSqlOption(options: Map[String, String]): Map[String, String] = {
    options.map { case (key, value) =>
      tableConfigKeyToSqlKey.getOrElse(key, key) -> reverseValueMapping.getOrElse(value, value)
    }
  }

  /**
   * Get the primary key from the table options.
   * @param options the table options (sql short names or hoodie config keys)
   * @return the non-empty, comma separated record key fields; empty when none is configured
   */
  def getPrimaryColumns(options: Map[String, String]): Array[String] = {
    mappingSqlOptionToHoodieParam(options)
      .get(DataSourceWriteOptions.RECORDKEY_FIELD.key)
      .map(_.split(",").filter(_.nonEmpty))
      .getOrElse(Array.empty[String])
  }

  /**
   * Get the table type from the table options.
   * @param options the table options (sql short names or hoodie config keys)
   * @return the configured table type, or the default of DataSourceWriteOptions.TABLE_TYPE
   */
  def getTableType(options: Map[String, String]): String = {
    mappingSqlOptionToHoodieParam(options)
      .getOrElse(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.TABLE_TYPE.defaultValue)
  }

  /**
   * Get the preCombine field from the table options.
   * @param options the table options (sql short names or hoodie config keys)
   * @return the preCombine field, or None when it is missing or empty
   */
  def getPreCombineField(options: Map[String, String]): Option[String] = {
    mappingSqlOptionToHoodieParam(options)
      .get(DataSourceWriteOptions.PRECOMBINE_FIELD.key)
      .filter(_.nonEmpty)
  }

  /**
   * Drop every option whose key starts with "hoodie." or is one of the sql short name keys.
   * @param options the options to clean
   * @return the remaining options
   */
  def deleteHoodieOptions(options: Map[String, String]): Map[String, String] = {
    options.filterNot { case (key, _) => key.startsWith("hoodie.") || keyMapping.contains(key) }
  }

  /**
   * Extract the primaryKey, preCombineField and type options, i.e. every sql short name key
   * except the payload class.
   * @param options the options keyed by hoodie table config keys and/or sql short names
   * @return the matching entries, keyed by sql short names
   */
  def extractSqlOptions(options: Map[String, String]): Map[String, String] = {
    val sqlOptions = mappingTableConfigToSqlOption(options)
    val targetOptions = keyMapping.keySet - SQL_PAYLOAD_CLASS.sqlKeyName
    // A strict filter is used on purpose: filterKeys only yields a lazy view (which is not
    // serializable on Scala 2.12), whereas callers are promised a plain Map.
    sqlOptions.filter { case (key, _) => targetOptions.contains(key) }
  }

  /**
   * Validate the primaryKey, preCombineField and type options against the table schema.
   * Every check goes through ValidationUtils.checkArgument, which rejects the options with the
   * given message when the condition does not hold.
   *
   * NOTE(review): the table type is compared case-insensitively here, while the value mapping
   * of this object only knows the lower case short names - confirm that values such as "COW"
   * are handled by the callers.
   *
   * @param spark      the session providing the column name resolver
   * @param schema     the schema of the table
   * @param sqlOptions the options keyed by sql short names
   */
  def validateTable(spark: SparkSession, schema: StructType, sqlOptions: Map[String, String]): Unit = {
    val resolver = spark.sessionState.conf.resolver

    // validate primary key: an option that is present but holds no key at all (e.g. "" or ",")
    // is treated the same way as a missing option.
    val primaryKeys = sqlOptions.get(SQL_KEY_TABLE_PRIMARY_KEY.sqlKeyName)
      .map(_.split(",").filter(_.nonEmpty))
      .getOrElse(Array.empty[String])
    ValidationUtils.checkArgument(primaryKeys.nonEmpty, "No `primaryKey` is specified.")
    primaryKeys.foreach { primaryKey =>
      ValidationUtils.checkArgument(schema.exists(f => resolver(f.name, primaryKey)),
        s"Can't find primaryKey `$primaryKey` in ${schema.treeString}.")
    }

    // validate precombine key: it is optional, so only a non-empty value is checked
    sqlOptions.get(SQL_KEY_PRECOMBINE_FIELD.sqlKeyName).filter(_.nonEmpty).foreach { precombineKey =>
      ValidationUtils.checkArgument(schema.exists(f => resolver(f.name, precombineKey)),
        s"Can't find preCombineKey `$precombineKey` in ${schema.treeString}.")
    }

    // validate table type
    val tableType = sqlOptions.get(SQL_KEY_TABLE_TYPE.sqlKeyName)
    ValidationUtils.checkArgument(tableType.nonEmpty, "No `type` is specified.")
    ValidationUtils.checkArgument(
      tableType.exists { value =>
        value.equalsIgnoreCase(SQL_VALUE_TABLE_TYPE_COW) || value.equalsIgnoreCase(SQL_VALUE_TABLE_TYPE_MOR)
      },
      s"'type' must be '$SQL_VALUE_TABLE_TYPE_COW' or '$SQL_VALUE_TABLE_TYPE_MOR'")
  }

  /**
   * Start building a new [[HoodieSQLOption]].
   * @tparam T the type of the option's default value
   * @return a fresh, empty builder
   */
  def buildConf[T](): HoodieSQLOptionBuilder[T] = {
    new HoodieSQLOptionBuilder[T]
  }
}

/**
 * Description of one option that can be given under a short name in SQL.
 *
 * @param sqlKeyName     the short name of the option as written in SQL
 * @param hoodieKeyName  the hoodie config key the short name stands for
 * @param tableConfigKey the matching hoodie table config key, if the option has one
 * @param defaultValue   the default value of the option, if it has one
 * @tparam T the type of the default value
 */
case class HoodieSQLOption[T](
    sqlKeyName: String,
    hoodieKeyName: String,
    tableConfigKey: Option[String],
    defaultValue: Option[T]
)

/**
 * Fluent, mutable builder of [[HoodieSQLOption]].
 *
 * Every setter returns this builder so the calls can be chained. Parts that are never set end
 * up as null (the two key names) or None (the table config key and the default value) in the
 * built option.
 *
 * @tparam T the type of the option's default value
 */
class HoodieSQLOptionBuilder[T] {

  // The two key names stay null until their setter is called.
  private var sqlKey: String = _
  private var hoodieKey: String = _
  // The optional parts are wrapped when they are set, so that a null argument counts as unset.
  private var tableKey: Option[String] = None
  private var defaultOpt: Option[T] = None

  /** Runs the given assignment and hands back this builder for chaining. */
  private def updated(assignment: => Unit): HoodieSQLOptionBuilder[T] = {
    assignment
    this
  }

  /** Sets the short name of the option as written in SQL. */
  def withSqlKey(sqlKeyName: String): HoodieSQLOptionBuilder[T] =
    updated { sqlKey = sqlKeyName }

  /** Sets the hoodie config key the short name stands for. */
  def withHoodieKey(hoodieKeyName: String): HoodieSQLOptionBuilder[T] =
    updated { hoodieKey = hoodieKeyName }

  /** Sets the hoodie table config key of the option. */
  def withTableConfigKey(tableConfigKey: String): HoodieSQLOptionBuilder[T] =
    updated { tableKey = Option(tableConfigKey) }

  /** Sets the default value of the option. */
  def defaultValue(defaultValue: T): HoodieSQLOptionBuilder[T] =
    updated { defaultOpt = Option(defaultValue) }

  /** Creates the option from the parts collected so far. */
  def build(): HoodieSQLOption[T] =
    HoodieSQLOption(
      sqlKeyName = sqlKey,
      hoodieKeyName = hoodieKey,
      tableConfigKey = tableKey,
      defaultValue = defaultOpt)
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy