/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.internal

import java.util.{Locale, Properties, TimeZone}
import java.util
import java.util.concurrent.TimeUnit
import java.util.concurrent.atomic.AtomicReference
import java.util.zip.Deflater

import scala.collection.JavaConverters._
import scala.collection.immutable
import scala.util.Try
import scala.util.control.NonFatal
import scala.util.matching.Regex

import org.apache.hadoop.fs.Path

import org.apache.spark.{ErrorMessageFormat, SparkConf, SparkContext, TaskContext}
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._
import org.apache.spark.internal.config.{IGNORE_MISSING_FILES => SPARK_IGNORE_MISSING_FILES}
import org.apache.spark.network.util.ByteUnit
import org.apache.spark.sql.catalyst.ScalaReflection
import org.apache.spark.sql.catalyst.analysis.{HintErrorLogger, Resolver}
import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode
import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
import org.apache.spark.sql.catalyst.plans.logical.HintErrorHandler
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME
import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
import org.apache.spark.sql.types.{AtomicType, TimestampNTZType, TimestampType}
import org.apache.spark.unsafe.array.ByteArrayMethods
import org.apache.spark.util.Utils

////////////////////////////////////////////////////////////////////////////////////////////////////
// This file defines the configuration options for Spark SQL.
////////////////////////////////////////////////////////////////////////////////////////////////////


object SQLConf {

  private[this] val sqlConfEntriesUpdateLock = new Object

  @volatile
  private[this] var sqlConfEntries: util.Map[String, ConfigEntry[_]] = util.Collections.emptyMap()

  private[this] val staticConfKeysUpdateLock = new Object

  @volatile
  private[this] var staticConfKeys: java.util.Set[String] = util.Collections.emptySet()

  private def register(entry: ConfigEntry[_]): Unit = sqlConfEntriesUpdateLock.synchronized {
    require(!sqlConfEntries.containsKey(entry.key),
      s"Duplicate SQLConfigEntry. ${entry.key} has been registered")
    val updatedMap = new java.util.HashMap[String, ConfigEntry[_]](sqlConfEntries)
    updatedMap.put(entry.key, entry)
    sqlConfEntries = updatedMap
  }

  // For testing only
  private[sql] def unregister(entry: ConfigEntry[_]): Unit = sqlConfEntriesUpdateLock.synchronized {
    val updatedMap = new java.util.HashMap[String, ConfigEntry[_]](sqlConfEntries)
    updatedMap.remove(entry.key)
    sqlConfEntries = updatedMap
  }

  private[internal] def getConfigEntry(key: String): ConfigEntry[_] = {
    sqlConfEntries.get(key)
  }

  private[internal] def getConfigEntries(): util.Collection[ConfigEntry[_]] = {
    sqlConfEntries.values()
  }

  private[internal] def containsConfigEntry(entry: ConfigEntry[_]): Boolean = {
    getConfigEntry(entry.key) == entry
  }

  private[sql] def containsConfigKey(key: String): Boolean = {
    sqlConfEntries.containsKey(key)
  }

  def registerStaticConfigKey(key: String): Unit = staticConfKeysUpdateLock.synchronized {
    val updated = new util.HashSet[String](staticConfKeys)
    updated.add(key)
    staticConfKeys = updated
  }

  def isStaticConfigKey(key: String): Boolean = staticConfKeys.contains(key)

  def buildConf(key: String): ConfigBuilder = ConfigBuilder(key).onCreate(register)

  def buildStaticConf(key: String): ConfigBuilder = {
    ConfigBuilder(key).onCreate { entry =>
      SQLConf.registerStaticConfigKey(entry.key)
      SQLConf.register(entry)
    }
  }
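
  // Illustrative sketch of the declaration pattern used by the entries in the rest of this
  // file: `buildConf` registers the entry through `onCreate(register)`, while `buildStaticConf`
  // additionally records its key as static. The entry below is hypothetical.
  //
  //   val EXAMPLE_FLAG = buildConf("spark.sql.example.flag")
  //     .doc("A hypothetical boolean flag.")
  //     .version("0.0.0")
  //     .booleanConf
  //     .createWithDefault(false)
  //
  //   // Resolved later through an active SQLConf instance:
  //   val enabled: Boolean = SQLConf.get.getConf(EXAMPLE_FLAG)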

  /**
   * Merge all non-static configs to the SQLConf. For example, when the 1st [[SparkSession]] and
   * the global [[SharedState]] have been initialized, all static configs have taken effect and
   * should not be set to other values. Other later created sessions should respect all static
   * configs and only be able to change non-static configs.
   */
  private[sql] def mergeNonStaticSQLConfigs(
      sqlConf: SQLConf,
      configs: Map[String, String]): Unit = {
    for ((k, v) <- configs if !staticConfKeys.contains(k)) {
      sqlConf.setConfString(k, v)
    }
  }

  /**
   * Extract entries from `SparkConf` and put them in the `SQLConf`
   */
  private[sql] def mergeSparkConf(sqlConf: SQLConf, sparkConf: SparkConf): Unit = {
    sparkConf.getAll.foreach { case (k, v) =>
      sqlConf.setConfString(k, v)
    }
  }

  /**
   * Default config. Only used when there is no active SparkSession for the thread.
   * See [[get]] for more information.
   */
  private lazy val fallbackConf = new ThreadLocal[SQLConf] {
    override def initialValue: SQLConf = new SQLConf
  }

  /** See [[get]] for more information. */
  def getFallbackConf: SQLConf = fallbackConf.get()

  private lazy val existingConf = new ThreadLocal[SQLConf] {
    override def initialValue: SQLConf = null
  }

  def withExistingConf[T](conf: SQLConf)(f: => T): T = {
    val old = existingConf.get()
    existingConf.set(conf)
    try {
      f
    } finally {
      if (old != null) {
        existingConf.set(old)
      } else {
        existingConf.remove()
      }
    }
  }
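
  // Illustrative sketch of how `withExistingConf` is meant to be used (assumes `captured` is a
  // SQLConf resolved on the driver): it pins that instance for the current thread so that
  // `SQLConf.get` returns it instead of the fallback conf, e.g. for operations on RDDs derived
  // from DataFrames.
  //
  //   val captured: SQLConf = SQLConf.get
  //   SQLConf.withExistingConf(captured) {
  //     // Code here that calls SQLConf.get sees `captured`.
  //     SQLConf.get.caseSensitiveAnalysis
  //   }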

  /**
   * Defines a getter that returns the SQLConf within scope.
   * See [[get]] for more information.
   */
  private val confGetter = new AtomicReference[() => SQLConf](() => fallbackConf.get())

  /**
   * Sets the active config object within the current scope.
   * See [[get]] for more information.
   */
  def setSQLConfGetter(getter: () => SQLConf): Unit = {
    confGetter.set(getter)
  }

  // Make sure SqlApiConf is always in sync with SQLConf. SqlApiConf will always try to
  // load SQLConf to make sure both classes are in sync from the get-go.
  SqlApiConfHelper.setConfGetter(() => SQLConf.get)

  /**
   * Returns the active config object within the current scope. If there is an active SparkSession,
   * the proper SQLConf associated with the thread's active session is used. If it's called from
   * tasks in the executor side, a SQLConf will be created from job local properties, which are set
   * and propagated from the driver side, unless a `SQLConf` has been set in the scope by
   * `withExistingConf` as done for propagating SQLConf for operations performed on RDDs created
   * from DataFrames.
   *
   * The way this works is a little bit convoluted, due to the fact that config was added initially
   * only for physical plans (and as a result not in sql/catalyst module).
   *
   * The first time a SparkSession is instantiated, we set the [[confGetter]] to return the
   * active SparkSession's config. If there is no active SparkSession, it falls back to the
   * thread-local [[fallbackConf]]. The reason [[fallbackConf]] is a thread local (rather than just
   * a conf) is to support setting different config options for different threads so we can
   * potentially run tests in parallel. At the time this feature was implemented, this was a no-op
   * since we ran unit tests (that do not involve SparkSession) in serial order.
   */
  def get: SQLConf = {
    if (Utils.isInRunningSparkTask) {
      val conf = existingConf.get()
      if (conf != null) {
        conf
      } else {
        new ReadOnlySQLConf(TaskContext.get())
      }
    } else {
      val isSchedulerEventLoopThread = SparkContext.getActive
        .flatMap { sc => Option(sc.dagScheduler) }
        .map(_.eventProcessLoop.eventThread)
        .exists(_.getId == Thread.currentThread().getId)
      if (isSchedulerEventLoopThread) {
        // DAGScheduler event loop thread does not have an active SparkSession, the `confGetter`
        // will return `fallbackConf` which is unexpected. Here we require the caller to get the
        // conf within `withExistingConf`, otherwise fail the query.
        val conf = existingConf.get()
        if (conf != null) {
          conf
        } else if (Utils.isTesting) {
          throw QueryExecutionErrors.cannotGetSQLConfInSchedulerEventLoopThreadError()
        } else {
          confGetter.get()()
        }
      } else {
        val conf = existingConf.get()
        if (conf != null) {
          conf
        } else {
          confGetter.get()()
        }
      }
    }
  }
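
  // Illustrative sketch of resolving configuration values through the getter above: `getConf`
  // takes the typed entries defined below (e.g. SHUFFLE_PARTITIONS), while `getConfString`
  // takes a raw key.
  //
  //   val numShufflePartitions: Int = SQLConf.get.getConf(SQLConf.SHUFFLE_PARTITIONS)
  //   val parquetCodec: String = SQLConf.get.getConfString("spark.sql.parquet.compression.codec")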

  val ANALYZER_MAX_ITERATIONS = buildConf("spark.sql.analyzer.maxIterations")
    .internal()
    .doc("The max number of iterations the analyzer runs.")
    .version("3.0.0")
    .intConf
    .createWithDefault(100)

  val MULTI_COMMUTATIVE_OP_OPT_THRESHOLD =
    buildConf("spark.sql.analyzer.canonicalization.multiCommutativeOpMemoryOptThreshold")
      .internal()
      .doc("The minimum number of operands in a commutative expression tree to" +
        " invoke the MultiCommutativeOp memory optimization during canonicalization.")
      .version("3.4.0")
      .intConf
      .createWithDefault(3)

  val OPTIMIZER_EXCLUDED_RULES = buildConf("spark.sql.optimizer.excludedRules")
    .doc("Configures a list of rules to be disabled in the optimizer, in which the rules are " +
      "specified by their rule names and separated by comma. It is not guaranteed that all the " +
      "rules in this configuration will eventually be excluded, as some rules are necessary " +
      "for correctness. The optimizer will log the rules that have indeed been excluded.")
    .version("2.4.0")
    .stringConf
    .createOptional

  val OPTIMIZER_MAX_ITERATIONS = buildConf("spark.sql.optimizer.maxIterations")
    .internal()
    .doc("The max number of iterations the optimizer runs.")
    .version("2.0.0")
    .intConf
    .createWithDefault(100)

  val OPTIMIZER_INSET_CONVERSION_THRESHOLD =
    buildConf("spark.sql.optimizer.inSetConversionThreshold")
      .internal()
      .doc("The threshold of set size for InSet conversion.")
      .version("2.0.0")
      .intConf
      .createWithDefault(10)

  val OPTIMIZER_INSET_SWITCH_THRESHOLD =
    buildConf("spark.sql.optimizer.inSetSwitchThreshold")
      .internal()
      .doc("Configures the max set size in InSet for which Spark will generate code with " +
        "switch statements. This is applicable only to bytes, shorts, ints, dates.")
      .version("3.0.0")
      .intConf
      .checkValue(threshold => threshold >= 0 && threshold <= 600, "The max set size " +
        "for using switch statements in InSet must be non-negative and less than or equal to 600")
      .createWithDefault(400)

  val PLAN_CHANGE_LOG_LEVEL = buildConf("spark.sql.planChangeLog.level")
    .internal()
    .doc("Configures the log level for logging the change from the original plan to the new " +
      "plan after a rule or batch is applied. The value can be 'trace', 'debug', 'info', " +
      "'warn', or 'error'. The default log level is 'trace'.")
    .version("3.1.0")
    .stringConf
    .transform(_.toUpperCase(Locale.ROOT))
    .checkValue(logLevel => Set("TRACE", "DEBUG", "INFO", "WARN", "ERROR").contains(logLevel),
      "Invalid value for 'spark.sql.planChangeLog.level'. Valid values are " +
        "'trace', 'debug', 'info', 'warn' and 'error'.")
    .createWithDefault("trace")

  val PLAN_CHANGE_LOG_RULES = buildConf("spark.sql.planChangeLog.rules")
    .internal()
    .doc("Configures a list of rules for logging plan changes, in which the rules are " +
      "specified by their rule names and separated by comma.")
    .version("3.1.0")
    .stringConf
    .createOptional

  val PLAN_CHANGE_LOG_BATCHES = buildConf("spark.sql.planChangeLog.batches")
    .internal()
    .doc("Configures a list of batches for logging plan changes, in which the batches " +
      "are specified by their batch names and separated by comma.")
    .version("3.1.0")
    .stringConf
    .createOptional

  val PLAN_CHANGE_VALIDATION = buildConf("spark.sql.planChangeValidation")
    .internal()
    .doc("If true, Spark will validate all the plan changes made by analyzer/optimizer and other " +
      "catalyst rules, to make sure every rule returns a valid plan")
    .version("3.4.0")
    .booleanConf
    .createWithDefault(false)

  val ALLOW_NAMED_FUNCTION_ARGUMENTS = buildConf("spark.sql.allowNamedFunctionArguments")
    .doc("If true, Spark will turn on support for named parameters for all functions that has" +
      " it implemented.")
    .version("3.5.0")
    .booleanConf
    .createWithDefault(true)

  val DYNAMIC_PARTITION_PRUNING_ENABLED =
    buildConf("spark.sql.optimizer.dynamicPartitionPruning.enabled")
      .doc("When true, we will generate predicate for partition column when it's used as join key")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(true)

  val DYNAMIC_PARTITION_PRUNING_USE_STATS =
    buildConf("spark.sql.optimizer.dynamicPartitionPruning.useStats")
      .internal()
      .doc("When true, distinct count statistics will be used for computing the data size of the " +
        "partitioned table after dynamic partition pruning, in order to evaluate if it is worth " +
        "adding an extra subquery as the pruning filter if broadcast reuse is not applicable.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(true)

  val DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO =
    buildConf("spark.sql.optimizer.dynamicPartitionPruning.fallbackFilterRatio")
      .internal()
      .doc("When statistics are not available or configured not to be used, this config will be " +
        "used as the fallback filter ratio for computing the data size of the partitioned table " +
        "after dynamic partition pruning, in order to evaluate if it is worth adding an extra " +
        "subquery as the pruning filter if broadcast reuse is not applicable.")
      .version("3.0.0")
      .doubleConf
      .createWithDefault(0.5)

  val DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY =
    buildConf("spark.sql.optimizer.dynamicPartitionPruning.reuseBroadcastOnly")
      .internal()
      .doc("When true, dynamic partition pruning will only apply when the broadcast exchange of " +
        "a broadcast hash join operation can be reused as the dynamic pruning filter.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(true)

  val RUNTIME_FILTER_SEMI_JOIN_REDUCTION_ENABLED =
    buildConf("spark.sql.optimizer.runtimeFilter.semiJoinReduction.enabled")
      .doc("When true and if one side of a shuffle join has a selective predicate, we attempt " +
        "to insert a semi join in the other side to reduce the amount of shuffle data.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(false)

  val RUNTIME_FILTER_NUMBER_THRESHOLD =
    buildConf("spark.sql.optimizer.runtimeFilter.number.threshold")
      .doc("The total number of injected runtime filters (non-DPP) for a single " +
        "query. This is to prevent driver OOMs with too many Bloom filters.")
      .version("3.3.0")
      .intConf
      .checkValue(threshold => threshold >= 0, "The threshold should be >= 0")
      .createWithDefault(10)

  val RUNTIME_BLOOM_FILTER_ENABLED =
    buildConf("spark.sql.optimizer.runtime.bloomFilter.enabled")
      .doc("When true and if one side of a shuffle join has a selective predicate, we attempt " +
        "to insert a bloom filter in the other side to reduce the amount of shuffle data.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(true)

  val RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD =
    buildConf("spark.sql.optimizer.runtime.bloomFilter.creationSideThreshold")
      .doc("Size threshold of the bloom filter creation side plan. Estimated size needs to be " +
        "under this value to try to inject bloom filter.")
      .version("3.3.0")
      .bytesConf(ByteUnit.BYTE)
      .createWithDefaultString("10MB")

  val RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD =
    buildConf("spark.sql.optimizer.runtime.bloomFilter.applicationSideScanSizeThreshold")
      .doc("Byte size threshold of the Bloom filter application side plan's aggregated scan " +
        "size. Aggregated scan byte size of the Bloom filter application side needs to be over " +
        "this value to inject a bloom filter.")
      .version("3.3.0")
      .bytesConf(ByteUnit.BYTE)
      .createWithDefaultString("10GB")

  val RUNTIME_BLOOM_FILTER_EXPECTED_NUM_ITEMS =
    buildConf("spark.sql.optimizer.runtime.bloomFilter.expectedNumItems")
      .doc("The default number of expected items for the runtime bloomfilter")
      .version("3.3.0")
      .longConf
      .createWithDefault(1000000L)

  val RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS =
    buildConf("spark.sql.optimizer.runtime.bloomFilter.maxNumItems")
      .doc("The max allowed number of expected items for the runtime bloom filter")
      .version("3.3.0")
      .longConf
      .createWithDefault(4000000L)


  val RUNTIME_BLOOM_FILTER_NUM_BITS =
    buildConf("spark.sql.optimizer.runtime.bloomFilter.numBits")
      .doc("The default number of bits to use for the runtime bloom filter")
      .version("3.3.0")
      .longConf
      .createWithDefault(8388608L)

  val RUNTIME_BLOOM_FILTER_MAX_NUM_BITS =
    buildConf("spark.sql.optimizer.runtime.bloomFilter.maxNumBits")
      .doc("The max number of bits to use for the runtime bloom filter")
      .version("3.3.0")
      .longConf
      .createWithDefault(67108864L)

  val RUNTIME_ROW_LEVEL_OPERATION_GROUP_FILTER_ENABLED =
    buildConf("spark.sql.optimizer.runtime.rowLevelOperationGroupFilter.enabled")
      .doc("Enables runtime group filtering for group-based row-level operations. " +
        "Data sources that replace groups of data (e.g. files, partitions) may prune entire " +
        "groups using provided data source filters when planning a row-level operation scan. " +
        "However, such filtering is limited as not all expressions can be converted into data " +
        "source filters and some expressions can only be evaluated by Spark (e.g. subqueries). " +
        "Since rewriting groups is expensive, Spark can execute a query at runtime to find what " +
        "records match the condition of the row-level operation. The information about matching " +
        "records will be passed back to the row-level operation scan, allowing data sources to " +
        "discard groups that don't have to be rewritten.")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(true)

  val PLANNED_WRITE_ENABLED = buildConf("spark.sql.optimizer.plannedWrite.enabled")
    .internal()
    .doc("When set to true, Spark optimizer will add logical sort operators to V1 write commands " +
      "if needed so that `FileFormatWriter` does not need to insert physical sorts.")
    .version("3.4.0")
    .booleanConf
    .createWithDefault(true)

  val EXPRESSION_PROJECTION_CANDIDATE_LIMIT =
    buildConf("spark.sql.optimizer.expressionProjectionCandidateLimit")
      .doc("The maximum number of the candidate of output expressions whose alias are replaced." +
        " It can preserve the output partitioning and ordering." +
        " Negative value means disable this optimization.")
      .internal()
      .version("3.4.0")
      .intConf
      .createWithDefault(100)

  val COMPRESS_CACHED = buildConf("spark.sql.inMemoryColumnarStorage.compressed")
    .doc("When set to true Spark SQL will automatically select a compression codec for each " +
      "column based on statistics of the data.")
    .version("1.0.1")
    .booleanConf
    .createWithDefault(true)

  val COLUMN_BATCH_SIZE = buildConf("spark.sql.inMemoryColumnarStorage.batchSize")
    .doc("Controls the size of batches for columnar caching.  Larger batch sizes can improve " +
      "memory utilization and compression, but risk OOMs when caching data.")
    .version("1.1.1")
    .intConf
    .createWithDefault(10000)

  val IN_MEMORY_PARTITION_PRUNING =
    buildConf("spark.sql.inMemoryColumnarStorage.partitionPruning")
      .internal()
      .doc("When true, enable partition pruning for in-memory columnar tables.")
      .version("1.2.0")
      .booleanConf
      .createWithDefault(true)

  val IN_MEMORY_TABLE_SCAN_STATISTICS_ENABLED =
    buildConf("spark.sql.inMemoryTableScanStatistics.enable")
      .internal()
      .doc("When true, enable in-memory table scan accumulators.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(false)

  val CACHE_VECTORIZED_READER_ENABLED =
    buildConf("spark.sql.inMemoryColumnarStorage.enableVectorizedReader")
      .doc("Enables vectorized reader for columnar caching.")
      .version("2.3.1")
      .booleanConf
      .createWithDefault(true)

  val COLUMN_VECTOR_OFFHEAP_ENABLED =
    buildConf("spark.sql.columnVector.offheap.enabled")
      .internal()
      .doc("When true, use OffHeapColumnVector in ColumnarBatch.")
      .version("2.3.0")
      .booleanConf
      .createWithDefault(false)

  val PREFER_SORTMERGEJOIN = buildConf("spark.sql.join.preferSortMergeJoin")
    .internal()
    .doc("When true, prefer sort merge join over shuffled hash join. " +
      "Sort merge join consumes less memory than shuffled hash join and it works efficiently " +
      "when both join tables are large. On the other hand, shuffled hash join can improve " +
      "performance (e.g., of full outer joins) when one of join tables is much smaller.")
    .version("2.0.0")
    .booleanConf
    .createWithDefault(true)

  val REQUIRE_ALL_CLUSTER_KEYS_FOR_CO_PARTITION =
    buildConf("spark.sql.requireAllClusterKeysForCoPartition")
      .internal()
      .doc("When true, the planner requires all the clustering keys as the hash partition keys " +
        "of the children, to eliminate the shuffles for the operator that needs its children to " +
        "be co-partitioned, such as JOIN node. This is to avoid data skews which can lead to " +
        "significant performance regression if shuffles are eliminated.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(true)

  val REQUIRE_ALL_CLUSTER_KEYS_FOR_DISTRIBUTION =
    buildConf("spark.sql.requireAllClusterKeysForDistribution")
      .internal()
      .doc("When true, the planner requires all the clustering keys as the partition keys " +
        "(with same ordering) of the children, to eliminate the shuffle for the operator that " +
        "requires its children be clustered distributed, such as AGGREGATE and WINDOW node. " +
        "This is to avoid data skews which can lead to significant performance regression if " +
        "shuffle is eliminated.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(false)

  val MAX_SINGLE_PARTITION_BYTES = buildConf("spark.sql.maxSinglePartitionBytes")
    .doc("The maximum number of bytes allowed for a single partition. Otherwise, The planner " +
      "will introduce shuffle to improve parallelism.")
    .version("3.4.0")
    .bytesConf(ByteUnit.BYTE)
    .createWithDefault(Long.MaxValue)

  val RADIX_SORT_ENABLED = buildConf("spark.sql.sort.enableRadixSort")
    .internal()
    .doc("When true, enable use of radix sort when possible. Radix sort is much faster but " +
      "requires additional memory to be reserved up-front. The memory overhead may be " +
      "significant when sorting very small rows (up to 50% more in this case).")
    .version("2.0.0")
    .booleanConf
    .createWithDefault(true)

  val AUTO_BROADCASTJOIN_THRESHOLD = buildConf("spark.sql.autoBroadcastJoinThreshold")
    .doc("Configures the maximum size in bytes for a table that will be broadcast to all worker " +
      "nodes when performing a join.  By setting this value to -1 broadcasting can be disabled. " +
      "Note that currently statistics are only supported for Hive Metastore tables where the " +
      "command `ANALYZE TABLE  COMPUTE STATISTICS noscan` has been " +
      "run, and file-based data source tables where the statistics are computed directly on " +
      "the files of data.")
    .version("1.1.0")
    .bytesConf(ByteUnit.BYTE)
    .createWithDefaultString("10MB")

  val SHUFFLE_HASH_JOIN_FACTOR = buildConf("spark.sql.shuffledHashJoinFactor")
    .doc("The shuffle hash join can be selected if the data size of small" +
      " side multiplied by this factor is still smaller than the large side.")
    .version("3.3.0")
    .intConf
    .checkValue(_ >= 1, "The shuffle hash join factor must be at least 1.")
    .createWithDefault(3)

  val LIMIT_INITIAL_NUM_PARTITIONS = buildConf("spark.sql.limit.initialNumPartitions")
    .internal()
    .doc("Initial number of partitions to try when executing a take on a query. Higher values " +
      "lead to more partitions read. Lower values might lead to longer execution times as more" +
      "jobs will be run")
    .version("3.4.0")
    .intConf
    .checkValue(_ > 0, "value should be positive")
    .createWithDefault(1)

  val LIMIT_SCALE_UP_FACTOR = buildConf("spark.sql.limit.scaleUpFactor")
    .internal()
    .doc("Minimal increase rate in number of partitions between attempts when executing a take " +
      "on a query. Higher values lead to more partitions read. Lower values might lead to " +
      "longer execution times as more jobs will be run")
    .version("2.1.1")
    .intConf
    .createWithDefault(4)

  val ADVANCED_PARTITION_PREDICATE_PUSHDOWN =
    buildConf("spark.sql.hive.advancedPartitionPredicatePushdown.enabled")
      .internal()
      .doc("When true, advanced partition predicate pushdown into Hive metastore is enabled.")
      .version("2.3.0")
      .booleanConf
      .createWithDefault(true)

  val LEAF_NODE_DEFAULT_PARALLELISM = buildConf("spark.sql.leafNodeDefaultParallelism")
    .doc("The default parallelism of Spark SQL leaf nodes that produce data, such as the file " +
      "scan node, the local data scan node, the range node, etc. The default value of this " +
      "config is 'SparkContext#defaultParallelism'.")
    .version("3.2.0")
    .intConf
    .checkValue(_ > 0, "The value of spark.sql.leafNodeDefaultParallelism must be positive.")
    .createOptional

  val SHUFFLE_PARTITIONS = buildConf("spark.sql.shuffle.partitions")
    .doc("The default number of partitions to use when shuffling data for joins or aggregations. " +
      "Note: For structured streaming, this configuration cannot be changed between query " +
      "restarts from the same checkpoint location.")
    .version("1.1.0")
    .intConf
    .checkValue(_ > 0, "The value of spark.sql.shuffle.partitions must be positive")
    .createWithDefault(200)

  val SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE =
    buildConf("spark.sql.adaptive.shuffle.targetPostShuffleInputSize")
      .internal()
      .doc("(Deprecated since Spark 3.0)")
      .version("1.6.0")
      .bytesConf(ByteUnit.BYTE)
      .checkValue(_ > 0, "advisoryPartitionSizeInBytes must be positive")
      .createWithDefaultString("64MB")

  val ADAPTIVE_EXECUTION_ENABLED = buildConf("spark.sql.adaptive.enabled")
    .doc("When true, enable adaptive query execution, which re-optimizes the query plan in the " +
      "middle of query execution, based on accurate runtime statistics.")
    .version("1.6.0")
    .booleanConf
    .createWithDefault(true)

  val ADAPTIVE_EXECUTION_FORCE_APPLY = buildConf("spark.sql.adaptive.forceApply")
    .internal()
    .doc("Adaptive query execution is skipped when the query does not have exchanges or " +
      "sub-queries. By setting this config to true (together with " +
      s"'${ADAPTIVE_EXECUTION_ENABLED.key}' set to true), Spark will force apply adaptive query " +
      "execution for all supported queries.")
    .version("3.0.0")
    .booleanConf
    .createWithDefault(false)

  val ADAPTIVE_EXECUTION_APPLY_FINAL_STAGE_SHUFFLE_OPTIMIZATIONS =
    buildConf("spark.sql.adaptive.applyFinalStageShuffleOptimizations")
      .internal()
      .doc("Configures whether adaptive query execution (if enabled) should apply shuffle " +
        "coalescing and local shuffle read optimization for the final query stage.")
      .version("3.4.2")
      .booleanConf
      .createWithDefault(true)

  val ADAPTIVE_EXECUTION_LOG_LEVEL = buildConf("spark.sql.adaptive.logLevel")
    .internal()
    .doc("Configures the log level for adaptive execution logging of plan changes. The value " +
      "can be 'trace', 'debug', 'info', 'warn', or 'error'. The default log level is 'debug'.")
    .version("3.0.0")
    .stringConf
    .transform(_.toUpperCase(Locale.ROOT))
    .checkValues(Set("TRACE", "DEBUG", "INFO", "WARN", "ERROR"))
    .createWithDefault("debug")

  val ADVISORY_PARTITION_SIZE_IN_BYTES =
    buildConf("spark.sql.adaptive.advisoryPartitionSizeInBytes")
      .doc("The advisory size in bytes of the shuffle partition during adaptive optimization " +
        s"(when ${ADAPTIVE_EXECUTION_ENABLED.key} is true). It takes effect when Spark " +
        "coalesces small shuffle partitions or splits skewed shuffle partition.")
      .version("3.0.0")
      .fallbackConf(SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE)

  val COALESCE_PARTITIONS_ENABLED =
    buildConf("spark.sql.adaptive.coalescePartitions.enabled")
      .doc(s"When true and '${ADAPTIVE_EXECUTION_ENABLED.key}' is true, Spark will coalesce " +
        "contiguous shuffle partitions according to the target size (specified by " +
        s"'${ADVISORY_PARTITION_SIZE_IN_BYTES.key}'), to avoid too many small tasks.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(true)

  val COALESCE_PARTITIONS_PARALLELISM_FIRST =
    buildConf("spark.sql.adaptive.coalescePartitions.parallelismFirst")
      .doc("When true, Spark does not respect the target size specified by " +
        s"'${ADVISORY_PARTITION_SIZE_IN_BYTES.key}' (default 64MB) when coalescing contiguous " +
        "shuffle partitions, but adaptively calculate the target size according to the default " +
        "parallelism of the Spark cluster. The calculated size is usually smaller than the " +
        "configured target size. This is to maximize the parallelism and avoid performance " +
        "regression when enabling adaptive query execution. It's recommended to set this config " +
        "to false and respect the configured target size.")
      .version("3.2.0")
      .booleanConf
      .createWithDefault(true)

  val COALESCE_PARTITIONS_MIN_PARTITION_SIZE =
    buildConf("spark.sql.adaptive.coalescePartitions.minPartitionSize")
      .doc("The minimum size of shuffle partitions after coalescing. This is useful when the " +
        "adaptively calculated target size is too small during partition coalescing.")
      .version("3.2.0")
      .bytesConf(ByteUnit.BYTE)
      .checkValue(_ > 0, "minPartitionSize must be positive")
      .createWithDefaultString("1MB")

  val COALESCE_PARTITIONS_MIN_PARTITION_NUM =
    buildConf("spark.sql.adaptive.coalescePartitions.minPartitionNum")
      .internal()
      .doc("(deprecated) The suggested (not guaranteed) minimum number of shuffle partitions " +
        "after coalescing. If not set, the default value is the default parallelism of the " +
        "Spark cluster. This configuration only has an effect when " +
        s"'${ADAPTIVE_EXECUTION_ENABLED.key}' and " +
        s"'${COALESCE_PARTITIONS_ENABLED.key}' are both true.")
      .version("3.0.0")
      .intConf
      .checkValue(_ > 0, "The minimum number of partitions must be positive.")
      .createOptional

  val COALESCE_PARTITIONS_INITIAL_PARTITION_NUM =
    buildConf("spark.sql.adaptive.coalescePartitions.initialPartitionNum")
      .doc("The initial number of shuffle partitions before coalescing. If not set, it equals to " +
        s"${SHUFFLE_PARTITIONS.key}. This configuration only has an effect when " +
        s"'${ADAPTIVE_EXECUTION_ENABLED.key}' and '${COALESCE_PARTITIONS_ENABLED.key}' " +
        "are both true.")
      .version("3.0.0")
      .intConf
      .checkValue(_ > 0, "The initial number of partitions must be positive.")
      .createOptional

  val FETCH_SHUFFLE_BLOCKS_IN_BATCH =
    buildConf("spark.sql.adaptive.fetchShuffleBlocksInBatch")
      .internal()
      .doc("Whether to fetch the contiguous shuffle blocks in batch. Instead of fetching blocks " +
        "one by one, fetching contiguous shuffle blocks for the same map task in batch can " +
        "reduce IO and improve performance. Note, multiple contiguous blocks exist in single " +
        s"fetch request only happen when '${ADAPTIVE_EXECUTION_ENABLED.key}' and " +
        s"'${COALESCE_PARTITIONS_ENABLED.key}' are both true. This feature also depends " +
        "on a relocatable serializer, the concatenation support codec in use, the new version " +
        "shuffle fetch protocol and io encryption is disabled.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(true)

  val LOCAL_SHUFFLE_READER_ENABLED =
    buildConf("spark.sql.adaptive.localShuffleReader.enabled")
      .doc(s"When true and '${ADAPTIVE_EXECUTION_ENABLED.key}' is true, Spark tries to use local " +
        "shuffle reader to read the shuffle data when the shuffle partitioning is not needed, " +
        "for example, after converting sort-merge join to broadcast-hash join.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(true)

  val SKEW_JOIN_ENABLED =
    buildConf("spark.sql.adaptive.skewJoin.enabled")
      .doc(s"When true and '${ADAPTIVE_EXECUTION_ENABLED.key}' is true, Spark dynamically " +
        "handles skew in shuffled join (sort-merge and shuffled hash) by splitting (and " +
        "replicating if needed) skewed partitions.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(true)

  val SKEW_JOIN_SKEWED_PARTITION_FACTOR =
    buildConf("spark.sql.adaptive.skewJoin.skewedPartitionFactor")
      .doc("A partition is considered as skewed if its size is larger than this factor " +
        "multiplying the median partition size and also larger than " +
        "'spark.sql.adaptive.skewJoin.skewedPartitionThresholdInBytes'")
      .version("3.0.0")
      .doubleConf
      .checkValue(_ >= 0, "The skew factor cannot be negative.")
      .createWithDefault(5.0)

  val SKEW_JOIN_SKEWED_PARTITION_THRESHOLD =
    buildConf("spark.sql.adaptive.skewJoin.skewedPartitionThresholdInBytes")
      .doc("A partition is considered as skewed if its size in bytes is larger than this " +
        s"threshold and also larger than '${SKEW_JOIN_SKEWED_PARTITION_FACTOR.key}' " +
        "multiplying the median partition size. Ideally this config should be set larger " +
        s"than '${ADVISORY_PARTITION_SIZE_IN_BYTES.key}'.")
      .version("3.0.0")
      .bytesConf(ByteUnit.BYTE)
      .createWithDefaultString("256MB")

  val NON_EMPTY_PARTITION_RATIO_FOR_BROADCAST_JOIN =
    buildConf("spark.sql.adaptive.nonEmptyPartitionRatioForBroadcastJoin")
      .internal()
      .doc("The relation with a non-empty partition ratio lower than this config will not be " +
        "considered as the build side of a broadcast-hash join in adaptive execution regardless " +
        "of its size.This configuration only has an effect when " +
        s"'${ADAPTIVE_EXECUTION_ENABLED.key}' is true.")
      .version("3.0.0")
      .doubleConf
      .checkValue(_ >= 0, "The non-empty partition ratio must not be negative.")
      .createWithDefault(0.2)

  val ADAPTIVE_OPTIMIZER_EXCLUDED_RULES =
    buildConf("spark.sql.adaptive.optimizer.excludedRules")
      .doc("Configures a list of rules to be disabled in the adaptive optimizer, in which the " +
        "rules are specified by their rule names and separated by comma. The optimizer will log " +
        "the rules that have indeed been excluded.")
      .version("3.1.0")
      .stringConf
      .createOptional

  val ADAPTIVE_AUTO_BROADCASTJOIN_THRESHOLD =
    buildConf("spark.sql.adaptive.autoBroadcastJoinThreshold")
      .doc("Configures the maximum size in bytes for a table that will be broadcast to all " +
        "worker nodes when performing a join. By setting this value to -1 broadcasting can be " +
        s"disabled. The default value is same with ${AUTO_BROADCASTJOIN_THRESHOLD.key}. " +
        "Note that, this config is used only in adaptive framework.")
      .version("3.2.0")
      .bytesConf(ByteUnit.BYTE)
      .createOptional

  val ADAPTIVE_MAX_SHUFFLE_HASH_JOIN_LOCAL_MAP_THRESHOLD =
    buildConf("spark.sql.adaptive.maxShuffledHashJoinLocalMapThreshold")
      .doc("Configures the maximum size in bytes per partition that can be allowed to build " +
        "local hash map. If this value is not smaller than " +
        s"${ADVISORY_PARTITION_SIZE_IN_BYTES.key} and all the partition size are not larger " +
        "than this config, join selection prefer to use shuffled hash join instead of " +
        s"sort merge join regardless of the value of ${PREFER_SORTMERGEJOIN.key}.")
      .version("3.2.0")
      .bytesConf(ByteUnit.BYTE)
      .createWithDefault(0L)

  val ADAPTIVE_OPTIMIZE_SKEWS_IN_REBALANCE_PARTITIONS_ENABLED =
    buildConf("spark.sql.adaptive.optimizeSkewsInRebalancePartitions.enabled")
      .doc(s"When true and '${ADAPTIVE_EXECUTION_ENABLED.key}' is true, Spark will optimize the " +
        "skewed shuffle partitions in RebalancePartitions and split them to smaller ones " +
        s"according to the target size (specified by '${ADVISORY_PARTITION_SIZE_IN_BYTES.key}'), " +
        "to avoid data skew.")
      .version("3.2.0")
      .booleanConf
      .createWithDefault(true)

  val ADAPTIVE_REBALANCE_PARTITIONS_SMALL_PARTITION_FACTOR =
    buildConf("spark.sql.adaptive.rebalancePartitionsSmallPartitionFactor")
      .doc(s"A partition will be merged during splitting if its size is small than this factor " +
        s"multiply ${ADVISORY_PARTITION_SIZE_IN_BYTES.key}.")
      .version("3.3.0")
      .doubleConf
      .checkValue(v => v > 0 && v < 1, "the factor must be in (0, 1)")
      .createWithDefault(0.2)

  val ADAPTIVE_FORCE_OPTIMIZE_SKEWED_JOIN =
    buildConf("spark.sql.adaptive.forceOptimizeSkewedJoin")
      .doc("When true, force enable OptimizeSkewedJoin even if it introduces extra shuffle.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(false)

  val ADAPTIVE_CUSTOM_COST_EVALUATOR_CLASS =
    buildConf("spark.sql.adaptive.customCostEvaluatorClass")
      .doc("The custom cost evaluator class to be used for adaptive execution. If not being set," +
        " Spark will use its own SimpleCostEvaluator by default.")
      .version("3.2.0")
      .stringConf
      .createOptional

  val SUBEXPRESSION_ELIMINATION_ENABLED =
    buildConf("spark.sql.subexpressionElimination.enabled")
      .internal()
      .doc("When true, common subexpressions will be eliminated.")
      .version("1.6.0")
      .booleanConf
      .createWithDefault(true)

  val SUBEXPRESSION_ELIMINATION_CACHE_MAX_ENTRIES =
    buildConf("spark.sql.subexpressionElimination.cache.maxEntries")
      .internal()
      .doc("The maximum entries of the cache used for interpreted subexpression elimination.")
      .version("3.1.0")
      .intConf
      .checkValue(_ >= 0, "The maximum must not be negative")
      .createWithDefault(100)

  val SUBEXPRESSION_ELIMINATION_SKIP_FOR_SHORTCUT_EXPR =
    buildConf("spark.sql.subexpressionElimination.skipForShortcutExpr")
      .internal()
      .doc("When true, shortcut eliminate subexpression with `AND`, `OR`. " +
        "The subexpression may not need to eval even if it appears more than once. " +
        "e.g., `if(or(a, and(b, b)))`, the expression `b` would be skipped if `a` is true.")
      .version("3.5.0")
      .booleanConf
      .createWithDefault(false)

  val CASE_SENSITIVE = buildConf(SqlApiConfHelper.CASE_SENSITIVE_KEY)
    .internal()
    .doc("Whether the query analyzer should be case sensitive or not. " +
      "Default to case insensitive. It is highly discouraged to turn on case sensitive mode.")
    .version("1.4.0")
    .booleanConf
    .createWithDefault(false)

  val CONSTRAINT_PROPAGATION_ENABLED = buildConf("spark.sql.constraintPropagation.enabled")
    .internal()
    .doc("When true, the query optimizer will infer and propagate data constraints in the query " +
      "plan to optimize them. Constraint propagation can sometimes be computationally expensive " +
      "for certain kinds of query plans (such as those with a large number of predicates and " +
      "aliases) which might negatively impact overall runtime.")
    .version("2.2.0")
    .booleanConf
    .createWithDefault(true)

  val PROPAGATE_DISTINCT_KEYS_ENABLED =
    buildConf("spark.sql.optimizer.propagateDistinctKeys.enabled")
      .internal()
      .doc("When true, the query optimizer will propagate a set of distinct attributes from the " +
        "current node and use it to optimize query.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(true)

  val ESCAPED_STRING_LITERALS = buildConf("spark.sql.parser.escapedStringLiterals")
    .internal()
    .doc("When true, string literals (including regex patterns) remain escaped in our SQL " +
      "parser. The default is false since Spark 2.0. Setting it to true can restore the behavior " +
      "prior to Spark 2.0.")
    .version("2.2.1")
    .booleanConf
    .createWithDefault(false)

  val FILE_COMPRESSION_FACTOR = buildConf("spark.sql.sources.fileCompressionFactor")
    .internal()
    .doc("When estimating the output data size of a table scan, multiply the file size with this " +
      "factor as the estimated data size, in case the data is compressed in the file and lead to" +
      " a heavily underestimated result.")
    .version("2.3.1")
    .doubleConf
    .checkValue(_ > 0, "the value of fileCompressionFactor must be greater than 0")
    .createWithDefault(1.0)

  val PARQUET_SCHEMA_MERGING_ENABLED = buildConf("spark.sql.parquet.mergeSchema")
    .doc("When true, the Parquet data source merges schemas collected from all data files, " +
         "otherwise the schema is picked from the summary file or a random data file " +
         "if no summary file is available.")
    .version("1.5.0")
    .booleanConf
    .createWithDefault(false)

  val PARQUET_SCHEMA_RESPECT_SUMMARIES = buildConf("spark.sql.parquet.respectSummaryFiles")
    .doc("When true, we make assumption that all part-files of Parquet are consistent with " +
         "summary files and we will ignore them when merging schema. Otherwise, if this is " +
         "false, which is the default, we will merge all part-files. This should be considered " +
         "as expert-only option, and shouldn't be enabled before knowing what it means exactly.")
    .version("1.5.0")
    .booleanConf
    .createWithDefault(false)

  val PARQUET_BINARY_AS_STRING = buildConf("spark.sql.parquet.binaryAsString")
    .doc("Some other Parquet-producing systems, in particular Impala and older versions of " +
      "Spark SQL, do not differentiate between binary data and strings when writing out the " +
      "Parquet schema. This flag tells Spark SQL to interpret binary data as a string to provide " +
      "compatibility with these systems.")
    .version("1.1.1")
    .booleanConf
    .createWithDefault(false)

  val PARQUET_INT96_AS_TIMESTAMP = buildConf("spark.sql.parquet.int96AsTimestamp")
    .doc("Some Parquet-producing systems, in particular Impala, store Timestamp into INT96. " +
      "Spark would also store Timestamp as INT96 because we need to avoid precision lost of the " +
      "nanoseconds field. This flag tells Spark SQL to interpret INT96 data as a timestamp to " +
      "provide compatibility with these systems.")
    .version("1.3.0")
    .booleanConf
    .createWithDefault(true)

  val PARQUET_INT96_TIMESTAMP_CONVERSION = buildConf("spark.sql.parquet.int96TimestampConversion")
    .doc("This controls whether timestamp adjustments should be applied to INT96 data when " +
      "converting to timestamps, for data written by Impala.  This is necessary because Impala " +
      "stores INT96 data with a different timezone offset than Hive & Spark.")
    .version("2.3.0")
    .booleanConf
    .createWithDefault(false)

  object ParquetOutputTimestampType extends Enumeration {
    val INT96, TIMESTAMP_MICROS, TIMESTAMP_MILLIS = Value
  }

  val PARQUET_OUTPUT_TIMESTAMP_TYPE = buildConf("spark.sql.parquet.outputTimestampType")
    .doc("Sets which Parquet timestamp type to use when Spark writes data to Parquet files. " +
      "INT96 is a non-standard but commonly used timestamp type in Parquet. TIMESTAMP_MICROS " +
      "is a standard timestamp type in Parquet, which stores number of microseconds from the " +
      "Unix epoch. TIMESTAMP_MILLIS is also standard, but with millisecond precision, which " +
      "means Spark has to truncate the microsecond portion of its timestamp value.")
    .version("2.3.0")
    .stringConf
    .transform(_.toUpperCase(Locale.ROOT))
    .checkValues(ParquetOutputTimestampType.values.map(_.toString))
    .createWithDefault(ParquetOutputTimestampType.INT96.toString)

  val PARQUET_COMPRESSION = buildConf("spark.sql.parquet.compression.codec")
    .doc("Sets the compression codec used when writing Parquet files. If either `compression` or " +
      "`parquet.compression` is specified in the table-specific options/properties, the " +
      "precedence would be `compression`, `parquet.compression`, " +
      "`spark.sql.parquet.compression.codec`. Acceptable values include: none, uncompressed, " +
      "snappy, gzip, lzo, brotli, lz4, lz4raw, lz4_raw, zstd.")
    .version("1.1.1")
    .stringConf
    .transform(_.toLowerCase(Locale.ROOT))
    .checkValues(
      Set(
        "none",
        "uncompressed",
        "snappy",
        "gzip",
        "lzo",
        "brotli",
        "lz4",
        "lz4raw",
        "lz4_raw",
        "zstd"))
    .createWithDefault("snappy")

  val PARQUET_FILTER_PUSHDOWN_ENABLED = buildConf("spark.sql.parquet.filterPushdown")
    .doc("Enables Parquet filter push-down optimization when set to true.")
    .version("1.2.0")
    .booleanConf
    .createWithDefault(true)

  val PARQUET_FILTER_PUSHDOWN_DATE_ENABLED = buildConf("spark.sql.parquet.filterPushdown.date")
    .doc("If true, enables Parquet filter push-down optimization for Date. " +
      s"This configuration only has an effect when '${PARQUET_FILTER_PUSHDOWN_ENABLED.key}' is " +
      "enabled.")
    .version("2.4.0")
    .internal()
    .booleanConf
    .createWithDefault(true)

  val PARQUET_FILTER_PUSHDOWN_TIMESTAMP_ENABLED =
    buildConf("spark.sql.parquet.filterPushdown.timestamp")
      .doc("If true, enables Parquet filter push-down optimization for Timestamp. " +
        s"This configuration only has an effect when '${PARQUET_FILTER_PUSHDOWN_ENABLED.key}' is " +
        "enabled and Timestamp stored as TIMESTAMP_MICROS or TIMESTAMP_MILLIS type.")
      .version("2.4.0")
      .internal()
      .booleanConf
      .createWithDefault(true)

  val PARQUET_FILTER_PUSHDOWN_DECIMAL_ENABLED =
    buildConf("spark.sql.parquet.filterPushdown.decimal")
      .doc("If true, enables Parquet filter push-down optimization for Decimal. " +
        s"This configuration only has an effect when '${PARQUET_FILTER_PUSHDOWN_ENABLED.key}' is " +
        "enabled.")
      .version("2.4.0")
      .internal()
      .booleanConf
      .createWithDefault(true)

  val PARQUET_FILTER_PUSHDOWN_STRING_STARTSWITH_ENABLED =
    buildConf("spark.sql.parquet.filterPushdown.string.startsWith")
    .doc("If true, enables Parquet filter push-down optimization for string startsWith function. " +
      s"This configuration only has an effect when '${PARQUET_FILTER_PUSHDOWN_ENABLED.key}' is " +
      "enabled.")
    .version("2.4.0")
    .internal()
    .booleanConf
    .createWithDefault(true)

  val PARQUET_FILTER_PUSHDOWN_STRING_PREDICATE_ENABLED =
    buildConf("spark.sql.parquet.filterPushdown.stringPredicate")
      .doc("If true, enables Parquet filter push-down optimization for string predicate such " +
        "as startsWith/endsWith/contains function. This configuration only has an effect when " +
        s"'${PARQUET_FILTER_PUSHDOWN_ENABLED.key}' is enabled.")
      .version("3.4.0")
      .internal()
      .fallbackConf(PARQUET_FILTER_PUSHDOWN_STRING_STARTSWITH_ENABLED)

  val PARQUET_FILTER_PUSHDOWN_INFILTERTHRESHOLD =
    buildConf("spark.sql.parquet.pushdown.inFilterThreshold")
      .doc("For IN predicate, Parquet filter will push-down a set of OR clauses if its " +
        "number of values not exceeds this threshold. Otherwise, Parquet filter will push-down " +
        "a value greater than or equal to its minimum value and less than or equal to " +
        "its maximum value. By setting this value to 0 this feature can be disabled. " +
        s"This configuration only has an effect when '${PARQUET_FILTER_PUSHDOWN_ENABLED.key}' is " +
        "enabled.")
      .version("2.4.0")
      .internal()
      .intConf
      .checkValue(threshold => threshold >= 0, "The threshold must not be negative.")
      .createWithDefault(10)

  val PARQUET_AGGREGATE_PUSHDOWN_ENABLED = buildConf("spark.sql.parquet.aggregatePushdown")
    .doc("If true, aggregates will be pushed down to Parquet for optimization. Support MIN, MAX " +
      "and COUNT as aggregate expression. For MIN/MAX, support boolean, integer, float and date " +
      "type. For COUNT, support all data types. If statistics is missing from any Parquet file " +
      "footer, exception would be thrown.")
    .version("3.3.0")
    .booleanConf
    .createWithDefault(false)

  val PARQUET_WRITE_LEGACY_FORMAT = buildConf("spark.sql.parquet.writeLegacyFormat")
    .doc("If true, data will be written in a way of Spark 1.4 and earlier. For example, decimal " +
      "values will be written in Apache Parquet's fixed-length byte array format, which other " +
      "systems such as Apache Hive and Apache Impala use. If false, the newer format in Parquet " +
      "will be used. For example, decimals will be written in int-based format. If Parquet " +
      "output is intended for use with systems that do not support this newer format, set to true.")
    .version("1.6.0")
    .booleanConf
    .createWithDefault(false)

  val PARQUET_OUTPUT_COMMITTER_CLASS = buildConf("spark.sql.parquet.output.committer.class")
    .doc("The output committer class used by Parquet. The specified class needs to be a " +
      "subclass of org.apache.hadoop.mapreduce.OutputCommitter. Typically, it's also a subclass " +
      "of org.apache.parquet.hadoop.ParquetOutputCommitter. If it is not, then metadata " +
      "summaries will never be created, irrespective of the value of " +
      "parquet.summary.metadata.level")
    .version("1.5.0")
    .internal()
    .stringConf
    .createWithDefault("org.apache.parquet.hadoop.ParquetOutputCommitter")

  val PARQUET_VECTORIZED_READER_ENABLED =
    buildConf("spark.sql.parquet.enableVectorizedReader")
      .doc("Enables vectorized parquet decoding.")
      .version("2.0.0")
      .booleanConf
      .createWithDefault(true)

  val PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED =
    buildConf("spark.sql.parquet.enableNestedColumnVectorizedReader")
      .doc("Enables vectorized Parquet decoding for nested columns (e.g., struct, list, map). " +
          s"Requires ${PARQUET_VECTORIZED_READER_ENABLED.key} to be enabled.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(true)

  val PARQUET_RECORD_FILTER_ENABLED = buildConf("spark.sql.parquet.recordLevelFilter.enabled")
    .doc("If true, enables Parquet's native record-level filtering using the pushed down " +
      "filters. " +
      s"This configuration only has an effect when '${PARQUET_FILTER_PUSHDOWN_ENABLED.key}' " +
      "is enabled and the vectorized reader is not used. You can ensure the vectorized reader " +
      s"is not used by setting '${PARQUET_VECTORIZED_READER_ENABLED.key}' to false.")
    .version("2.3.0")
    .booleanConf
    .createWithDefault(false)

  val PARQUET_VECTORIZED_READER_BATCH_SIZE = buildConf("spark.sql.parquet.columnarReaderBatchSize")
    .doc("The number of rows to include in a parquet vectorized reader batch. The number should " +
      "be carefully chosen to minimize overhead and avoid OOMs in reading data.")
    .version("2.4.0")
    .intConf
    .createWithDefault(4096)

  val PARQUET_FIELD_ID_WRITE_ENABLED =
    buildConf("spark.sql.parquet.fieldId.write.enabled")
      .doc("Field ID is a native field of the Parquet schema spec. When enabled, " +
        "Parquet writers will populate the field Id " +
        "metadata (if present) in the Spark schema to the Parquet schema.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(true)

  val PARQUET_FIELD_ID_READ_ENABLED =
    buildConf("spark.sql.parquet.fieldId.read.enabled")
      .doc("Field ID is a native field of the Parquet schema spec. When enabled, Parquet readers " +
        "will use field IDs (if present) in the requested Spark schema to look up Parquet " +
        "fields instead of using column names")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(false)

  val IGNORE_MISSING_PARQUET_FIELD_ID =
    buildConf("spark.sql.parquet.fieldId.read.ignoreMissing")
      .doc("When the Parquet file doesn't have any field IDs but the " +
        "Spark read schema is using field IDs to read, we will silently return nulls " +
        "when this flag is enabled, or error otherwise.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(false)

  val PARQUET_INFER_TIMESTAMP_NTZ_ENABLED =
    buildConf("spark.sql.parquet.inferTimestampNTZ.enabled")
      .doc("When enabled, Parquet timestamp columns with annotation isAdjustedToUTC = false " +
        "are inferred as TIMESTAMP_NTZ type during schema inference. Otherwise, all the Parquet " +
        "timestamp columns are inferred as TIMESTAMP_LTZ types. Note that Spark writes the " +
        "output schema into Parquet's footer metadata on file writing and leverages it on file " +
        "reading. Thus this configuration only affects the schema inference on Parquet files " +
        "which are not written by Spark.")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(true)

  val ORC_COMPRESSION = buildConf("spark.sql.orc.compression.codec")
    .doc("Sets the compression codec used when writing ORC files. If either `compression` or " +
      "`orc.compress` is specified in the table-specific options/properties, the precedence " +
      "would be `compression`, `orc.compress`, `spark.sql.orc.compression.codec`." +
      "Acceptable values include: none, uncompressed, snappy, zlib, lzo, zstd, lz4.")
    .version("2.3.0")
    .stringConf
    .transform(_.toLowerCase(Locale.ROOT))
    .checkValues(Set("none", "uncompressed", "snappy", "zlib", "lzo", "zstd", "lz4"))
    .createWithDefault("snappy")

  val ORC_IMPLEMENTATION = buildConf("spark.sql.orc.impl")
    .doc("When native, use the native version of ORC support instead of the ORC library in Hive. " +
      "It is 'hive' by default prior to Spark 2.4.")
    .version("2.3.0")
    .internal()
    .stringConf
    .checkValues(Set("hive", "native"))
    .createWithDefault("native")

  val ORC_VECTORIZED_READER_ENABLED = buildConf("spark.sql.orc.enableVectorizedReader")
    .doc("Enables vectorized orc decoding.")
    .version("2.3.0")
    .booleanConf
    .createWithDefault(true)

  val ORC_VECTORIZED_READER_BATCH_SIZE = buildConf("spark.sql.orc.columnarReaderBatchSize")
    .doc("The number of rows to include in a orc vectorized reader batch. The number should " +
      "be carefully chosen to minimize overhead and avoid OOMs in reading data.")
    .version("2.4.0")
    .intConf
    .createWithDefault(4096)

  val ORC_VECTORIZED_WRITER_BATCH_SIZE = buildConf("spark.sql.orc.columnarWriterBatchSize")
    .doc("The number of rows to include in a orc vectorized writer batch. The number should " +
      "be carefully chosen to minimize overhead and avoid OOMs in writing data.")
    .version("3.4.0")
    .intConf
    .createWithDefault(1024)

  val ORC_VECTORIZED_READER_NESTED_COLUMN_ENABLED =
    buildConf("spark.sql.orc.enableNestedColumnVectorizedReader")
      .doc("Enables vectorized orc decoding for nested column.")
      .version("3.2.0")
      .booleanConf
      .createWithDefault(true)

  val ORC_FILTER_PUSHDOWN_ENABLED = buildConf("spark.sql.orc.filterPushdown")
    .doc("When true, enable filter pushdown for ORC files.")
    .version("1.4.0")
    .booleanConf
    .createWithDefault(true)

  val ORC_AGGREGATE_PUSHDOWN_ENABLED = buildConf("spark.sql.orc.aggregatePushdown")
    .doc("If true, aggregates will be pushed down to ORC for optimization. Support MIN, MAX and " +
      "COUNT as aggregate expression. For MIN/MAX, support boolean, integer, float and date " +
      "type. For COUNT, support all data types. If statistics is missing from any ORC file " +
      "footer, exception would be thrown.")
    .version("3.3.0")
    .booleanConf
    .createWithDefault(false)

  val ORC_SCHEMA_MERGING_ENABLED = buildConf("spark.sql.orc.mergeSchema")
    .doc("When true, the Orc data source merges schemas collected from all data files, " +
      "otherwise the schema is picked from a random data file.")
    .version("3.0.0")
    .booleanConf
    .createWithDefault(false)

  val HIVE_VERIFY_PARTITION_PATH = buildConf("spark.sql.hive.verifyPartitionPath")
    .doc("When true, check all the partition paths under the table\'s root directory " +
         "when reading data stored in HDFS. This configuration will be deprecated in the future " +
         s"releases and replaced by ${SPARK_IGNORE_MISSING_FILES.key}.")
    .version("1.4.0")
    .booleanConf
    .createWithDefault(false)

  val HIVE_METASTORE_DROP_PARTITION_BY_NAME =
    buildConf("spark.sql.hive.dropPartitionByName.enabled")
      .doc("When true, Spark will get partition name rather than partition object " +
           "to drop partition, which can improve the performance of drop partition.")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(false)

  val HIVE_METASTORE_PARTITION_PRUNING =
    buildConf("spark.sql.hive.metastorePartitionPruning")
      .doc("When true, some predicates will be pushed down into the Hive metastore so that " +
           "unmatching partitions can be eliminated earlier.")
      .version("1.5.0")
      .booleanConf
      .createWithDefault(true)

  val HIVE_METASTORE_PARTITION_PRUNING_INSET_THRESHOLD =
    buildConf("spark.sql.hive.metastorePartitionPruningInSetThreshold")
      .doc("The threshold of set size for InSet predicate when pruning partitions through Hive " +
        "Metastore. When the set size exceeds the threshold, we rewrite the InSet predicate " +
        "to be greater than or equal to the minimum value in set and less than or equal to the " +
        "maximum value in set. Larger values may cause Hive Metastore stack overflow. But for " +
        "InSet inside Not with values exceeding the threshold, we won't push it to Hive Metastore."
      )
      .version("3.1.0")
      .internal()
      .intConf
      .checkValue(_ > 0, "The value of metastorePartitionPruningInSetThreshold must be positive")
      .createWithDefault(1000)

  val HIVE_METASTORE_PARTITION_PRUNING_FALLBACK_ON_EXCEPTION =
    buildConf("spark.sql.hive.metastorePartitionPruningFallbackOnException")
      .doc("Whether to fallback to get all partitions from Hive metastore and perform partition " +
        "pruning on Spark client side, when encountering MetaException from the metastore. Note " +
        "that Spark query performance may degrade if this is enabled and there are many " +
        "partitions to be listed. If this is disabled, Spark will fail the query instead.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(false)

  val HIVE_METASTORE_PARTITION_PRUNING_FAST_FALLBACK =
    buildConf("spark.sql.hive.metastorePartitionPruningFastFallback")
      .doc("When this config is enabled, if the predicates are not supported by Hive or Spark " +
        "does fallback due to encountering MetaException from the metastore, " +
        "Spark will instead prune partitions by getting the partition names first " +
        "and then evaluating the filter expressions on the client side. " +
        "Note that the predicates with TimeZoneAwareExpression is not supported.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(false)

  val HIVE_MANAGE_FILESOURCE_PARTITIONS =
    buildConf("spark.sql.hive.manageFilesourcePartitions")
      .doc("When true, enable metastore partition management for file source tables as well. " +
           "This includes both datasource and converted Hive tables. When partition management " +
           "is enabled, datasource tables store partition in the Hive metastore, and use the " +
           s"metastore to prune partitions during query planning when " +
           s"${HIVE_METASTORE_PARTITION_PRUNING.key} is set to true.")
      .version("2.1.1")
      .booleanConf
      .createWithDefault(true)

  val HIVE_FILESOURCE_PARTITION_FILE_CACHE_SIZE =
    buildConf("spark.sql.hive.filesourcePartitionFileCacheSize")
      .doc("When nonzero, enable caching of partition file metadata in memory. All tables share " +
           "a cache that can use up to specified num bytes for file metadata. This conf only " +
           "has an effect when hive filesource partition management is enabled.")
      .version("2.1.1")
      .longConf
      .createWithDefault(250 * 1024 * 1024)

  object HiveCaseSensitiveInferenceMode extends Enumeration {
    val INFER_AND_SAVE, INFER_ONLY, NEVER_INFER = Value
  }

  val HIVE_CASE_SENSITIVE_INFERENCE = buildConf("spark.sql.hive.caseSensitiveInferenceMode")
    .internal()
    .doc("Sets the action to take when a case-sensitive schema cannot be read from a Hive Serde " +
      "table's properties when reading the table with Spark native data sources. Valid options " +
      "include INFER_AND_SAVE (infer the case-sensitive schema from the underlying data files " +
      "and write it back to the table properties), INFER_ONLY (infer the schema but don't " +
      "attempt to write it to the table properties) and NEVER_INFER (the default mode-- fallback " +
      "to using the case-insensitive metastore schema instead of inferring).")
    .version("2.1.1")
    .stringConf
    .transform(_.toUpperCase(Locale.ROOT))
    .checkValues(HiveCaseSensitiveInferenceMode.values.map(_.toString))
    .createWithDefault(HiveCaseSensitiveInferenceMode.NEVER_INFER.toString)
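
  // Illustrative sketch, assuming a SparkSession `spark`: opting back into schema inference
  // for Hive Serde tables whose case-sensitive schema is missing from the table properties.
  //
  //   spark.conf.set("spark.sql.hive.caseSensitiveInferenceMode", "INFER_AND_SAVE")
  //   // The inferred case-sensitive schema is written back to the table properties, so
  //   // subsequent reads don't need to infer it again.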

  val HIVE_TABLE_PROPERTY_LENGTH_THRESHOLD =
    buildConf("spark.sql.hive.tablePropertyLengthThreshold")
      .internal()
      .doc("The maximum length allowed in a single cell when storing Spark-specific information " +
        "in Hive's metastore as table properties. Currently it covers 2 things: the schema's " +
        "JSON string, the histogram of column statistics.")
      .version("3.2.0")
      .intConf
      .createOptional

  val OPTIMIZER_METADATA_ONLY = buildConf("spark.sql.optimizer.metadataOnly")
    .internal()
    .doc("When true, enable the metadata-only query optimization that use the table's metadata " +
      "to produce the partition columns instead of table scans. It applies when all the columns " +
      "scanned are partition columns and the query has an aggregate operator that satisfies " +
      "distinct semantics. By default the optimization is disabled, and deprecated as of Spark " +
      "3.0 since it may return incorrect results when the files are empty, see also SPARK-26709." +
      "It will be removed in the future releases. If you must use, use 'SparkSessionExtensions' " +
      "instead to inject it as a custom rule.")
    .version("2.1.1")
    .booleanConf
    .createWithDefault(false)

  val COLUMN_NAME_OF_CORRUPT_RECORD = buildConf("spark.sql.columnNameOfCorruptRecord")
    .doc("The name of internal column for storing raw/un-parsed JSON and CSV records that fail " +
      "to parse.")
    .version("1.2.0")
    .stringConf
    .createWithDefault("_corrupt_record")

  val BROADCAST_TIMEOUT = buildConf("spark.sql.broadcastTimeout")
    .doc("Timeout in seconds for the broadcast wait time in broadcast joins.")
    .version("1.3.0")
    .timeConf(TimeUnit.SECONDS)
    .createWithDefaultString(s"${5 * 60}")

  // This is only used for the thriftserver
  val THRIFTSERVER_POOL = buildConf("spark.sql.thriftserver.scheduler.pool")
    .doc("Set a Fair Scheduler pool for a JDBC client session.")
    .version("1.1.1")
    .stringConf
    .createOptional

  val THRIFTSERVER_INCREMENTAL_COLLECT =
    buildConf("spark.sql.thriftServer.incrementalCollect")
      .internal()
      .doc("When true, enable incremental collection for execution in Thrift Server.")
      .version("2.0.3")
      .booleanConf
      .createWithDefault(false)

  val THRIFTSERVER_FORCE_CANCEL =
    buildConf("spark.sql.thriftServer.interruptOnCancel")
      .doc("When true, all running tasks will be interrupted if one cancels a query. " +
        "When false, all running tasks will remain until finished.")
      .version("3.2.0")
      .booleanConf
      .createWithDefault(true)

  val THRIFTSERVER_QUERY_TIMEOUT =
    buildConf("spark.sql.thriftServer.queryTimeout")
      .doc("Set a query duration timeout in seconds in Thrift Server. If the timeout is set to " +
        "a positive value, a running query will be cancelled automatically when the timeout is " +
        "exceeded, otherwise the query continues to run till completion. If timeout values are " +
        "set for each statement via `java.sql.Statement.setQueryTimeout` and they are smaller " +
        "than this configuration value, they take precedence. If you set this timeout and prefer " +
        "to cancel the queries right away without waiting task to finish, consider enabling " +
        s"${THRIFTSERVER_FORCE_CANCEL.key} together.")
      .version("3.1.0")
      .timeConf(TimeUnit.SECONDS)
      .createWithDefault(0L)

  val THRIFTSERVER_UI_STATEMENT_LIMIT =
    buildConf("spark.sql.thriftserver.ui.retainedStatements")
      .doc("The number of SQL statements kept in the JDBC/ODBC web UI history.")
      .version("1.4.0")
      .intConf
      .createWithDefault(200)

  val THRIFTSERVER_UI_SESSION_LIMIT = buildConf("spark.sql.thriftserver.ui.retainedSessions")
    .doc("The number of SQL client sessions kept in the JDBC/ODBC web UI history.")
    .version("1.4.0")
    .intConf
    .createWithDefault(200)

  // This is used to set the default data source
  val DEFAULT_DATA_SOURCE_NAME = buildConf("spark.sql.sources.default")
    .doc("The default data source to use in input/output.")
    .version("1.3.0")
    .stringConf
    .createWithDefault("parquet")

  val CONVERT_CTAS = buildConf("spark.sql.hive.convertCTAS")
    .internal()
    .doc("When true, a table created by a Hive CTAS statement (no USING clause) " +
      "without specifying any storage property will be converted to a data source table, " +
      s"using the data source set by ${DEFAULT_DATA_SOURCE_NAME.key}.")
    .version("2.0.0")
    .booleanConf
    .createWithDefault(false)

  val GATHER_FASTSTAT = buildConf("spark.sql.hive.gatherFastStats")
      .internal()
      .doc("When true, fast stats (number of files and total size of all files) will be gathered" +
        " in parallel while repairing table partitions to avoid the sequential listing in Hive" +
        " metastore.")
      .version("2.0.1")
      .booleanConf
      .createWithDefault(true)

  val PARTITION_COLUMN_TYPE_INFERENCE =
    buildConf("spark.sql.sources.partitionColumnTypeInference.enabled")
      .doc("When true, automatically infer the data types for partitioned columns.")
      .version("1.5.0")
      .booleanConf
      .createWithDefault(true)

  val BUCKETING_ENABLED = buildConf("spark.sql.sources.bucketing.enabled")
    .doc("When false, we will treat bucketed table as normal table")
    .version("2.0.0")
    .booleanConf
    .createWithDefault(true)
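
  // Illustrative sketch, assuming a SparkSession `spark` and a DataFrame `df` with a `user_id`
  // column; the table name is hypothetical. Bucket metadata is only honored while this conf
  // is true:
  //
  //   df.write.bucketBy(16, "user_id").sortBy("user_id").saveAsTable("events_bucketed")
  //   spark.conf.set("spark.sql.sources.bucketing.enabled", "false")  // scan it as a normal table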

  val V2_BUCKETING_ENABLED = buildConf("spark.sql.sources.v2.bucketing.enabled")
      .doc(s"Similar to ${BUCKETING_ENABLED.key}, this config is used to enable bucketing for V2 " +
        "data sources. When turned on, Spark will recognize the specific distribution " +
        "reported by a V2 data source through SupportsReportPartitioning, and will try to " +
        "avoid shuffle if necessary.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(false)

  val V2_BUCKETING_PUSH_PART_VALUES_ENABLED =
    buildConf("spark.sql.sources.v2.bucketing.pushPartValues.enabled")
      .doc(s"Whether to pushdown common partition values when ${V2_BUCKETING_ENABLED.key} is " +
        "enabled. When turned on, if both sides of a join are of KeyGroupedPartitioning and if " +
        "they share compatible partition keys, even if they don't have the exact same partition " +
        "values, Spark will calculate a superset of partition values and pushdown that info to " +
        "scan nodes, which will use empty partitions for the missing partition values on either " +
        "side. This could help to eliminate unnecessary shuffles")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(false)

  val V2_BUCKETING_PARTIALLY_CLUSTERED_DISTRIBUTION_ENABLED =
    buildConf("spark.sql.sources.v2.bucketing.partiallyClusteredDistribution.enabled")
      .doc("During a storage-partitioned join, whether to allow input partitions to be " +
        "partially clustered, when both sides of the join are of KeyGroupedPartitioning. At " +
        "planning time, Spark will pick the side with less data size based on table " +
        "statistics, group and replicate them to match the other side. This is an optimization " +
        "on skew join and can help to reduce data skewness when certain partitions are assigned " +
        s"large amount of data. This config requires both ${V2_BUCKETING_ENABLED.key} and " +
        s"${V2_BUCKETING_PUSH_PART_VALUES_ENABLED.key} to be enabled")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(false)

  val BUCKETING_MAX_BUCKETS = buildConf("spark.sql.sources.bucketing.maxBuckets")
    .doc("The maximum number of buckets allowed.")
    .version("2.4.0")
    .intConf
    .checkValue(_ > 0, "the value of spark.sql.sources.bucketing.maxBuckets must be greater than 0")
    .createWithDefault(100000)

  val AUTO_BUCKETED_SCAN_ENABLED =
    buildConf("spark.sql.sources.bucketing.autoBucketedScan.enabled")
      .doc("When true, decide whether to do bucketed scan on input tables based on query plan " +
        "automatically. Do not use bucketed scan if 1. query does not have operators to utilize " +
        "bucketing (e.g. join, group-by, etc), or 2. there's an exchange operator between these " +
        s"operators and table scan. Note when '${BUCKETING_ENABLED.key}' is set to " +
        "false, this configuration does not take any effect.")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(true)

  val CAN_CHANGE_CACHED_PLAN_OUTPUT_PARTITIONING =
    buildConf("spark.sql.optimizer.canChangeCachedPlanOutputPartitioning")
      .internal()
      .doc("Whether to forcibly enable some optimization rules that can change the output " +
        "partitioning of a cached query when executing it for caching. If it is set to true, " +
        "queries may need an extra shuffle to read the cached data. This configuration is " +
        "enabled by default. The optimization rules enabled by this configuration " +
        s"are ${ADAPTIVE_EXECUTION_ENABLED.key} and ${AUTO_BUCKETED_SCAN_ENABLED.key}.")
      .version("3.2.0")
      .booleanConf
      .createWithDefault(true)

  val CROSS_JOINS_ENABLED = buildConf("spark.sql.crossJoin.enabled")
    .internal()
    .doc("When false, we will throw an error if a query contains a cartesian product without " +
        "explicit CROSS JOIN syntax.")
    .version("2.0.0")
    .booleanConf
    .createWithDefault(true)

  val ORDER_BY_ORDINAL = buildConf("spark.sql.orderByOrdinal")
    .doc("When true, the ordinal numbers are treated as the position in the select list. " +
         "When false, the ordinal numbers in order/sort by clause are ignored.")
    .version("2.0.0")
    .booleanConf
    .createWithDefault(true)

  val GROUP_BY_ORDINAL = buildConf("spark.sql.groupByOrdinal")
    .doc("When true, the ordinal numbers in group by clauses are treated as the position " +
      "in the select list. When false, the ordinal numbers are ignored.")
    .version("2.0.0")
    .booleanConf
    .createWithDefault(true)
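
  // Illustrative sketch, assuming a SparkSession `spark` and a hypothetical table `sales`:
  // with this conf enabled, the ordinal refers to the select-list position.
  //
  //   spark.sql("SELECT region, sum(amount) FROM sales GROUP BY 1")  // 1 refers to `region`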

  val GROUP_BY_ALIASES = buildConf("spark.sql.groupByAliases")
    .doc("When true, aliases in a select list can be used in group by clauses. When false, " +
      "an analysis exception is thrown in the case.")
    .version("2.2.0")
    .booleanConf
    .createWithDefault(true)

  // The output committer class used by data sources. The specified class needs to be a
  // subclass of org.apache.hadoop.mapreduce.OutputCommitter.
  val OUTPUT_COMMITTER_CLASS = buildConf("spark.sql.sources.outputCommitterClass")
    .version("1.4.0")
    .internal()
    .stringConf
    .createOptional

  val FILE_COMMIT_PROTOCOL_CLASS =
    buildConf("spark.sql.sources.commitProtocolClass")
      .version("2.1.1")
      .internal()
      .stringConf
      .createWithDefault(
        "org.apache.spark.sql.execution.datasources.SQLHadoopMapReduceCommitProtocol")

  val PARALLEL_PARTITION_DISCOVERY_THRESHOLD =
    buildConf("spark.sql.sources.parallelPartitionDiscovery.threshold")
      .doc("The maximum number of paths allowed for listing files at driver side. If the number " +
        "of detected paths exceeds this value during partition discovery, it tries to list the " +
        "files with another Spark distributed job. This configuration is effective only when " +
        "using file-based sources such as Parquet, JSON and ORC.")
      .version("1.5.0")
      .intConf
      .checkValue(parallel => parallel >= 0, "The maximum number of paths allowed for listing " +
        "files at driver side must not be negative")
      .createWithDefault(32)

  val PARALLEL_PARTITION_DISCOVERY_PARALLELISM =
    buildConf("spark.sql.sources.parallelPartitionDiscovery.parallelism")
      .doc("The number of parallelism to list a collection of path recursively, Set the " +
        "number to prevent file listing from generating too many tasks.")
      .version("2.1.1")
      .internal()
      .intConf
      .createWithDefault(10000)

  val IGNORE_DATA_LOCALITY =
    buildConf("spark.sql.sources.ignoreDataLocality")
      .doc("If true, Spark will not fetch the block locations for each file on " +
        "listing files. This speeds up file listing, but the scheduler cannot " +
        "schedule tasks to take advantage of data locality. It can be particularly " +
        "useful if data is read from a remote cluster so the scheduler could never " +
        "take advantage of locality anyway.")
      .version("3.0.0")
      .internal()
      .booleanConf
      .createWithDefault(false)

  // Whether to automatically resolve ambiguity in join conditions for self-joins.
  // See SPARK-6231.
  val DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY =
    buildConf("spark.sql.selfJoinAutoResolveAmbiguity")
      .version("1.4.0")
      .internal()
      .booleanConf
      .createWithDefault(true)

  val FAIL_AMBIGUOUS_SELF_JOIN_ENABLED =
    buildConf("spark.sql.analyzer.failAmbiguousSelfJoin")
      .doc("When true, fail the Dataset query if it contains ambiguous self-join.")
      .version("3.0.0")
      .internal()
      .booleanConf
      .createWithDefault(true)

  // Whether to retain group by columns or not in GroupedData.agg.
  val DATAFRAME_RETAIN_GROUP_COLUMNS = buildConf("spark.sql.retainGroupColumns")
    .version("1.4.0")
    .internal()
    .booleanConf
    .createWithDefault(true)

  val DATAFRAME_PIVOT_MAX_VALUES = buildConf("spark.sql.pivotMaxValues")
    .doc("When doing a pivot without specifying values for the pivot column this is the maximum " +
      "number of (distinct) values that will be collected without error.")
    .version("1.6.0")
    .intConf
    .createWithDefault(10000)
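
  // Illustrative sketch, assuming a SparkSession `spark` and a DataFrame `df` with `year`,
  // `course` and `earnings` columns. Pivoting without an explicit value list first collects
  // the distinct pivot values, bounded by this conf:
  //
  //   spark.conf.set("spark.sql.pivotMaxValues", "20000")
  //   df.groupBy("year").pivot("course").sum("earnings")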

  val RUN_SQL_ON_FILES = buildConf("spark.sql.runSQLOnFiles")
    .internal()
    .doc("When true, we could use `datasource`.`path` as table in SQL query.")
    .version("1.6.0")
    .booleanConf
    .createWithDefault(true)
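
  // Illustrative sketch, assuming a SparkSession `spark` and a hypothetical path: querying a
  // file directly, which only works while this conf is true.
  //
  //   spark.sql("SELECT count(*) FROM parquet.`/tmp/events`")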

  val WHOLESTAGE_CODEGEN_ENABLED = buildConf("spark.sql.codegen.wholeStage")
    .internal()
    .doc("When true, the whole stage (of multiple operators) will be compiled into single java" +
      " method.")
    .version("2.0.0")
    .booleanConf
    .createWithDefault(true)

  val WHOLESTAGE_CODEGEN_USE_ID_IN_CLASS_NAME =
    buildConf("spark.sql.codegen.useIdInClassName")
    .internal()
    .doc("When true, embed the (whole-stage) codegen stage ID into " +
      "the class name of the generated class as a suffix")
    .version("2.3.1")
    .booleanConf
    .createWithDefault(true)

  val WHOLESTAGE_MAX_NUM_FIELDS = buildConf("spark.sql.codegen.maxFields")
    .internal()
    .doc("The maximum number of fields (including nested fields) that will be supported before" +
      " deactivating whole-stage codegen.")
    .version("2.0.0")
    .intConf
    .createWithDefault(100)

  val CODEGEN_FACTORY_MODE = buildConf("spark.sql.codegen.factoryMode")
    .doc("This config determines the fallback behavior of several codegen generators " +
      "during tests. `FALLBACK` means trying codegen first and then falling back to " +
      "interpreted if any compile error happens. Disabling fallback if `CODEGEN_ONLY`. " +
      "`NO_CODEGEN` skips codegen and goes interpreted path always. Note that " +
      "this configuration is only for the internal usage, and NOT supposed to be set by " +
      "end users.")
    .version("2.4.0")
    .internal()
    .stringConf
    .checkValues(CodegenObjectFactoryMode.values.map(_.toString))
    .createWithDefault(CodegenObjectFactoryMode.FALLBACK.toString)

  val CODEGEN_FALLBACK = buildConf("spark.sql.codegen.fallback")
    .internal()
    .doc("When true, (whole stage) codegen could be temporary disabled for the part of query that" +
      " fail to compile generated code")
    .version("2.0.0")
    .booleanConf
    .createWithDefault(true)

  val CODEGEN_LOGGING_MAX_LINES = buildConf("spark.sql.codegen.logging.maxLines")
    .internal()
    .doc("The maximum number of codegen lines to log when errors occur. Use -1 for unlimited.")
    .version("2.3.0")
    .intConf
    .checkValue(maxLines => maxLines >= -1, "The maximum number of lines must be a positive " +
      "integer, 0 to disable logging, or -1 to apply no limit.")
    .createWithDefault(1000)

  val WHOLESTAGE_HUGE_METHOD_LIMIT = buildConf("spark.sql.codegen.hugeMethodLimit")
    .internal()
    .doc("The maximum bytecode size of a single compiled Java function generated by whole-stage " +
      "codegen. When the compiled function exceeds this threshold, the whole-stage codegen is " +
      "deactivated for this subtree of the current query plan. The default value is 65535, which " +
      "is the largest bytecode size possible for a valid Java method. When running on HotSpot, " +
      s"it may be preferable to set the value to ${CodeGenerator.DEFAULT_JVM_HUGE_METHOD_LIMIT} " +
      "to match HotSpot's implementation.")
    .version("2.3.0")
    .intConf
    .createWithDefault(65535)

  val CODEGEN_METHOD_SPLIT_THRESHOLD = buildConf("spark.sql.codegen.methodSplitThreshold")
    .internal()
    .doc("The threshold of source-code splitting in the codegen. When the number of characters " +
      "in a single Java function (without comment) exceeds the threshold, the function will be " +
      "automatically split to multiple smaller ones. We cannot know how many bytecode will be " +
      "generated, so use the code length as metric. When running on HotSpot, a function's " +
      "bytecode should not go beyond 8KB, otherwise it will not be JITted; it also should not " +
      "be too small, otherwise there will be many function calls.")
    .version("3.0.0")
    .intConf
    .checkValue(threshold => threshold > 0, "The threshold must be a positive integer.")
    .createWithDefault(1024)

  val WHOLESTAGE_SPLIT_CONSUME_FUNC_BY_OPERATOR =
    buildConf("spark.sql.codegen.splitConsumeFuncByOperator")
      .internal()
      .doc("When true, whole stage codegen would put the logic of consuming rows of each " +
        "physical operator into individual methods, instead of a single big method. This can be " +
        "used to avoid oversized function that can miss the opportunity of JIT optimization.")
      .version("2.3.1")
      .booleanConf
      .createWithDefault(true)

  val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes")
    .doc("The maximum number of bytes to pack into a single partition when reading files. " +
      "This configuration is effective only when using file-based sources such as Parquet, JSON " +
      "and ORC.")
    .version("2.0.0")
    .bytesConf(ByteUnit.BYTE)
    .createWithDefaultString("128MB") // parquet.block.size

  val FILES_OPEN_COST_IN_BYTES = buildConf("spark.sql.files.openCostInBytes")
    .internal()
    .doc("The estimated cost to open a file, measured by the number of bytes could be scanned in" +
      " the same time. This is used when putting multiple files into a partition. It's better to" +
      " over estimated, then the partitions with small files will be faster than partitions with" +
      " bigger files (which is scheduled first). This configuration is effective only when using" +
      " file-based sources such as Parquet, JSON and ORC.")
    .version("2.0.0")
    .bytesConf(ByteUnit.BYTE)
    .createWithDefaultString("4MB")

  val FILES_MIN_PARTITION_NUM = buildConf("spark.sql.files.minPartitionNum")
    .doc("The suggested (not guaranteed) minimum number of split file partitions. " +
      s"If not set, the default value is `${LEAF_NODE_DEFAULT_PARALLELISM.key}`. " +
      "This configuration is effective only when using file-based sources " +
      "such as Parquet, JSON and ORC.")
    .version("3.1.0")
    .intConf
    .checkValue(v => v > 0, "The min partition number must be a positive integer.")
    .createOptional

  val FILES_MAX_PARTITION_NUM = buildConf("spark.sql.files.maxPartitionNum")
    .doc("The suggested (not guaranteed) maximum number of split file partitions. If it is set, " +
      "Spark will rescale each partition to make the number of partitions is close to this " +
      "value if the initial number of partitions exceeds this value. This configuration is " +
      "effective only when using file-based sources such as Parquet, JSON and ORC.")
    .version("3.5.0")
    .intConf
    .checkValue(v => v > 0, "The maximum number of partitions must be a positive integer.")
    .createOptional

  val IGNORE_CORRUPT_FILES = buildConf("spark.sql.files.ignoreCorruptFiles")
    .doc("Whether to ignore corrupt files. If true, the Spark jobs will continue to run when " +
      "encountering corrupted files and the contents that have been read will still be returned. " +
      "This configuration is effective only when using file-based sources such as Parquet, JSON " +
      "and ORC.")
    .version("2.1.1")
    .booleanConf
    .createWithDefault(false)

  val IGNORE_MISSING_FILES = buildConf("spark.sql.files.ignoreMissingFiles")
    .doc("Whether to ignore missing files. If true, the Spark jobs will continue to run when " +
      "encountering missing files and the contents that have been read will still be returned. " +
      "This configuration is effective only when using file-based sources such as Parquet, JSON " +
      "and ORC.")
    .version("2.3.0")
    .booleanConf
    .createWithDefault(false)

  val MAX_RECORDS_PER_FILE = buildConf("spark.sql.files.maxRecordsPerFile")
    .doc("Maximum number of records to write out to a single file. " +
      "If this value is zero or negative, there is no limit.")
    .version("2.2.0")
    .longConf
    .createWithDefault(0)
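
  // Illustrative sketch, assuming a DataFrame `df` and a hypothetical path. The same limit can
  // also be set per write via the DataFrameWriter option of the same name:
  //
  //   df.write.option("maxRecordsPerFile", "1000000").parquet("/tmp/capped_out")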

  val EXCHANGE_REUSE_ENABLED = buildConf("spark.sql.exchange.reuse")
    .internal()
    .doc("When true, the planner will try to find out duplicated exchanges and re-use them.")
    .version("2.0.0")
    .booleanConf
    .createWithDefault(true)

  val SUBQUERY_REUSE_ENABLED = buildConf("spark.sql.execution.reuseSubquery")
    .internal()
    .doc("When true, the planner will try to find out duplicated subqueries and re-use them.")
    .version("3.0.0")
    .booleanConf
    .createWithDefault(true)

  val REMOVE_REDUNDANT_PROJECTS_ENABLED = buildConf("spark.sql.execution.removeRedundantProjects")
    .internal()
    .doc("Whether to remove redundant project exec node based on children's output and " +
      "ordering requirement.")
    .version("3.1.0")
    .booleanConf
    .createWithDefault(true)

  val REMOVE_REDUNDANT_SORTS_ENABLED = buildConf("spark.sql.execution.removeRedundantSorts")
    .internal()
    .doc("Whether to remove redundant physical sort node")
    .version("2.4.8")
    .booleanConf
    .createWithDefault(true)

  val REPLACE_HASH_WITH_SORT_AGG_ENABLED = buildConf("spark.sql.execution.replaceHashWithSortAgg")
    .internal()
    .doc("Whether to replace hash aggregate node with sort aggregate based on children's ordering")
    .version("3.3.0")
    .booleanConf
    .createWithDefault(false)

  val USE_PARTITION_EVALUATOR = buildConf("spark.sql.execution.usePartitionEvaluator")
    .internal()
    .doc("When true, use PartitionEvaluator to execute SQL operators.")
    .version("3.5.0")
    .booleanConf
    .createWithDefault(false)

  val STATE_STORE_PROVIDER_CLASS =
    buildConf("spark.sql.streaming.stateStore.providerClass")
      .internal()
      .doc(
        "The class used to manage state data in stateful streaming queries. This class must " +
          "be a subclass of StateStoreProvider, and must have a zero-arg constructor. " +
          "Note: For structured streaming, this configuration cannot be changed between query " +
          "restarts from the same checkpoint location.")
      .version("2.3.0")
      .stringConf
      .createWithDefault(
        "org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider")

  val STATE_SCHEMA_CHECK_ENABLED =
    buildConf("spark.sql.streaming.stateStore.stateSchemaCheck")
      .doc("When true, Spark will validate the state schema against schema on existing state and " +
        "fail query if it's incompatible.")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(true)

  val STATE_STORE_MIN_DELTAS_FOR_SNAPSHOT =
    buildConf("spark.sql.streaming.stateStore.minDeltasForSnapshot")
      .internal()
      .doc("Minimum number of state store delta files that needs to be generated before they " +
        "consolidated into snapshots.")
      .version("2.0.0")
      .intConf
      .createWithDefault(10)

  val STATE_STORE_FORMAT_VALIDATION_ENABLED =
    buildConf("spark.sql.streaming.stateStore.formatValidation.enabled")
      .internal()
      .doc("When true, check if the data from state store is valid or not when running streaming " +
        "queries. This can happen if the state store format has been changed. Note, the feature " +
        "is only effective in the build-in HDFS state store provider now.")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(true)

  val FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION =
    buildConf("spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion")
      .internal()
      .doc("State format version used by flatMapGroupsWithState operation in a streaming query")
      .version("2.4.0")
      .intConf
      .checkValue(v => Set(1, 2).contains(v), "Valid versions are 1 and 2")
      .createWithDefault(2)

  val CHECKPOINT_LOCATION = buildConf("spark.sql.streaming.checkpointLocation")
    .doc("The default location for storing checkpoint data for streaming queries.")
    .version("2.0.0")
    .stringConf
    .createOptional
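
  // Illustrative sketch, assuming a SparkSession `spark`, a streaming DataFrame `streamingDf`
  // and hypothetical paths. A per-query `checkpointLocation` option takes precedence over this
  // session-level default:
  //
  //   spark.conf.set("spark.sql.streaming.checkpointLocation", "/tmp/checkpoints")
  //   streamingDf.writeStream
  //     .format("parquet")
  //     .option("checkpointLocation", "/tmp/checkpoints/query1")  // per-query override
  //     .start("/tmp/stream_out")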

  val FORCE_DELETE_TEMP_CHECKPOINT_LOCATION =
    buildConf("spark.sql.streaming.forceDeleteTempCheckpointLocation")
      .doc("When true, enable temporary checkpoint locations force delete.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(false)

  val MIN_BATCHES_TO_RETAIN = buildConf("spark.sql.streaming.minBatchesToRetain")
    .internal()
    .doc("The minimum number of batches that must be retained and made recoverable.")
    .version("2.1.1")
    .intConf
    .createWithDefault(100)

  val MAX_BATCHES_TO_RETAIN_IN_MEMORY = buildConf("spark.sql.streaming.maxBatchesToRetainInMemory")
    .internal()
    .doc("The maximum number of batches which will be retained in memory to avoid " +
      "loading from files. The value adjusts a trade-off between memory usage vs cache miss: " +
      "'2' covers both success and direct failure cases, '1' covers only success case, " +
      "and '0' covers extreme case - disable cache to maximize memory size of executors.")
    .version("2.4.0")
    .intConf
    .createWithDefault(2)

  val STREAMING_MAINTENANCE_INTERVAL =
    buildConf("spark.sql.streaming.stateStore.maintenanceInterval")
      .internal()
      .doc("The interval in milliseconds between triggering maintenance tasks in StateStore. " +
        "The maintenance task executes background maintenance task in all the loaded store " +
        "providers if they are still the active instances according to the coordinator. If not, " +
        "inactive instances of store providers will be closed.")
      .version("2.0.0")
      .timeConf(TimeUnit.MILLISECONDS)
      .createWithDefault(TimeUnit.MINUTES.toMillis(1)) // 1 minute

  val STATE_STORE_COMPRESSION_CODEC =
    buildConf("spark.sql.streaming.stateStore.compression.codec")
      .internal()
      .doc("The codec used to compress delta and snapshot files generated by StateStore. " +
        "By default, Spark provides four codecs: lz4, lzf, snappy, and zstd. You can also " +
        "use fully qualified class names to specify the codec. Default codec is lz4.")
      .version("3.1.0")
      .stringConf
      .createWithDefault("lz4")

  val CHECKPOINT_RENAMEDFILE_CHECK_ENABLED =
    buildConf("spark.sql.streaming.checkpoint.renamedFileCheck.enabled")
      .doc("When true, Spark will validate if renamed checkpoint file exists.")
      .internal()
      .version("3.4.0")
      .booleanConf
      .createWithDefault(false)

  /**
   * Note: this is defined in `RocksDBConf.FORMAT_VERSION`. These two places should be updated
   * together.
   */
  val STATE_STORE_ROCKSDB_FORMAT_VERSION =
    buildConf("spark.sql.streaming.stateStore.rocksdb.formatVersion")
      .internal()
      .doc("Set the RocksDB format version. This will be stored in the checkpoint when starting " +
        "a streaming query. The checkpoint will use this RocksDB format version in the entire " +
        "lifetime of the query.")
      .version("3.2.0")
      .intConf
      .checkValue(_ >= 0, "Must not be negative")
      // 5 is the default table format version for RocksDB 6.20.3.
      .createWithDefault(5)

  val STREAMING_AGGREGATION_STATE_FORMAT_VERSION =
    buildConf("spark.sql.streaming.aggregation.stateFormatVersion")
      .internal()
      .doc("State format version used by streaming aggregation operations in a streaming query. " +
        "State between versions are tend to be incompatible, so state format version shouldn't " +
        "be modified after running.")
      .version("2.4.0")
      .intConf
      .checkValue(v => Set(1, 2).contains(v), "Valid versions are 1 and 2")
      .createWithDefault(2)

  val STREAMING_STOP_ACTIVE_RUN_ON_RESTART =
    buildConf("spark.sql.streaming.stopActiveRunOnRestart")
    .doc("Running multiple runs of the same streaming query concurrently is not supported. " +
      "If we find a concurrent active run for a streaming query (in the same or different " +
      "SparkSessions on the same cluster) and this flag is true, we will stop the old streaming " +
      "query run to start the new one.")
    .version("3.0.0")
    .booleanConf
    .createWithDefault(true)

  val STREAMING_JOIN_STATE_FORMAT_VERSION =
    buildConf("spark.sql.streaming.join.stateFormatVersion")
      .internal()
      .doc("State format version used by streaming join operations in a streaming query. " +
        "State between versions are tend to be incompatible, so state format version shouldn't " +
        "be modified after running.")
      .version("3.0.0")
      .intConf
      .checkValue(v => Set(1, 2).contains(v), "Valid versions are 1 and 2")
      .createWithDefault(2)

  val STREAMING_SESSION_WINDOW_MERGE_SESSIONS_IN_LOCAL_PARTITION =
    buildConf("spark.sql.streaming.sessionWindow.merge.sessions.in.local.partition")
      .doc("When true, streaming session window sorts and merge sessions in local partition " +
        "prior to shuffle. This is to reduce the rows to shuffle, but only beneficial when " +
        "there're lots of rows in a batch being assigned to same sessions.")
      .version("3.2.0")
      .booleanConf
      .createWithDefault(false)

  val STREAMING_SESSION_WINDOW_STATE_FORMAT_VERSION =
    buildConf("spark.sql.streaming.sessionWindow.stateFormatVersion")
      .internal()
      .doc("State format version used by streaming session window in a streaming query. " +
        "State between versions are tend to be incompatible, so state format version shouldn't " +
        "be modified after running.")
      .version("3.2.0")
      .intConf
      .checkValue(v => Set(1).contains(v), "Valid version is 1")
      .createWithDefault(1)

  val UNSUPPORTED_OPERATION_CHECK_ENABLED =
    buildConf("spark.sql.streaming.unsupportedOperationCheck")
      .internal()
      .doc("When true, the logical plan for streaming query will be checked for unsupported" +
        " operations.")
      .version("2.0.0")
      .booleanConf
      .createWithDefault(true)

  val USE_DEPRECATED_KAFKA_OFFSET_FETCHING =
    buildConf("spark.sql.streaming.kafka.useDeprecatedOffsetFetching")
      .internal()
      .doc("When true, the deprecated Consumer based offset fetching used which could cause " +
        "infinite wait in Spark queries. Such cases query restart is the only workaround. " +
        "For further details please see Offset Fetching chapter of Structured Streaming Kafka " +
        "Integration Guide.")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(false)

  val STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED =
    buildConf("spark.sql.streaming.statefulOperator.checkCorrectness.enabled")
      .internal()
      .doc("When true, the stateful operators for streaming query will be checked for possible " +
        "correctness issue due to global watermark. The correctness issue comes from queries " +
        "containing stateful operation which can emit rows older than the current watermark " +
        "plus allowed late record delay, which are \"late rows\" in downstream stateful " +
        "operations and these rows can be discarded. Please refer the programming guide doc for " +
        "more details. Once the issue is detected, Spark will throw analysis exception. " +
        "When this config is disabled, Spark will just print warning message for users. " +
        "Prior to Spark 3.1.0, the behavior is disabling this config.")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(true)

  val STATEFUL_OPERATOR_ALLOW_MULTIPLE =
    buildConf("spark.sql.streaming.statefulOperator.allowMultiple")
      .internal()
      .doc("When true, multiple stateful operators are allowed to be present in a streaming " +
        "pipeline. The support for multiple stateful operators introduces a minor (semantically " +
        "correct) change in respect to late record filtering - late records are detected and " +
        "filtered in respect to the watermark from the previous microbatch instead of the " +
        "current one. This is a behavior change for Spark streaming pipelines and we allow " +
        "users to revert to the previous behavior of late record filtering (late records are " +
        "detected and filtered by comparing with the current microbatch watermark) by setting " +
        "the flag value to false. In this mode, only a single stateful operator will be allowed " +
        "in a streaming pipeline.")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(true)

  val STATEFUL_OPERATOR_USE_STRICT_DISTRIBUTION =
    buildConf("spark.sql.streaming.statefulOperator.useStrictDistribution")
      .internal()
      .doc("The purpose of this config is only compatibility; DO NOT MANUALLY CHANGE THIS!!! " +
        "When true, the stateful operator for streaming query will use " +
        "StatefulOpClusteredDistribution which guarantees stable state partitioning as long as " +
        "the operator provides consistent grouping keys across the lifetime of query. " +
        "When false, the stateful operator for streaming query will use ClusteredDistribution " +
        "which is not sufficient to guarantee stable state partitioning despite the operator " +
        "provides consistent grouping keys across the lifetime of query. " +
        "This config will be set to true for new streaming queries to guarantee stable state " +
        "partitioning, and set to false for existing streaming queries to not break queries " +
        "which are restored from existing checkpoints. Please refer SPARK-38204 for details.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(true)

  val FILESTREAM_SINK_METADATA_IGNORED =
    buildConf("spark.sql.streaming.fileStreamSink.ignoreMetadata")
      .internal()
      .doc("If this is enabled, when Spark reads from the results of a streaming query written " +
        "by `FileStreamSink`, Spark will ignore the metadata log and treat it as normal path to " +
        "read, e.g. listing files using HDFS APIs.")
      .version("3.2.0")
      .booleanConf
      .createWithDefault(false)

  /**
   * SPARK-38809 - Config option to allow skipping null values for hash based stream-stream joins.
   * It's possible for us to see nulls if state was written with an older version of Spark,
   * the state was corrupted on disk, or if we had an issue with the state iterators.
   */
  val STATE_STORE_SKIP_NULLS_FOR_STREAM_STREAM_JOINS =
  buildConf("spark.sql.streaming.stateStore.skipNullsForStreamStreamJoins.enabled")
    .internal()
    .doc("When true, this config will skip null values in hash based stream-stream joins. " +
      "The number of skipped null values will be shown as custom metric of stream join operator. " +
      "If the streaming query was started with Spark 3.5 or above, please exercise caution " +
      "before enabling this config since it may hide potential data loss/corruption issues.")
    .version("3.3.0")
    .booleanConf
    .createWithDefault(false)

  val ASYNC_LOG_PURGE =
    buildConf("spark.sql.streaming.asyncLogPurge.enabled")
      .internal()
      .doc("When true, purging the offset log and " +
        "commit log of old entries will be done asynchronously.")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(true)

  val STREAMING_METADATA_CACHE_ENABLED =
    buildConf("spark.sql.streaming.metadataCache.enabled")
      .internal()
      .doc("Whether the streaming HDFSMetadataLog caches the metadata of the latest two batches.")
      .booleanConf
      .createWithDefault(true)


  val STREAMING_OPTIMIZE_ONE_ROW_PLAN_ENABLED =
    buildConf("spark.sql.streaming.optimizeOneRowPlan.enabled")
      .internal()
      .doc("When true, enable OptimizeOneRowPlan rule for the case where the child is a " +
        "streaming Dataset. This is a fallback flag to revert the 'incorrect' behavior, hence " +
        "this configuration must not be used without understanding in depth. Use this only to " +
        "quickly recover failure in existing query!")
      .version("4.0.0")
      .booleanConf
      .createWithDefault(false)

  val VARIABLE_SUBSTITUTE_ENABLED =
    buildConf("spark.sql.variable.substitute")
      .doc("This enables substitution using syntax like `${var}`, `${system:var}`, " +
        "and `${env:var}`.")
      .version("2.0.0")
      .booleanConf
      .createWithDefault(true)
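
  // Illustrative sketch, assuming a SparkSession `spark`; the `review.limit` key is
  // hypothetical. While this conf is true, `${...}` references are substituted before parsing:
  //
  //   spark.sql("SET review.limit=10")
  //   spark.sql("SELECT * FROM range(100) LIMIT ${review.limit}")
  //   spark.sql("SELECT '${env:HOME}' AS home")   // environment variable substitution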

  val ENABLE_TWOLEVEL_AGG_MAP =
    buildConf("spark.sql.codegen.aggregate.map.twolevel.enabled")
      .internal()
      .doc("Enable two-level aggregate hash map. When enabled, records will first be " +
        "inserted/looked-up at a 1st-level, small, fast map, and then fallback to a " +
        "2nd-level, larger, slower map when 1st level is full or keys cannot be found. " +
        "When disabled, records go directly to the 2nd level.")
      .version("2.3.0")
      .booleanConf
      .createWithDefault(true)

  val ENABLE_TWOLEVEL_AGG_MAP_PARTIAL_ONLY =
    buildConf("spark.sql.codegen.aggregate.map.twolevel.partialOnly")
      .internal()
      .doc("Enable two-level aggregate hash map for partial aggregate only, " +
        "because final aggregate might get more distinct keys compared to partial aggregate. " +
        "Overhead of looking up 1st-level map might dominate when having a lot of distinct keys.")
      .version("3.2.1")
      .booleanConf
      .createWithDefault(true)

  val ENABLE_VECTORIZED_HASH_MAP =
    buildConf("spark.sql.codegen.aggregate.map.vectorized.enable")
      .internal()
      .doc("Enable vectorized aggregate hash map. This is for testing/benchmarking only.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(false)

  val CODEGEN_SPLIT_AGGREGATE_FUNC =
    buildConf("spark.sql.codegen.aggregate.splitAggregateFunc.enabled")
      .internal()
      .doc("When true, the code generator would split aggregate code into individual methods " +
        "instead of a single big method. This can be used to avoid oversized function that " +
        "can miss the opportunity of JIT optimization.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(true)

  val ENABLE_SORT_AGGREGATE_CODEGEN =
    buildConf("spark.sql.codegen.aggregate.sortAggregate.enabled")
      .internal()
      .doc("When true, enable code-gen for sort aggregate.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(true)

  val ENABLE_FULL_OUTER_SHUFFLED_HASH_JOIN_CODEGEN =
    buildConf("spark.sql.codegen.join.fullOuterShuffledHashJoin.enabled")
      .internal()
      .doc("When true, enable code-gen for FULL OUTER shuffled hash join.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(true)

  val ENABLE_BUILD_SIDE_OUTER_SHUFFLED_HASH_JOIN_CODEGEN =
    buildConf("spark.sql.codegen.join.buildSideOuterShuffledHashJoin.enabled")
      .internal()
      .doc("When true, enable code-gen for an OUTER shuffled hash join where outer side" +
        " is the build side.")
      .version("3.5.0")
      .booleanConf
      .createWithDefault(true)

  val ENABLE_FULL_OUTER_SORT_MERGE_JOIN_CODEGEN =
    buildConf("spark.sql.codegen.join.fullOuterSortMergeJoin.enabled")
      .internal()
      .doc("When true, enable code-gen for FULL OUTER sort merge join.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(true)

  val ENABLE_EXISTENCE_SORT_MERGE_JOIN_CODEGEN =
    buildConf("spark.sql.codegen.join.existenceSortMergeJoin.enabled")
      .internal()
      .doc("When true, enable code-gen for Existence sort merge join.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(true)

  val MAX_NESTED_VIEW_DEPTH =
    buildConf("spark.sql.view.maxNestedViewDepth")
      .internal()
      .doc("The maximum depth of a view reference in a nested view. A nested view may reference " +
        "other nested views, the dependencies are organized in a directed acyclic graph (DAG). " +
        "However the DAG depth may become too large and cause unexpected behavior. This " +
        "configuration puts a limit on this: when the depth of a view exceeds this value during " +
        "analysis, we terminate the resolution to avoid potential errors.")
      .version("2.2.0")
      .intConf
      .checkValue(depth => depth > 0, "The maximum depth of a view reference in a nested view " +
        "must be positive.")
      .createWithDefault(100)

  val ALLOW_PARAMETERLESS_COUNT =
    buildConf("spark.sql.legacy.allowParameterlessCount")
      .internal()
      .doc("When true, the SQL function 'count' is allowed to take no parameters.")
      .version("3.1.1")
      .booleanConf
      .createWithDefault(false)

  val ALLOW_NON_EMPTY_LOCATION_IN_CTAS =
    buildConf("spark.sql.legacy.allowNonEmptyLocationInCTAS")
      .internal()
      .doc("When false, CTAS with LOCATION throws an analysis exception if the " +
        "location is not empty.")
      .version("3.2.0")
      .booleanConf
      .createWithDefault(false)

  val ALLOW_STAR_WITH_SINGLE_TABLE_IDENTIFIER_IN_COUNT =
    buildConf("spark.sql.legacy.allowStarWithSingleTableIdentifierInCount")
      .internal()
      .doc("When true, the SQL function 'count' is allowed to take single 'tblName.*' as parameter")
      .version("3.2")
      .booleanConf
      .createWithDefault(false)

  val ALLOW_ZERO_INDEX_IN_FORMAT_STRING =
    buildConf("spark.sql.legacy.allowZeroIndexInFormatString")
      .internal()
      .doc("When false, the `strfmt` in `format_string(strfmt, obj, ...)` and " +
        "`printf(strfmt, obj, ...)` will no longer support to use \"0$\" to specify the first " +
        "argument, the first argument should always reference by \"1$\" when use argument index " +
        "to indicating the position of the argument in the argument list.")
      .version("3.3")
      .booleanConf
      .createWithDefault(false)

  val USE_CURRENT_SQL_CONFIGS_FOR_VIEW =
    buildConf("spark.sql.legacy.useCurrentConfigsForView")
      .internal()
      .doc("When true, SQL Configs of the current active SparkSession instead of the captured " +
        "ones will be applied during the parsing and analysis phases of the view resolution.")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(false)

  val STORE_ANALYZED_PLAN_FOR_VIEW =
    buildConf("spark.sql.legacy.storeAnalyzedPlanForView")
      .internal()
      .doc("When true, analyzed plan instead of SQL text will be stored when creating " +
        "temporary view")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(false)

  val ALLOW_TEMP_VIEW_CREATION_WITH_MULTIPLE_NAME_PARTS =
    buildConf("spark.sql.legacy.allowTempViewCreationWithMultipleNameparts")
      .internal()
      .doc("When true, temp view creation Dataset APIs will allow the view creation even if " +
        "the view name is multiple name parts. The extra name parts will be dropped " +
        "during the view creation")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(false)

  val ALLOW_AUTO_GENERATED_ALIAS_FOR_VEW =
    buildConf("spark.sql.legacy.allowAutoGeneratedAliasForView")
      .internal()
      .doc("When true, it's allowed to use a input query without explicit alias when creating " +
        "a permanent view.")
      .version("3.2.0")
      .booleanConf
      .createWithDefault(false)

  val STREAMING_FILE_COMMIT_PROTOCOL_CLASS =
    buildConf("spark.sql.streaming.commitProtocolClass")
      .version("2.1.0")
      .internal()
      .stringConf
      .createWithDefault("org.apache.spark.sql.execution.streaming.ManifestFileCommitProtocol")

  val STREAMING_MULTIPLE_WATERMARK_POLICY =
    buildConf("spark.sql.streaming.multipleWatermarkPolicy")
      .doc("Policy to calculate the global watermark value when there are multiple watermark " +
        "operators in a streaming query. The default value is 'min' which chooses " +
        "the minimum watermark reported across multiple operators. Other alternative value is " +
        "'max' which chooses the maximum across multiple operators. " +
        "Note: This configuration cannot be changed between query restarts from the same " +
        "checkpoint location.")
      .version("2.4.0")
      .stringConf
      .transform(_.toLowerCase(Locale.ROOT))
      .checkValue(
        str => Set("min", "max").contains(str),
        "Invalid value for 'spark.sql.streaming.multipleWatermarkPolicy'. " +
          "Valid values are 'min' and 'max'")
      .createWithDefault("min") // must be same as MultipleWatermarkPolicy.DEFAULT_POLICY_NAME

  val OBJECT_AGG_SORT_BASED_FALLBACK_THRESHOLD =
    buildConf("spark.sql.objectHashAggregate.sortBased.fallbackThreshold")
      .internal()
      .doc("In the case of ObjectHashAggregateExec, when the size of the in-memory hash map " +
        "grows too large, we will fall back to sort-based aggregation. This option sets a row " +
        "count threshold for the size of the hash map.")
      .version("2.2.0")
      .intConf
      // We are trying to be conservative and use a relatively small default count threshold here
      // since the state object of some TypedImperativeAggregate function can be quite large (e.g.
      // percentile_approx).
      .createWithDefault(128)

  val USE_OBJECT_HASH_AGG = buildConf("spark.sql.execution.useObjectHashAggregateExec")
    .internal()
    .doc("Decides if we use ObjectHashAggregateExec")
    .version("2.2.0")
    .booleanConf
    .createWithDefault(true)

  val JSON_GENERATOR_IGNORE_NULL_FIELDS =
    buildConf("spark.sql.jsonGenerator.ignoreNullFields")
      .doc("Whether to ignore null fields when generating JSON objects in JSON data source and " +
        "JSON functions such as to_json. " +
        "If false, it generates null for null fields in JSON objects.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(true)
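
  // Illustrative sketch, assuming `import org.apache.spark.sql.functions._` and a DataFrame
  // `df` with columns `a` and `b`. The same behavior can also be chosen per call through the
  // options map of `to_json`:
  //
  //   df.select(to_json(struct(col("a"), col("b")), Map("ignoreNullFields" -> "false")))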

  val JSON_EXPRESSION_OPTIMIZATION =
    buildConf("spark.sql.optimizer.enableJsonExpressionOptimization")
      .doc("Whether to optimize JSON expressions in SQL optimizer. It includes pruning " +
        "unnecessary columns from from_json, simplifying from_json + to_json, to_json + " +
        "named_struct(from_json.col1, from_json.col2, ....).")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(true)

  val CSV_EXPRESSION_OPTIMIZATION =
    buildConf("spark.sql.optimizer.enableCsvExpressionOptimization")
      .doc("Whether to optimize CSV expressions in SQL optimizer. It includes pruning " +
        "unnecessary columns from from_csv.")
      .version("3.2.0")
      .booleanConf
      .createWithDefault(true)

  val COLLAPSE_PROJECT_ALWAYS_INLINE = buildConf("spark.sql.optimizer.collapseProjectAlwaysInline")
    .doc("Whether to always collapse two adjacent projections and inline expressions even if " +
      "it causes extra duplication.")
    .version("3.3.0")
    .booleanConf
    .createWithDefault(false)

  val FILE_SINK_LOG_DELETION = buildConf("spark.sql.streaming.fileSink.log.deletion")
    .internal()
    .doc("Whether to delete the expired log files in file stream sink.")
    .version("2.0.0")
    .booleanConf
    .createWithDefault(true)

  val FILE_SINK_LOG_COMPACT_INTERVAL =
    buildConf("spark.sql.streaming.fileSink.log.compactInterval")
      .internal()
      .doc("Number of log files after which all the previous files " +
        "are compacted into the next log file.")
      .version("2.0.0")
      .intConf
      .createWithDefault(10)

  val FILE_SINK_LOG_CLEANUP_DELAY =
    buildConf("spark.sql.streaming.fileSink.log.cleanupDelay")
      .internal()
      .doc("How long that a file is guaranteed to be visible for all readers.")
      .version("2.0.0")
      .timeConf(TimeUnit.MILLISECONDS)
      .createWithDefault(TimeUnit.MINUTES.toMillis(10)) // 10 minutes

  val FILE_SOURCE_LOG_DELETION = buildConf("spark.sql.streaming.fileSource.log.deletion")
    .internal()
    .doc("Whether to delete the expired log files in file stream source.")
    .version("2.0.1")
    .booleanConf
    .createWithDefault(true)

  val FILE_SOURCE_LOG_COMPACT_INTERVAL =
    buildConf("spark.sql.streaming.fileSource.log.compactInterval")
      .internal()
      .doc("Number of log files after which all the previous files " +
        "are compacted into the next log file.")
      .version("2.0.1")
      .intConf
      .createWithDefault(10)

  val FILE_SOURCE_LOG_CLEANUP_DELAY =
    buildConf("spark.sql.streaming.fileSource.log.cleanupDelay")
      .internal()
      .doc("How long in milliseconds a file is guaranteed to be visible for all readers.")
      .version("2.0.1")
      .timeConf(TimeUnit.MILLISECONDS)
      .createWithDefault(TimeUnit.MINUTES.toMillis(10)) // 10 minutes

  val FILE_SOURCE_SCHEMA_FORCE_NULLABLE =
    buildConf("spark.sql.streaming.fileSource.schema.forceNullable")
      .internal()
      .doc("When true, force the schema of streaming file source to be nullable (including all " +
        "the fields). Otherwise, the schema might not be compatible with actual data, which " +
        "leads to corruptions.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(true)

  val FILE_SOURCE_CLEANER_NUM_THREADS =
    buildConf("spark.sql.streaming.fileSource.cleaner.numThreads")
      .doc("Number of threads used in the file source completed file cleaner.")
      .version("3.0.0")
      .intConf
      .createWithDefault(1)

  val STREAMING_SCHEMA_INFERENCE =
    buildConf("spark.sql.streaming.schemaInference")
      .internal()
      .doc("Whether file-based streaming sources will infer its own schema")
      .version("2.0.0")
      .booleanConf
      .createWithDefault(false)

  val STREAMING_POLLING_DELAY =
    buildConf("spark.sql.streaming.pollingDelay")
      .internal()
      .doc("How long to delay polling new data when no data is available")
      .version("2.0.0")
      .timeConf(TimeUnit.MILLISECONDS)
      .createWithDefault(10L)

  val STREAMING_STOP_TIMEOUT =
    buildConf("spark.sql.streaming.stopTimeout")
      .doc("How long to wait in milliseconds for the streaming execution thread to stop when " +
        "calling the streaming query's stop() method. 0 or negative values wait indefinitely.")
      .version("3.0.0")
      .timeConf(TimeUnit.MILLISECONDS)
      .createWithDefaultString("0")

  val STREAMING_NO_DATA_PROGRESS_EVENT_INTERVAL =
    buildConf("spark.sql.streaming.noDataProgressEventInterval")
      .internal()
      .doc("How long to wait before providing query idle event when there is no data")
      .version("2.1.1")
      .timeConf(TimeUnit.MILLISECONDS)
      .createWithDefault(10000L)

  val STREAMING_NO_DATA_MICRO_BATCHES_ENABLED =
    buildConf("spark.sql.streaming.noDataMicroBatches.enabled")
      .doc(
        "Whether streaming micro-batch engine will execute batches without data " +
          "for eager state management for stateful streaming queries.")
      .version("2.4.1")
      .booleanConf
      .createWithDefault(true)

  val STREAMING_METRICS_ENABLED =
    buildConf("spark.sql.streaming.metricsEnabled")
      .doc("Whether Dropwizard/Codahale metrics will be reported for active streaming queries.")
      .version("2.0.2")
      .booleanConf
      .createWithDefault(false)

  val STREAMING_PROGRESS_RETENTION =
    buildConf("spark.sql.streaming.numRecentProgressUpdates")
      .doc("The number of progress updates to retain for a streaming query")
      .version("2.1.1")
      .intConf
      .createWithDefault(100)

  val STREAMING_CHECKPOINT_FILE_MANAGER_CLASS =
    buildConf("spark.sql.streaming.checkpointFileManagerClass")
      .internal()
      .doc("The class used to write checkpoint files atomically. This class must be a subclass " +
        "of the interface CheckpointFileManager.")
      .version("2.4.0")
      .stringConf
      .createOptional

  val STREAMING_CHECKPOINT_ESCAPED_PATH_CHECK_ENABLED =
    buildConf("spark.sql.streaming.checkpoint.escapedPathCheck.enabled")
      .internal()
      .doc("Whether to detect a streaming query may pick up an incorrect checkpoint path due " +
        "to SPARK-26824.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(true)

  val PARALLEL_FILE_LISTING_IN_STATS_COMPUTATION =
    buildConf("spark.sql.statistics.parallelFileListingInStatsComputation.enabled")
      .internal()
      .doc("When true, SQL commands use parallel file listing, " +
        "as opposed to single thread listing. " +
        "This usually speeds up commands that need to list many directories.")
      .version("2.4.1")
      .booleanConf
      .createWithDefault(true)

  val DEFAULT_SIZE_IN_BYTES = buildConf("spark.sql.defaultSizeInBytes")
    .internal()
    .doc("The default table size used in query planning. By default, it is set to Long.MaxValue " +
      s"which is larger than `${AUTO_BROADCASTJOIN_THRESHOLD.key}` to be more conservative. " +
      "That is to say by default the optimizer will not choose to broadcast a table unless it " +
      "knows for sure its size is small enough.")
    .version("1.1.0")
    .bytesConf(ByteUnit.BYTE)
    .createWithDefault(Long.MaxValue)

  val ENABLE_FALL_BACK_TO_HDFS_FOR_STATS = buildConf("spark.sql.statistics.fallBackToHdfs")
    .doc("When true, it will fall back to HDFS if the table statistics are not available from " +
      "table metadata. This is useful in determining if a table is small enough to use " +
      "broadcast joins. This flag is effective only for non-partitioned Hive tables. " +
      "For non-partitioned data source tables, it will be automatically recalculated if table " +
      "statistics are not available. For partitioned data source and partitioned Hive tables, " +
      s"It is '${DEFAULT_SIZE_IN_BYTES.key}' if table statistics are not available.")
    .version("2.0.0")
    .booleanConf
    .createWithDefault(false)

  val NDV_MAX_ERROR =
    buildConf("spark.sql.statistics.ndv.maxError")
      .internal()
      .doc("The maximum relative standard deviation allowed in HyperLogLog++ algorithm " +
        "when generating column level statistics.")
      .version("2.1.1")
      .doubleConf
      .createWithDefault(0.05)

  val HISTOGRAM_ENABLED =
    buildConf("spark.sql.statistics.histogram.enabled")
      .doc("Generates histograms when computing column statistics if enabled. Histograms can " +
        "provide better estimation accuracy. Currently, Spark only supports equi-height " +
        "histogram. Note that collecting histograms takes extra cost. For example, collecting " +
        "column statistics usually takes only one table scan, but generating equi-height " +
        "histogram will cause an extra table scan.")
      .version("2.3.0")
      .booleanConf
      .createWithDefault(false)
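
  // Usage sketch (illustrative only): assuming a SparkSession `spark` and an existing table
  // named `sales` with a numeric column `price`, enabling the flag makes ANALYZE collect an
  // equi-height histogram at the cost of an extra table scan.
  //   spark.conf.set("spark.sql.statistics.histogram.enabled", "true")
  //   spark.sql("ANALYZE TABLE sales COMPUTE STATISTICS FOR COLUMNS price")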

  val HISTOGRAM_NUM_BINS =
    buildConf("spark.sql.statistics.histogram.numBins")
      .internal()
      .doc("The number of bins when generating histograms.")
      .version("2.3.0")
      .intConf
      .checkValue(num => num > 1, "The number of bins must be greater than 1.")
      .createWithDefault(254)

  val PERCENTILE_ACCURACY =
    buildConf("spark.sql.statistics.percentile.accuracy")
      .internal()
      .doc("Accuracy of percentile approximation when generating equi-height histograms. " +
        "Larger value means better accuracy. The relative error can be deduced by " +
        "1.0 / PERCENTILE_ACCURACY.")
      .version("2.3.0")
      .intConf
      .createWithDefault(10000)

  val AUTO_SIZE_UPDATE_ENABLED =
    buildConf("spark.sql.statistics.size.autoUpdate.enabled")
      .doc("Enables automatic update for table size once table's data is changed. Note that if " +
        "the total number of files of the table is very large, this can be expensive and slow " +
        "down data change commands.")
      .version("2.3.0")
      .booleanConf
      .createWithDefault(false)

  val CBO_ENABLED =
    buildConf("spark.sql.cbo.enabled")
      .doc("Enables CBO for estimation of plan statistics when set true.")
      .version("2.2.0")
      .booleanConf
      .createWithDefault(false)

  val PLAN_STATS_ENABLED =
    buildConf("spark.sql.cbo.planStats.enabled")
      .doc("When true, the logical plan will fetch row counts and column statistics from catalog.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(false)

  val JOIN_REORDER_ENABLED =
    buildConf("spark.sql.cbo.joinReorder.enabled")
      .doc("Enables join reorder in CBO.")
      .version("2.2.0")
      .booleanConf
      .createWithDefault(false)

  val JOIN_REORDER_DP_THRESHOLD =
    buildConf("spark.sql.cbo.joinReorder.dp.threshold")
      .doc("The maximum number of joined nodes allowed in the dynamic programming algorithm.")
      .version("2.2.0")
      .intConf
      .checkValue(number => number > 0, "The maximum number must be a positive integer.")
      .createWithDefault(12)

  val JOIN_REORDER_CARD_WEIGHT =
    buildConf("spark.sql.cbo.joinReorder.card.weight")
      .internal()
      .doc("The weight of the ratio of cardinalities (number of rows) " +
        "in the cost comparison function. The ratio of sizes in bytes has weight " +
        "1 - this value. The weighted geometric mean of these ratios is used to decide " +
        "which of the candidate plans will be chosen by the CBO.")
      .version("2.2.0")
      .doubleConf
      .checkValue(weight => weight >= 0 && weight <= 1, "The weight value must be in [0, 1].")
      .createWithDefault(0.7)

  val JOIN_REORDER_DP_STAR_FILTER =
    buildConf("spark.sql.cbo.joinReorder.dp.star.filter")
      .doc("Applies star-join filter heuristics to cost based join enumeration.")
      .version("2.2.0")
      .booleanConf
      .createWithDefault(false)

  val STARSCHEMA_DETECTION = buildConf("spark.sql.cbo.starSchemaDetection")
    .doc("When true, it enables join reordering based on star schema detection. ")
    .version("2.2.0")
    .booleanConf
    .createWithDefault(false)

  val STARSCHEMA_FACT_TABLE_RATIO = buildConf("spark.sql.cbo.starJoinFTRatio")
    .internal()
    .doc("Specifies the upper limit of the ratio between the largest fact tables" +
      " for a star join to be considered. ")
    .version("2.2.0")
    .doubleConf
    .createWithDefault(0.9)

  private def isValidTimezone(zone: String): Boolean = {
    Try { DateTimeUtils.getZoneId(zone) }.isSuccess
  }

  val SESSION_LOCAL_TIMEZONE = buildConf(SqlApiConfHelper.SESSION_LOCAL_TIMEZONE_KEY)
    .doc("The ID of session local timezone in the format of either region-based zone IDs or " +
      "zone offsets. Region IDs must have the form 'area/city', such as 'America/Los_Angeles'. " +
      "Zone offsets must be in the format '(+|-)HH', '(+|-)HH:mm' or '(+|-)HH:mm:ss', e.g '-08', " +
      "'+01:00' or '-13:33:33'. Also 'UTC' and 'Z' are supported as aliases of '+00:00'. Other " +
      "short names are not recommended to use because they can be ambiguous.")
    .version("2.2.0")
    .stringConf
    .checkValue(isValidTimezone, s"Cannot resolve the given timezone with" +
      " ZoneId.of(_, ZoneId.SHORT_IDS)")
    .createWithDefaultFunction(() => TimeZone.getDefault.getID)
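
  // Usage sketch (illustrative only): assuming a SparkSession `spark`, both region-based IDs
  // and fixed offsets are accepted.
  //   spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")
  //   spark.conf.set("spark.sql.session.timeZone", "+01:00")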

  val WINDOW_EXEC_BUFFER_IN_MEMORY_THRESHOLD =
    buildConf("spark.sql.windowExec.buffer.in.memory.threshold")
      .internal()
      .doc("Threshold for number of rows guaranteed to be held in memory by the window operator")
      .version("2.2.1")
      .intConf
      .createWithDefault(4096)

  val WINDOW_EXEC_BUFFER_SPILL_THRESHOLD =
    buildConf("spark.sql.windowExec.buffer.spill.threshold")
      .internal()
      .doc("Threshold for number of rows to be spilled by window operator")
      .version("2.2.0")
      .intConf
      .createWithDefault(SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD.defaultValue.get)

  val WINDOW_GROUP_LIMIT_THRESHOLD =
    buildConf("spark.sql.optimizer.windowGroupLimitThreshold")
      .internal()
      .doc("Threshold for triggering `InsertWindowGroupLimit`. " +
        "0 means the output results is empty. -1 means disabling the optimization.")
      .version("3.5.0")
      .intConf
      .checkValue(_ >= -1,
        "The threshold of window group limit must be -1, 0 or positive integer.")
      .createWithDefault(1000)

  val SESSION_WINDOW_BUFFER_IN_MEMORY_THRESHOLD =
    buildConf("spark.sql.sessionWindow.buffer.in.memory.threshold")
      .internal()
      .doc("Threshold for number of windows guaranteed to be held in memory by the " +
        "session window operator. Note that the buffer is used only for the query Spark " +
        "cannot apply aggregations on determining session window.")
      .version("3.2.0")
      .intConf
      .createWithDefault(4096)

  val SESSION_WINDOW_BUFFER_SPILL_THRESHOLD =
    buildConf("spark.sql.sessionWindow.buffer.spill.threshold")
      .internal()
      .doc("Threshold for number of rows to be spilled by window operator. Note that " +
        "the buffer is used only for the query Spark cannot apply aggregations on determining " +
        "session window.")
      .version("3.2.0")
      .intConf
      .createWithDefault(SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD.defaultValue.get)

  val SORT_MERGE_JOIN_EXEC_BUFFER_IN_MEMORY_THRESHOLD =
    buildConf("spark.sql.sortMergeJoinExec.buffer.in.memory.threshold")
      .internal()
      .doc("Threshold for number of rows guaranteed to be held in memory by the sort merge " +
        "join operator")
      .version("2.2.1")
      .intConf
      .createWithDefault(ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH)

  val SORT_MERGE_JOIN_EXEC_BUFFER_SPILL_THRESHOLD =
    buildConf("spark.sql.sortMergeJoinExec.buffer.spill.threshold")
      .internal()
      .doc("Threshold for number of rows to be spilled by sort merge join operator")
      .version("2.2.0")
      .intConf
      .createWithDefault(SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD.defaultValue.get)

  val CARTESIAN_PRODUCT_EXEC_BUFFER_IN_MEMORY_THRESHOLD =
    buildConf("spark.sql.cartesianProductExec.buffer.in.memory.threshold")
      .internal()
      .doc("Threshold for number of rows guaranteed to be held in memory by the cartesian " +
        "product operator")
      .version("2.2.1")
      .intConf
      .createWithDefault(4096)

  val CARTESIAN_PRODUCT_EXEC_BUFFER_SPILL_THRESHOLD =
    buildConf("spark.sql.cartesianProductExec.buffer.spill.threshold")
      .internal()
      .doc("Threshold for number of rows to be spilled by cartesian product operator")
      .version("2.2.0")
      .intConf
      .createWithDefault(SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD.defaultValue.get)

  val SUPPORT_QUOTED_REGEX_COLUMN_NAME = buildConf("spark.sql.parser.quotedRegexColumnNames")
    .doc("When true, quoted Identifiers (using backticks) in SELECT statement are interpreted" +
      " as regular expressions.")
    .version("2.3.0")
    .booleanConf
    .createWithDefault(false)
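
  // Usage sketch (illustrative only): assuming a SparkSession `spark` and a table `t` with
  // columns a1, a2 and b, the backticked pattern below is treated as a regex and selects a1
  // and a2 once the flag is on.
  //   spark.conf.set("spark.sql.parser.quotedRegexColumnNames", "true")
  //   spark.sql("SELECT `a.*` FROM t").show()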

  val TVF_ALLOW_MULTIPLE_TABLE_ARGUMENTS_ENABLED =
    buildConf("spark.sql.tvf.allowMultipleTableArguments.enabled")
      .doc("When true, allows multiple table arguments for table-valued functions, " +
        "receiving the cartesian product of all the rows of these tables.")
      .version("3.5.0")
      .booleanConf
      .createWithDefault(false)

  val RANGE_EXCHANGE_SAMPLE_SIZE_PER_PARTITION =
    buildConf("spark.sql.execution.rangeExchange.sampleSizePerPartition")
      .internal()
      .doc("Number of points to sample per partition in order to determine the range boundaries" +
          " for range partitioning, typically used in global sorting (without limit).")
      .version("2.3.0")
      .intConf
      .createWithDefault(100)

  val ARROW_EXECUTION_ENABLED =
    buildConf("spark.sql.execution.arrow.enabled")
      .doc("(Deprecated since Spark 3.0, please set 'spark.sql.execution.arrow.pyspark.enabled'.)")
      .version("2.3.0")
      .booleanConf
      .createWithDefault(false)

  val ARROW_PYSPARK_EXECUTION_ENABLED =
    buildConf("spark.sql.execution.arrow.pyspark.enabled")
      .doc("When true, make use of Apache Arrow for columnar data transfers in PySpark. " +
        "This optimization applies to: " +
        "1. pyspark.sql.DataFrame.toPandas. " +
        "2. pyspark.sql.SparkSession.createDataFrame when its input is a Pandas DataFrame " +
        "or a NumPy ndarray. " +
        "The following data type is unsupported: " +
        "ArrayType of TimestampType.")
      .version("3.0.0")
      .fallbackConf(ARROW_EXECUTION_ENABLED)

  val ARROW_PYSPARK_SELF_DESTRUCT_ENABLED =
    buildConf("spark.sql.execution.arrow.pyspark.selfDestruct.enabled")
      .doc("(Experimental) When true, make use of Apache Arrow's self-destruct and split-blocks " +
        "options for columnar data transfers in PySpark, when converting from Arrow to Pandas. " +
        "This reduces memory usage at the cost of some CPU time. " +
        "This optimization applies to: pyspark.sql.DataFrame.toPandas " +
        "when 'spark.sql.execution.arrow.pyspark.enabled' is set.")
      .version("3.2.0")
      .booleanConf
      .createWithDefault(false)

  val ARROW_LOCAL_RELATION_THRESHOLD =
    buildConf("spark.sql.execution.arrow.localRelationThreshold")
      .doc(
        "When converting Arrow batches to Spark DataFrame, local collections are used in the " +
          "driver side if the byte size of Arrow batches is smaller than this threshold. " +
          "Otherwise, the Arrow batches are sent and deserialized to Spark internal rows " +
          "in the executors.")
      .version("3.4.0")
      .bytesConf(ByteUnit.BYTE)
      .checkValue(_ >= 0, "This value must be equal to or greater than 0.")
      .createWithDefaultString("48MB")

  val PYSPARK_JVM_STACKTRACE_ENABLED =
    buildConf("spark.sql.pyspark.jvmStacktrace.enabled")
      .doc("When true, it shows the JVM stacktrace in the user-facing PySpark exception " +
        "together with Python stacktrace. By default, it is disabled to hide JVM stacktrace " +
        "and shows a Python-friendly exception only. Note that this is independent from log " +
        "level settings.")
      .version("3.0.0")
      .booleanConf
      // show full stacktrace in tests but hide in production by default.
      .createWithDefault(Utils.isTesting)

  val ARROW_SPARKR_EXECUTION_ENABLED =
    buildConf("spark.sql.execution.arrow.sparkr.enabled")
      .doc("When true, make use of Apache Arrow for columnar data transfers in SparkR. " +
        "This optimization applies to: " +
        "1. createDataFrame when its input is an R DataFrame " +
        "2. collect " +
        "3. dapply " +
        "4. gapply " +
        "The following data types are unsupported: " +
        "FloatType, BinaryType, ArrayType, StructType and MapType.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(false)

  val ARROW_FALLBACK_ENABLED =
    buildConf("spark.sql.execution.arrow.fallback.enabled")
      .doc("(Deprecated since Spark 3.0, please set " +
        "'spark.sql.execution.arrow.pyspark.fallback.enabled'.)")
      .version("2.4.0")
      .booleanConf
      .createWithDefault(true)

  val ARROW_PYSPARK_FALLBACK_ENABLED =
    buildConf("spark.sql.execution.arrow.pyspark.fallback.enabled")
      .doc(s"When true, optimizations enabled by '${ARROW_PYSPARK_EXECUTION_ENABLED.key}' will " +
        "fallback automatically to non-optimized implementations if an error occurs.")
      .version("3.0.0")
      .fallbackConf(ARROW_FALLBACK_ENABLED)

  val ARROW_EXECUTION_MAX_RECORDS_PER_BATCH =
    buildConf("spark.sql.execution.arrow.maxRecordsPerBatch")
      .doc("When using Apache Arrow, limit the maximum number of records that can be written " +
        "to a single ArrowRecordBatch in memory. This configuration is not effective for the " +
        "grouping API such as DataFrame(.cogroup).groupby.applyInPandas because each group " +
        "becomes each ArrowRecordBatch. If set to zero or negative there is no limit.")
      .version("2.3.0")
      .intConf
      .createWithDefault(10000)

  val ARROW_EXECUTION_USE_LARGE_VAR_TYPES =
    buildConf("spark.sql.execution.arrow.useLargeVarTypes")
      .doc("When using Apache Arrow, use large variable width vectors for string and binary " +
        "types. Regular string and binary types have a 2GiB limit for a column in a single " +
        "record batch. Large variable types remove this limitation at the cost of higher memory " +
        "usage per value. Note that this only works for DataFrame.mapInArrow.")
      .version("3.5.0")
      .internal()
      .booleanConf
      .createWithDefault(false)

  val PANDAS_UDF_BUFFER_SIZE =
    buildConf("spark.sql.execution.pandas.udf.buffer.size")
      .doc(
        s"Same as `${BUFFER_SIZE.key}` but only applies to Pandas UDF executions. If it is not " +
        s"set, the fallback is `${BUFFER_SIZE.key}`. Note that Pandas execution requires more " +
        "than 4 bytes. Lowering this value could make small Pandas UDF batch iterated and " +
        "pipelined; however, it might degrade performance. See SPARK-27870.")
      .version("3.0.0")
      .fallbackConf(BUFFER_SIZE)

  val PANDAS_STRUCT_HANDLING_MODE =
    buildConf("spark.sql.execution.pandas.structHandlingMode")
      .doc(
        "The conversion mode of struct type when creating pandas DataFrame. " +
        "When \"legacy\"," +
        "1. when Arrow optimization is disabled, convert to Row object, " +
        "2. when Arrow optimization is enabled, convert to dict or raise an Exception " +
        "if there are duplicated nested field names. " +
        "When \"row\", convert to Row object regardless of Arrow optimization. " +
        "When \"dict\", convert to dict and use suffixed key names, e.g., a_0, a_1, " +
        "if there are duplicated nested field names, regardless of Arrow optimization."
      )
      .version("3.5.0")
      .stringConf
      .checkValues(Set("legacy", "row", "dict"))
      .createWithDefaultString("legacy")

  val PYSPARK_SIMPLIFIED_TRACEBACK =
    buildConf("spark.sql.execution.pyspark.udf.simplifiedTraceback.enabled")
      .doc(
        "When true, the traceback from Python UDFs is simplified. It hides " +
        "the Python worker, (de)serialization, etc from PySpark in tracebacks, and only " +
        "shows the exception messages from UDFs. Note that this works only with CPython 3.7+.")
      .version("3.1.0")
      .booleanConf
      // show full stacktrace in tests but hide in production by default.
      .createWithDefault(!Utils.isTesting)

  val PYTHON_UDF_ARROW_ENABLED =
    buildConf("spark.sql.execution.pythonUDF.arrow.enabled")
      .doc("Enable Arrow optimization in regular Python UDFs. This optimization " +
        "can only be enabled when the given function takes at least one argument.")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(false)

  val PYTHON_TABLE_UDF_ARROW_ENABLED =
    buildConf("spark.sql.execution.pythonUDTF.arrow.enabled")
      .doc("Enable Arrow optimization for Python UDTFs.")
      .version("3.5.0")
      .booleanConf
      .createWithDefault(false)

  val PANDAS_GROUPED_MAP_ASSIGN_COLUMNS_BY_NAME =
    buildConf("spark.sql.legacy.execution.pandas.groupedMap.assignColumnsByName")
      .internal()
      .doc("When true, columns will be looked up by name if labeled with a string and fallback " +
        "to use position if not. When false, a grouped map Pandas UDF will assign columns from " +
        "the returned Pandas DataFrame based on position, regardless of column label type. " +
        "This configuration will be deprecated in future releases.")
      .version("2.4.1")
      .booleanConf
      .createWithDefault(true)

  val PANDAS_ARROW_SAFE_TYPE_CONVERSION =
    buildConf("spark.sql.execution.pandas.convertToArrowArraySafely")
      .internal()
      .doc("When true, Arrow will perform safe type conversion when converting " +
        "Pandas.Series to Arrow array during serialization. Arrow will raise errors " +
        "when detecting unsafe type conversion like overflow. When false, disabling Arrow's type " +
        "check and do type conversions anyway. This config only works for Arrow 0.11.0+.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(false)

  val PYSPARK_WORKER_PYTHON_EXECUTABLE =
    buildConf("spark.sql.execution.pyspark.python")
      .internal()
      .doc("Python binary executable to use for PySpark in executors when running Python " +
        "UDF, pandas UDF and pandas function APIs." +
        "If not set, it falls back to 'spark.pyspark.python' by default.")
      .version("3.5.0")
      .stringConf
      .createOptional

  val REPLACE_EXCEPT_WITH_FILTER = buildConf("spark.sql.optimizer.replaceExceptWithFilter")
    .internal()
    .doc("When true, the apply function of the rule verifies whether the right node of the" +
      " except operation is of type Filter or Project followed by Filter. If yes, the rule" +
      " further verifies 1) Excluding the filter operations from the right (as well as the" +
      " left node, if any) on the top, whether both the nodes evaluates to a same result." +
      " 2) The left and right nodes don't contain any SubqueryExpressions. 3) The output" +
      " column names of the left node are distinct. If all the conditions are met, the" +
      " rule will replace the except operation with a Filter by flipping the filter" +
      " condition(s) of the right node.")
    .version("2.3.0")
    .booleanConf
    .createWithDefault(true)

  val DECIMAL_OPERATIONS_ALLOW_PREC_LOSS =
    buildConf("spark.sql.decimalOperations.allowPrecisionLoss")
      .internal()
      .doc("When true (default), establishing the result type of an arithmetic operation " +
        "happens according to Hive behavior and SQL ANSI 2011 specification, i.e. rounding the " +
        "decimal part of the result if an exact representation is not possible. Otherwise, NULL " +
        "is returned in those cases, as previously.")
      .version("2.3.1")
      .booleanConf
      .createWithDefault(true)

  val LITERAL_PICK_MINIMUM_PRECISION =
    buildConf("spark.sql.legacy.literal.pickMinimumPrecision")
      .internal()
      .doc("When integral literal is used in decimal operations, pick a minimum precision " +
        "required by the literal if this config is true, to make the resulting precision and/or " +
        "scale smaller. This can reduce the possibility of precision lose and/or overflow.")
      .version("2.3.3")
      .booleanConf
      .createWithDefault(true)

  val SQL_OPTIONS_REDACTION_PATTERN = buildConf("spark.sql.redaction.options.regex")
    .doc("Regex to decide which keys in a Spark SQL command's options map contain sensitive " +
      "information. The values of options whose names that match this regex will be redacted " +
      "in the explain output. This redaction is applied on top of the global redaction " +
      s"configuration defined by ${SECRET_REDACTION_PATTERN.key}.")
    .version("2.2.2")
    .regexConf
    .createWithDefault("(?i)url".r)

  val SQL_STRING_REDACTION_PATTERN =
    buildConf("spark.sql.redaction.string.regex")
      .doc("Regex to decide which parts of strings produced by Spark contain sensitive " +
        "information. When this regex matches a string part, that string part is replaced by a " +
        "dummy value. This is currently used to redact the output of SQL explain commands. " +
        "When this conf is not set, the value from `spark.redaction.string.regex` is used.")
      .version("2.3.0")
      .fallbackConf(org.apache.spark.internal.config.STRING_REDACTION_PATTERN)

  val CONCAT_BINARY_AS_STRING = buildConf("spark.sql.function.concatBinaryAsString")
    .doc("When this option is set to false and all inputs are binary, `functions.concat` returns " +
      "an output as binary. Otherwise, it returns as a string.")
    .version("2.3.0")
    .booleanConf
    .createWithDefault(false)

  val ELT_OUTPUT_AS_STRING = buildConf("spark.sql.function.eltOutputAsString")
    .doc("When this option is set to false and all inputs are binary, `elt` returns " +
      "an output as binary. Otherwise, it returns as a string.")
    .version("2.3.0")
    .booleanConf
    .createWithDefault(false)

  val VALIDATE_PARTITION_COLUMNS =
    buildConf("spark.sql.sources.validatePartitionColumns")
      .internal()
      .doc("When this option is set to true, partition column values will be validated with " +
        "user-specified schema. If the validation fails, a runtime exception is thrown. " +
        "When this option is set to false, the partition column value will be converted to null " +
        "if it can not be casted to corresponding user-specified schema.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(true)

  val CONTINUOUS_STREAMING_EPOCH_BACKLOG_QUEUE_SIZE =
    buildConf("spark.sql.streaming.continuous.epochBacklogQueueSize")
      .doc("The max number of entries to be stored in queue to wait for late epochs. " +
        "If this parameter is exceeded by the size of the queue, stream will stop with an error.")
      .version("3.0.0")
      .intConf
      .createWithDefault(10000)

  val CONTINUOUS_STREAMING_EXECUTOR_QUEUE_SIZE =
    buildConf("spark.sql.streaming.continuous.executorQueueSize")
      .internal()
      .doc("The size (measured in number of rows) of the queue used in continuous execution to" +
        " buffer the results of a ContinuousDataReader.")
      .version("2.3.0")
      .intConf
      .createWithDefault(1024)

  val CONTINUOUS_STREAMING_EXECUTOR_POLL_INTERVAL_MS =
    buildConf("spark.sql.streaming.continuous.executorPollIntervalMs")
      .internal()
      .doc("The interval at which continuous execution readers will poll to check whether" +
        " the epoch has advanced on the driver.")
      .version("2.3.0")
      .timeConf(TimeUnit.MILLISECONDS)
      .createWithDefault(100)

  val USE_V1_SOURCE_LIST = buildConf("spark.sql.sources.useV1SourceList")
    .internal()
    .doc("A comma-separated list of data source short names or fully qualified data source " +
      "implementation class names for which Data Source V2 code path is disabled. These data " +
      "sources will fallback to Data Source V1 code path.")
    .version("3.0.0")
    .stringConf
    .createWithDefault("avro,csv,json,kafka,orc,parquet,text")

  val ALLOW_EMPTY_SCHEMAS_FOR_WRITES = buildConf("spark.sql.legacy.allowEmptySchemaWrite")
    .internal()
    .doc("When this option is set to true, validation of empty or empty nested schemas that " +
      "occurs when writing into a FileFormat based data source does not happen.")
    .version("3.4.0")
    .booleanConf
    .createWithDefault(false)

  val DISABLED_V2_STREAMING_WRITERS = buildConf("spark.sql.streaming.disabledV2Writers")
    .doc("A comma-separated list of fully qualified data source register class names for which" +
      " StreamWriteSupport is disabled. Writes to these sources will fall back to the V1 Sinks.")
    .version("2.3.1")
    .stringConf
    .createWithDefault("")

  val DISABLED_V2_STREAMING_MICROBATCH_READERS =
    buildConf("spark.sql.streaming.disabledV2MicroBatchReaders")
      .internal()
      .doc(
        "A comma-separated list of fully qualified data source register class names for which " +
          "MicroBatchReadSupport is disabled. Reads from these sources will fall back to the " +
          "V1 Sources.")
      .version("2.4.0")
      .stringConf
      .createWithDefault("")

  val FASTFAIL_ON_FILEFORMAT_OUTPUT =
    buildConf("spark.sql.execution.fastFailOnFileFormatOutput")
      .internal()
      .doc("Whether to fast fail task execution when writing output to FileFormat datasource. " +
        "If this is enabled, in `FileFormatWriter` we will catch `FileAlreadyExistsException` " +
        "and fast fail output task without further task retry. Only enabling this if you know " +
        "the `FileAlreadyExistsException` of the output task is unrecoverable, i.e., further " +
        "task attempts won't be able to success. If the `FileAlreadyExistsException` might be " +
        "recoverable, you should keep this as disabled and let Spark to retry output tasks. " +
        "This is disabled by default.")
      .version("3.0.2")
      .booleanConf
      .createWithDefault(false)

  object PartitionOverwriteMode extends Enumeration {
    val STATIC, DYNAMIC = Value
  }

  val PARTITION_OVERWRITE_MODE =
    buildConf("spark.sql.sources.partitionOverwriteMode")
      .doc("When INSERT OVERWRITE a partitioned data source table, we currently support 2 modes: " +
        "static and dynamic. In static mode, Spark deletes all the partitions that match the " +
        "partition specification(e.g. PARTITION(a=1,b)) in the INSERT statement, before " +
        "overwriting. In dynamic mode, Spark doesn't delete partitions ahead, and only overwrite " +
        "those partitions that have data written into it at runtime. By default we use static " +
        "mode to keep the same behavior of Spark prior to 2.3. Note that this config doesn't " +
        "affect Hive serde tables, as they are always overwritten with dynamic mode. This can " +
        "also be set as an output option for a data source using key partitionOverwriteMode " +
        "(which takes precedence over this setting), e.g. " +
        "dataframe.write.option(\"partitionOverwriteMode\", \"dynamic\").save(path)."
      )
      .version("2.3.0")
      .stringConf
      .transform(_.toUpperCase(Locale.ROOT))
      .checkValues(PartitionOverwriteMode.values.map(_.toString))
      .createWithDefault(PartitionOverwriteMode.STATIC.toString)
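
  // Usage sketch (illustrative only): assuming a SparkSession `spark`, a DataFrame `df`, and an
  // existing partitioned table named `events`, the per-write option takes precedence over the
  // session-level conf.
  //   df.write
  //     .mode("overwrite")
  //     .option("partitionOverwriteMode", "dynamic")
  //     .insertInto("events")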

  object StoreAssignmentPolicy extends Enumeration {
    val ANSI, LEGACY, STRICT = Value
  }

  val STORE_ASSIGNMENT_POLICY =
    buildConf("spark.sql.storeAssignmentPolicy")
      .doc("When inserting a value into a column with different data type, Spark will perform " +
        "type coercion. Currently, we support 3 policies for the type coercion rules: ANSI, " +
        "legacy and strict. With ANSI policy, Spark performs the type coercion as per ANSI SQL. " +
        "In practice, the behavior is mostly the same as PostgreSQL. " +
        "It disallows certain unreasonable type conversions such as converting " +
        "`string` to `int` or `double` to `boolean`. " +
        "With legacy policy, Spark allows the type coercion as long as it is a valid `Cast`, " +
        "which is very loose. e.g. converting `string` to `int` or `double` to `boolean` is " +
        "allowed. It is also the only behavior in Spark 2.x and it is compatible with Hive. " +
        "With strict policy, Spark doesn't allow any possible precision loss or data truncation " +
        "in type coercion, e.g. converting `double` to `int` or `decimal` to `double` is " +
        "not allowed."
      )
      .version("3.0.0")
      .stringConf
      .transform(_.toUpperCase(Locale.ROOT))
      .checkValues(StoreAssignmentPolicy.values.map(_.toString))
      .createWithDefault(StoreAssignmentPolicy.ANSI.toString)
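
  // Usage sketch (illustrative only): assuming a SparkSession `spark` and a table `t(i INT)`,
  // the STRICT policy rejects an insert that could lose precision at analysis time.
  //   spark.conf.set("spark.sql.storeAssignmentPolicy", "STRICT")
  //   spark.sql("INSERT INTO t VALUES (CAST(1.5 AS DOUBLE))")  // fails under STRICT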

  val ANSI_ENABLED = buildConf(SqlApiConfHelper.ANSI_ENABLED_KEY)
    .doc("When true, Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. " +
      "For example, Spark will throw an exception at runtime instead of returning null results " +
      "when the inputs to a SQL operator/function are invalid." +
      "For full details of this dialect, you can find them in the section \"ANSI Compliance\" of " +
      "Spark's documentation. Some ANSI dialect features may be not from the ANSI SQL " +
      "standard directly, but their behaviors align with ANSI SQL's style")
    .version("3.0.0")
    .booleanConf
    .createWithDefault(sys.env.get("SPARK_ANSI_SQL_MODE").contains("true"))

  val ENFORCE_RESERVED_KEYWORDS = buildConf("spark.sql.ansi.enforceReservedKeywords")
    .doc(s"When true and '${ANSI_ENABLED.key}' is true, the Spark SQL parser enforces the ANSI " +
      "reserved keywords and forbids SQL queries that use reserved keywords as alias names " +
      "and/or identifiers for table, view, function, etc.")
    .version("3.3.0")
    .booleanConf
    .createWithDefault(false)

  val DOUBLE_QUOTED_IDENTIFIERS = buildConf("spark.sql.ansi.doubleQuotedIdentifiers")
    .doc(s"When true and '${ANSI_ENABLED.key}' is true, Spark SQL reads literals enclosed in " +
      "double quoted (\") as identifiers. When false they are read as string literals.")
    .version("3.4.0")
    .booleanConf
    .createWithDefault(false)

  val ANSI_RELATION_PRECEDENCE = buildConf("spark.sql.ansi.relationPrecedence")
    .doc(s"When true and '${ANSI_ENABLED.key}' is true, JOIN takes precedence over comma when " +
      "combining relation. For example, `t1, t2 JOIN t3` should result to `t1 X (t2 X t3)`. If " +
      "the config is false, the result is `(t1 X t2) X t3`.")
    .version("3.4.0")
    .booleanConf
    .createWithDefault(false)

  val CHUNK_BASE64_STRING_ENABLED = buildConf("spark.sql.chunkBase64String.enabled")
    .doc("Whether to truncate string generated by the `Base64` function. When true, base64" +
      " strings generated by the base64 function are chunked into lines of at most 76" +
      " characters. When false, the base64 strings are not chunked.")
    .version("3.5.2")
    .booleanConf
    .createWithDefault(true)

  val ENABLE_DEFAULT_COLUMNS =
    buildConf("spark.sql.defaultColumn.enabled")
      .internal()
      .doc("When true, allow CREATE TABLE, REPLACE TABLE, and ALTER COLUMN statements to set or " +
        "update default values for specific columns. Following INSERT, MERGE, and UPDATE " +
        "statements may then omit these values and their values will be injected automatically " +
        "instead.")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(true)

  val DEFAULT_COLUMN_ALLOWED_PROVIDERS =
    buildConf("spark.sql.defaultColumn.allowedProviders")
      .internal()
      .doc("List of table providers wherein SQL commands are permitted to assign DEFAULT column " +
        "values. Comma-separated list, whitespace ignored, case-insensitive. If an asterisk " +
        "appears after any table provider in this list, any command may assign DEFAULT column " +
        "except `ALTER TABLE ... ADD COLUMN`. Otherwise, if no asterisk appears, all commands " +
        "are permitted. This is useful because in order for such `ALTER TABLE ... ADD COLUMN` " +
        "commands to work, the target data source must include support for substituting in the " +
        "provided values when the corresponding fields are not present in storage.")
      .version("3.4.0")
      .stringConf
      .createWithDefault("csv,json,orc,parquet")

  val JSON_GENERATOR_WRITE_NULL_IF_WITH_DEFAULT_VALUE =
    buildConf("spark.sql.jsonGenerator.writeNullIfWithDefaultValue")
      .internal()
      .doc("When true, when writing NULL values to columns of JSON tables with explicit DEFAULT " +
        "values using INSERT, UPDATE, or MERGE commands, never skip writing the NULL values to " +
        "storage, overriding spark.sql.jsonGenerator.ignoreNullFields or the ignoreNullFields " +
        "option. This can be useful to enforce that inserted NULL values are present in " +
        "storage to differentiate from missing data.")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(true)

  val USE_NULLS_FOR_MISSING_DEFAULT_COLUMN_VALUES =
    buildConf("spark.sql.defaultColumn.useNullsForMissingDefaultValues")
      .internal()
      .doc("When true, and DEFAULT columns are enabled, allow INSERT INTO commands with user-" +
        "specified lists of fewer columns than the target table to behave as if they had " +
        "specified DEFAULT for all remaining columns instead, in order.")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(true)

  val SKIP_TYPE_VALIDATION_ON_ALTER_PARTITION =
    buildConf("spark.sql.legacy.skipTypeValidationOnAlterPartition")
      .internal()
      .doc("When true, skip validation for partition spec in ALTER PARTITION. E.g., " +
        "`ALTER TABLE .. ADD PARTITION(p='a')` would work even the partition type is int. " +
        s"When false, the behavior follows ${STORE_ASSIGNMENT_POLICY.key}")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(false)

  val SORT_BEFORE_REPARTITION =
    buildConf("spark.sql.execution.sortBeforeRepartition")
      .internal()
      .doc("When perform a repartition following a shuffle, the output row ordering would be " +
        "nondeterministic. If some downstream stages fail and some tasks of the repartition " +
        "stage retry, these tasks may generate different data, and that can lead to correctness " +
        "issues. Turn on this config to insert a local sort before actually doing repartition " +
        "to generate consistent repartition results. The performance of repartition() may go " +
        "down since we insert extra local sort before it.")
      .version("2.1.4")
      .booleanConf
      .createWithDefault(true)

  val NESTED_SCHEMA_PRUNING_ENABLED =
    buildConf("spark.sql.optimizer.nestedSchemaPruning.enabled")
      .internal()
      .doc("Prune nested fields from a logical relation's output which are unnecessary in " +
        "satisfying a query. This optimization allows columnar file format readers to avoid " +
        "reading unnecessary nested column data. Currently Parquet and ORC are the " +
        "data sources that implement this optimization.")
      .version("2.4.1")
      .booleanConf
      .createWithDefault(true)

  val DISABLE_HINTS =
    buildConf("spark.sql.optimizer.disableHints")
      .internal()
      .doc("When true, the optimizer will disable user-specified hints that are additional " +
        "directives for better planning of a query.")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(false)

  val NESTED_PREDICATE_PUSHDOWN_FILE_SOURCE_LIST =
    buildConf("spark.sql.optimizer.nestedPredicatePushdown.supportedFileSources")
      .internal()
      .doc("A comma-separated list of data source short names or fully qualified data source " +
        "implementation class names for which Spark tries to push down predicates for nested " +
        "columns and/or names containing `dots` to data sources. This configuration is only " +
        "effective with file-based data sources in DSv1. Currently, Parquet and ORC implement " +
        "both optimizations. The other data sources don't support this feature yet. So the " +
        "default value is 'parquet,orc'.")
      .version("3.0.0")
      .stringConf
      .createWithDefault("parquet,orc")

  val SERIALIZER_NESTED_SCHEMA_PRUNING_ENABLED =
    buildConf("spark.sql.optimizer.serializer.nestedSchemaPruning.enabled")
      .internal()
      .doc("Prune nested fields from object serialization operator which are unnecessary in " +
        "satisfying a query. This optimization allows object serializers to avoid " +
        "executing unnecessary nested expressions.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(true)

  val NESTED_PRUNING_ON_EXPRESSIONS =
    buildConf("spark.sql.optimizer.expression.nestedPruning.enabled")
      .internal()
      .doc("Prune nested fields from expressions in an operator which are unnecessary in " +
        "satisfying a query. Note that this optimization doesn't prune nested fields from " +
        "physical data source scanning. For pruning nested fields from scanning, please use " +
        "`spark.sql.optimizer.nestedSchemaPruning.enabled` config.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(true)

  val DECORRELATE_INNER_QUERY_ENABLED =
    buildConf("spark.sql.optimizer.decorrelateInnerQuery.enabled")
      .internal()
      .doc("Decorrelate inner query by eliminating correlated references and build domain joins.")
      .version("3.2.0")
      .booleanConf
      .createWithDefault(true)

  val DECORRELATE_SET_OPS_ENABLED =
    buildConf("spark.sql.optimizer.decorrelateSetOps.enabled")
      .internal()
      .doc("Decorrelate subqueries with correlation under set operators.")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(true)

  val DECORRELATE_SUBQUERY_LEGACY_INCORRECT_COUNT_HANDLING_ENABLED =
    buildConf("spark.sql.optimizer.decorrelateSubqueryLegacyIncorrectCountHandling.enabled")
      .internal()
      .doc("If enabled, revert to legacy incorrect behavior for certain subqueries with COUNT or " +
        "similar aggregates: see SPARK-43098.")
      .version("3.5.0")
      .booleanConf
      .createWithDefault(false)

  val OPTIMIZE_ONE_ROW_RELATION_SUBQUERY =
    buildConf("spark.sql.optimizer.optimizeOneRowRelationSubquery")
      .internal()
      .doc("When true, the optimizer will inline subqueries with OneRowRelation as leaf nodes.")
      .version("3.2.0")
      .booleanConf
      .createWithDefault(true)

  val ALWAYS_INLINE_ONE_ROW_RELATION_SUBQUERY =
    buildConf("spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline")
      .internal()
      .doc(s"When true, the optimizer will always inline single row subqueries even if it " +
        "causes extra duplication. It only takes effect when " +
        s"${OPTIMIZE_ONE_ROW_RELATION_SUBQUERY.key} is set to true.")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(true)

  val PULL_HINTS_INTO_SUBQUERIES =
    buildConf("spark.sql.optimizer.pullHintsIntoSubqueries")
      .internal()
      .doc("Pull hints into subqueries in EliminateResolvedHint if enabled.")
      .booleanConf
      .createWithDefault(true)

  val TOP_K_SORT_FALLBACK_THRESHOLD =
    buildConf("spark.sql.execution.topKSortFallbackThreshold")
      .doc("In SQL queries with a SORT followed by a LIMIT like " +
          "'SELECT x FROM t ORDER BY y LIMIT m', if m is under this threshold, do a top-K sort" +
          " in memory, otherwise do a global sort which spills to disk if necessary.")
      .version("2.4.0")
      .intConf
      .createWithDefault(ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH)
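
  // Usage sketch (illustrative only): assuming a SparkSession `spark` and a table `t(x, y)`,
  // the query below keeps only the top 100 rows in memory (a top-K sort) as long as 100 is
  // under this threshold; otherwise a full, possibly spilling, global sort is used.
  //   spark.sql("SELECT x FROM t ORDER BY y LIMIT 100")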

  val PRUNE_FILTERS_CAN_PRUNE_STREAMING_SUBPLAN =
    buildConf("spark.databricks.sql.optimizer.pruneFiltersCanPruneStreamingSubplan")
      .internal()
      .doc("Allow PruneFilters to remove streaming subplans when we encounter a false filter. " +
        "This flag is to restore prior buggy behavior for broken pipelines.")
      .version("4.0.0")
      .booleanConf
      .createWithDefault(false)

  object Deprecated {
    val MAPRED_REDUCE_TASKS = "mapred.reduce.tasks"
  }

  object Replaced {
    val MAPREDUCE_JOB_REDUCES = "mapreduce.job.reduces"
  }

  val CSV_PARSER_COLUMN_PRUNING = buildConf("spark.sql.csv.parser.columnPruning.enabled")
    .internal()
    .doc("If it is set to true, column names of the requested schema are passed to CSV parser. " +
      "Other column values can be ignored during parsing even if they are malformed.")
    .version("2.4.0")
    .booleanConf
    .createWithDefault(true)

  val CSV_INPUT_BUFFER_SIZE = buildConf("spark.sql.csv.parser.inputBufferSize")
    .internal()
    .doc("If it is set, it configures the buffer size of CSV input during parsing. " +
      "It is the same as inputBufferSize option in CSV which has a higher priority. " +
      "Note that this is a workaround for the parsing library's regression, and this " +
      "configuration is internal and supposed to be removed in the near future.")
    .version("3.0.3")
    .intConf
    .createOptional

  val LEGACY_RESPECT_NULLABILITY_IN_TEXT_DATASET_CONVERSION =
    buildConf("spark.sql.legacy.respectNullabilityInTextDatasetConversion")
      .internal()
      .doc("When true, the nullability in the user-specified schema for " +
        "`DataFrameReader.schema(schema).json(jsonDataset)` and " +
        "`DataFrameReader.schema(schema).csv(csvDataset)` is respected. Otherwise, they are " +
        "turned to a nullable schema forcibly.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(false)

  val REPL_EAGER_EVAL_ENABLED = buildConf("spark.sql.repl.eagerEval.enabled")
    .doc("Enables eager evaluation or not. When true, the top K rows of Dataset will be " +
      "displayed if and only if the REPL supports the eager evaluation. Currently, the " +
      "eager evaluation is supported in PySpark and SparkR. In PySpark, for the notebooks like " +
      "Jupyter, the HTML table (generated by _repr_html_) will be returned. For plain Python " +
      "REPL, the returned outputs are formatted like dataframe.show(). In SparkR, the returned " +
      "outputs are showed similar to R data.frame would.")
    .version("2.4.0")
    .booleanConf
    .createWithDefault(false)
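
  // Usage sketch (illustrative only): assuming a PySpark or SparkR REPL with `spark` available,
  // after setting the flag, simply evaluating a DataFrame renders its top rows.
  //   spark.conf.set("spark.sql.repl.eagerEval.enabled", "true")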

  val REPL_EAGER_EVAL_MAX_NUM_ROWS = buildConf("spark.sql.repl.eagerEval.maxNumRows")
    .doc("The max number of rows that are returned by eager evaluation. This only takes " +
      s"effect when ${REPL_EAGER_EVAL_ENABLED.key} is set to true. The valid range of this " +
      "config is from 0 to (Int.MaxValue - 1), so the invalid config like negative and " +
      "greater than (Int.MaxValue - 1) will be normalized to 0 and (Int.MaxValue - 1).")
    .version("2.4.0")
    .intConf
    .createWithDefault(20)

  val REPL_EAGER_EVAL_TRUNCATE = buildConf("spark.sql.repl.eagerEval.truncate")
    .doc("The max number of characters for each cell that is returned by eager evaluation. " +
      s"This only takes effect when ${REPL_EAGER_EVAL_ENABLED.key} is set to true.")
    .version("2.4.0")
    .intConf
    .createWithDefault(20)

  val FAST_HASH_AGGREGATE_MAX_ROWS_CAPACITY_BIT =
    buildConf("spark.sql.codegen.aggregate.fastHashMap.capacityBit")
      .internal()
      .doc("Capacity for the max number of rows to be held in memory " +
        "by the fast hash aggregate product operator. The bit is not for actual value, " +
        "but the actual numBuckets is determined by loadFactor " +
        "(e.g: default bit value 16 , the actual numBuckets is ((1 << 16) / 0.5).")
      .version("2.4.0")
      .intConf
      .checkValue(bit => bit >= 10 && bit <= 30, "The bit value must be in [10, 30].")
      .createWithDefault(16)

  val AVRO_COMPRESSION_CODEC = buildConf("spark.sql.avro.compression.codec")
    .doc("Compression codec used in writing of AVRO files. Supported codecs: " +
      "uncompressed, deflate, snappy, bzip2, xz and zstandard. Default codec is snappy.")
    .version("2.4.0")
    .stringConf
    .checkValues(Set("uncompressed", "deflate", "snappy", "bzip2", "xz", "zstandard"))
    .createWithDefault("snappy")

  val AVRO_DEFLATE_LEVEL = buildConf("spark.sql.avro.deflate.level")
    .doc("Compression level for the deflate codec used in writing of AVRO files. " +
      "Valid value must be in the range of from 1 to 9 inclusive or -1. " +
      "The default value is -1 which corresponds to 6 level in the current implementation.")
    .version("2.4.0")
    .intConf
    .checkValues((1 to 9).toSet + Deflater.DEFAULT_COMPRESSION)
    .createWithDefault(Deflater.DEFAULT_COMPRESSION)

  val LEGACY_SIZE_OF_NULL = buildConf("spark.sql.legacy.sizeOfNull")
    .internal()
    .doc(s"If it is set to false, or ${ANSI_ENABLED.key} is true, then size of null returns " +
      "null. Otherwise, it returns -1, which was inherited from Hive.")
    .version("2.4.0")
    .booleanConf
    .createWithDefault(true)

  val LEGACY_PARSE_NULL_PARTITION_SPEC_AS_STRING_LITERAL =
    buildConf("spark.sql.legacy.parseNullPartitionSpecAsStringLiteral")
      .internal()
      .doc("If it is set to true, `PARTITION(col=null)` is parsed as a string literal of its " +
        "text representation, e.g., string 'null', when the partition column is string type. " +
        "Otherwise, it is always parsed as a null literal in the partition spec.")
      .version("3.0.2")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_KEEP_PARTITION_SPEC_AS_STRING_LITERAL =
    buildConf("spark.sql.legacy.keepPartitionSpecAsStringLiteral")
      .internal()
      .doc("If it is set to true, `PARTITION(col=05)` is parsed as a string literal of its " +
        "text representation, e.g., string '05', when the partition column is string type. " +
        "Otherwise, it is always parsed as a numeric literal in the partition spec.")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_REPLACE_DATABRICKS_SPARK_AVRO_ENABLED =
    buildConf("spark.sql.legacy.replaceDatabricksSparkAvro.enabled")
      .internal()
      .doc("If it is set to true, the data source provider com.databricks.spark.avro is mapped " +
        "to the built-in but external Avro data source module for backward compatibility.")
      .version("2.4.0")
      .booleanConf
      .createWithDefault(true)

  val LEGACY_SETOPS_PRECEDENCE_ENABLED =
    buildConf("spark.sql.legacy.setopsPrecedence.enabled")
      .internal()
      .doc("When set to true and the order of evaluation is not specified by parentheses, the " +
        "set operations are performed from left to right as they appear in the query. When set " +
        "to false and order of evaluation is not specified by parentheses, INTERSECT operations " +
        "are performed before any UNION, EXCEPT and MINUS operations.")
      .version("2.4.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_EXPONENT_LITERAL_AS_DECIMAL_ENABLED =
    buildConf("spark.sql.legacy.exponentLiteralAsDecimal.enabled")
      .internal()
      .doc("When set to true, a literal with an exponent (e.g. 1E-30) would be parsed " +
        "as Decimal rather than Double.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_ALLOW_NEGATIVE_SCALE_OF_DECIMAL_ENABLED =
    buildConf("spark.sql.legacy.allowNegativeScaleOfDecimal")
      .internal()
      .doc("When set to true, negative scale of Decimal type is allowed. For example, " +
        "the type of number 1E10BD under legacy mode is DecimalType(2, -9), but is " +
        "Decimal(11, 0) in non legacy mode.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_BUCKETED_TABLE_SCAN_OUTPUT_ORDERING =
    buildConf("spark.sql.legacy.bucketedTableScan.outputOrdering")
      .internal()
      .doc("When true, the bucketed table scan will list files during planning to figure out the " +
        "output ordering, which is expensive and may make the planning quite slow.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_HAVING_WITHOUT_GROUP_BY_AS_WHERE =
    buildConf("spark.sql.legacy.parser.havingWithoutGroupByAsWhere")
      .internal()
      .doc("If it is set to true, the parser will treat HAVING without GROUP BY as a normal " +
        "WHERE, which does not follow SQL standard.")
      .version("2.4.1")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_ALLOW_EMPTY_STRING_IN_JSON =
    buildConf("spark.sql.legacy.json.allowEmptyString.enabled")
      .internal()
      .doc("When set to true, the parser of JSON data source treats empty strings as null for " +
        "some data types such as `IntegerType`.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_CREATE_EMPTY_COLLECTION_USING_STRING_TYPE =
    buildConf("spark.sql.legacy.createEmptyCollectionUsingStringType")
      .internal()
      .doc("When set to true, Spark returns an empty collection with `StringType` as element " +
        "type if the `array`/`map` function is called without any parameters. Otherwise, Spark " +
        "returns an empty collection with `NullType` as element type.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_ALLOW_UNTYPED_SCALA_UDF =
    buildConf("spark.sql.legacy.allowUntypedScalaUDF")
      .internal()
      .doc("When set to true, user is allowed to use org.apache.spark.sql.functions." +
        "udf(f: AnyRef, dataType: DataType). Otherwise, an exception will be thrown at runtime.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_STATISTICAL_AGGREGATE =
    buildConf("spark.sql.legacy.statisticalAggregate")
      .internal()
      .doc("When set to true, statistical aggregate function returns Double.NaN " +
        "if divide by zero occurred during expression evaluation, otherwise, it returns null. " +
        "Before version 3.1.0, it returns NaN in divideByZero case by default.")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(false)

  val TRUNCATE_TABLE_IGNORE_PERMISSION_ACL =
    buildConf("spark.sql.truncateTable.ignorePermissionAcl.enabled")
      .internal()
      .doc("When set to true, TRUNCATE TABLE command will not try to set back original " +
        "permission and ACLs when re-creating the table/partition paths.")
      .version("2.4.6")
      .booleanConf
      .createWithDefault(false)

  val NAME_NON_STRUCT_GROUPING_KEY_AS_VALUE =
    buildConf("spark.sql.legacy.dataset.nameNonStructGroupingKeyAsValue")
      .internal()
      .doc("When set to true, the key attribute resulted from running `Dataset.groupByKey` " +
        "for non-struct key type, will be named as `value`, following the behavior of Spark " +
        "version 2.4 and earlier.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(false)

  val MAX_TO_STRING_FIELDS = buildConf("spark.sql.debug.maxToStringFields")
    .doc("Maximum number of fields of sequence-like entries can be converted to strings " +
      "in debug output. Any elements beyond the limit will be dropped and replaced by a" +
      """ "... N more fields" placeholder.""")
    .version("3.0.0")
    .intConf
    .createWithDefault(25)

  val MAX_PLAN_STRING_LENGTH = buildConf("spark.sql.maxPlanStringLength")
    .doc("Maximum number of characters to output for a plan string.  If the plan is " +
      "longer, further output will be truncated.  The default setting always generates a full " +
      "plan.  Set this to a lower value such as 8k if plan strings are taking up too much " +
      "memory or are causing OutOfMemory errors in the driver or UI processes.")
    .version("3.0.0")
    .bytesConf(ByteUnit.BYTE)
    .checkValue(i => i >= 0 && i <= ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH, "Invalid " +
      "value for 'spark.sql.maxPlanStringLength'.  Length must be a valid string length " +
      "(nonnegative and shorter than the maximum size).")
    .createWithDefaultString(s"${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}")

  val MAX_METADATA_STRING_LENGTH = buildConf("spark.sql.maxMetadataStringLength")
    .doc("Maximum number of characters to output for a metadata string. e.g. " +
      "file location in `DataSourceScanExec`, every value will be abbreviated if exceed length.")
    .version("3.1.0")
    .intConf
    .checkValue(_ > 3, "This value must be bigger than 3.")
    .createWithDefault(100)

  val SET_COMMAND_REJECTS_SPARK_CORE_CONFS =
    buildConf("spark.sql.legacy.setCommandRejectsSparkCoreConfs")
      .internal()
      .doc("If it is set to true, SET command will fail when the key is registered as " +
        "a SparkConf entry.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(true)

  object TimestampTypes extends Enumeration {
    val TIMESTAMP_NTZ, TIMESTAMP_LTZ = Value
  }

  val TIMESTAMP_TYPE =
    buildConf("spark.sql.timestampType")
      .doc("Configures the default timestamp type of Spark SQL, including SQL DDL, Cast clause, " +
        "type literal and the schema inference of data sources. " +
        s"Setting the configuration as ${TimestampTypes.TIMESTAMP_NTZ} will " +
        "use TIMESTAMP WITHOUT TIME ZONE as the default type while putting it as " +
        s"${TimestampTypes.TIMESTAMP_LTZ} will use TIMESTAMP WITH LOCAL TIME ZONE. " +
        "Before the 3.4.0 release, Spark only supports the TIMESTAMP WITH " +
        "LOCAL TIME ZONE type.")
      .version("3.4.0")
      .stringConf
      .transform(_.toUpperCase(Locale.ROOT))
      .checkValues(TimestampTypes.values.map(_.toString))
      .createWithDefault(TimestampTypes.TIMESTAMP_LTZ.toString)
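
  // A minimal sketch (illustrative only) for the config above, assuming a SparkSession named
  // `spark`: with the value TIMESTAMP_NTZ, timestamp literals and inferred schemas should use
  // TIMESTAMP WITHOUT TIME ZONE instead of the default TIMESTAMP WITH LOCAL TIME ZONE.
  //   spark.conf.set("spark.sql.timestampType", "TIMESTAMP_NTZ")
  //   spark.sql("SELECT typeof(TIMESTAMP'2021-01-01 00:00:00')").show()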

  val DATETIME_JAVA8API_ENABLED = buildConf("spark.sql.datetime.java8API.enabled")
    .doc("If the configuration property is set to true, java.time.Instant and " +
      "java.time.LocalDate classes of Java 8 API are used as external types for " +
      "Catalyst's TimestampType and DateType. If it is set to false, java.sql.Timestamp " +
      "and java.sql.Date are used for the same purpose.")
    .version("3.0.0")
    .booleanConf
    .createWithDefault(false)
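
  // A minimal sketch (illustrative only), assuming a SparkSession named `spark`: with the
  // Java 8 API enabled, collected DATE values come back as java.time.LocalDate rather than
  // java.sql.Date.
  //   spark.conf.set("spark.sql.datetime.java8API.enabled", "true")
  //   val d = spark.sql("SELECT DATE'2020-12-31' AS d").collect().head.get(0)
  //   // d is a java.time.LocalDate; with the config disabled it would be a java.sql.Date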

  val UI_EXPLAIN_MODE = buildConf("spark.sql.ui.explainMode")
    .doc("Configures the query explain mode used in the Spark SQL UI. The value can be 'simple', " +
      "'extended', 'codegen', 'cost', or 'formatted'. The default value is 'formatted'.")
    .version("3.1.0")
    .stringConf
    .transform(_.toUpperCase(Locale.ROOT))
    .checkValue(mode => Set("SIMPLE", "EXTENDED", "CODEGEN", "COST", "FORMATTED").contains(mode),
      "Invalid value for 'spark.sql.ui.explainMode'. Valid values are 'simple', 'extended', " +
      "'codegen', 'cost' and 'formatted'.")
    .createWithDefault("formatted")

  val SOURCES_BINARY_FILE_MAX_LENGTH = buildConf("spark.sql.sources.binaryFile.maxLength")
    .doc("The max length of a file that can be read by the binary file data source. " +
      "Spark will fail fast and not attempt to read the file if its length exceeds this value. " +
      "The theoretical max is Int.MaxValue, though VMs might implement a smaller max.")
    .version("3.0.0")
    .internal()
    .intConf
    .createWithDefault(Int.MaxValue)

  val LEGACY_CAST_DATETIME_TO_STRING =
    buildConf("spark.sql.legacy.typeCoercion.datetimeToString.enabled")
      .internal()
      .doc("If it is set to true, date/timestamp will cast to string in binary comparisons " +
        s"with String when ${ANSI_ENABLED.key} is false.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(false)

  val DEFAULT_CATALOG = buildConf("spark.sql.defaultCatalog")
    .doc("Name of the default catalog. This will be the current catalog if users have not " +
      "explicitly set the current catalog yet.")
    .version("3.0.0")
    .stringConf
    .createWithDefault(SESSION_CATALOG_NAME)

  val V2_SESSION_CATALOG_IMPLEMENTATION =
    buildConf(s"spark.sql.catalog.$SESSION_CATALOG_NAME")
      .doc("A catalog implementation that will be used as the v2 interface to Spark's built-in " +
        s"v1 catalog: $SESSION_CATALOG_NAME. This catalog shares its identifier namespace with " +
        s"the $SESSION_CATALOG_NAME and must be consistent with it; for example, if a table can " +
        s"be loaded by the $SESSION_CATALOG_NAME, this catalog must also return the table " +
        s"metadata. To delegate operations to the $SESSION_CATALOG_NAME, implementations can " +
        "extend 'CatalogExtension'.")
      .version("3.0.0")
      .stringConf
      .createOptional

  object MapKeyDedupPolicy extends Enumeration {
    val EXCEPTION, LAST_WIN = Value
  }

  val MAP_KEY_DEDUP_POLICY = buildConf("spark.sql.mapKeyDedupPolicy")
    .doc("The policy to deduplicate map keys in builtin function: CreateMap, MapFromArrays, " +
      "MapFromEntries, StringToMap, MapConcat and TransformKeys. When EXCEPTION, the query " +
      "fails if duplicated map keys are detected. When LAST_WIN, the map key that is inserted " +
      "at last takes precedence.")
    .version("3.0.0")
    .stringConf
    .transform(_.toUpperCase(Locale.ROOT))
    .checkValues(MapKeyDedupPolicy.values.map(_.toString))
    .createWithDefault(MapKeyDedupPolicy.EXCEPTION.toString)
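
  // A minimal usage sketch (illustrative only) for the config above, assuming a SparkSession
  // named `spark`:
  //   spark.conf.set("spark.sql.mapKeyDedupPolicy", "LAST_WIN")
  //   // CreateMap keeps the last value for the duplicated key 1 instead of failing the query:
  //   spark.sql("SELECT map(1, 'a', 1, 'b')").show()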

  val LEGACY_LOOSE_UPCAST = buildConf("spark.sql.legacy.doLooseUpcast")
    .internal()
    .doc("When true, the upcast will be loose and allows string to atomic types.")
    .version("3.0.0")
    .booleanConf
    .createWithDefault(false)

  val LEGACY_CTE_PRECEDENCE_POLICY = buildConf("spark.sql.legacy.ctePrecedencePolicy")
    .internal()
    .doc("When LEGACY, outer CTE definitions takes precedence over inner definitions. If set to " +
      "CORRECTED, inner CTE definitions take precedence. The default value is EXCEPTION, " +
      "AnalysisException is thrown while name conflict is detected in nested CTE. This config " +
      "will be removed in future versions and CORRECTED will be the only behavior.")
    .version("3.0.0")
    .stringConf
    .transform(_.toUpperCase(Locale.ROOT))
    .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
    .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)

  val LEGACY_TIME_PARSER_POLICY = buildConf(SqlApiConfHelper.LEGACY_TIME_PARSER_POLICY_KEY)
    .internal()
    .doc("When LEGACY, java.text.SimpleDateFormat is used for formatting and parsing " +
      "dates/timestamps in a locale-sensitive manner, which is the approach before Spark 3.0. " +
      "When set to CORRECTED, classes from java.time.* packages are used for the same purpose. " +
      "The default value is EXCEPTION, RuntimeException is thrown when we will get different " +
      "results.")
    .version("3.0.0")
    .stringConf
    .transform(_.toUpperCase(Locale.ROOT))
    .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
    .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
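
  // A minimal sketch (illustrative only) for the config above, assuming a SparkSession named
  // `spark`: under the default EXCEPTION policy, parsing that would differ between the legacy
  // SimpleDateFormat and java.time parsers fails; picking a policy resolves it either way.
  //   spark.sql("SET spark.sql.legacy.timeParserPolicy=CORRECTED")
  //   spark.sql("SELECT to_timestamp('2021-01-01 12:00:00', 'yyyy-MM-dd HH:mm:ss')").show()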

  val LEGACY_ARRAY_EXISTS_FOLLOWS_THREE_VALUED_LOGIC =
    buildConf("spark.sql.legacy.followThreeValuedLogicInArrayExists")
      .internal()
      .doc("When true, the ArrayExists will follow the three-valued boolean logic.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(true)

  val ADDITIONAL_REMOTE_REPOSITORIES =
    buildConf("spark.sql.maven.additionalRemoteRepositories")
      .doc("A comma-delimited string config of the optional additional remote Maven mirror " +
        "repositories. This is only used for downloading Hive jars in IsolatedClientLoader " +
        "if the default Maven Central repo is unreachable.")
      .version("3.0.0")
      .stringConf
      .createWithDefault(
        sys.env.getOrElse("DEFAULT_ARTIFACT_REPOSITORY",
          "https://maven-central.storage-download.googleapis.com/maven2/"))

  val LEGACY_FROM_DAYTIME_STRING =
    buildConf("spark.sql.legacy.fromDayTimeString.enabled")
      .internal()
      .doc("When true, the `from` bound is not taken into account in conversion of " +
        "a day-time string to an interval, and the `to` bound is used to skip " +
        "all interval units out of the specified range. If it is set to `false`, " +
        "`ParseException` is thrown if the input does not match to the pattern " +
        "defined by `from` and `to`.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_PROPERTY_NON_RESERVED =
    buildConf("spark.sql.legacy.notReserveProperties")
      .internal()
      .doc("When true, all database and table properties are not reserved and available for " +
        "create/alter syntaxes. But please be aware that the reserved properties will be " +
        "silently removed.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_ADD_SINGLE_FILE_IN_ADD_FILE =
    buildConf("spark.sql.legacy.addSingleFileInAddFile")
      .internal()
      .doc("When true, only a single file can be added using ADD FILE. If false, then users " +
        "can add directory by passing directory path to ADD FILE.")
      .version("3.0.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_MSSQLSERVER_NUMERIC_MAPPING_ENABLED =
    buildConf("spark.sql.legacy.mssqlserver.numericMapping.enabled")
      .internal()
      .doc("When true, use legacy MySqlServer SMALLINT and REAL type mapping.")
      .version("2.4.5")
      .booleanConf
      .createWithDefault(false)

  val CSV_FILTER_PUSHDOWN_ENABLED = buildConf("spark.sql.csv.filterPushdown.enabled")
    .doc("When true, enable filter pushdown to CSV datasource.")
    .version("3.0.0")
    .booleanConf
    .createWithDefault(true)

  val JSON_FILTER_PUSHDOWN_ENABLED = buildConf("spark.sql.json.filterPushdown.enabled")
    .doc("When true, enable filter pushdown to JSON datasource.")
    .version("3.1.0")
    .booleanConf
    .createWithDefault(true)

  val AVRO_FILTER_PUSHDOWN_ENABLED = buildConf("spark.sql.avro.filterPushdown.enabled")
    .doc("When true, enable filter pushdown to Avro datasource.")
    .version("3.1.0")
    .booleanConf
    .createWithDefault(true)

  val JSON_ENABLE_PARTIAL_RESULTS =
    buildConf("spark.sql.json.enablePartialResults")
      .internal()
      .doc("When set to true, enables partial results for structs, maps, and arrays in JSON " +
        "when one or more fields do not match the schema")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(true)

  val LEGACY_CSV_ENABLE_DATE_TIME_PARSING_FALLBACK =
    buildConf("spark.sql.legacy.csv.enableDateTimeParsingFallback")
      .internal()
      .doc("When true, enable legacy date/time parsing fallback in CSV")
      .version("3.4.0")
      .booleanConf
      .createOptional

  val LEGACY_JSON_ENABLE_DATE_TIME_PARSING_FALLBACK =
    buildConf("spark.sql.legacy.json.enableDateTimeParsingFallback")
      .internal()
      .doc("When true, enable legacy date/time parsing fallback in JSON")
      .version("3.4.0")
      .booleanConf
      .createOptional

  val ADD_PARTITION_BATCH_SIZE =
    buildConf("spark.sql.addPartitionInBatch.size")
      .internal()
      .doc("The number of partitions to be handled in one turn when use " +
        "`AlterTableAddPartitionCommand` or `RepairTableCommand` to add partitions into table. " +
        "The smaller batch size is, the less memory is required for the real handler, e.g. " +
        "Hive Metastore.")
      .version("3.0.0")
      .intConf
      .checkValue(_ > 0, "The value of spark.sql.addPartitionInBatch.size must be positive")
      .createWithDefault(100)

  val LEGACY_ALLOW_HASH_ON_MAPTYPE = buildConf("spark.sql.legacy.allowHashOnMapType")
    .internal()
    .doc("When set to true, hash expressions can be applied on elements of MapType. Otherwise, " +
      "an analysis exception will be thrown.")
    .version("3.0.0")
    .booleanConf
    .createWithDefault(false)

  val LEGACY_INTEGER_GROUPING_ID =
    buildConf("spark.sql.legacy.integerGroupingId")
      .internal()
      .doc("When true, grouping_id() returns int values instead of long values.")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_GROUPING_ID_WITH_APPENDED_USER_GROUPBY =
    buildConf("spark.sql.legacy.groupingIdWithAppendedUserGroupBy")
      .internal()
      .doc("When true, grouping_id() returns values based on grouping set columns plus " +
        "user-given group-by expressions order like Spark 3.2.0, 3.2.1, 3.2.2, and 3.3.0.")
      .version("3.2.3")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_PARQUET_NANOS_AS_LONG = buildConf("spark.sql.legacy.parquet.nanosAsLong")
    .internal()
    .doc("When true, the Parquet's nanos precision timestamps are converted to SQL long values.")
    .version("3.2.4")
    .booleanConf
    .createWithDefault(false)

  val PARQUET_INT96_REBASE_MODE_IN_WRITE =
    buildConf("spark.sql.parquet.int96RebaseModeInWrite")
      .internal()
      .doc("When LEGACY, Spark will rebase INT96 timestamps from Proleptic Gregorian calendar to " +
        "the legacy hybrid (Julian + Gregorian) calendar when writing Parquet files. " +
        "When CORRECTED, Spark will not do rebase and write the timestamps as it is. " +
        "When EXCEPTION, which is the default, Spark will fail the writing if it sees ancient " +
        "timestamps that are ambiguous between the two calendars.")
      .version("3.1.0")
      .withAlternative("spark.sql.legacy.parquet.int96RebaseModeInWrite")
      .stringConf
      .transform(_.toUpperCase(Locale.ROOT))
      .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)

  val PARQUET_REBASE_MODE_IN_WRITE =
    buildConf("spark.sql.parquet.datetimeRebaseModeInWrite")
      .internal()
      .doc("When LEGACY, Spark will rebase dates/timestamps from Proleptic Gregorian calendar " +
        "to the legacy hybrid (Julian + Gregorian) calendar when writing Parquet files. " +
        "When CORRECTED, Spark will not do rebase and write the dates/timestamps as it is. " +
        "When EXCEPTION, which is the default, Spark will fail the writing if it sees " +
        "ancient dates/timestamps that are ambiguous between the two calendars. " +
        "This config influences on writes of the following parquet logical types: DATE, " +
        "TIMESTAMP_MILLIS, TIMESTAMP_MICROS. The INT96 type has the separate config: " +
        s"${PARQUET_INT96_REBASE_MODE_IN_WRITE.key}.")
      .version("3.0.0")
      .withAlternative("spark.sql.legacy.parquet.datetimeRebaseModeInWrite")
      .stringConf
      .transform(_.toUpperCase(Locale.ROOT))
      .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)

  val PARQUET_INT96_REBASE_MODE_IN_READ =
    buildConf("spark.sql.parquet.int96RebaseModeInRead")
      .internal()
      .doc("When LEGACY, Spark will rebase INT96 timestamps from the legacy hybrid (Julian + " +
        "Gregorian) calendar to Proleptic Gregorian calendar when reading Parquet files. " +
        "When CORRECTED, Spark will not do rebase and read the timestamps as it is. " +
        "When EXCEPTION, which is the default, Spark will fail the reading if it sees ancient " +
        "timestamps that are ambiguous between the two calendars. This config is only effective " +
        "if the writer info (like Spark, Hive) of the Parquet files is unknown.")
      .version("3.1.0")
      .withAlternative("spark.sql.legacy.parquet.int96RebaseModeInRead")
      .stringConf
      .transform(_.toUpperCase(Locale.ROOT))
      .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)

  val PARQUET_REBASE_MODE_IN_READ =
    buildConf("spark.sql.parquet.datetimeRebaseModeInRead")
      .internal()
      .doc("When LEGACY, Spark will rebase dates/timestamps from the legacy hybrid (Julian + " +
        "Gregorian) calendar to Proleptic Gregorian calendar when reading Parquet files. " +
        "When CORRECTED, Spark will not do rebase and read the dates/timestamps as it is. " +
        "When EXCEPTION, which is the default, Spark will fail the reading if it sees " +
        "ancient dates/timestamps that are ambiguous between the two calendars. This config is " +
        "only effective if the writer info (like Spark, Hive) of the Parquet files is unknown. " +
        "This config influences on reads of the following parquet logical types: DATE, " +
        "TIMESTAMP_MILLIS, TIMESTAMP_MICROS. The INT96 type has the separate config: " +
        s"${PARQUET_INT96_REBASE_MODE_IN_READ.key}.")
      .version("3.0.0")
      .withAlternative("spark.sql.legacy.parquet.datetimeRebaseModeInRead")
      .stringConf
      .transform(_.toUpperCase(Locale.ROOT))
      .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)
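
  // A minimal sketch (illustrative only), assuming a SparkSession named `spark` and a
  // hypothetical path: if a read fails under the default EXCEPTION mode because of ancient
  // dates/timestamps from an unknown writer, a rebase mode can be chosen explicitly.
  //   spark.conf.set("spark.sql.parquet.datetimeRebaseModeInRead", "CORRECTED")
  //   val df = spark.read.parquet("/path/to/legacy_dates.parquet")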

  val AVRO_REBASE_MODE_IN_WRITE =
    buildConf("spark.sql.avro.datetimeRebaseModeInWrite")
      .internal()
      .doc("When LEGACY, Spark will rebase dates/timestamps from Proleptic Gregorian calendar " +
        "to the legacy hybrid (Julian + Gregorian) calendar when writing Avro files. " +
        "When CORRECTED, Spark will not do rebase and write the dates/timestamps as it is. " +
        "When EXCEPTION, which is the default, Spark will fail the writing if it sees " +
        "ancient dates/timestamps that are ambiguous between the two calendars.")
      .version("3.0.0")
      .withAlternative("spark.sql.legacy.avro.datetimeRebaseModeInWrite")
      .stringConf
      .transform(_.toUpperCase(Locale.ROOT))
      .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)

  val AVRO_REBASE_MODE_IN_READ =
    buildConf("spark.sql.avro.datetimeRebaseModeInRead")
      .internal()
      .doc("When LEGACY, Spark will rebase dates/timestamps from the legacy hybrid (Julian + " +
        "Gregorian) calendar to Proleptic Gregorian calendar when reading Avro files. " +
        "When CORRECTED, Spark will not do rebase and read the dates/timestamps as it is. " +
        "When EXCEPTION, which is the default, Spark will fail the reading if it sees " +
        "ancient dates/timestamps that are ambiguous between the two calendars. This config is " +
        "only effective if the writer info (like Spark, Hive) of the Avro files is unknown.")
      .version("3.0.0")
      .withAlternative("spark.sql.legacy.avro.datetimeRebaseModeInRead")
      .stringConf
      .transform(_.toUpperCase(Locale.ROOT))
      .checkValues(LegacyBehaviorPolicy.values.map(_.toString))
      .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString)

  val SCRIPT_TRANSFORMATION_EXIT_TIMEOUT =
    buildConf("spark.sql.scriptTransformation.exitTimeoutInSeconds")
      .internal()
      .doc("Timeout for executor to wait for the termination of transformation script when EOF.")
      .version("3.0.0")
      .timeConf(TimeUnit.SECONDS)
      .checkValue(_ > 0, "The timeout value must be positive")
      .createWithDefault(10L)

  val COALESCE_BUCKETS_IN_JOIN_ENABLED =
    buildConf("spark.sql.bucketing.coalesceBucketsInJoin.enabled")
      .doc("When true, if two bucketed tables with the different number of buckets are joined, " +
        "the side with a bigger number of buckets will be coalesced to have the same number " +
        "of buckets as the other side. Bigger number of buckets is divisible by the smaller " +
        "number of buckets. Bucket coalescing is applied to sort-merge joins and " +
        "shuffled hash join. Note: Coalescing bucketed table can avoid unnecessary shuffling " +
        "in join, but it also reduces parallelism and could possibly cause OOM for " +
        "shuffled hash join.")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(false)

  val COALESCE_BUCKETS_IN_JOIN_MAX_BUCKET_RATIO =
    buildConf("spark.sql.bucketing.coalesceBucketsInJoin.maxBucketRatio")
      .doc("The ratio of the number of two buckets being coalesced should be less than or " +
        "equal to this value for bucket coalescing to be applied. This configuration only " +
        s"has an effect when '${COALESCE_BUCKETS_IN_JOIN_ENABLED.key}' is set to true.")
      .version("3.1.0")
      .intConf
      .checkValue(_ > 0, "The difference must be positive.")
      .createWithDefault(4)
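
  // A minimal sketch (illustrative only), assuming a SparkSession named `spark`: with bucket
  // coalescing enabled and the default ratio of 4, a join between tables bucketed into 8 and
  // 4 buckets (ratio 2) can coalesce the 8-bucket side down to 4 and avoid a shuffle.
  //   spark.conf.set("spark.sql.bucketing.coalesceBucketsInJoin.enabled", "true")
  //   spark.conf.set("spark.sql.bucketing.coalesceBucketsInJoin.maxBucketRatio", "4")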

  val BROADCAST_HASH_JOIN_OUTPUT_PARTITIONING_EXPAND_LIMIT =
    buildConf("spark.sql.execution.broadcastHashJoin.outputPartitioningExpandLimit")
      .internal()
      .doc("The maximum number of partitionings that a HashPartitioning can be expanded to. " +
        "This configuration is applicable only for BroadcastHashJoin inner joins and can be " +
        "set to '0' to disable this feature.")
      .version("3.1.0")
      .intConf
      .checkValue(_ >= 0, "The value must be non-negative.")
      .createWithDefault(8)

  val OPTIMIZE_NULL_AWARE_ANTI_JOIN =
    buildConf("spark.sql.optimizeNullAwareAntiJoin")
      .internal()
      .doc("When true, NULL-aware anti join execution will be planed into " +
        "BroadcastHashJoinExec with flag isNullAwareAntiJoin enabled, " +
        "optimized from O(M*N) calculation into O(M) calculation " +
        "using Hash lookup instead of Looping lookup." +
        "Only support for singleColumn NAAJ for now.")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(true)

  val LEGACY_COMPLEX_TYPES_TO_STRING =
    buildConf("spark.sql.legacy.castComplexTypesToString.enabled")
      .internal()
      .doc("When true, maps and structs are wrapped by [] in casting to strings, and " +
        "NULL elements of structs/maps/arrays will be omitted while converting to strings. " +
        "Otherwise, if this is false, which is the default, maps and structs are wrapped by {}, " +
        "and NULL elements will be converted to \"null\".")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_PATH_OPTION_BEHAVIOR =
    buildConf("spark.sql.legacy.pathOptionBehavior.enabled")
      .internal()
      .doc("When true, \"path\" option is overwritten if one path parameter is passed to " +
        "DataFrameReader.load(), DataFrameWriter.save(), DataStreamReader.load(), or " +
        "DataStreamWriter.start(). Also, \"path\" option is added to the overall paths if " +
        "multiple path parameters are passed to DataFrameReader.load()")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_EXTRA_OPTIONS_BEHAVIOR =
    buildConf("spark.sql.legacy.extraOptionsBehavior.enabled")
      .internal()
      .doc("When true, the extra options will be ignored for DataFrameReader.table(). If set it " +
        "to false, which is the default, Spark will check if the extra options have the same " +
        "key, but the value is different with the table serde properties. If the check passes, " +
        "the extra options will be merged with the serde properties as the scan options. " +
        "Otherwise, an exception will be thrown.")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT =
    buildConf("spark.sql.legacy.createHiveTableByDefault")
      .internal()
      .doc("When set to true, CREATE TABLE syntax without USING or STORED AS will use Hive " +
        s"instead of the value of ${DEFAULT_DATA_SOURCE_NAME.key} as the table provider.")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(true)
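
  // A minimal sketch (illustrative only), assuming a SparkSession named `spark` and a
  // hypothetical table name: with the legacy flag turned off, CREATE TABLE without
  // USING/STORED AS picks the provider from spark.sql.sources.default instead of Hive.
  //   spark.conf.set("spark.sql.legacy.createHiveTableByDefault", "false")
  //   spark.sql("CREATE TABLE t_example (id INT)")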

  val LEGACY_CHAR_VARCHAR_AS_STRING =
    buildConf("spark.sql.legacy.charVarcharAsString")
      .internal()
      .doc("When true, Spark treats CHAR/VARCHAR type the same as STRING type, which is the " +
        "behavior of Spark 3.0 and earlier. This means no length check for CHAR/VARCHAR type and " +
        "no padding for CHAR type when writing data to the table.")
      .version("3.1.0")
      .booleanConf
      .createWithDefault(false)

  val CHAR_AS_VARCHAR = buildConf("spark.sql.charAsVarchar")
    .doc("When true, Spark replaces CHAR type with VARCHAR type in CREATE/REPLACE/ALTER TABLE " +
      "commands, so that newly created/updated tables will not have CHAR type columns/fields. " +
      "Existing tables with CHAR type columns/fields are not affected by this config.")
    .version("3.3.0")
    .booleanConf
    .createWithDefault(false)

  val READ_SIDE_CHAR_PADDING = buildConf("spark.sql.readSideCharPadding")
    .doc("When true, Spark applies string padding when reading CHAR type columns/fields, " +
      "in addition to the write-side padding. This config is true by default to better enforce " +
      "CHAR type semantic in cases such as external tables.")
    .version("3.4.0")
    .booleanConf
    .createWithDefault(true)
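
  // A minimal sketch (illustrative only), assuming a SparkSession named `spark` and a
  // hypothetical table: with read-side padding on (the default), a CHAR(5) value 'ab' is
  // returned as 'ab   ' even if the underlying files were written without padding.
  //   spark.sql("CREATE TABLE chars_example (c CHAR(5)) USING parquet")
  //   spark.sql("INSERT INTO chars_example VALUES ('ab')")
  //   spark.sql("SELECT concat('[', c, ']') FROM chars_example").show()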

  val LEGACY_NO_CHAR_PADDING_IN_PREDICATE = buildConf("spark.sql.legacy.noCharPaddingInPredicate")
    .internal()
    .doc("When true, Spark will not apply char type padding for CHAR type columns in string " +
      s"comparison predicates, when '${READ_SIDE_CHAR_PADDING.key}' is false.")
    .version("3.5.2")
    .booleanConf
    .createWithDefault(false)

  val CLI_PRINT_HEADER =
    buildConf("spark.sql.cli.print.header")
     .doc("When set to true, spark-sql CLI prints the names of the columns in query output.")
     .version("3.2.0")
    .booleanConf
    .createWithDefault(false)

  val LEGACY_EMPTY_CURRENT_DB_IN_CLI =
    buildConf("spark.sql.legacy.emptyCurrentDBInCli")
      .internal()
      .doc("When false, spark-sql CLI prints the the current database in prompt")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA =
    buildConf("spark.sql.legacy.keepCommandOutputSchema")
      .internal()
      .doc("When true, Spark will keep the output schema of commands such as SHOW DATABASES " +
        "unchanged.")
      .version("3.0.2")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_INTERVAL_ENABLED = buildConf("spark.sql.legacy.interval.enabled")
    .internal()
    .doc("When set to true, Spark SQL uses the mixed legacy interval type `CalendarIntervalType` " +
      "instead of the ANSI compliant interval types `YearMonthIntervalType` and " +
      "`DayTimeIntervalType`. For instance, the date subtraction expression returns " +
      "`CalendarIntervalType` when the SQL config is set to `true` otherwise an ANSI interval.")
    .version("3.2.0")
    .booleanConf
    .createWithDefault(false)

  val MAX_CONCURRENT_OUTPUT_FILE_WRITERS = buildConf("spark.sql.maxConcurrentOutputFileWriters")
    .internal()
    .doc("Maximum number of output file writers to use concurrently. If number of writers " +
      "needed reaches this limit, task will sort rest of output then writing them.")
    .version("3.2.0")
    .intConf
    .createWithDefault(0)

  val INFER_NESTED_DICT_AS_STRUCT = buildConf("spark.sql.pyspark.inferNestedDictAsStruct.enabled")
    .doc("PySpark's SparkSession.createDataFrame infers the nested dict as a map by default. " +
      "When it set to true, it infers the nested dict as a struct.")
    .version("3.3.0")
    .booleanConf
    .createWithDefault(false)

  val LEGACY_INFER_ARRAY_TYPE_FROM_FIRST_ELEMENT =
    buildConf("spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled")
      .doc("PySpark's SparkSession.createDataFrame infers the element type of an array from all " +
        "values in the array by default. If this config is set to true, it restores the legacy " +
        "behavior of only inferring the type from the first array element.")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_USE_V1_COMMAND =
    buildConf("spark.sql.legacy.useV1Command")
      .internal()
      .doc("When true, Spark will use legacy V1 SQL commands.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(false)

  val HISTOGRAM_NUMERIC_PROPAGATE_INPUT_TYPE =
    buildConf("spark.sql.legacy.histogramNumericPropagateInputType")
      .internal()
      .doc("The histogram_numeric function computes a histogram on numeric 'expr' using nb bins. " +
        "The return value is an array of (x,y) pairs representing the centers of the histogram's " +
        "bins. If this config is set to true, the output type of the 'x' field in the return " +
        "value is propagated from the input value consumed in the aggregate function. Otherwise, " +
        "'x' always has double type.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(true)

  val LEGACY_LPAD_RPAD_BINARY_TYPE_AS_STRING =
    buildConf("spark.sql.legacy.lpadRpadAlwaysReturnString")
      .internal()
      .doc("When set to false, when the first argument and the optional padding pattern is a " +
        "byte sequence, the result is a BINARY value. The default padding pattern in this case " +
        "is the zero byte. " +
        "When set to true, it restores the legacy behavior of always returning string types " +
        "even for binary inputs.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_NULL_VALUE_WRITTEN_AS_QUOTED_EMPTY_STRING_CSV =
    buildConf("spark.sql.legacy.nullValueWrittenAsQuotedEmptyStringCsv")
      .internal()
      .doc("When set to false, nulls are written as unquoted empty strings in CSV data source. " +
        "If set to true, it restores the legacy behavior that nulls were written as quoted " +
        "empty strings, `\"\"`.")
      .version("3.3.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_ALLOW_NULL_COMPARISON_RESULT_IN_ARRAY_SORT =
    buildConf("spark.sql.legacy.allowNullComparisonResultInArraySort")
      .internal()
      .doc("When set to false, `array_sort` function throws an error " +
        "if the comparator function returns null. " +
        "If set to true, it restores the legacy behavior that handles null as zero (equal).")
      .version("3.2.2")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_AVRO_ALLOW_INCOMPATIBLE_SCHEMA =
    buildConf("spark.sql.legacy.avro.allowIncompatibleSchema")
      .internal()
      .doc("When set to false, if types in Avro are encoded in the same format, but " +
        "the type in the Avro schema explicitly says that the data types are different, " +
        "reject reading the data type in the format to avoid returning incorrect results. " +
        "When set to true, it restores the legacy behavior of allow reading the data in the" +
        " format, which may return incorrect results.")
      .version("3.5.1")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_NON_IDENTIFIER_OUTPUT_CATALOG_NAME =
    buildConf("spark.sql.legacy.v1IdentifierNoCatalog")
      .internal()
      .doc(s"When set to false, the v1 identifier will include '$SESSION_CATALOG_NAME' as " +
        "the catalog name if database is defined. When set to true, it restores the legacy " +
        "behavior that does not include catalog name.")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_IN_SUBQUERY_NULLABILITY =
    buildConf("spark.sql.legacy.inSubqueryNullability")
      .internal()
      .doc(s"When set to false, IN subquery nullability is correctly calculated based on " +
        s"both the left and right sides of the IN. When set to true, restores the legacy " +
        "behavior that does not check the right side's nullability.")
      .version("3.5.0")
      .booleanConf
      .createWithDefault(false)

  val LEGACY_NULL_IN_EMPTY_LIST_BEHAVIOR =
    buildConf("spark.sql.legacy.nullInEmptyListBehavior")
      .internal()
      .doc("When set to true, restores the legacy incorrect behavior of IN expressions for " +
        "NULL values IN an empty list (including IN subqueries and literal IN lists): " +
        "`null IN (empty list)` should evaluate to false, but sometimes (not always) " +
        "incorrectly evaluates to null in the legacy behavior.")
      .version("3.5.0")
      .booleanConf
      .createWithDefault(true)

  val ERROR_MESSAGE_FORMAT = buildConf("spark.sql.error.messageFormat")
    .doc("When PRETTY, the error message consists of textual representation of error class, " +
      "message and query context. The MINIMAL and STANDARD formats are pretty JSON formats where " +
      "STANDARD includes an additional JSON field `message`. This configuration property " +
      "influences on error messages of Thrift Server and SQL CLI while running queries.")
    .version("3.4.0")
    .stringConf.transform(_.toUpperCase(Locale.ROOT))
    .checkValues(ErrorMessageFormat.values.map(_.toString))
    .createWithDefault(ErrorMessageFormat.PRETTY.toString)

  val LATERAL_COLUMN_ALIAS_IMPLICIT_ENABLED =
    buildConf("spark.sql.lateralColumnAlias.enableImplicitResolution")
      .internal()
      .doc("Enable resolving implicit lateral column alias defined in the same SELECT list. For " +
        "example, with this conf turned on, for query `SELECT 1 AS a, a + 1` the `a` in `a + 1` " +
        "can be resolved as the previously defined `1 AS a`. But note that table column has " +
        "higher resolution priority than the lateral column alias.")
      .version("3.4.0")
      .booleanConf
      .createWithDefault(true)
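
  // A minimal sketch (illustrative only), assuming a SparkSession named `spark`: with implicit
  // lateral column alias resolution on (the default), `a + 1` below resolves against `1 AS a`.
  //   spark.sql("SELECT 1 AS a, a + 1 AS b").show()
  //   // Disabling the config makes the same query fail to resolve `a` unless it is a real column:
  //   spark.conf.set("spark.sql.lateralColumnAlias.enableImplicitResolution", "false")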

  val STABLE_DERIVED_COLUMN_ALIAS_ENABLED =
    buildConf("spark.sql.stableDerivedColumnAlias.enabled")
      .internal()
      .doc("Enable deriving of stable column aliases from the lexer tree instead of parse tree " +
        "and form them via pretty SQL print.")
      .version("3.5.0")
      .booleanConf
      .createWithDefault(false)

  val LOCAL_RELATION_CACHE_THRESHOLD =
    buildConf(SqlApiConfHelper.LOCAL_RELATION_CACHE_THRESHOLD_KEY)
      .doc("The threshold for the size in bytes of local relations to be cached at " +
        "the driver side after serialization.")
      .version("3.5.0")
      .intConf
      .checkValue(_ >= 0, "The threshold of cached local relations must not be negative")
      .createWithDefault(64 * 1024 * 1024)

  val EXCLUDE_SUBQUERY_EXP_REFS_FROM_REMOVE_REDUNDANT_ALIASES =
    buildConf("spark.sql.optimizer.excludeSubqueryRefsFromRemoveRedundantAliases.enabled")
      .internal()
      .doc("When true, exclude the references from the subquery expressions (in, exists, etc.) " +
        s"while removing redundant aliases.")
      .version("4.0.0")
      .booleanConf
      .createWithDefault(true)

  val LEGACY_PERCENTILE_DISC_CALCULATION = buildConf("spark.sql.legacy.percentileDiscCalculation")
    .internal()
    .doc("If true, the old bogus percentile_disc calculation is used. The old calculation " +
      "incorrectly mapped the requested percentile to the sorted range of values in some cases " +
      "and so returned incorrect results. Also, the new implementation is faster as it doesn't " +
      "contain the interpolation logic that the old percentile_cont based one did.")
    .version("3.3.4")
    .booleanConf
    .createWithDefault(false)

  val LEGACY_NEGATIVE_INDEX_IN_ARRAY_INSERT =
    buildConf("spark.sql.legacy.negativeIndexInArrayInsert")
      .internal()
      .doc("When set to true, restores the legacy behavior of `array_insert` for " +
        "negative indexes - 0-based: the function inserts new element before the last one " +
        "for the index -1. For example, `array_insert(['a', 'b'], -1, 'x')` returns " +
        "`['a', 'x', 'b']`. When set to false, the -1 index points out to the last element, " +
        "and the given example produces `['a', 'b', 'x']`.")
      .version("3.4.2")
      .booleanConf
      .createWithDefault(false)
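
  // A minimal sketch (illustrative only), assuming a SparkSession named `spark`, showing the
  // two behaviors described above:
  //   spark.conf.set("spark.sql.legacy.negativeIndexInArrayInsert", "false")
  //   spark.sql("SELECT array_insert(array('a', 'b'), -1, 'x')").show()  // ["a", "b", "x"]
  //   spark.conf.set("spark.sql.legacy.negativeIndexInArrayInsert", "true")
  //   spark.sql("SELECT array_insert(array('a', 'b'), -1, 'x')").show()  // ["a", "x", "b"]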

  /**
   * Holds information about keys that have been deprecated.
   *
   * @param key The deprecated key.
   * @param version Version of Spark where key was deprecated.
   * @param comment Additional info regarding the deprecated config. For example,
   *                the reasons for the deprecation and what users should use instead.
   */
  case class DeprecatedConfig(key: String, version: String, comment: String) {
    def toDeprecationString: String = {
      s"The SQL config '$key' has been deprecated in Spark v$version " +
        s"and may be removed in the future. $comment"
    }
  }
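
  // A minimal sketch (illustrative only) of the warning produced for a hypothetical key:
  //   DeprecatedConfig("spark.sql.some.key", "3.0", "Use 'spark.sql.other.key' instead.")
  //     .toDeprecationString
  //   // "The SQL config 'spark.sql.some.key' has been deprecated in Spark v3.0 and may be
  //   //  removed in the future. Use 'spark.sql.other.key' instead."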

  /**
   * Maps deprecated SQL config keys to information about the deprecation.
   *
   * The extra information is logged as a warning when the SQL config is present
   * in the user's configuration.
   */
  val deprecatedSQLConfigs: Map[String, DeprecatedConfig] = {
    val configs = Seq(
      DeprecatedConfig(
        PANDAS_GROUPED_MAP_ASSIGN_COLUMNS_BY_NAME.key, "2.4",
        "The config allows to switch to the behaviour before Spark 2.4 " +
          "and will be removed in the future releases."),
      DeprecatedConfig(HIVE_VERIFY_PARTITION_PATH.key, "3.0",
        s"This config is replaced by '${SPARK_IGNORE_MISSING_FILES.key}'."),
      DeprecatedConfig(ARROW_EXECUTION_ENABLED.key, "3.0",
        s"Use '${ARROW_PYSPARK_EXECUTION_ENABLED.key}' instead of it."),
      DeprecatedConfig(ARROW_FALLBACK_ENABLED.key, "3.0",
        s"Use '${ARROW_PYSPARK_FALLBACK_ENABLED.key}' instead of it."),
      DeprecatedConfig(SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE.key, "3.0",
        s"Use '${ADVISORY_PARTITION_SIZE_IN_BYTES.key}' instead of it."),
      DeprecatedConfig(OPTIMIZER_METADATA_ONLY.key, "3.0",
        "Avoid to depend on this optimization to prevent a potential correctness issue. " +
          "If you must use, use 'SparkSessionExtensions' instead to inject it as a custom rule."),
      DeprecatedConfig(CONVERT_CTAS.key, "3.1",
        s"Set '${LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key}' to false instead."),
      DeprecatedConfig("spark.sql.sources.schemaStringLengthThreshold", "3.2",
        s"Use '${HIVE_TABLE_PROPERTY_LENGTH_THRESHOLD.key}' instead."),
      DeprecatedConfig(PARQUET_INT96_REBASE_MODE_IN_WRITE.alternatives.head, "3.2",
        s"Use '${PARQUET_INT96_REBASE_MODE_IN_WRITE.key}' instead."),
      DeprecatedConfig(PARQUET_INT96_REBASE_MODE_IN_READ.alternatives.head, "3.2",
        s"Use '${PARQUET_INT96_REBASE_MODE_IN_READ.key}' instead."),
      DeprecatedConfig(PARQUET_REBASE_MODE_IN_WRITE.alternatives.head, "3.2",
        s"Use '${PARQUET_REBASE_MODE_IN_WRITE.key}' instead."),
      DeprecatedConfig(PARQUET_REBASE_MODE_IN_READ.alternatives.head, "3.2",
        s"Use '${PARQUET_REBASE_MODE_IN_READ.key}' instead."),
      DeprecatedConfig(AVRO_REBASE_MODE_IN_WRITE.alternatives.head, "3.2",
        s"Use '${AVRO_REBASE_MODE_IN_WRITE.key}' instead."),
      DeprecatedConfig(AVRO_REBASE_MODE_IN_READ.alternatives.head, "3.2",
        s"Use '${AVRO_REBASE_MODE_IN_READ.key}' instead."),
      DeprecatedConfig(LEGACY_REPLACE_DATABRICKS_SPARK_AVRO_ENABLED.key, "3.2",
        """Use `.format("avro")` in `DataFrameWriter` or `DataFrameReader` instead."""),
      DeprecatedConfig(COALESCE_PARTITIONS_MIN_PARTITION_NUM.key, "3.2",
        s"Use '${COALESCE_PARTITIONS_MIN_PARTITION_SIZE.key}' instead.")
    )

    Map(configs.map { cfg => cfg.key -> cfg } : _*)
  }

  /**
   * Holds information about keys that have been removed.
   *
   * @param key The removed config key.
   * @param version Version of Spark where key was removed.
   * @param defaultValue The default config value. It can be used to notify
   *                     users that they set a non-default value for an already removed config.
   * @param comment Additional info regarding the removed config.
   */
  case class RemovedConfig(key: String, version: String, defaultValue: String, comment: String)

  /**
   * The map contains info about removed SQL configs. Keys are SQL config names,
   * map values contain extra information like the version in which the config was removed,
   * config's default value and a comment.
   *
   * Please add a removed SQL configuration property here only when it affects behaviours.
   * For example, `spark.sql.variable.substitute.depth` was not added as it virtually
   * became a no-op later. This keeps migrations to new Spark versions painless.
   */
  val removedSQLConfigs: Map[String, RemovedConfig] = {
    val configs = Seq(
      RemovedConfig("spark.sql.fromJsonForceNullableSchema", "3.0.0", "true",
        "It was removed to prevent errors like SPARK-23173 for non-default value."),
      RemovedConfig(
        "spark.sql.legacy.allowCreatingManagedTableUsingNonemptyLocation", "3.0.0", "false",
        "It was removed to prevent loss of user data for non-default value."),
      RemovedConfig("spark.sql.legacy.compareDateTimestampInTimestamp", "3.0.0", "true",
        "It was removed to prevent errors like SPARK-23549 for non-default value."),
      RemovedConfig("spark.sql.parquet.int64AsTimestampMillis", "3.0.0", "false",
        "The config was deprecated since Spark 2.3." +
        s"Use '${PARQUET_OUTPUT_TIMESTAMP_TYPE.key}' instead of it."),
      RemovedConfig("spark.sql.execution.pandas.respectSessionTimeZone", "3.0.0", "true",
        "The non-default behavior is considered as a bug, see SPARK-22395. " +
        "The config was deprecated since Spark 2.3."),
      RemovedConfig("spark.sql.optimizer.planChangeLog.level", "3.1.0", "trace",
        s"Please use `${PLAN_CHANGE_LOG_LEVEL.key}` instead."),
      RemovedConfig("spark.sql.optimizer.planChangeLog.rules", "3.1.0", "",
        s"Please use `${PLAN_CHANGE_LOG_RULES.key}` instead."),
      RemovedConfig("spark.sql.optimizer.planChangeLog.batches", "3.1.0", "",
        s"Please use `${PLAN_CHANGE_LOG_BATCHES.key}` instead."),
      RemovedConfig("spark.sql.ansi.strictIndexOperator", "3.4.0", "true",
        "This was an internal configuration. It is not needed anymore since Spark SQL always " +
          "returns null when getting a map value with a non-existing key. See SPARK-40066 " +
          "for more details.")
    )

    Map(configs.map { cfg => cfg.key -> cfg } : _*)
  }
}

/**
 * A class that enables the setting and getting of mutable config parameters/hints.
 *
 * In the presence of a SQLContext, these can be set and queried by passing SET commands
 * into Spark SQL's query functions (i.e. sql()). Otherwise, users of this class can
 * modify the hints by programmatically calling the setters and getters of this class.
 *
 * SQLConf is thread-safe (internally synchronized, so safe to be used in multiple threads).
 */
class SQLConf extends Serializable with Logging with SqlApiConf {
  import SQLConf._

  /** Only low degree of contention is expected for conf, thus NOT using ConcurrentHashMap. */
  @transient protected[spark] val settings = java.util.Collections.synchronizedMap(
    new java.util.HashMap[String, String]())

  @transient protected val reader = new ConfigReader(settings)

  /** ************************ Spark SQL Params/Hints ******************* */

  def analyzerMaxIterations: Int = getConf(ANALYZER_MAX_ITERATIONS)

  def optimizerExcludedRules: Option[String] = getConf(OPTIMIZER_EXCLUDED_RULES)

  def optimizerMaxIterations: Int = getConf(OPTIMIZER_MAX_ITERATIONS)

  def optimizerInSetConversionThreshold: Int = getConf(OPTIMIZER_INSET_CONVERSION_THRESHOLD)

  def optimizerInSetSwitchThreshold: Int = getConf(OPTIMIZER_INSET_SWITCH_THRESHOLD)

  def planChangeLogLevel: String = getConf(PLAN_CHANGE_LOG_LEVEL)

  def planChangeRules: Option[String] = getConf(PLAN_CHANGE_LOG_RULES)

  def planChangeBatches: Option[String] = getConf(PLAN_CHANGE_LOG_BATCHES)

  def dynamicPartitionPruningEnabled: Boolean = getConf(DYNAMIC_PARTITION_PRUNING_ENABLED)

  def dynamicPartitionPruningUseStats: Boolean = getConf(DYNAMIC_PARTITION_PRUNING_USE_STATS)

  def dynamicPartitionPruningFallbackFilterRatio: Double =
    getConf(DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO)

  def dynamicPartitionPruningReuseBroadcastOnly: Boolean =
    getConf(DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY)

  def runtimeFilterSemiJoinReductionEnabled: Boolean =
    getConf(RUNTIME_FILTER_SEMI_JOIN_REDUCTION_ENABLED)

  def runtimeFilterBloomFilterEnabled: Boolean =
    getConf(RUNTIME_BLOOM_FILTER_ENABLED)

  def runtimeFilterCreationSideThreshold: Long =
    getConf(RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD)

  def runtimeRowLevelOperationGroupFilterEnabled: Boolean =
    getConf(RUNTIME_ROW_LEVEL_OPERATION_GROUP_FILTER_ENABLED)

  def stateStoreProviderClass: String = getConf(STATE_STORE_PROVIDER_CLASS)

  def isStateSchemaCheckEnabled: Boolean = getConf(STATE_SCHEMA_CHECK_ENABLED)

  def stateStoreMinDeltasForSnapshot: Int = getConf(STATE_STORE_MIN_DELTAS_FOR_SNAPSHOT)

  def stateStoreFormatValidationEnabled: Boolean = getConf(STATE_STORE_FORMAT_VALIDATION_ENABLED)

  def stateStoreSkipNullsForStreamStreamJoins: Boolean =
    getConf(STATE_STORE_SKIP_NULLS_FOR_STREAM_STREAM_JOINS)

  def checkpointLocation: Option[String] = getConf(CHECKPOINT_LOCATION)

  def isUnsupportedOperationCheckEnabled: Boolean = getConf(UNSUPPORTED_OPERATION_CHECK_ENABLED)

  def useDeprecatedKafkaOffsetFetching: Boolean = getConf(USE_DEPRECATED_KAFKA_OFFSET_FETCHING)

  def statefulOperatorCorrectnessCheckEnabled: Boolean =
    getConf(STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED)

  def fileStreamSinkMetadataIgnored: Boolean = getConf(FILESTREAM_SINK_METADATA_IGNORED)

  def streamingFileCommitProtocolClass: String = getConf(STREAMING_FILE_COMMIT_PROTOCOL_CLASS)

  def fileSinkLogDeletion: Boolean = getConf(FILE_SINK_LOG_DELETION)

  def fileSinkLogCompactInterval: Int = getConf(FILE_SINK_LOG_COMPACT_INTERVAL)

  def fileSinkLogCleanupDelay: Long = getConf(FILE_SINK_LOG_CLEANUP_DELAY)

  def fileSourceLogDeletion: Boolean = getConf(FILE_SOURCE_LOG_DELETION)

  def fileSourceLogCompactInterval: Int = getConf(FILE_SOURCE_LOG_COMPACT_INTERVAL)

  def fileSourceLogCleanupDelay: Long = getConf(FILE_SOURCE_LOG_CLEANUP_DELAY)

  def streamingSchemaInference: Boolean = getConf(STREAMING_SCHEMA_INFERENCE)

  def streamingPollingDelay: Long = getConf(STREAMING_POLLING_DELAY)

  def streamingNoDataProgressEventInterval: Long =
    getConf(STREAMING_NO_DATA_PROGRESS_EVENT_INTERVAL)

  def streamingNoDataMicroBatchesEnabled: Boolean =
    getConf(STREAMING_NO_DATA_MICRO_BATCHES_ENABLED)

  def streamingMetricsEnabled: Boolean = getConf(STREAMING_METRICS_ENABLED)

  def streamingProgressRetention: Int = getConf(STREAMING_PROGRESS_RETENTION)

  def filesMaxPartitionBytes: Long = getConf(FILES_MAX_PARTITION_BYTES)

  def filesOpenCostInBytes: Long = getConf(FILES_OPEN_COST_IN_BYTES)

  def filesMinPartitionNum: Option[Int] = getConf(FILES_MIN_PARTITION_NUM)

  def filesMaxPartitionNum: Option[Int] = getConf(FILES_MAX_PARTITION_NUM)

  def ignoreCorruptFiles: Boolean = getConf(IGNORE_CORRUPT_FILES)

  def ignoreMissingFiles: Boolean = getConf(IGNORE_MISSING_FILES)

  def maxRecordsPerFile: Long = getConf(MAX_RECORDS_PER_FILE)

  def useCompression: Boolean = getConf(COMPRESS_CACHED)

  def orcCompressionCodec: String = getConf(ORC_COMPRESSION)

  def orcVectorizedReaderEnabled: Boolean = getConf(ORC_VECTORIZED_READER_ENABLED)

  def orcVectorizedReaderBatchSize: Int = getConf(ORC_VECTORIZED_READER_BATCH_SIZE)

  def orcVectorizedWriterBatchSize: Int = getConf(ORC_VECTORIZED_WRITER_BATCH_SIZE)

  def orcVectorizedReaderNestedColumnEnabled: Boolean =
    getConf(ORC_VECTORIZED_READER_NESTED_COLUMN_ENABLED)

  def parquetCompressionCodec: String = getConf(PARQUET_COMPRESSION)

  def parquetVectorizedReaderEnabled: Boolean = getConf(PARQUET_VECTORIZED_READER_ENABLED)

  def parquetVectorizedReaderNestedColumnEnabled: Boolean =
    getConf(PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED)

  def parquetVectorizedReaderBatchSize: Int = getConf(PARQUET_VECTORIZED_READER_BATCH_SIZE)

  def columnBatchSize: Int = getConf(COLUMN_BATCH_SIZE)

  def cacheVectorizedReaderEnabled: Boolean = getConf(CACHE_VECTORIZED_READER_ENABLED)

  def defaultNumShufflePartitions: Int = getConf(SHUFFLE_PARTITIONS)

  def numShufflePartitions: Int = {
    if (adaptiveExecutionEnabled && coalesceShufflePartitionsEnabled) {
      getConf(COALESCE_PARTITIONS_INITIAL_PARTITION_NUM).getOrElse(defaultNumShufflePartitions)
    } else {
      defaultNumShufflePartitions
    }
  }
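
  // A minimal sketch (illustrative only), assuming a SparkSession named `spark`: with adaptive
  // execution and partition coalescing enabled, the initial partition number (when set) takes
  // precedence over spark.sql.shuffle.partitions as the pre-shuffle partition count.
  //   spark.conf.set("spark.sql.adaptive.enabled", "true")
  //   spark.conf.set("spark.sql.adaptive.coalescePartitions.initialPartitionNum", "1000")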

  def adaptiveExecutionEnabled: Boolean = getConf(ADAPTIVE_EXECUTION_ENABLED)

  def adaptiveExecutionLogLevel: String = getConf(ADAPTIVE_EXECUTION_LOG_LEVEL)

  def fetchShuffleBlocksInBatch: Boolean = getConf(FETCH_SHUFFLE_BLOCKS_IN_BATCH)

  def nonEmptyPartitionRatioForBroadcastJoin: Double =
    getConf(NON_EMPTY_PARTITION_RATIO_FOR_BROADCAST_JOIN)

  def coalesceShufflePartitionsEnabled: Boolean = getConf(COALESCE_PARTITIONS_ENABLED)

  def minBatchesToRetain: Int = getConf(MIN_BATCHES_TO_RETAIN)

  def maxBatchesToRetainInMemory: Int = getConf(MAX_BATCHES_TO_RETAIN_IN_MEMORY)

  def streamingMaintenanceInterval: Long = getConf(STREAMING_MAINTENANCE_INTERVAL)

  def stateStoreCompressionCodec: String = getConf(STATE_STORE_COMPRESSION_CODEC)

  def checkpointRenamedFileCheck: Boolean = getConf(CHECKPOINT_RENAMEDFILE_CHECK_ENABLED)

  def parquetFilterPushDown: Boolean = getConf(PARQUET_FILTER_PUSHDOWN_ENABLED)

  def parquetFilterPushDownDate: Boolean = getConf(PARQUET_FILTER_PUSHDOWN_DATE_ENABLED)

  def parquetFilterPushDownTimestamp: Boolean = getConf(PARQUET_FILTER_PUSHDOWN_TIMESTAMP_ENABLED)

  def parquetFilterPushDownDecimal: Boolean = getConf(PARQUET_FILTER_PUSHDOWN_DECIMAL_ENABLED)

  def parquetFilterPushDownStringPredicate: Boolean =
    getConf(PARQUET_FILTER_PUSHDOWN_STRING_PREDICATE_ENABLED)

  def parquetFilterPushDownInFilterThreshold: Int =
    getConf(PARQUET_FILTER_PUSHDOWN_INFILTERTHRESHOLD)

  def parquetAggregatePushDown: Boolean = getConf(PARQUET_AGGREGATE_PUSHDOWN_ENABLED)

  def orcFilterPushDown: Boolean = getConf(ORC_FILTER_PUSHDOWN_ENABLED)

  def orcAggregatePushDown: Boolean = getConf(ORC_AGGREGATE_PUSHDOWN_ENABLED)

  def isOrcSchemaMergingEnabled: Boolean = getConf(ORC_SCHEMA_MERGING_ENABLED)

  def verifyPartitionPath: Boolean = getConf(HIVE_VERIFY_PARTITION_PATH)

  def metastoreDropPartitionsByName: Boolean = getConf(HIVE_METASTORE_DROP_PARTITION_BY_NAME)

  def metastorePartitionPruning: Boolean = getConf(HIVE_METASTORE_PARTITION_PRUNING)

  def metastorePartitionPruningInSetThreshold: Int =
    getConf(HIVE_METASTORE_PARTITION_PRUNING_INSET_THRESHOLD)

  def metastorePartitionPruningFallbackOnException: Boolean =
    getConf(HIVE_METASTORE_PARTITION_PRUNING_FALLBACK_ON_EXCEPTION)

  def metastorePartitionPruningFastFallback: Boolean =
    getConf(HIVE_METASTORE_PARTITION_PRUNING_FAST_FALLBACK)

  def manageFilesourcePartitions: Boolean = getConf(HIVE_MANAGE_FILESOURCE_PARTITIONS)

  def filesourcePartitionFileCacheSize: Long = getConf(HIVE_FILESOURCE_PARTITION_FILE_CACHE_SIZE)

  def caseSensitiveInferenceMode: HiveCaseSensitiveInferenceMode.Value =
    HiveCaseSensitiveInferenceMode.withName(getConf(HIVE_CASE_SENSITIVE_INFERENCE))

  def gatherFastStats: Boolean = getConf(GATHER_FASTSTAT)

  def optimizerMetadataOnly: Boolean = getConf(OPTIMIZER_METADATA_ONLY)

  def wholeStageEnabled: Boolean = getConf(WHOLESTAGE_CODEGEN_ENABLED)

  def wholeStageUseIdInClassName: Boolean = getConf(WHOLESTAGE_CODEGEN_USE_ID_IN_CLASS_NAME)

  def wholeStageMaxNumFields: Int = getConf(WHOLESTAGE_MAX_NUM_FIELDS)

  def codegenFallback: Boolean = getConf(CODEGEN_FALLBACK)

  def codegenFactoryMode: String = getConf(CODEGEN_FACTORY_MODE)

  def codegenComments: Boolean = getConf(StaticSQLConf.CODEGEN_COMMENTS)

  def loggingMaxLinesForCodegen: Int = getConf(CODEGEN_LOGGING_MAX_LINES)

  def hugeMethodLimit: Int = getConf(WHOLESTAGE_HUGE_METHOD_LIMIT)

  def methodSplitThreshold: Int = getConf(CODEGEN_METHOD_SPLIT_THRESHOLD)

  def wholeStageSplitConsumeFuncByOperator: Boolean =
    getConf(WHOLESTAGE_SPLIT_CONSUME_FUNC_BY_OPERATOR)

  def tableRelationCacheSize: Int =
    getConf(StaticSQLConf.FILESOURCE_TABLE_RELATION_CACHE_SIZE)

  def codegenCacheMaxEntries: Int = getConf(StaticSQLConf.CODEGEN_CACHE_MAX_ENTRIES)

  def exchangeReuseEnabled: Boolean = getConf(EXCHANGE_REUSE_ENABLED)

  def subqueryReuseEnabled: Boolean = getConf(SUBQUERY_REUSE_ENABLED)

  override def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE)

  def constraintPropagationEnabled: Boolean = getConf(CONSTRAINT_PROPAGATION_ENABLED)

  def escapedStringLiterals: Boolean = getConf(ESCAPED_STRING_LITERALS)

  def fileCompressionFactor: Double = getConf(FILE_COMPRESSION_FACTOR)

  def stringRedactionPattern: Option[Regex] = getConf(SQL_STRING_REDACTION_PATTERN)

  def sortBeforeRepartition: Boolean = getConf(SORT_BEFORE_REPARTITION)

  def topKSortFallbackThreshold: Int = getConf(TOP_K_SORT_FALLBACK_THRESHOLD)

  def fastHashAggregateRowMaxCapacityBit: Int = getConf(FAST_HASH_AGGREGATE_MAX_ROWS_CAPACITY_BIT)

  def streamingSessionWindowMergeSessionInLocalPartition: Boolean =
    getConf(STREAMING_SESSION_WINDOW_MERGE_SESSIONS_IN_LOCAL_PARTITION)

  override def datetimeJava8ApiEnabled: Boolean = getConf(DATETIME_JAVA8API_ENABLED)

  def uiExplainMode: String = getConf(UI_EXPLAIN_MODE)

  def addSingleFileInAddFile: Boolean = getConf(LEGACY_ADD_SINGLE_FILE_IN_ADD_FILE)

  def legacyMsSqlServerNumericMappingEnabled: Boolean =
    getConf(LEGACY_MSSQLSERVER_NUMERIC_MAPPING_ENABLED)

  override def legacyTimeParserPolicy: LegacyBehaviorPolicy.Value = {
    LegacyBehaviorPolicy.withName(getConf(SQLConf.LEGACY_TIME_PARSER_POLICY))
  }

  def broadcastHashJoinOutputPartitioningExpandLimit: Int =
    getConf(BROADCAST_HASH_JOIN_OUTPUT_PARTITIONING_EXPAND_LIMIT)

  /**
   * Returns the [[Resolver]] for the current configuration, which can be used to determine if two
   * identifiers are equal.
   */
  def resolver: Resolver = {
    if (caseSensitiveAnalysis) {
      org.apache.spark.sql.catalyst.analysis.caseSensitiveResolution
    } else {
      org.apache.spark.sql.catalyst.analysis.caseInsensitiveResolution
    }
  }
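
  // A minimal sketch (illustrative only): the resolver is a (String, String) => Boolean used
  // for identifier comparison; with the default case-insensitive analysis, differently-cased
  // names match.
  //   val conf = new SQLConf()
  //   conf.resolver("myCol", "MYCOL")  // true unless spark.sql.caseSensitive is set to true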

  /**
   * Returns the error handler for handling hint errors.
   */
  def hintErrorHandler: HintErrorHandler = HintErrorLogger

  def subexpressionEliminationEnabled: Boolean =
    getConf(SUBEXPRESSION_ELIMINATION_ENABLED)

  def subexpressionEliminationCacheMaxEntries: Int =
    getConf(SUBEXPRESSION_ELIMINATION_CACHE_MAX_ENTRIES)

  def subexpressionEliminationSkipForShotcutExpr: Boolean =
    getConf(SUBEXPRESSION_ELIMINATION_SKIP_FOR_SHORTCUT_EXPR)

  def autoBroadcastJoinThreshold: Long = getConf(AUTO_BROADCASTJOIN_THRESHOLD)

  def limitInitialNumPartitions: Int = getConf(LIMIT_INITIAL_NUM_PARTITIONS)

  def limitScaleUpFactor: Int = getConf(LIMIT_SCALE_UP_FACTOR)

  def advancedPartitionPredicatePushdownEnabled: Boolean =
    getConf(ADVANCED_PARTITION_PREDICATE_PUSHDOWN)

  def preferSortMergeJoin: Boolean = getConf(PREFER_SORTMERGEJOIN)

  def enableRadixSort: Boolean = getConf(RADIX_SORT_ENABLED)

  def isParquetSchemaMergingEnabled: Boolean = getConf(PARQUET_SCHEMA_MERGING_ENABLED)

  def isParquetSchemaRespectSummaries: Boolean = getConf(PARQUET_SCHEMA_RESPECT_SUMMARIES)

  def parquetOutputCommitterClass: String = getConf(PARQUET_OUTPUT_COMMITTER_CLASS)

  def isParquetBinaryAsString: Boolean = getConf(PARQUET_BINARY_AS_STRING)

  def isParquetINT96AsTimestamp: Boolean = getConf(PARQUET_INT96_AS_TIMESTAMP)

  def isParquetINT96TimestampConversion: Boolean = getConf(PARQUET_INT96_TIMESTAMP_CONVERSION)

  def parquetOutputTimestampType: ParquetOutputTimestampType.Value = {
    ParquetOutputTimestampType.withName(getConf(PARQUET_OUTPUT_TIMESTAMP_TYPE))
  }

  def writeLegacyParquetFormat: Boolean = getConf(PARQUET_WRITE_LEGACY_FORMAT)

  def parquetRecordFilterEnabled: Boolean = getConf(PARQUET_RECORD_FILTER_ENABLED)

  def inMemoryPartitionPruning: Boolean = getConf(IN_MEMORY_PARTITION_PRUNING)

  def inMemoryTableScanStatisticsEnabled: Boolean = getConf(IN_MEMORY_TABLE_SCAN_STATISTICS_ENABLED)

  def offHeapColumnVectorEnabled: Boolean = getConf(COLUMN_VECTOR_OFFHEAP_ENABLED)

  def columnNameOfCorruptRecord: String = getConf(COLUMN_NAME_OF_CORRUPT_RECORD)

  def broadcastTimeout: Long = {
    val timeoutValue = getConf(BROADCAST_TIMEOUT)
    if (timeoutValue < 0) Long.MaxValue else timeoutValue
  }

  def defaultDataSourceName: String = getConf(DEFAULT_DATA_SOURCE_NAME)

  def convertCTAS: Boolean = getConf(CONVERT_CTAS)

  def partitionColumnTypeInferenceEnabled: Boolean =
    getConf(SQLConf.PARTITION_COLUMN_TYPE_INFERENCE)

  def fileCommitProtocolClass: String = getConf(SQLConf.FILE_COMMIT_PROTOCOL_CLASS)

  def parallelPartitionDiscoveryThreshold: Int =
    getConf(SQLConf.PARALLEL_PARTITION_DISCOVERY_THRESHOLD)

  def parallelPartitionDiscoveryParallelism: Int =
    getConf(SQLConf.PARALLEL_PARTITION_DISCOVERY_PARALLELISM)

  def bucketingEnabled: Boolean = getConf(SQLConf.BUCKETING_ENABLED)

  def bucketingMaxBuckets: Int = getConf(SQLConf.BUCKETING_MAX_BUCKETS)

  def autoBucketedScanEnabled: Boolean = getConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED)

  def v2BucketingEnabled: Boolean = getConf(SQLConf.V2_BUCKETING_ENABLED)

  def v2BucketingPushPartValuesEnabled: Boolean =
    getConf(SQLConf.V2_BUCKETING_PUSH_PART_VALUES_ENABLED)

  def v2BucketingPartiallyClusteredDistributionEnabled: Boolean =
    getConf(SQLConf.V2_BUCKETING_PARTIALLY_CLUSTERED_DISTRIBUTION_ENABLED)

  def dataFrameSelfJoinAutoResolveAmbiguity: Boolean =
    getConf(DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY)

  def dataFrameRetainGroupColumns: Boolean = getConf(DATAFRAME_RETAIN_GROUP_COLUMNS)

  def dataFramePivotMaxValues: Int = getConf(DATAFRAME_PIVOT_MAX_VALUES)

  def runSQLonFile: Boolean = getConf(RUN_SQL_ON_FILES)

  def enableTwoLevelAggMap: Boolean = getConf(ENABLE_TWOLEVEL_AGG_MAP)

  def enableVectorizedHashMap: Boolean = getConf(ENABLE_VECTORIZED_HASH_MAP)

  def useObjectHashAggregation: Boolean = getConf(USE_OBJECT_HASH_AGG)

  def objectAggSortBasedFallbackThreshold: Int = getConf(OBJECT_AGG_SORT_BASED_FALLBACK_THRESHOLD)

  def variableSubstituteEnabled: Boolean = getConf(VARIABLE_SUBSTITUTE_ENABLED)

  def warehousePath: String = new Path(getConf(StaticSQLConf.WAREHOUSE_PATH)).toString

  def hiveThriftServerSingleSession: Boolean =
    getConf(StaticSQLConf.HIVE_THRIFT_SERVER_SINGLESESSION)

  def orderByOrdinal: Boolean = getConf(ORDER_BY_ORDINAL)

  def groupByOrdinal: Boolean = getConf(GROUP_BY_ORDINAL)

  def groupByAliases: Boolean = getConf(GROUP_BY_ALIASES)

  def crossJoinEnabled: Boolean = getConf(SQLConf.CROSS_JOINS_ENABLED)

  override def sessionLocalTimeZone: String = getConf(SQLConf.SESSION_LOCAL_TIMEZONE)

  def jsonGeneratorIgnoreNullFields: Boolean = getConf(SQLConf.JSON_GENERATOR_IGNORE_NULL_FIELDS)

  def jsonExpressionOptimization: Boolean = getConf(SQLConf.JSON_EXPRESSION_OPTIMIZATION)

  def csvExpressionOptimization: Boolean = getConf(SQLConf.CSV_EXPRESSION_OPTIMIZATION)

  def parallelFileListingInStatsComputation: Boolean =
    getConf(SQLConf.PARALLEL_FILE_LISTING_IN_STATS_COMPUTATION)

  def fallBackToHdfsForStatsEnabled: Boolean = getConf(ENABLE_FALL_BACK_TO_HDFS_FOR_STATS)

  def defaultSizeInBytes: Long = getConf(DEFAULT_SIZE_IN_BYTES)

  def ndvMaxError: Double = getConf(NDV_MAX_ERROR)

  def histogramEnabled: Boolean = getConf(HISTOGRAM_ENABLED)

  def histogramNumBins: Int = getConf(HISTOGRAM_NUM_BINS)

  def percentileAccuracy: Int = getConf(PERCENTILE_ACCURACY)

  def cboEnabled: Boolean = getConf(SQLConf.CBO_ENABLED)

  def planStatsEnabled: Boolean = getConf(SQLConf.PLAN_STATS_ENABLED)

  def autoSizeUpdateEnabled: Boolean = getConf(SQLConf.AUTO_SIZE_UPDATE_ENABLED)

  def joinReorderEnabled: Boolean = getConf(SQLConf.JOIN_REORDER_ENABLED)

  def joinReorderDPThreshold: Int = getConf(SQLConf.JOIN_REORDER_DP_THRESHOLD)

  def joinReorderCardWeight: Double = getConf(SQLConf.JOIN_REORDER_CARD_WEIGHT)

  def joinReorderDPStarFilter: Boolean = getConf(SQLConf.JOIN_REORDER_DP_STAR_FILTER)

  def windowExecBufferInMemoryThreshold: Int = getConf(WINDOW_EXEC_BUFFER_IN_MEMORY_THRESHOLD)

  def windowExecBufferSpillThreshold: Int = getConf(WINDOW_EXEC_BUFFER_SPILL_THRESHOLD)

  def windowGroupLimitThreshold: Int = getConf(WINDOW_GROUP_LIMIT_THRESHOLD)

  def sessionWindowBufferInMemoryThreshold: Int = getConf(SESSION_WINDOW_BUFFER_IN_MEMORY_THRESHOLD)

  def sessionWindowBufferSpillThreshold: Int = getConf(SESSION_WINDOW_BUFFER_SPILL_THRESHOLD)

  def sortMergeJoinExecBufferInMemoryThreshold: Int =
    getConf(SORT_MERGE_JOIN_EXEC_BUFFER_IN_MEMORY_THRESHOLD)

  def sortMergeJoinExecBufferSpillThreshold: Int =
    getConf(SORT_MERGE_JOIN_EXEC_BUFFER_SPILL_THRESHOLD)

  def cartesianProductExecBufferInMemoryThreshold: Int =
    getConf(CARTESIAN_PRODUCT_EXEC_BUFFER_IN_MEMORY_THRESHOLD)

  def cartesianProductExecBufferSpillThreshold: Int =
    getConf(CARTESIAN_PRODUCT_EXEC_BUFFER_SPILL_THRESHOLD)

  def codegenSplitAggregateFunc: Boolean = getConf(SQLConf.CODEGEN_SPLIT_AGGREGATE_FUNC)

  def maxNestedViewDepth: Int = getConf(SQLConf.MAX_NESTED_VIEW_DEPTH)

  def useCurrentSQLConfigsForView: Boolean = getConf(SQLConf.USE_CURRENT_SQL_CONFIGS_FOR_VIEW)

  def storeAnalyzedPlanForView: Boolean = getConf(SQLConf.STORE_ANALYZED_PLAN_FOR_VIEW)

  def allowAutoGeneratedAliasForView: Boolean = getConf(SQLConf.ALLOW_AUTO_GENERATED_ALIAS_FOR_VEW)

  def allowStarWithSingleTableIdentifierInCount: Boolean =
    getConf(SQLConf.ALLOW_STAR_WITH_SINGLE_TABLE_IDENTIFIER_IN_COUNT)

  def allowNonEmptyLocationInCTAS: Boolean =
    getConf(SQLConf.ALLOW_NON_EMPTY_LOCATION_IN_CTAS)

  def starSchemaDetection: Boolean = getConf(STARSCHEMA_DETECTION)

  def starSchemaFTRatio: Double = getConf(STARSCHEMA_FACT_TABLE_RATIO)

  def supportQuotedRegexColumnName: Boolean = getConf(SUPPORT_QUOTED_REGEX_COLUMN_NAME)

  def tvfAllowMultipleTableArguments: Boolean = getConf(TVF_ALLOW_MULTIPLE_TABLE_ARGUMENTS_ENABLED)

  def rangeExchangeSampleSizePerPartition: Int = getConf(RANGE_EXCHANGE_SAMPLE_SIZE_PER_PARTITION)

  def arrowPySparkEnabled: Boolean = getConf(ARROW_PYSPARK_EXECUTION_ENABLED)

  def arrowLocalRelationThreshold: Long = getConf(ARROW_LOCAL_RELATION_THRESHOLD)

  def arrowPySparkSelfDestructEnabled: Boolean = getConf(ARROW_PYSPARK_SELF_DESTRUCT_ENABLED)

  def pysparkJVMStacktraceEnabled: Boolean = getConf(PYSPARK_JVM_STACKTRACE_ENABLED)

  def arrowSparkREnabled: Boolean = getConf(ARROW_SPARKR_EXECUTION_ENABLED)

  def arrowPySparkFallbackEnabled: Boolean = getConf(ARROW_PYSPARK_FALLBACK_ENABLED)

  def arrowMaxRecordsPerBatch: Int = getConf(ARROW_EXECUTION_MAX_RECORDS_PER_BATCH)

  def arrowUseLargeVarTypes: Boolean = getConf(ARROW_EXECUTION_USE_LARGE_VAR_TYPES)

  def pandasUDFBufferSize: Int = getConf(PANDAS_UDF_BUFFER_SIZE)

  def pandasStructHandlingMode: String = getConf(PANDAS_STRUCT_HANDLING_MODE)

  def pysparkSimplifiedTraceback: Boolean = getConf(PYSPARK_SIMPLIFIED_TRACEBACK)

  def pandasGroupedMapAssignColumnsByName: Boolean =
    getConf(SQLConf.PANDAS_GROUPED_MAP_ASSIGN_COLUMNS_BY_NAME)

  def arrowSafeTypeConversion: Boolean = getConf(SQLConf.PANDAS_ARROW_SAFE_TYPE_CONVERSION)

  def pysparkWorkerPythonExecutable: Option[String] =
    getConf(SQLConf.PYSPARK_WORKER_PYTHON_EXECUTABLE)

  def replaceExceptWithFilter: Boolean = getConf(REPLACE_EXCEPT_WITH_FILTER)

  def decimalOperationsAllowPrecisionLoss: Boolean = getConf(DECIMAL_OPERATIONS_ALLOW_PREC_LOSS)

  def literalPickMinimumPrecision: Boolean = getConf(LITERAL_PICK_MINIMUM_PRECISION)

  def continuousStreamingEpochBacklogQueueSize: Int =
    getConf(CONTINUOUS_STREAMING_EPOCH_BACKLOG_QUEUE_SIZE)

  def continuousStreamingExecutorQueueSize: Int = getConf(CONTINUOUS_STREAMING_EXECUTOR_QUEUE_SIZE)

  def continuousStreamingExecutorPollIntervalMs: Long =
    getConf(CONTINUOUS_STREAMING_EXECUTOR_POLL_INTERVAL_MS)

  def disabledV2StreamingWriters: String = getConf(DISABLED_V2_STREAMING_WRITERS)

  def disabledV2StreamingMicroBatchReaders: String =
    getConf(DISABLED_V2_STREAMING_MICROBATCH_READERS)

  def fastFailFileFormatOutput: Boolean = getConf(FASTFAIL_ON_FILEFORMAT_OUTPUT)

  def concatBinaryAsString: Boolean = getConf(CONCAT_BINARY_AS_STRING)

  def eltOutputAsString: Boolean = getConf(ELT_OUTPUT_AS_STRING)

  def validatePartitionColumns: Boolean = getConf(VALIDATE_PARTITION_COLUMNS)

  def partitionOverwriteMode: PartitionOverwriteMode.Value =
    PartitionOverwriteMode.withName(getConf(PARTITION_OVERWRITE_MODE))

  def storeAssignmentPolicy: StoreAssignmentPolicy.Value =
    StoreAssignmentPolicy.withName(getConf(STORE_ASSIGNMENT_POLICY))

  override def ansiEnabled: Boolean = getConf(ANSI_ENABLED)

  def enableDefaultColumns: Boolean = getConf(SQLConf.ENABLE_DEFAULT_COLUMNS)

  def defaultColumnAllowedProviders: String = getConf(SQLConf.DEFAULT_COLUMN_ALLOWED_PROVIDERS)

  def jsonWriteNullIfWithDefaultValue: Boolean =
    getConf(JSON_GENERATOR_WRITE_NULL_IF_WITH_DEFAULT_VALUE)

  def useNullsForMissingDefaultColumnValues: Boolean =
    getConf(SQLConf.USE_NULLS_FOR_MISSING_DEFAULT_COLUMN_VALUES)

  override def enforceReservedKeywords: Boolean = ansiEnabled && getConf(ENFORCE_RESERVED_KEYWORDS)

  override def doubleQuotedIdentifiers: Boolean = ansiEnabled && getConf(DOUBLE_QUOTED_IDENTIFIERS)

  def ansiRelationPrecedence: Boolean = ansiEnabled && getConf(ANSI_RELATION_PRECEDENCE)

  def chunkBase64StringEnabled: Boolean = getConf(CHUNK_BASE64_STRING_ENABLED)

  def timestampType: AtomicType = getConf(TIMESTAMP_TYPE) match {
    case "TIMESTAMP_LTZ" =>
      // For historical reasons, TimestampType maps to TIMESTAMP WITH LOCAL TIME ZONE.
      TimestampType

    case "TIMESTAMP_NTZ" =>
      TimestampNTZType
  }
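
  // Illustrative usage (editor's example, not part of the original source):
  //   SQLConf.get.setConfString("spark.sql.timestampType", "TIMESTAMP_NTZ")
  //   SQLConf.get.timestampType  // TimestampNTZType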

  def nestedSchemaPruningEnabled: Boolean = getConf(NESTED_SCHEMA_PRUNING_ENABLED)

  def serializerNestedSchemaPruningEnabled: Boolean =
    getConf(SERIALIZER_NESTED_SCHEMA_PRUNING_ENABLED)

  def nestedPruningOnExpressions: Boolean = getConf(NESTED_PRUNING_ON_EXPRESSIONS)

  def csvColumnPruning: Boolean = getConf(SQLConf.CSV_PARSER_COLUMN_PRUNING)

  def legacySizeOfNull: Boolean = {
    // size(null) should return null under ansi mode.
    getConf(SQLConf.LEGACY_SIZE_OF_NULL) && !getConf(ANSI_ENABLED)
  }
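
  // Illustrative behavior (editor's summary, not part of the original source):
  //   spark.sql.legacy.sizeOfNull = true and spark.sql.ansi.enabled = false  =>  size(NULL) = -1
  //   any other combination                                                  =>  size(NULL) = NULL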

  def isReplEagerEvalEnabled: Boolean = getConf(SQLConf.REPL_EAGER_EVAL_ENABLED)

  def replEagerEvalMaxNumRows: Int = getConf(SQLConf.REPL_EAGER_EVAL_MAX_NUM_ROWS)

  def replEagerEvalTruncate: Int = getConf(SQLConf.REPL_EAGER_EVAL_TRUNCATE)

  def avroCompressionCodec: String = getConf(SQLConf.AVRO_COMPRESSION_CODEC)

  def avroDeflateLevel: Int = getConf(SQLConf.AVRO_DEFLATE_LEVEL)

  def replaceDatabricksSparkAvroEnabled: Boolean =
    getConf(SQLConf.LEGACY_REPLACE_DATABRICKS_SPARK_AVRO_ENABLED)

  override def setOpsPrecedenceEnforced: Boolean =
    getConf(SQLConf.LEGACY_SETOPS_PRECEDENCE_ENABLED)

  override def exponentLiteralAsDecimalEnabled: Boolean =
    getConf(SQLConf.LEGACY_EXPONENT_LITERAL_AS_DECIMAL_ENABLED)

  def allowNegativeScaleOfDecimalEnabled: Boolean =
    getConf(SQLConf.LEGACY_ALLOW_NEGATIVE_SCALE_OF_DECIMAL_ENABLED)

  def legacyStatisticalAggregate: Boolean = getConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE)

  def truncateTableIgnorePermissionAcl: Boolean =
    getConf(SQLConf.TRUNCATE_TABLE_IGNORE_PERMISSION_ACL)

  def nameNonStructGroupingKeyAsValue: Boolean =
    getConf(SQLConf.NAME_NON_STRUCT_GROUPING_KEY_AS_VALUE)

  override def maxToStringFields: Int = getConf(SQLConf.MAX_TO_STRING_FIELDS)

  def maxPlanStringLength: Int = getConf(SQLConf.MAX_PLAN_STRING_LENGTH).toInt

  def maxMetadataStringLength: Int = getConf(SQLConf.MAX_METADATA_STRING_LENGTH)

  def setCommandRejectsSparkCoreConfs: Boolean =
    getConf(SQLConf.SET_COMMAND_REJECTS_SPARK_CORE_CONFS)

  def castDatetimeToString: Boolean = getConf(SQLConf.LEGACY_CAST_DATETIME_TO_STRING)

  def ignoreDataLocality: Boolean = getConf(SQLConf.IGNORE_DATA_LOCALITY)

  def csvFilterPushDown: Boolean = getConf(CSV_FILTER_PUSHDOWN_ENABLED)

  def jsonFilterPushDown: Boolean = getConf(JSON_FILTER_PUSHDOWN_ENABLED)

  def avroFilterPushDown: Boolean = getConf(AVRO_FILTER_PUSHDOWN_ENABLED)

  def jsonEnablePartialResults: Boolean = getConf(JSON_ENABLE_PARTIAL_RESULTS)

  def jsonEnableDateTimeParsingFallback: Option[Boolean] =
    getConf(LEGACY_JSON_ENABLE_DATE_TIME_PARSING_FALLBACK)

  def csvEnableDateTimeParsingFallback: Option[Boolean] =
    getConf(LEGACY_CSV_ENABLE_DATE_TIME_PARSING_FALLBACK)

  def integerGroupingIdEnabled: Boolean = getConf(SQLConf.LEGACY_INTEGER_GROUPING_ID)

  def groupingIdWithAppendedUserGroupByEnabled: Boolean =
    getConf(SQLConf.LEGACY_GROUPING_ID_WITH_APPENDED_USER_GROUPBY)

  def metadataCacheTTL: Long = getConf(StaticSQLConf.METADATA_CACHE_TTL_SECONDS)

  def coalesceBucketsInJoinEnabled: Boolean = getConf(SQLConf.COALESCE_BUCKETS_IN_JOIN_ENABLED)

  def coalesceBucketsInJoinMaxBucketRatio: Int =
    getConf(SQLConf.COALESCE_BUCKETS_IN_JOIN_MAX_BUCKET_RATIO)

  def optimizeNullAwareAntiJoin: Boolean =
    getConf(SQLConf.OPTIMIZE_NULL_AWARE_ANTI_JOIN)

  def legacyPathOptionBehavior: Boolean = getConf(SQLConf.LEGACY_PATH_OPTION_BEHAVIOR)

  def disabledJdbcConnectionProviders: String = getConf(
    StaticSQLConf.DISABLED_JDBC_CONN_PROVIDER_LIST)

  def charVarcharAsString: Boolean = getConf(SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING)

  def readSideCharPadding: Boolean = getConf(SQLConf.READ_SIDE_CHAR_PADDING)

  def cliPrintHeader: Boolean = getConf(SQLConf.CLI_PRINT_HEADER)

  def legacyIntervalEnabled: Boolean = getConf(LEGACY_INTERVAL_ENABLED)

  def decorrelateInnerQueryEnabled: Boolean = getConf(SQLConf.DECORRELATE_INNER_QUERY_ENABLED)

  def maxConcurrentOutputFileWriters: Int = getConf(SQLConf.MAX_CONCURRENT_OUTPUT_FILE_WRITERS)

  def plannedWriteEnabled: Boolean = getConf(SQLConf.PLANNED_WRITE_ENABLED)

  def inferDictAsStruct: Boolean = getConf(SQLConf.INFER_NESTED_DICT_AS_STRUCT)

  def legacyInferArrayTypeFromFirstElement: Boolean = getConf(
    SQLConf.LEGACY_INFER_ARRAY_TYPE_FROM_FIRST_ELEMENT)

  def parquetFieldIdReadEnabled: Boolean = getConf(SQLConf.PARQUET_FIELD_ID_READ_ENABLED)

  def parquetFieldIdWriteEnabled: Boolean = getConf(SQLConf.PARQUET_FIELD_ID_WRITE_ENABLED)

  def ignoreMissingParquetFieldId: Boolean = getConf(SQLConf.IGNORE_MISSING_PARQUET_FIELD_ID)

  def legacyParquetNanosAsLong: Boolean = getConf(SQLConf.LEGACY_PARQUET_NANOS_AS_LONG)

  def parquetInferTimestampNTZEnabled: Boolean = getConf(PARQUET_INFER_TIMESTAMP_NTZ_ENABLED)

  def useV1Command: Boolean = getConf(SQLConf.LEGACY_USE_V1_COMMAND)

  def histogramNumericPropagateInputType: Boolean =
    getConf(SQLConf.HISTOGRAM_NUMERIC_PROPAGATE_INPUT_TYPE)

  def errorMessageFormat: ErrorMessageFormat.Value =
    ErrorMessageFormat.withName(getConf(SQLConf.ERROR_MESSAGE_FORMAT))

  def defaultDatabase: String = getConf(StaticSQLConf.CATALOG_DEFAULT_DATABASE)

  def allowsTempViewCreationWithMultipleNameparts: Boolean =
    getConf(SQLConf.ALLOW_TEMP_VIEW_CREATION_WITH_MULTIPLE_NAME_PARTS)

  def usePartitionEvaluator: Boolean = getConf(SQLConf.USE_PARTITION_EVALUATOR)

  def legacyNegativeIndexInArrayInsert: Boolean = {
    getConf(SQLConf.LEGACY_NEGATIVE_INDEX_IN_ARRAY_INSERT)
  }

  /** ********************** SQLConf functionality methods ************ */

  /** Set Spark SQL configuration properties. */
  def setConf(props: Properties): Unit = settings.synchronized {
    props.asScala.foreach { case (k, v) => setConfString(k, v) }
  }
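
  // Illustrative usage (editor's example, not part of the original source):
  //   val props = new java.util.Properties()
  //   props.setProperty("spark.sql.shuffle.partitions", "400")
  //   SQLConf.get.setConf(props)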

  /** Set the given Spark SQL configuration property using a `string` value. */
  def setConfString(key: String, value: String): Unit = {
    require(key != null, "key cannot be null")
    require(value != null, s"value cannot be null for key: $key")
    val entry = getConfigEntry(key)
    if (entry != null) {
      // Only verify configs in the SQLConf object
      entry.valueConverter(value)
    }
    setConfWithCheck(key, value)
  }
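
  // Illustrative behavior (editor's note, not part of the original source): values for
  // registered entries are validated eagerly by the entry's valueConverter, so
  //   SQLConf.get.setConfString("spark.sql.shuffle.partitions", "not-a-number")
  // fails with an IllegalArgumentException instead of storing an unusable value.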

  /** Set the given Spark SQL configuration property. */
  def setConf[T](entry: ConfigEntry[T], value: T): Unit = {
    require(entry != null, "entry cannot be null")
    require(value != null, s"value cannot be null for key: ${entry.key}")
    require(containsConfigEntry(entry), s"$entry is not registered")
    setConfWithCheck(entry.key, entry.stringConverter(value))
  }

  /** Return the value of Spark SQL configuration property for the given key. */
  @throws[NoSuchElementException]("if key is not set")
  def getConfString(key: String): String = {
    Option(settings.get(key)).
      orElse {
        // Try to use the default value
        Option(getConfigEntry(key)).map { e => e.stringConverter(e.readFrom(reader)) }
      }.
      getOrElse(throw QueryExecutionErrors.sqlConfigNotFoundError(key))
  }
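
  // Illustrative behavior (editor's example, not part of the original source):
  //   SQLConf.get.getConfString("spark.sql.shuffle.partitions")  // "200" unless overridden
  //   SQLConf.get.getConfString("no.such.key")  // throws: neither set nor registered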

  /**
   * Return the value of Spark SQL configuration property for the given key. If the key is not set
   * yet, return `defaultValue`. This is useful when `defaultValue` in ConfigEntry is not the
   * desired one.
   */
  def getConf[T](entry: ConfigEntry[T], defaultValue: T): T = {
    require(containsConfigEntry(entry), s"$entry is not registered")
    Option(settings.get(entry.key)).map(entry.valueConverter).getOrElse(defaultValue)
  }
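
  // Illustrative usage (editor's example, not part of the original source):
  //   SQLConf.get.getConf(SQLConf.SHUFFLE_PARTITIONS, 100)  // 100 only when the key was never set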

  /**
   * Return the value of Spark SQL configuration property for the given key. If the key is not set
   * yet, return `defaultValue` in [[ConfigEntry]].
   */
  def getConf[T](entry: ConfigEntry[T]): T = {
    require(containsConfigEntry(entry), s"$entry is not registered")
    entry.readFrom(reader)
  }

  /**
   * Return the value of an optional Spark SQL configuration property for the given key. If the key
   * is not set yet, returns None.
   */
  def getConf[T](entry: OptionalConfigEntry[T]): Option[T] = {
    require(containsConfigEntry(entry), s"$entry is not registered")
    entry.readFrom(reader)
  }

  /**
   * Return the `string` value of Spark SQL configuration property for the given key. If the key is
   * not set yet, return `defaultValue`.
   */
  def getConfString(key: String, defaultValue: String): String = {
    Option(settings.get(key)).getOrElse {
      // If the key is not set, we need to check whether the config entry is registered and is
      // a fallback conf, so that we can check its parent.
      getConfigEntry(key) match {
        case e: FallbackConfigEntry[_] =>
          getConfString(e.fallback.key, defaultValue)
        case e: ConfigEntry[_] if defaultValue != null && defaultValue != ConfigEntry.UNDEFINED =>
          // Only verify configs in the SQLConf object
          e.valueConverter(defaultValue)
          defaultValue
        case _ =>
          defaultValue
      }
    }
  }
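
  // Illustrative behavior (editor's example, not part of the original source):
  //   SQLConf.get.getConfString("some.unset.key", "fallback")  // "fallback"
  // For a registered FallbackConfigEntry, the lookup is delegated to the entry it falls back to.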

  private var definedConfsLoaded = false
  /**
   * Initializes [[StaticSQLConf]] and [[org.apache.spark.sql.hive.HiveUtils]] so that all the
   * defined SQL configurations are registered to SQLConf.
   */
  private def loadDefinedConfs(): Unit = {
    if (!definedConfsLoaded) {
      definedConfsLoaded = true
      // Force to register static SQL configurations
      StaticSQLConf
      try {
        // Force to register SQL configurations from Hive module
        val symbol = ScalaReflection.mirror.staticModule("org.apache.spark.sql.hive.HiveUtils")
        ScalaReflection.mirror.reflectModule(symbol).instance
      } catch {
        case NonFatal(e) =>
          logWarning("SQL configurations from Hive module is not loaded", e)
      }
    }
  }

  /**
   * Return all the configuration properties that have been set (i.e. not the default).
   * This creates a new copy of the config properties in the form of a Map.
   */
  def getAllConfs: immutable.Map[String, String] =
    settings.synchronized { settings.asScala.toMap }

  /**
   * Return all the configuration definitions that have been defined in [[SQLConf]]. Each
   * definition contains the key, its current (or default) value, the doc and the version in
   * which it was added.
   */
  def getAllDefinedConfs: Seq[(String, String, String, String)] = {
    loadDefinedConfs()
    getConfigEntries().asScala.filter(_.isPublic).map { entry =>
      val displayValue = Option(getConfString(entry.key, null)).getOrElse(entry.defaultValueString)
      (entry.key, displayValue, entry.doc, entry.version)
    }.toSeq
  }
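
  // Editor's note (not part of the original source): this is what backs commands such as
  // `SET -v`, which list every public configuration with its effective value, doc and version.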

  /**
   * Redacts the given option map according to the description of SQL_OPTIONS_REDACTION_PATTERN.
   */
  def redactOptions[K, V](options: Map[K, V]): Map[K, V] = {
    redactOptions(options.toSeq).toMap
  }

  /**
   * Redacts the given sequence of option pairs according to the description of
   * SQL_OPTIONS_REDACTION_PATTERN.
   */
  def redactOptions[K, V](options: collection.Seq[(K, V)]): collection.Seq[(K, V)] = {
    val regexes = Seq(
      getConf(SQL_OPTIONS_REDACTION_PATTERN),
      SECRET_REDACTION_PATTERN.readFrom(reader))

    regexes.foldLeft(options) { case (opts, r) => Utils.redact(Some(r), opts) }
  }
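
  // Illustrative example (editor's example, not part of the original source): option values
  // whose keys match the redaction patterns (e.g. anything containing "password") are masked:
  //   SQLConf.get.redactOptions(Map("url" -> "jdbc:postgresql://host/db", "password" -> "secret"))
  //   // -> the "password" value is replaced with a redaction placeholder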

  /**
   * Return whether a given key is set in this [[SQLConf]].
   */
  def contains(key: String): Boolean = {
    settings.containsKey(key)
  }

  /**
   * Logs a warning message if the given config key is deprecated.
   */
  private def logDeprecationWarning(key: String): Unit = {
    SQLConf.deprecatedSQLConfigs.get(key).foreach { config =>
      logWarning(config.toDeprecationString)
    }
  }

  private def requireDefaultValueOfRemovedConf(key: String, value: String): Unit = {
    SQLConf.removedSQLConfigs.get(key).foreach {
      case RemovedConfig(configName, version, defaultValue, comment) =>
        if (value != defaultValue) {
          throw QueryCompilationErrors.configRemovedInVersionError(configName, version, comment)
        }
    }
  }

  protected def setConfWithCheck(key: String, value: String): Unit = {
    logDeprecationWarning(key)
    requireDefaultValueOfRemovedConf(key, value)
    settings.put(key, value)
  }

  def unsetConf(key: String): Unit = {
    logDeprecationWarning(key)
    settings.remove(key)
  }

  def unsetConf(entry: ConfigEntry[_]): Unit = {
    unsetConf(entry.key)
  }

  def clear(): Unit = {
    settings.clear()
  }

  override def clone(): SQLConf = {
    val result = new SQLConf
    getAllConfs.foreach {
      case (k, v) => if (v ne null) result.setConfString(k, v)
    }
    result
  }
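
  // Illustrative usage (editor's example, not part of the original source): clone() copies only
  // the explicitly-set entries, so later changes to the original are not visible in the copy:
  //   val snapshot = SQLConf.get.clone()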

  // For test only
  def copy(entries: (ConfigEntry[_], Any)*): SQLConf = {
    val cloned = clone()
    entries.foreach {
      case (entry, value) => cloned.setConfString(entry.key, value.toString)
    }
    cloned
  }

  def isModifiable(key: String): Boolean = {
    containsConfigKey(key) && !isStaticConfigKey(key)
  }
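
  // Illustrative examples (editor's examples, not part of the original source):
  //   SQLConf.get.isModifiable("spark.sql.shuffle.partitions")  // true: a runtime conf
  //   SQLConf.get.isModifiable("spark.sql.warehouse.dir")       // false: a static conf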
}