org.apache.spark.sql.SQLConf.scala (snappy-spark-sql_2.10: SnappyData distributed data store and execution engine)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql
import java.util.Properties
import scala.collection.immutable
import scala.collection.JavaConverters._
import org.apache.parquet.hadoop.ParquetOutputCommitter
import org.apache.spark.sql.catalyst.CatalystConf
////////////////////////////////////////////////////////////////////////////////////////////////////
// This file defines the configuration options for Spark SQL.
////////////////////////////////////////////////////////////////////////////////////////////////////
private[spark] object SQLConf {
private val sqlConfEntries = java.util.Collections.synchronizedMap(
new java.util.HashMap[String, SQLConfEntry[_]]())
/**
* An entry contains all meta information for a configuration.
*
* @param key the key for the configuration
* @param defaultValue the default value for the configuration
* @param valueConverter how to convert a string to the value. It should throw an exception if the
* string does not have the required format.
* @param stringConverter how to convert a value to a string that the user can use as a valid
* string value. It's usually `toString`. But sometimes, a custom converter
* is necessary. E.g., if T is List[String], `a, b, c` is better than
* `List(a, b, c)`.
* @param doc the documentation for the configuration
* @param isPublic if this configuration is public to the user. If it's `false`, this
* configuration is only used internally and we should not expose it to the user.
* @tparam T the value type
*/
private[sql] class SQLConfEntry[T] private(
val key: String,
val defaultValue: Option[T],
val valueConverter: String => T,
val stringConverter: T => String,
val doc: String,
val isPublic: Boolean) {
def defaultValueString: String = defaultValue.map(stringConverter).getOrElse("")
override def toString: String = {
s"SQLConfEntry(key = $key, defaultValue=$defaultValueString, doc=$doc, isPublic = $isPublic)"
}
}
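/*
 * Illustrative sketch of the stringConverter note above, using the hypothetical key
 * "example.colNames" and the stringSeqConf factory defined below. For T = Seq[String],
 * the converter pair round-trips "a,b,c" instead of printing "List(a, b, c)":
 *
 *   val entry = SQLConfEntry.stringSeqConf("example.colNames",
 *     defaultValue = Some(Seq("a", "b", "c")))
 *   entry.defaultValueString      // "a,b,c"
 *   entry.valueConverter("x,y")   // a Seq containing "x" and "y"
 */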
private[sql] object SQLConfEntry {
private def apply[T](
key: String,
defaultValue: Option[T],
valueConverter: String => T,
stringConverter: T => String,
doc: String,
isPublic: Boolean): SQLConfEntry[T] =
sqlConfEntries.synchronized {
if (sqlConfEntries.containsKey(key)) {
throw new IllegalArgumentException(s"Duplicate SQLConfEntry. $key has been registered")
}
val entry =
new SQLConfEntry[T](key, defaultValue, valueConverter, stringConverter, doc, isPublic)
sqlConfEntries.put(key, entry)
entry
}
def intConf(
key: String,
defaultValue: Option[Int] = None,
doc: String = "",
isPublic: Boolean = true): SQLConfEntry[Int] =
SQLConfEntry(key, defaultValue, { v =>
try {
v.toInt
} catch {
case _: NumberFormatException =>
throw new IllegalArgumentException(s"$key should be int, but was $v")
}
}, _.toString, doc, isPublic)
def longConf(
key: String,
defaultValue: Option[Long] = None,
doc: String = "",
isPublic: Boolean = true): SQLConfEntry[Long] =
SQLConfEntry(key, defaultValue, { v =>
try {
v.toLong
} catch {
case _: NumberFormatException =>
throw new IllegalArgumentException(s"$key should be long, but was $v")
}
}, _.toString, doc, isPublic)
def doubleConf(
key: String,
defaultValue: Option[Double] = None,
doc: String = "",
isPublic: Boolean = true): SQLConfEntry[Double] =
SQLConfEntry(key, defaultValue, { v =>
try {
v.toDouble
} catch {
case _: NumberFormatException =>
throw new IllegalArgumentException(s"$key should be double, but was $v")
}
}, _.toString, doc, isPublic)
def booleanConf(
key: String,
defaultValue: Option[Boolean] = None,
doc: String = "",
isPublic: Boolean = true): SQLConfEntry[Boolean] =
SQLConfEntry(key, defaultValue, { v =>
try {
v.toBoolean
} catch {
case _: IllegalArgumentException =>
throw new IllegalArgumentException(s"$key should be boolean, but was $v")
}
}, _.toString, doc, isPublic)
def stringConf(
key: String,
defaultValue: Option[String] = None,
doc: String = "",
isPublic: Boolean = true): SQLConfEntry[String] =
SQLConfEntry(key, defaultValue, v => v, v => v, doc, isPublic)
def enumConf[T](
key: String,
valueConverter: String => T,
validValues: Set[T],
defaultValue: Option[T] = None,
doc: String = "",
isPublic: Boolean = true): SQLConfEntry[T] =
SQLConfEntry(key, defaultValue, v => {
val _v = valueConverter(v)
if (!validValues.contains(_v)) {
throw new IllegalArgumentException(
s"The value of $key should be one of ${validValues.mkString(", ")}, but was $v")
}
_v
}, _.toString, doc, isPublic)
def seqConf[T](
key: String,
valueConverter: String => T,
defaultValue: Option[Seq[T]] = None,
doc: String = "",
isPublic: Boolean = true): SQLConfEntry[Seq[T]] = {
SQLConfEntry(
key, defaultValue, _.split(",").map(valueConverter), _.mkString(","), doc, isPublic)
}
def stringSeqConf(
key: String,
defaultValue: Option[Seq[String]] = None,
doc: String = "",
isPublic: Boolean = true): SQLConfEntry[Seq[String]] = {
seqConf(key, s => s, defaultValue, doc, isPublic)
}
}
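/*
 * Illustrative sketch of the typed factories above (the key "example.batchSize" is
 * hypothetical): each factory registers the entry, rejecting duplicate keys, and attaches
 * a validating converter.
 *
 *   val batchSize = SQLConfEntry.intConf("example.batchSize", defaultValue = Some(16))
 *   batchSize.valueConverter("32")     // 32
 *   batchSize.valueConverter("oops")   // IllegalArgumentException:
 *                                      //   "example.batchSize should be int, but was oops"
 *   SQLConfEntry.intConf("example.batchSize")   // IllegalArgumentException: duplicate key
 */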
import SQLConfEntry._
val ALLOW_MULTIPLE_CONTEXTS = booleanConf("spark.sql.allowMultipleContexts",
defaultValue = Some(true),
doc = "When set to true, creating multiple SQLContexts/HiveContexts is allowed." +
"When set to false, only one SQLContext/HiveContext is allowed to be created " +
"through the constructor (new SQLContexts/HiveContexts created through newSession " +
"method is allowed). Please note that this conf needs to be set in Spark Conf. Once" +
"a SQLContext/HiveContext has been created, changing the value of this conf will not" +
"have effect.",
isPublic = true)
val COMPRESS_CACHED = booleanConf("spark.sql.inMemoryColumnarStorage.compressed",
defaultValue = Some(true),
doc = "When set to true Spark SQL will automatically select a compression codec for each " +
"column based on statistics of the data.",
isPublic = false)
val COLUMN_BATCH_SIZE = intConf("spark.sql.inMemoryColumnarStorage.batchSize",
defaultValue = Some(10000),
doc = "Controls the size of batches for columnar caching. Larger batch sizes can improve " +
"memory utilization and compression, but risk OOMs when caching data.",
isPublic = false)
val IN_MEMORY_PARTITION_PRUNING =
booleanConf("spark.sql.inMemoryColumnarStorage.partitionPruning",
defaultValue = Some(true),
doc = "When true, enable partition pruning for in-memory columnar tables.",
isPublic = false)
val AUTO_BROADCASTJOIN_THRESHOLD = intConf("spark.sql.autoBroadcastJoinThreshold",
defaultValue = Some(10 * 1024 * 1024),
doc = "Configures the maximum size in bytes for a table that will be broadcast to all worker " +
"nodes when performing a join. By setting this value to -1 broadcasting can be disabled. " +
"Note that currently statistics are only supported for Hive Metastore tables where the " +
"commandANALYZE TABLE <tableName> COMPUTE STATISTICS noscan
has been run.")
val DEFAULT_SIZE_IN_BYTES = longConf(
"spark.sql.defaultSizeInBytes",
doc = "The default table size used in query planning. By default, it is set to a larger " +
"value than `spark.sql.autoBroadcastJoinThreshold` to be more conservative. That is to say " +
"by default the optimizer will not choose to broadcast a table unless it knows for sure its" +
"size is small enough.",
isPublic = false)
val SHUFFLE_PARTITIONS = intConf("spark.sql.shuffle.partitions",
defaultValue = Some(200),
doc = "The default number of partitions to use when shuffling data for joins or aggregations.")
// Snappy changes: the next two properties are kept for SnappyAQP-related changes
val TUNGSTEN_ENABLED = booleanConf("spark.sql.tungsten.enabled",
defaultValue = Some(true),
doc = "When true, use the optimized Tungsten physical execution backend which explicitly " +
"manages memory and dynamically generates bytecode for expression evaluation.")
val UNSAFE_ENABLED = booleanConf("spark.sql.unsafe.enabled",
defaultValue = Some(true), // use TUNGSTEN_ENABLED as default
doc = "When true, use the new optimized Tungsten physical execution backend.",
isPublic = false)
val SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE =
longConf("spark.sql.adaptive.shuffle.targetPostShuffleInputSize",
defaultValue = Some(64 * 1024 * 1024),
doc = "The target post-shuffle input size in bytes of a task.")
val ADAPTIVE_EXECUTION_ENABLED = booleanConf("spark.sql.adaptive.enabled",
defaultValue = Some(false),
doc = "When true, enable adaptive query execution.")
val SHUFFLE_MIN_NUM_POSTSHUFFLE_PARTITIONS =
intConf("spark.sql.adaptive.minNumPostShufflePartitions",
defaultValue = Some(-1),
doc = "The advisory minimal number of post-shuffle partitions provided to " +
"ExchangeCoordinator. This setting is used in our test to make sure we " +
"have enough parallelism to expose issues that will not be exposed with a " +
"single partition. When the value is a non-positive value, this setting will" +
"not be provided to ExchangeCoordinator.",
isPublic = false)
val SUBEXPRESSION_ELIMINATION_ENABLED = booleanConf("spark.sql.subexpressionElimination.enabled",
defaultValue = Some(true),
doc = "When true, common subexpressions will be eliminated.",
isPublic = false)
val DIALECT = stringConf(
"spark.sql.dialect",
defaultValue = Some("sql"),
doc = "The default SQL dialect to use.")
val CASE_SENSITIVE = booleanConf("spark.sql.caseSensitive",
defaultValue = Some(true),
doc = "Whether the query analyzer should be case sensitive or not.")
val PARQUET_SCHEMA_MERGING_ENABLED = booleanConf("spark.sql.parquet.mergeSchema",
defaultValue = Some(false),
doc = "When true, the Parquet data source merges schemas collected from all data files, " +
"otherwise the schema is picked from the summary file or a random data file " +
"if no summary file is available.")
val PARQUET_SCHEMA_RESPECT_SUMMARIES = booleanConf("spark.sql.parquet.respectSummaryFiles",
defaultValue = Some(false),
doc = "When true, we make assumption that all part-files of Parquet are consistent with " +
"summary files and we will ignore them when merging schema. Otherwise, if this is " +
"false, which is the default, we will merge all part-files. This should be considered " +
"as expert-only option, and shouldn't be enabled before knowing what it means exactly.")
val PARQUET_BINARY_AS_STRING = booleanConf("spark.sql.parquet.binaryAsString",
defaultValue = Some(false),
doc = "Some other Parquet-producing systems, in particular Impala and older versions of " +
"Spark SQL, do not differentiate between binary data and strings when writing out the " +
"Parquet schema. This flag tells Spark SQL to interpret binary data as a string to provide " +
"compatibility with these systems.")
val PARQUET_INT96_AS_TIMESTAMP = booleanConf("spark.sql.parquet.int96AsTimestamp",
defaultValue = Some(true),
doc = "Some Parquet-producing systems, in particular Impala, store Timestamp into INT96. " +
"Spark would also store Timestamp as INT96 because we need to avoid precision lost of the " +
"nanoseconds field. This flag tells Spark SQL to interpret INT96 data as a timestamp to " +
"provide compatibility with these systems.")
val PARQUET_CACHE_METADATA = booleanConf("spark.sql.parquet.cacheMetadata",
defaultValue = Some(true),
doc = "Turns on caching of Parquet schema metadata. Can speed up querying of static data.")
val PARQUET_COMPRESSION = enumConf("spark.sql.parquet.compression.codec",
valueConverter = v => v.toLowerCase,
validValues = Set("uncompressed", "snappy", "gzip", "lzo"),
defaultValue = Some("gzip"),
doc = "Sets the compression codec use when writing Parquet files. Acceptable values include: " +
"uncompressed, snappy, gzip, lzo.")
val PARQUET_FILTER_PUSHDOWN_ENABLED = booleanConf("spark.sql.parquet.filterPushdown",
defaultValue = Some(true),
doc = "Enables Parquet filter push-down optimization when set to true.")
val PARQUET_WRITE_LEGACY_FORMAT = booleanConf(
key = "spark.sql.parquet.writeLegacyFormat",
defaultValue = Some(false),
doc = "Whether to follow Parquet's format specification when converting Parquet schema to " +
"Spark SQL schema and vice versa.")
val PARQUET_OUTPUT_COMMITTER_CLASS = stringConf(
key = "spark.sql.parquet.output.committer.class",
defaultValue = Some(classOf[ParquetOutputCommitter].getName),
doc = "The output committer class used by Parquet. The specified class needs to be a " +
"subclass of org.apache.hadoop.mapreduce.OutputCommitter. Typically, it's also a subclass " +
"of org.apache.parquet.hadoop.ParquetOutputCommitter. NOTE: 1. Instead of SQLConf, this " +
"option must be set in Hadoop Configuration. 2. This option overrides " +
"\"spark.sql.sources.outputCommitterClass\".")
val PARQUET_UNSAFE_ROW_RECORD_READER_ENABLED = booleanConf(
key = "spark.sql.parquet.enableUnsafeRowRecordReader",
defaultValue = Some(true),
doc = "Enables using the custom ParquetUnsafeRowRecordReader.")
val ORC_FILTER_PUSHDOWN_ENABLED = booleanConf("spark.sql.orc.filterPushdown",
defaultValue = Some(false),
doc = "When true, enable filter pushdown for ORC files.")
val HIVE_VERIFY_PARTITION_PATH = booleanConf("spark.sql.hive.verifyPartitionPath",
defaultValue = Some(false),
doc = "")
val HIVE_METASTORE_PARTITION_PRUNING = booleanConf("spark.sql.hive.metastorePartitionPruning",
defaultValue = Some(false),
doc = "When true, some predicates will be pushed down into the Hive metastore so that " +
"unmatching partitions can be eliminated earlier.")
val NATIVE_VIEW = booleanConf("spark.sql.nativeView",
defaultValue = Some(false),
doc = "When true, CREATE VIEW will be handled by Spark SQL instead of Hive native commands. " +
"Note that this function is experimental and should ony be used when you are using " +
"non-hive-compatible tables written by Spark SQL. The SQL string used to create " +
"view should be fully qualified, i.e. use `tbl1`.`col1` instead of `*` whenever " +
"possible, or you may get wrong result.",
isPublic = false)
val COLUMN_NAME_OF_CORRUPT_RECORD = stringConf("spark.sql.columnNameOfCorruptRecord",
defaultValue = Some("_corrupt_record"),
doc = "")
val BROADCAST_TIMEOUT = intConf("spark.sql.broadcastTimeout",
defaultValue = Some(5 * 60),
doc = "Timeout in seconds for the broadcast wait time in broadcast joins.")
// This is only used for the thriftserver
val THRIFTSERVER_POOL = stringConf("spark.sql.thriftserver.scheduler.pool",
doc = "Set a Fair Scheduler pool for a JDBC client session")
val THRIFTSERVER_UI_STATEMENT_LIMIT = intConf("spark.sql.thriftserver.ui.retainedStatements",
defaultValue = Some(200),
doc = "The number of SQL statements kept in the JDBC/ODBC web UI history.")
val THRIFTSERVER_UI_SESSION_LIMIT = intConf("spark.sql.thriftserver.ui.retainedSessions",
defaultValue = Some(200),
doc = "The number of SQL client sessions kept in the JDBC/ODBC web UI history.")
// This is used to set the default data source
val DEFAULT_DATA_SOURCE_NAME = stringConf("spark.sql.sources.default",
defaultValue = Some("org.apache.spark.sql.parquet"),
doc = "The default data source to use in input/output.")
// This is used to control when we will split a schema's JSON string into multiple pieces
// in order to fit the JSON string in the metastore's table property (by default, the value has
// a length restriction of 4000 characters). We will split the JSON string of a schema
// if its length exceeds the threshold.
val SCHEMA_STRING_LENGTH_THRESHOLD = intConf("spark.sql.sources.schemaStringLengthThreshold",
defaultValue = Some(4000),
doc = "The maximum length allowed in a single cell when " +
"storing additional schema information in Hive's metastore.",
isPublic = false)
val PARTITION_DISCOVERY_ENABLED = booleanConf("spark.sql.sources.partitionDiscovery.enabled",
defaultValue = Some(true),
doc = "When true, automatically discover data partitions.")
val PARTITION_COLUMN_TYPE_INFERENCE =
booleanConf("spark.sql.sources.partitionColumnTypeInference.enabled",
defaultValue = Some(true),
doc = "When true, automatically infer the data types for partitioned columns.")
val PARTITION_MAX_FILES =
intConf("spark.sql.sources.maxConcurrentWrites",
defaultValue = Some(1),
doc = "The maximum number of concurrent files to open before falling back on sorting when " +
"writing out files using dynamic partitioning.")
// The output committer class used by HadoopFsRelation. The specified class needs to be a
// subclass of org.apache.hadoop.mapreduce.OutputCommitter.
//
// NOTE:
//
// 1. Instead of SQLConf, this option *must be set in Hadoop Configuration*.
// 2. This option can be overridden by "spark.sql.parquet.output.committer.class".
val OUTPUT_COMMITTER_CLASS =
stringConf("spark.sql.sources.outputCommitterClass", isPublic = false)
val PARALLEL_PARTITION_DISCOVERY_THRESHOLD = intConf(
key = "spark.sql.sources.parallelPartitionDiscovery.threshold",
defaultValue = Some(32),
doc = "")
// Whether to perform eager analysis when constructing a dataframe.
// Set to false when debugging requires the ability to look at invalid query plans.
val DATAFRAME_EAGER_ANALYSIS = booleanConf(
"spark.sql.eagerAnalysis",
defaultValue = Some(true),
doc = "When true, eagerly applies query analysis on DataFrame operations.",
isPublic = false)
// Whether to automatically resolve ambiguity in join conditions for self-joins.
// See SPARK-6231.
val DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY = booleanConf(
"spark.sql.selfJoinAutoResolveAmbiguity",
defaultValue = Some(true),
isPublic = false)
// Whether to retain group by columns or not in GroupedData.agg.
val DATAFRAME_RETAIN_GROUP_COLUMNS = booleanConf(
"spark.sql.retainGroupColumns",
defaultValue = Some(true),
isPublic = false)
val DATAFRAME_PIVOT_MAX_VALUES = intConf(
"spark.sql.pivotMaxValues",
defaultValue = Some(10000),
doc = "When doing a pivot without specifying values for the pivot column this is the maximum " +
"number of (distinct) values that will be collected without error."
)
val RUN_SQL_ON_FILES = booleanConf("spark.sql.runSQLOnFiles",
defaultValue = Some(true),
isPublic = false,
doc = "When true, we could use `datasource`.`path` as table in SQL query"
)
val SPECIALIZE_SINGLE_DISTINCT_AGG_PLANNING =
booleanConf("spark.sql.specializeSingleDistinctAggPlanning",
defaultValue = Some(false),
isPublic = false,
doc = "When true, if a query only has a single distinct column and it has " +
"grouping expressions, we will use our planner rule to handle this distinct " +
"column (other cases are handled by DistinctAggregationRewriter). " +
"When false, we will always use DistinctAggregationRewriter to plan " +
"aggregation queries with DISTINCT keyword. This is an internal flag that is " +
"used to benchmark the performance impact of using DistinctAggregationRewriter to " +
"plan aggregation queries with a single distinct column.")
object Deprecated {
val MAPRED_REDUCE_TASKS = "mapred.reduce.tasks"
val EXTERNAL_SORT = "spark.sql.planner.externalSort"
val USE_SQL_AGGREGATE2 = "spark.sql.useAggregate2"
val TUNGSTEN_ENABLED = "spark.sql.tungsten.enabled"
val CODEGEN_ENABLED = "spark.sql.codegen"
val UNSAFE_ENABLED = "spark.sql.unsafe.enabled"
val SORTMERGE_JOIN = "spark.sql.planner.sortMergeJoin"
}
}
/**
* A class that enables the setting and getting of mutable config parameters/hints.
*
* In the presence of a SQLContext, these can be set and queried by passing SET commands
* into Spark SQL's query functions (i.e. sql()). Otherwise, users of this class can
* modify the hints by programmatically calling the setters and getters of this class.
*
* SQLConf is thread-safe (internally synchronized, so safe to be used in multiple threads).
*/
private[sql] class SQLConf extends Serializable with CatalystConf {
import SQLConf._
/** Only a low degree of contention is expected for conf, thus NOT using ConcurrentHashMap. */
@transient protected[spark] val settings = java.util.Collections.synchronizedMap(
new java.util.HashMap[String, String]())
/** ************************ Spark SQL Params/Hints ******************* */
// TODO: refactor so that these hints accessors don't pollute the name space of SQLContext?
/**
* The SQL dialect that is used when parsing queries. This defaults to 'sql' which uses
* a simple SQL parser provided by Spark SQL. This is currently the only option for users of
* SQLContext.
*
* When using a HiveContext, this value defaults to 'hiveql', which uses the Hive 0.12.0 HiveQL
* parser. Users can change this to 'sql' if they want to run queries that aren't supported by
* HiveQL (e.g., SELECT 1).
*
* Note that the choice of dialect does not affect things like what tables are available or
* how query execution is performed.
*/
private[spark] def dialect: String = getConf(DIALECT)
private[spark] def useCompression: Boolean = getConf(COMPRESS_CACHED)
private[spark] def parquetCompressionCodec: String = getConf(PARQUET_COMPRESSION)
private[spark] def parquetCacheMetadata: Boolean = getConf(PARQUET_CACHE_METADATA)
private[spark] def columnBatchSize: Int = getConf(COLUMN_BATCH_SIZE)
private[spark] def numShufflePartitions: Int = getConf(SHUFFLE_PARTITIONS)
private[spark] def targetPostShuffleInputSize: Long =
getConf(SHUFFLE_TARGET_POSTSHUFFLE_INPUT_SIZE)
private[spark] def adaptiveExecutionEnabled: Boolean = getConf(ADAPTIVE_EXECUTION_ENABLED)
private[spark] def minNumPostShufflePartitions: Int =
getConf(SHUFFLE_MIN_NUM_POSTSHUFFLE_PARTITIONS)
private[spark] def parquetFilterPushDown: Boolean = getConf(PARQUET_FILTER_PUSHDOWN_ENABLED)
private[spark] def orcFilterPushDown: Boolean = getConf(ORC_FILTER_PUSHDOWN_ENABLED)
private[spark] def verifyPartitionPath: Boolean = getConf(HIVE_VERIFY_PARTITION_PATH)
private[spark] def metastorePartitionPruning: Boolean = getConf(HIVE_METASTORE_PARTITION_PRUNING)
private[spark] def nativeView: Boolean = getConf(NATIVE_VIEW)
def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE)
private[spark] def unsafeEnabled: Boolean = getConf(UNSAFE_ENABLED, getConf(TUNGSTEN_ENABLED))
private[spark] def subexpressionEliminationEnabled: Boolean =
getConf(SUBEXPRESSION_ELIMINATION_ENABLED)
private[spark] def autoBroadcastJoinThreshold: Int = getConf(AUTO_BROADCASTJOIN_THRESHOLD)
private[spark] def defaultSizeInBytes: Long =
getConf(DEFAULT_SIZE_IN_BYTES, autoBroadcastJoinThreshold + 1L)
private[spark] def isParquetBinaryAsString: Boolean = getConf(PARQUET_BINARY_AS_STRING)
private[spark] def isParquetINT96AsTimestamp: Boolean = getConf(PARQUET_INT96_AS_TIMESTAMP)
private[spark] def writeLegacyParquetFormat: Boolean = getConf(PARQUET_WRITE_LEGACY_FORMAT)
private[spark] def inMemoryPartitionPruning: Boolean = getConf(IN_MEMORY_PARTITION_PRUNING)
private[spark] def columnNameOfCorruptRecord: String = getConf(COLUMN_NAME_OF_CORRUPT_RECORD)
private[spark] def broadcastTimeout: Int = getConf(BROADCAST_TIMEOUT)
private[spark] def defaultDataSourceName: String = getConf(DEFAULT_DATA_SOURCE_NAME)
private[spark] def partitionDiscoveryEnabled(): Boolean =
getConf(SQLConf.PARTITION_DISCOVERY_ENABLED)
private[spark] def partitionColumnTypeInferenceEnabled(): Boolean =
getConf(SQLConf.PARTITION_COLUMN_TYPE_INFERENCE)
private[spark] def parallelPartitionDiscoveryThreshold: Int =
getConf(SQLConf.PARALLEL_PARTITION_DISCOVERY_THRESHOLD)
// Do not use a value larger than 4000 as the default value of this property.
// See the comments of SCHEMA_STRING_LENGTH_THRESHOLD above for more information.
private[spark] def schemaStringLengthThreshold: Int = getConf(SCHEMA_STRING_LENGTH_THRESHOLD)
private[spark] def dataFrameEagerAnalysis: Boolean = getConf(DATAFRAME_EAGER_ANALYSIS)
private[spark] def dataFrameSelfJoinAutoResolveAmbiguity: Boolean =
getConf(DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY)
private[spark] def dataFrameRetainGroupColumns: Boolean = getConf(DATAFRAME_RETAIN_GROUP_COLUMNS)
private[spark] def runSQLOnFile: Boolean = getConf(RUN_SQL_ON_FILES)
protected[spark] override def specializeSingleDistinctAggPlanning: Boolean =
getConf(SPECIALIZE_SINGLE_DISTINCT_AGG_PLANNING)
/** ********************** SQLConf functionality methods ************ */
/** Set Spark SQL configuration properties. */
def setConf(props: Properties): Unit = settings.synchronized {
props.asScala.foreach { case (k, v) => setConfString(k, v) }
}
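/*
 * A minimal usage sketch, assuming the calling code lives in the org.apache.spark.sql package
 * (SQLConf is private[sql]); every property value is passed through setConfString below:
 *
 *   import java.util.Properties
 *   val props = new Properties()
 *   props.setProperty("spark.sql.shuffle.partitions", "8")
 *   props.setProperty("spark.sql.caseSensitive", "false")
 *   val conf = new SQLConf
 *   conf.setConf(props)
 *   conf.numShufflePartitions    // 8
 *   conf.caseSensitiveAnalysis   // false
 */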
/** Set the given Spark SQL configuration property using a `string` value. */
def setConfString(key: String, value: String): Unit = {
require(key != null, "key cannot be null")
require(value != null, s"value cannot be null for key: $key")
val entry = sqlConfEntries.get(key)
if (entry != null) {
// Only verify configs in the SQLConf object
entry.valueConverter(value)
}
settings.put(key, value)
}
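/*
 * A minimal sketch of the validation behavior above: only keys registered in sqlConfEntries
 * are checked; unknown keys (e.g. the hypothetical "my.custom.key") are stored verbatim.
 *
 *   val conf = new SQLConf
 *   conf.setConfString("spark.sql.shuffle.partitions", "abc")
 *     // IllegalArgumentException: "spark.sql.shuffle.partitions should be int, but was abc"
 *   conf.setConfString("my.custom.key", "anything")   // accepted; no converter registered
 */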
/** Set the given Spark SQL configuration property. */
def setConf[T](entry: SQLConfEntry[T], value: T): Unit = {
require(entry != null, "entry cannot be null")
require(value != null, s"value cannot be null for key: ${entry.key}")
require(sqlConfEntries.get(entry.key) == entry, s"$entry is not registered")
settings.put(entry.key, entry.stringConverter(value))
}
/** Return the value of Spark SQL configuration property for the given key. */
def getConfString(key: String): String = {
Option(settings.get(key)).
orElse {
// Try to use the default value
Option(sqlConfEntries.get(key)).map(_.defaultValueString)
}.
getOrElse(throw new NoSuchElementException(key))
}
/**
* Return the value of Spark SQL configuration property for the given key. If the key is not set
* yet, return `defaultValue`. This is useful when `defaultValue` in SQLConfEntry is not the
* desired one.
*/
def getConf[T](entry: SQLConfEntry[T], defaultValue: T): T = {
require(sqlConfEntries.get(entry.key) == entry, s"$entry is not registered")
Option(settings.get(entry.key)).map(entry.valueConverter).getOrElse(defaultValue)
}
/**
* Return the value of Spark SQL configuration property for the given key. If the key is not set
* yet, return `defaultValue` in [[SQLConfEntry]].
*/
def getConf[T](entry: SQLConfEntry[T]): T = {
require(sqlConfEntries.get(entry.key) == entry, s"$entry is not registered")
Option(settings.get(entry.key)).map(entry.valueConverter).orElse(entry.defaultValue).
getOrElse(throw new NoSuchElementException(entry.key))
}
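/*
 * A minimal sketch of the lookup order used by the two getConf overloads above: an explicitly
 * set value wins, then the entry's registered default, then a caller-supplied fallback or a
 * NoSuchElementException.
 *
 *   val conf = new SQLConf
 *   conf.getConf(SQLConf.SHUFFLE_PARTITIONS)             // 200, the registered default
 *   conf.setConf(SQLConf.SHUFFLE_PARTITIONS, 50)
 *   conf.getConf(SQLConf.SHUFFLE_PARTITIONS)             // 50, the explicitly set value
 *   conf.getConf(SQLConf.DEFAULT_SIZE_IN_BYTES)          // NoSuchElementException: no default
 *   conf.getConf(SQLConf.DEFAULT_SIZE_IN_BYTES, 1024L)   // 1024L, caller-supplied fallback
 */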
/**
* Return the `string` value of Spark SQL configuration property for the given key. If the key is
* not set yet, return `defaultValue`.
*/
def getConfString(key: String, defaultValue: String): String = {
val entry = sqlConfEntries.get(key)
if (entry != null && defaultValue != "") {
// Only verify configs in the SQLConf object
entry.valueConverter(defaultValue)
}
Option(settings.get(key)).getOrElse(defaultValue)
}
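/*
 * A minimal sketch of the method above: for a registered key, the caller-supplied default
 * string is itself run through the entry's converter, so a malformed fallback fails fast
 * ("my.custom.key" below is hypothetical and therefore not validated).
 *
 *   val conf = new SQLConf
 *   conf.getConfString("spark.sql.shuffle.partitions", "64")    // "64"
 *   conf.getConfString("spark.sql.shuffle.partitions", "abc")
 *     // IllegalArgumentException: "spark.sql.shuffle.partitions should be int, but was abc"
 *   conf.getConfString("my.custom.key", "fallback")             // "fallback"
 */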
/**
* Return all the configuration properties that have been set (i.e. not the default).
* This creates a new copy of the config properties in the form of a Map.
*/
def getAllConfs: immutable.Map[String, String] =
settings.synchronized { settings.asScala.toMap }
/**
* Return all the configuration definitions that have been defined in [[SQLConf]]. Each
* definition contains key, defaultValue and doc.
*/
def getAllDefinedConfs: Seq[(String, String, String)] = sqlConfEntries.synchronized {
sqlConfEntries.values.asScala.filter(_.isPublic).map { entry =>
(entry.key, entry.defaultValueString, entry.doc)
}.toSeq
}
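/*
 * A minimal sketch contrasting the two methods above: getAllConfs returns only what has been
 * explicitly set, while getAllDefinedConfs lists every public entry with its default and doc.
 *
 *   val conf = new SQLConf
 *   conf.setConfString("spark.sql.shuffle.partitions", "50")
 *   conf.getAllConfs   // Map("spark.sql.shuffle.partitions" -> "50")
 *   conf.getAllDefinedConfs.foreach { case (key, default, doc) =>
 *     println(s"$key (default: $default): $doc")
 *   }
 */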
private[spark] def unsetConf(key: String): Unit = {
settings.remove(key)
}
private[spark] def unsetConf(entry: SQLConfEntry[_]): Unit = {
settings.remove(entry.key)
}
private[spark] def clear(): Unit = {
settings.clear()
}
}
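/*
 * A minimal end-to-end sketch, assuming calling code in the org.apache.spark.sql package
 * (SQLConf is private[sql]); per the class scaladoc, the same properties can also be set and
 * queried via SET commands passed to sql().
 *
 *   val conf = new SQLConf
 *   // Stringly-typed access:
 *   conf.setConfString("spark.sql.shuffle.partitions", "8")
 *   conf.getConfString("spark.sql.shuffle.partitions")   // "8"
 *   // Typed access through the registered entries:
 *   conf.setConf(SQLConf.CASE_SENSITIVE, false)
 *   conf.getConf(SQLConf.CASE_SENSITIVE)                 // false
 *   conf.unsetConf(SQLConf.CASE_SENSITIVE)
 *   conf.clear()
 */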