org.apache.spark.sql.SparkSessionExtensions.scala
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql

import scala.collection.mutable

import org.apache.spark.annotation.{DeveloperApi, Experimental, Unstable}
import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TableFunctionRegistry}
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
import org.apache.spark.sql.catalyst.analysis.TableFunctionRegistry.TableFunctionBuilder
import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
import org.apache.spark.sql.catalyst.parser.ParserInterface
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.{ColumnarRule, SparkPlan}

/**
 * :: Experimental ::
 * Holder for injection points to the [[SparkSession]]. We make NO guarantee about the binary
 * compatibility and source compatibility of methods here.
 *
 * This currently provides the following extension points:
 *
 * <ul>
 * <li>Analyzer Rules.</li>
 * <li>Check Analysis Rules.</li>
 * <li>Cache Plan Normalization Rules.</li>
 * <li>Optimizer Rules.</li>
 * <li>Pre CBO Rules.</li>
 * <li>Planning Strategies.</li>
 * <li>Customized Parser.</li>
 * <li>(External) Catalog listeners.</li>
 * <li>Columnar Rules.</li>
 * <li>Adaptive Query Post Planner Strategy Rules.</li>
 * <li>Adaptive Query Stage Preparation Rules.</li>
 * <li>Adaptive Query Execution Runtime Optimizer Rules.</li>
 * <li>Adaptive Query Stage Optimizer Rules.</li>
 * </ul>
 *
 * The extensions can be used by calling `withExtensions` on the [[SparkSession.Builder]], for
 * example:
 * {{{
 *   SparkSession.builder()
 *     .master("...")
 *     .config("...", true)
 *     .withExtensions { extensions =>
 *       extensions.injectResolutionRule { session =>
 *         ...
 *       }
 *       extensions.injectParser { (session, parser) =>
 *         ...
 *       }
 *     }
 *     .getOrCreate()
 * }}}
 *
 * The extensions can also be used by setting the Spark SQL configuration property
 * `spark.sql.extensions`. Multiple extensions can be set using a comma-separated list. For
 * example:
 * {{{
 *   SparkSession.builder()
 *     .master("...")
 *     .config("spark.sql.extensions", "org.example.MyExtensions,org.example.YourExtensions")
 *     .getOrCreate()
 *
 *   class MyExtensions extends Function1[SparkSessionExtensions, Unit] {
 *     override def apply(extensions: SparkSessionExtensions): Unit = {
 *       extensions.injectResolutionRule { session =>
 *         ...
 *       }
 *       extensions.injectParser { (session, parser) =>
 *         ...
 *       }
 *     }
 *   }
 *
 *   class YourExtensions extends SparkSessionExtensionsProvider {
 *     override def apply(extensions: SparkSessionExtensions): Unit = {
 *       extensions.injectResolutionRule { session =>
 *         ...
 *       }
 *       extensions.injectFunction(...)
 *     }
 *   }
 * }}}
 *
 * Note that none of the injected builders should assume that the [[SparkSession]] is fully
 * initialized and should not touch the session's internals (e.g. the SessionState).
 */
@DeveloperApi
@Experimental
@Unstable
class SparkSessionExtensions {
  type RuleBuilder = SparkSession => Rule[LogicalPlan]
  type CheckRuleBuilder = SparkSession => LogicalPlan => Unit
  type StrategyBuilder = SparkSession => Strategy
  type ParserBuilder = (SparkSession, ParserInterface) => ParserInterface
  type FunctionDescription = (FunctionIdentifier, ExpressionInfo, FunctionBuilder)
  type TableFunctionDescription = (FunctionIdentifier, ExpressionInfo, TableFunctionBuilder)
  type ColumnarRuleBuilder = SparkSession => ColumnarRule
  type QueryPostPlannerStrategyBuilder = SparkSession => Rule[SparkPlan]
  type QueryStagePrepRuleBuilder = SparkSession => Rule[SparkPlan]
  type QueryStageOptimizerRuleBuilder = SparkSession => Rule[SparkPlan]

  private[this] val columnarRuleBuilders = mutable.Buffer.empty[ColumnarRuleBuilder]
  private[this] val queryPostPlannerStrategyRuleBuilders =
    mutable.Buffer.empty[QueryPostPlannerStrategyBuilder]
  private[this] val queryStagePrepRuleBuilders = mutable.Buffer.empty[QueryStagePrepRuleBuilder]
  private[this] val runtimeOptimizerRules = mutable.Buffer.empty[RuleBuilder]
  private[this] val queryStageOptimizerRuleBuilders =
    mutable.Buffer.empty[QueryStageOptimizerRuleBuilder]

  /**
   * Build the override rules for columnar execution.
   */
  private[sql] def buildColumnarRules(session: SparkSession): Seq[ColumnarRule] = {
    columnarRuleBuilders.map(_.apply(session)).toSeq
  }

  /**
   * Build the override rules for the query post planner strategy phase of adaptive query
   * execution.
   */
  private[sql] def buildQueryPostPlannerStrategyRules(
      session: SparkSession): Seq[Rule[SparkPlan]] = {
    queryPostPlannerStrategyRuleBuilders.map(_.apply(session)).toSeq
  }

  /**
   * Build the override rules for the query stage preparation phase of adaptive query execution.
   */
  private[sql] def buildQueryStagePrepRules(session: SparkSession): Seq[Rule[SparkPlan]] = {
    queryStagePrepRuleBuilders.map(_.apply(session)).toSeq
  }

  /**
   * Build the override rules for the optimizer of adaptive query execution.
   */
  private[sql] def buildRuntimeOptimizerRules(session: SparkSession): Seq[Rule[LogicalPlan]] = {
    runtimeOptimizerRules.map(_.apply(session)).toSeq
  }

  /**
   * Build the override rules for the query stage optimizer phase of adaptive query execution.
   */
  private[sql] def buildQueryStageOptimizerRules(session: SparkSession): Seq[Rule[SparkPlan]] = {
    queryStageOptimizerRuleBuilders.map(_.apply(session)).toSeq
  }

  /**
   * Inject a rule that can override the columnar execution of an executor.
   */
  def injectColumnar(builder: ColumnarRuleBuilder): Unit = {
    columnarRuleBuilders += builder
  }

  /**
   * Inject a rule that is applied between `plannerStrategy` and `queryStagePrepRules`, so
   * it can get the whole plan before injecting exchanges.
   * Note that these rules are only applied within AQE.
   */
  def injectQueryPostPlannerStrategyRule(builder: QueryPostPlannerStrategyBuilder): Unit = {
    queryPostPlannerStrategyRuleBuilders += builder
  }

  /**
   * Inject a rule that can override the query stage preparation phase of adaptive query
   * execution.
   */
  def injectQueryStagePrepRule(builder: QueryStagePrepRuleBuilder): Unit = {
    queryStagePrepRuleBuilders += builder
  }

  /**
   * Inject a runtime `Rule` builder into the [[SparkSession]].
   * The injected rules will be executed after built-in
   * [[org.apache.spark.sql.execution.adaptive.AQEOptimizer]] rules are applied.
   * A runtime optimizer rule is used to improve the quality of a logical plan during execution,
   * and it can leverage accurate statistics from shuffle.
   *
   * Note that it does not work if adaptive query execution is disabled.
   */
  def injectRuntimeOptimizerRule(builder: RuleBuilder): Unit = {
    runtimeOptimizerRules += builder
  }
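  // A minimal sketch of injecting a runtime optimizer rule, assuming an
  // `extensions: SparkSessionExtensions` instance is in scope (e.g. inside a
  // `withExtensions` block) and adaptive query execution is enabled. The rule
  // below is a deliberate no-op, shown only for the expected shape:
  //
  //   extensions.injectRuntimeOptimizerRule { session =>
  //     new Rule[LogicalPlan] {
  //       // Runs after the built-in AQE optimizer rules, where accurate
  //       // runtime statistics from shuffle are available. Returns the plan
  //       // unchanged.
  //       override def apply(plan: LogicalPlan): LogicalPlan = plan
  //     }
  //   }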
  /**
   * Inject a rule that can override the query stage optimizer phase of adaptive query
   * execution.
   */
  def injectQueryStageOptimizerRule(builder: QueryStageOptimizerRuleBuilder): Unit = {
    queryStageOptimizerRuleBuilders += builder
  }

  private[this] val resolutionRuleBuilders = mutable.Buffer.empty[RuleBuilder]

  /**
   * Build the analyzer resolution `Rule`s using the given [[SparkSession]].
   */
  private[sql] def buildResolutionRules(session: SparkSession): Seq[Rule[LogicalPlan]] = {
    resolutionRuleBuilders.map(_.apply(session)).toSeq
  }

  /**
   * Inject an analyzer resolution `Rule` builder into the [[SparkSession]]. These analyzer
   * rules will be executed as part of the resolution phase of analysis.
   */
  def injectResolutionRule(builder: RuleBuilder): Unit = {
    resolutionRuleBuilders += builder
  }

  private[this] val postHocResolutionRuleBuilders = mutable.Buffer.empty[RuleBuilder]

  /**
   * Build the analyzer post-hoc resolution `Rule`s using the given [[SparkSession]].
   */
  private[sql] def buildPostHocResolutionRules(session: SparkSession): Seq[Rule[LogicalPlan]] = {
    postHocResolutionRuleBuilders.map(_.apply(session)).toSeq
  }

  /**
   * Inject an analyzer `Rule` builder into the [[SparkSession]]. These analyzer
   * rules will be executed after resolution.
   */
  def injectPostHocResolutionRule(builder: RuleBuilder): Unit = {
    postHocResolutionRuleBuilders += builder
  }

  private[this] val checkRuleBuilders = mutable.Buffer.empty[CheckRuleBuilder]

  /**
   * Build the check analysis `Rule`s using the given [[SparkSession]].
   */
  private[sql] def buildCheckRules(session: SparkSession): Seq[LogicalPlan => Unit] = {
    checkRuleBuilders.map(_.apply(session)).toSeq
  }

  /**
   * Inject a check analysis `Rule` builder into the [[SparkSession]]. The injected rules will
   * be executed after the analysis phase. A check analysis rule is used to detect problems with
   * a LogicalPlan and should throw an exception when a problem is found.
   */
  def injectCheckRule(builder: CheckRuleBuilder): Unit = {
    checkRuleBuilders += builder
  }
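  // A minimal sketch of injecting a check analysis rule, assuming an
  // `extensions: SparkSessionExtensions` instance is in scope. The specific
  // check and its error message are illustrative, not prescribed by Spark:
  //
  //   extensions.injectCheckRule { session => plan =>
  //     // Runs after the analysis phase; throw to reject the plan.
  //     if (plan.missingInput.nonEmpty) {
  //       throw new IllegalStateException(s"Plan references missing input: $plan")
  //     }
  //   }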
  private[this] val planNormalizationRules = mutable.Buffer.empty[RuleBuilder]

  def buildPlanNormalizationRules(session: SparkSession): Seq[Rule[LogicalPlan]] = {
    planNormalizationRules.map(_.apply(session)).toSeq
  }

  /**
   * Inject a plan normalization `Rule` builder into the [[SparkSession]]. The injected rules
   * will be executed just before query caching decisions are made. Such rules can be used to
   * improve the cache hit rate by normalizing different plans to the same form. These rules
   * should never modify the result of the LogicalPlan.
   */
  def injectPlanNormalizationRule(builder: RuleBuilder): Unit = {
    planNormalizationRules += builder
  }

  private[this] val optimizerRules = mutable.Buffer.empty[RuleBuilder]

  private[sql] def buildOptimizerRules(session: SparkSession): Seq[Rule[LogicalPlan]] = {
    optimizerRules.map(_.apply(session)).toSeq
  }

  /**
   * Inject an optimizer `Rule` builder into the [[SparkSession]]. The injected rules will be
   * executed during the operator optimization batch. An optimizer rule is used to improve the
   * quality of an analyzed logical plan; these rules should never modify the result of the
   * LogicalPlan.
   */
  def injectOptimizerRule(builder: RuleBuilder): Unit = {
    optimizerRules += builder
  }

  private[this] val preCBORules = mutable.Buffer.empty[RuleBuilder]

  private[sql] def buildPreCBORules(session: SparkSession): Seq[Rule[LogicalPlan]] = {
    preCBORules.map(_.apply(session)).toSeq
  }

  /**
   * Inject an optimizer `Rule` builder that rewrites logical plans into the [[SparkSession]].
   * The injected rules will be executed once after the operator optimization batch and
   * before any cost-based optimization rules that depend on stats.
   */
  def injectPreCBORule(builder: RuleBuilder): Unit = {
    preCBORules += builder
  }

  private[this] val plannerStrategyBuilders = mutable.Buffer.empty[StrategyBuilder]

  private[sql] def buildPlannerStrategies(session: SparkSession): Seq[Strategy] = {
    plannerStrategyBuilders.map(_.apply(session)).toSeq
  }

  /**
   * Inject a planner `Strategy` builder into the [[SparkSession]]. The injected strategy will
   * be used to convert a `LogicalPlan` into an executable
   * [[org.apache.spark.sql.execution.SparkPlan]].
   */
  def injectPlannerStrategy(builder: StrategyBuilder): Unit = {
    plannerStrategyBuilders += builder
  }

  private[this] val parserBuilders = mutable.Buffer.empty[ParserBuilder]

  private[sql] def buildParser(
      session: SparkSession,
      initial: ParserInterface): ParserInterface = {
    parserBuilders.foldLeft(initial) { (parser, builder) =>
      builder(session, parser)
    }
  }

  /**
   * Inject a custom parser into the [[SparkSession]]. Note that the builder is passed a session
   * and an initial parser. The latter allows a user to create a partial parser that delegates
   * to the underlying parser for completeness. If a user injects multiple parsers, the parsers
   * are stacked on top of each other.
   */
  def injectParser(builder: ParserBuilder): Unit = {
    parserBuilders += builder
  }
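  // A minimal sketch of stacking a custom parser, assuming an
  // `extensions: SparkSessionExtensions` instance is in scope. `MyParser` is a
  // hypothetical ParserInterface implementation that handles some syntax itself
  // and forwards everything else to the `delegate` it wraps:
  //
  //   extensions.injectParser { (session, delegate) =>
  //     new MyParser(session, delegate)
  //   }
  //
  // Because buildParser folds the injected builders over the initial parser,
  // each injected parser receives the stack built so far as its delegate.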
  private[this] val injectedFunctions = mutable.Buffer.empty[FunctionDescription]

  private[this] val injectedTableFunctions = mutable.Buffer.empty[TableFunctionDescription]

  private[sql] def registerFunctions(functionRegistry: FunctionRegistry) = {
    for ((name, expressionInfo, function) <- injectedFunctions) {
      functionRegistry.registerFunction(name, expressionInfo, function)
    }
    functionRegistry
  }

  private[sql] def registerTableFunctions(tableFunctionRegistry: TableFunctionRegistry) = {
    for ((name, expressionInfo, function) <- injectedTableFunctions) {
      tableFunctionRegistry.registerFunction(name, expressionInfo, function)
    }
    tableFunctionRegistry
  }

  /**
   * Injects a custom function into the
   * [[org.apache.spark.sql.catalyst.analysis.FunctionRegistry]] at runtime for all sessions.
   */
  def injectFunction(functionDescription: FunctionDescription): Unit = {
    injectedFunctions += functionDescription
  }

  /**
   * Injects a custom table function into the
   * [[org.apache.spark.sql.catalyst.analysis.TableFunctionRegistry]] at runtime for all sessions.
   */
  def injectTableFunction(functionDescription: TableFunctionDescription): Unit = {
    injectedTableFunctions += functionDescription
  }
}
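// A minimal, end-to-end sketch of an extensions provider that could be passed
// via `spark.sql.extensions`, tying the pieces above together. Everything below
// (`MyExtensions`, the `answer` function name, the Literal(42) builder) is
// illustrative and not part of this file:
//
//   import org.apache.spark.sql.catalyst.FunctionIdentifier
//   import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo, Literal}
//
//   class MyExtensions extends SparkSessionExtensionsProvider {
//     override def apply(extensions: SparkSessionExtensions): Unit = {
//       // Register a zero-argument SQL function `answer()` that always
//       // evaluates to the literal 42, for every session.
//       extensions.injectFunction((
//         FunctionIdentifier("answer"),
//         new ExpressionInfo(classOf[Literal].getName, "answer"),
//         (_: Seq[Expression]) => Literal(42)))
//     }
//   }
//
//   // Then, for example:
//   //   spark-submit --conf spark.sql.extensions=org.example.MyExtensions ...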



