io.delta.sql.DeltaSparkSessionExtension.scala

/*
 * Copyright (2020) The Delta Lake Project Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.delta.sql

import io.delta.sql.parser.DeltaSqlParser

import org.apache.spark.sql.SparkSessionExtensions

/**
  * An extension for Spark SQL that activates the Delta SQL parser to support the Delta SQL grammar.
  *
  * Scala example to create a `SparkSession` with the Delta SQL parser:
  * {{{
  *    import org.apache.spark.sql.SparkSession
  *
  *    val spark = SparkSession
  *       .builder()
  *       .appName("...")
  *       .master("...")
  *       .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
  *       .getOrCreate()
  * }}}
  *
  * Java example to create a `SparkSession` with the Delta SQL parser:
  * {{{
  *    import org.apache.spark.sql.SparkSession;
  *
  *    SparkSession spark = SparkSession
  *                 .builder()
  *                 .appName("...")
  *                 .master("...")
  *                 .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
  *                 .getOrCreate();
  * }}}
  *
  * Python example to create a `SparkSession` with the Delta SQL parser. PySpark does not pick up
  * the SQL conf "spark.sql.extensions" in Apache Spark 2.4.x, so the extension has to be activated
  * manually there; and because the `SparkSession` has already been created and everything has been
  * materialized, a new session must be cloned to trigger the initialization (see SPARK-25003):
  * {{{
  *    from pyspark.sql import SparkSession
  *
  *    spark = SparkSession \
  *        .builder \
  *        .appName("...") \
  *        .master("...") \
  *        .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
  *        .getOrCreate()
  *    if spark.sparkContext.version < "3.":
  *        spark.sparkContext._jvm.io.delta.sql.DeltaSparkSessionExtension() \
  *            .apply(spark._jsparkSession.extensions())
  *        spark = SparkSession(spark.sparkContext, spark._jsparkSession.cloneSession())
  * }}}
  *
  * @since 0.4.0
  */
class DeltaSparkSessionExtension extends (SparkSessionExtensions => Unit) {
  override def apply(extensions: SparkSessionExtensions): Unit = {
    // Wrap the session's existing parser with the Delta SQL parser; statements that are
    // not part of the Delta grammar are delegated to the wrapped parser.
    extensions.injectParser { (session, parser) =>
      new DeltaSqlParser(parser)
    }
  }
}
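
Because `DeltaSparkSessionExtension` is simply a `SparkSessionExtensions => Unit` function, it can also be registered programmatically with `SparkSession.Builder.withExtensions` instead of the "spark.sql.extensions" conf. A minimal Scala sketch, assuming the Delta Lake artifacts are on the classpath (the object, app name, and master are placeholders):

    import io.delta.sql.DeltaSparkSessionExtension
    import org.apache.spark.sql.SparkSession

    object DeltaWithExtensionsExample {
      def main(args: Array[String]): Unit = {
        // Equivalent to setting "spark.sql.extensions" to
        // "io.delta.sql.DeltaSparkSessionExtension" on the builder.
        val spark = SparkSession
          .builder()
          .appName("delta-extension-example")
          .master("local[*]")
          .withExtensions(new DeltaSparkSessionExtension)
          .getOrCreate()

        spark.stop()
      }
    }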

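For context, a sketch of what the injected parser enables at runtime: with the extension configured, Delta-specific SQL statements such as VACUUM parse and run through `spark.sql`. This assumes the Delta Lake artifacts are on the classpath and that `/tmp/delta-table` is an existing Delta table (both are assumptions for illustration); depending on the Spark and Delta versions, additional configuration such as the Delta catalog may also be required.

    import org.apache.spark.sql.SparkSession

    object DeltaSqlGrammarExample {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession
          .builder()
          .appName("delta-sql-grammar-example")
          .master("local[*]")
          .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
          .getOrCreate()

        // VACUUM is part of the Delta SQL grammar handled by DeltaSqlParser;
        // without the extension this statement would fail to parse.
        spark.sql("VACUUM delta.`/tmp/delta-table` RETAIN 168 HOURS").show()

        spark.stop()
      }
    }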