
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.spark.sql

import scala.jdk.CollectionConverters._

class TableValuedFunction(sparkSession: SparkSession) {

  /**
   * Creates a `Dataset` with a single `LongType` column named `id`, containing elements in a
   * range from 0 to `end` (exclusive) with step value 1.
   *
   * @since 4.0.0
   */
  def range(end: Long): Dataset[java.lang.Long] = {
    sparkSession.range(end)
  }

  /**
   * Creates a `Dataset` with a single `LongType` column named `id`, containing elements in a
   * range from `start` to `end` (exclusive) with step value 1.
   *
   * @since 4.0.0
   */
  def range(start: Long, end: Long): Dataset[java.lang.Long] = {
    sparkSession.range(start, end)
  }

  /**
   * Creates a `Dataset` with a single `LongType` column named `id`, containing elements in a
   * range from `start` to `end` (exclusive) with a step value.
   *
   * @since 4.0.0
   */
  def range(start: Long, end: Long, step: Long): Dataset[java.lang.Long] = {
    sparkSession.range(start, end, step)
  }

  /**
   * Creates a `Dataset` with a single `LongType` column named `id`, containing elements in a
   * range from `start` to `end` (exclusive) with a step value, using the specified number of
   * partitions.
   *
   * @since 4.0.0
   */
  def range(
      start: Long,
      end: Long,
      step: Long,
      numPartitions: Int): Dataset[java.lang.Long] = {
    sparkSession.range(start, end, step, numPartitions)
  }
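
  // Usage sketch (illustrative, not part of the original source): these overloads mirror
  // `SparkSession.range`. Assuming this class is reached through the `spark.tvf` accessor
  // introduced in Spark 4.0:
  //
  //   spark.tvf.range(0, 10, 2).show()
  //   // A single `id` column with the values 0, 2, 4, 6, 8.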

  /**
   * Builds a `DataFrame` that invokes the SQL table-valued function `name` with the given
   * arguments, by adding an unresolved table-valued function node to the query plan.
   */
  private def fn(name: String, args: Seq[Column]): Dataset[Row] = {
    sparkSession.newDataFrame { builder =>
      builder.getUnresolvedTableValuedFunctionBuilder
        .setFunctionName(name)
        .addAllArguments(args.map(_.expr).asJava)
    }
  }

  /**
   * Creates a `DataFrame` containing a new row for each element in the given array or map
   * column. Uses the default column name `col` for elements in the array and `key` and `value`
   * for elements in the map unless specified otherwise.
   *
   * @group generator_funcs
   * @since 4.0.0
   */
  def explode(collection: Column): Dataset[Row] =
    fn("explode", Seq(collection))
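
  // Example (illustrative; assumes the `spark.tvf` usage sketched above and
  // `org.apache.spark.sql.functions.lit` in scope):
  //
  //   spark.tvf.explode(lit(Array(10, 20))).show()
  //   // +---+
  //   // |col|
  //   // +---+
  //   // | 10|
  //   // | 20|
  //   // +---+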

  /**
   * Creates a `DataFrame` containing a new row for each element in the given array or map
   * column. Uses the default column name `col` for elements in the array and `key` and `value`
   * for elements in the map unless specified otherwise. Unlike `explode`, if the array/map is
   * null or empty then null is produced.
   *
   * @group generator_funcs
   * @since 4.0.0
   */
  def explode_outer(collection: Column): Dataset[Row] =
    fn("explode_outer", Seq(collection))
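
  // Contrast with `explode` (illustrative): an empty or null input still yields one row.
  //
  //   spark.tvf.explode_outer(lit(Array.empty[Int]))
  //   // One row with a null `col`, where plain `explode` would return no rows.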

  /**
   * Creates a `DataFrame` containing a new row for each element in the given array of structs.
   *
   * @group generator_funcs
   * @since 4.0.0
   */
  def inline(input: Column): Dataset[Row] =
    fn("inline", Seq(input))

  /**
   * Creates a `DataFrame` containing a new row for each element in the given array of structs.
   * Unlike `inline`, if the array is null or empty then null is produced for each nested column.
   *
   * @group generator_funcs
   * @since 4.0.0
   */
  def inline_outer(input: Column): Dataset[Row] =
    fn("inline_outer", Seq(input))
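
  // Example (illustrative): flattening an array of structs into top-level columns, assuming
  // `org.apache.spark.sql.functions._` is in scope.
  //
  //   spark.tvf.inline(array(struct(lit(1), lit("a")), struct(lit(2), lit("b"))))
  //   // Two rows with columns `col1` and `col2`: (1, "a") and (2, "b").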

  /**
   * Creates a `DataFrame` containing a new row for a JSON column according to the given field
   * names.
   *
   * @group json_funcs
   * @since 4.0.0
   */
  @scala.annotation.varargs
  def json_tuple(input: Column, fields: Column*): Dataset[Row] =
    fn("json_tuple", input +: fields)
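
  // Example (illustrative): the JSON field names are passed as string-literal columns, and the
  // extracted values come back as string columns named `c0`, `c1`, ...
  //
  //   spark.tvf.json_tuple(lit("""{"a": 1, "b": "x"}"""), lit("a"), lit("b"))
  //   // One row: c0 = "1", c1 = "x".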

  /**
   * Creates a `DataFrame` containing a new row for each element with position in the given
   * array or map column. Uses the default column name `pos` for position, and `col` for
   * elements in the array and `key` and `value` for elements in the map unless specified
   * otherwise.
   *
   * @group generator_funcs
   * @since 4.0.0
   */
  def posexplode(collection: Column): Dataset[Row] =
    fn("posexplode", Seq(collection))

  /**
   * Creates a `DataFrame` containing a new row for each element with position in the given
   * array or map column. Uses the default column name `pos` for position, and `col` for
   * elements in the array and `key` and `value` for elements in the map unless specified
   * otherwise. Unlike `posexplode`, if the array/map is null or empty then the row (null, null)
   * is produced.
   *
   * @group generator_funcs
   * @since 4.0.0
   */
  def posexplode_outer(collection: Column): Dataset[Row] =
    fn("posexplode_outer", Seq(collection))
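
  // Example (illustrative): like `explode`, with an extra `pos` column for the element index.
  //
  //   spark.tvf.posexplode(lit(Array("a", "b")))
  //   // Rows: (pos = 0, col = "a"), (pos = 1, col = "b").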

  /**
   * Separates `col1`, ..., `colk` into `n` rows. Uses column names `col0`, `col1`, etc. by
   * default unless specified otherwise.
   *
   * @group generator_funcs
   * @since 4.0.0
   */
  @scala.annotation.varargs
  def stack(n: Column, fields: Column*): Dataset[Row] =
    fn("stack", n +: fields)
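
  // Example (illustrative): `n` and the values are all passed as columns.
  //
  //   spark.tvf.stack(lit(2), lit(1), lit(2), lit(3), lit(4))
  //   // Two rows with columns `col0` and `col1`: (1, 2) and (3, 4).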

  /**
   * Gets all of the Spark SQL string collations.
   *
   * @group generator_funcs
   * @since 4.0.0
   */
  def collations(): Dataset[Row] =
    fn("collations", Seq.empty)

  /**
   * Gets Spark SQL keywords.
   *
   * @group generator_funcs
   * @since 4.0.0
   */
  def sql_keywords(): Dataset[Row] =
    fn("sql_keywords", Seq.empty)
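
  // Both catalogs take no arguments and can be browsed directly (illustrative):
  //
  //   spark.tvf.sql_keywords().show()   // one row per SQL keyword
  //   spark.tvf.collations().show()     // one row per supported string collation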

  /**
   * Separates a variant object/array into multiple rows containing its fields/elements. Its
   * result schema is `struct<pos int, key string, value variant>`. `pos` is the position of the
   * field/element in its parent object/array, and `value` is the field/element value. `key` is
   * the field name when exploding a variant object, or is NULL when exploding a variant array.
   * It ignores any input that is not a variant array/object, including SQL NULL, variant null,
   * and any other variant values.
   *
   * @group variant_funcs
   * @since 4.0.0
   */
  def variant_explode(input: Column): Dataset[Row] =
    fn("variant_explode", Seq(input))

  /**
   * Separates a variant object/array into multiple rows containing its fields/elements. Its
   * result schema is `struct<pos int, key string, value variant>`. `pos` is the position of the
   * field/element in its parent object/array, and `value` is the field/element value. `key` is
   * the field name when exploding a variant object, or is NULL when exploding a variant array.
   * Unlike `variant_explode`, if the given variant is not a variant array/object, including SQL
   * NULL, variant null, and any other variant values, then NULL is produced.
   *
   * @group variant_funcs
   * @since 4.0.0
   */
  def variant_explode_outer(input: Column): Dataset[Row] =
    fn("variant_explode_outer", Seq(input))
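
  // Example (illustrative): exploding a variant array parsed from JSON, assuming
  // `org.apache.spark.sql.functions.parse_json` is in scope.
  //
  //   spark.tvf.variant_explode(parse_json(lit("[10, 20]")))
  //   // Rows: (pos = 0, key = NULL, value = 10), (pos = 1, key = NULL, value = 20).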
}