package com.github.mrpowers.spark.daria.sql

import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.sql.types.{DataType, StructField, StructType}

object SparkSessionExt {
  implicit class SparkSessionMethods(spark: SparkSession) {
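    /**
     * Normalizes mixed input into a `List[Row]`: `Row` values pass through
     * unchanged, tuples and case classes (`Product`) are unpacked into their
     * fields, and any other value becomes a single-column `Row`.
     */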
    private def asRows[U](values: List[U]): List[Row] = {
      values.map {
        case x: Row     => x
        case y: Product => Row(y.productIterator.toList: _*)
        case a          => Row(a)
      }
    }
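    /**
     * Normalizes schema input into a `List[StructField]`: `StructField` values
     * pass through unchanged and `(name, dataType, nullable)` tuples are
     * converted to `StructField`s; any other shape throws a `MatchError`.
     */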
    private def asSchema[U <: Product](fields: List[U]): List[StructField] = {
      fields.map {
        case x: StructField => x
        case (name: String, dataType: DataType, nullable: Boolean) =>
          StructField(name, dataType, nullable)
      }
    }
    /**
     * Creates a DataFrame, similar to `createDataFrame`, but with better syntax.
     *
     * spark-daria defines a `createDF` method that combines the terse syntax of `toDF` with the fine-grained control of `createDataFrame`.
     *
     * spark.createDF(
     *   List(
     *     ("bob", 45),
     *     ("liz", 25),
     *     ("freeman", 32)
     *   ), List(
     *     ("name", StringType, true),
     *     ("age", IntegerType, false)
     *   )
     * )
     *
     * The `createDF` method can also be used with lists of `Row` and `StructField` objects.
     *
     * spark.createDF(
     *   List(
     *     Row("bob", 45),
     *     Row("liz", 25),
     *     Row("freeman", 32)
     *   ), List(
     *     StructField("name", StringType, true),
     *     StructField("age", IntegerType, false)
     *   )
     * )
     */
    def createDF[U, T <: Product](rowData: List[U], fields: List[T]): DataFrame = {
      spark.createDataFrame(
        spark.sparkContext.parallelize(asRows(rowData)),
        StructType(asSchema(fields))
      )
    }
    /**
     * Creates an empty DataFrame given a list of schema fields.
     *
     * This is a handy fallback when a read from a data source fails:
     *
     * val schema = List(StructField("col1", IntegerType))
     * val df = Try {
     *   spark.read.parquet("non-existent-path")
     * }.getOrElse(spark.createEmptyDF(schema))
     */
    def createEmptyDF[T <: Product](fields: List[T]): DataFrame = {
      spark.createDataFrame(
        spark.sparkContext.emptyRDD[Row],
        StructType(asSchema(fields))
      )
    }
  }
}
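
// A minimal usage sketch, not part of the library source: it assumes
// spark-daria is on the classpath and a local SparkSession can be created.
// `SparkSessionExtExample` is an illustrative object name, not something
// defined in spark-daria.
object SparkSessionExtExample {

  import com.github.mrpowers.spark.daria.sql.SparkSessionExt._
  import org.apache.spark.sql.types.{IntegerType, StringType, StructField}
  import org.apache.spark.sql.{Row, SparkSession}

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("SparkSessionExtExample").getOrCreate()

    // Tuple-based rows paired with (name, dataType, nullable) schema tuples
    val df = spark.createDF(
      List(("bob", 45), ("liz", 25), ("freeman", 32)),
      List(("name", StringType, true), ("age", IntegerType, false))
    )
    df.show()

    // The same DataFrame built from Row and StructField objects
    val df2 = spark.createDF(
      List(Row("bob", 45), Row("liz", 25), Row("freeman", 32)),
      List(StructField("name", StringType, true), StructField("age", IntegerType, false))
    )
    df2.show()

    // Empty DataFrame with a known schema, e.g. as a fallback when a read fails
    val emptyDF = spark.createEmptyDF(List(StructField("col1", IntegerType, true)))
    emptyDF.printSchema()

    spark.stop()
  }
}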