bio.ferlab.datalake.spark3.transformation.Implicits.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of datalake-spark3_2.12 Show documentation
Show all versions of datalake-spark3_2.12 Show documentation
Library built on top of Apache Spark to speed up data lake development.
package bio.ferlab.datalake.spark3.transformation
import org.apache.spark.sql.{Column, DataFrame}
/**
 * Implicit syntax extensions for Spark [[DataFrame]]s.
 *
 * Import `Implicits._` to call the transformations below directly on a DataFrame.
 */
object Implicits {

  /**
   * Enriches a [[DataFrame]] with shortcuts to the transformations of this package.
   *
   * @param df the DataFrame every transformation below is applied to
   */
  implicit class DataFrameOperations(df: DataFrame) {

    /**
     * Builds a [[DropDuplicates]] transformation from the arguments and applies it to the
     * wrapped DataFrame.
     *
     * NOTE(review): the exact de-duplication semantics live in [[DropDuplicates]], which is
     * not visible from this file. The parameter descriptions below are inferred from their
     * names and should be confirmed against that class.
     *
     * @param partitionByExpr column names forwarded to [[DropDuplicates]]; presumably the
     *                        columns that identify a group of duplicates
     * @param orderByExpr     columns forwarded to [[DropDuplicates]]; presumably the ordering
     *                        used to pick which row of a group is kept
     * @return the result of `DropDuplicates(...).transform(df)`
     */
    def dropDuplicates(partitionByExpr: Seq[String], orderByExpr: Column*): DataFrame = {
      val transformation = DropDuplicates(partitionByExpr, orderByExpr: _*)
      transformation.transform(df)
    }

    /**
     * Builds a [[KeepFirstWithinPartition]] transformation from the arguments and applies it
     * to the wrapped DataFrame.
     *
     * Deprecated since 0.2.3 in favour of [[dropDuplicates]].
     *
     * @param partitionByExpr column names forwarded to [[KeepFirstWithinPartition]]
     * @param orderByExpr     columns forwarded to [[KeepFirstWithinPartition]]
     * @return the result of `KeepFirstWithinPartition(...).transform(df)`
     */
    @deprecated("use [[dropDuplicates]]", "0.2.3")
    def keepFirstWithinPartition(partitionByExpr: Seq[String], orderByExpr: Column*): DataFrame = {
      val transformation = KeepFirstWithinPartition(partitionByExpr, orderByExpr: _*)
      transformation.transform(df)
    }
  }
}