bio.ferlab.datalake.spark3.transformation.SHA256.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of datalake-spark3_2.12 Show documentation
Show all versions of datalake-spark3_2.12 Show documentation
Library built on top of Apache Spark to speed up data lake development.
package bio.ferlab.datalake.spark3.transformation
import bio.ferlab.datalake.spark3.transformation.HashTransformation.SimpleHashTransformation
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.StringType
/**
 * Hash transformation that overwrites each configured column with the SHA-256
 * digest of its value joined to a salt.
 *
 * For every name in `columns`, the column is cast to string, joined with `salt`
 * using "_" as separator, and hashed with `sha2(..., 256)`. Rows where the column
 * is null are not hashed; they receive `nullValues` instead (inherited from
 * [[SimpleHashTransformation]], defined outside this file).
 *
 * @param salt    value joined to each column value before hashing
 * @param columns names of the columns to replace with their salted hash
 */
case class SHA256(salt: String, override val columns: String*) extends SimpleHashTransformation {

  /** Returns a function that applies the salted hash to every configured column, in order. */
  override def transform: DataFrame => DataFrame = { input =>
    columns.foldLeft(input) { (acc, name) =>
      val source = col(name)
      // The value is cast to string and joined with the salt before hashing.
      val salted = concat_ws("_", source.cast(StringType), lit(salt))
      val digest = sha2(salted, 256)
      // Null inputs bypass hashing and take `nullValues` instead.
      acc.withColumn(name, when(source.isNull, nullValues).otherwise(digest))
    }
  }
}