bio.ferlab.datalake.spark3.transformation.InputFileTimestamp.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of datalake-spark3_2.12 Show documentation
Show all versions of datalake-spark3_2.12 Show documentation
Library built on top of Apache Spark to speed up data lake development.
package bio.ferlab.datalake.spark3.transformation
import org.apache.spark.sql.functions._
import org.apache.spark.sql.DataFrame
/**
 * Transformation that adds a timestamp column derived from the name of the
 * file each row was read from.
 *
 * The source file name (as reported by Spark's `input_file_name()`) is searched
 * with `regex`; the whole matched text (group index 0) is then parsed with
 * `to_timestamp` using `format`.
 *
 * @param columnName name of the column that receives the parsed timestamp
 * @param regex      pattern locating the timestamp token in the file name;
 *                   defaults to eight digits, an underscore, then six digits
 * @param format     datetime pattern used to parse the matched token;
 *                   defaults to `yyyyMMdd_HHmmss`
 */
case class InputFileTimestamp(columnName: String,
                              regex: String = "(\\d{8}_\\d{6})",
                              format: String = "yyyyMMdd_HHmmss") extends Transformation {

  /** Returns a function that appends (or replaces) `columnName` on the given DataFrame. */
  override def transform: DataFrame => DataFrame = { df =>
    // Group index 0 selects the complete regex match rather than a sub-group.
    val rawToken = regexp_extract(input_file_name(), regex, 0)
    df.withColumn(columnName, to_timestamp(rawToken, format))
  }
}