All Downloads are FREE. Search and download functionality uses the official Maven repository.

bio.ferlab.datalake.spark3.transformation.InputFileName.scala Maven / Gradle / Ivy

There is a newer version: 14.8.0
Show newest version
package bio.ferlab.datalake.spark3.transformation

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._

/**
 * Transformation that writes a value derived from Spark's `input_file_name()` into a column.
 *
 * @param columnName name of the column passed to `withColumn` to receive the value
 * @param regex      optional regular expression; when defined, the column receives capture
 *                   group 1 of this pattern applied to the input file name, otherwise it
 *                   receives the input file name unchanged
 */
case class InputFileName(columnName: String, regex: Option[String] = None) extends Transformation {
  override def transform: DataFrame => DataFrame = { df =>
    val sourceFile = input_file_name()
    // Decide the column expression first, so that withColumn is applied in a single place.
    val columnValue = regex match {
      case Some(pattern) => regexp_extract(sourceFile, pattern, 1)
      case None          => sourceFile
    }
    df.withColumn(columnName, columnValue)
  }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy