All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.mrpowers.spark.daria.elt.Parser.scala Maven / Gradle / Ivy

package com.github.mrpowers.spark.daria.elt

import org.apache.spark.sql.functions._
import org.apache.spark.sql.{Column, DataFrame}

abstract class Parser {

  val validConditionExpr: Column
  val additionalDetailsExpr: Column

  def prepare(source: DataFrame): DataFrame = source
  def parse(source: DataFrame): DataFrame
  def complete(source: DataFrame): DataFrame = source

  final def apply(source: DataFrame): DataFrame = {
    source
      .transform(prepare)
      .select(struct("*") as 'origin)
      .transform(parse)
      .transform(setParseDetails)
      .transform(complete)
  }

  final def setParseDetails(parsed: DataFrame): DataFrame = {
    parsed.withColumn(
      "parse_details",
      struct(
        when(validConditionExpr, lit("OK")).otherwise(lit("NOT_VALID")) as 'status,
        additionalDetailsExpr as 'info,
        current_timestamp() as 'at
      )
    )
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy