All downloads are free. The search and download functionality uses the official Maven repository.

za.co.absa.standardization.udf.UDFBuilder.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2021 ABSA Group Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package za.co.absa.standardization.udf

import org.apache.spark.sql.expressions.UserDefinedFunction
import org.apache.spark.sql.functions.udf
import org.apache.spark.sql.types.DataType
import za.co.absa.standardization.config.StandardizationConfig
import za.co.absa.standardization.types.parsers.NumericParser
import za.co.absa.standardization.types.parsers.NumericParser.NumericParserException

import scala.reflect.runtime.universe._
import scala.util.{Failure, Success}

/**
 * Factory for Spark user-defined functions that turn string input into typed values
 * by means of a [[NumericParser]].
 */
object UDFBuilder {

  // One shared exception instance, reported whenever a null arrives for a non-nullable column.
  private val nullException = new NumericParserException("Null value on input for non-nullable field")

  /**
   * Creates a `String => UDFResult[T]` Spark UDF that delegates the conversion to `parser`.
   *
   * @param sourceDataType     data type of the source column; its `typeName` is forwarded to `UDFResult.fromTry`
   * @param targetDataType     data type of the target column; its `typeName` is forwarded to `UDFResult.fromTry`
   * @param parser             parser applied to every non-null input string
   * @param columnNullable     when `true`, a null input is accepted and yields an empty (`None`) success
   * @param columnNameForError column name forwarded to `UDFResult.fromTry`
   * @param stdConfig          standardization configuration forwarded to `UDFResult.fromTry`
   * @param defaultValue       optional default value forwarded to `UDFResult.fromTry`
   * @tparam T                 type produced by the parser
   * @return                   the user-defined function wrapping the conversion
   */
  def stringUdfViaNumericParser[T: TypeTag](sourceDataType: DataType,
                                            targetDataType: DataType,
                                            parser: NumericParser[T],
                                            columnNullable: Boolean,
                                            columnNameForError: String,
                                            stdConfig: StandardizationConfig,
                                            defaultValue: Option[T]): UserDefinedFunction = {
    // Local copies, so the function handed to `udf` refers to values that are already instantiated.
    val localParser = parser
    val localColumnName = columnNameForError
    val localDefault = defaultValue
    val localNullable = columnNullable
    val localConfig = stdConfig

    val convert: String => UDFResult[T] = { input =>
      numericParserToTyped(
        input,
        sourceDataType,
        targetDataType,
        localParser,
        localNullable,
        localColumnName,
        localConfig,
        localDefault
      )
    }

    udf[UDFResult[T], String](convert)
  }

  /**
   * Converts a single input string (possibly null) into a `UDFResult`.
   *
   * A non-null input is run through `parser`; a null input is an empty success for a nullable
   * column and a failure carrying `nullException` otherwise. The outcome is then passed to
   * `UDFResult.fromTry` together with the column name, the raw input, both type names,
   * the configuration and the default value.
   */
  private def numericParserToTyped[T](input: String,
                                      sourceDataType: DataType,
                                      targetDataType: DataType,
                                      parser: NumericParser[T],
                                      columnNullable: Boolean,
                                      columnNameForError: String,
                                      stdConfig: StandardizationConfig,
                                      defaultValue: Option[T]): UDFResult[T] = {
    val parsed: scala.util.Try[Option[T]] =
      if (input != null) {
        parser.parse(input).map(Some(_))
      } else if (columnNullable) {
        Success(None)
      } else {
        Failure(nullException)
      }

    UDFResult.fromTry(
      parsed,
      columnNameForError,
      input,
      sourceDataType.typeName,
      targetDataType.typeName,
      None,
      stdConfig,
      defaultValue
    )
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy