All downloads are free. Search and download functionality uses the official Maven repository.

tech.sourced.gitbase.spark.udf.package.scala Maven / Gradle / Ivy

The newest version!
package tech.sourced.gitbase.spark

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import org.apache.spark.internal.Logging
import org.apache.spark.sql.SparkSession
import scala.util.control.NonFatal

/**
  * Package-level registration entry point and shared helpers for the user
  * defined functions declared in this package.
  */
package object udf {
  /**
    * Session captured by the most recent call to [[registerUDFs]].
    * It is unset (`null`) until that method has been called.
    */
  private[udf] var spark: SparkSession = _

  /** Functions registered as Spark UDFs and reported by [[isSupported]]. */
  private val gitbaseUdfs = Seq(
    Language,
    UastXPath,
    UastExtract,
    UastChildren,
    IsBinary
  )

  /**
    * Functions registered directly in the session function registry (not
    * through `spark.udf`) and reported by [[isSupported]].
    */
  private val gitbaseExprs = Seq(
    Uast,
    UastMode
  )

  /** Functions registered as Spark UDFs but NOT reported by [[isSupported]]. */
  private val udfs = Seq(
    UastExtractParse,
    ParseCommitParents
  )

  /**
    * Tells whether `name` matches one of the functions in `gitbaseUdfs` or
    * `gitbaseExprs`. The comparison is an exact, case-sensitive name match.
    *
    * @param name function name to look up
    * @return true if a function with that exact name is listed, false otherwise
    */
  def isSupported(name: String): Boolean =
    gitbaseUdfs.exists(_.name == name) || gitbaseExprs.exists(_.name == name)

  /**
    * Stores the given session in [[spark]] and registers every function of
    * this package on it.
    *
    * @param ss session that will receive the function registrations
    */
  def registerUDFs(ss: SparkSession): Unit = {
    spark = ss
    // Register against the argument itself so that a later reassignment of the
    // mutable `spark` field cannot redirect part of the registrations.
    gitbaseUdfs.foreach(f => ss.udf.register(f.name, f.function.withName(f.name)))
    udfs.foreach(f => ss.udf.register(f.name, f.function.withName(f.name)))
    gitbaseExprs.foreach(f =>
      ss.sessionState.functionRegistry.createOrReplaceTempFunction(f.name, f.function)
    )
  }

  /** Decodes a JSON array of strings from its serialized byte form. */
  private[udf] object JsonArrayParser extends Logging {
    private val mapper = new ObjectMapper().registerModule(DefaultScalaModule)

    /**
      * Parses `jsonArray` as a JSON array of strings.
      *
      * @param jsonArray serialized JSON document; may be null or empty
      * @return the parsed elements, or `None` when the input is null or empty,
      *         when the document is the JSON `null` literal, or when parsing
      *         fails (the failure is logged as a warning)
      */
    def extract(jsonArray: Array[Byte]): Option[Seq[String]] = {
      jsonArray match {
        case a if a == null || a.isEmpty => None
        case a => try {
          // `classOf[Seq[String]]` erases to `classOf[Seq[_]]`, which gives
          // Jackson no element type and lets non-string values end up inside
          // a `Seq[String]`. An array class keeps its element type at
          // runtime, so the elements are really bound as strings.
          Option(mapper.readValue(a, classOf[Array[String]])).map(_.toSeq)
        } catch {
          case NonFatal(e) =>
            log.warn("Error trying to parse info from json array", e)
            None
        }
      }
    }
  }


}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy