All downloads are FREE. Search and download functionality uses the official Maven repository.

tech.sourced.gitbase.spark.package.scala Maven / Gradle / Ivy

The newest version!
package tech.sourced.gitbase

import org.apache.spark.SparkConf
import org.apache.spark.sql.{SparkSession, SparkSessionExtensions}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.jdbc.JdbcDialects
import tech.sourced.gitbase.spark.udf.BblfshUtils

package object spark {

  /**
   * Extension methods for [[SparkSession.Builder]] that set up gitbase support.
   *
   * @param builder the session builder being extended
   */
  implicit class GitbaseSessionBuilder(val builder: SparkSession.Builder) extends AnyVal {

    /**
     * Applies the gitbase configuration and optimizer rules to the builder, then eagerly
     * obtains a session from it in order to register the UDFs, the temporary views and
     * the gitbase JDBC dialect.
     *
     * @param server comma-separated list of servers. Only the first non-blank entry is
     *               used; it overrides the `DefaultSource.GitbaseUrlKey` entry of `config`.
     *               When it is empty or contains no non-blank entry, `config` is used as is.
     * @param config configuration entries copied into the session configuration
     * @param rules  optimizer rules injected through the session extensions
     * @return the configured builder
     */
    def registerGitbaseSource(server: String = "",
                              config: Map[String, String] = defaultConfig,
                              rules: Seq[Rule[LogicalPlan]] = rule.getAll
                             ): SparkSession.Builder = {

      // `split` yields an empty array for inputs such as "," and blank tokens are not
      // usable URLs, so look for the first non-blank entry instead of calling `.head`.
      val firstServer = server.split(",").iterator.map(_.trim).find(_.nonEmpty)

      val gsConfig = firstServer
        .map(url => config + (DefaultSource.GitbaseUrlKey -> url))
        .getOrElse(config)

      // The builder methods are fluent and return the builder itself.
      val configured = builder
        .config(new SparkConf(false).setAll(gsConfig))
        .withExtensions(injectRules(rules))

      // The session is needed right away: UDFs and temp views are registered on it.
      val ss = configured.getOrCreate()
      udf.registerUDFs(ss)
      createTempViews(ss)
      JdbcDialects.registerDialect(GitbaseDialect())

      configured
    }

  }

  /**
   * Default configuration used when the caller does not provide one.
   *
   * Each entry is read from an environment variable and falls back to a built-in value:
   * `BBLFSH_HOST` and `BBLFSH_PORT` for the bblfsh host and port keys, and
   * `GITBASE_SERVERS` for the gitbase URL key.
   */
  val defaultConfig: Map[String, String] = {
    import scala.util.Properties.envOrElse

    Map(
      BblfshUtils.hostKey -> envOrElse("BBLFSH_HOST", BblfshUtils.defaultHost),
      BblfshUtils.portKey -> envOrElse("BBLFSH_PORT", BblfshUtils.defaultPort.toString),
      DefaultSource.GitbaseUrlKey -> envOrElse("GITBASE_SERVERS", "0.0.0.0:3306")
    )
  }

  /**
   * Builds the session-extensions hook that injects every given rule as an optimizer rule.
   *
   * @param rules rules to inject; each one is returned as is, whatever session asks for it
   * @return a function performing the injection on a [[SparkSessionExtensions]]
   */
  def injectRules(rules: Seq[Rule[LogicalPlan]]): SparkSessionExtensions => Unit =
    (extensions: SparkSessionExtensions) =>
      for (optimizerRule <- rules) {
        extensions.injectOptimizerRule(_ => optimizerRule)
      }

  /**
   * Registers one temporary view per entry of `Sources.orderedSources`.
   *
   * Every source is loaded through the `DefaultSource.Name` format, using the gitbase URL
   * found in the session's runtime configuration, and exposed under the source's own name.
   *
   * @param ss session on which the views are created
   */
  def createTempViews(ss: SparkSession): Unit =
    for (table <- Sources.orderedSources) {
      val reader = ss.read
        .format(DefaultSource.Name)
        .option(DefaultSource.TableNameKey, table)
        .option(DefaultSource.GitbaseUrlKey, ss.conf.get(DefaultSource.GitbaseUrlKey))

      reader.load().createOrReplaceTempView(table)
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy