All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.github.seabow.datax.common.SparkUtils.scala Maven / Gradle / Ivy

package io.github.seabow.datax.common

import org.apache.hadoop.conf.Configuration
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.{SparkEnv, SparkFiles, SparkHadoopUtilProxy}
import org.apache.spark.sql.SparkSession

import scala.io.Source

/**
 * Small helpers around the active [[SparkSession]]: reading a file whose location depends on
 * the deploy mode, and obtaining a Hadoop [[Configuration]].
 */
object SparkUtils {
  /** The session returned by `SparkSession.getActiveSession`, or `None` when there is none. */
  def sparkSession: Option[SparkSession] = SparkSession.getActiveSession

  /**
   * Reads the whole content of a file as a UTF-8 string.
   *
   * - In `"client"` deploy mode, `fileName` is opened directly as a path on the driver.
   * - In any other deploy mode, a single-task Spark job resolves the file with
   *   `SparkFiles.get(fileName)` on the executor that runs the task, reads it there and
   *   returns the content to the driver.
   *
   * NOTE(review): the original comment said that `--files` does not upload files to the driver
   * in client mode and that this method fetches the content from the executors; the code only
   * goes through the executors in non-client modes. Confirm which behaviour is intended.
   *
   * @param fileName path of the file (client mode), or the name passed to `SparkFiles.get`
   *                 (other modes)
   * @return the full content of the file
   * @throws NoSuchElementException if there is no active SparkSession
   */
  def getFileContent(fileName: String): String = {
    // Same exception type as a bare Option.get, but with a message that names the problem.
    val sc = sparkSession
      .getOrElse(throw new NoSuchElementException(
        "No active SparkSession: cannot read file '" + fileName + "'"))
      .sparkContext

    sc.deployMode match {
      case "client" => readUtf8(fileName)
      case _ =>
        // One element in one partition: a single task runs, no shuffle is needed to get there.
        sc.parallelize(Seq(1), numSlices = 1)
          .map(_ => readUtf8(SparkFiles.get(fileName)))
          .first()
    }
  }

  /**
   * Returns a Hadoop configuration, preferring the one of the active session.
   *
   * Without an active session, `SparkHadoopUtilProxy.newConfiguration` is used when a
   * `SparkEnv` is available (that proxy is defined outside this file; presumably it derives
   * the configuration from the Spark settings -- verify). Otherwise a plain
   * `new Configuration()` is returned.
   */
  def getHadoopConf(): Configuration = {
    sparkSession match {
      case Some(spark) => spark.sparkContext.hadoopConfiguration
      case None =>
        // SparkEnv.get is compared against null because it is a Java-style accessor.
        if (SparkEnv.get != null) {
          SparkHadoopUtilProxy.newConfiguration
        } else {
          new Configuration()
        }
    }
  }

  /** Reads `path` fully as UTF-8 and always closes the underlying file handle. */
  private def readUtf8(path: String): String = {
    val source = Source.fromFile(path, "utf8")
    try source.mkString
    finally source.close()
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy