// Source retrieved from the official Maven repository.
//
// io.github.seabow.datax.common.HiveUtils.scala (Maven / Gradle / Ivy artifact)

package io.github.seabow.datax.common

import org.apache.spark.sql.SparkSession

import scala.collection.mutable.ListBuffer

/**
 * Metadata describing a Hive table.
 *
 * @param partitionSpec comma-separated partition column names
 * @param location      storage location (e.g. an HDFS or S3 path)
 * @param provider      data source provider (e.g. "hive", "parquet")
 */
case class TableInfo(partitionSpec: String, location: String, provider: String)

/**
 * Helpers that extract table metadata (partition columns, location, provider)
 * by parsing the output of Spark SQL's `DESCRIBE EXTENDED` command.
 */
object HiveUtils {

  /**
   * Runs `DESCRIBE EXTENDED` for `table` exactly once and returns the rows as
   * (col_name, data_type) pairs. Centralizes the query so callers do not
   * trigger the Spark job more than once.
   */
  private def describeRows(table: String): Array[(String, String)] = {
    val spark = SparkSession.active
    spark.sql(s"describe EXTENDED $table")
      .collect()
      .map(r => (r.getAs[String]("col_name"), r.getAs[String]("data_type")))
  }

  /** Finds the data_type of the row whose col_name equals `key`, or throws
   *  NoSuchElementException (same exception type as the original `.head`). */
  private def rowValue(rows: Array[(String, String)], key: String, table: String): String =
    rows.collectFirst { case (`key`, value) => value }
      .getOrElse(throw new NoSuchElementException(s"No '$key' row in describe output for $table"))

  /**
   * Returns the partition specification and storage location of `table`.
   *
   * The partition columns are the non-empty `col_name` entries that appear in
   * the describe output between the "# col_name" header (of the
   * "# Partition Information" section) and "# Detailed Table Information".
   *
   * @param table table identifier, optionally database-qualified
   * @return (comma-separated partition column names, table location)
   * @throws NoSuchElementException if the describe output has no Location row
   */
  def getPartitionSpecAndLocation(table: String): (String, String) = {
    // NOTE: the original implementation collected the describe result twice,
    // executing the Spark job twice; we collect once and reuse the rows.
    val rows = describeRows(table)
    val location = rowValue(rows, "Location", table)
    // Replace the mutable flag/ListBuffer scan with an equivalent pure pipeline:
    // skip up to and including the "# col_name" header, then take column names
    // until the detailed-information section starts, dropping blank separators.
    val partitionCols = rows.iterator
      .map(_._1)
      .dropWhile(_ != "# col_name")
      .drop(1) // skip the "# col_name" header itself
      .takeWhile(_ != "# Detailed Table Information")
      .filter(_.nonEmpty)
      .toSeq
    (partitionCols.mkString(","), location)
  }

  /**
   * Returns the data source provider of `table` (e.g. "hive", "parquet").
   *
   * @param table table identifier, optionally database-qualified
   * @throws NoSuchElementException if the describe output has no Provider row
   */
  def getProvider(table: String): String =
    rowValue(describeRows(table), "Provider", table)

}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy