// io.eels.component.hive.TableSpec.scala
package io.eels.component.hive
import org.apache.hadoop.hive.metastore.TableType
import org.apache.hadoop.hive.metastore.api.FieldSchema
//package io.eels.component.hive
//import java.nio.file.Path
//import java.util.Date
//import com.fasterxml.jackson.annotation.JsonInclude
//import com.fasterxml.jackson.databind.ObjectMapper
//import com.fasterxml.jackson.module.scala.DefaultScalaModule
//import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper
//import io.eels.schema.Schema
//import org.apache.hadoop.fs.FileSystem
//import org.apache.hadoop.hive.metastore.IMetaStoreClient
//object HiveSpecFn {
// def toSchemas(spec: HiveSpec): Map[String, Schema] = {
// { table =>
// val columns = { field =>
// val (schemaType, precision, scale) = HiveSchemaFns.toSchemaType(field.`type`)
// Column(, schemaType, true, precision, scale, true, None)
// }
// table.tableName -> Schema(columns)
// }.toMap
// }
// def toHiveSpec(dbName: String, tableName: String)
// (implicit fs: FileSystem, client:IMetaStoreClient): HiveSpec = {
// val tableSpecs = client.getAllTables(dbName) { tableName =>
// val table = client.getTable(dbName, tableName)
// val location = table.getSd.getLocation
// val tableType = table.getTableType
// val partitions = client.listPartitions(dbName, tableName, Short.MaxValue) { partition =>
// PartitionSpec(
// partition.getValues.asScala.toList,
// partition.getSd.getLocation,
// partition.getParameters.asScala.toMap.filterKeys(_ != "transient_lastDdlTime")
// )
// }.toList
// val columns =, true)) { column =>
// HiveFieldSpec(, HiveSchemaFns.toHiveType(column), column.comment)
// }
// val owner = table.getOwner
// val retention = table.getRetention
// val createTime = table.getCreateTime
// val createTimeFormatted = new Date(createTime).toString
// val inputFormat = table.getSd.getInputFormat
// val outputFormat = table.getSd.getOutputFormat
// val serde = table.getSd.getSerdeInfo.getSerializationLib
// val params = table.getParameters.asScala.toMap.filterKeys(_ != "transient_lastDdlTime")
// val partitionKeys =
// HiveTableSpec(tableName, location, columns, tableType, partitionKeys, partitions, params, inputFormat, outputFormat, serde, retention, createTime, createTimeFormatted, owner)
// }
// HiveSpec(dbName, tableSpecs.toList)
// }
/**
 * Immutable value describing a single Hive table.
 *
 * This is a plain data carrier: it holds no logic of its own and relies on the
 * `equals` / `hashCode` / `copy` members generated for case classes.
 *
 * NOTE(review): nothing in this file shows how instances are populated, so the
 * field descriptions below are inferred from the field names and types only.
 * In particular the units of `retention`, `createTime` and `lastAccessTime`
 * are not established here -- confirm against the code that builds this value.
 *
 * @param tableName      name of the table
 * @param tableType      Hive metastore table type
 * @param location       table location, as a string
 * @param cols           column definitions as Hive `FieldSchema` values
 * @param numBuckets     number of buckets
 * @param bucketNames    bucket names (presumably the bucketing columns -- TODO confirm)
 * @param params         table parameters as key/value pairs
 * @param inputFormat    input format, as a string (presumably a class name -- TODO confirm)
 * @param outputFormat   output format, as a string (presumably a class name -- TODO confirm)
 * @param serde          serde, as a string (presumably a class name -- TODO confirm)
 * @param retention      table retention value
 * @param createTime     table creation time
 * @param lastAccessTime table last-access time
 * @param owner          owner of the table
 */
case class TableSpec(
  tableName: String,
  tableType: TableType,
  location: String,
  cols: Seq[FieldSchema],
  numBuckets: Int,
  bucketNames: List[String],
  params: Map[String, String],
  inputFormat: String,
  outputFormat: String,
  serde: String,
  retention: Int,
  createTime: Long,
  lastAccessTime: Long,
  owner: String
)
//object HiveSpec {
// private val mapper = new ObjectMapper with ScalaObjectMapper
// mapper.registerModule(DefaultScalaModule)
// mapper.setSerializationInclusion(JsonInclude.Include.NON_EMPTY)
// private val writer = mapper.writerWithDefaultPrettyPrinter()
// def apply(path: Path): HiveSpec = apply(Source.fromFile(path.toFile).getLines.mkString("\n"))
// def apply(str: String): HiveSpec = mapper.readValue[HiveSpec](str)
// def writeAsJson(spec: HiveSpec): String = writer.writeValueAsString(spec)
//case class PartitionSpec(values: List[String], location: String, params: Map[String, String])