All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.eels.component.hive.Compactor.scala Maven / Gradle / Ivy

The newest version!
package io.eels.component.hive

import com.sksamuel.exts.Logging
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.hive.metastore.IMetaStoreClient

/**
  * Compacts the files of a single hive table by reading the table and writing
  * its contents back out through a [[HiveSink]], then removing the files that
  * existed before the rewrite.
  *
  * @param dbname    name of the hive database that contains the table
  * @param tablename name of the hive table to compact
  * @param fs        filesystem used to delete the original files
  * @param conf      hadoop configuration; not referenced directly in this class,
  *                  presumably required implicitly by the hive components used below
  * @param client    hive metastore client; not referenced directly in this class,
  *                  presumably required implicitly by the hive components used below
  */
case class Compactor(dbname: String, tablename: String)
                    (implicit fs: FileSystem, conf: Configuration, client: IMetaStoreClient) extends Logging {

  /**
    * Compacts the files in the given hive database.table
    * Returns the original number of files.
    * Automatically handles partitions.
    *
    * The original file listing is captured before the rewrite starts, and those
    * files are only deleted once the write call has returned, so an exception
    * thrown while writing leaves the original files untouched.
    *
    * @param targetFileCount value handed to the sink write as its second argument
    *                        (presumably the number of output files/writers - confirm
    *                        against the data stream API); must be positive
    * @return the number of files that made up the table before compaction
    * @throws IllegalArgumentException if targetFileCount is not positive
    */
  def compactTo(targetFileCount: Int): Int = {
    // Fail before touching any data: the originals are deleted after the write,
    // so a nonsensical target count must never get as far as the delete step.
    require(targetFileCount > 0, s"targetFileCount must be positive but was $targetFileCount")

    // NOTE(review): the meaning of the two boolean flags is defined by HiveTable.paths,
    // which is not visible here - confirm before changing them.
    val originalPaths = HiveTable(dbname, tablename).paths(false, false)

    HiveSource(dbname, tablename).toDataStream().to(HiveSink(dbname, tablename), targetFileCount)

    originalPaths.foreach { path =>
      // FileSystem.delete reports failure through its return value; an original file
      // left behind would sit alongside the newly written output, so make it visible.
      if (!fs.delete(path, false))
        logger.warn(s"Could not delete original file $path after compacting $dbname.$tablename")
    }
    originalPaths.size
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy