All downloads are FREE. The search and download functionality uses the official Maven repository.
Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
com.indix.pail.PailConsolidate.scala Maven / Gradle / Ivy
package com.indix.pail
import com.backtype.hadoop.pail.Pail
import com.indix.commons.FSUtils
import org.apache.commons.cli.{Options, PosixParser}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.joda.time.DateTime
import org.slf4j.LoggerFactory
import util.{DateHelper, DateTimeFormatter}
/**
 * Consolidates one pail directory while holding a write lock.
 *
 * @param inputDir           directory the write lock is created for
 * @param subDir             directory that is opened as a pail and consolidated
 * @param pipelineLabel      suffix appended to "CONSOLIDATE_" to form the lock's component name
 * @param nonMRConsolidation when true, `consolidateNonMR()` is invoked on the pail instead of
 *                           `consolidate()`
 */
class PailConsolidate(inputDir: String, subDir: String, pipelineLabel: String, nonMRConsolidation: Boolean = false) {
  val logger = LoggerFactory.getLogger(this.getClass)

  /** Returns a newly constructed Hadoop `Configuration` on every call. */
  def conf: Configuration = new Configuration()

  val component = "CONSOLIDATE_" + pipelineLabel
  val writeLock = new PailLock(PailLockType.WRITE, component, inputDir)

  /**
   * Obtains the pail at `consolidationDir` through `Pail.create` and calls `consolidate()` on it.
   *
   * NOTE(review): the literal `false` is presumably Pail's "fail if the pail already exists"
   * flag -- confirm against the Pail API.
   */
  def consolidateUsingPail(fileSystem: FileSystem, consolidationDir: String): Unit = {
    val pail = Pail.create(fileSystem, consolidationDir, false)
    pail.consolidate()
  }

  /**
   * Same as [[consolidateUsingPail]] but calls `consolidateNonMR()` on the pail.
   */
  def consolidateUsingPailNonMR(fileSystem: FileSystem, consolidationDir: String): Unit = {
    val pail = Pail.create(fileSystem, consolidationDir, false)
    pail.consolidateNonMR()
  }

  /**
   * Acquires the write lock, consolidates `subDir`, and always releases the lock afterwards.
   *
   * The lock is acquired BEFORE entering the `try` block: if resolving the file system or the
   * acquisition itself fails, `release()` must not run, because this instance never obtained
   * the lock.
   */
  def run(): Unit = {
    logger.info("Starting consolidate... " + subDir + " Aquiring write lock on " + inputDir)
    val fileSystem = new Path(subDir).getFileSystem(conf)
    writeLock.acquire()
    try {
      if (nonMRConsolidation) consolidateUsingPailNonMR(fileSystem, subDir)
      else consolidateUsingPail(fileSystem, subDir)
      logger.info("Consolidate done.")
    } finally {
      // Only reached once acquire() has returned normally.
      writeLock.release()
    }
  }
}
/** Command-line entry point that consolidates a single pail directory. */
object PailConsolidate {
  /**
   * Runs a consolidation of the directory given as the first argument.
   *
   * The first argument is used both as the lock directory and as the directory to consolidate.
   * The optional second argument selects non-MapReduce consolidation; it must be "true" or
   * "false" (case-insensitive) and defaults to false when absent. The pipeline label is taken
   * from the `GO_PIPELINE_LABEL` environment variable, falling back to "MANUAL" when the
   * variable is unset or empty.
   *
   * Prints the usage line and exits with status 1 when the directory is missing or the flag
   * is not a valid boolean.
   */
  def main(args: Array[String]): Unit = {
    val usage = "Usage: java -cp PailConsolidate /root/dir/to/consolidate [singleNodeConsolidation: true or false]"
    if (args.length < 1) {
      println(usage)
      sys.exit(1)
    }

    // Validate the flag explicitly instead of letting String.toBoolean throw a bare
    // IllegalArgumentException; the flag is honoured whenever a second argument is present.
    val isSingleNodeConsolidation =
      if (args.length < 2) false
      else if (args(1).equalsIgnoreCase("true")) true
      else if (args(1).equalsIgnoreCase("false")) false
      else {
        println("Invalid value for singleNodeConsolidation: " + args(1))
        println(usage)
        sys.exit(1)
      }

    val pipelineLabel = Option(System.getenv("GO_PIPELINE_LABEL")).filter(_.nonEmpty).getOrElse("MANUAL")
    val pailConsolidate = new PailConsolidate(args(0), args(0), pipelineLabel, isSingleNodeConsolidation)
    pailConsolidate.run()
  }
}
/**
 * Command-line entry point that consolidates one or more sub-directories of a pail root,
 * chosen by a time-partition strategy.
 *
 * Recognised options: `-i/--input-dir`, `-p/--pipeline`, `-n/--num-partition-units`,
 * `-s/--strategy`.
 */
object IxPailConsolidator extends FSUtils with ArgsParser {
  /** Returns a newly constructed Hadoop `Configuration` on every call. */
  def conf: Configuration = new Configuration()

  /**
   * Parses the arguments, derives the candidate directories and runs a [[PailConsolidate]]
   * on each one that exists.
   *
   * Defaults: pipeline label from `GO_PIPELINE_LABEL` (or "MANUAL" when unset/empty),
   * 2 partition units, strategy "all".
   *
   * NOTE(review): `cmdArgs` / `cmdOptionalArgs` are presumably supplied by `ArgsParser` and
   * `exists` by `FSUtils` -- neither is visible here.
   */
  def main(args: Array[String]): Unit = {
    implicit val cli = new PosixParser().parse(options, args)
    val pailRoot = cmdArgs("input-dir")
    val pipelineFromEnv = Option(System.getenv("GO_PIPELINE_LABEL")).filter(_.nonEmpty).getOrElse("MANUAL")
    val pipelineLabel = cmdOptionalArgs("pipeline").getOrElse(pipelineFromEnv)
    val numTimePartitionUnitsToCover = cmdOptionalArgs("num-partition-units").getOrElse("2").toInt
    val strategy = cmdOptionalArgs("strategy").getOrElse("all")
    val thisMoment = DateTime.now()

    // Maps (strategy, offset i) to the sub-directory name for the partition i units in the past.
    // "all" yields the empty string, i.e. the pail root itself.
    def getSubDirToProcess(strategy: String, thisMoment: DateTime, i: Int) = strategy match {
      case "hourly" => thisMoment.minusHours(i).toString(DateTimeFormatter.format())
      case "daily" => thisMoment.minusDays(i).toString(DateTimeFormatter.format())
      case "weekly" => DateHelper.weekInterval(thisMoment.minusWeeks(i))
      case "all" => ""
      case _ => throw new RuntimeException("Unsupported strategy. Supported ones are: hourly|daily|weekly|all")
    }

    // toSet removes duplicates (e.g. strategy "all" produces the same path for every offset).
    val dirsToConsolidate = (0 until numTimePartitionUnitsToCover).map { i =>
      pailRoot + "/" + getSubDirToProcess(strategy, thisMoment, i)
    }.filter(exists).toSet

    dirsToConsolidate.foreach { subDirToProcess =>
      new PailConsolidate(pailRoot, subDirToProcess, pipelineLabel).run()
    }
  }

  /** Command-line options accepted by [[main]]. */
  override val options = {
    val cmdOptions = new Options()
    cmdOptions.addOption("i", "input-dir", true, "Input Directory")
    cmdOptions.addOption("p", "pipeline", true, "Pipeline")
    // The long name must match the key read in main; it was previously misspelled
    // ("num-parittion-units"), so the supplied value was never picked up.
    cmdOptions.addOption("n", "num-partition-units", true, "Number of Partition Units")
    cmdOptions.addOption("s", "strategy", true, "Pail Strategy")
    cmdOptions
  }
}