All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hindog.grid.hadoop.HadoopLauncher.scala Maven / Gradle / Ivy

package com.hindog.grid.hadoop

import com.hindog.grid.GridConfig
import com.hindog.grid.launch.Launcher
import com.hindog.grid.repo.Resource
import org.apache.commons.io.FileUtils
import org.apache.hadoop.conf.Configuration

import scala.collection.Seq

import java.io.File

trait HadoopLauncher extends Launcher[HadoopLauncher.Config] {

  override def isSubmitted: Boolean = {
    val stackMatch = new Throwable().getStackTrace.exists(_.getClassName == "org.apache.hadoop.util.RunJar")
    val envFlag = Option(System.getenv("GRID_SUBMIT")).nonEmpty
    val commandMatch = System.getProperty("sun.java.command").startsWith("org.apache.hadoop.util.RunJar ")
    val propMatch = Option(System.getProperty("hadoop.home.dir")).nonEmpty
    val isSubmitted = envFlag | stackMatch | commandMatch | propMatch

    isSubmitted
  }
  
  override protected[grid] def createLaunchConfig(args: Array[String] = Array.empty): HadoopLauncher.Config = {
    new HadoopLauncher.Config()
      .withMainClass(getClass.getName.stripSuffix("$"))
      .withArgs(args)
      .withShellCommand(Seq("spark-submit"))
      .withConf(_ => new Configuration(true))
  }

}

object HadoopLauncher {

  class Config extends Launcher.Config {

    override type Conf = Configuration

    //  arg("--jt", "mapreduce.jobtracker.address")
    //  arg("--fs", "fs.defaultFS")
    arg("--libjars", "tmpjars")

    override private[grid] var conf = new Configuration(true)

    override protected def getConfValue(key: String): Option[String] = Option(conf.get(key))

    override def buildProcess(): ProcessBuilder = {
      remoteHooks.foreach(_.apply(this))

      import scala.collection.JavaConverters._
      val envConf = HadoopEnvironment.loadConfiguration(true)
      val finalConf = getConf

      val defaultHadoopClasspath = HadoopEnvironment.classpath
      val fullClasspath = defaultHadoopClasspath ++ applicationClasspath
      val jdkJars = FileUtils.listFiles(new File(System.getProperty("java.home")), Array(".jar"), true).asScala.map(_.toURI.toURL.toString).toSet
      val classpathFilter = classpathFilters.foldLeft((cp: Iterable[Resource]) => cp.filter { jar => !jdkJars.contains(jar.uri.toURL.toString) }) {
        (acc, cur) => (cp: Iterable[Resource]) => cur(acc(cp))
      }

      val cp = classpathFilter(applicationClasspath)

      if (cp.nonEmpty && finalConf.get("tmpjars") == null) {
        finalConf.setStrings("tmpjars", cp.map(_.uri.toString).mkString(","))
      }

      val ignoreConfKeys = argumentConfKeys

      // TODO: fix substitutions
      val cmd = shellCommand ++
      Seq("jar", resolveApplicationJar, mainClass) ++
      arguments.flatMap(_.apply()) ++
      finalConf.iterator().asScala.map(me => me.getKey -> me.getValue).filterNot(kv => kv._2.contains("${") || envConf.get(kv._1) == kv._2 || ignoreConfKeys.contains(kv._1)).flatMap { case (key, value) => Array("--conf", s"$key=$value") }

      val builder = new ProcessBuilder(cmd.toSeq: _*)
      builder.environment().put("HADOOP_CLASSPATH", fullClasspath.map(_.toString).mkString(":"))
      builder
    }

    override def gridConfig: GridConfig = super.gridConfig.withInheritedSystemPropertiesFilter(_.startsWith("hadoop."))


  }


}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy