All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.cloudera.livy.utils.SparkProcessBuilder.scala Maven / Gradle / Ivy

/*
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.cloudera.livy.utils

import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

import com.cloudera.livy.{LivyConf, Logging}
import com.cloudera.livy.util.LineBufferedProcess

/**
 * Fluent, mutable builder that assembles a `spark-submit` command line and launches it
 * as a child process.
 *
 * Every setter records its value and returns `this` so calls can be chained. Nothing is
 * executed until [[start]] is invoked.
 *
 * @param livyConf Livy configuration; supplies the default `spark-submit` executable and
 *                 the impersonation switch consulted by [[start]].
 */
class SparkProcessBuilder(livyConf: LivyConf) extends Logging {

  private[this] var _executable: String = livyConf.sparkSubmit()
  private[this] var _master: Option[String] = None
  private[this] var _deployMode: Option[String] = None
  private[this] var _className: Option[String] = None
  private[this] var _name: Option[String] = None
  private[this] val _conf = mutable.HashMap[String, String]()
  // The buffers themselves are mutated in place, so the references never need reassigning.
  private[this] val _driverClassPath: ArrayBuffer[String] = ArrayBuffer()
  private[this] var _proxyUser: Option[String] = None
  private[this] var _queue: Option[String] = None
  private[this] val _env: ArrayBuffer[(String, String)] = ArrayBuffer()
  private[this] var _redirectOutput: Option[ProcessBuilder.Redirect] = None
  private[this] var _redirectError: Option[ProcessBuilder.Redirect] = None
  private[this] var _redirectErrorStream: Option[Boolean] = None

  /** Overrides the executable used as the first element of the command line. */
  def executable(executable: String): SparkProcessBuilder = {
    _executable = executable
    this
  }

  /** Sets the value passed as `--master`. */
  def master(masterUrl: String): SparkProcessBuilder = {
    _master = Some(masterUrl)
    this
  }

  /** Sets the value passed as `--deploy-mode`. */
  def deployMode(deployMode: String): SparkProcessBuilder = {
    _deployMode = Some(deployMode)
    this
  }

  /** Sets the value passed as `--class`. */
  def className(className: String): SparkProcessBuilder = {
    _className = Some(className)
    this
  }

  /** Sets the value passed as `--name`. */
  def name(name: String): SparkProcessBuilder = {
    _name = Some(name)
    this
  }

  /** Returns the value previously recorded for `key`, if any. */
  def conf(key: String): Option[String] = _conf.get(key)

  /**
   * Records a configuration entry, replacing any earlier value for the same key.
   *
   * @param key   configuration key
   * @param value configuration value
   * @param admin accepted for source compatibility; it is not read by this method
   */
  def conf(key: String, value: String, admin: Boolean = false): SparkProcessBuilder = {
    _conf(key) = value
    this
  }

  /** Records every `(key, value)` pair of `conf`, later pairs overriding earlier ones. */
  def conf(conf: Traversable[(String, String)]): SparkProcessBuilder = {
    conf.foreach { case (key, value) => this.conf(key, value) }
    this
  }

  /** Records `spark.driver.extraJavaOptions`. */
  def driverJavaOptions(driverJavaOptions: String): SparkProcessBuilder = {
    conf("spark.driver.extraJavaOptions", driverJavaOptions)
  }

  /** Appends a single entry to the driver class path. */
  def driverClassPath(classPath: String): SparkProcessBuilder = {
    _driverClassPath += classPath
    this
  }

  /** Appends several entries to the driver class path, preserving their order. */
  def driverClassPaths(classPaths: Traversable[String]): SparkProcessBuilder = {
    _driverClassPath ++= classPaths
    this
  }

  /** Records `spark.driver.cores` from an integer. */
  def driverCores(driverCores: Int): SparkProcessBuilder = {
    this.driverCores(driverCores.toString)
  }

  /** Records `spark.driver.memory`. */
  def driverMemory(driverMemory: String): SparkProcessBuilder = {
    conf("spark.driver.memory", driverMemory)
  }

  /** Records `spark.driver.cores`. */
  def driverCores(driverCores: String): SparkProcessBuilder = {
    conf("spark.driver.cores", driverCores)
  }

  /** Records `spark.executor.cores` from an integer. */
  def executorCores(executorCores: Int): SparkProcessBuilder = {
    this.executorCores(executorCores.toString)
  }

  /** Records `spark.executor.cores`. */
  def executorCores(executorCores: String): SparkProcessBuilder = {
    conf("spark.executor.cores", executorCores)
  }

  /** Records `spark.executor.memory`. */
  def executorMemory(executorMemory: String): SparkProcessBuilder = {
    conf("spark.executor.memory", executorMemory)
  }

  /** Records `spark.executor.instances` from an integer. */
  def numExecutors(numExecutors: Int): SparkProcessBuilder = {
    this.numExecutors(numExecutors.toString)
  }

  /** Records `spark.executor.instances`. */
  def numExecutors(numExecutors: String): SparkProcessBuilder = {
    this.conf("spark.executor.instances", numExecutors)
  }

  /**
   * Sets the user passed as `--proxy-user`. The option is only emitted by [[start]] when
   * `LivyConf.IMPERSONATION_ENABLED` is true.
   */
  def proxyUser(proxyUser: String): SparkProcessBuilder = {
    _proxyUser = Some(proxyUser)
    this
  }

  /** Sets the value passed as `--queue`. */
  def queue(queue: String): SparkProcessBuilder = {
    _queue = Some(queue)
    this
  }

  /** Adds an environment variable to be set in the child process. */
  def env(key: String, value: String): SparkProcessBuilder = {
    _env += ((key, value))
    this
  }

  /** Sets the destination of the child process's standard output. */
  def redirectOutput(redirect: ProcessBuilder.Redirect): SparkProcessBuilder = {
    _redirectOutput = Some(redirect)
    this
  }

  /** Sets the destination of the child process's standard error. */
  def redirectError(redirect: ProcessBuilder.Redirect): SparkProcessBuilder = {
    _redirectError = Some(redirect)
    this
  }

  /** Sets the `redirectErrorStream` flag forwarded to the underlying `ProcessBuilder`. */
  def redirectErrorStream(redirect: Boolean): SparkProcessBuilder = {
    _redirectErrorStream = Some(redirect)
    this
  }

  /**
   * Builds the command line from the recorded settings and starts the process.
   *
   * Options are emitted in this order: `--master`, `--deploy-mode`, `--name`, `--class`,
   * one `--conf key=value` per configuration entry (except `spark.submit.pyFiles`, which
   * is emitted as `--py-files`), `--driver-class-path`, `--proxy-user` (only when
   * impersonation is enabled), `--queue`, the application file, then `args`.
   *
   * @param file application file to submit; `"spark-internal"` is used when absent
   * @param args arguments appended after the application file
   * @return the started process wrapped in a [[LineBufferedProcess]]
   */
  def start(file: Option[String], args: Traversable[String]): LineBufferedProcess = {
    val arguments = ArrayBuffer(_executable)

    // Emits "<option> <value>" only when a value has been set.
    def addOpt(option: String, value: Option[String]): Unit = {
      value.foreach { v =>
        arguments += option
        arguments += v
      }
    }

    // Emits "<option> <joined values>" only when there is at least one value.
    def addList(option: String, values: Traversable[String], separator: String): Unit = {
      if (values.nonEmpty) {
        arguments += option
        arguments += values.mkString(separator)
      }
    }

    addOpt("--master", _master)
    addOpt("--deploy-mode", _deployMode)
    addOpt("--name", _name)
    addOpt("--class", _className)
    _conf.foreach { case (key, value) =>
      if (key == "spark.submit.pyFiles") {
        arguments += "--py-files"
        arguments += value
      } else {
        arguments += "--conf"
        arguments += s"$key=$value"
      }
    }

    // A class path is delimited by the platform path separator (":" or ";"), not by a
    // comma; joining with "," would collapse several entries into one invalid entry.
    addList("--driver-class-path", _driverClassPath, java.io.File.pathSeparator)

    if (livyConf.getBoolean(LivyConf.IMPERSONATION_ENABLED)) {
      addOpt("--proxy-user", _proxyUser)
    }

    addOpt("--queue", _queue)

    arguments += file.getOrElse("spark-internal")
    arguments ++= args

    // Quoted rendering used for the log line only; the process receives `arguments` as-is.
    val argsString = arguments
      .map("'" + _.replace("'", "\\'") + "'")
      .mkString(" ")

    info(s"Running $argsString")

    val pb = new ProcessBuilder(arguments.asJava)
    val env = pb.environment()

    _env.foreach { case (key, value) => env.put(key, value) }

    // Explicit lambdas select the one-argument setter overloads unambiguously.
    _redirectOutput.foreach(redirect => pb.redirectOutput(redirect))
    _redirectError.foreach(redirect => pb.redirectError(redirect))
    _redirectErrorStream.foreach(flag => pb.redirectErrorStream(flag))

    new LineBufferedProcess(pb.start())
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy