
org.apache.spark.deploy.csharp.CSharpRunner.scala
C# language binding and extensions to Apache Spark
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

package org.apache.spark.deploy.csharp

import java.io.File
import java.util.concurrent.{Semaphore, TimeUnit}

import org.apache.hadoop.fs.Path

import org.apache.spark.SparkConf
import org.apache.spark.SecurityManager
import org.apache.spark.api.csharp.CSharpBackend
import org.apache.spark.deploy.{PythonRunner, SparkHadoopUtil, SparkSubmitArguments}
import org.apache.spark.util.{RedirectThread, Utils}
import org.apache.spark.util.csharp.{Utils => CSharpSparkUtils}
/**
 * Launched by sparkclr-submit.cmd. It launches CSharpBackend, gets its port number and launches
 * the C# process, passing the port number to it.
 * The runner implementation is mostly identical to RRunner, with SparkCLR-specific customizations.
 */
object CSharpRunner {
  def main(args: Array[String]): Unit = {
    // Determines whether CSharpBackend needs to be run in debug mode.
    // In debug mode this runner will not launch the C# process.
    var runInDebugMode = false

    if (args.length == 0) {
      throw new IllegalArgumentException("At least one argument is expected for CSharpRunner")
    }

    if (args.length == 1 && args(0).equalsIgnoreCase("debug")) {
      runInDebugMode = true
      println("[CSharpRunner.main] Debug mode is set. CSharp executable will not be launched as a sub-process.")
    }
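
    // Argument shapes handled by the branches below (reconstructed from this parsing
    // logic; the exact command lines produced by sparkclr-submit.cmd may differ):
    //   CSharpRunner debug                          -> run CSharpBackend only, no C# process
    //   CSharpRunner <driver.zip> <app.exe> [args]  -> unzip the driver package, then launch app.exe
    //   CSharpRunner <driver-dir> <app.exe> [args]  -> local mode, directory already unpacked
    //   CSharpRunner <app.exe> [args]               -> launch app.exe directly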
    var csharpExecutable = ""
    var otherArgs: Array[String] = null

    if (!runInDebugMode) {
      if (args(0).toLowerCase.endsWith(".zip")) {
        var zipFileName = args(0)
        val driverDir = new File("").getAbsoluteFile

        if (zipFileName.toLowerCase.startsWith("hdfs://")) {
          // Standalone cluster mode: the driver zip first needs to be downloaded from HDFS.
          zipFileName = downloadDriverFile(zipFileName, driverDir.getAbsolutePath).getName
        }

        println(s"[CSharpRunner.main] Unzipping driver $zipFileName in $driverDir")
        CSharpSparkUtils.unzip(new File(zipFileName), driverDir)
        csharpExecutable = PythonRunner.formatPath(args(1)) // reusing Windows-specific formatting in PythonRunner
        otherArgs = args.slice(2, args.length)
      } else if (new File(args(0)).isDirectory) {
        // In local mode no zip file is generated when a directory is given, so skip decompression.
        csharpExecutable = PythonRunner.formatPath(args(1)) // reusing Windows-specific formatting in PythonRunner
        otherArgs = args.slice(2, args.length)
      } else {
        csharpExecutable = PythonRunner.formatPath(args(0))
        otherArgs = args.slice(1, args.length)
      }
    } else {
      otherArgs = args.slice(1, args.length)
    }

    val processParameters = new java.util.ArrayList[String]()
    processParameters.add(csharpExecutable)
    otherArgs.foreach(arg => processParameters.add(arg))

    println("[CSharpRunner.main] Starting CSharpBackend!")

    // Time to wait for CSharpBackend to initialize, in seconds.
    val backendTimeout = sys.env.getOrElse("CSHARPBACKEND_TIMEOUT", "120").toInt
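    // A longer timeout can be supplied for slow environments via the environment
    // variable read above, e.g. (hypothetical cmd usage): set CSHARPBACKEND_TIMEOUT=300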

    // Launch a SparkCLR backend server for the C# process to connect to; this will let it see our
    // Java system properties etc.
    val csharpBackend = new CSharpBackend()
    @volatile var csharpBackendPortNumber = 0
    val initialized = new Semaphore(0)
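    // The semaphore is the initialization handshake: the backend thread releases it once
    // init() has bound a port, so the main thread proceeds (or times out) only when the
    // port number is known.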
    val csharpBackendThread = new Thread("CSharpBackend") {
      override def run() {
        csharpBackendPortNumber = csharpBackend.init()
        println("[CSharpRunner.main] Port number used by CSharpBackend is " + csharpBackendPortNumber) // TODO - send to logger also
        initialized.release()
        csharpBackend.run()
      }
    }
    csharpBackendThread.start()

    if (initialized.tryAcquire(backendTimeout, TimeUnit.SECONDS)) {
      if (!runInDebugMode) {
        var returnCode = -1
        try {
          val builder = new ProcessBuilder(processParameters)
          val env = builder.environment()
          env.put("CSHARPBACKEND_PORT", csharpBackendPortNumber.toString)
          for ((key, value) <- Utils.getSystemProperties if key.startsWith("spark.")) {
            env.put(key, value)
            println("[CSharpRunner.main] adding key=" + key + " and value=" + value + " to environment")
          }
          builder.redirectErrorStream(true) // Ugly but needed for stdout and stderr to synchronize
          val process = builder.start()
          new RedirectThread(process.getInputStream, System.out, "redirect CSharp output").start()
          returnCode = process.waitFor()
          closeBackend(csharpBackend)
        } catch {
          case t: Throwable =>
            println("[CSharpRunner.main] " + t.getMessage + "\n" + t.getStackTrace.mkString("\n"))
        }

        println("[CSharpRunner.main] Return code of CSharp process: " + returnCode)
        System.exit(returnCode)
      } else {
        println("**************************************************************************")
        println("* [CSharpRunner.main] Backend running in debug mode. Press enter to exit *")
        println("**************************************************************************")
        Console.readLine()
        closeBackend(csharpBackend)
        System.exit(0)
      }
    } else {
      // scalastyle:off println
      println("[CSharpRunner.main] CSharpBackend did not initialize in " + backendTimeout + " seconds")
      // scalastyle:on println
      System.exit(-1)
    }
  }

  /**
   * Download HDFS file into the supplied directory and return its local path.
   * Will throw an exception if there are errors during downloading.
   */
  private def downloadDriverFile(hdfsFilePath: String, driverDir: String): File = {
    val sparkConf = new SparkConf()
    val filePath = new Path(hdfsFilePath)
    val hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf)
    val jarFileName = filePath.getName
    val localFile = new File(driverDir, jarFileName)
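
    // Utils.fetchFile resolves the hdfs:// URI through hadoopConf and copies the file
    // into driverDir; with useCache = false the timestamp argument should not matter
    // (it appears to be used only for executor-side caching in the Spark 1.x API this targets).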
    if (!localFile.exists()) { // May already exist if running multiple workers on one node
      println(s"Copying user file $filePath to $driverDir")
      Utils.fetchFile(
        hdfsFilePath,
        new File(driverDir),
        sparkConf,
        new SecurityManager(sparkConf),
        hadoopConf,
        System.currentTimeMillis(),
        useCache = false)
    }

    if (!localFile.exists()) { // Verify copy succeeded
      throw new Exception(s"Did not see expected $jarFileName in $driverDir")
    }

    localFile
  }

  def closeBackend(csharpBackend: CSharpBackend): Unit = {
    println("[CSharpRunner.main] closing CSharpBackend")
    csharpBackend.close()
  }
}