/*
* The MIT License
*
* Copyright (c) 2015 ECOLE POLYTECHNIQUE FEDERALE DE LAUSANNE, Switzerland,
* Group Fellay
* Modified work, Copyright (c) 2016 Istvan Bartha
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the Software
* is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package tasks
import tasks.caching._
import tasks.queue._
import tasks.deploy._
import tasks.util._
import tasks.util.config.TasksConfig
import tasks.fileservice._
import tasks.wire._
import tasks.elastic._
import tasks.shared._
import akka.actor._
import java.io.File
import scala.concurrent.Await
import scala.concurrent.duration._
import scala.util._
import cats.effect.unsafe.implicits.global
import org.http4s.ember.client.EmberClientBuilder
import cats.effect.IO
import org.http4s.ember.server.EmberServerBuilder
import tasks.fileservice.proxy.ProxyFileStorage
import cats.effect.kernel.Resource
import org.http4s.client.Client
import tasks.tasksConfig
import org.http4s.server.Server
import com.typesafe.config.ConfigFactory
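/** Immutable bundle of the handles that make up a running task system:
  * the queue actor, file service, result cache, node-local cache, the file
  * prefix under which new files are created, and bookkeeping such as
  * history, priority, labels and lineage. Instances are created by
  * `TaskSystemComponents.make` below and are cheap to copy with modified
  * fields (see `withChildPrefix`).
  */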
case class TaskSystemComponents private[tasks] (
private[tasks] val queue: QueueActor,
private[tasks] val fs: FileServiceComponent,
private[tasks] val actorsystem: ActorSystem,
private[tasks] val cache: TaskResultCache,
private[tasks] val nodeLocalCache: NodeLocalCache.State,
private[tasks] val filePrefix: FileServicePrefix,
private[tasks] val tasksConfig: TasksConfig,
private[tasks] val historyContext: HistoryContext,
private[tasks] val priority: Priority,
private[tasks] val labels: Labels,
private[tasks] val lineage: TaskLineage
) {
def withChildPrefix(name: String) =
this.copy(filePrefix = this.filePrefix.append(name))
def withChildPrefix(names: Seq[String]) =
this.copy(filePrefix = this.filePrefix.append(names))
def withFilePrefix[B](
prefix: Seq[String]
)(fun: TaskSystemComponents => B): B =
fun(this.withChildPrefix(prefix))
}
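/* Hypothetical usage sketch (`doWork` is an illustrative placeholder, not
 * part of this codebase): `withFilePrefix` scopes files created by the
 * given function under a child prefix:
 *
 *   components.withFilePrefix(Seq("run1", "samples")) { scoped =>
 *     doWork(scoped) // files created via `scoped` land under run1/samples/
 *   }
 */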
object TaskSystemComponents {
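/** Acquires every component of the task system as a single composed
  * `Resource`: the ActorSystem, file storage, caches, queue, and the
  * optional elastic (node allocation) machinery. Releasing the resource
  * tears everything down in reverse order of acquisition. The resolved
  * `HostConfiguration` is returned alongside the components.
  *
  * Hypothetical usage sketch (`myHostConfig`, `myConfig` and `program` are
  * illustrative placeholders):
  *
  *   TaskSystemComponents
  *     .make(myHostConfig, Resource.pure[IO, Option[ElasticSupport]](None), myConfig)
  *     .use { case (components, hostConfig) => program(components) }
  */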
def make(
hostConfig: Resource[IO, HostConfiguration],
elasticSupport: Resource[IO, Option[ElasticSupport]],
config: TasksConfig
): Resource[IO, (TaskSystemComponents, HostConfiguration)] =
hostConfig.flatMap { hostConfig =>
elasticSupport.attempt
.map {
case Right(x) => x
case Left(e) =>
scribe.error(
"Failed to create elasticsupport. Continue without it. If this is a worker then self shutdown won't work. If this is a master then spawning nodes won't work.",
e
)
None
}
.flatMap { elasticSupport =>
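// Port layout convention: the proxy file storage server listens two
// ports above the master's port, and the package server one port above
// this host's bind port.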
val masterAddress: tasks.util.SimpleSocketAddress =
hostConfig.master
val proxyStoragePort = masterAddress.port + 2
val packageServerPort = hostConfig.myAddressBind.getPort + 1
val packageServerHostname = hostConfig.myAddressExternal.getOrElse(hostConfig.myAddressBind).getHostName
val rootHistory = NoHistory
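// The S3 client is only constructed when the storage URI uses the s3
// scheme or remote S3 access is enabled; the Resource finalizer closes
// the underlying AWS SDK client.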
val s3Client =
Resource.make[IO, Option[tasks.fileservice.s3.S3]](IO {
if (
config.storageURI.getScheme == "s3" || config.s3RemoteEnabled
) {
val s3AWSSDKClient =
tasks.fileservice.s3.S3
.makeAWSSDKClient(config.s3RegionProfileName)
Option(new tasks.fileservice.s3.S3(s3AWSSDKClient))
} else None
})(v => IO { v.foreach(_.s3.close) })
val httpClient =
if (config.httpRemoteEnabled)
EmberClientBuilder
.default[IO]
.build
.map(Option(_))
else Resource.pure[IO, Option[Client[IO]]](None)
val streamHelper = httpClient.flatMap { http =>
s3Client.map { s3 =>
new StreamHelper(s3, http)
}
}
val emitLog = Resource.eval(IO {
scribe.info("Listening on: " + hostConfig.myAddressBind.toString)
scribe.info("External address: " + hostConfig.myAddressExternal.toString)
scribe.info("CPU: " + hostConfig.availableCPU.toString)
scribe.info("RAM: " + hostConfig.availableMemory.toString)
scribe.info("SCRATCH: " + hostConfig.availableScratch.toString)
scribe
.info("GPU: " + hostConfig.availableGPU.mkString("[", ", ", "]"))
scribe.info("Roles: " + hostConfig.myRoles.mkString(", "))
scribe.info("Elastic: " + elasticSupport)
if (
hostConfig.availableCPU > Runtime
.getRuntime()
.availableProcessors()
) {
scribe.warn(
"Number of CPUs in the machine is " + Runtime
.getRuntime()
.availableProcessors + ". numCPU should not be greater than this."
)
}
scribe.info("Master node address is: " + hostConfig.master.toString)
})
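// Client side of the proxy storage: worker nodes without direct access
// to the backing store read and write files through an HTTP endpoint
// served by the main application (see proxyFileStorageHttpServer below).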
val proxyStorageClient: Resource[IO, ManagedFileStorage] = Resource
.eval(IO {
scribe.info(
s"Trying to use main application's http proxy storage on address ${masterAddress.hostName} and port ${proxyStoragePort}"
)
})
.flatMap { _ =>
import org.http4s.Uri
ProxyFileStorage
.makeClient(
uri = org.http4s.Uri(
scheme = Some(Uri.Scheme.http),
authority = Some(
Uri.Authority(
host =
Uri.Host.unsafeFromString(masterAddress.hostName),
port = Some(proxyStoragePort)
)
)
)
)
}
val remoteFileStorage = streamHelper
.map(streamHelper => new RemoteFileStorage()(streamHelper, config))
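// Server side of the proxy storage: exposes the chosen
// ManagedFileStorage over HTTP on all interfaces so that worker nodes
// can use the proxy client above.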
def proxyFileStorageHttpServer(storage: ManagedFileStorage) = {
Resource
.eval(IO {
scribe.info("Starting http server for proxy file storage")
})
.flatMap { _ =>
import com.comcast.ip4s._
EmberServerBuilder
.default[IO]
.withHost(ipv4"0.0.0.0")
.withPort(com.comcast.ip4s.Port.fromInt(proxyStoragePort).get)
.withHttpApp(ProxyFileStorage.service(storage).orNotFound)
.build
.evalTap(server =>
IO {
scribe
.info(
s"Started proxy storage server on ${server.baseUri}"
)
}
)
}
}
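// Storage selection, in order:
//   1. an empty storage URI (or explicit proxy configuration) on a
//      non-queue node -> proxy through the main application;
//   2. an s3:// URI -> S3-backed storage;
//   3. a file:// URI or plain path -> folder-backed storage, creating
//      the folder on the master if it does not exist yet.
// The result is optionally wrapped in encryption and/or exposed through
// the proxy HTTP server.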
val managedFileStorage = {
val fileStore =
if (
(config.storageURI.toString == "" || config.connectToProxyFileServiceOnMain) && !hostConfig.isQueue
) {
proxyStorageClient
} else {
val s3bucket =
if (
config.storageURI.getScheme != null && config.storageURI.getScheme == "s3"
) {
Some(
(
config.storageURI.getAuthority,
config.storageURI.getPath.drop(1)
)
)
} else None
if (s3bucket.isDefined) {
val actorsystem = 1 // shadow the outer implicit conversion
val _ = actorsystem // suppress unused warning
s3Client.map(s3Client =>
new s3.S3Storage(
bucketName = s3bucket.get._1,
folderPrefix = s3bucket.get._2,
sse = config.s3ServerSideEncryption,
cannedAcls = config.s3CannedAcl,
grantFullControl = config.s3GrantFullControl,
uploadParallelism = config.s3UploadParallelism,
s3 = s3Client.get
)(config)
)
} else {
Resource
.eval(IO {
val storageFolderPath =
if (config.storageURI.getScheme == null)
config.storageURI.getPath
else if (config.storageURI.getScheme == "file")
config.storageURI.getPath
else {
scribe.error(
s"${config.storageURI}: unknown protocol. Use s3://bucket/key, file:/// (with absolute path), or a plain path string (absolute or relative)."
)
throw new RuntimeException(
s"${config.storageURI}: unknown protocol. Use s3://bucket/key, file:/// (with absolute path), or a plain path string (absolute or relative)."
)
}
val storageFolder =
new File(storageFolderPath).getCanonicalFile
if (storageFolder.isFile) {
scribe.error(s"$storageFolder is a file. Abort.")
throw new RuntimeException(
s"$storageFolder is a file. Abort."
)
}
if (!storageFolder.isDirectory) {
if (hostConfig.isQueue) {
scribe.warn(
s"Folder $storageFolder does not exist and this is a master node. Trying to create the folder $storageFolder for file storage."
)
storageFolder.mkdirs
Resource.pure[IO, ManagedFileStorage](
(new FolderFileStorage(storageFolder)(config))
)
} else {
scribe.warn(
s"Folder $storageFolder does not exist. This is not a master node. Falling back to proxy storage via the main node."
)
proxyStorageClient
}
} else {
Resource.pure[IO, ManagedFileStorage](
new FolderFileStorage(storageFolder)(config)
)
}
})
.flatMap(identity)
}
}
fileStore
.flatMap {
case fs: ManagedFileStorage
if config.storageEncryptionKey.isDefined =>
Resource.make(
IO(
new EncryptedManagedFileStorage(
fs,
config.storageEncryptionKey.get
)
)
)(e => IO(e.destroyKey()))
case fs: ManagedFileStorage => Resource.pure(fs)
}
.flatMap { fileStore =>
fileStore match {
case fs: ManagedFileStorage if config.proxyStorage =>
proxyFileStorageHttpServer(fs).map(_ => fs)
case fs: ManagedFileStorage => Resource.pure(fs)
}
}
}
val fileServiceComponent = managedFileStorage
.flatMap(managedFileStorage =>
remoteFileStorage.map { remoteFileStorage =>
scribe.info("File store: " + managedFileStorage) // wrap this
FileServiceComponent(
managedFileStorage,
remoteFileStorage
)
}
)
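// Result cache: either a SharedFileCache persisted through the file
// service or a no-op DisabledCache, depending on configuration.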
def cacheActor(fs: FileServiceComponent) = Resource.eval(IO {
val cache: Cache =
if (config.cacheEnabled)
new SharedFileCache()(
fs,
config
)
else new DisabledCache
new TaskResultCache(cache, fs, config)
})
val nodeLocalCache =
Resource.eval(NodeLocalCache.start.timeout(60 seconds))
def initFailed(remoteNodeRegistry: Option[ActorRef]): Unit = {
if (!hostConfig.isApp && hostConfig.isWorker) {
scribe.error(
"Initialization failed. This is a follower node, notifying remote node registry."
)
remoteNodeRegistry.foreach(
_ ! InitFailed(
PendingJobId(elasticSupport.get.getNodeName.getNodeName)
)
)
}
}
case class ActorSet1(
queueActor: ActorRef,
reaperActor: ActorRef,
remoteNodeRegistry: Option[ActorRef]
)
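// Creates the reaper (which blocks shutdown until all watched actors
// have terminated), resolves or creates the queue actor depending on
// whether this node hosts the queue, and on worker nodes looks up the
// remote node registry on the master. The finalizer awaits the reaper
// and poisons the local queue actor.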
def makeActors(
fileServiceComponent: FileServiceComponent,
system: ActorSystem,
cache: TaskResultCache
) = {
Resource.make {
IO.interruptible {
val reaperActor: ActorRef =
elasticSupport.flatMap(
_.reaperFactory.map(_.apply(system, config))
) match {
case None =>
system.actorOf(
Props[ShutdownActorSystemReaper](),
name = "reaper"
)
case Some(reaper) => reaper
}
val remoteNodeRegistry: Option[ActorRef] =
if (
!hostConfig.isApp && hostConfig.isWorker && elasticSupport.isDefined
) {
scribe.info(
"This is a remote worker node. Looking for remote node registry."
)
val remoteActorPath =
s"akka://tasks@${masterAddress.getHostName}:${masterAddress.getPort}/user/noderegistry"
val noderegistry = Try(
Await.result(
system
.actorSelection(remoteActorPath)
.resolveOne(60 seconds),
atMost = 60 seconds
)
)
scribe.info("Remote node registry: " + noderegistry)
noderegistry match {
case Success(nr) => Some(nr)
case Failure(e) =>
scribe.error(
e,
"Failed to contact remote node registry. Shutting down."
)
try {
elasticSupport.get.selfShutdownNow()
} finally {
scribe.info("Stop jvm")
System.exit(1)
}
None
}
} else None
val queueActor =
try {
if (hostConfig.isQueue) {
val localActor =
system.actorOf(
Props(new TaskQueue(Nil, cache)(config))
.withDispatcher("taskqueue"),
"queue"
)
reaperActor ! WatchMe(localActor)
localActor
} else {
val actorPath =
s"akka://tasks@${masterAddress.getHostName}:${masterAddress.getPort}/user/queue"
val remoteActor = Await.result(
system
.actorSelection(actorPath)
.resolveOne(600 seconds),
atMost = 600 seconds
)
remoteActor
}
} catch {
case e: Throwable => {
initFailed(remoteNodeRegistry)
throw e
}
}
scribe.info("Queue: " + queueActor)
ActorSet1(queueActor, reaperActor, remoteNodeRegistry)
}
}(actorSet =>
IO {
awaitReaper(actorSet.reaperActor)
if (hostConfig.isQueue) {
actorSet.queueActor ! PoisonPill
}
}
)
}
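// Instantiates the node-allocation backend (if any) with this host's
// resources; on the master the package server address is advertised so
// that spawned workers can download the application code.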
def elasticSupportFactory(
queueActor: ActorRef
): Resource[IO, Option[ElasticSupport#Inner]] =
if (hostConfig.isApp || hostConfig.isWorker) {
val codeAddress =
if (hostConfig.isApp)
Some(
elastic.CodeAddress(
SimpleSocketAddress(
packageServerHostname,
packageServerPort
),
config.codeVersion
)
)
else None
Resource.eval[IO, Option[ElasticSupport#Inner]](IO {
elasticSupport.map(es =>
es(
masterAddress = hostConfig.master,
queueActor = QueueActor(queueActor),
resource = ResourceAvailable(
cpu = hostConfig.availableCPU,
memory = hostConfig.availableMemory,
scratch = hostConfig.availableScratch,
gpu = hostConfig.availableGPU,
image = hostConfig.image
),
codeAddress = codeAddress,
eventListener = None
)(config)
)
})
} else Resource.pure(None)
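// On the master, packages the running application into a self-contained
// executable and serves it over HTTP so that newly spawned worker nodes
// can bootstrap themselves from it.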
def packageServer(
elasticSupportFactory: Option[ElasticSupport#Inner]
): Resource[IO, Option[Server]] = Resource
.eval(IO {
if (hostConfig.isApp && elasticSupportFactory.isDefined) {
Try(Deployment.pack(config)) match {
case Success(pack) =>
scribe
.info(
"Written executable package to: " + pack.getAbsolutePath
)
val service = new PackageServer(pack)
val actorsystem = 1 // shadow the outer implicit conversion
val _ = actorsystem // suppress unused warning
import com.comcast.ip4s._
val server = EmberServerBuilder
.default[IO]
.withHost(ipv4"0.0.0.0")
.withPort(
com.comcast.ip4s.Port.fromInt(packageServerPort).get
)
.withHttpApp(service.route.orNotFound)
.build
// scribe.info(s"Started package server on $server")
(server.map(Some(_)): Resource[IO, Option[Server]])
case Failure(e) =>
scribe.error(
e,
"Packaging self failed. Main thread exited? Skipping package server startup."
)
Resource.pure[IO, Option[Server]](Option.empty[Server])
}
} else Resource.pure[IO, Option[Server]](Option.empty[Server])
})
.flatMap(identity)
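// The node registry actor runs on the master and tracks worker nodes;
// it is watched by the reaper and poisoned on release.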
def localNodeRegistry(
elasticSupportFactory: Option[ElasticSupport#Inner],
reaperActor: ActorRef,
system: ActorSystem
): Resource[IO, Option[ActorRef]] =
Resource.make(IO {
if (hostConfig.isApp && elasticSupportFactory.isDefined) {
val props = Props(elasticSupportFactory.get.createRegistry.get)
val localActor = system
.actorOf(
props.withDispatcher("noderegistry-pinned"),
"noderegistry"
)
reaperActor ! WatchMe(localActor)
Some(localActor)
} else None
}) { localActor =>
IO { localActor.foreach(_ ! PoisonPill) }
}
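// The launcher actor executes tasks on this node. It is only started on
// worker nodes with at least one available CPU, and advertises this
// host's resources (versioned by code version) to the queue.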
def launcherActor(
queueActor: ActorRef,
nodeLocalCache: NodeLocalCache.State,
fs: FileServiceComponent,
cache: TaskResultCache,
system: ActorSystem
) =
Resource.eval(IO {
if (hostConfig.availableCPU > 0 && hostConfig.isWorker) {
val refreshInterval = config.askInterval
val localActor = system.actorOf(
Props(
new Launcher(
queueActor,
nodeLocalCache,
VersionedResourceAvailable(
config.codeVersion,
ResourceAvailable(
cpu = hostConfig.availableCPU,
memory = hostConfig.availableMemory,
scratch = hostConfig.availableScratch,
gpu = hostConfig.availableGPU,
image = hostConfig.image
)
),
refreshInterval = refreshInterval,
remoteStorage = fs.remote,
managedStorage = fs.storage,
cache = cache
)(config)
).withDispatcher("launcher"),
"launcher"
)
Some(localActor)
} else None
})
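// Assembles the public TaskSystemComponents value with root defaults:
// empty file prefix, zero priority, no labels, root lineage.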
def components(
queueActor: ActorRef,
fileServiceComponent: FileServiceComponent,
cache: TaskResultCache,
nodeLocalCache: NodeLocalCache.State,
system: ActorSystem
) = TaskSystemComponents(
queue = QueueActor(queueActor),
fs = fileServiceComponent,
actorsystem = system,
cache = cache,
nodeLocalCache = nodeLocalCache,
filePrefix = FileServicePrefix(Vector()),
tasksConfig = config,
historyContext = rootHistory,
priority = Priority(0),
labels = Labels.empty,
lineage = TaskLineage.root
)
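// On worker nodes, verifies that the temp folder is writable (if
// configured to check), then announces this node and its resources to
// the master's node registry and installs the self-shutdown actor.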
def notifyRegistry(
elasticSupportFactory: Option[ElasticSupport#Inner],
launcherActor: Option[ActorRef],
remoteNodeRegistry: Option[ActorRef],
system: ActorSystem
) =
Resource.eval(IO {
if (
!hostConfig.isApp && hostConfig.isWorker && elasticSupportFactory.isDefined && launcherActor.isDefined
) {
scribe.info("Getting node name..")
val nodeName = elasticSupportFactory.get.getNodeName
scribe.info(
"This is a worker node. ElasticNodeAllocation is enabled. Notifying remote node registry about this node. Node name: " + nodeName + ". Launcher actor address is: " + launcherActor.get
)
val tempFolderWriteable =
if (!config.checkTempFolderOnSlaveInitialization) true
else
Try {
val testFile = tasks.util.TempFile.createTempFile("test")
testFile.delete
}.isSuccess
if (!tempFolderWriteable) {
scribe.error(
s"Temp folder is not writeable (${System.getProperty("java.io.tmpdir")}). Failing slave init."
)
initFailed(remoteNodeRegistry)
} else {
remoteNodeRegistry.get ! NodeComingUp(
Node(
RunningJobId(nodeName),
ResourceAvailable(
hostConfig.availableCPU,
hostConfig.availableMemory,
hostConfig.availableScratch,
hostConfig.availableGPU,
hostConfig.image
),
launcherActor.get
)
)
system.actorOf(
Props(elasticSupportFactory.get.createSelfShutdown)
.withDispatcher("selfshutdown-pinned")
)
}
} else {
scribe.info("This is not a follower node.")
}
})
def awaitReaper(reaperActor: ActorRef) = IO.interruptible {
val latch = new java.util.concurrent.CountDownLatch(1)
reaperActor ! Latch(latch)
scribe.info(
"Shutting down tasksystem. Blocking until all watched actors have terminated."
)
latch.await
}
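// Builds the ActorSystem: local-only setups use the in-process actor
// ref provider, remote setups enable Artery with the configured bind and
// canonical (external) addresses plus the custom wire serializers. The
// programmatic config is layered between the default overrides and the
// akka.conf / reference configuration.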
def makeAS = Resource.make(IO {
val finalAkkaConfiguration = {
val actorProvider = hostConfig match {
case _: LocalConfiguration => "akka.actor.LocalActorRefProvider"
case _ => "akka.remote.RemoteActorRefProvider"
}
val serializers = hostConfig match {
case _: LocalConfiguration => ""
case _ => """
serializers {
static = "tasks.wire.StaticMessageSerializer"
sch = "tasks.wire.ScheduleTaskSerializer"
sf = "tasks.wire.SharedFileSerializer"
}
serialization-bindings {
"tasks.wire.StaticMessage" = static
"tasks.queue.ScheduleTask" = sch
"tasks.fileservice.SharedFile" = sf
}
"""
}
val externalAddress = hostConfig.myAddressExternal.getOrElse(hostConfig.myAddressBind)
val internalAddress = hostConfig.myAddressBind
val akkaProgrammaticalConfiguration =
ConfigFactory.parseString(s"""
akka {
actor {
provider = "${actorProvider}"
$serializers
}
remote {
artery {
canonical.hostname = "${externalAddress.getHostName}"
canonical.port = ${externalAddress.getPort.toString}
bind.hostname = "${internalAddress.getHostName}"
bind.port = ${internalAddress.getPort.toString}
}
}
}
""")
ConfigFactory.defaultOverrides
.withFallback(akkaProgrammaticalConfiguration)
.withFallback(ConfigFactory.parseResources("akka.conf"))
.withFallback(ConfigFactory.load)
}
ActorSystem(config.actorSystemName, finalAkkaConfiguration)
})(as =>
IO.fromFuture {
IO(as.terminate())
}.void
)
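// Wire everything together. Resource composition guarantees that the
// components are released in reverse order of the acquisitions below.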
for {
_ <- emitLog
as <- makeAS
fileServiceComponent <- fileServiceComponent
nodeLocalCache <- nodeLocalCache
cache <- cacheActor(fileServiceComponent)
actorSet <- makeActors(
fileServiceComponent = fileServiceComponent,
system = as,
cache = cache
)
elasticSupportFactory <- elasticSupportFactory(
actorSet.queueActor
)
_ <- packageServer(
elasticSupportFactory
)
localNodeRegistry <- localNodeRegistry(
elasticSupportFactory,
actorSet.reaperActor,
as
)
launcherActor <- launcherActor(
queueActor = actorSet.queueActor,
nodeLocalCache = nodeLocalCache,
fs = fileServiceComponent,
system = as,
cache = cache
)
_ <- notifyRegistry(
elasticSupportFactory,
launcherActor,
actorSet.remoteNodeRegistry,
as
)
} yield (
components(
actorSet.queueActor,
fileServiceComponent,
cache,
nodeLocalCache,
as
),
hostConfig
)
}
}
}