All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.dimajix.flowman.execution.Runner.scala Maven / Gradle / Ivy

/*
 * Copyright (C) 2018 The Flowman Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.dimajix.flowman.execution

import java.time.Duration
import java.time.Instant
import java.time.ZoneId

import scala.collection.mutable
import scala.util.control.NonFatal
import scala.util.matching.Regex

import org.apache.commons.lang3.StringUtils
import org.slf4j.ILoggerFactory
import org.slf4j.Logger

import com.dimajix.common.ExceptionUtils
import com.dimajix.common.ExceptionUtils.reasons
import com.dimajix.common.No
import com.dimajix.common.Trilean
import com.dimajix.common.Unknown
import com.dimajix.common.text.ConsoleColors._
import com.dimajix.common.text.TimeFormatter
import com.dimajix.flowman.config.FlowmanConf
import com.dimajix.flowman.history.StateStore
import com.dimajix.flowman.history.StateStoreAdaptorListener
import com.dimajix.flowman.history.TargetState
import com.dimajix.flowman.model.Hook
import com.dimajix.flowman.model.Job
import com.dimajix.flowman.model.JobResult
import com.dimajix.flowman.model.JobWrapper
import com.dimajix.flowman.model.LifecycleResult
import com.dimajix.flowman.model.MappingIdentifier
import com.dimajix.flowman.model.Prototype
import com.dimajix.flowman.model.RelationIdentifier
import com.dimajix.flowman.model.Result
import com.dimajix.flowman.model.Target
import com.dimajix.flowman.model.TargetDigest
import com.dimajix.flowman.model.TargetIdentifier
import com.dimajix.flowman.model.TargetResult
import com.dimajix.flowman.model.Test
import com.dimajix.flowman.model.TestWrapper
import com.dimajix.flowman.spi.LogFilter
import com.dimajix.spark.SparkUtils.withJobGroup


private[execution] sealed class RunnerImpl(runner: Runner) {
    // Logger obtained from the runner's logger factory, registered under the name of RunnerImpl
    protected val logger: Logger = runner.loggerFactory.getLogger(classOf[RunnerImpl].getName)
    // Filters as returned by LogFilter.filters; applied to every environment entry in logEnvironment
    protected val logFilters: Seq[LogFilter] = LogFilter.filters

    /**
     * Executes a single phase of a target - or only pretends to do so in case of a dry run - and logs the
     * outcome together with the duration reported by the result.
     *
     * @param execution the execution used for running the target
     * @param target the target to be executed
     * @param phase the phase to be executed
     * @param dryRun if true, the target is not executed and a SUCCESS result is created instead
     * @return the result of the (possibly simulated) execution
     */
    def executeTarget(execution:Execution, target:Target, phase:Phase, dryRun:Boolean) : TargetResult = {
        // A dry run never touches the target and is always reported as successful
        val result =
            if (dryRun)
                TargetResult(target, phase, Status.SUCCESS)
            else
                execution.execute(target, phase)
        val duration = result.duration

        result.status match {
            // Failures first: the guarded case has to precede the plain FAILED case
            case Status.FAILED if result.exception.nonEmpty =>
                logger.error(red(s"Failed phase '$phase' for target '${target.identifier}'  after ${TimeFormatter.toString(duration)} with exception:\n  ${reasons(result.exception.get)}"))
            case Status.FAILED =>
                logger.error(red(s"Failed phase '$phase' for target '${target.identifier}' after ${TimeFormatter.toString(duration)}"))
            case Status.ABORTED =>
                logger.error(red(s"Aborted phase '$phase' for target '${target.identifier}' after ${TimeFormatter.toString(duration)}"))

            case Status.SUCCESS =>
                logger.info(green(s"Successfully finished phase '$phase' for target '${target.identifier}' in ${TimeFormatter.toString(duration)}"))
            case Status.SUCCESS_WITH_ERRORS =>
                logger.warn(yellow(s"Successfully finished phase '$phase' for target '${target.identifier}' with errors  in ${TimeFormatter.toString(duration)}"))
            case Status.SKIPPED =>
                logger.info(green(s"Skipped phase '$phase' for target '${target.identifier}'"))

            // Anything else is unexpected at this point and only reported as a warning
            case status =>
                logger.warn(yellow(s"Finished '$phase' for target '${target.identifier}' with unknown status ${status.upper}"))
        }

        result
    }

    // Width in characters of the separator lines; also used for laying out subtitles and summary rows
    protected val lineSize = 109
    // Separator line made of lineSize dashes, wrapped by boldWhite
    protected val separator: String = boldWhite(StringUtils.repeat('-', lineSize))
    // Separator line made of lineSize equal signs, wrapped by boldWhite
    protected val doubleSeparator: String = boldWhite(StringUtils.repeat('=', lineSize))
    /**
     * Logs an empty line followed by a subtitle line. The text is centered between dashes, such that the
     * whole line spans `lineSize` characters. If three or fewer dashes would remain on the left side, a
     * short fixed decoration is used instead.
     *
     * @param s the subtitle text
     */
    def logSubtitle(s:String) : Unit = {
        // Characters available for dashes once the text and its two surrounding blanks are subtracted
        val available = lineSize - 2 - s.length
        val left = available / 2
        val line =
            if (left <= 3) {
                boldWhite("--- ") + boldCyan(s) + boldWhite(" ---")
            }
            else {
                // The right side takes the remainder, so odd widths still fill the complete line
                val right = available - left
                boldWhite(StringUtils.repeat('-', left)) + " " + boldCyan(s) + " " + boldWhite(StringUtils.repeat('-', right))
            }

        logger.info("")
        logger.info(line)
    }

    /**
     * Logs a title block: an empty line, followed by the title (indented by two blanks) framed by a
     * separator line above and below.
     *
     * @param title the title text
     */
    def logTitle(title:String) : Unit = {
        val heading = boldWhite(s"  $title")
        Seq("", separator, heading, separator).foreach(line => logger.info(line))
    }

    /**
     * Logs the environment of the given context, of all projects imported (directly or transitively) by its
     * project, and finally of the root context. Every project is logged at most once. All entries are
     * sorted by key and passed through the log filters before being written.
     *
     * @param context the context whose environment is to be logged
     */
    def logEnvironment(context:Context) : Unit = {
        // Names of the projects logged so far; a context without project is tracked as the empty name
        val projects = mutable.Set[String]()

        def logEnv(context:Context) : Unit = {
            val project = context.project
            val name = project.map(_.name).getOrElse("")
            // `add` only returns true if the name was not contained before
            if (projects.add(name)) {
                logger.info(s"Environment of ${project.map(p => s"project '${p.name}'").getOrElse("session")}:")
                val entries = context.environment.toSeq.sortBy(_._1)
                entries.foreach { case (key, value) =>
                    LogFilter.filter(logFilters, key, value.toString)
                        .foreach { case (key, value) => logger.info(s"  $key = $value") }
                }

                // Descend into the imported projects, which are resolved via the root context
                val imports = project.toSeq.flatMap(_.imports.map(_.project))
                val root = context.root
                imports.foreach { i =>
                    logEnv(root.getProjectContext(i))
                }
            }
        }

        // Start with the given context, then make sure that the root context is covered as well
        Seq(context, context.root).foreach(ctx => logEnv(ctx))
        logger.info("")
    }

    /**
     * Logs a final status block consisting of the colored status message, the total time and the end time
     * (converted to the system default time zone), all framed by separator lines.
     *
     * @param title the text following the status in the message
     * @param status the status to be reported
     * @param duration the total duration to be reported
     * @param endTime the end time to be reported
     * @param double if true, the double separator is used instead of the single one
     */
    def logStatus(title:String, status:Status, duration: Duration, endTime:Instant, double:Boolean=false) : Unit = {
        val sep = if (double) doubleSeparator else separator

        // Green for success, yellow for warnings or still running, red for everything else
        val msg = status match {
            case Status.SUCCESS|Status.SKIPPED => boldGreen(s"${status.upper} $title")
            case Status.SUCCESS_WITH_ERRORS => boldYellow(s"${status.upper} $title")
            case Status.ABORTED|Status.FAILED => boldRed(s"${status.upper} $title")
            case Status.RUNNING => boldYellow(s"ALREADY RUNNING $title")
            case _ => boldRed(s"UNKNOWN STATE '$status' in $title. Assuming failure")
        }

        val lines = Seq(
            sep,
            msg,
            sep,
            s"Total time: ${TimeFormatter.toString(duration)}",
            s"Finished at: ${endTime.atZone(ZoneId.systemDefault())}",
            sep
        )
        lines.foreach(line => logger.info(line))
    }

    /**
     * Logs the result of a job. If the result has more than one child, a summary with one row per child
     * (identifier, status and duration) is logged first. The overall status block is always logged.
     *
     * @param title the title used in the final status block
     * @param result the job result to be logged
     */
    def logJobResult(title:String, result:JobResult) : Unit = {
        val children = result.children
        if (children.length > 1) {
            val args = result.arguments
            logger.info(separator)
            logger.info(boldWhite(s"Execution summary for ${result.category.lower} '${result.identifier}' ${args.map(kv => kv._1 + "=" + kv._2).mkString(", ")}"))
            logger.info("")
            children.foreach { child =>
                val name = child.identifier.toString
                val status = s"${this.status(child.status)} [${StringUtils.leftPad(TimeFormatter.toString(child.duration), 10)}]"
                // 15 = two blanks + " [" + 10 characters of padded duration + "]"
                val dots = StringUtils.repeat('.', lineSize - child.status.upper.length - name.length - 15)
                logger.info(s"$name $dots $status")
            }
        }

        logStatus(title, result.status, result.duration, result.endTime)
    }

    /**
     * Logs the result of a whole lifecycle. After an empty line, a summary with one row per phase (phase
     * name, status and duration) is logged if the result has more than one child. The overall status block
     * is always logged, using double separators.
     *
     * @param title the title used in the final status block
     * @param result the lifecycle result to be logged
     */
    def logLifecycleResult(title:String, result:LifecycleResult) : Unit = {
        logger.info("")

        val children = result.children
        if (children.length > 1) {
            val args = result.arguments
            logger.info(doubleSeparator)
            logger.info(boldWhite(s"Overall lifecycle summary for ${result.category.lower} '${result.identifier}' ${args.map(kv => kv._1 + "=" + kv._2).mkString(", ")}"))
            logger.info("")
            children.foreach { child =>
                val name = s"Phase ${child.phase.upper}"
                val status = s"${this.status(child.status)} [${StringUtils.leftPad(TimeFormatter.toString(child.duration), 10)}]"
                // 15 = two blanks + " [" + 10 characters of padded duration + "]"
                val dots = StringUtils.repeat('.', lineSize - child.status.upper.length - name.length - 15)
                logger.info(s"$name $dots $status")
            }
        }

        logStatus(title, result.status, result.duration, result.endTime, double=true)
    }

    /**
     * Renders the upper case name of a status in a color reflecting its severity.
     *
     * @param status the status to be rendered
     * @return the colored status text
     */
    private def status(status:Status) : String = {
        val text = status.upper
        status match {
            case Status.SUCCESS|Status.SKIPPED => boldGreen(text)
            case Status.SUCCESS_WITH_ERRORS|Status.RUNNING => boldYellow(text)
            // FAILED, ABORTED and any other status are rendered in red
            case _ => boldRed(text)
        }
    }
}


/**
 * Private implementation of Job specific methods
 */
private[execution] final class JobRunnerImpl(runner:Runner) extends RunnerImpl(runner) {
    // State store of the runner; queried in checkTarget for the last known state of a target
    private val stateStore = runner.stateStore
    // Listener wrapping the state store; registered as first execution listener for non-dry runs in executeJob
    private val stateStoreListener = new StateStoreAdaptorListener(stateStore)

    /**
     * Executes the given phases of a single job and returns the result of the complete lifecycle. Individual
     * targets may still be skipped further down the call chain (for example because the state store reports
     * them as up to date); this can be overridden with the `force` flag.
     *
     * A phase is only executed if at least one selected target of the job supports it; all other phases do
     * not contribute to the result.
     *
     * @param job the job to be executed
     * @param phases the phases to be executed
     * @param args arguments of the job
     * @param targets predicate selecting the targets which take part in a phase
     * @param dirtyTargets predicate selecting the targets which are to be treated as dirty in a phase
     * @param force force execution of targets
     * @param keepGoing passed on to the phase and target execution - presumably continues after failures
     * @param dryRun only simulate the execution; no execution listeners are created in this case
     * @param ignoreHistory passed on to the target execution, where it disables the state store lookup
     * @param isolated request an isolated context and execution. Isolation is always used if the job has
     *                 parameters or environment variables
     * @return the result of the whole lifecycle
     */
    def executeJob(
        job:Job,
        phases:Seq[Phase],
        args:Map[String,Any]=Map.empty,
        targets:(Phase,TargetIdentifier) => Boolean,
        dirtyTargets:(Phase,TargetIdentifier) => Boolean,
        force:Boolean=false,
        keepGoing:Boolean=false,
        dryRun:Boolean=false,
        ignoreHistory:Boolean=false,
        isolated:Boolean=true
    ) : LifecycleResult = {
        require(job != null)
        require(phases != null)
        require(args != null)

        // Disallow reusing current context if job parameters or environment are non-empty
        val isolated2 = isolated || job.parameters.nonEmpty || job.environment.nonEmpty

        val prj = job.project.map(prj => s" in project '${prj.name}'${prj.version.map(v => s" (version $v)").getOrElse("")}").getOrElse("")
        val iso = if(isolated2) "with isolation" else "without isolation"
        logger.info("")
        logger.info(separator)
        logger.info(s"Executing phases ${phases.map(p => "'" + p + "'").mkString(",")} for job '${job.name}'$prj $iso")

        // Instantiates the hooks of the runner and of the job. A hook which cannot be instantiated is logged
        // and ignored, such that it does not prevent the job from running. Dry runs use no listeners at all.
        def createListeners(context: Context) : Seq[ExecutionListener] = {
            if (!dryRun) {
                val extraListeners = (runner.hooks ++ job.hooks).flatMap { listener =>
                    try {
                        Some(listener.instantiate(context))
                    }
                    catch {
                        case NonFatal(ex) =>
                            logger.warn(s"Error creating execution listener, will be ignored. Reason:\n  ${reasons(ex)}")
                            None
                    }
                }
                stateStoreListener +: extraListeners
            }
            else {
                Seq.empty
            }
        }

        val startTime = Instant.now()
        runner.withExecution(isolated2) { execution =>
            runner.withJobContext(job, args, Some(execution), force, dryRun, isolated2) { (context, arguments) =>
                val title = s"lifecycle for job '${job.identifier}' ${arguments.map(kv => kv._1 + "=" + kv._2).mkString(", ")}"
                val listeners = createListeners(context)
                val result = execution.withListeners(listeners) { execution =>
                    execution.monitorLifecycle(job, arguments, phases) { execution =>
                        val results = Result.flatMap(phases, keepGoing) { phase =>
                            // Check if build phase really contains any active target. Otherwise we skip this phase and mark it
                            // as SUCCESS (an empty list is always executed as SUCCESS)
                            val isActive = job.targets
                                .filter(t => targets(phase,t))
                                .exists { target =>
                                    // This might throw exceptions for non-existing targets. The same
                                    // exception will be thrown and handled properly in executeJobPhase
                                    try {
                                        context.getTarget(target).phases.contains(phase)
                                    } catch {
                                        case NonFatal(_) => true
                                    }
                                }

                            if (isActive)
                                Some(executeJobPhase(execution, context, job, phase, arguments, targets, dirtyTargets, force=force, keepGoing=keepGoing, dryRun=dryRun, ignoreHistory=ignoreHistory))
                            else
                                None
                        }

                        val instance = job.lifecycle(phases, arguments.map { case (k, v) => k -> v.toString })
                        LifecycleResult(job, instance, results, startTime)
                    }
                }

                logLifecycleResult(title, result)
                result
            }
        }
    }

    /**
     * Executes a single phase of a job inside a phase specific context. The title and the environment are
     * logged up front, the targets are executed with the metrics of the job being active, and finally the
     * result is logged.
     *
     * An exception escaping from the target execution does not propagate, but is turned into a
     * corresponding job result.
     *
     * @param execution the execution to be used
     * @param jobContext the context of the job, from which the phase context is derived
     * @param job the job to be executed
     * @param phase the phase to be executed
     * @param arguments the arguments of the job
     * @param targets predicate selecting the targets which take part in the phase
     * @param dirtyTargets predicate selecting the targets which are to be treated as dirty
     * @param force force execution of targets
     * @param keepGoing passed on to the target execution
     * @param dryRun only simulate the execution
     * @param ignoreHistory passed on to the target execution
     * @return the result of the phase
     */
    private def executeJobPhase(
        execution: Execution,
        jobContext:Context,
        job:Job,
        phase:Phase,
        arguments:Map[String,Any],
        targets:(Phase,TargetIdentifier) => Boolean,
        dirtyTargets:(Phase,TargetIdentifier) => Boolean,
        force:Boolean,
        keepGoing:Boolean,
        dryRun:Boolean,
        ignoreHistory:Boolean
    ) : JobResult = {
        runner.withPhaseContext(jobContext, phase) { context =>
            val titleArgs = if (arguments.isEmpty) "" else " " + arguments.map(kv => kv._1 + "=" + kv._2).mkString(", ")
            val title = s"${phase.upper} job '${job.identifier}'$titleArgs"
            logTitle(title)
            logEnvironment(context)

            val metrics = job.metrics.map(_.instantiate(context))
            val startTime = Instant.now()

            // Runs all targets and wraps either their results or a caught exception into a job result
            def runTargets(execution:Execution) : JobResult = {
                val instance = job.digest(phase, arguments.map { case (k, v) => k -> v.toString })
                try {
                    val results = executeJobTargets(execution, context, job, phase, targets, dirtyTargets, force=force, keepGoing=keepGoing, dryRun=dryRun, ignoreHistory=ignoreHistory)
                    JobResult(job, instance, results, startTime)
                }
                catch {
                    // Primarily exceptions during target instantiation will be caught here
                    case NonFatal(ex) =>
                        logger.error(s"Caught exception during $title:\n  ${reasons(ex)}")
                        JobResult(job, instance, ex, startTime)
                }
            }

            val result = execution.withMetrics(metrics) { execution =>
                execution.monitorJob(job, arguments, phase) { execution =>
                    runTargets(execution)
                }
            }

            logJobResult(title, result)
            result
        }
    }

    /**
     * Executes all selected targets of a job for a single phase. The targets are instantiated from the
     * context, which will throw an exception if a target cannot be created. The actual execution is
     * delegated to the executor configured via `FlowmanConf.EXECUTION_EXECUTOR_CLASS`.
     *
     * Each target is forced if either `force` is set or the target is considered dirty by the dirty target
     * manager. A target finishing with status SUCCESS is reported back to the dirty target manager.
     *
     * @param execution the execution to be used
     * @param context the context used for instantiating the targets
     * @param job the job providing the targets
     * @param phase the phase to be executed
     * @param targets predicate selecting the targets which take part in the phase
     * @param dirtyTargets predicate selecting the targets which are initially tainted as dirty
     * @param force force execution of targets
     * @param keepGoing passed on to the executor
     * @param dryRun only simulate the execution
     * @param ignoreHistory passed on to the execution of each target
     * @return the results of the targets as returned by the executor
     */
    private def executeJobTargets(
        execution:Execution,
        context:Context,
        job:Job,
        phase:Phase,
        targets:(Phase,TargetIdentifier) => Boolean,
        dirtyTargets:(Phase,TargetIdentifier) => Boolean,
        force:Boolean,
        keepGoing:Boolean,
        dryRun:Boolean,
        ignoreHistory:Boolean
    ) : Seq[TargetResult] = {
        require(phase != null)

        // Bind the predicates to the current phase
        def targetFilter(target: Target): Boolean = targets(phase, target.identifier)
        def dirtyFilter(target: Target): Boolean = dirtyTargets(phase, target.identifier)

        // This will throw an exception if instantiation fails
        val jobTargets = job.targets.map(t => context.getTarget(t))

        val executorClass = execution.flowmanConf.getConf(FlowmanConf.EXECUTION_EXECUTOR_CLASS)
        val executor = Executor.newInstance(executorClass, execution, context)

        val dirtyTracker = new DirtyTargets(execution, jobTargets, phase)
        dirtyTracker.taint(dirtyFilter _)

        executor.execute(phase, jobTargets, targetFilter, keepGoing) { (execution, target, phase) =>
            withJobGroup(execution.spark.sparkContext, target.name, s"$phase target ${target.identifier}") {
                val forced = force || dirtyTracker.isDirty(target)
                val result = executeTargetPhase(execution, target, phase, forced, dryRun, ignoreHistory)
                // Report successfully executed targets back to the dirty target manager
                if (result.status == Status.SUCCESS)
                    dirtyTracker.taint(target)
                result
            }
        }
    }

    /**
     * Executes a single phase of a target, unless the execution is found to be unnecessary. The target is
     * skipped if it is not forced and either the state store reports it as up to date, or the target itself
     * reports that it is not dirty.
     *
     * A target is forced via the `force` flag or via the configuration
     * `FlowmanConf.EXECUTION_TARGET_FORCE_DIRTY`. The state store is only consulted if `ignoreHistory` is not
     * set and `FlowmanConf.EXECUTION_TARGET_USE_HISTORY` is enabled.
     *
     * @param execution the execution to be used
     * @param target the target to be executed
     * @param phase the phase to be executed
     * @param force force the execution, even if the target appears to be up to date
     * @param dryRun only simulate the execution
     * @param ignoreHistory do not consult the state store
     * @return the result of the target, with status SKIPPED if the target was not executed
     */
    private def executeTargetPhase(execution: Execution, target:Target, phase:Phase, force:Boolean, dryRun:Boolean, ignoreHistory:Boolean=false) : TargetResult = {
        val conf = execution.flowmanConf
        val forceDirty = force || conf.getConf(FlowmanConf.EXECUTION_TARGET_FORCE_DIRTY)
        val useHistory = !ignoreHistory && conf.getConf(FlowmanConf.EXECUTION_TARGET_USE_HISTORY)

        // We need to check the target *before* we run code inside the monitor (which will mark the target as RUNNING)
        val isClean = !forceDirty && useHistory && checkTarget(target.digest(phase))

        // Asks the target itself for its dirty state; a failure is only logged and results in Unknown.
        // This is intentionally a method, such that the check is only performed when it is really needed.
        def inferDirty : Trilean = {
            try {
                target.dirty(execution, phase)
            }
            catch {
                case NonFatal(ex) =>
                    logger.warn(yellow(s"Cannot infer dirty status for target '${target.identifier}' because of exception:\n  ${ExceptionUtils.reasons(ex)}"))
                    Unknown
            }
        }

        val startTime = Instant.now()

        // Logs the reason for not executing the target and creates the corresponding result
        def skipped(message:String) : TargetResult = {
            logger.info(cyan(message))
            logger.info("")
            TargetResult(target, phase, Status.SKIPPED, startTime)
        }

        execution.monitorTarget(target, phase) { execution =>
            logSubtitle(s"$phase target '${target.identifier}'")

            // First check if execution is really required
            if (isClean)
                skipped(s"Target '${target.identifier}' up to date for phase '$phase' according to state store, skipping execution")
            else if (!forceDirty && inferDirty == No)
                skipped(s"Target '${target.identifier}' not dirty in phase $phase, skipping execution")
            else
                executeTarget(execution, target, phase, dryRun)
        }
    }

    /**
     * Checks via the state store if a target is already up to date. This is the case if the state store
     * contains a state for the target whose phase belongs to the lifecycle of the requested phase, does not
     * precede the requested phase within that lifecycle, and whose status is successful.
     *
     * Problems while accessing the state store are logged, the target is then treated as not up to date.
     *
     * @param target the digest of the target, including the phase to be checked
     * @return true if the target is up to date according to the state store
     */
    private def checkTarget(target:TargetDigest) : Boolean = {
        val phase = target.phase

        def checkState(state:TargetState) : Boolean = {
            val lifecycle = Lifecycle.ofPhase(phase)
            // The stored phase must belong to the same lifecycle ...
            lifecycle.contains(state.phase) &&
                // ... it must not be an earlier phase than the requested one ...
                lifecycle.indexOf(state.phase) >= lifecycle.indexOf(phase) &&
                // ... and it must have been successful
                state.status.success
        }

        try {
            val stored = stateStore.getTargetState(target)
            stored match {
                case Some(state: TargetState) => checkState(state)
                case _ => false
            }
        }
        catch {
            case NonFatal(ex) =>
                logger.error(s"Cannot retrieve status from history database. Exception:\n  ${ExceptionUtils.reasons(ex)}")
                false
        }
    }
}


/**
 * Private Implementation for Test specific methods
 * @param runner
 */
private[execution] final class TestRunnerImpl(runner:Runner) extends RunnerImpl(runner) {
    /**
     * Executes a single test within an isolated execution. First all phases of the BUILD lifecycle are run
     * for the targets and fixtures of the test, then the assertions are executed, and finally all phases of
     * the DESTROY lifecycle are run - even if a previous step has failed.
     *
     * The assertions are skipped if the BUILD lifecycle did not finish with SUCCESS, unless `keepGoing` is set.
     *
     * @param test the test to be executed
     * @param keepGoing run the assertions even if the BUILD lifecycle was not successful; the flag is also
     *                  passed on to the execution of targets and assertions
     * @param dryRun only simulate the execution
     * @return the combined status of build, assertions and cleanup
     */
    def executeTest(test:Test, keepGoing:Boolean=false, dryRun:Boolean=false) : Status = {
        runner.withExecution(true) { execution =>
            runner.withTestContext(test, Some(execution), dryRun) { context =>
                val title = s"Running test '${test.identifier}'"
                logTitle(title)
                logEnvironment(context)

                val startTime = Instant.now()

                // Get all targets once here. Otherwise the fixtures would be instantiated over and over again for
                // each phase.
                val allTargets = test.targets.map(t => context.getTarget(t)) ++ test.fixtures.values.map(_.instantiate(context))

                def runPhase(phase: Phase): Seq[TargetResult] = {
                    // Only execute phase if there are targets. This will save some logging outputs
                    val hasTargets = allTargets.exists(_.phases.contains(phase))
                    if (!hasTargets) {
                        Seq()
                    }
                    else {
                        runner.withPhaseContext(context, phase) { context =>
                            executeTestTargets(execution, context, allTargets, phase, keepGoing, dryRun)
                        }
                    }
                }

                // Runs a single phase and combines the status of all of its targets
                def phaseStatus(phase: Phase): Status = {
                    val phaseResults = runPhase(phase)
                    Status.ofAll(phaseResults.map(_.status))
                }

                // First create test environment via fixtures
                val buildStatus = Status.ofAll(Lifecycle.BUILD, keepGoing) { phase => phaseStatus(phase) }

                // Now run tests if fixtures were successful
                val testStatus =
                    if (buildStatus == Status.SUCCESS || keepGoing) {
                        val sc = execution.spark.sparkContext
                        withJobGroup(sc, test.name, s"EXECUTE test ${test.identifier}") {
                            executeTestAssertions(execution, context, test, keepGoing, dryRun)
                        }
                    }
                    else {
                        Status.SKIPPED
                    }

                // Finally clean up, even in case of possible failures.
                val destroyStatus = Status.ofAll(Lifecycle.DESTROY, true) { phase => phaseStatus(phase) }

                // Compute complete status - which is only SUCCESS if all steps have been executed successfully
                val status = Status.ofAll(Seq(buildStatus, testStatus, destroyStatus))
                val endTime = Instant.now()
                logStatus(title, status, Duration.between(startTime, endTime), endTime)
                status
            }
        }
    }

    /**
     * Instantiates and executes all assertions of a test and logs a short summary. Any non-fatal exception
     * raised along the way is logged and results in status FAILED.
     *
     * @param execution the execution to be used
     * @param context the context used for instantiating the assertions
     * @param test the test providing the assertions
     * @param keepGoing passed on to the assertion runner
     * @param dryRun passed on to the assertion runner
     * @return SUCCESS if there were neither failures nor exceptions, otherwise FAILED
     */
    private def executeTestAssertions(
        execution: Execution,
        context:Context,
        test:Test,
        keepGoing:Boolean,
        dryRun:Boolean
    ) : Status = {
        val title = s"assert test '${test.identifier}'"
        logSubtitle(title)

        try {
            val startTime = Instant.now()

            // Instantiate all assertions up front, then execute them in one go
            val assertions = test.assertions.values.toSeq.map( _.instantiate(context))
            val assertionRunner = new AssertionRunner(context, execution)
            val results = assertionRunner.run(assertions, keepGoing=keepGoing, dryRun=dryRun)

            val duration = Duration.between(startTime, Instant.now())

            // Sum up the outcome over all assertions
            val numSucceeded = results.map(_.numSuccesses).sum
            val numFailed = results.map(_.numFailures).sum
            val numExceptions = results.map(_.numExceptions).sum

            logger.info(cyan(s"$numSucceeded assertions passed, $numFailed failed, $numExceptions exceptions"))
            logger.info(cyan(s"Executed ${numSucceeded + numFailed} assertions in  ${duration.toMillis / 1000.0} s"))

            if (numFailed + numExceptions > 0)
                Status.FAILED
            else
                Status.SUCCESS
        }
        catch {
            // Catch all exceptions
            case NonFatal(ex) =>
                logger.error(s"Caught exception during $title: ${reasons(ex)}")
                Status.FAILED
        }
    }

    /**
     * Executes a single phase for all given targets of a test. The execution is delegated to the executor
     * configured via `FlowmanConf.EXECUTION_EXECUTOR_CLASS`, without filtering any target.
     *
     * @param execution the execution to be used
     * @param context the context passed to the executor
     * @param targets the targets to be executed
     * @param phase the phase to be executed
     * @param keepGoing passed on to the executor
     * @param dryRun only simulate the execution
     * @return the results of the targets as returned by the executor
     */
    private def executeTestTargets(
        execution:Execution,
        context:Context,
        targets:Seq[Target],
        phase:Phase,
        keepGoing:Boolean,
        dryRun:Boolean
    ) : Seq[TargetResult] = {
        require(phase != null)

        val executorClass = execution.flowmanConf.getConf(FlowmanConf.EXECUTION_EXECUTOR_CLASS)
        val executor = Executor.newInstance(executorClass, execution, context)

        executor.execute(phase, targets, _ => true, keepGoing) { (execution, target, phase) =>
            withJobGroup(execution.spark.sparkContext, target.name, s"$phase target ${target.identifier}") {
                logSubtitle(s"$phase target '${target.identifier}'")
                executeTarget(execution, target, phase, dryRun)
            }
        }
    }
}


/**
 * The [[Runner]] class should be used for executing jobs, targets and tests. It will take care of applying additonal
 * environment variables, measuring execution time, publishing metrics, error handling and more.
 *
 * @param parentExecution
 * @param stateStore
 * @param hooks
 */
final class Runner(
    private[execution] val parentExecution:Execution,
    private[execution] val stateStore: StateStore,
    private[execution] val hooks: Seq[Prototype[Hook]]=Seq.empty
) {
    // Fail fast on missing constructor arguments
    require(parentExecution != null)
    require(stateStore != null)
    require(hooks != null)

    // Logger factory taken from the parent execution; it is also used by the RunnerImpl classes of this file
    val loggerFactory: ILoggerFactory = parentExecution.loggerFactory
    // Logger of the Runner itself, registered under the name of the Runner class
    private val logger = loggerFactory.getLogger(classOf[Runner].getName)

    /**
     * Executes a single job using the given execution and a map of parameters. The Runner may decide not to
     * execute a specific job, because some information may indicate that the job has already been successfully
     * run in the past. This behaviour can be overridden with the force flag.
     *
     * The regular expressions are matched against the complete name of each target; the phase is not taken
     * into account for the selection.
     *
     * @param job - The [[Job]] to be executed
     * @param phases - The execution phases to be executed
     * @param args - Optional list of job parameters
     * @param targets - Optional list of regular expressions for matching targets to be executed
     * @param dirtyTargets - Optional list of regular expressions for matching targets to be considered dirty
     * @param force - Force target execution, even for clean targets
     * @param keepGoing - Keep going, even if some targets fail
     * @param dryRun - Only perform dry run
     * @param ignoreHistory - Ignore job/target state in history database
     * @param isolated - Setup isolated context and execution environment
     * @return the status of the whole lifecycle
     */
    def executeJob(job:Job, phases:Seq[Phase], args:Map[String,Any]=Map.empty, targets:Seq[Regex]=Seq(".*".r), dirtyTargets:Seq[Regex]=Seq.empty, force:Boolean=false, keepGoing:Boolean=false, dryRun:Boolean=false, ignoreHistory:Boolean=false, isolated:Boolean=true) : Status = {
        require(job != null)
        require(phases != null)
        require(args != null)

        // `unapplySeq` only yields a result if the whole target name matches the regular expression
        def targetFilter(phase: Phase, target: TargetIdentifier): Boolean = targets.exists(_.unapplySeq(target.name).nonEmpty)
        def dirtyFilter(phase: Phase, target: TargetIdentifier): Boolean = dirtyTargets.exists(_.unapplySeq(target.name).nonEmpty)
        executeJob(job, phases, args, targetFilter _, dirtyFilter _, force, keepGoing, dryRun, ignoreHistory, isolated)
    }

    /**
     * Executes a single job, with the targets to be executed and the targets to be considered dirty being
     * selected by predicates. The work is delegated to a new [[JobRunnerImpl]].
     *
     * @param job - The [[Job]] to be executed
     * @param phases - The execution phases to be executed
     * @param args - Job parameters
     * @param targets - Predicate selecting the targets to be executed in a phase
     * @param dirtyTargets - Predicate selecting the targets to be considered dirty in a phase
     * @param force - Force target execution, even for clean targets
     * @param keepGoing - Keep going, even if some targets fail
     * @param dryRun - Only perform dry run
     * @param ignoreHistory - Ignore job/target state in history database
     * @param isolated - Setup isolated context and execution environment
     * @return the status of the whole lifecycle
     */
    def executeJob(job: Job, phases: Seq[Phase], args: Map[String, Any], targets:(Phase,TargetIdentifier) => Boolean, dirtyTargets: (Phase,TargetIdentifier) => Boolean, force: Boolean, keepGoing: Boolean, dryRun: Boolean, ignoreHistory: Boolean, isolated: Boolean): Status = {
        require(job != null)
        require(phases != null)
        require(args != null)

        val runner = new JobRunnerImpl(this)
        val result = runner.executeJob(job, phases, args, targets, dirtyTargets=dirtyTargets, force=force, keepGoing=keepGoing, dryRun=dryRun, isolated=isolated, ignoreHistory=ignoreHistory)
        result.status
    }

    /**
     * Executes an individual test by delegating to a new [[TestRunnerImpl]].
     *
     * @param test - The [[Test]] to be executed
     * @param keepGoing - Continue running assertions even if unexpected exceptions are raised.
     * @param dryRun - Only perform dry run
     * @return the overall status of the test
     */
    def executeTest(test:Test, keepGoing:Boolean=false, dryRun:Boolean=false) : Status = {
        new TestRunnerImpl(this).executeTest(test, keepGoing, dryRun)
    }

    /**
     * Executes a list of targets by wrapping them into a temporary job, which is then executed with the
     * history being ignored. The job lives in a scoped context derived from the context of the first
     * target. Nothing is executed for an empty list of targets, the result is SUCCESS then.
     *
     * @param targets   - The targets to be executed
     * @param phases    - The execution phases to be executed
     * @param jobName   - Name of job to be created containing all targets
     * @param force     - Force target execution, even for clean targets
     * @param keepGoing - Keep going, even if some targets fail
     * @param dryRun    - Only perform dry run
     * @param isolated  - Setup isolated context and execution environment
     * @return the status of the job execution
     */
    def executeTargets(targets:Seq[Target], phases:Seq[Phase], jobName:String="execute-target", force:Boolean, keepGoing:Boolean=false, dryRun:Boolean=false, isolated:Boolean=false) : Status = {
        targets.headOption match {
            case None =>
                Status.SUCCESS

            case Some(first) =>
                // Build a new scoped job context, which contains the given targets. This way, the targets do not need
                // to be part of the project, but they still *can* be part
                val jobContext = ScopeContext.builder(first.context)
                    .withTargets(targets.map(tgt => (tgt.name, Prototype.of(tgt))).toMap)
                    .build()
                val job = Job.builder(jobContext)
                    .setName(jobName)
                    .setTargets(targets.map(tgt => TargetIdentifier(tgt.name)))
                    .build()

                executeJob(job, phases, force=force, keepGoing=keepGoing, dryRun=dryRun, ignoreHistory=true, isolated=isolated)
        }
    }

    /**
     * Creates a new scoped execution on top of the parent execution and runs the given function with it.
     * Once the function has returned, this method waits for background activities of the execution which
     * are still active, and then cleans up the execution.
     *
     * @param isolated - set up an isolated execution environment which does not share any resources
     * @param fn - the function to be run with the new execution
     * @tparam T - the result type of the function
     * @return the result of the function
     */
    def withExecution[T](isolated:Boolean=false)(fn:Execution => T) : T = {
        val execution : Execution = new ScopedExecution(parentExecution, isolated)
        // NOTE(review): if `fn` throws, neither the waiting nor the cleanup below is performed - confirm
        // whether this is intended
        val result = fn(execution)

        // Wait for any running background activities before cleaning up
        val ops = execution.activities
        val activeOps = ops.listActive()
        if (activeOps.nonEmpty) {
            logger.info("Some background activities are still active:")
            activeOps.foreach(o => logger.info(s"  - ${o.name}"))
            logger.info("Waiting for termination...")
            ops.awaitTermination()
        }

        // Finally release any resources
        execution.cleanup()
        result
    }

    /**
     * Runs the given function with a context set up for the given job. The effective job arguments are the
     * defaults of the job parameters, overlaid with the explicitly given `args`. They are logged and verified
     * against the job parameters before the context is created.
     *
     * If `isolated` is set, or if there are any arguments or job environment variables, a new root context is
     * derived from the root of the job's context. Otherwise only a light-weight scope on top of the job's context
     * is created. In both cases the variables `force`, `dryRun` and `job` are provided.
     *
     * @param job - The job to create the context for
     * @param args - Optional job parameters
     * @param execution - Optional execution used by context for analyzing (only passed to a newly built root context)
     * @param force - Force execution even for non-dirty targets
     * @param dryRun - Only simulate execution
     * @param isolated - Force isolated context
     * @param fn - The function to be invoked with the job context and the effective job arguments
     * @tparam T - The result type of `fn`
     * @return The value returned by `fn`
     */
    def withJobContext[T](job:Job, args:Map[String,Any]=Map.empty, execution:Option[Execution]=None, force:Boolean=false, dryRun:Boolean=false, isolated:Boolean=true)(fn:(Context,Map[String,Any]) => T) : T = {
        // Start with the defaults of all parameters which provide one, then apply the explicit arguments
        val defaults = for {
            param <- job.parameters
            default <- param.default
        } yield param.name -> default
        val arguments : Map[String,Any] = defaults.toMap ++ args

        val sortedArguments = arguments.toSeq.sortBy { case (name, _) => name }
        sortedArguments.foreach { case (k,v) => logger.info(s"Job argument $k=$v") }

        verifyArguments(job,arguments)

        // Builds a completely new root context, such that project envs do not leak into the root context
        def freshRootContext = {
            val rootContext = RootContext.builder(job.context.root)
                .withEnvironment("force", force)
                .withEnvironment("dryRun", dryRun)
                // Override any job variable
                .withEnvironment("job", JobWrapper(job), SettingLevel.SCOPE_OVERRIDE)
                .withEnvironment(arguments, SettingLevel.SCOPE_OVERRIDE)
                .withEnvironment(job.environment, SettingLevel.JOB_OVERRIDE)
                .withExecution(execution)
                .build()
            // Step down into the project of the job again, if there is one
            job.context.project
                .map(project => rootContext.getProjectContext(project))
                .getOrElse(rootContext)
        }

        // Only adds a thin scope on top of the existing job context
        def scopedContext = {
            ScopeContext.builder(job.context)
                .withEnvironment("force", force)
                .withEnvironment("dryRun", dryRun)
                // Override any job variable
                .withEnvironment("job", JobWrapper(job), SettingLevel.SCOPE_OVERRIDE)
                .build()
        }

        val needsRootContext = isolated || arguments.nonEmpty || job.environment.nonEmpty
        val jobContext = if (needsRootContext) freshRootContext else scopedContext

        fn(jobContext, arguments)
    }

    /**
     * Runs the given function with a context set up for the given test. A new root context is derived from the
     * root of the test's context, which contains all environment variables of the test case, the variables
     * `force` (always `false`), `dryRun` and `test`, as well as the relation and mapping overrides of the test.
     *
     * @param test - The test to create the context for
     * @param execution - Optional execution passed to the newly built root context
     * @param dryRun - Value of the `dryRun` variable in the context
     * @param fn - The function to be invoked with the test context
     * @tparam T - The result type of `fn`
     * @return The value returned by `fn`
     */
    def withTestContext[T](test:Test, execution:Option[Execution]=None, dryRun:Boolean=false)(fn:(Context) => T) : T = {
        // Overrides are declared by plain name and are qualified with the name of the test's project (if any)
        val projectName = test.project.map(_.name)

        val rootContext = RootContext.builder(test.context.root)
            .withEnvironment("force", false)
            .withEnvironment("dryRun", dryRun)
            // Override any test variable
            .withEnvironment("test", TestWrapper(test), SettingLevel.SCOPE_OVERRIDE)
            .withEnvironment(test.environment, SettingLevel.JOB_OVERRIDE)
            .withExecution(execution)
            .overrideRelations(test.overrideRelations.map { case (name, relation) => RelationIdentifier(name, projectName) -> relation })
            .overrideMappings(test.overrideMappings.map { case (name, mapping) => MappingIdentifier(name, projectName) -> mapping })
            .build()

        // Step down into the project of the test again, if there is one
        val testContext = test.context.project
            .map(project => rootContext.getProjectContext(project))
            .getOrElse(rootContext)

        fn(testContext)
    }

    /**
     * Runs the given function with a [[Context]] for the specified phase. The context is a new scope on top of
     * the given job context, which additionally provides the variable `phase`.
     *
     * @param jobContext - The parent context
     * @param phase - The phase whose string representation is stored in the `phase` variable
     * @param fn - The function to be invoked with the phase context
     * @tparam T - The result type of `fn`
     * @return The value returned by `fn`
     */
    def withPhaseContext[T](jobContext:Context, phase:Phase)(fn:Context => T) : T = {
        val phaseScope = ScopeContext.builder(jobContext).withEnvironment("phase", phase.toString)
        fn(phaseScope.build())
    }

    /**
     * Runs the given function with the [[Environment]] of a job for the specified phase. The environment is
     * taken from a phase context, which in turn is created on top of a job context.
     *
     * @param job - The job to create the environment for
     * @param phase - The phase to create the environment for
     * @param args - The job arguments
     * @param force - Value of the `force` variable
     * @param dryRun - Value of the `dryRun` variable
     * @param fn - The function to be invoked with the environment
     * @tparam T - The result type of `fn`
     * @return The value returned by `fn`
     */
    def withEnvironment[T](job:Job, phase:Phase, args:Map[String,Any], force:Boolean, dryRun:Boolean)(fn:Environment => T) : T = {
        withJobContext(job, args, force=force, dryRun=dryRun) { (jobContext,_) =>
            withPhaseContext(jobContext, phase)(phaseContext => fn(phaseContext.environment))
        }
    }

    /**
     * Runs the given function with the [[Environment]] of the context created for the given test.
     *
     * @param test - The test to create the environment for
     * @param dryRun - Value of the `dryRun` variable
     * @param fn - The function to be invoked with the environment
     * @tparam T - The result type of `fn`
     * @return The value returned by `fn`
     */
    def withEnvironment[T](test:Test, dryRun:Boolean)(fn:Environment => T) : T = {
        withTestContext(test, dryRun=dryRun)(testContext => fn(testContext.environment))
    }

    /**
     * Verifies that the given arguments exactly match the parameters of the job. The check lives in this
     * method (instead of a constructor), such that only this method throws an exception.
     *
     * @param job - The job whose parameters are checked
     * @param arguments - The effective job arguments, keyed by parameter name
     * @throws IllegalArgumentException if an argument is not declared as a job parameter, or if a job parameter
     *                                  has no corresponding argument
     */
    private def verifyArguments(job:Job, arguments:Map[String,Any]) : Unit = {
        val provided = arguments.keySet
        val declared = job.parameters.map(_.name).toSet

        // Arguments without a matching parameter: complain about the first one found
        val unexpected = provided.diff(declared)
        unexpected.headOption.foreach { p =>
            throw new IllegalArgumentException(s"Unexpected argument '$p' not defined in job '${job.identifier}'")
        }

        // Parameters without a matching argument: complain about the first one found
        val missing = declared.diff(provided)
        missing.headOption.foreach { p =>
            throw new IllegalArgumentException(s"Required parameter '$p' not specified for job '${job.identifier}'")
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy