// All Downloads are FREE. Search and download functionalities are using the official Maven repository.
//
// net.nemerosa.ontrack.job.support.DefaultJobScheduler.kt Maven / Gradle / Ivy

package net.nemerosa.ontrack.job.support

import io.micrometer.core.instrument.MeterRegistry
import net.nemerosa.ontrack.common.Time
import net.nemerosa.ontrack.job.*
import org.apache.commons.lang3.Validate
import org.slf4j.LoggerFactory
import org.springframework.scheduling.support.CronTrigger
import java.time.Duration
import java.time.LocalDateTime
import java.time.ZoneOffset
import java.time.temporal.ChronoUnit
import java.util.*
import java.util.concurrent.*
import java.util.concurrent.atomic.AtomicBoolean
import java.util.concurrent.atomic.AtomicLong
import java.util.concurrent.atomic.AtomicReference
import kotlin.math.abs

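/**
 * [JobScheduler] implementation which keeps one scheduled service per job key, submits job
 * executions to [jobExecutorService] and can stop the executions which exceed their timeout.
 *
 * @property meterRegistry If set, the scheduler will register job metrics
 */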
class DefaultJobScheduler
@JvmOverloads
constructor(
    private val jobDecorator: JobDecorator,
    private val scheduler: TaskExecutor,
    private val jobListener: JobListener,
    initiallyPaused: Boolean,
    private val jobExecutorService: Executor,
    private val scattering: Boolean,
    private val scatteringRatio: Double,
    private val meterRegistry: MeterRegistry? = null,
    private val timeout: Duration? = null,
    timeoutControllerInterval: Duration? = null,
) : JobScheduler {

    private val logger = LoggerFactory.getLogger(JobScheduler::class.java)

    /**
     * Scheduled services, indexed by the key of their job.
     *
     * The type arguments are spelled out because the empty [TreeMap] seed gives the compiler
     * nothing to infer them from.
     */
    private val services = ConcurrentHashMap<JobKey, JobScheduledService>(TreeMap())

    /** Global pause flag of the scheduler; assigned in the `init` block from the constructor argument. */
    private val schedulerPaused: AtomicBoolean

    /** Generator of the numeric IDs assigned to the scheduled services. */
    private val idGenerator = AtomicLong()

    /**
     * Registers a gauge named `ontrack_job_<name>_total` which reports how many registered
     * jobs currently have a status accepted by [statusFilterFn].
     *
     * @param name Fragment inserted into the gauge name
     * @param statusFilterFn Predicate applied to the status of each registered job
     */
    private fun MeterRegistry.statusGauge(
        name: String,
        statusFilterFn: (JobStatus) -> Boolean,
    ) {
        gauge("ontrack_job_${name}_total", services) { registered ->
            registered.values
                .count { service -> statusFilterFn(service.jobStatus) }
                .toDouble()
        }
    }

    init {
        // The scattering ratio is a fraction of the period and must lie in [0, 1]
        Validate.inclusiveBetween(0.0, 1.0, scatteringRatio)
        this.schedulerPaused = AtomicBoolean(initiallyPaused)
        // Metrics are published only when a registry has been supplied
        meterRegistry?.let { registry ->
            // Total number of registered jobs
            registry.gaugeMapSize("ontrack_job_count_total", emptyList(), services)
            // Number of jobs per status flag
            registry.statusGauge("running") { status -> status.isRunning }
            registry.statusGauge("disabled") { status -> status.isDisabled }
            registry.statusGauge("paused") { status -> status.isPaused }
            registry.statusGauge("error") { status -> status.isError }
            registry.statusGauge("timeout") { status -> status.isTimeout }
            registry.statusGauge("invalid") { status -> !status.isValid }
            // Error and timeout counters, summed over all registered jobs
            registry.gauge("ontrack_job_error_count_total", services) { registered ->
                registered.values.sumOf { service -> service.jobStatus.lastErrorCount }.toDouble()
            }
            registry.gauge("ontrack_job_timeout_count_total", services) { registered ->
                registered.values.sumOf { service -> service.jobStatus.lastTimeoutCount }.toDouble()
            }
        }
        // The timeout controller runs only when an interval has been configured;
        // the same interval is used as initial delay and as delay between two checks
        timeoutControllerInterval?.let { interval ->
            scheduler.scheduleAtFixedDelay(createTimeoutControllerJob(), interval, interval)
        }
    }

    /**
     * Creates the task which checks all registered jobs for timeout and logs
     * how many of them had to be stopped.
     */
    private fun createTimeoutControllerJob(): Runnable {
        return Runnable {
            val stopped = checkForTimeouts()
            logger.debug("[scheduler] $stopped job(s) have been stopped because of timeout")
        }
    }

    /**
     * Checks every registered job for timeout.
     *
     * @return Number of jobs which were in timeout and have been stopped
     */
    override fun checkForTimeouts(): Int {
        var stoppedCount = 0
        for (service in services.values) {
            if (service.checkForTimeout()) {
                stoppedCount++
            }
        }
        return stoppedCount
    }

    /**
     * Registers a job with a schedule, or updates the job and schedule of the
     * service already registered under the same key.
     *
     * @param job Job to schedule
     * @param schedule Schedule to apply to the job
     */
    override fun schedule(job: Job, schedule: Schedule) {
        logger.debug("[scheduler][job]{} Scheduling with {}", job.key, schedule)
        val registered = services[job.key]
        if (registered == null) {
            // Unknown key: the new service schedules itself on creation, then gets registered
            logger.debug("[scheduler][job]{} Starting scheduled service", job.key)
            val created = JobScheduledService(
                initialJob = job,
                initialSchedule = schedule,
                pausedAtStartup = jobListener.isPausedAtStartup(job.key)
            )
            services[job.key] = created
        } else {
            // Known key: the existing service is kept and adapted
            logger.debug("[scheduler][job]{} Modifying existing schedule", job.key)
            registered.update(job, schedule)
        }
    }

    /**
     * Removes a job from the scheduler, stopping its current execution if any.
     *
     * @return `true` if a job was registered for [key]
     */
    override fun unschedule(key: JobKey): Boolean = unschedule(key, forceStop = true)

    /**
     * Removes a job from the registry and cancels its scheduled service.
     *
     * @param key Key of the job to remove
     * @param forceStop Passed to the service's `cancel`; when `true` the current execution is stopped as well
     * @return `true` if a job was registered for [key], `false` otherwise
     */
    private fun unschedule(key: JobKey, forceStop: Boolean): Boolean {
        logger.debug("[scheduler][job]{} Unscheduling job", key)
        val removed = services.remove(key) ?: return false
        logger.debug("[scheduler][job]{} Stopping running job", key)
        removed.cancel(forceStop)
        return true
    }

    /** Pauses the whole scheduler: scheduled runs are skipped while the flag is set. */
    override fun pause() = schedulerPaused.set(true)

    /** Resumes the whole scheduler by clearing the global pause flag. */
    override fun resume() = schedulerPaused.set(false)

    /** @return `true` while the global pause flag of the scheduler is set */
    override fun isPaused(): Boolean = schedulerPaused.get()

    /**
     * Pauses one job.
     *
     * @param key Key of the job to pause
     * @return Always `true` when the job is registered
     * @throws JobNotScheduledException If no job is registered for [key]
     */
    override fun pause(key: JobKey): Boolean {
        val service = services[key] ?: throw JobNotScheduledException(key)
        service.pause()
        return true
    }

    /**
     * Resumes one job.
     *
     * @param key Key of the job to resume
     * @return Always `true` when the job is registered
     * @throws JobNotScheduledException If no job is registered for [key]
     */
    override fun resume(key: JobKey): Boolean {
        val service = services[key] ?: throw JobNotScheduledException(key)
        service.resume()
        return true
    }

    /**
     * Gets the current status of a registered job.
     *
     * @param key Key of the job
     * @return Status of the job, or an empty [Optional] if no job is registered for [key]
     */
    override fun getJobStatus(key: JobKey): Optional<JobStatus> =
        Optional.ofNullable(services[key]?.jobStatus)

    /**
     * Looks a job key up from the numeric ID of its scheduled service.
     *
     * @param id ID of the scheduled service
     * @return Key of the matching job, or an empty [Optional] if no service has this ID
     */
    override fun getJobKey(id: Long): Optional<JobKey> =
        Optional.ofNullable(services.values.firstOrNull { service -> service.id == id }?.jobKey)

    /**
     * Stops the current execution of a job.
     *
     * @param key Key of the job to stop
     * @return Result of the service's `stop`
     * @throws JobNotScheduledException If no job is registered for [key]
     */
    override fun stop(key: JobKey): Boolean {
        val service = services[key] ?: throw JobNotScheduledException(key)
        return service.stop()
    }

    /**
     * @return Keys of all registered jobs. This is the key view of the underlying map,
     * not a copy, so it reflects later registrations and removals.
     */
    override fun getAllJobKeys(): Collection<JobKey> {
        return services.keys
    }

    /**
     * @param type Job type to look for
     * @return Keys of the registered jobs for which `sameType(type)` holds, as a new set
     */
    override fun getJobKeysOfType(type: JobType): Collection<JobKey> {
        return allJobKeys
            .filter { key -> key.sameType(type) }
            .toSet()
    }

    /**
     * @param category Job category to look for
     * @return Keys of the registered jobs for which `sameCategory(category)` holds, as a new set
     */
    override fun getJobKeysOfCategory(category: JobCategory): Collection<JobKey> {
        return allJobKeys
            .filter { key -> key.sameCategory(category) }
            .toSet()
    }

    /**
     * @return Statuses of all registered jobs, sorted by service ID
     */
    override fun getJobStatuses(): Collection<JobStatus> {
        return services.values
            .map { service -> service.jobStatus }
            .sortedBy { status -> status.id }
            .toList()
    }

    /**
     * Runs a registered job now, regardless of its schedule and of its paused state.
     *
     * @param jobKey Key of the job to run
     * @return The started execution, or an empty [Optional] if the job was not allowed to start
     * @throws JobNotScheduledException If no job is registered for [jobKey]
     */
    override fun fireImmediately(jobKey: JobKey): Optional<CompletableFuture<*>> {
        // Gets the existing scheduled service
        val jobScheduledService = services[jobKey] ?: throw JobNotScheduledException(jobKey)
        // Fires the job immediately
        return jobScheduledService.doRun(force = true)
    }

    private inner class JobScheduledService(
        initialJob: Job,
        initialSchedule: Schedule,
        pausedAtStartup: Boolean,
    ) : Runnable {

        // Current job and requested schedule; both can be replaced by `update`
        private var job = initialJob
        private var schedule = initialSchedule

        /** Numeric ID of this service, taken from the scheduler's ID generator. */
        val id: Long = idGenerator.incrementAndGet()

        // Schedule actually applied (in milliseconds, scattering included) and its scheduler handle
        private var actualSchedule: Schedule = Schedule.NONE
        private var scheduledFuture: ScheduledFuture<*>? = null

        private val paused: AtomicBoolean = AtomicBoolean(pausedAtStartup)

        // Execution currently registered for this job; holds null when the job is not running
        private val currentExecution = AtomicReference<CompletableFuture<*>>()
        // Last reported progress of the current run; reset to null on completion
        private val runProgress = AtomicReference<JobRunProgress>()
        private val runCount = AtomicLong()
        // Start of the current run in epoch milliseconds; 0 when no run has started
        private val startTime = AtomicLong()
        private val lastRunDate = AtomicReference<LocalDateTime>()
        private val lastRunDurationMs = AtomicLong()
        private val lastErrorCount = AtomicLong()
        private val lastTimeoutCount = AtomicLong()
        // Message of the last failure; reset to null on success
        private val lastError = AtomicReference<String>(null)

        init {
            // Only traces the initial paused state; the flag itself is already set
            if (pausedAtStartup) logger.debug("[job]{} Job paused at startup", job.key)
            // Registers this service with the scheduler using the initial schedule
            createSchedule()
        }

        /**
         * Registers this service with the scheduler according to the current [schedule].
         *
         * A non-blank cron expression takes precedence. Otherwise the job is scheduled at
         * fixed delay, unless its period is 0, in which case it is not scheduled at all.
         * When scattering is enabled, a delay derived from the job key is added to the
         * initial period, bounded by `period * scatteringRatio`.
         */
        private fun createSchedule() {
            val cron = schedule.cron
            if (cron.isNullOrBlank()) {
                // Converting all units to milliseconds
                var initialPeriod = TimeUnit.MILLISECONDS.convert(schedule.initialPeriod, schedule.unit)
                val period = TimeUnit.MILLISECONDS.convert(schedule.period, schedule.unit)
                // Scattering
                if (scattering) {
                    // Bucket in 0..9999 computed from the job key. The remainder is taken BEFORE
                    // the absolute value: abs(Int.MIN_VALUE) overflows and stays negative, which
                    // would produce a negative delay. All other hash values give the same bucket as before.
                    val hash = abs(job.key.toString().hashCode() % 10000)
                    // Period to consider
                    val scatteringMax = (period * scatteringRatio).toLong()
                    if (scatteringMax > 0) {
                        // Scales the bucket onto [0, scatteringMax)
                        val delay = hash * scatteringMax / 10000
                        logger.debug("[job]{} Scattering enabled - additional delay: {} ms", job.key, delay)
                        // Adding to the initial delay
                        initialPeriod += delay
                    }
                }
                // Actual schedule
                actualSchedule = Schedule(
                    initialPeriod,
                    period,
                    TimeUnit.MILLISECONDS
                )
                // Scheduling now
                scheduledFuture = if (schedule.period > 0) {
                    scheduler.scheduleAtFixedDelay(
                        this,
                        Duration.ofMillis(initialPeriod),
                        Duration.ofMillis(period)
                    )
                } else {
                    logger.debug("[job]{} Job not scheduled since period = 0", job.key)
                    null
                }
            } else {
                scheduledFuture = scheduler.scheduleCron(this, cron)
            }
        }

        /**
         * Replaces the job of this service and, when the schedule differs from the
         * current one, reschedules the service.
         *
         * @param newJob Job to use from now on; it must have the same key as the current job
         * @param newSchedule Schedule to apply
         * @throws IllegalStateException If the key of [newJob] differs from the current key
         */
        fun update(
            newJob: Job,
            newSchedule: Schedule,
        ) {
            // A service is bound to one key for its whole life
            check(job.key == newJob.key) {
                "The job assigned to a job service " +
                        "cannot have a different key. " +
                        "Expected=${job.key}, Actual=${newJob.key}"
            }
            val scheduleChanged = newSchedule != schedule
            if (scheduleChanged) {
                // Only the scheduling is cancelled, a currently running execution is left alone
                cancel(false)
                schedule = newSchedule
                createSchedule()
            }
            // The job itself is replaced in all cases
            job = newJob
        }

        /** Key of the job, read once when the service is created; `update` rejects jobs with another key. */
        val jobKey: JobKey = job.key

        /**
         * Builds a new runnable for one execution of the job.
         *
         * The job task is decorated by the scheduler's decorator, then wrapped twice in a
         * `MonitoredRun`: the inner wrapper clears [currentExecution] on completion, the
         * outer one maintains the run statistics and notifies the job listener.
         *
         * NOTE(review): when and in which order `MonitoredRun` invokes its listener callbacks
         * is defined outside of this file - confirm before relying on the ordering.
         */
        private val run: Runnable
            get() {
                // Listener given to the task so that it can report its progress
                val jobRunListener = DefaultJobRunListener()
                val rootTask = { job.task.run(jobRunListener) }
                val decoratedTask = jobDecorator.decorate(job, rootTask)
                // Inner wrapper: unregisters the execution once the task is complete
                val runnable = MonitoredRun(decoratedTask, object : MonitoredRunListenerAdapter() {
                    override fun onCompletion() {
                        logger.debug("[job][task]{} Removed job execution", job.key)
                        currentExecution.set(null)
                    }
                })
                // Outer wrapper: statistics and notifications
                val monitoredRunListener = object : MonitoredRunListener {
                    override fun onStart() {
                        logger.debug("[job][task]{} On start", job.key)
                        lastRunDate.set(Time.now())
                        // Reference time used by the timeout check
                        startTime.set(System.currentTimeMillis())
                        runCount.incrementAndGet()
                        jobListener.onJobStart(job.key)
                    }

                    override fun onSuccess(duration: Long) {
                        lastRunDurationMs.set(duration)
                        logger.debug("[job][task]{} Success in {} ms", job.key, duration)
                        jobListener.onJobEnd(job.key, duration)
                        // A success resets the error and timeout tracking
                        lastErrorCount.set(0)
                        lastError.set(null)
                        lastTimeoutCount.set(0)
                    }

                    override fun onFailure(ex: Exception) {
                        lastErrorCount.incrementAndGet()
                        lastError.set(ex.message)
                        lastTimeoutCount.set(0)
                        logger.debug("[job][task]{} Failure: {}", job.key, ex.message)
                        try {
                            jobListener.onJobError(jobStatus, ex)
                        } catch (uncaught: Exception) {
                            // A failing error handler must not hide the initial error: both are logged
                            logger.error("[job][task]${job.key} Could not process error for job because of:", uncaught)
                            logger.error("[job][task]${job.key} Initial error for job:", ex)
                        }
                    }

                    override fun onCompletion() {
                        runProgress.set(null)
                        // 0 means "not started" for the timeout check
                        startTime.set(0)
                        logger.debug("[job][task]{} Job completed.", job.key)
                        jobListener.onJobComplete(job.key)
                    }
                }
                return MonitoredRun(runnable, monitoredRunListener)
            }

        /**
         * Snapshot of the current state of the job, assembled from the job itself
         * and from the counters maintained by this service.
         */
        val jobStatus: JobStatus
            get() {
                // Validity is read once and shared by `isValid` and `nextRunDate`
                val currentlyValid = job.isValid
                return JobStatus(
                    id = id,
                    key = job.key,
                    schedule = schedule,
                    actualSchedule = actualSchedule,
                    description = job.description,
                    // Running means that an execution is currently registered
                    isRunning = currentExecution.get() != null,
                    isValid = currentlyValid,
                    isPaused = paused.get(),
                    isDisabled = job.isDisabled,
                    progress = runProgress.get(),
                    runCount = runCount.get(),
                    lastRunDate = lastRunDate.get(),
                    lastRunDurationMs = lastRunDurationMs.get(),
                    nextRunDate = getNextRunDate(currentlyValid),
                    lastErrorCount = lastErrorCount.get(),
                    lastTimeoutCount = lastTimeoutCount.get(),
                    lastError = lastError.get()
                )
            }

        /**
         * Entry point called by the scheduler: starts a non-forced run,
         * unless the whole scheduler is paused.
         */
        override fun run() {
            if (schedulerPaused.get()) return
            doRun(false)
        }

        /**
         * Tries to start an execution of the job on the job executor.
         *
         * An invalid job is removed from the scheduler. A job which is disabled, paused
         * (unless [force] is set) or already running is not started.
         *
         * @param force `true` to run the job even if it is paused
         * @return The submitted execution, or an empty [Optional] if nothing was started
         */
        fun doRun(force: Boolean): Optional<CompletableFuture<*>> {
            logger.debug("[job][run]{} Trying to run now - forced = {}", job.key, force)
            if (!job.isValid) {
                logger.debug("[job][run]{} Not valid - removing from schedule", job.key)
                unschedule(job.key, false)
                return Optional.empty()
            }
            if (job.isDisabled) {
                logger.debug("[job][run]{} Not allowed to run now because disabled", job.key)
                return Optional.empty()
            }
            if (paused.get() && !force) {
                logger.debug("[job][run]{} Not allowed to run now because paused", job.key)
                return Optional.empty()
            }
            if (currentExecution.get() != null) {
                logger.debug("[job][run]{} Not allowed to run now because already running", job.key)
                return Optional.empty()
            }
            // Task to run
            val taskRun = run
            // Scheduling
            logger.debug("[job][run]{} Job task submitted asynchronously", job.key)
            val execution = CompletableFuture.runAsync(taskRun, jobExecutorService)
            currentExecution.set(execution)
            // The task may already be over (fast task, same-thread executor): its completion
            // callback then cleared the reference BEFORE the line above registered the
            // execution. Without this check the finished execution would stay registered
            // and the job would be reported as running, and never start again.
            if (execution.isDone) {
                currentExecution.compareAndSet(execution, null)
            }
            return Optional.of(execution)
        }

        /**
         * Checks if the job is in timeout and if yes, [stops][stop] it.
         *
         * The timeout of the job is used when it defines one, the timeout of the scheduler
         * otherwise. The elapsed time is measured from the start time recorded when the
         * current run started.
         *
         * @return True if the job _was_ in timeout and had to be stopped.
         */
        fun checkForTimeout(): Boolean {
            val timeout = job.timeout ?: this@DefaultJobScheduler.timeout
            // No timeout, so won't be stopped
            if (timeout == null) {
                logger.debug("[job][timeout]{} Timeout - not configured for timeout", job.key)
                return false
            }
            // Job is not running, won't be stopped
            if (currentExecution.get() == null) {
                logger.debug("[job][timeout]{} Timeout - not running", job.key)
                return false
            }
            // We take the actual start date of the run; 0 means submitted but not started yet
            val start = startTime.get()
            if (start == 0L) {
                logger.debug("[job][timeout]{} Timeout - not started", job.key)
                return false
            }
            // Current execution time of this job
            val elapsed = System.currentTimeMillis() - start
            return if (elapsed >= timeout.toMillis()) {
                logger.info("[job][timeout]{} Timeout - stopping the job", job.key)
                // We stop the job
                stop()
                // Metrics for this job
                lastTimeoutCount.incrementAndGet()
                // We assume it's been stopped
                true
            } else {
                // Still under the timeout, we keep running
                logger.debug("[job][timeout]{} Timeout - still OK", job.key)
                false
            }
        }

        /**
         * Unregisters the current execution, if any, and cancels it.
         *
         * The reference is swapped with `getAndSet` so that the cancellation happens exactly
         * once: an update function given to `updateAndGet` may be re-applied under contention
         * and must therefore be free of side effects.
         *
         * Note that, per the JDK documentation, `CompletableFuture.cancel` does not interrupt
         * the thread running the task.
         *
         * @return Always `true`
         */
        fun stop(): Boolean {
            logger.debug("[job]{} Stopping job", job.key)
            currentExecution.getAndSet(null)?.cancel(true)
            return true
        }

        /**
         * Cancels the scheduling of this service.
         *
         * @param forceStop `true` to also stop the current execution; the flag is forwarded
         * to the cancellation of the scheduled future as well
         * @return Result of the cancellation of the scheduled future, `false` if the service was not scheduled
         */
        fun cancel(forceStop: Boolean): Boolean {
            logger.debug("[job]{} Cancelling job (forcing = {})", job.key, forceStop)
            if (forceStop) stop()
            val future = scheduledFuture ?: return false
            return future.cancel(forceStop)
        }

        /**
         * Computes the date of the next run from the remaining delay of the scheduled future.
         *
         * @param valid Validity of the job
         * @return Date of the next run, or `null` if the job is not valid or not scheduled
         */
        private fun getNextRunDate(valid: Boolean): LocalDateTime? {
            if (!valid) return null
            val future = scheduledFuture ?: return null
            val remainingSeconds = future.getDelay(TimeUnit.SECONDS)
            return Time.now().plus(remainingSeconds, ChronoUnit.SECONDS)
        }

        /** Pauses the job and notifies the job listener; does nothing if the job is not scheduled. */
        fun pause() {
            if (scheduledFuture == null) return
            paused.set(true)
            jobListener.onJobPaused(job.key)
        }

        /** Resumes the job and notifies the job listener; does nothing if the job is not scheduled. */
        fun resume() {
            if (scheduledFuture == null) return
            paused.set(false)
            jobListener.onJobResumed(job.key)
        }

        /**
         * Listener given to the job task: forwards the progress to the job listener,
         * traces it and keeps it as the last known progress of the run.
         */
        private inner class DefaultJobRunListener : JobRunListener {
            override fun progress(progress: JobRunProgress) {
                jobListener.onJobProgress(job.key, progress)
                logger.debug("[job][progress]{} {}", job.key, progress.text)
                runProgress.set(progress)
            }
        }

    }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy