com.dimajix.flowman.spec.history.RepositoryStateStore.scala

/*
 * Copyright (C) 2018 The Flowman Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.dimajix.flowman.spec.history

import java.time.Clock
import java.time.ZoneId

import com.dimajix.common.ExceptionUtils.reasons
import com.dimajix.flowman.documentation.Documenter
import com.dimajix.flowman.documentation.EntityDoc
import com.dimajix.flowman.execution.Status
import com.dimajix.flowman.graph.GraphBuilder
import com.dimajix.flowman.history.AbstractStateStore
import com.dimajix.flowman.history.DocumentationQuery
import com.dimajix.flowman.history.DocumenterToken
import com.dimajix.flowman.history.Graph
import com.dimajix.flowman.history.JobColumn
import com.dimajix.flowman.history.JobOrder
import com.dimajix.flowman.history.JobQuery
import com.dimajix.flowman.history.JobState
import com.dimajix.flowman.history.JobToken
import com.dimajix.flowman.history.Measurement
import com.dimajix.flowman.history.MetricSeries
import com.dimajix.flowman.history.TargetColumn
import com.dimajix.flowman.history.TargetOrder
import com.dimajix.flowman.history.TargetQuery
import com.dimajix.flowman.history.TargetState
import com.dimajix.flowman.history.TargetToken
import com.dimajix.flowman.model.DocumenterResult
import com.dimajix.flowman.model.Job
import com.dimajix.flowman.model.JobDigest
import com.dimajix.flowman.model.JobResult
import com.dimajix.flowman.model.Logging
import com.dimajix.flowman.model.Target
import com.dimajix.flowman.model.TargetDigest
import com.dimajix.flowman.model.TargetResult
import com.dimajix.flowman.spi.LogFilter



object RepositoryStateStore {
    case class RepositoryDocumenterToken(
        parent:Option[RepositoryJobToken]
    ) extends DocumenterToken

    case class RepositoryTargetToken(
        run:TargetState,
        parent:Option[RepositoryJobToken]
    ) extends TargetToken

    case class RepositoryJobToken(
        run:JobState,
        graph:GraphBuilder
    ) extends JobToken
}


abstract class RepositoryStateStore extends AbstractStateStore with Logging {
    import RepositoryStateStore._

    private val logFilters = LogFilter.filters

    /**
      * Returns the state of a job, or None if no information is available
      * @param job
      * @return
      */
    override def getJobState(job: JobDigest): Option[JobState] = {
        logger.debug(s"Checking last state of '${job.phase}' job '${job.job}' in history database")
        withRepository { repository =>
            repository.getJobState(job)
        }
    }

    /**
     * Returns all metrics belonging to a specific job instance
     * @param jobId
     * @return
     */
    override def getJobMetrics(jobId:String) : Seq[Measurement] = {
        withRepository { repository =>
            repository.getJobMetrics(jobId)
        }
    }

    /**
     * Returns the execution graph belonging to a specific job run
     *
     * @param jobId
     * @return
     */
    override def getJobGraph(jobId: String): Option[Graph] = {
        withRepository { repository =>
            repository.getJobGraph(jobId)
        }
    }

    /**
     * Returns the execution environment of a specific job run
     *
     * @param jobId
     * @return
     */
    override def getJobEnvironment(jobId: String): Map[String, String] = {
        withRepository { repository =>
            repository.getJobEnvironment(jobId)
        }
    }

    /**
      * Starts the run and returns a token, which can be anything
      *
      * @param digest
      * @return
      */
    override def startJob(job:Job, digest:JobDigest) : JobToken = {
        val now = Clock.systemDefaultZone().instant().atZone(ZoneId.systemDefault())
        val state = JobState(
            "",
            job.namespace.map(_.name).getOrElse(""),
            job.project.map(_.name).getOrElse(""),
            job.project.flatMap(_.version).getOrElse(""),
            job.name,
            digest.phase,
            digest.args,
            Status.RUNNING,
            Some(now),
            None
        )

        // Create redacted environment
        val env = job.context.environment.toMap
            .flatMap { case(key,value) =>
                LogFilter.filter(logFilters, key, value.toString)
            }

        logger.debug(s"Start '${digest.phase}' job '${state.job}' in history database")
        val run = withRepository { repository =>
            repository.insertJobState(state, env)
        }

        RepositoryJobToken(run, new GraphBuilder(job.context, digest.phase))
    }

    /**
      * Marks a job run as finished with the given result and stores its metrics and execution graph
      *
      * @param token
      * @param result
      * @param metrics
      */
    override def finishJob(token:JobToken, result: JobResult, metrics:Seq[Measurement]=Seq.empty) : Unit = {
        val status = result.status
        val jdbcToken = token.asInstanceOf[RepositoryJobToken]
        val run = jdbcToken.run

        val now = Clock.systemDefaultZone().instant().atZone(ZoneId.systemDefault())
        val graph = Graph.ofGraph(jdbcToken.graph.build())
        withRepository { repository =>
            logger.info(s"Mark '${run.phase}' job '${run.job}' as $status in history database")
            repository.updateJobState(run.copy(endDateTime=Some(now), status=status, error=result.exception.map(reasons)))
            logger.info(s"Storing execution metrics of '${run.phase}' job '${run.job}' in history database")
            repository.insertJobMetrics(run.id, metrics)
            logger.info(s"Storing execution graph of '${run.phase}' job '${run.job}' in history database")
            repository.insertJobGraph(run.id, graph)
        }
    }
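
    // A minimal usage sketch (not part of the original source) of the job token lifecycle:
    // startJob() persists a RUNNING JobState and returns a RepositoryJobToken, which is later
    // handed back to finishJob() together with the JobResult and any collected metrics.
    // `store`, `job`, `digest`, `result` and `metrics` are assumed to be provided by the caller.
    //
    //   val token = store.startJob(job, digest)
    //   // ... execute the job ...
    //   store.finishJob(token, result, metrics)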

    /**
      * Returns the state of a specific target on its last run, or None if no information is available
      * @param target
      * @return
      */
    override def getTargetState(target:TargetDigest) : Option[TargetState] = {
        logger.debug(s"Checking state of target '${target.target}' in history database")
        withRepository { repository =>
            repository.getTargetState(target)
        }
    }

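    /**
      * Returns the state of a specific target, identified by its id
      * @param targetId
      * @return
      */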
    def getTargetState(targetId: String): TargetState = {
        withRepository { repository =>
            repository.getTargetState(targetId)
        }
    }

    /**
      * Starts the run and returns a token, which can be anything
      * @param digest
      * @return
      */
    override def startTarget(target:Target, digest:TargetDigest, parent:Option[JobToken]) : TargetToken = {
        val now = Clock.systemDefaultZone().instant().atZone(ZoneId.systemDefault())

        val parentRun = parent.map(_.asInstanceOf[RepositoryJobToken])
        val run =  TargetState(
            "",
            parentRun.map(_.run.id),
            target.namespace.map(_.name).getOrElse(""),
            target.project.map(_.name).getOrElse(""),
            target.project.flatMap(_.version).getOrElse(""),
            target.name,
            digest.partitions,
            digest.phase,
            Status.RUNNING,
            Some(now),
            None,
            None
        )

        logger.debug(s"Start '${digest.phase}' target '${run.target}' in history database")
        val run2 = withRepository { repository =>
            repository.insertTargetState(run)
        }
        RepositoryTargetToken(run2, parentRun)
    }

    /**
      * Marks a target run as finished with the given result
      *
      * @param token
      * @param result
      */
    override def finishTarget(token:TargetToken, result: TargetResult) : Unit = {
        val status = result.status
        val jdbcToken = token.asInstanceOf[RepositoryTargetToken]
        val run = jdbcToken.run
        logger.info(s"Mark '${run.phase}' target '${run.target}' as $status in history database")

        val now = Clock.systemDefaultZone().instant().atZone(ZoneId.systemDefault())
        withRepository{ repository =>
            repository.updateTargetState(run.copy(endDateTime=Some(now), status=status, error=result.exception.map(reasons)))
        }

        // Add target to Job's build graph
        if (status != Status.SKIPPED) {
            jdbcToken.parent.foreach {
                _.graph.addTarget(result.target)
            }
        }
    }
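
    // A minimal usage sketch (not part of the original source) of how targets are tied to a job run:
    // passing the job token as parent links the TargetState to its JobState, and finishTarget()
    // then adds every non-skipped target to that job's execution graph.
    // `store`, `jobToken`, `target`, `digest` and `result` are assumed to be provided by the caller.
    //
    //   val targetToken = store.startTarget(target, digest, Some(jobToken))
    //   // ... build the target ...
    //   store.finishTarget(targetToken, result)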

    /**
     * Starts the run and returns a token, which can be anything
     *
     * @param documenter
     * @return
     */
    override def startDocumenter(documenter: Documenter, parent: Option[JobToken]): DocumenterToken = {
        val parentRun = parent.map(_.asInstanceOf[RepositoryJobToken])
        RepositoryDocumenterToken(parentRun)
    }

    /**
     * Finishes the documenter run and stores the generated documentation for the parent job
     *
     * @param token The token returned by startDocumenter
     * @param result
     */
    override def finishDocumenter(token: DocumenterToken, result: DocumenterResult): Unit = {
        result.documentation.foreach { doc =>
            token.asInstanceOf[RepositoryDocumenterToken].parent.foreach { parent =>
                withRepository { repository =>
                    logger.info(s"Storing documentation of '${parent.run.phase}' job '${parent.run.job}' in history database")
                    repository.insertJobDocumentation(parent.run.id, doc)
                }
            }
        }
    }

    /**
      * Returns a list of jobs matching the query criteria
      *
      * @param query
      * @param order
      * @param limit
      * @param offset
      * @return
      */
    override def findJobs(query:JobQuery, order:Seq[JobOrder]=Seq(), limit:Int=10000, offset:Int=0) : Seq[JobState] = {
        withRepository { repository =>
            repository.findJobs(query, order, limit, offset)
        }
    }


    override def countJobs(query: JobQuery): Int = {
        withRepository { repository =>
            repository.countJobs(query)
        }
    }


    override def countJobs(query: JobQuery, grouping: JobColumn): Map[String, Int] = {
        withRepository { repository =>
            repository.countJobs(query, grouping).toMap
        }
    }

    override def findJobMetrics(jobQuery: JobQuery, groupings: Seq[String]): Seq[MetricSeries] = {
        withRepository { repository =>
            repository.findMetrics(jobQuery, groupings)
        }
    }

    /**
      * Returns a list of targets matching the query criteria
      *
      * @param query
      * @param order
      * @param limit
      * @param offset
      * @return
      */
    override def findTargets(query:TargetQuery, order:Seq[TargetOrder]=Seq.empty, limit:Int=10000, offset:Int=0) : Seq[TargetState] = {
        withRepository { repository =>
            repository.findTargets(query, order, limit, offset)
        }
    }

    override def countTargets(query: TargetQuery): Int = {
        withRepository { repository =>
            repository.countTargets(query)
        }
    }

    override def countTargets(query: TargetQuery, grouping: TargetColumn): Map[String, Int] = {
        withRepository { repository =>
            repository.countTargets(query, grouping).toMap
        }
    }


    override def findDocumentation(query: DocumentationQuery): Seq[EntityDoc] = {
        withRepository { repository =>
            repository.findDocumentation(query)
        }
    }

    def repository : StateRepository

    /**
      * Performs a task with the state repository, making sure that the required tables exist first
      *
      * @param query
      * @tparam T
      * @return
      */
    protected def withRepository[T](query: StateRepository => T): T = {
        ensureTables()
        query(repository)
    }

    private var tablesCreated: Boolean = false

    private def ensureTables(): Unit = {
        // Create repository tables if they have not been created yet
        if (!tablesCreated) {
            repository.create()
            tablesCreated = true
        }
    }
}
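
// A minimal sketch (not part of the original source) showing how a concrete subclass might provide
// the abstract `repository` member declared above. `MyStateRepository` is a hypothetical
// StateRepository implementation, not part of Flowman.
//
//   class MyStateStore extends RepositoryStateStore {
//       override val repository: StateRepository = new MyStateRepository()
//   }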