com.dimajix.flowman.spec.hook.SimpleReportHook.scala Maven / Gradle / Ivy
/*
* Copyright 2021 Kaya Kupferschmidt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.dimajix.flowman.spec.hook
import java.io.PrintStream
import java.time.Duration
import java.time.Instant
import java.time.ZoneId
import java.util.Locale
import com.fasterxml.jackson.annotation.JsonProperty
import org.apache.hadoop.fs.FileAlreadyExistsException
import org.apache.hadoop.fs.Path
import org.slf4j.LoggerFactory
import com.dimajix.flowman.execution.AssertionToken
import com.dimajix.flowman.execution.Context
import com.dimajix.flowman.execution.Execution
import com.dimajix.flowman.execution.JobToken
import com.dimajix.flowman.execution.LifecycleToken
import com.dimajix.flowman.execution.OutputMode
import com.dimajix.flowman.execution.Phase
import com.dimajix.flowman.execution.Status
import com.dimajix.flowman.execution.TargetToken
import com.dimajix.flowman.execution.Token
import com.dimajix.flowman.model.Assertion
import com.dimajix.flowman.model.AssertionResult
import com.dimajix.flowman.model.BaseHook
import com.dimajix.flowman.model.Hook
import com.dimajix.flowman.model.Job
import com.dimajix.flowman.model.JobDigest
import com.dimajix.flowman.model.JobLifecycle
import com.dimajix.flowman.model.JobResult
import com.dimajix.flowman.model.LifecycleResult
import com.dimajix.flowman.model.Target
import com.dimajix.flowman.model.TargetDigest
import com.dimajix.flowman.model.TargetResult
import com.dimajix.flowman.spec.hook.SimpleReportHook.ReporterAssertionToken
import com.dimajix.flowman.spec.hook.SimpleReportHook.ReporterJobToken
import com.dimajix.flowman.spec.hook.SimpleReportHook.ReporterLifecycleToken
import com.dimajix.flowman.spec.hook.SimpleReportHook.ReporterTargetToken
import com.dimajix.flowman.spi.LogFilter
object SimpleReportHook {
case class ReporterLifecycleToken(output:Option[PrintStream]) extends LifecycleToken
case class ReporterJobToken(phase:Phase, output:Option[PrintStream]) extends JobToken
case class ReporterTargetToken(phase:Phase, output:Option[PrintStream]) extends TargetToken
case class ReporterAssertionToken(output:Option[PrintStream]) extends AssertionToken
}
case class SimpleReportHook(
instanceProperties: Hook.Properties,
location:Path,
mode:OutputMode = OutputMode.OVERWRITE
) extends BaseHook {
private val logger = LoggerFactory.getLogger(classOf[ReportHook])
private def newOutput():Option[PrintStream] = {
if (location.toString == "stdout") {
Some(System.out)
}
else if (location.toString == "stderr") {
Some(System.err)
}
else {
val fs = location.getFileSystem(context.hadoopConf)
val out = mode match {
case OutputMode.OVERWRITE => fs.create(location)
case OutputMode.APPEND => fs.append(location)
case OutputMode.ERROR_IF_EXISTS =>
if (fs.exists(location))
throw new FileAlreadyExistsException(s"Cannot open report output, file $location already exists")
fs.create(location)
case OutputMode.IGNORE_IF_EXISTS =>
if (!fs.exists(location)) {
fs.create(location)
}
else {
null
}
case _ => throw new IllegalArgumentException(s"Unsupported output mode $mode")
}
Option(out).map(s => new PrintStream(s))
}
}
private def boldWhite(str:String) : String = str
private def boldCyan(str:String) : String = str
private def boldGreen(str:String) : String = str
private def boldRed(str:String) : String = str
private def boldYellow(str:String) : String = str
private val logFilters = LogFilter.filters
private val smallSeparator = boldWhite((0 to 79).map(_ => "-").mkString)
private val bigSeparator = boldWhite((0 to 79).map(_ => "=").mkString)
private def printSubtitle(p:PrintStream, s:String) : Unit = {
val l = (77 - (s.length + 1)) / 2
val t = if (l > 3) {
val sep = (0 to l).map(_ => '-').mkString
boldWhite(sep) + " " + boldCyan(s) + " " + boldWhite(sep)
}
else {
boldWhite("--- ") + boldCyan(s) + boldWhite(" ---")
}
p.println("")
p.println(t)
}
private def printTitle(p:PrintStream, title:String) : Unit = {
p.println("")
p.println(smallSeparator)
p.println(boldWhite(s" $title"))
p.println(smallSeparator)
}
private def printBigTitle(p:PrintStream, title:String) : Unit = {
p.println("")
p.println(bigSeparator)
p.println(boldWhite(s" $title"))
p.println(bigSeparator)
}
private def printEnvironment(p:PrintStream, context:Context) : Unit = {
p.println("Environment:")
context.environment.toSeq.sortBy(_._1).foreach { keyValue =>
logFilters.foldLeft(Option(keyValue))((kv, f) => kv.flatMap(kv => f.filterConfig(kv._1,kv._2.toString)))
.foreach { case (key,value) => p.println(s" $key=$value") }
}
}
private def printStatus(p:PrintStream, title:String, status:Status, duration: Duration, endTime:Instant) : Unit = {
val msg = status match {
case Status.SUCCESS|Status.SKIPPED =>
boldGreen(s"${status.toString.toUpperCase(Locale.ROOT)} $title")
case Status.SUCCESS_WITH_ERRORS =>
boldYellow(s"${status.toString.toUpperCase(Locale.ROOT)} $title")
case Status.ABORTED|Status.FAILED =>
boldRed(s"${status.toString.toUpperCase(Locale.ROOT)} $title")
case Status.RUNNING =>
boldYellow(s"ALREADY RUNNING $title")
case status =>
boldRed(s"UNKNOWN STATE '$status' in $title. Assuming failure")
}
p.println(smallSeparator)
p.println(msg)
p.println(smallSeparator)
p.println(s"Total time: ${duration.toMillis / 1000.0} s")
p.println(s"Finished at: ${endTime.atZone(ZoneId.systemDefault())}")
p.println(smallSeparator)
}
/**
* Starts the run and returns a token, which can be anything
* @param job
* @return
*/
override def startLifecycle(execution:Execution, job:Job, instance:JobLifecycle) : LifecycleToken = {
val now = Instant.now()
logger.info(s"Creating new report to $location")
val output = newOutput()
output.foreach { p =>
printBigTitle(p, s"Processing job ${job.identifier} at $now")
printEnvironment(p, job.context)
}
ReporterLifecycleToken(output)
}
/**
* Sets the status of a job after it has been started
* @param token The token returned by startJob
* @param result
*/
override def finishLifecycle(execution:Execution, token:LifecycleToken, result:LifecycleResult) : Unit = {
val lifecycleToken = token.asInstanceOf[ReporterLifecycleToken]
lifecycleToken.output.foreach { p =>
val endTime = result.endTime
val duration = result.duration
val status = result.status
printStatus(p, s"Finished lifecycle of job ${result.job.identifier}", status, duration, endTime)
p.flush()
p.close()
logger.info(s"Closed report at $location")
}
}
/**
* Starts the run and returns a token, which can be anything
* @param job
* @return
*/
override def startJob(execution:Execution, job: Job, instance: JobDigest, parent:Option[Token]): JobToken = {
val now = Instant.now()
val output = parent.flatMap {
case ReporterLifecycleToken(output) => output
case _ => newOutput()
}
output.foreach { p =>
printTitle(p, s"${instance.phase} job ${job.identifier} at $now")
if (parent.isEmpty) {
printEnvironment(p, job.context)
}
}
ReporterJobToken(instance.phase, output)
}
/**
* Sets the status of a job after it has been started
* @param token The token returned by startJob
* @param result
*/
override def finishJob(execution:Execution, token: JobToken, result: JobResult): Unit = {
val jobToken = token.asInstanceOf[ReporterJobToken]
jobToken.output.foreach { p =>
val endTime = result.endTime
val duration = result.duration
val status = result.status
printStatus(p, s"${jobToken.phase} job ${result.job.identifier}", status, duration, endTime)
}
}
/**
* Starts the run and returns a token, which can be anything
* @param target
* @return
*/
override def startTarget(execution:Execution, target: Target, instance: TargetDigest, parent: Option[Token]): TargetToken = {
val now = Instant.now()
val output = parent.flatMap {
case ReporterJobToken(_, output) => output
case _ => None
}
output.foreach { p =>
printSubtitle(p, s"${instance.phase} target ${target.identifier} at $now")
}
ReporterTargetToken(instance.phase, output)
}
/**
* Sets the status of a job after it has been started
* @param token The token returned by startJob
* @param result
*/
override def finishTarget(execution:Execution, token: TargetToken, result: TargetResult): Unit = {
val targetToken = token.asInstanceOf[ReporterTargetToken]
targetToken.output.foreach { p =>
p.println(s"Finished ${targetToken.phase} target ${result.target.identifier} with status ${result.status} at ${result.endTime}")
}
}
/**
* Starts the assertion and returns a token, which can be anything
* @param assertion
* @return
*/
override def startAssertion(execution:Execution, assertion: Assertion, parent: Option[Token]): AssertionToken = {
val now = Instant.now()
val output = parent.flatMap {
case ReporterJobToken(_, output) => output
case ReporterTargetToken(_, output) => output
case _ => None
}
output.foreach { p =>
printSubtitle(p, s"Starting EXECUTE assertion ${assertion.name} at $now")
}
ReporterAssertionToken(output)
}
/**
* Sets the status of a assertion after it has been started
* @param token The token returned by startJob
* @param status
*/
override def finishAssertion(execution:Execution, token: AssertionToken, result: AssertionResult): Unit = {
val assertionToken = token.asInstanceOf[ReporterAssertionToken]
assertionToken.output.foreach { p =>
printSubtitle(p, s"Finished EXECUTE assertion ${result.assertion.name} with status ${result.status} at ${result.endTime}")
}
}
}
class SimpleReportHookSpec extends HookSpec {
@JsonProperty(value="location", required=true) private var location:String = _
@JsonProperty(value="mode", required=false) private var mode:Option[String] = None
override def instantiate(context: Context): SimpleReportHook = {
SimpleReportHook(
instanceProperties(context),
new Path(context.evaluate(location)),
OutputMode.ofString(context.evaluate(mode).getOrElse("overwrite"))
)
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy