All Downloads are FREE. Search and download functionalities are using the official Maven repository.

za.co.absa.enceladus.common.ControlInfoValidation.scala Maven / Gradle / Ivy

There is a newer version: 3.0.0
Show newest version
/*
 * Copyright 2018 ABSA Group Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package za.co.absa.enceladus.common

import za.co.absa.atum.core.Atum
import za.co.absa.atum.model.Checkpoint
import za.co.absa.enceladus.utils.implicits.OptionImplicits._
import za.co.absa.enceladus.utils.validation.ValidationException

import scala.util.{Failure, Success, Try}

/**
 * Reads record counts from Atum checkpoints, publishes them as Atum additional info and reports
 * whether both the "raw" and the "source" counts could be obtained.
 */
object ControlInfoValidation {
  private val rawFieldName = "raw"
  private val sourceFieldName = "source"

  /**
   * Looks up the record counts of the "raw" and "source" checkpoints in Atum's control measure and,
   * for every count that was found, stores it in Atum's additional info under the keys
   * `raw_record_count` / `source_record_count`.
   *
   * @return `Success(())` when both counts were obtained; otherwise a `Failure` wrapping a
   *         [[ValidationException]] that carries the message of every failed lookup
   */
  def addRawAndSourceRecordCountsToMetadata(): Try[Unit] = {
    val checkpoints = Atum.getControlMeasure.checkpoints
    val checkpointRawRecordCount = getCountFromGivenCheckpoint(rawFieldName, checkpoints)
    val checkpointSourceRecordCount = getCountFromGivenCheckpoint(sourceFieldName, checkpoints)

    // A count that was read successfully is published even if the other lookup failed.
    checkpointRawRecordCount
      .foreach(count => Atum.setAdditionalInfo(s"${rawFieldName}_record_count" -> count.toString))

    checkpointSourceRecordCount
      .foreach(count => Atum.setAdditionalInfo(s"${sourceFieldName}_record_count" -> count.toString))

    validateFields(checkpointRawRecordCount, checkpointSourceRecordCount)
  }

  /**
   * Combines the two lookup results into a single validation outcome.
   *
   * @param checkpointRawRecordCount    result of reading the raw record count
   * @param checkpointSourceRecordCount result of reading the source record count
   * @return `Success(())` if both inputs are successes; otherwise a `Failure` with a
   *         [[ValidationException]] whose message joins the failure messages with ", " and whose
   *         error list contains them in raw-then-source order
   */
  private def validateFields(checkpointRawRecordCount: Try[Long],
                             checkpointSourceRecordCount: Try[Long]): Try[Unit] = {
    val errorMessage = "Checkpoint validation failed: "

    // Collect the messages of whichever lookups failed, keeping raw before source.
    val failureMessages = Seq(checkpointRawRecordCount, checkpointSourceRecordCount).collect {
      case Failure(er) => er.getMessage
    }

    if (failureMessages.isEmpty) {
      // Explicit unit value; `Success()` would rely on deprecated argument adaptation.
      Success(())
    } else {
      Failure(new ValidationException(s"$errorMessage ${failureMessages.mkString(", ")}", failureMessages))
    }
  }


  /**
   * Extracts the record count stored in a named checkpoint.
   *
   * The checkpoint is the first one whose `name` or `workflowName` equals `checkpointName`
   * (case-insensitively); within it, the first control whose `controlType` equals Atum's
   * `controlTypeRecordCount` constant (case-insensitively) is used.
   *
   * @param checkpointName name (or workflow name) of the checkpoint to look for
   * @param checkpoints    checkpoints to search
   * @return the non-negative record count, or a `Failure` when the checkpoint is missing, it has
   *         no record count control, or the control value is not a non-negative `Long`
   */
  def getCountFromGivenCheckpoint(checkpointName: String, checkpoints: Seq[Checkpoint]): Try[Long] = {
    import za.co.absa.atum.core.Constants._
    import scala.util.control.NonFatal

    for {
      checkpoint <- checkpoints
        .find(c => c.name.equalsIgnoreCase(checkpointName) || c.workflowName.equalsIgnoreCase(checkpointName))
        .toTry(new Exception(s"Missing $checkpointName checkpoint"))
      measurement <- checkpoint.controls.find(m => m.controlType.equalsIgnoreCase(controlTypeRecordCount))
        .toTry(new Exception(s"$checkpointName checkpoint does not have a $controlTypeRecordCount control"))
      res <- Try(measurement.controlValue.toString.toLong)
        .flatMap { rowCount =>
          // Negative counts are rejected as a failure value rather than by throwing.
          if (rowCount >= 0) Success(rowCount) else Failure(new Exception("Negative value"))
        }
        .recoverWith {
          // Wrap parse and range errors alike in one message naming the checkpoint and control.
          case NonFatal(t) =>
            Failure(new Exception(s"Wrong $checkpointName $controlTypeRecordCount value: ${t.getMessage}"))
        }
    } yield res

  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy