All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.timgent.sparkdataquality.checks.ArbDualDsCheck.scala Maven / Gradle / Ivy

package com.github.timgent.sparkdataquality.checks

import com.github.timgent.sparkdataquality.SdqError.ArbCheckError
import com.github.timgent.sparkdataquality.checks.CheckDescription.SimpleCheckDescription
import com.github.timgent.sparkdataquality.checks.QCCheck.DualDsQCCheck
import com.github.timgent.sparkdataquality.checkssuite.DescribedDsPair
import org.apache.spark.sql.Dataset

import scala.util.{Failure, Success, Try}

/**
  * A quality check that is evaluated against a described pair of datasets.
  *
  * Implementations supply a [[CheckDescription]] and the logic that turns a
  * [[DescribedDsPair]] into a [[CheckResult]]; the check type is fixed to
  * `QcType.ArbDualDsCheck`.
  */
trait ArbDualDsCheck extends DualDsQCCheck {

  /** Always reports this check as an arbitrary dual-dataset check. */
  override def qcType: QcType = QcType.ArbDualDsCheck

  /** Description identifying this check. */
  def description: CheckDescription

  /**
    * Runs the check.
    *
    * @param dsPair the described pair of datasets to evaluate
    * @return the outcome of the check
    */
  def applyCheck(dsPair: DescribedDsPair): CheckResult
}

object ArbDualDsCheck {

  /**
    * The pair of raw datasets handed to a user-supplied check function.
    *
    * @param ds one dataset of the pair
    * @param dsToCompare the other dataset of the pair (by its name, the one `ds` is compared with)
    */
  case class DatasetPair(ds: Dataset[_], dsToCompare: Dataset[_])

  /**
    * Creates an [[ArbDualDsCheck]] from a textual description and a check function.
    *
    * The check function is invoked inside a `Try`: if it throws a non-fatal exception the
    * resulting [[CheckResult]] has status `CheckStatus.Error` and carries an `ArbCheckError`
    * wrapping the exception; otherwise the returned [[RawCheckResult]] is completed with the
    * check type, the description and the datasource description of the pair.
    *
    * @param checkDescription text used to build the check's `SimpleCheckDescription`
    * @param check function producing a raw result from the pair of datasets
    * @return a check wrapping `check`
    */
  def apply(
      checkDescription: String
  )(check: DatasetPair => RawCheckResult): ArbDualDsCheck =
    new ArbDualDsCheck {
      override def description: SimpleCheckDescription = SimpleCheckDescription(checkDescription)

      override def applyCheck(ddsPair: DescribedDsPair): CheckResult = {
        // Kept as a `def` so the description is only looked up after the user check has run.
        def sourceDescription = Some(ddsPair.datasourceDescription)

        Try(check(ddsPair.rawDatasetPair)) match {
          case Success(rawResult) =>
            rawResult.withDescription(qcType, description, sourceDescription)
          case Failure(cause) =>
            CheckResult(
              qcType,
              CheckStatus.Error,
              "Check failed due to unexpected exception during evaluation",
              description,
              sourceDescription,
              errors = Seq(ArbCheckError(sourceDescription, description, Some(cause)))
            )
        }
      }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy