// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
// You can buy this project and download/modify it as often as you want.
/*
* Copyright 2018 ABSA Group Limited
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package za.co.absa.enceladus.conformance
import java.text.SimpleDateFormat
import java.util.Date
import org.apache.commons.configuration2.Configuration
import org.apache.spark.SPARK_VERSION
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.slf4j.{Logger, LoggerFactory}
import za.co.absa.enceladus.common.Constants._
import za.co.absa.enceladus.common.version.SparkVersionGuard
import za.co.absa.enceladus.conformance.config.ConformanceConfig
import za.co.absa.enceladus.conformance.interpreter.{Always, DynamicInterpreter, FeatureSwitches}
import za.co.absa.enceladus.conformance.streaming.InfoDateFactory
import za.co.absa.enceladus.dao.MenasDAO
import za.co.absa.enceladus.dao.auth.{MenasCredentialsFactory, MenasKerberosCredentialsFactory, MenasPlainCredentialsFactory}
import za.co.absa.enceladus.dao.rest.{MenasConnectionStringParser, RestDaoFactory}
import za.co.absa.enceladus.model.Dataset
import za.co.absa.hyperdrive.ingestor.api.transformer.{StreamTransformer, StreamTransformerFactory}
/**
 * A [[StreamTransformer]] that runs dynamic conformance over an incoming DataFrame.
 *
 * All collaborators are supplied implicitly:
 *  - `cmd`             the conformance configuration (dataset name/version, report version, credentials factory)
 *  - `featureSwitches` the feature switches made available to the interpreter
 *  - `menasBaseUrls`   the Menas base URLs handed to the REST DAO factory
 *  - `infoDateFactory` the provider of the information date column expression
 */
class HyperConformance (implicit cmd: ConformanceConfig,
                        featureSwitches: FeatureSwitches,
                        menasBaseUrls: List[String],
                        infoDateFactory: InfoDateFactory) extends StreamTransformer {
  val log: Logger = LoggerFactory.getLogger(this.getClass)

  /**
   * Conforms `rawDf` against the dataset definition fetched from Menas.
   *
   * The Spark session is taken from `rawDf` itself. A Menas DAO is created and authenticated on
   * every invocation, and the dataset named by `cmd.datasetName` / `cmd.datasetVersion` is fetched
   * through it. Both the raw and the resulting schema are logged after the transformation is built.
   *
   * @param rawDf the input DataFrame
   * @return the DataFrame produced by [[applyConformanceTransformations]]
   * @throws IllegalArgumentException if `cmd.reportVersion` is empty
   */
  @throws[IllegalArgumentException]
  def transform(rawDf: DataFrame): DataFrame = {
    // Kept implicit: calls below may resolve the session and the DAO implicitly.
    implicit val spark: SparkSession = rawDf.sparkSession

    val credentials = cmd.menasCredentialsFactory.getInstance()
    implicit val dao: MenasDAO = RestDaoFactory.getInstance(credentials, menasBaseUrls)
    dao.authenticate()

    logPreConformanceInfo(rawDf)

    val datasetDefinition = dao.getDataset(cmd.datasetName, cmd.datasetVersion)
    val result = applyConformanceTransformations(rawDf, datasetDefinition)

    log.info(s"Raw schema: ${rawDf.schema.treeString}")
    log.info(s"Publish schema: ${result.schema.treeString}")
    result
  }

  /**
   * Runs the dynamic interpreter on `rawDf` and then adds the info date, info date string and
   * info version columns through `withColumnIfDoesNotExist`.
   *
   * Where the info date expression evaluates to null, the date column falls back to
   * `current_date()` and the string column falls back to an empty string.
   *
   * @param rawDf       the input DataFrame
   * @param conformance the dataset definition to interpret
   * @return the interpreted DataFrame with the three information columns applied
   * @throws IllegalArgumentException if `cmd.reportVersion` is empty
   */
  def applyConformanceTransformations(rawDf: DataFrame, conformance: Dataset)
                                     (implicit sparkSession: SparkSession, menasDAO: MenasDAO): DataFrame = {
    import za.co.absa.enceladus.utils.implicits.DataFrameImplicits.DataFrameEnhancements

    // Resolved first so that a missing report version fails before any interpretation happens.
    val version = getReportVersion
    val infoDate = infoDateFactory.getInfoDateColumn(rawDf)

    val interpreted = DynamicInterpreter.interpret(conformance, rawDf)

    val infoDateOrToday = coalesce(infoDate, current_date())
    val infoDateTextOrEmpty = coalesce(date_format(infoDate,"yyyy-MM-dd"), lit(""))
    val infoVersion = lit(version)

    interpreted
      .withColumnIfDoesNotExist(InfoDateColumn, infoDateOrToday)
      .withColumnIfDoesNotExist(InfoDateColumnString, infoDateTextOrEmpty)
      .withColumnIfDoesNotExist(InfoVersionColumn, infoVersion)
  }

  /** Logs the Menas URLs, the dataset name/version and the JSON schema of `streamData`. */
  private def logPreConformanceInfo(streamData: DataFrame): Unit = {
    val urls = menasBaseUrls.mkString(",")
    log.info(s"Menas URLs: $urls, dataset=${cmd.datasetName}, version=${cmd.datasetVersion}")
    log.info(s"Input schema: ${streamData.schema.prettyJson}")
  }

  /**
   * Returns the report version held by the configuration.
   *
   * @throws IllegalArgumentException if the configuration carries no report version
   */
  @throws[IllegalArgumentException]
  private def getReportVersion(implicit cmd: ConformanceConfig): Int =
    cmd.reportVersion.getOrElse(throw new IllegalArgumentException("Report version is not provided."))
}
/**
 * This is the definition of Dynamic Conformance as a component of Hyperdrive.
 *
 * In order to use it in hyperdrive the component needs to be configured in 'ingestion.properties' as follows:
 * {{{
 * transformer.hyperconformance.menas.rest.uri=http://localhost:8080
 * transformer.hyperconformance.dataset.name=example
 * transformer.hyperconformance.dataset.version=1
 * transformer.hyperconformance.report.date=2020-01-29
 * transformer.hyperconformance.report.version=1
 * transformer.hyperconformance.event.timestamp.column=EV_TIME
 *
 * # Either plain credentials
 * transformer.hyperconformance.menas.credentials.file=/path/menas.credentials
 *
 * # Or a keytab
 * transformer.hyperconformance.menas.auth.keytab=/path/to/keytab
 * }}}
 *
 * The Menas URI, the dataset name and the dataset version are mandatory. The report version
 * defaults to 1 when its key is absent. Exactly one authentication method must be configured.
 */
object HyperConformance extends StreamTransformerFactory with HyperConformanceAttributes {
  import HyperConformanceAttributes._

  val log: Logger = LoggerFactory.getLogger(this.getClass)

  // Used when the configuration does not contain the report version key.
  private val defaultReportVersion = 1

  /**
   * Builds a [[HyperConformance]] transformer from a Hyperdrive configuration.
   *
   * The Spark version is checked and the configuration is validated before anything else is read.
   *
   * @param conf the component configuration
   * @return a new `HyperConformance` instance
   * @throws IllegalArgumentException if a mandatory key is missing, or if the authentication
   *                                  settings are absent or ambiguous
   */
  @throws[IllegalArgumentException]
  override def apply(conf: Configuration): StreamTransformer = {
    log.info("Building HyperConformance")

    SparkVersionGuard.fromDefaultSparkCompatibilitySettings.ensureSparkVersionCompatibility(SPARK_VERSION)
    validateConfiguration(conf)

    val credentialsFactory = getMenasCredentialsFactory(conf)

    // Read in the same order in which the original argument list evaluated them.
    val dsName = conf.getString(datasetNameKey)
    val dsVersion = conf.getInt(datasetVersionKey)
    // Still need a report date for mapping table patterns
    val todayFormatted = new SimpleDateFormat(ReportDateFormat).format(new Date())
    val reportVersionOpt = Option(getReportVersion(conf))

    implicit val conformanceConfig: ConformanceConfig = ConformanceConfig(
      publishPathOverride = None,
      experimentalMappingRule = Some(true),
      isCatalystWorkaroundEnabled = Some(true),
      autocleanStandardizedFolder = Some(false),
      datasetName = dsName,
      datasetVersion = dsVersion,
      reportDate = todayFormatted,
      reportVersion = reportVersionOpt,
      performanceMetricsFile = None,
      folderPrefix = None,
      persistStorageLevel = None,
      menasCredentialsFactory = credentialsFactory
    )

    implicit val switches: FeatureSwitches = FeatureSwitches()
      .setExperimentalMappingRuleEnabled(true)
      .setCatalystWorkaroundEnabled(true)
      .setControlFrameworkEnabled(false)
      .setBroadcastStrategyMode(Always)
      .setBroadcastMaxSizeMb(0)

    implicit val infoDates: InfoDateFactory = InfoDateFactory.getFactoryFromConfig(conf)
    implicit val menasBaseUrls: List[String] = MenasConnectionStringParser.parse(conf.getString(menasUriKey))

    // The four implicit values above satisfy the implicit constructor parameters.
    new HyperConformance()
  }

  /** Reads the report version from `conf`, falling back to the default when the key is absent. */
  private def getReportVersion(conf: Configuration): Int =
    if (!conf.containsKey(reportVersionKey)) defaultReportVersion
    else conf.getInt(reportVersionKey)

  /**
   * Verifies that the Menas URI, dataset name and dataset version keys are all present.
   *
   * @param conf the configuration to check
   * @throws IllegalArgumentException listing every mandatory key that is missing
   */
  @throws[IllegalArgumentException]
  def validateConfiguration(conf: Configuration): Unit = {
    val required = List(menasUriKey, datasetNameKey, datasetVersionKey)

    required.filter(key => !conf.containsKey(key)) match {
      case Nil => ()
      case missingKeys =>
        throw new IllegalArgumentException(s"Missing mandatory configuration parameters for keys: ${missingKeys.mkString(", ")}.")
    }
  }

  /**
   * Picks the Menas credentials factory matching the configured authentication method.
   *
   * @param conf the configuration to inspect
   * @return a plain credentials factory when only the credentials file key is present, or a
   *         Kerberos credentials factory when only the keytab key is present
   * @throws IllegalArgumentException if neither key or both keys are present
   */
  @throws[IllegalArgumentException]
  private def getMenasCredentialsFactory(conf: Configuration): MenasCredentialsFactory = {
    val credentialsFileGiven = conf.containsKey(menasCredentialsFileKey)
    val keytabGiven = conf.containsKey(menasAuthKeytabKey)

    if (credentialsFileGiven && keytabGiven) {
      throw new IllegalArgumentException("Either a credentials file or a keytab should be specified, but not both.")
    } else if (credentialsFileGiven) {
      new MenasPlainCredentialsFactory(conf.getString(menasCredentialsFileKey))
    } else if (keytabGiven) {
      new MenasKerberosCredentialsFactory(conf.getString(menasAuthKeytabKey))
    } else {
      throw new IllegalArgumentException("No authentication method is specified.")
    }
  }
}