com.sparkutils.quality.impl.util.LookupIdFunctionImpl.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of quality_9.1.dbr_3.1_2.12 Show documentation
Show all versions of quality_9.1.dbr_3.1_2.12 Show documentation
A Spark library for managing in-process data quality rules via Spark SQL
The newest version!
package com.sparkutils.quality.impl.util
import com.sparkutils.shim.expressions.{UnresolvedFunction4 => UnresolvedFunction}
import org.apache.spark.sql.catalyst.expressions.{Expression, LeafExpression, Literal}
import org.apache.spark.sql.types.StringType
import org.apache.spark.unsafe.types.UTF8String
import scala.annotation.tailrec
/**
 * Allow 2.4 and 3 to co-exist
 *
 * Walks an expression tree looking for unresolved calls to the map lookup, map contains and
 * probability in functions, collecting the lookup ids they reference.
 */
object LookupIdFunctionImpl {

  /** Normalised names of the functions whose FIRST argument is the lookup id. */
  private val MapFunctionNames: Set[String] = Set("maplookup", "mapcontains")

  /** Normalised name of the function whose SECOND argument is the lookup id. */
  private val BloomFunctionName: String = "probabilityin"

  /**
   * Strips underscores and lower-cases a function name so that e.g. `map_lookup`, `mapLookup`
   * and `MAP_LOOKUP` all compare equal.
   *
   * `Locale.ROOT` is used deliberately: the default-locale `toLowerCase` maps 'I' to a dotless
   * 'ı' under Turkish / Azeri locales, which would stop upper-case names from matching.
   */
  private def normalise(funcName: String): String =
    funcName.replace("_", "").toLowerCase(java.util.Locale.ROOT)

  private def isMapFunction(funcName: String): Boolean =
    MapFunctionNames.contains(normalise(funcName))

  private def isBloomFunction(funcName: String): Boolean =
    normalise(funcName) == BloomFunctionName

  /** Marks the accumulated result as containing a lookup whose id is not a string literal. */
  private def flagExpressionLookup(res: Option[ExpressionLookupResult]): Option[ExpressionLookupResult] =
    res.map(_.copy(hasExpressionLookups = true))
      .orElse(Some(ExpressionLookupResult(Set.empty, true)))

  /**
   * Identifies the lookups used by an expression.
   *
   * Note these only work on unresolved functions - actual names and expressions do not exist
   * before analysis.
   *
   * @param exp the root of the expression tree to inspect
   * @return None when no lookup function calls were found, otherwise the set of lookup ids given
   *         as string literals together with a flag which is true when at least one lookup id
   *         was supplied as a non-literal expression
   */
  def identifyLookups(exp: Expression): Option[ExpressionLookupResult] = {

    // threads the accumulated result through each child in order
    def foldChildren(res: Option[ExpressionLookupResult], kids: Seq[Expression]): Option[ExpressionLookupResult] =
      kids.foldLeft(res)((soFar, kid) => accumulate(soFar, kid))

    @tailrec
    def accumulate(res: Option[ExpressionLookupResult], exp: Expression): Option[ExpressionLookupResult] =
      exp match {
        // map function with a literal string id: record the id, then carry on into the key argument
        case UnresolvedFunction(funcName, Seq(Literal(name: UTF8String, StringType), next), _, _)
            if isMapFunction(funcName) =>
          val lookup = MapLookupType(name.toString)
          accumulate(
            res.map(r => r.copy(constants = r.constants + lookup))
              .orElse(Some(ExpressionLookupResult(Set(lookup), false))),
            next)

        // map function with a non-literal id: only flag it, the id expression is not traversed
        case UnresolvedFunction(funcName, Seq(_, next), _, _) if isMapFunction(funcName) =>
          accumulate(flagExpressionLookup(res), next)

        // bloom function with a literal string id: record the id, then carry on into the value argument
        case UnresolvedFunction(funcName, Seq(next, Literal(name: UTF8String, StringType)), _, _)
            if isBloomFunction(funcName) =>
          val lookup = BloomLookupType(name.toString)
          accumulate(
            res.map(r => r.copy(constants = r.constants + lookup))
              .orElse(Some(ExpressionLookupResult(Set(lookup), false))),
            next)

        // bloom function with a non-literal id: only flag it, the id expression is not traversed
        case UnresolvedFunction(funcName, Seq(next, _), _, _) if isBloomFunction(funcName) =>
          accumulate(flagExpressionLookup(res), next)

        // nothing further below a leaf
        case _: LeafExpression => res

        // anything else (including lookup functions with a different arity): inspect all children
        case parent: Expression => foldChildren(res, parent.children)
      }

    accumulate(None, exp)
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy