
com.sparkutils.quality.impl.RuleResultExpression.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of quality_2.4_2.11 Show documentation
Show all versions of quality_2.4_2.11 Show documentation
A Spark library for managing in-process data quality rules via Spark SQL
package com.sparkutils.quality.impl
import com.sparkutils.quality.impl.MapUtils.getMapEntry
import com.sparkutils.quality.types._
import org.apache.spark.sql.Column
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckSuccess
import org.apache.spark.sql.catalyst.expressions.codegen.{CodeGenerator, CodegenContext, ExprCode}
import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression}
import org.apache.spark.sql.catalyst.expressions.codegen.Block.BlockHelper
import org.apache.spark.sql.catalyst.util.MapData
import org.apache.spark.sql.shim.expressions.InputTypeChecks
import org.apache.spark.sql.types.{DataType, IntegerType, LongType, StringType}
/**
 * Companion helpers for [[RuleResultExpression]]: a thin wrapper over
 * `MapUtils.getMapEntry` and a `Column` based factory.
 */
object RuleResultExpression {

  /**
   * Delegates to `getMapEntry` for `id`, reading values out of `mapData.valueArray()`.
   *
   * @param mapData         the map to search
   * @param cachedPositions positions handed through to `getMapEntry`
   *                        (presumably hints from an earlier lookup - confirm against MapUtils)
   * @param id              the key passed to `getMapEntry`
   * @param dataType        the type used when reading a value from the value array
   * @return the pair produced by `getMapEntry`: a value and a `Seq[Int]` of positions
   */
  def getEntry(mapData: MapData, cachedPositions: Seq[Int], id: Long, dataType: DataType): (Any, Seq[Int]) =
    getMapEntry(mapData, cachedPositions, id) { (position: Int) =>
      mapData.valueArray().get(position, dataType)
    }

  /**
   * Wraps the expressions of the four columns, in the order given, in a new
   * [[RuleResultExpression]] and exposes it as a `Column`.
   */
  def apply(ruleSuiteResultsColumn: Column, ruleSuiteId: Column, ruleSetId: Column, ruleId: Column): Column = {
    val inputs = Seq(ruleSuiteResultsColumn, ruleSuiteId, ruleSetId, ruleId).map(_.expr)
    new Column(new RuleResultExpression(inputs))
  }
}
//TODO move to Quaternary after 2.4 is dropped
case class RuleResultExpression(children: Seq[Expression]) extends
Expression with InputTypeChecks {
/**
 * Returns a copy of this expression with `newChildren` as its children.
 * `children` is the only constructor parameter, so nothing else needs carrying over.
 */
protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = {
  copy(children = newChildren)
}
/**
 * Ordinal handed to `getMap` in `eval`: 2 when the first child's data type
 * equals `ruleSuiteResultType`, otherwise 1.
 */
protected lazy val extractResults = {
  val firstChildIsSuiteResult = children(0).dataType == ruleSuiteResultType
  if (firstChildIsSuiteResult) 2 else 1
}
// Positions returned by the last rule-set lookup in eval; passed back in on the next call.
// Marked @transient, so it is presumably null after deserialization - resetIfNull covers that.
@transient protected var cachedSetPositions: Seq[Int] = Seq.empty

// Positions returned by the last rule lookup in eval; handled the same way as above.
@transient protected var cachedRulePositions: Seq[Int] = Seq.empty
/**
 * Replaces either position cache with an empty sequence when it is found to be null;
 * non-null caches are left untouched.
 */
private def resetIfNull(): Unit = {
  if (cachedSetPositions eq null) cachedSetPositions = Seq.empty
  if (cachedRulePositions eq null) cachedRulePositions = Seq.empty
}
// Always nullable: eval yields null when any child evaluates to null, when the
// suite id does not match, or when the rule-set lookup returns null.
override def nullable: Boolean = true
/**
 * Looks up a single rule's result inside the structure produced by the first child.
 *
 * The four children are evaluated in order against `inputRow`. The first is read as an
 * `InternalRow`, the second is compared with the long at ordinal 0 of that row, and the
 * third and fourth are used as the keys for the rule-set and rule lookups.
 *
 * @param inputRow the row every child is evaluated against
 * @return the value of the second lookup, or null when any child evaluates to null,
 *         the suite id differs, or the rule-set lookup returns null
 */
override def eval(inputRow: InternalRow): Any = {
  resetIfNull()

  // Every child is evaluated up front, in order, before any null check.
  val suiteResults = children(0).eval(inputRow)
  val suiteId = children(1).eval(inputRow)
  val setId = children(2).eval(inputRow)
  val ruleId = children(3).eval(inputRow)

  if (suiteResults == null || suiteId == null || setId == null || ruleId == null)
    null
  else {
    val suiteStruct = suiteResults.asInstanceOf[InternalRow]
    val actualSuite = suiteStruct.getLong(0)

    if (actualSuite != suiteId)
      null
    else {
      // First lookup: the entry for the requested rule set.
      val (setEntry, setPositions) = RuleResultExpression.getEntry(
        suiteStruct.getMap(extractResults), cachedSetPositions, setId.asInstanceOf[Long], entryType)
      cachedSetPositions = setPositions

      if (setEntry == null)
        null
      else {
        // Second lookup: the rule's result within that rule set's entry.
        val (ruleResult, rulePositions) = RuleResultExpression.getEntry(
          access(setEntry), cachedRulePositions, ruleId.asInstanceOf[Long], dataType)
        cachedRulePositions = rulePositions
        ruleResult
      }
    }
  }
}
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
ctx.references += this
val setClass = classOf[Seq[Int]].getName+"
© 2015 - 2025 Weber Informatics LLC | Privacy Policy