com.sparkutils.quality.impl.ProbabilityExpr.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of quality_2.4_2.11 Show documentation
A Spark library for managing in-process data quality rules via Spark SQL
The newest version!
package com.sparkutils.quality.impl
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionDescription, NullIntolerant, UnaryExpression}
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.types.{DataType, DoubleType, IntegerType, LongType}
import com.sparkutils.quality.PassedInt
import org.apache.spark.sql.shim.expressions.InputTypeChecks
@ExpressionDescription(
  usage = "probability(expr) - Returns the probability from a rule result as a double.",
  examples = """
    Examples:
      > SELECT probability(1000);
       0.01
  """)
case class ProbabilityExpr(child: Expression) extends UnaryExpression with NullIntolerant with InputTypeChecks {

  /**
   * Converts a rule-result value into a probability by dividing it by `PassedInt`.
   *
   * The child is constrained to IntegerType or LongType via `inputDataTypes`,
   * so `res` is a boxed Integer or Long at runtime. NOTE(review): from the
   * usage example (probability(1000) == 0.01) the divisor appears to be
   * 100000 — confirm against `com.sparkutils.quality.PassedInt`.
   *
   * Null inputs never reach this method (`NullIntolerant` short-circuits them).
   */
  override def nullSafeEval(res: Any): Any = {
    // Pattern match rather than isInstanceOf/asInstanceOf chains (idiomatic
    // Scala). Behavior is unchanged: any non-Integer value is cast to Long,
    // exactly as the original else-branch did.
    val full = res match {
      case i: Integer => i.toDouble
      case other      => other.asInstanceOf[Long].toDouble
    }
    full / PassedInt
  }

  // Codegen mirrors nullSafeEval: cast the primitive (int or long) to double
  // BEFORE dividing so the division is floating point, not integral.
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
    defineCodeGen(ctx, ev, c => s"((double)($c)) / ${PassedInt}")

  // The expression always produces a double probability.
  override def dataType: DataType = DoubleType

  // A single child accepted as either IntegerType or LongType.
  override def inputDataTypes: Seq[Seq[DataType]] = Seq(Seq(IntegerType, LongType))

  // TreeNode child-replacement hook required by newer Spark versions
  // (wired through the shim layer; signature must stay as-is).
  protected def withNewChildInternal(newChild: Expression): Expression = copy(child = newChild)
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy