com.sparkutils.quality.impl.longPair.LongPairExpressions.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of quality_9.1.dbr_3.1_2.12 Show documentation
Show all versions of quality_9.1.dbr_3.1_2.12 Show documentation
A Spark library for managing in-process data quality rules via Spark SQL
The newest version!
package com.sparkutils.quality.impl.longPair
import com.sparkutils.quality.impl.id.{GenericLongBasedID, model}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, Expression, NullIntolerant, UnaryExpression}
import org.apache.spark.sql.shim.expressions.InputTypeChecks
import org.apache.spark.sql.types.{DataType, LongType, StructField, StructType}
object LongPair {
val structType = StructType(Seq(
StructField("lower", LongType, false),
StructField("higher", LongType, false)))
}
object LongPairExpression {
def genRow(input1: Any, input2: Any) = InternalRow(input1, input2)
}
/**
* Creates RowIDs using two long fields
* @param left lower long
* @param right higher long
*/
case class LongPairExpression(left: Expression, right: Expression) extends BinaryExpression with NullIntolerant
with InputTypeChecks {
override protected def nullSafeEval(input1: Any, input2: Any): Any = {
LongPairExpression.genRow(input1, input2)
}
override def dataType: DataType = LongPair.structType
override def sql: String = s"(longPair(${left.sql}, ${right.sql}))"
override def inputDataTypes: Seq[Seq[DataType]] = Seq(Seq(LongType), Seq(LongType))
override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
defineCodeGen(ctx, ev, (input1, input2) =>
s"com.sparkutils.quality.impl.longPair.LongPairExpression.genRow($input1, $input2)")
protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): Expression = copy(left = newLeft, right = newRight)
}
/**
* Takes a prefixed lower and higher long pair field, must be 128bit with the provided prefix and converts to lower and higher for other functions
* @param prefix prefix of the nested fields
* @param child the id field
*/
case class PrefixedToLongPair(child: Expression, prefix: String) extends UnaryExpression with NullIntolerant
with InputTypeChecks {
override protected def nullSafeEval(input1: Any): Any = {
val i = input1.asInstanceOf[InternalRow]
InternalRow(i.getLong(1), i.getLong(2))
}
override def dataType: DataType = LongPair.structType
override def sql: String = s"(prefixedToLongPair(${child.sql}))"
override def inputDataTypes: Seq[Seq[DataType]] = Seq(Seq(GenericLongBasedID(model.ProvidedID, Array.ofDim[Long](2)).dataType(prefix)))
override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
defineCodeGen(ctx, ev, (input1) =>
s"new GenericInternalRow(new Object[]{((InternalRow)$input1).getLong(1), ((InternalRow)$input1).getLong(2)})")
protected def withNewChildInternal(newChild: Expression): Expression = copy(child = newChild)
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy