com.sparkutils.quality.impl.rng.RngExpression.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of quality_9.1.dbr_3.1_2.12 Show documentation
Show all versions of quality_9.1.dbr_3.1_2.12 Show documentation
A Spark library for managing in-process data quality rules via Spark SQL
The newest version!
package com.sparkutils.quality.impl.rng
import com.sparkutils.shim.expressions.StatefulLike
import org.apache.commons.rng.simple.RandomSource
import org.apache.spark.sql.Column
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionWithRandomSeed, LeafExpression, Literal, Rand}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.types.{BinaryType, DataType, LongType, StructField, StructType}
object RandomBytes {

  /**
   * Builds a [[Column]] that yields random byte arrays backed by a commons-rng source.
   *
   * The expression itself is chosen by the `Expression`-returning overload; this
   * variant only wraps that expression in a column.
   *
   * @param randomSource commons-rng random source
   * @param numBytes the number of bytes to produce in the array, 16 when omitted
   * @param seed the seed to use / mix in, 0 when omitted
   * @return a column wrapping the selected rng expression
   */
  def apply(randomSource: RandomSource, numBytes: Int = 16, seed: Long = 0): Column = {
    val expression: Expression = apply(numBytes, randomSource, seed)
    new Column(expression)
  }

  /**
   * Picks the byte-generating expression for the given source.
   *
   * @param numBytes the number of bytes to produce in the array
   * @param randomSource commons-rng random source
   * @param seed the seed to use / mix in
   * @return [[RandBytesWithJump]] when `randomSource.isJumpable` holds, otherwise
   *         [[RandBytesNonJump]]
   */
  def apply(numBytes: Int, randomSource: RandomSource, seed: Long): Expression =
    randomSource.isJumpable match {
      case true  => RandBytesWithJump(seed, numBytes, randomSource)
      case false => RandBytesNonJump(seed, numBytes, randomSource)
    }
}
/**
 * Random byte-array expression for sources that report themselves as jumpable;
 * combines [[RandBytes]] with the `Jumpable` mixin (defined elsewhere).
 *
 * @param definedSeed starting / mixing seed for all new rng instances
 * @param numBytes length of the binary array to fill in bytes
 * @param source the commons-rng `RandomSource` backing this expression
 */
case class RandBytesWithJump(definedSeed: Long, numBytes: Int, source: RandomSource) extends RandBytes with Jumpable {

  /** Narrows the self type so copies keep the concrete class. */
  type ThisType = RandBytesWithJump

  /** A new instance carrying the same constructor arguments. */
  override def freshCopy: ThisType = {
    this.copy()
  }
}
/**
 * Random byte-array expression used when the source is not jumpable; relies on
 * `RngImpl` (defined elsewhere) alone.
 *
 * @param definedSeed starting / mixing seed for all new rng instances
 * @param numBytes length of the binary array to fill in bytes
 * @param source the commons-rng `RandomSource` backing this expression
 */
case class RandBytesNonJump(definedSeed: Long, numBytes: Int, source: RandomSource) extends RandBytes with RngImpl {

  /** Narrows the self type so copies keep the concrete class. */
  type ThisType = RandBytesNonJump

  /** A new instance carrying the same constructor arguments. */
  override def freshCopy: ThisType = {
    this.copy()
  }
}
/**
 * Shared base for the random byte-array expressions; the concrete generator
 * behaviour comes from whichever rng traits the subclass mixes in.
 */
abstract class RandBytes extends LeafExpression with StatefulLike
  with ExpressionWithRandomSeed with CodegenFallback with RngImpl {

  /** Concrete expression type, fixed by each subclass. */
  type ThisType <: RandBytes

  /** The result is never null. */
  override def nullable: Boolean = false

  /** Each evaluation yields a binary value. */
  override def dataType: DataType = BinaryType

  // NOTE(review): isNull is defined outside this class; confirm what state it reports
  override lazy val resolved: Boolean = isNull

  /** The configured seed exposed as a long literal. */
  def seedExpression: Expression = Literal(definedSeed, LongType)

  /**
   * Produces a fresh copy of this expression and re-seeds that copy.
   *
   * @param seed the seed handed to `reSeed` on the copy
   * @return the re-seeded copy
   */
  override def withNewSeed(seed: Long): ThisType = {
    val reseeded = freshCopy()
    reseeded.reSeed(seed)
    reseeded
  }

  /**
   * Per-partition set-up, delegated entirely to `reSeedOrBranch`.
   *
   * @param partitionIndex index of the partition being initialised
   */
  override protected def initializeInternal(partitionIndex: Int): Unit =
    // per the original note, an rng that is already set is not redone
    reSeedOrBranch(partitionIndex)

  /** Returns the next result of `nextBytes()`; the input row is not read. */
  override protected def evalInternal(input: InternalRow): Any =
    nextBytes()
}
object RandomLongs {

  /** Struct type taken from `LongPair`; used as the data type of [[RandLongs]]. */
  val structType = com.sparkutils.quality.impl.longPair.LongPair.structType

  /**
   * Builds a [[Column]] that yields pairs of random longs backed by a commons-rng source.
   *
   * @param randomSource commons-rng random source
   * @param seed the seed to use / mix in, 0 when omitted
   * @return a column wrapping the expression chosen by [[create]]
   */
  def apply(randomSource: RandomSource, seed: Long = 0): Column = {
    val expression = create(randomSource, seed)
    new Column(expression)
  }

  /**
   * Picks the long-pair generating expression for the given source.
   *
   * @param randomSource commons-rng random source
   * @param seed the seed to use / mix in, 0 when omitted
   * @return [[RandLongsWithJump]] when `randomSource.isJumpable` holds, otherwise
   *         [[RandLongsNonJump]]
   */
  def create(randomSource: RandomSource, seed: Long = 0): Expression =
    randomSource.isJumpable match {
      case true  => RandLongsWithJump(seed, randomSource)
      case false => RandLongsNonJump(seed, randomSource)
    }
}
/**
 * Random long-pair expression for sources that report themselves as jumpable;
 * combines [[RandLongs]] with the `Jumpable` mixin (defined elsewhere).
 *
 * @param definedSeed starting / mixing seed for all new rng instances
 * @param source the commons-rng `RandomSource` backing this expression
 */
case class RandLongsWithJump(definedSeed: Long, source: RandomSource) extends RandLongs with Jumpable {

  /** Narrows the self type so copies keep the concrete class. */
  type ThisType = RandLongsWithJump

  /** A new instance carrying the same constructor arguments. */
  override def freshCopy: ThisType = {
    this.copy()
  }
}
/**
 * Random long-pair expression used when the source is not jumpable; relies on
 * `RngImpl` (defined elsewhere) alone.
 *
 * @param definedSeed starting / mixing seed for all new rng instances
 * @param source the commons-rng `RandomSource` backing this expression
 */
case class RandLongsNonJump(definedSeed: Long, source: RandomSource) extends RandLongs with RngImpl {

  /** Narrows the self type so copies keep the concrete class. */
  type ThisType = RandLongsNonJump

  /** A new instance carrying the same constructor arguments. */
  override def freshCopy: ThisType = {
    this.copy()
  }
}
/**
 * Shared base for the expressions that emit two random longs (128 bits) per
 * evaluation; the concrete generator behaviour comes from the mixed-in rng traits.
 */
abstract class RandLongs extends LeafExpression with StatefulLike
  with ExpressionWithRandomSeed with CodegenFallback with RngImpl {

  /** Concrete expression type, fixed by each subclass. */
  type ThisType <: RandLongs

  /** The result is never null. */
  override def nullable: Boolean = false

  /** Each evaluation yields a row shaped like `RandomLongs.structType`. */
  override def dataType: DataType = RandomLongs.structType

  // NOTE(review): isNull is defined outside this class; confirm what state it reports
  override lazy val resolved: Boolean = isNull

  /** Fixed at 0 here; this expression draws longs rather than byte arrays. */
  def numBytes: Int = 0

  /** The configured seed exposed as a long literal. */
  def seedExpression: Expression = Literal(definedSeed, LongType)

  /**
   * Produces a fresh copy of this expression and re-seeds that copy.
   *
   * @param seed the seed handed to `reSeed` on the copy
   * @return the re-seeded copy
   */
  override def withNewSeed(seed: Long): ThisType = {
    val reseeded = freshCopy()
    reseeded.reSeed(seed)
    reseeded
  }

  /**
   * Per-partition set-up, delegated entirely to `reSeedOrBranch`.
   *
   * @param partitionIndex index of the partition being initialised
   */
  override protected def initializeInternal(partitionIndex: Int): Unit =
    // per the original note, an rng that is already set is not redone
    reSeedOrBranch(partitionIndex)

  /** Draws two longs in order and packs them into a row; the input row is not read. */
  override protected def evalInternal(input: InternalRow): Any = {
    val first = nextLong()
    val second = nextLong()
    InternalRow(first, second)
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy