All Downloads are FREE. Search and download functionalities are using the official Maven repository.

tech.ytsaurus.spyt.common.utils.CityHash.scala Maven / Gradle / Ivy

package tech.ytsaurus.spyt.common.utils

import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.expressions.{Cast, Expression, ExpressionInfo, HashExpression, InterpretedHashFunction}
import org.apache.spark.sql.types.{DataType, LongType}
import org.apache.spark.sql.spyt.types.UInt64Type
import org.apache.spark.sql.{Column, SparkSession}

case class CityHash(children: Seq[Expression], seed: Long) extends HashExpression[Long] {
  def this(arguments: Seq[Expression]) = this(arguments, 0L)

  override def dataType: DataType = LongType

  override def prettyName: String = "cityhash"

  override protected def hasherClassName: String = classOf[CityHashImpl].getName

  override protected def computeHash(value: Any, dataType: DataType, seed: Long): Long = {
    CityHashFunction.hash(value, dataType, seed)
  }

  override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): CityHash =
    copy(children = newChildren)
}

object CityHashFunction extends InterpretedHashFunction {
  override protected def hashInt(i: Int, seed: Long): Long = CityHashImpl.hashInt(i, seed)

  override protected def hashLong(l: Long, seed: Long): Long = CityHashImpl.hashLong(l, seed)

  override protected def hashUnsafeBytes(base: AnyRef, offset: Long, len: Int, seed: Long): Long = {
    CityHashImpl.hashUnsafeBytes(base, offset, len, seed)
  }
}

object CityHash {
  @scala.annotation.varargs
  def cityHashUdf(source: Column*): Column = {
    new Column(new CityHash(source.map(_.expr))).cast(UInt64Type)
  }

  def registerFunction(spark: SparkSession): Unit = {
    spark.sessionState.functionRegistry.registerFunction(
      new FunctionIdentifier("cityhash"),
      new ExpressionInfo("tech.ytsaurus.spyt.common.utils.CityHash", "cityhash"),
      (children: Seq[Expression]) =>
        Cast(new CityHash(children), UInt64Type)
    )
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy