All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.spark.sql.QualityYamlExt.scala Maven / Gradle / Ivy

package org.apache.spark.sql

import com.sparkutils.quality.QualityException
import com.sparkutils.quality.impl.yaml.QualityYamlDecoding._
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.types.{CalendarIntervalType, DataType, DayTimeIntervalType, TimestampNTZType, UserDefinedType, YearMonthIntervalType}
import org.yaml.snakeyaml.nodes.{MappingNode, Node, NodeTuple, ScalarNode, Tag}
import com.sparkutils.quality.impl.yaml.QualityYamlEncoding._
import org.apache.spark.unsafe.types.CalendarInterval
import org.yaml.snakeyaml.DumperOptions

import scala.collection.JavaConverters._

object QualityYamlExt {

  private def createIntervalNode(interval: CalendarInterval)(implicit renderOptions: Map[String, String]) =
    if (interval eq null)
      createNullNode
    else {
      val tuples = Seq(
        new NodeTuple(
          createScalarNode(Tag.STR, "days"),
          createScalarNode(Tag.INT, interval.days)
        ),
        new NodeTuple(
          createScalarNode(Tag.STR, "microseconds"),
          createScalarNode(Tag.INT, interval.microseconds)
        ),
        new NodeTuple(
          createScalarNode(Tag.STR, "months"),
          createScalarNode(Tag.INT, interval.months)
        )
      )

      new MappingNode(Tag.MAP, tuples.asJava, DumperOptions.FlowStyle.FLOW)
    }


  def makeConverterExt(implicit renderOptions: Map[String, String]): ValueConverter = {

    case CalendarIntervalType =>
      (a: Any) =>
        createIntervalNode(a.asInstanceOf[CalendarInterval])

    case TimestampNTZType =>
      (a: Any) =>
        createScalarNode(Tag.INT, a)

    case ym: YearMonthIntervalType =>
      (a: Any) =>
        createScalarNode(Tag.INT, a)

    case dt: DayTimeIntervalType =>
      (a: Any) =>
        createScalarNode(Tag.INT, a)

    case udt: UserDefinedType[_] =>
      makeValueConverter.applyOrElse(udt.sqlType, makeConverterExt)

    case dataType => throw QualityException(s"Cannot find yaml representation for type $dataType")
  }

  def makeStructFieldConverterExt(implicit renderOptions: Map[String, String]): StructValueConverter = {

    case CalendarIntervalType =>
      (i: InternalRow, p: Int) =>
        createIntervalNode(i.getInterval(p))

    case TimestampNTZType =>
      (i: InternalRow, p: Int) =>
        createScalarNode(Tag.INT, i.getLong(p))

    case ym: YearMonthIntervalType =>
      (i: InternalRow, p: Int) =>
        createScalarNode(Tag.INT, i.getInt(p))

    case dt: DayTimeIntervalType =>
      (i: InternalRow, p: Int) =>
        createScalarNode(Tag.INT, i.getLong(p))

    case udt: UserDefinedType[_] =>
      // recurse ever?
      makeStructFieldConverter.applyOrElse(udt.sqlType, makeStructFieldConverterExt)

    case dataType => throw QualityException(s"Cannot find yaml representation for type $dataType")
  }

  def makeNodeConverterExt: NodeConverter = {
    case CalendarIntervalType =>
      def getField(tuples: Seq[NodeTuple])( name: String) =
        tuples.find(_.getKeyNode.asInstanceOf[ScalarNode].getValue == name)

      def getValue(node: NodeTuple) =
        node.getValueNode.asInstanceOf[ScalarNode].getValue

      (a: Node) =>
        if (a.getTag == Tag.NULL)
          null
        else {
            val map = a.asInstanceOf[MappingNode]
            val tuples = map.getValue.asScala
            val f = getField(tuples) _
            val r =
              for{
                days <- f("days").map(getValue(_).toInt)
                microseconds <- f("microseconds").map(getValue(_).toLong)
                months <- f("months").map(getValue(_).toInt)
              } yield
                new CalendarInterval(months, days, microseconds)
            r.orNull // not really exception territory
          }

    case TimestampNTZType =>
      (a: Node) =>
        a.scalar(_.toLong)

    case ym: YearMonthIntervalType =>
      (a: Node) =>
        a.scalar(_.toInt)

    case dt: DayTimeIntervalType =>
      (a: Node) =>
        a.scalar(_.toLong)

    case udt: UserDefinedType[_] =>
      // recurse ever?
      makeNodeConverter.applyOrElse(udt.sqlType, makeNodeConverterExt)

    case dataType => throw QualityException(s"Cannot find yaml representation for type $dataType")
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy