org.apache.spark.sql.cassandra.CassandraSQLRow.scala Maven / Gradle / Ivy

Go to download
package org.apache.spark.sql.cassandra

import java.math.BigInteger
import java.net.InetAddress
import java.sql.Timestamp
import java.time.{Instant, LocalTime}
import java.util.{Date, UUID}

import com.datastax.dse.driver.api.core.data.geometry.Geometry
import com.datastax.dse.driver.api.core.data.time.DateRange
import com.datastax.oss.driver.api.core.`type`.codec.TypeCodecs
import com.datastax.oss.driver.api.core.cql.Row
import com.datastax.oss.driver.api.core.data.CqlDuration
import com.datastax.spark.connector.rdd.reader.{RowReader, ThisRowReaderAsFactory}
import com.datastax.spark.connector.types.TypeConverter
import com.datastax.spark.connector.{CassandraRow, CassandraRowMetadata, GettableData, TupleValue, UDTValue}
import org.apache.spark.sql.types.Decimal
import org.apache.spark.sql.{Row => SparkRow}

final class CassandraSQLRow(val metaData: CassandraRowMetadata, val columnValues: IndexedSeq[AnyRef])
  extends GettableData with SparkRow with Serializable {

  protected def fieldNames = metaData

  private[spark] def this() = this(null, null) // required by Kryo for deserialization :(

  /** Generic getter for getting columns of any type.
    * Looks the column up by its index. First column starts at index 0. */
  private def get[T](index: Int)(implicit c: TypeConverter[T]): T =
    c.convert(columnValues(index))

  override def apply(i: Int) = columnValues(i)
  override def copy() = this // immutable
  override def size = super.size

  override def getDouble(i: Int) = get[Double](i)
  override def getFloat(i: Int) = get[Float](i)
  override def getLong(i: Int) = get[Long](i)
  override def getByte(i: Int) = get[Byte](i)
  override def getBoolean(i: Int) = get[Boolean](i)
  override def getShort(i: Int) = get[Short](i)
  override def getInt(i: Int) = get[Int](i)
  override def getString(i: Int) = get[String](i)
  override def get(i: Int) = get[Any](i)

  override def isNullAt(i: Int): Boolean = super[GettableData].isNullAt(i)

  override def toSeq: Seq[Any] = columnValues
}


object CassandraSQLRow {

  def fromJavaDriverRow(row: Row, metaData:CassandraRowMetadata): CassandraSQLRow = {
    val data = CassandraRow.dataFromJavaDriverRow(row, metaData)
    new CassandraSQLRow(metaData, data.map(toSparkSqlType))
  }

  implicit object CassandraSQLRowReader extends RowReader[CassandraSQLRow] with ThisRowReaderAsFactory[CassandraSQLRow] {

    override def read(row: Row, metaData:CassandraRowMetadata): CassandraSQLRow =
      fromJavaDriverRow(row, metaData)

    override def neededColumns = None
    override def targetClass = classOf[CassandraSQLRow]
  }

  def toSparkSqlType(value: Any): AnyRef = {
    val sparkSqlType: PartialFunction[Any, AnyRef] = {
      case geo: Geometry => geo.asWellKnownText()
      case date: Date => new Timestamp(date.getTime)
      case localDate: org.joda.time.LocalDate =>
        new java.sql.Date(localDate.toDateTimeAtStartOfDay().getMillis)
      case localDate: java.time.LocalDate => java.sql.Date.valueOf(localDate)
      case localTime: java.time.LocalTime => localTime.toNanoOfDay.asInstanceOf[AnyRef]
      case duration: java.time.Duration => duration.toMillis.asInstanceOf[AnyRef]
      case duration: CqlDuration => TypeCodecs.DURATION.format(duration)
      case instant: java.time.Instant => new java.sql.Timestamp(instant.toEpochMilli)
      case bigInteger: BigInteger => Decimal(bigInteger.toString)
      case bigDecimal: java.math.BigDecimal => Decimal(bigDecimal)
      case inetAddress: InetAddress => inetAddress.getHostAddress
      case uuid: UUID => uuid.toString
      case set: Set[_] => set.map(toSparkSqlType).toSeq
      case list: List[_] => list.map(toSparkSqlType)
      case map: Map[_, _] => map map { case(k, v) => (toSparkSqlType(k), toSparkSqlType(v))}
      case udt: UDTValue => SparkRow(udt.columnValues.map(toSparkSqlType): _*)
      case tupleValue: TupleValue => SparkRow(tupleValue.values.map(toSparkSqlType): _*)
      case dateRange: DateRange => dateRange.toString
      case _ => value.asInstanceOf[AnyRef]
    }
    sparkSqlType(value)
  }

  def toUnsafeSqlType(value: Any): AnyRef = {
    val sparkSqlType: PartialFunction[Any, AnyRef] = {
      case geom: Geometry => geom.asWellKnownText()
      case duration: CqlDuration => TypeCodecs.DURATION.format(duration)
      case javaDate: java.util.Date => java.sql.Timestamp.from(javaDate.toInstant)
      case localDate: java.time.LocalDate => java.sql.Date.valueOf(localDate)
      case localDate: org.joda.time.LocalDate =>
        java.time.LocalDate.of(localDate.getYear,localDate.getMonthOfYear, localDate.getDayOfMonth)
          .toEpochDay.asInstanceOf[AnyRef]
      case inetAddress: InetAddress => inetAddress.getHostAddress
      case uuid: UUID => uuid.toString
      case set: Set[_] => set.map(toUnsafeSqlType).toSeq
      case list: List[_] => list.map(toUnsafeSqlType)
      case map: Map[_, _] => map map { case(k, v) => (toUnsafeSqlType(k), toUnsafeSqlType(v))}
      case udt: UDTValue => UDTValue(udt.metaData, udt.columnValues.map(toUnsafeSqlType))
      case tupleValue: TupleValue => TupleValue(tupleValue.values.map(toUnsafeSqlType): _*)
      case dateRange: DateRange => dateRange.toString
      case time: LocalTime => time.toNanoOfDay: java.lang.Long
      case instant: Instant => java.sql.Timestamp.from(instant)
      case _ => value.asInstanceOf[AnyRef]
    }
    sparkSqlType(value)
  }


  val empty = new CassandraSQLRow(null, IndexedSeq.empty)
}