All downloads are FREE. The search and download functionality uses the official Maven repository.
Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it how often you want.
org.apache.spark.sql.cassandra.DataTypeConverter.scala Maven / Gradle / Ivy
package org.apache.spark.sql.cassandra
import com.datastax.spark.connector
import com.datastax.spark.connector.cql.ColumnDef
import com.datastax.spark.connector.types.{ColumnType, TupleFieldDef, UDTFieldDef}
import com.datastax.spark.connector.util.Logging
import org.apache.spark.sql.types.StructField
import org.apache.spark.sql.{types => catalystTypes}
/**
 * Converts Cassandra column types (as modelled by the connector's `ColumnType`)
 * into Catalyst data types.
 *
 * Primitive types are resolved through [[primitiveTypeMap]], optionally preceded by a
 * custom converter obtained from `ColumnType.customDriverConverter`. Collections,
 * user defined types and tuples are converted recursively by [[catalystDataType]].
 */
object DataTypeConverter extends Logging {

  /** Fixed mapping from Cassandra primitive column types to their Catalyst counterparts. */
  private[cassandra] val primitiveTypeMap = Map[connector.types.ColumnType[_], catalystTypes.DataType](
    connector.types.TextType -> catalystTypes.StringType,
    connector.types.AsciiType -> catalystTypes.StringType,
    connector.types.VarCharType -> catalystTypes.StringType,
    connector.types.BooleanType -> catalystTypes.BooleanType,
    connector.types.IntType -> catalystTypes.IntegerType,
    connector.types.BigIntType -> catalystTypes.LongType,
    connector.types.CounterType -> catalystTypes.LongType,
    connector.types.FloatType -> catalystTypes.FloatType,
    connector.types.DoubleType -> catalystTypes.DoubleType,
    connector.types.SmallIntType -> catalystTypes.ShortType,
    connector.types.TinyIntType -> catalystTypes.ByteType,
    connector.types.VarIntType -> catalystTypes.DecimalType(38, 0), // no native arbitrary-size integer type
    connector.types.DecimalType -> catalystTypes.DecimalType(38, 18),
    connector.types.TimestampType -> catalystTypes.TimestampType,
    connector.types.InetType -> catalystTypes.StringType,
    connector.types.UUIDType -> catalystTypes.StringType,
    connector.types.TimeUUIDType -> catalystTypes.StringType,
    connector.types.BlobType -> catalystTypes.BinaryType,
    connector.types.DateType -> catalystTypes.DateType,
    connector.types.TimeType -> catalystTypes.LongType
  )

  /**
   * Convert Cassandra data type to Catalyst data type.
   *
   * Sets and lists both become `ArrayType`, maps become `MapType`, user defined types and
   * tuples become `StructType`; every other type is delegated to
   * [[primitiveCatalystDataType]].
   *
   * @param cassandraType the Cassandra column type to convert
   * @param nullable      forwarded to nested element/key/value conversions and passed as the
   *                      trailing boolean argument of the resulting `ArrayType` / `MapType`
   * @return the corresponding Catalyst data type
   */
  def catalystDataType(cassandraType: connector.types.ColumnType[_], nullable: Boolean): catalystTypes.DataType = {

    // Fields of a user defined type keep their column name and are always declared nullable.
    def catalystStructField(field: UDTFieldDef): StructField =
      StructField(field.columnName, catalystDataType(field.columnType, nullable = true), nullable = true)

    // Tuple components have no name, so the component index (as a string) is used instead.
    def catalystStructFieldFromTuple(field: TupleFieldDef): StructField =
      StructField(field.index.toString, catalystDataType(field.columnType, nullable = true), nullable = true)

    cassandraType match {
      case connector.types.SetType(et) =>
        catalystTypes.ArrayType(catalystDataType(et, nullable), nullable)
      case connector.types.ListType(et) =>
        catalystTypes.ArrayType(catalystDataType(et, nullable), nullable)
      case connector.types.MapType(kt, vt) =>
        catalystTypes.MapType(catalystDataType(kt, nullable), catalystDataType(vt, nullable), nullable)
      case connector.types.UserDefinedType(_, fields) =>
        catalystTypes.StructType(fields.map(catalystStructField))
      case connector.types.TupleType(fields @ _*) =>
        catalystTypes.StructType(fields.map(catalystStructFieldFromTuple))
      case connector.types.VarIntType =>
        // NOTE(review): the message says "unlimited", but primitiveTypeMap maps VarIntType to
        // DecimalType(38, 0). The text is kept unchanged here; confirm the intended wording.
        logWarning("VarIntType is mapped to catalystTypes.DecimalType with unlimited values.")
        primitiveCatalystDataType(cassandraType)
      case _ =>
        primitiveCatalystDataType(cassandraType)
    }
  }

  /**
   * Resolve a non-collection Cassandra type to its Catalyst data type.
   *
   * The custom converter (if any) is consulted first; [[primitiveTypeMap]] is the fallback.
   *
   * @param cassandraType the Cassandra column type to look up
   * @return the Catalyst data type registered for `cassandraType`
   * @throws NoSuchElementException if neither the custom converter nor [[primitiveTypeMap]]
   *                                defines a mapping (the fallback is a plain `Map` lookup)
   */
  def primitiveCatalystDataType(cassandraType: connector.types.ColumnType[_]): catalystTypes.DataType = {
    val lookup: PartialFunction[ColumnType[_], catalystTypes.DataType] =
      customCatalystDataType orElse { primitiveTypeMap }
    lookup(cassandraType)
  }

  /**
   * Type mapping contributed by `ColumnType.customDriverConverter`, or an empty partial
   * function when no custom converter is present.
   */
  private lazy val customCatalystDataType: PartialFunction[ColumnType[_], catalystTypes.DataType] = {
    ColumnType.customDriverConverter
      .map(_.catalystDataType)
      .getOrElse(PartialFunction.empty)
  }

  /**
   * Create a Catalyst StructField from a Cassandra Column.
   *
   * @param column the Cassandra column definition
   * @return a nullable `StructField` named after the column, typed via [[catalystDataType]]
   */
  def toStructField(column: ColumnDef): StructField =
    StructField(column.columnName, catalystDataType(column.columnType, nullable = true), nullable = true)
}