// Source artifact: com.github.mjakubowski84.parquet4s.ValueCodecs.scala (Maven / Gradle / Ivy)
// The newest version!
package com.github.mjakubowski84.parquet4s
import com.github.mjakubowski84.parquet4s.TimeValueCodecs.{
instantToLocalDateTime,
localDateTimeToInstant,
localDateTimeToTimestamp,
timestampToLocalDateTime
}
import java.nio.{ByteBuffer, ByteOrder}
import java.sql.{Date, Timestamp}
import java.time.*
import java.util.TimeZone
import scala.annotation.nowarn
import scala.collection.compat.*
import scala.reflect.ClassTag
/** Implicit decoders that turn primitive [[Value]]s into Scala strings, primitives and decimals.
  *
  * Every decoder matches only the value kinds listed in its body. A value of any other kind is not
  * covered by the match and therefore results in a `scala.MatchError`.
  */
trait PrimitiveValueDecoders {

  /** Reads a [[BinaryValue]] as a UTF-8 string. */
  implicit val stringDecoder: OptionalValueDecoder[String] = new OptionalValueDecoder[String] {
    def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): String =
      value match {
        case BinaryValue(bytes) => bytes.toStringUsingUTF8
      }
  }

  /** Reads an [[IntValue]] and converts it with `Int.toChar`. */
  implicit val charDecoder: RequiredValueDecoder[Char] = new RequiredValueDecoder[Char] {
    def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): Char =
      value match {
        case IntValue(code) => code.toChar
      }
  }

  /** Unwraps a [[BooleanValue]]. */
  implicit val booleanDecoder: RequiredValueDecoder[Boolean] = new RequiredValueDecoder[Boolean] {
    def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): Boolean =
      value match {
        case BooleanValue(flag) => flag
      }
  }

  /** Accepts [[IntValue]] as is and narrows [[LongValue]] with `Long.toInt`. */
  implicit val intDecoder: RequiredValueDecoder[Int] = new RequiredValueDecoder[Int] {
    def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): Int =
      value match {
        case LongValue(wide)   => wide.toInt
        case IntValue(narrow) => narrow
      }
  }

  /** Accepts [[LongValue]] as is and widens [[IntValue]] with `Int.toLong`. */
  implicit val longDecoder: RequiredValueDecoder[Long] = new RequiredValueDecoder[Long] {
    def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): Long =
      value match {
        case LongValue(wide)   => wide
        case IntValue(narrow) => narrow.toLong
      }
  }

  /** Accepts [[DoubleValue]] as is and widens [[FloatValue]] with `Float.toDouble`. */
  implicit val doubleDecoder: RequiredValueDecoder[Double] = new RequiredValueDecoder[Double] {
    def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): Double =
      value match {
        case FloatValue(single)  => single.toDouble
        case DoubleValue(precise) => precise
      }
  }

  /** Accepts [[FloatValue]] as is and narrows [[DoubleValue]] with `Double.toFloat`. */
  implicit val floatDecoder: RequiredValueDecoder[Float] = new RequiredValueDecoder[Float] {
    def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): Float =
      value match {
        case FloatValue(single)  => single
        case DoubleValue(precise) => precise.toFloat
      }
  }

  /** Reads an [[IntValue]] and narrows it with `Int.toShort`. */
  implicit val shortDecoder: RequiredValueDecoder[Short] = new RequiredValueDecoder[Short] {
    def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): Short =
      value match {
        case IntValue(number) => number.toShort
      }
  }

  /** Reads an [[IntValue]] and narrows it with `Int.toByte`. */
  implicit val byteDecoder: RequiredValueDecoder[Byte] = new RequiredValueDecoder[Byte] {
    def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): Byte =
      value match {
        case IntValue(number) => number.toByte
      }
  }

  /** Builds a `BigDecimal` from a binary value (via `Decimals.decimalFromBinary`) or from any of
    * the numeric value kinds.
    */
  implicit val decimalDecoder: OptionalValueDecoder[BigDecimal] = new OptionalValueDecoder[BigDecimal] {
    def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): BigDecimal =
      value match {
        case BinaryValue(bytes)   => Decimals.decimalFromBinary(bytes)
        case DoubleValue(precise) => BigDecimal.decimal(precise)
        case FloatValue(single)   => BigDecimal.decimal(single)
        case LongValue(wide)      => BigDecimal.decimal(wide)
        case IntValue(narrow)     => BigDecimal(narrow)
      }
  }
}
/** Implicit encoders that wrap Scala strings, primitives and decimals into [[Value]]s. */
trait PrimitiveValueEncoders {

  /** Wraps a string in a [[BinaryValue]]. */
  implicit val stringEncoder: OptionalValueEncoder[String] = new OptionalValueEncoder[String] {
    def encodeNonNull(data: String, configuration: ValueCodecConfiguration): Value = {
      BinaryValue(data)
    }
  }

  /** Stores a character as an [[IntValue]] holding `Char.toInt`. */
  implicit val charEncoder: RequiredValueEncoder[Char] = new RequiredValueEncoder[Char] {
    def encodeNonNull(data: Char, configuration: ValueCodecConfiguration): Value = {
      val asInt = data.toInt
      IntValue(asInt)
    }
  }

  /** Wraps a boolean in a [[BooleanValue]]. */
  implicit val booleanEncoder: RequiredValueEncoder[Boolean] = new RequiredValueEncoder[Boolean] {
    def encodeNonNull(data: Boolean, configuration: ValueCodecConfiguration): Value = {
      BooleanValue(data)
    }
  }

  /** Wraps an int in an [[IntValue]]. */
  implicit val intEncoder: RequiredValueEncoder[Int] = new RequiredValueEncoder[Int] {
    def encodeNonNull(data: Int, configuration: ValueCodecConfiguration): Value = {
      IntValue(data)
    }
  }

  /** Wraps a long in a [[LongValue]]. */
  implicit val longEncoder: RequiredValueEncoder[Long] = new RequiredValueEncoder[Long] {
    def encodeNonNull(data: Long, configuration: ValueCodecConfiguration): Value = {
      LongValue(data)
    }
  }

  /** Wraps a double in a [[DoubleValue]]. */
  implicit val doubleEncoder: RequiredValueEncoder[Double] = new RequiredValueEncoder[Double] {
    def encodeNonNull(data: Double, configuration: ValueCodecConfiguration): Value = {
      DoubleValue(data)
    }
  }

  /** Wraps a float in a [[FloatValue]]. */
  implicit val floatEncoder: RequiredValueEncoder[Float] = new RequiredValueEncoder[Float] {
    def encodeNonNull(data: Float, configuration: ValueCodecConfiguration): Value = {
      FloatValue(data)
    }
  }

  /** Stores a short as an [[IntValue]] holding `Short.toInt`. */
  implicit val shortEncoder: RequiredValueEncoder[Short] = new RequiredValueEncoder[Short] {
    def encodeNonNull(data: Short, configuration: ValueCodecConfiguration): Value = {
      val asInt = data.toInt
      IntValue(asInt)
    }
  }

  /** Stores a byte as an [[IntValue]] holding `Byte.toInt`. */
  implicit val byteEncoder: RequiredValueEncoder[Byte] = new RequiredValueEncoder[Byte] {
    def encodeNonNull(data: Byte, configuration: ValueCodecConfiguration): Value = {
      val asInt = data.toInt
      IntValue(asInt)
    }
  }

  /** Stores a decimal as a [[BinaryValue]] produced by `Decimals.binaryFromDecimal`. */
  implicit val decimalEncoder: OptionalValueEncoder[BigDecimal] = new OptionalValueEncoder[BigDecimal] {
    def encodeNonNull(data: BigDecimal, configuration: ValueCodecConfiguration): Value = {
      val binary = Decimals.binaryFromDecimal(data)
      BinaryValue(binary)
    }
  }
}
/** Constants and conversion helpers shared by the date/time value codecs. */
private[parquet4s] object TimeValueCodecs {

  /** Julian day number that corresponds to epoch day 0 (1970-01-01). */
  val JulianDayOfEpoch = 2440588
  val MillisPerSecond  = 1000L
  val MicrosPerMilli   = 1000L
  val NanosPerMicro    = 1000L
  val MicrosPerSecond: Long = MicrosPerMilli * MillisPerSecond
  val NanosPerMilli: Long   = NanosPerMicro * MicrosPerMilli
  val NanosPerSecond: Long  = NanosPerMilli * MillisPerSecond
  val NanosPerDay           = 86400000000000L

  /** Decodes a date-time from either the 12-byte binary representation or one of the INT64
    * timestamp representations.
    *
    * Binary layout (little-endian): 8 bytes holding the time of day in nanoseconds followed by
    * 4 bytes holding the Julian day. The stored time has the raw offset of the writer's time zone
    * subtracted (see [[encodeLocalDateTime]]), so the raw offset of `timeZone` is added back here.
    *
    * @param value
    *   a [[BinaryValue]] or a [[DateTimeValue]] in millis, micros or nanos; any other value is not
    *   matched and results in a `scala.MatchError`
    * @param timeZone
    *   time zone used to interpret the stored value
    * @return
    *   the local date-time in `timeZone`
    */
  def decodeLocalDateTime(value: Value, timeZone: TimeZone): LocalDateTime =
    value match {
      case BinaryValue(binary) =>
        val buf              = ByteBuffer.wrap(binary.getBytes).order(ByteOrder.LITTLE_ENDIAN)
        val fixedTimeInNanos = buf.getLong
        val julianDay        = buf.getInt
        val date             = LocalDate.ofEpochDay((julianDay - JulianDayOfEpoch).toLong)

        // The zone offset has millisecond resolution, so it is applied to the millisecond part
        // and the sub-millisecond remainder is re-attached afterwards.
        val fixedTimeInMillis = Math.floorDiv(fixedTimeInNanos, NanosPerMilli)
        val nanosLeft         = Math.floorMod(fixedTimeInNanos, NanosPerMilli)
        val timeInMillis      = fixedTimeInMillis + timeZone.getRawOffset
        val timeInNanos       = (timeInMillis * NanosPerMilli) + nanosLeft

        /*
         * When the value was written with a different time zone than the one it is read with,
         * the adjusted time may fall outside of [0, NanosPerDay):
         *  - at or above NanosPerDay when the writer's zone was WEST of the reader's,
         *  - below zero when the writer's zone was EAST of the reader's.
         * The stored time is not normalised by the encoder, so the combined offsets can exceed a
         * single day in either direction. Floor division yields the number of whole days to move
         * the date by and floor modulo the remaining, always valid, nano-of-day.
         */
        val dayShift  = Math.floorDiv(timeInNanos, NanosPerDay)
        val nanoOfDay = Math.floorMod(timeInNanos, NanosPerDay)
        LocalDateTime.of(date.plusDays(dayShift), LocalTime.ofNanoOfDay(nanoOfDay))

      case DateTimeValue(value, TimestampFormat.Int64Millis) =>
        LocalDateTime.ofInstant(Instant.ofEpochMilli(value), timeZone.toZoneId)

      case DateTimeValue(value, TimestampFormat.Int64Micros) =>
        // For pre-epoch values both quotient and remainder are negative;
        // Instant.ofEpochSecond accepts a negative nano adjustment.
        val seconds = value / MicrosPerSecond
        val micros  = value % MicrosPerSecond
        val nanos   = micros * NanosPerMicro
        LocalDateTime.ofInstant(Instant.ofEpochSecond(seconds, nanos), timeZone.toZoneId)

      case DateTimeValue(value, TimestampFormat.Int64Nanos) =>
        val seconds = value / NanosPerSecond
        val nanos   = value % NanosPerSecond
        LocalDateTime.ofInstant(Instant.ofEpochSecond(seconds, nanos), timeZone.toZoneId)
    }

  /** Encodes a local date-time into the 12-byte little-endian binary representation: 8 bytes of
    * time of day in nanoseconds followed by 4 bytes of Julian day.
    *
    * The raw offset of `timeZone` (daylight saving is not taken into account) is subtracted from
    * the time of day. The result is written as is, so it may be negative or exceed one day.
    *
    * @param data
    *   local date-time to encode
    * @param timeZone
    *   time zone whose raw offset is removed from the time of day
    * @return
    *   a [[BinaryValue]] wrapping the 12 bytes
    */
  def encodeLocalDateTime(data: LocalDateTime, timeZone: TimeZone): Value = BinaryValue {
    val date      = data.toLocalDate
    val time      = data.toLocalTime
    val julianDay = JulianDayOfEpoch + date.toEpochDay.toInt

    val timeInNanos       = time.toNanoOfDay
    val timeInMillis      = Math.floorDiv(timeInNanos, NanosPerMilli)
    val nanosLeft         = Math.floorMod(timeInNanos, NanosPerMilli)
    val fixedTimeInMillis = timeInMillis - timeZone.getRawOffset
    val fixedTimeInNanos  = fixedTimeInMillis * NanosPerMilli + nanosLeft

    val buf = ByteBuffer.allocate(12).order(ByteOrder.LITTLE_ENDIAN)
    buf.putLong(fixedTimeInNanos)
    buf.putInt(julianDay)
    buf.array()
  }

  /** Decodes a date stored as the number of days since the epoch in an [[IntValue]].
    * Any other value is not matched and results in a `scala.MatchError`.
    */
  def decodeLocalDate(value: Value): LocalDate =
    value match {
      case IntValue(epochDay) => LocalDate.ofEpochDay(epochDay.toLong)
    }

  /** Encodes a date as the number of days since the epoch, narrowed to `Int`. */
  def encodeLocalDate(data: LocalDate): Value = IntValue(data.toEpochDay.toInt)

  /** Interprets `dateTime` as wall-clock time in `timeZone` and returns the matching instant. */
  def localDateTimeToInstant(dateTime: LocalDateTime, timeZone: TimeZone): Instant =
    ZonedDateTime.of(dateTime, timeZone.toZoneId).toInstant

  /** Returns the wall-clock time that `instant` has in `timeZone`. */
  def instantToLocalDateTime(instant: Instant, timeZone: TimeZone): LocalDateTime =
    LocalDateTime.ofInstant(instant, timeZone.toZoneId)

  /** Interprets `dateTime` as wall-clock time in `timeZone` and returns the matching SQL timestamp. */
  def localDateTimeToTimestamp(dateTime: LocalDateTime, timeZone: TimeZone): Timestamp =
    Timestamp.from(ZonedDateTime.of(dateTime, timeZone.toZoneId).toInstant)

  /** Returns the wall-clock time that `timestamp` has in `timeZone`. */
  def timestampToLocalDateTime(timestamp: Timestamp, timeZone: TimeZone): LocalDateTime =
    LocalDateTime.ofInstant(timestamp.toInstant, timeZone.toZoneId)
}
/** Implicit decoders for date and time types. All of them delegate to [[TimeValueCodecs]]; the
  * date-time based ones use the time zone taken from the [[ValueCodecConfiguration]].
  */
trait TimeValueDecoders {

  /** Decodes a local date-time in the configured time zone. */
  implicit val localDateTimeDecoder: OptionalValueDecoder[LocalDateTime] = new OptionalValueDecoder[LocalDateTime] {
    def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): LocalDateTime = {
      val zone = configuration.timeZone
      TimeValueCodecs.decodeLocalDateTime(value, zone)
    }
  }

  /** Decodes a local date-time first and then converts it to an instant using the same zone. */
  implicit val instantDecoder: OptionalValueDecoder[Instant] = new OptionalValueDecoder[Instant] {
    def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): Instant = {
      val zone          = configuration.timeZone
      val localDateTime = TimeValueCodecs.decodeLocalDateTime(value, zone)
      localDateTimeToInstant(localDateTime, zone)
    }
  }

  /** Decodes a local date-time first and then converts it to a SQL timestamp using the same zone. */
  implicit val sqlTimestampDecoder: OptionalValueDecoder[java.sql.Timestamp] = new OptionalValueDecoder[Timestamp] {
    def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): Timestamp = {
      val zone          = configuration.timeZone
      val localDateTime = TimeValueCodecs.decodeLocalDateTime(value, zone)
      localDateTimeToTimestamp(localDateTime, zone)
    }
  }

  /** Decodes a local date; the configuration is not consulted. */
  implicit val localDateDecoder: OptionalValueDecoder[LocalDate] = new OptionalValueDecoder[LocalDate] {
    def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): LocalDate = {
      TimeValueCodecs.decodeLocalDate(value)
    }
  }

  /** Decodes a local date and converts it with `java.sql.Date.valueOf`. */
  implicit val sqlDateDecoder: OptionalValueDecoder[java.sql.Date] = new OptionalValueDecoder[Date] {
    def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): Date = {
      val localDate = TimeValueCodecs.decodeLocalDate(value)
      java.sql.Date.valueOf(localDate)
    }
  }
}
/** Implicit encoders for date and time types. All of them delegate to [[TimeValueCodecs]]; the
  * date-time based ones use the time zone taken from the [[ValueCodecConfiguration]].
  */
trait TimeValueEncoders {

  /** Encodes a local date-time using the configured time zone. */
  implicit val localDateTimeEncoder: OptionalValueEncoder[LocalDateTime] = new OptionalValueEncoder[LocalDateTime] {
    def encodeNonNull(data: LocalDateTime, configuration: ValueCodecConfiguration): Value = {
      val zone = configuration.timeZone
      TimeValueCodecs.encodeLocalDateTime(data, zone)
    }
  }

  /** Converts the instant to a local date-time in the configured zone and encodes that. */
  implicit val instantEncoder: OptionalValueEncoder[Instant] = new OptionalValueEncoder[Instant] {
    def encodeNonNull(data: Instant, configuration: ValueCodecConfiguration): Value = {
      val zone          = configuration.timeZone
      val localDateTime = instantToLocalDateTime(data, zone)
      TimeValueCodecs.encodeLocalDateTime(localDateTime, zone)
    }
  }

  /** Converts the SQL timestamp to a local date-time in the configured zone and encodes that. */
  implicit val sqlTimestampEncoder: OptionalValueEncoder[java.sql.Timestamp] = new OptionalValueEncoder[Timestamp] {
    def encodeNonNull(data: Timestamp, configuration: ValueCodecConfiguration): Value = {
      val zone          = configuration.timeZone
      val localDateTime = timestampToLocalDateTime(data, zone)
      TimeValueCodecs.encodeLocalDateTime(localDateTime, zone)
    }
  }

  /** Encodes a local date; the configuration is not consulted. */
  implicit val localDateEncoder: OptionalValueEncoder[LocalDate] = new OptionalValueEncoder[LocalDate] {
    def encodeNonNull(data: LocalDate, configuration: ValueCodecConfiguration): Value = {
      TimeValueCodecs.encodeLocalDate(data)
    }
  }

  /** Converts the SQL date with `toLocalDate` and encodes the resulting local date. */
  implicit val sqlDateEncoder: OptionalValueEncoder[java.sql.Date] = new OptionalValueEncoder[Date] {
    def encodeNonNull(data: Date, configuration: ValueCodecConfiguration): Value = {
      val localDate = data.toLocalDate
      TimeValueCodecs.encodeLocalDate(localDate)
    }
  }
}
/** Implicit decoders for collections, arrays, options and maps, built on top of the decoders of
  * their element types.
  */
trait ComplexValueDecoders extends ProductDecoders {

  /** Decodes a [[ListParquetRecord]] into a collection `Col[T]`, decoding every element with
    * `elementDecoder` and building the result with `factory`. Any other value is not matched and
    * results in a `scala.MatchError`.
    */
  implicit def collectionDecoder[T, Col[_]](implicit
      @nowarn evidence: Col[T] <:< Iterable[T],
      elementDecoder: ValueDecoder[T],
      factory: Factory[T, Col[T]]
  ): OptionalValueDecoder[Col[T]] =
    new OptionalValueDecoder[Col[T]] {
      def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): Col[T] =
        value match {
          case elements: ListParquetRecord =>
            elements.map(element => elementDecoder.decode(element, configuration)).to(factory)
        }
    }

  /** Decodes an array. A [[ListParquetRecord]] is decoded element by element; a [[BinaryValue]]
    * is accepted only when `T` is `Byte`, in which case its bytes are returned directly.
    */
  implicit def arrayDecoder[T, Col[_]](implicit
      @nowarn evidence: Col[T] =:= Array[T],
      classTag: ClassTag[T],
      factory: Factory[T, Col[T]],
      elementDecoder: ValueDecoder[T]
  ): OptionalValueDecoder[Col[T]] =
    new OptionalValueDecoder[Col[T]] {
      def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): Col[T] =
        value match {
          case elements: ListParquetRecord =>
            elements.map(element => elementDecoder.decode(element, configuration)).to(factory)
          case BinaryValue(binary) if classTag.runtimeClass == classOf[Byte] =>
            // T is Byte here, so the byte array already is the requested Col[T].
            binary.getBytes.asInstanceOf[Col[T]]
        }
    }

  /** Decodes [[NullValue]] to `None`; every other value is decoded with `elementDecoder` and
    * wrapped with `Option(...)`.
    */
  implicit def optionDecoder[T](implicit elementDecoder: ValueDecoder[T]): ValueDecoder[Option[T]] =
    new ValueDecoder[Option[T]] {
      def decode(value: Value, configuration: ValueCodecConfiguration): Option[T] =
        value match {
          case NullValue => None
          case _ =>
            val decoded = elementDecoder.decode(value, configuration)
            Option(decoded)
        }
    }

  /** Decodes a [[MapParquetRecord]] entry by entry. A [[NullValue]] key fails the `require`
    * check. Any other value than a map record is not matched and results in a `scala.MatchError`.
    */
  implicit def mapDecoder[K, V](implicit
      kDecoder: ValueDecoder[K],
      vDecoder: ValueDecoder[V]
  ): OptionalValueDecoder[Map[K, V]] =
    new OptionalValueDecoder[Map[K, V]] {
      def decodeNonNull(value: Value, configuration: ValueCodecConfiguration): Map[K, V] =
        value match {
          case entries: MapParquetRecord =>
            entries.map { case (rawKey, rawValue) =>
              require(rawKey != NullValue, "Map cannot have null keys")
              val decodedKey   = kDecoder.decode(rawKey, configuration)
              val decodedValue = vDecoder.decode(rawValue, configuration)
              decodedKey -> decodedValue
            }
        }
    }
}
/** Implicit encoders for collections, arrays, options and maps, built on top of the encoders of
  * their element types.
  */
trait ComplexValueEncoders extends ProductEncoders {

  /** Encodes an iterable collection by appending its elements, in iteration order, to an
    * initially empty [[ListParquetRecord]].
    */
  implicit def collectionEncoder[T, Col[_]](implicit
      evidence: Col[T] <:< Iterable[T],
      elementEncoder: ValueEncoder[T]
  ): OptionalValueEncoder[Col[T]] =
    new OptionalValueEncoder[Col[T]] {
      def encodeNonNull(data: Col[T], configuration: ValueCodecConfiguration): Value = {
        val elements = evidence(data)
        elements.foldLeft(ListParquetRecord.Empty) { (acc, item) =>
          acc.appended(item, configuration)
        }
      }
    }

  /** Encodes an array. When `T` is `Byte` the array is wrapped in a single [[BinaryValue]];
    * otherwise its elements are appended, in order, to an initially empty [[ListParquetRecord]].
    */
  implicit def arrayEncoder[T, Col[_]](implicit
      evidence: Col[T] =:= Array[T],
      classTag: ClassTag[T],
      elementEncoder: ValueEncoder[T]
  ): OptionalValueEncoder[Col[T]] =
    new OptionalValueEncoder[Col[T]] {
      def encodeNonNull(data: Col[T], configuration: ValueCodecConfiguration): Value =
        if (classTag.runtimeClass != classOf[Byte]) {
          val elements = evidence(data)
          elements.foldLeft(ListParquetRecord.Empty) { (acc, item) =>
            acc.appended(item, configuration)
          }
        } else {
          BinaryValue(data.asInstanceOf[Array[Byte]])
        }
    }

  /** Encodes `None` as [[NullValue]] and `Some(t)` with the encoder of `t`. */
  implicit def optionEncoder[T](implicit elementEncoder: ValueEncoder[T]): ValueEncoder[Option[T]] =
    new ValueEncoder[Option[T]] {
      def encode(data: Option[T], configuration: ValueCodecConfiguration): Value =
        data match {
          case Some(present) => elementEncoder.encode(present, configuration)
          case None          => NullValue
        }
    }

  /** Encodes a map by adding its entries one by one to an initially empty [[MapParquetRecord]].
    * A `null` key fails the `require` check.
    */
  implicit def mapEncoder[K, V](implicit
      kEncoder: ValueEncoder[K],
      vEncoder: ValueEncoder[V]
  ): OptionalValueEncoder[Map[K, V]] =
    new OptionalValueEncoder[Map[K, V]] {
      def encodeNonNull(data: Map[K, V], configuration: ValueCodecConfiguration): Value =
        data.foldLeft(MapParquetRecord.Empty) { (acc, entry) =>
          val (entryKey, entryValue) = entry
          require(entryKey != null, "Map cannot have null keys")
          acc.updated(entryKey, entryValue, configuration)
        }
    }
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy