All Downloads are FREE. Search and download functionalities are using the official Maven repository.

smile.json.JsonSerializer.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Smile.  If not, see <https://www.gnu.org/licenses/>.
 */

package smile.json

import scala.language.implicitConversions
import java.nio.ByteBuffer
import java.nio.charset.Charset
import java.sql.Timestamp
import java.time.{Instant, LocalDate, LocalTime}
import com.typesafe.scalalogging.LazyLogging

/** JSON serializer using a BSON-like binary format (see http://bsonspec.org/spec.html).
  *
  * This is not fully compatible with the BSON spec, where the root must be a
  * document/JsObject. Here the root can be any JsValue, so the encoded root
  * always starts with its type byte. Strings and decimals are written as a
  * 4-byte length followed by UTF-8 bytes (no trailing NUL), and several extra
  * type markers (local date/time, timestamp, big decimal) are used.
  *
  * Not thread-safe: `serialize(json)` reuses the single buffer passed to the
  * constructor. Each thread should have its own JsonSerializer instance.
  * The default buffer limits the encoded size to 10MB.
  *
  * Although JsLocalTime/JsLocalDateTime can hold nanosecond precision, only
  * hour, minute and second are stored to save space. To preserve sub-second
  * precision, use JsTimestamp, which of course consumes more space.
  *
  * @param buffer the working buffer that `serialize(json)` writes into.
  * @author Haifeng Li
  */
class JsonSerializer(buffer: ByteBuffer = ByteBuffer.allocate(10 * 1024 * 1024)) extends LazyLogging {

  import JsonSerializer._

  /** Serializes a JSON value into a fresh byte array.
    *
    * The internal buffer is cleared first, so the result contains only this value.
    *
    * @param json the value to encode.
    * @return a copy of the encoded bytes.
    */
  def serialize(json: JsValue): Array[Byte] = {
    buffer.clear()
    serialize(buffer, json, None)
    // Explicit call instead of relying on the implicit ByteBuffer => Array[Byte] conversion.
    byteBuffer2ArrayByte(buffer)
  }

  /** Deserializes a JSON value from a byte array produced by `serialize`.
    *
    * @param bytes the encoded value, starting with its type byte.
    * @return the decoded value.
    */
  def deserialize(bytes: Array[Byte]): JsValue = {
    val buffer = ByteBuffer.wrap(bytes)
    deserialize(buffer)
  }

  /** Deserializes one root value starting at the buffer's current position.
    *
    * @param buffer the source buffer; its position is advanced past the value.
    * @return the decoded value.
    * @throws IllegalStateException if the leading type byte is not supported.
    */
  def deserialize(buffer: ByteBuffer): JsValue = {
    buffer.get() match { // data type
      case TYPE_BOOLEAN        => boolean(buffer)
      case TYPE_INT32          => int(buffer)
      case TYPE_INT64          => long(buffer)
      case TYPE_DOUBLE         => double(buffer)
      case TYPE_BIGDECIMAL     => decimal(buffer)
      case TYPE_DATETIME       => date(buffer)
      case TYPE_LOCAL_DATE     => localDate(buffer)
      case TYPE_LOCAL_TIME     => localTime(buffer)
      case TYPE_LOCAL_DATETIME => localDatetime(buffer)
      case TYPE_TIMESTAMP      => timestamp(buffer)
      case TYPE_STRING         => string(buffer)
      case TYPE_BINARY         => binary(buffer)
      case TYPE_OBJECTID       => objectId(buffer)
      case TYPE_NULL           => JsNull
      case TYPE_UNDEFINED      => JsUndefined
      case TYPE_DOCUMENT       => deserialize(buffer, JsObject())
      case TYPE_ARRAY          => arrayOf(deserialize(buffer, JsObject()))
      case x => throw new IllegalStateException("Unsupported BSON type: %02X" format x)
    }
  }

  /** Clears the object buffer. */
  def clear(): Unit = buffer.clear()

  /** Writes the element name as a NUL-terminated string, if there is one.
    * Root values have no name and write nothing.
    */
  private def serialize(buffer: ByteBuffer, string: Option[String]): Unit = {
    string.foreach(name => serialize(buffer, name))
  }

  /** Writes a string as UTF-8 bytes followed by the NUL terminator. */
  private def serialize(buffer: ByteBuffer, string: String): Unit = {
    buffer.put(string.getBytes(UTF8))
    buffer.put(END_OF_STRING)
  }

  /** Writes a boolean as a single byte (TRUE or FALSE). */
  private def serialize(buffer: ByteBuffer, json: JsBoolean, ename: Option[String]): Unit = {
    buffer.put(TYPE_BOOLEAN)
    serialize(buffer, ename)
    buffer.put(if (json.value) TRUE else FALSE)
  }

  /** Writes a 32-bit integer. */
  private def serialize(buffer: ByteBuffer, json: JsInt, ename: Option[String]): Unit = {
    buffer.put(TYPE_INT32)
    serialize(buffer, ename)
    buffer.putInt(json.value)
  }

  /** Writes a 64-bit integer. */
  private def serialize(buffer: ByteBuffer, json: JsLong, ename: Option[String]): Unit = {
    buffer.put(TYPE_INT64)
    serialize(buffer, ename)
    buffer.putLong(json.value)
  }

  /** Writes a 64-bit floating point number. */
  private def serialize(buffer: ByteBuffer, json: JsDouble, ename: Option[String]): Unit = {
    buffer.put(TYPE_DOUBLE)
    serialize(buffer, ename)
    buffer.putDouble(json.value)
  }

  /** Writes a decimal as its length-prefixed plain string representation. */
  private def serialize(buffer: ByteBuffer, json: JsDecimal, ename: Option[String]): Unit = {
    buffer.put(TYPE_BIGDECIMAL)
    serialize(buffer, ename)
    val bytes = json.value.toPlainString.getBytes(UTF8)
    buffer.putInt(bytes.length)
    buffer.put(bytes)
  }

  /** Writes a string as a 4-byte length followed by its UTF-8 bytes. */
  private def serialize(buffer: ByteBuffer, json: JsString, ename: Option[String]): Unit = {
    buffer.put(TYPE_STRING)
    serialize(buffer, ename)
    val bytes = json.value.getBytes(UTF8)
    buffer.putInt(bytes.length)
    buffer.put(bytes)
  }

  /** Writes a date as milliseconds since the epoch. */
  private def serialize(buffer: ByteBuffer, json: JsDate, ename: Option[String]): Unit = {
    buffer.put(TYPE_DATETIME)
    serialize(buffer, ename)
    buffer.putLong(json.value.toEpochMilli)
  }

  /** Writes a local date as one packed int (see `packDate`). */
  private def serialize(buffer: ByteBuffer, json: JsLocalDate, ename: Option[String]): Unit = {
    buffer.put(TYPE_LOCAL_DATE)
    serialize(buffer, ename)
    val value = json.value
    buffer.putInt(packDate(value.getYear, value.getMonthValue, value.getDayOfMonth))
  }

  /** Writes a local time as one packed int (see `packTime`); nanos are dropped. */
  private def serialize(buffer: ByteBuffer, json: JsLocalTime, ename: Option[String]): Unit = {
    buffer.put(TYPE_LOCAL_TIME)
    serialize(buffer, ename)
    val value = json.value
    buffer.putInt(packTime(value.getHour, value.getMinute, value.getSecond))
  }

  /** Writes a local date-time as a packed date int followed by a packed time int. */
  private def serialize(buffer: ByteBuffer, json: JsLocalDateTime, ename: Option[String]): Unit = {
    buffer.put(TYPE_LOCAL_DATETIME)
    serialize(buffer, ename)
    val value = json.value
    buffer.putInt(packDate(value.getYear, value.getMonthValue, value.getDayOfMonth))
    buffer.putInt(packTime(value.getHour, value.getMinute, value.getSecond))
  }

  /** Writes a timestamp as epoch milliseconds followed by its nanos field. */
  private def serialize(buffer: ByteBuffer, json: JsTimestamp, ename: Option[String]): Unit = {
    buffer.put(TYPE_TIMESTAMP)
    serialize(buffer, ename)
    buffer.putLong(json.value.getTime)
    buffer.putInt(json.value.getNanos)
  }

  /** Writes the raw bytes of an object id. */
  private def serialize(buffer: ByteBuffer, json: JsObjectId, ename: Option[String]): Unit = {
    buffer.put(TYPE_OBJECTID)
    serialize(buffer, ename)
    buffer.put(json.value.id)
  }

  /** Writes a UUID as a 16-byte binary with the UUID subtype. */
  private def serialize(buffer: ByteBuffer, json: JsUUID, ename: Option[String]): Unit = {
    buffer.put(TYPE_BINARY)
    serialize(buffer, ename)
    buffer.putInt(16)
    buffer.put(BINARY_SUBTYPE_UUID)
    buffer.putLong(json.value.getMostSignificantBits)
    buffer.putLong(json.value.getLeastSignificantBits)
  }

  /** Writes a byte array as length, generic subtype, then the bytes. */
  private def serialize(buffer: ByteBuffer, json: JsBinary, ename: Option[String]): Unit = {
    buffer.put(TYPE_BINARY)
    serialize(buffer, ename)
    buffer.putInt(json.value.length)
    buffer.put(BINARY_SUBTYPE_GENERIC)
    buffer.put(json.value)
  }

  /** Packs a calendar date into `year * 10000 + month * 100 + day`. */
  private def packDate(year: Int, month: Int, day: Int): Int =
    year * 10000 + month * 100 + day

  /** Packs a time of day into `hour * 10000 + minute * 100 + second`. */
  private def packTime(hour: Int, minute: Int, second: Int): Int =
    hour * 10000 + minute * 100 + second

  /** Inverse of `packDate`.
    *
    * Floor division keeps negative years intact: the month/day part is always
    * positive, so truncating division would mis-split a negative packed value.
    * For non-negative values the result is the same as plain `/` and `%`.
    */
  private def unpackDate(value: Int): LocalDate = {
    val year = Math.floorDiv(value, 10000)
    val monthDay = Math.floorMod(value, 10000)
    LocalDate.of(year, monthDay / 100, monthDay % 100)
  }

  /** Inverse of `packTime`. */
  private def unpackTime(value: Int): LocalTime =
    LocalTime.of(value / 10000, (value % 10000) / 100, value % 100)

  /** Rebuilds an array from a document whose keys are the element indices. */
  private def arrayOf(doc: JsObject): JsArray = {
    val elements = doc.fields.map { case (k, v) => (k.toInt, v) }.toSeq.sortBy(_._1).map(_._2)
    JsArray(elements: _*)
  }

  /** Reads a NUL-terminated string.
    *
    * The bytes are decoded as UTF-8, matching how names are written. Using the
    * platform default charset here would corrupt non-ASCII names on JVMs
    * whose default is not UTF-8.
    */
  private def cstring(buffer: ByteBuffer): String = {
    val str = new collection.mutable.ArrayBuffer[Byte](64)
    var b = buffer.get()
    while (b != END_OF_STRING) {
      str += b
      b = buffer.get()
    }
    new String(str.toArray, UTF8)
  }

  /** Reads an element name. */
  private def ename(buffer: ByteBuffer): String = cstring(buffer)

  /** Reads a boolean; any non-zero byte is treated as true. */
  private def boolean(buffer: ByteBuffer): JsBoolean = {
    val b = buffer.get()
    if (b == 0) JsFalse else JsTrue
  }

  /** Reads a 32-bit integer, reusing the shared zero instance. */
  private def int(buffer: ByteBuffer): JsInt = {
    val x = buffer.getInt()
    if (x == 0) JsInt.zero else JsInt(x)
  }

  /** Reads a 64-bit integer, reusing the shared zero instance. */
  private def long(buffer: ByteBuffer): JsLong = {
    val x = buffer.getLong()
    if (x == 0) JsLong.zero else JsLong(x)
  }

  /** Reads a 64-bit floating point number, reusing the shared zero instance. */
  private def double(buffer: ByteBuffer): JsDouble = {
    val x = buffer.getDouble()
    if (x == 0.0) JsDouble.zero else JsDouble(x)
  }

  /** Reads a length-prefixed UTF-8 string and parses it as a decimal. */
  private def decimal(buffer: ByteBuffer): JsDecimal = {
    val length = buffer.getInt()
    val dst = new Array[Byte](length)
    buffer.get(dst)
    JsDecimal(new String(dst, UTF8))
  }

  /** Reads a date stored as milliseconds since the epoch. */
  private def date(buffer: ByteBuffer): JsDate = {
    val date = Instant.ofEpochMilli(buffer.getLong())
    JsDate(date)
  }

  /** Reads a packed local date. */
  private def localDate(buffer: ByteBuffer): JsLocalDate =
    JsLocalDate(unpackDate(buffer.getInt()))

  /** Reads a packed local time (second precision). */
  private def localTime(buffer: ByteBuffer): JsLocalTime =
    JsLocalTime(unpackTime(buffer.getInt()))

  /** Reads a packed date int followed by a packed time int. */
  private def localDatetime(buffer: ByteBuffer): JsLocalDateTime = {
    val date = unpackDate(buffer.getInt())
    val time = unpackTime(buffer.getInt())
    JsLocalDateTime(date, time)
  }

  /** Reads epoch milliseconds followed by the nanos field. */
  private def timestamp(buffer: ByteBuffer): JsTimestamp = {
    val milliseconds = buffer.getLong()
    val nanos = buffer.getInt()
    val timestamp = new Timestamp(milliseconds)
    timestamp.setNanos(nanos)
    JsTimestamp(timestamp)
  }

  /** Reads the raw bytes of an object id. */
  private def objectId(buffer: ByteBuffer): JsValue = {
    val id = new Array[Byte](ObjectId.size)
    buffer.get(id)
    JsObjectId(ObjectId(id))
  }

  /** Reads a length-prefixed UTF-8 string. */
  private def string(buffer: ByteBuffer): JsString = {
    val length = buffer.getInt()
    val dst = new Array[Byte](length)
    buffer.get(dst)
    JsString(new String(dst, UTF8))
  }

  /** Reads a binary value: a UUID for the UUID subtype, raw bytes otherwise. */
  private def binary(buffer: ByteBuffer): JsValue = {
    val length = buffer.getInt()
    val subtype = buffer.get()
    if (subtype == BINARY_SUBTYPE_UUID) {
      // Most significant bits were written first.
      JsUUID(buffer.getLong(), buffer.getLong())
    } else {
      val dst = new Array[Byte](length)
      buffer.get(dst)
      JsBinary(dst)
    }
  }

  /** Writes a document: size placeholder, fields sorted by name, terminator. */
  private def serialize(buffer: ByteBuffer, json: JsObject, ename: Option[String]): Unit = {
    buffer.put(TYPE_DOCUMENT)
    serialize(buffer, ename)

    val start = buffer.position()
    buffer.putInt(0) // placeholder for document size

    json.fields.toSeq.sortBy(_._1).foreach { case (field, value) =>
      serialize(buffer, value, Some(field))
    }

    buffer.put(END_OF_DOCUMENT)
    buffer.putInt(start, buffer.position() - start) // update document size
  }

  /** Writes an array as a document whose field names are the element indices. */
  private def serialize(buffer: ByteBuffer, json: JsArray, ename: Option[String]): Unit = {
    buffer.put(TYPE_ARRAY)
    serialize(buffer, ename)

    val start = buffer.position()
    buffer.putInt(0) // placeholder for document size

    json.elements.zipWithIndex.foreach { case (value, index) =>
      serialize(buffer, value, Some(index.toString))
    }

    buffer.put(END_OF_DOCUMENT)
    buffer.putInt(start, buffer.position() - start) // update document size
  }

  /** Dispatches to the writer for the runtime type of the value.
    *
    * @throws IllegalArgumentException for JsCounter, which has no encoding.
    */
  private def serialize(buffer: ByteBuffer, json: JsValue, ename: Option[String]): Unit = {
    json match {
      case x: JsBoolean       => serialize(buffer, x, ename)
      case x: JsInt           => serialize(buffer, x, ename)
      case x: JsLong          => serialize(buffer, x, ename)
      case x: JsDouble        => serialize(buffer, x, ename)
      case x: JsDecimal       => serialize(buffer, x, ename)
      case x: JsString        => serialize(buffer, x, ename)
      case x: JsDate          => serialize(buffer, x, ename)
      case x: JsLocalDate     => serialize(buffer, x, ename)
      case x: JsLocalTime     => serialize(buffer, x, ename)
      case x: JsLocalDateTime => serialize(buffer, x, ename)
      case x: JsTimestamp     => serialize(buffer, x, ename)
      case x: JsUUID          => serialize(buffer, x, ename)
      case x: JsObjectId      => serialize(buffer, x, ename)
      case x: JsBinary        => serialize(buffer, x, ename)
      case x: JsObject        => serialize(buffer, x, ename)
      case x: JsArray         => serialize(buffer, x, ename)
      case JsNull             => buffer.put(TYPE_NULL); serialize(buffer, ename)
      case JsUndefined        => buffer.put(TYPE_UNDEFINED); serialize(buffer, ename)
      case JsCounter(_)       => throw new IllegalArgumentException("BSON doesn't support JsCounter")
    }
  }

  /** Reads the fields of a document into `json` until the end-of-document byte.
    *
    * A mismatch between the recorded size and the bytes actually consumed is
    * only logged as a warning.
    *
    * @param buffer the source buffer, positioned at the document size field.
    * @param json the object that receives the decoded fields.
    * @return `json`, for convenience.
    * @throws IllegalStateException if a field has an unsupported type byte.
    */
  private def deserialize(buffer: ByteBuffer, json: JsObject): JsObject = {
    val start = buffer.position()
    val size = buffer.getInt() // document size

    var tpe = buffer.get()
    while (tpe != END_OF_DOCUMENT) {
      tpe match {
        case TYPE_BOOLEAN        => json(ename(buffer)) = boolean(buffer)
        case TYPE_INT32          => json(ename(buffer)) = int(buffer)
        case TYPE_INT64          => json(ename(buffer)) = long(buffer)
        case TYPE_DOUBLE         => json(ename(buffer)) = double(buffer)
        case TYPE_BIGDECIMAL     => json(ename(buffer)) = decimal(buffer)
        case TYPE_DATETIME       => json(ename(buffer)) = date(buffer)
        case TYPE_LOCAL_DATE     => json(ename(buffer)) = localDate(buffer)
        case TYPE_LOCAL_TIME     => json(ename(buffer)) = localTime(buffer)
        case TYPE_LOCAL_DATETIME => json(ename(buffer)) = localDatetime(buffer)
        case TYPE_TIMESTAMP      => json(ename(buffer)) = timestamp(buffer)
        case TYPE_STRING         => json(ename(buffer)) = string(buffer)
        case TYPE_OBJECTID       => json(ename(buffer)) = objectId(buffer)
        case TYPE_BINARY         => json(ename(buffer)) = binary(buffer)
        case TYPE_NULL           => json(ename(buffer)) = JsNull
        case TYPE_UNDEFINED      => json(ename(buffer)) = JsUndefined
        case TYPE_DOCUMENT       =>
          json(ename(buffer)) = deserialize(buffer, JsObject())

        case TYPE_ARRAY          =>
          // The name precedes the nested document, so read it first.
          val field = ename(buffer)
          json(field) = arrayOf(deserialize(buffer, JsObject()))

        case x                   => throw new IllegalStateException("Unsupported BSON type: %02X" format x)
      }
      tpe = buffer.get()
    }

    if (buffer.position() - start != size)
      logger.warn(s"BSON size $size but deserialize finishes at ${buffer.position()}, starts at $start")

    json
  }
}

/** Wire-format constants and helpers shared by the serializer. */
object JsonSerializer {
  /** Character set used for string payloads. */
  val UTF8: Charset = java.nio.charset.StandardCharsets.UTF_8

  // ---- Terminators ----------------------------------------------------

  /** Byte that closes a document. */
  val END_OF_DOCUMENT             : Byte = 0x00

  /** Byte that closes a NUL-terminated string. */
  val END_OF_STRING               : Byte = 0x00

  // ---- Type markers, based on BSON (http://bsonspec.org/spec.html) -----

  val TYPE_DOUBLE                 : Byte = 0x01
  val TYPE_STRING                 : Byte = 0x02
  val TYPE_DOCUMENT               : Byte = 0x03
  val TYPE_ARRAY                  : Byte = 0x04
  val TYPE_BINARY                 : Byte = 0x05
  val TYPE_UNDEFINED              : Byte = 0x06
  val TYPE_OBJECTID               : Byte = 0x07
  val TYPE_BOOLEAN                : Byte = 0x08
  /** Called UTC datetime in BSON: UTC milliseconds since the Unix epoch. */
  val TYPE_DATETIME               : Byte = 0x09
  val TYPE_NULL                   : Byte = 0x0A
  val TYPE_REGEX                  : Byte = 0x0B
  val TYPE_DBPOINTER              : Byte = 0x0C
  val TYPE_JAVASCRIPT             : Byte = 0x0D
  val TYPE_SYMBOL                 : Byte = 0x0E
  val TYPE_JAVASCRIPT_WITH_SCOPE  : Byte = 0x0F
  val TYPE_INT32                  : Byte = 0x10
  /** Special internal type used by MongoDB. */
  val TYPE_MONGODB_TIMESTAMP      : Byte = 0x11
  val TYPE_INT64                  : Byte = 0x12
  /** 128-bit IEEE 754-2008 decimal floating point. */
  val TYPE_DECIMAL128             : Byte = 0x13

  // ---- Markers outside the BSON range, for Java types ------------------

  /** Java8 LocalDate. */
  val TYPE_LOCAL_DATE             : Byte = 0x20
  /** Java8 LocalTime. */
  val TYPE_LOCAL_TIME             : Byte = 0x21
  /** Java8 LocalDateTime. */
  val TYPE_LOCAL_DATETIME         : Byte = 0x22
  /** Java8 Timestamp. */
  val TYPE_TIMESTAMP              : Byte = 0x23
  /** Java BigDecimal. */
  val TYPE_BIGDECIMAL             : Byte = 0x30

  val TYPE_MINKEY                 : Byte = 0xFF.toByte
  val TYPE_MAXKEY                 : Byte = 0x7F

  // ---- Binary subtypes -------------------------------------------------

  val BINARY_SUBTYPE_GENERIC      : Byte = 0x00
  val BINARY_SUBTYPE_FUNCTION     : Byte = 0x01
  val BINARY_SUBTYPE_BINARY_OLD   : Byte = 0x02
  val BINARY_SUBTYPE_UUID_OLD     : Byte = 0x03
  val BINARY_SUBTYPE_UUID         : Byte = 0x04
  val BINARY_SUBTYPE_MD5          : Byte = 0x05
  val BINARY_SUBTYPE_USER_DEFINED : Byte = 0x80.toByte

  // ---- Boolean payload bytes -------------------------------------------

  val TRUE                        : Byte = 0x01
  val FALSE                       : Byte = 0x00

  /** Encoding of "undefined": just its type byte. */
  val undefined: Array[Byte] = Array(TYPE_UNDEFINED)

  /** Encoding of "null": just its type byte. */
  val `null`: Array[Byte] = Array(TYPE_NULL)

  /** Copies the bytes written so far (index 0 up to the current position)
    * out of a buffer.
    *
    * The position is rewound to 0 for the bulk read, which then advances it
    * back to where it started.
    *
    * @param buffer the buffer to copy from.
    * @return a new array holding the first `position` bytes of the buffer.
    */
  implicit def byteBuffer2ArrayByte(buffer: ByteBuffer): Array[Byte] = {
    val written = buffer.position()
    val copy = new Array[Byte](written)
    buffer.position(0)
    buffer.get(copy)
    copy
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy