All downloads are free. Search and download functionality uses the official Maven repository.

io.prophecy.libs.fixedFormatSchema.scala Maven / Gradle / Ivy

There is a newer version: 6.3.0-3.3.0
Show newest version
/*
 * ====================================================================
 *
 * PROPHECY CONFIDENTIAL
 *
 * Prophecy Inc
 * All Rights Reserved.
 *
 * NOTICE:  All information contained herein is, and remains
 * the property of Prophecy Inc, the intellectual and technical concepts contained
 * herein are proprietary to Prophecy Inc and may be covered by U.S. and Foreign Patents,
 * patents in process, and are protected by trade secret or copyright law.
 * Dissemination of this information or reproduction of this material
 * is strictly forbidden unless prior written permission is obtained
 * from Prophecy Inc.
 *
 * ====================================================================
 */
package io.prophecy.libs

import io.prophecy.abinitio.dml.AbInitioToSparkFunctionMapping
import io.prophecy.abinitio.xfr.ast.CustomExpression
import io.prophecy.libs.utils.getLengthFromArraySizeInfo
import org.apache.spark.sql.types.{
  ArrayType,
  DataType,
  DecimalType,
  IntegerType,
  LongType,
  StringType,
  StructField,
  StructType
}
import org.codehaus.jackson.annotate.JsonIgnore
import play.api.libs.functional.syntax.toFunctionalBuilderOps
import play.api.libs.json.Json.JsValueWrapper
import play.api.libs.json._

import java.nio.ByteOrder
import scala.collection.mutable
import scala.util.parsing.input.Positional
import scala.util.{Failure, Success, Try}

// AST

/** Base trait for every fixed-format AST node; carries source position via Positional. */
trait FFAST extends Positional {
  // Only nodes that map onto a Spark type override this; the base is deliberately unimplemented.
  def toSpark: DataType = ???
}

// SCHEMA TYPES
case class FFSchemaRecord(recordType: String, rows: Seq[FFSchemaRow]) extends FFAST {

  /** Scala-source rendering that reconstructs this record. */
  def toScala = s"""FFSchemaRecord("$recordType", ${rows.map(x ⇒ x.toScala)}) """

  /**
    * Recursively converts schema rows to Spark StructFields.
    *  - simple rows map on their format (decimal / integer / long / arrays; anything else → StringType)
    *  - unsigned integers of 4..7 bytes are widened to LongType to avoid overflow
    *  - compound rows become (arrays of) nested structs
    *  - conditional rows convert their wrapped row
    */
  private def toSpark(rows: Seq[FFSchemaRow]): Seq[StructField] = {
    rows.map {
      case FFSimpleSchemaRow(columnName, format, defaultVal) ⇒
        val dataType = format match {
          case FFNumberFormat(
                FFTypeName("DecimalType", _),
                Some(precision),
                scale,
                _
              ) ⇒
            DecimalType(precision, scale.getOrElse(0))
          case FFNumberFormat(FFTypeName("IntegerType", _), Some(precision), _, properties)
              if properties.getOrElse("unsigned", false).asInstanceOf[Boolean] && precision >= 4 && precision < 8 ⇒
            LongType
          case FFNumberFormat(FFTypeName("IntegerType", _), _, _, _) ⇒
            IntegerType
          case FFNumberFormat(FFTypeName("LongType", _), _, _, _) ⇒
            LongType
          case FFNumberArrayFormat(name, precision, scale, _, miscProperties) ⇒
            // Element type derived by converting the equivalent scalar number format.
            ArrayType(
              toSpark(
                FFSimpleSchemaRow(columnName, FFNumberFormat(name, precision, scale, miscProperties), defaultVal) :: Nil
              ).head.dataType
            )
          case FFStringArrayFormat(name, precision, _) ⇒
            ArrayType(
              toSpark(FFSimpleSchemaRow(columnName, FFStringFormat(name, precision), defaultVal) :: Nil).head.dataType
            )
          case _ ⇒ StringType
        }
        StructField(columnName, dataType)
      case FFCompoundSchemaRow(FFStructArrayType(name1, _), rows) ⇒
        StructField(name1, ArrayType(StructType(toSpark(rows))))
      case FFCompoundSchemaRow(compound, rows) ⇒
        StructField(compound.name.get, StructType(toSpark(rows)))
      case FFConditionalSchemaRow(_, schemaRow) ⇒ toSpark(schemaRow :: Nil).head
      case other ⇒
        // Previously threw a message-less Exception, which made failures undiagnosable.
        throw new Exception(s"Cannot convert schema row of type ${other.getClass.getSimpleName} to Spark")
    }
  }

  override def toSpark: StructType =
    StructType(toSpark(rows))

  /** DML-style rendering of the whole record. */
  override def toString: String = {
    recordType match {
      case "void" ⇒
        if (rows.isEmpty) s"void()"
        else {
          // A void record carries a single simple row whose format holds the void size.
          val value = rows.head.asInstanceOf[FFSimpleSchemaRow].format.asInstanceOf[FFVoidFormat].size.getOrElse(0)
          s"void($value)"
        }
      case "" ⇒ s"${rows.map(_.toString).mkString("\n")}"
      case _ ⇒
        val newRecordType = if (recordType != "record") s"$recordType record" else recordType
        s"$newRecordType\n${rows.map(_.toString).mkString("\n")}\nend"
    }

  }
}

case class FFRecordType(startType: String) extends FFAST {
  /** Renders as the raw start-type keyword. */
  override def toString: String = startType
}
/** A single row of a fixed-format schema: simple field, compound, include, or conditional. */
sealed trait FFSchemaRow extends FFAST {
  def toScala:   String // Scala source that reconstructs this row
  def getSize:   String // byte size as a string; "-1" when variable/unknown
  def getLength: String // field/element count as a string
}

case class FFIncludeFileRow(filePath: String) extends FFSchemaRow {
  /** Scala-source rendering of this include row. */
  override def toScala: String = s"FFIncludeFileRow($filePath)"
  /** Size is undefined for include rows. */
  def getSize: String = ???
  /** Length is undefined for include rows. */
  def getLength: String = ???
  /** DML-style include directive. */
  override def toString: String = s"""include "$filePath";"""
}
case class FFConditionalSchemaRow(condition: String, schemaRow: FFSchemaRow) extends FFSchemaRow {
  /** Scala-source rendering; triple-quotes the condition so embedded quotes survive. */
  override def toScala: String =
    s"""FFConditionalSchemaRow(\"\"\"$condition\"\"\", ${schemaRow.toScala})"""

  /** Size is undefined for conditional rows. */
  override def getSize: String = ???

  /** Length is undefined for conditional rows. */
  override def getLength: String = ???
}

case class FFSimpleSchemaRow(name: String, format: FFDataFormat, value: FFDefaultVal) extends FFSchemaRow {

  /** Scala-source rendering of this row. */
  def toScala =
    s"""FFSimpleSchemaRow("$name", ${format.toScala}, ${value.toScala})"""

  /** DML-style rendering: "<format> <name>" plus an optional "= <default>" clause. */
  override def toString: String = {
    val suffix = value match {
      case _: FFNoDefaultVal ⇒ " ;"
      case other             ⇒ s" = ${other.toString} ;"
    }
    s"${format.toString} $name$suffix"
  }

  /** Delegates to the format's form classification. */
  def getForm(): String = format.getForm()

  override def getSize: String = format.getSize

  override def getLength: String = format.getLength
}

case class FFCompoundSchemaRow(compound: FFCompoundType, rows: Seq[FFSchemaRow]) extends FFSchemaRow {

  /** Scala-source rendering that reconstructs this compound row. */
  def toScala = s"""FFCompoundSchemaRow(${compound.toScala}, ${rows.map(x ⇒ x.toScala)})"""

  /** DML-style rendering: record/union body followed by an optional name / array-size suffix. */
  override def toString: String = {
    compound match {
      case FFStructType("") ⇒
        s"record\n${rows.map(_.toString).mkString("\n")}\nend ;"
      case FFStructType(name) ⇒
        s"record\n${rows.map(_.toString).mkString("\n")}\nend $name ;"
      case FFStructArrayType(name, arraySizeInfo) ⇒
        s"record\n${rows.map(_.toString).mkString("\n")}\nend[${arraySizeInfo.getOrElse("")}] $name ;"
      case FFUnionType(Some(name)) ⇒
        s"union\n${rows.map(_.toString).mkString("\n")}\nend $name ;"
      case FFUnionType(None) ⇒
        s"union\n${rows.map(_.toString).mkString("\n")}\nend ;"
      case _ ⇒ throw new Exception(s"$compound type not supported in Compound Schema")
    }
  }

  /** Form classification; pattern match replaces the original isInstanceOf chain. */
  def getForm(): String = compound match {
    case _: FFStructArrayType ⇒ "vector"
    case _: FFStructType      ⇒ "record"
    case _: FFUnionType       ⇒ "union"
    case _                    ⇒ "record"
  }

  /** Total byte size of all child rows; "-1" for struct arrays (variable size). */
  override def getSize: String = compound match {
    case _: FFStructArrayType ⇒ "-1"
    case _                    ⇒ rows.map(_.getSize.toInt).sum.toString
  }

  /** Child-row count; "-1" for struct arrays. */
  override def getLength: String = compound match {
    case _: FFStructArrayType ⇒ "-1"
    case _                    ⇒ rows.length.toString
  }
}

case class FFSimpleSchemaList(rows: Seq[FFSimpleSchemaRow]) extends FFSchemaRow {
  /** Scala-source rendering is intentionally unsupported for this wrapper. */
  def toScala = throw new Exception("toScala method not implemented")

  /** One row per line, in declaration order. */
  override def toString: String = rows.map(_.toString).mkString("\n")

  /** Size of the first row (list assumed non-empty, as in the original). */
  override def getSize: String = rows.head.getSize

  /** Length of the first row (list assumed non-empty, as in the original). */
  override def getLength: String = rows.head.getLength
}

// SCHEMA TYPES - Compounds
/** Compound grouping of schema rows: record (struct), union, or struct array. */
sealed trait FFCompoundType extends FFAST {
  def name:    Option[String] // optional compound name
  def toScala: String // Scala source that reconstructs this compound
}

case class FFUnionType(name: Option[String] = None) extends FFCompoundType {
  /**
    * Scala-source rendering. FIX: the original called name.get unconditionally and
    * therefore threw on anonymous unions (FFUnionType(None)), which the rest of the
    * file explicitly supports (see FFCompoundSchemaRow.toString).
    */
  def toScala: String = name match {
    case Some(n) ⇒ s"""FFUnionType(Some("$n"))"""
    case None    ⇒ "FFUnionType(None)"
  }
  override def toString: String =
    name.getOrElse("")
}
case class FFStructType(name1: String) extends FFCompoundType {
  /** Struct types always carry a name. */
  override def name: Option[String] = Some(name1)
  /** Scala-source rendering. */
  def toScala = s"""FFStructType("$name1")"""
  override def toString: String = name1
}

case class FFStructArrayType(name1: String, arraySizeInfo: Option[String]) extends FFCompoundType {
  override def name: Option[String] = Some(name1)

  /** Quotes string values for embedding in generated Scala source; ints pass through. */
  def getStringWithQuotes(value: Any) = value match {
    case s: String ⇒ s""""$s""""
    case i: Int    ⇒ i
  }

  /** Scala-source rendering. */
  def toScala = s"""FFStructArrayType("$name1",${getStringWithQuotes(arraySizeInfo.getOrElse(""))})"""

  /** DML-style rendering: name plus [size] suffix. */
  override def toString: String = s"$name1[${arraySizeInfo.getOrElse("")}]"
}

// SCHEMA TYPES - Formats
case class FFTypeNameWithProperties(
  name:           String,
  delimiter:      Option[String],
  miscProperties: Map[String, Any] = Map("packed" → false)
) extends FFAST {
  /**
    * Scala-source rendering. FIX: String property values are now quoted (matching
    * FFNumberFormat.toScala); previously they were interpolated bare, producing
    * Scala source that did not compile.
    */
  def toScala: String = {
    val delimiterScala = delimiter.map(f ⇒ s"""Some("$f")""").getOrElse("None")
    val miscPropertiesScala = miscProperties
      .map {
        case (k, v: String) ⇒ s""""$k" -> "$v""""
        case (k, v)         ⇒ s""""$k" -> $v"""
      }
      .mkString(", ")
    s"""FFTypeNameWithProperties("$name", $delimiterScala, Map($miscPropertiesScala))"""
  }

  /**
    * DML-style rendering of the property list: false booleans are dropped, true
    * booleans render as a bare keyword, strings as k="v", others as k=v.
    */
  override def toString: String = {
    miscProperties
      .filter { case (_, v) ⇒ !v.isInstanceOf[Boolean] || v.asInstanceOf[Boolean] }
      .map {
        case (k, _: Boolean) ⇒ s"$k" // only true booleans survive the filter above
        case (k, v: String)  ⇒ s"""$k="$v""""
        case (k, v)          ⇒ s"$k=${v.toString}"
      }
      .mkString(", ")
  }
}
case class FFTypeName(name: String, delimiter: Option[String]) extends FFAST {
  /** Scala-source rendering of this type name. */
  def toScala: String = {
    val delim = delimiter.fold("None")(d ⇒ s"""Some("$d")""")
    s"""FFTypeName("$name", $delim)"""
  }
  /** Rendered as name(delimiter), with an empty slot when no delimiter. */
  override def toString: String = s"$name(${delimiter.getOrElse("")})"
}

/** Base trait for every fixed-format field data format. */
sealed trait FFDataFormat extends FFAST {
  def toScala:      String
  def getSparkType: String

  /** Ab Initio "form" classification; pattern match replaces the isInstanceOf chain. */
  def getForm(): String = this match {
    // Array formats are vectors regardless of their element type.
    case _: FFNumberArrayFormat | _: FFStringArrayFormat ⇒ "vector"
    case _ ⇒
      getSparkType match {
        case "StringType"   ⇒ "string"
        case "DateType"     ⇒ "date"
        case "DateTimeType" ⇒ "datetime"
        case "DecimalType"  ⇒ "decimal"
        case "IntegerType"  ⇒ "integer"
        case "VoidType"     ⇒ "void"
        case _              ⇒ "string"
      }
  }

  def getSize:   String
  def getLength: String
  def getPropertyValue(key: String): Option[Any]

  /** Quotes string values for embedding in generated Scala source; ints pass through. */
  def getStringWithQuotes(value: Any) = value match {
    case x: String ⇒ s""""$x""""
    case x: Int    ⇒ x
  }
}

case class FFNumberFormat(
  name:           FFTypeName,
  precision:      Option[Int],
  scale:          Option[Int],
  miscProperties: Map[String, Any] = Map("signReserved" → false, "packed" → false)
) extends FFDataFormat {

  /** Scala-source rendering; string property values are quoted so the output compiles. */
  override def toScala: String = {
    val miscPropertiesScala = miscProperties
      .map { kv ⇒
        val value = if (kv._2.isInstanceOf[String]) s""""${kv._2}"""" else s"${kv._2}"
        s""""${kv._1}" -> $value"""
      }
      .mkString(", ")
    s"""FFNumberFormat(${name.toScala}, $precision, $scale, Map($miscPropertiesScala))"""
  }

  override def getSparkType = name.name

  /**
    * DML-style rendering, e.g. `decimal(10, 2)` or `unsigned big endian integer(4)`.
    */
  override def toString: String = {
    val delimiter = name.delimiter
    // Separator between precision and scale; only "Comma" and "Period" are expected.
    val decimal_point = miscProperties.getOrElse("decimal_point", "Comma").toString match {
      case "Comma"  ⇒ ", "
      case "Period" ⇒ "."
    }
    val typeString =
      if (name.name == "DecimalType" || (delimiter.isDefined && precision.isEmpty && scale.isEmpty)) "decimal"
      else if (name.name == "DoubleType") "double"
      else "integer"
    // Main parenthesised argument list, depending on which of delimiter/precision/scale are set.
    val mainParamPart = if (delimiter.isDefined && precision.isDefined && scale.isDefined) {
      s""""${delimiter.get}", ${scale.get}, maximum_length=${precision.get + 1}"""
    } else if (delimiter.isEmpty && precision.isDefined && scale.isDefined) {
      s"""${precision.get}$decimal_point${scale.get}"""
    } else if (delimiter.isDefined && precision.isDefined && scale.isEmpty) {
      s""""${delimiter.get}", maximum_length=${precision.get}"""
    } else if (delimiter.isEmpty && precision.isDefined && scale.isEmpty) {
      s"${precision.get}"
    } else if (delimiter.isDefined && precision.isEmpty && scale.isEmpty) {
      s""""${delimiter.get}""""
    } else ""
    val isPacked = miscProperties.contains("packed") && miscProperties("packed").asInstanceOf[Boolean]
    // Properties rendered inside the parentheses; packed/decimal_point/endian/unsigned
    // are emitted as prefixes instead, so they are excluded here. (Also: `||` replaces
    // the original non-short-circuiting `|`.)
    var extraPropPart = FFTypeNameWithProperties(
      "",
      None,
      miscProperties.filterNot(x ⇒ x._1 == "packed" || x._1 == "decimal_point" || x._1 == "endian" || x._1 == "unsigned")
    ).toString
    if (extraPropPart.nonEmpty) extraPropPart = ", " + extraPropPart

    var combinedString = if (mainParamPart.nonEmpty) s"$typeString($mainParamPart$extraPropPart)" else typeString
    // BUG FIX: the original computed `"packed " + combinedString` as a bare expression
    // and discarded the result, so packed formats never rendered the prefix.
    if (isPacked)
      combinedString = "packed " + combinedString

    if (miscProperties.contains("endian")) {
      combinedString = miscProperties.getOrElse("endian", "big").toString + " endian " + combinedString
    }
    if (miscProperties.getOrElse("unsigned", false).asInstanceOf[Boolean])
      combinedString = "unsigned " + combinedString
    if (miscProperties.getOrElse("ebcdic", false).asInstanceOf[Boolean])
      combinedString = "ebcdic " + combinedString
    combinedString
  }

  override def getSize:   String = precision.getOrElse(-1).toString
  override def getLength: String = precision.getOrElse(-1).toString

  override def getPropertyValue(key: String): Option[Any] = miscProperties.get(key)
}
case class FFStringFormat(
  name:      FFTypeName,
  precision: Option[Int],
  props:     Option[Map[String, String]] = None
) extends FFDataFormat {

  /** Scala-source rendering of this string format. */
  override def toScala: String =
    s"""FFStringFormat(
       |  ${name.toScala},
       |  $precision,
       |  $propsStr)
       |  """.stripMargin

  override def getSparkType = name.name

  /** Renders the props map as Scala source ("None" when absent). */
  def propsStr: String =
    if (!props.isDefined)
      "None"
    else
      s"""Some(Map(${props.get
        .map {
          case (k: String, v: String) ⇒ "\"" + k + "\" -> \"" + v + "\""
        }
        .mkString(", ")}))""".stripMargin

  override def getSize:   String = precision.getOrElse(-1).toString
  override def getLength: String = precision.getOrElse(-1).toString
  override def getPropertyValue(key: String): Option[Any] = if (props.isDefined) props.get.get(key) else None

  /**
    * DML-style rendering, e.g. `string(10)` or `ebcdic string(",")`.
    * Only StringType and TimestampType are supported.
    */
  override def toString: String = {
    val delimiter = name.delimiter
    val isEbcdic  = props.isDefined && props.get.getOrElse("ebcdic", "false") == "true"
    val prefix    = if (isEbcdic) "ebcdic " else ""
    if (name.name == "StringType") {
      // Argument list depends on which of delimiter/precision are present.
      val f = if (delimiter.isEmpty && precision.isEmpty) {
        s"integer(4)"
      } else if (delimiter.isDefined && precision.isDefined) {
        s""""${delimiter.get}", ${precision.get}"""
      } else if (delimiter.isDefined && precision.isEmpty) {
        s""""${delimiter.get}""""
      } else if (delimiter.isEmpty && precision.isDefined) {
        s"${precision.get}"
      } else throw new Exception("case not supported")
      // NOTE(review): the original had a props-rendering branch guarded by
      // `props.isDefined && props.get.nonEmpty && false` (i.e. deliberately disabled)
      // and a discarded bare `f` expression; both removed with behavior unchanged —
      // props are never rendered here.
      s"${prefix}string($f)"
    } else if (name.name == "TimestampType") {
      "timestamp()"
    } else {
      throw new Exception(s"${name.name} not supported")
    }
  }
}

case class FFDateFormat(name: FFTypeName, format: Option[String], miscProperties: Map[String, Any] = Map())
    extends FFDataFormat {

  /**
    * Scala-source rendering. FIX: string property values are now quoted (matching
    * FFNumberFormat.toScala); previously they were interpolated bare, producing
    * Scala source that did not compile.
    */
  override def toScala: String = {
    val formatScala = format.map(f ⇒ s"""Some("$f")""").getOrElse("None")
    val miscPropertiesScala = miscProperties
      .map { kv ⇒
        val value = if (kv._2.isInstanceOf[String]) s""""${kv._2}"""" else s"${kv._2}"
        s""""${kv._1}" -> $value"""
      }
      .mkString(", ")
    s"""FFDateFormat(${name.toScala}, $formatScala, Map($miscPropertiesScala))"""
  }

  override def getSparkType = name.name
  override def getSize:   String = format.getOrElse("").length.toString
  override def getLength: String = getSize

  /** DML-style rendering, e.g. `date("YYYY-MM-DD")('|')`; only DateType is supported. */
  override def toString: String = {
    if (name.name != "DateType") throw new Exception(s"${name.name} is not supported")
    var extraPropPart = FFTypeNameWithProperties("", None, miscProperties.filterNot(_._1 == "packed")).toString
    if (extraPropPart.nonEmpty) extraPropPart = ", " + extraPropPart
    val delimiter = name.delimiter
    if (delimiter.isDefined && format.isDefined) {
      s"""date("${AbInitioToSparkFunctionMapping.getAbinitioTimeFormatFromSparkDateTimeFormat(
        format.get
      )}"$extraPropPart)('${delimiter.get}')"""
    } else if (delimiter.isEmpty && format.isDefined) {
      s"""date("${AbInitioToSparkFunctionMapping.getAbinitioTimeFormatFromSparkDateTimeFormat(
        format.get
      )}"$extraPropPart)"""
    } else if (delimiter.isEmpty && format.isEmpty) {
      s"date(int)"
    } else {
      s"""date("${delimiter.get}"$extraPropPart)"""
    }
  }

  override def getPropertyValue(key: String): Option[Any] = miscProperties.get(key)
}
case class FFDateTimeFormat(name: FFTypeName, format: Option[String], miscProperties: Map[String, Any] = Map())
    extends FFDataFormat {

  /**
    * Scala-source rendering. FIX: the original dropped miscProperties entirely, so a
    * rendered-then-recompiled format silently lost its properties; they are now emitted
    * the same way as in FFDateFormat (string values quoted). Backward compatible: the
    * constructor already defaults miscProperties.
    */
  override def toScala: String = {
    val formatScala = format.map(f ⇒ s"""Some("$f")""").getOrElse("None")
    val miscPropertiesScala = miscProperties
      .map { kv ⇒
        val value = if (kv._2.isInstanceOf[String]) s""""${kv._2}"""" else s"${kv._2}"
        s""""${kv._1}" -> $value"""
      }
      .mkString(", ")
    s"""FFDateTimeFormat(${name.toScala}, $formatScala, Map($miscPropertiesScala))"""
  }

  override def getSparkType = name.name
  override def getSize:   String = format.getOrElse("").length.toString
  override def getLength: String = getSize

  /** DML-style rendering; only DateTimeType is supported. */
  override def toString: String = {
    if (name.name != "DateTimeType") throw new Exception(s"${name.name} not supported")
    val delimiter = name.delimiter
    if (delimiter.isDefined && format.isDefined) {
      s"""datetime("${AbInitioToSparkFunctionMapping.getAbinitioTimeFormatFromSparkDateTimeFormat(
        format.get
      )}")('${delimiter.get}')"""
    } else if (delimiter.isEmpty && format.isDefined) {
      s"""datetime("${AbInitioToSparkFunctionMapping.getAbinitioTimeFormatFromSparkDateTimeFormat(format.get)}")"""
    } else if (delimiter.isEmpty && format.isEmpty) {
      s"datetime(int)"
    } else {
      s"""datetime("${delimiter.get}")"""
    }
  }

  override def getPropertyValue(key: String): Option[Any] = miscProperties.get(key)
}
case class FFStructFormat(name: FFTypeName, precision: Option[Int]) extends FFDataFormat {
  /** Scala-source rendering of this struct format. */
  override def toScala: String = s"""FFStructFormat(${name.toScala}, ${precision.toString})"""
  override def getSparkType = name.name
  // Byte size comes from precision; a struct always counts as one field.
  override def getSize:   String = precision.getOrElse(-1).toString
  override def getLength: String = "1"
  // NOTE(review): super.toString bypasses the case-class toString and yields the default
  // Object rendering (ClassName@hash) — confirm this is intentional.
  override def toString:  String = super.toString

  override def getPropertyValue(key: String): Option[Any] = None
}

case class FFUnknownFormat(name: FFTypeName, arraySizeInfo: Option[String]) extends FFDataFormat {
  /**
    * Scala-source rendering. FIX: the original emitted only one constructor argument
    * (FFUnknownFormat(name)) even though the constructor takes two, so the generated
    * source could not compile and arraySizeInfo was silently lost. Now rendered the
    * same way as the sibling FFStringArrayFormat.toScala.
    */
  override def toScala: String =
    s"""FFUnknownFormat(${name.toScala}, ${getStringWithQuotes(arraySizeInfo.getOrElse(""))})"""
  override def getSparkType = name.name
  override def getSize:   String = "-1"
  override def getLength: String = "-1"
  /** DML-style rendering: name plus optional [size] suffix. */
  override def toString: String =
    if (arraySizeInfo.isDefined) s"${name.name}[${arraySizeInfo.getOrElse("")}]" else name.name

  override def getPropertyValue(key: String): Option[Any] = None
}
case class FFStringArrayFormat(name: FFTypeName, precision: Option[Int], arraySizeInfo: Option[String])
    extends FFDataFormat {
  /** Scala-source rendering of this string-array format. */
  override def toScala: String =
    s"""FFStringArrayFormat(${name.toScala}, $precision, ${getStringWithQuotes(arraySizeInfo.getOrElse(""))})"""
  override def getSparkType = name.name
  /** Arrays have no fixed byte size. */
  override def getSize:   String = "-1"
  override def getLength: String = "-1"
  override def getPropertyValue(key: String): Option[Any] = None
  /** Renders the element string format followed by the array-size suffix. */
  override def toString: String = {
    val element = FFStringFormat(FFTypeName("StringType", name.delimiter), precision).toString
    s"$element[${arraySizeInfo.getOrElse("")}]"
  }
}
case class FFNumberArrayFormat(
  name:           FFTypeName,
  precision:      Option[Int],
  scale:          Option[Int],
  arraySizeInfo:  Option[String],
  miscProperties: Map[String, Any] = Map("signReserved" → false, "packed" → false)
) extends FFDataFormat {

  /**
    * Scala-source rendering. FIX: string property values are now quoted (matching
    * FFNumberFormat.toScala); previously they were interpolated bare, producing
    * Scala source that did not compile.
    */
  override def toScala: String = {
    val miscPropertiesScala = miscProperties
      .map { kv ⇒
        val value = if (kv._2.isInstanceOf[String]) s""""${kv._2}"""" else s"${kv._2}"
        s""""${kv._1}" -> $value"""
      }
      .mkString(", ")
    s"""FFNumberArrayFormat(${name.toScala}, $precision, $scale, ${getStringWithQuotes(
      arraySizeInfo.getOrElse("")
    )}, Map($miscPropertiesScala))"""
  }

  /** Arrays have no fixed byte size. */
  override def getSize:   String = "-1"
  override def getLength: String = "-1"
  override def getSparkType = name.name

  /** Renders the element number format followed by the array-size suffix. */
  override def toString: String = {
    val numberPart = FFNumberFormat(name, precision, scale, miscProperties).toString
    s"$numberPart[${arraySizeInfo.getOrElse("")}]"
  }

  override def getPropertyValue(key: String): Option[Any] = miscProperties.get(key)
}
case class FFVoidFormat(name: FFTypeName, size: Option[Int]) extends FFDataFormat {
  /** Scala-source rendering. */
  override def toScala: String = s"""FFVoidFormat(${name.toScala}, $size)"""
  override def getSparkType = name.name
  override def getSize:   String = size.getOrElse(-1).toString
  override def getLength: String = size.getOrElse(-1).toString
  /** DML-style rendering: void(n) or void(). */
  override def toString: String = size.fold("void()")(n ⇒ s"void($n)")

  override def getPropertyValue(key: String): Option[Any] = None
}

// SCHEMA TYPES - Default values
/** Default value attached to a simple schema row (none, NULL, string, int, double, expression). */
sealed trait FFDefaultVal extends FFAST {
  def toScala:  String // Scala source that reconstructs this default
  def getValue: String // literal rendering for generated expressions ("" when none)
}
case class FFNoDefaultVal() extends FFDefaultVal {
  /** Scala-source rendering. */
  override def toScala: String = "FFNoDefaultVal()"
  /** No textual default. */
  override def toString: String = ""
  override def getValue: String = ""
}
case class FFNullDefaultVal(value: Option[Any] = None) extends FFDefaultVal {
  /**
    * Scala-source rendering. FIX: the original always emitted FFNullDefaultVal(),
    * dropping a present inner value and losing it on a render/recompile round trip.
    * The inner value is now emitted (strings quoted), mirroring toString below.
    */
  override def toScala: String = value match {
    case None            ⇒ "FFNullDefaultVal()"
    case Some(s: String) ⇒ s"""FFNullDefaultVal(Some("$s"))"""
    case Some(v)         ⇒ s"FFNullDefaultVal(Some($v))"
  }
  /** DML-style rendering: NULL, NULL("x") for strings, NULL(v) otherwise. */
  override def toString: String = {
    value match {
      case None ⇒ "NULL"
      case Some(innerValue) ⇒
        innerValue match {
          case y: String ⇒ s"""NULL("$y")"""
          case y @ _ ⇒ s"NULL($innerValue)"
        }
    }
  }
  override def getValue: String = ""
}

case class FFExpressionDefaultVal(value: CustomExpression) extends FFDefaultVal {
  /** Scala-source rendering; embeds the expression's text inside quotes. */
  override def toScala: String = s"""FFExpressionDefaultVal("$value")"""

  /** Renders as the expression's own text. */
  override def toString: String = value.toString

  override def getValue: String = ""
}

case class FFStringDefaultVal(value: String) extends FFDefaultVal {
  /** Scala-source rendering. */
  override def toScala: String = s"""FFStringDefaultVal("$value")"""
  /** Rendered with surrounding double quotes. */
  override def toString: String = "\"" + value + "\""
  override def getValue: String = "\"" + value + "\""
}
case class FFIntDefaultVal(value: Int) extends FFDefaultVal {
  /** Scala-source rendering. */
  override def toScala: String = s"FFIntDefaultVal($value)"
  override def toString: String = value.toString
  override def getValue: String = value.toString
}
case class FFDoubleDefaultVal(value: Double) extends FFDefaultVal {
  /** Scala-source rendering. */
  override def toScala: String = s"FFDoubleDefaultVal($value)"
  override def toString: String = value.toString
  override def getValue: String = value.toString
}
object FixedFormatSchemaImplicits {

  /**
    * implicit read definition for Map[String, Any] datatype
    */
  implicit val mapReads: Reads[Map[String, Any]] = new Reads[Map[String, Any]] {
    def reads(jv: JsValue): JsResult[Map[String, Any]] =
      JsSuccess(
        jv.as[JsObject]
          .value
          .toSeq
          .map {
            case (k, v) ⇒
              (k, v) match {
                case (k, v: JsString) ⇒ k → v.as[String]
                case (k, v: JsBoolean) ⇒ k → v.as[Boolean]
                case (k, v: JsNumber) ⇒
                  k → {
                    v.asOpt[Long] match {
                      case Some(value) ⇒ value
                      case None ⇒
                        v.asOpt[Int] match {
                          case Some(value2) ⇒ value2
                          case None ⇒
                            v.asOpt[Double] match {
                              case Some(value3) ⇒ value3
                              case None ⇒
                                v.asOpt[Float] match {
                                  case Some(value4) ⇒ value4
                                  case None         ⇒ v.toString()
                                }
                            }
                        }
                    }
                  }
              }
          }
          .toMap
      )
  }

  /**
    * implicit write definition for Map[String, Any] datatype
    */
  implicit val mapWrites: Writes[Map[String, Any]] = new Writes[Map[String, Any]] {
    def writes(map: Map[String, Any]): JsValue =
      Json.obj(map.map {
        case (s, o) ⇒
          val ret: (String, JsValueWrapper) = (s, o) match {
            case (s, o: Int)     ⇒ s → JsNumber(o)
            case (s, o: Long)    ⇒ s → JsNumber(o)
            case (s, o: Double)  ⇒ s → JsNumber(o)
            case (s, o: Boolean) ⇒ s → JsBoolean(o)
            case (s, o: String)  ⇒ s → JsString(o)
          }
          (ret._1, ret._2)
      }.toSeq: _*)
  }

  implicit lazy val jsonMapFormat: Format[Map[String, Any]] = Format(mapReads, mapWrites)

  implicit lazy val ffTypeNameFormat:       OFormat[FFTypeName]          = Json.format[FFTypeName]
  implicit lazy val ffDateTimeFormatFormat: OFormat[FFDateTimeFormat]    = Json.format[FFDateTimeFormat]
  implicit lazy val ffNumberFormatFormat:   OFormat[FFNumberFormat]      = Json.format[FFNumberFormat]
  implicit lazy val ffStringFormatFormat:   OFormat[FFStringFormat]      = Json.format[FFStringFormat]
  implicit lazy val ffDateFormatFormat:     OFormat[FFDateFormat]        = Json.format[FFDateFormat]
  implicit lazy val ffUnknownFormat:        OFormat[FFUnknownFormat]     = Json.format[FFUnknownFormat]
  implicit lazy val ffStringArrayFormat:    OFormat[FFStringArrayFormat] = Json.format[FFStringArrayFormat]
  implicit lazy val ffNumberArrayFormat:    OFormat[FFNumberArrayFormat] = Json.format[FFNumberArrayFormat]
  implicit lazy val ffVoidFormat:           OFormat[FFVoidFormat]        = Json.format[FFVoidFormat]

  implicit lazy val ffDataFormatFormat: Format[FFDataFormat] = new Format[FFDataFormat] {
    override def writes(o: FFDataFormat): JsValue = {
      val f: JsValue = o match {
        case o: FFDateTimeFormat    ⇒ Json.toJson(o)(ffDateTimeFormatFormat)
        case o: FFNumberFormat      ⇒ Json.toJson(o)(ffNumberFormatFormat)
        case o: FFStringFormat      ⇒ Json.toJson(o)(ffStringFormatFormat)
        case o: FFDateFormat        ⇒ Json.toJson(o)(ffDateFormatFormat)
        case o: FFUnknownFormat     ⇒ Json.toJson(o)(ffUnknownFormat)
        case o: FFStringArrayFormat ⇒ Json.toJson(o)(ffStringArrayFormat)
        case o: FFNumberArrayFormat ⇒ Json.toJson(o)(ffNumberArrayFormat)
        case o: FFVoidFormat        ⇒ Json.toJson(o)(ffVoidFormat)

        case _ ⇒ throw new Exception(s"Invalid SubType: ${o.getClass} for FFDataFormat")
      }
      JsObject(List("type" → JsString(o.getClass.getSimpleName), "value" → f))
    }

    override def reads(json: JsValue): JsResult[FFDataFormat] = {
      json match {
        case JsObject(v) ⇒
          val JsString(subType) = v("type")
          subType match {
            case "FFDateTimeFormat"    ⇒ JsSuccess(v("value").as[FFDateTimeFormat])
            case "FFNumberFormat"      ⇒ JsSuccess(v("value").as[FFNumberFormat])
            case "FFStringFormat"      ⇒ JsSuccess(v("value").as[FFStringFormat])
            case "FFDateFormat"        ⇒ JsSuccess(v("value").as[FFDateFormat])
            case "FFUnknownFormat"     ⇒ JsSuccess(v("value").as[FFUnknownFormat])
            case "FFStringArrayFormat" ⇒ JsSuccess(v("value").as[FFStringArrayFormat])
            case "FFNumberArrayFormat" ⇒ JsSuccess(v("value").as[FFNumberArrayFormat])
            case "FFVoidFormat"        ⇒ JsSuccess(v("value").as[FFVoidFormat])
            case _                     ⇒ JsError(s"Invalid object for FFDataFormat: $v")
          }
        case _ ⇒
          JsError(s"Invalid object for FFDefaultVal: ${json}")
      }
    }
  }

  implicit lazy val ffDefFormat: Format[FFDefaultVal] = new Format[FFDefaultVal] {
    implicit lazy val ffStringDefaultValFmt: OFormat[FFStringDefaultVal] = Json.format[FFStringDefaultVal]
    implicit lazy val ffIntDefaultValFmt:    OFormat[FFIntDefaultVal]    = Json.format[FFIntDefaultVal]
    implicit lazy val ffDoubleDefaultValFmt: OFormat[FFDoubleDefaultVal] = Json.format[FFDoubleDefaultVal]
    override def writes(o: FFDefaultVal): JsValue = {
      o match {
        case o:  FFNoDefaultVal     ⇒ JsString("FFNoDefaultVal")
        case o:  FFNullDefaultVal   ⇒ JsString("FFNullDefaultVal")
        case o1: FFStringDefaultVal ⇒ Json.toJson(o1)(ffStringDefaultValFmt)
        case o1: FFIntDefaultVal    ⇒ Json.toJson(o1)(ffIntDefaultValFmt)
        case o1: FFDoubleDefaultVal ⇒ Json.toJson(o1)(ffDoubleDefaultValFmt)
      }
    }

    override def reads(json: JsValue): JsResult[FFDefaultVal] = {
      json match {
        case JsString(value) ⇒
          value match {
            case "FFNoDefaultVal"   ⇒ JsSuccess(FFNoDefaultVal())
            case "FFNullDefaultVal" ⇒ JsSuccess(FFNullDefaultVal())
            case _                  ⇒ JsError(s"Invalid Json Value Type for pDefaultVal: ${value}")
          }
        case v: JsObject ⇒
          Try(
            json
              .asOpt[FFStringDefaultVal]
              .orElse(json.asOpt[FFIntDefaultVal])
              .orElse(json.asOpt[FFDoubleDefaultVal])
              .get
          ) match {
            case Success(value)     ⇒ JsSuccess(value)
            case Failure(exception) ⇒ JsError(s"Invalid object for pDefaultVal: ${v}, ${exception}")
          }
        case _ ⇒ JsError(s"Invalid object for pDefaultVal: ${json}")
      }
    }
  }

  implicit lazy val ffSimpleSchemaRowFmt:    OFormat[FFSimpleSchemaRow]      = Json.format[FFSimpleSchemaRow]
  implicit lazy val ffConditionSchemaRowFmt: OFormat[FFConditionalSchemaRow] = Json.format[FFConditionalSchemaRow]
  implicit lazy val ffSchemaRowFmt: Format[FFSchemaRow] = new Format[FFSchemaRow] {
    override def writes(o: FFSchemaRow): JsValue = {
      val f = o match {
        case o1: FFSimpleSchemaRow      ⇒ Json.toJson(o1)(ffSimpleSchemaRowFmt)
        case o1: FFCompoundSchemaRow    ⇒ Json.toJson(o1)(ffCompoundSchemaRowWriteFmt)
        case o1: FFConditionalSchemaRow ⇒ Json.toJson(o1)(ffConditionSchemaRowFmt)
        case _ ⇒ throw new Exception(s"Invalid SubType for pSchemaRow: ${o}")
      }
      JsObject(List("type" → JsString(o.getClass.getSimpleName), "value" → f))
    }

    override def reads(json: JsValue): JsResult[FFSchemaRow] = {
      json match {
        case v1 @ JsObject(v) ⇒
          val JsString(subType) = v("type")
          val value             = v("value")
          subType match {
            case "FFSimpleSchemaRow"      ⇒ JsSuccess(value.as[FFSimpleSchemaRow])
            case "FFCompoundSchemaRow"    ⇒ JsSuccess(value.as[FFCompoundSchemaRow])
            case "FFConditionalSchemaRow" ⇒ JsSuccess(value.as[FFConditionalSchemaRow])
            case _                        ⇒ JsError(s"Invalid object for pSchemaRow: ${subType}")
          }
        case _ ⇒
          JsError(s"Invalid object for pSchemaRow: ${json}")
      }
    }
  }

  implicit lazy val ffSchemaRecord:       OFormat[FFSchemaRecord]    = Json.format[FFSchemaRecord]
  implicit lazy val ffStructTypeFmt:      OFormat[FFStructType]      = Json.format[FFStructType]
  implicit lazy val ffStructArrayTypeFmt: OFormat[FFStructArrayType] = Json.format[FFStructArrayType]
  implicit lazy val ffUnionTypeFmt:       OFormat[FFUnionType]       = Json.format[FFUnionType]
  /**
   * Format for the FFCompoundType hierarchy. Values are serialized as a tagged wrapper
   * `{"type": <subclass simple name>, "value": <subtype JSON>}` so that reads can
   * dispatch back to the matching subtype format.
   *
   * Fix: the error messages previously referred to unrelated formats ("pDataFormat",
   * "FFDataFormat", "pDefaultVal") — copy-paste residue — and now name FFCompoundType.
   */
  implicit lazy val ffCompoundTypeFmt: Format[FFCompoundType] = new Format[FFCompoundType] {
    override def writes(o: FFCompoundType): JsValue = {
      val f: JsValue = o match {
        case o: FFStructType      ⇒ Json.toJson(o)(ffStructTypeFmt)
        case o: FFUnionType       ⇒ Json.toJson(o)(ffUnionTypeFmt)
        case o: FFStructArrayType ⇒ Json.toJson(o)(ffStructArrayTypeFmt)
        // Defensive: unreachable unless a new subtype is added without a format.
        case _ ⇒ throw new Exception(s"Invalid SubType: ${o.getClass} for FFCompoundType")
      }
      JsObject(List("type" → JsString(o.getClass.getSimpleName), "value" → f))
    }

    override def reads(json: JsValue): JsResult[FFCompoundType] = {
      json match {
        case JsObject(v) ⇒
          // NOTE(review): v("type") throws when the key is missing — escapes as an
          // exception rather than a JsError; kept as-is to preserve behavior.
          val JsString(subType) = v("type")
          subType match {
            case "FFStructType"      ⇒ JsSuccess(v("value").as[FFStructType])
            case "FFUnionType"       ⇒ JsSuccess(v("value").as[FFUnionType])
            case "FFStructArrayType" ⇒ JsSuccess(v("value").as[FFStructArrayType])
            case _                   ⇒ JsError(s"Invalid object for FFCompoundType: $v")
          }
        case _ ⇒
          JsError(s"Invalid object for FFCompoundType: ${json}")
      }
    }
  }

  // Hand-written Reads/OWrites for FFCompoundSchemaRow (instead of Json.format):
  // the "rows" field recursively contains FFSchemaRow values, so lazyRead/lazyWrite
  // are needed to defer resolution of the mutually recursive FFSchemaRow format.
  implicit lazy val ffCompoundSchemaRowReadFmt: Reads[FFCompoundSchemaRow] =
    ((JsPath \ "compound").read[FFCompoundType] ~
      (JsPath \ "rows").lazyRead(Reads.seq[FFSchemaRow]))(FFCompoundSchemaRow)
  implicit lazy val ffCompoundSchemaRowWriteFmt: OWrites[FFCompoundSchemaRow] =
    ((JsPath \ "compound").write[FFCompoundType] ~
      (JsPath \ "rows").lazyWrite(Writes.seq[FFSchemaRow]))(Function.unlift(FFCompoundSchemaRow.unapply))

  // Implicit conversion rendering a schema record as its compact JSON string.
  // NOTE(review): implicit conversions to String can apply silently at surprising
  // call sites — confirm callers rely on this before changing it to an explicit method.
  implicit def ffSchemaRecordToString(schema: FFSchemaRecord): String = Json.stringify(Json.toJson(schema))
}

object SchemaUtils {

  /** Flat (column name, Spark type name) view of one column, for display/inspection. */
  case class SparkSchemaRow(column: String, colType: String)

  /**
   * Counts schema "lines": one per simple row, and one for each record/compound node
   * plus the lines of all its children.
   *
   * NOTE(review): FFConditionalSchemaRow is handled by findColumns but not here, so a
   * schema containing conditional rows makes this throw — confirm whether conditional
   * rows should be countable before adding a case for them.
   *
   * @throws Exception for schema nodes this function does not understand
   */
  def countRecordLines(record: FFAST): Int = {
    record match {
      case FFSchemaRecord(_,      rows) ⇒ rows.map(countRecordLines(_)).sum + 1
      case FFSimpleSchemaRow(_,   _, _) ⇒ 1
      case FFCompoundSchemaRow(_, rows) ⇒ rows.map(countRecordLines(_)).sum + 1
      // Fix: previously threw a bare `new Exception()` with no diagnostic message.
      case other ⇒ throw new Exception(s"countRecordLines: unsupported schema node: $other")
    }
  }

  /**
   * Sums the fixed byte length of every column in the record.
   *
   * @throws Exception if any column has a variable (non-fixed) length
   */
  def findColumnLengthSum(record: FFAST): Int = {
    findColumns(record).map {
      case (columnName, columnFormat) ⇒
        fixedLength(columnFormat).getOrElse(
          throw new Exception(
            s"Unsupported variable length fields, during fixed format reading for column: '${columnName}'"
          )
        )
    }.sum
  }

  /** Flattens the schema tree into (columnName, format) pairs, in declaration order. */
  def findColumns(record: FFAST): Seq[(String, FFDataFormat)] = {
    record match {
      case FFSchemaRecord(_,         rows) ⇒ rows.flatMap(findColumns(_))
      case FFSimpleSchemaRow(name,   format, _) ⇒ (name → format) :: Nil
      case FFCompoundSchemaRow(_,    rows) ⇒ rows.flatMap(findColumns(_))
      case FFConditionalSchemaRow(_, row)  ⇒ findColumns(row)
      // Fix: previously threw a bare `new Exception()` with no diagnostic message.
      case other ⇒ throw new Exception(s"findColumns: unsupported schema node: $other")
    }
  }

  /** Maps every column of the record to a [[SparkSchemaRow]] (name plus Spark type name). */
  def getSparkSchema(record: FFSchemaRecord): Seq[SparkSchemaRow] = {
    findColumns(record).map {
      case (colName, fFormat) ⇒
        SparkSchemaRow(colName, fFormat.getSparkType)
    }
  }

  /**
   * Returns the fixed on-disk byte length of a value in the given format, or None when
   * the length is not statically known (a string format with no declared precision).
   *
   * @param format         the flat-file column format
   * @param columnValueMap values of previously read columns, used to resolve array
   *                       sizes that reference another column's value
   * @throws Exception for formats whose length cannot be inferred at all
   */
  def fixedLength(
    format:         FFDataFormat,
    columnValueMap: mutable.Map[String, Any] = mutable.Map[String, Any]()
  ): Option[Int] = {
    format match {
      case FFNumberFormat(_, Some(precision), _, miscProperties) ⇒
        // Packed numbers store two digits per byte, hence ceil(precision / 2).
        Some(
          if (miscProperties.contains("packed") && miscProperties("packed").asInstanceOf[Boolean])
            Math.ceil(precision.toDouble / 2).toInt
          else precision
        )
      case FFStringFormat(_, Some(precision), Some(m)) ⇒
        // A "pckLen" property adds that many bytes of length header before the payload.
        val packLen = m.getOrElse("pckLen", "0").toInt
        Some(precision + packLen)
      case FFStringFormat(_, Some(precision), None) ⇒
        Some(precision)
      case FFStringFormat(_, None, _) ⇒
        // Variable-length string: no statically known width.
        None
      case FFDateFormat(_,     Some(format), _) ⇒ Some(format.length)
      case FFDateTimeFormat(_, Some(format), _) ⇒ Some(format.length)
      case FFStructFormat(_,   Some(precision)) ⇒ Some(precision)
      case FFStringArrayFormat(_, Some(precision), arraySizeInfo) ⇒
        Some(precision * getLengthFromArraySizeInfo(arraySizeInfo.getOrElse(""), columnValueMap))
      case FFNumberArrayFormat(_, Some(precision), _, arraySizeInfo, _) ⇒
        Some(precision * getLengthFromArraySizeInfo(arraySizeInfo.getOrElse(""), columnValueMap))
      case other ⇒ throw new Exception(s"Can't infer length of the supplied format: $other")
    }
  }

  /**
   * Decodes `buffer` into an Int under the given byte order.
   * Big-endian input is reversed so both orders can be folded least-significant byte first.
   */
  def makeInt(buffer: Array[Byte], order: ByteOrder): Int = {
    val leastSignificantFirst =
      if (order == ByteOrder.BIG_ENDIAN) buffer.reverse else buffer
    leastSignificantFirst.zipWithIndex.foldLeft(0) {
      case (acc, (b, i)) ⇒ acc | ((b & 0xff) << (i * 8))
    }
  }

  /**
   * Reads one column value starting at `start` in `buffer`.
   *
   * For strings carrying a "pckLen" property the payload is preceded by a binary
   * length header of `pckLen` bytes; otherwise the format's fixed length is used.
   *
   * @return the raw bytes of the value and the total number of bytes consumed
   *         (header + payload for length-prefixed strings)
   * @throws Exception if the decoded length prefix exceeds the declared width, or
   *                   the format has no determinable fixed length
   */
  def readRow(
    start:          Int,
    buffer:         Array[Byte],
    format:         FFDataFormat,
    columnValueMap: mutable.Map[String, Any]
  ): (Array[Byte], Int) = {
    format match {
      case FFStringFormat(_, Some(precision), Some(m)) if m.contains("pckLen") ⇒
        val pckLen = m("pckLen").toInt
        // Length-header endianness defaults to little-endian unless declared "big".
        val endian = m.get("endian") match {
          case Some("big") ⇒ ByteOrder.BIG_ENDIAN
          case _           ⇒ ByteOrder.LITTLE_ENDIAN
        }
        val len = makeInt(buffer.slice(start, start + pckLen), endian)
        // The decoded payload length must fit inside the declared fixed width.
        if (len > precision)
          throw new Exception(s"Packed width ($len) > Fixed width ($precision)")

        buffer.slice(start + pckLen, start + pckLen + len) → (pckLen + len)
      case _ ⇒
        // Fix: was `fixedLength(...).get`, which surfaced an opaque
        // NoSuchElementException when the length could not be determined.
        val len = fixedLength(format, columnValueMap).getOrElse(
          throw new Exception(s"Cannot determine a fixed length for format: $format")
        )
        buffer.slice(start, start + len) → len
    }
  }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy