// Source artifact: io.prophecy.libs.fixedFormatSchema.scala (obtained via Maven / Gradle / Ivy)
/*
* ====================================================================
*
* PROPHECY CONFIDENTIAL
*
* Prophecy Inc
* All Rights Reserved.
*
* NOTICE: All information contained herein is, and remains
* the property of Prophecy Inc, the intellectual and technical concepts contained
* herein are proprietary to Prophecy Inc and may be covered by U.S. and Foreign Patents,
* patents in process, and are protected by trade secret or copyright law.
* Dissemination of this information or reproduction of this material
* is strictly forbidden unless prior written permission is obtained
* from Prophecy Inc.
*
* ====================================================================
*/
package io.prophecy.libs
import io.prophecy.abinitio.dml.AbInitioToSparkFunctionMapping
import io.prophecy.abinitio.xfr.ast.CustomExpression
import io.prophecy.libs.utils.getLengthFromArraySizeInfo
import org.apache.spark.sql.types.{
ArrayType,
DataType,
DecimalType,
IntegerType,
LongType,
StringType,
StructField,
StructType
}
import org.codehaus.jackson.annotate.JsonIgnore
import play.api.libs.functional.syntax.toFunctionalBuilderOps
import play.api.libs.json.Json.JsValueWrapper
import play.api.libs.json._
import java.nio.ByteOrder
import scala.collection.mutable
import scala.util.parsing.input.Positional
import scala.util.{Failure, Success, Try}
// AST
/**
 * Base trait for every node of the fixed-format schema AST.
 * Extends [[Positional]] so parser position information is attached to nodes.
 */
trait FFAST extends Positional {
  // Spark equivalent of this node; only subclasses that map to Spark override
  // this. The default is unimplemented and throws NotImplementedError.
  def toSpark: DataType = ???
}
// SCHEMA TYPES
/**
 * Root AST node of a parsed fixed-format schema.
 *
 * @param recordType header keyword of the record ("record", "void", "" for a
 *                   bare row list, or another keyword rendered as "<kw> record")
 * @param rows       the schema rows making up the record body
 */
case class FFSchemaRecord(recordType: String, rows: Seq[FFSchemaRow]) extends FFAST {
  // Scala constructor source for this record (used in code generation).
  def toScala = s"""FFSchemaRecord("$recordType", ${rows.map(x ⇒ x.toScala)}) """

  // Recursively maps schema rows to Spark StructFields.
  private def toSpark(rows: Seq[FFSchemaRow]): Seq[StructField] = {
    rows.map {
      case FFSimpleSchemaRow(columnName, format, defaultVal) ⇒
        val dataType = format match {
          case FFNumberFormat(
                FFTypeName("DecimalType", _),
                Some(precision),
                scale,
                _
              ) ⇒
            DecimalType(precision, scale.getOrElse(0))
          // Unsigned integers of 4..7 bytes may overflow a signed 32-bit int,
          // so they are widened to LongType.
          case FFNumberFormat(FFTypeName("IntegerType", _), Some(precision), _, properties)
              if properties.getOrElse("unsigned", false).asInstanceOf[Boolean] && precision >= 4 && precision < 8 ⇒
            LongType
          case FFNumberFormat(FFTypeName("IntegerType", _), _, _, _) ⇒
            IntegerType
          case FFNumberFormat(FFTypeName("LongType", _), _, _, _) ⇒
            LongType
          // Array formats: derive the element type by recursing with the
          // corresponding scalar format.
          case FFNumberArrayFormat(name, precision, scale, arraySizeInfo, miscProperties) ⇒
            ArrayType(
              toSpark(
                FFSimpleSchemaRow(columnName, FFNumberFormat(name, precision, scale, miscProperties), defaultVal) :: Nil
              ).head.dataType
            )
          case FFStringArrayFormat(name, precision, arraySizeInfo) ⇒
            ArrayType(
              toSpark(FFSimpleSchemaRow(columnName, FFStringFormat(name, precision), defaultVal) :: Nil).head.dataType
            )
          // Everything else (dates, plain strings, unknown formats) surfaces
          // as a string column.
          case _ ⇒ StringType
        }
        StructField(columnName, dataType)
      case FFCompoundSchemaRow(FFStructArrayType(name1, arraySizeInfo), rows) ⇒
        StructField(name1, ArrayType(StructType(toSpark(rows))))
      case FFCompoundSchemaRow(compound, rows) ⇒
        // NOTE(review): compound.name.get throws for unnamed unions
        // (FFUnionType(None)) — confirm callers never reach this with one.
        StructField(compound.name.get, StructType(toSpark(rows)))
      case FFConditionalSchemaRow(condition, schemaRow) ⇒ toSpark(schemaRow :: Nil).head
      case _ ⇒ throw new Exception()
    }
  }

  /** Spark schema equivalent of the whole record. */
  override def toSpark: StructType =
    StructType(toSpark(rows))

  // Renders the record back into DML-like text.
  override def toString: String = {
    recordType match {
      case "void" ⇒
        if (rows.isEmpty) s"void()"
        else {
          // A void record carries its size in its single FFVoidFormat row.
          val value = rows.head.asInstanceOf[FFSimpleSchemaRow].format.asInstanceOf[FFVoidFormat].size.getOrElse(0)
          s"void($value)"
        }
      case "" ⇒ s"${rows.map(_.toString).mkString("\n")}"
      case _ ⇒
        val newRecordType = if (recordType != "record") s"$recordType record" else recordType
        s"$newRecordType\n${rows.map(_.toString).mkString("\n")}\nend"
    }
  }
}
/** A bare record-type keyword node (e.g. "record"). */
case class FFRecordType(startType: String) extends FFAST {
  /** Renders as just the keyword itself. */
  override def toString: String = startType
}
/**
 * Base trait for one row of a fixed-format schema: a simple field, a compound
 * record/union, an include directive, or a conditional row.
 */
sealed trait FFSchemaRow extends FFAST {
  // Scala constructor source for this row (code generation).
  def toScala: String
  // Byte size of the row rendered as a string ("-1" when not fixed).
  def getSize: String
  // Field count / logical length of the row rendered as a string.
  def getLength: String
}
/** An `include "path";` row that pulls in another schema file. */
case class FFIncludeFileRow(filePath: String) extends FFSchemaRow {
  override def toScala: String = s"FFIncludeFileRow($filePath)"
  // Size and length are undefined for include rows.
  def getSize: String = ???
  def getLength: String = ???
  override def toString: String = "include \"" + filePath + "\";"
}
/** A schema row that only applies when `condition` holds. */
case class FFConditionalSchemaRow(condition: String, schemaRow: FFSchemaRow) extends FFSchemaRow {
  /** Emits the condition as a triple-quoted Scala string literal. */
  override def toScala: String = {
    val tq = "\"\"\""
    s"FFConditionalSchemaRow($tq$condition$tq, ${schemaRow.toScala})"
  }
  // Size and length are undefined for conditional rows.
  override def getSize: String = ???
  override def getLength: String = ???
}
/** A single named field: its data format plus an optional default value. */
case class FFSimpleSchemaRow(name: String, format: FFDataFormat, value: FFDefaultVal) extends FFSchemaRow {
  def toScala =
    s"""FFSimpleSchemaRow("$name", ${format.toScala}, ${value.toScala})"""

  /** DML rendering: "<format> <name> [= <default>] ;" */
  override def toString: String = {
    val suffix = value match {
      case _: FFNoDefaultVal ⇒ " ;"
      case other             ⇒ s" = ${other.toString} ;"
    }
    s"${format.toString} $name$suffix"
  }

  // Form, size and length all delegate to the field's format.
  def getForm(): String = format.getForm()
  override def getSize: String = format.getSize
  override def getLength: String = format.getLength
}
/**
 * A compound schema row: a record, record-array or union containing nested
 * rows. The isInstanceOf chains of the original were replaced with pattern
 * matches (behavior unchanged).
 */
case class FFCompoundSchemaRow(compound: FFCompoundType, rows: Seq[FFSchemaRow]) extends FFSchemaRow {
  def toScala = s"""FFCompoundSchemaRow(${compound.toScala}, ${rows.map(x ⇒ x.toScala)})"""

  /** DML rendering: record/union header, nested rows, then the matching `end` line. */
  override def toString: String = {
    compound match {
      case FFStructType("") ⇒
        s"record\n${rows.map(_.toString).mkString("\n")}\nend ;"
      case FFStructType(name) ⇒
        s"record\n${rows.map(_.toString).mkString("\n")}\nend $name ;"
      case FFStructArrayType(name, arraySizeInfo) ⇒
        s"record\n${rows.map(_.toString).mkString("\n")}\nend[${arraySizeInfo.getOrElse("")}] $name ;"
      case FFUnionType(Some(name)) ⇒
        s"union\n${rows.map(_.toString).mkString("\n")}\nend $name ;"
      case FFUnionType(None) ⇒
        s"union\n${rows.map(_.toString).mkString("\n")}\nend ;"
      case _ ⇒ throw new Exception(s"$compound type not supported in Compound Schema")
    }
  }

  /** Form keyword of the compound: vector for arrays, otherwise record/union. */
  def getForm(): String = compound match {
    case _: FFStructArrayType ⇒ "vector"
    case _: FFStructType      ⇒ "record"
    case _: FFUnionType       ⇒ "union"
    case _                    ⇒ "record"
  }

  /** Total byte size of the nested rows; "-1" for arrays (size not fixed). */
  override def getSize: String = compound match {
    case _: FFStructArrayType ⇒ "-1"
    case _                    ⇒ rows.map(_.getSize.toInt).sum.toString
  }

  /** Number of nested rows; "-1" for arrays. */
  override def getLength: String = compound match {
    case _: FFStructArrayType ⇒ "-1"
    case _                    ⇒ rows.length.toString
  }
}
/** A flat list of simple rows treated as one schema row. */
case class FFSimpleSchemaList(rows: Seq[FFSimpleSchemaRow]) extends FFSchemaRow {
  def toScala = throw new Exception("toScala method not implemented")
  override def toString: String = rows.map(_.toString).mkString("\n")
  // Delegates to the first row.
  // NOTE(review): throws on an empty list — confirm callers guarantee non-empty.
  override def getSize: String = rows.head.getSize
  override def getLength: String = rows.head.getLength
}
// SCHEMA TYPES - Compounds
/** Base trait for compound row types: structs, struct arrays and unions. */
sealed trait FFCompoundType extends FFAST {
  // Optional name of the compound (unions may be anonymous).
  def name: Option[String]
  // Scala constructor source for this node (code generation).
  def toScala: String
}
/** A union compound, optionally named. */
case class FFUnionType(name: Option[String] = None) extends FFCompoundType {
  /**
   * Scala constructor source for this node.
   * Bug fix: the previous implementation called `name.get` unconditionally and
   * crashed with NoSuchElementException for anonymous unions
   * (FFUnionType(None) is constructed elsewhere in this file).
   */
  def toScala = name match {
    case Some(n) ⇒ s"""FFUnionType(Some("$n"))"""
    case None    ⇒ "FFUnionType(None)"
  }
  override def toString: String =
    name.getOrElse("")
}
/** A record struct type; name1 == "" denotes an anonymous record. */
case class FFStructType(name1: String) extends FFCompoundType {
  override def name: Option[String] = Some(name1)
  def toScala = "FFStructType(\"" + name1 + "\")"
  override def toString: String = name1
}
/** A record-array compound: a named struct repeated per `arraySizeInfo`. */
case class FFStructArrayType(name1: String, arraySizeInfo: Option[String]) extends FFCompoundType {
  override def name: Option[String] = Some(name1)

  // Quote strings for generated source; ints pass through unquoted.
  def getStringWithQuotes(value: Any) = value match {
    case s: String ⇒ "\"" + s + "\""
    case i: Int    ⇒ i
  }

  def toScala = "FFStructArrayType(\"" + name1 + "\"," + getStringWithQuotes(arraySizeInfo.getOrElse("")) + ")"
  override def toString: String = name1 + "[" + arraySizeInfo.getOrElse("") + "]"
}
// SCHEMA TYPES - Formats
/**
 * A type name with an optional delimiter and a bag of misc properties
 * (e.g. "packed"). `toString` renders only the property bag.
 * The isInstanceOf/asInstanceOf chains of the original were replaced with
 * pattern matches (behavior unchanged).
 */
case class FFTypeNameWithProperties(
  name: String,
  delimiter: Option[String],
  miscProperties: Map[String, Any] = Map("packed" → false)
) extends FFAST {

  /** Scala constructor source for this node. */
  def toScala: String = {
    val delimiterScala = delimiter.map(f ⇒ s"""Some("$f")""").getOrElse("None")
    val miscPropertiesScala = miscProperties.map(kv ⇒ s""""${kv._1}" -> ${kv._2}""").mkString(", ")
    s"""FFTypeNameWithProperties("$name", $delimiterScala, Map($miscPropertiesScala))"""
  }

  /**
   * Renders the property bag, comma separated: false booleans are dropped,
   * true booleans render as the bare key, strings as key="value", and
   * everything else as key=value.
   */
  override def toString: String =
    miscProperties
      .filter {
        case (_, flag: Boolean) ⇒ flag // keep only enabled boolean flags
        case _                  ⇒ true // non-boolean values always render
      }
      .map {
        case (k, _: Boolean) ⇒ s"$k"
        case (k, v: String)  ⇒ s"""$k="$v""""
        case (k, v)          ⇒ s"$k=${v.toString}"
      }
      .mkString(", ")
}
/** A type name plus an optional field delimiter. */
case class FFTypeName(name: String, delimiter: Option[String]) extends FFAST {
  /** Scala constructor source for this node. */
  def toScala: String = {
    val delimiterRepr = delimiter match {
      case Some(d) ⇒ s"""Some("$d")"""
      case None    ⇒ "None"
    }
    s"""FFTypeName("$name", $delimiterRepr)"""
  }
  override def toString: String = name + "(" + delimiter.getOrElse("") + ")"
}
/**
 * Base trait for a field's data format (number, string, date, arrays, ...).
 * The isInstanceOf chain in getForm was replaced with a pattern match
 * (behavior unchanged).
 */
sealed trait FFDataFormat extends FFAST {
  /** Scala constructor source for this format (code generation). */
  def toScala: String

  /** Name of the corresponding Spark type (e.g. "StringType"). */
  def getSparkType: String

  /** AbInitio "form" keyword for this format (vector/string/date/...). */
  def getForm(): String = this match {
    // Array formats always render as vectors, regardless of element type.
    case _: FFNumberArrayFormat | _: FFStringArrayFormat ⇒ "vector"
    case _ ⇒
      getSparkType match {
        case "StringType"   ⇒ "string"
        case "DateType"     ⇒ "date"
        case "DateTimeType" ⇒ "datetime"
        case "DecimalType"  ⇒ "decimal"
        case "IntegerType"  ⇒ "integer"
        case "VoidType"     ⇒ "void"
        case _              ⇒ "string"
      }
  }

  // Byte size rendered as a string ("-1" when not fixed).
  def getSize: String
  // Logical length rendered as a string ("-1" when not fixed).
  def getLength: String
  // Looks up a misc property by key; None when the format has no properties.
  def getPropertyValue(key: String): Option[Any]

  // Quotes strings for generated Scala source; ints pass through unquoted.
  def getStringWithQuotes(value: Any) = {
    value match {
      case x: String ⇒ s""""$x""""
      case x: Int    ⇒ x
    }
  }
}
/**
 * A numeric field format (integer/long/decimal/double), with optional
 * precision, scale and misc properties (packed/endian/unsigned/ebcdic/...).
 */
case class FFNumberFormat(
  name: FFTypeName,
  precision: Option[Int],
  scale: Option[Int],
  miscProperties: Map[String, Any] = Map("signReserved" → false, "packed" → false)
) extends FFDataFormat {

  /** Scala constructor source for this format (code generation). */
  override def toScala: String = {
    val miscPropertiesScala = miscProperties
      .map { kv ⇒
        // String values must be re-quoted so the generated source compiles.
        val value = if (kv._2.isInstanceOf[String]) s""""${kv._2}"""" else s"${kv._2}"
        s""""${kv._1}" -> $value"""
      }
      .mkString(", ")
    s"""FFNumberFormat(${name.toScala}, $precision, $scale, Map($miscPropertiesScala))"""
  }

  override def getSparkType = name.name

  /** Renders this format in AbInitio DML syntax, e.g. "unsigned integer(4)". */
  override def toString: String = {
    val delimiter = name.delimiter
    // NOTE(review): only "Comma" and "Period" are handled; any other
    // decimal_point value raises a MatchError — confirm upstream guarantees this.
    val decimal_point = miscProperties.getOrElse("decimal_point", "Comma").toString match {
      case "Comma"  ⇒ ", "
      case "Period" ⇒ "."
    }
    val typeString =
      if (name.name == "DecimalType" || (delimiter.isDefined && precision.isEmpty && scale.isEmpty)) "decimal"
      else if (name.name == "DoubleType") "double"
      else "integer"
    // The present/absent combination of delimiter/precision/scale decides the
    // shape of the main parameter list.
    val mainParamPart = if (delimiter.isDefined && precision.isDefined && scale.isDefined) {
      s""""${delimiter.get}", ${scale.get}, maximum_length=${precision.get + 1}"""
    } else if (delimiter.isEmpty && precision.isDefined && scale.isDefined) {
      s"""${precision.get}$decimal_point${scale.get}"""
    } else if (delimiter.isDefined && precision.isDefined && scale.isEmpty) {
      s""""${delimiter.get}", maximum_length=${precision.get}"""
    } else if (delimiter.isEmpty && precision.isDefined && scale.isEmpty) {
      s"${precision.get}"
    } else if (delimiter.isDefined && precision.isEmpty && scale.isEmpty) {
      s""""${delimiter.get}""""
    } else ""
    val isPacked = miscProperties.contains("packed") && miscProperties("packed").asInstanceOf[Boolean]
    // Keys rendered by dedicated prefixes below are excluded from the generic
    // property rendering. (Fixed: `|` was used instead of `||` here.)
    var extraPropPart = FFTypeNameWithProperties(
      "",
      None,
      miscProperties.filterNot(x ⇒ x._1 == "packed" || x._1 == "decimal_point" || x._1 == "endian" || x._1 == "unsigned")
    ).toString
    if (extraPropPart.nonEmpty) extraPropPart = ", " + extraPropPart
    var combinedString = if (mainParamPart.nonEmpty) s"$typeString($mainParamPart$extraPropPart)" else typeString
    // Bug fix: the "packed " prefix was previously computed as an expression
    // whose result was discarded (never assigned back to combinedString).
    if (isPacked) {
      combinedString = "packed " + combinedString
    }
    if (miscProperties.contains("endian")) {
      combinedString = miscProperties.getOrElse("endian", "big").toString + " endian " + combinedString
    }
    if (miscProperties.getOrElse("unsigned", false).asInstanceOf[Boolean])
      combinedString = "unsigned " + combinedString
    if (miscProperties.getOrElse("ebcdic", false).asInstanceOf[Boolean])
      combinedString = "ebcdic " + combinedString
    combinedString
  }

  override def getSize: String = precision.getOrElse(-1).toString
  override def getLength: String = precision.getOrElse(-1).toString
  override def getPropertyValue(key: String): Option[Any] = miscProperties.get(key)
}
/**
 * A string (or timestamp) field format with optional fixed precision and
 * optional string-valued properties (e.g. "ebcdic", "pckLen").
 */
case class FFStringFormat(
  name: FFTypeName,
  precision: Option[Int],
  props: Option[Map[String, String]] = None
) extends FFDataFormat {

  /** Scala constructor source for this format (code generation). */
  override def toScala: String =
    s"""FFStringFormat(
       | ${name.toScala},
       | $precision,
       | $propsStr)
       | """.stripMargin

  override def getSparkType = name.name

  /** Renders props as Scala source: "None" or Some(Map("k" -> "v", ...)). */
  def propsStr: String =
    props match {
      case None ⇒ "None"
      case Some(m) ⇒
        m.map { case (k, v) ⇒ "\"" + k + "\" -> \"" + v + "\"" }.mkString("Some(Map(", ", ", "))")
    }

  override def getSize: String = precision.getOrElse(-1).toString
  override def getLength: String = precision.getOrElse(-1).toString
  override def getPropertyValue(key: String): Option[Any] = props.flatMap(_.get(key))

  /** DML rendering; only StringType and TimestampType are supported. */
  override def toString: String = {
    val delimiter = name.delimiter
    val isEbcdic = props.isDefined && props.get.getOrElse("ebcdic", "false") == "true"
    val prefix = if (isEbcdic) "ebcdic " else ""
    if (name.name == "StringType") {
      // Parameter list depends on which of delimiter/precision are present.
      // NOTE(review): the no-delimiter/no-precision case renders "integer(4)"
      // for a string — looks intentional (length-prefixed string) but confirm.
      val f = if (delimiter.isEmpty && precision.isEmpty) {
        s"integer(4)"
      } else if (delimiter.isDefined && precision.isDefined) {
        s""""${delimiter.get}", ${precision.get}"""
      } else if (delimiter.isDefined && precision.isEmpty) {
        s""""${delimiter.get}""""
      } else if (delimiter.isEmpty && precision.isDefined) {
        s"${precision.get}"
      } else throw new Exception("case not supported")
      // (Removed a dead standalone `f` expression statement that had no effect.)
      // NOTE(review): the `&& false` below permanently disables emitting props;
      // kept as-is to preserve behavior — confirm whether this was intentional.
      if (props.isDefined && props.get.nonEmpty && false) {
        s"${prefix}string($f, $propsStr)"
      } else {
        s"${prefix}string($f)"
      }
    } else if (name.name == "TimestampType") {
      "timestamp()"
    } else {
      throw new Exception(s"${name.name} not supported")
    }
  }
}
/** A date field with an optional Spark date-format pattern. */
case class FFDateFormat(name: FFTypeName, format: Option[String], miscProperties: Map[String, Any] = Map())
  extends FFDataFormat {

  /** Scala constructor source for this node. */
  override def toScala: String = {
    val formatScala = format.map(f ⇒ s"""Some("$f")""").getOrElse("None")
    val miscPropertiesScala = miscProperties.map(kv ⇒ s""""${kv._1}" -> ${kv._2}""").mkString(", ")
    s"""FFDateFormat(${name.toScala}, $formatScala, Map($miscPropertiesScala))"""
  }

  override def getSparkType = name.name
  // Size equals the pattern length (0 when no pattern is present).
  override def getSize: String = format.getOrElse("").length.toString
  override def getLength: String = getSize

  /** DML rendering; the Spark pattern is translated back to AbInitio syntax. */
  override def toString: String = {
    if (name.name != "DateType") throw new Exception(s"${name.name} is not supported")
    val extra = {
      val rendered = FFTypeNameWithProperties("", None, miscProperties.filterNot(_._1 == "packed")).toString
      if (rendered.isEmpty) rendered else ", " + rendered
    }
    (name.delimiter, format) match {
      case (Some(d), Some(f)) ⇒
        s"""date("${AbInitioToSparkFunctionMapping.getAbinitioTimeFormatFromSparkDateTimeFormat(f)}"$extra)('$d')"""
      case (None, Some(f)) ⇒
        s"""date("${AbInitioToSparkFunctionMapping.getAbinitioTimeFormatFromSparkDateTimeFormat(f)}"$extra)"""
      case (None, None) ⇒
        "date(int)"
      case (Some(d), None) ⇒
        s"""date("$d"$extra)"""
    }
  }

  override def getPropertyValue(key: String): Option[Any] = miscProperties.get(key)
}
/** A datetime field with an optional format pattern. */
case class FFDateTimeFormat(name: FFTypeName, format: Option[String], miscProperties: Map[String, Any] = Map())
  extends FFDataFormat {

  // NOTE(review): unlike FFDateFormat, miscProperties is not emitted here —
  // confirm whether that omission is intentional.
  override def toScala: String = {
    val formatScala = format.map(f ⇒ s"""Some("$f")""").getOrElse("None")
    s"""FFDateTimeFormat(${name.toScala}, $formatScala)"""
  }

  override def getSparkType = name.name
  // Size equals the pattern length (0 when no pattern is present).
  override def getSize: String = format.getOrElse("").length.toString
  override def getLength: String = getSize

  /** DML rendering; only DateTimeType is supported. */
  override def toString: String = {
    if (name.name != "DateTimeType") throw new Exception(s"${name.name} not supported")
    (name.delimiter, format) match {
      case (Some(d), Some(f)) ⇒
        s"""datetime("${AbInitioToSparkFunctionMapping.getAbinitioTimeFormatFromSparkDateTimeFormat(f)}")('$d')"""
      case (None, Some(f)) ⇒
        s"""datetime("${AbInitioToSparkFunctionMapping.getAbinitioTimeFormatFromSparkDateTimeFormat(f)}")"""
      case (None, None) ⇒
        "datetime(int)"
      case (Some(d), None) ⇒
        s"""datetime("$d")"""
    }
  }

  override def getPropertyValue(key: String): Option[Any] = miscProperties.get(key)
}
// Format describing an embedded struct occupying `precision` bytes.
case class FFStructFormat(name: FFTypeName, precision: Option[Int]) extends FFDataFormat {
  // Scala constructor source for this node.
  override def toScala: String = s"""FFStructFormat(${name.toScala}, ${precision.toString})"""
  override def getSparkType = name.name
  override def getSize: String = precision.getOrElse(-1).toString
  // A struct counts as a single logical field.
  override def getLength: String = "1"
  // NOTE(review): super.toString here yields the default (non-case-class)
  // Object representation — confirm this is intentional.
  override def toString: String = super.toString
  override def getPropertyValue(key: String): Option[Any] = None
}
/** Fallback format for types the parser does not recognise. */
case class FFUnknownFormat(name: FFTypeName, arraySizeInfo: Option[String]) extends FFDataFormat {
  /**
   * Scala constructor source for this node.
   * Bug fix: the previous implementation omitted the mandatory arraySizeInfo
   * argument, so the generated source could not compile.
   */
  override def toScala: String = {
    val sizeScala = arraySizeInfo.map(s ⇒ s"""Some("$s")""").getOrElse("None")
    s"""FFUnknownFormat(${name.toScala}, $sizeScala)"""
  }
  override def getSparkType = name.name
  // Unknown formats have no fixed size or length.
  override def getSize: String = "-1"
  override def getLength: String = "-1"
  override def toString: String =
    if (arraySizeInfo.isDefined) s"${name.name}[${arraySizeInfo.getOrElse("")}]" else name.name
  override def getPropertyValue(key: String): Option[Any] = None
}
/** An array of fixed-width strings, sized by `arraySizeInfo`. */
case class FFStringArrayFormat(name: FFTypeName, precision: Option[Int], arraySizeInfo: Option[String])
  extends FFDataFormat {

  /** Scala constructor source for this node. */
  override def toScala: String = {
    val sizeRepr = getStringWithQuotes(arraySizeInfo.getOrElse(""))
    s"""FFStringArrayFormat(${name.toScala}, $precision, $sizeRepr)"""
  }

  override def getSparkType = name.name
  // Arrays have no single fixed size or length.
  override def getSize: String = "-1"
  override def getLength: String = "-1"
  override def getPropertyValue(key: String): Option[Any] = None

  /** Renders the scalar string format, then appends the array-size suffix. */
  override def toString: String = {
    val scalarRepr = FFStringFormat(FFTypeName("StringType", name.delimiter), precision).toString
    scalarRepr + "[" + arraySizeInfo.getOrElse("") + "]"
  }
}
/** An array of numbers, sized by `arraySizeInfo`. */
case class FFNumberArrayFormat(
  name: FFTypeName,
  precision: Option[Int],
  scale: Option[Int],
  arraySizeInfo: Option[String],
  miscProperties: Map[String, Any] = Map("signReserved" → false, "packed" → false)
) extends FFDataFormat {

  /** Scala constructor source for this node. */
  override def toScala: String = {
    val propsRepr = miscProperties.map(kv ⇒ s""""${kv._1}" -> ${kv._2}""").mkString(", ")
    val sizeRepr = getStringWithQuotes(arraySizeInfo.getOrElse(""))
    s"""FFNumberArrayFormat(${name.toScala}, $precision, $scale, $sizeRepr, Map($propsRepr))"""
  }

  // Arrays have no single fixed size or length.
  override def getSize: String = "-1"
  override def getLength: String = "-1"
  override def getSparkType = name.name

  /** Renders the scalar number format, then appends the array-size suffix. */
  override def toString: String = {
    val scalarRepr = FFNumberFormat(name, precision, scale, miscProperties).toString
    scalarRepr + "[" + arraySizeInfo.getOrElse("") + "]"
  }

  override def getPropertyValue(key: String): Option[Any] = miscProperties.get(key)
}
/** A void (filler) field with an optional byte size. */
case class FFVoidFormat(name: FFTypeName, size: Option[Int]) extends FFDataFormat {
  override def toScala: String = s"""FFVoidFormat(${name.toScala}, $size)"""
  override def getSparkType = name.name
  override def getSize: String = size.getOrElse(-1).toString
  override def getLength: String = size.getOrElse(-1).toString
  // "void(n)" when sized, bare "void()" otherwise.
  override def toString: String = size.fold("void()")(s ⇒ s"void($s)")
  override def getPropertyValue(key: String): Option[Any] = None
}
// SCHEMA TYPES - Default values
/** Base trait for a row's default value: none, null, a literal or an expression. */
sealed trait FFDefaultVal extends FFAST {
  // Scala constructor source for this value (code generation).
  def toScala: String
  // Literal rendering of the value ("" when there is nothing to render).
  def getValue: String
}
/** Marker: the row declares no default value. */
case class FFNoDefaultVal() extends FFDefaultVal {
  override def toScala: String = "FFNoDefaultVal()"
  override def toString: String = ""
  override def getValue: String = ""
}
/** A NULL default, optionally carrying a payload value. */
case class FFNullDefaultVal(value: Option[Any] = None) extends FFDefaultVal {
  override def toScala: String = "FFNullDefaultVal()"

  /** "NULL", NULL("str") for string payloads, NULL(v) otherwise. */
  override def toString: String = value match {
    case None            ⇒ "NULL"
    case Some(s: String) ⇒ s"""NULL("$s")"""
    case Some(other)     ⇒ s"NULL($other)"
  }

  override def getValue: String = ""
}
/** A default value given as an AbInitio expression. */
case class FFExpressionDefaultVal(value: CustomExpression) extends FFDefaultVal {
  override def toScala: String = "FFExpressionDefaultVal(\"" + value + "\")"
  override def toString: String = value.toString
  override def getValue: String = ""
}
/** A quoted string literal default value. */
case class FFStringDefaultVal(value: String) extends FFDefaultVal {
  override def toScala: String = "FFStringDefaultVal(\"" + value + "\")"
  override def toString: String = "\"" + value + "\""
  override def getValue: String = "\"" + value + "\""
}
/** An integer literal default value. */
case class FFIntDefaultVal(value: Int) extends FFDefaultVal {
  override def toScala: String = s"FFIntDefaultVal($value)"
  override def toString: String = value.toString
  override def getValue: String = value.toString
}
/** A double literal default value. */
case class FFDoubleDefaultVal(value: Double) extends FFDefaultVal {
  override def toScala: String = s"FFDoubleDefaultVal($value)"
  override def toString: String = value.toString
  override def getValue: String = value.toString
}
/**
 * play-json (de)serialisation support for the fixed-format schema AST.
 * Import the members of this object to read/write FFSchemaRecord as JSON.
 * Polymorphic traits are encoded as {"type": <simple class name>, "value": ...}.
 */
object FixedFormatSchemaImplicits {
  /**
   * implicit read definition for Map[String, Any] datatype
   */
  implicit val mapReads: Reads[Map[String, Any]] = new Reads[Map[String, Any]] {
    def reads(jv: JsValue): JsResult[Map[String, Any]] =
      JsSuccess(
        jv.as[JsObject]
          .value
          .toSeq
          .map {
            case (k, v) ⇒
              // NOTE(review): JsArray/JsObject/JsNull values are not handled
              // and would raise a MatchError — confirm inputs are flat maps.
              (k, v) match {
                case (k, v: JsString) ⇒ k → v.as[String]
                case (k, v: JsBoolean) ⇒ k → v.as[Boolean]
                case (k, v: JsNumber) ⇒
                  k → {
                    // Try progressively different numeric types; fall back to
                    // the raw JSON text when none fits.
                    v.asOpt[Long] match {
                      case Some(value) ⇒ value
                      case None ⇒
                        v.asOpt[Int] match {
                          case Some(value2) ⇒ value2
                          case None ⇒
                            v.asOpt[Double] match {
                              case Some(value3) ⇒ value3
                              case None ⇒
                                v.asOpt[Float] match {
                                  case Some(value4) ⇒ value4
                                  case None ⇒ v.toString()
                                }
                            }
                        }
                    }
                  }
              }
          }
          .toMap
      )
  }

  /**
   * implicit write definition for Map[String, Any] datatype
   */
  implicit val mapWrites: Writes[Map[String, Any]] = new Writes[Map[String, Any]] {
    def writes(map: Map[String, Any]): JsValue =
      Json.obj(map.map {
        case (s, o) ⇒
          // NOTE(review): values of any other runtime type raise a MatchError.
          val ret: (String, JsValueWrapper) = (s, o) match {
            case (s, o: Int) ⇒ s → JsNumber(o)
            case (s, o: Long) ⇒ s → JsNumber(o)
            case (s, o: Double) ⇒ s → JsNumber(o)
            case (s, o: Boolean) ⇒ s → JsBoolean(o)
            case (s, o: String) ⇒ s → JsString(o)
          }
          (ret._1, ret._2)
      }.toSeq: _*)
  }

  // Derived formats for the concrete leaf types. These are lazy because the
  // polymorphic formats below reference them (and vice versa for rows).
  implicit lazy val jsonMapFormat: Format[Map[String, Any]] = Format(mapReads, mapWrites)
  implicit lazy val ffTypeNameFormat: OFormat[FFTypeName] = Json.format[FFTypeName]
  implicit lazy val ffDateTimeFormatFormat: OFormat[FFDateTimeFormat] = Json.format[FFDateTimeFormat]
  implicit lazy val ffNumberFormatFormat: OFormat[FFNumberFormat] = Json.format[FFNumberFormat]
  implicit lazy val ffStringFormatFormat: OFormat[FFStringFormat] = Json.format[FFStringFormat]
  implicit lazy val ffDateFormatFormat: OFormat[FFDateFormat] = Json.format[FFDateFormat]
  implicit lazy val ffUnknownFormat: OFormat[FFUnknownFormat] = Json.format[FFUnknownFormat]
  implicit lazy val ffStringArrayFormat: OFormat[FFStringArrayFormat] = Json.format[FFStringArrayFormat]
  implicit lazy val ffNumberArrayFormat: OFormat[FFNumberArrayFormat] = Json.format[FFNumberArrayFormat]
  implicit lazy val ffVoidFormat: OFormat[FFVoidFormat] = Json.format[FFVoidFormat]

  // Polymorphic (de)serialiser for FFDataFormat: dispatches on the runtime
  // subtype when writing and on the "type" discriminator when reading.
  implicit lazy val ffDataFormatFormat: Format[FFDataFormat] = new Format[FFDataFormat] {
    override def writes(o: FFDataFormat): JsValue = {
      val f: JsValue = o match {
        case o: FFDateTimeFormat ⇒ Json.toJson(o)(ffDateTimeFormatFormat)
        case o: FFNumberFormat ⇒ Json.toJson(o)(ffNumberFormatFormat)
        case o: FFStringFormat ⇒ Json.toJson(o)(ffStringFormatFormat)
        case o: FFDateFormat ⇒ Json.toJson(o)(ffDateFormatFormat)
        case o: FFUnknownFormat ⇒ Json.toJson(o)(ffUnknownFormat)
        case o: FFStringArrayFormat ⇒ Json.toJson(o)(ffStringArrayFormat)
        case o: FFNumberArrayFormat ⇒ Json.toJson(o)(ffNumberArrayFormat)
        case o: FFVoidFormat ⇒ Json.toJson(o)(ffVoidFormat)
        case _ ⇒ throw new Exception(s"Invalid SubType: ${o.getClass} for FFDataFormat")
      }
      JsObject(List("type" → JsString(o.getClass.getSimpleName), "value" → f))
    }
    override def reads(json: JsValue): JsResult[FFDataFormat] = {
      json match {
        case JsObject(v) ⇒
          val JsString(subType) = v("type")
          subType match {
            case "FFDateTimeFormat" ⇒ JsSuccess(v("value").as[FFDateTimeFormat])
            case "FFNumberFormat" ⇒ JsSuccess(v("value").as[FFNumberFormat])
            case "FFStringFormat" ⇒ JsSuccess(v("value").as[FFStringFormat])
            case "FFDateFormat" ⇒ JsSuccess(v("value").as[FFDateFormat])
            case "FFUnknownFormat" ⇒ JsSuccess(v("value").as[FFUnknownFormat])
            case "FFStringArrayFormat" ⇒ JsSuccess(v("value").as[FFStringArrayFormat])
            case "FFNumberArrayFormat" ⇒ JsSuccess(v("value").as[FFNumberArrayFormat])
            case "FFVoidFormat" ⇒ JsSuccess(v("value").as[FFVoidFormat])
            case _ ⇒ JsError(s"Invalid object for FFDataFormat: $v")
          }
        case _ ⇒
          JsError(s"Invalid object for FFDefaultVal: ${json}")
      }
    }
  }

  // Polymorphic (de)serialiser for FFDefaultVal. Marker values (no fields)
  // are encoded as bare strings; literal values as objects.
  implicit lazy val ffDefFormat: Format[FFDefaultVal] = new Format[FFDefaultVal] {
    implicit lazy val ffStringDefaultValFmt: OFormat[FFStringDefaultVal] = Json.format[FFStringDefaultVal]
    implicit lazy val ffIntDefaultValFmt: OFormat[FFIntDefaultVal] = Json.format[FFIntDefaultVal]
    implicit lazy val ffDoubleDefaultValFmt: OFormat[FFDoubleDefaultVal] = Json.format[FFDoubleDefaultVal]
    override def writes(o: FFDefaultVal): JsValue = {
      o match {
        case o: FFNoDefaultVal ⇒ JsString("FFNoDefaultVal")
        case o: FFNullDefaultVal ⇒ JsString("FFNullDefaultVal")
        case o1: FFStringDefaultVal ⇒ Json.toJson(o1)(ffStringDefaultValFmt)
        case o1: FFIntDefaultVal ⇒ Json.toJson(o1)(ffIntDefaultValFmt)
        case o1: FFDoubleDefaultVal ⇒ Json.toJson(o1)(ffDoubleDefaultValFmt)
      }
    }
    override def reads(json: JsValue): JsResult[FFDefaultVal] = {
      json match {
        case JsString(value) ⇒
          value match {
            case "FFNoDefaultVal" ⇒ JsSuccess(FFNoDefaultVal())
            case "FFNullDefaultVal" ⇒ JsSuccess(FFNullDefaultVal())
            case _ ⇒ JsError(s"Invalid Json Value Type for pDefaultVal: ${value}")
          }
        case v: JsObject ⇒
          // Try each literal shape in turn; Failure maps to a JsError.
          Try(
            json
              .asOpt[FFStringDefaultVal]
              .orElse(json.asOpt[FFIntDefaultVal])
              .orElse(json.asOpt[FFDoubleDefaultVal])
              .get
          ) match {
            case Success(value) ⇒ JsSuccess(value)
            case Failure(exception) ⇒ JsError(s"Invalid object for pDefaultVal: ${v}, ${exception}")
          }
        case _ ⇒ JsError(s"Invalid object for pDefaultVal: ${json}")
      }
    }
  }

  implicit lazy val ffSimpleSchemaRowFmt: OFormat[FFSimpleSchemaRow] = Json.format[FFSimpleSchemaRow]
  implicit lazy val ffConditionSchemaRowFmt: OFormat[FFConditionalSchemaRow] = Json.format[FFConditionalSchemaRow]

  // Polymorphic (de)serialiser for FFSchemaRow (rows recurse via the lazy
  // compound read/write formats defined at the bottom of this object).
  implicit lazy val ffSchemaRowFmt: Format[FFSchemaRow] = new Format[FFSchemaRow] {
    override def writes(o: FFSchemaRow): JsValue = {
      val f = o match {
        case o1: FFSimpleSchemaRow ⇒ Json.toJson(o1)(ffSimpleSchemaRowFmt)
        case o1: FFCompoundSchemaRow ⇒ Json.toJson(o1)(ffCompoundSchemaRowWriteFmt)
        case o1: FFConditionalSchemaRow ⇒ Json.toJson(o1)(ffConditionSchemaRowFmt)
        case _ ⇒ throw new Exception(s"Invalid SubType for pSchemaRow: ${o}")
      }
      JsObject(List("type" → JsString(o.getClass.getSimpleName), "value" → f))
    }
    override def reads(json: JsValue): JsResult[FFSchemaRow] = {
      json match {
        case v1 @ JsObject(v) ⇒
          val JsString(subType) = v("type")
          val value = v("value")
          subType match {
            case "FFSimpleSchemaRow" ⇒ JsSuccess(value.as[FFSimpleSchemaRow])
            case "FFCompoundSchemaRow" ⇒ JsSuccess(value.as[FFCompoundSchemaRow])
            case "FFConditionalSchemaRow" ⇒ JsSuccess(value.as[FFConditionalSchemaRow])
            case _ ⇒ JsError(s"Invalid object for pSchemaRow: ${subType}")
          }
        case _ ⇒
          JsError(s"Invalid object for pSchemaRow: ${json}")
      }
    }
  }

  implicit lazy val ffSchemaRecord: OFormat[FFSchemaRecord] = Json.format[FFSchemaRecord]
  implicit lazy val ffStructTypeFmt: OFormat[FFStructType] = Json.format[FFStructType]
  implicit lazy val ffStructArrayTypeFmt: OFormat[FFStructArrayType] = Json.format[FFStructArrayType]
  implicit lazy val ffUnionTypeFmt: OFormat[FFUnionType] = Json.format[FFUnionType]

  // Polymorphic (de)serialiser for FFCompoundType.
  implicit lazy val ffCompoundTypeFmt: Format[FFCompoundType] = new Format[FFCompoundType] {
    override def writes(o: FFCompoundType): JsValue = {
      val f: JsValue = o match {
        case o: FFStructType ⇒ Json.toJson(o)(ffStructTypeFmt)
        case o: FFUnionType ⇒ Json.toJson(o)(ffUnionTypeFmt)
        case o: FFStructArrayType ⇒ Json.toJson(o)(ffStructArrayTypeFmt)
        case _ ⇒ throw new Exception(s"Invalid SubType: ${o.getClass} for pDataFormat")
      }
      JsObject(List("type" → JsString(o.getClass.getSimpleName), "value" → f))
    }
    override def reads(json: JsValue): JsResult[FFCompoundType] = {
      json match {
        case JsObject(v) ⇒
          val JsString(subType) = v("type")
          subType match {
            case "FFStructType" ⇒ JsSuccess(v("value").as[FFStructType])
            case "FFUnionType" ⇒ JsSuccess(v("value").as[FFUnionType])
            case "FFStructArrayType" ⇒ JsSuccess(v("value").as[FFStructArrayType])
            case _ ⇒ JsError(s"Invalid object for FFDataFormat: $v")
          }
        case _ ⇒
          JsError(s"Invalid object for pDefaultVal: ${json}")
      }
    }
  }

  // Hand-built compound row formats using lazyRead/lazyWrite to break the
  // recursion between FFCompoundSchemaRow and FFSchemaRow.
  implicit lazy val ffCompoundSchemaRowReadFmt: Reads[FFCompoundSchemaRow] =
    ((JsPath \ "compound").read[FFCompoundType] ~
      (JsPath \ "rows").lazyRead(Reads.seq[FFSchemaRow]))(FFCompoundSchemaRow)
  implicit lazy val ffCompoundSchemaRowWriteFmt: OWrites[FFCompoundSchemaRow] =
    ((JsPath \ "compound").write[FFCompoundType] ~
      (JsPath \ "rows").lazyWrite(Writes.seq[FFSchemaRow]))(Function.unlift(FFCompoundSchemaRow.unapply))

  // Convenience: implicitly render a schema record as its JSON string.
  implicit def ffSchemaRecordToString(schema: FFSchemaRecord): String = Json.stringify(Json.toJson(schema))
}
/**
 * Helpers for inspecting fixed-format schemas and decoding fixed-width rows.
 */
object SchemaUtils {

  /** A column name paired with its Spark type name. */
  case class SparkSchemaRow(column: String, colType: String)

  /**
   * Number of DML lines a record occupies: one per simple row, plus one for
   * each record/compound header.
   * NOTE(review): FFConditionalSchemaRow is not handled here although
   * findColumns supports it — confirm whether that is intentional.
   */
  def countRecordLines(record: FFAST): Int = {
    record match {
      case FFSchemaRecord(_, rows) ⇒ rows.map(countRecordLines(_)).sum + 1
      case FFSimpleSchemaRow(_, _, _) ⇒ 1
      case FFCompoundSchemaRow(_, rows) ⇒ rows.map(countRecordLines(_)).sum + 1
      // Improved: the exception now names the offending node type.
      case other ⇒ throw new Exception(s"countRecordLines: unsupported node type ${other.getClass.getSimpleName}")
    }
  }

  /**
   * Sum of the fixed byte lengths of all leaf columns; throws when any column
   * has a variable (non-fixed) length.
   */
  def findColumnLengthSum(record: FFAST): Int = {
    findColumns(record).map { col ⇒
      val f = SchemaUtils.fixedLength(col._2)
      f.getOrElse(
        throw new Exception(
          s"Unsupported variable length fields, during fixed format reading for column: '${col._1}'"
        )
      )
    }.sum
  }

  /** Flattens a schema tree into (columnName, format) pairs, depth-first. */
  def findColumns(record: FFAST): Seq[(String, FFDataFormat)] = {
    record match {
      case FFSchemaRecord(_, rows) ⇒ rows.flatMap(findColumns(_))
      case FFSimpleSchemaRow(name, format, _) ⇒ (name → format) :: Nil
      case FFCompoundSchemaRow(_, rows) ⇒ rows.flatMap(findColumns(_))
      case FFConditionalSchemaRow(_, row) ⇒ findColumns(row)
      // Improved: the exception now names the offending node type.
      case other ⇒ throw new Exception(s"findColumns: unsupported node type ${other.getClass.getSimpleName}")
    }
  }

  /** Spark-facing view of the schema: column name plus Spark type name. */
  def getSparkSchema(record: FFSchemaRecord) = {
    findColumns(record).map {
      case (colName, fFormat) ⇒
        SparkSchemaRow(colName, fFormat.getSparkType)
    }
  }

  /**
   * Fixed byte length of a format, or None for a string column without a
   * precision. Packed decimals occupy ceil(precision / 2) bytes; array
   * lengths are element length times the resolved array size.
   */
  def fixedLength(
    format: FFDataFormat,
    columnValueMap: mutable.Map[String, Any] = mutable.Map[String, Any]()
  ): Option[Int] = {
    format match {
      case FFNumberFormat(_, Some(precision), _, miscProperties) ⇒
        Some(
          if (miscProperties.contains("packed") && miscProperties("packed").asInstanceOf[Boolean])
            Math.ceil(precision.toDouble / 2).toInt
          else precision
        )
      case FFStringFormat(_, Some(precision), Some(m)) ⇒
        // "pckLen" adds a length-prefix of that many bytes before the payload.
        val packLen = m.getOrElse("pckLen", "0").toInt
        Some(precision + packLen)
      case FFStringFormat(_, Some(precision), None) ⇒
        Some(precision)
      case FFStringFormat(_, None, _) ⇒
        None
      case FFDateFormat(_, Some(format), _) ⇒ Some(format.length)
      case FFDateTimeFormat(_, Some(format), _) ⇒ Some(format.length)
      case FFStructFormat(_, Some(precision)) ⇒ Some(precision)
      case FFStringArrayFormat(name, Some(precision), arraySizeInfo) ⇒
        Some(precision * getLengthFromArraySizeInfo(arraySizeInfo.getOrElse(""), columnValueMap))
      case FFNumberArrayFormat(name, Some(precision), scale, arraySizeInfo, miscProperties) ⇒
        Some(precision * getLengthFromArraySizeInfo(arraySizeInfo.getOrElse(""), columnValueMap))
      case _ ⇒ throw new Exception("Can't infer length of the supplied format")
    }
  }

  /** Assembles an Int from raw bytes, honouring the given byte order. */
  def makeInt(buffer: Array[Byte], order: ByteOrder): Int = {
    var res = 0
    // Normalise to little-endian so each byte can be shifted by its index.
    val buf = if (order == ByteOrder.BIG_ENDIAN) buffer.reverse else buffer
    for ((b, i) ← buf.zipWithIndex)
      res = res | ((b & 0xff) << (i * 8))
    res
  }

  /**
   * Reads one field from `buffer` starting at offset `start`.
   *
   * Strings carrying a "pckLen" property are preceded by a pckLen-byte integer
   * giving the payload length; all other formats use their fixed length.
   *
   * @return (payload bytes, total bytes consumed from the buffer)
   */
  def readRow(
    start: Int,
    buffer: Array[Byte],
    format: FFDataFormat,
    columnValueMap: mutable.Map[String, Any]
  ): (Array[Byte], Int) = {
    format match {
      case FFStringFormat(_, Some(precision), Some(m)) if m.contains("pckLen") ⇒
        val pckLen = m("pckLen").toInt
        // o/w read the width characters
        val endian = m.get("endian") match {
          case Some("big") ⇒ ByteOrder.BIG_ENDIAN
          case _ ⇒ ByteOrder.LITTLE_ENDIAN
        }
        val len = makeInt(buffer.slice(start, start + pckLen), endian)
        // if packed length exceeds the string width, then issue an error
        if (len > precision)
          throw new Exception(s"Packed width ($len) > Fixed width ($precision)")
        buffer.slice(start + pckLen, start + pckLen + len) → (pckLen + len)
      case _ ⇒
        // Fixed-width field: the length is fully determined by the format.
        val len = fixedLength(format, columnValueMap).get
        buffer.slice(start, start + len) → len
    }
  }
}