// org.apache.spark.sql.confluent.json.JsonSchemaConverter.scala
// From spark-extensions_2.12: Spark extensions for SmartDataLakeBuilder
package org.apache.spark.sql.confluent.json
import org.apache.spark.sql.types._
import org.json4s._

/** Pairs a Spark [[DataType]] with its nullability. */
case class SchemaType(dataType: DataType, nullable: Boolean)

object JsonSchemaConverter {

  // Names of JSON schema fields recognized during conversion.
  private[json] val SchemaFieldName = "name"
  private[json] val SchemaFieldType = "type"
  private[json] val SchemaFieldFormat = "format"
  private[json] val SchemaFieldAirbyteType = "airbyte_type"
  private[json] val SchemaFieldOneOf = "oneOf"
  private[json] val SchemaFieldId = "id"
  private[json] val SchemaFieldProperties = "properties"
  private[json] val SchemaFieldItems = "items"
  private[json] val SchemaFieldAdditionalProperties = "additionalProperties"
  private[json] val SchemaFieldDescription = "description"
  private[json] val SchemaFieldRequired = "required"
  private[json] val SchemaRoot = "/"
  private[json] val Definitions = "definitions"
  private[json] val Reference = "$ref"

  // Mapping of JSON schema type and format names to Spark types.
  private[json] val JsonToSparkTypeMap = Map[String, DataType](
    "string" -> StringType,
    "number" -> DecimalType.SYSTEM_DEFAULT,
    "float" -> DoubleType,
    "double" -> DoubleType,
    "integer" -> LongType,
    "int32" -> IntegerType,
    "int64" -> LongType,
    "boolean" -> BooleanType,
    "date-time" -> TimestampType,
    "date-time-ntz" -> TimestampNTZType,
    "date" -> DateType
  )

  // Mapping of Spark types to JSON schema type names.
  private[json] val SparkToJsonTypeMap = Map[AbstractDataType, String](
    IntegerType -> "integer",
    ShortType -> "integer",
    ByteType -> "integer",
    DateType -> "date-time",
    StringType -> "string",
    DoubleType -> "number",
    FloatType -> "number",
    LongType -> "integer",
    BooleanType -> "boolean",
    TimestampType -> "date-time",
    StructType -> "object",
    ArrayType -> "array"
    // DecimalType is handled specifically in code because the mapping is not 1:1.
  )

  /** Parses a JSON schema string and converts it to a Spark [[StructType]]. */
  def convertToSpark(schemaContent: String, isStrictTypingEnabled: Boolean = true, additionalPropertiesDefault: Boolean = true): StructType = {
    import org.json4s.jackson.JsonMethods._
    implicit val format: Formats = DefaultFormats
    val schema = parse(schemaContent).extract[JObject]
    convertParsedSchemaToSpark(schema, isStrictTypingEnabled, additionalPropertiesDefault)
  }

  /** Converts an already parsed JSON schema object to a Spark [[StructType]]. */
  def convertParsedSchemaToSpark(schema: JObject, isStrictTypingEnabled: Boolean = true, additionalPropertiesDefault: Boolean = true): StructType = {
    convertParsedSchemaToSparkDataType(schema, isStrictTypingEnabled, additionalPropertiesDefault)
      .asInstanceOf[StructType]
  }

  /**
   * Converts a parsed JSON schema value to the corresponding Spark [[DataType]].
   * `definitionsPath` points to the schema section holding definitions used to resolve `$ref` references.
   */
  def convertParsedSchemaToSparkDataType(schema: JValue, isStrictTypingEnabled: Boolean = true, additionalPropertiesDefault: Boolean = true, definitionsPath: String = Definitions): DataType = {
    new JsonToSparkSchemaConverter(schema, isStrictTypingEnabled, additionalPropertiesDefault, definitionsPath).convert()
  }

  /** Converts a Spark [[StructType]] to a JSON schema object. */
  def convertFromSpark(schema: StructType): JObject = {
    SparkToJsonSchemaConverter.convert(schema)
  }

  /** Converts a Spark [[StructType]] to a pretty-printed JSON schema string. */
  def convertFromSparkToString(schema: StructType): String = {
    import org.json4s.jackson.JsonMethods._
    implicit val format: Formats = DefaultFormats
    val json = convertFromSpark(schema)
    pretty(json)
  }
}
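
// A minimal usage sketch, not part of the original file: it round-trips a small JSON schema
// through the converter. The schema literal and the demo object name are hypothetical examples;
// the mappings noted in the comments follow JsonToSparkTypeMap above ("integer" -> LongType,
// "date-time" -> TimestampType).
object JsonSchemaConverterDemo extends App {

  val jsonSchema =
    """{
      |  "type": "object",
      |  "properties": {
      |    "id": { "type": "integer" },
      |    "name": { "type": "string" },
      |    "createdAt": { "type": "string", "format": "date-time" }
      |  },
      |  "required": ["id"]
      |}""".stripMargin

  // JSON schema -> Spark StructType.
  val structType = JsonSchemaConverter.convertToSpark(jsonSchema)
  structType.printTreeString()

  // Spark StructType -> pretty-printed JSON schema string.
  println(JsonSchemaConverter.convertFromSparkToString(structType))
}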