All Downloads are FREE. Search and download functionalities are using the official Maven repository.

input.NestedSchemaExtractor.scala Maven / Gradle / Ivy

There is a newer version: 1.0.0-RC23
Show newest version
package avrohugger
package input

import avrohugger.input.reflectivecompilation.schemagen._
import org.apache.avro.Schema
import org.apache.avro.Schema.Type.{ARRAY, ENUM, MAP, RECORD, UNION}

import scala.collection.JavaConverters._

object NestedSchemaExtractor {
  // if a record is found, extract nested RECORDs and ENUMS (i.e. top-level types) 
  def getNestedSchemas(schema: Schema, schemaStore: SchemaStore): List[Schema] = {
    def extract(schema: Schema, fieldPath: List[String] = List.empty): List[Schema] = {

      schema.getType match {
        case RECORD =>
          val fields: List[Schema.Field] = schema.getFields.asScala.toList
          val fieldSchemas: List[Schema] = fields.map(field => field.schema)
          def flattenSchema(fieldSchema: Schema): List[Schema] = {
            fieldSchema.getType match {
              case ARRAY => flattenSchema(fieldSchema.getElementType)
              case MAP => flattenSchema(fieldSchema.getValueType)
              case RECORD => {
                // if the field schema is one that has already been stored, use that one
                if (schemaStore.schemas.contains(fieldSchema.getFullName)) List()
                // if we've already seen this schema (recursive schemas) don't traverse further
                else if (fieldPath.contains(fieldSchema.getFullName)) List()
                else fieldSchema :: extract(fieldSchema, fieldSchema.getFullName :: fieldPath)
              }
              case UNION => fieldSchema.getTypes.asScala.toList.flatMap(x => flattenSchema(x))
              case ENUM => { //List(fieldSchema)
                // if the field schema is one that has already been stored, use that one
                if (schemaStore.schemas.contains(fieldSchema.getFullName)) List()
                else List(fieldSchema)
              }
              case _ => List(fieldSchema)
            }
          }
          val flatSchemas = fieldSchemas.flatMap(fieldSchema => flattenSchema(fieldSchema))
          def topLevelTypes(schema: Schema) = (schema.getType == RECORD | schema.getType == ENUM)
          val nestedTopLevelSchemas = flatSchemas.filter(topLevelTypes)
          nestedTopLevelSchemas
        case ENUM => List(schema)
        case _ => Nil
      } 
    }

    schema::extract(schema)
  }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy