/***********************************************************************
 * Copyright (c) 2013-2025 Commonwealth Computer Research, Inc.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Apache License, Version 2.0
 * which accompanies this distribution and is available at
 * http://www.opensource.org/licenses/apache2.0.php.
 ***********************************************************************/

package org.locationtech.geomesa.kafka.confluent

import com.typesafe.scalalogging.LazyLogging
import io.confluent.kafka.schemaregistry.client.{CachedSchemaRegistryClient, SchemaRegistryClient}
import io.confluent.kafka.serializers.{KafkaAvroDeserializer, KafkaAvroSerializer}
import org.apache.avro.Schema.{Field, Type}
import org.apache.avro.generic.{GenericData, GenericRecord}
import org.apache.avro.{JsonProperties, Schema}
import org.geotools.api.feature.simple.{SimpleFeature, SimpleFeatureType}
import org.locationtech.geomesa.features.SerializationOption.SerializationOption
import org.locationtech.geomesa.features.{ScalaSimpleFeature, SimpleFeatureSerializer}
import org.locationtech.geomesa.kafka.confluent.ConfluentFeatureSerializer.ConfluentFeatureMapper
import org.locationtech.geomesa.kafka.data.KafkaDataStore
import org.locationtech.geomesa.security.SecurityUtils
import org.locationtech.geomesa.utils.text.{DateParsing, WKBUtils, WKTUtils}
import org.locationtech.jts.geom.Geometry

import java.io.{InputStream, OutputStream}
import java.net.URL
import java.nio.ByteBuffer
import java.time.format.DateTimeFormatter
import java.util.Date
import java.util.concurrent.atomic.AtomicBoolean
import scala.collection.JavaConverters._
import scala.util.Try
import scala.util.control.NonFatal

class ConfluentFeatureSerializer(
    sft: SimpleFeatureType,
    schemaRegistryClient: SchemaRegistryClient,
    schemaOverride: Option[Schema] = None,
    val options: Set[SerializationOption] = Set.empty
  ) extends SimpleFeatureSerializer with LazyLogging {

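  // resolve the avro schema: use the explicit override if present, otherwise look it up
  // in the registry via the schema id stored in the feature type's user data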
  private val schema = schemaOverride.getOrElse {
    val schemaId =
      Option(sft.getUserData.get(ConfluentMetadata.SchemaIdKey))
          .map(_.toString.toInt)
          .getOrElse {
            throw new IllegalStateException(s"Cannot create ConfluentFeatureSerializer because SimpleFeatureType " +
                s"'${sft.getTypeName}' does not have schema id at key '${ConfluentMetadata.SchemaIdKey}'")
          }
    schemaRegistryClient.getById(schemaId)
  }

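  // tracks whether the one-time schema violation check has already been run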
  private val schemaValidationCheck = new AtomicBoolean(false)

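  // one feature mapper per thread, as the underlying kafka avro (de)serializers are not guaranteed thread-safe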
  private val serializers = new ThreadLocal[ConfluentFeatureMapper]() {
    override def initialValue(): ConfluentFeatureMapper = {
      val mapper = new ConfluentFeatureMapper(sft, schema, schemaRegistryClient)
      if (schemaValidationCheck.compareAndSet(false, true)) {
        val violations = mapper.checkSchemaViolations()
        if (violations.nonEmpty) {
          logger.warn(
            "The following required schema fields are not mapped to any feature type attributes, " +
                s"and may cause errors during serialization: ${violations.mkString(", ")}")
        }
      }
      mapper
    }
  }

  override def deserialize(id: String, bytes: Array[Byte]): SimpleFeature =
    serializers.get.read(id, bytes)

  override def deserialize(bytes: Array[Byte]): SimpleFeature = deserialize("", bytes)

  override def deserialize(bytes: Array[Byte], offset: Int, length: Int): SimpleFeature =
    deserialize("", bytes, offset, length)

  override def deserialize(id: String, bytes: Array[Byte], offset: Int, length: Int): SimpleFeature = {
    val buf = if (offset == 0 && length == bytes.length) { bytes } else {
      val buf = Array.ofDim[Byte](length)
      System.arraycopy(bytes, offset, buf, 0, length)
      buf
    }
    deserialize(id, buf)
  }

  override def serialize(feature: SimpleFeature): Array[Byte] = serializers.get.write(feature)

  override def serialize(feature: SimpleFeature, out: OutputStream): Unit = out.write(serialize(feature))

  // implement the following if we need them

  override def deserialize(in: InputStream): SimpleFeature = throw new NotImplementedError()

  override def deserialize(id: String, in: InputStream): SimpleFeature =
    throw new NotImplementedError()
}

object ConfluentFeatureSerializer {

  import SchemaParser.{GeoMesaAvroDateFormat, GeoMesaAvroVisibilityField}

  def builder(sft: SimpleFeatureType, schemaRegistryUrl: URL, schemaOverride: Option[Schema] = None): Builder =
    new Builder(sft, schemaRegistryUrl, schemaOverride)
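
  // A minimal usage sketch (hypothetical registry URL; assumes the feature type's user data
  // carries the kafka topic and schema id, as populated when loading the type from the registry):
  //
  //   val serializer = ConfluentFeatureSerializer.builder(sft, new URL("http://localhost:8081")).build()
  //   val bytes = serializer.serialize(feature)
  //   val copy = serializer.deserialize(feature.getID, bytes)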

  class Builder private[ConfluentFeatureSerializer](
    sft: SimpleFeatureType,
    schemaRegistryUrl: URL,
    schemaOverride: Option[Schema] = None
  ) extends SimpleFeatureSerializer.Builder[Builder] {
    override def build(): ConfluentFeatureSerializer = {
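      // the second constructor argument caps the number of schemas the client will cache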
      val client = new CachedSchemaRegistryClient(schemaRegistryUrl.toExternalForm, 100)
      new ConfluentFeatureSerializer(sft, client, schemaOverride, options.toSet)
    }
  }

  /**
   * Mapping between Avro schema and SimpleFeatureType
   *
   * @param sftIndex index of the field in the sft
   * @param schemaIndex index of the field in the avro schema
   * @param default default value defined in the avro schema
   * @param conversion convert from an avro value to a simple feature type attribute, and vice-versa
   */
  private case class FieldMapping(
      sftIndex: Int,
      schemaIndex: Int,
      default: Option[AnyRef],
      conversion: Option[FieldConverter]
    )

  /**
   * Converts between serialized Avro records and simple features
   *
   * @param sft simple feature type
   * @param schema avro schema
   * @param registry schema registry client
   */
  private class ConfluentFeatureMapper(sft: SimpleFeatureType, schema: Schema, registry: SchemaRegistryClient) {

    private val topic = KafkaDataStore.topic(sft)
    private val kafkaSerializer = new KafkaAvroSerializer(registry)
    private val kafkaDeserializer = new KafkaAvroDeserializer(registry)

    // for each attribute: the feature type index, the schema field index and default value, and any conversion needed
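    // (e.g. geometry attributes convert to/from WKT strings or WKB bytes depending on the avro
    // field type; date attributes convert based on the declared geomesa date format, defaulting
    // to epoch millis when no format is set)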
    private val fieldMappings = sft.getAttributeDescriptors.asScala.map { d =>
      val field = schema.getField(d.getLocalName)

      val conversion =
        if (classOf[Geometry].isAssignableFrom(d.getType.getBinding)) {
          lazy val union = field.schema.getTypes.asScala.map(_.getType).filter(_ != Schema.Type.NULL).toSet
          field.schema.getType match {
            case Schema.Type.STRING => Some(WktConverter)
            case Schema.Type.BYTES  => Some(WkbConverter)
            case Schema.Type.UNION if union == Set(Schema.Type.STRING) => Some(WktConverter)
            case Schema.Type.UNION if union == Set(Schema.Type.BYTES)  => Some(WkbConverter)
            case _ => throw new IllegalStateException(s"Found a geometry field with an invalid schema: $field")
          }
        } else if (classOf[Date].isAssignableFrom(d.getType.getBinding)) {
          d.getUserData.get(GeoMesaAvroDateFormat.KEY) match {
            case GeoMesaAvroDateFormat.ISO_DATE     => Some(IsoDateConverter)
            case GeoMesaAvroDateFormat.ISO_DATETIME => Some(IsoDateTimeConverter)
            case GeoMesaAvroDateFormat.EPOCH_MILLIS => Some(EpochMillisConverter)
            case null /* avro logical date type */  => Some(EpochMillisConverter)
            case _ =>
              throw new IllegalStateException(s"Found a date field with no format defined:" +
                s" ${d.getLocalName} ${d.getUserData.asScala.mkString(", ")}")
          }
        } else {
          None
        }

      FieldMapping(sft.indexOf(d.getLocalName), field.pos(), defaultValue(field), conversion)
    }

    // visibility field index in the avro schema
    private val visibilityField = schema.getFields.asScala.collectFirst {
      case f if Option(f.getProp(GeoMesaAvroVisibilityField.KEY)).exists(_.toBoolean) => f.pos()
    }

    // avro fields with default values that aren't part of the feature type
    private val defaultFields = schema.getFields.asScala.flatMap(f => defaultValue(f).map(v => f.pos() -> v)).filter {
      case (pos, _) => !fieldMappings.exists(_.schemaIndex == pos) && !visibilityField.contains(pos)
    }

    /**
     * Checks for required fields in the avro schema that are not part of the feature type
     * (i.e. will never be written)
     *
     * @return list of fields that will cause schema validation errors during serialization
     */
    def checkSchemaViolations(): Seq[String] = {
      val mappedPositions = fieldMappings.map(_.schemaIndex) ++ visibilityField.toSeq
      schema.getFields.asScala.collect {
        case f if requiredField(f) && !mappedPositions.contains(f.pos()) => f.name()
      }.toSeq
    }

    /**
     * Serialize a feature as Avro
     *
     * @param feature feature to serialize
     * @return
     */
    def write(feature: SimpleFeature): Array[Byte] = {
      val record = new GenericData.Record(schema)
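      // populate defaults for schema-only fields, then the visibility, then the mapped attributes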
      defaultFields.foreach { case (i, v) => record.put(i, v) }
      visibilityField.foreach { pos => record.put(pos, SecurityUtils.getVisibility(feature)) }
      fieldMappings.foreach { m =>
        try {
          feature.getAttribute(m.sftIndex) match {
            case null => m.default.foreach(d => record.put(m.schemaIndex, d))
            case v => record.put(m.schemaIndex, m.conversion.fold(v)(_.featureToRecord(v)))
          }
        } catch {
          case NonFatal(e) =>
            val d = sft.getDescriptor(m.sftIndex)
            val v = Try(feature.getAttribute(m.sftIndex))
            val s = schema.getField(d.getLocalName).schema()
            throw new RuntimeException(
              s"Cannot serialize field '${d.getLocalName}' with try-value '$v' into schema '$s':", e)
        }
      }

      kafkaSerializer.serialize(topic, record)
    }

    /**
     * Deserialize an Avro record into a feature
     *
     * @param id feature id
     * @param bytes serialized avro bytes
     * @return
     */
    def read(id: String, bytes: Array[Byte]): SimpleFeature = {
      val record = kafkaDeserializer.deserialize(topic, bytes).asInstanceOf[GenericRecord]
      val attributes = fieldMappings.map { m =>
        try {
          val v = record.get(m.schemaIndex)
          m.conversion match {
            case None => v
            case Some(c) => c.recordToFeature(v)
          }
        } catch {
          case NonFatal(e) =>
            val d = sft.getDescriptor(m.sftIndex)
            throw new RuntimeException(
              s"Cannot deserialize field '${d.getLocalName}' into a '${d.getType.getBinding.getName}':", e)
        }
      }

      val feature = ScalaSimpleFeature.create(sft, id, attributes.toSeq: _*)

      // set the feature visibility if it exists
      visibilityField.foreach { field =>
        val vis = record.get(field)
        if (vis != null) {
          SecurityUtils.setFeatureVisibility(feature, vis.toString)
        }
      }

      feature
    }

    // filter out JsonProperties.Null, which avro returns for explicit null defaults
    // see https://issues.apache.org/jira/browse/AVRO-1954
    private def defaultValue(f: Field): Option[AnyRef] =
      Option(f.defaultVal()).filterNot(_.isInstanceOf[JsonProperties.Null])

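    // a field is required if it has no default value and its schema does not admit null:
    // e.g. ["null", "string"] with no default is optional, while a plain "string" is required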
    private def requiredField(f: Field): Boolean = {
      defaultValue(f).isEmpty && {
        f.schema().getType match {
          case Type.NULL => false
          case Type.UNION => !f.schema().getTypes.asScala.exists(_.getType == Type.NULL)
          case _ => true
        }
      }
    }
  }

  /**
   * Converts between avro and feature attribute values
   */
  private sealed trait FieldConverter {
    def recordToFeature(value: AnyRef): AnyRef
    def featureToRecord(value: AnyRef): AnyRef
  }

  /**
   * Converts WKT text fields
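   *
   * e.g. a point geometry round-trips as the WKT string "POINT (1 2)"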
   */
  private case object WktConverter extends FieldConverter {
    override def recordToFeature(value: AnyRef): AnyRef = {
      // note: value is an org.apache.avro.util.Utf8
      if (value == null) { null } else { WKTUtils.read(value.toString) }
    }

    override def featureToRecord(value: AnyRef): AnyRef =
      if (value == null) { null } else { WKTUtils.write(value.asInstanceOf[Geometry]) }
  }

  /**
   * Converts WKB bytes fields
   */
  private case object WkbConverter extends FieldConverter {
    override def recordToFeature(value: AnyRef): AnyRef =
      if (value == null) { null } else { WKBUtils.read(unwrap(value.asInstanceOf[ByteBuffer])) }

    override def featureToRecord(value: AnyRef): AnyRef =
      if (value == null) { null } else { ByteBuffer.wrap(WKBUtils.write(value.asInstanceOf[Geometry])) }

    private def unwrap(buf: ByteBuffer): Array[Byte] = {
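      // use the backing array directly when it spans the entire buffer, avoiding a copy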
      if (buf.hasArray && buf.arrayOffset() == 0 && buf.limit() == buf.array().length) {
        buf.array()
      } else {
        val array = Array.ofDim[Byte](buf.limit())
        buf.get(array)
        array
      }
    }
  }

  /**
   * Converts ISO_DATE formatted string fields
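   * (e.g. the string "2023-01-15")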
   */
  private case object IsoDateConverter extends FieldConverter {
    override def recordToFeature(value: AnyRef): AnyRef = {
      if (value == null) { null } else {
        // note: value is an org.apache.avro.util.Utf8
        DateParsing.parseDate(value.toString, DateTimeFormatter.ISO_DATE)
      }
    }

    override def featureToRecord(value: AnyRef): AnyRef = {
      if (value == null) { null } else {
        DateParsing.formatDate(value.asInstanceOf[Date], DateTimeFormatter.ISO_DATE)
      }
    }
  }

  /**
   * Converts ISO_DATE_TIME formatted string fields
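   * (e.g. the string "2023-01-15T12:30:00Z")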
   */
  private case object IsoDateTimeConverter extends FieldConverter {
    override def recordToFeature(value: AnyRef): AnyRef = {
      if (value == null) { null } else {
        // note: value is an org.apache.avro.util.Utf8
        DateParsing.parseDate(value.toString, DateTimeFormatter.ISO_DATE_TIME)
      }
    }

    override def featureToRecord(value: AnyRef): AnyRef = {
      if (value == null) { null } else {
        DateParsing.formatDate(value.asInstanceOf[Date], DateTimeFormatter.ISO_DATE_TIME)
      }
    }
  }

  /**
   * Converts milliseconds since epoch long fields
   */
  private case object EpochMillisConverter extends FieldConverter {
    override def recordToFeature(value: AnyRef): AnyRef =
      if (value == null) { null } else { new Date(value.asInstanceOf[java.lang.Long]) }

    override def featureToRecord(value: AnyRef): AnyRef =
      if (value == null) { null } else { Long.box(value.asInstanceOf[Date].getTime) }
  }

}