All downloads are free. The search and download functionality uses the official Maven repository.

com.landoop.kstreams.kcql.KStreamKcql.scala Maven / Gradle / Ivy

Go to download

Small utility lib for Kafka Streams to help with simple payload transformations

The newest version!
package com.landoop.kstreams.kcql

import com.datamountaineer.avro.kcql.AvroKcql._
import com.datamountaineer.json.kcql.JacksonJson
import com.datamountaineer.json.kcql.JsonKcql._
import com.datamountaineer.kcql.Kcql
import com.fasterxml.jackson.databind.JsonNode
import com.landoop.kstreams.kcql.KStreamSAM._
import com.sksamuel.avro4s.{FromRecord, ScaleAndPrecision}
import io.confluent.kafka.serializers.NonRecordContainer
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericContainer, GenericRecord}
import org.apache.kafka.common.serialization.Serde
import org.apache.kafka.streams.kstream.{KStream, KStreamBuilder}

import scala.reflect.ClassTag

object KStreamKcql {

  /**
    * Converts an Avro container into an instance of C.
    *
    * A `GenericRecord` is translated through the supplied `FromRecord`, a `NonRecordContainer` is unwrapped and its
    * value cast to C, and `null` is passed through as `null`.
    *
    * @param value      - The Avro container to translate; may be null
    * @param fromRecord - The avro4s translation from a `GenericRecord` to C
    * @tparam C - The type to produce
    * @return The translated value, or null when the input is null
    * @throws IllegalArgumentException when the container is neither a `GenericRecord` nor a `NonRecordContainer`
    */
  private def avroHandler[C](value: GenericContainer)(implicit fromRecord: FromRecord[C]): C = value match {
    case null => null.asInstanceOf[C]
    case record: GenericRecord => fromRecord(record)
    // The cast is unchecked: C is erased at runtime, so a type mismatch is not detected at this point.
    case wrapper: NonRecordContainer => wrapper.getValue.asInstanceOf[C]
    case unsupported =>
      val typeName = unsupported.getClass.getCanonicalName
      throw new IllegalArgumentException(s"$typeName is not handled")
  }

  /**
    * Extension methods on [[KStreamBuilder]] which create a source stream and immediately transform its values
    * (Avro to class, JSON to class, JSON to Avro, or a KCQL projection).
    *
    * All value transformations are applied via `mapValues`, so record keys are left untouched.
    */
  implicit class KStreamBuilderExtrensions(val streamBuilder: KStreamBuilder) extends AnyVal {

    /**
      * Translates the values of all messages received from avro to a class (see avro4s restrictions)
      *
      * @param topics     - The source topics for the KStream
      * @param fromRecord - The avro4s translation from a `GenericRecord` to C
      * @tparam T - The key type of the stream
      * @tparam C - The type every value is converted to
      * @return An instance of KStream[T,C]
      */
    def mapAvroValueAs[T, C](topics: String*)(implicit fromRecord: FromRecord[C]): KStream[T, C] = {
      val source: KStream[T, GenericContainer] = streamBuilder.stream(topics: _*)
      val stream: KStream[T, C] = source.mapValues { value: GenericContainer =>
        avroHandler[C](value)
      }
      stream
    }

    /**
      * Parses the KCQL statement, streams its source topic and applies the KCQL projection to every Avro value.
      * Any INSERT INTO target in the statement is ignored; only the SELECT part is applied.
      *
      * @param kcqlQuery - The sql like syntax allowing to select from a topic [and insert into another]
      * @tparam T - The key type of the stream
      * @return A stream of the projected Avro values
      */
    def mapValueWithKcqlAvro[T](kcqlQuery: String): KStream[T, GenericContainer] = {
      mapValueWithKcqlAvro[T](Kcql.parse(kcqlQuery))
    }

    /**
      * Streams the KCQL source topic and applies the KCQL projection to every Avro value.
      * Any INSERT INTO target in the statement is ignored; only the SELECT part is applied.
      *
      * @param kcql - An instance of the parsed KCQL syntax
      * @tparam T - The key type of the stream
      * @return A stream of the projected Avro values
      */
    def mapValueWithKcqlAvro[T](kcql: Kcql): KStream[T, GenericContainer] = {
      val source: KStream[T, GenericContainer] = streamBuilder.stream(kcql.getSource)
      val stream: KStream[T, GenericContainer] = source.mapValues { value: GenericContainer => value.kcql(kcql) }
      stream
    }

    /**
      * Parses the KCQL statement, streams its source topic, applies the KCQL projection to every Avro value and
      * converts the projected value to an instance of C.
      * Any INSERT INTO target in the statement is ignored; only the SELECT part is applied.
      *
      * @param kcqlQuery  - The sql like syntax allowing to select from a topic [and insert into another]
      * @param fromRecord - The avro4s translation from a `GenericRecord` to C
      * @tparam T - The key type of the stream
      * @tparam C - The type every projected value is converted to
      * @return A stream of the converted values
      */
    def mapValueWithKcqlAvroAs[T, C](kcqlQuery: String)(implicit fromRecord: FromRecord[C]): KStream[T, C] = {
      val kcql = Kcql.parse(kcqlQuery)
      val source: KStream[T, GenericContainer] = streamBuilder.stream(kcql.getSource)
      val stream: KStream[T, C] = source.mapValues { value: GenericContainer =>
        // Same container-to-C conversion as mapAvroValueAs, applied to the projected value.
        avroHandler[C](value.kcql(kcql))
      }
      stream
    }

    /**
      * Parses the KCQL statement, streams its source topic, applies the KCQL projection to every Avro value and
      * sends the result to the target topic. i.e. INSERT INTO topicTarget SELECT * FROM topicSource
      *
      * Make sure both serdes are configured (i.e. avroserde has a reference to schema registry and is set as value serde)
      *
      * @param kcql      - The sql like syntax allowing to select from a topic and insert into another
      * @param key       - The serde used for the record keys when writing to the target topic
      * @param avroSerde - The serde used for the Avro values when writing to the target topic
      * @tparam T - The key type of the stream
      * @throws IllegalArgumentException when the statement has no INSERT INTO target
      */
    def mapValueWithKcqlAvroTo[T](kcql: String)(implicit key: Serde[T], avroSerde: AvroSerde): Unit = mapValueWithKcqlAvroTo[T](Kcql.parse(kcql))


    /**
      * Streams the KCQL source topic, applies the KCQL projection to every Avro value and sends the result to the
      * target topic. i.e. INSERT INTO topicTarget SELECT * FROM topicSource
      *
      * Make sure both serdes are configured (i.e. avroserde has a reference to schema registry and is set as value serde)
      *
      * @param kcql      - An instance of parsed KCQL.
      * @param keySerde  - The serde used for the record keys when writing to the target topic
      * @param avroSerde - The serde used for the Avro values when writing to the target topic
      * @tparam T - The key type of the stream
      * @throws IllegalArgumentException when the statement has no INSERT INTO target
      */
    def mapValueWithKcqlAvroTo[T](kcql: Kcql)(implicit keySerde: Serde[T], avroSerde: AvroSerde): Unit = {
      require(kcql.getTarget != null && kcql.getTarget.trim.nonEmpty,
        "Your KCQL syntax should follow `INSERT INTO B SELECT ... FROM A` pattern")

      val source: KStream[T, GenericContainer] = streamBuilder.stream(kcql.getSource)
      val stream: KStream[T, GenericContainer] = source.mapValues { value: GenericContainer =>
        value.kcql(kcql)
      }
      stream.to(keySerde, avroSerde, kcql.getTarget)
    }

    /**
      * Parses the KCQL statement, streams its source topic as JSON text and applies the KCQL projection to every value.
      * Any INSERT INTO target in the statement is ignored; only the SELECT part is applied.
      *
      * @param kcql - The sql like syntax allowing to select from a topic [and insert into another]
      * @tparam T - The key type of the stream
      * @return A stream of the projected JSON nodes
      */
    def mapValueWithKcqlJson[T](kcql: String): KStream[T, JsonNode] = mapValueWithKcqlJson[T](Kcql.parse(kcql))

    /**
      * Streams the KCQL source topic as JSON text and applies the KCQL projection to every value.
      * Any INSERT INTO target in the statement is ignored; only the SELECT part is applied.
      *
      * @param kcql - An instance of the parsed KCQL
      * @tparam T - The key type of the stream
      * @return A stream of the projected JSON nodes
      */
    def mapValueWithKcqlJson[T](kcql: Kcql): KStream[T, JsonNode] = {
      val source: KStream[T, String] = streamBuilder.stream(kcql.getSource)
      val stream: KStream[T, JsonNode] = source.mapValues { value: String =>
        // NOTE(review): a null value is handed to JacksonJson.asJson as-is - confirm how it treats null input.
        val json = JacksonJson.asJson(value)
        json.kcql(kcql)
      }
      stream
    }

    /**
      * Parses the KCQL statement, streams its source topic as JSON text, applies the KCQL projection to every value
      * and binds the projected JSON to an instance of C.
      * Any INSERT INTO target in the statement is ignored; only the SELECT part is applied.
      *
      * @param kcql - The sql like syntax allowing to select from a topic [and insert into another]
      * @tparam T - The key type of the stream
      * @tparam C - The type the projected JSON is bound to
      * @return A stream of the bound values
      */
    def mapValueWithKcqlJsonAs[T, C: ClassTag](kcql: String): KStream[T, C] = {
      mapValueWithKcqlJsonAs[T, C](Kcql.parse(kcql))
    }


    /**
      * Streams the KCQL source topic as JSON text, applies the KCQL projection to every value and binds the
      * projected JSON to an instance of C.
      * Any INSERT INTO target in the statement is ignored; only the SELECT part is applied.
      *
      * @param kcql - An instance of the parsed KCQL
      * @tparam T - The key type of the stream
      * @tparam C - The type the projected JSON is bound to
      * @return A stream of the bound values
      */
    def mapValueWithKcqlJsonAs[T, C: ClassTag](kcql: Kcql): KStream[T, C] = {
      // Resolved once, outside the per-record function.
      val clazz = implicitly[ClassTag[C]].runtimeClass.asInstanceOf[Class[C]]
      val source: KStream[T, String] = streamBuilder.stream(kcql.getSource)
      val stream: KStream[T, C] = source.mapValues { value: String =>
        val json = JacksonJson.asJson(value)
        JacksonJson.mapper.treeToValue(json.kcql(kcql), clazz)
      }
      stream
    }

    /**
      * Parses the KCQL statement, streams its source topic as JSON text, applies the KCQL projection to every value
      * and sends the result to the target topic. i.e. INSERT INTO topicTarget SELECT * FROM topicSource
      *
      * The output to the topic will be a string as the incoming data
      *
      * @param kcql - The sql like syntax allowing to select from a topic and insert into another
      * @tparam T - The key type of the stream
      * @throws IllegalArgumentException when the statement has no INSERT INTO target
      */
    def mapValueWithKcqlJsonTo[T](kcql: String): Unit = mapValueWithKcqlJsonTo[T](Kcql.parse(kcql))

    /**
      * Streams the KCQL source topic as JSON text, applies the KCQL projection to every value and sends the
      * result to the target topic. i.e. INSERT INTO topicTarget SELECT * FROM topicSource
      *
      * The output to the topic will be a string as the incoming data. No serdes are passed to `to`, so whichever
      * defaults the streams configuration provides are used.
      *
      * @param kcql - An instance of the parsed KCQL
      * @tparam T - The key type of the stream
      * @throws IllegalArgumentException when the statement has no INSERT INTO target
      */
    def mapValueWithKcqlJsonTo[T](kcql: Kcql): Unit = {
      require(kcql.getTarget != null && kcql.getTarget.trim.nonEmpty,
        "The KCQL should follow the 'INSERT INTO B SELECT * FROM A' pattern")
      val source: KStream[T, String] = streamBuilder.stream(kcql.getSource)
      val stream: KStream[T, String] = source.mapValues { value: String =>
        val json = JacksonJson.asJson(value)
        json.kcql(kcql).toString
      }
      stream.to(kcql.getTarget)
    }

    /**
      * Creates a stream reading from the provided topics and maps the message JSON value to avro
      * It is expected the incoming value is of type String
      *
      * @param avroName      - String providing the name of the record; must not be null or blank
      * @param avroNamespace - String that qualifies the avro name
      * @param topics        - The topics to read from
      * @param sp            - The avro4s scale and precision; presumably consumed implicitly by the converter (confirm)
      * @tparam T - The key type of the stream
      * @return A KStream[T,GenericContainer]. Per the original author's note the value is a NonRecordContainer for
      *         primitive types (will work for a json containing arrays) and a record otherwise; the conversion
      *         itself is implemented by JsonToAvroConverter.
      * @throws IllegalArgumentException when avroName is null or blank
      */
    def mapJsonValueToAvro[T](avroName: String,
                              avroNamespace: String,
                              topics: String*)(implicit sp: ScaleAndPrecision): KStream[T, GenericContainer] = {
      require(avroName != null && avroName.trim.nonEmpty, "'avroName' can't be null or empty")
      val source: KStream[T, String] = streamBuilder.stream(topics: _*)
      val converter = new JsonToAvroConverter(avroNamespace)
      val stream: KStream[T, GenericContainer] = source.mapValues { value: String =>
        // No schema is supplied up front; presumably the converter derives one from the payload (confirm).
        implicit val s: Option[Schema] = None
        converter.convert(avroName, value)
      }
      stream
    }

    /**
      * Creates a stream reading from the provided topics and binds every JSON String value to an instance of C
      * via `JacksonJson.mapper`.
      *
      * @param topics - The topics to read from
      * @param tag    - The class tag used to obtain the runtime class of C
      * @tparam T - The key type of the stream
      * @tparam C - The type every JSON value is bound to
      * @return A stream of the bound values
      */
    def mapJsonValueAs[T, C](topics: String*)(implicit tag: ClassTag[C]): KStream[T, C] = {
      // Resolved once, outside the per-record function (mirrors mapValueWithKcqlJsonAs).
      val clazz = tag.runtimeClass.asInstanceOf[Class[C]]
      val source: KStream[T, String] = streamBuilder.stream(topics: _*)
      val stream: KStream[T, C] = source.mapValues { value: String =>
        JacksonJson.mapper.readValue[C](value, clazz)
      }
      stream
    }
  }


  /**
    * Extension methods for a stream whose values are Avro containers.
    *
    * Values are transformed via `mapValues`, so record keys are left untouched.
    */
  implicit class KStreamConverter[T](val kstream: KStream[T, GenericContainer]) extends AnyVal {

    /**
      * Maps the Avro record to instance of C.
      * If you are using Scala Product derived classes you don't have to provide the fromRecord instance.
      * Avro4s library does that work for you via macros
      *
      * @param fromRecord - An instance FromRecord providing the translation from Avro to C
      * @tparam C - The type every value is converted to
      * @return A stream of the converted values
      */
    def mapAvroValueAs[C]()(implicit fromRecord: FromRecord[C]): KStream[T, C] = {
      kstream.mapValues { value: GenericContainer =>
        avroHandler[C](value)
      }
    }

    /**
      * Parses the KCQL statement and applies its projection to the Avro record.
      * KCQL examples: `SELECT * FROM A`, `SELECT field1 FROM A` or `SELECT field2.fieldA FROM A`
      * The 'from A' although not used is required
      *
      * @param kcql - The KCQL instructions (i.e. SELECT ... FROM topic)
      * @return A stream of the projected Avro values
      */
    def mapValueWithKcql(kcql: String): KStream[T, GenericContainer] = mapValueWithKcql(Kcql.parse(kcql))

    /**
      * Applies the KCQL projection to the Avro record.
      * KCQL examples: `SELECT * FROM A`, `SELECT field1 FROM A` or `SELECT field2.fieldA FROM A`
      * The 'from A' although not used is required
      *
      * @param kcql - An instance of the parsed KCQL (i.e. SELECT ... FROM topic)
      * @return A stream of the projected Avro values
      */
    def mapValueWithKcql(kcql: Kcql): KStream[T, GenericContainer] = {
      kstream.mapValues { value: GenericContainer => value.kcql(kcql) }
    }
  }

  /**
    * Extension methods for a stream whose values are JSON text.
    *
    * Values are transformed via `mapValues`, so record keys are left untouched.
    */
  implicit class KStreamJsonConverter[T](val kstream: KStream[T, String]) extends AnyVal {

    /**
      * Maps the JSON String value to an instance of C via `JacksonJson.mapper`.
      *
      * @param clazz - The runtime class the JSON is bound to
      * @tparam C - The type every value is converted to
      * @return A stream of the bound values
      */
    def mapJsonValueAs[C](clazz: Class[C]): KStream[T, C] = {
      kstream.mapValues { value: String =>
        JacksonJson.mapper.readValue(value, clazz)
      }
    }

    /**
      * Parses the KCQL statement and applies its projection to the JSON record.
      * KCQL examples: `SELECT * FROM A`, `SELECT field1 FROM A` or `SELECT field2.fieldA FROM A`
      * The 'from A' although not used is required
      *
      * @param kcql - The KCQL instruction
      * @return A stream of the projected JSON nodes
      */
    def mapValueWithKcql(kcql: String): KStream[T, JsonNode] = mapValueWithKcql(Kcql.parse(kcql))

    /**
      * Applies the KCQL projection to the JSON record.
      * KCQL examples: `SELECT * FROM A`, `SELECT field1 FROM A` or `SELECT field2.fieldA FROM A`
      * The 'from A' although not used is required
      *
      * @param kcql - An instance of the parsed KCQL instruction
      * @return A stream of the projected JSON nodes
      */
    def mapValueWithKcql(kcql: Kcql): KStream[T, JsonNode] = {
      kstream.mapValues { value: String =>
        // NOTE(review): a null value is handed to JacksonJson.asJson as-is - confirm how it treats null input.
        JacksonJson.asJson(value).kcql(kcql)
      }
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy