All downloads are free. Search and download functionality uses the official Maven repository.

xtdb.kafka.connect.clj Maven / Gradle / Ivy

(ns xtdb.kafka.connect
  (:require [juxt.clojars-mirrors.cheshire.v5v10v0.cheshire.core :as json]
            [juxt.clojars-mirrors.cheshire.v5v10v0.cheshire.generate :as json-gen]
            [clojure.tools.logging :as log]
            [xtdb.codec :as c]
            [xtdb.io :as xio]
            [xtdb.error :as err]
            [cognitect.transit :as transit]
            [xtdb.api :as xt])
  (:import [org.apache.kafka.connect.data Schema Struct Field]
           org.apache.kafka.connect.sink.SinkRecord
           org.apache.kafka.connect.source.SourceRecord
           java.io.ByteArrayOutputStream
           [java.util UUID Map]
           [com.fasterxml.jackson.core JsonGenerator JsonParseException]
           xtdb.kafka.connect.XtdbSinkConnector
           xtdb.kafka.connect.XtdbSourceConnector
           xtdb.codec.EDNId))

;; Register a Cheshire encoder for EDNId values: the id is mapped back through
;; c/edn-id->original-id and written to the JSON stream as its string form.
(json-gen/add-encoder
 EDNId
 (fn [edn-id ^JsonGenerator json-generator]
   (let [original-id (c/edn-id->original-id edn-id)]
     (.writeString json-generator (str original-id)))))

(defn- map->edn
  "Returns a Clojure map built from the entries of `m`, with every key passed
  through `keyword`. Values that are `java.util.Map` instances are converted
  recursively; every other value is kept as it is."
  [m]
  (reduce (fn [acc [k v]]
            (assoc acc
                   (keyword k)
                   (if (instance? Map v) (map->edn v) v)))
          {}
          m))

(defn- get-struct-contents
  "Recursively converts a Kafka Connect value into Clojure data.

  * `Struct`              -> map from keywordised field name to the converted
                             field value (an empty map when the schema has no
                             fields)
  * `java.util.ArrayList` -> vector of converted elements
  * `java.util.HashMap`   -> map from keywordised key to converted value
  * anything else         -> returned unchanged"
  [val]
  (cond
    (instance? Struct val)
    (let [^Struct struct val
          ^Schema struct-schema (.schema struct)]
      ;; Build into {} rather than folding with `conj`: `(reduce conj ())`
      ;; yields an empty vector, so a field-less struct would not be a map.
      (into {}
            (map (fn [^Field field]
                   [(keyword (.name field))
                    (get-struct-contents (.get struct field))]))
            (.fields struct-schema)))

    (instance? java.util.ArrayList val)
    (mapv get-struct-contents val)

    (instance? java.util.HashMap val)
    (let [^java.util.HashMap m val]
      ;; keySet and values are walked in the same order, so zipmap pairs
      ;; each key with its own value.
      (zipmap (map keyword (.keySet m))
              (map get-struct-contents (.values m))))

    :else val))

(defn- struct->edn
  "Converts the Struct `s` to Clojure data with `get-struct-contents`, logs the
  converted value at info level and returns it. `schema` is accepted but not
  consulted."
  [^Schema schema ^Struct s]
  (doto (get-struct-contents s)
    (->> (log/info "map val: "))))

(defn- record->edn
  "Turns the value of a sink `record` into Clojure data.

  Dispatch, in order:
  * Struct value with a value schema -> `struct->edn`
  * schemaless Map whose key set is exactly `payload` and `schema` -> the
    payload is unwrapped: a String is parsed as JSON (keyword keys), a Map goes
    through `map->edn`, anything else raises an illegal-arg error
  * any other Map -> `map->edn`
  * String -> parsed as JSON (keyword keys); when that raises a
    JsonParseException the string is read with
    `c/read-edn-string-with-readers` instead
  * otherwise an illegal-arg error is thrown"
  [^SinkRecord record]
  (let [value-schema (.valueSchema record)
        raw          (.value record)
        map-value?   (instance? Map raw)]
    (cond
      (and (instance? Struct raw) value-schema)
      (struct->edn value-schema raw)

      ;; JSON envelope: {"schema" ..., "payload" ...} delivered without a schema.
      (and map-value?
           (nil? value-schema)
           (= #{"payload" "schema"} (set (keys raw))))
      (let [payload (.get ^Map raw "payload")]
        (condp instance? payload
          String (json/parse-string payload true)
          Map    (map->edn payload)
          (throw (err/illegal-arg :unknown-json-payload-type
                                  {::err/message (str "Unknown JSON payload type: " record)}))))

      map-value?
      (map->edn raw)

      (string? raw)
      (try
        (json/parse-string raw true)
        (catch JsonParseException e
          (log/debug e "Failed to parse as JSON, trying EDN: " raw)
          (c/read-edn-string-with-readers raw)))

      :else
      (throw (err/illegal-arg :unknown-message-type
                              {::err/message (str "Unknown message type: " record)})))))

(defn- coerce-eid
  "Coerces `id` into an entity id.

  A non-nil `id` accepted by `c/valid-id?` is passed through
  `c/id-edn-reader`; any other string is turned into a keyword; everything
  else yields nil."
  [id]
  (if (and (some? id) (c/valid-id? id))
    (c/id-edn-reader id)
    (when (string? id)
      (keyword id))))

(defn- find-eid
  "Picks the entity id for `doc`.

  Candidates, first truthy one wins: the document's `:xt/id`, the document
  field named by the `XtdbSinkConnector/ID_KEY_CONFIG` property (keywordised),
  then the record key. The candidate is run through `coerce-eid`; when that
  yields nothing a random UUID is returned."
  [props ^SinkRecord record doc]
  (let [id-field  (some-> (get props XtdbSinkConnector/ID_KEY_CONFIG) keyword)
        candidate (or (get doc :xt/id)
                      (when id-field
                        (get doc id-field))
                      (.key record))]
    (or (coerce-eid candidate)
        (UUID/randomUUID))))

(defn transform-sink-record
  "Converts a sink `record` into a single transaction operation.

  A record with a nil value and a truthy key becomes
  `[::xt/delete <coerced key>]`. Any other record is converted with
  `record->edn` and becomes `[::xt/put doc]`, where `:xt/id` is set to the id
  chosen by `find-eid`. The record and the resulting op are logged at info
  level."
  [props ^SinkRecord record]
  (log/info "sink record:" record)
  (let [record-key (.key record)
        tombstone? (and (nil? (.value record)) record-key)
        tx-op      (if tombstone?
                     [::xt/delete (coerce-eid record-key)]
                     (let [doc (record->edn record)]
                       [::xt/put (assoc doc :xt/id (find-eid props record doc))]))]
    (log/info "tx op:" tx-op)
    tx-op))

(defn submit-sink-records
  "Transforms every record in `records` with `transform-sink-record` and
  submits the resulting ops to `api` as one transaction. Returns the result of
  `xt/submit-tx`, or nil (submitting nothing) when `records` is empty."
  [api props records]
  (when (seq records)
    (->> records
         (mapv #(transform-sink-record props %))
         (xt/submit-tx api))))

(defn- write-transit
  "Serialises `x` as Transit in `:json-verbose` form and returns it as a
  string. EDNId values are written with the tag `xt/id`, using
  `c/edn-id->original-id` as their representation."
  [x]
  (with-open [out (ByteArrayOutputStream.)]
    (let [writer (transit/writer out :json-verbose
                                 {:handlers
                                  {EDNId
                                   (transit/write-handler
                                    "xt/id"
                                    c/edn-id->original-id)}})]
      (transit/write writer x)
      ;; Decode explicitly as UTF-8: the no-arg `.toString` uses the platform
      ;; default charset, which corrupts non-ASCII text on JVMs whose default
      ;; is not UTF-8.
      (.toString out "UTF-8"))))

(defn- tx-op-with-explicit-valid-time
  "Returns `tx-op` with `tx-time` appended as its valid time when the op does
  not already carry one; otherwise returns `tx-op` unchanged.

  An op is treated as lacking a valid time when it has exactly its mandatory
  elements:
  * put / delete : `[op doc-or-eid]`       (2 elements)
  * match        : `[op eid doc]`          (3 elements)
  * cas          : `[op old-doc new-doc]`  (3 elements)

  Ops of any other type are returned untouched."
  [[op :as tx-op] tx-time]
  (let [bare-count (case op
                     (::xt/put ::xt/delete) 2
                     ;; match carries an eid AND a doc before the optional
                     ;; valid time, so its bare form has three elements.
                     (::xt/match ::xt/cas)  3
                     nil)]
    (if (= bare-count (count tx-op))
      (conj tx-op tx-time)
      tx-op)))

(defn- tx-log-entry->tx-source-records
  "Builds the source records for one tx-log entry in transaction mode.

  Returns a one-element vector holding a SourceRecord whose value is the
  entry's tx-ops (each passed through `tx-op-with-explicit-valid-time` with
  the entry's tx-time) rendered by `formatter`, with a string value schema.
  The source offset is `{\"offset\" tx-id}`, partition / key schema / key are
  nil, and the timestamp is the tx-time in epoch milliseconds."
  [source-partition topic formatter
   {::xt/keys [tx-ops tx-id tx-time] :as tx}]
  (let [explicit-ops (mapv #(tx-op-with-explicit-valid-time % tx-time) tx-ops)
        payload      (formatter explicit-ops)]
    [(SourceRecord. source-partition
                    {"offset" tx-id}
                    topic
                    nil
                    nil
                    nil
                    Schema/STRING_SCHEMA
                    payload
                    (inst-ms tx-time))]))

(defn- tx-op->id+doc
  "Extracts `[id doc]` from a transaction op, or nil when the op has more
  elements than its bare form.

  * put, 2 elements           -> `[(:xt/id doc) doc]`
  * delete / evict, 2 elements -> `[id]` (no doc)
  * cas, 3 elements           -> `[(:xt/id new-doc) new-doc]`

  Any other op type has no matching `case` clause and therefore throws."
  [[op :as tx-op]]
  (let [size (count tx-op)]
    (case op
      ::xt/put
      (when (= 2 size)
        (let [doc (nth tx-op 1)]
          [(:xt/id doc) doc]))

      (::xt/delete ::xt/evict)
      (when (= 2 size)
        [(nth tx-op 1)])

      ::xt/cas
      (when (= 3 size)
        (let [replacement (nth tx-op 2)]
          [(:xt/id replacement) replacement])))))

(defn- tx-log-entry->doc-source-records
  "Builds the source records for one tx-log entry in document mode.

  Returns a lazy seq with one SourceRecord per tx-op for which
  `tx-op->id+doc` yields an id; `::xt/fn` and `::xt/match` ops are skipped.
  Each record is keyed by the string form of `(c/new-id id)` and carries the
  document rendered by `formatter`, or a nil value when the op has no
  document. The source offset is `{\"offset\" tx-id}` and the timestamp is the
  tx-time in epoch milliseconds."
  [source-partition topic formatter
   {::xt/keys [tx-ops tx-id tx-time], :as tx}]
  (log/info "tx-ops:" tx-ops)
  (for [[op :as tx-op] tx-ops
        :when (not (contains? #{::xt/fn ::xt/match} op))
        :let [[id doc] (tx-op->id+doc tx-op)]
        ;; Drop ops without an id before hashing, so `c/new-id` is never
        ;; invoked on nil for an op that is about to be discarded.
        :when id
        :let [hashed-id (str (c/new-id id))
              _ (log/info "tx-op:" tx-op "id:" id "hashed id:" hashed-id "doc:" doc)]]
    (SourceRecord. source-partition
                   {"offset" tx-id}
                   topic
                   nil
                   Schema/STRING_SCHEMA
                   hashed-id
                   Schema/OPTIONAL_STRING_SCHEMA
                   (some-> doc (formatter))
                   (inst-ms tx-time))))

(defn poll-source-records
  "Reads the next batch of transactions from the tx-log of `api` and returns
  them as a vector of SourceRecords.

  Connector settings are read from `props`: URL (used as the source
  partition), topic, output format (`edn`, `json` or `transit`), mode (`tx`
  for one record per transaction, `doc` for one record per document op) and
  batch size (parsed as a long; caps the number of tx-log entries read). An
  unrecognised format or mode has no matching `case` clause and throws.

  Reading resumes after the tx-id stored under `offset` in `source-offset`,
  when there is one. The tx-log iterator is closed before returning."
  [api source-offset props]
  (let [url          (get props XtdbSourceConnector/URL_CONFIG)
        topic        (get props XtdbSourceConnector/TOPIC_CONFIG)
        fmt          (get props XtdbSourceConnector/FORMAT_CONFIG)
        mode         (get props XtdbSourceConnector/MODE_CONFIG)
        batch-size   (get props XtdbSourceConnector/TASK_BATCH_SIZE_CONFIG)
        partition    {"url" url}
        formatter    (case fmt
                       "edn" xio/pr-edn-str
                       "json" json/generate-string
                       "transit" write-transit)
        entry->recs  (case mode
                       "tx" tx-log-entry->tx-source-records
                       "doc" tx-log-entry->doc-source-records)
        after-tx-id  (some-> (get source-offset "offset") long)]
    (with-open [tx-log (xt/open-tx-log api after-tx-id true)]
      (log/info "source offset:" source-offset "tx-id:" after-tx-id "format:" fmt "mode:" mode)
      (let [entries (take (Long/parseLong batch-size) (iterator-seq tx-log))
            ;; Realise everything eagerly while the tx-log is still open.
            records (into [] cat (map #(entry->recs partition topic formatter %) entries))]
        (when (seq records)
          (log/info "source records:" records))
        records))))




© 2015 - 2025 Weber Informatics LLC | Privacy Policy