All downloads are free. Search and download functionality uses the official Maven repository.

xtdb.query.clj Maven / Gradle / Ivy

The newest version!
(ns xtdb.query
  (:require [clojure.pprint :as pp]
            [clojure.spec.alpha :as s]
            [clojure.tools.logging :as log]
            [juxt.clojars-mirrors.integrant.core :as ig]
            [xtdb.error :as err]
            [xtdb.expression :as expr]
            xtdb.expression.pg
            xtdb.expression.temporal
            [xtdb.logical-plan :as lp]
            [xtdb.metadata :as meta]
            xtdb.operator.apply
            xtdb.operator.arrow
            xtdb.operator.csv
            xtdb.operator.group-by
            xtdb.operator.join
            xtdb.operator.order-by
            xtdb.operator.project
            xtdb.operator.rename
            xtdb.operator.window
            [xtdb.operator.scan :as scan]
            xtdb.operator.select
            xtdb.operator.set
            xtdb.operator.table
            xtdb.operator.top
            xtdb.operator.unnest
            [xtdb.sql :as sql]
            [xtdb.types :as types]
            [xtdb.util :as util]
            [xtdb.vector.reader :as vr]
            [xtdb.vector.writer :as vw]
            [xtdb.xtql :as xtql]
            [xtdb.xtql.edn :as xtql.edn])
  (:import clojure.lang.MapEntry
           (com.github.benmanes.caffeine.cache Cache Caffeine)
           java.lang.AutoCloseable
           (java.time Clock Duration)
           (java.util HashMap)
           (java.util.concurrent ConcurrentHashMap)
           (java.util.function Function)
           [java.util.stream Stream StreamSupport]
           (org.apache.arrow.memory BufferAllocator RootAllocator)
           org.apache.arrow.vector.types.pojo.Field
           (xtdb ICursor IResultCursor)
           (xtdb.antlr Sql$DirectlyExecutableStatementContext)
           (xtdb.api.query IKeyFn Query)
           xtdb.metadata.IMetadataManager
           xtdb.operator.scan.IScanEmitter
           xtdb.util.RefCounter
           xtdb.vector.IVectorReader
           xtdb.watermark.IWatermarkSource))

;; Java interface for a query whose parameters have already been bound and which
;; can therefore be executed. The trailing string on `close` is inline documentation.
#_{:clj-kondo/ignore [:unused-binding :clojure-lsp/unused-public-var]}
(definterface BoundQuery
  ;; the fields describing the query's output columns
  (^java.util.List columnFields [])
  ;; opens a cursor over the query results
  (^xtdb.ICursor openCursor [])
  (^void close []
   "optional: if you close this BoundQuery it'll close any closed-over params relation"))

;; Java interface for a query that has been validated and prepared, but not yet
;; bound to concrete parameter values; `bind` produces a BoundQuery.
#_{:clj-kondo/ignore [:unused-binding :clojure-lsp/unused-public-var]}
(definterface PreparedQuery
  ;; NOTE we could arguably take the actual params here rather than param-fields
  ;; but if we were to make params a VSR this would then make BoundQuery a closeable resource
  ;; ... or at least raise questions about who then owns the params
  ;; the fields of the parameters the query expects
  (^java.util.List paramFields [])
  ;; the fields describing the query's output columns
  (^java.util.List columnFields [])
  ;; warnings attached to the query
  (^java.util.List warnings [])
  (^xtdb.query.BoundQuery bind [queryOpts]
   "queryOpts :: {:params, :table-args, :basis, :default-tz}"))

;; Java interface for the component that plans queries and prepares
;; relational-algebra plans for execution against a watermark source.
#_{:clj-kondo/ignore [:unused-binding :clojure-lsp/unused-public-var]}
(definterface IQuerySource
  ;; prepares an already-planned relational-algebra query, returning a PreparedQuery
  (^xtdb.query.PreparedQuery prepareRaQuery [ra-query wm-src query-opts])
  ;; turns a query into a plan, returned as a persistent vector
  (^clojure.lang.PersistentVector planQuery [query wm-src query-opts]))

(defn- wrap-cursor
  "Wraps `cursor` in an IResultCursor that:

  - refuses to advance (throwing InterruptedException) once `ref-ctr` reports it is closing,
  - binds `expr/*clock*` to `clock` while advancing the underlying cursor,
  - delegates the remaining spliterator methods straight to `cursor`,
  - reports `fields` as its column fields,
  - on close, releases `ref-ctr` and then closes `cursor`, `wm` and `al` in that order.

  Closing is exception-safe: a failure while closing an earlier resource does not
  prevent the later ones from being closed."
  ^xtdb.IResultCursor [^ICursor cursor, ^AutoCloseable wm, ^BufferAllocator al,
                       ^Clock clock, ^RefCounter ref-ctr fields]
  (reify IResultCursor
    (tryAdvance [_ c]
      (when (.isClosing ref-ctr)
        (throw (InterruptedException.)))

      (binding [expr/*clock* clock]
        (.tryAdvance cursor c)))

    (characteristics [_] (.characteristics cursor))
    (estimateSize [_] (.estimateSize cursor))
    (getComparator [_] (.getComparator cursor))
    (getExactSizeIfKnown [_] (.getExactSizeIfKnown cursor))
    (hasCharacteristics [_ c] (.hasCharacteristics cursor c))
    (trySplit [_] (.trySplit cursor))

    (close [_]
      (.release ref-ctr)
      ;; nested finally blocks so that the watermark and allocator are still closed
      ;; if closing the cursor (or the watermark) throws
      (try
        (util/close cursor)
        (finally
          (try
            (util/close wm)
            (finally
              (util/close al))))))

    (columnFields [_] fields)))

(defn- param-sym
  "Builds the parameter symbol for `v`: prefixes it with `?` and passes the
  resulting symbol through `util/symbol->normal-form-symbol`."
  [v]
  (let [prefixed (symbol (str "?" v))]
    (util/symbol->normal-form-symbol prefixed)))

(defn mapify-params
  "Turns `params` into a lazy seq of single-entry maps of param symbol -> value.

  Map entries are keyed by their own key (via `param-sym`); any other value is
  treated as positional and keyed `?_<idx>`, where idx is its position in `params`."
  [params]
  (letfn [(->param-map [idx param]
            (if-not (map-entry? param)
              {(symbol (str "?_" idx)) param}
              {(param-sym (str (symbol (key param)))) (val param)}))]
    (map-indexed ->param-map params)))

(defn open-args
  "Merges the maps produced by `mapify-params` for `args` into one map and opens
  it as a params relation on `allocator` via `vw/open-params`."
  [^BufferAllocator allocator, args]
  (let [params-by-sym (into {} (mapify-params args))]
    (vw/open-params allocator params-by-sym)))

(defn emit-expr
  "Returns the emitted expression for `conformed-query`, memoised in `cache`.

  The cache key is made up of the scan fields for `scan-cols` (looked up under a
  freshly opened watermark when there are scan columns and a scan emitter), the
  default time zone, the last entry of the metadata manager's chunks metadata
  (when a metadata manager is supplied) and the param fields. On a cache miss
  the plan is emitted with that key map, plus the scan emitter, as its options."
  [^ConcurrentHashMap cache {:keys [^IScanEmitter scan-emitter, ^IMetadataManager metadata-mgr, ^IWatermarkSource wm-src]}
   conformed-query scan-cols default-tz param-fields]
  (let [scan-fields (when (and (seq scan-cols) scan-emitter)
                      (with-open [wm (.openWatermark wm-src)]
                        (.scanFields scan-emitter wm scan-cols)))
        last-known-chunk (when metadata-mgr
                           (.lastEntry (.chunksMetadata metadata-mgr)))
        emit-key {:scan-fields scan-fields
                  :default-tz default-tz
                  :last-known-chunk last-known-chunk
                  :param-fields param-fields}
        emit-on-miss (reify Function
                       (apply [_ emit-opts]
                         ;; only the tz in the clock is relevant at expr compile time
                         (binding [expr/*clock* (Clock/fixed (.instant expr/*clock*) default-tz)]
                           (lp/emit-expr conformed-query (assoc emit-opts :scan-emitter scan-emitter)))))]
    (.computeIfAbsent cache emit-key emit-on-miss)))

(defn ->column-fields
  "Returns a vector of fields, each renamed to the string form of its column name.

  When `ordered-outer-projection` is given, the fields are looked up in `fields`
  in that order; otherwise every entry of `fields` is used, in its own order."
  [ordered-outer-projection fields]
  (letfn [(named [field col-name]
            (types/field-with-name field (str col-name)))]
    (if-not ordered-outer-projection
      (mapv (fn [[col-name field]]
              (named field col-name))
            fields)
      (mapv (fn [col-name]
              (named (get fields col-name) col-name))
            ordered-outer-projection))))

(defn ->param-fields
  "Builds a map of column-name symbol -> field from the columns of `params`."
  [params]
  (let [->entry (fn [^IVectorReader col]
                  (MapEntry/create (symbol (.getName col)) (.getField col)))]
    (into {} (map ->entry) params)))

(defn prepare-ra
  "Validates the relational-algebra plan `query` against the `::lp/logical-plan`
  spec and returns a PreparedQuery for it.

  Arguments:
  - `query`: the logical plan; its metadata may carry `:ordered-outer-projection`,
    `:param-count` (defaults to 0) and `:warnings`.
  - deps map: `:scan-emitter`, `:allocator`, `:ref-ctr`, `:wm-src` (the whole map is
    also passed on to `emit-expr`).
  - opts map: `:param-types`, `:default-tz`, `:table-info`.

  Throws an illegal-arg error (`:malformed-query`) if the plan does not conform.

  The returned PreparedQuery's `bind` yields a BoundQuery. Its `openCursor`
  throws a runtime error (`:prepared-query-out-of-date`) when the schema of the
  scanned tables differs from the one captured at prepare time. Closing the
  BoundQuery closes the params relation it was bound with."
  ^xtdb.query.PreparedQuery
  [query
   {:keys [^IScanEmitter scan-emitter, ^BufferAllocator allocator,
           ^RefCounter ref-ctr ^IWatermarkSource wm-src] :as deps}
   {:keys [param-types default-tz table-info]}]

  (let [conformed-query (s/conform ::lp/logical-plan query)]
    (when (s/invalid? conformed-query)
      (throw (err/illegal-arg :malformed-query
                              {:plan query
                               :explain (s/explain-data ::lp/logical-plan query)})))

    (let [{:keys [ordered-outer-projection param-count warnings], :or {param-count 0}} (meta query)
          ;; :default param types become :utf8; missing trailing types are padded with :utf8
          param-types-with-defaults (->> (concat
                                          (mapv #(if (= :default %) :utf8 %) param-types)
                                          (repeat :utf8))
                                         (take param-count))
          tables (filter (comp #{:scan} :op) (lp/child-exprs conformed-query))
          scan-cols (->> tables
                         (into #{} (mapcat scan/->scan-cols)))

          _ (assert (or scan-emitter (empty? scan-cols)))

          ;; schema of the scanned tables as of now, compared again in openCursor
          relevant-schema-at-prepare-time
          (when (and table-info scan-emitter)
            (with-open [wm (.openWatermark wm-src)]
              (->> tables
                   (map #(str (get-in % [:scan-opts :table])))
                   (mapcat #(map (partial vector %) (get table-info %)))
                   (.scanFields scan-emitter wm))))

          cache (ConcurrentHashMap.)
          ;; positional param fields, nullable and named ?_0, ?_1, ...
          param-fields (->> param-types-with-defaults
                            (into [] (comp (map (comp types/col-type->field types/col-type->nullable-col-type))
                                           (map-indexed (fn [idx field]
                                                          (types/field-with-name field (str "?_" idx)))))))
          param-fields-by-name (into {} (map (juxt (comp symbol #(.getName ^Field %)) identity)) param-fields)
          default-tz (or default-tz (.getZone expr/*clock*))]

      (reify PreparedQuery
        (paramFields [_] param-fields)
        (columnFields [_]
          (let [{:keys [fields]} (emit-expr cache deps conformed-query scan-cols default-tz param-fields-by-name)]
            ;; could store column-fields in the cache/map too
            (->column-fields ordered-outer-projection fields)))
        (warnings [_] warnings)

        (bind [_ {:keys [args params basis default-tz]
                  :or {default-tz default-tz}}]

          ;; TODO throw if basis is in the future?
          (util/with-close-on-catch [args (open-args allocator args)]
            ;;TODO consider making the either/or relationship between params/args explicit, e.g throw error if both are provided
            (let [params (or params args)
                  {:keys [fields ->cursor]} (emit-expr cache deps conformed-query scan-cols default-tz (->param-fields params))
                  {:keys [current-time]} basis
                  current-time (or current-time (.instant expr/*clock*))
                  clock (Clock/fixed current-time default-tz)]

              (reify
                BoundQuery
                (columnFields [_]
                  (->column-fields ordered-outer-projection fields))

                (openCursor [_]
                  (when relevant-schema-at-prepare-time
                    (let [table-info-at-execution-time (with-open [wm (.openWatermark wm-src)]
                                                         (.scanFields scan-emitter wm
                                                                      (mapcat #(map (partial vector (key %)) (val %))
                                                                              (scan/tables-with-cols wm-src))))]

                      ;;TODO nullability of col is considered a schema change, not relevant for pgwire, maybe worth ignoring
                      ;;especially given our "per path schema" principal.
                      (when-not (= relevant-schema-at-prepare-time
                                   (select-keys table-info-at-execution-time (keys relevant-schema-at-prepare-time)))
                        (throw (err/runtime-err :prepared-query-out-of-date
                                                ;;TODO consider adding the schema diff to the error, potentially quite large.
                                                {::err/message "Relevant table schema has changed since preparing query, please prepare again"})))))
                  (.acquire ref-ctr)
                  ;; Every resource acquired from here on is undone if a later step throws:
                  ;; the ref count is released even when creating the allocator or opening
                  ;; the watermark fails, and the allocator is closed if the watermark
                  ;; cannot be opened. On success, ownership passes to the wrapped cursor.
                  (try
                    (let [^BufferAllocator allocator
                          (if allocator
                            (util/->child-allocator allocator "BoundQuery/openCursor")
                            (RootAllocator.))]
                      (try
                        (let [wm (.openWatermark wm-src)]
                          (try
                            (binding [expr/*clock* clock]
                              (-> (->cursor {:allocator allocator, :watermark wm
                                             :clock clock,
                                             :basis (-> basis
                                                        (update :at-tx (fnil identity (some-> wm .txBasis)))
                                                        (assoc :current-time current-time))
                                             :params params
                                             :schema (scan/tables-with-cols wm-src)})
                                  (wrap-cursor wm allocator clock ref-ctr fields)))
                            (catch Throwable t
                              (util/try-close wm)
                              (throw t))))
                        (catch Throwable t
                          (util/try-close allocator)
                          (throw t))))
                    (catch Throwable t
                      (.release ref-ctr)
                      (throw t))))

                AutoCloseable
                (close [_] (util/try-close params))))))))))

;; Prepares the query-source config: cache sizes default to 1000 but may be
;; overridden by `opts`; the allocator, scan-emitter and metadata-manager are
;; always wired up as Integrant refs, regardless of what `opts` contains.
(defmethod ig/prep-key ::query-source [_ opts]
  (merge {:prepare-cache-size 1000
          :plan-cache-size 1000}
         opts
         {:allocator (ig/ref :xtdb/allocator)
          :scan-emitter (ig/ref ::scan/scan-emitter)
          :metadata-mgr (ig/ref ::meta/metadata-manager)}))

(defn ->caffeine-cache
  "Builds a Caffeine cache whose maximum size is `size`."
  ^com.github.benmanes.caffeine.cache.Cache [size]
  (let [builder (.maximumSize (Caffeine/newBuilder) size)]
    (.build builder)))

;; Starts the query source: an IQuerySource backed by a Caffeine plan cache
;; (sized by :plan-cache-size) and a RefCounter that is handed to prepared queries
;; through the deps map. Closing it waits up to a minute on the ref counter and
;; logs a warning if that does not succeed.
(defmethod ig/init-key ::query-source [_ {:keys [plan-cache-size] :as deps}]
  (let [plan-cache (->caffeine-cache plan-cache-size)
        ref-ctr (RefCounter.)
        deps (-> deps (assoc :ref-ctr ref-ctr))]
    (reify
      IQuerySource
      (prepareRaQuery [_ query wm-src query-opts]
        (prepare-ra query (assoc deps :wm-src wm-src) (assoc query-opts :table-info (scan/tables-with-cols wm-src))))
      (planQuery [_ query wm-src query-opts]
        (let [table-info (scan/tables-with-cols wm-src)
              plan-query-opts
              (-> query-opts
                  (select-keys
                   [:decorrelate? :explain? :instrument-rules? :project-anonymous-columns? :validate-plan?])
                  ;; default to true only when unset; an explicit true/false is respected
                  ;; (previously an explicit `true` was turned into `false`)
                  (update :decorrelate? (fnil boolean true))
                  (assoc :table-info table-info))
              ;;TODO defaults to true in rewrite plan so needs defaulting pre-cache,
              ;;Move all defaulting to this level if/when everyone goes via planQuery
              ;; the plan cache is keyed on the query together with the planning options
              cache-key (assoc plan-query-opts :query query)]
          (.get ^Cache plan-cache cache-key
                (reify Function
                  (apply [_ _]
                    (let [plan (cond
                                 (or (string? query)
                                     (instance? Sql$DirectlyExecutableStatementContext query))
                                 (sql/compile-query query plan-query-opts)

                                 (seq? query) (xtql/compile-query (xtql.edn/parse-query query) plan-query-opts)

                                 (instance? Query query) (xtql/compile-query query plan-query-opts)

                                 :else (throw (err/illegal-arg :unknown-query-type {:query query, :type (type query)})))]
                      ;; for explain queries, return a table plan containing the pretty-printed plan
                      (if (:explain? query-opts)
                        [:table [{:plan (with-out-str (pp/pprint plan))}]]
                        plan)))))))

      AutoCloseable
      (close [_]
        (when-not (.tryClose ref-ctr (Duration/ofMinutes 1))
          (log/warn "Failed to shut down after 60s due to outstanding queries"))))))

;; Stops the query source by closing it.
(defmethod ig/halt-key! ::query-source
  [_ ^AutoCloseable src]
  (.close src))

(defn- cache-key-fn
  "Wraps `key-fn` in an IKeyFn that remembers denormalized keys in a HashMap,
  so `key-fn` is consulted only for keys that are not yet in the map."
  [^IKeyFn key-fn]
  (let [seen (HashMap.)
        denormalize-miss (reify Function
                           (apply [_ raw-key]
                             (.denormalize key-fn raw-key)))]
    (reify IKeyFn
      (denormalize [_ k]
        (.computeIfAbsent seen k denormalize-miss)))))

(defn open-cursor-as-stream
  "Opens a cursor on `bound-query` and returns a sequential java.util.stream.Stream
  of its rows, where each relation from the cursor is expanded via `vr/rel->rows`
  using a caching wrapper around `key-fn`.

  Closing the stream closes the cursor and then the bound query; the bound query
  is closed even if closing the cursor throws. If building the stream fails, the
  cursor is closed before the error propagates."
  ^java.util.stream.Stream [^BoundQuery bound-query {:keys [key-fn]}]
  (let [key-fn (cache-key-fn key-fn)]
    (util/with-close-on-catch [cursor (.openCursor bound-query)]
      (-> (StreamSupport/stream cursor false)
          ^Stream (.onClose (fn []
                              ;; finally: don't leak the bound query if the cursor fails to close
                              (try
                                (util/close cursor)
                                (finally
                                  (util/close bound-query)))))
          (.flatMap (reify Function
                      (apply [_ rel]
                        (.stream (vr/rel->rows rel key-fn)))))))))




© 2015 - 2024 Weber Informatics LLC | Privacy Policy