All Downloads are FREE. Search and download functionality uses the official Maven repository.

xtdb.expression.map.clj Maven / Gradle / Ivy

The newest version!
(ns xtdb.expression.map
  (:require [xtdb.expression :as expr]
            [xtdb.expression.walk :as ewalk]
            [xtdb.types :as types]
            [xtdb.util :as util]
            [xtdb.vector.reader :as vr]
            [xtdb.vector.writer :as vw])
  (:import (java.lang AutoCloseable)
           java.util.function.IntBinaryOperator
           (java.util List Map)
           (org.apache.arrow.memory BufferAllocator)
           (org.apache.arrow.memory.util.hash MurmurHasher SimpleHasher)
           (org.apache.arrow.vector NullVector VectorSchemaRoot)
           (org.apache.arrow.vector.types.pojo Schema)
           (org.roaringbitmap IntConsumer RoaringBitmap)
           xtdb.arrow.RelationReader
           (xtdb.vector IVectorReader)
           (com.carrotsearch.hppc IntObjectHashMap)))

;; Shared Arrow buffer hasher handed to `IVectorReader.hashCode` by `->hasher`
;; when hashing individual key-column values.
(def ^:private ^org.apache.arrow.memory.util.hash.ArrowBufHasher hasher
  SimpleHasher/INSTANCE)

;; Computes an int hash code for the row at position `idx`.
;; Implementations are produced by `->hasher`, which closes over the key columns.
#_{:clj-kondo/ignore [:unused-binding :clojure-lsp/unused-public-var]}
(definterface IIndexHasher
  (^int hashCode [^int idx]))

(defn ->hasher
  "Returns an `IIndexHasher` that hashes the row at a given index across `cols`.

  `cols` is a `java.util.List` of `IVectorReader`s (the key columns).

  * With exactly one column, the row hash is that column's hash at the index.
  * Otherwise the per-column hashes are folded together, in list order, with
    `MurmurHasher/combineHashCode`, starting from 0 (so an empty list always
    hashes to 0)."
  ^xtdb.expression.map.IIndexHasher [^List cols]
  ;; NOTE: the parameter used to read `[^List #_ cols]`; the `#_` discard macro
  ;; swallowed the `cols` symbol itself, leaving the binding vector unreadable.
  (case (.size cols)
    1 (let [^IVectorReader col (.get cols 0)]
        (reify IIndexHasher
          (hashCode [_ idx]
            (.hashCode col idx hasher))))

    (reify IIndexHasher
      (hashCode [_ idx]
        (loop [n 0
               hash-code 0]
          (if (< n (.size cols))
            (let [^IVectorReader col (.get cols n)]
              (recur (inc n) (MurmurHasher/combineHashCode hash-code (.hashCode col idx hasher))))
            hash-code))))))

;; Build-side handle onto a relation map, obtained from `IRelationMap.buildFromRelation`.
;;
;; * `add` unconditionally copies input row `inIdx` into the map.
;; * `addIfNotPresent` returns the (non-negative) index of an already-built row
;;   whose keys compare equal to row `inIdx`; otherwise it inserts the row and
;;   returns the new position encoded via `returned-idx` (a negative number).
#_{:clj-kondo/ignore [:unused-binding :clojure-lsp/unused-public-var]}
(definterface IRelationMapBuilder
  (^void add [^int inIdx])
  (^int addIfNotPresent [^int inIdx]))

;; Probe-side handle onto a relation map, obtained from `IRelationMap.probeFromRelation`.
;;
;; * `indexOf` returns the index of the first built row matching probe row
;;   `inIdx`, or -1 if there is none; when `removeOnMatch` is true the matched
;;   index is also removed from its hash bucket.
;; * `forEachMatch` invokes `c` with the index of every built row in the probe
;;   row's hash bucket for which the comparator returns 1.
;; * `matches` compares probe row `inIdx` against every built row and returns
;;   1 as soon as one matches, otherwise the largest comparator result seen
;;   (starting from -1).
#_{:clj-kondo/ignore [:unused-binding :clojure-lsp/unused-public-var]}
(definterface IRelationMapProber
  (^int indexOf [^int inIdx, ^boolean removeOnMatch])
  (^void forEachMatch [^int inIdx, ^java.util.function.IntConsumer c])
  (^int matches [^int inIdx]))

;; A hash-indexed relation that is built from one side ("build") and then
;; probed from another ("probe"). Implemented by `->relation-map`.
;;
;; * `buildFields` / `probeFields` return the field maps supplied at construction.
;; * `buildKeyColumnNames` / `probeKeyColumnNames` return the key column names
;;   used for hashing and equality on each side.
;; * `buildFromRelation` returns a builder that inserts rows of `inRelation`.
;; * `probeFromRelation` returns a prober that looks up rows of `inRelation`.
;; * `getBuiltRelation` returns a reader over the rows inserted so far.
#_{:clj-kondo/ignore [:unused-binding :clojure-lsp/unused-public-var]}
(definterface IRelationMap
  (^java.util.Map buildFields [])
  (^java.util.List buildKeyColumnNames [])
  (^java.util.Map probeFields [])
  (^java.util.List probeKeyColumnNames [])

  (^xtdb.expression.map.IRelationMapBuilder buildFromRelation [^xtdb.vector.RelationReader inRelation])
  (^xtdb.expression.map.IRelationMapProber probeFromRelation [^xtdb.vector.RelationReader inRelation])
  (^xtdb.vector.RelationReader getBuiltRelation []))

(defn- andIBO
  "Conjunction over `IntBinaryOperator` predicates; usable as a `reduce` fn.

  Zero-arity: the identity element, an operator that always answers 1.
  Two-arity: an operator that answers -1 as soon as `p1` answers -1 (without
  consulting `p2`), and otherwise the smaller of the two operators' answers."
  ([]
   (reify IntBinaryOperator
     (applyAsInt [_ _l _r] 1)))

  ([^IntBinaryOperator p1, ^IntBinaryOperator p2]
   (reify IntBinaryOperator
     (applyAsInt [_ l r]
       (let [first-res (.applyAsInt p1 l r)]
         (if (not= -1 first-res)
           (Math/min first-res (.applyAsInt p2 l r))
           ;; short-circuit: p2 is never evaluated once p1 has rejected the pair
           -1))))))

;; Unique symbols used in the code emitted by `build-comparator`:
;; `*-rel` name the relation parameters of the generated fn, `*-idx` name the
;; two int arguments of the generated `applyAsInt`, and `*-vec` are used by
;; `->equi-comparator` as the variable/column names of the compared columns.
(def ^:private left-rel (gensym 'left-rel))
(def ^:private left-vec (gensym 'left-vec))
(def ^:private left-idx (gensym 'left-idx))

(def ^:private right-rel (gensym 'right-rel))
(def ^:private right-vec (gensym 'right-vec))
(def ^:private right-idx (gensym 'right-idx))

;; Compiles an expression into a comparator factory.
;;
;; Called as `(build-comparator expr input-opts)`; the result is wrapped in
;; `util/lru-memoize`. It returns a fn of
;; `[left-rel right-rel schema params]` which in turn returns an
;; `IntBinaryOperator`. `applyAsInt` takes a left row index and a right row
;; index and yields:
;;   *  0 when the expression's result type is :null,
;;   *  1 when a :bool expression evaluates truthy,
;;   * -1 when a :bool expression evaluates falsy.
;; Any other result type has no `case` clause and so fails at code-generation time.
(def build-comparator
  (-> (fn [expr input-opts]
        (let [{:keys [continue], :as emitted-expr}
              (expr/codegen-expr expr input-opts)]

          ;; build the source form of the factory fn, then `eval` it into a real fn
          (-> `(fn [~(expr/with-tag left-rel RelationReader)
                    ~(expr/with-tag right-rel RelationReader)
                    ~(-> expr/schema-sym (expr/with-tag Map))
                    ~(-> expr/params-sym (expr/with-tag RelationReader))]
                 ;; per-batch bindings are established once, outside the per-row operator
                 (let [~@(expr/batch-bindings emitted-expr)]
                   (reify IntBinaryOperator
                     (~'applyAsInt [_# ~left-idx ~right-idx]
                      ~(continue (fn [res-type code]
                                   (case res-type
                                     :null 0
                                     :bool `(if ~code 1 -1))))))))

              #_(doto clojure.pprint/pprint)
              (eval))))
      (util/lru-memoize)))

;; Fixed value passed in the schema argument position of every comparator
;; built in this namespace (see `->equi-comparator` / `->theta-comparator`).
;; NOTE(review): presumably a placeholder so that expressions touching
;; `pg_catalog/pg_class` can compile without a real schema - confirm.
(def ^:private pg-class-schema-hack
  {"pg_catalog/pg_class" #{}})

(defn- ->equi-comparator
  "Builds an `IntBinaryOperator` testing key equality between a row of
  `left-col` and a row of `right-col`.

  The comparison is `:null-eq` when `nil-keys-equal?` is set and `:=`
  otherwise. Each column is renamed to its gensym'd variable name and wrapped
  in a single-column relation before being handed to the compiled comparator.
  `params` may be nil."
  [^IVectorReader left-col, ^IVectorReader right-col, params
   {:keys [nil-keys-equal? param-types]}]
  (let [eq-op (if nil-keys-equal? :null-eq :=)
        left-arg {:op :variable, :variable left-vec, :rel left-rel, :idx left-idx}
        right-arg {:op :variable, :variable right-vec, :rel right-rel, :idx right-idx}
        var->col-type {left-vec (types/field->col-type (.getField left-col))
                       right-vec (types/field->col-type (.getField right-col))}
        ->comparator (build-comparator {:op :call, :f eq-op
                                        :args [left-arg right-arg]}
                                       {:var->col-type var->col-type
                                        :param-types param-types})
        left-in (RelationReader/from (vr/rel-reader [(.withName left-col (str left-vec))]))
        right-in (RelationReader/from (vr/rel-reader [(.withName right-col (str right-vec))]))]
    (->comparator left-in
                  right-in
                  pg-class-schema-hack
                  (some-> params RelationReader/from))))

(defn- ->theta-comparator
  "Builds an `IntBinaryOperator` evaluating `theta-expr` for a
  (probe row, build row) pair.

  Column types come from `build-fields` merged with `probe-fields` (probe
  wins on a name clash). Every `:variable` node of the prepared expression is
  annotated so that variables named in `probe-fields` are read from the left
  (probe) relation/index and all others from the right (build) one.

  `params` may be nil, in which case nil is passed through to the compiled
  comparator - the same handling as `->equi-comparator`."
  [probe-rel build-rel theta-expr params {:keys [build-fields probe-fields param-types]}]
  (let [col-types (update-vals (merge build-fields probe-fields) types/field->col-type)
        f (build-comparator (->> (expr/form->expr theta-expr {:col-types col-types, :param-types param-types})
                                 (expr/prepare-expr)
                                 (ewalk/postwalk-expr (fn [{:keys [op] :as expr}]
                                                        (cond-> expr
                                                          (= op :variable)
                                                          (into (let [{:keys [variable]} expr]
                                                                  (if (contains? probe-fields variable)
                                                                    {:rel left-rel, :idx left-idx}
                                                                    {:rel right-rel, :idx right-idx})))))))
                            {:var->col-type col-types, :param-types param-types})]
    (f (RelationReader/from probe-rel)
       (RelationReader/from build-rel)
       pg-class-schema-hack
       ;; nil-safe: previously `RelationReader/from` was applied to `params`
       ;; unconditionally, unlike the equi-comparator path.
       (some-> params RelationReader/from))))

(defn- find-in-hash-bitmap
  "Scans the row indices held in `hash-bitmap` and returns the first one for
  which `(comparator idx candidate)` answers 1, or -1 when nothing matches or
  `hash-bitmap` is nil.

  When `remove-on-match?` is truthy the matched index is removed from the
  bitmap before being returned."
  ^long [^RoaringBitmap hash-bitmap, ^IntBinaryOperator comparator, ^long idx, remove-on-match?]
  (if hash-bitmap
    (let [candidates (.getIntIterator hash-bitmap)]
      (loop []
        (if (.hasNext candidates)
          (let [candidate (.next candidates)]
            (if (not= 1 (.applyAsInt comparator idx candidate))
              (recur)
              (do
                ;; safe to mutate here: iteration stops immediately afterwards
                (when remove-on-match?
                  (.remove hash-bitmap candidate))
                candidate)))
          -1)))
    -1))

(defn returned-idx
  "Encodes the position of a freshly inserted row as a negative number:
  `i` becomes `-i - 1` (0 -> -1, 1 -> -2, ...). Inverse of `inserted-idx`."
  ^long [^long inserted-idx]
  (dec (- inserted-idx)))

(defn inserted-idx
  "Decodes an index handed back by the map builder into a row position.
  Negative values (the `returned-idx` encoding) map `r` to `-(r + 1)`;
  non-negative values are returned unchanged."
  ^long [^long returned-idx]
  (if (neg? returned-idx)
    (- (inc returned-idx))
    returned-idx))

(defn ->nil-rel
  "Builds a one-row relation with a null-typed column per entry of `col-names`,
  so every value in that row is nil. (Handy for outer joins.)"
  [col-names]
  (->> col-names
       (map (fn [col-name]
              (let [null-vec (NullVector. (str col-name))]
                ;; exactly one (nil) value per column
                (.setValueCount null-vec 1)
                (vr/vec->reader null-vec))))
       (vr/rel-reader)))

;; Row index of the all-nil row in a built relation: when `->relation-map` is
;; created with `:with-nil-row?`, that row is copied in before any other row.
(def nil-row-idx 0)

;; Creates an `IRelationMap` (also `AutoCloseable`): a relation accumulated in
;; an Arrow `VectorSchemaRoot`, indexed by key hash -> bitmap of row indices.
;;
;; Options:
;;   :key-col-names          key columns for both sides, unless overridden by
;;   :build-key-col-names /  :probe-key-col-names
;;   :build-fields           field-name -> field for the build side
;;   :probe-fields           field-name -> field for the probe side
;;   :store-full-build-rel?  when falsy only the build key columns are stored
;;   :with-nil-row?          when truthy, fields are made nullable and a single
;;                           all-nil row is copied in first
;;   :nil-keys-equal?        compare keys with :null-eq instead of :=
;;   :theta-expr             optional extra predicate applied when probing
;;   :param-fields, :params  parameter fields / values for compiled expressions
;;
;; Closing the returned map closes the underlying relation writer.
(defn ->relation-map ^xtdb.expression.map.IRelationMap
  [^BufferAllocator allocator,
   {:keys [key-col-names store-full-build-rel?
           build-fields probe-fields
           with-nil-row? nil-keys-equal?
           theta-expr param-fields params]
    :as opts}]
  (let [param-types (update-vals param-fields types/field->col-type)
        build-key-col-names (get opts :build-key-col-names key-col-names)
        probe-key-col-names (get opts :probe-key-col-names key-col-names)

        ;; row hash -> RoaringBitmap of indices (into the built relation) sharing that hash
        hash->bitmap (IntObjectHashMap.)
        schema (Schema. (-> build-fields
                            (cond-> (not store-full-build-rel?) (select-keys build-key-col-names))
                            (->> (mapv (fn [[field-name field]]
                                         (cond-> (-> field (types/field-with-name (str field-name)))
                                           with-nil-row? types/->nullable-field))))))]

    ;; if anything below throws, the freshly created root is closed again
    (util/with-close-on-catch [root (VectorSchemaRoot/create schema allocator)]
      (let [rel-writer (vw/root->writer root)]
        (when with-nil-row?
          (doto (.rowCopier rel-writer (->nil-rel (keys build-fields)))
            (.copyRow 0)))

        ;; readers over the key columns of the relation being built
        (let [build-key-cols (mapv #(vw/vec-wtr->rdr (.colWriter rel-writer (str %))) build-key-col-names)]
          ;; looks up the bitmap for `row-hash`, creating and registering an empty one if absent
          (letfn [(compute-hash-bitmap [^long row-hash]
                    (or (.get hash->bitmap row-hash)
                        (let [bitmap (RoaringBitmap.)]
                          (.put hash->bitmap (int row-hash) bitmap)
                          bitmap)))]
            (reify
              IRelationMap
              (buildFields [_] build-fields)
              (buildKeyColumnNames [_] build-key-col-names)
              (probeFields [_] probe-fields)
              (probeKeyColumnNames [_] probe-key-col-names)

              (buildFromRelation [_ in-rel]
                ;; unless the full relation is stored, narrow the input to its key columns
                (let [in-rel (if store-full-build-rel?
                               in-rel
                               (->> (set build-key-col-names)
                                    (mapv #(.readerForName in-rel (str %)))
                                    vr/rel-reader))

                      in-key-cols (mapv #(.readerForName in-rel (str %))
                                        build-key-col-names)

                      ;; NOTE: we might not need to compute `comparator` if the caller never requires `addIfNotPresent` (e.g. joins)
                      !comparator (delay
                                    (->> (map (fn [build-col in-col]
                                                (->equi-comparator in-col build-col params
                                                                   {:nil-keys-equal? nil-keys-equal?,
                                                                    :param-types param-types}))
                                              build-key-cols
                                              in-key-cols)
                                         (reduce andIBO)))

                      hasher (->hasher in-key-cols)

                      row-copier (.rowCopier rel-writer in-rel)]

                  ;; copies input row `idx` into the built relation, records its new index in
                  ;; the bucket, and reports that index in the negative `returned-idx` encoding
                  (letfn [(add ^long [^RoaringBitmap hash-bitmap, ^long idx]
                            (let [out-idx (.copyRow row-copier idx)]
                              (.add hash-bitmap out-idx)
                              (returned-idx out-idx)))]

                    (reify IRelationMapBuilder
                      (add [_ idx]
                        (add (compute-hash-bitmap (.hashCode hasher idx)) idx))

                      (addIfNotPresent [_ idx]
                        ;; an existing equal row yields its non-negative index; otherwise insert
                        (let [^RoaringBitmap hash-bitmap (compute-hash-bitmap (.hashCode hasher idx))
                              out-idx (find-in-hash-bitmap hash-bitmap @!comparator idx false)]
                          (if-not (neg? out-idx)
                            out-idx
                            (add hash-bitmap idx))))))))

              (probeFromRelation [this probe-rel]
                (let [build-rel (.getBuiltRelation this)
                      probe-key-cols (mapv #(.readerForName probe-rel (str %))
                                           probe-key-col-names)

                      ;; one equi-comparator per key column pair, plus the theta comparator when
                      ;; given (`conj` onto the seq places it first), all AND-ed together
                      ^IntBinaryOperator
                      comparator (->> (cond-> (map (fn [build-col probe-col]
                                                     (->equi-comparator probe-col build-col params
                                                                        {:nil-keys-equal? nil-keys-equal?
                                                                         :param-types param-types}))
                                                   build-key-cols
                                                   probe-key-cols)

                                        (some? theta-expr)
                                        (conj (->theta-comparator probe-rel build-rel theta-expr params
                                                                  {:build-fields build-fields
                                                                   :probe-fields probe-fields
                                                                   :param-types param-types})))
                                      (reduce andIBO))

                      hasher (->hasher probe-key-cols)]

                  (reify IRelationMapProber
                    (indexOf [_ idx remove-on-match?]
                      ;; a missing bucket (nil) is handled inside `find-in-hash-bitmap`, giving -1
                      (-> ^RoaringBitmap (.get hash->bitmap (.hashCode hasher idx))
                          (find-in-hash-bitmap comparator idx remove-on-match?)))

                    (forEachMatch [_ idx c]
                      ;; only rows in the probe row's hash bucket are tested
                      (some-> ^RoaringBitmap (.get hash->bitmap (.hashCode hasher idx))
                              (.forEach (reify IntConsumer
                                          (accept [_ out-idx]
                                            (when (= 1 (.applyAsInt comparator idx out-idx))
                                              (.accept c out-idx)))))))


                    (matches [_ probe-idx]
                      ;; TODO: this doesn't use the hashmaps, still a nested loop join
                      ;; `acc` holds the largest comparator result seen so far, starting at -1;
                      ;; a result of 1 returns immediately
                      (let [acc (int-array [-1])]
                        (loop [build-idx 0]
                          (if (= build-idx (.rowCount build-rel))
                            (aget acc 0)
                            (let [res (.applyAsInt comparator probe-idx build-idx)]
                              (if (= 1 res)
                                1
                                (do
                                  (aset acc 0 (Math/max (aget acc 0) res))
                                  (recur (inc build-idx))))))))))))

              (getBuiltRelation [_] (vw/rel-wtr->rdr rel-writer))

              AutoCloseable
              (close [_] (.close rel-writer)))))))))




© 2015 - 2024 Weber Informatics LLC | Privacy Policy