xtdb.operator.join.clj Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of xtdb-core Show documentation
Show all versions of xtdb-core Show documentation
An open source document database with bitemporal graph queries
The newest version!
(ns xtdb.operator.join
(:require [clojure.set :as set]
[clojure.spec.alpha :as s]
[clojure.string]
[clojure.walk :as walk]
[xtdb.bloom :as bloom]
[xtdb.error :as err]
[xtdb.expression :as expr]
[xtdb.expression.map :as emap]
[xtdb.logical-plan :as lp]
[xtdb.operator.project :as project]
[xtdb.operator.scan :as scan]
[xtdb.types :as types]
[xtdb.util :as util]
[xtdb.vector.reader :as vr])
(:import [clojure.lang IFn]
(java.util ArrayList Iterator List)
(java.util.function Consumer IntConsumer)
(java.util.stream IntStream)
(org.apache.arrow.memory BufferAllocator)
org.apache.arrow.vector.BitVector
(org.apache.arrow.vector.types.pojo Field)
org.roaringbitmap.RoaringBitmap
(org.roaringbitmap.buffer MutableRoaringBitmap)
(xtdb ICursor)
xtdb.arrow.VectorReader
(xtdb.expression.map IRelationMap)
(xtdb.operator ProjectionSpec)
(xtdb.vector RelationReader)))
;; Spec for a cross-join plan node: the operator keyword (`:⨯` or `:cross-join`)
;; followed by the left and right relational-algebra sub-expressions.
(defmethod lp/ra-expr :cross-join [_]
(s/cat :op #{:⨯ :cross-join}
:left ::lp/ra-expression
:right ::lp/ra-expression))
;; An equi clause is a single-entry map of expression -> expression
;; (keys are conformed as well as values).
(s/def ::join-equi-clause (s/map-of ::lp/expression ::lp/expression :conform-keys true :count 1))
;; A single join clause: either an equi clause (conformed with the tag
;; `:equi-condition`) or an arbitrary predicate expression (tag `:pred-expr`).
(s/def ::join-condition-clause
(s/or :equi-condition ::join-equi-clause
:pred-expr ::lp/expression))
;; A join condition is a vector of join clauses.
(s/def ::join-condition
(s/coll-of ::join-condition-clause :kind vector?))
;; Spec for an inner-join plan node: operator (`:⋈` or `:join`), the join
;; condition vector, then the left and right sub-expressions.
(defmethod lp/ra-expr :join [_]
(s/cat :op #{:⋈ :join}
:condition ::join-condition
:left ::lp/ra-expression
:right ::lp/ra-expression))
;; Spec for a left-outer-join plan node: operator (`:⟕` or `:left-outer-join`),
;; the join condition vector, then the left and right sub-expressions.
(defmethod lp/ra-expr :left-outer-join [_]
(s/cat :op #{:⟕ :left-outer-join}
:condition ::join-condition
:left ::lp/ra-expression
:right ::lp/ra-expression))
;; Spec for a full-outer-join plan node: operator (`:⟗` or `:full-outer-join`),
;; the join condition vector, then the left and right sub-expressions.
(defmethod lp/ra-expr :full-outer-join [_]
(s/cat :op #{:⟗ :full-outer-join}
:condition ::join-condition
:left ::lp/ra-expression
:right ::lp/ra-expression))
;; Spec for a semi-join plan node: operator (`:⋉` or `:semi-join`),
;; the join condition vector, then the left and right sub-expressions.
(defmethod lp/ra-expr :semi-join [_]
(s/cat :op #{:⋉ :semi-join}
:condition ::join-condition
:left ::lp/ra-expression
:right ::lp/ra-expression))
;; Spec for an anti-join plan node: operator (`:▷` or `:anti-join`),
;; the join condition vector, then the left and right sub-expressions.
(defmethod lp/ra-expr :anti-join [_]
(s/cat :op #{:▷ :anti-join}
:condition ::join-condition
:left ::lp/ra-expression
:right ::lp/ra-expression))
;; Spec for a mark-join plan node. Instead of a bare condition it takes a
;; single-entry `mark-spec` map of {mark-column-name join-condition}, followed
;; by the left and right sub-expressions.
(defmethod lp/ra-expr :mark-join [_]
(s/cat :op #{:mark-join}
:mark-spec (s/map-of ::lp/column ::join-condition, :count 1, :conform-keys true)
:left ::lp/ra-expression
:right ::lp/ra-expression))
;; Spec for a single-join plan node: operator `:single-join`, the join
;; condition vector, then the left and right sub-expressions.
(defmethod lp/ra-expr :single-join [_]
(s/cat :op #{:single-join}
:condition ::join-condition
:left ::lp/ra-expression
:right ::lp/ra-expression))
;; Spec for a mega-join plan node: operator `:mega-join`, a vector of join
;; clauses (`:conditions`) and a collection of input relations (`:relations`).
(defmethod lp/ra-expr :mega-join [_]
(s/cat :op #{:mega-join}
:conditions ::join-condition
:relations (s/coll-of ::lp/ra-expression)))
;; For the code compiled after this point: use unchecked primitive arithmetic
;; and have the compiler warn whenever a math operation falls back to boxing.
(set! *unchecked-math* :warn-on-boxed)
(defn emit-join-children
  "Emits both inputs of `join-expr`: its `:left` and `:right` entries are each
  replaced by the result of calling `lp/emit-expr` on them with `args`."
  [join-expr args]
  (let [emit-child (fn [child] (lp/emit-expr child args))]
    (-> join-expr
        (update :left emit-child)
        (update :right emit-child))))
(defn- cross-product
  "Builds the cartesian product of two relations as one relation of
  `left-rows * right-rows` rows. Output row `i` pairs left row
  `(quot i right-rows)` with right row `(rem i right-rows)`."
  ^xtdb.vector.RelationReader [^RelationReader left-rel, ^RelationReader right-rel]
  (let [left-row-count (.rowCount left-rel)
        right-row-count (.rowCount right-rel)
        row-count (* left-row-count right-row-count)
        left-sel (int-array row-count)
        right-sel (int-array row-count)]
    ;; fill both selection vectors in a single pass over the output rows
    (dotimes [idx row-count]
      (aset left-sel idx ^long (quot idx right-row-count))
      (aset right-sel idx ^long (rem idx right-row-count)))
    (vr/rel-reader (concat (.select left-rel left-sel)
                           (.select right-rel right-sel))
                   row-count)))
;; Cursor producing the cross product of two input cursors.
;;
;; State:
;;  - `left-rels`: list that accumulates a copy of every relation read from `left-cursor`
;;  - `left-rel-iterator` (mutable): iterator over `left-rels` for the current right relation
;;  - `right-rel` (mutable): copy of the right relation currently being combined
;;
;; Each successful `tryAdvance` hands the consumer the cross product of one
;; left relation with the current right relation.
(deftype CrossJoinCursor [^BufferAllocator allocator
^ICursor left-cursor
^ICursor right-cursor
^List left-rels
^:unsynchronized-mutable ^Iterator left-rel-iterator
^:unsynchronized-mutable ^RelationReader right-rel]
ICursor
(tryAdvance [this c]
;; Drain whatever remains of the left cursor, keeping a copy (made with `allocator`) of each relation.
;; NOTE(review): presumably a no-op on later calls once the left cursor is exhausted — confirm against ICursor.
(.forEachRemaining left-cursor
(reify Consumer
(accept [_ left-rel]
(.add left-rels (.copy ^RelationReader left-rel allocator)))))
(boolean
;; Pick the right relation to use: keep the current one while the left iterator
;; still has relations; otherwise close it and pull the next one from the right cursor.
(when-let [right-rel (or (when (and left-rel-iterator (.hasNext left-rel-iterator))
right-rel)
(do
(when right-rel
(.close right-rel)
(set! (.right-rel this) nil))
(when (.tryAdvance right-cursor
(reify Consumer
(accept [_ right-rel]
;; store a copy of the new right relation and restart iteration over the left relations
(set! (.right-rel this) (.copy ^RelationReader right-rel allocator))
(set! (.left-rel-iterator this) (.iterator left-rels)))))
(.right-rel this))))]
;; Emit one (left-rel x right-rel) product per call; yields false when there is no left relation to pair.
(when-let [left-rel (when (.hasNext left-rel-iterator)
(.next left-rel-iterator))]
(.accept c (cross-product left-rel right-rel))
true))))
(close [_]
;; Close every buffered left relation, then both cursors and the current right relation.
(when left-rels
(run! util/try-close left-rels)
(.clear left-rels))
(util/try-close left-cursor)
(util/try-close right-rel)
(util/try-close right-cursor)))
(defn emit-cross-join
  "Emits a cross join over two already-emitted inputs (`:left`, `:right`).
  The output fields are the merge of both inputs' fields; the cursor is a
  `CrossJoinCursor` over the two input cursors."
  [{:keys [left right]}]
  (letfn [(->cross-join-cursor [{:keys [allocator]} left-cursor right-cursor]
            (CrossJoinCursor. allocator left-cursor right-cursor (ArrayList.) nil nil))]
    (lp/binary-expr left right
      (fn [left-fields right-fields]
        {:fields (merge left-fields right-fields)
         :->cursor ->cross-join-cursor}))))
;; Emits both children of the cross-join expression, then the cross join itself.
(defmethod lp/emit-expr :cross-join [join-expr args]
  (-> join-expr
      (emit-join-children args)
      (emit-cross-join)))
(defn- build-phase
  "Drains `build-cursor`, adding every row of each relation it yields to
  `rel-map`. When `pushdown-blooms` is non-nil, the bloom hashes of each build
  key column's values are also added to the bitmap at the matching position."
  [^ICursor build-cursor, ^IRelationMap rel-map, pushdown-blooms]
  (.forEachRemaining build-cursor
                     (reify Consumer
                       (accept [_ rel]
                         (let [^RelationReader build-rel rel
                               builder (.buildFromRelation rel-map build-rel)
                               key-col-names (vec (.buildKeyColumnNames rel-map))
                               row-count (.rowCount build-rel)]
                           ;; index every row of this relation in the relation map
                           (dotimes [row-idx row-count]
                             (.add builder row-idx))

                           (when pushdown-blooms
                             (dotimes [col-idx (count key-col-names)]
                               (let [key-col-name (nth key-col-names col-idx)
                                     key-col (.readerForName build-rel (str key-col-name))
                                     ^MutableRoaringBitmap bloom (nth pushdown-blooms col-idx)]
                                 (dotimes [row-idx row-count]
                                   (.add bloom ^ints (bloom/bloom-hashes (VectorReader/from key-col) row-idx)))))))))))
;; Multimethod implementing the probe side of a hash join.
;; Dispatches on `join-type` alone; the remaining arguments (the probe relation,
;; the relation map, and a bitmap of matched build indexes) are only passed
;; through to the selected implementation.
#_{:clj-kondo/ignore [:unused-binding]}
(defmulti ^xtdb.vector.RelationReader probe-phase
(fn [join-type probe-rel rel-map matched-build-idxs]
join-type))
(defn- probe-inner-join-select
  "Probes `rel-map` with every row of `probe-rel`.

  Returns `[probe-sel build-sel]`: two int arrays of equal length where entry
  `i` of each names the probe row and the build row of the i-th match."
  [^RelationReader probe-rel ^IRelationMap rel-map]
  (let [prober (.probeFromRelation rel-map probe-rel)
        probe-idxs (IntStream/builder)
        build-idxs (IntStream/builder)
        row-count (.rowCount probe-rel)]
    (dotimes [probe-idx row-count]
      (.forEachMatch prober probe-idx
                     (reify IntConsumer
                       (accept [_ build-idx]
                         (.add build-idxs build-idx)
                         (.add probe-idxs probe-idx)))))
    (let [probe-sel (.toArray (.build probe-idxs))
          build-sel (.toArray (.build build-idxs))]
      [probe-sel build-sel])))
(defn- join-rels
  "Combines the relation built inside `rel-map` with `probe-rel`: selects
  `build-sel` from the built relation and `probe-sel` from the probe relation,
  and returns one relation holding the columns of both (build columns first)."
  [^RelationReader probe-rel
   ^IRelationMap rel-map
   [probe-sel build-sel :as _selection-pair]]
  (let [build-cols (-> (.getBuiltRelation rel-map)
                       (.select build-sel))
        probe-cols (.select probe-rel probe-sel)]
    (vr/rel-reader (concat build-cols probe-cols))))
(defn- probe-semi-join-select
  "Returns the selection (int array of row indexes) of `probe-rel` rows for
  which the prober's `indexOf` reports a non-negative index, i.e. the rows
  that qualify for a semi-join."
  ^ints [^RelationReader probe-rel, ^IRelationMap rel-map]
  (let [prober (.probeFromRelation rel-map probe-rel)
        matched-idxs (IntStream/builder)
        row-count (.rowCount probe-rel)]
    (dotimes [probe-idx row-count]
      (let [build-idx (.indexOf prober probe-idx false)]
        (when (<= 0 build-idx)
          (.add matched-idxs probe-idx))))
    (.toArray (.build matched-idxs))))
;; Inner join: compute the matching (probe, build) selection pair and
;; materialise the joined relation from it.
(defmethod probe-phase ::inner-join
  [_join-type ^RelationReader probe-rel ^IRelationMap rel-map _matched-build-idxs]
  (->> (probe-inner-join-select probe-rel rel-map)
       (join-rels probe-rel rel-map)))
;; Semi join: keep only the probe rows selected by `probe-semi-join-select`.
(defmethod probe-phase ::semi-join
  [_join-type ^RelationReader probe-rel ^IRelationMap rel-map _matched-build-idxs]
  (let [^ints matched-sel (probe-semi-join-select probe-rel rel-map)]
    (.select probe-rel matched-sel)))
;; Anti join: keep only the probe rows for which the prober's `matches`
;; returns a negative result.
(defmethod probe-phase ::anti-semi-join
  [_join-type, ^RelationReader probe-rel, ^IRelationMap rel-map, _matched-build-idxs]
  (let [prober (.probeFromRelation rel-map probe-rel)
        unmatched-idxs (IntStream/builder)
        row-count (.rowCount probe-rel)]
    (dotimes [probe-idx row-count]
      (when (neg? (.matches prober probe-idx))
        (.add unmatched-idxs probe-idx)))
    (let [anti-sel (.toArray (.build unmatched-idxs))]
      (.select probe-rel anti-sel))))
;; Mark join (as a `probe-phase` method): same selection as the semi join —
;; the probe rows selected by `probe-semi-join-select`.
(defmethod probe-phase ::mark-join
  [_join-type ^RelationReader probe-rel ^IRelationMap rel-map _matched-build-idxs]
  (let [^ints matched-sel (probe-semi-join-select probe-rel rel-map)]
    (.select probe-rel matched-sel)))
(defn- int-array-concat
  "Returns a fresh int array holding the elements of `arr1` followed by the
  elements of `arr2`. Neither input is modified."
  ^ints [^ints arr1 ^ints arr2]
  (let [len1 (alength arr1)
        len2 (alength arr2)
        joined (int-array (+ len1 len2))]
    (System/arraycopy arr1 0 joined 0 len1)
    (System/arraycopy arr2 0 joined len1 len2)
    joined))
(defn- probe-outer-join-select
  "Outer-join variant of the inner-join selection.

  Starts from the inner-join `[probe-sel build-sel]` pair, records the matched
  build indexes in `matched-build-idxs` (when one is supplied), then appends
  every probe row that had no match, paired with `emap/nil-row-idx` on the
  build side. Returns the extended `[probe-sel build-sel]` pair."
  [^RelationReader probe-rel, rel-map, ^RoaringBitmap matched-build-idxs]
  (let [[probe-sel build-sel] (probe-inner-join-select probe-rel rel-map)]
    (when matched-build-idxs
      (.add matched-build-idxs ^ints build-sel))
    (let [matched-probe-idxs (RoaringBitmap/bitmapOf probe-sel)
          unmatched-probe-idxs (IntStream/builder)
          nil-build-idxs (IntStream/builder)]
      (dotimes [idx (.rowCount probe-rel)]
        (when-not (.contains matched-probe-idxs idx)
          (.add unmatched-probe-idxs idx)
          (.add nil-build-idxs emap/nil-row-idx)))
      [(int-array-concat probe-sel (.toArray (.build unmatched-probe-idxs)))
       (int-array-concat build-sel (.toArray (.build nil-build-idxs)))])))
;; Both outer-join flavours derive from ::outer-join, so the single
;; `probe-phase ::outer-join` method handles either join type.
(derive ::full-outer-join ::outer-join)
(derive ::left-outer-join ::outer-join)
;; Outer join: compute the outer-join selection pair (which also records the
;; matched build rows in `matched-build-idxs`) and materialise the joined relation.
(defmethod probe-phase ::outer-join
  [_join-type ^RelationReader probe-rel ^IRelationMap rel-map ^RoaringBitmap matched-build-idxs]
  (let [selection-pair (probe-outer-join-select probe-rel rel-map matched-build-idxs)]
    (join-rels probe-rel rel-map selection-pair)))
;; Single join: every probe row appears exactly once in the output.
;;  - exactly one match   -> paired with that build row
;;  - no match            -> paired with `emap/nil-row-idx`
;;  - more than one match -> throws a :xtdb.single-join/cardinality-violation runtime error
(defmethod probe-phase ::single-join
  [_join-type ^RelationReader probe-rel ^IRelationMap rel-map _matched-build-idxs]
  (let [prober (.probeFromRelation rel-map probe-rel)
        build-idxs (IntStream/builder)
        probe-idxs (IntStream/builder)]
    (dotimes [probe-idx (.rowCount probe-rel)]
      ;; one-element array used as a mutable flag visible inside the callback
      (let [seen-match? (boolean-array 1)]
        (.forEachMatch prober probe-idx
                       (reify IntConsumer
                         (accept [_ build-idx]
                           (when (aget seen-match? 0)
                             (throw (err/runtime-err :xtdb.single-join/cardinality-violation
                                                     {::err/message "cardinality violation"})))
                           (aset seen-match? 0 true)
                           (.add build-idxs build-idx)
                           (.add probe-idxs probe-idx))))
        (when-not (aget seen-match? 0)
          (.add probe-idxs probe-idx)
          (.add build-idxs emap/nil-row-idx))))
    (join-rels probe-rel rel-map
               [(.toArray (.build probe-idxs)) (.toArray (.build build-idxs))])))
;; Hash-join cursor shared by the inner/outer/semi/anti/single join operators.
;;
;; Fields:
;;  - `build-cursor`: input whose rows are loaded into `rel-map`
;;  - `probe-cursor` (mutable): created lazily from the `->probe-cursor` thunk
;;  - `matched-build-idxs`: bitmap of build rows that matched (used here for ::full-outer-join)
;;  - `pushdown-blooms`: optional vector of bitmaps, one per key column, or nil
;;  - `join-type`: dispatch value handed to `probe-phase`
(deftype JoinCursor [^BufferAllocator allocator, ^ICursor build-cursor,
^:unsynchronized-mutable ^ICursor probe-cursor
^IFn ->probe-cursor
^IRelationMap rel-map
^RoaringBitmap matched-build-idxs
pushdown-blooms
join-type]
ICursor
(tryAdvance [this c]
;; Load whatever the build cursor still has to offer into the relation map (and the blooms).
(build-phase build-cursor rel-map pushdown-blooms)
(boolean
;; one-element array used as a mutable "did we emit anything?" flag for the callback below
(or (let [advanced? (boolean-array 1)]
;; While probing, expose the blooms to the scan layer keyed by probe key column name (only when blooms exist).
(binding [scan/*column->pushdown-bloom* (cond-> scan/*column->pushdown-bloom*
(some? pushdown-blooms) (conj (zipmap (.probeKeyColumnNames rel-map) pushdown-blooms)))]
;; The probe cursor is opened on first use, inside the binding above.
(when-not probe-cursor
(util/with-close-on-catch [probe-cursor (->probe-cursor)]
(set! (.probe-cursor this) probe-cursor)))
;; Keep pulling probe relations until one yields a non-empty output or the probe cursor is exhausted.
(while (and (not (aget advanced? 0))
(.tryAdvance ^ICursor (.probe-cursor this)
(reify Consumer
(accept [_ probe-rel]
(when (pos? (.rowCount ^RelationReader probe-rel))
;; the joined relation is copied with `allocator` and closed once the consumer has seen it
(with-open [out-rel (-> (probe-phase join-type probe-rel rel-map matched-build-idxs)
(.copy allocator))]
(when (pos? (.rowCount out-rel))
(aset advanced? 0 true)
(.accept c out-rel))))))))))
(aget advanced? 0))
;; Probe side produced nothing more: for a full outer join, emit the build rows that never matched.
(when (= ::full-outer-join join-type)
(let [build-rel (.getBuiltRelation rel-map)
build-row-count (long (.rowCount build-rel))
unmatched-build-idxs (RoaringBitmap/flip matched-build-idxs 0 build-row-count)]
;; the nil-row index is never emitted as an unmatched build row
(.remove unmatched-build-idxs emap/nil-row-idx)
(when-not (.isEmpty unmatched-build-idxs)
;; this means .isEmpty will be true on the next iteration (we flip the bitmap)
(.add matched-build-idxs 0 build-row-count)
;; pair every unmatched build row with index 0 of a nil relation carrying the probe-side columns
(let [nil-rel (emap/->nil-rel (set (keys (.probeFields rel-map))))
build-sel (.toArray unmatched-build-idxs)
probe-sel (int-array (alength build-sel))]
(.accept c (join-rels nil-rel rel-map [probe-sel build-sel]))
true)))))))
(close [_]
;; Clear the blooms, then close the relation map and both cursors.
(run! #(.clear ^MutableRoaringBitmap %) pushdown-blooms)
(util/try-close rel-map)
(util/try-close build-cursor)
(util/try-close probe-cursor)))
(defn- equi-spec
  "Describes how to obtain the join-key column for each side of one equi clause.

  `condition` is a single-entry map `{left-expr right-expr}`. For each side the
  result holds `:key-col-name`; when that side's expression is not a plain
  symbol it also holds a `:projection` spec computing the expression into a
  synthetic column named `?join-expr-<side>-<idx>`.

  Returns `{:left {...}, :right {...}}`."
  [idx condition left-fields right-fields param-fields]
  (let [[left-expr right-expr] (first condition)
        ->side-spec (fn [side form fields]
                      (if-not (symbol? form)
                        (let [input-types {:col-types (update-vals fields types/field->col-type)
                                           :param-types (update-vals param-fields types/field->col-type)}
                              col-name (symbol (format "?join-expr-%s-%d" (name side) idx))
                              expr (expr/form->expr form input-types)]
                          {:key-col-name col-name
                           :projection (expr/->expression-projection-spec col-name expr input-types)})
                        ;; a bare column reference is used as the key column directly
                        {:key-col-name form}))]
    {:left (->side-spec :left left-expr left-fields)
     :right (->side-spec :right right-expr right-fields)}))
(defn- projection-specs->fields
  "Builds a map of column name -> field from a collection of projection specs,
  converting each spec's column type with `types/col-type->field`."
  [projection-specs]
  (reduce (fn [fields ^ProjectionSpec spec]
            (assoc fields
                   (.getColumnName spec)
                   (types/col-type->field (.getColumnType spec))))
          {}
          projection-specs))
;; Shared scaffolding for the hash-join emitters.
;;
;; Takes a join expression whose `:left`/`:right` children are already emitted
;; (each a map with `:fields` and `:->cursor`), plus a callback `f`.
;; `f` receives a map of `:left-fields`, `:left-key-col-names`, `:right-fields`,
;; `:right-key-col-names` and `:theta-expr`, and must return `{:fields .., :->cursor ..}`.
;; Returns `{:fields .., :->cursor ..}` for the whole join, with the synthetic
;; key-expression columns projected away again.
(defn- emit-join-expr {:style/indent 2} [{:keys [condition left right]} {:keys [param-fields] :as _args} f]
(let [{left-fields :fields, ->left-cursor :->cursor} left
{right-fields :fields, ->right-cursor :->cursor} right
;; conformed clauses are [tag value] pairs; split them by tag
{equis :equi-condition, thetas :pred-expr} (group-by first condition)
;; all non-equi predicates are AND-ed into a single expression (nil when there are none)
theta-expr (when-let [theta-exprs (seq (map second thetas))]
(list* 'and theta-exprs))
equi-specs (->> (map last equis)
(into [] (map-indexed (fn [idx condition]
(equi-spec idx condition left-fields right-fields param-fields)))))
;; each side keeps all of its own columns and gains one column per computed key expression
left-projections (vec (concat (for [[col-name ^Field field] left-fields]
(project/->identity-projection-spec col-name field))
(keep (comp :projection :left) equi-specs)))
right-projections (vec (concat (for [[col-name field] right-fields]
(project/->identity-projection-spec col-name field))
(keep (comp :projection :right) equi-specs)))
{:keys [fields ->cursor]} (f {:left-fields (projection-specs->fields left-projections)
:left-key-col-names (mapv (comp :key-col-name :left) equi-specs)
:right-fields (projection-specs->fields right-projections)
:right-key-col-names (mapv (comp :key-col-name :right) equi-specs)
:theta-expr theta-expr})
;; output = every field returned by `f` except the key columns that were introduced by a projection
project-away-specs (->> (set/difference (set (keys fields))
(->> equi-specs
(into #{} (comp (mapcat (juxt :left :right))
(filter :projection)
(map :key-col-name)))))
(mapv #(project/->identity-projection-spec % (get fields %))))]
{:fields (projection-specs->fields project-away-specs)
:->cursor (fn [opts]
;; the input cursors are handed to `->cursor` as zero-arg thunks, so the join decides when to open each side
(let [->left-project-cursor #(project/->project-cursor opts (->left-cursor opts) left-projections)
->right-project-cursor #(project/->project-cursor opts (->right-cursor opts) right-projections)
join-cursor (->cursor opts ->left-project-cursor ->right-project-cursor)]
(project/->project-cursor opts join-cursor project-away-specs)))}))
(defn- ->pushdown-blooms
  "Returns a vector with one fresh, empty `MutableRoaringBitmap` per entry of
  `key-col-names`."
  [key-col-names]
  (let [bloom-count (count key-col-names)]
    (into [] (repeatedly bloom-count #(MutableRoaringBitmap.)))))
(defn emit-join-expr-and-children
  "Emits the `:left`/`:right` children of `join-expr`, then emits the join
  itself through `emit-join-expr` with the callback `f`."
  {:style/indent 2}
  [join-expr args f]
  (let [emitted-children (emit-join-children join-expr args)]
    (emit-join-expr emitted-children args f)))
(defn emit-inner-join-expr
  "Emits an inner hash join for a `join-expr` whose children are already
  emitted. The left input is the build side and the right input the probe
  side; pushdown blooms are created, one per key column."
  [join-expr {:keys [param-fields] :as args}]
  (emit-join-expr join-expr args
    (fn [{:keys [left-fields right-fields left-key-col-names right-key-col-names theta-expr]}]
      (let [rel-map-opts {:build-fields left-fields
                          :build-key-col-names left-key-col-names
                          :probe-fields right-fields
                          :probe-key-col-names right-key-col-names
                          :store-full-build-rel? true
                          :theta-expr theta-expr
                          :param-fields param-fields}]
        {:fields (merge-with types/merge-fields left-fields right-fields)
         :->cursor (fn [{:keys [allocator params]} ->left-cursor ->right-cursor]
                     (util/with-close-on-catch [left-cursor (->left-cursor)]
                       (let [rel-map (emap/->relation-map allocator (assoc rel-map-opts :params params))
                             pushdown-blooms (->pushdown-blooms right-key-col-names)]
                         (JoinCursor. allocator left-cursor nil ->right-cursor
                                      rel-map nil pushdown-blooms ::inner-join))))}))))
;; Inner join: emit both children, then the inner hash join over them.
(defmethod lp/emit-expr :join [join-expr args]
  (-> join-expr
      (emit-join-children args)
      (emit-inner-join-expr args)))
;; Left outer join: the right input is the build side (with a nil row), the
;; left input is probed. Right-side fields become nullable in the output.
(defmethod lp/emit-expr :left-outer-join [join-expr {:keys [param-fields] :as args}]
  (emit-join-expr-and-children join-expr args
    (fn [{:keys [left-fields right-fields left-key-col-names right-key-col-names theta-expr]}]
      (let [rel-map-opts {:build-fields right-fields
                          :build-key-col-names right-key-col-names
                          :probe-fields left-fields
                          :probe-key-col-names left-key-col-names
                          :store-full-build-rel? true
                          :with-nil-row? true
                          :theta-expr theta-expr
                          :param-fields param-fields}]
        {:fields (merge-with types/merge-fields left-fields (types/with-nullable-fields right-fields))
         :->cursor (fn [{:keys [allocator params]} ->left-cursor ->right-cursor]
                     (util/with-close-on-catch [right-cursor (->right-cursor)]
                       (let [rel-map (emap/->relation-map allocator (assoc rel-map-opts :params params))]
                         (JoinCursor. allocator right-cursor nil ->left-cursor
                                      rel-map nil nil ::left-outer-join))))}))))
;; Full outer join: the left input is the build side (with a nil row), the
;; right input is probed, and a fresh RoaringBitmap tracks which build rows
;; matched so the cursor can emit the unmatched ones at the end.
;; Fields from both sides become nullable in the output.
(defmethod lp/emit-expr :full-outer-join [join-expr {:keys [param-fields] :as args}]
  (emit-join-expr-and-children join-expr args
    (fn [{:keys [left-fields right-fields left-key-col-names right-key-col-names theta-expr]}]
      ;; The values being merged are fields (not col-types), so a column name
      ;; present on both sides must be merged with `types/merge-fields`, exactly
      ;; as the other join emitters in this namespace do.
      {:fields (merge-with types/merge-fields
                           (types/with-nullable-fields left-fields)
                           (types/with-nullable-fields right-fields))
       :->cursor (fn [{:keys [allocator params]} ->left-cursor ->right-cursor]
                   (util/with-close-on-catch [left-cursor (->left-cursor)]
                     (JoinCursor. allocator left-cursor nil ->right-cursor
                                  (emap/->relation-map allocator {:build-fields left-fields
                                                                  :build-key-col-names left-key-col-names
                                                                  :probe-fields right-fields
                                                                  :probe-key-col-names right-key-col-names
                                                                  :store-full-build-rel? true
                                                                  :with-nil-row? true
                                                                  :theta-expr theta-expr
                                                                  :param-fields param-fields
                                                                  :params params})
                                  (RoaringBitmap.) nil ::full-outer-join)))})))
;; Semi join: the right input is the build side, the left input is probed, and
;; only the left fields appear in the output. Pushdown blooms are created, one
;; per key column.
(defmethod lp/emit-expr :semi-join [join-expr {:keys [param-fields] :as args}]
  (emit-join-expr-and-children join-expr args
    (fn [{:keys [left-fields right-fields left-key-col-names right-key-col-names theta-expr]}]
      (let [rel-map-opts {:build-fields right-fields
                          :build-key-col-names right-key-col-names
                          :probe-fields left-fields
                          :probe-key-col-names left-key-col-names
                          :store-full-build-rel? true
                          :theta-expr theta-expr
                          :param-fields param-fields}]
        {:fields left-fields
         :->cursor (fn [{:keys [allocator params]} ->left-cursor ->right-cursor]
                     (util/with-close-on-catch [right-cursor (->right-cursor)]
                       (let [rel-map (emap/->relation-map allocator (assoc rel-map-opts :params params))
                             pushdown-blooms (->pushdown-blooms right-key-col-names)]
                         (JoinCursor. allocator right-cursor nil ->left-cursor
                                      rel-map nil pushdown-blooms ::semi-join))))}))))
;; Anti join: the right input is the build side, the left input is probed, and
;; only the left fields appear in the output. No pushdown blooms are used.
(defmethod lp/emit-expr :anti-join [join-expr {:keys [param-fields] :as args}]
  (emit-join-expr-and-children join-expr args
    (fn [{:keys [left-fields right-fields left-key-col-names right-key-col-names theta-expr]}]
      (let [rel-map-opts {:build-fields right-fields
                          :build-key-col-names right-key-col-names
                          :probe-fields left-fields
                          :probe-key-col-names left-key-col-names
                          :store-full-build-rel? true
                          :theta-expr theta-expr
                          :param-fields param-fields}]
        {:fields left-fields
         :->cursor (fn [{:keys [allocator params]} ->left-cursor ->right-cursor]
                     (util/with-close-on-catch [right-cursor (->right-cursor)]
                       (let [rel-map (emap/->relation-map allocator (assoc rel-map-opts :params params))]
                         (JoinCursor. allocator right-cursor nil ->left-cursor
                                      rel-map nil nil ::anti-semi-join))))}))))
(defn- mark-join-probe-phase
  "Fills `mark-col` with one entry per row of `probe-rel`, based on the
  prober's `matches` result for that row:
  1 -> bit set to 1, -1 -> bit set to 0, 0 -> null."
  [^IRelationMap rel-map, ^RelationReader probe-rel, ^BitVector mark-col]
  (let [prober (.probeFromRelation rel-map probe-rel)
        row-count (.rowCount probe-rel)]
    (dotimes [row-idx row-count]
      (let [outcome (.matches prober row-idx)]
        (if-not (zero? outcome)
          (.set mark-col row-idx (case outcome 1 1, -1 0))
          (.setNull mark-col row-idx))))))
;; Mark join: outputs every left (probe) row plus one extra nullable boolean
;; column, named by the single key of `mark-spec`, filled in by
;; `mark-join-probe-phase`. The right input is the build side.
(defmethod lp/emit-expr :mark-join [{:keys [mark-spec] :as join-expr} {:keys [param-fields] :as args}]
;; mark-spec is a single-entry map {mark-column-name join-condition}
(let [[mark-col-name mark-condition] (first mark-spec)]
;; reuse the generic join scaffolding by presenting the mark condition as the join's :condition
(emit-join-expr-and-children (assoc join-expr :condition mark-condition) args
(fn [{:keys [left-fields right-fields left-key-col-names right-key-col-names theta-expr]}]
{:fields (assoc left-fields mark-col-name (types/->field-default-name #xt.arrow/type :bool true nil))
:->cursor
(fn [{:keys [^BufferAllocator allocator params]} ->probe-cursor ->build-cursor]
(util/with-close-on-catch [build-cursor (->build-cursor)]
;; the probe cursor is opened lazily on the first tryAdvance, hence the volatile
(let [!probe-cursor (volatile! nil)
rel-map (emap/->relation-map allocator {:build-fields right-fields
:build-key-col-names right-key-col-names
:probe-fields left-fields
:probe-key-col-names left-key-col-names
:store-full-build-rel? true
:theta-expr theta-expr
:param-fields param-fields
:params params})
;; one empty bitmap per key column
pushdown-blooms (vec (repeatedly (count right-key-col-names) #(MutableRoaringBitmap.)))]
(reify ICursor
(tryAdvance [_ c]
;; load whatever the build cursor still has to offer into the relation map (and the blooms)
(build-phase build-cursor rel-map pushdown-blooms)
(boolean
(let [advanced? (boolean-array 1)]
;; while probing, expose the blooms to the scan layer keyed by probe key column name
(binding [scan/*column->pushdown-bloom* (conj scan/*column->pushdown-bloom*
(zipmap (.probeKeyColumnNames rel-map) pushdown-blooms))]
(when-not @!probe-cursor
(util/with-close-on-catch [probe-cursor (->probe-cursor)]
(vreset! !probe-cursor probe-cursor)))
;; skip empty probe relations; stop after the first non-empty one has been emitted
(while (and (not (aget advanced? 0))
(.tryAdvance ^ICursor @!probe-cursor
(reify Consumer
(accept [_ probe-rel]
(let [^RelationReader probe-rel probe-rel
row-count (.rowCount probe-rel)]
(when (pos? row-count)
(aset advanced? 0 true)
;; both the copied probe relation and the mark vector are closed after the consumer has seen them
(with-open [probe-rel (.copy probe-rel allocator)
mark-col (doto (BitVector. (name mark-col-name) allocator)
(.allocateNew row-count)
(.setValueCount row-count))]
(mark-join-probe-phase rel-map probe-rel mark-col)
;; output = the probe columns plus the mark column
(let [out-cols (conj (seq probe-rel) (vr/vec->reader mark-col))]
(.accept c (vr/rel-reader out-cols row-count))))))))))))
(aget advanced? 0))))
(close [_]
;; clear the blooms, then close the relation map and both cursors
(run! #(.clear ^MutableRoaringBitmap %) pushdown-blooms)
(util/try-close rel-map)
(util/try-close build-cursor)
(util/try-close @!probe-cursor))))))}))))
;; Single join: the right input is the build side (with a nil row), the left
;; input is probed. Right-side fields become nullable in the output.
(defmethod lp/emit-expr :single-join [join-expr {:keys [param-fields] :as args}]
  (emit-join-expr-and-children join-expr args
    (fn [{:keys [left-fields right-fields left-key-col-names right-key-col-names theta-expr]}]
      (let [rel-map-opts {:build-fields right-fields
                          :build-key-col-names right-key-col-names
                          :probe-fields left-fields
                          :probe-key-col-names left-key-col-names
                          :store-full-build-rel? true
                          :with-nil-row? true
                          :theta-expr theta-expr
                          :param-fields param-fields}]
        {:fields (merge-with types/merge-fields left-fields (types/with-nullable-fields right-fields))
         :->cursor (fn [{:keys [allocator params]} ->left-cursor ->right-cursor]
                     (util/with-close-on-catch [right-cursor (->right-cursor)]
                       (let [rel-map (emap/->relation-map allocator (assoc rel-map-opts :params params))]
                         (JoinCursor. allocator right-cursor nil ->left-cursor
                                      rel-map nil nil ::single-join))))}))))
(defn columns
  "Returns the set of keys of the relation's `:fields` map (empty when absent)."
  [relation]
  (-> relation :fields keys set))
(defn expr->columns
  "Returns the set of column symbols referenced by `expr`.

  - A bare symbol is a column unless its name starts with `?`.
  - For a call form (a seq), the symbols in argument position are collected
    recursively; the head of each form is skipped, as are symbols starting
    with `?` and any non-symbol, non-seq arguments.
  - A scalar literal (number, string, keyword, boolean, nil) references no
    columns and yields the empty set.

  The symbol `xtdb/end-of-time` is never reported as a column."
  [expr]
  (letfn [(column-symbol? [x]
            (and (symbol? x)
                 (not (clojure.string/starts-with? (str x) "?"))))
          ;; replaces each call form with the flat seq of columns found in its arguments
          (collect-columns [token]
            (if (seq? token)
              (mapcat (fn [child]
                        (cond
                          (seq? child) child
                          (column-symbol? child) [child]))
                      (rest token))
              token))]
    (-> (if (symbol? expr)
          (if (column-symbol? expr) #{expr} #{})
          (let [walked (walk/postwalk collect-columns expr)]
            ;; A scalar literal comes back from the walk unchanged; calling `set`
            ;; on it would throw (numbers, keywords) or produce a set of
            ;; characters (strings), so treat it as "no columns".
            (if (coll? walked)
              (set walked)
              #{})))
        (disj 'xtdb/end-of-time))))
(defn adjust-to-equi-condition
  "Normalises one annotated join condition so it can be used as an equi clause
  with the current plan on the left.

  - An equi condition is returned unchanged when the columns of its left-hand
    expression equal `cols-from-current-rel`; otherwise its two sides are swapped.
  - A predicate accepted by `lp/equals-predicate?` whose two operands are
    exactly one column from the current relation and one column from the other
    side is rewritten as an equi condition (current-relation column first).
  - Anything else is returned unchanged."
  [{:keys [condition cols-from-current-rel other-cols] :as join-condition}]
  (let [clause (last condition)]
    (cond
      (= :equi-condition (first condition))
      (let [[lhs rhs] (first clause)]
        (if (= cols-from-current-rel (expr->columns lhs))
          condition
          [:equi-condition {rhs lhs}]))

      (lp/equals-predicate? clause)
      (let [[_ a b] clause
            from-current? (fn [col] (= cols-from-current-rel #{col}))
            from-other? (fn [col] (= other-cols #{col}))]
        (cond
          (and (from-current? a) (from-other? b)) [:equi-condition {a b}]
          (and (from-current? b) (from-other? a)) [:equi-condition {b a}]
          :else condition))

      :else condition)))
(defn find-join-conditions-which-contain-cols-from-plan
  "Returns the conditions that reference at least one column of `plan`.

  Each returned condition is annotated with `:cols-from-current-rel` (its
  columns that `plan` provides) and `:other-cols` (its remaining columns)."
  [plan conditions]
  (let [plan-cols (columns plan)]
    (for [condition conditions
          :let [shared-cols (set/intersection plan-cols (:cols condition))]
          :when (seq shared-cols)]
      (assoc condition
             :cols-from-current-rel shared-cols
             :other-cols (set/difference (:cols condition) shared-cols)))))
(defn match-relations-to-potential-join-clauses
  "For each relation in `rels`, finds the conditions whose `:other-cols` are
  all provided by that relation. Relations with at least one such condition
  are returned with those conditions (each flagged `:all-cols-present? true`)
  attached under `:valid-join-conditions-for-rel`; other relations are dropped."
  [rels conditions]
  (keep (fn [rel]
          (let [rel-cols (columns rel)
                satisfiable (for [condition conditions
                                  :let [satisfied? (empty? (set/difference (:other-cols condition) rel-cols))]
                                  :when satisfied?]
                              (assoc condition :all-cols-present? satisfied?))]
            (when-let [valid-conditions (not-empty satisfiable)]
              (assoc rel :valid-join-conditions-for-rel valid-conditions))))
        rels))
(defn remove-joined-relation
  "Returns `rels` without the relations whose `:relation-id` equals that of
  `join-candidate`."
  [join-candidate rels]
  (let [joined-id (:relation-id join-candidate)]
    (filter (fn [rel] (not= joined-id (:relation-id rel)))
            rels)))
(defn remove-used-join-conditions
  "Returns `conditions` without those whose `:condition-id` appears among the
  `:valid-join-conditions-for-rel` of `join-candidate`."
  [join-candidate conditions]
  (let [used-ids (into #{}
                       (map :condition-id)
                       (:valid-join-conditions-for-rel join-candidate))]
    (remove (fn [condition] (contains? used-ids (:condition-id condition)))
            conditions)))
(defn build-plan-for-next-sub-graph
  "Greedily grows a join plan starting from the first of `relations`.

  At each step, the first remaining relation that can be attached through the
  unused conditions is inner-joined onto the plan (plan on the left). Stops
  when no relations remain or none can be attached.

  Returns a map of `:sub-graph-plan`, `:sub-graph-unused-rels`,
  `:sub-graph-unused-conditions` and `:sub-graph-join-order` (the relation ids
  in the order they were joined)."
  [conditions relations args]
  (loop [plan (first relations)
         rels (rest relations)
         conditions conditions
         join-order [(:relation-id plan)]]
    (let [join-candidate (when (seq rels)
                           (->> conditions
                                (find-join-conditions-which-contain-cols-from-plan plan)
                                (match-relations-to-potential-join-clauses rels)
                                (first)))]
      (if-not join-candidate
        ;; nothing left to join, or nothing joinable: this sub-graph is complete
        {:sub-graph-plan plan
         :sub-graph-unused-rels rels
         :sub-graph-unused-conditions conditions
         :sub-graph-join-order join-order}
        (let [join-conditions (mapv adjust-to-equi-condition
                                    (:valid-join-conditions-for-rel join-candidate))
              joined-plan (emit-inner-join-expr {:condition join-conditions
                                                 :left plan
                                                 :right join-candidate}
                                                args)]
          (recur joined-plan
                 (remove-joined-relation join-candidate rels)
                 (remove-used-join-conditions join-candidate conditions)
                 (conj join-order (:relation-id join-candidate))))))))
(defn condition->cols
  "Returns the set of columns referenced by a conformed join clause
  `[condition-type condition]`: for an equi condition, the columns of both
  sides of its single entry; otherwise the columns of the predicate expression."
  [[condition-type condition]]
  (case condition-type
    :equi-condition (let [[lhs rhs] (first condition)]
                      (set/union (expr->columns rhs)
                                 (expr->columns lhs)))
    (expr->columns condition)))
;; Mega join: chooses a join order over an arbitrary number of input relations.
;;
;; The relations are emitted and sorted by `(:row-count (:stats rel))`
;; (relations without a row count last). They are then grouped into sub-graphs:
;; each sub-graph is built greedily by `build-plan-for-next-sub-graph`, and a new
;; sub-graph is started whenever the remaining relations cannot be attached.
;; The sub-graphs are finally combined with cross joins.
;; The emitted plan is returned with the chosen `:join-order` attached.
(defmethod lp/emit-expr :mega-join [{:keys [conditions relations]} args]
;; annotate every condition with the columns it references and a positional id
(let [conditions-with-cols (->> conditions
(map (fn [condition]
{:cols (condition->cols condition)
:condition condition}))
(map-indexed #(assoc %2 :condition-id %1)))
;; relation ids are assigned in input order, before sorting
child-relations (->> relations
(map #(lp/emit-expr % args))
(map-indexed #(assoc %2 :relation-id %1))
(sort-by
(juxt (comp nil? :row-count :stats)
(comp :row-count :stats))))
{:keys [sub-graph-plans unused-join-conditions join-order]}
;; keep carving sub-graphs out of the remaining relations until none are left
(loop [sub-graph-plans []
relations child-relations
conditions conditions-with-cols
join-order []]
(if (seq relations)
(let [{:keys [sub-graph-plan
sub-graph-unused-rels
sub-graph-unused-conditions
sub-graph-join-order]}
(build-plan-for-next-sub-graph conditions relations args)]
(recur
(conj sub-graph-plans sub-graph-plan)
sub-graph-unused-rels
sub-graph-unused-conditions
(conj join-order sub-graph-join-order)))
{:sub-graph-plans sub-graph-plans
:unused-join-conditions conditions
:join-order join-order}))]
;; This is a bit of a hack: mega-join may currently not choose a join order in
;; which a condition like the ones left over here ever becomes valid, but it
;; should always be correct to use the unused conditions as the conditions of
;; the outermost join.
;; NOTE(review): when conditions are left over but there is only one sub-graph
;; plan, `(butlast sub-graph-plans)` is empty and `reduce` without an init value
;; calls the two-argument fn with no arguments. The same applies to the other
;; `reduce` when `sub-graph-plans` is empty. Confirm the planner never produces
;; either shape.
(assoc
(if (seq unused-join-conditions)
(emit-inner-join-expr
{:condition (mapv :condition unused-join-conditions)
:left
(reduce (fn [full-plan sub-graph-plan]
(emit-cross-join
{:left full-plan
:right sub-graph-plan})) (butlast sub-graph-plans))
:right (last sub-graph-plans)}
args)
(reduce (fn [full-plan sub-graph-plan]
(emit-cross-join
{:left full-plan
:right sub-graph-plan})) sub-graph-plans))
:join-order join-order)))
© 2015 - 2024 Weber Informatics LLC | Privacy Policy