xtdb.operator.table.clj Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of xtdb-core Show documentation
Show all versions of xtdb-core Show documentation
An open source document database with bitemporal graph queries
The newest version!
(ns xtdb.operator.table
(:require [clojure.spec.alpha :as s]
[xtdb.error :as err]
[xtdb.expression :as expr]
[xtdb.logical-plan :as lp]
[xtdb.rewrite :refer [zmatch]]
[xtdb.types :as types]
[xtdb.util :as util]
[xtdb.vector.reader :as vr]
[xtdb.vector.writer :as vw])
(:import clojure.lang.MapEntry
(java.util ArrayList HashMap HashSet Set)
(org.apache.arrow.vector.types.pojo ArrowType$Union Field Schema)
(org.apache.arrow.vector VectorSchemaRoot ZeroVector)
(xtdb ICursor)
xtdb.arrow.VectorPosition
(xtdb.vector RelationReader IRelationWriter IVectorWriter)))
;; Spec for the `:table` logical-plan operator. In order, the operator form is:
;;   :op                 - the `:table` keyword itself
;;   :explicit-col-names - an optional vector of columns
;;   :table              - the table source, conformed to one of three tagged shapes:
;;       :rows   - a collection whose items are each either a map with
;;                 simple-ident keys (tagged :map) or a param (tagged :param)
;;       :column - a map with exactly one column -> value entry
;;       :param  - a single param
;; The tags produced here are what `lp/emit-expr :table` dispatches on.
(defmethod lp/ra-expr :table [_]
(s/cat :op #{:table}
:explicit-col-names (s/? (s/coll-of ::lp/column :kind vector?))
:table (s/or :rows (s/coll-of (s/or :map (s/map-of simple-ident? any?)
:param ::lp/param))
:column (s/map-of ::lp/column any?, :count 1)
:param ::lp/param)))
;; For the forms compiled after this point: arithmetic is compiled without
;; overflow checks, and the compiler warns wherever it falls back to boxed math.
(set! *unchecked-math* :warn-on-boxed)
;; Single-shot cursor over one pre-built relation.
;;
;; `out-rel` is the relation to emit, or nil when there is nothing to emit. It is
;; handed to the consumer on the first `tryAdvance` and the field is cleared, so
;; every later `tryAdvance` returns false.
;;
;; `param?` is truthy when `out-rel` is backed by the query params. Those are
;; closed at the top level, so this cursor must never close them itself -
;; neither after emitting the relation nor when it is closed unconsumed.
(deftype TableCursor [^:unsynchronized-mutable ^RelationReader out-rel, param?]
  ICursor
  (tryAdvance [this c]
    (boolean
     (when-let [out-rel out-rel]
       (try
         ;; clear the field first so the relation is emitted at most once,
         ;; even if the consumer throws
         (set! (.out-rel this) nil)
         (.accept c out-rel)
         true
         (finally
           (when-not param? ; params get closed at toplevel
             (.close out-rel)))))))

  (close [_]
    ;; Only reached with a non-nil `out-rel` when the cursor was never advanced.
    ;; Apply the same ownership rule as `tryAdvance`: a param-backed relation is
    ;; closed at the top level, so closing it here would release it early (and
    ;; then a second time).
    (when-not param?
      (some-> out-rel .close))))
(defn- restrict-cols
  "Narrows `fields` (a map of column name -> field) to the operator's
  `:explicit-col-names`, when it has any: listed columns that are missing from
  `fields` are given `types/null-field`, and unlisted columns are dropped.
  Without explicit column names `fields` is returned untouched."
  [fields {:keys [explicit-col-names]}]
  (if explicit-col-names
    (let [null-defaults (zipmap explicit-col-names (repeat types/null-field))]
      ;; real fields win over the null placeholders
      (select-keys (merge null-defaults fields) explicit-col-names))
    fields))
(defn- emit-rows-table
  "Emits a `:table` operator whose source is an inline collection of rows.

  `rows` is the conformed row collection: each item is a `[row-tag row-arg]`
  pair, where a `:map` row carries a map of column -> value form and a `:param`
  row carries the name of a param to read the whole row from.
  `table-expr` is the operator itself (consulted for `:explicit-col-names`).
  `opts` supplies `:param-fields` and `:schema`, and is passed on to the
  expression compiler.

  Returns a map of:
    :fields    - column symbol -> field, merged across all rows; a column that
                 is absent from at least one row is additionally made nullable
    :->out-rel - fn of the runtime opts (`:allocator`, `:params`) that writes
                 every row and returns the resulting relation, or nil when
                 there are no rows

  Throws UnsupportedOperationException when a `:param` row names a param that
  is not present in `:param-fields`."
  [rows table-expr {:keys [param-fields schema] :as opts}]
  (let [param-types (update-vals param-fields types/field->col-type)
        ;; identical for every cell, so it is built once up front
        input-types (assoc opts :param-types param-types)
        ;; column symbol -> set of fields seen for that column; populated as a
        ;; side effect of building `out-rows`, hence the eager `mapv` below
        field-sets (HashMap.)
        out-rows (->> rows
                      (mapv (fn [[row-tag row-arg]]
                              (case row-tag
                                ;; whole row comes from a struct-typed param:
                                ;; merge its struct legs to learn the keys
                                :param (let [^Field struct-field (-> (for [^Field child-field (-> (or (get param-fields row-arg)
                                                                                                      (throw (UnsupportedOperationException. "missing param")))
                                                                                                  (types/flatten-union-field))
                                                                           :when (= #xt.arrow/type :struct (.getType child-field))]
                                                                       child-field)
                                                                     (->> (apply types/merge-fields)))
                                             ks (->> (.getChildren struct-field)
                                                     (into #{} (map #(symbol (.getName ^Field %)))))]
                                         (doseq [^Field struct-key (.getChildren struct-field)
                                                 :let [^Set field-set (.computeIfAbsent field-sets (symbol (.getName struct-key))
                                                                                        (fn [_] (HashSet.)))]]
                                           (.add field-set struct-key))

                                         {:ks ks
                                          :write-row! (fn write-param-row! [{:keys [^RelationReader params]}, ^IRelationWriter out-rel]
                                                        (let [param-rdr (.readerForName params (str row-arg))]
                                                          (.startRow out-rel)
                                                          (doseq [k ks
                                                                  :let [k (str k)]]
                                                            (.writeValue (.colWriter out-rel k)
                                                                         (-> (.structKeyReader param-rdr k)
                                                                             (.valueReader (VectorPosition/build 0)))))
                                                          (.endRow out-rel)))})

                                ;; literal map: each value is a literal, a param
                                ;; reference or an arbitrary expression
                                :map (let [out-row (->> row-arg
                                                        (into {}
                                                              (map (fn [[k v]]
                                                                     (let [k (symbol k)
                                                                           ;; parsed once; reused by the expression branch
                                                                           expr (expr/form->expr v input-types)
                                                                           ^Set field-set (.computeIfAbsent field-sets k (fn [_] (HashSet.)))]
                                                                       (case (:op expr)
                                                                         :literal (do
                                                                                    (.add field-set (types/col-type->field (vw/value->col-type v)))
                                                                                    (MapEntry/create k (fn write-literal! [_ ^IVectorWriter out-col]
                                                                                                         (.writeObject out-col v))))

                                                                         :param (let [{:keys [param]} expr]
                                                                                  (.add field-set (get param-fields param))
                                                                                  (MapEntry/create k (fn write-param! [{:keys [^RelationReader params]} ^IVectorWriter out-col]
                                                                                                       (.writeValue out-col
                                                                                                                    (-> (.readerForName params (str param))
                                                                                                                        (.valueReader (VectorPosition/build 0)))))))

                                                                         ;; HACK: this is quite heavyweight to calculate a single value -
                                                                         ;; the EE doesn't yet have an efficient means to do so...
                                                                         (let [projection-spec (expr/->expression-projection-spec "_scalar" expr input-types)]
                                                                           (.add field-set (types/col-type->field (.getColumnType projection-spec)))
                                                                           (MapEntry/create k (fn write-expr! [{:keys [allocator params]} ^IVectorWriter out-col]
                                                                                                (util/with-open [out-vec (.project projection-spec allocator (vr/rel-reader [] 1) schema params)]
                                                                                                  (.writeValue out-col (.valueReader out-vec (VectorPosition/build 0)))))))))))))]
                                       {:ks (set (keys out-row))
                                        :write-row! (fn write-row! [opts ^IRelationWriter out-rel]
                                                      (.startRow out-rel)
                                                      (doseq [[k write-val!] out-row]
                                                        (write-val! opts (.colWriter out-rel (str k))))
                                                      (.endRow out-rel))})))))
        ;; column symbol -> number of rows that supply it
        key-freqs (->> (into [] (mapcat :ks) out-rows)
                       (frequencies))
        row-count (count out-rows)
        fields (-> field-sets
                   (->> (into {} (map (juxt key (fn [[k ^Set !v-types]]
                                                  ;; missing from some row => the column must admit null
                                                  (when-not (= row-count (get key-freqs (symbol k)))
                                                    (.add !v-types types/null-field))
                                                  (-> (apply types/merge-fields !v-types)
                                                      (types/field-with-name (str k))))))))
                   (restrict-cols table-expr))]
    {:fields fields
     :->out-rel (fn [{:keys [allocator] :as opts}]
                  ;; no rows => no relation at all
                  (when (seq out-rows)
                    (util/with-close-on-catch [root (VectorSchemaRoot/create (Schema. (or (vals fields) [])) allocator)
                                               out-rel (vw/root->writer root)]
                      (doseq [{:keys [write-row!]} out-rows]
                        (write-row! opts out-rel))

                      (vw/rel-wtr->rdr out-rel))))}))
(defn- emit-col-table
  "Emits a `:table` operator whose source is a single `{column value-form}` map.

  The value form is compiled to a projection; the field of the resulting column
  is the projection's column type, unnested and renamed to the column.

  Returns a map of:
    :fields    - the one column symbol -> field, passed through `restrict-cols`
    :->out-rel - fn of the runtime opts (`:allocator`, `:params`) that evaluates
                 the projection and returns a one-column relation holding a copy
                 of the projected list's element vector"
  [col-spec table-expr {:keys [param-fields schema] :as opts}]
  (let [[out-col v] (first col-spec)
        col-name (str out-col)
        input-types (assoc opts :param-types (update-vals param-fields types/field->col-type))
        projection-spec (expr/->expression-projection-spec out-col
                                                           (expr/form->expr v input-types)
                                                           input-types)
        field (-> (.getColumnType projection-spec)
                  (types/col-type->field)
                  (types/unnest-field)
                  (types/field-with-name col-name))]
    {:fields (restrict-cols {(symbol (.getName field)) field} table-expr)
     :->out-rel (fn [{:keys [allocator ^RelationReader params]}]
                  ;; the projected vector is only needed while copying, so it is
                  ;; closed on the way out
                  (util/with-open [projected (.project projection-spec allocator (vr/rel-reader [] 1) schema params)]
                    (let [;; a union result is narrowed to its "list" leg
                          list-rdr (if (instance? ArrowType$Union (.getType (.getField projected)))
                                     (.legReader projected "list")
                                     projected)
                          ;; fall back to an empty, correctly-named vector when
                          ;; there is no element reader to take
                          src-rdr (or (some-> list-rdr .listElementReader (.withName col-name))
                                      (vr/vec->reader (ZeroVector. col-name)))]
                      (util/with-close-on-catch [el-rdr (.copy src-rdr allocator)]
                        (vr/rel-reader [el-rdr] (.valueCount el-rdr))))))}))
(defn- emit-arg-table
  "Emits a `:table` operator whose whole source is the param `param`.

  Every leg of the param's field must be a list, and every leg of that list's
  element field must be a struct or null; the struct keys become the columns.

  Returns a map of:
    :fields    - column symbol -> field, passed through `restrict-cols`
    :->out-rel - fn of the runtime opts (`:params`) returning a relation over
                 the struct-key readers of the param's list elements, limited
                 to the columns in `:fields`

  Throws an `:unknown-table` illegal-arg when `param` is not in `:param-fields`,
  and an `:illegal-param-type` illegal-arg when the param is not a list of
  structs."
  [param table-expr {:keys [param-fields]}]
  (let [param-field (or (get param-fields param)
                        (throw (err/illegal-arg :unknown-table
                                                {::err/message "Table refers to unknown param"
                                                 :param param, :params (set (keys param-fields))})))
        reject-type! (fn []
                       (throw (err/illegal-arg :illegal-param-type
                                               {::err/message "Table param must be of type struct list"
                                                :param param})))
        fields (-> (into {}
                         (for [^Field list-leg (types/flatten-union-field param-field)
                               :when (or (= #xt.arrow/type :list (.getType list-leg))
                                         (reject-type!))
                               :let [^Field el-field (first (.getChildren list-leg))]
                               ^Field el-leg (types/flatten-union-field el-field)
                               :let [el-type (.getType el-leg)]
                               :when (or (= #xt.arrow/type :struct el-type)
                                         (= #xt.arrow/type :null el-type)
                                         (reject-type!))
                               ^Field key-field (.getChildren el-leg)]
                           (MapEntry/create (symbol (.getName key-field)) key-field)))
                   (restrict-cols table-expr))]
    {:fields fields
     :->out-rel (fn [{:keys [^RelationReader params]}]
                  (let [vec-rdr (.readerForName params (str (symbol param)))
                        ;; a union-typed param is narrowed to its "list" leg
                        list-rdr (if (instance? ArrowType$Union (.getType (.getField vec-rdr)))
                                   (.legReader vec-rdr "list")
                                   vec-rdr)
                        ;; NOTE(review): `el-rdr` is obtained via `some->` yet is
                        ;; dereferenced unguarded below - confirm it can never be nil
                        el-rdr (some-> list-rdr .listElementReader)
                        ;; likewise, union-typed elements are narrowed to "struct"
                        el-struct-rdr (if (instance? ArrowType$Union (.getType (.getField el-rdr)))
                                        (.legReader el-rdr "struct")
                                        el-rdr)
                        col-rdrs (->> (some-> el-struct-rdr .structKeys)
                                      (filter #(contains? fields (symbol %)))
                                      (map #(.structKeyReader el-struct-rdr %)))]
                    (vr/rel-reader col-rdrs (.valueCount el-rdr))))}))
;; Emits the `:table` operator by dispatching on the conformed table source:
;; inline rows, a single column, or a param. Only the param-backed variant is
;; flagged as such when its cursor is constructed.
(defmethod lp/emit-expr :table [{:keys [table] :as table-expr} opts]
  (let [{:keys [emitted param?]} (zmatch table
                                   [:rows rows] {:emitted (emit-rows-table rows table-expr opts), :param? false}
                                   [:column col] {:emitted (emit-col-table col table-expr opts), :param? false}
                                   [:param param] {:emitted (emit-arg-table param table-expr opts), :param? true})
        {:keys [fields ->out-rel]} emitted]
    {:fields fields
     ;; the relation is built lazily, once per cursor, from the runtime opts
     :->cursor (fn [cursor-opts]
                 (TableCursor. (->out-rel cursor-opts) param?))}))
© 2015 - 2024 Weber Informatics LLC | Privacy Policy