xtdb.operator.arrow.clj Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of xtdb-core Show documentation
Show all versions of xtdb-core Show documentation
An open source document database with bitemporal graph queries
The newest version!
(ns xtdb.operator.arrow
(:require [clojure.spec.alpha :as s]
[xtdb.logical-plan :as lp]
[xtdb.util :as util]
[xtdb.vector.reader :as vr])
(:import java.io.BufferedInputStream
java.net.URL
java.nio.channels.SeekableByteChannel
(java.nio.file CopyOption Files Path)
(org.apache.arrow.memory BufferAllocator RootAllocator)
(org.apache.arrow.vector.ipc ArrowFileReader ArrowReader ArrowStreamReader InvalidArrowFileException)
org.apache.arrow.vector.types.pojo.Field
xtdb.ICursor))
;; Logical-plan grammar for the `:arrow` operator:
;; the literal `:arrow` tag followed by a single URL operand.
(defmethod lp/ra-expr :arrow
  [_op]
  (s/cat :op #{:arrow}
         :url ::util/url))
;; Cursor that walks the record batches of an Arrow IPC reader.
;;
;; rdr         - the ArrowReader that batches are pulled from; closed together with the cursor.
;; on-close-fn - optional zero-arg function (may be nil) invoked once the reader has been closed.
(deftype ArrowCursor [^ArrowReader rdr on-close-fn]
  ICursor
  (tryAdvance [_ c]
    ;; Returns true after handing one batch to the consumer `c`,
    ;; false when the reader has no further batch to load.
    (if (.loadNextBatch rdr)
      (do
        ;; The reader's VectorSchemaRoot is passed through `vr/<-root` before being given to `c`.
        (.accept c (vr/<-root (.getVectorSchemaRoot rdr)))
        true)
      false))

  (close [_]
    ;; try/finally: the close hook must still run if closing the reader throws,
    ;; otherwise whatever cleanup it performs would be silently skipped.
    (try
      (util/try-close rdr)
      (finally
        (when on-close-fn
          (on-close-fn))))))
;; HACK: detection of stream vs file IPC format.
;;
;; Opens an Arrow reader over `ch`, allocating from `allocator`.
;; The file format is attempted first; if Arrow rejects the content with
;; InvalidArrowFileException the channel is rewound to position 0 and a
;; stream-format reader is returned instead.
;;
;; The returned reader is built directly on `ch`. If no reader can be
;; returned (any other failure, or a failure while rewinding), `ch` is closed
;; here before the error is rethrown, so the caller's freshly opened channel
;; is not leaked.
(defn- path->arrow-reader [^SeekableByteChannel ch ^BufferAllocator allocator]
  (try
    (try
      (doto (ArrowFileReader. ch allocator)
        (.initialize))
      (catch InvalidArrowFileException _
        ;; Not the file format: start again from the beginning as a stream.
        (ArrowStreamReader. (.position ch 0) allocator)))
    (catch Throwable t
      ;; Nobody else holds a reference to the channel at this point.
      (util/try-close ch)
      (throw t))))
;; HACK: not ideal that we have to open the file in the emitter just to get the fields?
;;
;; Builds the emitted-expression map for the Arrow file at `path`:
;;   :fields   - map of column-name symbol -> Arrow Field, read eagerly from the
;;               file's schema using a short-lived RootAllocator and reader.
;;   :->cursor - function of an options map containing :allocator; opens a fresh
;;               reader over `path` and wraps it in an ArrowCursor that is given
;;               `on-close-fn` (which may be nil).
(defn- path->cursor [^Path path on-close-fn]
  (let [open-reader (fn [^BufferAllocator al]
                      (path->arrow-reader (util/->file-channel path) al))
        field-entry (fn [^Field field]
                      [(symbol (.getName field)) field])]
    {:fields (with-open [schema-al (RootAllocator.)
                         ^ArrowReader schema-rdr (open-reader schema-al)]
               (let [schema (.getSchema (.getVectorSchemaRoot schema-rdr))]
                 (into {} (map field-entry) (.getFields schema))))
     :->cursor (fn [{:keys [^BufferAllocator allocator]}]
                 (ArrowCursor. (open-reader allocator) on-close-fn))}))
;; Emits the `:arrow` operator for `url`.
;;
;; `file:` URLs are read in place. Any other protocol is downloaded to a
;; temporary file first; that file is deleted by the cursor's close hook.
;; If the download or the initial schema read fails, no cursor exists to run
;; that hook, so the temporary file is removed here before rethrowing.
(defmethod lp/emit-expr :arrow [{:keys [^URL url]} _args]
  ;; TODO: should we make it possible to disable local files?
  (if (= "file" (.getProtocol url))
    (path->cursor (util/->path (.toURI url)) nil)

    ;; HACK: downloading during emit if protocol isn't file.
    (let [^Path tmp-path (util/->temp-file "arrow_operator" "download")
          ^"[Ljava.nio.file.CopyOption;" options (make-array CopyOption 0)]
      (try
        (with-open [in (BufferedInputStream. (.openStream url))]
          (Files/copy in tmp-path options))
        (path->cursor tmp-path #(util/delete-file tmp-path))
        (catch Throwable t
          ;; Best-effort cleanup; the original failure is what gets reported.
          (try
            (Files/deleteIfExists tmp-path)
            (catch Exception cleanup-ex
              (.addSuppressed t cleanup-ex)))
          (throw t))))))
© 2015 - 2024 Weber Informatics LLC | Privacy Policy