
com.github.jlangch.venice.xml.venice Maven / Gradle / Ivy
;;;; __ __ _
;;;; \ \ / /__ _ __ (_) ___ ___
;;;; \ \/ / _ \ '_ \| |/ __/ _ \
;;;; \ / __/ | | | | (_| __/
;;;; \/ \___|_| |_|_|\___\___|
;;;;
;;;;
;;;; Copyright 2017-2024 Venice
;;;;
;;;; Licensed under the Apache License, Version 2.0 (the "License");
;;;; you may not use this file except in compliance with the License.
;;;; You may obtain a copy of the License at
;;;;
;;;; http://www.apache.org/licenses/LICENSE-2.0
;;;;
;;;; Unless required by applicable law or agreed to in writing, software
;;;; distributed under the License is distributed on an "AS IS" BASIS,
;;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;;;; See the License for the specific language governing permissions and
;;;; limitations under the License.
;;;; XML parser
(ns xml)
(import :com.github.jlangch.venice.util.xml.IXMLHandler
:com.github.jlangch.venice.util.xml.XMLHandler
:com.github.jlangch.venice.util.xml.XMLUtil
:java.lang.StringBuilder
:java.io.InputStream
:org.xml.sax.InputSource)
(def-dynamic xml/*stack*)
(def-dynamic xml/*current*)
(def-dynamic xml/*sb*)
(def-dynamic xml/*state*) ; :element :chars :between
(defn
^{ :arglists '("(xml/handler)")
:doc "Create a SAX parser content handler" }
xml/handler []
(let [push-content (fn [elem content]
(let [new (conj (or (:content elem) []) content)]
(assoc elem :content new)))
push-chars (fn []
(let [sb (str xml/*sb*)]
(when (and (== xml/*state* :chars) (str/trim-to-nil sb))
(set! xml/*current* (push-content xml/*current* sb)))))]
(proxify :IXMLHandler
{ :startElement
(fn [uri local-name q-name attributes]
(let [attr (xml/parse-attributes attributes)
elem (if (nil? attr)
{:tag q-name }
{:tag q-name :attrs attr}) ]
(push-chars)
(set! xml/*stack* (conj xml/*stack* xml/*current*))
(set! xml/*current* elem)
(set! xml/*state* :element)
nil))
:endElement
(fn [uri local-name q-name]
(push-chars)
(set! xml/*current* (push-content (peek xml/*stack*) xml/*current*))
(set! xml/*stack* (pop xml/*stack*))
(set! xml/*state* :between)
nil)
:characters
(fn [chars]
(when-not (== xml/*state* :chars)
(set! xml/*sb* (. :StringBuilder :new)))
(. xml/*sb* :append chars)
(set! xml/*state* :chars)
nil)
:setDocumentLocator (fn [locator] nil)
:startDocument (fn [] nil)
:endDocument (fn [] nil)
:startPrefixMapping (fn [prefix uri] nil)
:endPrefixMapping (fn [prefix] nil)
:ignorableWhitespace (fn [chars] nil)
:processingInstruction (fn [target data] nil)
:skippedEntity (fn [name] nil) })))
(defn
^{ :arglists '("(xml/parse s)" "(xml/parse s handler)")
:doc """
Parses and loads the XML from the source s with the parser
XMLHandler handler. The source may be an InputSource or an
InputStream.
Returns a tree of XML element maps with the keys :tag,
:attrs, and :content.
""" }
xml/parse
([s] (xml/parse s (xml/handler)))
([s handler]
(when-not (instance-of? :IXMLHandler handler)
(throw (ex :VncException (str "A handler of type "
(type handler)
" is not supported! Please pass a "
":com.github.jlangch.venice.util.xml.IXMLHandler. "
"See: (xml/handler)"))))
(binding [xml/*stack* []
xml/*current* {}
xml/*state* :between
xml/*sb* nil]
(if (or (instance-of? :InputSource s) (instance-of? :InputStream s))
;; limit the sources make it sandbox friendly
(do
(. :XMLUtil :parse s false handler)
(first (:content xml/*current*)))
(throw (ex :VncException (str "An input of type "
(type s)
" is not supported! Please pass a "
":java.io.InputStream or an "
":org.xml.sax.InputSource.")))))))
(defn
^{ :arglists '("(xml/parse-str s)" "(xml/parse-str s handler)")
:doc """
Parses an XML from the string s. Returns a tree of XML element
maps with the keys :tag, :attrs, and :content.
"""
:examples (list
"""
(do
(load-module :xml)
(xml/parse-str "B"))
""" ) }
xml/parse-str
([s] (xml/parse (xml/input-source-from-str s) (xml/handler)))
([s handler] (xml/parse (xml/input-source-from-str s) handler)))
(defn
^{ :arglists '("(xml/input-source-from-str s)")
:doc "Create a SAX InputSource from a string" }
xml/input-source-from-str [s]
(->> (io/string-reader s)
(. :InputSource :new)))
(defn
^{ :arglists '("(xml/parse-attributes attrs)")
:doc "Parse SAX attributes into a map." }
xml/parse-attributes [attributes]
(let [len (. attributes :getLength)]
(when (> len 0)
(apply merge
(map (fn [x] { (keyword (. attributes :getQName x))
(. attributes :getValue x) } )
(range len))))))
(defn
^{ :arglists '("(xml/path-> path nodes)")
:doc "Applies the path to a node or a collection of nodes"
:examples (list
"""
(do
(load-module :xml)
(let [nodes (xml/parse-str "C ")
path [(xml/tag= "b")
(xml/tag= "c")
xml/text
first]]
(xml/path-> path nodes)))
""" ) }
xml/path-> [path nodes]
((apply comp (reverse path)) (if (sequential? nodes) nodes [nodes])))
(def
^{ :arglists '("(xml/children nodes)")
:doc "Returns the children of the XML nodes collection"
:examples (list
"""
(do
(load-module :xml)
(xml/children
(list (xml/parse-str "B"))))
""" ) }
xml/children
(partial mapcat #(:content %)))
(def
^{ :arglists '("(xml/text nodes)")
:doc "Returns a list of text contents of the XML nodes collection"
:examples (list
"""
(do
(load-module :xml)
(let [nodes (xml/parse-str "B")
path [(xml/tag= "b")
xml/text]]
(xml/path-> path nodes)))
""" ) }
xml/text
(comp (partial filter string?) (partial mapcat #(:content %))))
(defn xml/tagp [pred]
(comp
(partial filter #(pred (:tag %)))
xml/children))
(defn xml/tag= [tag]
(xml/tagp (partial == tag)))
(defn xml/attrp [attr pred]
(partial filter #(pred (-> % :attrs attr))))
(defn xml/attr= [attr val]
(xml/attrp attr (partial == val)))
© 2015 - 2025 Weber Informatics LLC | Privacy Policy