All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.jlangch.venice.xml.venice Maven / Gradle / Ivy

;;;;   __    __         _
;;;;   \ \  / /__ _ __ (_) ___ ___
;;;;    \ \/ / _ \ '_ \| |/ __/ _ \
;;;;     \  /  __/ | | | | (_|  __/
;;;;      \/ \___|_| |_|_|\___\___|
;;;;
;;;;
;;;; Copyright 2017-2024 Venice
;;;;
;;;; Licensed under the Apache License, Version 2.0 (the "License");
;;;; you may not use this file except in compliance with the License.
;;;; You may obtain a copy of the License at
;;;;
;;;;     http://www.apache.org/licenses/LICENSE-2.0
;;;;
;;;; Unless required by applicable law or agreed to in writing, software
;;;; distributed under the License is distributed on an "AS IS" BASIS,
;;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;;;; See the License for the specific language governing permissions and
;;;; limitations under the License.

;;;; XML parser


(ns xml)

(import :com.github.jlangch.venice.util.xml.IXMLHandler
        :com.github.jlangch.venice.util.xml.XMLHandler
        :com.github.jlangch.venice.util.xml.XMLUtil
        :java.lang.StringBuilder
        :java.io.InputStream
        :org.xml.sax.InputSource)


(def-dynamic xml/*stack*)
(def-dynamic xml/*current*)
(def-dynamic xml/*sb*)
(def-dynamic xml/*state*) ; :element :chars :between


(defn
  ^{ :arglists '("(xml/handler)")
     :doc "Create a SAX parser content handler" }

  xml/handler []

  (let [push-content (fn [elem content]
                       (let [new (conj (or (:content elem) []) content)]
                          (assoc elem :content new)))
        push-chars (fn []
                     (let [sb (str xml/*sb*)]
                       (when (and (== xml/*state* :chars) (str/trim-to-nil sb))
                         (set! xml/*current* (push-content xml/*current* sb)))))]
    (proxify :IXMLHandler
      { :startElement
            (fn [uri local-name q-name attributes]
              (let [attr (xml/parse-attributes attributes)
                    elem (if (nil? attr)
                           {:tag q-name }
                           {:tag q-name :attrs attr}) ]
                (push-chars)
                (set! xml/*stack* (conj xml/*stack* xml/*current*))
                (set! xml/*current* elem)
                (set! xml/*state* :element)
                nil))
        :endElement
            (fn [uri local-name q-name]
              (push-chars)
              (set! xml/*current* (push-content (peek xml/*stack*) xml/*current*))
              (set! xml/*stack* (pop xml/*stack*))
              (set! xml/*state* :between)
              nil)
        :characters
            (fn [chars]
              (when-not (== xml/*state* :chars)
                (set! xml/*sb* (. :StringBuilder :new)))
              (. xml/*sb* :append chars)
              (set! xml/*state* :chars)
              nil)
        :setDocumentLocator (fn [locator] nil)
        :startDocument (fn [] nil)
        :endDocument (fn [] nil)
        :startPrefixMapping (fn [prefix uri] nil)
        :endPrefixMapping (fn [prefix] nil)
        :ignorableWhitespace (fn [chars] nil)
        :processingInstruction (fn [target data] nil)
        :skippedEntity (fn [name] nil) })))


(defn
  ^{ :arglists '("(xml/parse s)" "(xml/parse s handler)")
     :doc """
          Parses and loads the XML from the source s with the parser
          XMLHandler handler. The source may be an InputSource or an
          InputStream.

          Returns a tree of XML element maps with the keys :tag,
          :attrs, and :content.
          """ }

  xml/parse

  ([s] (xml/parse s (xml/handler)))

  ([s handler]
    (when-not (instance-of? :IXMLHandler handler)
      (throw (ex :VncException (str "A handler of type "
                                    (type handler)
                                    " is not supported! Please pass a "
                                    ":com.github.jlangch.venice.util.xml.IXMLHandler. "
                                    "See: (xml/handler)"))))
    (binding [xml/*stack* []
              xml/*current* {}
              xml/*state* :between
              xml/*sb* nil]
      (if (or (instance-of? :InputSource s) (instance-of? :InputStream s))
        ;; limit the sources make it sandbox friendly
        (do
          (. :XMLUtil :parse s false handler)
          (first (:content xml/*current*)))
        (throw (ex :VncException (str "An input of type "
                                      (type s)
                                      " is not supported! Please pass a "
                                      ":java.io.InputStream or an "
                                      ":org.xml.sax.InputSource.")))))))


(defn
  ^{ :arglists '("(xml/parse-str s)" "(xml/parse-str s handler)")
     :doc """
          Parses an XML from the string s. Returns a tree of XML element
          maps with the keys :tag, :attrs, and :content.
          """
     :examples (list
          """
          (do
            (load-module :xml)
            (xml/parse-str "B"))
          """ ) }

  xml/parse-str

  ([s] (xml/parse (xml/input-source-from-str s) (xml/handler)))
  ([s handler] (xml/parse (xml/input-source-from-str s) handler)))


(defn
  ^{ :arglists '("(xml/input-source-from-str s)")
     :doc "Create a SAX InputSource from a string" }

  xml/input-source-from-str [s]

  (->> (io/string-reader s)
       (. :InputSource :new)))


(defn
  ^{ :arglists '("(xml/parse-attributes attrs)")
     :doc "Parse SAX attributes into a map." }

  xml/parse-attributes [attributes]

  (let [len (. attributes :getLength)]
    (when (> len 0)
      (apply merge
             (map (fn [x] { (keyword (. attributes :getQName x))
                            (. attributes :getValue x) } )
                  (range len))))))


(defn
  ^{ :arglists '("(xml/path-> path nodes)")
     :doc "Applies the path to a node or a collection of nodes"
     :examples (list
          """
          (do
            (load-module :xml)
            (let [nodes (xml/parse-str "C")
                  path [(xml/tag= "b")
                        (xml/tag= "c")
                        xml/text
                        first]]
              (xml/path-> path nodes)))
          """ ) }

  xml/path-> [path nodes]

  ((apply comp (reverse path)) (if (sequential? nodes) nodes [nodes])))


(def
  ^{ :arglists '("(xml/children nodes)")
     :doc "Returns the children of the XML nodes collection"
     :examples (list
          """
          (do
            (load-module :xml)
            (xml/children
              (list (xml/parse-str "B"))))
          """ ) }
  xml/children
  (partial mapcat #(:content %)))


(def
  ^{ :arglists '("(xml/text nodes)")
     :doc "Returns a list of text contents of the XML nodes collection"
     :examples (list
          """
          (do
            (load-module :xml)
            (let [nodes (xml/parse-str "B")
                  path [(xml/tag= "b")
                        xml/text]]
              (xml/path-> path nodes)))
          """ ) }
  xml/text
  (comp (partial filter string?) (partial mapcat #(:content %))))


(defn xml/tagp [pred]
  (comp
    (partial filter #(pred (:tag %)))
    xml/children))


(defn xml/tag= [tag]
  (xml/tagp (partial == tag)))


(defn xml/attrp [attr pred]
  (partial filter #(pred (-> % :attrs attr))))


(defn xml/attr= [attr val]
  (xml/attrp attr (partial == val)))




© 2015 - 2025 Weber Informatics LLC | Privacy Policy