All Downloads are FREE. Search and download functionalities are using the official Maven repository.

angch.venice.1.12.27.source-code.expr-parser-3.venice Maven / Gradle / Ivy

There is a newer version: 1.12.34
Show newest version
;;;;   __    __         _
;;;;   \ \  / /__ _ __ (_) ___ ___
;;;;    \ \/ / _ \ '_ \| |/ __/ _ \
;;;;     \  /  __/ | | | | (_|  __/
;;;;      \/ \___|_| |_|_|\___\___|
;;;;
;;;;
;;;; Copyright 2017-2024 Venice
;;;;
;;;; Licensed under the Apache License, Version 2.0 (the "License");
;;;; you may not use this file except in compliance with the License.
;;;; You may obtain a copy of the License at
;;;;
;;;;     http://www.apache.org/licenses/LICENSE-2.0
;;;;
;;;; Unless required by applicable law or agreed to in writing, software
;;;; distributed under the License is distributed on an "AS IS" BASIS,
;;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;;;; See the License for the specific language governing permissions and
;;;; limitations under the License.

;;; ----------------------------------------------------------------------------
;;; Parsifal expression evaluator example
;;; ----------------------------------------------------------------------------
;;;
;;; The expression evaluator evaluates expressions like `"(3 + 4) * 5"`. It
;;; supports unary expressions like `-4`, the math operators `+`, `-`, `*`, `/`,
;;; the number types `long` and `double`, and the parentheses `(` and `)`.
;;;
;;; The evaluator uses two Parsifal parsers. The up-front tokenizing parser
;;; operates on a string (stream of characters) and returns a list of tokens.
;;; The expression parser operates on a stream of tokens and returns a number.
;;;
;;; Parsifal does not handle whitespace on its own; each parser has to deal
;;; with whitespace itself.
;;; In this example the up-front tokenizing parser consumes the whitespace,
;;; which frees the main expression parser from having to deal with it.

;;; ----------------------------------------------------------------------------
;;; Usage
;;; ----------------------------------------------------------------------------
;;;
;;; [1] Start a REPL and load the expression parser script
;;;     (load-file "path-to-venice/doc/examples/scripts/expr-parser-3.venice")
;;;
;;; [2] Test the tokenizer:
;;;     (tokenize "3 + 4.2")        ; => Tokens [:int "3"] [:op "+"] [:float "4.2"] (each also carries line/column)
;;;
;;; [3] Test the expression parser:
;;;     (evaluate "1")                    ; => 1
;;;     (evaluate "1 + 2")                ; => 3
;;;     (evaluate "1 + 2 * 3 + 4")        ; => 11
;;;     (evaluate "(1 + 2) * (3 + 4)")    ; => 21
;;;     (evaluate "3 + 4.1 - 5 * 3.2")    ; => -8.9
;;;     (evaluate "3 + (4.1 - 5) * 3.2")  ; => 0.11999999999999877
;;;     ;; unary
;;;     (evaluate "-1")                   ; => -1
;;;     (evaluate "1 + -2")               ; => -1
;;;     (evaluate "1 + 2 * -3 + 4")       ; => -1
;;;     (evaluate "-(-1 + 2) * (-3 + 4)") ; => -1

(do
  ;;; ----------------------------------------------------------------------------
  ;;; EBNF
  ;;; ----------------------------------------------------------------------------
  ;;;
  ;;; [1] Tokenizer
  ;;; ----------------------------------------------------------------------------
  ;;; Whitespace      = " " | "\t" | "\r" | "\n" ;
  ;;; Operator        = "+" | "-" | "*" | "/" ;
  ;;; LParen          = "(" ;
  ;;; RParen          = ")" ;
  ;;; Digit           = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
  ;;; Integer         = Digit { Digit } ;
  ;;; Float           = Digit { Digit } "." Digit { Digit };
  ;;;
  ;;; Token           = Whitespace | Operator | LParen | RParen | Float | Integer ;
  ;;; Tokens          = { Token } EOI ;
  ;;;
  ;;;
  ;;; [2] Expression Parser
  ;;; ----------------------------------------------------------------------------
  ;;;
  ;;; Main            = Expression EOI ;
  ;;; Expression      = AddExpression ;
  ;;; AddExpression   = MulExpression { ( "+" | "-" ) MulExpression } ;
  ;;; MulExpression   = UnaryExpression { ( "*" | "/" ) UnaryExpression } ;
  ;;; UnaryExpression = ( "+" | "-" ) UnaryExpression | ParExpression | Literal ;
  ;;; ParExpression   = "(" Expression ")" ;
  ;;; Literal         = Integer | Float ;


  (load-module :parsifal ['parsifal :as 'p])


  ;;; ----------------------------------------------------------------------------
  ;;; Token
  ;;; ----------------------------------------------------------------------------

  ;; Token value produced by the tokenizer and consumed by the expression
  ;; parser.
  ;;
  ;; Fields:
  ;;   type   - token category keyword; the tokenizer in this file emits
  ;;            :ws, :op, :lparen, :rparen, :int, :float and :unknown
  ;;   val    - the matched source text
  ;;   line   - first element of the (p/pos) pair captured before the match
  ;;   column - second element of the (p/pos) pair captured before the match
  (deftype :Token [type :keyword, val :string, line :long, column :long]
    Object
      ;; Renders the token as "[<type> <val> (<line>,<column>)]", with type
      ;; and value printed through pr-str.
      (toString [this] (str/format "[%s %s (%d,%d)]"
                                   (pr-str (:type this))
                                   (pr-str (:val this))
                                   (:line this)
                                   (:column this)))
    ;; Exposes the stored position through the p/SourcePosition protocol
    ;; (presumably so parsifal can report positions for token streams --
    ;; confirm against the parsifal documentation).
    p/SourcePosition
      (line [this] (:line this))
      (column [this] (:column this)))



  ;;; ----------------------------------------------------------------------------
  ;;; Tokenizer
  ;;; ----------------------------------------------------------------------------

  ;; Whitespace token: a run of one or more space, tab, CR or LF characters
  ;; is collected into a single :ws Token that carries the position captured
  ;; before the run was matched.
  (p/defparser ws-tok []
    (p/let->> [[row col] (p/pos)
               blanks    (p/many1 (p/any-char-of " \t\r\n"))]
      (p/always (Token. :ws (apply str blanks) row col))))

  ;; Operator token: exactly one of the characters + - * / becomes an :op
  ;; Token whose value is that character as a string.
  (p/defparser op-tok []
    (p/let->> [[row col] (p/pos)
               sign      (p/any-char-of "+-*/")]
      (p/always (Token. :op (str sign) row col))))

  ;; Left parenthesis token: a single "(" becomes an :lparen Token.
  (p/defparser lparen-tok []
    (p/let->> [[row col] (p/pos)
               paren     (p/char "(")]
      (p/always (Token. :lparen (str paren) row col))))

  ;; Right parenthesis token: a single ")" becomes an :rparen Token.
  (p/defparser rparen-tok []
    (p/let->> [[row col] (p/pos)
               paren     (p/char ")")]
      (p/always (Token. :rparen (str paren) row col))))

  ;; Integer token: one or more digits are joined into the value of an :int
  ;; Token. The value stays a string; conversion to a number happens later in
  ;; the expression parser.
  (p/defparser int-tok []
    (p/let->> [[row col] (p/pos)
               digits    (p/many1 (p/digit))]
      (p/always (Token. :int (apply str digits) row col))))

  ;; Float token: digits, a ".", and digits again (at least one digit on each
  ;; side) are joined into the value of a :float Token.
  ;; NOTE(review): this parser uses p/let->>* where its siblings use
  ;; p/let->>; presumably the starred form backtracks when the "." is missing
  ;; so that (int-tok) can still match the same digits -- confirm against the
  ;; parsifal documentation.
  (p/defparser float-tok []
    (p/let->>* [[row col]   (p/pos)
                int-part    (p/many1 (p/digit))
                dot         (p/char ".")
                frac-part   (p/many1 (p/digit))]
      (p/always (Token. :float
                        (str (apply str int-part) dot (apply str frac-part))
                        row col))))

  ;; Catch-all token: a run of one or more characters that are not space,
  ;; tab, CR or LF becomes a single :unknown Token.
  (p/defparser unknown-tok []
    (p/let->> [[row col] (p/pos)
               junk      (p/many1 (p/none-char-of " \t\r\n"))]
      (p/always (Token. :unknown (apply str junk) row col))))

  ;; Parses a single non-whitespace token.
  ;; The two body forms run one after the other: first any number of
  ;; whitespace tokens is matched, then one of the token alternatives
  ;; (assumed: the parser's result is that of the last form, i.e. the chosen
  ;; token -- confirm against p/defparser).
  (p/defparser token []
    (p/many (ws-tok))
    ;; Alternatives are listed with (float-tok) ahead of (int-tok), and
    ;; (unknown-tok) last as the catch-all for any other non-whitespace text.
    (p/choice (op-tok)
              (lparen-tok)
              (rparen-tok)
              (float-tok)
              (int-tok)
              (unknown-tok)))

  ;; Parses the complete input into a sequence of tokens.
  ;; Collects zero or more (token) results, then matches any remaining
  ;; whitespace tokens, and finally requires end of input. Only the collected
  ;; tokens `t` are returned; the other two results are bound to `_` and
  ;; ignored.
  (p/defparser tokens []
    (p/let->> [t  (p/many (p/attempt (token)))
               _  (p/many (p/attempt (ws-tok)))
               _  (p/eof)]
      (p/always t)))

  ;; Runs the (tokens) parser over the string `expression` and returns
  ;; whatever p/run yields for it.
  (defn tokenize [expression]
    (->> expression
         (p/run (tokens))))



  ;;; ----------------------------------------------------------------------------
  ;;; Expression Parser
  ;;; ----------------------------------------------------------------------------

  ;; Folds a list of [operator-name operand] tuples into `seed-val`, left to
  ;; right. Each operator name is a string that is resolved to the function of
  ;; that name and called with the running total and the operand.
  ;;
  ;;   (chained-math 1 [["+" 6] ["-" 4]]) ; => (1 + 6 - 4) => 3
  (defn chained-math [seed-val tuples]
    (let [apply-step (fn [total step]
                       (let [math-fn (resolve (symbol (first step)))]
                         (math-fn total (second step))))]
      (reduce apply-step seed-val tuples)))

  ;; Returns a token parser that accepts an :op token whose value equals
  ;; `sym`, e.g. (op "+").
  (defn op [sym]
    (p/token (fn [tok]
               (and (= :op (:type tok))
                    (= sym (:val tok))))))

  ;; Returns a token parser that accepts a token of type :lparen.
  (defn lparen []
    (p/token (fn [tok] (= :lparen (:type tok)))))

  ;; Returns a token parser that accepts a token of type :rparen.
  (defn rparen []
    (p/token (fn [tok] (= :rparen (:type tok)))))

  ;; Integer literal: accepts an :int token and yields its text converted
  ;; with `long`.
  (p/defparser int []
    (p/let->> [tok (p/token (fn [t] (= :int (:type t))))]
      (p/always (long (:val tok)))))

  ;; Float literal: accepts a :float token and yields its text converted
  ;; with `double`.
  (p/defparser float []
    (p/let->> [tok (p/token (fn [t] (= :float (:type t))))]
      (p/always (double (:val tok)))))

  ;; Expression = AddExpression
  ;; Entry point of the recursive grammar; simply delegates to (add-expr).
  (p/defparser expr []
    ; No EOF handling in this parser! It is called recursively (from
    ; paren-expr), so end-of-input is checked only in `main`.
    (add-expr))

  ;; AddExpression = MulExpression { ( "+" | "-" ) MulExpression }
  ;; Parses the leading operand, collects every following
  ;; [operator-text operand] pair, and folds them with chained-math.
  (p/defparser add-expr []
    (p/let->> [head  (mul-expr)
               pairs (p/many (p/let->> [sign (p/either (op "+") (op "-"))
                                        rhs  (mul-expr)]
                               (p/always [(:val sign) rhs])))]
      (p/always (chained-math head pairs))))

  ;; MulExpression = UnaryExpression { ( "*" | "/" ) UnaryExpression }
  ;; Parses the leading operand, collects every following
  ;; [operator-text operand] pair, and folds them with chained-math.
  (p/defparser mul-expr []
    (p/let->> [head  (unary-expr)
               pairs (p/many (p/let->> [sign (p/either (op "*") (op "/"))
                                        rhs  (unary-expr)]
                               (p/always [(:val sign) rhs])))]
      (p/always (chained-math head pairs))))

  ;; UnaryExpression = ( "+" | "-" ) UnaryExpression | ParExpression | Literal
  ;; A leading "-" negates the operand that follows, a leading "+" leaves it
  ;; unchanged. Without a sign the alternatives are a parenthesized
  ;; expression, a float literal, or an int literal.
  (p/defparser unary-expr []
    (p/choice (p/let->> [sign    (p/either (op "+") (op "-"))
                         operand (unary-expr)]
                 ;; only "+" or "-" can be bound to `sign` here
                 (p/always (if (= "-" (:val sign))
                             (negate operand)
                             operand)))
              (paren-expr)
              (float)
              (int)))

  ;; ParExpression = "(" Expression ")"
  ;; Parses an (expr) enclosed between an :lparen and an :rparen token.
  (p/defparser paren-expr []
    (p/between (lparen) (rparen) (expr)))

  ;; Main = Expression EOI
  ;; Top-level parser over the token stream. Cases handled:
  ;;   1) empty input:       ""           => OK, value => nil
  ;;   2) valid expression:  "3 + 4"      => OK, value => 7
  ;;   3) left over tokens:  "(3 + 4) 9"  => ERR, Unexpected token '9'
  (p/defparser main []
    (p/either (p/eof)
              (p/let->> [result   (expr)
                         leftover (p/either (p/eof) (p/any))]
                 ;; `leftover` is nil when the input ended right after the
                 ;; expression, otherwise it is the first surplus token
                 (if (not (nil? leftover))
                   (p/never (str "Unexpected token '" (:val leftover) "'"))
                   (p/always result)))))

  ;; Evaluates the arithmetic expression string `expression`: the string is
  ;; tokenized first, then the (main) parser is run over the token stream.
  (defn evaluate [expression]
    (->> (tokenize expression)
         (p/run (main))))
)




© 2015 - 2024 Weber Informatics LLC | Privacy Policy