angch.venice.1.12.27.source-code.expr-parser-3.venice Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of venice Show documentation
Show all versions of venice Show documentation
Venice, a sandboxed Lisp implemented in Java.
;;;; __ __ _
;;;; \ \ / /__ _ __ (_) ___ ___
;;;; \ \/ / _ \ '_ \| |/ __/ _ \
;;;; \ / __/ | | | | (_| __/
;;;; \/ \___|_| |_|_|\___\___|
;;;;
;;;;
;;;; Copyright 2017-2024 Venice
;;;;
;;;; Licensed under the Apache License, Version 2.0 (the "License");
;;;; you may not use this file except in compliance with the License.
;;;; You may obtain a copy of the License at
;;;;
;;;; http://www.apache.org/licenses/LICENSE-2.0
;;;;
;;;; Unless required by applicable law or agreed to in writing, software
;;;; distributed under the License is distributed on an "AS IS" BASIS,
;;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;;;; See the License for the specific language governing permissions and
;;;; limitations under the License.
;;; ----------------------------------------------------------------------------
;;; Parsifal expression evaluator example
;;; ----------------------------------------------------------------------------
;;;
;;; The expression evaluator evaluates expressions like `"(3 + 4) * 5"`. It
;;; supports unary expressions like `-4`, the math operators `+`, `-`, `*`, `/`,
;;; the number types `long` and `double`, and the parentheses `(` and `)`.
;;;
;;; The evaluator uses two Parsifal parsers. The up-front tokenizing parser
;;; operates on a string (stream of characters) and returns a list of tokens.
;;; The expression parser operates on a stream of tokens and returns a number.
;;;
;;; Parsifal does not handle whitespace on its own; each parser has to deal
;;; with whitespace itself.
;;; In this example the up-front tokenizing parser consumes the whitespace,
;;; which frees the main expression parser from dealing with it.
;;; ----------------------------------------------------------------------------
;;; Usage
;;; ----------------------------------------------------------------------------
;;;
;;; [1] Start a REPL and load the expression parser script
;;; (load-file "path-to-venice/doc/examples/scripts/expr-parser-3.venice")
;;;
;;; [2] Test the tokenizer:
;;; (tokenize "3 + 4.2") ; => [[:int "3"] [:op "+"] [:float "4.2"]]
;;;
;;; [3] Test the expression parser:
;;; (evaluate "1") ; => 1
;;; (evaluate "1 + 2") ; => 3
;;; (evaluate "1 + 2 * 3 + 4") ; => 11
;;; (evaluate "(1 + 2) * (3 + 4)") ; => 21
;;; (evaluate "3 + 4.1 - 5 * 3.2") ; => -8.9
;;; (evaluate "3 + (4.1 - 5) * 3.2") ; => 0.11999999999999877
;;; ;; unary
;;; (evaluate "-1") ; => -1
;;; (evaluate "1 + -2") ; => -1
;;; (evaluate "1 + 2 * -3 + 4") ; => -1
;;; (evaluate "-(-1 + 2) * (-3 + 4)") ; => -1
(do
;;; ----------------------------------------------------------------------------
;;; EBNF
;;; ----------------------------------------------------------------------------
;;;
;;; [1] Tokenizer
;;; ----------------------------------------------------------------------------
;;; Whitespace = " " | "\t" | "\r" | "\n" ;
;;; Operator = "+" | "-" | "*" | "/" ;
;;; LParen = "(" ;
;;; RParen = ")" ;
;;; Digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
;;; Integer = Digit { Digit } ;
;;; Float = Digit { Digit } "." Digit { Digit };
;;;
;;; Token = Whitespace | Operator | LParen | RParen | Float | Integer ;
;;; Tokens = { Token } EOI ;
;;;
;;;
;;; [2] Expression Parser
;;; ----------------------------------------------------------------------------
;;;
;;; Main = Expression EOI ;
;;; Expression = AddExpression ;
;;; AddExpression = MulExpression { ( "+" | "-" ) MulExpression } ;
;;; MulExpression = UnaryExpression { ( "*" | "/" ) UnaryExpression } ;
;;; UnaryExpression = ( "+" | "-" ) UnaryExpression | ParExpression | Literal ;
;;; ParExpression = "(" Expression ")" ;
;;; Literal = Integer | Float ;
(load-module :parsifal ['parsifal :as 'p])
;;; ----------------------------------------------------------------------------
;;; Token
;;; ----------------------------------------------------------------------------
;; A lexical token: a keyword `type`, the matched text `val`, and the
;; `line` / `column` position the tokenizer parsers in this file record for it.
(deftype :Token [type :keyword, val :string, line :long, column :long]
Object
;; Renders the token as "[<type> <val> (<line>,<column>)]"; type and val are
;; formatted with pr-str.
(toString [this] (str/format "[%s %s (%d,%d)]"
(pr-str (:type this))
(pr-str (:val this))
(:line this)
(:column this)))
;; Exposes the stored position through Parsifal's SourcePosition protocol.
;; NOTE(review): presumably Parsifal uses this for error positions when parsing
;; a token stream -- confirm against the parsifal module.
p/SourcePosition
(line [this] (:line this))
(column [this] (:column this)))
;;; ----------------------------------------------------------------------------
;;; Tokenizer
;;; ----------------------------------------------------------------------------
;; Whitespace = " " | "\t" | "\r" | "\n"
;; Matches a run of one or more whitespace characters and yields a :ws Token
;; holding the matched text and the position reported by `p/pos` before the run.
(p/defparser ws-tok []
  (p/let->> [[ln col] (p/pos)
             blanks   (p/many1 (p/any-char-of " \t\r\n"))]
    (p/always (Token. :ws (apply str blanks) ln col))))
;; Operator = "+" | "-" | "*" | "/"
;; Matches a single operator character and yields an :op Token whose value is
;; that character as a string, tagged with the position reported by `p/pos`.
(p/defparser op-tok []
  (p/let->> [[ln col] (p/pos)
             ch       (p/any-char-of "+-*/")]
    (p/always (Token. :op (str ch) ln col))))
;; LParen = "("
;; Matches an opening parenthesis and yields an :lparen Token tagged with the
;; position reported by `p/pos`.
(p/defparser lparen-tok []
  (p/let->> [[ln col] (p/pos)
             ch       (p/char "(")]
    (p/always (Token. :lparen (str ch) ln col))))
;; RParen = ")"
;; Matches a closing parenthesis and yields an :rparen Token tagged with the
;; position reported by `p/pos`.
(p/defparser rparen-tok []
  (p/let->> [[ln col] (p/pos)
             ch       (p/char ")")]
    (p/always (Token. :rparen (str ch) ln col))))
;; Integer = Digit { Digit }
;; Matches one or more digits and yields an :int Token whose value is the
;; digit string (still text -- conversion to a number happens in the
;; expression parser).
(p/defparser int-tok []
  (p/let->> [[ln col] (p/pos)
             digits   (p/many1 (p/digit))]
    (p/always (Token. :int (apply str digits) ln col))))
;; Float = Digit { Digit } "." Digit { Digit }
;; Matches digits, a dot and more digits, and yields a :float Token whose value
;; is the concatenated text (e.g. "4.2").
;; NOTE(review): this is the only tokenizer built with `p/let->>*` rather than
;; `p/let->>`. Presumably the starred form backtracks on failure so that
;; `int-tok` can retry the same digits when no "." follows -- confirm against
;; the parsifal module documentation before changing it.
(p/defparser float-tok []
  (p/let->>* [[ln col]  (p/pos)
              int-part  (p/many1 (p/digit))
              dot       (p/char ".")
              frac-part (p/many1 (p/digit))]
    (p/always (Token. :float
                      (apply str (flatten (list int-part dot frac-part)))
                      ln col))))
;; Catch-all: matches a run of one or more non-whitespace characters and yields
;; an :unknown Token holding that text, tagged with the position reported by
;; `p/pos`.
(p/defparser unknown-tok []
  (p/let->> [[ln col] (p/pos)
             junk     (p/many1 (p/none-char-of " \t\r\n"))]
    (p/always (Token. :unknown (apply str junk) ln col))))
;; Parses a single token: first consumes any leading whitespace runs, then
;; tries the token parsers in the order listed.
;; The order matters: `float-tok` comes before `int-tok` so the integer part of
;; "4.2" is not taken as an integer on its own, and `unknown-tok` comes last
;; because it accepts any run of non-whitespace characters.
;; NOTE(review): presumably defparser sequences its body forms and returns the
;; last result, so the whitespace tokens are dropped -- confirm against the
;; parsifal module.
(p/defparser token []
(p/many (ws-tok))
(p/choice (op-tok)
(lparen-tok)
(rparen-tok)
(float-tok)
(int-tok)
(unknown-tok)))
;; Tokens = { Token } EOI
;; Collects zero or more tokens, then consumes any trailing whitespace and
;; requires end of input. Yields the collected tokens.
(p/defparser tokens []
  (p/let->> [toks (p/many (p/attempt (token)))
             _    (p/many (p/attempt (ws-tok)))
             _    (p/eof)]
    (p/always toks)))
;; Runs the `tokens` parser over the string `expression` and returns whatever
;; `p/run` yields for it (the tokens collected by the `tokens` parser).
(defn tokenize [expression]
  (-> (tokens)
      (p/run expression)))
;;; ----------------------------------------------------------------------------
;;; Expression Parser
;;; ----------------------------------------------------------------------------
;; Folds a chain of binary operations from left to right.
;;
;;   seed-val  the left-most operand
;;   tuples    a sequence of [operator-name operand] pairs, where
;;             operator-name is a string such as "+" that is resolved to the
;;             function of the same name
;;
;; Example: (chained-math 1 [["+" 6] ["-" 4]]) ; => (1 + 6 - 4) => 3
(defn chained-math [seed-val tuples]
  (reduce (fn [acc [op-name operand]]
            ((resolve (symbol op-name)) acc operand))
          seed-val
          tuples))
;; Returns a token parser that accepts exactly one token of type :op whose
;; value equals `sym` (e.g. "+").
(defn op [sym]
  (p/token (fn [tok]
             (and (= :op (:type tok))
                  (= sym (:val tok))))))
;; Returns a token parser that accepts one token of type :lparen.
(defn lparen []
  (p/token (fn [tok] (= :lparen (:type tok)))))
;; Returns a token parser that accepts one token of type :rparen.
(defn rparen []
  (p/token (fn [tok] (= :rparen (:type tok)))))
;; Literal (integer part): accepts one :int token and yields its value
;; converted with `long`.
(p/defparser int []
  (p/let->> [tok (p/token (fn [t] (= :int (:type t))))]
    (p/always (long (:val tok)))))
;; Literal (float part): accepts one :float token and yields its value
;; converted with `double`.
(p/defparser float []
  (p/let->> [tok (p/token (fn [t] (= :float (:type t))))]
    (p/always (double (:val tok)))))
;; Expression = AddExpression
;; Entry point for a (sub)expression; simply delegates to `add-expr`.
(p/defparser expr []
; no EOF handling in this parser! It's recursively called.
; (`paren-expr` re-enters it for nested expressions; `main` performs the
; end-of-input check.)
(add-expr))
;; AddExpression = MulExpression { ( "+" | "-" ) MulExpression }
;; Parses a first multiplicative operand followed by zero or more
;; (operator, operand) pairs, then folds them left to right with
;; `chained-math` and yields the resulting number.
(p/defparser add-expr []
  (p/let->> [lhs   (mul-expr)
             pairs (p/many (p/let->> [sign (p/either (op "+") (op "-"))
                                      rhs  (mul-expr)]
                             (p/always [(:val sign) rhs])))]
    (p/always (chained-math lhs pairs))))
;; MulExpression = UnaryExpression { ( "*" | "/" ) UnaryExpression }
;; Parses a first unary operand followed by zero or more (operator, operand)
;; pairs, then folds them left to right with `chained-math` and yields the
;; resulting number.
(p/defparser mul-expr []
  (p/let->> [lhs   (unary-expr)
             pairs (p/many (p/let->> [oper (p/either (op "*") (op "/"))
                                      rhs  (unary-expr)]
                             (p/always [(:val oper) rhs])))]
    (p/always (chained-math lhs pairs))))
;; UnaryExpression = ( "+" | "-" ) UnaryExpression | ParExpression | Literal
;; A leading "+" leaves the operand unchanged and a leading "-" negates it;
;; signs may be stacked because the operand is itself a unary expression.
;; Without a sign, falls through to a parenthesized expression or a literal.
(p/defparser unary-expr []
  (p/choice (p/let->> [sign    (p/either (op "+") (op "-"))
                       operand (unary-expr)]
              ;; `sign` can only be "+" or "-" here
              (p/always (if (= "-" (:val sign))
                          (negate operand)
                          operand)))
            (paren-expr)
            (float)
            (int)))
;; ParExpression = "(" Expression ")"
;; Parses an expression enclosed between an :lparen and an :rparen token.
;; Recurses into `expr`, which is what allows arbitrarily nested parentheses.
(p/defparser paren-expr []
(p/between (lparen) (rparen) (expr)))
;; Main = Expression EOI
;; Top-level parser over the token stream. Three cases:
;;   1) empty token stream:      ""          => OK,  value => nil
;;   2) a complete expression:   "3 + 4"     => OK,  value => 7
;;   3) tokens left over:        "(3 + 4) 9" => ERR, Unexpected token '9'
(p/defparser main []
  (p/either (p/eof)
            (p/let->> [result   (expr)
                       ;; nil at end of input, otherwise the next unconsumed token
                       leftover (p/either (p/eof) (p/any))]
              (if (some? leftover)
                (p/never (str "Unexpected token '" (:val leftover) "'"))
                (p/always result)))))
;; Evaluates the arithmetic expression given as a string: tokenizes it first,
;; then runs the `main` expression parser over the resulting tokens and returns
;; its value.
(defn evaluate [expression]
  (let [token-stream (tokenize expression)]
    (p/run (main) token-stream)))
)