All Downloads are FREE. Search and download functionalities are using the official Maven repository.

xerial.core.io.text.parser.Grammar.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2012 Taro L. Saito
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

//--------------------------------------
//
// Grammar.scala
// Since: 2012/08/14 2:39 PM
//
//--------------------------------------

package xerial.core.io.text.parser

import xerial.core.log.Logger
import annotation.tailrec
import xerial.core.io.text.StringScanner
import scala.language.implicitConversions



/**
 * A trait for defining expression grammars.
 *
 * Example:
 * {{{
 * trait MyGrammar extends Grammar {
 *
 *   // Define expression patterns using expr blocks
 *   // '~' denotes a sequence of matches
 *   def comment = expr { "#" ~ untilEOF }
 *   def expr  = expr { value | string | "(" ~ expr ~ ")" }
 *   def value = expr { "0" - "9" | "A" - "Z" | "a" - "z" } // Range of characters
 *
 *   // repetition of patterns and syntactic predicate (!->).
 *   def string     = expr { "\"" ~ repeat("\\" !->  not("\"") | escapeSequence) ~ "\"" }
 *   def escapeSequence = expr { "\\" ~ ("\"" | "\\" | "/" | "b" | "f" | "n" | "r" | "t" | "u" ~ hexDigit ~ hexDigit ~ hexDigit ~ hexDigit) }
 *   def digit      = expr { "0" - "9" }
 *   def hexDigit   = expr { digit | "A" - "F" | "a" - "f" }
 *
 *   // Define tokens to ignore
 *   def whiteSpace = expr { " " | "\t" | "\n" | "\r" }
 *   ignore(whiteSpace)
 * }
 * }}}
 */
trait Grammar extends Logger {

  import Grammar._

  /**
   * Implicitly lifts a string literal to a [[Grammar.Leaf]].
   *
   * The leaf is named after the quoted string but carries only the code of the
   * FIRST character of `t` (`t.charAt(0)`); an empty string therefore throws.
   *
   * @param t the literal appearing in a grammar definition
   * @return a leaf expression for the first character of `t`
   */
  implicit def toToken(t: String): Expr = Leaf("'%s'".format(t), t.charAt(0).toInt)

  /**
   * Operators that become available on string literals inside a grammar.
   *
   * This is a named class (rather than an anonymous structural type) so that
   * `"a" - "z"` and `"x" !-> e` are resolved statically instead of through
   * reflective structural-type calls.
   *
   * @param a the left-hand string literal
   */
  final class StringExprOps(a: String) {

    /**
     * Builds a character range from `a` to `b` (see [[Grammar.CharRange]] for
     * the requirements on both strings).
     */
    def -(b: String): Expr = CharRange(a, b)

    /**
     * Syntactic predicate without consumption of stream: wraps `expr` in a
     * freshly named rule ("Then1", "Then2", ...) and pairs it with the token
     * for `a` in a [[Grammar.SyntacticPredicateFail]] node.
     *
     * @param expr the expression to use, passed by name so it may be recursive
     */
    def !->(expr: => Expr): Expr = {
      val pred = toToken(a)
      SyntacticPredicateFail(pred, rule(newNonDuplicateRuleID("Then"), expr))
    }
  }

  /**
   * Implicitly enriches a string literal with the grammar operators `-` and `!->`.
   *
   * @param a the string literal
   * @return a wrapper exposing the operators defined in [[StringExprOps]]
   */
  implicit def toParserExpr(a: String): StringExprOps = new StringExprOps(a)

  /**
   * An unnamed character predicate accepting every character code except -1
   * (presumably the scanner's end-of-input marker -- confirm against Parser).
   */
  def untilEOF: Expr = CharPred("", (ch: Int) => ch != -1)

  /** Negation of the single-character token built from `ch`. */
  def not(ch: String): Expr = Not(toToken(ch))

  /** Repetition of `expr` separated by `separator` (see [[Grammar.Repeat]]). */
  def repeat(expr: Expr, separator: Expr): Expr = Repeat(expr, separator)

  /** Zero or more repetitions of `expr`. */
  def repeat(expr: Expr): Expr = ZeroOrMore(expr)

  /** Alias of the single-argument `repeat`. */
  def zeroOrMore(expr: Expr): Expr = repeat(expr)

  /** `expr` followed by zero or more occurrences of `separator ~ expr`. */
  def oneOrMore(expr: Expr, separator: Expr): Expr = expr ~ ZeroOrMore(separator ~ expr)

  /** One or more repetitions of `expr`. */
  def oneOrMore(expr: Expr): Expr = OneOrMore(expr)

  /** An optional occurrence of `expr`. */
  def option(expr: Expr): Expr = OptionNode(expr)

  /**
   * Construct a token expr from a string.
   *
   * The token is named after the method that called `token` (looked up on the
   * call stack) and is cached under that name, so later calls from the same
   * method return the cached expression.
   *
   * @param str a string of exactly one character
   * @return the (possibly cached) expression registered under the caller's name
   */
  def token(str: String): Expr = {
    require(str.length == 1, "token string must be a single character")
    // Must be evaluated directly in this frame: the stack index is depth-sensitive.
    val tokenName = getEnclosingMethodName(3)
    ruleCache.getOrElse(tokenName, {
      val leaf = Leaf(tokenName, str.charAt(0).toInt)
      ruleCache += tokenName -> leaf
      debug(f"Define token $tokenName%14s := '$str'")
      leaf
    })
  }

  /**
   * Construct an expression. The expression is called-by-name to enable
   * recursive definitions, e.g.,
   *
   * {{{
   * def expr = expr { ("(" ~ expr ~ ")") | Int }
   * }}}
   *
   * The rule is named after the method that called `expr`.
   *
   * @param expr the expression body, evaluated at most once per rule name
   * @return a reference to the named rule
   */
  // getEnclosingMethodName(3) must stay an argument evaluated in this frame.
  def expr(expr: => Expr): Expr = rule(getEnclosingMethodName(3), expr)

  /**
   * Add ignored exprs. Each one is logged and added to the set handed to the
   * parser by [[parse]].
   *
   * @param rules the expressions whose matches should be ignored
   */
  def ignore(rules: Expr*): Unit = {
    rules foreach { r =>
      debug(s"Tokens that match the expr ${r.name} will be ignored")
      ignoredExprs += r
    }
  }

  /**
   * Construct a new expr with a given name, or return the cached one.
   *
   * @param ruleName the key under which the rule is cached
   * @param expr     the rule body; only evaluated when `ruleName` is not cached yet
   * @return the reference registered under `ruleName`
   */
  def rule(ruleName: String, expr: => Expr): Expr =
    ruleCache.getOrElse(ruleName, {
      // Insert a reference to this expr first to avoid recursively calling this method
      val ref = ExprRef(ruleName, null)
      ruleCache += ruleName -> ref
      // Prepare the expr (this may re-enter rule() and find the reference above)
      val newExpr: Expr = expr
      // Update the reference
      ref.set(newExpr)
      debug(f"Define expr $ruleName%15s := $newExpr%s")
      ref
    })

  // Rules and tokens defined so far, keyed by name.
  private var ruleCache: Map[String, Expr] = Map[String, Expr]()

  // Number of IDs handed out so far for each prefix.
  private val prefixCount = collection.mutable.Map[String, Int]()

  /** Returns `prefix` followed by a counter that starts at 1 and grows per prefix. */
  private def newNonDuplicateRuleID(prefix: String): String = {
    val next = prefixCount.getOrElse(prefix, 0) + 1
    prefixCount(prefix) = next
    "%s%d".format(prefix, next)
  }

  // Expressions registered through ignore().
  private var ignoredExprs: Set[Expr] = Set()

  /**
   * Name of the method found `stackLevel` frames above this one.
   *
   * NOTE(review): callers pass 3, which depends on how many frames the compiler
   * puts between the grammar author's method and this one (presumably trait
   * forwarders) -- confirm for the Scala version in use before restructuring.
   */
  private def getEnclosingMethodName(stackLevel: Int): String = {
    new Throwable().getStackTrace()(stackLevel).getMethodName
  }

  /**
   * Parse `s` with `e` as the start expression, ignoring whatever was registered
   * through [[ignore]].
   *
   * @param e the expression to parse with
   * @param s the input text
   * @return whatever `Parser.parse` returns
   */
  def parse(e: Expr, s: String) = {
    trace("preparing parser")
    val p = new Parser(new StringScanner(s), e, ignoredExprs)
    trace("parse start")
    p.parse
  }
}

/**
 * Syntax grammar
 *
 * @author leo
 */
object Grammar extends Logger {

  /**
   * Renders newline, carriage return and tab as the two-character sequences
   * `\n`, `\r` and `\t`.
   *
   * @param s the text to render; `null` is allowed
   * @return the escaped text, or an empty string when `s` is `null`
   */
  def toVisibleString(s: CharSequence): String =
    if (s == null) ""
    else s.toString.replace("\n", "\\n").replace("\r", "\\r").replace("\t", "\\t")

  /**
   * Parsing expression
   * @param name the display name of this expression
   */
  sealed abstract class Expr(val name: String) {
    a: Expr =>

    /** Sequence: `a` followed by `b`. */
    def ~(b: Expr): Expr = SeqNode(Array(a, b))

    /** Alternation between `a` and `b`. */
    def |(b: Expr): Expr = OrNode(Array(a, b))

    /** Alternation between `a` and `b`, always as a new two-element node. */
    def or(b: Expr): Expr = OrNode(Array(a, b))

    /** The name with newline, carriage return and tab made visible. */
    override def toString: String = toVisibleString(name)
  }

  /** Alternation of `seq`; a further `|` appends to the same node. */
  case class OrNode(seq: Array[Expr]) extends Expr("(%s)".format(seq.map(_.name).mkString(" | "))) {
    override def |(b: Expr): Expr = OrNode(seq :+ b)
  }

  /** Sequence of `seq`; a further `~` appends to the same node. */
  case class SeqNode(seq: Array[Expr]) extends Expr("(%s)".format(seq.map(_.name).mkString(" "))) {
    override def ~(b: Expr): Expr = SeqNode(seq :+ b)
  }

  /**
   * A named, mutable reference to an expression. `Grammar.rule` creates it with
   * a `null` target and fills it in through `set` once the body is evaluated.
   */
  case class ExprRef(override val name: String, private var expr: Expr) extends Expr(name) {
    private[Grammar] def set(newExpr: Expr): Unit = {
      expr = newExpr
    }
  }

  /** Node pairing a predicate (`predToFail`) with an expression `e`. */
  case class SyntacticPredicateFail(predToFail: Expr, e: Expr) extends Expr("!(%s) -> %s".format(predToFail, e))

  /** Negation of `e`. */
  case class Not(e: Expr) extends Expr("!(%s)".format(e))

  /**
   * Inclusive range of character codes.
   *
   * @param a a one-character string giving the lower bound
   * @param b a one-character string giving the upper bound (must not be below `a`)
   */
  case class CharRange(a: String, b: String) extends Expr("[%s-%s]".format(a, b)) {
    require(a.length == 1, "range start must be a single character")
    require(b.length == 1, "range end must be a single character")
    private val begin = a.charAt(0).toInt
    private val end = b.charAt(0).toInt
    require(begin <= end, "range start must not be greater than range end")

    /** True when `i` lies within the range, bounds included. */
    def pred(i: Int): Boolean = begin <= i && i <= end
  }

  /** Expression defined by an arbitrary predicate over character codes. */
  case class CharPred(override val name: String, pred: Int => Boolean) extends Expr(name)

  /** A single token identified by the integer `tt`. */
  case class Leaf(override val name: String, tt: Int) extends Expr(name)

  /** Zero or more repetitions of `a`. */
  case class ZeroOrMore(a: Expr) extends Expr("%s*".format(a.name))

  /** One or more repetitions of `a`; `expr` gives the expansion `a ~ a*`. */
  case class OneOrMore(a: Expr) extends Expr("%s+".format(a.name)) {
    def expr = a ~ ZeroOrMore(a)
  }

  /** An optional occurrence of `a`. */
  case class OptionNode(a: Expr) extends Expr("%s?".format(a.name))

  /** Repetition of `a` separated by `separator`; `expr` gives the expansion `(a (separator a)*)?`. */
  case class Repeat(a: Expr, separator: Expr) extends Expr("rep(%s, %s)".format(a.name, separator.name)) {
    def expr = OptionNode(a ~ ZeroOrMore(separator ~ a))
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy