axle.ast.LLLanguage.scala Maven / Gradle / Ivy
The newest version!
package axle.ast
import scala.Stream.cons
import scala.Stream.empty
import spire.implicits.eqOps
/**
 * A language defined by a list of LL grammar rules, with FIRST/FOLLOW set computation,
 * parse-table construction and a table-driven parser built on [[LLParserState]].
 *
 * Each entry of `_llRuleDescriptions` pairs the label of a rule's left-hand side with the
 * labels of the symbols on its right-hand side. Every label that occurs as a left-hand side
 * becomes a [[NonTerminal]]; every other right-hand-side label becomes a [[Terminal]].
 *
 * Background on FIRST and FOLLOW sets:
 * http://www.scribd.com/doc/7185137/First-and-Follow-Set
 *
 * @param name                name handed to the [[Language]] superclass
 * @param _llRuleDescriptions (left-hand-side label, right-hand-side labels), one entry per rule
 * @param startSymbolString   label of the start symbol; it must be the left-hand side of at
 *                            least one rule, otherwise the `startSymbol` lookup fails
 */
case class LLLanguage(
  name: String,
  _llRuleDescriptions: List[(String, List[String])],
  startSymbolString: String = "S") extends Language(
  name, Nil, Nil, (text: String) => None, ast => ast) {

  /** One non-terminal per distinct left-hand-side label. */
  val nonTerminals: List[NonTerminal] = (_llRuleDescriptions.map(desc => NonTerminal(desc._1)).toSet).toList

  val nonTerminalsByName = nonTerminals.map(nt => (nt.label, nt)).toMap

  val startSymbol = nonTerminalsByName(startSymbolString)

  /** Right-hand-side labels that never occur as a left-hand side, sorted by label, followed by ⊥. */
  val terminals: List[Terminal] = (_llRuleDescriptions.flatMap(_._2).toSet -- _llRuleDescriptions.map(_._1)).map(Terminal(_)).toList.sortBy(_.label) ++ List(⊥)

  val terminalsByName = terminals.map(t => (t.label, t)).toMap

  /** The rules in description order, numbered from 1. */
  val _llRules: List[LLRule] =
    _llRuleDescriptions.zipWithIndex
      .map({ case (desc, i) => LLRule(i + 1, nonTerminalsByName(desc._1), desc._2.map(symbol(_).get)) })

  def llRules: List[LLRule] = _llRules

  override def toString: String = view.ViewString.llLanguage(this)

  /** Looks a label up among the terminals first, then among the non-terminals. */
  def symbol(label: String): Option[Symbol] =
    (if (terminalsByName.contains(label)) terminalsByName else nonTerminalsByName).get(label)

  /**
   * FIRST set of a sequence of symbols Y1 Y2 .. Yk:
   *
   *  1. If First(Y1) does not contain epsilon, the result is First(Y1).
   *  2. Otherwise the result is everything in First(Y1) except epsilon, together with
   *     everything in First(Y2..Yk).
   *  3. Epsilon is part of the result only when every First(Yi) contains epsilon.
   *
   * The empty sequence yields the empty set.
   */
  def first(XS: List[Symbol]): Set[Symbol] = XS match {
    case Nil => Set()
    case head :: rest => {
      val headFirst = first(head)
      if (!headFirst.contains(ε) || rest.isEmpty) {
        headFirst
      } else {
        // Y1 can vanish: keep its other symbols and let the remainder decide whether
        // epsilon belongs to the result.
        (headFirst - ε) ++ first(rest)
      }
    }
  }

  /**
   * FIRST set of a single symbol:
   *
   *  1. If X is a terminal then First(X) is just X.
   *  2. If there is a production X -> epsilon then epsilon is added to First(X).
   *  3. If there is a production X -> Y1Y2..Yk then First(Y1Y2..Yk) is added to First(X).
   */
  def first(X: Symbol): Set[Symbol] = X match {
    case Terminal(_) => Set(X)
    case nt @ NonTerminal(_) => {
      llRules.filter(_.from === nt).flatMap({ rule =>
        rule.rhs match {
          // Backticks are required: a bare `ε` in a pattern is a variable pattern that
          // would match (and return) any single right-hand-side symbol unexpanded.
          case List(`ε`) => Set(ε) // Case 2
          case _ => first(rule.rhs) // Case 3
        }
      }).toSet
    }
  }

  /**
   * FOLLOW set of a non-terminal:
   *
   *  1. ⊥ (the end of input marker) is in Follow(S), S being the start symbol.
   *  2. For a production A -> aBb (where 'a' can be a whole string), everything in First(b)
   *     except epsilon is in Follow(B).
   *  3. For a production A -> aB, everything in Follow(A) is in Follow(B).
   *  4. For a production A -> aBb where First(b) contains epsilon, everything in Follow(A)
   *     is in Follow(B).
   *
   * NOTE: Follow sets that depend on each other cyclically through different non-terminals
   * are not detected and recurse without bound; only the direct self-dependency
   * (A -> aA) is skipped.
   *
   * @param symbol     the non-terminal whose FOLLOW set is wanted
   * @param followMemo FOLLOW sets computed so far
   * @return the FOLLOW set of `symbol` and the memo extended with every set computed on the way
   */
  def follow(symbol: NonTerminal, followMemo: Map[Symbol, Set[Symbol]]): (Set[Symbol], Map[Symbol, Set[Symbol]]) =
    followMemo.get(symbol) match {
      case Some(known) => (known, followMemo)
      case None => {
        val seed: Set[Symbol] = if (symbol === startSymbol) Set[Symbol](⊥) else Set()
        val (followSet, memo) = llRules.foldLeft((seed, followMemo))({
          case ((accSet, memoSoFar), rule) => {
            // Everything that comes after each occurrence of `symbol` in this right-hand
            // side. The backticks make the pattern compare against the parameter instead
            // of binding a fresh variable.
            val suffixes = rule.rhs.tails.collect({ case `symbol` :: beta => beta }).toList
            suffixes.foldLeft((accSet, memoSoFar))({
              case ((acc, m), beta) => {
                val firstOfBeta = first(beta)
                val fromFirst = firstOfBeta.filter(x => !(x === ε)) // rule 2
                val suffixCanVanish = beta.isEmpty || firstOfBeta.contains(ε)
                // Follow(A) is trivially a subset of Follow(A); recursing on it before it
                // is memoised would never terminate.
                if (suffixCanVanish && !(rule.from === symbol)) {
                  val (parentFollow, nextMemo) = follow(rule.from, m) // rules 3 and 4
                  (acc ++ fromFirst ++ parentFollow, nextMemo)
                } else {
                  (acc ++ fromFirst, m)
                }
              }
            })
          }
        })
        (followSet, memo + (symbol -> followSet))
      }
    }

  /**
   * The parse table. For every rule A -> rhs and every `a` in First(rhs): a terminal `a`
   * maps (A, a) to the rule; epsilon maps (A, t) to the rule for every t in Follow(A).
   * When two rules claim the same cell the later one wins.
   */
  lazy val _parseTable: Map[(NonTerminal, Symbol), LLRule] = {
    llRules.flatMap({ rule =>
      first(rule.rhs).flatMap(a => {
        if (terminalsByName.contains(a.label)) {
          List((rule.from, a) -> rule)
        } else if (a === ε) {
          val memo0 = Map[Symbol, Set[Symbol]]() // TODO !!!
          val (foll, _) = follow(rule.from, memo0) // TODO including $
          foll.map(t => (rule.from, t) -> rule)
        } else {
          Nil
        }
      })
    }).toMap
  }

  def parseTable: Map[(NonTerminal, Symbol), LLRule] = _parseTable

  /**
   * The (action, resulting state) pairs produced by running the parser from `state`.
   * The stream ends when a state is finished or when the next action is a parse error;
   * the error action itself is not emitted.
   */
  def parseStateStream(state: LLParserState): Stream[(LLParserAction, LLParserState)] =
    if (state.finished) {
      empty
    } else {
      val action = state.nextAction
      action match {
        case ParseError(_) => empty
        case _ => {
          val nextState = state(action)
          cons((action, nextState), parseStateStream(nextState))
        }
      }
    }

  /** Initial parser state for `input`: the stack holds the start symbol on top of ⊥. */
  def startState(input: String): LLParserState = LLParserState(this, input, List(startSymbol, ⊥), 0)

  /** A textual trace of the parse: each action followed by the input buffer and the stack. */
  def parseDebug(input: String): String =
    parseStateStream(startState(input)).toList
      .map({
        case (action, state) =>
          action.toString + "\n" +
            " " + state.inputBufferWithMarker + "\n" +
            " " + state.stack.mkString("")
      }).mkString("\n\n")

  /**
   * Parses `input`.
   *
   * @return the rules of the `Reduce` actions in the order they were taken, if the parser
   *         ends in a finished state; `None` otherwise
   */
  def parse(input: String): Option[List[LLRule]] = {
    val start = startState(input)
    val record = parseStateStream(start).toList
    // When no action could be taken at all the record is empty and the start state is
    // also the final state; `record.last` would throw in that case.
    val finalState = record.lastOption.map(_._2).getOrElse(start)
    if (finalState.finished) {
      Some(record.collect({ case (Reduce(rule), _) => rule }))
    } else {
      None
    }
  }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy