// All downloads are free. Search and download functionality uses the official Maven repository.
//
// com.codecommit.gll.Parsers.scala (Maven / Gradle / Ivy)
//
// The newest version!
package com.codecommit.gll

import scala.collection.mutable
import mutable.{Buffer, ListBuffer}

import com.codecommit.util._

import Global._

// I hate the way this file is organized, but I don't have a choice
trait Parsers {
  import SetSyntax._

  // Message template for "trailing input" errors.
  // NOTE(review): nothing else in this trait references this constant; the
  // trailing-input failures are built with UnexpectedTrailingChars instead.
  // Confirm whether it is still needed.
  private val TAIL_ERROR_PATTERN = "Unexpected trailing characters: '%s'"

  /**
   * Implicitly lifts a plain string into a parser that matches exactly that
   * literal text.
   */
  implicit def literal(str: String) = LiteralParser(str)

  /** Word-named alias for the `?` operator: makes `p` optional. */
  def opt[A](p: Parser[A]) = p.?()

  /** Word-named alias for the no-separator `*` operator: zero or more repetitions of `p`. */
  def rep[A](p: Parser[A]) = p.*()

  /** Word-named alias for the no-separator `+` operator: one or more repetitions of `p`. */
  def rep1[A](p: Parser[A]) = p.+()

  /**
   * Decides whether the remainder of the input after a top-level parse is
   * acceptable.
   *
   * @param tail the stream left over after a successful parse
   * @return `Some` of the whitespace-handled tail when nothing is left in it,
   *         `None` when input remains
   */
  private def processTail(tail: LineStream) =
    Some(handleWhitespace(tail)) filter { _.isEmpty }

  /**
   * Hook applied to the input before terminals are parsed and before a tail
   * is checked for emptiness.  This default returns the stream unchanged;
   * since it is protected, implementing traits can override it (presumably to
   * skip whitespace, as the name suggests).
   */
  protected def handleWhitespace(s: LineStream) = s

  /**
   * Renders `str` for use in error messages: newline, carriage return, tab
   * and form-feed characters are replaced by their two-character escape
   * sequences (backslash followed by n, r, t or f).  Every other character is
   * copied unchanged.
   *
   * The result is accumulated in a single StringBuilder rather than by
   * repeated string concatenation, so the cost is linear in `str.length`.
   *
   * @param str the raw text
   * @return the text with the four control characters above escaped
   */
  private def canonicalize(str: String) = {
    val escaped = new StringBuilder(str.length)

    str foreach {
      case '\n' => escaped append "\\n"
      case '\r' => escaped append "\\r"
      case '\t' => escaped append "\\t"
      case '\f' => escaped append "\\f"
      case other => escaped append other
    }

    escaped.toString
  }

  // implicit conversions
  //
  // Each conversion below wraps a parser (or a string, via literal) in one of
  // the Rich* classes of this trait so that the corresponding operators become
  // available on it.

  // `|` syntax; the parser argument is by-name and is handed on unevaluated
  implicit def disjunctiveSyntax[A](left: =>Parser[A]) = new RichParser(left)
  implicit def disjunctiveLiterals(left: String) = new RichParser(literal(left))

  // `^^` / `^#` syntax for a single result and for two sequenced results
  implicit def funSyntax1[A](p: Parser[A]) = new RichSyntax1(p)
  implicit def funLitSyntax(p: String) = new RichSyntax1(literal(p))
  implicit def funSyntax2[A, B](p: Parser[A ~ B]) = new RichSyntax2(p)

  // three results: one conversion per nesting shape of `~`
  implicit def funSyntax3l[A, B, C](p: Parser[A ~ B ~ C]) = new RichSyntax3l(p)
  implicit def funSyntax3r[A, B, C](p: Parser[~[A, B ~ C]]) = new RichSyntax3r(p)

  // four results: one conversion per supported nesting shape
  implicit def funSyntax4ll[A, B, C, D](p: Parser[A ~ B ~ C ~ D]) = new RichSyntax4ll(p)
  implicit def funSyntax4lr[A, B, C, D](p: Parser[~[A, B ~ C] ~ D]) = new RichSyntax4lr(p)
  implicit def funSyntax4rl[A, B, C, D](p: Parser[~[A, B ~ C ~ D]]) = new RichSyntax4rl(p)
  implicit def funSyntax4rr[A, B, C, D](p: Parser[~[A, ~[B, C ~ D]]]) = new RichSyntax4rr(p)

  // five results: one conversion per supported nesting shape
  implicit def funSyntax5lll[A, B, C, D, E](p: Parser[A ~ B ~ C ~ D ~ E]) = new RichSyntax5lll(p)
  implicit def funSyntax5llr[A, B, C, D, E](p: Parser[~[A, B ~ C] ~ D ~ E]) = new RichSyntax5llr(p)
  implicit def funSyntax5lrl[A, B, C, D, E](p: Parser[~[A, B ~ C ~ D] ~ E]) = new RichSyntax5lrl(p)
  implicit def funSyntax5lrr[A, B, C, D, E](p: Parser[~[A, ~[B, C ~ D]] ~ E]) = new RichSyntax5lrr(p)
  implicit def funSyntax5rll[A, B, C, D, E](p: Parser[~[A ~ B, C ~ D ~ E]]) = new RichSyntax5rll(p)
  implicit def funSyntax5rlr[A, B, C, D, E](p: Parser[~[A ~ B, ~[C, D ~ E]]]) = new RichSyntax5rlr(p)
  implicit def funSyntax5rrl[A, B, C, D, E](p: Parser[~[A, ~[B, C ~ D ~ E]]]) = new RichSyntax5rrl(p)
  implicit def funSyntax5rrr[A, B, C, D, E](p: Parser[~[A, ~[B, ~[C, D ~ E]]]]) = new RichSyntax5rrr(p)

  // six to eight results: only the fully left-nested shape is covered
  implicit def funSyntax6[A, B, C, D, E, F](p: Parser[A ~ B ~ C ~ D ~ E ~ F]) = new RichSyntax6(p)
  implicit def funSyntax7[A, B, C, D, E, F, G](p: Parser[A ~ B ~ C ~ D ~ E ~ F ~ G]) = new RichSyntax7(p)
  implicit def funSyntax8[A, B, C, D, E, F, G, H](p: Parser[A ~ B ~ C ~ D ~ E ~ F ~ G ~ H]) = new RichSyntax8(p)

  /**
   * Adds the `|` (alternation) operator to a parser.
   *
   * `left` is a by-name parameter and is passed on to DisjunctiveParser, whose
   * constructor parameters are by-name as well, so building an alternation
   * does not evaluate either operand here.
   */
  class RichParser[A](left: =>Parser[A]) {
    // B may be any supertype of A; the resulting parser yields B
    def |[B >: A](right: =>Parser[B]): Parser[B] = new DisjunctiveParser(left, right)
  }

  // map syntax

  /**
   * Mapping syntax for a parser yielding a single value.
   */
  class RichSyntax1[A](p: Parser[A]) {
    /** Transforms the parsed value with `f`; the stream argument is ignored. */
    def ^^[R](f: A => R) = p mapWithTail { (_, value) => f(value) }

    /** Transforms the parsed value with `f`, which also receives the stream supplied by `mapWithTail`. */
    def ^#[R](f: (LineStream, A) => R) = p.mapWithTail(f)
  }

  /**
   * Mapping syntax for a parser yielding the sequence `A ~ B`.
   */
  class RichSyntax2[A, B](p: Parser[A ~ B]) {
    /** Applies `f` to the two sequenced values; the stream argument is ignored. */
    def ^^[R](f: (A, B) => R) = p mapWithTail { case (_, v1 ~ v2) => f(v1, v2) }

    /** Applies `fun` to the stream supplied by `mapWithTail` and the two sequenced values. */
    def ^#[R](fun: (LineStream, A, B) => R) = p.mapWithTail {
      case (stream, v1 ~ v2) => fun(stream, v1, v2)
    }
  }

  /**
   * Mapping syntax for a parser yielding the left-nested sequence `A ~ B ~ C`.
   */
  class RichSyntax3l[A, B, C](p: Parser[A ~ B ~ C]) {
    /** Applies `f` to the three sequenced values; the stream argument is ignored. */
    def ^^[R](f: (A, B, C) => R) = p mapWithTail { case (_, v1 ~ v2 ~ v3) => f(v1, v2, v3) }

    /** Applies `fun` to the stream supplied by `mapWithTail` and the three sequenced values. */
    def ^#[R](fun: (LineStream, A, B, C) => R) = p.mapWithTail {
      case (stream, v1 ~ v2 ~ v3) => fun(stream, v1, v2, v3)
    }
  }

  /**
   * Mapping syntax for a parser yielding the right-nested sequence `A ~ (B ~ C)`.
   */
  class RichSyntax3r[A, B, C](p: Parser[~[A, B ~ C]]) {
    /** Applies `f` to the three sequenced values; the stream argument is ignored. */
    def ^^[R](f: (A, B, C) => R) = p mapWithTail { case (_, v1 ~ (v2 ~ v3)) => f(v1, v2, v3) }

    /** Applies `fun` to the stream supplied by `mapWithTail` and the three sequenced values. */
    def ^#[R](fun: (LineStream, A, B, C) => R) = p.mapWithTail {
      case (stream, v1 ~ (v2 ~ v3)) => fun(stream, v1, v2, v3)
    }
  }

  /**
   * Mapping syntax for a parser yielding the left-nested sequence `A ~ B ~ C ~ D`.
   */
  class RichSyntax4ll[A, B, C, D](p: Parser[A ~ B ~ C ~ D]) {
    /** Applies `f` to the four sequenced values; the stream argument is ignored. */
    def ^^[R](f: (A, B, C, D) => R) = p mapWithTail {
      case (_, v1 ~ v2 ~ v3 ~ v4) => f(v1, v2, v3, v4)
    }

    /** Applies `fun` to the stream supplied by `mapWithTail` and the four sequenced values. */
    def ^#[R](fun: (LineStream, A, B, C, D) => R) = p.mapWithTail {
      case (stream, v1 ~ v2 ~ v3 ~ v4) => fun(stream, v1, v2, v3, v4)
    }
  }

  /**
   * Mapping syntax for a parser yielding the sequence `(A ~ (B ~ C)) ~ D`.
   */
  class RichSyntax4lr[A, B, C, D](p: Parser[~[A, B ~ C] ~ D]) {
    /** Applies `f` to the four sequenced values; the stream argument is ignored. */
    def ^^[R](f: (A, B, C, D) => R) = p mapWithTail {
      case (_, v1 ~ (v2 ~ v3) ~ v4) => f(v1, v2, v3, v4)
    }

    /** Applies `fun` to the stream supplied by `mapWithTail` and the four sequenced values. */
    def ^#[R](fun: (LineStream, A, B, C, D) => R) = p.mapWithTail {
      case (stream, v1 ~ (v2 ~ v3) ~ v4) => fun(stream, v1, v2, v3, v4)
    }
  }

  /**
   * Mapping syntax for a parser yielding the sequence `A ~ ((B ~ C) ~ D)`.
   */
  class RichSyntax4rl[A, B, C, D](p: Parser[~[A, B ~ C ~ D]]) {
    /** Applies `f` to the four sequenced values; the stream argument is ignored. */
    def ^^[R](f: (A, B, C, D) => R) = p mapWithTail {
      case (_, v1 ~ ((v2 ~ v3) ~ v4)) => f(v1, v2, v3, v4)
    }

    /** Applies `fun` to the stream supplied by `mapWithTail` and the four sequenced values. */
    def ^#[R](fun: (LineStream, A, B, C, D) => R) = p.mapWithTail {
      case (stream, v1 ~ ((v2 ~ v3) ~ v4)) => fun(stream, v1, v2, v3, v4)
    }
  }

  /**
   * Mapping syntax for a parser yielding the right-nested sequence `A ~ (B ~ (C ~ D))`.
   */
  class RichSyntax4rr[A, B, C, D](p: Parser[~[A, ~[B, C ~ D]]]) {
    /** Applies `f` to the four sequenced values; the stream argument is ignored. */
    def ^^[R](f: (A, B, C, D) => R) = p mapWithTail {
      case (_, v1 ~ (v2 ~ (v3 ~ v4))) => f(v1, v2, v3, v4)
    }

    /** Applies `fun` to the stream supplied by `mapWithTail` and the four sequenced values. */
    def ^#[R](fun: (LineStream, A, B, C, D) => R) = p.mapWithTail {
      case (stream, v1 ~ (v2 ~ (v3 ~ v4))) => fun(stream, v1, v2, v3, v4)
    }
  }

  /**
   * Mapping syntax for a parser yielding the left-nested sequence `A ~ B ~ C ~ D ~ E`.
   */
  class RichSyntax5lll[A, B, C, D, E](p: Parser[A ~ B ~ C ~ D ~ E]) {
    /** Applies `f` to the five sequenced values; the stream argument is ignored. */
    def ^^[R](f: (A, B, C, D, E) => R) = p mapWithTail {
      case (_, v1 ~ v2 ~ v3 ~ v4 ~ v5) => f(v1, v2, v3, v4, v5)
    }

    /** Applies `fun` to the stream supplied by `mapWithTail` and the five sequenced values. */
    def ^#[R](fun: (LineStream, A, B, C, D, E) => R) = p.mapWithTail {
      case (stream, v1 ~ v2 ~ v3 ~ v4 ~ v5) => fun(stream, v1, v2, v3, v4, v5)
    }
  }

  /**
   * Mapping syntax for a parser yielding the sequence `((A ~ (B ~ C)) ~ D) ~ E`.
   */
  class RichSyntax5llr[A, B, C, D, E](p: Parser[~[A, B ~ C] ~ D ~ E]) {
    /** Applies `f` to the five sequenced values; the stream argument is ignored. */
    def ^^[R](f: (A, B, C, D, E) => R) = p mapWithTail {
      case (_, (v1 ~ (v2 ~ v3)) ~ v4 ~ v5) => f(v1, v2, v3, v4, v5)
    }

    /** Applies `fun` to the stream supplied by `mapWithTail` and the five sequenced values. */
    def ^#[R](fun: (LineStream, A, B, C, D, E) => R) = p.mapWithTail {
      case (stream, (v1 ~ (v2 ~ v3)) ~ v4 ~ v5) => fun(stream, v1, v2, v3, v4, v5)
    }
  }

  /**
   * Mapping syntax for a parser yielding the sequence `(A ~ ((B ~ C) ~ D)) ~ E`.
   */
  class RichSyntax5lrl[A, B, C, D, E](p: Parser[~[A, B ~ C ~ D] ~ E]) {
    /** Applies `f` to the five sequenced values; the stream argument is ignored. */
    def ^^[R](f: (A, B, C, D, E) => R) = p mapWithTail {
      case (_, (v1 ~ (v2 ~ v3 ~ v4)) ~ v5) => f(v1, v2, v3, v4, v5)
    }

    /** Applies `fun` to the stream supplied by `mapWithTail` and the five sequenced values. */
    def ^#[R](fun: (LineStream, A, B, C, D, E) => R) = p.mapWithTail {
      case (stream, (v1 ~ (v2 ~ v3 ~ v4)) ~ v5) => fun(stream, v1, v2, v3, v4, v5)
    }
  }

  /**
   * Mapping syntax for a parser yielding the sequence `(A ~ (B ~ (C ~ D))) ~ E`.
   */
  class RichSyntax5lrr[A, B, C, D, E](p: Parser[~[A, ~[B, C ~ D]] ~ E]) {
    /** Applies `f` to the five sequenced values; the stream argument is ignored. */
    def ^^[R](f: (A, B, C, D, E) => R) = p mapWithTail {
      case (_, (v1 ~ (v2 ~ (v3 ~ v4))) ~ v5) => f(v1, v2, v3, v4, v5)
    }

    /** Applies `fun` to the stream supplied by `mapWithTail` and the five sequenced values. */
    def ^#[R](fun: (LineStream, A, B, C, D, E) => R) = p.mapWithTail {
      case (stream, (v1 ~ (v2 ~ (v3 ~ v4))) ~ v5) => fun(stream, v1, v2, v3, v4, v5)
    }
  }

  /**
   * Mapping syntax for a parser yielding the sequence `(A ~ B) ~ ((C ~ D) ~ E)`.
   */
  class RichSyntax5rll[A, B, C, D, E](p: Parser[~[A ~ B, C ~ D ~ E]]) {
    /** Applies `f` to the five sequenced values; the stream argument is ignored. */
    def ^^[R](f: (A, B, C, D, E) => R) = p mapWithTail {
      case (_, (v1 ~ v2) ~ (v3 ~ v4 ~ v5)) => f(v1, v2, v3, v4, v5)
    }

    /** Applies `fun` to the stream supplied by `mapWithTail` and the five sequenced values. */
    def ^#[R](fun: (LineStream, A, B, C, D, E) => R) = p.mapWithTail {
      case (stream, (v1 ~ v2) ~ (v3 ~ v4 ~ v5)) => fun(stream, v1, v2, v3, v4, v5)
    }
  }

  /**
   * Mapping syntax for a parser yielding the sequence `(A ~ B) ~ (C ~ (D ~ E))`.
   */
  class RichSyntax5rlr[A, B, C, D, E](p: Parser[~[A ~ B, ~[C, D ~ E]]]) {
    /** Applies `f` to the five sequenced values; the stream argument is ignored. */
    def ^^[R](f: (A, B, C, D, E) => R) = p mapWithTail {
      case (_, (v1 ~ v2) ~ (v3 ~ (v4 ~ v5))) => f(v1, v2, v3, v4, v5)
    }

    /** Applies `fun` to the stream supplied by `mapWithTail` and the five sequenced values. */
    def ^#[R](fun: (LineStream, A, B, C, D, E) => R) = p.mapWithTail {
      case (stream, (v1 ~ v2) ~ (v3 ~ (v4 ~ v5))) => fun(stream, v1, v2, v3, v4, v5)
    }
  }

  /**
   * Mapping syntax for a parser yielding the sequence `A ~ (B ~ ((C ~ D) ~ E))`.
   */
  class RichSyntax5rrl[A, B, C, D, E](p: Parser[~[A, ~[B, C ~ D ~ E]]]) {
    /** Applies `f` to the five sequenced values; the stream argument is ignored. */
    def ^^[R](f: (A, B, C, D, E) => R) = p mapWithTail {
      case (_, v1 ~ (v2 ~ (v3 ~ v4 ~ v5))) => f(v1, v2, v3, v4, v5)
    }

    /** Applies `fun` to the stream supplied by `mapWithTail` and the five sequenced values. */
    def ^#[R](fun: (LineStream, A, B, C, D, E) => R) = p.mapWithTail {
      case (stream, v1 ~ (v2 ~ (v3 ~ v4 ~ v5))) => fun(stream, v1, v2, v3, v4, v5)
    }
  }

  /**
   * Mapping syntax for a parser yielding the right-nested sequence
   * `A ~ (B ~ (C ~ (D ~ E)))`.
   */
  class RichSyntax5rrr[A, B, C, D, E](p: Parser[~[A, ~[B, ~[C, D ~ E]]]]) {
    /** Applies `f` to the five sequenced values; the stream argument is ignored. */
    def ^^[R](f: (A, B, C, D, E) => R) = p mapWithTail {
      case (_, v1 ~ (v2 ~ (v3 ~ (v4 ~ v5)))) => f(v1, v2, v3, v4, v5)
    }

    /** Applies `fun` to the stream supplied by `mapWithTail` and the five sequenced values. */
    def ^#[R](fun: (LineStream, A, B, C, D, E) => R) = p.mapWithTail {
      case (stream, v1 ~ (v2 ~ (v3 ~ (v4 ~ v5)))) => fun(stream, v1, v2, v3, v4, v5)
    }
  }

  /**
   * Mapping syntax for a parser yielding six left-nested sequenced values.
   */
  class RichSyntax6[A, B, C, D, E, F](p: Parser[A ~ B ~ C ~ D ~ E ~ F]) {
    /** Applies `f` to the six sequenced values; the stream argument is ignored. */
    def ^^[R](f: (A, B, C, D, E, F) => R) = p mapWithTail {
      case (_, v1 ~ v2 ~ v3 ~ v4 ~ v5 ~ v6) => f(v1, v2, v3, v4, v5, v6)
    }

    /** Applies `fun` to the stream supplied by `mapWithTail` and the six sequenced values. */
    def ^#[R](fun: (LineStream, A, B, C, D, E, F) => R) = p.mapWithTail {
      case (stream, v1 ~ v2 ~ v3 ~ v4 ~ v5 ~ v6) => fun(stream, v1, v2, v3, v4, v5, v6)
    }
  }

  /**
   * Mapping syntax for a parser yielding seven left-nested sequenced values.
   */
  class RichSyntax7[A, B, C, D, E, F, G](p: Parser[A ~ B ~ C ~ D ~ E ~ F ~ G]) {
    /** Applies `f` to the seven sequenced values; the stream argument is ignored. */
    def ^^[R](f: (A, B, C, D, E, F, G) => R) = p mapWithTail {
      case (_, v1 ~ v2 ~ v3 ~ v4 ~ v5 ~ v6 ~ v7) => f(v1, v2, v3, v4, v5, v6, v7)
    }

    /** Applies `fun` to the stream supplied by `mapWithTail` and the seven sequenced values. */
    def ^#[R](fun: (LineStream, A, B, C, D, E, F, G) => R) = p.mapWithTail {
      case (stream, v1 ~ v2 ~ v3 ~ v4 ~ v5 ~ v6 ~ v7) => fun(stream, v1, v2, v3, v4, v5, v6, v7)
    }
  }

  /**
   * Mapping syntax for a parser yielding eight left-nested sequenced values.
   */
  class RichSyntax8[A, B, C, D, E, F, G, H](p: Parser[A ~ B ~ C ~ D ~ E ~ F ~ G ~ H]) {
    /** Applies `f` to the eight sequenced values; the stream argument is ignored. */
    def ^^[R](f: (A, B, C, D, E, F, G, H) => R) = p mapWithTail {
      case (_, v1 ~ v2 ~ v3 ~ v4 ~ v5 ~ v6 ~ v7 ~ v8) => f(v1, v2, v3, v4, v5, v6, v7, v8)
    }

    /** Applies `fun` to the stream supplied by `mapWithTail` and the eight sequenced values. */
    def ^#[R](fun: (LineStream, A, B, C, D, E, F, G, H) => R) = p.mapWithTail {
      case (stream, v1 ~ v2 ~ v3 ~ v4 ~ v5 ~ v6 ~ v7 ~ v8) =>
        fun(stream, v1, v2, v3, v4, v5, v6, v7, v8)
    }
  }

  //////////////////////////////////////////////////////////////////////////////

  /**
   * Common interface of every parser defined in this trait.  A parser is a
   * function from a LineStream to a Stream of Results (see the `extends`
   * clause).  Sub-parses go through `chain`, which reports each result to a
   * callback instead of returning it.
   */
  sealed trait Parser[+R] extends (LineStream => Stream[Result[R]]) { self =>
    // fixed to true by TerminalParser and to false by NonTerminalParser
    val terminal: Boolean

    /**
     * The FIRST set of this parser as plain characters.  When the computed
     * set contains None (epsilon), UniversalCharSet is used instead.
     */
    lazy val first = {
      // a None result from computeFirst is treated as the empty set
      val set = computeFirst(Set()) getOrElse Set()

      if (set contains None)
        UniversalCharSet         // if \epsilon \in FIRST
      else
        set flatMap { x => x }
    }

    // overridden to true by PreferredParser; consulted by DisjunctiveParser
    def isPreferred = false

    /**
     * Computes the FIRST set of this parser.
     *
     * @param seen parsers already visited during this computation
     * @return Some set of possible first characters, in which a None element
     *         stands for \epsilon; or None when the implementation detects
     *         left-recursion through `seen` (as SequentialParser and
     *         DisjunctiveParser do).
     */
    def computeFirst(seen: Set[Parser[Any]]): Option[Set[Option[Char]]]

    /**
     * Runs this parser on `in`, passing every result (success or failure)
     * to `f`.  `t` is the trampoline on which further work may be queued.
     */
    def chain(t: Trampoline, in: LineStream)(f: Result[R] => Unit)

    // syntax

    // convenience entry point: wraps the string in a LineStream
    def apply(str: String): Stream[Result[R]] = apply(LineStream(str))

    // mapWithTail with the stream argument ignored
    def map[R2](f: R => R2) = mapWithTail { (_, r) => f(r) }

    /**
     * Maps successful results through `f`.  In both implementations in this
     * file the LineStream handed to `f` is the input the parse started from
     * (whitespace-handled in TerminalParser), not the remaining tail.
     */
    def mapWithTail[R2](f: (LineStream, R) => R2): Parser[R2]

    /**
     * Runs this parser, then for each success runs the parser obtained by
     * applying `f1` to the result on the remaining tail.  Failures of this
     * parser are passed through unchanged.
     */
    def flatMap[R2](f1: R => Parser[R2]): Parser[R2] = new NonTerminalParser[R2] {
      def computeFirst(seen: Set[Parser[Any]]) = self.computeFirst(seen + this)

      def chain(t: Trampoline, in: LineStream)(f2: Result[R2] => Unit) {
        self.chain(t, in) {
          case Success(res1, tail) => f1(res1).chain(t, tail)(f2)
          case f: Failure => f2(f)
        }
      }
    }

    /**
     * Keeps only the successes whose value satisfies `f`; a rejected success
     * becomes a SyntaxError failure located at the original input `in`.
     */
    def filter(f: R => Boolean): Parser[R] = new NonTerminalParser[R] {
      override def isPreferred = self.isPreferred

      def computeFirst(seen: Set[Parser[Any]]) = self.computeFirst(seen + this)

      def chain(t: Trampoline, in: LineStream)(f2: Result[R] => Unit) {
        self.chain(t, in) {
          case s @ Success(res, _) => {
            if (f(res))
              f2(s)
            else
              f2(Failure(SyntaxError, in))
          }

          case f: Failure => f2(f)
        }
      }
    }

    // alternation with `this` as the left operand; `alternate` is by-name
    def orElse[R2 >: R](alternate: =>Parser[R2]): Parser[R2] = new DisjunctiveParser(this, alternate)

    // operators

    // sequence: this followed by that, yielding both results
    def ~[R2](that: Parser[R2]): Parser[R ~ R2] = new SequentialParser(this, that)

    // sequence keeping only the left result
    def <~[R2](that: Parser[R2]) = this ~ that map { case a ~ _ => a }

    // sequence keeping only the right result
    def ~>[R2](that: Parser[R2]) = this ~ that map { case _ ~ b => b }

    // zero or more: optional one-or-more, with an absent match mapped to Nil
    def *(): Parser[List[R]] = (this+?) map { _ getOrElse Nil }

    // zero or more, separated by `sep`
    def *(sep: Parser[_]): Parser[List[R]] = (this + sep).? ^^ { _ getOrElse Nil }

    /**
     * One or more repetitions.  Each success of `self` is reported as a
     * one-element list; then, if the lookahead test passes, this repetition
     * is queued again on the tail and `res1` is prepended to each longer
     * result.
     */
    def +(): Parser[List[R]] = new NonTerminalParser[List[R]] {
      def computeFirst(seen: Set[Parser[Any]]) = self.computeFirst(seen + this)

      def chain(t: Trampoline, in: LineStream)(f: Result[List[R]] => Unit) {
        t.add(self, in) {
          case Success(res1, tail) => {
            f(Success(res1 :: Nil, tail))

            // NOTE(review): the lookahead tests in.head (the first character of the
            // input this iteration started from) rather than tail.head -- confirm
            // that this is intended.
            if ((!tail.isEmpty || first.size == 0) && (tail.isEmpty || (first.contains(in.head) || first.size == 0))) {      // lookahead
              t.add(this, tail) {
                case Success(res2, tail) => f(Success(res1 :: res2, tail))
                case res: Failure => f(res)
              }
            }
          }

          case res: Failure => f(res)
        }
      }

      override def toString = self.toString + "+"
    }

    // one or more, separated by `sep`; separator results are dropped by ~>
    def +(sep: Parser[_]) = this ~ (sep ~> this).* ^^ { _ :: _ }

    /**
     * Optional parser.  Always reports Success(None) on the untouched input
     * first, then queues `self` and wraps each of its successes in Some.
     */
    def ?(): Parser[Option[R]] = new NonTerminalParser[Option[R]] {
      // FIRST(self) plus None (epsilon); just epsilon when self yields no set
      def computeFirst(seen: Set[Parser[Any]]) =
        Some(self.computeFirst(seen) map { _ + None } getOrElse Set(None))

      def chain(t: Trampoline, in: LineStream)(f: Result[Option[R]] => Unit) {
        f(Success(None, in))

        t.add(self, in) {
          case Success(res, tail) => f(Success(Some(res), tail))
          case res: Failure => f(res)
        }
      }

      override def toString = self.toString + "?"
    }

    // optional one-or-more
    def +? = (this+)?

    /**
     * Exclusion: behaves like this parser, except that a success is turned
     * into a SyntaxError failure when `not` also succeeds on the same input
     * with an identical tail.
     */
    def \(not: TerminalParser[Any]): Parser[R] = new NonTerminalParser[R] {
      def computeFirst(seen: Set[Parser[Any]]) = self.computeFirst(seen)

      def chain(t: Trampoline, in: LineStream)(f: Result[R] => Unit) {
        // evaluated at most once, and only if self produces a success
        lazy val sub = not.parse(in)

        self.chain(t, in) {
          case s @ Success(res1, tail) => {
            if (sub match { case Success(_, `tail`) => true case _ => false })
              f(Failure(SyntaxError, in))
            else
              f(s)
          }

          case r: Failure => f(r)
        }
      }

      override def toString = "(%s \\ %s)".format(self, not)
    }

    // replaces every successful result with `v`; `v` is by-name, so it is re-evaluated per result
    def ^^^[R2](v: =>R2) = map { _ => v }
  }

  /**
   * A parser that produces exactly one Result directly from the input via
   * `parse`, without queuing work on the trampoline.
   */
  trait TerminalParser[+R] extends Parser[R] { self =>
    final val terminal = true

    /**
     * Top-level entry point: parses the whitespace-handled input and returns
     * a one-element Stream.  A success is kept only if processTail accepts
     * the remaining tail; otherwise it becomes an UnexpectedTrailingChars
     * failure.
     */
    final def apply(in: LineStream) = Stream(parse(handleWhitespace(in)) match {
      case Success(res, tail) => processTail(tail) match {
        case Some(tail) => Success(res, tail)
        case None => Failure(UnexpectedTrailingChars(canonicalize(tail.mkString)), tail)
      }

      case x => x
    })

    /**
     * For terminal parsing, this passes the result of parse() on the
     * whitespace-handled input straight to the callback.  Unlike apply(),
     * it does not check the tail and does not use the trampoline.
     */
    def chain(t: Trampoline, in: LineStream)(f: Result[R] => Unit) {
      f(parse(handleWhitespace(in)))
    }

    // the actual recognition step, supplied by concrete terminals
    protected[gll] def parse(in: LineStream): Result[R]

    /**
     * Sequencing.  When `other` is also a terminal the two are fused into a
     * single TerminalParser that parses both directly; otherwise the generic
     * SequentialParser from Parser.~ is used.
     */
    override def ~[R2](other: Parser[R2]) = other match {
      case other: TerminalParser[R2] => {
        new TerminalParser[R ~ R2] {
          // same FIRST-set combination as SequentialParser.computeFirst, without
          // the left-recursion check
          def computeFirst(s: Set[Parser[Any]]) = {
            val sub = self.computeFirst(s)

            sub map { set =>
              if (set.size == 0 || set.contains(None))
                other.computeFirst(s) match {
                  case Some(set2) => {
                    if (set.isComplement)
                      (set - None) ++ set2
                    else
                      set2 ++ (set - None)
                  }

                  case None => set
                }
              else
                set
            }
          }

          // whitespace handling is applied before each of the two terminals
          def parse(in: LineStream) = self.parse(handleWhitespace(in)) match {
            case Success(res1, tail) => other.parse(handleWhitespace(tail)) match {
              case Success(res2, tail) => Success(new ~(res1, res2), tail)
              case f: Failure => f
            }

            case f: Failure => f
          }
        }
      }

      case other => super.~(other)
    }

    /**
     * Terminal version of exclusion: a success of this parser is rejected
     * with a SyntaxError when `not` succeeds on the same input with an
     * identical tail.
     */
    override def \(not: TerminalParser[Any]) = new TerminalParser[R] {
      override def isPreferred = self.isPreferred

      def computeFirst(s: Set[Parser[Any]]) = self.computeFirst(s)

      def parse(in: LineStream) = self.parse(in) match {
        case s @ Success(res1, tail) => {
          val sub = not.parse(in)

          if (sub match { case Success(_, `tail`) => true case _ => false })
            Failure(SyntaxError, in)
          else
            s
        }

        case f: Failure => f
      }
    }

    /**
     * Maps successful results through `f`.  The LineStream passed to `f` is
     * the whitespace-handled input (`newTail`), not the tail remaining after
     * the parse.
     */
    def mapWithTail[R2](f: (LineStream, R) => R2): Parser[R2] = new MappedParser[R, R2](self, f) with TerminalParser[R2] {
      override def isPreferred = self.isPreferred

      def parse(in: LineStream) = {
        val newTail = handleWhitespace(in)
        self.parse(newTail) match {
          case Success(res, tail) => Success(f(newTail, res), tail)
          case x: Failure => x
        }
      }
    }

    // wraps this terminal so that isPreferred reports true
    def preferred: TerminalParser[R] = PreferredParser(this)
  }

  /**
   * A parser whose work is driven by a Trampoline.  Implementations provide
   * `chain` and `computeFirst`; `apply` sets up a fresh trampoline for a
   * top-level parse.
   */
  trait NonTerminalParser[+R] extends Parser[R] { self =>
    final val terminal = false

    /**
     * This method takes care of kicking off a new
     * parse process.  We will never call this method to
     * handle a sub-parse.  In such situations, we will use
     * the trampoline to queue results.
     *
     * Note: to ensure greedy matching (for PEG compatibility)
     * we define any Success with a non-empty tail to be a
     * Failure
     */
    final def apply(in: LineStream) = {
      val t = new Trampoline

      // filled by the top-level callback passed to chain below
      val successes = mutable.Set[Success[R]]()
      val failures = mutable.Set[Failure]()

      // becomes true once any success with an accepted tail has been seen
      var recognized = false

      // Steps the trampoline.  Successes collected after a step are emitted
      // and cleared, followed by the results of further stepping; once the
      // trampoline is exhausted, failures are returned only if nothing was
      // ever recognized.
      def parse(): Stream[Result[R]] = {
        if (t.hasNext) {
          t.step()

          if (successes.isEmpty) {
            parse()
          } else {
            val results = successes.toList
            successes.clear()
            results.toStream append parse()
          }
        } else {
          val results = if (recognized) successes else failures
          results.toStream
        }
      }

      chain(t, in) {
        case s @ Success(res, tail) => {
          tracef("Top-Level Success: %s%n", s)
          processTail(tail) match {
            case Some(tail) => {
              tracef("Tail Accepted: %s%n", s)
              recognized = true
              successes += Success(res, tail)
            }

            // leftover input: recorded as a failure rather than a success
            case None => {
              tracef("Tail Rejected: %s%n", s)
              failures += Failure(UnexpectedTrailingChars(canonicalize(tail.mkString)), tail)
            }
          }
        }

        case f: Failure => {
          tracef("Top-Level Failure: %s%n", f)
          failures += f
        }
      }

      parse()
    }

    /**
     * Maps successful results through `f1`.  The LineStream passed to `f1`
     * is the input `in` this parser was chained on, not the remaining tail.
     */
    def mapWithTail[R2](f1: (LineStream, R) => R2): Parser[R2] = new MappedParser[R, R2](self, f1) with NonTerminalParser[R2] {
      def chain(t: Trampoline, in: LineStream)(f2: Result[R2] => Unit) {
        self.chain(t, in) {
          case Success(res, tail) => f2(Success(f1(in, res), tail))
          case f: Failure => f2(f)
        }
      }
    }
  }

  /**
   * Shared base of the parsers returned by mapWithTail.  FIRST-set
   * computation and toString are delegated to the underlying parser `p`.
   * The mapping function itself is applied by the anonymous subclasses
   * created in TerminalParser.mapWithTail and NonTerminalParser.mapWithTail;
   * `f1` is stored here but not read within this class.
   */
  abstract class MappedParser[A, +B](private val p: Parser[A], private val f1: (LineStream, A) => B) extends Parser[B] {
    // this parser is added to the visited set before delegating
    def computeFirst(s: Set[Parser[Any]]) = p.computeFirst(s + this)

    override def toString = p.toString
  }

  /**
   * Used for setting up a trampoline wrapper for disjunction
   * alternatives (for the sake of left-recursion).
   *
   * Everything except `chain` (left abstract here) is forwarded to the
   * wrapped parser.  Two thunks are equal exactly when their wrapped parsers
   * are equal, and they share the wrapped parser's hash code.
   */
  private[gll] abstract class ThunkParser[+A](private val self: Parser[A]) extends NonTerminalParser[A] {
    override def isPreferred = self.isPreferred

    def computeFirst(s: Set[Parser[Any]]) = self.computeFirst(s)

    override def toString = self.toString

    override def equals(other: Any) = other match {
      // The type argument is erased at runtime, so it cannot be tested;
      // match on the wildcard type instead of an unchecked ThunkParser[A].
      case that: ThunkParser[_] => this.self == that.self
      case _ => false
    }

    override def hashCode = self.hashCode
  }

  /**
   * Wrapper that marks a terminal as preferred: isPreferred is true, while
   * FIRST-set computation and parsing are forwarded unchanged to `delegate`.
   * DisjunctiveParser tries preferred alternatives first.
   */
  private[gll] case class PreferredParser[+R](delegate: TerminalParser[R]) extends TerminalParser[R] {
    override def isPreferred = true

    def computeFirst(s: Set[Parser[Any]]) = delegate.computeFirst(s)

    protected[gll] def parse(in: LineStream) = delegate.parse(in)
  }

  //////////////////////////////////////////////////////////////////////////////

  /**
   * Terminal parser that matches the exact text `str` at the start of the
   * input.
   */
  case class LiteralParser(str: String) extends TerminalParser[String] {
    /**
     * FIRST is the literal's first character, or epsilon (None) for the
     * empty literal.
     */
    def computeFirst(s: Set[Parser[Any]]) = {
      Some(if (str.length > 0) Set(Some(str charAt 0)) else Set(None))
    }

    /**
     * Compares the first `str.length` characters of `in` with `str`.
     *
     * @return Success with `str` and the rest of the input on a match;
     *         an UnexpectedEndOfStream failure when the input is shorter than
     *         the literal; an ExpectedLiteral failure on a mismatch.  Both
     *         failures are located at `in`.
     */
    def parse(in: LineStream) = {
      val wanted = str.length
      val prefix = in take wanted

      if (prefix.lengthCompare(wanted) == 0) {
        val matches = prefix.zipWithIndex forall {
          case (ch, idx) => ch == str.charAt(idx)
        }

        if (matches) Success(str, in drop wanted)
        else Failure(ExpectedLiteral(str, canonicalize(prefix.mkString)), in)
      } else {
        Failure(UnexpectedEndOfStream(Some(str)), in)
      }
    }

    // literals are equal exactly when their texts are equal
    override def equals(other: Any) = other match {
      case LiteralParser(otherStr) => str == otherStr
      case _ => false
    }

    override def hashCode = str.hashCode

    override def toString = "'%s'".format(str)
  }

  /**
   * Parses `left` followed by `right` and yields both results as `A ~ B`.
   * Two sequential parsers are equal when both of their operands are equal.
   */
  class SequentialParser[+A, +B](private val left: Parser[A], private val right: Parser[B]) extends NonTerminalParser[A ~ B] {
    /**
     * FIRST(left), extended with FIRST(right) when left's set is empty or
     * contains epsilon (None); the epsilon marker is removed from left's
     * contribution in that case.  Returns None when this parser is already
     * in `seen`.
     */
    def computeFirst(seen: Set[Parser[Any]]) = {
      if (seen contains this) None    // left-recursion detected!
      else {
        val newSeen = seen + this
        val sub = left.computeFirst(newSeen)

        sub map { set =>
          if (set.size == 0 || set.contains(None))
            right.computeFirst(newSeen) match {
              case Some(set2) => {
                // NOTE(review): the operand order of ++ depends on isComplement,
                // presumably so that a complement set is the receiver -- confirm
                // against SetSyntax.
                if (set.isComplement)
                  (set - None) ++ set2
                else
                  set2 ++ (set - None)
              }

              case None => set
            }
          else
            set
        }
      }
    }

    /**
     * Chains `left` on the input and, for each of its successes, chains
     * `right` on the remaining tail.  Failures from either side are passed
     * to `f` unchanged.
     */
    def chain(t: Trampoline, in: LineStream)(f: Result[A ~ B] => Unit) {
      left.chain(t, in) {
        case Success(res1, tail) => {
          right.chain(t, tail) {
            case Success(res2, tail) => f(Success(new ~(res1, res2), tail))

            case res: Failure => f(res)
          }
        }

        case res: Failure => f(res)
      }
    }

    override def equals(other: Any) = other match {
      // Type arguments are erased at runtime, so they cannot be tested;
      // match on wildcards instead of an unchecked SequentialParser[A, B].
      case that: SequentialParser[_, _] => {
        this.left == that.left && this.right == that.right
      }

      case _ => false
    }

    override def hashCode = left.hashCode + right.hashCode

    override def toString = "(%s ~ %s)".format(left, right)
  }

  /**
   * Alternation of two parsers.  Both operands are by-name and are only
   * evaluated on first use through the lazy vals below.
   */
  class DisjunctiveParser[A](l: =>Parser[A], r: =>Parser[A]) extends NonTerminalParser[A] with Thunkable {
    private lazy val left = l
    private lazy val right = r

    // NOTE(review): these two values are not read anywhere else in this class;
    // `thunk` presumably comes from Thunkable -- confirm whether they are still needed.
    private lazy val leftClass = thunk[Parser[A]]('l).getClass
    private lazy val rightClass = thunk[Parser[A]]('r).getClass

    // all alternatives, with directly nested disjunctions flattened (see gatherImpl)
    lazy val gather: List[Parser[A]] = gatherImpl(Set()).toList

    /**
     * The PREDICT table for this disjunction.  Please note that
     * this is a very different concept from the standard LL(k)
     * PREDICT set.  Specifically, the PREDICT table allows for
     * ambiguity in the prediction while still retaining O(1)
     * dispatch on disjunctions which are LL(1) and near-O(1)
     * for disjunctions which are not.
     *
     * Note that this is not actually sufficient to handle all
     * CFGs allowed by GLL.  Specifically, parsers with an empty
     * FIRST set must be handled specially.
     */
    lazy val predict: Map[Char, Parser[A]] = {
      // when several alternatives share a character, the one gathered last wins
      gather.foldLeft(Map[Char, Parser[A]]()) { (map, p) =>
        p.first.foldLeft(map) { _.updated(_, p) }
      }
    }

    /**
     * Checks if all FIRST sets are disjoint and none
     * are empty.  This is convergent even for
     * left-recursive parsers.
     */
    lazy val isLL1: Boolean = {
      val sets = gather map { _.first }
      val areFinite = sets forall { !_.isComplement }

      if (areFinite) {
        // the sets are disjoint exactly when their sizes add up to the size of the union
        val totalSize = sets.foldLeft(0) { _ + _.size }
        val union = sets.reduceLeft[Set[Char]] { _ ++ _ }
        (totalSize == union.size) && (sets forall { _.size > 0 })
      } else false
    }

    /**
     * Union of the FIRST sets of all gathered alternatives; an alternative
     * that yields None contributes the empty set.  Returns None when this
     * parser is already in `seen`.
     */
    def computeFirst(seen: Set[Parser[Any]]) = {
      if (seen contains this)
        None          // left-recursion detected!
      else {
        val newSeen = seen + this

        val firstSets = gather map { _ computeFirst newSeen getOrElse Set[Option[Char]]() } toList
        // complement sets are sorted to the front before the union is folded
        val back = firstSets sortWith { (a, b) => a.isComplement || !b.isComplement } reduceLeft { _ ++ _ }

        Some(back)
      }
    }

    /**
     * Dispatches to the alternatives.  For an LL(1) disjunction the single
     * predicted alternative is chained directly.  Otherwise a ThunkParser is
     * queued on the trampoline; it first tries preferred alternatives and,
     * if none of them succeeds, queues every alternative that passes the
     * lookahead test, reporting each distinct result only once.
     */
    def chain(t: Trampoline, in: LineStream)(f: Result[A] => Unit) {
      if (isLL1) {        // graceful degrade to LL(1)
        trace("Detected LL(1): " + this)

        if (in.isEmpty) {
          f(Failure(UnexpectedEndOfStream(None), in))
        } else {
          predict get in.head match {
            case Some(p) => p.chain(t, in)(f)

            case None => f(Failure(UnexpectedChars(in.head.toString), in))
          }
        }
      } else {
        val thunk = new ThunkParser(this) {
          def chain(t: Trampoline, in: LineStream)(f: Result[A] => Unit) {
            var predicted = false
            val results = mutable.Set[Result[A]]()    // merge results

            // preferred alternatives are run to completion here via apply()
            val preferred = gather filter { _.isPreferred }
            val prefResults = preferred flatMap { _(in) }

            val prefSuccess = prefResults exists {
              case _: Success[_] => true
              case _ => false
            }

            if (prefSuccess) {
              // a preferred alternative succeeded: report only the preferred results
              prefResults foreach { res =>
                if (!results.contains(res)) {
                  tracef("Reduced preferred: %s *=> %s%n", this, res)

                  f(res)
                  results += res
                }
              }
            } else {
              for {
                p <- gather

                // [(S = {}) -> (FIRST = U)] /\ [~(S = {}) -> (S[0] \in FIRST)]
                if !in.isEmpty || p.first == UniversalCharSet
                if in.isEmpty || p.first.contains(in.head)      // lookahead
              } {
                predicted = true
                t.add(p, in) { res =>
                  if (!results.contains(res)) {
                    tracef("Reduced: %s *=> %s%n", this, res)

                    f(res)
                    results += res
                  }
                }
              }

              // no alternative passed the lookahead: report a failure immediately
              if (!predicted) {
                if (in.isEmpty)
                  f(Failure(UnexpectedEndOfStream(None), in))
                else
                  f(Failure(UnexpectedChars(in.head.toString), in))
              }
            }
          }
        }

        t.add(thunk, handleWhitespace(in))(f)
      }
    }

    /**
     * Collects the alternatives of this disjunction, descending into nested
     * DisjunctiveParsers.  A nested disjunction already contained in `seen`
     * contributes nothing, which stops the traversal on cycles.
     */
    private def gatherImpl(seen: Set[DisjunctiveParser[A]]): Buffer[Parser[A]] = {
      val newSeen = seen + this

      def process(p: Parser[A]) = p match {
        case d: DisjunctiveParser[A] => {
          if (!seen.contains(d))
            d.gatherImpl(newSeen)
          else
            new ListBuffer[Parser[A]]
        }

        case p => p +: new ListBuffer[Parser[A]]
      }

      process(left) ++ process(right)
    }

    // hash codes are printed instead of the operands' own toString
    override def toString = "(%d | %d)".format(left.hashCode, right.hashCode)
  }

  //////////////////////////////////////////////////////////////////////////////

  /**
   * Work queue and bookkeeping for one top-level parse.  Parsers register
   * (parser, stream) pairs together with a continuation via `add`; `step`
   * runs one queued pair and sends its results to every continuation
   * registered for that pair.
   */
  class Trampoline {
    // NOTE(review): RSet is not referenced anywhere in this class
    private type RSet[A] = mutable.Set[Result[A]]
    private type SSet[A] = mutable.Set[Success[A]]
    private type FSet[A] = mutable.Set[Result[A] => Unit]

    // R: pending (parser, stream) pairs; a Stack, so the most recently added pair runs first
    private val queue = new mutable.Stack[(Parser[Any], LineStream)]

    // U_j: parsers that have already been queued for a given stream
    private val done = mutable.Map[LineStream, mutable.Set[Parser[Any]]]()

    // P: successes already produced by a parser at a given stream
    private val popped = mutable.Map[LineStream, HOMap[Parser, SSet]]()

    // GSS back edges: continuations waiting on a parser at a given stream
    private val backlinks = mutable.Map[LineStream, HOMap[Parser, FSet]]()

    // prevents divergence in cyclic GSS traversal:
    // for each result, the continuations that have already received it
    private val saved = HOMap[Result, FSet]()

    // L_0: runs queued work until the queue is empty
    def run() {
      while (hasNext) {
        step()
      }
    }

    def hasNext = !queue.isEmpty

    /**
     * Removes one (parser, stream) pair from the queue and chains the parser
     * on the stream.  Every result is recorded (successes only, in `popped`)
     * and then passed to each continuation in `backlinks` for that pair that
     * has not yet received this particular result.
     */
    def step() {
      val (p, s) = remove()

      p.chain(this, s) { res =>
        // make sure popped(s)(p) exists before it is used below
        popped.get(s) match {
          case Some(parsers) =>
            if (!(parsers contains p))
              popped(s) += (p -> new mutable.HashSet[Success[Any]])
          case None =>
            popped += (s -> HOMap[Parser, SSet]())
            popped(s) += (p -> new mutable.HashSet[Success[Any]])
        }

        res match {
          case succ: Success[Any] => {
            popped(s)(p) += succ
            tracef("Saved: %s *=> %s%n", (p, s), succ)
          }

          case _: Failure => ()
        }

        saved.get(res) match {
          // result seen before: notify only continuations that have not received it yet
          case Some(set) =>
            for (f <- backlinks(s)(p)) {
              if (!set.contains(f)) {
                set += f
                f(res)
              }
            }

          // first time this result is seen: notify every registered continuation
          case None =>
            val set = new mutable.HashSet[Result[Any] => Unit]
            saved += (res -> set)

            for (f <- backlinks(s)(p)) {
              set += f
              f(res)
            }
        }
      }
    }

    /**
     * Registers continuation `f` for parser `p` at stream `s`.  If successes
     * for that pair are already recorded in `popped`, they are replayed to
     * `f` immediately; otherwise the pair is queued unless `done` shows that
     * it was queued before.
     */
    def add[A](p: Parser[A], s: LineStream)(f: Result[A] => Unit) {
      val tuple = (p, s)

      // make sure backlinks(s)(p) exists before the continuation is stored
      backlinks.get(s) match {
        case Some(parsers) =>
          if (!(parsers contains p))
            backlinks(s) += (p -> new mutable.HashSet[Result[Any] => Unit])
        case None =>
          backlinks += (s -> HOMap[Parser, FSet]())
          backlinks(s) += (p -> new mutable.HashSet[Result[Any] => Unit])
      }

      backlinks(s)(p) += f


      popped.get(s) match {
        case Some(parsers) if (parsers contains p) =>
            for (res <- parsers(p)) {           // if we've already done that, use the result
              tracef("Revisited: %s *=> %s%n", tuple, res)
              f(res)
            }
        case _ =>
          done.get(s) match {
            case Some(parsers) =>
              if (!(parsers contains p))
                addTuple(parsers)
            case None =>
              val parsers = new mutable.HashSet[Parser[Any]]
              done += (s -> parsers)
              addTuple(parsers)
          }

          // queues the pair and marks the parser as done for this stream
          def addTuple(parsers: mutable.Set[Parser[Any]]) {
            queue.push(tuple)
            parsers += p

            trace("Added: " + tuple)
          }
      }
    }

    // pops the next pair off the queue, tracing it
    private def remove() = {
      val tuple = queue.pop()
      trace("Removed: " + tuple)

      tuple
    }
  }
}

// Companion object that mixes in the trait with no overrides, so its members
// can be used directly (e.g. via `import Parsers._`).
object Parsers extends Parsers




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy