All downloads are free. The search and download functionality uses the official Maven repository.

scalaparsers.ParsingUtil.scala Maven / Gradle / Ivy

package scalaparsers

import scala.collection.immutable.List
import Document.{ text }
import Diagnostic._

import java.util.TimeZone

import cats.{Monad, StackSafeMonad}
import cats.kernel.Comparison.{EqualTo, GreaterThan, LessThan}
import cats.free.Free
import cats.implicits._

trait Parsing[S] {

  /** Shorthand for `scalaparsers.Parser[S, A]`, with the state parameter fixed to this trait's `S`. */
  type Parser[+A] = scalaparsers.Parser[S, A]
  /** Shorthand for `scalaparsers.ParseState[S]`, with the state parameter fixed to this trait's `S`. */
  type ParseState = scalaparsers.ParseState[S]

  /** Builds a parser that yields `a`: `apply` ignores the state and the supply and returns
    * `Free.pure(Pure(a))`.
    *
    * `map` and `flatMap` are overridden to apply `f` to `a` directly instead of going through `apply`.
    */
  def unit[A](a: A): Parser[A] = new Parser[A] {
    def apply[B >: A](s: ParseState, vs: Supply) = Free.pure(Pure(a))
    // `f(a)` is evaluated as soon as `map` is called, not when the resulting parser is run.
    override def map[B](f: A => B) = unit(f(a))
    override def flatMap[B](f: A => Parser[B]) = f(a)
  }

  /** `Diagnostic` instance for `Parser`; none of the three parsers it builds inspects the state or supply. */
  implicit def parserDiagnostic: Diagnostic[Parser] = new Diagnostic[Parser] {
    // Yields the result of `Err.report(p, Some(d), aux)`.
    override def raise(p: Pos, d: Document, aux: List[Document]) = Parser((_,_) => {
      val err = Err.report(p, Some(d), aux)
      // println("pending: " + err.toString) // reporting this
      err
    })
    // Yields a `Fail` carrying `msg`, with no auxiliary documents and an empty expected set.
    def fail(msg: Document) = Parser((_,_) => Fail(Some(msg), List(), Set()))
    // Yields a `Fail` with no message at all.
    def empty = Parser((_:ParseState,_:Supply) => Fail(None, List(), Set()))
  }

  /** cats `Monad` instance for `Parser`, declared as a `StackSafeMonad`.
    *
    * `flatMap` and `map` delegate to the methods of the same name on `Parser`.
    */
  implicit def parserMonad: Monad[Parser] = new StackSafeMonad[Parser] {
    def flatMap[A, B](m: Parser[A])(f: A => Parser[B]) = m flatMap f

    // Same construction as the trait-level `unit`, except that only `map` is overridden here.
    def pure[A](a: A) = new Parser[A] {
      def apply[B >: A](s: ParseState, vs: Supply) = Free.pure(Pure(a))
      override def map[B](f : A => B) = pure(f(a))
    }



    override def map[A, B](m: Parser[A])(f: A => B) = m map f
  }

  /** Yields the current parse state as a `Pure` result. */
  def get: Parser[ParseState] =
    Parser((state: ParseState, _: Supply) => Pure(state))

  /** Yields `f` applied to the current parse state as a `Pure` result. */
  def gets[A](f: ParseState => A): Parser[A] =
    Parser((state: ParseState, _: Supply) => Pure(f(state)))

  /** Yields the `Supply` the parser is being run with. */
  def getSupply: Parser[Supply] =
    Parser((_: ParseState, supply: Supply) => Pure(supply))

  /** Yields the `loc` field of the current parse state. */
  def loc: Parser[Pos] =
    Parser((state: ParseState, _: Supply) => Pure(state.loc))

  /** Replaces the parse state with `f(state)`; the result is a `Commit`. */
  def modify(f: ParseState => ParseState) =
    Parser((state: ParseState, _: Supply) => Commit(f(state), (), Set()))

  /** Replaces the parse state with `s`; the result is a `Commit`. */
  def put(s: ParseState) =
    Parser((_: ParseState, _: Supply) => Commit(s, (), Set()))

  /** Yields `fresh` from the `Supply` the parser is being run with. */
  def freshId =
    Parser((_: ParseState, supply: Supply) => Pure(supply.fresh))
  /** Reads one character if it satisfies `p`.
    *
    * Fails (no message, empty expected set) at the end of the input or when `p` rejects the next
    * character. On success it commits a state whose `offset` is advanced by one and whose `loc` is
    * bumped past the character, and yields that character.
    */
  def rawSatisfy(p: Char => Boolean) = Parser((state: ParseState, _: Supply) => {
    val input = state.input
    val here = state.offset
    if (here != input.length) {
      val c = input.charAt(here)
      if (!p(c)) Fail(None, List(), Set())
      else {
        val next = here + 1
        Commit(state.copy(loc = state.loc.bump(c, input, next), offset = next), c, Set())
      }
    }
    else Fail(None, List(), Set())
  })

  /** `rawSatisfy(p)` followed by `setBol(false)`. */
  def satisfy(p: Char => Boolean) = rawSatisfy(p) << setBol(false)

  /** Succeeds only when the offset equals the input length; otherwise fails expecting "end of input". */
  def realEOF: Parser[Unit] = Parser((state, _) => {
    val atEnd = state.offset == state.input.length
    if (atEnd) Pure(())
    else Fail(None, List(), Set("end of input"))
  })

  /** Prints `msg` to standard output when the parser runs, then yields unit. */
  def warn(msg: Document) = Parser((_: ParseState, _: Supply) => {
    println(msg.toString)
    Pure(())
  })

  /** Prints `msg` to standard output when the parser runs, then yields unit (same body as `warn`). */
  def info(msg: Document) = Parser((_: ParseState, _: Supply) => {
    println(msg.toString)
    Pure(())
  })

  /** Right-folds the alternatives with `|`, starting from `empty`. */
  def choice[A](xs: Parser[A]*) =
    xs.toList.foldRight[Parser[A]](empty)((alternative, rest) => alternative | rest)

  /** Yields unit when `p` holds, otherwise `empty`. */
  def assert(p: => Boolean): Parser[Unit] =
    if (!p) empty else unit(())

  /** Function form of `p.attempt`. */
  def attempt[A](p: Parser[A]) = p.attempt

  /** Function form of `p.attempt(s)`. */
  def attempt[A](p: Parser[A], s: String) = p.attempt(s)

  /** Lifts an `Option`: `Some(a)` becomes `unit(a)`, `None` becomes `empty`. */
  def liftOption[A](p: Option[A]): Parser[A] = p match {
    case None        => empty
    case Some(value) => unit(value)
  }

  /** Function form of `p.handle(f)`. */
  def handle[A](p: Parser[A], f: ParseFailure => Parser[A]): Parser[A] = p.handle(f)

  /** Function form of `p.not`. */
  def notFollowedBy[A](p: Parser[A]) = p.not
  /** Guards that we are either not at beginning-of-line or the current column is greater than the
    * state's `depth`.
    */
  def stillOnside: Parser[Unit] =
    gets(state => !state.bol || state.loc.column > state.depth)
      .flatMap(onside => guard[Parser](onside).map(_ => ()))

  /** Reads exactly the character `c` via `rawSatisfy`, scoped as `'c'`. */
  def rawCh(c: Char): Parser[Char] =
    rawSatisfy(_ == c).scope("'" + c.toString + "'")

  /** Implicit lifting of a `Char`: `stillOnside` check, then `rawCh(c)`, then `setBol(false)`. */
  implicit def ch(c: Char): Parser[Char] =
    stillOnside >> rawCh(c) << setBol(false)

  /** Reads a single `'\n'` via `rawSatisfy`, scoped as "newline". */
  def rawNewline = rawSatisfy(_ == '\n').scope("newline")
  // def tab = rawSatisfy(_ == '\t') scope "tab"

  /** Reads the characters of `s` one after another with `ch`, wrapped in an `attempt` labelled with
    * the quoted word, and yields `s`.
    */
  def rawWord(s: String): Parser[String] =
    s.toList.traverse[Parser,Char](ch(_)).attempt("\"" + s + "\"").as(s)

  /** Implicit lifting of a `String`: `stillOnside` check, then `rawWord(s)`, then `setBol(false)`. */
  implicit def word(s: String): Parser[String] =
    stillOnside >> rawWord(s) << setBol(false)

  /** One character for which `isUpper` holds. */
  def upper = satisfy(c => c.isUpper).scope("uppercase letter")

  /** One character for which `isLower` holds. */
  def lower = satisfy(c => c.isLower).scope("lowercase letter")

  /** One character for which `isLetter` holds. */
  def letter = satisfy(c => c.isLetter).scope("letter")

  /** Like `letter`, but built on `rawSatisfy` instead of `satisfy`. */
  def rawLetter = rawSatisfy(c => c.isLetter).scope("letter")

  /** One character for which `isDigit` holds. */
  def digit = satisfy(c => c.isDigit).scope("digit")

  /** One character accepted by `java.lang.Character.isWhitespace`. */
  def simpleSpace = satisfy(c => java.lang.Character.isWhitespace(c)).scope("simple space")

  // layout
  /** Sets the state's `bol` flag to `b`; the state is modified only when the flag actually changes. */
  def setBol(b: Boolean): Parser[Unit] =
    gets(_.bol).flatMap { previous =>
      // avoid committing if we haven't changed it
      modify(state => state.copy(bol = b)).when(previous != b).map(_ => ())
    }

  // TODO: properly parse and check for operators that start with --
  /** Line comment: `--`, then every character up to (not including) the next newline, then either a
    * newline or the real end of input.
    */
  private def comment: Parser[Unit] = rawWord("--").attempt >> rawSatisfy(_ != '\n').skipMany >> (rawNewline | realEOF) >> unit(())

  /** Block comment `{- ... -}`, with nesting.
    *
    * Yields `true` when a newline was read anywhere between the opening `{-` and its matching `-}`,
    * including inside nested block comments.
    */
  private def blockComment: Parser[Boolean] = {
    // `hadnl` accumulates "a newline has been seen so far in this comment".
    def restComment(hadnl: Boolean): Parser[Boolean] =
      rawWord("-}").attempt.as(hadnl) |
      // A nested comment must OR its result into the flag; feeding its result straight into
      // `restComment` would forget a newline already seen before the nested comment began.
      blockComment.flatMap(nested => restComment(hadnl || nested)) |
      ((rawSatisfy(_ != '\n').as(hadnl) | rawNewline.as(true)) >>= restComment)
    rawWord("{-").attempt >> restComment(false)
  }

  /** One or more whitespace characters (per `java.lang.Character.isWhitespace`) other than newline. */
  private def someRealWhitespace = rawSatisfy(x => java.lang.Character.isWhitespace(x) && x != '\n').skipSome
  /** Skips any run of comments, newlines and other whitespace, then hands over to the layout logic.
    *
    * Each skipped chunk contributes a flag: `true` for line comments and newlines, the block
    * comment's own result for block comments, `false` for other whitespace. `offside` is chosen
    * when `side` or any chunk flag is `true`, otherwise `onside`; the `spaced` argument passed on
    * is forced to `true` as soon as at least one chunk was skipped.
    */
  def whiteSpace(spaced: Boolean, side: Boolean): Parser[Token] = {
    val chunk =
      ( comment.as(true)
      | blockComment
      | rawNewline.as(true)
      | someRealWhitespace.as(false)
      ).scope("whitespace")
    chunk.many.flatMap { flags =>
      val atLineStart = flags.foldLeft(side)(_ || _)
      val sawSpace = flags.nonEmpty || spaced
      if (atLineStart) offside(sawSpace) else onside(sawSpace)
    }
  }
  /** Layout decision used by `whiteSpace` when `side` or one of the skipped chunks flagged `true`.
    *
    * Compares the current column with the indentation `n` of an `IndentedLayout` on top of the
    * layout stack; any other stack shape defers to `onside`.
    */
  private def offside(spaced: Boolean) = get.flatMap(s => {
    val col = s.loc.column
    s.layoutStack match {
      case IndentedLayout(n, _) :: xs => (col comparison n) match {
        case LessThan => modify(_.copy(layoutStack = xs, bol = true)) as VBrace // pop the layout stack, and we're at bol
        case EqualTo  =>
          // Same column as the layout: a virtual semicolon, unless we are at the end of the input.
          if (s.offset != s.input.length) setBol(false) as VSemi
          else unit(Other)
        case GreaterThan => onside(spaced)
      }
      case _ => onside(spaced)
    }
  })

  /** Layout decision used when no offside handling applies.
    *
    * At the end of the input: pops an `IndentedLayout` (yielding `VBrace`), runs the `missing`
    * parser of a `BracedLayout`, or yields `Other` for an empty stack. Otherwise, if the state's
    * `layoutEndsWith` parser would succeed and the top of the stack is an `IndentedLayout`, that
    * layout is popped (yielding `VBrace`). In every remaining case the result is `WhiteSpace` when
    * `spaced`, else `Other` after clearing `bol`.
    */
  private def onside(spaced: Boolean): Parser[Token] = get.flatMap { st =>
    // Result when no layout context is closed here.
    def plain: Parser[Token] =
      if (spaced) unit(WhiteSpace)
      else setBol(false).as(Other)

    val atEnd = st.offset == st.input.length
    if (atEnd)
      st.layoutStack match {
        case IndentedLayout(_, _) :: rest        => modify(_.copy(layoutStack = rest, bol = true)).as(VBrace)
        case BracedLayout(_, _, missing, _) :: _ => missing
        case List()                              => unit(Other)
      }
    else st.layoutEndsWith.wouldSucceed.flatMap { ends =>
      st.layoutStack match {
        case IndentedLayout(_, _) :: rest if ends =>
          modify(_.copy(layoutStack = rest, bol = true)).as(VBrace)
        case _ => plain
      }
    }
  }

  /** Runs `whiteSpace` with `spaced = false` and `side` taken from the state's current `bol` flag. */
  def layout: Parser[Token] =
    get.flatMap(state => whiteSpace(spaced = false, side = state.bol))

  /** Pushes an `IndentedLayout` described by `n` whose indentation is the larger of the current
    * column and the state's `depth`.
    */
  def virtualLeftBrace(n: String): Parser[Unit] =
    modify { state =>
      val context = IndentedLayout[S](state.loc.column max state.depth, n)
      state.copy(layoutStack = context :: state.layoutStack)
    }

  /** Closes the layout context opened by `virtualLeftBrace`.
    *
    * The `flatMatch` yields a parser rather than a plain value. That inner parser is run by the
    * trailing `flatMap(x => x)`, i.e. after the `attempt` wrapper has been applied to the outer one.
    * The `attempt` label names the first `BracedLayout` found in the stack captured on entry, or
    * is just "end of layout" when there is none.
    */
  def virtualRightBrace: Parser[Unit] = get.flatMap(s =>
    layout.flatMatch({
      case VBrace => unit(unit(())) // the layout parser already popped our stack
      case VSemi => loc.flatMap(p => unit(raise(p, "panic: trailing virtual semicolon")))
      // No virtual token: require that `layoutEndsWith` would succeed here, then yield a parser
      // that raises if the stack is empty or topped by a `BracedLayout`, followed by the pop.
      case Other|WhiteSpace => for {
        sp <- get
        b <- sp.layoutEndsWith.wouldSucceed
        _ <- failUnless[Parser](b,"end of layout not found")
      } yield raiseWhen[Parser](sp.layoutStack.isEmpty || sp.layoutStack.head.isInstanceOf[BracedLayout[S]], sp.loc, "panic: incorrect layout context for virtual right brace") >>
              modify(_.copy(layoutStack = sp.layoutStack.tail)) // bol remains false
    }).attempt(
      s.layoutStack.collectFirst({
        case BracedLayout(l, _, _, r) => "end of layout (between '" + l + "' and '" + r + "')"
      }).getOrElse("end of layout")
    ).flatMap(x => x)
  )
  // :3:3: error: unmatched '{'
  // :5:6: note: expected '}'
  // :5:6: error: expected infixl (<=) 6 term, end of top level layout, or ...

  // TODO: allow right to succeed when the closing brace parser fails in a 'corrected' mode that can only consume text and report further errors?
  /** Reads an opening delimiter and pushes a matching `BracedLayout` onto the layout stack.
    *
    * @param lp parser for the opening delimiter; run scoped as `ld`
    * @param ld description of the opening delimiter
    * @param rp parser for the closing delimiter; stored in the layout scoped as `rd`
    * @param rd description of the closing delimiter
    */
  def left(lp: Parser[Any], ld: String, rp: Parser[Any], rd: String): Parser[Unit] = for {
    start <- loc // captured before `lp` runs, so the "unmatched" note points at the opener
    _ <- lp.scope(ld)
    _ <- modify(s =>
      s.copy(
        layoutStack = BracedLayout(
          ld,
          rp.scope(rd),
          // Stored as the layout's third field: fails expecting `rd`, with a note reported at `start`.
          Parser((s: ParseState, _: Supply) => Fail(None,List(start.report("note: unmatched " + ld)), Set(rd))),
          // l => raise(start, "error: unmatched " + ld, List(l.report("note: expected corresponding " + rd + " here"))),
          rd
        ) :: s.layoutStack
      )
    )
  } yield ()

  /** `left` specialised to literal delimiters: the opener is read as a token, the closer with
    * `rawWord`, and both descriptions are the delimiter wrapped in single quotes.
    */
  def leftToken(ld: String, rd: String) =
    left(token(ld), s"'$ld'", rawWord(rd), s"'$rd'")

  /** Opening `{`, closed by `}`. */
  def leftBrace = leftToken("{", "}")

  /** Opening `{|`, closed by `|}`. */
  def leftCurlyBanana = leftToken("{|", "|}")

  /** Opening `[`, closed by `]`. */
  def leftBracket = leftToken("[", "]")

  /** Opening `(|`, closed by `|)`. */
  def leftBanana = leftToken("(|", "|)")

  /** Opening `[|`, closed by `|]`. */
  def leftEnvelope = leftToken("[|", "|]")

  /** Closes the `BracedLayout` on top of the layout stack.
    *
    * Runs the layout's closing parser, pops the stack and skips optional space. If that fails,
    * the layout's `missing` parser is tried instead. Any other stack shape raises a panic at the
    * current location.
    */
  def right: Parser[Unit] = get.flatMap { state =>
    state.layoutStack match {
      case BracedLayout(_, close, missing, desc) :: rest =>
        val closeAndPop =
          close.scope(desc) >>
          modify(_.copy(layoutStack = rest)) >>
          optionalSpace.skipOptional
        closeAndPop | missing
      case other =>
        // , but found:" above nest(2, vsep(stk.map(text(_.toString)))))
        raise[Parser](state.loc, "panic: expected braced layout, but found: " + other.mkString(","))
    }
  }

  /** A semicolon: either a virtual one reported by `layout` (`VSemi`) or, when `layout` reports
    * `Other`, a literal `;` token. Wrapped in an `attempt` labelled "semicolon".
    */
  def semi: Parser[Char] =
    layout
      .flatMatch({
        case VSemi => unit(';')
        case Other => token(';')
      })
      .attempt("semicolon")

  def eofIgnoringLayout: Parser[Unit] = realEOF // eof // realEOF

  // def eof: Parser[Unit] = realEOF scope "eof" // | layout.flatMatch({ case EOF => unit(()) }) attempt "eof"

  /** Runs `layout` and accepts `WhiteSpace` or `Other`; virtual semicolons and virtual braces are
    * turned into failures. Wrapped in an `attempt` labelled "whitespace".
    */
  def optionalSpace: Parser[Unit] =
    layout
      .flatMatch({
        case WhiteSpace | Other => unit(())
        case VSemi              => Diagnostic.fail[Parser]("vsemi in optional space")
        case VBrace             => Diagnostic.fail[Parser]("vbrace in optional space")
      })
      .attempt("whitespace")

  /** `realEOF`, scoped as "eof". */
  def eof: Parser[Unit] = realEOF.scope("eof") // (layout.collect({ case WhiteSpace | Other => ()}).attempt.skipOptional >> realEOF) scope "end of input"

  /** Parses a block of `p` items, scoped as `layout(s)`.
    *
    * Tries an explicit `{ ... }` block with items separated by `;` tokens first, then a block
    * delimited by virtual braces with items separated by `semi`.
    */
  def laidout[T](s: String, p: Parser[T]): Parser[List[T]] = {
    def explicitBlock = brace(p.scope(s).sepBy(token(';')))
    def implicitBlock = p.scope(s).sepBy(semi).between(virtualLeftBrace(s), virtualRightBrace)
    (explicitBlock | implicitBlock).scope("layout(" + s + ")")
  }

  /** Runs `p` as a whole input: clears the layout stack, skips leading `simpleSpace`, runs `p`, then requires `eof`. */
  def phrase[A](p: Parser[A]) = modify(_.copy(layoutStack = List())) >> simpleSpace.skipMany >> p << eof
  /** Runs `p`, then skips `optionalSpace` if it is present. */
  def token[A](p: => Parser[A]): Parser[A] = p << optionalSpace.skipOptional
  // Each wrapper below runs `p` between the named opening-delimiter parser and `right`.
  def banana[A](p: => Parser[A]): Parser[A] = p.between(leftBanana,right)
  def paren[A](p: => Parser[A]): Parser[A] = p.between(leftToken("(",")"),right)
  def brace[A](p: => Parser[A]): Parser[A] = p.between(leftBrace,right)
  def bracket[A](p: => Parser[A]): Parser[A] = p.between(leftBracket,right)
  def envelope[A](p: => Parser[A]): Parser[A] = p.between(leftEnvelope, right)
  def curlyBanana[A](p: => Parser[A]): Parser[A] = p.between(leftCurlyBanana, right)

  /** Escape letter -> character it denotes (e.g. `n` -> newline). */
  private val charEscMagic: Map[Char, Char] = "bfnrt\\\"'".zip("\b\f\n\r\t\\\"'").toMap
  /** Inverse of `charEscMagic`: character -> escape letter. */
  private val charEscUnmagic: Map[Char, Char] = charEscMagic map (_.swap)

  /** Control escape `^X`: `^A` denotes code 1, `^B` code 2, and so on.
    *
    * The `+ 1` makes this the inverse of the `c + ('A' - 1)` formatting in `inverseStringLiteral`;
    * without it `^A` would decode to code 0 and every control escape would be off by one.
    */
  private def charControl  = (ch('^') >> upper).map(c => (c.toInt - 'A'.toInt + 1).toChar)
  /** Single-letter escapes taken from `charEscMagic`. */
  private def charEsc      = choice(charEscMagic.toSeq.map { case (c,d) => ch(c) as d } :_*)
  private def escapeCode   = (charControl | charEsc) scope "escape code" // TODO: charNum, charAscii
  /** A backslash followed by an escape code. */
  private def charEscape   = ch('\\') >> escapeCode
  // NOTE(review): the '\u0016' (decimal 22) bound looks like a port of Parsec's decimal '\026'
  // (code 26, i.e. '\u001a') - confirm which control characters are meant to be rejected.
  private def charLetter   = satisfy(c => (c != '\'') && (c != '\\') && (c > '\u0016'))
  private def charChar     = (charLetter | charEscape) scope "character literal character"
  private def stringLetter = satisfy(c => (c != '"') && (c != '\\') && (c > '\u0016'))
  /** A backslash escape inside a string; yields `None` for escapes that denote no character. */
  private def stringEscape = ch('\\') >> (
    (simpleSpace.skipSome >> (ch('\\') scope "end of string gap")).as(None) | // escape gap
    ch('&').as(None) |                                                  // empty escape
    escapeCode.map(Some(_))
  )
  private def stringChar = (stringLetter.map(Some(_)) | stringEscape) scope "string literal character"

  /** token parser for parsing a character literal */
  def charLiteral = token(charChar.between('\'','\'') scope "character literal")

  /** token parser for parsing a string literal
    *
    * `stringChar` yields `None` for string gaps and the empty escape `\&`. Those entries are
    * dropped with `flatten`, keeping every real character. Using `sequence` instead would make
    * one `None` turn the whole literal into the empty string.
    */
  // def stringLiteral: Parser[String] = satisfy('"' != _).skipMany.slice.between('"', '"')
  def stringLiteral: Parser[String] = token(stringChar.many.between('"','"').map(
    _.flatten.mkString
  ) scope "string literal")

  /** Format a string back to its equivalent literal form.
    *
    * The result is wrapped in double quotes. Characters with a single-letter escape in
    * `charEscUnmagic` are written as a backslash plus that letter. Other characters with codes 1
    * to 26 are written as the control escape `\^A` .. `\^Z`, and everything else is copied through.
    */
  def inverseStringLiteral(s: String): String =
    "\"" |+| augmentString(s).flatMap{
      case c if charEscUnmagic contains c => "\\" + charEscUnmagic(c)
      // The leading backslash is required: a bare "^A" is two ordinary characters, not an escape.
      case c if 1 to 26 contains c => "\\^" + (c + ('A' - 1)).toChar
      case c => c.toString} |+| "\""

  /** A floating point literal: digits followed by either a fractional part (`.` plus optional
    * digits, with an optional exponent) or an exponent alone. The matched text is converted with
    * `toDouble`.
    */
  def doubleLiteral_ : Parser[Double] = {
    def exponent = ch('e') >> digit.skipSome
    def fraction = ch('.') >> digit.skipMany
    val literal = digit.skipSome >> ((fraction >> exponent.skipOptional) | exponent)
    literal.attempt.slice.map(_.toDouble)
  }

  /** Token form of `doubleLiteral_`. */
  def doubleLiteral: Parser[Double] = token(doubleLiteral_)

  /** Parses `@year/month/day` into a `java.util.Date` at midnight GMT.
    *
    * The month must be in 1..12 and the day in 1..31. The date is built on an explicit
    * `GregorianCalendar` because `Calendar.getInstance` picks the calendar system from the JVM's
    * default locale, and a non-Gregorian one (e.g. Thai Buddhist) would interpret the year
    * differently.
    */
  def dateLiteral_ = {
    for {
      y <- ch('@') >> nat_ << ch('/')
      m <- nat_.filter(1L to 12L contains) << ch('/')
      d <- nat_.filter(1L to 31L contains)
    } yield {
      val c = new java.util.GregorianCalendar(TimeZone getTimeZone "GMT")
      c set (java.util.Calendar.MILLISECOND, 0)
      // Calendar months are zero-based.
      // NOTE(review): the calendar is lenient, so a day past the end of the month (e.g. 2/31)
      // rolls over into the following month instead of being rejected.
      c set (y.toInt, m.toInt - 1, d.toInt, 0, 0, 0)
      c.getTime
    }}
  /** token parser for date literals
    * (dates being year-month-day triples) */
  def dateLiteral = token(dateLiteral_.attempt)

  /** One or more digits, converted to a `Long`.
    *
    * A digit run that does not fit in a `Long` makes the parser fail (via `liftOption`) instead
    * of letting `toLong` throw a `NumberFormatException` out of the parse.
    */
  def nat_ : Parser[Long] =
    digit.skipSome.slice.flatMap(digits => liftOption(scala.util.Try(digits.toLong).toOption))
  /** Token form of `nat_`. */
  def nat = token(nat_)
  /** One identifier-tail character: a letter, a digit, `_`, `#` or `'`. */
  def tailChar: Parser[Char] =
    satisfy(c => c.isLetterOrDigit || c == '_' || c == '#' || c == '\'')

  /** Like `tailChar`, but built on `rawSatisfy` instead of `satisfy`. */
  def rawTailChar: Parser[Char] =
    rawSatisfy(c => c.isLetterOrDigit || c == '_' || c == '#' || c == '\'')

  /** Skips zero or more `tailChar`s. */
  def identTail: Parser[Unit] = tailChar.skipMany

  /** Skips zero or more `rawTailChar`s. */
  def rawIdentTail: Parser[Unit] = rawTailChar.skipMany

  /** Sorted array of the characters in `()[]{};,"`; usable with `existsIn`. */
  val nonopChars: Array[Char] = "()[]{};,\"".toCharArray.sorted

  /** Sorted array of the characters in the operator-character string; usable with `existsIn`. */
  val opChars: Array[Char] = ":!#$%&*+./<=>?@\\^|-~'`".toCharArray.sorted

  /** Binary-searches `chs` for `c`; `chs` must already be sorted, as the arrays above are. */
  def existsIn(chs: Array[Char], c: Char): Boolean = {
    val index = java.util.Arrays.binarySearch(chs, c)
    !(index < 0)
  }

  /** An operator as consumed by `Op.shuntingYard`. */
  sealed trait Op[T] extends Located {
    /** Position used when reporting a conflict involving this operator. */
    def loc: Pos
    /** Precedence; compared numerically against the operator on top of the operator stack. */
    def prec: Int
    /** Associativity; consulted when two operators have equal precedence. */
    def assoc: Assoc
    /** Applies the operator to the operand stack (most recent operand first), yielding the new stack. */
    def apply(xs: List[T]): Parser[List[T]]
  }

  /** Constructors for `Op` and the shunting-yard expression parser built on them. */
  object Op {
    /** A unary operator: replaces the top operand `x` with `f(x)`; fails on an empty operand stack. */
    def unary[T](l: Pos, p: Int, f: T => T) = new Op[T] {
      def loc = l
      def prec = p
      def assoc = AssocL // permits repeated prefixes/postfixes, AssocN would disable
      def apply(xs: List[T]): Parser[List[T]] = xs match {
        case x :: xs => unit(f(x) :: xs)
        case _       => empty[Parser]
      }
    }
    /** A binary operator: replaces the top two operands with `f(y, x)`, where `x` is the top of
      * the stack and `y` the operand beneath it; fails when fewer than two operands are available.
      */
    def infix[T](l: Pos, p: Int, a: Assoc, f: (T,T) => T) = new Op[T] {
      def loc = l
      def prec = p
      def assoc = a
      def apply(xs: List[T]): Parser[List[T]] = xs match {
        case x :: y :: xs => unit(f(y,x) :: xs)
        case _ => empty[Parser]
      }
    }

    /** Parses an expression with an operator stack (`rators`) and an operand stack (`rands`).
      *
      * @param pre     operators tried where an operand is expected
      * @param inpost  operators tried after an operand
      * @param operand parser for a single operand
      */
    def shuntingYard[T](pre: Parser[Op[T]], inpost: Parser[Op[T]], operand: Parser[T]): Parser[T] = {
      // Applies stacked operators while the new operator `p` has lower precedence than the top
      // one (or equal precedence with both left-associative), then pushes `p` and continues in
      // `postRator`. Equal precedence with any associativity pairing other than left/left or
      // right/right is reported as ambiguous.
      def clear(l: Pos, p: Op[T], rators: List[Op[T]], rands: List[T]): Parser[T] = rators match {
        case f::fs => p.prec comparison f.prec match {
          case LessThan => f(rands) flatMap { clear(l, p, fs, _) }
          case EqualTo  => (p.assoc, f.assoc) match {
            case (AssocL, AssocL) => f(rands) flatMap { clear(l, p, fs, _) }
            case (AssocR, AssocR) => postRator(l, p :: rators, rands)
            case _ => raise(f.loc, "error: ambiguous operator of precedence " + p.prec,
                       List(p.report("note: is incompatible with this operator (add parentheses)")))
          }
          case GreaterThan => postRator(l, p :: rators, rands)
        }
        case Nil => postRator(l, List(p), rands)
      }

      // Applies every remaining operator; succeeds only if exactly one operand is left.
      def finish(l : Pos, rators: List[Op[T]], rands: List[T]): Parser[T] = rators match {
        case f :: fs => f(rands) flatMap (finish(l, fs, _))
        case Nil => rands match {
          case List(x) => unit(x)
          case _ => fail("error: ill-formed expression")
        }
      }

      // State after an operator (or at the start): try an operand, then a `pre` operator, else finish.
      def postRator(l : Pos, rators: List[Op[T]], rands: List[T]): Parser[T] =
        operand.flatMap(rand => postRand(l, rators, rand :: rands)) |
        pre.flatMap(clear(l, _, rators, rands)) |
        finish(l, rators, rands)

      // State after an operand: try an `inpost` operator, else finish.
      def postRand(l : Pos, rators: List[Op[T]], rands: List[T]): Parser[T] =
        inpost.flatMap(clear(l, _, rators, rands)) |
        finish(l, rators, rands)

      // Start with empty stacks at the current location.
      loc.flatMap(postRator(_, List(), List()))
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy