All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hypertino.parser.HParser.scala Maven / Gradle / Ivy

The newest version!
package com.hypertino.parser

import java.math.BigInteger

import com.hypertino.binders.value.Text
import com.hypertino.binders.{value ⇒ bn}
import com.hypertino.parser.ast._
import org.parboiled2.{CharPredicate, Parser, ParserInput, StringBuilding, _}

import scala.annotation.switch
import scala.util.{Failure, Success, Try}

case class HParseError(message: String, cause: Throwable) extends RuntimeException(message, cause)

class HParser(val input: ParserInput) extends Parser with StringBuilding {
  import CharPredicate.{Digit, Digit19, HexDigit}
  import HParser._

  def customOperators: Vector[Rule1[Identifier]] = Vector.empty

  def Literal = rule { WhiteSpace ~ Value }

  def Object: Rule1[bn.Obj] = rule {
    ws('{') ~ zeroOrMore(Pair).separatedBy(ws(',')) ~ ws('}') ~> ((fields: Seq[(String,bn.Value)]) ⇒ bn.Obj.from(fields :_*))
  }

  def Pair = rule { StringUnwrapped ~ ws(':') ~ Value ~> ((_, _)) }

  def Value: Rule1[bn.Value] = rule {
    run {
      (cursorChar: @switch) match {
        case '"' ⇒ String
        case '\'' ⇒ StringNoEscapes
        case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | '-' ⇒ Number
        case '{' ⇒ Object
        case '[' ⇒ List
        case 't' ⇒ True
        case 'f' ⇒ False
        case 'n' ⇒ Null
        case _ ⇒ MISMATCH
      }
    }
  }

  def String = rule { StringUnwrapped ~> bn.Text }

  def StringNoEscapes = rule { StringUnwrappedNoEscapes ~> bn.Text }

  def StringUnwrapped = rule { '"' ~ clearSB() ~ Characters ~ ws('"') ~ push(sb.toString) }

  def StringUnwrappedNoEscapes = rule { '\'' ~ clearSB() ~ zeroOrMore(CharExceptApostrophe) ~ ws('\'') ~ push(sb.toString) }

  def Number = rule { HexNumber | DecNumber }

  def HexNumber = rule { ignoreCase("0x") ~ capture(oneOrMore(HexDigit)) ~> (s ⇒ bn.Number(BigDecimal(new BigInteger(s, 16)))) ~ WhiteSpace }

  def DecNumber = rule { capture(Integer ~ optional(Frac) ~ optional(Exp)) ~> (s ⇒ bn.Number(BigDecimal(s))) ~ WhiteSpace }

  def List = rule { ws('[') ~ zeroOrMore(Value).separatedBy(ws(',')) ~ ws(']') ~> (bn.Lst.from(_ :_*)) }

  def Characters = rule { zeroOrMore(NormalChar | '\\' ~ EscapedChar) }

  def NormalChar = rule { !QuoteBackslash ~ ANY ~ appendSB() }

  def CharExceptApostrophe = rule { !'\'' ~ ANY ~ appendSB() }

  def EscapedChar = rule (
    QuoteSlashBackSlash ~ appendSB()
      | 'b' ~ appendSB('\b')
      | 'f' ~ appendSB('\f')
      | 'n' ~ appendSB('\n')
      | 'r' ~ appendSB('\r')
      | 't' ~ appendSB('\t')
      | Unicode ~> { code ⇒ sb.append(code.asInstanceOf[Char]); () }
  )

  def Unicode = rule { 'u' ~ capture(HexDigit ~ HexDigit ~ HexDigit ~ HexDigit) ~> (java.lang.Integer.parseInt(_, 16)) }

  def Integer = rule { optional('-') ~ (Digit19 ~ Digits | Digit) }

  def Digits = rule { oneOrMore(Digit) }

  def Frac = rule { "." ~ Digits }

  def Exp = rule { ignoreCase('e') ~ optional(anyOf("+-")) ~ Digits }

  def True = rule { "true" ~ WhiteSpace ~ push(bn.True) }

  def False = rule { "false" ~ WhiteSpace ~ push(bn.False) }

  def Null = rule { "null" ~ WhiteSpace ~ push(bn.Null) }

  def WhiteSpace = rule { zeroOrMore(WhiteSpaceChar) }

  def ws(c: Char) = rule { c ~ WhiteSpace }

  // Backtick escaped string, like `abc dfg` or `abc``dfg`
  def TicCharacters = rule { zeroOrMore( (!'`' ~ ANY ~ appendSB()) | ("``" ~ appendSB('`'))) }
  def TickString = rule { '`' ~ clearSB() ~ TicCharacters ~ ws('`') ~ push(sb.toString) }

  // string interpolation
  def CharExceptQuoteDollar = rule { !QuoteDollar ~ ANY ~ appendSB() }
  def InterpolationStringChars = rule { CharExceptQuoteDollar | "$$" ~ appendSB("$") }
  def InterpolationExpression = rule { "${" ~ Expression ~ '}' | "$" ~ IdentNoWhitespace }
  def InterpolationString = rule { clearSB() ~ oneOrMore(InterpolationStringChars) ~ push(sb.toString) ~> (s ⇒ Constant(Text(s))) }
  def Interpolation = rule { "s\"" ~ oneOrMore(InterpolationString | InterpolationExpression) ~ ws('"') ~> StringInterpolation }

  // identifiers

  def IdentFirstChar = CharPredicate.Alpha ++ CharPredicate("$_")
  def IdentChar = IdentFirstChar ++ CharPredicate.Digit
  def IdentFirstSegment = rule { capture( IdentFirstChar ~ zeroOrMore(IdentChar)) }
  def IdentFirstSegmentTic: Rule1[String] = rule { TickString }
  def IdentSegmentTic: Rule1[String] = rule { '.' ~ WhiteSpace ~ TickString }
  def IdentSegment = rule { "." ~ WhiteSpace ~ capture (IdentFirstChar ~ zeroOrMore(IdentChar)) }
  def IdentNoWhitespace = rule { (IdentFirstSegmentTic | IdentFirstSegment) ~ zeroOrMore(IdentSegmentTic | IdentSegment) ~> ConstructIdentifier _ }
  def Ident = rule { (IdentFirstSegmentTic | IdentFirstSegment) ~ zeroOrMore(IdentSegmentTic | IdentSegment) ~ WhiteSpace ~> ConstructIdentifier _ }

  def ConstructIdentifier(firstSegment: String, subSegments: Seq[String]) = Identifier(Seq(firstSegment) ++ subSegments)

  def FuncArgs = rule { Expression ~ zeroOrMore(',' ~ Expression) ~> (Seq(_) ++ _)}
  def Func = rule { Ident ~ '(' ~ WhiteSpace ~ optional (FuncArgs) ~ ')' ~ WhiteSpace ~> {(i:Identifier,e:Option[Seq[Expression]]) ⇒ {com.hypertino.parser.ast.Function(i,e.getOrElse(Seq.empty))}}}

  def UnaryOps = rule { capture ( CharPredicate("!-") ) ~ WhiteSpace ~> OpIdentifier _ }
  def OpIdentifier(name: String) = Identifier(name)

  def UnaryExpression = rule { UnaryOps ~ SingleExpression ~> UnaryOperation }

  // sorted by precedence
  def BinaryOps = Vector(
    rule { capture("or") ~> OpIdentifier _ },
    rule { capture("xor") ~> OpIdentifier _ },
    rule { capture("and") ~> OpIdentifier _ },
    rule { capture("=" | "!=") ~> OpIdentifier _ },
    rule { capture("<=" | "<" | ">=" | ">") ~> OpIdentifier _ },
    rule {
      { capture("has" ~ oneOrMore(WhiteSpaceChar) ~ "not") ~> (_ ⇒ OpIdentifier("has not")) } |
      { capture("has") ~> OpIdentifier _ } |
      { capture("not" ~ oneOrMore(WhiteSpaceChar) ~ "like") ~> (_ ⇒ OpIdentifier("not like")) } |
      { capture("like") ~> OpIdentifier _ }
    }

  ) ++ customOperators ++ Vector(
    rule { capture(CharPredicate("+-") | "++" | "--") ~> OpIdentifier _ },
    rule { capture(CharPredicate("*/%")) ~> OpIdentifier _ }
  )

  def binaryOpsSize = 7 + customOperators.length

  def BinaryExpression(index: Int): Rule1[Expression] = {
    if (index > binaryOpsSize)
      SingleExpression
    else rule {
      BinaryExpression(index + 1) ~ zeroOrMore(
        WhiteSpace ~ BinaryOps(index) ~ WhiteSpace ~ BinaryExpression(index + 1) ~> BinaryOperation
      )
    }
  }

  def ConstExpression = rule { Literal ~> Constant }

  def ParensExpression = rule { '(' ~ Expression ~ ')' ~ WhiteSpace }

  //def ApplyExpression = rule { (ConstExpression | Func | UnaryExpression | ParensExpression) ~ '(' ~ Expression ~ ')' ~> ApplyFunction _ }

  //def ApplyFunction(left: Expression, right: Expression): Expression = Function(Identifier("apply"), Seq(left,right))

  def SingleExpression: Rule1[Expression] = rule { WhiteSpace ~ (/*ApplyExpression | */ConstExpression | Func | Interpolation | Ident | UnaryExpression | ParensExpression) ~ WhiteSpace }

  def Expression: Rule1[Expression] = BinaryExpression(0)

  def InputLine = rule { Expression ~ EOI }
}

object HParser {
  val WhiteSpaceChar = CharPredicate(" \n\r\t\f")
  val QuoteBackslash = CharPredicate("\"\\")
  val QuoteSlashBackSlash = QuoteBackslash ++ "/"
  val QuoteDollar = CharPredicate("\"$")

  def apply(input: ParserInput): Expression = {
    val parser = new HParser(input)
    run(parser, parser.InputLine.run())
  }

  def apply(input: ParserInput, operators: Seq[String]): Expression = {
    val parser = new HParser(input) {
      override def customOperators = {
        operators.foldLeft(Vector.newBuilder[Rule1[Identifier]]) { (ops, op) ⇒
          val opByWhiteSpaces = op.split(" ")
          var isFirst = true
          val operationRule = opByWhiteSpaces.foldLeft(WhiteSpace) { (currentRule, segment) ⇒
            if (isFirst) {
              isFirst = false
              rule { currentRule ~ segment }
            } else {
              rule { currentRule ~ oneOrMore(WhiteSpaceChar) ~ segment }
            }
          }
          ops += rule { capture(operationRule) ~ WhiteSpace ~> ((_:String) ⇒ Identifier(op)) }
        }.result()
      }
    }
    run(parser, parser.InputLine.run())
  }

  private def run(parser: Parser, runParser: ⇒ Try[ast.Expression]): Expression = {
    runParser match {
      case Success(e) ⇒ e
      case Failure(pe : ParseError) ⇒
        throw HParseError(pe.format(parser), pe)
      case Failure(f) ⇒
        throw f
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy