// io.prophecy.abinitio.mp.pset.PsetLexer.scala (Maven / Gradle / Ivy listing)
//
// Artifact: prophecy-libs_2.12 - Prophecy Spark Libraries
// Note: there is a newer version of this artifact: 6.3.0-3.3.0
package io.prophecy.abinitio.mp.pset

import scala.util.parsing.combinator.RegexParsers
import scala.util.parsing.input.Positional

//
// Errors
//
/** Closed family of errors for pset processing; the variants are the case classes declared below. */
sealed trait PSETCompilationError

/**
  * Error produced by the lexing stage (constructed in `PsetLexer.apply`).
  *
  * @param location line/column at which the lexer gave up
  * @param msg      human-readable description of the failure
  */
case class PSETLexerError(location: Location, msg: String) extends PSETCompilationError

/**
  * Error variant for the parsing stage. It is not constructed anywhere in this file; presumably the pset parser
  * creates it - TODO confirm against the parser sources.
  *
  * @param location line/column associated with the failure
  * @param msg      human-readable description of the failure
  */
case class PSETParserError(location: Location, msg: String) extends PSETCompilationError

/**
  * A line/column pair identifying a position in the input.
  *
  * @param line   line number as reported by the input position
  * @param column column number as reported by the input position
  */
case class Location(line: Int, column: Int) {

  /** Renders the location as `line:column`. */
  override def toString: String = s"$line:$column"
}

//
// Tokens
//
// Base type of all lexer tokens. Extending `Positional` lets the lexer's `positioned` combinator record the
// source position on each token it wraps.
sealed trait PsetToken extends Positional
// Free text matched by `PsetLexer.textValue`; `value` is the matched text, unmodified.
case class TEXT_VALUE(value: String) extends PsetToken
// The literal `True` (see `PsetLexer.theTrue`).
case class TRUE() extends PsetToken
// The literal `False` (see `PsetLexer.theFalse`).
case class FALSE() extends PsetToken
// Not produced by any parser in `PsetLexer` as visible in this file; presumably the text found between
// curly braces - TODO confirm against the code that constructs it.
case class CURLY_BRACE_CONTENT(value: String) extends PsetToken
// Integer literal (see `PsetLexer.number`).
case class NUMBER(value: Int) extends PsetToken
// Decimal literal containing a `.` (see `PsetLexer.realNumber`).
case class REAL_NUMBER(value: Double) extends PsetToken
// The `|` character (see `PsetLexer.pipe`).
case class PIPE() extends PsetToken
// The `;` character (see `PsetLexer.semicolon`).
case class SEMICOLON() extends PsetToken
// The `{` character (see `PsetLexer.obrace`).
case class OBRACE() extends PsetToken
// The `}` character (see `PsetLexer.cbrace`).
case class CBRACE() extends PsetToken
// A line break (see `PsetLexer.newline`).
case class NEWLINE() extends PsetToken
// A name made of letters, digits and underscores that does not start with a digit (see `PsetLexer.identifier`).
case class IDENTIFIER(str: String) extends PsetToken
// Two names joined by a colon, `parentKey:childKey` (see `PsetLexer.identifierWithColon`).
case class IDENTIFIER_WITH_COLON(parentKey: String, childKey: String) extends PsetToken

/**
  * Lexer for pset input files: breaks the pset text into a flat list of [[PsetToken]]s according to the token
  * rules defined in this class.
  *
  * Implicit whitespace skipping is disabled, so every character of the input has to be consumed by one of the
  * token rules below.
  */
class PsetLexer extends RegexParsers {

  // Blanks are significant in pset content, so RegexParsers must not silently drop them before each token.
  override def skipWhitespace = false

  /**
    * Tokenizes `code`.
    *
    * @param code complete text to tokenize
    * @return `Right` with the tokens in input order when the whole input could be tokenized; otherwise `Left`
    *         with a [[PSETLexerError]] holding the position and message reported by the failing parser,
    *         suffixed with the character found at that position
    */
  def apply(code: String): Either[PSETLexerError, List[PsetToken]] =
    parse(tokens, code) match {
      case NoSuccess(msg, next) =>
        Left(
          PSETLexerError(Location(next.pos.line, next.pos.column), s"$msg at possible token = ${next.first}")
        )
      case Success(result, _) => Right(result)
    }

  /**
    * Parser for a complete input: one or more tokens followed by the end of input.
    *
    * The alternatives are tried left to right and the first one that succeeds wins, so their order matters:
    * `realNumber` precedes `number`, and `identifierWithColon` precedes `identifier`, so that the longer form
    * is preferred. `obrace`, `cbrace` and `semicolon` are defined in this class but are not part of this
    * alternation.
    *
    * NOTE(review): `theTrue`/`theFalse` are tried before `textValue`, so text that merely starts with `True`
    * or `False` (for example `Trueish`) yields a TRUE/FALSE token followed by the rest - confirm this is
    * intended.
    */
  def tokens: Parser[List[PsetToken]] =
    phrase(
      rep1(
        theTrue | theFalse
          | newline
          | textValue
          | realNumber | number
          | identifierWithColon | identifier
          | pipe
      )
    )

  // Regex source for the token shapes that `textValue` must not swallow when they are directly followed by a
  // pipe: True/False, an identifier or integer with an optional `.suffix` / `:suffix`, or `.digits`.
  final private val toBeSkippedTokens =
    "(True|False|([a-zA-Z_][a-zA-Z0-9_]*|(-)?[0-9]+)((\\.|\\:)[a-zA-Z0-9_]+)?|\\.[0-9]+)"

  /**
    * Free text: one or more characters up to the next pipe or `\n`. A backslash-escaped pipe (`\|`) and a
    * quoted pipe (`"|"`) are consumed as part of the text rather than ending it.
    *
    * The leading negative lookahead makes this rule refuse to start at a `\n`, or at one of the
    * `toBeSkippedTokens` shapes immediately followed by a pipe, so those inputs fall through to the more
    * specific token rules.
    *
    * NOTE(review): only `\n` is excluded, so a `\r` that follows text becomes part of the TEXT_VALUE even
    * though `newline` accepts `\r\n` - confirm whether CRLF input is expected.
    */
  def textValue: Parser[TEXT_VALUE] = positioned {
    s"""(?!($toBeSkippedTokens[|]|\n))((\\\\[|])|("[|]")|[^|\n])+""".r ^^ (text => TEXT_VALUE(text))
  }

  /** A name: a letter or underscore followed by any number of letters, digits or underscores. */
  def identifier: Parser[IDENTIFIER] = positioned {
    "[a-zA-Z_][a-zA-Z0-9_]*".r ^^ (name => IDENTIFIER(name))
  }

  /**
    * Two names joined by a single colon, `parent:child`. The part before the colon becomes `parentKey`, the
    * part after it `childKey`; unlike the parent, the child part may start with a digit.
    */
  def identifierWithColon: Parser[IDENTIFIER_WITH_COLON] = positioned {
    "[a-zA-Z_][a-zA-Z0-9_]*(\\:)[a-zA-Z0-9_]+".r ^^ { matched =>
      // The regex guarantees exactly one colon with non-empty text on both sides, so head/last are safe.
      val parts = matched.split("\\:")
      IDENTIFIER_WITH_COLON(parts.head, parts.last)
    }
  }

  // Extractor that succeeds only when the text is a valid 32-bit integer literal.
  private object IntLiteral {
    def unapply(text: String): Option[Int] = scala.util.Try(text.toInt).toOption
  }

  /**
    * Integer literal with an optional leading minus sign.
    *
    * Text that matches the pattern but cannot be converted to an `Int` (a value outside the 32-bit range, or
    * one carrying a trailing `d`) is reported as a parse failure, so `apply` returns a `Left` instead of
    * letting a `NumberFormatException` escape.
    *
    * NOTE(review): `[d]*` matches literal `d` characters; it may have been meant as `\d*` - confirm.
    */
  def number: Parser[NUMBER] = positioned {
    """(-)?[0-9]+[d]*""".r ^? (
      { case IntLiteral(value) => NUMBER(value) },
      text => s"'$text' is not a valid 32-bit integer"
    )
  }

  /**
    * Decimal literal: optional minus sign, optional integer digits, a dot and at least one fractional digit.
    * Wrapped in `positioned` like every other token rule so REAL_NUMBER tokens carry their source position.
    */
  def realNumber: Parser[REAL_NUMBER] = positioned {
    """(-)?[0-9]*\.[0-9]+""".r ^^ (text => REAL_NUMBER(text.toDouble))
  }

  /** A line break: `\r\n`, a lone `\r`, or `\n`. */
  def newline: Parser[NEWLINE] = positioned {
    "(\\r\\n?|\\n|\n)".r ^^ (_ => NEWLINE())
  }

  // Fixed-text tokens. Each parser matches exactly the literal shown and yields the corresponding token.
  def theFalse:  Parser[FALSE]     = positioned("False" ^^ (_ => FALSE()))
  def theTrue:   Parser[TRUE]      = positioned("True" ^^ (_ => TRUE()))
  def pipe:      Parser[PIPE]      = positioned("|" ^^ (_ => PIPE()))
  def obrace:    Parser[OBRACE]    = positioned("{" ^^ (_ => OBRACE()))
  def cbrace:    Parser[CBRACE]    = positioned("}" ^^ (_ => CBRACE()))
  def semicolon: Parser[SEMICOLON] = positioned(";" ^^ (_ => SEMICOLON()))
}