// io.prophecy.abinitio.mp.pset.PsetLexer.scala — Maven / Gradle / Ivy
package io.prophecy.abinitio.mp.pset
import scala.util.parsing.combinator.RegexParsers
import scala.util.parsing.input.Positional
//
// Errors
//
/** Root of all errors produced while compiling a pset file; sealed so matches are exhaustive. */
sealed trait PSETCompilationError
/** Tokenization failure reported by [[PsetLexer.apply]]; `location` is the position where lexing stopped. */
case class PSETLexerError(location: Location, msg: String) extends PSETCompilationError
/** Parse-stage failure. NOTE(review): no parser is visible in this file — presumably raised by a companion pset parser; confirm. */
case class PSETParserError(location: Location, msg: String) extends PSETCompilationError
/** A line/column position within the lexed input, rendered as `line:column` in error messages. */
case class Location(line: Int, column: Int) {
  override def toString: String = s"$line:$column"
}
//
// Tokens
//
/** Base type of every token emitted by [[PsetLexer]]; `Positional` lets `positioned { ... }` attach a source position. */
sealed trait PsetToken extends Positional
/** Free-form text between `|` separators (see `PsetLexer.textValue` for the exact admission rules). */
case class TEXT_VALUE(value: String) extends PsetToken
/** The literal `True`. */
case class TRUE() extends PsetToken
/** The literal `False`. */
case class FALSE() extends PsetToken
// NOTE(review): no lexer rule in this file produces CURLY_BRACE_CONTENT — presumably used elsewhere; confirm.
case class CURLY_BRACE_CONTENT(value: String) extends PsetToken
/** Integer literal (optionally negative; the lexer rule also tolerates a trailing `d` suffix). */
case class NUMBER(value: Int) extends PsetToken
/** Decimal literal such as `3.14` or `-.5`. */
case class REAL_NUMBER(value: Double) extends PsetToken
/** The `|` field separator. */
case class PIPE() extends PsetToken
// NOTE(review): the `semicolon` rule exists below but is not wired into the `tokens` grammar in this file.
case class SEMICOLON() extends PsetToken
// NOTE(review): the `obrace` rule exists below but is not wired into the `tokens` grammar in this file.
case class OBRACE() extends PsetToken
// NOTE(review): the `cbrace` rule exists below but is not wired into the `tokens` grammar in this file.
case class CBRACE() extends PsetToken
/** A line terminator: `\r`, `\r\n`, or `\n`. */
case class NEWLINE() extends PsetToken
/** Bare identifier: letter or underscore followed by word characters. */
case class IDENTIFIER(str: String) extends PsetToken
/** An `parent:child` pair — the two segments around the single `:` of a colon-qualified identifier. */
case class IDENTIFIER_WITH_COLON(parentKey: String, childKey: String) extends PsetToken
/**
* Lexer class for pset input file which will break pset content into tokens as per specification provided in
* this class.
*/
class PsetLexer extends RegexParsers {
  // Whitespace is significant in pset content (text values may contain spaces
  // and newlines are tokens in their own right), so disable automatic skipping.
  override def skipWhitespace = false

  /**
   * Tokenizes the given pset file content.
   *
   * @param code raw pset file content
   * @return the full token list, or a [[PSETLexerError]] carrying the
   *         position and the offending input element at the failure point
   */
  def apply(code: String): Either[PSETLexerError, List[PsetToken]] = {
    parse(tokens, code) match {
      case NoSuccess(msg, next) ⇒
        Left(
          PSETLexerError(Location(next.pos.line, next.pos.column), msg + " at possible token = " + next.first.toString)
        )
      case Success(result, _) ⇒ Right(result)
    }
  }

  /**
   * Complete token grammar: the entire input (`phrase`) must be consumable as
   * one-or-more of the alternatives below. Order matters — earlier
   * alternatives win, so keywords beat `textValue`, and `realNumber` is
   * tried before `number` so a decimal is not split at its dot.
   *
   * NOTE(review): `obrace`, `cbrace` and `semicolon` are defined below but
   * deliberately(?) absent from this grammar — `{`, `}` and `;` are swallowed
   * by `textValue`. Confirm before wiring them in.
   */
  def tokens: Parser[List[PsetToken]] =
    phrase(
      rep1(
        theTrue | theFalse
        | newline
        | textValue
        | realNumber | number
        | identifierWithColon | identifier
        | pipe
      )
    )

  // Patterns that must NOT be captured as a TEXT_VALUE when immediately
  // followed by `|`: booleans, identifiers (optionally dot/colon-qualified),
  // integers, and bare decimals — these have dedicated token rules.
  final private val toBeSkippedTokens =
    "(True|False|([a-zA-Z_][a-zA-Z0-9_]*|(-)?[0-9]+)((\\.|\\:)[a-zA-Z0-9_]+)?|\\.[0-9]+)"

  /**
   * Free-form text up to the next unescaped `|` or newline.
   * The negative lookahead rejects inputs that are exactly one of the
   * `toBeSkippedTokens` followed by `|` (those must lex as their dedicated
   * token) or a bare newline. `\|` and `"|"` are allowed inside the value.
   */
  def textValue: Parser[TEXT_VALUE] = positioned {
    s"""(?!($toBeSkippedTokens[|]|\n))((\\\\[|])|("[|]")|[^|\n])+""".r ^^ (str ⇒ TEXT_VALUE(str))
  }

  /** Bare identifier: letter or underscore, then word characters. */
  def identifier: Parser[IDENTIFIER] = positioned {
    "[a-zA-Z_][a-zA-Z0-9_]*".r ^^ (str ⇒ IDENTIFIER(str))
  }

  /** Colon-qualified identifier `parent:child`; the regex admits exactly one `:`. */
  def identifierWithColon: Parser[IDENTIFIER_WITH_COLON] = positioned {
    "[a-zA-Z_][a-zA-Z0-9_]*(\\:)[a-zA-Z0-9_]+".r ^^ { str ⇒
      val tokens = str.split("\\:")
      IDENTIFIER_WITH_COLON(tokens.head, tokens.last)
    }
  }

  /**
   * Integer literal, optionally negative, optionally suffixed with `d`s.
   * The suffix is stripped before conversion — the original fed the raw
   * match to `toInt`, which throws NumberFormatException on e.g. "12d".
   */
  def number: Parser[NUMBER] = positioned {
    """(-)?[0-9]+[d]*""".r ^^ (num ⇒ NUMBER(num.replaceAll("d+$", "").toInt))
  }

  /**
   * Decimal literal such as `3.14` or `-.5`.
   * Now wrapped in `positioned` with an explicit return type, consistent with
   * every other rule — the original left REAL_NUMBER without a source position.
   */
  def realNumber: Parser[REAL_NUMBER] = positioned {
    """(-)?[0-9]*\.[0-9]+""".r ^^ (dub ⇒ REAL_NUMBER(dub.toDouble))
  }

  /** Line terminator: `\r`, `\r\n`, or `\n` (the original regex duplicated the `\n` alternative). */
  def newline: Parser[NEWLINE] = positioned {
    "(\\r\\n?|\\n)".r ^^ (_ ⇒ NEWLINE())
  }

  /** The literal `False`. */
  def theFalse: Parser[FALSE] = positioned("False" ^^ (_ ⇒ FALSE()))
  /** The literal `True`. */
  def theTrue: Parser[TRUE] = positioned("True" ^^ (_ ⇒ TRUE()))
  /** The `|` field separator. */
  def pipe: Parser[PIPE] = positioned("|" ^^ (_ ⇒ PIPE()))
  /** `{` — defined but not part of the `tokens` grammar above. */
  def obrace: Parser[OBRACE] = positioned("{" ^^ (_ ⇒ OBRACE()))
  /** `}` — defined but not part of the `tokens` grammar above. */
  def cbrace: Parser[CBRACE] = positioned("}" ^^ (_ ⇒ CBRACE()))
  /** `;` — defined but not part of the `tokens` grammar above. */
  def semicolon: Parser[SEMICOLON] = positioned(";" ^^ (_ ⇒ SEMICOLON()))
}