com.github.kardapoltsev.astparser.parser.Lexer.scala Maven / Gradle / Ivy
/*
Copyright 2016 Alexey Kardapoltsev
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package com.github.kardapoltsev.astparser.parser
import scala.util.parsing.input._
/** Base type of every token emitted by the lexer; mixes in `Positional`. */
sealed trait Token extends Positional
/** All concrete [[Token]] types.
  *
  * Fixed tokens are declared as empty-argument case classes rather than
  * case objects — presumably so that every occurrence can carry its own
  * source position (TODO confirm against `Positional`).
  */
object Tokens {

  // ---- keywords ----------------------------------------------------------
  case class TypeKeyword() extends Token
  case class PackageKeyword() extends Token
  case class SchemaKeyword() extends Token
  case class TraitKeyword() extends Token
  case class VersionKeyword() extends Token
  case class CallKeyword() extends Token
  case class ExternalKeyword() extends Token
  case class ImportKeyword() extends Token

  // ---- single-character symbols ------------------------------------------
  case class Eq() extends Token
  case class Colon() extends Token
  case class Semicolon() extends Token
  case class Hash() extends Token
  case class Dot() extends Token
  case class LeftBrace() extends Token
  case class RightBrace() extends Token
  case class LeftBracket() extends Token
  case class RightBracket() extends Token
  case class LessSign() extends Token
  case class GreaterSign() extends Token

  // ---- tokens carrying text ----------------------------------------------
  /** An identifier-like run of characters. */
  case class Lexeme(chars: String) extends Token

  /** Text captured by the lexer's `@`-prefixed rule. */
  case class Http(chars: String) extends Token

  /** Common shape of documentation tokens: they expose their text as `chars`. */
  trait Doc extends Token {
    def chars: String
  }

  case class LeftDoc(chars: String) extends Doc
  case class RightDoc(chars: String) extends Doc

  // ---- special tokens ----------------------------------------------------
  /** Token standing in for a lexing error described by `message`. */
  case class Error(message: String) extends Token {
    def chars: String = s"ERROR: $message"
  }

  /** End-of-input marker; its text is empty. */
  case object EOF extends Token {
    def chars: String = ""
  }
}
//class TokenReader(seq: Seq[Token]) extends Reader[Token] {
// override def atEnd = seq.isEmpty
//
// override def pos = {
// if(seq.nonEmpty) seq.head.pos
// else NoPosition
// }
//
// override def first = {
// if (seq.nonEmpty) seq.head
// else throw new RuntimeException("SeqReader at end")
// }
//
// override def rest = {
// if (seq.nonEmpty) new TokenReader(seq.tail)
// else throw new RuntimeException("SeqReader at end")
// }
//}
//noinspection ScalaStyle
/** Character-level scanner producing [[Token]]s from source text.
  *
  * The order of alternatives in [[token]] is significant: an alternative is
  * only attempted when all alternatives before it have failed.
  */
class Lexer extends BaseLexer {
  override type Token = com.github.kardapoltsev.astparser.parser.Token

  import Tokens._
  import scala.util.parsing.input.CharArrayReader.EofCh
  import com.github.kardapoltsev.astparser.Hardcoded.{Keywords => K}

  /** Skips whitespace characters, `//` line comments and block comments.
    *
    * A block comment that opens with a second `*` right after the opening
    * slash-star is NOT skipped here; that form is tokenized by [[doc]].
    */
  override def whitespace: Parser[Any] = {
    val blank       = elem("", _.isWhitespace)
    val lineComment = elem('/') ~ elem('/') ~ tillEndOfLine
    rep[Any](blank | lineComment | multilineComment)
  }

  /** Consumes input up to and including the first `*` that is directly followed by `/`;
    * the result is always a single space character.
    */
  protected def comment: Parser[Any] = {
    val upToStar = rep(noneOf(EofCh, '*')) ~ elem('*')
    (upToStar ~ elem('/') | upToStar ~ comment) ^^ { _ => ' ' }
  }

  // ---- single-character symbols ------------------------------------------
  protected def eq: Parser[Eq]                     = elem('=') ^^^ Eq()
  protected def colon: Parser[Colon]               = elem(':') ^^^ Colon()
  protected def semicolon: Parser[Semicolon]       = elem(';') ^^^ Semicolon()
  protected def hash: Parser[Hash]                 = elem('#') ^^^ Hash()
  protected def dot: Parser[Dot]                   = elem('.') ^^^ Dot()
  protected def leftBrace: Parser[LeftBrace]       = elem('{') ^^^ LeftBrace()
  protected def rightBrace: Parser[RightBrace]     = elem('}') ^^^ RightBrace()
  protected def leftBracket: Parser[LeftBracket]   = elem('[') ^^^ LeftBracket()
  protected def rightBracket: Parser[RightBracket] = elem(']') ^^^ RightBracket()
  protected def lessSign: Parser[LessSign]         = elem('<') ^^^ LessSign()
  protected def greaterSign: Parser[GreaterSign]   = elem('>') ^^^ GreaterSign()

  // ---- keywords ----------------------------------------------------------
  private def typeKeyword: Parser[Token]       = keyword(K.Type, TypeKeyword())
  private def packageKeyword: Parser[Token]    = keyword(K.Package, PackageKeyword())
  protected def schemaKeyword: Parser[Token]   = keyword(K.Schema, SchemaKeyword())
  protected def versionKeyword: Parser[Token]  = keyword(K.Version, VersionKeyword())
  protected def traitKeyword: Parser[Token]    = keyword(K.Trait, TraitKeyword())
  protected def callKeyword: Parser[Token]     = keyword(K.Call, CallKeyword())
  protected def externalKeyword: Parser[Token] = keyword(K.External, ExternalKeyword())
  protected def importKeyword: Parser[Token]   = keyword(K.Import, ImportKeyword())

  /** An `@` followed by everything up to (not including) the end of the line. */
  protected def restString: Parser[Http] =
    (elem('@') ~> tillEndOfLine) ^^ (Http(_))

  /** Matches the literal `keyword` immediately followed by one space character
    * (the space is consumed) and yields `keywordToken`.
    *
    * Any other following character makes this parser fail, so the input can
    * still be matched by a later alternative such as the lexeme rule.
    */
  private def keyword(keyword: String, keywordToken: => Token): Parser[Token] =
    (acceptSeq(keyword) ~ elem(' ')) ^^^ keywordToken

  override def errorToken(msg: String): Tokens.Error = Tokens.Error(msg)

  /** Parses one token and records its position. Alternatives are tried top to bottom. */
  override def token: Parser[Token] = positioned(
    symbol
      | packageKeyword
      | typeKeyword
      | schemaKeyword
      | versionKeyword
      | traitKeyword
      | restString
      | callKeyword
      | externalKeyword
      | importKeyword
      | lexeme
      | doc
      | eof
      | failure("illegal character")
  )

  /** Matches a literal `EofCh` character in the input. */
  private def eof: Parser[EOF.type] = elem(EofCh) ^^^ EOF

  private def symbol: Parser[Token] =
    eq |
      colon |
      semicolon |
      hash |
      dot |
      leftBrace |
      rightBrace |
      leftBracket |
      rightBracket |
      lessSign |
      greaterSign

  private def lexemeChar: Parser[Elem] =
    elem("valid lexeme", c => c != EofCh && c.isLetterOrDigit)

  /** One or more letters/digits, with an optional backtick before and an
    * optional backtick after; the two backticks are matched independently.
    */
  private def lexeme: Parser[Lexeme] = {
    val backtick = opt(elem('`'))
    (backtick ~> rep1(lexemeChar) <~ backtick) ^^ (cs => Lexeme(cs.mkString))
  }

  /** `--` followed by the rest of the line. */
  private def lineDoc: Parser[RightDoc] =
    (elem('-') ~ elem('-') ~> tillEndOfLine) ^^ (RightDoc(_))

  /** A block comment whose opener is slash-star-star; yields the comment body. */
  private def multilineDoc: Parser[LeftDoc] =
    (elem('/') ~ elem('*') ~ elem('*') ~> multilineCommentBody) ^^ (LeftDoc(_))

  def doc: Parser[Doc] = lineDoc | multilineDoc

  /** Every character up to, but excluding, the next `'\n'` or `EofCh`. */
  private def tillEndOfLine: Parser[String] =
    rep(noneOf(EofCh, '\n')) ^^ (_.mkString)

  /** Body of a block comment, starting right after the opener.
    *
    * Returns the text before the closing star-slash (which is consumed).
    * A `*` that is not followed by `/` is kept as part of the text. Hitting
    * an `EofCh` character first is reported through `err("unclosed comment")`.
    */
  private def multilineCommentBody: Parser[String] = {
    def text: Parser[String] = rep(noneOf('*', EofCh)) ^^ (_.mkString)

    val closed    = text <~ (elem('*') ~ elem('/'))
    val continued = text ~ (elem('*') ~> multilineCommentBody) ^^ {
      case head ~ tail => head + "*" + tail
    }
    val unclosed  = text <~ (eof ~ err("unclosed comment"))

    closed | continued | unclosed
  }

  /** A block comment whose opener is NOT immediately followed by another `*`. */
  private def multilineComment: Parser[Any] =
    (elem('/') ~ elem('*') ~ guard(not(elem('*')))) ~> multilineCommentBody

  /** Accepts any single character that is not listed in `xs`. */
  private def noneOf(xs: Elem*): Parser[Elem] =
    elem("", c => !xs.contains(c))
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy