scala.util.parsing.combinator.lexical.StdLexical.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of scala-library Show documentation
Show all versions of scala-library Show documentation
Standard library for the Scala Programming Language
/* __ *\
** ________ ___ / / ___ Scala API **
** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL **
** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
** /____/\___/_/ |_/____/_/ | | **
** |/ **
\* */
package scala.util.parsing
package combinator
package lexical
import token._
import input.CharArrayReader.EofCh
import scala.collection.mutable
/** This component provides a standard lexical parser for a simple,
* [[http://scala-lang.org Scala]]-like language. It parses keywords and
* identifiers, numeric literals (integers), strings, and delimiters.
*
* To distinguish between identifiers and keywords, it uses a set of
* reserved identifiers: every string contained in `reserved` is returned
* as a keyword token. (Note that `=>` is hard-coded as a keyword.)
* Additionally, the kinds of delimiters can be specified by the
* `delimiters` set.
*
* Usually this component is used to break character-based input into
* bigger tokens, which are then passed to a token-parser (see
* [[scala.util.parsing.combinator.syntactical.TokenParsers]].)
*
* @author Martin Odersky
* @author Iulian Dragos
* @author Adriaan Moors
*/
class StdLexical extends Lexical with StdTokens {
// see `token` in `Scanners`
def token: Parser[Token] =
( identChar ~ rep( identChar | digit ) ^^ { case first ~ rest => processIdent(first :: rest mkString "") }
| digit ~ rep( digit ) ^^ { case first ~ rest => NumericLit(first :: rest mkString "") }
| '\'' ~ rep( chrExcept('\'', '\n', EofCh) ) ~ '\'' ^^ { case '\'' ~ chars ~ '\'' => StringLit(chars mkString "") }
| '\"' ~ rep( chrExcept('\"', '\n', EofCh) ) ~ '\"' ^^ { case '\"' ~ chars ~ '\"' => StringLit(chars mkString "") }
| EofCh ^^^ EOF
| '\'' ~> failure("unclosed string literal")
| '\"' ~> failure("unclosed string literal")
| delim
| failure("illegal character")
)
/** Returns the legal identifier chars, except digits. */
def identChar = letter | elem('_')
// see `whitespace in `Scanners`
def whitespace: Parser[Any] = rep(
whitespaceChar
| '/' ~ '*' ~ comment
| '/' ~ '/' ~ rep( chrExcept(EofCh, '\n') )
| '/' ~ '*' ~ failure("unclosed comment")
)
protected def comment: Parser[Any] = (
'*' ~ '/' ^^ { case _ => ' ' }
| chrExcept(EofCh) ~ comment
)
/** The set of reserved identifiers: these will be returned as `Keyword`s. */
val reserved = new mutable.HashSet[String]
/** The set of delimiters (ordering does not matter). */
val delimiters = new mutable.HashSet[String]
protected def processIdent(name: String) =
if (reserved contains name) Keyword(name) else Identifier(name)
private lazy val _delim: Parser[Token] = {
// construct parser for delimiters by |'ing together the parsers for the individual delimiters,
// starting with the longest one -- otherwise a delimiter D will never be matched if there is
// another delimiter that is a prefix of D
def parseDelim(s: String): Parser[Token] = accept(s.toList) ^^ { x => Keyword(s) }
val d = new Array[String](delimiters.size)
delimiters.copyToArray(d, 0)
scala.util.Sorting.quickSort(d)
(d.toList map parseDelim).foldRight(failure("no matching delimiter"): Parser[Token])((x, y) => y | x)
}
protected def delim: Parser[Token] = _delim
}