scala.util.parsing.combinator.RegexParsers.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of scala-library Show documentation
Show all versions of scala-library Show documentation
Standard library for the Scala Programming Language
/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2006-2013, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */
package scala.util.parsing.combinator
import java.util.regex.Pattern
import scala.util.matching.Regex
import scala.util.parsing.input._
import scala.collection.immutable.PagedSeq
import scala.language.implicitConversions
/** The ''most important'' differences between `RegexParsers` and
* [[scala.util.parsing.combinator.Parsers]] are:
*
* - `Elem` is defined to be [[scala.Char]]
* - There's an implicit conversion from [[java.lang.String]] to `Parser[String]`,
* so that string literals can be used as parser combinators.
* - There's an implicit conversion from [[scala.util.matching.Regex]] to `Parser[String]`,
* so that regex expressions can be used as parser combinators.
* - The parsing methods call the method `skipWhitespace` (defaults to `true`) and, if true,
* skip any whitespace before each parser is called.
* - Protected val `whiteSpace` returns a regex that identifies whitespace.
*
* For example, this creates a very simple calculator receiving `String` input:
*
* {{{
* object Calculator extends RegexParsers {
* def number: Parser[Double] = """\d+(\.\d*)?""".r ^^ { _.toDouble }
* def factor: Parser[Double] = number | "(" ~> expr <~ ")"
* def term : Parser[Double] = factor ~ rep( "*" ~ factor | "/" ~ factor) ^^ {
* case number ~ list => (number /: list) {
* case (x, "*" ~ y) => x * y
* case (x, "/" ~ y) => x / y
* }
* }
* def expr : Parser[Double] = term ~ rep("+" ~ log(term)("Plus term") | "-" ~ log(term)("Minus term")) ^^ {
* case number ~ list => list.foldLeft(number) { // same as before, using alternate name for /:
* case (x, "+" ~ y) => x + y
* case (x, "-" ~ y) => x - y
* }
* }
*
* def apply(input: String): Double = parseAll(expr, input) match {
* case Success(result, _) => result
* case failure : NoSuccess => scala.sys.error(failure.msg)
* }
* }
* }}}
*/
trait RegexParsers extends Parsers {

  /** Input elements are single characters. */
  type Elem = Char

  /** The regex whose prefix matches are skipped by `handleWhiteSpace`. */
  protected val whiteSpace: Regex = """\s+""".r

  /** Whether whitespace is skipped before parsers run.
   *
   *  By default this is true exactly when the string form of `whiteSpace` is non-empty.
   */
  def skipWhitespace: Boolean = whiteSpace.toString.length > 0

  /** Method called to handle whitespace before parsers.
   *
   *  It checks `skipWhitespace` and, if true, skips anything
   *  matching `whiteSpace` starting from the current offset.
   *
   *  The match is performed in place on `source` through a matcher region rather than on
   *  `source.subSequence(offset, source.length)`: for `String` sources `subSequence` copies
   *  the whole remaining input, which made every parser invocation linear in the size of
   *  the unread input.
   *
   *  @param source  The input being parsed.
   *  @param offset  The offset into `source` from which to match.
   *  @return        The offset to be used for the next parser.
   */
  protected def handleWhiteSpace(source: java.lang.CharSequence, offset: Int): Int =
    if (skipWhitespace) {
      val matcher = whiteSpace.pattern.matcher(source)
      // A region keeps the matcher's default anchoring, opaque bounds, so the pattern
      // sees [offset, length) as if it were the entire input.
      matcher.region(offset, source.length())
      // `end()` is already an absolute index into `source`.
      if (matcher.lookingAt()) matcher.end() else offset
    }
    else
      offset

  /** A parser that matches a literal string.
   *
   *  Whitespace is handled first (see `handleWhiteSpace`); the literal is then compared
   *  character by character against the input.
   *
   *  @param s  the exact text to match.
   *  @return   a parser yielding the matched text, or a `Failure` positioned just after
   *            any skipped whitespace.
   */
  implicit def literal(s: String): Parser[String] = new Parser[String] {
    def apply(in: Input): ParseResult[String] = {
      val source = in.source
      val offset = in.offset
      val start = handleWhiteSpace(source, offset)
      // `i` indexes the literal, `j` indexes the input; advance both while they agree.
      var i = 0
      var j = start
      while (i < s.length && j < source.length() && s.charAt(i) == source.charAt(j)) {
        i += 1
        j += 1
      }
      if (i == s.length)
        Success(source.subSequence(start, j).toString, in.drop(j - offset))
      else {
        val found = if (start == source.length()) "end of source" else "`"+source.charAt(start)+"'"
        Failure("`"+s+"' expected but "+found+" found", in.drop(start - offset))
      }
    }
  }

  /** A parser that matches a regex string.
   *
   *  Whitespace is handled first (see `handleWhiteSpace`); the regex must then match a
   *  prefix of the remaining input. As in `handleWhiteSpace`, matching is done in place
   *  through a matcher region so the unread input is not copied.
   *
   *  @param r  the regex to match at the current position.
   *  @return   a parser yielding the matched text, or a `Failure` positioned just after
   *            any skipped whitespace.
   */
  implicit def regex(r: Regex): Parser[String] = new Parser[String] {
    def apply(in: Input): ParseResult[String] = {
      val source = in.source
      val offset = in.offset
      val start = handleWhiteSpace(source, offset)
      val matcher = r.pattern.matcher(source)
      matcher.region(start, source.length())
      if (matcher.lookingAt()) {
        // Absolute end index of the match within `source`.
        val end = matcher.end()
        Success(source.subSequence(start, end).toString, in.drop(end - offset))
      }
      else {
        val found = if (start == source.length()) "end of source" else "`"+source.charAt(start)+"'"
        Failure("string matching regex `"+r+"' expected but "+found+" found", in.drop(start - offset))
      }
    }
  }

  /** `positioned` decorates a parser's result with the start position of the input it consumed.
   *  If whitespace is being skipped, then it is skipped before the start position is recorded.
   *
   *  @param p a `Parser` whose result conforms to `Positional`.
   *  @return A parser that has the same behaviour as `p`, but which marks its result with the
   *          start position of the input it consumed after whitespace has been skipped, if it
   *          didn't already have a position.
   */
  override def positioned[T <: Positional](p: => Parser[T]): Parser[T] = {
    val pp = super.positioned(p)
    new Parser[T] {
      def apply(in: Input): ParseResult[T] = {
        val offset = in.offset
        val start = handleWhiteSpace(in.source, offset)
        // Hand the inherited `positioned` parser an input that already starts past the whitespace.
        pp(in.drop (start - offset))
      }
    }
  }

  /** Wraps `p` for use by the inherited `phrase`, additionally allowing trailing whitespace.
   *
   *  After `p`, an optional match of the end-of-input regex `\z` is attempted. That regex
   *  parser handles whitespace before matching, so whitespace between the end of `p`'s
   *  match and the end of the input is consumed; `opt` keeps the wrapper from failing
   *  when the end of input is not reached.
   *
   *  @param p  the parser to wrap.
   *  @return   the result of `super.phrase` applied to the wrapped parser.
   */
  override def phrase[T](p: Parser[T]): Parser[T] =
    super.phrase(p <~ opt("""\z""".r))

  /** Parse some prefix of reader `in` with parser `p`. */
  def parse[T](p: Parser[T], in: Reader[Char]): ParseResult[T] =
    p(in)

  /** Parse some prefix of character sequence `in` with parser `p`. */
  def parse[T](p: Parser[T], in: java.lang.CharSequence): ParseResult[T] =
    p(new CharSequenceReader(in))

  /** Parse some prefix of reader `in` with parser `p`. */
  def parse[T](p: Parser[T], in: java.io.Reader): ParseResult[T] =
    p(new PagedSeqReader(PagedSeq.fromReader(in)))

  /** Parse all of reader `in` with parser `p`. */
  def parseAll[T](p: Parser[T], in: Reader[Char]): ParseResult[T] =
    parse(phrase(p), in)

  /** Parse all of reader `in` with parser `p`. */
  def parseAll[T](p: Parser[T], in: java.io.Reader): ParseResult[T] =
    parse(phrase(p), in)

  /** Parse all of character sequence `in` with parser `p`. */
  def parseAll[T](p: Parser[T], in: java.lang.CharSequence): ParseResult[T] =
    parse(phrase(p), in)
}