// sbt.internal.util.complete.Parsers.scala Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of completion_2.12 Show documentation
// Show all versions of completion_2.12 Show documentation
// sbt is an interactive build tool
/*
* sbt
* Copyright 2023, Scala center
* Copyright 2011 - 2022, Lightbend, Inc.
* Copyright 2008 - 2010, Mark Harrah
* Licensed under Apache License 2.0 (see LICENSE)
*/
package sbt.internal.util
package complete
import Parser._
import java.io.File
import java.net.URI
import java.lang.Character.{
CURRENCY_SYMBOL,
DASH_PUNCTUATION,
MATH_SYMBOL,
MODIFIER_SYMBOL,
OTHER_PUNCTUATION,
OTHER_SYMBOL,
getType
}
import scala.annotation.tailrec
import sbt.internal.util.Util.nilSeq
/** Provides standard implementations of commonly useful [[Parser]]s. */
trait Parsers {
/** Succeeds only when the input is exhausted; its result carries no useful information. */
lazy val EOF: Parser[Unit] = not(any, "Expected EOF")

/** Accepts one character of any kind and returns that character as the result. */
lazy val any: Parser[Char] = charClass((_: Char) => true, "any character")
/** The decimal digits `0` through `9`, each represented as a one-character String. */
lazy val DigitSet: Set[String] = ('0' to '9').map(_.toString).toSet
/**
 * Parses one digit, as decided by Char.isDigit, and returns it as a Char.
 * The members of [[DigitSet]] are offered as completion examples.
 */
lazy val Digit: Parser[Char] = charClass(c => c.isDigit, "digit").examples(DigitSet)
/** The upper-case hexadecimal digits `0`-`9` and `A`-`F` as Chars; lower-case `a`-`f` are not members. */
lazy val HexDigitSet: Set[Char] = (('0' to '9') ++ ('A' to 'F')).toSet
/**
 * Parses one hexadecimal digit in either case (0-9, a-f, A-F).
 * The input character is upper-cased before it is looked up in [[HexDigitSet]]; the set's
 * members (as Strings) are offered as completion examples.
 */
lazy val HexDigit: Parser[Char] = {
  val exampleDigits = HexDigitSet.map(ch => ch.toString)
  charClass(ch => HexDigitSet.contains(ch.toUpper), "hex digit").examples(exampleDigits)
}
/** Parses one character for which Char.isLetter holds and returns it. */
lazy val Letter: Parser[Char] = charClass(c => c.isLetter, "letter")

/** Parses one character for which Char.isUpper holds and returns it. */
lazy val Upper: Parser[Char] = charClass(c => c.isUpper, "upper")

/** Parses one character for which Char.isLower holds and returns it. */
lazy val Lower: Parser[Char] = charClass(c => c.isLower, "lower")
/** Parser for the leading Char of an sbt identifier; this is simply [[Letter]]. */
def IDStart: Parser[Char] = Letter

/**
 * Parser for any identifier Char after the first one: a letter, a digit, a dash `-`,
 * or an underscore `_` (see [[isIDChar]]).
 */
lazy val IDChar: Parser[Char] = charClass(c => isIDChar(c), "ID character")

/** Parses an identifier String: one [[IDStart]] followed by zero or more [[IDChar]]s. */
lazy val ID: Parser[String] = identifier(start = IDStart, rep = IDChar)
/** Parses one operator Char, i.e. a Char accepted by [[isOpChar]]. */
lazy val OpChar: Parser[Char] = charClass(c => isOpChar(c), "symbol")

/** Parses a non-empty operator String made up solely of [[OpChar]] characters. */
lazy val Op: Parser[String] = (OpChar.+).string

/** Parses either a non-symbolic identifier ([[ID]]) or an operator String ([[Op]]). */
lazy val OpOrID: Parser[String] = ID | Op
/**
 * Parses one non-symbolic Scala identifier Char: a letter, a digit, or the underscore `_`
 * (see [[isScalaIDChar]]).
 */
lazy val ScalaIDChar: Parser[Char] = charClass(c => isScalaIDChar(c), "Scala identifier character")

/** Parses a non-symbolic Scala-like identifier: one [[IDStart]] followed by zero or more [[ScalaIDChar]]s. */
lazy val ScalaID: Parser[String] = identifier(start = IDStart, rep = ScalaIDChar)

/** Parses a non-symbolic Scala-like identifier that begins with an [[Upper]] Char, followed by zero or more [[ScalaIDChar]]s. */
lazy val CapitalizedID: Parser[String] = identifier(start = Upper, rep = ScalaIDChar)
/**
 * Builds a String parser from two Char parsers: `start` must match once, then `rep` is
 * applied zero or more times. The matched characters are concatenated in order.
 */
def identifier(start: Parser[Char], rep: Parser[Char]): Parser[String] =
  (start ~ rep.*).map { case (head, tail) => tail.mkString(head.toString, "", "") }
/**
 * Selects a single-character parser based on how `s` itself parses:
 *  - if `s` matches [[ID]], the result accepts an [[OpChar]] or a whitespace character;
 *  - otherwise, if `s` matches [[Op]], the result accepts an [[IDChar]] or a whitespace character;
 *  - otherwise the result accepts [[any]] character.
 */
def opOrIDSpaced(s: String): Parser[Char] =
  s match {
    case id if DefaultParsers.matches(ID, id) => OpChar | SpaceClass
    case op if DefaultParsers.matches(Op, op) => IDChar | SpaceClass
    case _                                    => any
  }
/** Returns true if `c` is an operator character: its Unicode category is accepted by [[isOpType]] and it is not a delimiter. */
def isOpChar(c: Char): Boolean = isOpType(getType(c)) && !isDelimiter(c)

/** Returns true if `cat` is one of the `java.lang.Character` general categories treated as operator material. */
def isOpType(cat: Int): Boolean =
  cat == MATH_SYMBOL ||
    cat == OTHER_SYMBOL ||
    cat == DASH_PUNCTUATION ||
    cat == OTHER_PUNCTUATION ||
    cat == MODIFIER_SYMBOL ||
    cat == CURRENCY_SYMBOL

/** Returns true if `c` is a dash `-`, a letter, a digit, or an underscore `_`. */
def isIDChar(c: Char): Boolean = c == '-' || isScalaIDChar(c)

/** Returns true if `c` is a letter, a digit, or an underscore `_`. */
def isScalaIDChar(c: Char): Boolean = c == '_' || c.isLetterOrDigit

/**
 * Returns true if `c` is a backtick, a single quote, a double quote, a comma, or a period.
 * The semicolon `;` is not in the list (it was commented out of the original match).
 */
def isDelimiter(c: Char): Boolean =
  c == '`' || c == '\'' || c == '\"' || c == ',' || c == '.'
/** Matches one character for which Char.isWhitespace is false. */
lazy val NotSpaceClass: Parser[Char] = charClass(c => !c.isWhitespace, "non-whitespace character")

/** Matches one character for which Char.isWhitespace is true. */
lazy val SpaceClass: Parser[Char] = charClass(c => c.isWhitespace, "whitespace character")

/** Matches one or more non-whitespace characters and returns them as a String. */
lazy val NotSpace: Parser[String] = (NotSpaceClass.+).string

/** Matches zero or more non-whitespace characters and returns them as a (possibly empty) String. */
lazy val OptNotSpace: Parser[String] = (NotSpaceClass.*).string
/**
 * Matches one or more whitespace characters, returned as a sequence of Chars.
 * A single, constant space character is suggested for tab completion.
 */
lazy val Space: Parser[Seq[Char]] = (SpaceClass.+).examples(" ")

/**
 * Matches zero or more whitespace characters, returned as a (possibly empty) sequence of Chars.
 * A single, constant space character is suggested for tab completion.
 */
lazy val OptSpace: Parser[Seq[Char]] = (SpaceClass.*).examples(" ")
/**
 * Parses a non-empty String consisting only of characters accepted by [[URIChar]].
 * The failure message is replaced with "Invalid URI".
 */
lazy val URIClass: Parser[String] = (URIChar.+).string.!!!("Invalid URI")
/** Three consecutive double quotes, the delimiter used for verbatim quoting. */
lazy val VerbatimDQuotes: String = "\"" * 3

/** The double quote character. */
lazy val DQuoteChar: Char = '"'

/** The backslash character. */
lazy val BackslashChar: Char = '\\'
/** Matches exactly one double quote character. */
lazy val DQuoteClass: Parser[Char] = charClass(c => c == DQuoteChar, "double-quote character")

/** Matches one character that is neither a double quote nor whitespace. */
lazy val NotDQuoteSpaceClass: Parser[Char] =
  charClass(
    (c: Char) => !(c == DQuoteChar || c.isWhitespace),
    "non-double-quote-space character"
  )

/** Matches one character that is neither a double quote nor a backslash. */
lazy val NotDQuoteBackslashClass: Parser[Char] =
  charClass(
    (c: Char) => !(c == DQuoteChar || c == BackslashChar),
    "non-double-quote-backslash character"
  )
/**
 * Matches one character that is valid somewhere in a URI: an ASCII letter or digit
 * (see [[alphanum]]) or one of the listed punctuation characters.
 */
lazy val URIChar: Parser[Char] = {
  val punctuation = chars("_-!.~'()*,;:$&+=?/[]@%#")
  charClass(alphanum, "alphanum") | punctuation
}
/** Returns true if `c` is an ASCII letter (`a`-`z`, `A`-`Z`) or an ASCII digit (`0`-`9`). */
def alphanum(c: Char): Boolean = {
  val lowerCase = c >= 'a' && c <= 'z'
  val upperCase = c >= 'A' && c <= 'Z'
  val digit = c >= '0' && c <= '9'
  lowerCase || upperCase || digit
}
/**
 * Parses optional leading whitespace followed by a [[StringBasic]] value and wraps that value
 * in a `java.io.File`.
 *
 * @param base the directory handed to `FileExamples` for completion proposals (when the user presses the TAB key).
 *             Only paths under this directory will be proposed.
 * @return the file that was parsed from the input string. No existence check is made, so the
 *         returned path may or may not exist.
 */
def fileParser(base: File): Parser[File] = {
  val path = StringBasic.examples(new FileExamples(base))
  OptSpace ~> path.map(str => new File(str))
}
/**
 * Parses a port number. Currently, this accepts any integer (see [[IntBasic]]) and presents
 * `<port>` as the tab completion suggestion.
 */
// The display label had been reduced to an empty string (the angle-bracket placeholder was
// lost), which left tab completion with nothing to show for this token.
lazy val Port = token(IntBasic, "<port>")
/**
 * Parses a signed integer: an optional `-` followed by one or more digits.
 * A conversion error (for example a value outside the Int range) becomes a parse failure via [[mapOrFail]].
 */
lazy val IntBasic: Parser[Int] =
  mapOrFail('-'.? ~ Digit.+) { case (sign, digits) => toInt(sign, digits) }

/** Parses an unsigned integer: one or more digits, converted with `toInt` under [[mapOrFail]]. */
lazy val NatBasic: Parser[Int] = mapOrFail(Digit.+)(digits => digits.mkString.toInt)

// Joins the optional sign and the digits into one String and converts it to an Int.
private[this] def toInt(neg: Option[Char], digits: Seq[Char]): Int =
  neg.fold[Seq[Char]](digits)(sign => sign +: digits).mkString.toInt
/** Parses the lower-case literals `true` and `false` into the corresponding Boolean values. */
lazy val Bool: Parser[Boolean] = {
  val yes = "true" ^^^ true
  val no = "false" ^^^ false
  yes | no
}
/**
 * Parses a potentially quoted String value. The alternatives are, in order: verbatim quoted
 * ([[StringVerbatim]]), quoted with interpreted escapes ([[StringEscapable]]), unquoted
 * ([[NotQuoted]]), or an unquoted prefix followed by a quoted part ([[NotQuotedThenQuoted]]).
 */
lazy val StringBasic: Parser[String] =
  Seq[Parser[String]](StringVerbatim, StringEscapable, NotQuoted, NotQuotedThenQuoted)
    .reduceLeft((acc, alternative) => acc | alternative)
/**
 * Parses a verbatim quoted String value and returns it without the quotes. The text starts with
 * triple quotes `"""` and ends with triple quotes; in between, one or more characters of any kind
 * are accepted as long as the content does not itself contain triple quotes.
 */
lazy val StringVerbatim: Parser[String] = {
  val content =
    any.+.string.filter(text => !text.contains(VerbatimDQuotes), _ => "Invalid verbatim string")
  (VerbatimDQuotes ~> content) <~ VerbatimDQuotes
}
/**
 * Parses a double-quoted string value, interpreting escapes and dropping the surrounding quotes.
 * Two adjacent double quotes yield the empty String.
 * See [[EscapeSequence]] for supported escapes.
 */
lazy val StringEscapable: Parser[String] = {
  val nonEmptyQuoted =
    (DQuoteChar ~> (NotDQuoteBackslashClass | EscapeSequence).+.string) <~ DQuoteChar
  val emptyQuoted = (DQuoteChar ~ DQuoteChar) ^^^ ""
  nonEmptyQuoted | emptyQuoted
}
/**
 * Parses a size unit string. For example, `128K` parses to `128L * 1024`, and `1.25g` parses
 * to `1024L * 1024 * 1024 * 5 / 4`.
 * The parsing itself is delegated to `SizeParser.value`.
 */
lazy val Size: Parser[Long] = SizeParser.value
/**
 * Parses a brace enclosed string and, if each opening brace is matched with a closing brace,
 * it returns the entire string including the braces.
 *
 * Nested pairs are handled by recursion: `impl()` parses one group that starts at `open`,
 * and calls itself for every group nested inside it.
 *
 * @param open the opening character, e.g. '{'
 * @param close the closing character, e.g. '}'
 * @return a parser for the brace enclosed string.
 */
private[sbt] def braces(open: Char, close: Char): Parser[String] = {
// Zero or more characters that are neither `open` nor `close`, collected into a String.
val notDelim = charClass(c => c != open && c != close).*.string
// Parses one group beginning with `open`. It is a method rather than a val so that the
// recursive reference below is only evaluated when the flatMap function runs.
def impl(): Parser[String] = {
(open ~ (notDelim ~ close).?).flatMap {
// Flat group: `open`, delimiter-free content, `close` -- rebuild the text verbatim.
case (l, Some((content, r))) => Parser.success(s"$l$content$r")
// No immediate `close`: expect one or more (delimiter-free prefix + nested group) pairs,
// then a trailing delimiter-free run and the matching `close`.
case (l, None) =>
((notDelim ~ impl()).map {
case (leftPrefix, nestedBraces) => leftPrefix + nestedBraces
}.+ ~ notDelim ~ close).map {
// Reassemble: opening char, all nested pieces, trailing text, closing char.
case ((nested, suffix), r) => s"$l${nested.mkString}$suffix$r"
}
}
}
impl()
}
/**
 * Parses a single escape sequence into the Char it represents.
 * An escape starts with a backslash and continues with `u` for a [[UnicodeEscape]] or with one of
 * `b`, `t`, `n`, `f`, `r`, `"`, `'`, `\` for the standard escapes.
 */
lazy val EscapeSequence: Parser[Char] = {
  // Each pair maps the character after the backslash to the character it stands for.
  val standard: Seq[Parser[Char]] = Seq(
    'b' -> '\b',
    't' -> '\t',
    'n' -> '\n',
    'f' -> '\f',
    'r' -> '\r',
    '\"' -> '\"',
    '\'' -> '\'',
    '\\' -> '\\'
  ).map { case (letter, meaning) => letter ^^^ meaning }
  val afterBackslash = (standard :+ UnicodeEscape).reduceLeft((acc, alternative) => acc | alternative)
  BackslashChar ~> afterBackslash
}
/**
 * Parses the body of a unicode escape into the Char it represents.
 * This parser starts at the `u` (the leading backslash is consumed by [[EscapeSequence]]) and
 * then requires exactly 4 hexadecimal digits, which are read as the base-16 unicode value.
 */
lazy val UnicodeEscape: Parser[Char] = {
  val fourHexDigits = repeat(HexDigit, 4, 4)
  ("u" ~> fourHexDigits).map(digits => Integer.parseInt(digits.mkString, 16).toChar)
}
/**
 * Parses an unquoted, non-empty String value: the first character may be neither a double quote
 * nor whitespace, and no later character may be whitespace.
 */
lazy val NotQuoted: Parser[String] =
  (NotDQuoteSpaceClass ~ OptNotSpace).map { case first ~ rest => s"$first$rest" }

/**
 * Parses a [[NotQuoted]] prefix immediately followed by a [[StringEscapable]] part.
 * The result is the prefix followed by the parsed quoted content wrapped in double quotes again.
 */
lazy val NotQuotedThenQuoted: Parser[String] =
  (NotQuoted ~ StringEscapable).map {
    case unquoted ~ quoted => s"$unquoted$DQuoteChar$quoted$DQuoteChar"
  }
/**
 * Applies `rep` zero or more times, with `sep` between consecutive applications.
 * The result is the (possibly empty) sequence of `rep` results; the `sep` results are dropped.
 */
def repsep[T](rep: Parser[T], sep: Parser[_]): Parser[Seq[T]] = {
  val atLeastOne = rep1sep(rep, sep)
  atLeastOne ?? nilSeq[T]
}

/**
 * Applies `rep` one or more times, with `sep` between consecutive applications.
 * The result is the non-empty sequence of `rep` results; the `sep` results are dropped.
 */
def rep1sep[T](rep: Parser[T], sep: Parser[_]): Parser[Seq[T]] = {
  val following = (sep ~> rep).*
  (rep ~ following).map { case (first, others) => first +: others }
}
/** Lifts the result of `p` into an Option by wrapping it in `Some`. */
def some[T](p: Parser[T]): Parser[Option[T]] = p.map[Option[T]](Some(_))
/**
 * Transforms the result of `p` with `f`. If evaluating `f` throws an `Exception`, the parse
 * fails instead, with the exception's `toString` as the failure message.
 */
def mapOrFail[S, T](p: Parser[S])(f: S => T): Parser[T] =
  p.flatMap { parsed =>
    try success(f(parsed))
    catch {
      case ex: Exception => failure(ex.toString)
    }
  }
/**
 * Parses a possibly empty sequence of arguments, each preceded by whitespace; trailing
 * whitespace is accepted and discarded.
 * Arguments may use quotes and escapes according to [[StringBasic]]; `display` is passed to
 * `token` for each argument.
 */
def spaceDelimited(display: String): Parser[Seq[String]] = {
  val argument = token(Space) ~> token(StringBasic, display)
  val trailingSpace = SpaceClass.*
  argument.* <~ trailingSpace
}
/** Yields `true` when `p` succeeds (its own result is discarded) and `false` when it does not. */
def flag[T](p: Parser[T]): Parser[Boolean] = {
  val present = p ^^^ true
  present ?? false
}
/**
 * Builds a sequence parser in which the parser for each element depends on the elements parsed so far.
 * `p` receives the (initially empty) sequence of values already parsed and returns the parser
 * for the next value. Consecutive values are separated by `sep`, whose result is discarded;
 * the overall result is the sequence of values produced by the parsers that `p` returned.
 * At least one value is required, because the first parser `p(Vector())` is applied unconditionally.
 */
def repeatDep[A](p: Seq[A] => Parser[A], sep: Parser[Any]): Parser[Seq[A]] = {
  // Tries to parse `sep` plus one more value; if that is not possible, settles for `parsed`.
  def continue(parsed: Seq[A]): Parser[Seq[A]] = {
    val oneMore = (sep ~> p(parsed)).flatMap(value => continue(parsed :+ value))
    oneMore ?? parsed
  }
  p(Vector.empty[A]).flatMap(first => continue(Seq(first)))
}
/** Post-processes the result of `p` with String.trim. */
def trimmed(p: Parser[String]): Parser[String] = p.map(text => text.trim)
/**
 * Parses a URI: the text matched by [[URIClass]] is handed to the single argument
 * java.net.URI constructor, and a constructor exception becomes a parse failure via [[mapOrFail]].
 */
lazy val basicUri: Parser[URI] = mapOrFail(URIClass)(new URI(_))

/** Same as [[basicUri]], with the String forms of the URIs in `ex` as tab completion examples. */
def Uri(ex: Set[URI]): Parser[URI] = basicUri.examples(ex.map(uri => uri.toString))
}
/** Provides the standard [[Parser]] implementations of the [[Parsers]] trait as a standalone object. */
object Parsers extends Parsers
/** Provides common [[Parser]] implementations and helper methods. */
object DefaultParsers extends Parsers with ParserMain {

  /** Applies parser `p` to input `s` and returns `true` if the parse was successful. */
  def matches(p: Parser[_], s: String): Boolean =
    apply(p)(s).resultEmpty.isValid

  /** Returns `true` if `s` parses successfully according to [[ID]]. */
  def validID(s: String): Boolean = {
    // Hand-rolled equivalent of `matches(ID, s)`, kept because validID turned up in profiling.
    def isIdChar(c: Char): Boolean = (c == '-') || (c == '_') || Character.isLetterOrDigit(c)
    val len = s.length
    // Checks every character from index `idx` to the end of `s`.
    @tailrec def restIsValid(idx: Int): Boolean =
      if (idx >= len) true
      else isIdChar(s.charAt(idx)) && restIsValid(idx + 1)
    len > 0 && Character.isLetter(s.charAt(0)) && restIsValid(1)
  }
}