parsley.unicode.scala Maven / Gradle / Ivy
/*
* Copyright 2020 Parsley Contributors
*
* SPDX-License-Identifier: BSD-3-Clause
*/
package parsley
import parsley.Parsley.{empty, many, some}
import parsley.errors.combinator.ErrorMethods
import parsley.token.errors.{Label, LabelConfig, NotConfigured}
import parsley.internal.deepembedding.singletons
/** This module contains many parsers to do with reading one or more characters.
*
* In particular, this module contains: combinators that can read specific characters; combinators that represent character classes and their negations;
* combinators for reading specific strings; as well as a selection of pre-made parsers to parse specific kinds of character, like digits and letters.
* Unlike [[parsley.character$ `character`]], this module handles full utf-16 codepoints, which can be up to two 16-bit characters long.
*
* @since 4.4.0
*
* @groupprio pred 100
* @groupname pred Character Predicates
* @groupdesc pred
* These are useful for providing to the sub-descriptions of a [[token.descriptions.LexicalDesc]] to specify behaviour for the lexer.
* Other than that, they aren't ''particularly'' useful.
*
* @groupprio core 0
* @groupname core Core Combinators and Parsers
* @groupdesc core
* These are the most primitive combinators for consuming input capable of any input reading tasks.
*
* @groupprio skip 75
* @groupname skip Whitespace Skipping Parsers
* @groupdesc skip
* These parsers are designed to skip chunks of whitespace, for very rudimentary lexing tasks. It
* is probably better to use the functionality of [[parsley.token]].
*
* @groupprio class 20
* @groupname class Character Class Combinators
* @groupdesc class
* These combinators allow for working with ''character classes''. This means that a set, or range, of
* characters can be specified, and the combinator will return a parser that matches one of those characters
* (or conversely, any character that is ''not'' in that set). The parsed character is always returned.
*
* @groupprio spec 25
* @groupname spec Specific Character Parsers
* @groupdesc spec
* These parsers are special cases of [[satisfy `satisfy`]] or [[char `char`]]. They are worth using, as they are given special error labelling,
* producing nicer error messages than their primitive counterparts.
*
* This documentation assumes JDK 17.
* JDK 17 is compliant with [[https://www.unicode.org/versions/Unicode13.0.0/UnicodeStandard-13.0.pdf Unicode® Specification 13.0]].
* As such, the descriptions of the parsers in this section are accurate with respect to Unicode® Specification 13.0:
* using a different JDK may affect the ''precise'' definitions of the parsers below. If in doubt, check the documentation
* for `java.lang.Character` to see which Unicode version is supported by your JVM. A table of the Unicode versions
* up to JDK 17 can be found [[https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Character.html here]].
*
* @groupprio string 22
* @groupname string String Combinators
* @groupdesc string
* These combinators allow for working with, or building, strings. This means that they can
* parse specific strings, specific sets of strings, or can read codepoints repeatedly to
* generate strings. They are united in all returning `String` as their result.
*
* @define oneOf
* This combinator tries to parse any codepoint from supplied set of codepoints `cs`, returning it if successful.
* @define noneOf
* This combinator tries to parse any codepoint '''not''' from supplied set of codepoints `cs`, returning it if successful.
*
* @define categories
* ''The full list of codepoints found in a category can be found in the
* [[https://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedGeneralCategory.txt Unicode Character Database]]''.
*/
object unicode {
/** This combinator tries to parse a single specific codepoint `c` from the input.
*
* Like [[character.char `character.char`]], except it may consume two characters from the input,
* in the case where the code-point is greater than `0xffff`. This is parsed ''atomically''
* so that no input is consumed if the first half of the codepoint is parsed and the second
* is not.
*
* @example {{{
* scala> import parsley.unicode.char
* scala> char(0x1f642).parse("")
* val res0 = Failure(..)
* scala> char(0x1f642).parse("🙂")
* val res1 = Success(0x1f642)
* scala> char(0x1f642).parse("b🙂")
* val res2 = Failure(..)
* }}}
*
* @param c the code-point to parse
* @return
* @group core
*/
def char(c: Int): Parsley[Int] = char(c, NotConfigured)
private def char(c: Int, label: String): Parsley[Int] = char(c, Label(label))
private def char(c: Int, label: LabelConfig): Parsley[Int] = {
if (Character.isBmpCodePoint(c)) new Parsley(new singletons.CharTok(c.toChar, c, label))
else new Parsley(new singletons.SupplementaryCharTok(c, c, label))
}
// TODO: test
/** This combinator tries to parse a single codepoint from the input that matches the given predicate.
*
* Attempts to read a codepoint from the input and tests it against the predicate `pred`. If a codepoint `c`
* can be read and `pred(c)` is true, then `c` is consumed and returned. Otherwise, no input is consumed
* and this combinator will fail.
*
* @example {{{
* scala> import parsley.unicode.satisfy
* scala> satisfy(Character.isDigit(_)).parse("")
* val res0 = Failure(..)
* scala> satisfy(Character.isDigit(_)).parse("7")
* val res1 = Success(0x37)
* scala> satisfy(Character.isDigit(_)).parse("a5")
* val res2 = Failure(..)
* scala> def char(c: Int): Parsley[Int] = satisfy(_ == c)
* }}}
*
* @param pred the predicate to test the next codepoint against, should one exist.
* @return a parser that tries to read a single codepoint `c`, such that `pred(c)` is true, or fails.
* @group core
*/
def satisfy(pred: Int => Boolean): Parsley[Int] = satisfy(pred, NotConfigured)
private def satisfy(pred: Int => Boolean, label: String): Parsley[Int] = satisfy(pred, Label(label))
private def satisfy(pred: Int => Boolean, label: LabelConfig) = new Parsley(new singletons.UniSatisfy(pred, label))
// TODO: test
/** This combinator tries to parse and process a codepoint from the input if it is defined for the given function.
*
* Attempts to read a codepoint from the input and tests to see if it is in the domain of `f`. If a codepoint
* `c` can be read and `f(c)` is defined, then `c` is consumed and `f(c)` is returned. Otherwise, no input is consumed
* and this combinator will fail.
*
* @example {{{
* scala> import parsley.unicode.satisfyMap
* scala> val chars = satisfyMap {
* case c => Character.toChars(c)
* }
* scala> chars.parse("")
* val res0 = Failure(..)
* scala> chars.parse("7")
* val res1 = Success(Array('7'))
* scala> chars.parse("🙂")
* val res2 = Success(Array('\ud83d', '\ude42'))
* }}}
*
* @param f the function to test the next codepoint against and transform it with, should one exist.
* @return a parser that tries to read a single codepoint `c`, such that `f(c)` is defined, and returns `f(c)` if so, or fails.
* @since 4.4.0
* @group core
*/
def satisfyMap[A](pred: PartialFunction[Int, A]): Parsley[A] = satisfy(pred.isDefinedAt(_)).map(pred)
// This should always just match up, so no need to test
// $COVERAGE-OFF$
/** This combinator attempts to parse a given string from the input, and fails otherwise.
*
* Attempts to read the given string ''completely'' from the input at the current position.
* If the string is present, then the parser succeeds, and the entire string is consumed
* from the input. Otherwise, if the input has too few characters remaining, or not all
* the characters matched, the parser fails. On failure, '''all''' the characters that were
* matched are consumed from the input.
*
* @example {{{
* scala> import parsley.unicode.string
* scala> string("abc").parse("")
* val res0 = Failure(..)
* scala> string("abc").parse("abcd")
* val res1 = Success("abc")
* scala> string("abc").parse("xabc")
* val res2 = Failure(..)
* }}}
*
* @param s the string to be parsed from the input
* @return a parser that either parses the string `s` or fails at the first mismatched character.
* @note the error messages generated by `string` do not reflect how far into the input it managed
* to get: this is because the error being positioned at the start of the string is more
* natural. However, input '''will''' still be consumed for purposes of backtracking.
* @note just an alias for [[character.string `character.string`]], to allow for more ergonomic imports.
* @group string
*/
def string(s: String): Parsley[String] = character.string(s)
// $COVERAGE-ON$
/** $oneOf
*
* If the next codepoint in the input is a member of the set `cs`, it is consumed
* and returned. Otherwise, no input is consumed and the combinator fails.
*
* @example {{{
* scala> import parsley.codepoint.oneOf
* scala> val p = oneOf(Set(97, 98, 99))
* scala> p.parse("a")
* val res0 = Success(97)
* scala> p.parse("c")
* val res1 = Success(99)
* scala> p.parse("xb")
* val res2 = Failure(..)
* }}}
*
* @param cs the set of codepoints to check.
* @return a parser that parses one of the member of the set `cs`.
* @see [[satisfy `satisfy`]]
* @group class
*/
def oneOf(cs: Set[Int]): Parsley[Int] = cs.size match {
case 0 => empty
case 1 => char(cs.head)
case _ => satisfy(cs, {
val Some(label) = parsley.errors.helpers.disjunct(cs.map(renderChar).toList, oxfordComma = true): @unchecked
s"one of $label"
})
}
/** $oneOf
*
* If the next codepoint in the input is an element of the list of codepoints `cs`, it is consumed
* and returned. Otherwise, no input is consumed and the combinator fails.
*
* @example {{{
* scala> import parsley.unicode.oneOf
* scala> val p = oneOf(97, 98, 99)
* scala> p.parse("a")
* val res0 = Success(97)
* scala> p.parse("c")
* val res1 = Success(99)
* scala> p.parse("xb")
* val res2 = Failure(..)
* }}}
*
* @param cs the codepoints to check.
* @return a parser that parses one of the elements of `cs`.
* @see [[satisfy `satisfy`]]
* @group class
*/
def oneOf(cs: Int*): Parsley[Int] = oneOf(cs.toSet)
/** $oneOf
*
* If the next codepoint in the input is within the range of codepoints `cs`, it is consumed
* and returned. Otherwise, no input is consumed and the combinator fails.
*
* @example {{{
* scala> import parsley.unicode.oneOf
* scala> val p = oneOf(97 to 99)
* scala> p.parse("a")
* val res0 = Success(97)
* scala> p.parse("b")
* val res1 = Success(98)
* scala> p.parse("c")
* val res1 = Success(99)
* scala> p.parse("xb")
* val res2 = Failure(..)
* }}}
*
* @param cs the range of codepoints to check.
* @return a parser that parses a codepoint within the range `cs`.
* @see [[satisfy `satisfy`]]
* @group class
*/
def oneOf(cs: Range): Parsley[Int] = cs.size match {
case 0 => empty
case 1 => char(cs.head)
case _ if Math.abs(cs(0) - cs(1)) == 1 => satisfy(cs.contains(_),
s"one of ${renderChar(cs.min)} to ${renderChar(cs.max)}"
)
case _ => satisfy(cs.contains(_))
}
/** $noneOf
*
* If the next codepoint in the input is not a member of the set `cs`, it is consumed
* and returned. Otherwise, no input is consumed and the combinator fails.
*
* @example {{{
* scala> import parsley.unicode.noneOf
* scala> val p = noneOf(Set('a', 'b', 'c'))
* scala> p.parse("a")
* val res0 = Failure(..)
* scala> p.parse("c")
* val res1 = Failure(..)
* scala> p.parse("xb")
* val res2 = Success('x')
* scala> p.parse("")
* val res3 = Failure(..)
* }}}
*
* @param cs the set of codepoints to check.
* @return a parser that parses one codepoint that is not a member of the set `cs`.
* @see [[satisfy `satisfy`]]
* @group class
*/
def noneOf(cs: Set[Int]): Parsley[Int] = cs.size match {
case 0 => item
case 1 => satisfy(cs.head != _, s"anything except ${renderChar(cs.head)}")
case _ => satisfy(!cs.contains(_), {
val Some(label) = parsley.errors.helpers.disjunct(cs.map(renderChar).toList, oxfordComma = true): @unchecked
s"anything except $label"
})
}
/** $noneOf
*
* If the next codepoint in the input is not an element of the list of codepoints `cs`, it is consumed
* and returned. Otherwise, no input is consumed and the combinator fails.
*
* @example {{{
* scala> import parsley.unicode.noneOf
* scala> val p = noneOf('a', 'b', 'c')
* scala> p.parse("a")
* val res0 = Failure(..)
* scala> p.parse("c")
* val res1 = Failure(..)
* scala> p.parse("xb")
* val res2 = Success('x')
* scala> p.parse("")
* val res3 = Failure(..)
* }}}
*
* @param cs the set of codepoints to check.
* @return a parser that parses one codepoint that is not an element of `cs`.
* @see [[satisfy `satisfy`]]
* @group class
*/
def noneOf(cs: Int*): Parsley[Int] = noneOf(cs.toSet)
/** $noneOf
*
* If the next codepoint in the input is outside of the range of codepoints `cs`, it is consumed
* and returned. Otherwise, no input is consumed and the combinator fails.
*
* @example {{{
* scala> import parsley.unicode.noneOf
* scala> val p = noneOf('a' to 'c')
* scala> p.parse("a")
* val res0 = Failure(..)
* scala> p.parse("b")
* val res1 = Failure(..)
* scala> p.parse("c")
* val res1 = Failure(..)
* scala> p.parse("xb")
* val res2 = Success('x')
* scala> p.parse("")
* val res3 = Failure(..)
* }}}
*
* @param cs the range of codepoints to check.
* @return a parser that parses a codepoint outside the range `cs`.
* @see [[satisfy `satisfy`]]
* @group class
*/
def noneOf(cs: Range): Parsley[Int] = cs.size match {
case 0 => item
case 1 => satisfy(cs.head != _, s"anything except ${renderChar(cs.head)}")
case _ if Math.abs(cs(0) - cs(1)) == 1 => satisfy(!cs.contains(_), {
s"anything outside of ${renderChar(cs.min)} to ${renderChar(cs.max)}"
})
case _ => satisfy(!cs.contains(_))
}
// TODO: test?
/** This combinator parses `pc` '''zero''' or more times, collecting its results into a string.
*
* Parses `pc` repeatedly until it fails. The resulting codepoints are placed into a string,
* which is then returned. This is ''morally'' equivalent to `many(pc).flatMap(Character.chars(_)).map(_.mkString)`, but
* it uses `StringBuilder`, which makes it much more efficient.
*
* @example {{{
* scala> import parsley.unicode.{letter, letterOrDigit, stringOfMany}
* scala> import parsley.syntax.zipped._
* scala> val ident = (letter, stringOfMany(letterOrDigit)).zipped((c, s) => s"${Character.toString(c)}$s")
* scala> ident.parse("abdc9d")
* val res0 = Success("abdc9d")
* scala> ident.parse("a")
* val res1 = Success("a")
* scala> ident.parse("9")
* val res2 = Failure(..)
* }}}
*
* @param pc the parser whose results make up the string
* @return a parser that parses a string whose letters consist of results from `pc`.
* @since 4.4.0
* @group string
*/
def stringOfMany(pc: Parsley[Int]): Parsley[String] = many(pc, StringFactories.intFactory)
// TODO: test
/** This combinator parses codepoints matching the given predicate '''zero''' or more times, collecting
* the results into a string.
*
* Repeatly reads codepoints that satisfy the given predicate `pred`. When no more codepoints
* can be successfully read, the results are stitched together into a `String` and returned.
* This combinator can never fail, since `satisfy` can never fail having consumed input.
*
* @example {{{
* scala> import parsley.unicode.{letter, stringOfMany}
* scala> import parsley.syntax.zipped._
* scala> val ident = (letter, stringOfMany(Character.isLetterOrDigit(_))).zipped((c, s) => s"${Character.toString(c)}$s")
* scala> ident.parse("abdc9d")
* val res0 = Success("abdc9d")
* scala> ident.parse("a")
* val res1 = Success("a")
* scala> ident.parse("9")
* val res2 = Failure(..)
* }}}
*
* @param pred the predicate to test codepoints against.
* @return a parser that returns the span of codepoints satisfying `pred`
* @note this acts exactly like `stringOfMany(satisfy(pred))`, but may be more efficient.
* @note analogous to the `megaparsec` `takeWhileP` combinator.
* @since 4.4.0
* @group string
*/
def stringOfMany(pred: Int => Boolean): Parsley[String] = many(satisfy(pred)).span
// TODO: test?
/** This combinator parses `pc` '''one''' or more times, collecting its results into a string.
*
* Parses `pc` repeatedly until it fails. The resulting codepoints are placed into a string,
* which is then returned. This is ''morally'' equivalent to `some(pc).flatMap(Character.chars(_)).map(_.mkString)`, but
* it uses `StringBuilder`, which makes it much more efficient. The result string must have
* at least one codepoint in it.
*
* @example {{{
* scala> import parsley.unicode.{letter, letterOrDigit, stringOfSome}
* scala> val ident = stringOfSome(letter)
* scala> ident.parse("abdc9d")
* val res0 = Success("abdc")
* scala> ident.parse("")
* val res1 = Failure(..)
* }}}
*
* @param pc the parser whose results make up the string
* @return a parser that parses a string whose letters consist of results from `pc`.
* @since 4.4.0
* @group string
*/
def stringOfSome(pc: Parsley[Int]): Parsley[String] = some(pc, StringFactories.intFactory)
// TODO: test
/** This combinator parses codepoints matching the given predicate '''one''' or more times, collecting
* the results into a string.
*
* Repeatly reads codepoints that satisfy the given predicate `pred`. When no more codepoints
* can be successfully read, the results are stitched together into a `String` and returned.
* This combinator can never fail having consumed input, since `satisfy` can never fail having
* consumed input.
*
* @example {{{
* scala> import parsley.unicode.{letter, stringOfSome}
* scala> val ident = stringOfSome(Character.isLetter(_)))
* scala> ident.parse("abdc9d")
* val res0 = Success("abdc")
* scala> ident.parse("")
* val res1 = Failure(..)
* }}}
*
* @param pred the predicate to test codepoints against.
* @return a parser that returns the span of codepoints satisfying `pred`
* @note this acts exactly like `stringOfSome(satisfy(pred))`, but may be more efficient.
* @note analogous to the `megaparsec` `takeWhileP1` combinator.
* @since 4.4.0
* @group string
*/
def stringOfSome(pred: Int => Boolean): Parsley[String] = some(satisfy(pred)).span
// These should always just match up, so no need to test
// $COVERAGE-OFF$
/** This combinator tries to parse each of the strings `strs` (and `str0`), until one of them succeeds.
*
* Unlike `choice`, or more accurately `atomicChoice`, this combinator will not
* necessarily parse the strings in the order provided. It will favour strings that have another string
* as a prefix first, so that it has ''Longest Match'' semantics. It will try to minimise backtracking
* too, making it a much more efficient option than `atomicChoice`.
*
* The longest succeeding string will be returned. If no strings match then the combinator fails.
*
* @example {{{
* scala> import parsley.unicode.strings
* scala> val p = strings("hell", "hello", "goodbye", "g", "abc")
* scala> p.parse("hell")
* val res0 = Success("hell")
* scala> p.parse("hello")
* val res1 = Success("hello")
* scala> p.parse("good")
* val res2 = Success("g")
* scala> p.parse("goodbye")
* val res3 = Success("goodbye")
* scala> p.parse("a")
* val res4 = Failure(..)
* }}}
*
* @param str0 the first string to try to parse.
* @param strs the remaining strings to try to parse.
* @return a parser that tries to parse all the given strings returning the longest one that matches.
* @note just an alias for [[parsley.character.strings(str0* `character.strings`]], to allow for more ergonomic imports.
* @group string
*/
def strings(str0: String, strs: String*): Parsley[String] = character.strings(str0, strs: _*)
/** This combinator tries to parse each of the key-value pairs `kvs` (and `kv0`), until one of them succeeds.
*
* Each argument to this combinator is a pair of a string and a parser to perform if that string can be parsed.
* `strings(s0 -> p0, ...)` can be thought of as `atomicChoice(string(s0) *> p0, ...)`, however, the given
* ordering of key-value pairs does not dictate the order in which the parses are tried. In particular, it
* will favour keys that are the prefix of another key first, so that it has ''Longest Match'' semantics.
* it will try to minimise backtracking too, making it a much more efficient option than `atomicChoice`.
*
* @example {{{
* scala> import parsley.unicode.strings
* scala> val p = strings("hell" -> pure(4), "hello" -> pure(5), "goodbye" -> pure(7), "g" -> pure(1), "abc" -> pure(3))
* scala> p.parse("hell")
* val res0 = Success(4)
* scala> p.parse("hello")
* val res1 = Success(5)
* scala> p.parse("good")
* val res2 = Success(1)
* scala> p.parse("goodbye")
* val res3 = Success(7)
* scala> p.parse("a")
* val res4 = Failure(..)
* }}}
*
* @note the scope of any backtracking performed is isolated to the key itself, as it is assumed that once a
* key parses correctly, the branch has been committed to. Putting an `attempt` around the values will not affect
* this behaviour.
*
* @param kv0 the first key-value pair to try to parse.
* @param kvs the remaining key-value pairs to try to parse.
* @return a parser that tries to parse all the given key-value pairs, returning the (possibly failing) result
* of the value that corresponds to the longest matching key.
* @note just an alias for [[parsley.character.strings[A](kv0* `character.strings`]], to allow for more ergonomic imports.
* @group string
*/
def strings[A](kv0: (String, Parsley[A]), kvs: (String, Parsley[A])*): Parsley[A] = character.strings(kv0, kvs: _*)
// $COVERAGE-ON$
/** This parser will parse '''any''' single codepoint from the input, failing if there is no input remaining.
*
* @group core
*/
val item: Parsley[Int] = satisfy(_ => true, "any character")
/** This parser tries to parse a space or tab character, and returns it if successful
*
* @see [[isSpace `isSpace`]]
* @group spec
*/
val space: Parsley[Int] = satisfy(isSpace(_), "space/tab")
/** This parser skips zero or more space characters using [[space `space`]].
*
* @group skip
*/
val spaces: Parsley[Unit] = many(space).void
/** This parser tries to parse a whitespace character, and returns it if successful.
*
* A whitespace character is one of:
* 1. a space (`' '`)
* 1. a tab (`'\t'`)
* 1. a line feed (`'\n'`)
* 1. a carriage return (`'\r'`)
* 1. a form feed (`'\f'`)
* 1. a vertical tab (`'\u000b'`)
*
* @group spec
*/
val whitespace: Parsley[Int] = satisfy(Character.isWhitespace(_), "whitespace")
/** This parser skips zero or more space characters using [[whitespace `whitespace`]].
*
* @group skip
*/
val whitespaces: Parsley[Unit] = many(whitespace).void
/** This parser tries to parse a line feed newline (`'\n'`) character, and returns it if successful.
*
* This parser will not accept a carriage return (`CR`) character or `CRLF`.
*
* @group spec
*/
val newline: Parsley[Int] = char('\n', "newline")
/** This parser tries to parse a `CRLF` newline character pair, returning `'\n'` if successful.
*
* A `CRLF` character is the pair of carriage return (`'\r'`) and line feed (`'\n'`). These
* two characters will be parsed together or not at all. The parser is made atomic using `attempt`.
*
* @group spec
*/
val crlf: Parsley[Int] = character.crlf.as(0x0a)
/** This parser will parse either a line feed (`LF`) or a `CRLF` newline, returning `'\n'` if successful.
*
* @group spec
* @see [[crlf `crlf`]]
*/
val endOfLine: Parsley[Int] = (newline <|> crlf).label("end of line")
/** This parser tries to parse a tab (`'\t'`) character, and returns it if successful.
*
* This parser does not recognise vertical tabs, only horizontal ones.
*
* @group spec
*/
val tab: Parsley[Int] = char('\t', "tab")
/** This parser tries to parse an uppercase letter, and returns it if successful.
*
* An uppercase letter is any character whose Unicode ''Category Type'' is Uppercase Letter (`Lu`).
* Examples of characters within this category include:
* - the Latin letters `'A'` through `'Z'`
* - Latin special character such as `'Å'`, `'Ç'`, `'Õ'`
* - Cryillic letters
* - Greek letters
* - Coptic letters
*
* $categories
*
* @group spec
*/
val upper: Parsley[Int] = satisfy(Character.isUpperCase(_), "uppercase letter")
/** This parser tries to parse a lowercase letter, and returns it if successful.
*
* A lowercase letter is any character whose Unicode ''Category Type'' is Lowercase Letter (`Ll`).
* Examples of characters within this category include:
* - the Latin letters `'a'` through `'z'`
* - Latin special character such as `'é'`, `'ß'`, `'ð'`
* - Cryillic letters
* - Greek letters
* - Coptic letters
*
* $categories
*
* @group spec
*/
val lower: Parsley[Int] = satisfy(Character.isLowerCase(_), "lowercase letter")
/** This parser tries to parse either a letter or a digit, and returns it if successful.
*
* A letter or digit is anything that would parse in either `letter` or `digit`.
*
* @see documentation for [[letter `letter`]].
* @see documentation for [[digit `digit`]].
* @group spec
*/
val letterOrDigit: Parsley[Int] = satisfy(Character.isLetterOrDigit(_), "alpha-numeric character")
/** This parser tries to parse a letter, and returns it if successful.
*
* A letter is any character whose Unicode ''Category Type'' is any of the following:
* 1. Uppercase Letter (`Lu`)
* 1. Lowercase Letter (`Ll`)
* 1. Titlecase Letter (`Lt`)
* 1. Modifier Letter (`Lm`)
* 1. Other Letter (`Lo`)
*
* $categories
*
* @group spec
*/
val letter: Parsley[Int] = satisfy(Character.isLetter(_), "letter")
/** This parser tries to parse a digit, and returns it if successful.
*
* A digit is any character whose Unicode ''Category Type'' is Decimal Number (`Nd`).
* Examples of (inclusive) ranges within this category include:
* - the Latin digits `'0'` through `'9'`
* - the Arabic-Indic digits `'\u0660'` through `'\u0669'`
* - the Extended Arabic-Indic digits `'\u06F0'` through `'\u06F9'`
* - the Devangari digits `'\u0966'` through `'\u096F'`
* - the Fullwidth digits `'\uFF10'` through `'\uFF19'`
*
* $categories
*
* @group spec
*/
val digit: Parsley[Int] = satisfy(Character.isDigit(_), "digit")
/** This parser tries to parse a hexadecimal digit, and returns it if successful.
*
* A hexadecimal digit is one of (all inclusive ranges):
* 1. the digits `'0'` through `'9'`
* 1. the letters `'a'` through `'f'`
* 1. the letters `'A'` through `'Z'`
*
* @see [[isHexDigit ``isHexDigit``]]
* @group spec
*/
val hexDigit: Parsley[Int] = satisfy(isHexDigit(_), "hexadecimal digit")
/** This parser tries to parse an octal digit, and returns it if successful.
*
* An octal digit is one of `'0'` to `'7'` (inclusive).
*
* @see [[isOctDigit ``isOctDigit``]]
* @group spec
*/
val octDigit: Parsley[Int] = satisfy(isOctDigit(_), "octal digit")
/** This parser tries to parse a bit and returns it if successful.
*
* A bit (binary digit) is either `'0'` or `'1'`.
*
* @group spec
*/
val bit: Parsley[Int] = satisfy(c => Character.digit(c, 2) != -1, "bit")
// Functions
/** This function returns true if a character is a hexadecimal digit.
*
* A hexadecimal digit is one of (all inclusive ranges):
* 1. the digits `'0'` through `'9'`
* 1. the letters `'a'` through `'f'`
* 1. the letters `'A'` through `'Z'`
* 1. an equivalent from another charset
*
* @see [[hexDigit `hexDigit`]]
* @group pred
*/
def isHexDigit(c: Int): Boolean = Character.digit(c, 16) != -1
/** This function returns true if a character is an octal digit.
*
* An octal digit is one of `'0'` to `'7'` (inclusive).
*
* @group pred
* @see [[octDigit `octDigit`]]
*/
def isOctDigit(c: Int): Boolean = Character.digit(c, 8) != -1
/** This function returns true if a codepoint is either a space or a tab character.
*
* @group pred
* @see [[space `space`]]
*/
def isSpace(c: Int): Boolean = c == 0x20 || c == 0x09
// Sue me.
private def renderChar(c: Int): String = parsley.errors.helpers.renderRawString(Character.toChars(c).mkString)
private [parsley] def addCodepoint(sb: StringBuilder, codepoint: Int): StringBuilder = {
if (Character.isSupplementaryCodePoint(codepoint)) {
sb += Character.highSurrogate(codepoint)
sb += Character.lowSurrogate(codepoint)
}
else sb += codepoint.toChar
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy