All Downloads are FREE. Search and download functionalities are using the official Maven repository.

parsley.character.scala Maven / Gradle / Ivy

/*
 * Copyright 2020 Parsley Contributors 
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */
package parsley

import scala.collection.immutable.NumericRange

import parsley.Parsley.{atomic, empty, many, transPure => pure, some}
import parsley.combinator.choice
import parsley.errors.combinator.ErrorMethods
import parsley.token.errors.{Label, LabelConfig, NotConfigured}

import parsley.internal.deepembedding.singletons

/** This module contains many parsers to do with reading one or more characters. Almost every parser will need something from this module.
  *
  * In particular, this module contains: combinators that can read specific characters; combinators that represent character classes and their negations;
  * combinators for reading specific strings; as well as a selection of pre-made parsers to parse specific kinds of character, like digits and letters.
  *
  * @since 2.2.0
  *
  * @groupprio pred 100
  * @groupname pred Character Predicates
  * @groupdesc pred
  *     These are useful for providing to the sub-descriptions of a [[token.descriptions.LexicalDesc]] to specify behaviour for the lexer.
  *     Other than that, they aren't ''particularly'' useful.
  *
  * @groupprio core 0
  * @groupname core Core Combinators and Parsers
  * @groupdesc core
  *     These are the most primitive combinators for consuming input capable of any input reading tasks.
  *
  * @groupprio skip 75
  * @groupname skip Whitespace Skipping Parsers
  * @groupdesc skip
  *     These parsers are designed to skip chunks of whitespace, for very rudimentary lexing tasks. It
  *     is probably better to use the functionality of [[parsley.token]].
  *
  * @groupprio class 20
  * @groupname class Character Class Combinators
  * @groupdesc class
  *     These combinators allow for working with ''character classes''. This means that a set, or range, of
  *     characters can be specified, and the combinator will return a parser that matches one of those characters
  *     (or conversely, any character that is ''not'' in that set). The parsed character is always returned.
  *
  * @groupprio spec 25
  * @groupname spec Specific Character Parsers
  * @groupdesc spec
  *     These parsers are special cases of [[satisfy `satisfy`]] or [[char `char`]]. They are worth using, as they are given special error labelling,
  *     producing nicer error messages than their primitive counterparts.
  *
  *     This documentation assumes JDK 17.
  *     JDK 17 is compliant with [[https://www.unicode.org/versions/Unicode13.0.0/UnicodeStandard-13.0.pdf Unicode® Specification 13.0]].
  *     As such, the descriptions of the parsers in this section are accurate with respect to Unicode® Specification 13.0:
  *     using a different JDK may affect the ''precise'' definitions of the parsers below. If in doubt, check the documentation
  *     for `java.lang.Character` to see which Unicode version is supported by your JVM. A table of the Unicode versions
  *     up to JDK 17 can be found [[https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Character.html here]].
  *
  *     These parsers are only able to parse unicode characters in the range `'\u0000'` to `'\uffff'`, known as
  *     the ''Basic Multilingual Plane (BMP)''. Unicode characters wider than a single 16-bit character should be
  *     parsed using multi-character combinators such as `string`, or, alternatively, combinators found in [[unicode `unicode`]].
  *
  * @groupprio string 22
  * @groupname string String Combinators
  * @groupdesc string
  *     These combinators allow for working with, or building, strings. This means that they can
  *     parse specific strings, specific sets of strings, or can read characters repeatedly to
  *     generate strings. They are united in all returning `String` as their result.
  *
  * @define oneOf
  *     This combinator tries to parse any character from the supplied set of characters `cs`, returning it if successful.
  * @define noneOf
  *     This combinator tries to parse any character '''not''' from the supplied set of characters `cs`, returning it if successful.
  *
  * @define categories
  *     ''The full list of codepoints found in a category can be found in the
  *     [[https://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedGeneralCategory.txt Unicode Character Database]]''.
  */
object character extends character // public singleton exposing every member of the package-private trait below
private [parsley] trait character {
    /** This combinator tries to parse a single specific character `c` from the input.
      *
      * Attempts to read the given character `c` from the input stream at the current
      * position. If this character can be found, it is consumed and returned. Otherwise,
      * no input is consumed and this combinator will fail.
      *
      * @example {{{
      * scala> import parsley.character.char
      * scala> char('a').parse("")
      * val res0 = Failure(..)
      * scala> char('a').parse("a")
      * val res1 = Success('a')
      * scala> char('a').parse("ba")
      * val res2 = Failure(..)
      * }}}
      *
      * @param c the character to parse
      * @return a parser that tries to read a single `c`, or fails.
      * @note this combinator can only handle 16-bit characters: for larger codepoints,
      *       consider using [[string `string`]] or [[unicode.char `unicode.char`]].
      * @group core
      */
    final def char(c: Char): Parsley[Char] = char(c, NotConfigured)
    // Labelled variant: wraps the plain label text into a `Label` configuration first.
    private def char(c: Char, label: String): Parsley[Char] = {
        val config = Label(label)
        char(c, config)
    }
    // Base overload: builds the `CharTok` singleton that the other two overloads funnel into.
    private def char(c: Char, label: LabelConfig): Parsley[Char] = {
        val token = new singletons.CharTok(c, c, label)
        new Parsley(token)
    }

    /** This combinator tries to parse a single character from the input that matches the given predicate.
      *
      * Attempts to read a character from the input and tests it against the predicate `pred`. If a character `c`
      * can be read and `pred(c)` is true, then `c` is consumed and returned. Otherwise, no input is consumed
      * and this combinator will fail.
      *
      * @example {{{
      * scala> import parsley.character.satisfy
      * scala> satisfy(_.isDigit).parse("")
      * val res0 = Failure(..)
      * scala> satisfy(_.isDigit).parse("7")
      * val res1 = Success('7')
      * scala> satisfy(_.isDigit).parse("a5")
      * val res2 = Failure(..)
      * scala> def char(c: Char): Parsley[Char] = satisfy(_ == c)
      * }}}
      *
      * @param pred the predicate to test the next character against, should one exist.
      * @return a parser that tries to read a single character `c`, such that `pred(c)` is true, or fails.
      * @note this combinator can only handle 16-bit characters: for larger codepoints, consider using [[unicode.satisfy `unicode.satisfy`]].
      * @group core
      */
    final def satisfy(pred: Char => Boolean): Parsley[Char] = satisfy(pred, NotConfigured)
    // Labelled variant: wraps the plain label text into a `Label` configuration first.
    private def satisfy(pred: Char => Boolean, label: String): Parsley[Char] = {
        val config = Label(label)
        satisfy(pred, config)
    }
    // Base overload: builds the `Satisfy` singleton that the other two overloads funnel into.
    private def satisfy(pred: Char => Boolean, label: LabelConfig): Parsley[Char] = {
        val node = new singletons.Satisfy(pred, label)
        new Parsley(node)
    }

    /** This combinator tries to parse and process a character from the input if it is defined for the given function.
      *
      * Attempts to read a character from the input and tests to see if it is in the domain of `f`. If a character
      * `c` can be read and `f(c)` is defined, then `c` is consumed and `f(c)` is returned. Otherwise, no input is consumed
      * and this combinator will fail.
      *
      * @example {{{
      * scala> import parsley.character.satisfyMap
      * scala> val digit = satisfyMap {
      *   case c if c.isDigit => c.asDigit
      * }
      * scala> digit.parse("")
      * val res0 = Failure(..)
      * scala> digit.parse("7")
      * val res1 = Success(7)
      * scala> digit.parse("a5")
      * val res2 = Failure(..)
      * }}}
      *
      * @param f the function to test the next character against and transform it with, should one exist.
      * @return a parser that tries to read a single character `c`, such that `f(c)` is defined, and returns `f(c)` if so, or fails.
      * @since 4.4.0
      * @note this combinator can only handle 16-bit characters: for larger codepoints, consider using [[unicode.satisfyMap `unicode.satisfyMap`]].
      * @group core
      */
    final def satisfyMap[A](f: PartialFunction[Char, A]): Parsley[A] = {
        // first accept only characters in the domain of `f`, then transform the accepted character
        val inDomain = satisfy((c: Char) => f.isDefinedAt(c))
        inDomain.map(f)
    }

    /** This combinator attempts to parse a given string from the input, and fails otherwise.
      *
      * Attempts to read the given string ''completely'' from the input at the current position.
      * If the string is present, then the parser succeeds, and the entire string is consumed
      * from the input. Otherwise, if the input has too few characters remaining, or not all
      * the characters matched, the parser fails. On failure, '''all''' the characters that were
      * matched are consumed from the input.
      *
      * @example {{{
      * scala> import parsley.character.string
      * scala> string("abc").parse("")
      * val res0 = Failure(..)
      * scala> string("abc").parse("abcd")
      * val res1 = Success("abc")
      * scala> string("abc").parse("xabc")
      * val res2 = Failure(..)
      * }}}
      *
      * @param s the string to be parsed from the input
      * @return a parser that either parses the string `s` or fails at the first mismatched character.
      * @note the error messages generated by `string` do not reflect how far into the input it managed
      *       to get: this is because the error being positioned at the start of the string is more
      *       natural. However, input '''will''' still be consumed for purposes of backtracking.
      * @group string
      */
    final def string(s: String): Parsley[String] = string(s, NotConfigured)
    // Labelled variant: wraps the plain label text into a `Label` configuration first.
    private [parsley] def string(s: String, label: String): Parsley[String] = {
        val config = Label(label)
        string(s, config)
    }
    // Base overload: rejects the empty string, then builds the `StringTok` singleton.
    private [parsley] def string(s: String, label: LabelConfig): Parsley[String] = {
        require(!s.isEmpty, "`string` may not be passed the empty string (`string(\"\")` is meaningless, perhaps you meant `pure(\"\")`?)")
        val token = new singletons.StringTok(s, s, label)
        new Parsley(token)
    }

    /** $oneOf
      *
      * If the next character in the input is a member of the set `cs`, it is consumed
      * and returned. Otherwise, no input is consumed and the combinator fails.
      *
      * @example {{{
      * scala> import parsley.character.oneOf
      * scala> val p = oneOf(Set('a', 'b', 'c'))
      * scala> p.parse("a")
      * val res0 = Success('a')
      * scala> p.parse("c")
      * val res1 = Success('c')
      * scala> p.parse("xb")
      * val res2 = Failure(..)
      * }}}
      *
      * @param cs the set of characters to check.
      * @return a parser that parses one of the members of the set `cs`.
      * @note this combinator can only handle 16-bit characters: for larger codepoints, consider using [[unicode.oneOf(cs:Set* `unicode.oneOf`]].
      * @see [[satisfy `satisfy`]]
      * @group class
      */
    final def oneOf(cs: Set[Char]): Parsley[Char] = {
        if (cs.isEmpty) empty.uo("oneOf(Set.empty)")
        // a singleton set is just a specific character
        else if (cs.size == 1) char(cs.head).uo(s"oneOf($cs)")
        else {
            // the label lists every rendered member of the set
            val Some(alternatives) = parsley.errors.helpers.disjunct(cs.map(renderChar).toList, oxfordComma = true): @unchecked
            satisfy(cs, s"one of $alternatives").uo(s"oneOf($cs)")
        }
    }

    /** $oneOf
      *
      * If the next character in the input is an element of the list of characters `cs`, it is consumed
      * and returned. Otherwise, no input is consumed and the combinator fails.
      *
      * @example {{{
      * scala> import parsley.character.oneOf
      * scala> val p = oneOf('a', 'b', 'c')
      * scala> p.parse("a")
      * val res0 = Success('a')
      * scala> p.parse("c")
      * val res1 = Success('c')
      * scala> p.parse("xb")
      * val res2 = Failure(..)
      * }}}
      *
      * @param cs the characters to check.
      * @return a parser that parses one of the elements of `cs`.
      * @note this combinator can only handle 16-bit characters: for larger codepoints, consider using [[unicode.oneOf(cs:Int* `unicode.oneOf`]].
      * @see [[satisfy `satisfy`]]
      * @group class
      */
    final def oneOf(cs: Char*): Parsley[Char] = {
        // delegate to the set-based overload
        val members: Set[Char] = cs.toSet
        oneOf(members)
    }

    /** $oneOf
      *
      * If the next character in the input is within the range of characters `cs`, it is consumed
      * and returned. Otherwise, no input is consumed and the combinator fails.
      *
      * @example {{{
      * scala> import parsley.character.oneOf
      * scala> val p = oneOf('a' to 'c')
      * scala> p.parse("a")
      * val res0 = Success('a')
      * scala> p.parse("b")
      * val res1 = Success('b')
      * scala> p.parse("c")
      * val res2 = Success('c')
      * scala> p.parse("xb")
      * val res3 = Failure(..)
      * }}}
      *
      * @param cs the range of characters to check.
      * @return a parser that parses a character within the range `cs`.
      * @note this combinator can only handle 16-bit characters: for larger codepoints, consider using [[unicode.oneOf(cs:Range* `unicode.oneOf`]].
      * @see [[satisfy `satisfy`]]
      * @group class
      */
    final def oneOf(cs: NumericRange[Char]): Parsley[Char] = {
        val parser: Parsley[Char] = cs.size match {
            case 0 => empty
            case 1 => char(cs.head)
            case _ =>
                // only contiguous ranges (adjacent elements one apart, ascending or descending) get a label
                val gap = cs(0).toInt - cs(1).toInt
                if (gap == 1 || gap == -1) satisfy(cs.contains(_), s"one of ${renderChar(cs.min)} to ${renderChar(cs.max)}")
                else satisfy(cs.contains(_))
        }
        parser.uo(s"oneOf($cs)")
    }

    /** $noneOf
      *
      * If the next character in the input is not a member of the set `cs`, it is consumed
      * and returned. Otherwise, no input is consumed and the combinator fails.
      *
      * @example {{{
      * scala> import parsley.character.noneOf
      * scala> val p = noneOf(Set('a', 'b', 'c'))
      * scala> p.parse("a")
      * val res0 = Failure(..)
      * scala> p.parse("c")
      * val res1 = Failure(..)
      * scala> p.parse("xb")
      * val res2 = Success('x')
      * scala> p.parse("")
      * val res3 = Failure(..)
      * }}}
      *
      * @param cs the set of characters to check.
      * @return a parser that parses one character that is not a member of the set `cs`.
      * @note this combinator can only handle 16-bit characters: for larger codepoints, consider using [[unicode.noneOf(cs:Set* `unicode.noneOf`]].
      * @see [[satisfy `satisfy`]]
      * @group class
      */
    final def noneOf(cs: Set[Char]): Parsley[Char] = {
        // nothing is excluded, so any character will do
        if (cs.isEmpty) item.uo("noneOf(Set.empty)")
        else if (cs.size == 1) {
            val excluded = cs.head
            satisfy((c: Char) => excluded != c, s"anything except ${renderChar(excluded)}").uo(s"noneOf($cs)")
        }
        else {
            // the label lists every rendered member of the excluded set
            val Some(alternatives) = parsley.errors.helpers.disjunct(cs.map(renderChar).toList, oxfordComma = true): @unchecked
            satisfy(!cs.contains(_), s"anything except $alternatives").uo(s"noneOf($cs)")
        }
    }

    /** $noneOf
      *
      * If the next character in the input is not an element of the list of characters `cs`, it is consumed
      * and returned. Otherwise, no input is consumed and the combinator fails.
      *
      * @example {{{
      * scala> import parsley.character.noneOf
      * scala> val p = noneOf('a', 'b', 'c')
      * scala> p.parse("a")
      * val res0 = Failure(..)
      * scala> p.parse("c")
      * val res1 = Failure(..)
      * scala> p.parse("xb")
      * val res2 = Success('x')
      * scala> p.parse("")
      * val res3 = Failure(..)
      * }}}
      *
      * @param cs the characters to check.
      * @return a parser that parses one character that is not an element of `cs`.
      * @note this combinator can only handle 16-bit characters: for larger codepoints, consider using [[unicode.noneOf(cs:Int* `unicode.noneOf`]].
      * @see [[satisfy `satisfy`]]
      * @group class
      */
    final def noneOf(cs: Char*): Parsley[Char] = {
        // delegate to the set-based overload
        val excluded: Set[Char] = cs.toSet
        noneOf(excluded)
    }

    /** $noneOf
      *
      * If the next character in the input is outside of the range of characters `cs`, it is consumed
      * and returned. Otherwise, no input is consumed and the combinator fails.
      *
      * @example {{{
      * scala> import parsley.character.noneOf
      * scala> val p = noneOf('a' to 'c')
      * scala> p.parse("a")
      * val res0 = Failure(..)
      * scala> p.parse("b")
      * val res1 = Failure(..)
      * scala> p.parse("c")
      * val res2 = Failure(..)
      * scala> p.parse("xb")
      * val res3 = Success('x')
      * scala> p.parse("")
      * val res4 = Failure(..)
      * }}}
      *
      * @param cs the range of characters to check.
      * @return a parser that parses a character outside the range `cs`.
      * @note this combinator can only handle 16-bit characters: for larger codepoints, consider using [[unicode.noneOf(cs:Range* `unicode.noneOf`]].
      * @see [[satisfy `satisfy`]]
      * @group class
      */
    final def noneOf(cs: NumericRange[Char]): Parsley[Char] = {
        val parser: Parsley[Char] = cs.size match {
            case 0 => item
            case 1 =>
                val excluded = cs.head
                satisfy((c: Char) => excluded != c, s"anything except ${renderChar(excluded)}")
            case _ =>
                // only contiguous ranges (adjacent elements one apart, ascending or descending) get a label
                val gap = cs(0).toInt - cs(1).toInt
                if (gap == 1 || gap == -1) satisfy(!cs.contains(_), s"anything outside of ${renderChar(cs.min)} to ${renderChar(cs.max)}")
                else satisfy(!cs.contains(_))
        }
        parser.uo(s"noneOf($cs)")
    }

    /** This combinator parses `pc` '''zero''' or more times, collecting its results into a string.
      *
      * Parses `pc` repeatedly until it fails. The resulting characters are placed into a string,
      * which is then returned. This is ''morally'' equivalent to `many(pc).map(_.mkString)`, but
      * it uses `StringBuilder`, which makes it much more efficient.
      *
      * @example {{{
      * scala> import parsley.character.{letter, letterOrDigit, stringOfMany}
      * scala> import parsley.syntax.zipped._
      * scala> val ident = (letter, stringOfMany(letterOrDigit)).zipped((c, s) => s"$c$s")
      * scala> ident.parse("abdc9d")
      * val res0 = Success("abdc9d")
      * scala> ident.parse("a")
      * val res1 = Success("a")
      * scala> ident.parse("9")
      * val res2 = Failure(..)
      * }}}
      *
      * @param pc the parser whose results make up the string
      * @return a parser that parses a string whose letters consist of results from `pc`.
      * @since 4.0.0
      * @group string
      */
    final def stringOfMany(pc: Parsley[Char]): Parsley[String] = {
        // collect the results of `pc` via the character factory rather than an intermediate list
        val collected: Parsley[String] = many(pc, StringFactories.charFactory)
        collected.uo("stringOfMany")
    }

    // TODO: optimise, this can be _really_ tightly implemented with a substring on the input
    /** This combinator parses characters matching the given predicate '''zero''' or more times, collecting
      * the results into a string.
      *
      * Repeatedly reads characters that satisfy the given predicate `pred`. When no more characters
      * can be successfully read, the results are stitched together into a `String` and returned.
      * This combinator can never fail, since `satisfy` can never fail having consumed input.
      *
      * @example {{{
      * scala> import parsley.character.{letter, stringOfMany}
      * scala> import parsley.syntax.zipped._
      * scala> val ident = (letter, stringOfMany(_.isLetterOrDigit)).zipped((c, s) => s"$c$s")
      * scala> ident.parse("abdc9d")
      * val res0 = Success("abdc9d")
      * scala> ident.parse("a")
      * val res1 = Success("a")
      * scala> ident.parse("9")
      * val res2 = Failure(..)
      * }}}
      *
      * @param pred the predicate to test characters against.
      * @return a parser that returns the span of characters satisfying `pred`
      * @note this acts exactly like `stringOfMany(satisfy(pred))`, but may be more efficient.
      * @note analogous to the `megaparsec` `takeWhileP` combinator.
      * @since 4.4.0
      * @group string
      */
    final def stringOfMany(pred: Char => Boolean): Parsley[String] = {
        val single = satisfy(pred).ut()
        val repeated = many(single).ut()
        // the individual results are discarded: `span` yields the consumed input instead
        repeated.span.uo("stringOfMany")
    }

    /** This combinator parses `pc` '''one''' or more times, collecting its results into a string.
      *
      * Parses `pc` repeatedly until it fails. The resulting characters are placed into a string,
      * which is then returned. This is ''morally'' equivalent to `some(pc).map(_.mkString)`, but
      * it uses `StringBuilder`, which makes it much more efficient. The result string must have
      * at least one character in it.
      *
      * @example {{{
      * scala> import parsley.character.{letter, stringOfSome}
      * scala> val ident = stringOfSome(letter)
      * scala> ident.parse("abdc9d")
      * val res0 = Success("abdc")
      * scala> ident.parse("")
      * val res1 = Failure(..)
      * }}}
      *
      * @param pc the parser whose results make up the string
      * @return a parser that parses a string whose letters consist of results from `pc`.
      * @since 4.0.0
      * @group string
      */
    final def stringOfSome(pc: Parsley[Char]): Parsley[String] = {
        // collect the results of `pc` via the character factory rather than an intermediate list
        val collected: Parsley[String] = some(pc, StringFactories.charFactory)
        collected.uo("stringOfSome")
    }

    // TODO: optimise, this can be _really_ tightly implemented with a substring on the input
    /** This combinator parses characters matching the given predicate '''one''' or more times, collecting
      * the results into a string.
      *
      * Repeatedly reads characters that satisfy the given predicate `pred`. When no more characters
      * can be successfully read, the results are stitched together into a `String` and returned.
      * This combinator can never fail having consumed input, since `satisfy` can never fail having
      * consumed input.
      *
      * @example {{{
      * scala> import parsley.character.{stringOfSome}
      * scala> val ident = stringOfSome(_.isLetter)
      * scala> ident.parse("abdc9d")
      * val res0 = Success("abdc")
      * scala> ident.parse("")
      * val res1 = Failure(..)
      * }}}
      *
      * @param pred the predicate to test characters against.
      * @return a parser that returns the span of characters satisfying `pred`
      * @note this acts exactly like `stringOfSome(satisfy(pred))`, but may be more efficient.
      * @note analogous to the `megaparsec` `takeWhile1P` combinator.
      * @since 4.4.0
      * @group string
      */
    final def stringOfSome(pred: Char => Boolean): Parsley[String] = {
        val single = satisfy(pred).ut()
        val repeated = some(single).ut()
        // the individual results are discarded: `span` yields the consumed input instead
        repeated.span.uo("stringOfSome")
    }

    /** This combinator tries to parse each of the strings `strs` (and `str0`), until one of them succeeds.
      *
      * Unlike `choice`, or more accurately `atomicChoice`, this combinator will not
      * necessarily parse the strings in the order provided. It will avoid strings that have another string
      * as a prefix first, so that it has ''Longest Match'' semantics. It will try to minimise backtracking
      * too, making it a much more efficient option than `atomicChoice`.
      *
      * The longest succeeding string will be returned. If no strings match then the combinator fails.
      *
      * @example {{{
      * scala> import parsley.character.strings
      * scala> val p = strings("hell", "hello", "goodbye", "g", "abc")
      * scala> p.parse("hell")
      * val res0 = Success("hell")
      * scala> p.parse("hello")
      * val res1 = Success("hello")
      * scala> p.parse("good")
      * val res2 = Success("g")
      * scala> p.parse("goodbye")
      * val res3 = Success("goodbye")
      * scala> p.parse("a")
      * val res4 = Failure(..)
      * }}}
      *
      * @param str0 the first string to try to parse.
      * @param strs the remaining strings to try to parse.
      * @return a parser that tries to parse all the given strings returning the longest one that matches.
      * @since 4.0.0
      * @group string
      */
    final def strings(str0: String, strs: String*): Parsley[String] = {
        // each string is paired with a parser that simply returns that same string
        def echo(s: String): (String, Parsley[String]) = (s, pure(s))
        strings(echo(str0), strs.map(echo): _*)
    }

    /** This combinator tries to parse each of the key-value pairs `kvs` (and `kv0`), until one of them succeeds.
      *
      * Each argument to this combinator is a pair of a string and a parser to perform if that string can be parsed.
      * `strings(s0 -> p0, ...)` can be thought of as `atomicChoice(string(s0) *> p0, ...)`, however, the given
      * ordering of key-value pairs does not dictate the order in which the parses are tried. In particular, it
      * will avoid keys that are the prefix of another key first, so that it has ''Longest Match'' semantics.
      * It will try to minimise backtracking too, making it a much more efficient option than `atomicChoice`.
      *
      * @example {{{
      * scala> import parsley.character.strings
      * scala> val p = strings("hell" -> pure(4), "hello" -> pure(5), "goodbye" -> pure(7), "g" -> pure(1), "abc" -> pure(3))
      * scala> p.parse("hell")
      * val res0 = Success(4)
      * scala> p.parse("hello")
      * val res1 = Success(5)
      * scala> p.parse("good")
      * val res2 = Success(1)
      * scala> p.parse("goodbye")
      * val res3 = Success(7)
      * scala> p.parse("a")
      * val res4 = Failure(..)
      * }}}
      *
      * @note the scope of any backtracking performed is isolated to the key itself, as it is assumed that once a
      * key parses correctly, the branch has been committed to. Putting an `atomic` around the values will not affect
      * this behaviour.
      *
      * @param kv0 the first key-value pair to try to parse.
      * @param kvs the remaining key-value pairs to try to parse.
      * @return a parser that tries to parse all the given key-value pairs, returning the (possibly failing) result
      *         of the value that corresponds to the longest matching key.
      * @throws java.lang.IllegalArgumentException if any of the keys is the empty string.
      * @since 4.0.0
      * @group string
      */
    final def strings[A](kv0: (String, Parsley[A]), kvs: (String, Parsley[A])*): Parsley[A] = {
        // this isn't the best we could do: it's possible to eliminate backtracking with a Trie...
        // can this be done in a semantic preserving way without resorting to a new instruction?
        // I don't think it's worth it. Down the line a general Trie-backed optimisation would be
        // more effective.
        val ss = kv0 +: kvs
        // Reject empty keys up front: grouping by `_._1.head` below would otherwise fail with an
        // unhelpful `NoSuchElementException` before `string`'s own non-empty check is ever reached.
        require(ss.forall(_._1.nonEmpty), "`strings` may not be passed the empty string as a key")
        // Keys are grouped by their first character, and the groups are ordered by that character.
        choice(ss.groupBy(_._1.head).toList.sortBy(_._1).view.map(_._2).flatMap { s =>
            // Sorted by ascending length, so the head is the shortest key of the group: after the
            // final `reverse` it is attempted last, which is why it alone is not wrapped in `atomic`.
            val (sLast, pLast) :: rest = s.toList.sortBy(_._1.length): @unchecked
            ((string(sLast).ut() *> pLast.ut()).ut() :: rest.map { case (s, p) => (atomic(string(s).ut()).ut() *> p).ut() }).reverse
        }.toSeq: _*).uo(ss.map(_._1).mkString("strings(", ", ", ")"))
    }

    /** This parser will parse '''any''' single character from the input, failing if there is no input remaining.
      *
      * @note this combinator can only handle 16-bit characters: for larger codepoints, consider using [[unicode.item `unicode.item`]].
      * @group core
      */
    final val item: Parsley[Char] = {
        // a predicate that accepts every character
        val anyChar = satisfy((_: Char) => true, "any character")
        anyChar.uo("item")
    }

    /** This parser tries to parse a space or tab character, and returns it if successful.
      *
      * @see [[isSpace `isSpace`]]
      * @group spec
      */
    final val space: Parsley[Char] = _space.uo("space")
    // un-named space/tab parser, shared by `space` and `spaces`
    private def _space: Parsley[Char] = satisfy((c: Char) => isSpace(c), "space/tab")

    /** This parser skips zero or more space characters using [[space `space`]].
      *
      * @group skip
      */
    final val spaces: Parsley[Unit] = {
        val repeated = many(_space.ut()).ut()
        // the parsed characters themselves are not needed
        repeated.void.uo("spaces")
    }

    /** This parser tries to parse a whitespace character, and returns it if successful.
      *
      * A whitespace character is any character for which `java.lang.Character.isWhitespace` holds, which includes:
      *   1. a space (`' '`)
      *   1. a tab (`'\t'`)
      *   1. a line feed (`'\n'`)
      *   1. a carriage return (`'\r'`)
      *   1. a form feed (`'\f'`)
      *   1. a vertical tab (`'\u000B'`)
      *
      * @group spec
      */
    final val whitespace: Parsley[Char] = _whitespace.uo("whitespace")
    // un-named whitespace parser, shared by `whitespace` and `whitespaces`
    private def _whitespace: Parsley[Char] = satisfy((c: Char) => c.isWhitespace, "whitespace")

    /** This parser skips zero or more whitespace characters using [[whitespace `whitespace`]].
      *
      * @group skip
      */
    final val whitespaces: Parsley[Unit] = {
        val repeated = many(_whitespace.ut()).ut()
        // the parsed characters themselves are not needed
        repeated.void.uo("whitespaces")
    }

    /** This parser tries to parse a line feed newline (`'\n'`) character, and returns it if successful.
      *
      * This parser will not accept a carriage return (`CR`) character or `CRLF`.
      *
      * @group spec
      */
    final val newline: Parsley[Char] = _newline.uo("newline")
    // un-named line feed parser, shared by `newline` and `endOfLine`
    private def _newline: Parsley[Char] = {
        val lineFeed = '\n'
        char(lineFeed, "newline")
    }

    /** This parser tries to parse a `CRLF` newline character pair, returning `'\n'` if successful.
      *
      * A `CRLF` character is the pair of carriage return (`'\r'`) and line feed (`'\n'`). These
      * two characters will be parsed together or not at all. The parser is made atomic using `atomic`.
      *
      * @group spec
      */
    final val crlf: Parsley[Char] = _crlf.uo("crlf")
    // un-named CRLF parser, shared by `crlf` and `endOfLine`
    private def _crlf = {
        // both characters are read as one string, wrapped in `atomic`
        val pair = atomic(string("\r\n", "end of crlf").ut()).ut()
        // the result is normalised to a plain line feed
        pair.as('\n')
    }

    /** This parser will parse either a line feed (`LF`) or a `CRLF` newline, returning `'\n'` if successful.
      *
      * @group spec
      * @see [[crlf `crlf`]]
      */
    final val endOfLine: Parsley[Char] = {
        // a line feed is tried first, then the CRLF pair
        val lineEnding = (_newline.ut() <|> _crlf.ut()).ut()
        lineEnding.label("end of line").uo("endOfLine")
    }

    /** This parser tries to parse a tab (`'\t'`) character, and returns it if successful.
      *
      * This parser does not recognise vertical tabs, only horizontal ones.
      *
      * @group spec
      */
    final val tab: Parsley[Char] = {
        val horizontalTab = char('\t', "tab")
        horizontalTab.uo("tab")
    }

    /** This parser tries to parse an uppercase letter, and returns it if successful.
      *
      * An uppercase letter is any character `c <= '\uffff'` whose Unicode ''Category Type'' is Uppercase Letter (`Lu`).
      * Examples of characters within this category include:
      *   - the Latin letters `'A'` through `'Z'`
      *   - Latin special characters such as `'Å'`, `'Ç'`, `'Õ'`
      *   - Cyrillic letters
      *   - Greek letters
      *   - Coptic letters
      *
      * $categories
      *
      * @group spec
      */
    final val upper: Parsley[Char] = {
        val uppercase = satisfy((c: Char) => c.isUpper, "uppercase letter")
        uppercase.uo("upper")
    }

    /** This parser tries to parse a lowercase letter, and returns it if successful.
      *
      * A lowercase letter is any character `c <= '\uffff'` whose Unicode ''Category Type'' is Lowercase Letter (`Ll`).
      * Examples of characters within this category include:
      *   - the Latin letters `'a'` through `'z'`
      *   - Latin special characters such as `'é'`, `'ß'`, `'ð'`
      *   - Cyrillic letters
      *   - Greek letters
      *   - Coptic letters
      *
      * $categories
      *
      * @group spec
      */
    final val lower: Parsley[Char] = {
        val lowercase = satisfy((c: Char) => c.isLower, "lowercase letter")
        lowercase.uo("lower")
    }

    /** This parser tries to parse either a letter or a digit, and returns it if successful.
      *
      * A letter or digit is anything that would parse in either `letter` or `digit`.
      *
      * @see documentation for [[letter `letter`]].
      * @see documentation for [[digit `digit`]].
      * @group spec
      */
    final val letterOrDigit: Parsley[Char] = {
        val alphaNumeric = satisfy((c: Char) => c.isLetterOrDigit, "alpha-numeric character")
        alphaNumeric.uo("letterOrDigit")
    }

    /** This parser tries to parse a letter, and returns it if successful.
      *
      * A letter is any character `c <= '\uffff'` whose Unicode ''Category Type'' is any of the following:
      *   1. Uppercase Letter (`Lu`)
      *   1. Lowercase Letter (`Ll`)
      *   1. Titlecase Letter (`Lt`)
      *   1. Modifier Letter (`Lm`)
      *   1. Other Letter (`Lo`)
      *
      * $categories
      *
      * @group spec
      */
    final val letter: Parsley[Char] = {
        val anyLetter = satisfy((c: Char) => c.isLetter, "letter")
        anyLetter.uo("letter")
    }

    /** This parser tries to parse a digit, and returns it if successful.
      *
      * A digit is any character `c <= '\uffff'` whose Unicode ''Category Type'' is Decimal Number (`Nd`).
      * Examples of (inclusive) ranges within this category include:
      *   - the Latin digits `'0'` through `'9'`
      *   - the Arabic-Indic digits `'\u0660'` through `'\u0669'`
      *   - the Extended Arabic-Indic digits `'\u06f0'` through `'\u06f9'`
      *   - the Devanagari digits `'\u0966'` through `'\u096f'`
      *   - the Fullwidth digits `'\uff10'` through `'\uff19'`
      *
      * $categories
      *
      * @group spec
      */
    final val digit: Parsley[Char] = {
        val anyDigit = satisfy((c: Char) => c.isDigit, "digit")
        anyDigit.uo("digit")
    }

    /** This parser tries to parse a hexadecimal digit, and returns it if successful.
      *
      * A hexadecimal digit is one of (all inclusive ranges):
      *   1. the digits `'0'` through `'9'`
      *   1. the letters `'a'` through `'f'`
      *   1. the letters `'A'` through `'F'`
      *
      * @see [[isHexDigit ``isHexDigit``]]
      * @group spec
      */
    final val hexDigit: Parsley[Char] = {
        val hex = satisfy((c: Char) => isHexDigit(c), "hexadecimal digit")
        hex.uo("hexDigit")
    }

    /** This parser tries to parse an octal digit, and returns it if successful.
      *
      * An octal digit is one of `'0'` to `'7'` (inclusive).
      *
      * @see [[isOctDigit ``isOctDigit``]]
      * @group spec
      */
    final val octDigit: Parsley[Char] = {
        val oct = satisfy((c: Char) => isOctDigit(c), "octal digit")
        oct.uo("octDigit")
    }

    /** This parser tries to parse a binary digit (bit) and returns it if successful.
      *
      * A bit is either `'0'` or `'1'`.
      *
      * @group spec
      */
    final val bit: Parsley[Char] = {
        // `Character.digit` returns -1 when the character is not a valid digit in the given radix
        val binary = satisfy((c: Char) => Character.digit(c, 2) >= 0, "bit")
        binary.uo("bit")
    }

    // Functions
    /** This function returns true if a character is a hexadecimal digit.
      *
      * A hexadecimal digit is one of (all inclusive ranges):
      *   1. the digits `'0'` through `'9'`
      *   1. the letters `'a'` through `'f'`
      *   1. the letters `'A'` through `'F'`
      *   1. an equivalent from another charset
      *
      * @see [[hexDigit `hexDigit`]]
      * @group pred
      */
    final def isHexDigit(c: Char): Boolean = {
        // `Character.digit` returns -1 when `c` is not a valid digit in radix 16
        val value = Character.digit(c, 16)
        value >= 0
    }

    /** This function returns true if a character is an octal digit.
      *
      * An octal digit is one of `'0'` to `'7'` (inclusive).
      *
      * @group pred
      * @see [[octDigit `octDigit`]]
      */
    final def isOctDigit(c: Char): Boolean = {
        // `Character.digit` returns -1 when `c` is not a valid digit in radix 8
        val value = Character.digit(c, 8)
        value >= 0
    }

    /** This function returns true if a character is either a space or a tab character.
      *
      * @group pred
      * @see [[space `space`]]
      */
    final def isSpace(c: Char): Boolean = c match {
        // only the plain space and the horizontal tab count
        case ' ' | '\t' => true
        case _          => false
    }

    // Sue me.
    private def renderChar(c: Char): String = {
        // the character is rendered through the same helper used for raw strings
        val asString = c.toString
        parsley.errors.helpers.renderRawString(asString)
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy