parsley.token.text.Character.scala Maven / Gradle / Ivy
/* SPDX-FileCopyrightText: © 2022 Parsley Contributors
* SPDX-License-Identifier: BSD-3-Clause
*/
package parsley.token.text
import parsley.Parsley
import parsley.token.predicate.{Basic, CharPredicate, NotRequired, Unicode}
/** This class defines a uniform interface for defining parsers for character
* literals, independent of how whitespace should be handled after the literal.
*
* @since 4.0.0
* @note implementations of this class found within `Lexer` may employ sharing
* and refine the `def`s in this class into `val` or `lazy val` when overriding.
*
* @define disclaimer
* the exact behaviour of this parser is decided by the implementations given in
* `Lexer`, which will depend on user-defined configuration. Please see the
* relevant documentation of these specific objects.
*/
abstract class Character private[text] {
/** This parser will parse a single character literal, which may contain
* any unicode graphic character, encoded using up to two UTF-16 code units
* (a single code unit, or a surrogate pair). It may also contain escape sequences.
* The result is the codepoint of the character, as an `Int`.
*
* @example {{{
* scala> fullUtf16.parse("'a'")
* val res0 = Success(97)
* scala> fullUtf16.parse("'£'")
* val res1 = Success(163)
* scala> fullUtf16.parse("'λ'")
* val res2 = Success(0x03BB)
* scala> fullUtf16.parse("'🙂'")
* val res3 = Success(0x1F642)
* }}}
*
* @since 4.0.0
* @note $disclaimer
*/
def fullUtf16: Parsley[Int]
/** This parser will parse a single character literal, which may contain
* any graphic character that falls within the "Basic Multilingual Plane" (BMP).
* This is defined as any character whose codepoint fits into 16 bits, and so is
* encoded by a single UTF-16 code unit. A Scala `Char` is exactly large enough to
* hold any BMP character. It may also contain escape sequences,
* but only those which result in BMP characters.
*
* @example {{{
* scala> basicMultilingualPlane.parse("'a'")
* val res0 = Success('a')
* scala> basicMultilingualPlane.parse("'£'")
* val res1 = Success('£')
* scala> basicMultilingualPlane.parse("'λ'")
* val res2 = Success('λ')
* scala> basicMultilingualPlane.parse("'🙂'")
* val res3 = Failure(...) // 🙂 has a codepoint larger than 0xffff
* }}}
*
* @since 4.0.0
* @note $disclaimer
*/
def basicMultilingualPlane: Parsley[Char]
/** This parser will parse a single character literal, which may contain
* any graphic ASCII character. These are characters with ordinals in range
* 0 to 127 inclusive. It may also contain escape sequences, but only
* those which result in ASCII characters.
*
* @example {{{
* scala> ascii.parse("'a'")
* val res0 = Success('a')
* scala> ascii.parse("'£'")
* val res1 = Failure(...) // £'s ordinal is greater than 127
* scala> ascii.parse("'λ'")
* val res2 = Failure(...) // λ's ordinal is greater than 127
* scala> ascii.parse("'🙂'")
* val res3 = Failure(...) // 🙂's ordinal is greater than 127
* }}}
*
* @since 4.0.0
* @note $disclaimer
*/
def ascii: Parsley[Char]
/** This parser will parse a single character literal, which may contain
* any graphic extended ASCII (Latin-1) character. These are characters with ordinals
* in range 0 to 255 inclusive. It may also contain escape sequences, but only
* those which result in extended ASCII characters.
*
* @example {{{
* scala> latin1.parse("'a'")
* val res0 = Success('a')
* scala> latin1.parse("'£'")
* val res1 = Success('£')
* scala> latin1.parse("'λ'")
* val res2 = Failure(...) // λ's ordinal is greater than 255
* scala> latin1.parse("'🙂'")
* val res3 = Failure(...) // 🙂's ordinal is greater than 255
* }}}
*
* @since 4.0.0
* @note $disclaimer
*/
def latin1: Parsley[Char]
}
/** Internal constants and predicate helpers, visible only within `parsley.token.text`. */
private [text] object Character {
    /** The largest ordinal of an ASCII character (127). */
    final val MaxAscii: Int = 0x7f
    /** The largest ordinal of a Latin-1 (extended ASCII) character (255). */
    final val MaxLatin1: Int = 0xff

    /** Refines `isGraphic` into a predicate for the characters allowed inside a literal.
      *
      * The resulting predicate always rejects `terminalLead`. When `allowsAllSpace`
      * is set, whitespace characters are accepted in addition to whatever `isGraphic`
      * accepts. `NotRequired` is returned unchanged.
      *
      * @param terminalLead the character that must never be accepted by the result.
      * @param allowsAllSpace whether whitespace should be accepted even when `isGraphic` rejects it.
      * @param isGraphic the base predicate to refine.
      * @return a predicate of the same flavour (`Unicode`, `Basic` or `NotRequired`) as `isGraphic`.
      */
    def letter(terminalLead: Char, allowsAllSpace: Boolean, isGraphic: CharPredicate): CharPredicate = isGraphic match {
        case Unicode(g) if allowsAllSpace =>
            // `c` is a full codepoint here, while the whitespace check is queried with a `Char`.
            // Narrowing a supplementary codepoint with `toChar` keeps only its low 16 bits, which
            // would alias it to an unrelated BMP character (0x10020 would be seen as ' ', 0x1000A
            // as '\n'), so the whitespace check is only consulted for codepoints within the BMP.
            Unicode(c => c != terminalLead.toInt && (g(c) || (isBmpCodePoint(c) && parsley.character.isWhitespace(c.toChar))))
        case Unicode(g) => Unicode(c => c != terminalLead.toInt && g(c))
        case Basic(g) if allowsAllSpace => Basic(c => c != terminalLead && (g(c) || parsley.character.isWhitespace(c)))
        case Basic(g) => Basic(c => c != terminalLead && g(c))
        case NotRequired => NotRequired
    }

    /** As the three-argument `letter`, except the resulting predicate also rejects `escapeLead`.
      *
      * @param terminalLead the character that must never be accepted by the result.
      * @param escapeLead a second character that must never be accepted by the result.
      * @param allowsAllSpace whether whitespace should be accepted even when `isGraphic` rejects it.
      * @param isGraphic the base predicate to refine.
      * @return a predicate of the same flavour (`Unicode`, `Basic` or `NotRequired`) as `isGraphic`.
      */
    def letter(terminalLead: Char, escapeLead: Char, allowsAllSpace: Boolean, isGraphic: CharPredicate): CharPredicate = {
        letter(terminalLead, allowsAllSpace, isGraphic) match {
            case Unicode(g) => Unicode(c => c != escapeLead.toInt && g(c))
            case Basic(g) => Basic(c => c != escapeLead && g(c))
            case NotRequired => NotRequired
        }
    }

    /** Delegates to `java.lang.Character.isBmpCodePoint`: is `codepoint` within the Basic Multilingual Plane? */
    @inline def isBmpCodePoint(codepoint: Int): Boolean = java.lang.Character.isBmpCodePoint(codepoint)
    /** Delegates to `java.lang.Character.isValidCodePoint`: is `codepoint` a valid Unicode codepoint? */
    @inline def isValidCodePoint(codepoint: Int): Boolean = java.lang.Character.isValidCodePoint(codepoint)
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy