All Downloads are FREE. Search and download functionalities are using the official Maven repository.

weaponregex.internal.parser.ParserJS.scala Maven / Gradle / Ivy

The newest version!
package weaponregex.internal.parser

import fastparse.*
import weaponregex.internal.model.regextree.*

import NoWhitespace.*

/** Concrete parser for JS flavor of regex
  * @param pattern
  *   The regex pattern to be parsed
  * @param flags
  *   The regex flags to be used. When the flags contain `u` or `v`, the parser runs in Unicode mode, which changes
  *   the set of tokens accepted by several rules below
  * @note
  *   This class constructor is private, instances must be created using the companion
  *   [[weaponregex.internal.parser.Parser]] object
  * @see
  *   [[https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Cheatsheet]]
  * @see
  *   [[https://tc39.es/ecma262/multipage/text-processing.html#sec-patterns]]
  */
private[weaponregex] class ParserJS private[parser] (pattern: String, val flags: Option[String] = None)
    extends Parser(pattern) {

  /** Whether the flags contain the `u` or `v` flag for Unicode mode. `false` when no flags are given */
  private val unicodeMode: Boolean = flags.exists(f => f.contains("u") || f.contains("v"))

  /** Regex special characters
    */
  override val specialChars: String = """()[{\.^$|?*+"""

  /** Special characters within a character class
    */
  override val charClassSpecialChars: String = """]\"""

  /** Allowed boundary meta-characters
    */
  override val boundaryMetaChars: String = "bB"

  /** Allowed escape characters
    */
  override val escapeChars: String = "\\\\tnrf" // fastparse needs `\\\\` for a single backslash

  /** Allowed predefined character class characters
    */
  override val predefCharClassChars: String = "dDsSvwW"

  /** Minimum number of character class items of a valid character class
    */
  override val minCharClassItem: Int = 0

  /** The escape character used with a code point
    * @example
    *   `\ u{h..h}` (written with a space after the backslash so that this comment contains no unicode escape)
    */
  override val codePointEscChar: String = "u"

  /** Parse special cases of a character literal: a `{` that is not the start of a long quantifier tail is captured
    * as a literal character
    * @return
    *   The captured character as a string
    */
  override def charLiteralSpecialCases[A: P]: P[String] = P("{".! ~ !quantifierLongTail)

  /** Intermediate parsing rule for character class item tokens which can parse either `preDefinedCharClass`,
    * `metaCharacter`, `range`, `quoteChar`, or `charClassCharLiteral`. In Unicode mode `unicodeCharClass` is
    * additionally tried right after `preDefinedCharClass`
    * @return
    *   [[weaponregex.internal.model.regextree.RegexTree]] (sub)tree
    * @note
    *   Nested character classes are not parsed by this rule
    */
  override def classItem[A: P]: P[RegexTree] =
    if (unicodeMode)
      P(preDefinedCharClass | unicodeCharClass | metaCharacter | range | quoteChar | charClassCharLiteral)
    else
      P(preDefinedCharClass | metaCharacter | range | quoteChar | charClassCharLiteral)

  /** Parse a group name: a letter or underscore followed by any number of letters, digits or underscores (ASCII
    * only)
    * @return
    *   the parsed name string
    * @example
    *   `"name1"`
    */
  override def groupName[A: P]: P[String] =
    P(CharIn("a-z", "A-Z", "_") ~ CharIn("a-z", "A-Z", "0-9", "_").rep).!

  /** Parse a quoted character (any character). If [[weaponregex.internal.parser.ParserJS unicodeMode]] is true, only
    * the following characters are allowed: `^ $ \ . * + ? ( ) [ ] { } |` or `/`
    * @return
    *   [[weaponregex.internal.model.regextree.QuoteChar]]
    * @example
    *   `"\$"`
    */
  override def quote[A: P]: P[QuoteChar] =
    if (unicodeMode)
      // fastparse's `CharIn` needs a doubled backslash for a single literal backslash (see `escapeChars`), so it is
      // written as `\\` inside the raw string; a lone `\.` would only add `.` to the set.
      Indexed("""\""" ~ CharIn("""^$\\.*+?()[]{}|/""").!)
        .map { case (loc, char) => QuoteChar(char.head, loc) }
    else quoteChar

  /** Parse a character with octal value `\n`, `\nn`, `\mnn` (0 <= m,n <= 9)
    *
    * @return
    *   [[weaponregex.internal.model.regextree.MetaChar]] tree node
    * @example
    *   `"\012"`
    * @note
    *   This syntax will correctly match if 0 <= m <= 3, 0 <= n <= 7; but m and/or n outside of this range will still be
    *   parsable.
    */
  override def charOct[A: P]: P[MetaChar] = Indexed("""\""" ~ CharIn("0-9").rep(min = 1, max = 3).!)
    .map { case (loc, octDigits) => MetaChar(octDigits, loc) }

  /** Intermediate parsing rule for reference tokens which can parse only `nameReference`
    * @return
    *   [[weaponregex.internal.model.regextree.RegexTree]] (sub)tree
    */
  override def reference[A: P]: P[RegexTree] = nameReference

  /** Intermediate parsing rule for meta-character tokens which can parse either `charOct`, `charHex`, `charUnicode`,
    * `escapeChar` or `controlChar`. In Unicode mode `charCodePoint` is additionally tried right after `charUnicode`
    * @return
    *   [[weaponregex.internal.model.regextree.RegexTree]] (sub)tree
    * @note
    *   `charUnicode` is tried in both modes: ECMAScript accepts the 4-hex-digit unicode escape with or without the
    *   `u`/`v` flag, only the braced code point form requires Unicode mode
    */
  override def metaCharacter[A: P]: P[RegexTree] =
    if (unicodeMode) P(charOct | charHex | charUnicode | charCodePoint | escapeChar | controlChar)
    else P(charOct | charHex | charUnicode | escapeChar | controlChar)

  /** Intermediate parsing rule which can parse either `capturing`, `anyDot`, `preDefinedCharClass`, `boundary`,
    * `charClass`, `reference`, `character` or `quote`. In Unicode mode `unicodeCharClass` is additionally tried right
    * after `preDefinedCharClass`
    * @return
    *   [[weaponregex.internal.model.regextree.RegexTree]] (sub)tree
    */
  override def elementaryRE[A: P]: P[RegexTree] =
    if (unicodeMode)
      P(
        capturing | anyDot | preDefinedCharClass | unicodeCharClass | boundary | charClass | reference | character | quote
      )
    else P(capturing | anyDot | preDefinedCharClass | boundary | charClass | reference | character | quote)
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy