weaponregex.internal.parser.ParserJS.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of weapon-regex_2.12 Show documentation
Show all versions of weapon-regex_2.12 Show documentation
Weapon regeX mutates regular expressions for use in mutation testing.
The newest version!
package weaponregex.internal.parser
import fastparse.*
import weaponregex.internal.model.regextree.*
import NoWhitespace.*
/** Concrete parser for JS flavor of regex
* @param pattern
* The regex pattern to be parsed
* @param flags
* The regex flags to be used
* @note
* This class constructor is private, instances must be created using the companion
* [[weaponregex.internal.parser.Parser]] object
* @see
* [[https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Cheatsheet]]
* @see
* [[https://tc39.es/ecma262/multipage/text-processing.html#sec-patterns]]
*/
private[weaponregex] class ParserJS private[parser] (pattern: String, val flags: Option[String] = None)
    extends Parser(pattern) {

  /** Whether the flags contain the `u` or `v` flag for Unicode mode.
    *
    * `false` when no flags were supplied. Several parsing rules below select a different set of alternatives
    * depending on this value.
    */
  private val unicodeMode: Boolean = flags.exists(f => f.contains("u") || f.contains("v"))

  /** Regex special characters
    */
  override val specialChars: String = """()[{\.^$|?*+"""

  /** Special characters within a character class
    */
  override val charClassSpecialChars: String = """]\"""

  /** Allowed boundary meta-characters
    */
  override val boundaryMetaChars: String = "bB"

  /** Allowed escape characters
    */
  // The literal below is written with four backslashes (`\\\\`), i.e. the string value holds two backslashes,
  // which is what fastparse needs to denote a single backslash.
  override val escapeChars: String = "\\\\tnrf"

  /** Allowed predefined character class characters
    */
  override val predefCharClassChars: String = "dDsSvwW"

  /** Minimum number of character class items of a valid character class.
    *
    * Zero here, so an empty character class is accepted by this flavor.
    */
  override val minCharClassItem: Int = 0

  /** The escape character used with a code point
    * @example
    *   `\ x{h..h}` or `\ u{h..h}`
    */
  override val codePointEscChar: String = "u"

  /** Parse special cases of a character literal.
    *
    * For this flavor the only special case is an opening curly brace that is ''not'' followed by something
    * `quantifierLongTail` would match (negative lookahead), so the brace is treated as a plain character.
    * @return
    *   The captured character as a string
    */
  override def charLiteralSpecialCases[A: P]: P[String] = P("{".! ~ !quantifierLongTail)

  /** Intermediate parsing rule for character class item tokens.
    *
    * Alternatives are tried in this order: `preDefinedCharClass`, `unicodeCharClass` (only when
    * [[weaponregex.internal.parser.ParserJS unicodeMode]] is true), `metaCharacter`, `range`, `quoteChar`, and
    * finally `charClassCharLiteral`.
    * @return
    *   [[weaponregex.internal.model.regextree.RegexTree]] (sub)tree
    * @note
    *   Nested character class is a Scala/Java-only regex syntax
    */
  override def classItem[A: P]: P[RegexTree] =
    if (unicodeMode)
      P(preDefinedCharClass | unicodeCharClass | metaCharacter | range | quoteChar | charClassCharLiteral)
    else
      P(preDefinedCharClass | metaCharacter | range | quoteChar | charClassCharLiteral)

  /** Parse a group name.
    *
    * The first character must be an ASCII letter, `_` or `$`; every following character may additionally be an
    * ASCII digit. `$` is accepted because ECMAScript allows it anywhere in a capture group name.
    * @return
    *   the parsed name string
    * @example
    *   `"name1"`
    * @note
    *   Non-ASCII identifier characters and escape sequences inside a group name are not recognised by this rule.
    */
  override def groupName[A: P]: P[String] =
    P(CharIn("a-z", "A-Z", "_$") ~ CharIn("a-z", "A-Z", "0-9", "_$").rep).!

  /** Parse a quoted character (any character). If [[weaponregex.internal.parser.ParserJS unicodeMode]] is true, only
    * the following characters are allowed: `^ $ \ . * + ? ( ) [ ] { } |` or `/`. Otherwise the rule delegates to
    * `quoteChar`.
    * @return
    *   [[weaponregex.internal.model.regextree.QuoteChar]]
    * @example
    *   `"\$"`
    */
  override def quote[A: P]: P[QuoteChar] =
    if (unicodeMode)
      Indexed("""\""" ~ CharIn("""^$\.*+?()[]{}|/""").!)
        // `CharIn` consumed exactly one character, so the captured string is never empty and `head` is safe.
        .map { case (loc, char) => QuoteChar(char.head, loc) }
    else quoteChar

  /** Parse a character with octal value `\n`, `\nn`, `\mnn` (0 <= m,n <= 9)
    *
    * One to three decimal digits following a backslash are captured and stored, unchanged, in the resulting node.
    * @return
    *   [[weaponregex.internal.model.regextree.MetaChar]] tree node
    * @example
    *   `"\012"`
    * @note
    *   This syntax will correctly match if 0 <= m <= 3, 0 <= n <= 7; but m and/or n outside of this range will still be
    *   parsable.
    */
  override def charOct[A: P]: P[MetaChar] =
    Indexed("""\""" ~ CharIn("0-9").rep(min = 1, max = 3).!)
      .map { case (loc, octDigits) => MetaChar(octDigits, loc) }

  /** Intermediate parsing rule for reference tokens which can parse only `nameReference`
    * @return
    *   [[weaponregex.internal.model.regextree.RegexTree]] (sub)tree
    */
  override def reference[A: P]: P[RegexTree] = nameReference

  /** Intermediate parsing rule for meta-character tokens.
    *
    * When [[weaponregex.internal.parser.ParserJS unicodeMode]] is true the alternatives are, in order, `charOct`,
    * `charHex`, `charUnicode`, `charCodePoint`, `escapeChar` and `controlChar`. Otherwise `charUnicode` and
    * `charCodePoint` are left out and only `charOct`, `charHex`, `escapeChar` and `controlChar` are tried.
    * @return
    *   [[weaponregex.internal.model.regextree.RegexTree]] (sub)tree
    */
  override def metaCharacter[A: P]: P[RegexTree] =
    if (unicodeMode) P(charOct | charHex | charUnicode | charCodePoint | escapeChar | controlChar)
    else P(charOct | charHex | escapeChar | controlChar)

  /** Intermediate parsing rule for a single elementary regex element.
    *
    * Alternatives are tried in this order: `capturing`, `anyDot`, `preDefinedCharClass`, `unicodeCharClass` (only
    * when [[weaponregex.internal.parser.ParserJS unicodeMode]] is true), `boundary`, `charClass`, `reference`,
    * `character` and finally `quote`.
    * @return
    *   [[weaponregex.internal.model.regextree.RegexTree]] (sub)tree
    */
  override def elementaryRE[A: P]: P[RegexTree] =
    if (unicodeMode)
      P(
        capturing | anyDot | preDefinedCharClass | unicodeCharClass | boundary | charClass | reference | character | quote
      )
    else P(capturing | anyDot | preDefinedCharClass | boundary | charClass | reference | character | quote)
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy