java.util.regex.Pattern.scala Maven / Gradle / Ivy
package java.util
package regex
import scalanative.native._, stdlib._, stdio._, string._
import cre2h._
import annotation.tailrec
// Inspired by: https://github.com/google/re2j/blob/master/java/com/google/re2j/Pattern.java
object Pattern {
def CANON_EQ: Int = 128
def CASE_INSENSITIVE: Int = 2
def COMMENTS: Int = 4
def DOTALL: Int = 32
def LITERAL: Int = 16
def MULTILINE: Int = 8
def UNICODE_CASE: Int = 64
def UNICODE_CHARACTER_CLASS: Int = 256
def UNIX_LINES: Int = 1
def compile(regex: String): Pattern = compile(regex, 0)
def compile(regex: String, flags: Int): Pattern = {
// make sure the provided regex is compiled
CompiledPatternStore.withRE2Regex(regex, flags)(_ => ())
new Pattern(
_pattern = regex,
_flags = flags
)
}
def matches(regex: String, input: CharSequence): Boolean =
compile(regex).matcher(input).matches
def quote(s: String): String =
Zone { implicit z =>
val original, quoted = stackalloc[cre2.string_t]
toRE2String(s, original)
cre2.quoteMeta(quoted, original)
val res = fromRE2String(quoted)
res
}
private object CompiledPatternStore {
final case class Key(regex: String, flags: Int)
final class Node(var key: Key,
var value: RE2RegExpOps,
var rc: Int,
var next: Node)
private def freshNode(next: Node) =
new Node(null, new RE2RegExpOps(null), 0, next)
// The tip of Nodes. The Nodes form a ring buffer of some length.
private var last: Node = {
// Populate the ringbuffer
@tailrec def f(n: Node, num: Int): Node =
if (num < 0) {
n
} else {
f(freshNode(n), num - 1)
}
val last = freshNode(null)
last.next = f(last, 128)
last
}
// Used to quickly look up a Node from a Key.
private val map = scala.collection.mutable.HashMap.empty[Key, Node]
private def selectNode(regex: String, flags: Int): Node = {
// Look up a RE2RegExpOps from the map.
// If the map doesn't contain the key, look for an unused Node (whose refcount(rc) is 0),
// delete its old compiled pattern if any, and then compile a new RE2 pattern and cache it
// before returning it.
// If all of the nodes are in use, expand the ringbuffer by 1 as a last resort.
map.get(Key(regex, flags)).getOrElse {
@tailrec def findUnused(n: Node): Node = {
if (n eq last) {
// No unused nodes in the ringbuffer; expand its size by 1
val newnode = freshNode(last.next)
last.next = newnode
newnode
} else if (n.rc <= 0) {
n
} else {
findUnused(n.next)
}
}
val reused = {
if (last.rc <= 0) last
else findUnused(last.next)
}
// delete the old pattern (if any)
map -= reused.key
if (reused.value.ptr != null) {
cre2.delete(reused.value.ptr)
reused.value = new RE2RegExpOps(null)
}
// reuse the node by replacing its members with new contents
reused.key = Key(regex, flags)
reused.value = doCompile(regex, flags)
map += reused.key -> reused
// advance `last` so that it points to the next node (which is likely the least recently used one)
last = reused.next
reused
}
}
def withRE2Regex[A](regex: String, flags: Int)(f: RE2RegExpOps => A): A = {
// increase the refcount of the selected node while in use to prevent it from deleted
val node = {
val n = selectNode(regex, flags)
n.rc += 1
n
}
try f(node.value)
finally node.rc -= 1
}
private def doCompile(regex: String, flags: Int): RE2RegExpOps = Zone {
implicit z =>
def notSupported(flag: Int, flagName: String): Unit = {
if ((flags & flag) == flag) {
assert(false, s"regex flag $flagName is not supported")
}
}
notSupported(CANON_EQ, "CANON_EQ(canonical equivalences)")
notSupported(COMMENTS, "COMMENTS")
notSupported(UNICODE_CASE, "UNICODE_CASE")
notSupported(UNICODE_CHARACTER_CLASS, "UNICODE_CHARACTER_CLASS")
notSupported(UNIX_LINES, "UNIX_LINES")
val options = cre2.optNew()
try {
cre2.setCaseSensitive(options, flags & CASE_INSENSITIVE)
cre2.setDotNl(options, flags & DOTALL)
cre2.setLiteral(options, flags & LITERAL)
cre2.setLogErrors(options, 0)
// setOneLine(false) is only available when limiting ourself to posix_syntax
// https://github.com/google/re2/blob/2017-03-01/re2/re2.h#L548
// regex flag MULTILINE cannot be disabled
val re2 = {
val regexre2 = alloc[cre2.string_t]
toRE2String(regex, regexre2)
cre2.compile(regexre2.data, regexre2.length, options)
}
val code = cre2.errorCode(re2)
if (code != ERROR_NO_ERROR) {
val errorPattern = {
val arg = alloc[cre2.string_t]
cre2.errorArg(re2, arg)
fromRE2String(arg)
}
// we try to find the index of the parsing error
// this could return the wrong index it only finds the first match
// see https://groups.google.com/forum/#!topic/re2-dev/rnvFZ9Ki8nk
val index =
if (code == ERROR_TRAILING_BACKSLASH) regex.size - 1
else regex.indexOfSlice(errorPattern)
val reText = fromCString(cre2.errorString(re2))
val description =
code match {
case ERROR_INTERNAL => "Internal Error"
case ERROR_BAD_ESCAPE => "Illegal/unsupported escape sequence"
case ERROR_BAD_CHAR_CLASS =>
"Illegal/unsupported character class"
case ERROR_BAD_CHAR_RANGE => "Illegal character range"
case ERROR_MISSING_BRACKET => "Unclosed character class"
case ERROR_MISSING_PAREN => "Missing parenthesis"
case ERROR_TRAILING_BACKSLASH => "Trailing Backslash"
case ERROR_REPEAT_ARGUMENT => "Dangling meta character '*'"
case ERROR_REPEAT_SIZE => "Bad repetition argument"
case ERROR_REPEAT_OP => "Bad repetition operator"
case ERROR_BAD_PERL_OP => "Bad perl operator"
case ERROR_BAD_UTF8 => "Invalid UTF-8 in regexp"
case ERROR_BAD_NAMED_CAPTURE => "Bad named capture group"
case ERROR_PATTERN_TOO_LARGE =>
"Pattern too large (compilation failed)"
case _ => reText
}
cre2.delete(re2)
throw new PatternSyntaxException(
description,
regex,
index
)
}
new RE2RegExpOps(re2)
} finally {
cre2.optDelete(options)
}
}
}
}
final class Pattern private[regex] (
_pattern: String,
_flags: Int
) {
// this loan pattern makes sure that the instance of cre2.regexp_t is kept alive while in use.
private[regex] def withRE2Regex[A](f: RE2RegExpOps => A): A =
Pattern.CompiledPatternStore.withRE2Regex(_pattern, _flags)(f)
def split(input: CharSequence): Array[String] =
split(input, 0)
def split(input: CharSequence, limit: Int): Array[String] =
split(new Matcher(this, input), limit)
private def split(m: Matcher, limit: Int): Array[String] = {
var matchCount = 0
var arraySize = 0
var last = 0
while (m.find()) {
matchCount += 1
if (limit != 0 || last < m.start()) {
arraySize = matchCount
}
last = m.end()
}
if (last < m.inputLength || limit != 0) {
matchCount += 1
arraySize = matchCount
}
var trunc = 0
if (limit > 0 && arraySize > limit) {
arraySize = limit
trunc = 1
}
val array = Array.ofDim[String](arraySize)
var i = 0
last = 0
m.reset()
while (m.find() && i < arraySize - trunc) {
val t = i
i += 1
array(t) = m.substring(last, m.start())
last = m.end()
}
if (i < arraySize) {
array(i) = m.substring(last, m.inputLength)
}
array
}
def matcher(input: CharSequence): Matcher = new Matcher(this, input)
def flags: Int = _flags
def pattern: String = _pattern
override def toString: String = _pattern
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy