All downloads are free. Search and download functionality uses the official Maven repository.

com.google.re2j.RE2 Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2020 The Go Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style
 * license that can be found in the LICENSE file.
 */
// Original Go source here:
// http://code.google.com/p/go/source/browse/src/pkg/regexp/regexp.go

// Beware, submatch results may pin a large underlying String into
// memory.  Consider creating explicit string copies if submatches are
// long-lived and inputs are large.
//
// The JDK API supports incremental processing of the input without
// necessarily consuming it all; we do not attempt to do so.

// The Java API emphasises UTF-16 Strings, not UTF-8 byte[] as in Go, as
// the primary input datatype, and the method names have been changed to
// reflect this.

package com.google.re2j;

import com.google.re2j.MatcherInput.Encoding;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;

/**
 * An RE2 class instance is a compiled representation of an RE2 regular expression, independent of
 * the public Java-like Pattern/Matcher API.
 *
 * 

* This class also contains various implementation helpers for RE2 regular expressions. * *

* Use the {@link #quoteMeta(String)} utility function to quote all regular expression * metacharacters in an arbitrary string. * *

* See the {@code Matcher} and {@code Pattern} classes for the public API, and the package-level documentation for an overview of how to use this API. */ class RE2 { // (In the Go implementation this structure is just called "Regexp".) //// Parser flags. // Fold case during matching (case-insensitive). static final int FOLD_CASE = 0x01; // Treat pattern as a literal string instead of a regexp. static final int LITERAL = 0x02; // Allow character classes like [^a-z] and [[:space:]] to match newline. static final int CLASS_NL = 0x04; // Allow '.' to match newline. static final int DOT_NL = 0x08; // Treat ^ and $ as only matching at beginning and end of text, not // around embedded newlines. (Perl's default). static final int ONE_LINE = 0x10; // Make repetition operators default to non-greedy. static final int NON_GREEDY = 0x20; // allow Perl extensions: // non-capturing parens - (?: ) // non-greedy operators - *? +? ?? {}? // flag edits - (?i) (?-i) (?i: ) // i - FoldCase // m - !OneLine // s - DotNL // U - NonGreedy // line ends: \A \z // \Q and \E to disable/enable metacharacters // (?Pexpr) for named captures // \C (any byte) is not supported. static final int PERL_X = 0x40; // Allow \p{Han}, \P{Han} for Unicode group and negation. static final int UNICODE_GROUPS = 0x80; // Regexp END_TEXT was $, not \z. Internal use only. static final int WAS_DOLLAR = 0x100; static final int MATCH_NL = CLASS_NL | DOT_NL; // As close to Perl as possible. static final int PERL = CLASS_NL | ONE_LINE | PERL_X | UNICODE_GROUPS; // POSIX syntax. static final int POSIX = 0; //// Anchors static final int UNANCHORED = 0; static final int ANCHOR_START = 1; static final int ANCHOR_BOTH = 2; //// RE2 instance members. 
final String expr; // as passed to Compile final Prog prog; // compiled program final int cond; // EMPTY_* bitmask: empty-width conditions // required at start of match final int numSubexp; boolean longest; String prefix; // required UTF-16 prefix in unanchored matches byte[] prefixUTF8; // required UTF-8 prefix in unanchored matches boolean prefixComplete; // true iff prefix is the entire regexp int prefixRune; // first rune in prefix // Cache of machines for running regexp. Forms a Treiber stack. private final AtomicReference pooled = new AtomicReference(); public Map namedGroups; // This is visible for testing. RE2(String expr) { RE2 re2 = RE2.compile(expr); // Copy everything. this.expr = re2.expr; this.prog = re2.prog; this.cond = re2.cond; this.numSubexp = re2.numSubexp; this.longest = re2.longest; this.prefix = re2.prefix; this.prefixUTF8 = re2.prefixUTF8; this.prefixComplete = re2.prefixComplete; this.prefixRune = re2.prefixRune; } private RE2(String expr, Prog prog, int numSubexp, boolean longest) { this.expr = expr; this.prog = prog; this.numSubexp = numSubexp; this.cond = prog.startCond(); this.longest = longest; } /** * Parses a regular expression and returns, if successful, an {@code RE2} instance that can be * used to match against text. * *

* When matching against text, the regexp returns a match that begins as early as possible in the * input (leftmost), and among those it chooses the one that a backtracking search would have * found first. This so-called leftmost-first matching is the same semantics that Perl, Python, * and other implementations use, although this package implements it without the expense of * backtracking. For POSIX leftmost-longest matching, see {@link #compilePOSIX}. */ static RE2 compile(String expr) throws PatternSyntaxException { return compileImpl(expr, PERL, /*longest=*/ false); } /** * {@code compilePOSIX} is like {@link #compile} but restricts the regular expression to POSIX ERE * (egrep) syntax and changes the match semantics to leftmost-longest. * *

* That is, when matching against text, the regexp returns a match that begins as early as * possible in the input (leftmost), and among those it chooses a match that is as long as * possible. This so-called leftmost-longest matching is the same semantics that early regular * expression implementations used and that POSIX specifies. * *

* However, there can be multiple leftmost-longest matches, with different submatch choices, and * here this package diverges from POSIX. Among the possible leftmost-longest matches, this * package chooses the one that a backtracking search would have found first, while POSIX * specifies that the match be chosen to maximize the length of the first subexpression, then the * second, and so on from left to right. The POSIX rule is computationally prohibitive and not * even well-defined. See http://swtch.com/~rsc/regexp/regexp2.html#posix */ static RE2 compilePOSIX(String expr) throws PatternSyntaxException { return compileImpl(expr, POSIX, /*longest=*/ true); } // Exposed to ExecTests. static RE2 compileImpl(String expr, int mode, boolean longest) throws PatternSyntaxException { Regexp re = Parser.parse(expr, mode); int maxCap = re.maxCap(); // (may shrink during simplify) re = Simplify.simplify(re); Prog prog = Compiler.compileRegexp(re); RE2 re2 = new RE2(expr, prog, maxCap, longest); StringBuilder prefixBuilder = new StringBuilder(); re2.prefixComplete = prog.prefix(prefixBuilder); re2.prefix = prefixBuilder.toString(); try { re2.prefixUTF8 = re2.prefix.getBytes("UTF-8"); } catch (UnsupportedEncodingException e) { throw new IllegalStateException("can't happen"); } if (!re2.prefix.isEmpty()) { re2.prefixRune = re2.prefix.codePointAt(0); } re2.namedGroups = re.namedGroups; return re2; } /** * Returns the number of parenthesized subexpressions in this regular expression. */ int numberOfCapturingGroups() { return numSubexp; } // get() returns a machine to use for matching |this|. It uses |this|'s // machine cache if possible, to avoid unnecessary allocation. Machine get() { // Pop a machine off the stack if available. Machine head; do { head = pooled.get(); } while (head != null && !pooled.compareAndSet(head, head.next)); return head; } // Clears the memory associated with this machine. 
void reset() { pooled.set(null); } // put() returns a machine to |this|'s machine cache. There is no attempt to // limit the size of the cache, so it will grow to the maximum number of // simultaneous matches run using |this|. (The cache empties when |this| // gets garbage collected or reset is called.) @SuppressWarnings("MakeAlwaysEqual") // for ErrorProne, see below void put(Machine m, boolean isNew) { // To avoid allocation in the single-thread or uncontended case, reuse a node only if // it was the only element in the stack when it was popped, and it's the only element // in the stack when it's pushed back after use. Machine head; do { head = pooled.get(); if (!isNew && head != null) { // If an element had a null next pointer and it was previously in the stack, another thread // might be trying to pop it out right now, and if it sees the same node now in the // stack the pop will succeed, but the new top of the stack will be the stale (null) value // of next. Allocate a new Machine so that the CAS will not succeed if this node has been // popped and re-pushed. m = new Machine(m); isNew = true; } // Without this comparison, TSAN will complain about a race condition: // Thread A, B, and C all attempt to do a match on the same pattern. // // A: Allocates Machine 1; executes match; put machine 1. State is now: // // pooled -> machine 1 -> null // // B reads pooled, sees machine 1 // // C reads pooled, sees machine 1 // // B successfully CASes pooled to null // // B executes match; put machine 1, which involves setting machine1.next to // null (even though it's already null); preempted before CAS // // C resumes, and reads machine1.next in order to execute cas(head, head.next) // // There is no happens-before relationship between B's redundant null write // and C's read, thus triggering TSAN. // // A future release of ErrorProne may want to make the assignment unconditionally. 
The // @SuppressWarning("MakeAlwaysEqual") on this method is intended to prevent that from happening. if (m.next != head) { m.next = head; } } while (!pooled.compareAndSet(head, m)); } @Override public String toString() { return expr; } // doExecute() finds the leftmost match in the input and returns // the position of its subexpressions. // Derived from exec.go. private int[] doExecute(MachineInput in, int pos, int anchor, int ncap) { Machine m = get(); // The Treiber stack cannot reuse nodes, unless the node to be reused has only ever been at // the bottom of the stack (i.e., next == null). boolean isNew = false; if (m == null) { m = new Machine(this); isNew = true; } else if (m.next != null) { m = new Machine(m); isNew = true; } m.init(ncap); int[] cap = m.match(in, pos, anchor) ? m.submatches() : null; put(m, isNew); return cap; } /** * Returns true iff this regexp matches the string {@code s}. */ boolean match(CharSequence s) { return doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, 0) != null; } boolean match(CharSequence input, int start, int end, int anchor, int[] group, int ngroup) { return match(MatcherInput.utf16(input), start, end, anchor, group, ngroup); } /** * Matches the regular expression against input starting at position start and ending at position * end, with the given anchoring. Records the submatch boundaries in group, which is [start, end) * pairs of byte offsets. The number of boundaries needed is inferred from the size of the group * array. It is most efficient not to ask for submatch boundaries. 
* * @param input the input byte array * @param start the beginning position in the input * @param end the end position in the input * @param anchor the anchoring flag (UNANCHORED, ANCHOR_START, ANCHOR_BOTH) * @param group the array to fill with submatch positions * @param ngroup the number of array pairs to fill in * @return true if a match was found */ boolean match(MatcherInput input, int start, int end, int anchor, int[] group, int ngroup) { if (start > end) { return false; } // TODO(afrozm): We suspect that the correct code should look something // like the following: // doExecute(MachineInput.fromUTF16(input), start, anchor, 2*ngroup); // // In Russ' own words: // That is, I believe doExecute needs to know the bounds of the whole input // as well as the bounds of the subpiece that is being searched. MachineInput machineInput = input.getEncoding() == Encoding.UTF_16 ? MachineInput.fromUTF16(input.asCharSequence(), 0, end) : MachineInput.fromUTF8(input.asBytes(), 0, end); int[] groupMatch = doExecute(machineInput, start, anchor, 2 * ngroup); if (groupMatch == null) { return false; } if (group != null) { System.arraycopy(groupMatch, 0, group, 0, groupMatch.length); } return true; } /** * Returns true iff this regexp matches the UTF-8 byte array {@code b}. */ // This is visible for testing. boolean matchUTF8(byte[] b) { return doExecute(MachineInput.fromUTF8(b), 0, UNANCHORED, 0) != null; } /** * Returns true iff textual regular expression {@code pattern} matches string {@code s}. * *

* More complicated queries need to use {@link #compile} and the full {@code RE2} interface. */ // This is visible for testing. static boolean match(String pattern, CharSequence s) throws PatternSyntaxException { return compile(pattern).match(s); } // This is visible for testing. interface ReplaceFunc { String replace(String orig); } /** * Returns a copy of {@code src} in which all matches for this regexp have been replaced by * {@code repl}. No support is provided for expressions (e.g. {@code \1} or {@code $1}) in the * replacement string. */ // This is visible for testing. String replaceAll(String src, final String repl) { return replaceAllFunc( src, new ReplaceFunc() { @Override public String replace(String orig) { return repl; } }, 2 * src.length() + 1); // TODO(afrozm): Is the reasoning correct, there can be at the most 2*len +1 // replacements. Basically [a-z]*? abc x will be xaxbcx. So should it be // len + 1 or 2*len + 1. } /** * Returns a copy of {@code src} in which only the first match for this regexp has been replaced * by {@code repl}. No support is provided for expressions (e.g. {@code \1} or {@code $1}) in the * replacement string. */ // This is visible for testing. String replaceFirst(String src, final String repl) { return replaceAllFunc( src, new ReplaceFunc() { @Override public String replace(String orig) { return repl; } }, 1); } /** * Returns a copy of {@code src} in which at most {@code maxReplaces} matches for this regexp have * been replaced by the return value of of function {@code repl} (whose first argument is the * matched string). No support is provided for expressions (e.g. {@code \1} or {@code $1}) in the * replacement string. */ // This is visible for testing. 
String replaceAllFunc(String src, ReplaceFunc repl, int maxReplaces) { int lastMatchEnd = 0; // end position of the most recent match int searchPos = 0; // position where we next look for a match StringBuilder buf = new StringBuilder(); MachineInput input = MachineInput.fromUTF16(src); int numReplaces = 0; while (searchPos <= src.length()) { int[] a = doExecute(input, searchPos, UNANCHORED, 2); if (a == null || a.length == 0) { break; // no more matches } // Copy the unmatched characters before this match. buf.append(src.substring(lastMatchEnd, a[0])); // Now insert a copy of the replacement string, but not for a // match of the empty string immediately after another match. // (Otherwise, we get double replacement for patterns that // match both empty and nonempty strings.) // FIXME(adonovan), FIXME(afrozm) - JDK seems to be doing exactly this // put a replacement for a pattern that also matches empty and non-empty // strings. The fix would not just be a[1] >= lastMatchEnd, there are a // few corner cases in that as well, and there are tests which will fail // when that case is touched (happens only at the end of the input string // though). if (a[1] > lastMatchEnd || a[0] == 0) { buf.append(repl.replace(src.substring(a[0], a[1]))); // Increment the replace count. ++numReplaces; } lastMatchEnd = a[1]; // Advance past this match; always advance at least one character. int width = input.step(searchPos) & 0x7; if (searchPos + width > a[1]) { searchPos += width; } else if (searchPos + 1 > a[1]) { // This clause is only needed at the end of the input // string. In that case, DecodeRuneInString returns width=0. searchPos++; } else { searchPos = a[1]; } if (numReplaces >= maxReplaces) { // Should never be greater though. break; } } // Copy the unmatched characters after the last match. 
buf.append(src.substring(lastMatchEnd)); return buf.toString(); } /** * Returns a string that quotes all regular expression metacharacters inside the argument text; * the returned string is a regular expression matching the literal text. For example, * {@code quoteMeta("[foo]").equals("\\[foo\\]")}. */ static String quoteMeta(String s) { StringBuilder b = new StringBuilder(2 * s.length()); // A char loop is correct because all metacharacters fit in one UTF-16 code. for (int i = 0, len = s.length(); i < len; i++) { char c = s.charAt(i); if ("\\.+*?()|[]{}^$".indexOf(c) >= 0) { b.append('\\'); } b.append(c); } return b.toString(); } // The number of capture values in the program may correspond // to fewer capturing expressions than are in the regexp. // For example, "(a){0}" turns into an empty program, so the // maximum capture in the program is 0 but we need to return // an expression for \1. Pad returns a with -1s appended as needed; // the result may alias a. private int[] pad(int[] a) { if (a == null) { return null; // No match. } int n = (1 + numSubexp) * 2; if (a.length < n) { int[] a2 = new int[n]; System.arraycopy(a, 0, a2, 0, a.length); Arrays.fill(a2, a.length, n, -1); a = a2; } return a; } private interface DeliverFunc { // Called iteratively with a list of submatch indices in the same // unit as the MachineInput cursor. void deliver(int[] x); } // Find matches in input. private void allMatches(MachineInput input, int n, DeliverFunc deliver) { int end = input.endPos(); if (n < 0) { n = end + 1; } for (int pos = 0, i = 0, prevMatchEnd = -1; i < n && pos <= end; ) { int[] matches = doExecute(input, pos, UNANCHORED, prog.numCap); if (matches == null || matches.length == 0) { break; } boolean accept = true; if (matches[1] == pos) { // We've found an empty match. if (matches[0] == prevMatchEnd) { // We don't allow an empty match right // after a previous match, so ignore it. 
accept = false; } int r = input.step(pos); if (r < 0) { // EOF pos = end + 1; } else { pos += r & 0x7; } } else { pos = matches[1]; } prevMatchEnd = matches[1]; if (accept) { deliver.deliver(pad(matches)); i++; } } } // Legacy Go-style interface; preserved (package-private) for better // test coverage. // // There are 16 methods of RE2 that match a regular expression and // identify the matched text. Their names are matched by this regular // expression: // // find(All)?(UTF8)?(Submatch)?(Index)? // // If 'All' is present, the routine matches successive non-overlapping // matches of the entire expression. Empty matches abutting a // preceding match are ignored. The return value is an array // containing the successive return values of the corresponding // non-All routine. These routines take an extra integer argument, n; // if n >= 0, the function returns at most n matches/submatches. // // If 'UTF8' is present, the argument is a UTF-8 encoded byte[] array; // otherwise it is a UTF-16 encoded java.lang.String; return values // are adjusted as appropriate. // // If 'Submatch' is present, the return value is an list identifying // the successive submatches of the expression. Submatches are // matches of parenthesized subexpressions within the regular // expression, numbered from left to right in order of opening // parenthesis. Submatch 0 is the match of the entire expression, // submatch 1 the match of the first parenthesized subexpression, and // so on. // // If 'Index' is present, matches and submatches are identified by // byte index pairs within the input string: result[2*n:2*n+1] // identifies the indexes of the nth submatch. The pair for n==0 // identifies the match of the entire expression. If 'Index' is not // present, the match is identified by the text of the match/submatch. // If an index is negative, it means that subexpression did not match // any string in the input. 
/** * Returns an array holding the text of the leftmost match in {@code b} of this regular * expression. * *

* A return value of null indicates no match. */ // This is visible for testing. byte[] findUTF8(byte[] b) { int[] a = doExecute(MachineInput.fromUTF8(b), 0, UNANCHORED, 2); if (a == null) { return null; } return Utils.subarray(b, a[0], a[1]); } /** * Returns a two-element array of integers defining the location of the leftmost match in * {@code b} of this regular expression. The match itself is at {@code b[loc[0]...loc[1]]}. * *

* A return value of null indicates no match. */ // This is visible for testing. int[] findUTF8Index(byte[] b) { int[] a = doExecute(MachineInput.fromUTF8(b), 0, UNANCHORED, 2); if (a == null) { return null; } return Utils.subarray(a, 0, 2); } /** * Returns a string holding the text of the leftmost match in {@code s} of this regular * expression. * *

* If there is no match, the return value is an empty string, but it will also be empty if the * regular expression successfully matches an empty string. Use {@link #findIndex} or * {@link #findSubmatch} if it is necessary to distinguish these cases. */ // This is visible for testing. String find(String s) { int[] a = doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, 2); if (a == null) { return ""; } return s.substring(a[0], a[1]); } /** * Returns a two-element array of integers defining the location of the leftmost match in * {@code s} of this regular expression. The match itself is at * {@code s.substring(loc[0], loc[1])}. * *

* A return value of null indicates no match. */ // This is visible for testing. int[] findIndex(String s) { return doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, 2); } /** * Returns an array of arrays the text of the leftmost match of the regular expression in * {@code b} and the matches, if any, of its subexpressions, as defined by the Submatch description above. * *

* A return value of null indicates no match. */ // This is visible for testing. byte[][] findUTF8Submatch(byte[] b) { int[] a = doExecute(MachineInput.fromUTF8(b), 0, UNANCHORED, prog.numCap); if (a == null) { return null; } byte[][] ret = new byte[1 + numSubexp][]; for (int i = 0; i < ret.length; i++) { if (2 * i < a.length && a[2 * i] >= 0) { ret[i] = Utils.subarray(b, a[2 * i], a[2 * i + 1]); } } return ret; } /** * Returns an array holding the index pairs identifying the leftmost match of this regular * expression in {@code b} and the matches, if any, of its subexpressions, as defined by the the * Submatch and Index descriptions above. * *

* A return value of null indicates no match. */ // This is visible for testing. int[] findUTF8SubmatchIndex(byte[] b) { return pad(doExecute(MachineInput.fromUTF8(b), 0, UNANCHORED, prog.numCap)); } /** * Returns an array of strings holding the text of the leftmost match of the regular expression in * {@code s} and the matches, if any, of its subexpressions, as defined by the Submatch description above. * *

* A return value of null indicates no match. */ // This is visible for testing. String[] findSubmatch(String s) { int[] a = doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, prog.numCap); if (a == null) { return null; } String[] ret = new String[1 + numSubexp]; for (int i = 0; i < ret.length; i++) { if (2 * i < a.length && a[2 * i] >= 0) { ret[i] = s.substring(a[2 * i], a[2 * i + 1]); } } return ret; } /** * Returns an array holding the index pairs identifying the leftmost match of this regular * expression in {@code s} and the matches, if any, of its subexpressions, as defined by the Submatch description above. * *

* A return value of null indicates no match. */ // This is visible for testing. int[] findSubmatchIndex(String s) { return pad(doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, prog.numCap)); } /** * {@code findAllUTF8()} is the All version of {@link #findUTF8}; it returns a * list of up to {@code n} successive matches of the expression, as defined by the All description above. * *

* A return value of null indicates no match. * * TODO(adonovan): think about defining a byte slice view class, like a read-only Go slice backed * by |b|. */ // This is visible for testing. List findAllUTF8(final byte[] b, int n) { final List result = new ArrayList(); allMatches( MachineInput.fromUTF8(b), n, new DeliverFunc() { @Override public void deliver(int[] match) { result.add(Utils.subarray(b, match[0], match[1])); } }); if (result.isEmpty()) { return null; } return result; } /** * {@code findAllUTF8Index} is the All version of {@link #findUTF8Index}; it * returns a list of up to {@code n} successive matches of the expression, as defined by the All description above. * *

* A return value of null indicates no match. */ // This is visible for testing. List findAllUTF8Index(final byte[] b, int n) { final List result = new ArrayList(); allMatches( MachineInput.fromUTF8(b), n, new DeliverFunc() { @Override public void deliver(int[] match) { result.add(Utils.subarray(match, 0, 2)); } }); if (result.isEmpty()) { return null; } return result; } /** * {@code findAll} is the All version of {@link #find}; it returns a list of up * to {@code n} successive matches of the expression, as defined by the All * description above. * *

* A return value of null indicates no match. */ // This is visible for testing. List findAll(final String s, int n) { final List result = new ArrayList(); allMatches( MachineInput.fromUTF16(s), n, new DeliverFunc() { @Override public void deliver(int[] match) { result.add(s.substring(match[0], match[1])); } }); if (result.isEmpty()) { return null; } return result; } /** * {@code findAllIndex} is the All version of {@link #findIndex}; it returns a * list of up to {@code n} successive matches of the expression, as defined by the All description above. * *

* A return value of null indicates no match. */ // This is visible for testing. List findAllIndex(String s, int n) { final List result = new ArrayList(); allMatches( MachineInput.fromUTF16(s), n, new DeliverFunc() { @Override public void deliver(int[] match) { result.add(Utils.subarray(match, 0, 2)); } }); if (result.isEmpty()) { return null; } return result; } /** * {@code findAllUTF8Submatch} is the All version of {@link #findUTF8Submatch}; * it returns a list of up to {@code n} successive matches of the expression, as defined by the All description above. * *

* A return value of null indicates no match. */ // This is visible for testing. List findAllUTF8Submatch(final byte[] b, int n) { final List result = new ArrayList(); allMatches( MachineInput.fromUTF8(b), n, new DeliverFunc() { @Override public void deliver(int[] match) { byte[][] slice = new byte[match.length / 2][]; for (int j = 0; j < slice.length; ++j) { if (match[2 * j] >= 0) { slice[j] = Utils.subarray(b, match[2 * j], match[2 * j + 1]); } } result.add(slice); } }); if (result.isEmpty()) { return null; } return result; } /** * {@code findAllUTF8SubmatchIndex} is the All version of * {@link #findUTF8SubmatchIndex}; it returns a list of up to {@code n} successive matches of the * expression, as defined by the All description above. * *

* A return value of null indicates no match. */ // This is visible for testing. List findAllUTF8SubmatchIndex(byte[] b, int n) { final List result = new ArrayList(); allMatches( MachineInput.fromUTF8(b), n, new DeliverFunc() { @Override public void deliver(int[] match) { result.add(match); } }); if (result.isEmpty()) { return null; } return result; } /** * {@code findAllSubmatch} is the All version of {@link #findSubmatch}; it * returns a list of up to {@code n} successive matches of the expression, as defined by the All description above. * *

* A return value of null indicates no match. */ // This is visible for testing. List findAllSubmatch(final String s, int n) { final List result = new ArrayList(); allMatches( MachineInput.fromUTF16(s), n, new DeliverFunc() { @Override public void deliver(int[] match) { String[] slice = new String[match.length / 2]; for (int j = 0; j < slice.length; ++j) { if (match[2 * j] >= 0) { slice[j] = s.substring(match[2 * j], match[2 * j + 1]); } } result.add(slice); } }); if (result.isEmpty()) { return null; } return result; } /** * {@code findAllSubmatchIndex} is the All version of * {@link #findSubmatchIndex}; it returns a list of up to {@code n} successive matches of the * expression, as defined by the All description above. * *

* A return value of null indicates no match. */ // This is visible for testing. List findAllSubmatchIndex(String s, int n) { final List result = new ArrayList(); allMatches( MachineInput.fromUTF16(s), n, new DeliverFunc() { @Override public void deliver(int[] match) { result.add(match); } }); if (result.isEmpty()) { return null; } return result; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy