
com.google.re2j.Pattern Maven / Gradle / Ivy
// Copyright 2010 Google Inc. All Rights Reserved.
package com.google.re2j;
import java.io.Serializable;
/**
* A compiled representation of an RE2 regular expression, mimicking the
* {@code java.util.regex.Pattern} API.
*
* The matching functions take {@code String} arguments instead of
* the more general Java {@code CharSequence} since the latter doesn't
* provide UTF-16 decoding.
*
*
See the package-level
* documentation for an overview of how to use this API.
*
* @author [email protected] (Russ Cox)
*/
public final class Pattern implements Serializable {
/** Flag: case insensitive matching. */
public static final int CASE_INSENSITIVE = 1;
/** Flag: dot ({@code .}) matches all characters, including newline. */
public static final int DOTALL = 2;
/**
* Flag: multiline matching: {@code ^} and {@code $} match at
* beginning and end of line, not just beginning and end of input.
*/
public static final int MULTILINE = 4;
/**
* Flag: Unicode groups (e.g. {@code \p\{Greek\}}) will be syntax errors.
*/
public static final int DISABLE_UNICODE_GROUPS = 8;
// The pattern string at construction time.
private final String pattern;
// The flags at construction time.
private final int flags;
// The compiled RE2 regexp.
private transient final RE2 re2;
// This is visible for testing.
Pattern(String pattern, int flags, RE2 re2) {
if (pattern == null) {
throw new NullPointerException("pattern is null");
}
if (re2 == null) {
throw new NullPointerException("re2 is null");
}
this.pattern = pattern;
this.flags = flags;
this.re2 = re2;
}
/**
* Releases memory used by internal caches associated with this pattern. Does
* not change the observable behaviour. Useful for tests that detect memory
* leaks via allocation tracking.
*/
public void reset() {
re2.reset();
}
/**
* Returns the flags used in the constructor.
*/
public int flags() {
return flags;
}
/**
* Returns the pattern used in the constructor.
*/
public String pattern() {
return pattern;
}
RE2 re2() {
return re2;
}
/**
* Creates and returns a new {@code Pattern} corresponding to
* compiling {@code regex} with the default flags (0).
*
* @param regex the regular expression
* @throws PatternSyntaxException if the pattern is malformed
*/
public static Pattern compile(String regex) {
return compile(regex, regex, 0);
}
/**
* Creates and returns a new {@code Pattern} corresponding to
* compiling {@code regex} with the default flags (0).
*
* @param regex the regular expression
* @param flags bitwise OR of the flag constants {@code CASE_INSENSITIVE},
* {@code DOTALL}, and {@code MULTILINE}
* @throws PatternSyntaxException if the regular expression is malformed
* @throws IllegalArgumentException if an unknown flag is given
*/
public static Pattern compile(String regex, int flags) {
String flregex = regex;
if ((flags & CASE_INSENSITIVE) != 0) {
flregex = "(?i)" + flregex;
}
if ((flags & DOTALL) != 0) {
flregex = "(?s)" + flregex;
}
if ((flags & MULTILINE) != 0) {
flregex = "(?m)" + flregex;
}
if ((flags & ~(MULTILINE | DOTALL | CASE_INSENSITIVE | DISABLE_UNICODE_GROUPS)) != 0) {
throw new IllegalArgumentException("Flags should only be a combination " +
"of MULTILINE, DOTALL, CASE_INSENSITIVE, DISABLE_UNICODE_GROUPS");
}
return compile(flregex, regex, flags);
}
/**
* Helper: create new Pattern with given regex and flags.
* Flregex is the regex with flags applied.
*/
private static Pattern compile(String flregex, String regex, int flags) {
int re2Flags = RE2.PERL;
if ((flags & DISABLE_UNICODE_GROUPS) != 0) {
re2Flags &= ~RE2.UNICODE_GROUPS;
}
return new Pattern(regex, flags, RE2.compileImpl(flregex, re2Flags, /*longest=*/false));
}
/**
* Matches a string against a regular expression.
*
* @param regex the regular expression
* @param input the input
* @return true if the regular expression matches the entire input
* @throws PatternSyntaxException if the regular expression is malformed
*/
public static boolean matches(String regex, CharSequence input) {
return compile(regex).matcher(input).matches();
}
public boolean matches(String input) {
return this.matcher(input).matches();
}
/**
* Creates a new {@code Matcher} matching the pattern against the input.
*
* @param input the input string
*/
public Matcher matcher(CharSequence input) {
return new Matcher(this, input);
}
/**
* Splits input around instances of the regular expression.
* It returns an array giving the strings that occur before, between, and after instances
* of the regular expression. Empty strings that would occur at the end
* of the array are omitted.
*
* @param input the input string to be split
* @return the split strings
*/
public String[] split(String input) {
return split(input, 0);
}
/**
* Splits input around instances of the regular expression.
* It returns an array giving the strings that occur before, between, and after instances
* of the regular expression.
*
* If {@code limit <= 0}, there is no limit on the size of the returned array.
* If {@code limit == 0}, empty strings that would occur at the end of the array are omitted.
* If {@code limit > 0}, at most limit strings are returned. The final string contains
* the remainder of the input, possibly including additional matches of the pattern.
*
* @param input the input string to be split
* @param limit the limit
* @return the split strings
*/
public String[] split(String input, int limit) {
return split(new Matcher(this, input), limit);
}
/** Helper: run split on m's input. */
private String[] split(Matcher m, int limit) {
int matchCount = 0;
int arraySize = 0;
int last = 0;
while (m.find()) {
matchCount++;
if (limit != 0 || last < m.start()) {
arraySize = matchCount;
}
last = m.end();
}
if (last < m.inputLength() || limit != 0) {
matchCount++;
arraySize = matchCount;
}
int trunc = 0;
if (limit > 0 && arraySize > limit) {
arraySize = limit;
trunc = 1;
}
String[] array = new String[arraySize];
int i = 0;
last = 0;
m.reset();
while (m.find() && i < arraySize - trunc) {
array[i++] = m.substring(last, m.start());
last = m.end();
}
if (i < arraySize) {
array[i] = m.substring(last, m.inputLength());
}
return array;
}
/**
* Returns a literal pattern string for the specified
* string.
*
*
This method produces a string that can be used to
* create a Pattern
that would match the string
* s
as if it were a literal pattern.
Metacharacters
* or escape sequences in the input sequence will be given no special
* meaning.
*
* @param s The string to be literalized
* @return A literal string replacement
*/
public static String quote(String s) {
return RE2.quoteMeta(s);
}
@Override
public String toString() {
return pattern;
}
/**
* Returns the number of capturing groups in this matcher's pattern.
* Group zero denotes the entire pattern and is excluded from this count.
*
* @return the number of capturing groups in this pattern
*/
public int groupCount() {
return re2.numberOfCapturingGroups();
}
Object readResolve() {
// The deserialized version will be missing the RE2 instance, so we need to create a new,
// compiled version.
return Pattern.compile(pattern, flags);
}
private static final long serialVersionUID = 0;
}