
// aQute.libg.re.RE Maven / Gradle / Ivy
// Show all versions of biz.aQute.bndlib Show documentation
package aQute.libg.re;
import static aQute.libg.re.Catalog.cc;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import aQute.libg.re.RE.F.Flag;
/**
* A library to make regular expressions with {@link Pattern} a bit easier to
* use. The Pattern class is extremely powerful and, as far as I know, high
* performance. However, regular expressions quickly become unwieldy.
*
* This class provides a more modern interface using lambdas and options and the
* accompanying {@link Catalog} class provides a comprehensive set of constants
* and static methods to create complex regular expressions.
*/
public interface RE {
/**
* Represents a Character Class in a regular expression. This is an
* additional type since character classes have some special rules.
*/
interface C extends RE {
/**
* Intersect two character classes. This uses the `&&` operator. I.e.
* `[%abc@]` and `\p{Alnum}` will intersect to only `abc` and will be
* represented as `[abc&&\p{Alnum}]`.
*
* @param and the second character class
* @return a new character class
*/
C and(C and);
/**
* Make the union of two character classes. This concatenates the set if
* possible. I.e. `[%abc@]` and `\p{Alnum}` will union to
* `[%abc@\p{Alnum}]`.
*
* @param or the second character class
* @return a new character class
*/
C or(C or);
/**
* Make the union of this character class and a character class given as
* a string. The string is first converted with {@link Catalog#cc} and the
* result is then handed to {@link #or(C)}.
*
* @param or the second character class, in the string form accepted by
*            {@link Catalog#cc}
* @return a new character class
*/
default C or(String or) {
return or(cc(or));
}
/**
* Return just the content of the set without the square brackets.
*
* @return the set content
*/
String asSetContent();
/**
* Some character sets have a reverse name. For example the `\s` has
* `\S`. A set with square brackets can be reversed by adding/removing a
* `^` as first character. This overrides the RE version but returns a C
* so this can be repeated.
*
* @return the reversed character class
*/
@Override
C not();
}
/**
* Represents a flag. A flag can be specified during compilation or in an
* expression. It can work for the remainder of the expression or it can
* only be effective in a group.
*/
interface F extends RE {
/**
 * The supported flags. Each flag couples the char that represents it with the
 * corresponding {@link Pattern} option.
 */
public enum Flag {
    /** Match case insensitive, see {@link Pattern#CASE_INSENSITIVE}. */
    CASE_INSENSITIVE('i', Pattern.CASE_INSENSITIVE),

    /** Ignore comments, see {@link Pattern#COMMENTS}. */
    COMMENTS('x', Pattern.COMMENTS),

    /**
     * The any ('.') matcher also matches the cr and lf, which it normally
     * does not. See {@link Pattern#DOTALL}.
     */
    DOTALL('s', Pattern.DOTALL),

    /**
     * The `$` and `^` normally match the begin and end of the input. In
     * multiline mode they match the beginning and ending of a line. See
     * {@link Pattern#MULTILINE}.
     */
    MULTILINE('m', Pattern.MULTILINE),

    /** Use the Unicode rules to case fold, see {@link Pattern#UNICODE_CASE}. */
    UNICODE_CASE('u', Pattern.UNICODE_CASE),

    /** See {@link Pattern#UNICODE_CHARACTER_CLASS}. */
    UNICODE_CHARACTER_CLASS('U', Pattern.UNICODE_CHARACTER_CLASS),

    /**
     * The only line separator recognized is \n. See
     * {@link Pattern#UNIX_LINES}.
     */
    UNIX_LINES('d', Pattern.UNIX_LINES);

    /**
     * The char that represents this flag. For example 'i' is the
     * {@link Pattern#CASE_INSENSITIVE}.
     */
    public final char flag;

    /** The Pattern option that belongs to this flag. */
    public final int option;

    Flag(char flagChar, int patternOption) {
        flag = flagChar;
        option = patternOption;
    }
}
/**
 * Return the flags to turn off.
 *
 * @return the set of flags that are turned off
 */
Set<Flag> negative();
/**
 * Return the flags to turn on.
 *
 * @return the set of flags that are turned on
 */
Set<Flag> positive();
}
/**
* A group is a regular expression that groups a set of REs. A capturing
* group is a simple parenthesis open. Other groups start with `(?` and are
* then followed by a unique identification.
*/
interface G extends RE {
/**
 * Variation of different group types. Each type carries the text that opens
 * the group ({@link #prefix}) and the text that closes it ({@link #suffix}).
 */
enum Type {
    /**
     * Matches _if_ its members can match ahead of the current position.
     * It will not consume anything from the input. See
     * https://www.regular-expressions.info/lookaround.html
     */
    AHEAD("(?="),
    /**
     * An atomic group is a group that, when the regex engine exits from
     * it, automatically throws away all backtracking positions
     * remembered by any tokens inside the group. The regular expression
     * `a(bc|b)c` matches `abcc` and `abc`. The regex `a(?>bc|b)c`
     * (atomic group) matches `abcc` but not `abc`.
     */
    ATOMIC("(?>"),
    /**
     * Matches _if_ its members can match behind the current position.
     * It will not consume anything from the input. See
     * https://www.regular-expressions.info/lookaround.html
     */
    BEHIND("(?<="),
    /**
     * Basic most simple group. It is advised not to use these since
     * they need to be counted and that is really tricky. Using named
     * groups is much easier and recommended.
     */
    CAPTURING("("),
    /**
     * If this group is matched, the value of this group specifically
     * can be retrieved by its group name.
     */
    NAMED("(?<"),
    /**
     * Groups but will not capture a match.
     */
    NONCAPTURING("(?:"),
    /**
     * Will not provide grouping parenthesis.
     */
    NONE("", ""),
    /**
     * Will match if its members do not match ahead
     */
    NOT_AHEAD("(?!"),
    /**
     * Will match if its members do not match before
     */
    NOT_BEHIND("(?<!");

    /**
     * The prefix to start the grouping.
     */
    final String prefix;
    /**
     * The suffix to end the grouping.
     */
    final String suffix;

    /**
     * Create a type that is closed by a single `)`.
     *
     * @param prefix the text that opens the group
     */
    Type(String prefix) {
        this(prefix, ")");
    }

    /**
     * Create a type with an explicit opening and closing text.
     *
     * @param prefix the text that opens the group
     * @param suffix the text that closes the group
     */
    Type(String prefix, String suffix) {
        this.prefix = prefix;
        this.suffix = suffix;
    }
}
/**
* Get the type of this group.
*
* @return the {@link Type} of this group
*/
Type groupType();
}
/**
 * The result of a matched group after a successful find, matches, or
 * lookingAt operation.
 */
interface Match extends MatchGroup {
    /**
     * Get the matching groups. This will only return the groups that were
     * captured.
     *
     * @return the captured groups by group name
     */
    Map<String, MatchGroup> getGroups();

    /**
     * Get the matching groups with their value. This will only return the
     * values that were actually captured.
     *
     * @return the captured values by group name
     */
    Map<String, String> getGroupValues();

    /**
     * Get a group by name. This will throw an exception if the group was
     * not defined in this regular expression. It will return an
     * Optional.empty() when the group wasn't captured.
     *
     * @param name the name of the group
     * @return the group, or empty when the group was not captured
     */
    Optional<MatchGroup> group(String name);

    /**
     * This Match has a rover in its matching region. This method requires
     * the expected RE to match at the current position or it will throw an
     * exception. It will move the rover forward to after the match. It will
     * skip any whitespace before it matches.
     *
     * @param expected the expected match
     * @return the value of the match
     * @throws IllegalArgumentException when the expected RE does not match,
     *             or when the preceding whitespace {@link #skip(RE)} fails
     */
    default String take(RE expected) {
        skip(Catalog.setWs);
        String result = tryMatch(expected);
        if (result == null) {
            throw new IllegalArgumentException("take: no match for " + expected + " on " + this);
        }
        return result;
    }

    /**
     * This Match has a rover in its matching region. This method requires
     * the skip RE to match at the current position or it will throw an
     * exception. It will move the rover forward to after the match.
     *
     * @param skip the RE to skip
     * @throws IllegalArgumentException when the skip RE does not match
     */
    default void skip(RE skip) {
        if (tryMatch(skip) == null) {
            throw new IllegalArgumentException("skip: no match for " + skip + " on " + this);
        }
    }

    /**
     * This Match has a rover in its matching region. This method will see
     * if the current position matches the RE. If it does, the rover will be
     * moved forward past the match. It will skip any whitespace before it
     * matches.
     *
     * Note that the whitespace is skipped with {@link #skip(RE)} before the
     * expected RE is tried, so that step is not undone when the expected RE
     * does not match, and it throws when `Catalog.setWs` itself does not
     * match.
     *
     * @param expected the expected value
     * @return true if there was a match and the match was consumed
     * @throws IllegalArgumentException when the preceding whitespace
     *             {@link #skip(RE)} fails
     */
    default boolean check(RE expected) {
        skip(Catalog.setWs);
        return tryMatch(expected) != null;
    }

    /**
     * This Match has a rover in its matching region. This method tries to
     * see if the string from this rover to the end of this match matches
     * the match parameter. If so, it returns the value and moves the rover
     * forward.
     *
     * @param match the RE to match
     * @return the matched string, or null when there was no match
     */
    String tryMatch(RE match);

    /**
     * Get a group by index.
     *
     * @param group the index of the group; presumably the capturing group
     *            number as used by {@link Matcher#group(int)} (confirm
     *            against the implementation)
     * @return the group, if present
     */
    Optional<MatchGroup> group(int group);

    /**
     * This gets the value of a group but throws an exception if the group
     * is not there.
     *
     * @param groupName the name of the group
     * @return the value of the group
     */
    String presentGroup(String groupName);
}
/**
* Provides the details of a matching group. The Matching Group is also a
* CharSequence.
*/
interface MatchGroup extends CharSequence {
/**
* The end index of this group in the original string. See
* {@link Matcher#end(String)}
*
* @return the end index
*/
int end();
/**
* The original matcher
*
* @return the {@link Matcher} this group originates from
*/
Matcher getMatcher();
/**
* The name of the captured group
*
* @return the group name
*/
String name();
/**
* The start index of this group in the original string. See
* {@link Matcher#start(String)}
*
* @return the start index
*/
int start();
/**
* The value of the captured group.
*
* @return the captured text
*/
String value();
}
/**
* The `*`, `?`, `+` operators and the `{...}` suffix quantify the previous
* node. By default, these quantified nodes are _greedy_, they try to match
* as much as possible of the input. Quantified nodes can be further
* modified to be reluctant (first match) or possessive.
*/
interface Q extends RE {
/**
 * The modes in which a quantified node can operate.
 */
enum Type {
    /** The default: match as much as possible. */
    greedy,

    /**
     * Possessive quantification, see the quantifier section of the
     * {@link java.util.regex.Pattern} documentation.
     */
    possesive,

    /** Stop after the first match. */
    reluctant
}
/**
* Set greedy, see {@link Type#greedy}.
*
* @return an RE; presumably this quantified node in greedy mode (confirm
*         against the implementation)
*/
RE greedy();
/**
* Set possessive, see {@link Type#possesive}.
*
* @return an RE; presumably this quantified node in possessive mode
*         (confirm against the implementation)
*/
RE possesive();
/**
* Set reluctant, see {@link Type#reluctant}.
*
* @return an RE; presumably this quantified node in reluctant mode
*         (confirm against the implementation)
*/
RE reluctant();
}
/**
 * Return a predicate that checks if the pattern is found in the tested
 * string.
 *
 * @return a predicate over the string to test
 */
Predicate<String> asFindPredicate();
/**
 * Return a predicate that checks if the pattern is looking at the tested
 * string, see {@link #lookingAt(String)}.
 *
 * @return a predicate over the string to test
 */
Predicate<String> asLookingAtPredicate();
/**
 * Return a predicate that checks if the pattern matches the tested string,
 * see {@link #matches(String)}.
 *
 * @return a predicate over the string to test
 */
Predicate<String> asMatchPredicate();
/**
 * Find the given pattern in the given string. If found, a Match is returned
 * that can be used to continue.
 *
 * @param string the source string
 * @return a Match if found, otherwise empty
 */
Optional<Match> findIn(String string);
/**
 * Return a stream with the matches of this RE in the given string.
 *
 * @param string the source string
 * @return a stream of matches
 */
Stream<Match> findAllIn(String string);
/**
 * Convenience variant of {@link #append(StringBuilder, String, Function)}
 * that collects the output in a fresh StringBuilder and returns it as a
 * String.
 *
 * @param string the source string
 * @param replacement the function that is called with each Match and
 *            calculates its replacement
 * @return the replaced String
 */
default String append(String string, Function<Match, String> replacement) {
    // twice the input length leaves room for replacements longer than the match
    StringBuilder sb = new StringBuilder(string.length() * 2);
    append(sb, string, replacement);
    return sb.toString();
}
/**
 * Append to the StringBuilder by finding all matches of this RE in the given
 * string, and using the replacement from the replacement function. For each
 * match, this function is called with the Match. The function can then take
 * the captured groups and calculate the replacement string. This is like a
 * template function.
 *
 * If the replacement function returns null, it will be ignored
 *
 * @param sb the builder
 * @param string the source string
 * @param replacement the function that is called with each Match and
 *            calculates its replacement, or returns null
 */
void append(StringBuilder sb, String string, Function<Match, String> replacement);
/**
 * Get a set of group names in the current RE. This includes any member REs
 * recursively.
 *
 * @return the group names
 */
Set<String> getGroupNames();
/**
* Returns true if this RE matches the given string.
*
* @param string the source string
* @return true if this RE matches the string
*/
boolean isMatch(String string);
/**
* Returns true if this RE is a single node. That is a single letter, a
* character class, a group that is not NONE, etc.
*
* @return true if this RE is a single node
*/
boolean isSingle();
/**
 * Match with lookingAt, see {@link Matcher#lookingAt()}.
 *
 * @param string the source string
 * @return a Match if this RE is looking at the string, otherwise empty
 */
Optional<Match> lookingAt(String string);
/**
* Get a new matcher activated with the given source string.
*
* @param string the source string
* @return a new {@link Matcher} on the given string
*/
Matcher getMatcher(CharSequence string);
/**
 * Matches the source string to this RE. If there is a match, it returns the
 * Match
 *
 * @param string the source string
 * @return the Match if there is a match, otherwise empty
 */
Optional<Match> matches(String string);
/**
 * Merge another RE with this RE. This is not always possible.
 *
 * @param re the other RE
 * @return the merged RE, or empty when the two cannot be merged
 */
Optional<RE> merge(RE re);
/**
* Reverse the meaning of this RE. This depends on the different types. If
* it has no meaning, it will return the original.
*
* @return the reversed RE, or the original if reversing has no meaning
*/
RE not();
/**
* Redeclares {@link Object#toString()} as part of this interface.
* NOTE(review): presumably returns the regular expression source for this
* RE; confirm against the implementations.
*/
@Override
String toString();
/**
* Return the pattern compiled with the given flags. The pattern is cached
* for optimization but this method can be called concurrently.
*
* @param flags the flags
* @return the compiled {@link Pattern}
*/
Pattern pattern(Flag... flags);
}