All Downloads are FREE. Search and download functionalities are using the official Maven repository.

aQute.libg.re.RE Maven / Gradle / Ivy

The newest version!
package aQute.libg.re;

import static aQute.libg.re.Catalog.cc;

import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;

import aQute.libg.re.RE.F.Flag;

/**
 * A library to make regular expressions with {@link Pattern} a bit easier to
 * use. The Pattern class is extremely powerfull and as far as I know high
 * performance. However,regular expressions quickly become unwieldy.
 * 

* This class provides a more modern interface using lambdas and options and the * accompanying {@link Catalog} class provides a comprehensive set of constants * and static methods to create complex regular expressions. */ public interface RE { /** * Represents a Character Class in a regular expression. This is an * additional type since character classes have some special rules. */ interface C extends RE { /** * Intersect two character classes. This uses the `&&` operator. I.e. * `[%abc@]` and `\p{Alnum}` will intersect to only `abc` and will be * represented as `[abc&&\p{Alnum}`. * * @param and the second character class * @return a new character class */ C and(C and); /** * Make the union of two character classes. This concatenates the set if * possible. I.e. `[%abc@]` and `\p{Alnum}` will union to * `[%abc@\p{Alnum}]`. * * @param or the second character class * @return a new character class */ C or(C or); /** * Make the union of two character classes. This concatenates the set if * possible. I.e. `[%abc@]` and `\p{Alnum}` will union to * `[%abc@\p{Alnum}]`. * * @param or the second character class * @return a new character class */ default C or(String or) { return or(cc(or)); } /** * Return just the content of the set without the square brackets. */ String asSetContent(); /** * Some character sets have a reverse name. For example the `\s` has * `\S`. A set with square brackets can be reversed by adding/removing a * `^` as first character. This overrides the RE version but returns a C * so this can be repeated. */ @Override C not(); } /** * Represents a flag. A flag can be specified during compilation or in an * expression. It can work for the remainder of the expression or it can * only be effective in a group. */ interface F extends RE { /** * The supported flags */ public enum Flag { /** * Match case insensitive, see {@link Pattern#CASE_INSENSITIVE} */ CASE_INSENSITIVE('i', Pattern.CASE_INSENSITIVE), /** * Ignore comments, see {@link Pattern#COMMENTS} */ COMMENTS('x', Pattern.COMMENTS), /** * The any ('.') matcher also matches the cr and lf, it normally * doesn't. */ DOTALL('s', Pattern.DOTALL), /** * The `$` and `^` normally match the begin and end of the input. In * multiline mode they the beginning and ending of a line. See * {@link Pattern#MULTILINE} */ MULTILINE('m', Pattern.MULTILINE), /** * Use the Unicode rules to case fold, see * {@link Pattern#UNICODE_CASE} */ UNICODE_CASE('u', Pattern.UNICODE_CASE), /** * See {@link Pattern#UNICODE_CHARACTER_CLASS} */ UNICODE_CHARACTER_CLASS('U', Pattern.UNICODE_CHARACTER_CLASS), /** * Only line separator recognized is \n. See * {@link Pattern#UNIX_LINES} */ UNIX_LINES('d', Pattern.UNIX_LINES); /** * The char that represents this flag. For example 'i' is the * {@link Pattern#CASE_INSENSITIVE}. */ public final char flag; /** * The Pattern option */ public final int option; Flag(char flag, int option) { this.flag = flag; this.option = option; } } /** * Return the flags to turn off. */ Set negative(); /** * Return the flags to turn on. */ Set positive(); } /** * A group is a regular expression that groups a set of REs. A capturing * group is a simple parenthesis open. Other groups start with `(?` and are * then following by a unique identification. */ interface G extends RE { /** * Variation of different group types */ enum Type { /** * Matches _if_ its members can match ahead of the current position. * It will not consume anything from the input. See * https://www.regular-expressions.info/lookaround.html */ AHEAD("(?="), /** * An atomic group is a group that, when the regex engine exits from * it, automatically throws away all backtracking positions * remembered by any tokens inside the group.The regular expression * `a(bc|b)c` matches `abcc` and `abc`. The regex `a(?>bc|b)c` * (atomic group) matches `abcc` but not `abc`. */ ATOMIC("(?>"), /** * Matches _if_ its members can match behind the current position. * It will not consume anything from the input. See * https://www.regular-expressions.info/lookaround.html */ BEHIND("(?<="), /** * Basic most simple group. It is advised not to use these since * they need to be counted and that is really tricky. Using named * groups is much easier and recommended. */ CAPTURING("("), /** * If this group is matched, the value of this group specifically * can be retrieved by its group name. */ NAMED("(?<"), /** * Groups but will not capture a match. */ NONCAPTURING("(?:"), /** * Will not provide grouping parenthesis. */ NONE("", ""), /** * Will match if its members do not match ahead */ NOT_AHEAD("(?!"), /** * Will match if its members do not match before */ NOT_BEHIND("(?`. */ final String prefix; /** * The suffix to end the grouping. */ final String suffix; Type(String prefix) { this(prefix, ")"); } Type(String prefix, String suffix) { this.prefix = prefix; this.suffix = suffix; } } /** * Get the type of this group */ Type groupType(); } /** * The result of a matched group after a successful find, matches, or * lookingAt operation. */ interface Match extends MatchGroup { /** * Get the matching groups. This will only return the groups that were * captured. */ Map getGroups(); /** * Get the matching groups with their value. This will only return the * values that were actually captured. */ Map getGroupValues(); /** * Get a group by name. This will throw an exception if the group was * not defined in this regular expression. It will return an * Optional.empty() when the group wasn't captured. * * @param name the name of the group */ Optional group(String name); /** * This Match has a rover in its the matching region. This method * requires the expected to match against the current position or it * will throw an exception. It will move the rover forward to after the * match. It will skip any whitespace before it matches. * * @param expected the expected match * @return the value of the match */ default String take(RE expected) { skip(Catalog.setWs); String result = tryMatch(expected); if (result == null) throw new IllegalArgumentException("take: no match for " + expected + " on " + this); return result; } /** * This Match has a rover in its the matching region. This method * requires the skip to match against the current position or it will * throw an exception. It will move the rover forward to after the * match. * * @param skip the RE to skip */ default void skip(RE skip) { if (tryMatch(skip) == null) throw new IllegalArgumentException("skip: no match for " + skip + " on " + this); } /** * This Match has a rover its the matching region. This method will see * if the current position matches the RE. If it does, the rover will be * moved forward. Otherwise it stays where it is. It will skip any * whitespace before it matches. * * @param expected the expected value * @return true if there was a match and the match was consumed */ default boolean check(RE expected) { skip(Catalog.setWs); return tryMatch(expected) != null; } /** * This Match has a rover in its the matching region. This method tries * too see if the string from this rover to the end of this match, * matches the match parameter. If so, it returns the value and moves * the rover forward. * * @param match the RE to match return a string when matched or null */ String tryMatch(RE match); Optional group(int group); /** * This gets the value of a group but throws an exception of the group * is not there. * * @param groupName the name of the group */ String presentGroup(String groupName); } /** * Provides the details of a matching group. The Matching Group is also a * CharSequence. */ interface MatchGroup extends CharSequence { /** * The end index of this group in the original string. See * {@link Matcher#end(String)} */ int end(); /** * The original matcher */ Matcher getMatcher(); /** * The name of the captured group */ String name(); /** * The start index of this group in the original string. See * {@link Matcher#start(String)} */ int start(); /** * The value of the captured group. */ String value(); } /** * The `*`, `?`, `+` operators and the `{...}` suffix quantify the previous * node. By default, these quantified nodes are _greedy_, they try to match * as much as possible of the input. Quantified nodes can be further * modified to be reluctant (first match) or possesive. */ interface Q extends RE { /** * The types of modified quantification */ enum Type { /** * Default, match as much as possible */ greedy, /** * See documentation */ possesive, /** * Stop after first match */ reluctant; } /** * Set greedy */ RE greedy(); /** * Set possesive */ RE possesive(); /** * Set reluctant */ RE reluctant(); } /** * Return a predicate that checks if the pattern is found in the tested * string. */ Predicate asFindPredicate(); /** * Return a predicate that checks if the pattern is looking at in the tested * string. */ Predicate asLookingAtPredicate(); /** * Return a predicate that checks if the pattern is matched at in the tested * string. */ Predicate asMatchPredicate(); /** * Find the given pattern in the given string. If found, a Match is returned * that can be used to continue. * * @param string the source string * @return a matcher if found */ Optional findIn(String string); /** * Return a stream with matches in the current string * * @param string the source string */ Stream findAllIn(String string); /** * Return * * @param string the source string * @return the replaced String */ default String append(String string, Function replacement) { StringBuilder sb = new StringBuilder(string.length() * 2); append(sb, string, replacement); return sb.toString(); } /** * Append the StringBuilder by finding all this matches in the given string, * and using the replacement from the replacement function. For each match, * this function is called with the Match. The function can then take the * captured groups and calculate the replacement string. This is like a * template function. *

* If the replacement function returns null, it will be ignored * * @param sb the builder * @param string the source string * @param replacement */ void append(StringBuilder sb, String string, Function replacement); /** * Get a set of group names in the current RE. This includes any member REs * recursively. */ Set getGroupNames(); /** * Returns true if this RE matches the given string. * * @param string the source string */ boolean isMatch(String string); /** * Returns true if this RE is a single node. That is a single letter, a * character class, a group that is not NONE, etc. */ boolean isSingle(); /** * Match with lookingAt * * @param string the source string */ Optional lookingAt(String string); /** * Get a new matcher activated with the given source string. * * @param string the source string */ Matcher getMatcher(CharSequence string); /** * Matches the source string to this RE. If there is a match, it returns the * Match * * @param string the source string */ Optional matches(String string); /** * Merge another RE with this RE. This is not always possible. * * @param re the other RE */ Optional merge(RE re); /** * Reverse the meaning of this RE. This depends on the different types. If * it has no meaning, it will return the original. */ RE not(); @Override String toString(); /** * Return the pattern compiled with the given flags. The pattern is cached * for optimization but this method can be called concurrently. * * @param flags the flags */ Pattern pattern(Flag... flags); }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy