aQute.libg.re.Catalog Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of biz.aQute.bndlib Show documentation
bndlib: A Swiss Army Knife for OSGi
The newest version!
package aQute.libg.re;

import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import org.eclipse.jdt.annotation.Nullable;

import aQute.libg.re.RE.C;
import aQute.libg.re.RE.F;
import aQute.libg.re.RE.F.Flag;
import aQute.libg.re.RE.G;
import aQute.libg.re.RE.Q;

/**
 * This class provides an implementation of the RE types. The class is useful as
 * static imports. (For Eclipse users, look at favorites in the preferences.)
 * However, it can also be used as base class. If that is the case, field names
 * can be used as named capture groups. In this constellation, the static
 * methods are also in scope, not requiring many static imports.
 *
 *  * void foo() {
 * 	class X extends Catalog {
 * 		RE match = lit("abc");
 * 		RE namedMatch = named(match);
 * 	}
 * 	X x;
 *  x.....
 * }
 * 
 */
public class Catalog {


	/**
	 * If this class is extended, the named fields in that class can be used in
	 * named groups. This method will lookup the name of a field and create a
	 * capturing group with this name. It finds the field by comparing the
	 * content.
	 *
	 * @param re the RE that should be in a field in this class.
	 * @return a group RE
	 */
	public RE named(RE re) {
		String name = findFieldWith(re);
		assert name != null;
		return g(name, re);
	}

	String findFieldWith(RE re) {
		Class c = getClass();
		for (Field f : c.getDeclaredFields())
			try {
				f.setAccessible(true);
				if (f.get(this) == re)
					return f.getName();
			} catch (Exception e) {
				// ignore
			}
		return null;
	}

	/**
	 * Return a control char. For example, `control('b') returns ^b. See the
	 * sequence `\\cb`.
	 *
	 * @param c the control character
	 * @return an RE representing the control character
	 */
	public static RE control(char c) {
		return new REImpl("\\c" + c);
	}

	/**
	 * Create a non capturing group
	 *
	 * @param res the members
	 * @return a non capturing group
	 */
	public static RE g(RE... res) {
		return new Group(Group.Type.NONCAPTURING, res);
	}

	/**
	 * Create an OR combination of a number of RE's
	 *
	 * @param res the set of RE's that are the members of the OR
	 * @return the RE representing the OR
	 */
	public static RE or(RE... res) {

		assert res != null;

		return switch (res.length) {
			case 0 -> empty;
			case 1 -> res[0];
			default -> {
				StringBuilder sb = new StringBuilder();
				String del = "";
				for (RE re : res) {
					sb.append(del)
						.append(re);
					del = "|";
				}
				yield new Group(null, sb.toString(), Group.Type.NONCAPTURING, names(res));
			}
		};
	}

	/**
	 * Create an OR combination of a number of Strings. The strings are
	 * converted with {@link #lit(String)}.
	 *
	 * @param res the strings
	 * @return the RE representing the OR
	 */
	public static RE or(String... res) {
		assert res != null;
		return or(Stream.of(res)
			.map(Catalog::lit)
			.toArray(RE[]::new));
	}

	/**
	 * Create an or combination of character classes.
	 *
	 * @param res the character classes
	 * @return an RE representing the combined clases
	 */
	public static RE or(C... res) {
		assert res != null;
		return switch (res.length) {
			case 0 -> empty;
			case 1 -> res[0];
			default -> {
				StringBuilder sb = new StringBuilder();
				for (C re : res) {
					sb.append(re.asSetContent());
				}
				yield new CharacterClass(sb.toString());
			}
		};
	}

	/**
	 * Create a named capturing group
	 *
	 * @param name the name of the group. This must be a valid Java identifier
	 * @param res the members.
	 * @return a new named capture group
	 */
	public static RE g(String name, RE... res) {
		assert isValidGroupName(name);
		if (res == null || res.length == 0)
			return empty;

		return new Group(name, res);
	}

	private static boolean isValidGroupName(String name) {
		return name == null || javaId.matches(name)
			.isPresent();
	}

	/**
	 * Return a named group but where each member that is not a whitespace, will
	 * be preceded with a #setWs.
	 *
	 * @param name the name of the group or null for a non-named group
	 * @param res the members
	 * @return a group, either named or capturing
	 */
	public static RE term(@Nullable
	String name, RE... res) {
		assert isValidGroupName(name) : name;
		if (res == null || res.length == 0)
			return empty;

		List out = new ArrayList<>();

		RE last = setWs;
		out.add(last);
		boolean lastWs = true;

		for (int i = 0; i < res.length; i++) {
			RE next = res[i];
			boolean nextWs = isWhiteSpace(next);

			int n = 0;
			if (lastWs)
				n = 1;
			if (nextWs)
				n += 2;

			switch (n) {
				case 0 -> {
					out.add(setWs);
					out.add(next);
				}
				case 1, 2 -> {
					out.add(next);
				}
				case 3 -> {
				}
			}
			last = next;
			lastWs = nextWs;
		}
		return new Group(name, out.toArray(RE[]::new));
	}

	/**
	 * See {@link #term(String,RE...)} with a null for name
	 *
	 * @param res the members
	 * @return a new
	 */
	public static RE term(RE... res) {
		return term(null, res);
	}

	/**
	 * Create a character class. I.e. `[abc]`. Do not include the ^ to negate
	 * the set, use the not() method.
	 *
	 * @param allowed the allowed characters
	 * @return a character class.
	 */

	public static C cc(String allowed) {
		return new CharacterClass(allowed, true, null);
	}

	/**
	 * Provide a literal text. This lit can contain characters that normally
	 * have a special meaning. All characters that have a special meaning are
	 * escaped with the backslash ('\').
	 *
	 * @param s the literal string
	 * @return an RE
	 */
	public static RE lit(String s) {
		StringBuilder sb = null;

		for (int i = 0; i < s.length(); i++) {
			char c = s.charAt(i);
			if ("()$\\{}[]^+*?.| ".indexOf(c) >= 0) {
				if (sb == null) {
					sb = new StringBuilder();
					sb.append(s, 0, i);
				}
				sb.append("\\")
					.append(c);
			} else if (sb != null)
				sb.append(c);
		}
		return new REImpl(sb == null ? s : sb.toString());
	}

	/**
	 * Useful if you need a number of literal REs
	 *
	 * @param s the strings
	 * @return an array of RE
	 */
	public static RE[] lit(String... s) {
		return Stream.of(s)
			.map(ss -> lit(ss))
			.toArray(RE[]::new);
	}

	/**
	 * Use the quoting facility built into {@link Pattern#quote(String)}
	 *
	 * @param s the string
	 * @return the quoted string
	 */
	public static RE quote(String s) {
		return new REImpl(Pattern.quote(s));
	}

	/**
	 * Use the Unicode name. Is \\N
	 *
	 * @param name the unicode name.
	 * @return the RE representing the unicode name.
	 */
	public static RE unicode(String name) {
		return new REImpl("\\N{".concat(name)
			.concat("}"));
	}

	/**
	 * Used to reference a previous capturing group. Unfortunately this cannot
	 * be done by name. This class will by default create non-capturing groups,
	 * so only explicit groups need to be counted.
	 *
	 * @param group the group number
	 * @return a new RE referencing a previous group
	 */
	public static RE back(int group) {
		assert group < 10 && group > 0;
		return new REImpl("\\" + group);
	}

	/**
	 * Used to reference a previous named capturing group.
	 *
	 * @param group the group name
	 * @return a new RE referencing a previous group
	 */
	public static RE back(String group) {
		return new REImpl("\\k<" + group + ">");
	}

	/**
	 * Create a list of clauses separated by a separator. The clauses and
	 * separators will be separated by zero or more whitespace.
	 *
	 * @param clause
	 * @param separator
	 * @return a new RE that presents a list of clauses
	 */
	public static RE list(RE clause, RE separator) {
		return term(clause, set(term(separator, clause)));
	}

	/**
	 * Create a list of clauses separated by a comma. The clauses and separators
	 * will be separated by zero or more whitespace.
	 *
	 * @param clause
	 * @return a new RE that presents a list of clauses separated by commas
	 */
	public static RE list(RE clause) {
		return list(clause, Catalog.comma);
	}

	/**
	 * Return an optional RE
	 *
	 * @param res the members of the optional
	 * @return a Q representing the optional
	 */
	public static Q opt(RE... res) {
		return new Quantified(0, 1, Quantified.Type.greedy, res);
	}

	/**
	 * Return an optional literal (see {@link #lit(String)}t
	 *
	 * @param s the literal
	 * @return a Q representing the optional
	 */
	public static Q opt(String s) {
		return opt(lit(s));
	}

	/**
	 * Return a group of some members.
	 *
	 * @param res the members
	 * @return a Q representing the some
	 */
	public static Q some(RE... res) {
		return new Quantified(1, Integer.MAX_VALUE, Q.Type.greedy, res);
	}

	/**
	 * Return a group of some members.
	 *
	 * @param res the members
	 * @return a Q representing the some
	 */
	public static Q set(RE... res) {
		return new Quantified(0, Integer.MAX_VALUE, Q.Type.greedy, res);
	}

	/**
	 * Creates a regular expression that matches the negation of the provided
	 * regular expression. This method wraps the given regex pattern in a
	 * negative construct.
	 *
	 * @param re the regular expression to be negated
	 * @return a new RE instance representing the negated version of the
	 *         provided regular expression
	 */
	public static RE not(RE re) {
		return re.not();
	}

	/**
	 * Modifies the given quantified regular expression to match reluctantly. A
	 * reluctant quantifier matches as few characters as possible.
	 *
	 * @param re the quantified regular expression to be modified
	 * @return a new RE instance with a reluctant quantification
	 */
	public static RE reluctant(Q re) {
		return re.reluctant();
	}

	/**
	 * Modifies the given quantified regular expression to match greedily. A
	 * greedy quantifier matches as many characters as possible.
	 *
	 * @param re the quantified regular expression to be modified
	 * @return a new RE instance with a greedy quantification
	 */
	public static RE greedy(Q re) {
		return re.greedy();
	}

	/**
	 * Modifies the given quantified regular expression to match in a possessive
	 * manner. A possessive quantifier does not give up matches as the regex
	 * engine backtracks.
	 *
	 * @param re the quantified regular expression to be modified
	 * @return a new RE instance with a possessive quantification
	 */
	public static RE possesive(Q re) {
		return re.possesive();
	}

	/**
	 * Creates a regular expression that matches anything except the specified
	 * string. For a single character, it creates a negated character class;
	 * otherwise, it negates the literal string.
	 *
	 * @param s the string to be negated in the match
	 * @return an RE instance that matches anything but the specified string
	 */
	public static RE anythingBut(String s) {
		if (s.length() == 1)
			return set(new CharacterClass(s).not());
		else
			return set(new REImpl(s).not());
	}

	/**
	 * Creates a regular expression that optionally matches the given string.
	 * The string is wrapped in a non-capturing group with a quantifier allowing
	 * zero or one occurrence.
	 *
	 * @param s the string to be optionally matched
	 * @return an RE instance that optionally matches the specified string
	 */
	public static RE maybe(String s) {
		return g(setAll, opt(s));
	}

	/**
	 * Creates a quantified regular expression that matches a specified minimum
	 * and maximum number of occurrences. This method applies a greedy
	 * quantifier.
	 *
	 * @param minimum the minimum number of occurrences to match
	 * @param maximum the maximum number of occurrences to match
	 * @param res the regular expressions to be quantified
	 * @return a Q instance representing the specified quantification
	 */
	public static Q multiple(int minimum, int maximum, RE... res) {
		return new Quantified(minimum, maximum, Quantified.Type.greedy, res);
	}

	/**
	 * Creates a quantified regular expression that matches a specified minimum
	 * and maximum number of occurrences of a literal string. This method
	 * applies a greedy quantifier.
	 *
	 * @param minimum the minimum number of occurrences to match
	 * @param maximum the maximum number of occurrences to match
	 * @param lit the literal string to be quantified
	 * @return a Q instance representing the specified quantification of the
	 *         literal string
	 */
	public static Q multiple(int minimum, int maximum, String lit) {
		return new Quantified(minimum, maximum, Quantified.Type.greedy, lit(lit));
	}

	/**
	 * Creates a quantified regular expression that matches at least a specified
	 * minimum number of occurrences. This method applies a greedy quantifier.
	 *
	 * @param minimum the minimum number of occurrences to match
	 * @param res the regular expressions to be quantified
	 * @return a Q instance representing the quantification with the specified
	 *         minimum and no maximum limit
	 */
	public static Q atLeast(int minimum, RE... res) {
		return new Quantified(minimum, Integer.MAX_VALUE, Quantified.Type.greedy, res);
	}

	/**
	 * Creates a quantified regular expression that matches at least a specified
	 * minimum number of occurrences of a literal string. This method applies a
	 * greedy quantifier.
	 *
	 * @param minimum the minimum number of occurrences to match
	 * @param lit the literal string to be quantified
	 * @return a Q instance representing the quantification with the specified
	 *         minimum and no maximum limit
	 */
	public static Q atLeast(int minimum, String lit) {
		return atLeast(minimum, lit(lit));
	}

	/**
	 * Applies the case-insensitive flag to the provided regular expressions.
	 * This method makes the given patterns match characters regardless of their
	 * case.
	 *
	 * @param res the regular expressions to be affected by the case-insensitive
	 *            flag
	 * @return an F instance with the case-insensitive flag applied
	 */
	public static F caseInsenstive(RE... res) {
		return new Option(EnumSet.of(F.Flag.CASE_INSENSITIVE), null, res);
	}

	/**
	 * Turns off the case-insensitive flag for the provided regular expressions.
	 * This method reverts the given patterns to match characters considering
	 * their case.
	 *
	 * @param res the regular expressions to be affected by turning off the
	 *            case-insensitive flag
	 * @return an F instance with the case-insensitive flag turned off
	 */
	public static F caseInsenstiveOff(RE... res) {
		return new Option(null, EnumSet.of(F.Flag.CASE_INSENSITIVE), res);
	}

	/**
	 * Applies the dotall flag to the provided regular expressions. With this
	 * flag, the dot ('.') pattern matches any character, including a line
	 * terminator.
	 *
	 * @param res the regular expressions to be affected by the dotall flag
	 * @return an F instance with the dotall flag applied
	 */
	public static F dotall(RE... res) {
		return new Option(EnumSet.of(F.Flag.DOTALL), null, res);
	}

	/**
	 * Turns off the dotall flag for the provided regular expressions. With the
	 * flag turned off, the dot ('.') pattern does not match line terminators by
	 * default.
	 *
	 * @param res the regular expressions to be affected by turning off the
	 *            dotall flag
	 * @return an F instance with the dotall flag turned off
	 */
	public static F dotallOff(RE... res) {
		return new Option(null, EnumSet.of(F.Flag.DOTALL), res);
	}

	/**
	 * Applies the comments flag to the provided regular expressions. This flag
	 * allows whitespace and comments within the pattern for better readability.
	 *
	 * @param res the regular expressions to be affected by the comments flag
	 * @return an F instance with the comments flag applied
	 */
	public static F comments(RE... res) {
		return new Option(EnumSet.of(F.Flag.COMMENTS), null, res);
	}

	/**
	 * Turns off the comments flag for the provided regular expressions. With
	 * the flag turned off, whitespace and comments within the pattern are no
	 * longer ignored.
	 *
	 * @param res the regular expressions to be affected by turning off the
	 *            comments flag
	 * @return an F instance with the comments flag turned off
	 */
	public static F commentsOff(RE... res) {
		return new Option(null, EnumSet.of(F.Flag.COMMENTS), res);
	}

	/**
	 * Applies the multiline flag to the provided regular expressions. This flag
	 * changes the behavior of '^' and '$' from matching at the start and end of
	 * the input string to matching at the start and end of each line.
	 *
	 * @param res the regular expressions to be affected by the multiline flag
	 * @return an F instance with the multiline flag applied
	 */
	public static F multiline(RE... res) {
		return new Option(EnumSet.of(F.Flag.MULTILINE), null, res);
	}

	/**
	 * Turns off the multiline flag for the provided regular expressions. With
	 * the flag turned off, '^' and '$' match only at the start and end of the
	 * entire input string.
	 *
	 * @param res the regular expressions to be affected by turning off the
	 *            multiline flag
	 * @return an F instance with the multiline flag turned off
	 */
	public static F multilineOff(RE... res) {
		return new Option(null, EnumSet.of(F.Flag.MULTILINE), res);
	}

	/**
	 * Applies the unicode character class flag to the provided regular
	 * expressions. This flag enables the Unicode versions of predefined
	 * character classes and POSIX character classes.
	 *
	 * @param res the regular expressions to be affected by the unicode
	 *            character class flag
	 * @return an F instance with the unicode character class flag applied
	 */
	public static F unicodeCharacterClass(RE... res) {
		return new Option(EnumSet.of(F.Flag.UNICODE_CHARACTER_CLASS), null, res);
	}

	/**
	 * Turns off the unicode character class flag for the provided regular
	 * expressions. With the flag turned off, the ASCII versions of predefined
	 * character classes and POSIX character classes are used.
	 *
	 * @param res the regular expressions to be affected by turning off the
	 *            unicode character class flag
	 * @return an F instance with the unicode character class flag turned off
	 */
	public static F unicodeCharacterClassOff(RE... res) {
		return new Option(null, EnumSet.of(F.Flag.UNICODE_CHARACTER_CLASS), res);
	}

	/**
	 * Applies the unicode case flag to the provided regular expressions. This
	 * flag enables the correct handling of character cases in Unicode when
	 * applying case-insensitive matching.
	 *
	 * @param res the regular expressions to be affected by the unicode case
	 *            flag
	 * @return an F instance with the unicode case flag applied
	 */
	public static F unicodeCase(RE... res) {
		return new Option(EnumSet.of(F.Flag.UNICODE_CASE), null, res);
	}

	/**
	 * Turns off the unicode case flag for the provided regular expressions.
	 * With the flag turned off, character cases in Unicode are not correctly
	 * handled when applying case-insensitive matching.
	 *
	 * @param res the regular expressions to be affected by turning off the
	 *            unicode case flag
	 * @return an F instance with the unicode case flag turned off
	 */
	public static F unicodeCaseOff(RE... res) {
		return new Option(null, EnumSet.of(F.Flag.UNICODE_CASE), res);
	}

	/**
	 * Applies the unix lines flag to the provided regular expressions. This
	 * flag affects how line terminators are matched. With this flag, only the
	 * '\n' line terminator is recognized.
	 *
	 * @param res the regular expressions to be affected by the unix lines flag
	 * @return an F instance with the unix lines flag applied
	 */
	public static F unixLines(RE... res) {
		return new Option(EnumSet.of(F.Flag.UNIX_LINES), null, res);
	}

	/**
	 * Turns off the unix lines flag for the provided regular expressions. With
	 * the flag turned off, line terminators are matched in a
	 * platform-independent manner.
	 *
	 * @param res the regular expressions to be affected by turning off the unix
	 *            lines flag
	 * @return an F instance with the unix lines flag turned off
	 */
	public static F unixLinesOff(RE... res) {
		return new Option(null, EnumSet.of(F.Flag.UNIX_LINES), res);
	}

	/**
	 * Combine the res into a single atomic Group.
	 *
	 * @see #atomic(String)
	 * @param res the constituents.
	 */
	public static G atomic(RE... res) {
		return new Group(G.Type.ATOMIC, res);
	}

	/**
	 * Creates an atomic group with the provided string. An atomic group
	 * prevents the regex engine from backtracking once the group has matched.
	 *
	 * @param string the literal string to be included in the atomic group
	 * @return a G instance representing an atomic group containing the provided
	 *         string
	 */
	public static G atomic(String string) {
		return new Group(G.Type.ATOMIC, lit(string));
	}

	/**
	 * Creates a lookahead group with the provided regular expressions. A
	 * lookahead group asserts that the given pattern must be matched next in
	 * the input sequence.
	 *
	 * @param res the regular expressions to be included in the lookahead group
	 * @return a G instance representing a lookahead group containing the
	 *         provided expressions
	 */
	public static G ahead(RE... res) {
		return new Group(G.Type.AHEAD, res);
	}

	/**
	 * Creates a lookbehind group with the provided regular expressions. A
	 * lookbehind group asserts that the given pattern must precede the current
	 * position in the input sequence.
	 *
	 * @param res the regular expressions to be included in the lookbehind group
	 * @return a G instance representing a lookbehind group containing the
	 *         provided expressions
	 */
	public static G behind(RE... res) {
		return new Group(G.Type.BEHIND, res);
	}

	/**
	 * Creates a sequence of regular expressions. This method groups the
	 * provided expressions in the order they are given, without any additional
	 * logic.
	 *
	 * @param res the regular expressions to be sequenced
	 * @return a G instance representing a sequence of the provided regular
	 *         expressions
	 */
	public static G seq(RE... res) {
		return new Group(G.Type.NONE, res);
	}

	/**
	 * Creates a conditional regular expression. The resulting pattern matches
	 * 'thenExpect' if 'condition' matches, otherwise it matches
	 * 'otherWiseExpect'.
	 *
	 * @param condition the conditional regular expression
	 * @param thenExpect the regular expression to match if the condition is
	 *            true
	 * @param otherWiseExpect the regular expression to match if the condition
	 *            is false
	 * @return an RE instance representing the conditional regular expression
	 */
	public static RE if_(RE condition, RE thenExpect, RE otherWiseExpect) {
		RE pos = seq(ahead(condition), thenExpect);
		RE all = or(pos, otherWiseExpect);
		return all;
	}

	/**
	 * Creates a conditional regular expression with no alternative case. The
	 * resulting pattern matches 'then' only if 'condition' matches.
	 *
	 * @param condition the conditional regular expression
	 * @param then the regular expression to match if the condition is true
	 * @return an RE instance representing the conditional regular expression
	 *         with no alternative case
	 */
	public static RE if_(RE condition, RE then) {
		return seq(ahead(condition), g(then));
	}

	/**
	 * Creates a regular expression that repeats the 'thenExpect' pattern as
	 * long as 'condition' matches.
	 *
	 * @param condition the condition for repetition
	 * @param thenExpect the regular expression to be repeated
	 * @return an RE instance representing the repeated regular expression
	 */
	public static RE while_(RE condition, RE thenExpect) {
		return set(seq(ahead(condition), thenExpect));
	}

	/**
	 * Creates a regular expression that matches 'thenExpect' until 'condition'
	 * becomes true.
	 *
	 * @param condition the condition to terminate matching
	 * @param thenExpect the regular expression to match until the condition is
	 *            met
	 * @return an RE instance representing the regular expression matching until
	 *         the condition
	 */
	public static RE until(RE condition, RE thenExpect) {
		return set(seq(thenExpect, ahead(condition)));
	}

	/**
	 * Creates a capturing group with the provided regular expressions. This
	 * method groups the expressions and captures them for later reference.
	 *
	 * @param res the regular expressions to be included in the capturing group
	 * @return a G instance representing a capturing group containing the
	 *         provided expressions
	 */
	public static G capture(RE... res) {
		return new Group(G.Type.CAPTURING, res);
	}

	/**
	 * Return a string that is delimeted on both sides with the same character.
	 * The character cannot be used directly but must be escaped with a
	 * backslash.
	 */

	public static RE string(char delimeter) {
		C del = cc(delimeter + "");
		C notBackslashOrDel = del.or(backslash)
			.not();
		RE backslashFollowedByAll = g(backslash, all);
		RE inner = or(notBackslashOrDel, backslashFollowedByAll);
		return g(del, set(inner), del);
	}

	final public static C	ws						= new Special("\\s");
	final public static Q	setWs					= set(ws);
	final public static Q	someWs					= some(ws);
	final public static RE	all						= new REImpl(".");
	final public static Q	setAll					= set(all);
	final public static RE	someAll					= some(all);
	final public static C	backslash				= new CharacterClass("\\\\");
	final public static C	Lu						= new Predefined("Lu", true);
	final public static C	Ll						= new Predefined("Ll", true);
	final public static C	Lt						= new Predefined("Lt", true);
	final public static C	Lm						= new Predefined("Lm", true);
	final public static C	Lo						= new Predefined("Lo", true);
	final public static C	Nd						= new Predefined("Nd", true);
	final public static C	Nl						= new Predefined("Nl", true);
	final public static C	No						= new Predefined("No", true);
	final public static C	Z						= new Predefined("Z", true);
	final public static C	P						= new Predefined("P", true);
	final public static C	S						= new Predefined("S", true);
	final public static C	Cc						= new Predefined("Cc", true);
	final public static C	Cf						= new Predefined("Cf", true);
	final public static C	Cn						= new Predefined("Cn", true);
	final public static C	Lower					= new Predefined("Lower", true);
	final public static C	Upper					= new Predefined("Upper", true);
	final public static C	ASCII					= new Predefined("ASCII", true);
	final public static C	Alpha					= new Predefined("Alpha", true);
	final public static C	Digit					= new Predefined("Digit", true);
	final public static C	Alnum					= new Predefined("Alnum", true);
	final public static C	Punct					= new Predefined("Punct", true);
	final public static C	Graph					= new Predefined("Graph", true);
	final public static C	Print					= new Predefined("Print", true);
	final public static C	Blank					= new Predefined("Blank", true);
	final public static C	Cntrl					= new Predefined("Cntrl", true);
	final public static C	XDigit					= new Predefined("XDigit", true);
	final public static C	Space					= new Predefined("Space", true);
	final public static C	letter					= new Special("\\w");
	final public static C	dollar					= new Special("\\$");
	final public static C	euro					= new Special("€");
	final public static Q	word					= some(letter);
	final public static C	digit					= new Special("\\d");
	final public static C	nonDigit				= digit.not();
	final public static C	lineEnd					= new Special("\\b");
	final public static C	dot						= new Special("\\.");
	final public static C	comma					= new Special(",");
	final public static C	semicolon				= new Special(";");
	final public static C	colon					= new Special(":");
	final public static C	nl						= new Special("\\R");
	final public static C	cr						= new Special("\r");
	final public static C	lf						= new Special("\n");
	final public static C	ff						= new Special("\f");
	final public static C	alarm					= new Special("\\a");
	final public static C	escape					= new Special("\\e");
	final public static RE	eof						= new Boundary("$");
	final public static RE	eol						= or(nl, eof);
	final public static C	parOpen					= new Special("\\(");
	final public static C	parClose				= new Special("\\)");
	final public static RE	empty					= new REImpl("");
	final public static C	tab						= new Special("\t");
	final public static RE	number					= some(digit);
	public static C			hexdigit				= cc("0-9A-F");
	public static C			bindigit				= cc("0-1");
	public static RE		hexnumber				= some(hexdigit);
	final public static C	minus					= new CharacterClass("-");
	final public static C	dquote					= new CharacterClass("\"");
	final public static C	squote					= new CharacterClass("'");
	final public static C	backQuote				= new CharacterClass("`");
	final public static C	underscore				= new CharacterClass("_");
	final public static Q	qualifier				= some(or(Alpha, Digit, underscore, minus));
	final public static RE	version					=															//
		g(number, opt(g(dot, number, opt(g(dot, number, opt(g(dot, qualifier)))))));
	final public static C	javaLowerCase			= new Predefined("javaLowerCase", true);
	final public static C	javaUpperCase			= new Predefined("javaUpperCase", true);
	final public static C	javaWhitespace			= new Predefined("javaWhitespace", true);
	final public static C	javaMirrored			= new Predefined("javaMirrored", true);
	final public static C	javaJavaIdentifierStart	= new Predefined("javaJavaIdentifierStart", true);
	final public static C	javaJavaIdentifierPart	= new Predefined("javaJavaIdentifierPart", true);
	final public static RE	javaId					= seq(javaJavaIdentifierStart, set(javaJavaIdentifierPart));
	final public static RE	fullyQualifiedName		= seq(javaId, set(dot, javaId));
	final public static RE	startOfLine				= new Boundary("^");
	final public static RE	endOfLine				= new Boundary("$");
	final public static RE	wordBoundary			= new Boundary("\\b");
	final public static RE	beginInput				= new Boundary("\\A");
	final public static RE	endOfPreviousMatch		= new Boundary("\\G");
	final public static RE	endOfInputForFinal		= new Boundary("\\Z");
	final public static RE	endOfInput				= new Boundary("\\z");
	final public static C	isLatin					= new Predefined("IsLatin", true);
	final public static C	inGreek					= new Predefined("InGreek", true);
	final public static C	isAlphabetic			= new Predefined("isAlphabetic", true);
	final public static C	sc						= new Predefined("Sc", true);

	static class REImpl implements RE {
		final String		literal;
		final Set	groups;
		volatile Pattern	pattern;

		REImpl(String literal) {
			this.literal = literal;
			groups = null;
		}

		REImpl(String literal, String... names) {
			this.literal = literal;
			this.groups = names.length > 0 ? new LinkedHashSet<>() : null;
			for (String name : names)
				this.groups.add(name);
		}

		@Override
		public RE not() {
			StringBuilder sb = new StringBuilder();
			sb.append("(?!");
			sb.append(literal);
			sb.append(")");
			return new REImpl(sb.toString());
		}

		@Override
		public String toString() {
			return literal;
		}

		@Override
		public Pattern pattern(RE.F.Flag... type) {

			int options = 0;
			for (Flag flag : type) {
				options |= flag.option;
			}
			Pattern p = pattern;
			if (p == null || p.flags() != options) {
				p = Pattern.compile(toString(), options);
			}
			return pattern = p;
		}

		@Override
		public boolean isMatch(String string) {
			return pattern().matcher(string)
				.matches();
		}

		@Override
		public Optional matches(String string) {
			if (string == null)
				return Optional.empty();
			return matches0(string, Matcher::matches);
		}

		Optional matches0(String string, Predicate m) {
			if (string == null)
				return Optional.empty();
			Matcher matcher = pattern().matcher(string);
			return matches1(string, m, matcher);

		}

		private Optional matches1(String string, Predicate m, Matcher matcher) {
			if (m.test(matcher)) {
				abstract class Base {
					public int length() {
						return value().length();
					}

					public char charAt(int index) {
						return value().charAt(index);
					}

					public CharSequence subSequence(int start, int end) {
						return value().subSequence(start, end);
					}

					public Matcher getMatcher() {
						return matcher;
					}

					public abstract String value();

					@Override
					public String toString() {
						return value();
					}
				}

				class MatchGroupImpl extends Base implements MatchGroup {
					final String	name;
					String			value;
					int				start	= -1;
					int				end		= -1;

					MatchGroupImpl(String name, String value) {
						this.name = name;
						this.value = value;
					}

					@Override
					public String name() {
						return name;
					}

					@Override
					public String value() {
						return value == null ? value : matcher.group(name);
					}

					@Override
					public int start() {
						return start < 0 ? start = matcher.start(name) : start;
					}

					@Override
					public int end() {
						return end < 0 ? end = matcher.start(name) : end;
					}
				}
				class MatchGroupImplIndex extends Base implements MatchGroup {
					final int	name;
					String		value;
					int			start	= -1;
					int			end		= -1;

					MatchGroupImplIndex(int name, String value) {
						this.name = name;
						this.value = value;
					}

					@Override
					public String name() {
						return Integer.toString(name);
					}

					@Override
					public String value() {
						return value == null ? value : matcher.group(name);
					}

					@Override
					public int start() {
						return start < 0 ? start = matcher.start(name) : start;
					}

					@Override
					public int end() {
						return end < 0 ? end = matcher.end(name) : end;
					}
				}
				class MatchImpl extends Base implements Match {
					Map	matchGroups;
					Map		matchValues;
					int						rover;

					@Override
					public String name() {
						return "";
					}

					@Override
					public String value() {
						return matcher.group();
					}

					@Override
					public int start() {
						return matcher.start();
					}

					@Override
					public int end() {
						return matcher.end();
					}

					@Override
					public Map getGroups() {
						if (matchGroups == null) {
							if (groups == null)
								matchGroups = Collections.emptyMap();
							else {
								Map result = new TreeMap<>();
								for (String name : groups) {
									String value = matcher.group(name);
									if (value != null) {
										MatchGroupImpl mg = new MatchGroupImpl(name, value);
										result.put(name, mg);
									}
								}
								matchGroups = Collections.unmodifiableMap(result);
							}
						}
						return matchGroups;
					}

					@Override
					public Map getGroupValues() {
						if (matchValues == null) {
							Map result = new LinkedHashMap<>();
							getGroups().forEach((k, v) -> result.put(k, v.value()));
							matchValues = Collections.unmodifiableMap(result);
						}
						return matchValues;
					}

					@Override
					public Optional group(String name) {
						if (groups == null)
							throw new IllegalArgumentException("no groups defined");

						if (!groups.contains(name)) {
							throw new IllegalArgumentException("no group name defined: " + name + " in " + groups);
						}
						String value = matcher.group(name);
						if (value == null) {
							Optional.empty();
						}
						return Optional.of(new MatchGroupImpl(name, value));
					}

					@Override
					public String tryMatch(RE expected) {
						Matcher m = expected.getMatcher(this);
						m.region(rover, length());
						if (m.lookingAt()) {
							rover = m.end();
							return m.group();
						} else
							return null;
					}

					@Override
					public Optional group(int group) {

						if (matcher.groupCount() < group)
							return Optional.empty();

						String value = matcher.group(group);
						if (value == null)
							return Optional.empty();

						return Optional.of(new MatchGroupImplIndex(group, value));
					}

					@Override
					public String presentGroup(String groupName) {
						String group = matcher.group(groupName);
						if (group == null)
							throw new IllegalArgumentException("no such group " + groupName);
						return group;
					}

				}
				return Optional.of(new MatchImpl());
			} else
				return Optional.empty();
		}

		@Override
		public Optional lookingAt(String string) {
			return matches0(string, Matcher::lookingAt);
		}

		private Predicate predicate(Predicate subPredicate) {
			Pattern pattern = pattern();
			return s -> subPredicate.test(pattern.matcher(s));
		}

		@Override
		public Optional findIn(String string) {
			return matches0(string, Matcher::find);
		}

		@Override
		public Stream findAllIn(String string) {
			return stream(string);
		}

		@Override
		public Predicate asMatchPredicate() {
			return predicate(Matcher::matches);
		}

		@Override
		public Predicate asFindPredicate() {
			return predicate(Matcher::find);
		}

		@Override
		public Predicate asLookingAtPredicate() {
			return predicate(Matcher::lookingAt);
		}

		Stream stream(String string) {
			Spliterator spliterator = spliterator(string, Spliterator.ORDERED | Spliterator.NONNULL);
			return StreamSupport.stream(spliterator, false);
		}

		Spliterator spliterator(String string, int options) {
			Iterator iterator = iterator(string);
			return Spliterators.spliteratorUnknownSize(iterator, options);
		}

		Iterator iterator(String string) {
			Matcher matcher = pattern().matcher(string);
			return new Iterator() {
				Optional match;

				@Override
				public boolean hasNext() {
					match = matches1(string, Matcher::find, matcher);
					return match.isPresent();
				}

				@Override
				public Match next() {
					return match.get();
				}
			};
		}

		@Override
		public Matcher getMatcher(CharSequence string) {
			return pattern().matcher(string);
		}

		@Override
		public boolean isSingle() {
			return literal.length() == 1;
		}

		@Override
		public Optional merge(RE re) {
			return Optional.empty();
		}

		@Override
		public Set getGroupNames() {
			return groups == null ? new HashSet<>() : new HashSet<>(groups);
		}

		@Override
		public void append(StringBuilder sb, String string, Function replacement) {
			AtomicInteger begin = new AtomicInteger(0);
			stream(string).forEach(match -> {
				sb.append(string.subSequence(begin.getAndSet(match.end()), match.start()));
				String r = replacement.apply(match);
				if (r != null)
					sb.append(r);
			});
			sb.append(string.substring(begin.get()));
		}

	}

	static class Group extends REImpl implements G {
		final Type		type;
		final String	name;

		Group(Type type, RE... res) {
			this(null, toGroupedString(false, res), type, names(res));
		}

		Group(Type type, String literal) {
			this(null, literal, type);
		}

		Group(String name, RE... res) {
			this(name, toGroupedString(false, res), name == null ? Group.Type.NONCAPTURING : Group.Type.NAMED,
				names(res));
		}

		private Group(String name, String literal, Type type, String... names) {
			super(literal, name == null ? names : combine(name, names));
			this.type = type;
			this.name = name;
		}

		@Override
		public Group not() {
			Type type = switch (this.type) {
				case AHEAD -> Type.NOT_AHEAD;
				case BEHIND -> Type.NOT_BEHIND;
				case NOT_AHEAD -> Type.AHEAD;
				case NOT_BEHIND -> Type.BEHIND;
				case NOT -> Type.NONCAPTURING;
				case NONCAPTURING -> Type.NOT;
				default -> null;
			};
			if (type == null)
				return new Group(Type.NOT, this);

			if (type == this.type)
				return this;

			return new Group(null, literal, type);
		}

		@Override
		public String toString() {
			StringBuilder sb = new StringBuilder();
			sb.append(type.prefix);
			if (type == Type.NAMED) {
				sb.append(name);
				sb.append(">");
			}
			sb.append(literal);
			sb.append(type.suffix);
			return sb.toString();
		}

		@Override
		public boolean isSingle() {
			return true;
		}

		@Override
		public Type groupType() {
			return type;
		}
	}

	static class Predefined extends CharacterClass {

		Predefined(String literal, boolean positive) {
			super(literal, positive, null);
		}

		Predefined(String literal) {
			this(literal, true);
		}

		@Override
		public boolean isSingle(String literal) {
			return true;
		}

		@Override
		public String asSetContent() {
			return toString();
		}

		@Override
		public Predefined not() {
			return new Predefined(literal, !positive);
		}

		@Override
		public String toString() {
			if (positive)
				return "\\p{" + literal + "}";
			else
				return "\\P{" + literal + "}";
		}
	}

	static class Special extends CharacterClass {

		Special(String literal, boolean positive) {
			super(literal, positive, null);
		}

		Special(String literal) {
			this(literal, true);
		}

		@Override
		public boolean isSingle() {
			return true;
		}

		@Override
		public Special not() {
			return new Special(literal, !positive);
		}

		@Override
		public String asSetContent() {
			return toString();
		}

		@Override
		public String toString() {
			if (positive)
				return literal;
			else {
				String s = switch (literal) {
					case "\\s" -> "\\S";
					case "\\S" -> "\\s";
					case "\\w" -> "\\W";
					case "\\W" -> "\\w";
					case "\\d" -> "\\D";
					case "\\D" -> "\\d";
					case "\\h" -> "\\H";
					case "\\H" -> "\\h";
					case "\\v" -> "\\V";
					case "\\V" -> "\\v";
					default -> {
						StringBuilder sb = new StringBuilder();
						sb.append("[^");
						sb.append(literal);
						sb.append("]");
						yield sb.toString();
					}
				};
				return s;
			}
		}
	}

	static class Quantified extends REImpl implements Q {
		final Type	type;
		final int	minimum;
		final int	maximum;

		Quantified(int minimum, int maximum, Type type, RE... res) {
			this(toGroupedString(true, res), minimum, maximum, type, names(res));
		}

		public Quantified(String grouped, int minimum, int maximum, Type type, String... names) {
			super(grouped, names);
			assert minimum >= 0;
			assert maximum > 0;
			this.minimum = minimum;
			this.maximum = maximum;
			this.type = type;
		}

		@Override
		public String toString() {
			StringBuilder sb = new StringBuilder();

			if (minimum == 1 && maximum == 1 && type == Type.greedy)
				return literal;

			sb.append(literal);

			if (minimum == 0 && maximum == 1) {
				sb.append("?");
			} else if (minimum == 0 && maximum == Integer.MAX_VALUE) {
				sb.append("*");
			} else if (minimum == 1 && maximum == Integer.MAX_VALUE) {
				sb.append("+");
			} else if (minimum == maximum) {
				sb.append("{")
					.append(minimum)
					.append("}");
			} else if (maximum == Integer.MAX_VALUE) {
				sb.append("{")
					.append(minimum)
					.append(",")
					.append("}");
			} else {
				sb.append("{")
					.append(minimum)
					.append(",")
					.append(maximum)
					.append("}");
			}
			switch (type) {
				case greedy :
					break;
				case possesive :
					sb.append("+");
					break;
				case reluctant :
					sb.append("?");
					break;
			}
			return sb.toString();
		}

		@Override
		public RE reluctant() {
			return new Quantified(literal, minimum, maximum, Type.reluctant);
		}

		@Override
		public RE greedy() {
			return new Quantified(literal, minimum, maximum, Type.reluctant);
		}

		@Override
		public RE possesive() {
			return new Quantified(literal, minimum, maximum, Type.possesive);
		}
	}

	static class Boundary extends REImpl {

		Boundary(String literal) {
			super(literal);
		}

		@Override
		public RE not() {
			return switch (literal) {
				case "^" -> Catalog.startOfLine;
				case "$" -> Catalog.endOfLine;
				case "\\b" -> new Boundary("\\B");
				case "\\A" -> Catalog.endOfInput;
				case "\\z" -> Catalog.beginInput;
				default -> super.not();
			};
		}

	}

	static class Option extends REImpl implements F {
		private static final EnumSet	NONE_OF	= EnumSet.noneOf(Flag.class);

		final EnumSet					positive;
		final EnumSet					negative;

		Option(EnumSet positive, EnumSet negative, RE... res) {
			this(toGroupedString(false, res), positive, negative, names(res));
		}

		Option(String ungrouped, EnumSet p, EnumSet n, String... names) {
			super(ungrouped, names);
			this.positive = p == null ? NONE_OF : p;
			this.negative = n == null ? NONE_OF : n;
		}

		@Override
		public String toString() {
			EnumSet p = EnumSet.copyOf(positive);
			p.removeAll(negative);
			EnumSet n = EnumSet.copyOf(negative);
			n.removeAll(positive);

			if (p.isEmpty() && n.isEmpty())
				return super.toString();

			StringBuilder sb = new StringBuilder();
			sb.append("(?");
			for (Flag f : p) {
				sb.append(f.flag);
			}
			if (!n.isEmpty()) {
				sb.append("-");
				for (Flag f : n) {
					sb.append(f.flag);
				}
			}
			if (!literal.isEmpty()) {
				sb.append(":");
				sb.append(literal);
			}
			sb.append(")");
			return sb.toString();
		}

		@Override
		public Optional merge(RE re) {
			if (re instanceof Option op) {
				EnumSet p = EnumSet.copyOf(positive);
				EnumSet n = EnumSet.copyOf(negative);
				p.addAll(op.positive);
				n.addAll(op.negative);
				return Optional.of(new Option(literal, p, n));
			} else
				return Optional.empty();
		}

		@Override
		public Set positive() {
			return EnumSet.copyOf(positive);
		}

		@Override
		public Set negative() {
			return EnumSet.copyOf(negative);
		}

	}

	static class CharacterClass extends REImpl implements C {
		final boolean			positive;
		final CharacterClass[]	conjunction;

		CharacterClass(String literal, boolean positive, CharacterClass[] conjunction) {
			super(literal);
			this.positive = positive;
			this.conjunction = conjunction == null ? new CharacterClass[0] : conjunction;
		}

		public CharacterClass(String string) {
			this(string, true, new CharacterClass[0]);
		}

		@Override
		public CharacterClass not() {
			return new CharacterClass(literal, !positive, conjunction);
		}

		@Override
		public Optional merge(RE other) {
			return Optional.empty();
		}

		/**
		 * https://www.regular-expressions.info/charclassintersect.html
		 */
		@Override
		public String toString() {
			if (isSingle() && positive) {
				return asSetContent();
			}
			StringBuilder sb = new StringBuilder("[");

			if (!positive)
				sb.append("^");

			sb.append(asSetContent());
			for (CharacterClass c : conjunction) {
				sb.append("&&");
				if (c.isSingle()) {
					sb.append(c.asSetContent());
				} else {
					sb.append(c);
				}
			}
			sb.append("]");
			return sb.toString();
		}

		@Override
		public String asSetContent() {
			return literal;
		}

		boolean isSingle(String literal) {
			return literal.length() == 1 || (literal.length() == 2 && literal.charAt(0) == '\\');
		}

		@Override
		public boolean isSingle() {
			return positive && isSingle(literal);
		}

		@Override
		public C and(C and) {
			CharacterClass[] copyOf = Arrays.copyOf(conjunction, conjunction.length + 1);
			copyOf[conjunction.length] = (CharacterClass) and;
			return new CharacterClass(asSetContent(), positive, copyOf);
		}

		@Override
		public C or(C or) {
			return new CharacterClass(asSetContent() + or.asSetContent(), positive, conjunction);
		}

	}

	static RE[] merge(RE[] res) {
		if (res.length < 2)
			return res;

		List result = new ArrayList<>();
		RE last = null;
		for (RE re : res) {
			if (last == null)
				last = re;
			else {
				RE merged = last.merge(re)
					.orElse(null);
				if (merged != null) {
					last = merged;
				} else {
					result.add(last);
					last = re;
				}
			}
		}
		result.add(last);
		return result.toArray(RE[]::new);
	}

	static String[] names(RE... res) {
		return Stream.of(res)
			.map(RE::getGroupNames)
			.filter(Objects::nonNull)
			.flatMap(Collection::stream)
			.toArray(String[]::new);
	}

	static String[] combine(String name, String... names) {
		if (names == null || names.length == 0)
			return new String[] {
				name
			};

		String[] result = Arrays.copyOf(names, names.length + 1);
		result[names.length] = name;
		return result;
	}

	static String toGroupedString(boolean force, RE... res) {
		res = merge(res);
		if (res.length == 0)
			return "";

		if (res.length == 1 && res[0].isSingle()) {
			return res[0].toString();
		}

		StringBuilder sb = new StringBuilder();
		if (force) {
			sb.append("(?:");
		}
		for (RE re : res) {
			sb.append(re);
		}
		if (force) {
			sb.append(")");
		}
		return sb.toString();
	}

	static boolean isWhiteSpace(RE re) {
		if (re == ws || re == setWs || re == someWs)
			return true;

		String s = re.toString();

		return switch (s) {
			case "\\s", " ", "\t", "\\s*", "\\s+", "(\\s)*", "(\\s)+", " *", " +", "( )*", "( )+" -> true;
			default -> false;
		};
	}

	public static RE re(String regex) {
		return new REImpl(regex);
	}

}