All downloads are free. The search and download functionality uses the official Maven repository.

de.unkrig.commons.text.pattern.Glob Maven / Gradle / Ivy

Go to download

A versatile Java(TM) library that implements many useful container and utility classes.

There is a newer version: 1.1.12
Show newest version

/*
 * de.unkrig.commons - A general-purpose Java class library
 *
 * Copyright (c) 2011, Arno Unkrig
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
 * following conditions are met:
 *
 *    1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
 *       following disclaimer.
 *    2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
 *       following disclaimer in the documentation and/or other materials provided with the distribution.
 *    3. The name of the author may not be used to endorse or promote products derived from this software without
 *       specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

package de.unkrig.commons.text.pattern;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import de.unkrig.commons.lang.protocol.Predicate;
import de.unkrig.commons.nullanalysis.Nullable;

/**
 * This class extends the concepts of the JDK {@link Pattern java.util.regex.Pattern} and {@link Pattern2
 * de.unkrig.commons.util.pattern.Pattern2} classes as follows:
 * 
    *
  • * {@link Pattern} defines the both the interface of pattern matching and its implementation * (regular expressions). This makes it impossible to have different pattern matchers with the same interface. * {@link Glob} is that new interface, and {@link #compile(String, int) Glob.compile()} compiles a regular * expression into a {@link Glob}, just like {@link Pattern2#compile(String, int) Pattern2.compile()} compiles it * into a {@link Pattern}. *
  • *
  • * {@link Glob} replaces the powerful (yet huge) API of {@code java.util.regex} with a simple one: {@link * #matches(String)} and {@link #replace(String)}. Pattern finding (as opposed to matching) and * repeated replacements are no longer supported. However, this approach is much more generic than (highly * regex-specific) concepts of "groups", "anchors", "lookaheads" and "lookbehinds". *
  • *
  • * The new {@link #REPLACEMENT} compilation flag modifies the pattern compilation such that a "replacement string" * can be defined in the pattern, which is often convenient. *
  • *
  • * The new {@link #INCLUDES_EXCLUDES} compilation flag modifies the pattern compilation such that a pattern can * be composed from a sequence of patterns, which are combined logically. *
  • *
* * @see #compile(String) * @see #compile(String, int) * @see #compileRegex(Pattern) * @see #compileRegex(Pattern, String) */ public abstract class Glob implements Predicate { /** * @return Whether the {@code subject} matches this {@link Glob}. */ public abstract boolean matches(String subject); /** * Implementation of {@link Predicate#evaluate}; calls {#matches}. *

* If your code uses only {@link Glob} and not {@link Predicate}, you should favor calling {@link #matches}, * because that method name is more expressive. *

* A {@code subject} value {@code null} evaluates to {@code false}. */ @Override public boolean evaluate(@Nullable String subject) { return subject != null && this.matches(subject); } /** * Iff the {@code subject} matches this {@link Glob}, then a non-null string ist returned; the algorithm that * computes that string depends on the concrete {@link Glob} implementation; the default implementation simply * returns the {@code subject}. *

* Otherwise, {@code null} is returned. */ @Nullable public String replace(String subject) { return this.matches(subject) ? subject : null; } /** * Modifies the pattern compilation as follows: *

* '=' is now a metacharacter, i.e. to include it literally in the pattern, it must be escaped with a backslash. *

* The semantics of '=' is as follows: *

* * * * * * * *
Replacement
a=b * If a subject matches a, then the {@link #replace(String)} method does not return the {@code * subject}, but b. The replacement string b may contain references to captured subsequences as * in the {@link Matcher#appendReplacement} method. *
*/ public static final int REPLACEMENT = 0x40000000; /** * Modifies the pattern compilation as follows: *

* ',' and '~' are now metacharacters, i.e. to include them literally in the pattern, they must be escaped with a * backslash. *

* The semantics of ',' and '~' are as follows: *

* * * * * * * * * * * * * * * *
ConstructMatches
Includes and excludes
a,bAny subject that matches a or b
a~bAny subject that matches a, but not b
* Patterns are applied right-to-left, i.e. the rightmost pattern that matches determines the result. This is * particularly relevant in conjunction with {@link #REPLACEMENT}. */ public static final int INCLUDES_EXCLUDES = 0x80000000; /** * A {@link Glob} that {@link #matches(String) matches} any string (and thus {@link #replace(String) replace}s it * with itself). */ public static final Glob ANY = new Glob() { @Override public boolean matches(String subject) { return true; } @Override public String toString() { return "ANY"; } }; /** * A {@link Glob} that {@link #matches(String) matches} no string. */ public static final Glob NONE = new Glob() { @Override public boolean matches(String subject) { return false; } @Override public String toString() { return "NONE"; } }; /** * Like {@link #compile(String, int)}, but without support for {@link #INCLUDES_EXCLUDES}. */ private static Glob compileWithReplacement(String pattern, int flags) { // Process the REPLACEMENT flag. final String replacement; if ((flags & (Pattern.LITERAL | Glob.REPLACEMENT)) == Glob.REPLACEMENT) { String[] tmp = Pattern2.parsePatternAndReplacement(pattern); pattern = tmp[0]; replacement = tmp[1]; } else { replacement = null; } final Glob glob = Glob.compileRegex(Pattern2.compile(pattern, flags), replacement); // In the WILDCARD mode, wrap the glob in order to override the "toString()" method, so it returns the // *wildcard patten*, and not the *regex pattern*. if ((flags | Pattern2.WILDCARD) == Pattern2.WILDCARD) { final String pattern2 = pattern; return new Glob() { @Override public boolean matches(String subject) { return glob.matches(subject); } @Override public boolean evaluate(@Nullable String subject) { return glob.evaluate(subject); } @Override @Nullable public String replace(String subject) { return glob.replace(subject); } @Override public String toString() { return replacement == null ? 
pattern2 : pattern2 + "=" + replacement; } }; } return glob; } /** * Returns a {@link Glob} who's {@link #replace(String)} method will return its {@code subject} argument if the * subject matches the given regular expression. */ public static Glob compileRegex(final Pattern regex) { return Glob.compileRegex(regex, null); } /** * The behavior of the {@link #matches(String)} method of the returned {@link Glob} is as follows: *
    *
  • If the {@code regex} matches the {@code subject}, then {@code true} is returned.
  • *
  • * Otherwise, if the {@code regex} matches a prefix of {@code subject}, and that prefix is followed by '/' or * '!', then {@code true} is returned. (Effectively, a glob 'dir' or 'dir/file.zip' matches all members and * entries under 'dir' resp. 'dir/file.zip'.) *
  • *
  • * Otherwise, if the subject ends with "!" or "/", and the {@code regex} could match the concatenation of the * {@code subject} and another string, then {@code true} is returned. (Example: The {@code subject} "dir/" is * matched by {@code regex}s "dir", "dir/", "dir/anything" and "**.c", but not by {@code regex}s * "dirr/anything", "file", "*.c" and "file.zip!file".) *
  • *
  • Otherwise {@code false} is returned.
  • *
* The behavior of the {@link #replace(String)} method of the returned {@link Glob} is as follows: *
    *
  • * If the subject matches the {@code regex}, then a non-null string is returned: *
      *
    • If replacementString is {@code null}, then the subject is returned.
    • *
    • * Otherwise, the replacementString is returned, with '$1', '$2', ... replaced with the {@code * regex}'s "capturing groups" (see {@link Pattern}). *
    • *
    *
  • *
* * @see Matcher#appendReplacement(StringBuffer, String) */ public static Glob compileRegex(final Pattern regex, @Nullable final String replacementString) { return new Glob() { @Override public boolean matches(String subject) { Matcher matcher = regex.matcher(subject); if (subject.endsWith("/") || subject.endsWith("!")) { // Subcomponent match (e.g. subject 'a/b/c/d' vs. glob 'a/b/')? return matcher.matches() || matcher.hitEnd(); } for (;;) { // Precise match (e.g. subject 'a/b/c' vs. glob 'a/b/c') or subcomponent match (e.g. subject // 'a/b/c/d' vs. glob 'a/b/c')? if (matcher.matches()) return true; for (int i = subject.length() - 1;; i--) { if (i < 0) return false; char c = subject.charAt(i); if (c == '/' || c == '!') { subject = subject.substring(0, i); break; } } } } @Override @Nullable public String replace(String subject) { Matcher matcher = regex.matcher(subject); if ((regex.flags() & Pattern2.WILDCARD) == 0 ? matcher.matches() : matcher.lookingAt()) { StringBuffer sb = new StringBuffer(); final int matchEnd = matcher.end(); matcher.appendReplacement(sb, replacementString == null ? "$0" : replacementString); // Precise match (e.g. subject 'a/b/c' vs. glob 'a/b/c')? if (matchEnd == subject.length()) return sb.toString(); // Subcomponent match (e.g. subject 'a/b/c!d' vs. glob 'a/b/c')? char c = subject.charAt(matchEnd); if (c == '/' || c == '!') return sb.append(subject.substring(matchEnd)).toString(); return null; } return null; } @Override public String toString() { return replacementString == null ? regex.toString() : regex + "=" + replacementString; } }; } /** * Equivalent with {@code compile(regex, 0)}. * * @see #compile(String, int) */ public static Glob compile(final String pattern) { return Glob.compile(pattern, 0); } /** * Similar to {@link Pattern#compile(String, int)}, but returns a {@link Glob} instead of a {@link Pattern}. *

* Iff the flag {@link #REPLACEMENT} is set, then the pattern may include a "replacement". *

*

* Iff a replacement is specified, then {@link Glob#replace(String)} will return the replacement, with * "{@code $1}"... replaces with the match groups; otherwise the subject will be returned. *

* * @param flags Modifies the semantics of the {@code pattern}, e.g. {@link Pattern2#WILDCARD} switches from regular * expressions to wildcards * @see #INCLUDES_EXCLUDES * @see #REPLACEMENT * @see Pattern2#WILDCARD * @see Pattern#CANON_EQ * @see Pattern#CASE_INSENSITIVE * @see Pattern#COMMENTS * @see Pattern#DOTALL * @see Pattern#LITERAL * @see Pattern#MULTILINE * @see Pattern#UNICODE_CASE * @see Pattern#UNIX_LINES */ public static Glob compile(final String pattern, int flags) { if ((flags & Glob.INCLUDES_EXCLUDES) == 0) { return Glob.compileWithReplacement(pattern, flags); } // Break the wildcard pattern up at ',' and '~' and construct an 'IncludeExclude' object from it. int idx; IncludeExclude includeExclude = new IncludeExclude(); if (pattern.startsWith("~")) { Glob glob = Glob.compileWithReplacement( pattern.substring(1, (idx = Pattern2.findMeta(",~", pattern, 1))), flags ); includeExclude.addExclude(glob, true); } else { Glob glob = Glob.compileWithReplacement( pattern.substring(0, (idx = Pattern2.findMeta(",~", pattern, 0))), flags ); // Shortcut for a wildcard pattern without ',' and '~'. if (idx == pattern.length()) return glob; includeExclude.addInclude(glob, true); } while (idx != pattern.length()) { char c = pattern.charAt(idx++); Glob glob = Glob.compileWithReplacement( pattern.substring(idx, (idx = Pattern2.findMeta(",~", pattern, idx))), flags ); if (c == ',') { includeExclude.addInclude(glob, true); } else { includeExclude.addExclude(glob, true); } } return includeExclude; } /** * The {@link #matches(String)} method of the returned {@link Glob} returns whether its {@code subject} * argument matches both {@code pattern1} and {@code pattern2}. *

* The {@link #replace(String)} method of the returned {@link Glob} returns checks whether the {@code subject} * matches {@code pattern1}; if so, it calls {@link #replace(String)} on {@code pattern2} and returns the result; * otherwise it returns {@code null}. */ public static Glob and(final Glob pattern1, final Glob pattern2) { return new Glob() { @Override public boolean matches(String subject) { return pattern1.matches(subject) && pattern2.matches(subject); } @Override @Nullable public String replace(String subject) { return pattern1.matches(subject) ? pattern2.replace(subject) : null; } @Override public String toString() { return pattern1 + " && " + pattern2; } }; } /** * The {@link #matches(String)} method of the returned {@link Glob} returns whether its {@code subject} * argument matches {@code pattern1} or, if not, {@code pattern2}. *

* The {@link #replace(String)} method of the returned {@link Glob} returns calls {@link #replace(String)} on * {@code pattern1} and returns the result if it is not {@code null}; otherwise it calls {@link * #replace(String)} on {@code pattern2} and returns the result. */ public static Glob or(final Glob pattern1, final Glob pattern2) { return new Glob() { @Override public boolean matches(String subject) { return pattern1.matches(subject) || pattern2.matches(subject); } @Override @Nullable public String replace(String subject) { String replacement = pattern1.replace(subject); if (replacement != null) return replacement; return pattern2.replace(subject); } @Override public String toString() { return pattern1 + " || " + pattern2; } }; } /** * The {@link #matches(String)} method of the returned {@link Glob} returns whether the {@code predicate} evaluates * to {@code true} and the {@code subject} argument matches the {@code pattern}. *

* The {@link #replace(String)} method of the returned {@link Glob} returns checks whether the {@code predicate} * evaluates to {@code true}; if so, it calls {@link #replace(String)} on {@code pattern} and returns the result; * otherwise it returns {@code null}. */ public static Glob and(final Predicate predicate, final Glob pattern) { return new Glob() { @Override public boolean matches(String subject) { return predicate.evaluate(subject) && pattern.matches(subject); } @Override @Nullable public String replace(String subject) { return predicate.evaluate(subject) ? pattern.replace(subject) : null; } @Override public String toString() { return predicate + " && " + pattern; } }; } /** * The {@link #matches(String)} method of the returned {@link Glob} returns whether the {@code subject} argument * matches the {@code pattern} and the {@code predicate} evaluates to {@code true}. *

* The {@link #replace(String)} method of the returned {@link Glob} returns checks whether the {@link * Glob#replace(String)} on the {@code pattern} returns nuon-{@code null} and the {@code predicate} evaluates to * {@code true}; if so, it returns the result of the {@link Glob#replace(String)} call; otherwise it returns {@code * null}. */ public static Glob and(final Glob pattern, final Predicate predicate) { return new Glob() { @Override public boolean matches(String subject) { return pattern.matches(subject) && predicate.evaluate(subject); } @Override @Nullable public String replace(String subject) { String replacement = pattern.replace(subject); return replacement != null && predicate.evaluate(subject) ? replacement : null; } @Override public String toString() { return pattern + " && " + predicate; } }; } /** @return A glob that wraps the given {@code predicate} */ public static Glob fromPredicate(final Predicate predicate) { return new Glob() { @Override public boolean matches(String subject) { return predicate.evaluate(subject); } }; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy