// de.unkrig.commons.text.pattern.Glob (Maven / Gradle / Ivy)
// Artifact: de-unkrig-commons
/*
* de.unkrig.commons - A general-purpose Java class library
*
* Copyright (c) 2011, Arno Unkrig
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
* following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
* following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
* following disclaimer in the documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package de.unkrig.commons.text.pattern;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import de.unkrig.commons.lang.protocol.Predicate;
import de.unkrig.commons.nullanalysis.Nullable;
/**
* This class extends the concepts of the JDK {@link Pattern java.util.regex.Pattern} and {@link Pattern2
* de.unkrig.commons.util.pattern.Pattern2} classes as follows:
*
* -
* {@link Pattern} defines the both the interface of pattern matching and its implementation
* (regular expressions). This makes it impossible to have different pattern matchers with the same interface.
* {@link Glob} is that new interface, and {@link #compile(String, int) Glob.compile()} compiles a regular
* expression into a {@link Glob}, just like {@link Pattern2#compile(String, int) Pattern2.compile()} compiles it
* into a {@link Pattern}.
*
* -
* {@link Glob} replaces the powerful (yet huge) API of {@code java.util.regex} with a simple one: {@link
* #matches(String)} and {@link #replace(String)}. Pattern finding (as opposed to matching) and
* repeated replacements are no longer supported. However, this approach is much more generic than (highly
* regex-specific) concepts of "groups", "anchors", "lookaheads" and "lookbehinds".
*
* -
* The new {@link #REPLACEMENT} compilation flag modifies the pattern compilation such that a "replacement string"
* can be defined in the pattern, which is often convenient.
*
* -
* The new {@link #INCLUDES_EXCLUDES} compilation flag modifies the pattern compilation such that a pattern can
* be composed from a sequence of patterns, which are combined logically.
*
*
*
* @see #compile(String)
* @see #compile(String, int)
* @see #compileRegex(Pattern)
* @see #compileRegex(Pattern, String)
*/
public abstract
class Glob implements Predicate {
/**
* @return Whether the {@code subject} matches this {@link Glob}.
*/
public abstract boolean
matches(String subject);
/**
* Implementation of {@link Predicate#evaluate}; calls {#matches}.
*
* If your code uses only {@link Glob} and not {@link Predicate}, you should favor calling {@link #matches},
* because that method name is more expressive.
*
* A {@code subject} value {@code null} evaluates to {@code false}.
*/
@Override public boolean
evaluate(@Nullable String subject) {
return subject != null && this.matches(subject);
}
/**
* Iff the {@code subject} matches this {@link Glob}, then a non-null string ist returned; the algorithm that
* computes that string depends on the concrete {@link Glob} implementation; the default implementation simply
* returns the {@code subject}.
*
* Otherwise, {@code null} is returned.
*/
@Nullable public String
replace(String subject) {
return this.matches(subject) ? subject : null;
}
/**
* Modifies the pattern compilation as follows:
*
* '=' is now a metacharacter, i.e. to include it literally in the pattern, it must be escaped with a backslash.
*
* The semantics of '=' is as follows:
*
*
*
* Replacement
*
* a=b
*
* If a subject matches a, then the {@link #replace(String)} method does not return the {@code
* subject}, but b. The replacement string b may contain references to captured subsequences as
* in the {@link Matcher#appendReplacement} method.
*
*
*
*/
public static final int REPLACEMENT = 0x40000000;
/**
* Modifies the pattern compilation as follows:
*
* ',' and '~' are now metacharacters, i.e. to include them literally in the pattern, they must be escaped with a
* backslash.
*
* The semantics of ',' and '~' are as follows:
*
*
* Construct
* Matches
*
*
* Includes and excludes
*
*
* a,b
* Any subject that matches a or b
*
*
* a~b
* Any subject that matches a, but not b
*
*
* Patterns are applied right-to-left, i.e. the rightmost pattern that matches determines the result. This is
* particularly relevant in conjunction with {@link #REPLACEMENT}.
*/
public static final int INCLUDES_EXCLUDES = 0x80000000;
/**
* A {@link Glob} that {@link #matches(String) matches} any string (and thus {@link #replace(String) replace}s it
* with itself).
*/
public static final Glob ANY = new Glob() {
@Override public boolean matches(String subject) { return true; }
@Override public String toString() { return "ANY"; }
};
/**
* A {@link Glob} that {@link #matches(String) matches} no string.
*/
public static final Glob NONE = new Glob() {
@Override public boolean matches(String subject) { return false; }
@Override public String toString() { return "NONE"; }
};
/**
* Like {@link #compile(String, int)}, but without support for {@link #INCLUDES_EXCLUDES}.
*/
private static Glob
compileWithReplacement(String pattern, int flags) {
// Process the REPLACEMENT flag.
final String replacement;
if ((flags & (Pattern.LITERAL | Glob.REPLACEMENT)) == Glob.REPLACEMENT) {
String[] tmp = Pattern2.parsePatternAndReplacement(pattern);
pattern = tmp[0];
replacement = tmp[1];
} else {
replacement = null;
}
final Glob glob = Glob.compileRegex(Pattern2.compile(pattern, flags), replacement);
// In the WILDCARD mode, wrap the glob in order to override the "toString()" method, so it returns the
// *wildcard patten*, and not the *regex pattern*.
if ((flags | Pattern2.WILDCARD) == Pattern2.WILDCARD) {
final String pattern2 = pattern;
return new Glob() {
@Override public boolean matches(String subject) { return glob.matches(subject); }
@Override public boolean evaluate(@Nullable String subject) { return glob.evaluate(subject); }
@Override @Nullable public String replace(String subject) { return glob.replace(subject); }
@Override public String
toString() { return replacement == null ? pattern2 : pattern2 + "=" + replacement; }
};
}
return glob;
}
/**
* Returns a {@link Glob} who's {@link #replace(String)} method will return its {@code subject} argument if the
* subject matches the given regular expression.
*/
public static Glob
compileRegex(final Pattern regex) {
return Glob.compileRegex(regex, null);
}
/**
* The behavior of the {@link #matches(String)} method of the returned {@link Glob} is as follows:
*
* - If the {@code regex} matches the {@code subject}, then {@code true} is returned.
* -
* Otherwise, if the {@code regex} matches a prefix of {@code subject}, and that prefix is followed by '/' or
* '!', then {@code true} is returned. (Effectively, a glob 'dir' or 'dir/file.zip' matches all members and
* entries under 'dir' resp. 'dir/file.zip'.)
*
* -
* Otherwise, if the subject ends with "!" or "/", and the {@code regex} could match the concatenation of the
* {@code subject} and another string, then {@code true} is returned. (Example: The {@code subject} "dir/" is
* matched by {@code regex}s "dir", "dir/", "dir/anything" and "**.c", but not by {@code regex}s
* "dirr/anything", "file", "*.c" and "file.zip!file".)
*
* - Otherwise {@code false} is returned.
*
* The behavior of the {@link #replace(String)} method of the returned {@link Glob} is as follows:
*
* -
* If the subject matches the {@code regex}, then a non-null string is returned:
*
* - If replacementString is {@code null}, then the subject is returned.
* -
* Otherwise, the replacementString is returned, with '$1', '$2', ... replaced with the {@code
* regex}'s "capturing groups" (see {@link Pattern}).
*
*
*
*
*
* @see Matcher#appendReplacement(StringBuffer, String)
*/
public static Glob
compileRegex(final Pattern regex, @Nullable final String replacementString) {
return new Glob() {
@Override public boolean
matches(String subject) {
Matcher matcher = regex.matcher(subject);
if (subject.endsWith("/") || subject.endsWith("!")) {
// Subcomponent match (e.g. subject 'a/b/c/d' vs. glob 'a/b/')?
return matcher.matches() || matcher.hitEnd();
}
for (;;) {
// Precise match (e.g. subject 'a/b/c' vs. glob 'a/b/c') or subcomponent match (e.g. subject
// 'a/b/c/d' vs. glob 'a/b/c')?
if (matcher.matches()) return true;
for (int i = subject.length() - 1;; i--) {
if (i < 0) return false;
char c = subject.charAt(i);
if (c == '/' || c == '!') {
subject = subject.substring(0, i);
break;
}
}
}
}
@Override @Nullable public String
replace(String subject) {
Matcher matcher = regex.matcher(subject);
if ((regex.flags() & Pattern2.WILDCARD) == 0 ? matcher.matches() : matcher.lookingAt()) {
StringBuffer sb = new StringBuffer();
final int matchEnd = matcher.end();
matcher.appendReplacement(sb, replacementString == null ? "$0" : replacementString);
// Precise match (e.g. subject 'a/b/c' vs. glob 'a/b/c')?
if (matchEnd == subject.length()) return sb.toString();
// Subcomponent match (e.g. subject 'a/b/c!d' vs. glob 'a/b/c')?
char c = subject.charAt(matchEnd);
if (c == '/' || c == '!') return sb.append(subject.substring(matchEnd)).toString();
return null;
}
return null;
}
@Override public String
toString() {
return replacementString == null ? regex.toString() : regex + "=" + replacementString;
}
};
}
/**
* Equivalent with {@code compile(regex, 0)}.
*
* @see #compile(String, int)
*/
public static Glob
compile(final String pattern) {
return Glob.compile(pattern, 0);
}
/**
* Similar to {@link Pattern#compile(String, int)}, but returns a {@link Glob} instead of a {@link Pattern}.
*
* Iff the flag {@link #REPLACEMENT} is set, then the pattern may include a "replacement".
*
*
* Iff a replacement is specified, then {@link Glob#replace(String)} will return the replacement, with
* "{@code $1}"... replaces with the match groups; otherwise the subject will be returned.
*
*
* @param flags Modifies the semantics of the {@code pattern}, e.g. {@link Pattern2#WILDCARD} switches from regular
* expressions to wildcards
* @see #INCLUDES_EXCLUDES
* @see #REPLACEMENT
* @see Pattern2#WILDCARD
* @see Pattern#CANON_EQ
* @see Pattern#CASE_INSENSITIVE
* @see Pattern#COMMENTS
* @see Pattern#DOTALL
* @see Pattern#LITERAL
* @see Pattern#MULTILINE
* @see Pattern#UNICODE_CASE
* @see Pattern#UNIX_LINES
*/
public static Glob
compile(final String pattern, int flags) {
if ((flags & Glob.INCLUDES_EXCLUDES) == 0) {
return Glob.compileWithReplacement(pattern, flags);
}
// Break the wildcard pattern up at ',' and '~' and construct an 'IncludeExclude' object from it.
int idx;
IncludeExclude includeExclude = new IncludeExclude();
if (pattern.startsWith("~")) {
Glob glob = Glob.compileWithReplacement(
pattern.substring(1, (idx = Pattern2.findMeta(",~", pattern, 1))),
flags
);
includeExclude.addExclude(glob, true);
} else {
Glob glob = Glob.compileWithReplacement(
pattern.substring(0, (idx = Pattern2.findMeta(",~", pattern, 0))),
flags
);
// Shortcut for a wildcard pattern without ',' and '~'.
if (idx == pattern.length()) return glob;
includeExclude.addInclude(glob, true);
}
while (idx != pattern.length()) {
char c = pattern.charAt(idx++);
Glob glob = Glob.compileWithReplacement(
pattern.substring(idx, (idx = Pattern2.findMeta(",~", pattern, idx))),
flags
);
if (c == ',') {
includeExclude.addInclude(glob, true);
} else {
includeExclude.addExclude(glob, true);
}
}
return includeExclude;
}
/**
* The {@link #matches(String)} method of the returned {@link Glob} returns whether its {@code subject}
* argument matches both {@code pattern1} and {@code pattern2}.
*
* The {@link #replace(String)} method of the returned {@link Glob} returns checks whether the {@code subject}
* matches {@code pattern1}; if so, it calls {@link #replace(String)} on {@code pattern2} and returns the result;
* otherwise it returns {@code null}.
*/
public static Glob
and(final Glob pattern1, final Glob pattern2) {
return new Glob() {
@Override public boolean
matches(String subject) {
return pattern1.matches(subject) && pattern2.matches(subject);
}
@Override @Nullable public String
replace(String subject) {
return pattern1.matches(subject) ? pattern2.replace(subject) : null;
}
@Override public String
toString() {
return pattern1 + " && " + pattern2;
}
};
}
/**
* The {@link #matches(String)} method of the returned {@link Glob} returns whether its {@code subject}
* argument matches {@code pattern1} or, if not, {@code pattern2}.
*
* The {@link #replace(String)} method of the returned {@link Glob} returns calls {@link #replace(String)} on
* {@code pattern1} and returns the result if it is not {@code null}; otherwise it calls {@link
* #replace(String)} on {@code pattern2} and returns the result.
*/
public static Glob
or(final Glob pattern1, final Glob pattern2) {
return new Glob() {
@Override public boolean
matches(String subject) {
return pattern1.matches(subject) || pattern2.matches(subject);
}
@Override @Nullable public String
replace(String subject) {
String replacement = pattern1.replace(subject);
if (replacement != null) return replacement;
return pattern2.replace(subject);
}
@Override public String
toString() {
return pattern1 + " || " + pattern2;
}
};
}
/**
* The {@link #matches(String)} method of the returned {@link Glob} returns whether the {@code predicate} evaluates
* to {@code true} and the {@code subject} argument matches the {@code pattern}.
*
* The {@link #replace(String)} method of the returned {@link Glob} returns checks whether the {@code predicate}
* evaluates to {@code true}; if so, it calls {@link #replace(String)} on {@code pattern} and returns the result;
* otherwise it returns {@code null}.
*/
public static Glob
and(final Predicate super String> predicate, final Glob pattern) {
return new Glob() {
@Override public boolean
matches(String subject) {
return predicate.evaluate(subject) && pattern.matches(subject);
}
@Override @Nullable public String
replace(String subject) {
return predicate.evaluate(subject) ? pattern.replace(subject) : null;
}
@Override public String
toString() {
return predicate + " && " + pattern;
}
};
}
/**
* The {@link #matches(String)} method of the returned {@link Glob} returns whether the {@code subject} argument
* matches the {@code pattern} and the {@code predicate} evaluates to {@code true}.
*
* The {@link #replace(String)} method of the returned {@link Glob} returns checks whether the {@link
* Glob#replace(String)} on the {@code pattern} returns nuon-{@code null} and the {@code predicate} evaluates to
* {@code true}; if so, it returns the result of the {@link Glob#replace(String)} call; otherwise it returns {@code
* null}.
*/
public static Glob
and(final Glob pattern, final Predicate super String> predicate) {
return new Glob() {
@Override public boolean
matches(String subject) {
return pattern.matches(subject) && predicate.evaluate(subject);
}
@Override @Nullable public String
replace(String subject) {
String replacement = pattern.replace(subject);
return replacement != null && predicate.evaluate(subject) ? replacement : null;
}
@Override public String
toString() {
return pattern + " && " + predicate;
}
};
}
/** @return A glob that wraps the given {@code predicate} */
public static Glob
fromPredicate(final Predicate super String> predicate) {
return new Glob() {
@Override public boolean
matches(String subject) { return predicate.evaluate(subject); }
};
}
}