org.apache.batchee.container.jsl.GlobPatternMatcherImpl Maven / Gradle / Ivy
/*
* Copyright 2012 International Business Machines Corp.
*
* See the NOTICE file distributed with this work for additional information
* regarding copyright ownership. Licensed under the Apache License,
* Version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.batchee.container.jsl;
import java.util.ArrayList;
import java.util.List;
public class GlobPatternMatcherImpl {
public boolean matchWithoutBackslashEscape(final String toMatch, final String pattern) {
// Precondition: Blow up with null or empty s or pattern.
//TODO - Spec ramifications or is this already covered by property and exit status defaulting?
if ((pattern == null) || (pattern.length() == 0) ||
(toMatch == null) || (toMatch.length() == 0)) {
throw new IllegalArgumentException("Pattern = " + pattern +
"and string to match = " + toMatch +
", but both pattern and to-match String are required to be non-null Strings with length >=1 ");
}
// Normalize away consecutive '*' chars:
String newPattern = pattern;
while (true) {
if (newPattern.contains("**")) {
newPattern = newPattern.replace("**", "*");
} else {
break;
}
}
return recursiveMatch(toMatch, newPattern);
}
//
// TODO - confirm in spec that there is NO backslashing of '*', '?'.
//
private boolean recursiveMatch(final String toMatch, final String subpattern) {
final int firstAsterisk = subpattern.indexOf("*");
final int secondAsterisk = subpattern.indexOf("*", firstAsterisk + 1);
final int lastAsterisk = subpattern.lastIndexOf("*");
//
// A) If there are no '*'(s), do a non-wildcard (except for ?) match
//
if (firstAsterisk == -1) {
return matchNoAsterisk(toMatch, subpattern);
}
//
// B) Match off any beginning or end
//
// Match any beginning BEFORE the first asterisk
if (firstAsterisk > 0) {
final String beginPattern = subpattern.substring(0, firstAsterisk);
if (toMatch.length() < beginPattern.length()) {
return false;
}
final String beginToMatch = toMatch.substring(0, firstAsterisk);
if (!matchNoAsterisk(beginToMatch, beginPattern)) {
return false;
}
}
// This will just be a straight copy if 'firstAsterisk' = 0.
// Since we're using recursion, try to be a bit performance-sensitive and not do this until necessary.
String remainingToMatch = toMatch.substring(firstAsterisk);
//Match any end AFTER the first asterisk
if (lastAsterisk < subpattern.length() - 1) {
final String endPattern = subpattern.substring(lastAsterisk + 1);
if (remainingToMatch.length() < endPattern.length()) {
return false;
}
// Give me a substring the size of 'endPattern' at the end
final String endToMatch = remainingToMatch.substring(remainingToMatch.length() - endPattern.length(), remainingToMatch.length());
if (!matchNoAsterisk(endToMatch, endPattern)) {
return false;
}
// Already matched against char at index: 'remainingToMatch.length() - endPattern.length()', so truncate immediately before.
remainingToMatch = remainingToMatch.substring(0, remainingToMatch.length() - endPattern.length());
}
// C) Now I either have:
// 1) *
// 2) *-xxxxx-* (All non-'*' chars in the middle
// 3) *-xy-*-x-* (At least one '*' in the middle)
if (firstAsterisk == lastAsterisk) {
return true;
} else if (secondAsterisk == lastAsterisk) {
final String middlePattern = subpattern.substring(firstAsterisk + 1, lastAsterisk);
if (remainingToMatch.length() < middlePattern.length()) {
return false;
} else {
for (int i = 0; i <= remainingToMatch.length() - middlePattern.length(); i++) {
final String matchCandidate = remainingToMatch.substring(i, i + middlePattern.length());
if (matchNoAsterisk(matchCandidate, middlePattern)) {
return true;
}
}
return false;
}
} else {
//
// Now I have to match against sub-sub-pattern *xxx*xy*. The strategy here is to use recursion.
// 1. Isolate 'xxx' and store as 'patternBetweenAsterisk1and2'
final String patternBetweenAsterisk1and2 = subpattern.substring(firstAsterisk + 1, secondAsterisk);
// 2. Find every place in the string matching 'xxx' and store the remainder as a candidate
final List subMatchCandidates = new ArrayList();
for (int i = 0; i <= remainingToMatch.length() - patternBetweenAsterisk1and2.length(); i++) {
final String matchCandidate = remainingToMatch.substring(i, i + patternBetweenAsterisk1and2.length());
if (matchNoAsterisk(matchCandidate, patternBetweenAsterisk1and2)) {
// Grab the part of the string after the match.
String subMatchCandidate = remainingToMatch.substring(i + patternBetweenAsterisk1and2.length());
subMatchCandidates.add(subMatchCandidate);
}
}
if (subMatchCandidates.size() == 0) {
return false;
}
// 2. Calculate the new pattern to match against.
// For "*xxx*xy*" this would be "*xy*".
// For "*xxx*xy*z*" this would be "*xy*z*"
final String nestedPattern = subpattern.substring(secondAsterisk, lastAsterisk + 1); // We want to include the last asterisk.
// 3. try matching each one
for (final String candidate : subMatchCandidates) {
if (recursiveMatch(candidate, nestedPattern)) {
return true;
}
}
return false;
}
}
/**
* 1. Match (against regular char or ?)
* Default to 'true' which would apply to zero-length boundary condition
*/
private boolean matchNoAsterisk(final String toMatch, final String subpattern) {
final int toMatchLen = toMatch.length();
final int subpatternLen = subpattern.length();
if (toMatchLen != subpatternLen) {
return false;
}
for (int i = 0; i < toMatchLen; i++) {
if (subpattern.substring(i, i + 1).equals("*")) {
throw new IllegalStateException("Shouldn't have encountered a '*' in matchNoAsterisk, toMatch = " + toMatch + ", subPattern = " + subpattern);
} else if (!subpattern.substring(i, i + 1).equals("?")) {
if (subpattern.charAt(i) != toMatch.charAt(i)) {
return false;
}
// else continue
}
}
return true;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy