All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.cocoon.util.WildcardMatcherHelper Maven / Gradle / Ivy

There is a newer version: 2.3.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cocoon.util;

import org.apache.regexp.RE;
import org.apache.regexp.RECompiler;
import org.apache.regexp.REProgram;

import java.util.HashMap;
import java.util.Map;


/**
 * This class is an utility class that perform wildcard-patterns matching and isolation.
 *
 * @version $Id: WildcardMatcherHelper.java 490762 2006-12-28 17:03:53Z danielf $
 */
public class WildcardMatcherHelper {
    //~ Static fields/initializers -----------------------------------------------------------------

    /** Default path separator: "/" */
    public static final char ESC = '\\';

    /** Default path separator: "/" */
    public static final char PATHSEP = '/';

    /** Default path separator: "/" */
    public static final char STAR = '*';

    //~ Methods ------------------------------------------------------------------------------------

    /**
     * Match a pattern agains a string and isolates wildcard replacement into a Map.
     * 
* Here is how the matching algorithm works: * *
    *
  • * The '*' character, meaning that zero or more characters (excluding the path separator '/') * are to be matched. *
  • *
  • * The '**' sequence, meaning that zero or more characters (including the path separator '/') * are to be matched. *
  • *
  • * The '\*' sequence is honored as a litteral '*' character, not a wildcard *
  • *
*
* When more than two '*' characters, not separated by another character, are found their value is * considered as '**' and immediate succeeding '*' are skipped. *
* The '**' wildcard is greedy and thus the following sample matches as {"foo/bar","baz","bug"}: *
*
pattern
*
STAR,STAR,PATHSEP,STAR,PATHSEP,STAR,STAR (why can't I express it litterally?) *
string
*
foo/bar/baz/bug *
* The first '**' in the pattern will suck up as much as possible without making the match fail. * * @param pat The pattern string. * @param str The string to math agains the pattern * * @return a Map containing the representation of the extracted pattern. The extracted patterns are * keys in the Map from left to right beginning with "1" for te left most, "2" for the next, * a.s.o. The key "0" is the string itself. If the return value is null, string does not match to the * pattern . */ public static Map match(final String pat, final String str) { Matcher matcher; synchronized (cache) { matcher = (Matcher) cache.get(pat); if ( matcher == null ) { matcher = new Matcher(pat); cache.put(pat, matcher); } } String[] list = matcher.getMatches(str); if ( list == null ) return null; int n = list.length; Map map = new HashMap(n * 2 + 1); for ( int i = 0; i < n; i++ ) { map.put(String.valueOf(i), list[i]); } return map; } /** Cache for compiled pattern matchers */ private static final Map cache = new HashMap(); //~ Inner Classes ------------------------------------------------------------------------------ /** * The private matcher class */ private static class Matcher { /** Regexp to split constant parts from front and back leaving wildcards in the middle. */ private static final REProgram splitter; static { final String fixedRE = "([^*\\\\]*)"; final String wcardRE = "(.*[*\\\\])"; final String splitRE = "^" + fixedRE + wcardRE + fixedRE + "$"; splitter = new RECompiler().compile(splitRE); } /** Wildcard types to short-cut simple '*' and "**' matches. */ private static final int WC_CONST = 0; private static final int WC_STAR = 1; private static final int WC_STARSTAR = 2; private static final int WC_REGEXP = 3; //~ Instance fields ------------------------------------------------------------------------ // All fields declared final to emphasize requirement to be thread-safe. /** Fixed text at start of pattern. */ private final String prefix; /** Fixed text at end of pattern. */ private final String suffix; /** Length of prefix and suffix. */ private final int fixlen; /** Wildcard type of pattern. */ private final int wctype; /** Compiled regexp equivalent to wildcard pattern between prefix and suffix. */ private final REProgram regexp; //~ Constructors --------------------------------------------------------------------------- /** * Creates a new Matcher object. * * @param pat The pattern * @param str The string */ Matcher(final String pat) { RE re = new RE(splitter); if ( re.match(pat) ) { // Split pattern into (foo/)(*)(/bar). prefix = re.getParen(1); String wildcard = re.getParen(2); String tail = re.getParen(3); // If wildcard ends with \ then add the first char of postfix to wildcard. if ( tail.length() != 0 && wildcard.charAt(wildcard.length() - 1) == ESC ) { wildcard = wildcard + tail.substring(0, 1); suffix = tail.substring(1); } else { suffix = tail; } // Use short-cuts for single * or ** wildcards if ( wildcard.equals("*") ) { wctype = WC_STAR; regexp = null; } else if ( wildcard.equals("**") ) { wctype = WC_STARSTAR; regexp = null; } else { wctype = WC_REGEXP; regexp = compileRegexp(wildcard); } } else { // Pattern is a constant without '*' or '\'. prefix = pat; suffix = ""; wctype = WC_CONST; regexp = null; } fixlen = prefix.length() + suffix.length(); } //~ Methods -------------------------------------------------------------------------------- /** * Match string against pattern. * * @param str The string * @return list of wildcard matches, null if match failed */ String[] getMatches(final String str) { // Protect against 'foo' matching 'foo*foo'. if ( str.length() < fixlen ) return null; if ( !str.startsWith(prefix) ) return null; if ( !str.endsWith(suffix) ) return null; String infix = str.substring(prefix.length(), str.length() - suffix.length()); if ( wctype == WC_REGEXP ) { RE re = new RE(regexp); if ( !re.match(infix) ) return null; int n = re.getParenCount(); String[] list = new String[n]; list[0] = str; for ( int i = 1; i < n; i++ ) list[i] = re.getParen(i); return list; } if ( wctype == WC_CONST ) { if ( infix.length() != 0 ) return null; return new String[] { str }; } if ( wctype == WC_STAR ) { if ( infix.indexOf(PATHSEP) != -1 ) return null; } return new String[] { str, infix }; } } /** * Compile wildcard pattern into regexp pattern. * * @param pat The wildcard pattern * @return compiled regexp program. */ private static REProgram compileRegexp(String pat) { StringBuffer repat = new StringBuffer(pat.length() * 6); repat.append('^'); // Add an extra character to allow unchecked wcpat[i+1] accesses. // Unterminated ESC sequences are silently handled as '\\'. char[] wcpat = (pat + ESC).toCharArray(); for ( int i = 0, n = pat.length(); i < n; i++ ) { char ch = wcpat[i]; if ( ch == STAR ) { if ( wcpat[i + 1] != STAR ) { repat.append("([^/]*)"); continue; } // Handle two and more '*' as single '**'. while ( wcpat[i + 1] == STAR ) i++; repat.append("(.*)"); continue; } // Match ESC+ESC and ESC+STAR as literal ESC and STAR which needs to be escaped // in regexp. Match ESC+other as two characters ESC+other where other may also // need to be escaped in regexp. if ( ch == ESC ) { ch = wcpat[++i]; if ( ch != ESC && ch != STAR ) repat.append("\\\\"); } if ( ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= '0' && ch <= '9' || ch == '/' ) { repat.append(ch); continue; } repat.append('\\'); repat.append(ch); } repat.append('$'); return new RECompiler().compile(repat.toString()); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy