All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.exist.util.GlobToRegex Maven / Gradle / Ivy

There is a newer version: 6.3.0
Show newest version
/*  
 *  This code has been adapted from the jakarta ORO package. The original license
 *  follows below:
 *
 * ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2000 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro" 
 *    must not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache" 
 *    or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their 
 *    name, without prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 * 
 * $Id$
 */
package org.exist.util;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Translates a glob expression into a Java regular expression.
 *
 * <p>The following syntax is supported for glob expressions:</p>
 *
 * <ul>
 * <li><code>*</code> - Matches zero or more instances of any character
 * (translated to <code>.*</code>).</li>
 * <li><code>?</code> - Matches zero or one instance of any character
 * (translated to <code>.?</code>). This differs from the jakarta ORO
 * default, where <code>?</code> matches exactly one character.</li>
 * <li><code>[...]</code> - Matches any of the characters enclosed by the
 * brackets. <code>*</code> and <code>?</code> lose their special meanings
 * within a character class. Additionally, if the first character following
 * the opening bracket is a <code>!</code> or a <code>^</code>, then any
 * character not in the character class is matched. A <code>-</code>
 * between two characters can be used to denote a range. A <code>-</code>
 * at the beginning or end of the character class matches itself rather
 * than referring to a range. A <code>]</code> immediately following the
 * opening <code>[</code> matches itself rather than indicating the end of
 * the character class, otherwise it must be escaped with a backslash to
 * refer to itself.</li>
 * <li>Backslash - A backslash matches itself in most situations. But when
 * one of the glob special characters <code>*</code>, <code>?</code>,
 * <code>[</code> or <code>]</code> follows it, the backslash
 * <em>escapes</em> that character, indicating that it should be
 * interpreted as a normal character instead of its special meaning.</li>
 * <li>All other characters match themselves.</li>
 * </ul>
 *
 * <p>Please remember that when you construct a string in Java code, the
 * backslash character is itself a special Java character, and it must be
 * doubled to represent a single backslash in a glob or regular
 * expression.</p>
 *
 * <p>The original code is adapted from the jakarta ORO package.</p>
 */
public class GlobToRegex {

    /**
     * Tells whether a character has a special meaning in a regular expression
     * outside of a character class and therefore needs a backslash in front
     * of it to be matched literally.
     *
     * @param ch the character to test
     * @return {@code true} if {@code ch} is a regular expression metacharacter
     */
    private static boolean isRegexMetaCharacter(char ch) {
        switch (ch) {
            case '*':
            case '?':
            case '+':
            case '[':
            case ']':
            case '(':
            case ')':
            case '|':
            case '^':
            case '$':
            case '.':
            case '{':
            case '}':
            case '\\':
                return true;
            default:
                return false;
        }
    }

    /**
     * Tells whether a character is one of the glob special characters that a
     * preceding backslash can escape.
     *
     * @param ch the character to test
     * @return {@code true} for {@code *}, {@code ?}, {@code [} and {@code ]}
     */
    private static boolean isGlobMetaCharacter(char ch) {
        return ch == '*' || ch == '?' || ch == '[' || ch == ']';
    }

    /**
     * Converts a glob expression into the source text of an equivalent
     * regular expression, suitable for {@link Pattern#compile(String)}.
     *
     * <p>The result is not anchored; callers decide between
     * {@link Matcher#matches()} and {@link Matcher#find()}. No validation is
     * done: an unterminated bracket expression in the glob yields a regular
     * expression that fails to compile.</p>
     *
     * @param pattern the glob expression; must not be {@code null}
     * @return the regular expression source equivalent to the glob
     * @throws NullPointerException if {@code pattern} is {@code null}
     */
    public static String globToRegexp(CharSequence pattern) {
        final int length = pattern.length();
        final StringBuilder regex = new StringBuilder(2 * length);
        boolean inCharSet = false;

        for (int i = 0; i < length; i++) {
            final char c = pattern.charAt(i);
            switch (c) {
                case '*':
                    regex.append(inCharSet ? "*" : ".*");
                    break;

                case '?':
                    regex.append(inCharSet ? "?" : ".?");
                    break;

                case '[':
                    if (inCharSet) {
                        // A nested '[' would open a union in java.util.regex.
                        regex.append("\\[");
                        break;
                    }
                    inCharSet = true;
                    regex.append('[');
                    if (i + 1 < length) {
                        final char next = pattern.charAt(i + 1);
                        if (next == '!' || next == '^') {
                            // Both glob negation markers become the regex '^'.
                            regex.append('^');
                            i++;
                        } else if (next == ']') {
                            // ']' right after '[' is a member, not the terminator.
                            regex.append(']');
                            i++;
                        }
                    }
                    break;

                case ']':
                    inCharSet = false;
                    regex.append(']');
                    break;

                case '\\':
                    regex.append('\\');
                    if (i + 1 < length && isGlobMetaCharacter(pattern.charAt(i + 1))) {
                        // Backslash escapes the following glob metacharacter.
                        regex.append(pattern.charAt(++i));
                    } else {
                        // Otherwise (including a trailing backslash) it is a
                        // literal backslash, which must be doubled in a regex.
                        regex.append('\\');
                    }
                    break;

                default:
                    if (inCharSet) {
                        // "&&" inside a class is the intersection operator in
                        // java.util.regex; escape '&' so it stays a literal.
                        if (c == '&') {
                            regex.append('\\');
                        }
                    } else if (isRegexMetaCharacter(c)) {
                        regex.append('\\');
                    }
                    regex.append(c);
                    break;
            }
        }
        return regex.toString();
    }

    /**
     * Small demonstration: translates a sample glob, searches a sample text
     * with it and prints the first match, if there is one.
     *
     * @param args the arguments (unused)
     */
    public static void main(String[] args) {
        final String glob = "[gG]enerate? ";
        final String re = globToRegexp(glob);
        final String str = "This is generated text";
        final Matcher matcher = Pattern.compile(re).matcher(str);
        // start()/end()/group() throw IllegalStateException without a match.
        if (matcher.find()) {
            System.out.println(matcher.group());
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy