All downloads are free. The search and download functionality uses the official Maven repository.

de.unkrig.commons.text.pattern.Pattern2 Maven / Gradle / Ivy

Go to download

A versatile Java(TM) library that implements many useful container and utility classes.

There is a newer version: 1.1.12
Show newest version

/*
 * de.unkrig.commons - A general-purpose Java class library
 *
 * Copyright (c) 2011, Arno Unkrig
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
 * following conditions are met:
 *
 *    1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
 *       following disclaimer.
 *    2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
 *       following disclaimer in the documentation and/or other materials provided with the distribution.
 *    3. The name of the author may not be used to endorse or promote products derived from this software without
 *       specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

package de.unkrig.commons.text.pattern;

import java.io.File;
import java.util.regex.Pattern;

/**
 * This class extends the concepts of the JDK {@link Pattern java.util.regex.Pattern} class with a new {@link
 * #WILDCARD} compilation flag, which modifies the pattern compilation such that it combines the well-known
 * wildcard pattern matching with the power of regular
 * expressions.
 *
 * @see #compile(String, int)
 */
public final
class Pattern2 {

    private Pattern2() {}

    /**
     * Modifies the pattern compilation as follows:
     * <ul>
     *   <li>
     *     The meaning of the '*' and '?' metacharacters is now different, and '.' is no longer a metacharacter.
     *   </li>
     *   <li>
     *     '/' is now a metacharacter, i.e. to include it literally in the pattern, it must be escaped with a
     *     backslash.
     *   </li>
     * </ul>
     * The semantics of '*', '?' and '.' are as follows:
     * <table border="1">
     *   <tr><th>Construct</th><th>Matches</th></tr>
     *   <tr><th colspan="2">Wildcards</th></tr>
     *   <tr><td>{@code *}</td><td>Zero or more characters except '/', the file separator and '!'</td></tr>
     *   <tr><td>{@code **}</td><td>Zero or more characters except '!'</td></tr>
     *   <tr><td>{@code ***}</td><td>Zero or more characters</td></tr>
     *   <tr><td>{@code ?}</td><td>Any character except '/', the file separator and '!'</td></tr>
     *   <tr><td>{@code .}</td><td>The '.'</td></tr>
     *   <tr>
     *     <td>{@code /}</td>
     *     <td>
     *       '/' or the system-dependent file separator (see {@link java.io.File#separatorChar separatorChar})
     *     </td>
     *   </tr>
     * </table>
     * Naturally, '*' is no longer the regex quantifier '*', so if you need to quantify 'zero or more', then you'd
     * have to write '{@code {0,}}'. Similarly, to quantify 'zero or one', you can no longer write '{@code ?}', but
     * must use '{@code {0,1}}'.
     */
    public static final int WILDCARD = 0x20000000;

    /**
     * Like {@link Pattern#compile(String,int)}, but with support for the {@link #WILDCARD} flag.
     * <p>
     *   Notice that iff {@link #WILDCARD} is given, then {@link Pattern#toString()} returns the regular expression
     *   that was generated from the wildcard pattern (and not the wildcard pattern, as you'd probably expect).
     * </p>
     * <p>
     *   The {@link #WILDCARD} bit is consumed by this method and is never passed on to
     *   {@link Pattern#compile(String,int)}, because the latter is specified to throw an
     *   {@link IllegalArgumentException} for flag bits it does not define.
     * </p>
     *
     * @param pattern The regular expression or, iff {@link #WILDCARD} is given without {@link Pattern#LITERAL},
     *                the wildcard pattern to compile
     * @param flags   Any combination of the {@link Pattern} flags and {@link #WILDCARD}
     * @return        The compiled pattern
     * @see #WILDCARD
     * @see Pattern#CANON_EQ
     * @see Pattern#CASE_INSENSITIVE
     * @see Pattern#COMMENTS
     * @see Pattern#DOTALL
     * @see Pattern#LITERAL
     * @see Pattern#MULTILINE
     * @see Pattern#UNICODE_CASE
     * @see Pattern#UNIX_LINES
     */
    public static Pattern
    compile(String pattern, int flags) {

        // WILDCARD is private to this class; strip it so that java.util.regex.Pattern never sees an undefined
        // flag bit (which it rejects with an IllegalArgumentException).
        final int jdkFlags = flags & ~Pattern2.WILDCARD;

        // Wildcard translation applies only if WILDCARD is set and LITERAL is not.
        if ((flags & (Pattern.LITERAL | Pattern2.WILDCARD)) != Pattern2.WILDCARD) {
            return Pattern.compile(pattern, jdkFlags);
        }

        final char   sep            = File.separatorChar;
        final String metaCharacters = "*?./{";

        for (
            int idx = Pattern2.findMeta(metaCharacters, pattern, 0);
            idx != pattern.length();
            idx = Pattern2.findMeta(metaCharacters, pattern, idx)
        ) {

            String replacement; // Regex text that replaces the construct at "idx".
            int    consumed;    // Number of characters of the wildcard construct at "idx".

            switch (pattern.charAt(idx)) {

            case '*':
                if (pattern.startsWith("***", idx)) {
                    replacement = ".*";
                    consumed    = 3;
                } else
                if (pattern.startsWith("**", idx)) {
                    replacement = "[^!]*";
                    consumed    = 2;
                } else {
                    replacement = sep != '/' ? "[^/\\" + sep + "!]*" : "[^/!]*";
                    consumed    = 1;
                }
                break;

            case '?':
                replacement = sep != '/' ? "[^/\\" + sep + "!]" : "[^/!]";
                consumed    = 1;
                break;

            case '.':
                replacement = "\\.";
                consumed    = 1;
                break;

            case '/':
                // On platforms where the file separator is '/', the slash simply stays as it is.
                replacement = sep != '/' ? "[/\\" + sep + "]" : "/";
                consumed    = 1;
                break;

            case '{':
                // "{0,1}" and "{0,}" are the wildcard-mode spellings of the regex quantifiers "?" and "*"; any
                // other curly-brace quantifier is left untouched.
                if (pattern.regionMatches(idx, "{0,1}", 0, 5)) {
                    replacement = "?";
                    consumed    = 5;
                } else
                if (pattern.regionMatches(idx, "{0,}", 0, 4)) {
                    replacement = "*";
                    consumed    = 4;
                } else {
                    replacement = "{";
                    consumed    = 1;
                }
                break;

            default:
                throw new IllegalStateException();
            }

            pattern = pattern.substring(0, idx) + replacement + pattern.substring(idx + consumed);

            // Continue scanning right behind the generated text, so that it is never re-interpreted.
            idx += replacement.length();
        }

        return Pattern.compile(pattern, jdkFlags);
    }

    /**
     * Splits the given string into "pattern" and "replacement". The "pattern" is the text before the first
     * non-escaped equals sign ("="), the "replacement" is the text after that equals sign.
     * <p>
     *   Example:
     * </p>
     * <blockquote>
     *   {@code "(foo)=$1.bak"} results in <code>{ "(foo)", "$1.bak" }</code>.
     * </blockquote>
     * <p>
     *   Iff there is no non-escaped equals sign, then the resulting replacement is {@code null}.
     * </p>
     *
     * @param pattern The text to split
     * @return        An array of <code>{ pattern, replacement }</code>
     */
    public static String[]
    parsePatternAndReplacement(String pattern) {

        String replacement = null;

        int idx = Pattern2.findMeta("=", pattern, 0);
        if (idx != pattern.length()) {
            replacement = pattern.substring(idx + 1);
            pattern     = pattern.substring(0, idx);
        }

        return new String[] { pattern, replacement };
    }

    /**
     * Finds the next unescaped occurrence of one of the {@code metaCharacters} within {@code subject}, starting at
     * position {@code offset}. Metacharacters can be escaped by backslashes or by '{@code \Q ... \E}'. Characters
     * inside character classes ('{@code [...]}') and inside curly-brace quantifiers are not reported either.
     *
     * @param metaCharacters The characters to search for
     * @param subject        The text to search in
     * @param offset         The index within {@code subject} at which the search starts
     * @return               The position of the next meta character, or {@code subject.length()} iff no meta
     *                       character is found
     */
    public static int
    findMeta(String metaCharacters, String subject, int offset) {

        int     cc    = 0;     // Character class count (character classes may be nested).
        boolean q     = false; // Inside curly-brace quantifier, e.g. "{0,}"
        int     state = 0;     // 0=normal, 1=after backslash, 2=in "\Q...\E", 3=in "\Q...\E" after backslash

        for (; offset != subject.length(); offset++) {
            char c = subject.charAt(offset);

            switch (state) {

            case 0:
                if (c == '\\') {
                    state = 1;
                } else
                if (c == '[') {
                    cc++;
                } else
                if (c == ']' && cc > 0) {
                    cc--;
                } else
                if (cc > 0) {
                    ; // Inside a character class nothing counts as a metacharacter.
                } else
                if (metaCharacters.indexOf(c) != -1 && !q) {
                    return offset;
                } else
                if (c == '{') {
                    q = true;
                } else
                if (c == '}') {
                    q = false;
                }
                break;

            case 1: // After backslash.
                state = c == 'Q' ? 2 : 0;
                break;

            case 2: // In quoted section.
                if (c == '\\') state = 3;
                break;

            case 3: // In quoted section, after backslash.
                if (c == 'E') {
                    state = 0;
                } else
                if (c != '\\') {
                    state = 2;
                }
                break;

            default:
                throw new IllegalStateException();
            }
        }

        return offset;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy