All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twelvemonkeys.util.regex.REWildcardStringParser Maven / Gradle / Ivy

/*
 * Copyright (c) 2008, Harald Kuhr
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name "TwelveMonkeys" nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package com.twelvemonkeys.util.regex;

import com.twelvemonkeys.util.DebugUtil;

import java.io.PrintStream;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

/**
 * This class parses arbitrary strings against a wildcard string mask provided.
 * The wildcard characters are '*' and '?'.
 * 

* The string masks provided are treated as case sensitive.
* Null-valued string masks as well as null valued strings to be parsed, will lead to rejection. * *


* * This task is performed based on regular expression techniques. * The possibilities of string generation with the well-known wildcard characters stated above, * represent a subset of the possibilities of string generation with regular expressions.
* The '*' corresponds to ([Union of all characters in the alphabet])*
* The '?' corresponds to ([Union of all characters in the alphabet])
*       These expressions are not suited for textual representation at all, I must say. Is there any math tags included in HTML? * *

* * This class uses the Regexp package from Apache's Jakarta Project, links below. * *


* * Examples of usage:
* This example will return "Accepted!". *

 * REWildcardStringParser parser = new REWildcardStringParser("*_28????.jp*");
 * if (parser.parseString("gupu_280915.jpg")) {
 *     System.out.println("Accepted!");
 * } else {
 *     System.out.println("Not accepted!");
 * }
 * 
* *


* * @author Eirik Torske * @see Jakarta Regexp * @see {@code org.apache.regexp.RE} * @see com.twelvemonkeys.util.regex.WildcardStringParser * * @todo Rewrite to use this regex package, and not Jakarta directly! */ public class REWildcardStringParser /*extends EntityObject*/ { // Constants /** Field ALPHABET */ public static final char[] ALPHABET = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '?', '?', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'N', 'M', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '?', '?', '?', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.', '_', '-' }; /** Field FREE_RANGE_CHARACTER */ public static final char FREE_RANGE_CHARACTER = '*'; /** Field FREE_PASS_CHARACTER */ public static final char FREE_PASS_CHARACTER = '?'; // Members Pattern mRegexpParser; String mStringMask; boolean mInitialized = false; int mTotalNumberOfStringsParsed; boolean mDebugging; PrintStream out; // Properties // Constructors /** * Creates a wildcard string parser. *

* @param pStringMask the wildcard string mask. */ public REWildcardStringParser(final String pStringMask) { this(pStringMask, false); } /** * Creates a wildcard string parser. *

* @param pStringMask the wildcard string mask. * @param pDebugging {@code true} will cause debug messages to be emitted to {@code System.out}. */ public REWildcardStringParser(final String pStringMask, final boolean pDebugging) { this(pStringMask, pDebugging, System.out); } /** * Creates a wildcard string parser. *

* @param pStringMask the wildcard string mask. * @param pDebugging {@code true} will cause debug messages to be emitted. * @param pDebuggingPrintStream the {@code java.io.PrintStream} to which the debug messages will be emitted. */ public REWildcardStringParser(final String pStringMask, final boolean pDebugging, final PrintStream pDebuggingPrintStream) { this.mStringMask = pStringMask; this.mDebugging = pDebugging; this.out = pDebuggingPrintStream; mInitialized = buildRegexpParser(); } // Methods /** * Converts wildcard string mask to regular expression. * This method should reside in som utility class, but I don't know how proprietary the regular expression format is... * @return the corresponding regular expression or {@code null} if an error occurred. */ private String convertWildcardExpressionToRegularExpression(final String pWildcardExpression) { if (pWildcardExpression == null) { if (mDebugging) { out.println(DebugUtil.getPrefixDebugMessage(this) + "wildcard expression is null - also returning null as regexp!"); } return null; } StringBuilder regexpBuffer = new StringBuilder(); boolean convertingError = false; for (int i = 0; i < pWildcardExpression.length(); i++) { if (convertingError) { return null; } // Free-range character '*' char stringMaskChar = pWildcardExpression.charAt(i); if (isFreeRangeCharacter(stringMaskChar)) { regexpBuffer.append("(([a-?A-?0-9]|.|_|-)*)"); } // Free-pass character '?' else if (isFreePassCharacter(stringMaskChar)) { regexpBuffer.append("([a-?A_?0-9]|.|_|-)"); } // Valid characters else if (isInAlphabet(stringMaskChar)) { regexpBuffer.append(stringMaskChar); } // Invalid character - aborting else { if (mDebugging) { out.println(DebugUtil.getPrefixDebugMessage(this) + "one or more characters in string mask are not legal characters - returning null as regexp!"); } convertingError = true; } } return regexpBuffer.toString(); } /** * Builds the regexp parser. */ private boolean buildRegexpParser() { // Convert wildcard string mask to regular expression String regexp = convertWildcardExpressionToRegularExpression(mStringMask); if (regexp == null) { out.println(DebugUtil.getPrefixErrorMessage(this) + "irregularity in regexp conversion - now not able to parse any strings, all strings will be rejected!"); return false; } // Instantiate a regular expression parser try { mRegexpParser = Pattern.compile(regexp); } catch (PatternSyntaxException e) { if (mDebugging) { out.println(DebugUtil.getPrefixErrorMessage(this) + "RESyntaxException \"" + e.getMessage() + "\" caught - now not able to parse any strings, all strings will be rejected!"); } if (mDebugging) { e.printStackTrace(System.err); } return false; } if (mDebugging) { out.println(DebugUtil.getPrefixDebugMessage(this) + "regular expression parser from regular expression " + regexp + " extracted from wildcard string mask " + mStringMask + "."); } return true; } /** * Simple check of the string to be parsed. */ private boolean checkStringToBeParsed(final String pStringToBeParsed) { // Check for nullness if (pStringToBeParsed == null) { if (mDebugging) { out.println(DebugUtil.getPrefixDebugMessage(this) + "string to be parsed is null - rejection!"); } return false; } // Check if valid character (element in alphabet) for (int i = 0; i < pStringToBeParsed.length(); i++) { if (!isInAlphabet(pStringToBeParsed.charAt(i))) { if (mDebugging) { out.println(DebugUtil.getPrefixDebugMessage(this) + "one or more characters in string to be parsed are not legal characters - rejection!"); } return false; } } return true; } /** * Tests if a certain character is a valid character in the alphabet that is applying for this automaton. */ public static boolean isInAlphabet(final char pCharToCheck) { for (int i = 0; i < ALPHABET.length; i++) { if (pCharToCheck == ALPHABET[i]) { return true; } } return false; } /** * Tests if a certain character is the designated "free-range" character ('*'). */ public static boolean isFreeRangeCharacter(final char pCharToCheck) { return pCharToCheck == FREE_RANGE_CHARACTER; } /** * Tests if a certain character is the designated "free-pass" character ('?'). */ public static boolean isFreePassCharacter(final char pCharToCheck) { return pCharToCheck == FREE_PASS_CHARACTER; } /** * Tests if a certain character is a wildcard character ('*' or '?'). */ public static boolean isWildcardCharacter(final char pCharToCheck) { return ((isFreeRangeCharacter(pCharToCheck)) || (isFreePassCharacter(pCharToCheck))); } /** * Gets the string mask that was used when building the parser atomaton. *

* @return the string mask used for building the parser automaton. */ public String getStringMask() { return mStringMask; } /** * Parses a string. *

* * @param pStringToBeParsed * @return {@code true} if and only if the string are accepted by the parser. */ public boolean parseString(final String pStringToBeParsed) { if (mDebugging) { out.println(); } if (mDebugging) { out.println(DebugUtil.getPrefixDebugMessage(this) + "parsing \"" + pStringToBeParsed + "\"..."); } // Update statistics mTotalNumberOfStringsParsed++; // Check string to be parsed if (!checkStringToBeParsed(pStringToBeParsed)) { return false; } // Perform parsing and return accetance/rejection flag if (mInitialized) { return mRegexpParser.matcher(pStringToBeParsed).matches(); } else { out.println(DebugUtil.getPrefixErrorMessage(this) + "trying to use non-initialized parser - string rejected!"); } return false; } /* * Overriding mandatory methods from EntityObject's. */ /** * Method toString * * * @return * */ public String toString() { StringBuilder buffer = new StringBuilder(); buffer.append(DebugUtil.getClassName(this)); buffer.append(": String mask "); buffer.append(mStringMask); buffer.append("\n"); return buffer.toString(); } // Just taking the lazy, easy and dangerous way out /** * Method equals * * * @param pObject * * @return * */ public boolean equals(Object pObject) { if (pObject instanceof REWildcardStringParser) { REWildcardStringParser externalParser = (REWildcardStringParser) pObject; return (externalParser.mStringMask == this.mStringMask); } return ((Object) this).equals(pObject); } // Just taking the lazy, easy and dangerous way out /** * Method hashCode * * * @return * */ public int hashCode() { return ((Object) this).hashCode(); } protected Object clone() throws CloneNotSupportedException { return new REWildcardStringParser(mStringMask); } // Just taking the lazy, easy and dangerous way out protected void finalize() throws Throwable {} } /*--- Formatted in Sun Java Convention Style on ma, des 1, '03 ---*/ /*------ Formatted by Jindent 3.23 Basic 1.0 --- http://www.jindent.de ------*/





© 2015 - 2025 Weber Informatics LLC | Privacy Policy