// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
// You can buy this project and download/modify it how often you want.
//
// Copyright 2001 - 2010 Charles W. Rapp
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package net.sf.eBus.util.regex;
import java.io.IOException;
import java.io.StringReader;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;
import net.sf.eBus.util.TernarySearchTree;
/**
* LIKE conditions use a separate parser for their regular
* expression values. Regular expressions are parsed into
* their constituent components. This parser supports the
* SQL regular expression syntax which is limited to:
*
*
* Literals: The argument string's current location must
* match exactly this literal character.
*
*
* []: The argument string's current character must match one
* of the characters in this set. The set's form may be
* either [abcd] or [a-d] but not a combination of the forms.
*
*
* [^]: The argument string's current character must
* not match any character in this set.
*
*
* .: Matches the argument string's current character. Fails
* only if at the argument string's end.
*
*
* *: Matches zero or more of the argument string's
* current characters. This is a reluctant quantifier. This
* means the regular expression "%abc%" matches "12abc3".
* A greedy * quantifier would not because the first "%"
* in the regular expression would greedily consume the
* entire argument string.
*
*
* +: Matches one or more of the argument string's
* current characters. This is a reluctant quantifier.
*
*
* {m, n}: Matches at least m and at most n of the
* argument string's current characters.
*
*
* The reasons for parsing the regular expression are two-fold:
*
*
* To convert the SQL syntax to the java.util.regex.Pattern
* syntax.
*
*
* To support {@link TernarySearchTree#entrySet(Pattern)}.
*
*
*
* @author Charles Rapp
*/
@SuppressWarnings("unchecked")
/* package */ final class RegexLexer
{
//---------------------------------------------------------------
// Member data.
//
//-----------------------------------------------------------
// Constants.
//
// There are four token types: not set, done failed,
// done success and regex component.
// Token type held by a Token that has just been constructed or
// cleared.
/* package */ static final int TOKEN_NOT_SET = 0;
// Token type set by badToken(): lexing stopped on an error.
/* package */ static final int DONE_FAILED = 1;
// Token type set by done(): the input was completely consumed.
/* package */ static final int DONE_SUCCESS = 2;
// Token type set by startToken(): the token carries a regular
// expression component.
/* package */ static final int REGEX_COMPONENT = 3;
// Number of token types; used to size the token type name table.
/* package */ static final int TOKEN_COUNT =
REGEX_COMPONENT + 1;
// Value stored in the minimum and maximum size fields while no
// size has been parsed (constructor, startToken() and failed
// number parsing all store it).
/* package */ static final int SIZE_NOT_SET = -2;
// First character code given an entry in the transition table.
private static final int MIN_ASCII_CHAR = 0;
// Exclusive upper bound of the transition table entries that are
// filled in. nextToken() sends any character at or above this
// value directly to the "alpha" transition.
private static final int MAX_ASCII_CHAR = 128;
// Largest value isOctal() accepts for an octal escape.
private static final int MAX_OCTAL_CHAR = 255;
// Predefined character sets.
// NOTE(review): the range bounds below are not referenced in this
// file; presumably the state machine passes them to
// addToSet(int, int) - confirm against RegexLexerContext.
/* package */ static final int DIGIT_FIRST = '0';
/* package */ static final int DIGIT_LAST = '9';
/* package */ static final int LC_ALPHA_FIRST = 'a';
/* package */ static final int LC_ALPHA_LAST = 'z';
/* package */ static final int UC_ALPHA_FIRST = 'A';
/* package */ static final int UC_ALPHA_LAST = 'Z';
// Space, tab, newline, vertical tab (0x0B), form feed and
// carriage return.
/* package */ static final int[] WHITESPACE =
{' ', '\t', '\n', 0x0B, '\f', '\r'};
//-----------------------------------------------------------
// Statics.
//
/**
 * Logging subsystem interface.
 */
private static final Logger sLogger =
    Logger.getLogger(RegexLexer.class.getName());

// Printable token type names, indexed by token type value.
private static final String[] sTokenTypeNames =
    new String[TOKEN_COUNT];

// Transition lookup table: maps a character code below
// MAX_ASCII_CHAR to the RegexLexerContext transition method
// issued when that character is read.
private static final Method[] sTransMethod =
    new Method[RegexLexer.MAX_ASCII_CHAR + 1];

// Fills in the token type names and the character-to-transition
// table. Transition methods are looked up reflectively by name;
// each takes a single int parameter (the character read).
static
{
    // Characters with their own transition and, at the same
    // index, the name of that transition method.
    final char[] specialChars =
    {
        '*', '+', '?', '.', '[', ']',
        '^', '-', '{', '}', ',', '\\'
    };
    final String[] specialNames =
    {
        "asterisk", "plus", "question_mark", "period",
        "left_bracket", "right_bracket", "up_arrow", "dash",
        "left_brace", "right_brace", "comma", "backslash"
    };

    // Tracks the method currently being looked up so that a
    // failure can report which transition is missing.
    String transName = "";

    sTokenTypeNames[RegexLexer.TOKEN_NOT_SET] = "TOKEN_NOT_SET";
    sTokenTypeNames[RegexLexer.DONE_FAILED] = "DONE_FAILED";
    sTokenTypeNames[RegexLexer.DONE_SUCCESS] = "DONE_SUCCESS";
    sTokenTypeNames[RegexLexer.REGEX_COMPONENT] =
        "REGEX_COMPONENT";

    try
    {
        final Class<RegexLexerContext> fsmClass =
            RegexLexerContext.class;
        final Class<?>[] paramTypes = {int.class};
        final Method alpha;
        final Method digit;
        int i;

        transName = "alpha";
        alpha = fsmClass.getDeclaredMethod(transName, paramTypes);

        transName = "digit";
        digit = fsmClass.getDeclaredMethod(transName, paramTypes);

        // Default every character to the alpha transition.
        // Digits and special characters are overridden below.
        for (i = MIN_ASCII_CHAR; i < MAX_ASCII_CHAR; ++i)
        {
            sTransMethod[i] = alpha;
        }

        // Now set the digits.
        for (i = '0'; i <= '9'; ++i)
        {
            sTransMethod[i] = digit;
        }

        // Now set the special characters.
        for (i = 0; i < specialChars.length; ++i)
        {
            transName = specialNames[i];
            sTransMethod[specialChars[i]] =
                fsmClass.getDeclaredMethod(transName, paramTypes);
        }
    }
    catch (NoSuchMethodException | SecurityException jex)
    {
        // Keep the original best-effort behavior (log and carry
        // on) but name the right class and keep the cause.
        sLogger.log(
            Level.SEVERE,
            String.format(
                "INITIALIZATION ERROR! No such method as net.sf.eBus.util.regex.RegexLexerContext.%s(int).%n",
                transName),
            jex);
    }
} // end of static
//-----------------------------------------------------------
// Locals.
//
// The lexer's state map.
// The lexer's state map.
private final RegexLexerContext mFsm;

// Place the string to be parsed into this reader and
// then read in characters from this reader.
private final StringReader mInput;

// We are now at this character in the input.
private int mIndex;

// Use only one token object.
private final Token mToken;

// Count the number of tokens we generate.
private int mTokenCount;

// Collect the current token string in this buffer before
// generating a literal or character set token from it.
private final StringBuilder mTokenBuffer;

// This flag is set to true when a token has been found or
// an error occurs.
private boolean mStopFlag;

//-----------------------------------------------------------
// The following data members are used to store information
// for the various component types before instantiating the
// Component subclass.
//

// The target component type: LITERAL, CHARACTER_SET or
// ANY_CHAR.
private int mcComponentType;

// Literal: store the literal character.
private int mcLiteral;

// Character Set: the characters collected so far and whether
// the set is negated or not. Also store the *potentially*
// first character in a set range.
private final SortedSet<Character> mcCharacterSet;
private boolean mcNegatedFlag;
private int mcFirstCharacter;

// Range: store incoming digits in the buffer and the
// minimum and maximum range values.
private final StringBuilder mcNumber;
private int mcMinimumSize;
private int mcMaximumSize;
//
// Component data.
//-----------------------------------------------------------
//---------------------------------------------------------------
// Member methods.
//
//-----------------------------------------------------------
// Constructors.
//
/**
 * Creates a regular expression lexer over the given string.
 * Tokens are then pulled from the lexer one at a time by
 * calling {@link #nextToken()}.
 * @param s find tokens in this string.
 */
/* package */ RegexLexer(final String s)
{
    // Collaborators and reusable buffers.
    mFsm = new RegexLexerContext(this);
    mInput = new StringReader(s);
    mToken = new Token();
    mTokenBuffer = new StringBuilder();
    mcCharacterSet = new TreeSet<>();
    mcNumber = new StringBuilder();

    // Lexer position and bookkeeping.
    mIndex = -1;
    mTokenCount = 0;
    mStopFlag = false;

    // Nothing has been collected yet for any component type.
    mcComponentType = 0;
    mcLiteral = -1;
    mcNegatedFlag = false;
    mcFirstCharacter = -1;
    mcMinimumSize = SIZE_NOT_SET;
    mcMaximumSize = SIZE_NOT_SET;
} // end of RegexLexer(String)
//
// end of Constructors.
//-----------------------------------------------------------
/**
 * Reads characters from the input and issues the matching
 * state machine transition for each one until the stop flag
 * is raised, either because a token was completed or because
 * an error was recorded.
 * @return the lexer's single {@link Token} instance, as filled
 * in by the state machine actions or by {@code badToken}.
 */
/* package */ Token nextToken()
{
    mStopFlag = false;

    try
    {
        // The stop flag was just lowered, so at least one
        // character is always read.
        do
        {
            final int ch = readChar();

            if (ch < 0)
            {
                // End of the input string.
                mFsm.EOS();
            }
            else
            {
                // The character goes into the token buffer
                // before its transition is issued.
                mTokenBuffer.append((char) ch);

                if (ch < MAX_ASCII_CHAR)
                {
                    sTransMethod[ch].invoke(mFsm, ch);
                }
                else
                {
                    // No table entry at or beyond
                    // MAX_ASCII_CHAR: treat as alpha.
                    mFsm.alpha(ch);
                }
            }
        } while (!mStopFlag);
    }
    catch (IOException | IllegalAccessException jex)
    {
        badToken(jex);
    }
    catch (InvocationTargetException inex)
    {
        // Report what the transition itself threw rather
        // than the reflection wrapper.
        badToken(inex.getCause());
    }

    return mToken;
} // end of nextToken()
// Reads and returns the next character from the input, or a
// negative value at the end of the string. The position before
// the read is marked so that unreadChar() can step back to it.
/* package */ int readChar()
    throws IOException
{
    mInput.mark(1);
    mIndex += 1;

    final int nextChar = mInput.read();

    return nextChar;
} // end of readChar()
// Unreads the current character by resetting the reader to the
// marked position, stepping the index back and removing the
// character from the end of the token buffer. If the reader
// cannot be reset, the failure is logged and the lexer state
// is left untouched.
/* package */ void unreadChar()
{
    try
    {
        mInput.reset();
    }
    catch (IOException ioex)
    {
        // Previously swallowed silently. Report it so that a
        // failed unread is visible.
        sLogger.log(
            Level.WARNING,
            "unable to unread character at index " + mIndex,
            ioex);

        return;
    }

    --mIndex;

    // Remove this character from the end of the buffer.
    final int bufferSize = mTokenBuffer.length();

    if (bufferSize > 0)
    {
        mTokenBuffer.deleteCharAt(bufferSize - 1);
    }
} // end of unreadChar()
// Shuts the underlying string reader so that no more characters
// can be read from it.
/* package */ void closeInput()
{
    mInput.close();
} // end of closeInput()
//-----------------------------------------------------------
// State Machine Actions
//
/**
 * Reports how many component tokens have been generated so
 * far.
 * @return the number of tokens completed by this lexer.
 */
/* package */ int tokenCount()
{
    return mTokenCount;
} // end of tokenCount()
// Begins a new regular expression component token: wipes the
// shared token and all per-component scratch data, then records
// the component type that is about to be collected.
/* package */ void startToken(final int componentType)
{
    // Reset the shared token and mark it as a component.
    mToken.clear();
    mToken.type(REGEX_COMPONENT);

    // Empty the text buffers.
    mTokenBuffer.setLength(0);
    mcNumber.setLength(0);

    // Reset the per-component scratch data.
    mcCharacterSet.clear();
    mcLiteral = -1;
    mcFirstCharacter = -1;
    mcNegatedFlag = false;
    mcMinimumSize = SIZE_NOT_SET;
    mcMaximumSize = SIZE_NOT_SET;

    mcComponentType = componentType;
} // end of startToken(int)
// Completes the current token: stores the collected token text,
// instantiates the regular expression component matching the
// type given to startToken() with the supplied size limits,
// bumps the token count and raises the stop flag.
/* package */ void endToken(final int minSize,
                            final int maxSize)
{
    mToken.value(mTokenBuffer.toString());

    // Instantiates the appropriate regular expression
    // component.
    switch (mcComponentType)
    {
        case Component.LITERAL:
            mToken.regexComponent(
                new Literal((char) mcLiteral,
                            minSize,
                            maxSize,
                            mTokenCount));
            break;

        case Component.CHARACTER_SET:
        {
            // The component gets its own copy because the
            // lexer's set is cleared when the next token starts.
            final SortedSet<Character> setCopy =
                new TreeSet<>(mcCharacterSet);

            mToken.regexComponent(
                new CharacterSet(
                    setCopy,
                    mcNegatedFlag,
                    minSize,
                    maxSize,
                    mTokenCount));
            break;
        }

        // Every other component type becomes an "any character"
        // component.
        default:
            mToken.regexComponent(
                new AnyChar(minSize, maxSize, mTokenCount));
            break;
    }

    ++mTokenCount;

    // Stop. We have found a token.
    mStopFlag = true;
} // end of endToken(int, int)
// Turns the shared token into an error token carrying the given
// message, the text collected so far and the current input
// index, then raises the stop flag so that nextToken() returns.
/* package */ void badToken(final String message)
{
    mToken.type(DONE_FAILED);
    mToken.value(mTokenBuffer.toString());
    mToken.errorMessage(message);
    mToken.index(mIndex);
    mStopFlag = true;
} // end of badToken(String)

// Turns the shared token into an error token for the given
// exception. The error message is the exception's message or,
// when it has none, its string form. A null cause is tolerated
// because InvocationTargetException.getCause() may return null.
/* package */ void badToken(final Throwable cause)
{
    String message = null;

    if (cause != null)
    {
        message = cause.getMessage();

        // Exceptions created without a message would
        // otherwise leave the token with no error text.
        if (message == null)
        {
            message = cause.toString();
        }
    }

    badToken(message);
    mToken.cause(cause);
} // end of badToken(Throwable)
// Marks the shared token as the successful end of input: no
// component, text, error message or cause remains on it. Raises
// the stop flag so that nextToken() returns.
/* package */ void done()
{
    // Drop anything left over from an earlier token.
    mToken.cause(null);
    mToken.errorMessage(null);
    mToken.value(null);
    mToken.regexComponent(null);

    mToken.type(DONE_SUCCESS);
    mStopFlag = true;
} // end of done()
// Remembers the character that the literal component under
// construction must match.
/* package */ void literal(final int c)
{
    this.mcLiteral = c;
} // end of literal(int)
// Records whether the character set under construction is
// negated.
/* package */ void negatedFlag(final boolean flag)
{
    this.mcNegatedFlag = flag;
} // end of negatedFlag(boolean)
// Reports how many distinct characters have been collected for
// the character set under construction.
/* package */ int characterSetSize()
{
    final int size = mcCharacterSet.size();

    return size;
} // end of characterSetSize()
// Adds one character to the character set under construction.
/* package */ void addToSet(final int c)
{
    final char member = (char) c;

    mcCharacterSet.add(member);
} // end of addToSet(int)

// Adds every character from first through last, both inclusive,
// to the character set under construction. Nothing is added
// when first is greater than last.
/* package */ void addToSet(final int first, final int last)
{
    int current = first;

    while (current <= last)
    {
        mcCharacterSet.add((char) current);
        ++current;
    }
} // end of addToSet(int, int)

// Adds each of the given characters to the character set under
// construction.
/* package */ void addToSet(final int[] cs)
{
    final int count = cs.length;

    for (int i = 0; i < count; ++i)
    {
        mcCharacterSet.add((char) cs[i]);
    }
} // end of addToSet(int[])
// Returns the character remembered as the potential start of a
// character set range, or -1 when none has been stored since the
// token started.
/* package */ int firstCharacter()
{
    return this.mcFirstCharacter;
} // end of firstCharacter()

// Remembers the character that may turn out to be the start of
// a character set range.
/* package */ void firstCharacter(final int c)
{
    this.mcFirstCharacter = c;
} // end of firstCharacter(int)
// Reports how many characters are currently held in the number
// buffer.
/* package */ int numberLength()
{
    final int length = mcNumber.length();

    return length;
} // end of numberLength()
// Returns the number buffer's current contents as a string.
/* package */ String number()
{
    final String text = mcNumber.toString();

    return text;
} // end of number()
// Appends the given character to the end of the number buffer.
/* package */ void appendNumber(final int c)
{
    final char digit = (char) c;

    mcNumber.append(digit);
} // end of appendNumber(int)
// Empties the number buffer.
/* package */ void clearNumber()
{
    mcNumber.setLength(0);
} // end of clearNumber()
// Returns the stored minimum size, or SIZE_NOT_SET when none has
// been stored.
/* package */ int minimumSize()
{
    return this.mcMinimumSize;
} // end of minimumSize()

// Stores the given minimum size.
/* package */ void minimumSize(final int size)
{
    this.mcMinimumSize = size;
} // end of minimumSize(int)

// Parses the given text as a decimal integer and stores it as
// the minimum size. Returns true on success. When the text is
// not a valid integer, the minimum size is reset to
// SIZE_NOT_SET and false is returned.
/* package */ boolean minimumSize(final String s)
{
    try
    {
        mcMinimumSize = Integer.parseInt(s);

        return true;
    }
    catch (NumberFormatException formex)
    {
        mcMinimumSize = SIZE_NOT_SET;

        return false;
    }
} // end of minimumSize(String)
// Returns the stored maximum size, or SIZE_NOT_SET when none has
// been stored.
/* package */ int maximumSize()
{
    return this.mcMaximumSize;
} // end of maximumSize()

// Parses the given text as a decimal integer and stores it as
// the maximum size. Returns true on success. When the text is
// not a valid integer, the maximum size is reset to
// SIZE_NOT_SET and false is returned.
/* package */ boolean maximumSize(final String s)
{
    try
    {
        mcMaximumSize = Integer.parseInt(s);

        return true;
    }
    catch (NumberFormatException formex)
    {
        mcMaximumSize = SIZE_NOT_SET;

        return false;
    }
} // end of maximumSize(String)
// Returns true when the token buffer's text from its third
// character onward parses as an octal number between 0 and
// MAX_OCTAL_CHAR inclusive. Text that is not a valid octal
// number yields false rather than an exception, matching how
// octalChar() and hexChar() treat unparsable text.
/* package */ boolean isOctal()
{
    boolean retcode;

    try
    {
        final int value =
            Integer.parseInt(mTokenBuffer.substring(2), 8);

        // parseInt accepts a leading sign, so reject negative
        // values explicitly.
        retcode = (value >= 0 && value <= MAX_OCTAL_CHAR);
    }
    catch (NumberFormatException formex)
    {
        retcode = false;
    }

    return (retcode);
} // end of isOctal()
// Parses the token buffer's text from its third character onward
// as an octal number and returns it as a character. Returns
// Character.MIN_VALUE when that text is not a valid octal
// number.
/* package */ char octalChar()
{
    final String digits = mTokenBuffer.substring(2);

    try
    {
        return (char) Integer.parseInt(digits, 8);
    }
    catch (NumberFormatException jex)
    {
        return Character.MIN_VALUE;
    }
} // end of octalChar()
// Parses the token buffer's text from its third character onward
// as a hexadecimal number and returns it as a character. Returns
// Character.MIN_VALUE when that text is not a valid hexadecimal
// number.
/* package */ char hexChar()
{
    final String digits = mTokenBuffer.substring(2);

    try
    {
        return (char) Integer.parseInt(digits, 16);
    }
    catch (NumberFormatException jex)
    {
        return Character.MIN_VALUE;
    }
} // end of hexChar()
//
// end of State Machine Actions
//-----------------------------------------------------------
//---------------------------------------------------------------
// Inner classes.
//
// Token information is placed in this class and the Token
// instance passed to the parser.
public static final class Token
{
//---------------------------------------------------------------
// Member data.
//

    //-------------------------------------------------------
    // Locals.
    //

    // One of the enclosing lexer's token type constants.
    private int mType;

    // The regular expression component carried by this token,
    // if any.
    private Component mRegexComponent;

    // The input text collected for this token.
    private String mValue;

    // Error description and underlying exception, if any.
    private String mErrorMessage;
    private Throwable mCause;

    // Input index stored with the token; -1 when not set.
    private int mIndex;

//---------------------------------------------------------------
// Member methods.
//

    //-------------------------------------------------------
    // Constructors.
    //

    // Creates a token in the same "not set" state that
    // clear() produces.
    public Token()
    {
        clear();
    } // end of Token()

    //
    // end of Constructors.
    //-------------------------------------------------------

    //-------------------------------------------------------
    // Get methods.
    //

    // Returns this token's type.
    public int type()
    {
        return mType;
    } // end of type()

    // Returns the regular expression component; may be null.
    public Component regexComponent()
    {
        return mRegexComponent;
    } // end of regexComponent()

    // Returns the token text; may be null.
    public String value()
    {
        return mValue;
    } // end of value()

    // Returns the error message; may be null.
    public String errorMessage()
    {
        return mErrorMessage;
    } // end of errorMessage()

    // Returns the exception behind an error; may be null.
    public Throwable cause()
    {
        return mCause;
    } // end of cause()

    // Returns the stored input index, or -1 when not set.
    public int index()
    {
        return mIndex;
    } // end of index()

    //
    // end of Get methods.
    //-------------------------------------------------------

    //-------------------------------------------------------
    // Set methods.
    //

    // Sets this token's type.
    public void type(final int type)
    {
        this.mType = type;
    } // end of type(int)

    // Sets the regular expression component.
    public void regexComponent(final Component component)
    {
        this.mRegexComponent = component;
    } // end of regexComponent(Component)

    // Sets the token text.
    public void value(final String value)
    {
        this.mValue = value;
    } // end of value(String)

    // Sets the error message.
    public void errorMessage(final String message)
    {
        this.mErrorMessage = message;
    } // end of errorMessage(String)

    // Sets the exception behind an error.
    public void cause(final Throwable cause)
    {
        this.mCause = cause;
    } // end of cause(Throwable)

    // Sets the input index.
    public void index(final int index)
    {
        this.mIndex = index;
    } // end of index(int)

    //
    // end of Set methods.
    //-------------------------------------------------------

    // Returns every field to its initial "not set" value.
    public void clear()
    {
        mIndex = -1;
        mCause = null;
        mErrorMessage = null;
        mValue = null;
        mRegexComponent = null;
        mType = TOKEN_NOT_SET;
    } // end of clear()
} // end of class Token
} // end of class RegexLexer