// org.apache.oro.text.awk.AwkCompiler
/*
* $Id: AwkCompiler.java,v 1.10 2003/11/07 20:16:24 dfs Exp $
*
* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2000 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
* must not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache"
* or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
* name, without prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.oro.text.awk;
import org.apache.oro.text.regex.*;
/**
* The AwkCompiler class is used to create compiled regular expressions
* conforming to the Awk regular expression syntax. It generates
* AwkPattern instances upon compilation to be used in conjunction
* with an AwkMatcher instance. AwkMatcher finds true leftmost-longest
* matches, so you must take care with how you formulate your regular
* expression to avoid matching more than you really want.
*
* The supported regular expression syntax is a superset of traditional AWK,
* but NOT to be confused with GNU AWK or other AWK variants. Additionally,
* this AWK implementation is DFA-based and only supports 8-bit ASCII.
* Consequently, these classes can perform very fast pattern matches in
* most cases.
*
* This is the traditional Awk syntax that is supported:
* <ul>
* <li> Alternatives separated by |
* <li> Quantified atoms
* <dl>
* <dt> * <dd> Match 0 or more times.
* <dt> + <dd> Match 1 or more times.
* <dt> ? <dd> Match 0 or 1 times.
* </dl>
* <li> Atoms
* <ul>
* <li> regular expression within parentheses
* <li> a . matches everything including newline
* <li> a ^ is a null token matching the beginning of a string
* but has no relation to newlines (and is only valid at the
* beginning of a regex; this differs from traditional awk
* for the sake of efficiency in Java).
* <li> a $ is a null token matching the end of a string but has
* no relation to newlines (and is only valid at the
* end of a regex; this differs from traditional awk for the
* sake of efficiency in Java).
* <li> Character classes (e.g., [abcd]) and ranges (e.g. [a-z])
* <ul>
* <li> Special backslashed characters work within a character class
* </ul>
* <li> Special backslashed characters
* <dl>
* <dt> \b <dd> backspace
* <dt> \n <dd> newline
* <dt> \r <dd> carriage return
* <dt> \t <dd> tab
* <dt> \f <dd> formfeed
* <dt> \xnn <dd> hexadecimal representation of character
* <dt> \nn or \nnn <dd> octal representation of character
* <dt> Any other backslashed character <dd> matches itself
* </dl>
* </ul>
* </ul>
*
*
*
* This is the extended syntax that is supported:
* <ul>
* <li> Quantified atoms
* <dl>
* <dt> {n,m} <dd> Match at least n but not more than m times.
* <dt> {n,} <dd> Match at least n times.
* <dt> {n} <dd> Match exactly n times.
* </dl>
* <li> Atoms
* <ul>
* <li> Special backslashed characters
* <dl>
* <dt> \d <dd> digit [0-9]
* <dt> \D <dd> non-digit [^0-9]
* <dt> \w <dd> word character [0-9a-z_A-Z]
* <dt> \W <dd> a non-word character [^0-9a-z_A-Z]
* <dt> \s <dd> a whitespace character [ \t\n\r\f]
* <dt> \S <dd> a non-whitespace character [^ \t\n\r\f]
* <dt> \cD <dd> matches the corresponding control character
* <dt> \0 <dd> matches null character
* </dl>
* </ul>
* </ul>
*
*
*
* @version @version@
* @since 1.0
* @see org.apache.oro.text.regex.PatternCompiler
* @see org.apache.oro.text.regex.MalformedPatternException
* @see AwkPattern
* @see AwkMatcher
*/
public final class AwkCompiler implements PatternCompiler {
/**
* The default mask for the {@link #compile compile} methods.
* It is equal to 0 and indicates no special options are active.
*/
public static final int DEFAULT_MASK = 0;
/**
* A mask passed as an option to the {@link #compile compile} methods
* to indicate a compiled regular expression should be case insensitive.
*/
public static final int CASE_INSENSITIVE_MASK = 0x0001;
/**
* A mask passed as an option to the {@link #compile compile} methods
* to indicate a compiled regular expression should treat input as having
* multiple lines. This option affects the interpretation of
* the . metacharacter. When this mask is used,
* the . metacharacter will not match newlines. The default
* behavior is for . to match newlines.
*/
public static final int MULTILINE_MASK = 0x0002;
// Sentinel stored in __lookahead once every character of the expression
// has been consumed (code point 0xFFFF).
static final char _END_OF_INPUT = '\uFFFF';
// All of these are initialized by the compile() and _parse() methods
// so there is no need or use in initializing them in the constructor
// although this may change in the future.
//
// Option and parse-mode flags: whether the parser is currently inside a
// [...] character class, and the two flags derived from the compile options.
private boolean __inCharacterClass, __caseSensitive, __multiline;
// Set by _parse() when the expression starts with ^ / ends with $.
private boolean __beginAnchor, __endAnchor;
// The current, not yet consumed, character (or _END_OF_INPUT).
private char __lookahead;
// __position: position number handed to the next leaf node created.
// __bytesRead: index of the next character __match() will read.
// __expressionLength: number of leading characters of __regularExpression
// that are parsed (a trailing $ anchor is excluded).
private int __position, __bytesRead, __expressionLength;
// The expression being parsed.
private char[] __regularExpression;
// Counts of '(' opened and ')' closed so far; used to detect a stray ')'.
private int __openParen, __closeParen;
// We do not currently need to initialize any state, but keep this
// commented out as a reminder that we may have to at some point.
//public AwkCompiler() { }
/**
 * Tells whether a character is a regular expression operator that cannot
 * be used as a literal atom without being escaped. The anchors ^ and $
 * are not treated as metacharacters here.
 *
 * @param token The character to classify.
 * @return true if token is one of the operator characters.
 */
private static boolean __isMetachar(char token) {
  switch(token) {
  case '*':
  case '?':
  case '+':
  case '[':
  case ']':
  case '(':
  case ')':
  case '|':
  case '.':
    return true;
  default:
    return false;
  }
}
/**
 * Tells whether a character belongs to the word class [0-9a-z_A-Z].
 *
 * @param token The character to classify.
 * @return true if token is an ASCII letter, an ASCII digit or an underscore.
 */
static boolean _isWordCharacter(char token) {
  if(token == '_')
    return true;
  if(token >= '0' && token <= '9')
    return true;
  if(token >= 'A' && token <= 'Z')
    return true;
  return (token >= 'a' && token <= 'z');
}
/**
 * Tells whether a character is an ASCII lower case letter.
 *
 * @param token The character to classify.
 * @return true if token lies in the range a through z.
 */
static boolean _isLowerCase(char token){
  return !(token < 'a' || token > 'z');
}
/**
 * Tells whether a character is an ASCII upper case letter.
 *
 * @param token The character to classify.
 * @return true if token lies in the range A through Z.
 */
static boolean _isUpperCase(char token){
  return !(token < 'A' || token > 'Z');
}
/**
 * Returns the opposite-case form of an ASCII letter. Any other character
 * is returned unchanged.
 *
 * @param token The character to convert.
 * @return The lower case form of an upper case letter, the upper case
 *         form of a lower case letter, or token itself.
 */
static char _toggleCase(char token){
  // ASCII upper and lower case letters differ only in bit 0x20 (32),
  // so flipping that bit adds 32 to A-Z and subtracts 32 from a-z.
  if(_isUpperCase(token) || _isLowerCase(token))
    return (char)(token ^ 0x20);
  return token;
}
/**
 * Consumes the current lookahead character, which must equal the expected
 * token, and loads the next character of the expression into the
 * lookahead (or _END_OF_INPUT when the expression is exhausted).
 *
 * @param token The character the lookahead is expected to hold.
 * @exception MalformedPatternException If the lookahead differs from token.
 */
private void __match(char token) throws MalformedPatternException {
  if(token != __lookahead)
    throw new MalformedPatternException("token: " + token +
                                        " does not match lookahead: " +
                                        __lookahead + " at position: " +
                                        __bytesRead);

  __lookahead = (__bytesRead < __expressionLength
                 ? __regularExpression[__bytesRead++]
                 : _END_OF_INPUT);
}
/**
 * Undoes the most recent __match(): the previously consumed character
 * becomes the lookahead again. The read index only moves back when the
 * lookahead is a real character, because reaching _END_OF_INPUT does not
 * advance it.
 */
private void __putback() {
  int readIndex = __bytesRead;

  if(__lookahead != _END_OF_INPUT)
    readIndex--;

  __bytesRead = readIndex;
  __lookahead = __regularExpression[readIndex - 1];
}
/**
 * Parses a sequence of branches separated by |. The alternation is
 * right-recursive: everything after the first | becomes the right operand
 * of the OrNode.
 *
 * @return The branch itself, or an OrNode joining it to the alternatives
 *         that follow.
 * @exception MalformedPatternException If a branch cannot be parsed.
 */
private SyntaxNode __regex() throws MalformedPatternException {
  SyntaxNode firstBranch = __branch();

  if(__lookahead != '|')
    return firstBranch;

  __match('|');
  return new OrNode(firstBranch, __regex());
}
/**
 * Parses a concatenation of pieces up to the next |, the closing
 * parenthesis of the enclosing group, or the end of the expression.
 * Two or more pieces are chained into a right-leaning list of CatNodes.
 *
 * @return The single piece, or the root CatNode of the chain.
 * @exception MalformedPatternException If a piece cannot be parsed or a
 *            close parenthesis has no matching open parenthesis.
 */
private SyntaxNode __branch() throws MalformedPatternException {
  SyntaxNode piece = __piece();

  if(__atBranchEnd())
    return piece;

  CatNode root = new CatNode();
  CatNode tail = root;
  tail._left = piece;

  for(;;) {
    piece = __piece();

    if(__atBranchEnd()) {
      // Last piece: it closes the chain as the right child.
      tail._right = piece;
      return root;
    }

    CatNode next = new CatNode();
    tail._right = next;
    next._left = piece;
    tail = next;
  }
}

/**
 * Tells whether the lookahead terminates the current branch.
 *
 * @return true at |, at end of input, or at a ) that closes an open group.
 * @exception MalformedPatternException If the lookahead is a ) and no
 *            group is open.
 */
private boolean __atBranchEnd() throws MalformedPatternException {
  if(__lookahead == ')') {
    if(__openParen > __closeParen)
      return true;
    throw
      new MalformedPatternException("Parse error: close parenthesis"
       + " without matching open parenthesis at position " + __bytesRead);
  }

  return (__lookahead == '|' || __lookahead == _END_OF_INPUT);
}
/**
 * Parses an atom together with the quantifier that may follow it
 * (+, ?, * or an interval starting with an open brace).
 *
 * @return The atom, wrapped in the node for its quantifier if one is present.
 * @exception MalformedPatternException If the atom or interval is malformed.
 */
private SyntaxNode __piece() throws MalformedPatternException {
  SyntaxNode operand = __atom();

  if(__lookahead == '+') {
    __match('+');
    return new PlusNode(operand);
  }
  if(__lookahead == '?') {
    __match('?');
    return new QuestionNode(operand);
  }
  if(__lookahead == '*') {
    __match('*');
    return new StarNode(operand);
  }
  if(__lookahead == '{')
    return __repetition(operand);

  return operand;
}
/**
 * Reads a run of digits in the given radix from the expression, starting
 * at the lookahead, and returns their numeric value. Reading stops at the
 * first non-digit or once maxDigits digits have been consumed.
 *
 * @param radix The radix the digits are written in.
 * @param minDigits The smallest acceptable number of digits.
 * @param maxDigits The largest number of digits to consume.
 * @return The value of the digits read.
 * @exception MalformedPatternException If the number of digits read is
 *            outside the accepted bounds or the digits do not form a
 *            value that fits in an int.
 */
private int __parseUnsignedInteger(int radix, int minDigits, int maxDigits)
  throws MalformedPatternException {
  // We don't expect huge numbers, so an initial buffer of 4 is fine.
  StringBuffer text = new StringBuffer(4);
  int count;

  for(count = 0;
      count < maxDigits && Character.digit(__lookahead, radix) != -1;
      count++) {
    text.append((char)__lookahead);
    __match(__lookahead);
  }

  if(count < minDigits || count > maxDigits)
    throw
      new MalformedPatternException(
     "Parse error: unexpected number of digits at position " + __bytesRead);

  try {
    return Integer.parseInt(text.toString(), radix);
  } catch(NumberFormatException e) {
    throw
      new MalformedPatternException("Parse error: numeric value at " +
                                    "position " + __bytesRead + " is invalid");
  }
}
/**
 * Parses an interval quantifier of the form {n}, {n,} or {n,m} that
 * follows an atom and expands it into an equivalent tree built from
 * copies of the atom: a chain of CatNodes for the mandatory copies, with
 * QuestionNode or StarNode wrappers for the optional ones.
 *
 * @param atom The already parsed atom the interval applies to.
 * @return The root of the expanded subtree. This is the atom itself, or a
 *         single StarNode, PlusNode or QuestionNode, when the interval
 *         reduces to one of those.
 * @exception MalformedPatternException If the interval is syntactically
 *            invalid, if its upper bound is below its lower bound, or if
 *            it asks for exactly zero occurrences.
 */
private SyntaxNode __repetition(SyntaxNode atom)
throws MalformedPatternException {
int min, max, startPosition[];
SyntaxNode root = null;
CatNode catNode;
__match('{');
min = __parseUnsignedInteger(10, 1, Integer.MAX_VALUE);
// The position counter is handed to _clone() in a one-element array and
// copied back into __position at the end of this method.
// NOTE(review): presumably _clone() advances the counter for every leaf
// it copies -- confirm against SyntaxNode._clone.
startPosition = new int[1];
startPosition[0] = __position;
if(__lookahead == '}'){
// Match exactly min times. Concatenate the atom min times.
__match('}');
if(min == 0)
throw
new MalformedPatternException(
"Parse error: Superfluous interval specified at position " +
__bytesRead + ". Number of occurences was set to zero.");
if(min == 1)
return atom;
root = catNode = new CatNode();
catNode._left = atom;
// The original atom is the first copy and the clone added after the loop
// is the last one, so the loop only supplies the min - 2 in between.
while(--min > 1) {
atom = atom._clone(startPosition);
catNode._right = new CatNode();
catNode = (CatNode)catNode._right;
catNode._left = atom;
}
catNode._right = atom._clone(startPosition);
} else if(__lookahead == ','){
__match(',');
if(__lookahead == '}') {
// match at least min times
__match('}');
if(min == 0)
return new StarNode(atom);
if(min == 1)
return new PlusNode(atom);
// min mandatory copies followed by a starred copy for the open-ended tail.
root = catNode = new CatNode();
catNode._left = atom;
while(--min > 0) {
atom = atom._clone(startPosition);
catNode._right = new CatNode();
catNode = (CatNode)catNode._right;
catNode._left = atom;
}
catNode._right = new StarNode(atom._clone(startPosition));
} else {
// match at least min times and at most max times
max = __parseUnsignedInteger(10, 1, Integer.MAX_VALUE);
__match('}');
if(max < min)
throw
new MalformedPatternException("Parse error: invalid interval; "
+ max + " is less than " + min + " at position " + __bytesRead);
if(max == 0)
throw
new MalformedPatternException(
"Parse error: Superfluous interval specified at position " +
__bytesRead + ". Number of occurences was set to zero.");
if(min == 0) {
// {0,max}: max optional copies in a row.
if(max == 1)
return new QuestionNode(atom);
root = catNode = new CatNode();
atom = new QuestionNode(atom);
catNode._left = atom;
while(--max > 1) {
atom = atom._clone(startPosition);
catNode._right = new CatNode();
catNode = (CatNode)catNode._right;
catNode._left = atom;
}
catNode._right = atom._clone(startPosition);
} else if(min == max) {
// {n,n} is the same as {n}: exactly min copies.
if(min == 1)
return atom;
root = catNode = new CatNode();
catNode._left = atom;
while(--min > 1) {
atom = atom._clone(startPosition);
catNode._right = new CatNode();
catNode = (CatNode)catNode._right;
catNode._left = atom;
}
catNode._right = atom._clone(startPosition);
} else {
// General case: min mandatory copies followed by max - min optional ones.
int count;
root = catNode = new CatNode();
catNode._left = atom;
for(count=1; count < min; count++) {
atom = atom._clone(startPosition);
catNode._right = new CatNode();
catNode = (CatNode)catNode._right;
catNode._left = atom;
}
atom = new QuestionNode(atom._clone(startPosition));
count = max-min;
if(count == 1)
catNode._right = atom;
else {
catNode._right = new CatNode();
catNode = (CatNode)catNode._right;
catNode._left = atom;
while(--count > 1) {
atom = atom._clone(startPosition);
catNode._right = new CatNode();
catNode = (CatNode)catNode._right;
catNode._left = atom;
}
catNode._right = atom._clone(startPosition);
}
}
}
} else
throw
new MalformedPatternException("Parse error: unexpected character " +
__lookahead + " in interval at position " + __bytesRead);
// Publish the position counter as updated through the _clone() calls.
// The early returns above skip this, but none of them clones anything.
__position = startPosition[0];
return root;
}
/**
 * Parses a backslash escape, starting at the backslash itself, and returns
 * the leaf node that matches it. The recognized forms are:
 * \xnn (two hexadecimal digits), \cX (control character), \nn or \nnn
 * (octal character code), \0 (null character), a single escaped digit,
 * \b (backspace), \n \r \t \f, the class shorthands \d \D \w \W \s \S,
 * and any other character, which matches itself.
 *
 * @return A TokenNode for a single character, or a CharacterClassNode for
 *         a class shorthand (and for a letter when case insensitive
 *         matching is active outside of a character class).
 * @exception MalformedPatternException If the lookahead is not a
 *            backslash, if a hexadecimal escape does not have exactly two
 *            digits, or if a multi-digit escape is not a valid octal
 *            number (for example because it contains the digit 8 or 9).
 */
private SyntaxNode __backslashToken() throws MalformedPatternException {
  SyntaxNode current;
  char token;
  int number;

  __match('\\');

  if(__lookahead == 'x'){
    __match('x');
    // Parse a hexadecimal number
    current = _newTokenNode((char)__parseUnsignedInteger(16, 2, 2),
                            __position++);
  } else if(__lookahead == 'c') {
    __match('c');
    // Create a control character
    token = Character.toUpperCase(__lookahead);
    token = (char)(token > 63 ? token - 64 : token + 64);
    current = new TokenNode(token, __position++);
    __match(__lookahead);
  } else if(__lookahead >= '0' && __lookahead <= '9') {
    // Consume the first digit only to peek at what follows it.
    __match(__lookahead);

    if(__lookahead >= '0' && __lookahead <= '9'){
      // We have an octal character or a multi-digit backreference.
      // Assume octal character for now.
      __putback();
      // The digits are collected as a decimal number and then
      // reinterpreted as octal.
      number = __parseUnsignedInteger(10, 2, 3);
      try {
        number = Integer.parseInt(Integer.toString(number), 8);
      } catch(NumberFormatException e) {
        // A digit 8 or 9 is not octal. Report it as a pattern error
        // rather than letting the unchecked exception escape.
        throw
          new MalformedPatternException(
            "Parse error: invalid octal escape at position " + __bytesRead);
      }
      current = _newTokenNode((char)number, __position++);
    } else {
      // We have either \0, an escaped digit, or a backreference.
      __putback();
      if(__lookahead == '0'){
        // \0 matches the null character
        __match('0');
        current = new TokenNode('\0', __position++);
      } else {
        // Backreferences are not supported: the digit matches itself.
        current = _newTokenNode(__lookahead, __position++);
        __match(__lookahead);
      }
    }
  } else if(__lookahead == 'b') {
    // \b always means backspace, inside and outside of a character class.
    current = new TokenNode('\b', __position++);
    __match('b');
  } else {
    CharacterClassNode characterSet;

    // Translate the single-letter control escapes. Anything that is
    // neither one of them nor a class shorthand matches itself.
    token = __lookahead;

    switch(__lookahead){
    case 'n' : token = '\n'; break;
    case 'r' : token = '\r'; break;
    case 't' : token = '\t'; break;
    case 'f' : token = '\f'; break;
    }

    switch(token) {
    case 'd' :
      characterSet = new CharacterClassNode(__position++);
      characterSet._addTokenRange('0', '9');
      current = characterSet;
      break;
    case 'D' :
      characterSet = new NegativeCharacterClassNode(__position++);
      characterSet._addTokenRange('0', '9');
      current = characterSet;
      break;
    case 'w' :
      characterSet = new CharacterClassNode(__position++);
      characterSet._addTokenRange('0', '9');
      characterSet._addTokenRange('a', 'z');
      characterSet._addTokenRange('A', 'Z');
      characterSet._addToken('_');
      current = characterSet;
      break;
    case 'W' :
      characterSet = new NegativeCharacterClassNode(__position++);
      characterSet._addTokenRange('0', '9');
      characterSet._addTokenRange('a', 'z');
      characterSet._addTokenRange('A', 'Z');
      characterSet._addToken('_');
      current = characterSet;
      break;
    case 's' :
      characterSet = new CharacterClassNode(__position++);
      characterSet._addToken(' ');
      characterSet._addToken('\f');
      characterSet._addToken('\n');
      characterSet._addToken('\r');
      characterSet._addToken('\t');
      current = characterSet;
      break;
    case 'S' :
      characterSet = new NegativeCharacterClassNode(__position++);
      characterSet._addToken(' ');
      characterSet._addToken('\f');
      characterSet._addToken('\n');
      characterSet._addToken('\r');
      characterSet._addToken('\t');
      current = characterSet;
      break;
    default : current = _newTokenNode(token, __position++); break;
    }

    __match(__lookahead);
  }

  return current;
}
/**
 * Parses a single atom: a parenthesized subexpression, a character class,
 * the . wildcard, a backslash escape, or a literal character.
 *
 * @return The syntax node for the atom.
 * @exception MalformedPatternException If the lookahead is an operator
 *            that cannot start an atom, or the atom itself is malformed.
 */
private SyntaxNode __atom() throws MalformedPatternException {
  switch(__lookahead) {
  case '(' : {
    __match('(');
    ++__openParen;
    SyntaxNode group = __regex();
    __match(')');
    ++__closeParen;
    return group;
  }
  case '[' :
    return __characterClass();
  case '.' : {
    __match('.');
    // An empty negated class matches every character. In multiline mode
    // the newline is added to the excluded set.
    CharacterClassNode wildcard = new NegativeCharacterClassNode(__position++);
    if(__multiline)
      wildcard._addToken('\n');
    return wildcard;
  }
  case '\\' :
    return __backslashToken();
  default : {
    if(__isMetachar(__lookahead))
      throw
        new MalformedPatternException("Parse error: unexpected character " +
                                      __lookahead + " at position " + __bytesRead);
    SyntaxNode literal = _newTokenNode(__lookahead, __position++);
    __match(__lookahead);
    return literal;
  }
  }
}
/**
 * Parses a bracketed character class, starting at the open bracket, and
 * returns a single node matching its members. A leading ^ negates the
 * class. Members may be literal characters, backslash escapes and ranges
 * such as a-z. When case insensitive matching is active, the opposite
 * case of every letter member, including each letter inside a range, is
 * added as well.
 *
 * @return A CharacterClassNode, or a NegativeCharacterClassNode for a
 *         negated class.
 * @exception MalformedPatternException If the class is not terminated by
 *            a close bracket, if a range ends in a class shorthand such
 *            as \d, or if the end of a range sorts before its start.
 */
private SyntaxNode __characterClass() throws MalformedPatternException {
  char lastToken, token;
  SyntaxNode node;
  CharacterClassNode current;

  __match('[');
  __inCharacterClass = true;

  if(__lookahead == '^'){
    __match('^');
    current = new NegativeCharacterClassNode(__position++);
  } else
    current = new CharacterClassNode(__position++);

  while(__lookahead != ']' && __lookahead != _END_OF_INPUT) {

    if(__lookahead == '\\'){
      node = __backslashToken();
      // The escape is folded into this class, so give back the leaf
      // position that __backslashToken() consumed for it.
      --__position;

      // __backslashToken() (actually newTokenNode()) does not take care of
      // case insensitivity when __inCharacterClass is true.
      if(node instanceof TokenNode){
        lastToken = ((TokenNode)node)._token;
        current._addToken(lastToken);
        if(!__caseSensitive)
          current._addToken(_toggleCase(lastToken));
      } else {
        CharacterClassNode slash;
        slash = (CharacterClassNode)node;
        // This could be made more efficient by manipulating the
        // characterSet elements of the CharacterClassNodes but
        // for the moment, this is more clear.
        for(token=0; token < LeafNode._NUM_TOKENS; token++){
          if(slash._matches(token))
            current._addToken(token);
        }

        // A byproduct of this act is that when a '-' occurs after
        // a \d, \w, etc. it is not interpreted as a range and no
        // parse exception is thrown.
        // This is considered a feature and not a bug for now.
        continue;
      }
    } else {
      lastToken = __lookahead;
      current._addToken(__lookahead);
      if(!__caseSensitive)
        current._addToken(_toggleCase(__lookahead));
      __match(__lookahead);
    }

    // In Perl, a - is a token if it occurs at the beginning
    // or end of the character class. Anywhere else, it indicates
    // a range.
    // A byproduct of this implementation is that if a '-' occurs
    // after the end of a range, it is interpreted as a '-' and no
    // exception is thrown. e.g., the second dash in [a-z-x]
    // This is considered a feature and not a bug for now.
    if(__lookahead == '-'){
      __match('-');
      if(__lookahead == ']'){
        current._addToken('-');
        break;
      } else if(__lookahead == _END_OF_INPUT) {
        // The expression ended in the middle of a range. Fail here
        // instead of treating the end-of-input sentinel as the range end.
        throw new MalformedPatternException(
          "Parse error: unterminated character class at position " +
          __bytesRead);
      } else if(__lookahead == '\\') {
        node = __backslashToken();
        --__position;
        if(node instanceof TokenNode)
          token = ((TokenNode)node)._token;
        else
          throw new MalformedPatternException(
         "Parse error: invalid range specified at position " + __bytesRead);
      } else {
        token = __lookahead;
        __match(__lookahead);
      }

      if(token < lastToken)
        throw new MalformedPatternException(
         "Parse error: invalid range specified at position " + __bytesRead);

      // lastToken itself was already added above.
      current._addTokenRange(lastToken + 1, token);

      if(!__caseSensitive) {
        // Fold case one character at a time. Toggling only the two end
        // points is wrong whenever the range is not made up of letters of
        // a single case: for [a-}] it would add the range B through },
        // which contains non-letters that are not in the class.
        for(int member = lastToken + 1; member <= token; member++)
          current._addToken(_toggleCase((char)member));
      }
    }
  }

  __match(']');
  __inCharacterClass = false;
  return current;
}
/**
 * Creates the leaf node for a literal character. Outside of a character
 * class, with case insensitive matching active, an ASCII letter becomes a
 * two member character class holding both of its cases. In every other
 * situation a plain TokenNode is returned.
 *
 * @param token The character the node should match.
 * @param position The leaf position to assign to the node.
 * @return The new leaf node.
 */
SyntaxNode _newTokenNode(char token, int position){
  if(__inCharacterClass || __caseSensitive ||
     !(_isUpperCase(token) || _isLowerCase(token)))
    return new TokenNode(token, position);

  CharacterClassNode bothCases = new CharacterClassNode(position);
  bothCases._addToken(token);
  bothCases._addToken(_toggleCase(token));
  return bothCases;
}
/**
 * Parses an expression into a syntax tree and computes its follow
 * positions. A leading ^ and a trailing unescaped $ are not part of the
 * tree. They are recorded in the begin and end anchor flags instead. An
 * expression that is empty once its anchors are removed yields a tree
 * consisting of the end marker only.
 *
 * @param expression The characters of the regular expression.
 * @return The syntax tree for the expression.
 * @exception MalformedPatternException If the expression is not valid.
 */
SyntaxTree _parse(char[] expression) throws MalformedPatternException {
  SyntaxTree tree;

  __openParen = __closeParen = 0;
  __regularExpression = expression;
  __bytesRead = 0;
  __expressionLength = expression.length;
  __inCharacterClass = false;
  __position = 0;

  // Matching the lookahead against itself always succeeds. It is done
  // only to load the first character of the expression.
  __match(__lookahead);

  if(__lookahead == '^') {
    __beginAnchor = true;
    __match(__lookahead);
  }

  if(__expressionLength > 0 && expression[__expressionLength - 1] == '$') {
    // A trailing $ is an anchor only when it is not escaped, that is when
    // it is preceded by an even number of backslashes. Stripping an
    // escaped dollar would leave a dangling backslash at the end of the
    // expression instead of a literal dollar sign.
    int backslashes = 0;
    for(int index = __expressionLength - 2;
        index >= 0 && expression[index] == '\\'; index--)
      ++backslashes;

    if((backslashes & 1) == 0) {
      --__expressionLength;
      __endAnchor = true;
    }
  }

  if(__expressionLength > 1 || (__expressionLength == 1 && !__beginAnchor)) {
    CatNode root;
    root = new CatNode();
    root._left = __regex();
    // end marker
    root._right =
      new TokenNode((char)LeafNode._END_MARKER_TOKEN, __position++);
    tree = new SyntaxTree(root, __position);
  } else
    // Nothing is left besides anchors: the tree is just the end marker.
    tree = new
      SyntaxTree(new TokenNode((char)LeafNode._END_MARKER_TOKEN, 0), 1);

  tree._computeFollowPositions();

  return tree;
}
/**
 * Compiles an Awk regular expression, given as a character array, into an
 * AwkPattern instance that can be used by an AwkMatcher object to perform
 * pattern matching. The call overwrites this compiler's internal parsing
 * state.
 *
 * @param pattern An Awk regular expression to compile.
 * @param options A set of flags giving the compiler instructions on
 *                how to treat the regular expression. The flags examined
 *                are AwkCompiler.CASE_INSENSITIVE_MASK and
 *                AwkCompiler.MULTILINE_MASK.
 * @return A Pattern instance constituting the compiled regular expression.
 *         This instance will always be an AwkPattern and can be reliably
 *         cast to an AwkPattern.
 * @exception MalformedPatternException If the compiled expression
 *            is not a valid Awk regular expression.
 */
public Pattern compile(char[] pattern, int options)
  throws MalformedPatternException
{
  __beginAnchor = false;
  __endAnchor = false;
  __multiline = ((options & MULTILINE_MASK) != 0);
  __caseSensitive = ((options & CASE_INSENSITIVE_MASK) == 0);

  SyntaxTree syntaxTree = _parse(pattern);

  AwkPattern compiled = new AwkPattern(new String(pattern), syntaxTree);
  compiled._options = options;
  compiled._hasBeginAnchor = __beginAnchor;
  compiled._hasEndAnchor = __endAnchor;
  return compiled;
}
/**
 * Compiles an Awk regular expression, given as a String, into an
 * AwkPattern instance that can be used by an AwkMatcher object to perform
 * pattern matching. The call overwrites this compiler's internal parsing
 * state.
 *
 * @param pattern An Awk regular expression to compile.
 * @param options A set of flags giving the compiler instructions on
 *                how to treat the regular expression. The flags examined
 *                are AwkCompiler.CASE_INSENSITIVE_MASK and
 *                AwkCompiler.MULTILINE_MASK.
 * @return A Pattern instance constituting the compiled regular expression.
 *         This instance will always be an AwkPattern and can be reliably
 *         cast to an AwkPattern.
 * @exception MalformedPatternException If the compiled expression
 *            is not a valid Awk regular expression.
 */
public Pattern compile(String pattern, int options)
  throws MalformedPatternException
{
  __beginAnchor = false;
  __endAnchor = false;
  __multiline = ((options & MULTILINE_MASK) != 0);
  __caseSensitive = ((options & CASE_INSENSITIVE_MASK) == 0);

  SyntaxTree syntaxTree = _parse(pattern.toCharArray());

  AwkPattern compiled = new AwkPattern(pattern, syntaxTree);
  compiled._options = options;
  compiled._hasBeginAnchor = __beginAnchor;
  compiled._hasEndAnchor = __endAnchor;
  return compiled;
}
/**
 * Compiles a regular expression given as a character array with no
 * special options. Same as calling
 * compile(pattern, AwkCompiler.DEFAULT_MASK);
 *
 * @param pattern A regular expression to compile.
 * @return A Pattern instance constituting the compiled regular expression.
 *         This instance will always be an AwkPattern and can be reliably
 *         cast to an AwkPattern.
 * @exception MalformedPatternException If the compiled expression
 *            is not a valid Awk regular expression.
 */
public Pattern compile(char[] pattern) throws MalformedPatternException {
  Pattern compiled = compile(pattern, DEFAULT_MASK);
  return compiled;
}
/**
 * Compiles a regular expression given as a String with no special
 * options. Same as calling
 * compile(pattern, AwkCompiler.DEFAULT_MASK);
 *
 * @param pattern A regular expression to compile.
 * @return A Pattern instance constituting the compiled regular expression.
 *         This instance will always be an AwkPattern and can be reliably
 *         cast to an AwkPattern.
 * @exception MalformedPatternException If the compiled expression
 *            is not a valid Awk regular expression.
 */
public Pattern compile(String pattern) throws MalformedPatternException {
  Pattern compiled = compile(pattern, DEFAULT_MASK);
  return compiled;
}
}