// org.apache.oro.text.regex.OpCode Maven / Gradle / Ivy
/*
* $Id: OpCode.java,v 1.11 2003/11/07 20:16:25 dfs Exp $
*
* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2000 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
* must not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache"
* or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
* name, without prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.oro.text.regex;
/**
* The OpCode class should not be instantiated. It is a holder of various
* constants and static methods pertaining to the manipulation of the
* op-codes used in a compiled regular expression.
*
* @version @version@
* @since 1.0
*/
final class OpCode {

  /** Not instantiable: this class only carries constants and static helpers. */
  private OpCode() { }

  // Operator names, numeric values, and descriptions follow the Perl regex
  // bytecodes; for compatibility they are taken from regcomp.h in the Perl
  // source tree by Larry Wall.
  //
  // Each constant is annotated as: [has operand?] meaning.

  static final char _END      = 0;   // [no]  End of program.
  static final char _BOL      = 1;   // [no]  Match "" at beginning of line.
  static final char _MBOL     = 2;   // [no]  Same, assuming multiline.
  static final char _SBOL     = 3;   // [no]  Same, assuming singleline.
  static final char _EOL      = 4;   // [no]  Match "" at end of line.
  static final char _MEOL     = 5;   // [no]  Same, assuming multiline.
  static final char _SEOL     = 6;   // [no]  Same, assuming singleline.
  static final char _ANY      = 7;   // [no]  Match any one character (except newline).
  static final char _SANY     = 8;   // [no]  Match any one character.
  static final char _ANYOF    = 9;   // [yes] Match character in (or not in) this class.
  static final char _CURLY    = 10;  // [yes] Match this simple thing {n,m} times.
  static final char _CURLYX   = 11;  // [yes] Match this complex thing {n,m} times.
  static final char _BRANCH   = 12;  // [yes] Match this alternative, or the next...
  static final char _BACK     = 13;  // [no]  Match "", "next" ptr points backward.
  static final char _EXACTLY  = 14;  // [yes] Match this string (preceded by length).
  static final char _NOTHING  = 15;  // [no]  Match empty string.
  static final char _STAR     = 16;  // [yes] Match this (simple) thing 0 or more times.
  static final char _PLUS     = 17;  // [yes] Match this (simple) thing 1 or more times.
  static final char _ALNUM    = 18;  // [no]  Match any word character.
  static final char _NALNUM   = 19;  // [no]  Match any non-word character.
  static final char _BOUND    = 20;  // [no]  Match "" at any word boundary.
  static final char _NBOUND   = 21;  // [no]  Match "" at any word non-boundary.
  static final char _SPACE    = 22;  // [no]  Match any whitespace character.
  static final char _NSPACE   = 23;  // [no]  Match any non-whitespace character.
  static final char _DIGIT    = 24;  // [no]  Match any numeric character.
  static final char _NDIGIT   = 25;  // [no]  Match any non-numeric character.
  static final char _REF      = 26;  // [yes] Match some already matched string.
  static final char _OPEN     = 27;  // [yes] Mark this point in input as start of #n.
  static final char _CLOSE    = 28;  // [yes] Analogous to OPEN.
  static final char _MINMOD   = 29;  // [no]  Next operator is not greedy.
  static final char _GBOL     = 30;  // [no]  Matches where last m//g left off.
  static final char _IFMATCH  = 31;  // [no]  Succeeds if the following matches.
  static final char _UNLESSM  = 32;  // [no]  Fails if the following matches.
  static final char _SUCCEED  = 33;  // [no]  Return from a subroutine, basically.
  static final char _WHILEM   = 34;  // [no]  Do curly processing and see if rest matches.
  static final char _ANYOFUN  = 35;  // [yes] Match unicode character in this class.
  static final char _NANYOFUN = 36;  // [yes] Match unicode character not in this class.
  // NOTE(review): the inherited description of _RANGE was cut off after
  // "Range flag in"; its exact meaning should be confirmed against its users.
  static final char _RANGE    = 37;  // [yes] Range flag.

  // TODO: rename the constants below later so that it is clear they are
  // POSIX classes.
  static final char _ALPHA    = 38;
  static final char _BLANK    = 39;
  static final char _CNTRL    = 40;
  static final char _GRAPH    = 41;
  static final char _LOWER    = 42;
  static final char _PRINT    = 43;
  static final char _PUNCT    = 44;
  static final char _UPPER    = 45;
  static final char _XDIGIT   = 46;
  static final char _OPCODE   = 47;
  static final char _NOPCODE  = 48;
  static final char _ONECHAR  = 49;
  static final char _ALNUMC   = 50;
  static final char _ASCII    = 51;

  /** Operand length of every operator, indexed by op-code value. */
  static final int[] _operandLength = {
    /*  0 -  9 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 10 - 19 */ 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 20 - 29 */ 0, 0, 0, 0, 0, 0, 1, 1, 1, 0,
    /* 30 - 39 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 40 - 49 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 50 - 51 */ 0, 0
  };

  /**
   * Type of every operator, indexed by op-code value.  Several op-codes
   * share a type (for example _BOL, _MBOL, _SBOL and _GBOL all map to _BOL).
   */
  static final char[] _opType = {
    /*  0 -  3 */ _END, _BOL, _BOL, _BOL,
    /*  4 -  6 */ _EOL, _EOL, _EOL,
    /*  7 -  9 */ _ANY, _ANY, _ANYOF,
    /* 10 - 13 */ _CURLY, _CURLY, _BRANCH, _BACK,
    /* 14 - 17 */ _EXACTLY, _NOTHING, _STAR, _PLUS,
    /* 18 - 21 */ _ALNUM, _NALNUM, _BOUND, _NBOUND,
    /* 22 - 25 */ _SPACE, _NSPACE, _DIGIT, _NDIGIT,
    /* 26 - 29 */ _REF, _OPEN, _CLOSE, _MINMOD,
    /* 30 - 34 */ _BOL, _BRANCH, _BRANCH, _END, _WHILEM,
    /* 35 - 37 */ _ANYOFUN, _NANYOFUN, _RANGE,
    /* 38 - 41 */ _ALPHA, _BLANK, _CNTRL, _GRAPH,
    /* 42 - 46 */ _LOWER, _PRINT, _PUNCT, _UPPER, _XDIGIT,
    /* 47 - 51 */ _OPCODE, _NOPCODE, _ONECHAR, _ALNUMC, _ASCII
  };

  /** Operators listed as having a varying length. */
  static final char[] _opLengthVaries = {
    _BRANCH, _BACK, _STAR, _PLUS,
    _CURLY, _CURLYX, _REF, _WHILEM
  };

  /** Operators listed as having a length of one. */
  static final char[] _opLengthOne = {
    _ANY, _SANY, _ANYOF,
    _ALNUM, _NALNUM, _SPACE, _NSPACE, _DIGIT, _NDIGIT,
    _ANYOFUN, _NANYOFUN,
    _ALPHA, _BLANK, _CNTRL, _GRAPH, _LOWER, _PRINT, _PUNCT, _UPPER, _XDIGIT,
    _OPCODE, _NOPCODE, _ONECHAR, _ALNUMC, _ASCII
  };

  /** Value returned by {@link #_getNext} when there is no next position. */
  static final int _NULL_OFFSET = -1;

  /** Stored "next" value that {@link #_getNext} treats as "no next". */
  static final char _NULL_POINTER = 0;

  /**
   * Reads the raw "next" value stored in the slot immediately after the
   * operator at {@code offset}.
   *
   * @param program the compiled program
   * @param offset  index of an operator within {@code program}
   * @return {@code program[offset + 1]} widened to an int
   */
  static final int _getNextOffset(char[] program, int offset) {
    final char stored = program[offset + 1];
    return stored;
  }

  /**
   * Reads the first argument slot of the operator at {@code offset}.
   *
   * @param program the compiled program
   * @param offset  index of an operator within {@code program}
   * @return {@code program[offset + 2]}
   */
  static final char _getArg1(char[] program, int offset) {
    final int slot = offset + 2;
    return program[slot];
  }

  /**
   * Reads the second argument slot of the operator at {@code offset}.
   *
   * @param program the compiled program
   * @param offset  index of an operator within {@code program}
   * @return {@code program[offset + 3]}
   */
  static final char _getArg2(char[] program, int offset) {
    final int slot = offset + 3;
    return program[slot];
  }

  /**
   * Computes the index of the operand belonging to the operator at
   * {@code offset}.
   *
   * @param offset index of an operator
   * @return {@code offset + 2}
   */
  static final int _getOperand(int offset) {
    return 2 + offset;
  }

  /**
   * Tests whether {@code ch} occurs in {@code array} at index
   * {@code start} or later.
   *
   * @param ch    the character to look for
   * @param array the array to scan
   * @param start index at which scanning begins
   * @return true if a matching element is found, false if the end of the
   *         array is reached first
   */
  static final boolean _isInArray(char ch, char[] array, int start) {
    for (int index = start; index < array.length; index++) {
      if (array[index] == ch) {
        return true;
      }
    }
    return false;
  }

  /**
   * Steps forward over one operator header.
   *
   * @param offset index of an operator
   * @return {@code offset + 2}
   */
  static final int _getNextOperator(int offset) {
    return 2 + offset;
  }

  /**
   * Steps backward over one operator header.
   *
   * @param offset index of an operator
   * @return {@code offset - 2}
   */
  static final int _getPrevOperator(int offset) {
    return -2 + offset;
  }

  /**
   * Resolves the stored "next" value of the operator at {@code offset}
   * into an absolute index.  The stored value is a distance that is
   * subtracted for a _BACK operator and added for every other operator.
   *
   * @param program the compiled program, may be null
   * @param offset  index of an operator within {@code program}
   * @return the absolute index of the next position, or
   *         {@link #_NULL_OFFSET} if {@code program} is null or the
   *         stored value equals {@link #_NULL_POINTER}
   */
  static final int _getNext(char[] program, int offset) {
    if (program == null) {
      return _NULL_OFFSET;
    }

    final int distance = _getNextOffset(program, offset);
    if (distance == _NULL_POINTER) {
      return _NULL_OFFSET;
    }

    // Only _BACK stores a backward-pointing distance.
    return (program[offset] == OpCode._BACK)
        ? (offset - distance)
        : (offset + distance);
  }

  // This helper does not really belong in this class, but it lives here so
  // that Perl5Matcher need not depend on Perl5Compiler.

  /**
   * Implements Perl's definition of \w, which is different from [:alnum:]
   * because the underscore also counts.
   *
   * @param token the character to classify
   * @return true if {@code token} is an underscore, a letter, or a digit
   */
  static final boolean _isWordCharacter(char token) {
    if (token == '_') {
      return true;
    }
    return Character.isLetterOrDigit(token);
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy