All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.oro.text.regex.OpCode Maven / Gradle / Ivy

There is a newer version: 5.0.84
Show newest version
/*
 * $Id: OpCode.java,v 1.11 2003/11/07 20:16:25 dfs Exp $
 *
 * ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2000 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro" 
 *    must not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache" 
 *    or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their 
 *    name, without prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */


package org.apache.oro.text.regex;


/**
 * Non-instantiable holder for the op-code constants of a compiled regular
 * expression program, together with lookup tables indexed by op-code and
 * small static helpers for walking a program stored as a {@code char[]}.
 *
 * <p>As read by the helpers below, a node occupies at least two slots: the
 * op-code at {@code offset} and its "next" link at {@code offset + 1}.
 * The slots at {@code offset + 2} and {@code offset + 3} are exposed as the
 * first and second argument.
 *
 * @version @version@
 * @since 1.0
 */
final class OpCode {

  /** Not meant to be instantiated; everything here is static. */
  private OpCode() { }

  // The operator names, numeric values and descriptions mirror the Perl
  // regex bytecodes; for compatibility they were taken from regcomp.h in
  // the Perl source tree by Larry Wall.
  //
  // Trailing comment format:  <has operand>  <meaning>
  static final char _END      = 0;   // no   End of program.
  static final char _BOL      = 1;   // no   Match "" at beginning of line.
  static final char _MBOL     = 2;   // no   Same, assuming multiline.
  static final char _SBOL     = 3;   // no   Same, assuming singleline.
  static final char _EOL      = 4;   // no   Match "" at end of line.
  static final char _MEOL     = 5;   // no   Same, assuming multiline.
  static final char _SEOL     = 6;   // no   Same, assuming singleline.
  static final char _ANY      = 7;   // no   Match any one character (except newline).
  static final char _SANY     = 8;   // no   Match any one character.
  static final char _ANYOF    = 9;   // yes  Match character in (or not in) this class.
  static final char _CURLY    = 10;  // yes  Match this simple thing {n,m} times.
  static final char _CURLYX   = 11;  // yes  Match this complex thing {n,m} times.
  static final char _BRANCH   = 12;  // yes  Match this alternative, or the next...
  static final char _BACK     = 13;  // no   Match "", "next" ptr points backward.
  static final char _EXACTLY  = 14;  // yes  Match this string (preceded by length).
  static final char _NOTHING  = 15;  // no   Match empty string.
  static final char _STAR     = 16;  // yes  Match this (simple) thing 0 or more times.
  static final char _PLUS     = 17;  // yes  Match this (simple) thing 1 or more times.
  static final char _ALNUM    = 18;  // no   Match any word character.
  static final char _NALNUM   = 19;  // no   Match any non-word character.
  static final char _BOUND    = 20;  // no   Match "" at any word boundary.
  static final char _NBOUND   = 21;  // no   Match "" at any word non-boundary.
  static final char _SPACE    = 22;  // no   Match any whitespace character.
  static final char _NSPACE   = 23;  // no   Match any non-whitespace character.
  static final char _DIGIT    = 24;  // no   Match any numeric character.
  static final char _NDIGIT   = 25;  // no   Match any non-numeric character.
  static final char _REF      = 26;  // yes  Match some already matched string.
  static final char _OPEN     = 27;  // yes  Mark this point in input as start of #n.
  static final char _CLOSE    = 28;  // yes  Analogous to OPEN.
  static final char _MINMOD   = 29;  // no   Next operator is not greedy.
  static final char _GBOL     = 30;  // no   Matches where last m//g left off.
  static final char _IFMATCH  = 31;  // no   Succeeds if the following matches.
  static final char _UNLESSM  = 32;  // no   Fails if the following matches.
  static final char _SUCCEED  = 33;  // no   Return from a subroutine, basically.
  static final char _WHILEM   = 34;  // no   Do curly processing and see if rest matches.
  static final char _ANYOFUN  = 35;  // yes  Match unicode character in this class.
  static final char _NANYOFUN = 36;  // yes  Match unicode character not in this class.
  static final char _RANGE    = 37;  // yes  Range flag (the original note is cut off here).

  // TODO: rename the constants below at some point so that it is obvious
  // they denote POSIX classes.
  static final char _ALPHA    = 38;
  static final char _BLANK    = 39;
  static final char _CNTRL    = 40;
  static final char _GRAPH    = 41;
  static final char _LOWER    = 42;
  static final char _PRINT    = 43;
  static final char _PUNCT    = 44;
  static final char _UPPER    = 45;
  static final char _XDIGIT   = 46;
  static final char _OPCODE   = 47;
  static final char _NOPCODE  = 48;
  static final char _ONECHAR  = 49;
  static final char _ALNUMC   = 50;
  static final char _ASCII    = 51;

  /**
   * Operand length for each op-code, indexed by op-code value (0 through
   * {@code _ASCII}).  Every entry is 0 except the ones assigned in the
   * static initializer that follows.
   */
  static final int[] _operandLength = new int[_ASCII + 1];

  static {
    _operandLength[_CURLY]  = 2;
    _operandLength[_CURLYX] = 2;
    _operandLength[_REF]    = 1;
    _operandLength[_OPEN]   = 1;
    _operandLength[_CLOSE]  = 1;
  }

  /**
   * Maps each op-code (the array index) to the op-code standing for its
   * general type; e.g. {@code _MBOL} and {@code _SBOL} both map to
   * {@code _BOL}.
   */
  static final char[] _opType = {
    /*  0 -  4 */ _END, _BOL, _BOL, _BOL, _EOL,
    /*  5 -  9 */ _EOL, _EOL, _ANY, _ANY, _ANYOF,
    /* 10 - 14 */ _CURLY, _CURLY, _BRANCH, _BACK, _EXACTLY,
    /* 15 - 19 */ _NOTHING, _STAR, _PLUS, _ALNUM, _NALNUM,
    /* 20 - 24 */ _BOUND, _NBOUND, _SPACE, _NSPACE, _DIGIT,
    /* 25 - 29 */ _NDIGIT, _REF, _OPEN, _CLOSE, _MINMOD,
    /* 30 - 34 */ _BOL, _BRANCH, _BRANCH, _END, _WHILEM,
    /* 35 - 39 */ _ANYOFUN, _NANYOFUN, _RANGE, _ALPHA, _BLANK,
    /* 40 - 44 */ _CNTRL, _GRAPH, _LOWER, _PRINT, _PUNCT,
    /* 45 - 49 */ _UPPER, _XDIGIT, _OPCODE, _NOPCODE, _ONECHAR,
    /* 50 - 51 */ _ALNUMC, _ASCII
  };

  /** Op-codes listed as having a varying length. */
  static final char[] _opLengthVaries = {
    _BRANCH, _BACK, _STAR, _PLUS,
    _CURLY, _CURLYX, _REF, _WHILEM
  };

  /** Op-codes listed as having a length of one. */
  static final char[] _opLengthOne = {
    _ANY, _SANY, _ANYOF,
    _ALNUM, _NALNUM, _SPACE, _NSPACE, _DIGIT, _NDIGIT,
    _ANYOFUN, _NANYOFUN,
    _ALPHA, _BLANK, _CNTRL, _GRAPH, _LOWER, _PRINT, _PUNCT, _UPPER,
    _XDIGIT, _OPCODE, _NOPCODE, _ONECHAR, _ALNUMC, _ASCII
  };

  /** Value returned by {@link #_getNext} when there is no next node. */
  static final int  _NULL_OFFSET  = -1;

  /** Value of a "next" slot that links to nothing. */
  static final char _NULL_POINTER =  0;

  /**
   * Reads the raw "next" link of a node.
   *
   * @param program the compiled program
   * @param offset  index of the node's op-code
   * @return the value stored at {@code offset + 1}, widened to int
   */
  static final int _getNextOffset(char[] program, int offset) {
    return program[offset + 1];
  }

  /**
   * Reads the first argument of a node.
   *
   * @param program the compiled program
   * @param offset  index of the node's op-code
   * @return the value stored at {@code offset + 2}
   */
  static final char _getArg1(char[] program, int offset) {
    return program[offset + 2];
  }

  /**
   * Reads the second argument of a node.
   *
   * @param program the compiled program
   * @param offset  index of the node's op-code
   * @return the value stored at {@code offset + 3}
   */
  static final char _getArg2(char[] program, int offset) {
    return program[offset + 3];
  }

  /**
   * Computes where a node's operand starts.
   *
   * @param offset index of the node's op-code
   * @return {@code offset + 2}
   */
  static final int _getOperand(int offset) {
    return offset + 2;
  }

  /**
   * Linear search for a character in the tail of an array.
   *
   * @param ch    the character to look for
   * @param array the array to scan
   * @param start index of the first element to examine
   * @return true if {@code ch} occurs at index {@code start} or later,
   *         false otherwise (including when {@code start} is already at
   *         or past the end of the array)
   */
  static final boolean _isInArray(char ch, char[] array, int start) {
    for (int index = start; index < array.length; index++) {
      if (array[index] == ch) {
        return true;
      }
    }
    return false;
  }

  /**
   * Steps forward by the two slots of a node header.
   *
   * @param offset index of an op-code
   * @return {@code offset + 2}
   */
  static final int _getNextOperator(int offset) {
    return offset + 2;
  }

  /**
   * Steps backward by the two slots of a node header.
   *
   * @param offset index of an op-code
   * @return {@code offset - 2}
   */
  static final int _getPrevOperator(int offset) {
    return offset - 2;
  }

  /**
   * Resolves the "next" link of a node to an absolute program index.
   *
   * @param program the compiled program; may be null
   * @param offset  index of the node's op-code
   * @return {@code _NULL_OFFSET} if {@code program} is null or the link
   *         equals {@code _NULL_POINTER}; otherwise {@code offset} minus
   *         the link for a {@code _BACK} node and {@code offset} plus the
   *         link for any other node
   */
  static final int _getNext(char[] program, int offset) {
    if (program == null) {
      return _NULL_OFFSET;
    }

    final int link = _getNextOffset(program, offset);
    if (link == _NULL_POINTER) {
      return _NULL_OFFSET;
    }

    // A _BACK node stores its link as a distance to walk backward.
    return (program[offset] == _BACK) ? (offset - link) : (offset + link);
  }

  /**
   * Tests a character against Perl's notion of \w, which is not the same
   * thing as [:alnum:].  This helper does not strictly belong in this
   * class; it is kept here so that Perl5Matcher does not have to depend
   * on Perl5Compiler.
   *
   * @param token the character to test
   * @return true if {@code token} is an underscore, a letter or a digit
   */
  static final boolean _isWordCharacter(char token) {
    return token == '_' || Character.isLetterOrDigit(token);
  }
}






© 2015 - 2024 Weber Informatics LLC | Privacy Policy