com.caucho.quercus.lib.regexp.RegexpNode Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of quercus Show documentation
Show all versions of quercus Show documentation
A PHP engine implemented in 100% Java
/*
* Copyright (c) 1998-2012 Caucho Technology -- all rights reserved
*
* This file is part of Resin(R) Open Source
*
* Each copy or derived work must preserve the copyright notice and this
* notice unmodified.
*
* Resin Open Source is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Resin Open Source is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
* of NON-INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with Resin Open Source; if not, write to the
*
* Free Software Foundation, Inc.
* 59 Temple Place, Suite 330
* Boston, MA 02111-1307 USA
*
* @author Scott Ferguson
*/
package com.caucho.quercus.lib.regexp;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;
import com.caucho.quercus.env.StringValue;
import com.caucho.util.CharBuffer;
import com.caucho.util.IntSet;
class RegexpNode {
// Integer node-type codes. None of them is referenced in the visible part of
// this file; presumably they are consumed elsewhere -- TODO confirm.
static final int RC_END = 0;
static final int RC_NULL = 1;
static final int RC_STRING = 2;
static final int RC_SET = 3;
static final int RC_NSET = 4;
static final int RC_BEG_GROUP = 5;
static final int RC_END_GROUP = 6;
static final int RC_GROUP_REF = 7;
static final int RC_LOOP = 8;
static final int RC_LOOP_INIT = 9;
static final int RC_LOOP_SHORT = 10;
static final int RC_LOOP_UNIQUE = 11;
static final int RC_LOOP_SHORT_UNIQUE = 12;
static final int RC_LOOP_LONG = 13;
// the numbering jumps to 64 here
static final int RC_OR = 64;
static final int RC_OR_UNIQUE = 65;
static final int RC_POS_LOOKAHEAD = 66;
static final int RC_NEG_LOOKAHEAD = 67;
static final int RC_POS_LOOKBEHIND = 68;
static final int RC_NEG_LOOKBEHIND = 69;
static final int RC_LOOKBEHIND_OR = 70;
// 71 and 72 are not assigned in this list
static final int RC_WORD = 73;
static final int RC_NWORD = 74;
static final int RC_BLINE = 75;
static final int RC_ELINE = 76;
static final int RC_BSTRING = 77;
static final int RC_ESTRING = 78;
static final int RC_ENSTRING = 79;
static final int RC_GSTRING = 80;
// conditionals
static final int RC_COND = 81;
// ignore case
static final int RC_STRING_I = 128;
static final int RC_SET_I = 129;
static final int RC_NSET_I = 130;
static final int RC_GROUP_REF_I = 131;
static final int RC_LEXEME = 256;
// unicode properties
static final int RC_UNICODE = 512;
static final int RC_NUNICODE = 513;
// unicode properties sets
static final int RC_C = 1024;
static final int RC_L = 1025;
static final int RC_M = 1026;
static final int RC_N = 1027;
static final int RC_P = 1028;
static final int RC_S = 1029;
static final int RC_Z = 1030;
// negated unicode properties sets
// NOTE(review): no negated counterparts of RC_S and RC_Z are declared in
// this list -- confirm whether that is intentional.
static final int RC_NC = 1031;
static final int RC_NL = 1032;
static final int RC_NM = 1033;
static final int RC_NN = 1034;
static final int RC_NP = 1035;
// POSIX character classes
// NOTE(review): RC_ALNUM..RC_XDIGIT restart at 1 and therefore share numeric
// values with RC_NULL..RC_LOOP_SHORT_UNIQUE above; presumably they are
// sub-codes used together with RC_CHAR_CLASS -- confirm.
static final int RC_CHAR_CLASS = 2048;
static final int RC_ALNUM = 1;
static final int RC_ALPHA = 2;
static final int RC_BLANK = 3;
static final int RC_CNTRL = 4;
static final int RC_DIGIT = 5;
static final int RC_GRAPH = 6;
static final int RC_LOWER = 7;
static final int RC_PRINT = 8;
static final int RC_PUNCT = 9;
static final int RC_SPACE = 10;
static final int RC_UPPER = 11;
static final int RC_XDIGIT = 12;
// #2526, possible JIT/OS issue with Integer.MAX_VALUE
// Passed as the upper bound by createStar() and createPlus().
private static final int INTEGER_MAX = Integer.MAX_VALUE - 1;
// NOTE(review): the match() methods visible in this file return the literal
// -1 on failure rather than FAIL; where FAIL/SUCCESS are used is not visible
// here.
public static final int FAIL = -1;
public static final int SUCCESS = 0;
// Shared End instance; the initial "next" of the loop, group and
// conditional tail nodes in this file.
static final RegexpNode N_END = new End();
// Blank final: assigned outside the visible part of this file.
static final RegexpNode ANY_CHAR;
/**
* Creates a node. The constructor takes no arguments and does no
* initialization of its own; it is protected.
*/
protected RegexpNode()
{
}
/**
 * Returns a copy of this node that is suitable for recursion.
 * Needed because concat() modifies original backing nodes.
 *
 * Starts the copy with a fresh, empty original-to-copy map.
 */
final RegexpNode copy()
{
  HashMap visited = new HashMap();

  return copy(visited);
}
/**
 * Returns the copy registered for this node in {@code state}, or builds
 * one through {@link #copyImpl(HashMap)} when none is registered.
 *
 * This method does not store the result itself; the copyImpl overrides
 * that take part in head/tail cycles register their copy in {@code state}
 * before descending into their children.
 *
 * @param state map from original node to its copy
 * @return the copy of this node
 */
final RegexpNode copy(HashMap state)
{
  // state is a raw HashMap, so get() yields Object and needs the cast
  RegexpNode copy = (RegexpNode) state.get(this);

  if (copy == null) {
    copy = copyImpl(state);
  }

  return copy;
}
/**
* Builds the copy for copy(HashMap). The base implementation returns this
* node itself; node classes that hold links to other nodes override it.
*/
RegexpNode copyImpl(HashMap state)
{
return this;
}
//
// parsing constructors
//
/**
 * Returns a node matching this node followed by {@code next}. The base
 * implementation wraps both in a new Concat.
 */
RegexpNode concat(RegexpNode next)
{
  final Concat sequence = new Concat(this, next);

  return sequence;
}
/**
 * '?' operator: a loop with bounds 0..1, built through createLoop.
 */
RegexpNode createOptional(Regcomp parser)
{
  final int min = 0;
  final int max = 1;

  return createLoop(parser, min, max);
}
/**
 * '*' operator: a loop with bounds 0..INTEGER_MAX, built through
 * createLoop.
 */
RegexpNode createStar(Regcomp parser)
{
  final int min = 0;

  return createLoop(parser, min, INTEGER_MAX);
}
/**
 * '+' operator: a loop with bounds 1..INTEGER_MAX, built through
 * createLoop.
 */
RegexpNode createPlus(Regcomp parser)
{
  final int min = 1;

  return createLoop(parser, min, INTEGER_MAX);
}
/**
 * Generic loop: wraps this node in a LoopHead with the given bounds.
 */
RegexpNode createLoop(Regcomp parser, int min, int max)
{
  final LoopHead loop = new LoopHead(parser, this, min, max);

  return loop;
}
/**
 * Generic ungreedy loop: wraps this node in a LoopHeadUngreedy with the
 * given bounds.
 */
RegexpNode createLoopUngreedy(Regcomp parser, int min, int max)
{
  final LoopHeadUngreedy loop = new LoopHeadUngreedy(parser, this, min, max);

  return loop;
}
/**
 * Possessive loop: wraps the head of this node in a PossessiveLoop with
 * the given bounds.
 */
RegexpNode createPossessiveLoop(int min, int max)
{
  final RegexpNode head = getHead();

  return new PossessiveLoop(head, min, max);
}
/**
 * Creates an or expression with this node as the left side and
 * {@code node} as the right side.
 */
RegexpNode createOr(RegexpNode node)
{
  final RegexpNode alternation = Or.create(this, node);

  return alternation;
}
/**
 * Creates a not expression wrapping this node.
 */
RegexpNode createNot()
{
  final RegexpNode negated = Not.create(this);

  return negated;
}
//
// optimization functions
//
/**
* Minimum match length reported by this node; 0 in the base class.
* Concat sums the values of its two parts, and Lookbehind steps back by
* this amount before matching.
*/
int minLength()
{
return 0;
}
/**
* Returns the empty string in the base class. Concat and GroupHead forward
* the call to their first child.
* NOTE(review): presumably a literal prefix used as a search hint; the
* consumer is not visible here.
*/
String prefix()
{
return "";
}
/**
* Returns -1 in the base class; CharNode overrides it to return its
* character.
*/
int firstChar()
{
return -1;
}
/**
* Returns false in the base class; NullableNode overrides it to return
* true.
*/
boolean isNullable()
{
return false;
}
/**
* Returns null in the base class. The overrides in CharNode and AsciiSet
* set entries of the passed array to true and return it, or return null.
* NOTE(review): null presumably means "first characters unknown" --
* confirm against the caller.
*/
boolean []firstSet(boolean []firstSet)
{
return null;
}
/**
* Returns false in the base class; AnchorBegin returns true, and Concat and
* GroupHead forward the call to their first child.
*/
boolean isAnchorBegin()
{
return false;
}
/**
* Returns this node in the base class. The head nodes of groups, loops and
* conditionals override it to return their paired tail node.
*/
RegexpNode getTail()
{
return this;
}
/**
* Returns this node in the base class. The tail nodes of groups, loops and
* conditionals override it to return their paired head node.
*/
RegexpNode getHead()
{
return this;
}
//
// matching
//
/**
 * Matches this node against {@code string} at {@code offset}. The
 * overrides in this file return the offset after the match, or a negative
 * value on failure. The base implementation is unsupported.
 *
 * @throws UnsupportedOperationException always; the message is the
 *         runtime class name
 */
int match(StringValue string, int length, int offset, RegexpState state)
{
  final String nodeType = getClass().getName();

  throw new UnsupportedOperationException(nodeType);
}
/**
 * Renders this node through toString(StringBuilder, Map), using a fresh
 * identity map to track the nodes already printed.
 */
@Override
public String toString()
{
  StringBuilder text = new StringBuilder();
  Map printed = new IdentityHashMap();

  toString(text, printed);

  return text.toString();
}
/**
 * Appends this node to {@code sb}: a back-reference if it was already
 * printed, otherwise its short class name followed by "[]".
 */
protected void toString(StringBuilder sb, Map map)
{
  if (! toStringAdd(sb, map)) {
    sb.append(toStringName());
    sb.append("[]");
  }
}
/**
 * Tracks which nodes have been printed so that shared or cyclic
 * structures are rendered once.
 *
 * If this node is already in {@code map}, appends "#" plus its number and
 * returns true. Otherwise registers it under the current map size and
 * returns false, so the caller prints the node in full.
 *
 * @param sb output buffer
 * @param map printed nodes mapped to their number
 * @return true if a back-reference was written
 */
protected boolean toStringAdd(StringBuilder sb, Map map)
{
  // map is a raw Map, so get() yields Object and needs the cast
  Integer v = (Integer) map.get(this);

  if (v != null) {
    sb.append("#").append(v);

    return true;
  }

  map.put(this, Integer.valueOf(map.size()));

  return false;
}
/**
 * Returns the runtime class name with everything up to the last '$'
 * removed, or up to the last '.' when the name has no '$'.
 */
protected String toStringName()
{
  final String qualified = getClass().getName();
  final int nested = qualified.lastIndexOf('$');
  final int cut = nested >= 0 ? nested : qualified.lastIndexOf('.');

  return qualified.substring(cut + 1);
}
/**
 * A node with exactly one character matches.
 *
 * Loops over such a node use CharLoop and CharUngreedyLoop instead of the
 * generic loop head/tail pair.
 */
static class AbstractCharNode extends RegexpNode {
  @Override
  RegexpNode createLoop(Regcomp parser, int min, int max)
  {
    final CharLoop loop = new CharLoop(this, min, max);

    return loop;
  }

  @Override
  RegexpNode createLoopUngreedy(Regcomp parser, int min, int max)
  {
    final CharUngreedyLoop loop = new CharUngreedyLoop(this, min, max);

    return loop;
  }

  /** Always 1. */
  @Override
  int minLength()
  {
    return 1;
  }
}
/**
 * Matches one specific character.
 */
static class CharNode extends AbstractCharNode {
  private char _ch;

  CharNode(char ch)
  {
    this._ch = ch;
  }

  @Override
  int firstChar()
  {
    return _ch;
  }

  /**
   * Marks this character in {@code firstSet} and returns the array;
   * returns null when the array is null or too short for the character.
   */
  @Override
  boolean []firstSet(boolean []firstSet)
  {
    if (firstSet == null || firstSet.length <= _ch) {
      return null;
    }

    firstSet[_ch] = true;

    return firstSet;
  }

  /**
   * Returns offset + 1 when the character at {@code offset} equals this
   * node's character, -1 otherwise (including at end of input).
   */
  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    final boolean hit = offset < length && string.charAt(offset) == _ch;

    return hit ? offset + 1 : -1;
  }
}
// Shared instances of the anchor nodes. The anchor classes declared below
// have no fields of their own.
static final AnchorBegin ANCHOR_BEGIN = new AnchorBegin();
static final AnchorBeginOrNewline ANCHOR_BEGIN_OR_NEWLINE
= new AnchorBeginOrNewline();
static final AnchorBeginRelative ANCHOR_BEGIN_RELATIVE
= new AnchorBeginRelative();
static final AnchorEnd ANCHOR_END = new AnchorEnd();
static final AnchorEndOnly ANCHOR_END_ONLY = new AnchorEndOnly();
static final AnchorEndOrNewline ANCHOR_END_OR_NEWLINE
= new AnchorEndOrNewline();
/**
 * Zero-width anchor that succeeds only at offset 0.
 */
static class AnchorBegin extends NullableNode {
  @Override
  boolean isAnchorBegin()
  {
    return true;
  }

  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    return offset == 0 ? offset : -1;
  }
}
/**
 * Zero-width anchor that succeeds at offset 0 or directly after a '\n'.
 */
private static class AnchorBeginOrNewline extends NullableNode {
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (offset == 0) {
      return offset;
    }

    final boolean afterNewline = string.charAt(offset - 1) == '\n';

    return afterNewline ? offset : -1;
  }
}
/**
 * Zero-width anchor that succeeds only when the offset equals
 * {@code state._start}.
 */
static class AnchorBeginRelative extends NullableNode {
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    return offset == state._start ? offset : -1;
  }
}
/**
 * Zero-width anchor that succeeds at the end of the input, or directly
 * before a '\n' that is the last character of the input.
 */
private static class AnchorEnd extends NullableNode {
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (offset == strlen) {
      return offset;
    }

    if (offset + 1 == strlen && string.charAt(offset) == '\n') {
      return offset;
    }

    return -1;
  }
}
/**
 * Zero-width anchor that succeeds only at the very end of the input.
 */
private static class AnchorEndOnly extends NullableNode {
  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    return offset == length ? offset : -1;
  }
}
/**
 * Zero-width anchor that succeeds at the end of the input or directly
 * before any '\n'.
 */
private static class AnchorEndOrNewline extends NullableNode {
  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    if (offset == length) {
      return offset;
    }

    final boolean beforeNewline = string.charAt(offset) == '\n';

    return beforeNewline ? offset : -1;
  }
}
// Prebuilt character-class nodes created from the RegexpSet constants.
static final RegexpNode DIGIT = RegexpSet.DIGIT.createNode();
static final RegexpNode NOT_DIGIT = RegexpSet.DIGIT.createNotNode();
// NOTE(review): DOT uses createNotNode() and NOT_DOT uses createNode(), the
// reverse of the other pairs; presumably RegexpSet.DOT holds the characters
// that '.' must not match -- confirm against RegexpSet.
static final RegexpNode DOT = RegexpSet.DOT.createNotNode();
static final RegexpNode NOT_DOT = RegexpSet.DOT.createNode();
static final RegexpNode SPACE = RegexpSet.SPACE.createNode();
static final RegexpNode NOT_SPACE = RegexpSet.SPACE.createNotNode();
static final RegexpNode S_WORD = RegexpSet.WORD.createNode();
static final RegexpNode NOT_S_WORD = RegexpSet.WORD.createNotNode();
/**
 * Matches one character whose code is below 128 and whose entry in the
 * membership table is true.
 */
static class AsciiSet extends AbstractCharNode {
  private final boolean []_set;

  /** Creates an empty set backed by a 128-entry table. */
  AsciiSet()
  {
    this._set = new boolean[128];
  }

  /** Uses the given table directly; it is not copied. */
  AsciiSet(boolean []set)
  {
    this._set = set;
  }

  /**
   * Marks every member of this set in {@code firstSet} and returns the
   * array; returns null when the array is null.
   */
  @Override
  boolean []firstSet(boolean []firstSet)
  {
    if (firstSet == null) {
      return null;
    }

    int index = 0;

    for (boolean isMember : _set) {
      if (isMember) {
        firstSet[index] = true;
      }

      index++;
    }

    return firstSet;
  }

  void setChar(char ch)
  {
    _set[ch] = true;
  }

  void clearChar(char ch)
  {
    _set[ch] = false;
  }

  /**
   * Returns offset + 1 when the character at {@code offset} is a member,
   * -1 otherwise (including at end of input).
   */
  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    if (offset >= length) {
      return -1;
    }

    final char current = string.charAt(offset);
    final boolean isMember = current < 128 && _set[current];

    return isMember ? offset + 1 : -1;
  }
}
/**
 * Matches one character that is NOT an ASCII member of the table. A
 * high/low surrogate pair is consumed as a single match of width 2.
 */
static class AsciiNotSet extends AbstractCharNode {
  private final boolean []_set;

  /** Creates an empty exclusion table with 128 entries. */
  AsciiNotSet()
  {
    this._set = new boolean[128];
  }

  /** Uses the given table directly; it is not copied. */
  AsciiNotSet(boolean []set)
  {
    this._set = set;
  }

  void setChar(char ch)
  {
    _set[ch] = true;
  }

  void clearChar(char ch)
  {
    _set[ch] = false;
  }

  /**
   * Returns -1 at end of input or when the character is an excluded ASCII
   * character; otherwise the offset after the character (or after the
   * surrogate pair).
   */
  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    if (offset >= length) {
      return -1;
    }

    final char current = string.charAt(offset);

    if (current < 128 && _set[current]) {
      return -1;
    }

    // php/4ef3
    final boolean isSurrogatePair
      = Character.isHighSurrogate(current)
        && offset + 1 < length
        && Character.isLowSurrogate(string.charAt(offset + 1));

    final int width = isSurrogatePair ? 2 : 1;

    return offset + width;
  }
}
/**
 * Greedy loop over a node that is advanced one offset per iteration
 * (created by AbstractCharNode.createLoop). The continuation of the
 * pattern is kept in {@code _next}.
 *
 * NOTE(review): minLength() reports only {@code _min} and does not add
 * {@code _next.minLength()} -- confirm that callers expect this.
 */
static class CharLoop extends RegexpNode {
  private final RegexpNode _node;
  private RegexpNode _next = N_END;
  private int _min;
  private int _max;

  /**
   * @throws IllegalStateException if {@code min} is negative
   */
  CharLoop(RegexpNode node, int min, int max)
  {
    this._node = node.getHead();
    this._min = min;
    this._max = max;

    if (min < 0) {
      throw new IllegalStateException();
    }
  }

  /** Copies the continuation first, then the looped node. */
  @Override
  RegexpNode copyImpl(HashMap state)
  {
    final RegexpNode nextCopy = _next.copy(state);
    final RegexpNode nodeCopy = _node.copy(state);

    final CharLoop duplicate = new CharLoop(nodeCopy, _min, _max);
    duplicate._next = nextCopy;

    return duplicate;
  }

  /** Appends {@code next} to the continuation and returns this loop. */
  @Override
  RegexpNode concat(RegexpNode next)
  {
    if (next == null) {
      throw new NullPointerException();
    }

    _next = (_next != null) ? _next.concat(next) : next.getHead();

    return this;
  }

  /**
   * A 0..1 loop around this loop is folded in by lowering the minimum to
   * 0; any other bounds wrap this loop in a LoopHead.
   */
  @Override
  RegexpNode createLoop(Regcomp parser, int min, int max)
  {
    final boolean isOptional = min == 0 && max == 1;

    if (! isOptional) {
      return new LoopHead(parser, this, min, max);
    }

    _min = 0;

    return this;
  }

  @Override
  int minLength()
  {
    return _min;
  }

  /**
   * Collects the first set of the looped node and, when the loop may be
   * skipped or the node is nullable, of the continuation as well.
   */
  @Override
  boolean []firstSet(boolean []firstSet)
  {
    final boolean []merged = _node.firstSet(firstSet);
    final boolean mustConsume = _min > 0 && ! _node.isNullable();

    return mustConsume ? merged : _next.firstSet(merged);
  }

  //
  // match functions
  //

  /**
   * Consumes the mandatory iterations, then as many optional ones as
   * succeed, and finally tries the continuation from the longest count
   * down to the minimum.
   */
  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    final RegexpNode continuation = _next;
    final RegexpNode element = _node;
    final int lower = _min;
    final int upper = _max;

    int count = 0;

    // mandatory iterations: any failure fails the loop
    while (count < lower) {
      int result = element.match(string, length, offset + count, state);

      if (result < 0) {
        return result;
      }

      count++;
    }

    // optional iterations: take as many as match
    while (count < upper
           && element.match(string, length, offset + count, state) >= 0) {
      count++;
    }

    // back off one iteration at a time until the continuation matches
    while (lower <= count) {
      int result = continuation.match(string, length, offset + count, state);

      if (result >= 0) {
        return result;
      }

      count--;
    }

    return -1;
  }

  @Override
  protected void toString(StringBuilder sb, Map map)
  {
    if (toStringAdd(sb, map)) {
      return;
    }

    sb.append(toStringName());
    sb.append("[");
    sb.append(_min);
    sb.append(", ");
    sb.append(_max);
    sb.append(", ");
    _node.toString(sb, map);
    sb.append(", ");
    _next.toString(sb, map);
    sb.append("]");
  }
}
/**
 * Ungreedy loop over a node that is advanced one offset per iteration
 * (created by AbstractCharNode.createLoopUngreedy). The continuation is
 * tried before each additional iteration.
 */
static class CharUngreedyLoop extends RegexpNode {
  private final RegexpNode _node;
  private RegexpNode _next = N_END;
  private int _min;
  private int _max;

  /**
   * @throws IllegalStateException if {@code min} is negative
   */
  CharUngreedyLoop(RegexpNode node, int min, int max)
  {
    this._node = node.getHead();
    this._min = min;
    this._max = max;

    if (min < 0) {
      throw new IllegalStateException();
    }
  }

  /** Copies the continuation first, then the looped node. */
  @Override
  RegexpNode copyImpl(HashMap state)
  {
    final RegexpNode nextCopy = _next.copy(state);
    final RegexpNode nodeCopy = _node.copy(state);

    final CharUngreedyLoop duplicate
      = new CharUngreedyLoop(nodeCopy, _min, _max);
    duplicate._next = nextCopy;

    return duplicate;
  }

  /** Appends {@code next} to the continuation and returns this loop. */
  @Override
  RegexpNode concat(RegexpNode next)
  {
    if (next == null) {
      throw new NullPointerException();
    }

    _next = (_next != null) ? _next.concat(next) : next.getHead();

    return this;
  }

  /**
   * A 0..1 loop around this loop is folded in by lowering the minimum to
   * 0; any other bounds wrap this loop in a LoopHead.
   */
  @Override
  RegexpNode createLoop(Regcomp parser, int min, int max)
  {
    final boolean isOptional = min == 0 && max == 1;

    if (! isOptional) {
      return new LoopHead(parser, this, min, max);
    }

    _min = 0;

    return this;
  }

  @Override
  int minLength()
  {
    return _min;
  }

  /**
   * Collects the first set of the looped node and, when the loop may be
   * skipped or the node is nullable, of the continuation as well.
   */
  @Override
  boolean []firstSet(boolean []firstSet)
  {
    final boolean []merged = _node.firstSet(firstSet);
    final boolean mustConsume = _min > 0 && ! _node.isNullable();

    return mustConsume ? merged : _next.firstSet(merged);
  }

  //
  // match functions
  //

  /**
   * Consumes the mandatory iterations, then alternates: try the
   * continuation, and if it fails consume one more iteration.
   */
  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    final RegexpNode continuation = _next;
    final RegexpNode element = _node;
    final int lower = _min;
    final int upper = _max;

    int count = 0;

    // mandatory iterations: any failure fails the loop
    while (count < lower) {
      int result = element.match(string, length, offset + count, state);

      if (result < 0) {
        return result;
      }

      count++;
    }

    while (count <= upper) {
      // shortest match first: try the continuation before growing
      int result = continuation.match(string, length, offset + count, state);

      if (result >= 0) {
        return result;
      }

      if (element.match(string, length, offset + count, state) < 0) {
        return -1;
      }

      count++;
    }

    return -1;
  }

  @Override
  public String toString()
  {
    StringBuilder text = new StringBuilder();

    text.append("CharUngreedyLoop[").append(_min).append(", ");
    text.append(_max).append(", ").append(_node).append(", ");
    text.append(_next).append("]");

    return text.toString();
  }
}
/**
 * Sequence node: matches {@code _head} and then, from the offset the head
 * returned, {@code _next}.
 */
final static class Concat extends RegexpNode {
  private final RegexpNode _head;
  private RegexpNode _next;

  /**
   * @throws NullPointerException if either argument is null
   */
  Concat(RegexpNode head, RegexpNode next)
  {
    if (head == null) {
      throw new NullPointerException();
    }

    if (next == null) {
      throw new NullPointerException();
    }

    this._head = head;
    this._next = next;
  }

  /** Copies the head, then the rest, into a new Concat. */
  @Override
  RegexpNode copyImpl(HashMap state)
  {
    final RegexpNode headCopy = _head.copy(state);
    final RegexpNode nextCopy = _next.copy(state);

    return new Concat(headCopy, nextCopy);
  }

  /** Appends {@code next} to the rest of the sequence, in place. */
  @Override
  RegexpNode concat(RegexpNode next)
  {
    final RegexpNode extended = _next.concat(next);
    _next = extended;

    return this;
  }

  //
  // optim functions
  //

  @Override
  int minLength()
  {
    final int headLength = _head.minLength();
    final int restLength = _next.minLength();

    return headLength + restLength;
  }

  @Override
  int firstChar()
  {
    return _head.firstChar();
  }

  /**
   * First set of the head, extended by the first set of the rest when the
   * head is nullable.
   */
  @Override
  boolean []firstSet(boolean []firstSet)
  {
    final boolean []merged = _head.firstSet(firstSet);

    return _head.isNullable() ? _next.firstSet(merged) : merged;
  }

  @Override
  String prefix()
  {
    return _head.prefix();
  }

  @Override
  boolean isAnchorBegin()
  {
    return _head.isAnchorBegin();
  }

  RegexpNode getConcatHead()
  {
    return _head;
  }

  RegexpNode getConcatNext()
  {
    return _next;
  }

  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    final int afterHead = _head.match(string, length, offset, state);

    if (afterHead < 0) {
      return -1;
    }

    return _next.match(string, length, afterHead, state);
  }

  @Override
  protected void toString(StringBuilder sb, Map map)
  {
    if (! toStringAdd(sb, map)) {
      sb.append(toStringName()).append("[");
      _head.toString(sb, map);
      sb.append(", ");
      _next.toString(sb, map);
      sb.append("]");
    }
  }
}
/**
 * Common part of the conditional nodes: holds the two branches and the
 * paired ConditionalTail. Concatenation, looping and alternation are
 * forwarded to the tail.
 */
abstract static class ConditionalHead extends RegexpNode {
  protected RegexpNode _first;
  protected RegexpNode _second;
  // the ConditionalTail constructor also registers itself through setTail()
  protected RegexpNode _tail = new ConditionalTail(this);

  void setFirst(RegexpNode first)
  {
    this._first = first;
  }

  void setSecond(RegexpNode second)
  {
    this._second = second;
  }

  void setTail(RegexpNode tail)
  {
    this._tail = tail;
  }

  @Override
  RegexpNode getTail()
  {
    return _tail;
  }

  /** Appends {@code next} after the tail and returns this head. */
  @Override
  RegexpNode concat(RegexpNode next)
  {
    final RegexpNode tail = _tail;

    tail.concat(next);

    return this;
  }

  @Override
  RegexpNode createLoop(Regcomp parser, int min, int max)
  {
    final RegexpNode tail = _tail;

    return tail.createLoop(parser, min, max);
  }

  /**
   * Create an or expression
   */
  @Override
  RegexpNode createOr(RegexpNode node)
  {
    final RegexpNode tail = _tail;

    return tail.createOr(node);
  }
}
/**
 * Conditional whose test is itself a node: when {@code _conditional}
 * matches at the current offset the first branch is run, otherwise the
 * second branch, or the tail when there is no second branch.
 */
static class GenericConditionalHead extends ConditionalHead {
  private final RegexpNode _conditional;

  GenericConditionalHead(RegexpNode conditional)
  {
    _conditional = conditional;
  }

  /**
   * Copies the condition, both branches and the tail. The copy is
   * registered in {@code state} before the children are copied so that
   * the tail's back-reference to this head resolves to the same copy.
   */
  @Override
  RegexpNode copyImpl(HashMap state)
  {
    RegexpNode conditional = _conditional.copy(state);

    GenericConditionalHead copy = new GenericConditionalHead(conditional);
    state.put(this, copy);

    copy._first = _first.copy(state);
    // the second branch is optional (match() checks it for null), so it
    // must not be dereferenced unconditionally
    copy._second = _second != null ? _second.copy(state) : null;
    copy._tail = _tail.copy(state);

    return copy;
  }

  /**
   * The condition and the chosen branch are both matched at the same
   * {@code offset}; the condition's own result offset is not used.
   */
  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    if (_conditional.match(string, length, offset, state) >= 0) {
      int match = _first.match(string, length, offset, state);

      return match;
    }
    else if (_second != null)
      return _second.match(string, length, offset, state);
    else
      return _tail.match(string, length, offset, state);
  }

  @Override
  public String toString()
  {
    return getClass().getSimpleName() + "[" + _conditional
           + "," + _first
           + "," + _tail
           + "]";
  }
}
/**
 * Conditional whose test is "has group N been set": when the group has a
 * recorded range in the state the first branch is run, otherwise the
 * second branch, or the tail when there is no second branch.
 */
static class GroupConditionalHead extends ConditionalHead {
  private final int _group;

  GroupConditionalHead(int group)
  {
    _group = group;
  }

  /**
   * Copies both branches and the tail. The copy is registered in
   * {@code state} before the children are copied so that the tail's
   * back-reference to this head resolves to the same copy.
   */
  @Override
  RegexpNode copyImpl(HashMap state)
  {
    GroupConditionalHead copy = new GroupConditionalHead(_group);
    state.put(this, copy);

    copy._first = _first.copy(state);
    // the second branch is optional (match() checks it for null), so it
    // must not be dereferenced unconditionally
    copy._second = _second != null ? _second.copy(state) : null;
    copy._tail = _tail.copy(state);

    return copy;
  }

  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    int begin = state.getBegin(_group);
    int end = state.getEnd(_group);

    // the group counts as set when it is within the state's group count
    // and its recorded range is non-negative and well ordered
    if (_group <= state.getLength() && begin >= 0 && begin <= end) {
      int match = _first.match(string, length, offset, state);

      return match;
    }
    else if (_second != null)
      return _second.match(string, length, offset, state);
    else
      return _tail.match(string, length, offset, state);
  }

  @Override
  public String toString()
  {
    return getClass().getSimpleName() + "[" + _group
           + "," + _first
           + "," + _tail
           + "]";
  }
}
/**
 * Closing node of a conditional. It holds the continuation of the pattern
 * in {@code _next} and a back-reference to the ConditionalHead.
 */
static class ConditionalTail extends RegexpNode {
  private RegexpNode _head;
  private RegexpNode _next;

  // used by copyImpl only; the fields are filled in afterwards
  private ConditionalTail()
  {
  }

  /** Creates the tail and registers it on {@code head}. */
  ConditionalTail(ConditionalHead head)
  {
    _next = N_END;
    _head = head;
    head.setTail(this);
  }

  @Override
  RegexpNode getHead()
  {
    return _head;
  }

  /**
   * The copy is registered in {@code state} before the head is copied,
   * because the head's copyImpl copies its tail in turn.
   */
  @Override
  RegexpNode copyImpl(HashMap state)
  {
    ConditionalTail copy = new ConditionalTail();
    state.put(this, copy);

    copy._head = _head.copy(state);
    copy._next = _next.copy(state);

    return copy;
  }

  /** Appends {@code next} to the continuation and returns the head. */
  @Override
  RegexpNode concat(RegexpNode next)
  {
    if (_next != null)
      _next = _next.concat(next);
    else
      _next = next;

    return _head;
  }

  /**
   * Wraps the whole conditional in a greedy loop.
   *
   * The LoopHead constructor calls {@code _head.concat(loopTail)}, which
   * reaches concat() above and appends the loop tail to {@code _next}.
   * The tail must not be appended a second time: that would call
   * LoopTail.concat() with the tail itself as argument, which fails its
   * self-reference check with IllegalStateException.
   */
  @Override
  RegexpNode createLoop(Regcomp parser, int min, int max)
  {
    LoopHead head = new LoopHead(parser, _head, min, max);

    return head;
  }

  /**
   * Wraps the whole conditional in an ungreedy loop.
   *
   * As in createLoop, the LoopHeadUngreedy constructor already appends
   * its tail to {@code _next} through {@code getTail().concat(...)}, so
   * nothing more is linked here.
   */
  @Override
  RegexpNode createLoopUngreedy(Regcomp parser, int min, int max)
  {
    LoopHeadUngreedy head = new LoopHeadUngreedy(parser, _head, min, max);

    return head;
  }

  /**
   * Create an or expression
   */
  @Override
  RegexpNode createOr(RegexpNode node)
  {
    _next = _next.createOr(node);

    return getHead();
  }

  /** Continues with whatever follows the conditional. */
  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    return _next.match(string, length, offset, state);
  }
}
// shared instance of EmptyNode
final static EmptyNode EMPTY = new EmptyNode();
/**
* Matches an empty production
*
* match() always succeeds and returns the offset unchanged. Unlike End,
* this class does not override concat().
*/
static class EmptyNode extends RegexpNode {
// needed for php/4e6b
EmptyNode()
{
}
@Override
int match(StringValue string, int length, int offset, RegexpState state)
{
return offset;
}
}
/**
* Terminator node (see N_END). match() always succeeds without consuming
* input.
*/
static class End extends RegexpNode {
// Concatenating onto the terminator replaces it: the result is just next.
@Override
RegexpNode concat(RegexpNode next)
{
return next;
}
@Override
int match(StringValue string, int length, int offset, RegexpState state)
{
return offset;
}
}
/**
 * Capturing group in a single node: records the begin offset, matches the
 * body, and records the end offset on success.
 */
static class Group extends RegexpNode {
  private final RegexpNode _node;
  private final int _group;

  Group(RegexpNode node, int group)
  {
    this._node = node.getHead();
    this._group = group;
  }

  /**
   * On failure the previous begin offset of the group is restored and -1
   * is returned.
   */
  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    final int savedBegin = state.getBegin(_group);

    state.setBegin(_group, offset);

    final int end = _node.match(string, length, offset, state);

    if (end < 0) {
      state.setBegin(_group, savedBegin);

      return -1;
    }

    state.setEnd(_group, end);

    return end;
  }
}
/**
* Opening node of a capturing group. match() records the group's begin
* offset in the state and runs the group body; the paired GroupTail, created
* in the constructor, records the end offset.
*/
static class GroupHead extends RegexpNode {
// first node of the group body, set through setNode()
private RegexpNode _node;
private GroupTail _tail;
private int _group;
// used by copyImpl only; the fields are filled in afterwards
private GroupHead()
{
}
GroupHead(int group)
{
_group = group;
_tail = new GroupTail(group, this);
}
void setNode(RegexpNode node)
{
_node = node.getHead();
// php/4eh1
// if the body's head is this node itself, use the tail as the body so
// that match() does not call itself
if (_node == this)
_node = _tail;
}
@Override
RegexpNode getTail()
{
return _tail;
}
RegexpNode getNode()
{
return _node;
}
// The copy is registered in state before the children are copied, because
// GroupTail.copyImpl copies its head in turn and must get this copy back.
@Override
RegexpNode copyImpl(HashMap state)
{
GroupHead copy = new GroupHead();
state.put(this, copy);
copy._group = _group;
if (_node == this) {
copy._node = copy;
}
else if (_node == null) {
// no body set: the copy's _node stays null
}
else {
copy._node = _node.copy(state);
}
copy._tail = (GroupTail) _tail.copy(state);
return copy;
}
// Concatenation and loop creation are forwarded to the tail.
@Override
RegexpNode concat(RegexpNode next)
{
_tail.concat(next);
return this;
}
@Override
RegexpNode createLoop(Regcomp parser, int min, int max)
{
return _tail.createLoop(parser, min, max);
}
@Override
RegexpNode createLoopUngreedy(Regcomp parser, int min, int max)
{
return _tail.createLoopUngreedy(parser, min, max);
}
// The optimization queries below are forwarded to the group body.
@Override
int minLength()
{
return _node.minLength();
}
@Override
int firstChar()
{
return _node.firstChar();
}
@Override
boolean []firstSet(boolean []firstSet)
{
return _node.firstSet(firstSet);
}
@Override
String prefix()
{
return _node.prefix();
}
@Override
boolean isAnchorBegin()
{
return _node.isAnchorBegin();
}
// Sets the group's begin to the current offset, runs the body, and restores
// the previous begin when the body fails. The body's result is returned
// as is.
@Override
int match(StringValue string, int length, int offset, RegexpState state)
{
int oldBegin = state.getBegin(_group);
state.setBegin(_group, offset);
int tail = _node.match(string, length, offset, state);
if (tail >= 0) {
return tail;
}
else {
state.setBegin(_group, oldBegin);
return tail;
}
}
@Override
protected void toString(StringBuilder sb, Map map)
{
if (toStringAdd(sb, map))
return;
sb.append(toStringName());
sb.append("[");
sb.append(_group);
sb.append(", ");
_node.toString(sb, map);
sb.append("]");
}
}
/**
* Closing node of a capturing group. match() records the group's end offset
* in the state and then continues with the rest of the pattern, held in
* _next.
*/
static class GroupTail extends RegexpNode {
private GroupHead _head;
private RegexpNode _next;
private final int _group;
// used by copyImpl only; _head and _next are filled in afterwards
private GroupTail(int group)
{
_group = group;
}
private GroupTail(int group, GroupHead head)
{
_next = N_END;
_head = head;
_group = group;
}
@Override
RegexpNode getHead()
{
return _head;
}
// The copy is registered in state before the head is copied, because
// GroupHead.copyImpl copies its tail in turn and must get this copy back.
@Override
RegexpNode copyImpl(HashMap state)
{
GroupTail tail = new GroupTail(_group);
state.put(this, tail);
GroupHead head = (GroupHead) _head.copy(state);
tail._head = head;
tail._next = _next.copy(state);
return tail;
}
// Appends next to the continuation and returns the group head.
@Override
RegexpNode concat(RegexpNode next)
{
if (_next != null) {
_next = _next.concat(next);
}
else {
_next = next;
}
return _head;
}
// Wraps the whole group in a loop. The LoopHead constructor already calls
// _head.concat(loopTail), which ends up in concat() above; the assignment
// below then points _next directly at the loop tail.
@Override
RegexpNode createLoop(Regcomp parser, int min, int max)
{
LoopHead head = new LoopHead(parser, _head, min, max);
_next = head.getTail();
return head;
}
// Ungreedy counterpart of createLoop.
@Override
RegexpNode createLoopUngreedy(Regcomp parser, int min, int max)
{
LoopHeadUngreedy head = new LoopHeadUngreedy(parser, _head, min, max);
_next = head.getTail();
return head;
}
/**
* Create an or expression
*/
// php/4e6b
/*
@Override
RegexpNode createOr(RegexpNode node)
{
_next = _next.createOr(node);
return getHead();
}
*/
// minimum length of what follows the group
@Override
int minLength()
{
return _next.minLength();
}
@Override
int match(StringValue string, int length, int offset, RegexpState state)
{
// a finalized group is left untouched; just continue with the rest
if (state.isFinalized(_group)) {
return _next.match(string, length, offset, state);
}
int oldEnd = state.getEnd(_group);
int oldLength = state.getLength();
// for groups above 0: record the end offset and raise the state's group
// count to this group if it was lower
if (_group > 0) {
state.setEnd(_group, offset);
if (oldLength < _group)
state.setLength(_group);
}
int tail = _next.match(string, length, offset, state);
if (tail < 0) {
// the continuation failed: restore the saved end and group count
state.setEnd(_group, oldEnd);
state.setLength(oldLength);
return -1;
}
else {
return tail;
}
}
@Override
protected void toString(StringBuilder sb, Map map)
{
if (toStringAdd(sb, map))
return;
sb.append(toStringName());
sb.append("[");
sb.append(_group);
sb.append(", ");
_next.toString(sb, map);
sb.append("]");
}
}
/**
 * Back-reference: matches the same text that the referenced group
 * captured.
 */
static class GroupRef extends RegexpNode {
  private final int _group;

  GroupRef(int group)
  {
    this._group = group;
  }

  /**
   * Fails when the state's group count is below the referenced group, or
   * when the text at {@code offset} differs from the captured range.
   */
  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    if (state.getLength() < _group) {
      return -1;
    }

    final int capturedBegin = state.getBegin(_group);
    final int capturedLength = state.getEnd(_group) - capturedBegin;

    final boolean same
      = string.regionMatches(offset, string, capturedBegin, capturedLength);

    return same ? offset + capturedLength : -1;
  }
}
/**
 * Positive lookahead: succeeds without consuming input when the wrapped
 * node matches at the current offset.
 */
static class Lookahead extends RegexpNode {
  private final RegexpNode _head;

  Lookahead(RegexpNode head)
  {
    this._head = head;
  }

  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    final boolean found = _head.match(string, length, offset, state) >= 0;

    return found ? offset : -1;
  }
}
/**
 * Negative lookahead: succeeds without consuming input when the wrapped
 * node does NOT match at the current offset.
 */
static class NotLookahead extends RegexpNode {
  private final RegexpNode _head;

  NotLookahead(RegexpNode head)
  {
    this._head = head;
  }

  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    final boolean absent = _head.match(string, length, offset, state) < 0;

    return absent ? offset : -1;
  }
}
/**
 * Positive lookbehind: steps back by the wrapped node's minLength() and
 * succeeds, without consuming input, when the node matches from there.
 */
static class Lookbehind extends RegexpNode {
  private final RegexpNode _head;

  Lookbehind(RegexpNode head)
  {
    this._head = head.getHead();
  }

  /** Fails when fewer than minLength() characters precede the offset. */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    final int width = _head.minLength();

    final boolean found
      = offset >= width
        && _head.match(string, strlen, offset - width, state) >= 0;

    return found ? offset : -1;
  }
}
/**
 * Negative lookbehind: steps back by the wrapped node's minLength() and
 * succeeds, without consuming input, when the node does NOT match from
 * there.
 */
static class NotLookbehind extends RegexpNode {
  private final RegexpNode _head;

  NotLookbehind(RegexpNode head)
  {
    this._head = head;
  }

  /**
   * Also succeeds when fewer than minLength() characters precede the
   * offset; the wrapped node is not consulted in that case.
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    final int width = _head.minLength();

    final boolean precededByMatch
      = width <= offset
        && _head.match(string, strlen, offset - width, state) >= 0;

    return precededByMatch ? -1 : offset;
  }
}
/**
* A nullable node can match an empty string.
*
* Base class of the anchor nodes in this file; it only overrides
* isNullable() to return true.
*/
abstract static class NullableNode extends RegexpNode {
@Override
boolean isNullable()
{
return true;
}
}
/**
* Head of a greedy counted loop. The loop body (_node) ends in the paired
* LoopTail; the iteration count and the offset at which the current
* iteration started are kept in state._loopCount / state._loopOffset under
* this loop's index.
*/
static class LoopHead extends RegexpNode {
// slot in state._loopCount and state._loopOffset used by this loop
private final int _index;
// loop body; the constructor appends _tail to it
RegexpNode _node;
private RegexpNode _tail;
private int _min;
private int _max;
// used by copyImpl only; _node and _tail are filled in afterwards
private LoopHead(int index, int min, int max)
{
_index = index;
_min = min;
_max = max;
}
LoopHead(Regcomp parser, RegexpNode node, int min, int max)
{
_index = parser.nextLoopIndex();
_tail = new LoopTail(_index, this);
// link the body to the loop tail and keep the head of the result
_node = node.concat(_tail).getHead();
_min = min;
_max = max;
}
@Override
RegexpNode getTail()
{
return _tail;
}
// The copy is registered in state before the body and tail are copied,
// because LoopTail.copyImpl copies its head in turn.
@Override
RegexpNode copyImpl(HashMap state)
{
LoopHead head = new LoopHead(_index, _min, _max);
state.put(this, head);
RegexpNode node = _node.copy(state);
RegexpNode tail = _tail.copy(state);
head._node = node;
head._tail = tail;
return head;
}
// Appends next after the loop tail and returns this head.
@Override
RegexpNode concat(RegexpNode next)
{
_tail.concat(next);
return this;
}
// A 0..1 loop around this loop is folded in by lowering _min to 0; other
// bounds wrap this loop in a new LoopHead.
@Override
RegexpNode createLoop(Regcomp parser, int min, int max)
{
if (min == 0 && max == 1) {
_min = 0;
return this;
}
else
return new LoopHead(parser, this, min, max);
}
@Override
int minLength()
{
return _min * _node.minLength() + _tail.minLength();
}
// First set of the body; when the loop can be skipped (_min == 0) or the
// body is nullable, _tail.firstSet() is consulted as well.
// NOTE(review): LoopTail does not override firstSet in this file, so with a
// LoopTail as _tail that call returns the base-class null -- confirm.
@Override
boolean []firstSet(boolean []firstSet)
{
firstSet = _node.firstSet(firstSet);
if (_min > 0 && ! _node.isNullable())
return firstSet;
firstSet = _tail.firstSet(firstSet);
return firstSet;
}
//
// match functions
//
@Override
int match(StringValue string, int strlen, int offset, RegexpState state)
{
state._loopCount[_index] = 0;
RegexpNode node = _node;
int min = _min;
int i;
// Run the first min - 1 iterations one by one. While the count is below
// the minimum, LoopTail.match returns straight back with the offset, so
// each node.match call here covers exactly one iteration.
for (i = 0; i < min - 1; i++) {
state._loopCount[_index] = i;
offset = node.match(string, strlen, offset, state);
if (offset < 0)
return offset;
}
// Start the next iteration; from here on LoopTail.match drives further
// iterations and the continuation.
state._loopCount[_index] = i;
state._loopOffset[_index] = offset;
int tail = node.match(string, strlen, offset, state);
if (tail >= 0) {
return tail;
}
else if (state._loopCount[_index] < _min) {
// the minimum number of iterations was not reached
return tail;
}
else {
// the body failed but the minimum is satisfied: continue from the tail
// at the offset where this iteration would have started
return _tail.match(string, strlen, offset, state);
}
}
@Override
public String toString()
{
return "LoopHead[" + _min + ", " + _max + ", " + _node + "]";
}
}
/**
* Tail of a greedy counted loop. It is reached when one iteration of the loop
* body has matched; it then either returns to LoopHead, starts another
* iteration, or continues with the rest of the pattern held in _next.
*/
static class LoopTail extends RegexpNode {
// slot in state._loopCount and state._loopOffset, shared with the head
private final int _index;
private LoopHead _head;
private RegexpNode _next;
// used by copyImpl only; _head and _next are filled in afterwards
private LoopTail(int index)
{
_index = index;
}
LoopTail(int index, LoopHead head)
{
_index = index;
_head = head;
_next = N_END;
}
@Override
RegexpNode getHead()
{
return _head;
}
// The copy is registered in state before the head is copied, because
// LoopHead.copyImpl copies its tail in turn.
@Override
RegexpNode copyImpl(HashMap state)
{
LoopTail tail = new LoopTail(_index);
state.put(this, tail);
LoopHead head = (LoopHead) _head.copy(state);
RegexpNode next = _next.copy(state);
tail._head = head;
tail._next = next;
return tail;
}
// Appends next to the continuation. Ending up with this tail as its own
// continuation is rejected with IllegalStateException.
@Override
RegexpNode concat(RegexpNode next)
{
if (_next != null)
_next = _next.concat(next);
else
_next = next;
if (_next == this)
throw new IllegalStateException();
return this;
}
//
// match functions
//
@Override
int match(StringValue string, int strlen, int offset, RegexpState state)
{
int oldCount = state._loopCount[_index];
if (oldCount + 1 < _head._min) {
// still below the minimum: hand the offset back; LoopHead.match runs
// these iterations in its own loop
return offset;
}
else if (oldCount + 1 < _head._max) {
int oldOffset = state._loopOffset[_index];
// only iterate again if this iteration moved the offset; presumably
// this avoids repeating an empty iteration forever
if (oldOffset != offset) {
state._loopCount[_index] = oldCount + 1;
state._loopOffset[_index] = offset;
// greedy: try another iteration before the continuation
int tail = _head._node.match(string, strlen, offset, state);
if (tail >= 0) {
return tail;
}
// the extra iteration failed: restore the loop state
state._loopCount[_index] = oldCount;
state._loopOffset[_index] = oldOffset;
}
}
// continue with what follows the loop
int match = _next.match(string, strlen, offset, state);
return match;
}
@Override
public String toString()
{
return "LoopTail[" + _next + "]";
}
}
/**
* Head of an ungreedy counted loop. After the mandatory iterations the
* continuation of the pattern is tried first; another iteration of the body
* is only attempted when the continuation fails.
*/
static class LoopHeadUngreedy extends RegexpNode {
// slot in state._loopCount and state._loopOffset used by this loop
private final int _index;
// loop body; the constructor appends _tail to it
RegexpNode _node;
private LoopTailUngreedy _tail;
private int _min;
private int _max;
// used by copyImpl only; _node and _tail are filled in afterwards
private LoopHeadUngreedy(int index, int min, int max)
{
_index = index;
_min = min;
_max = max;
}
LoopHeadUngreedy(Regcomp parser, RegexpNode node, int min, int max)
{
_index = parser.nextLoopIndex();
_min = min;
_max = max;
_tail = new LoopTailUngreedy(_index, this);
// unlike LoopHead, the loop tail is appended to node.getTail()
_node = node.getTail().concat(_tail).getHead();
}
@Override
RegexpNode getTail()
{
return _tail;
}
// The copy is registered in state before the tail and body are copied,
// because LoopTailUngreedy.copyImpl copies its head in turn.
@Override
RegexpNode copyImpl(HashMap state)
{
LoopHeadUngreedy copy = new LoopHeadUngreedy(_index, _min, _max);
state.put(this, copy);
RegexpNode tail = _tail.copy(state);
RegexpNode node = _node.copy(state);
copy._tail = (LoopTailUngreedy) tail;
copy._node = node;
return copy;
}
// Appends next after the loop tail and returns this head.
@Override
RegexpNode concat(RegexpNode next)
{
_tail.concat(next);
return this;
}
// A 0..1 loop around this loop is folded in by lowering _min to 0; other
// bounds wrap this loop in a (greedy) LoopHead.
@Override
RegexpNode createLoop(Regcomp parser, int min, int max)
{
if (min == 0 && max == 1) {
_min = 0;
return this;
}
else
return new LoopHead(parser, this, min, max);
}
@Override
int minLength()
{
return _min * _node.minLength() + _tail.minLength();
}
//
// match functions
//
@Override
int match(StringValue string, int strlen, int offset, RegexpState state)
{
state._loopCount[_index] = 0;
RegexpNode node = _node;
int min = _min;
// Mandatory iterations. While the recorded count is below the minimum,
// LoopTailUngreedy.match returns straight back with the offset.
for (int i = 0; i < min; i++) {
state._loopCount[_index] = i;
state._loopOffset[_index] = offset;
offset = node.match(string, strlen, offset, state);
if (offset < 0)
return -1;
}
// ungreedy: try the continuation before any further iteration
int tail = _tail._next.match(string, strlen, offset, state);
if (tail >= 0)
return tail;
if (min < _max) {
// the continuation failed: start one more iteration; the loop tail
// takes over from there
state._loopCount[_index] = min;
state._loopOffset[_index] = offset;
return node.match(string, strlen, offset, state);
}
else
return -1;
}
@Override
public String toString()
{
return "LoopHeadUngreedy[" + _min + ", " + _max + ", " + _node + "]";
}
}
/**
 * Tail node of an ungreedy loop. It is appended to the loop body by
 * {@link LoopHeadUngreedy} and holds the continuation (_next) that follows
 * the loop.
 */
static class LoopTailUngreedy extends RegexpNode {
// same slot index as the paired head
private final int _index;
// paired head node, read for _min, _max and the loop body
private LoopHeadUngreedy _head;
// continuation after the loop
private RegexpNode _next;
/**
 * Creates a bare tail for copyImpl; _head and _next are assigned afterwards.
 */
private LoopTailUngreedy(int index)
{
_index = index;
}
/**
 * Creates the tail for head, with N_END as the initial continuation.
 */
LoopTailUngreedy(int index, LoopHeadUngreedy head)
{
_index = index;
_head = head;
_next = N_END;
}
/**
 * Returns the paired head node.
 */
@Override
RegexpNode getHead()
{
return _head;
}
/**
 * Copies this tail together with its head and continuation. The copy is
 * registered in state before recursing so that the head's reference back
 * to this tail can be looked up there.
 */
@Override
RegexpNode copyImpl(HashMap state)
{
LoopTailUngreedy copy = new LoopTailUngreedy(_index);
state.put(this, copy);
RegexpNode head = _head.copy(state);
RegexpNode next = _next.copy(state);
copy._head = (LoopHeadUngreedy) head;
copy._next = next;
return copy;
}
/**
 * Appends next to the continuation after the loop.
 *
 * @throws IllegalStateException if the continuation would become this node
 */
@Override
RegexpNode concat(RegexpNode next)
{
if (_next != null)
_next = _next.concat(next);
else
_next = next;
if (_next == this)
throw new IllegalStateException();
return this;
}
//
// match functions
//
/**
 * Called when one iteration of the loop body has matched up to offset.
 *
 * @return the resulting offset, or -1 when neither the continuation nor a
 * further iteration matches
 */
@Override
int match(StringValue string, int strlen, int offset, RegexpState state)
{
int i = state._loopCount[_index];
int oldOffset = state._loopOffset[_index];
// still inside the mandatory iterations: hand the offset back to the
// head's loop, which drives them
if (i < _head._min)
return offset;
// the iteration ended at the offset recorded when it started, i.e. it
// consumed nothing; fail (presumably to stop empty iterations repeating)
if (offset == oldOffset)
return -1;
// ungreedy: prefer the continuation over another iteration
int tail = _next.match(string, strlen, offset, state);
if (tail >= 0)
return tail;
if (i + 1 < _head._max) {
state._loopCount[_index] = i + 1;
state._loopOffset[_index] = offset;
tail = _head._node.match(string, strlen, offset, state);
// restore this iteration's bookkeeping before returning to the caller
state._loopCount[_index] = i;
state._loopOffset[_index] = oldOffset;
return tail;
}
else
return -1;
}
@Override
public String toString()
{
return "LoopTailUngreedy[" + _next + "]";
}
}
/**
 * Inverts the result of a wrapped node: succeeds, advancing by one position,
 * exactly when the wrapped node fails at the given offset.
 */
static class Not extends RegexpNode {
  private RegexpNode _node;

  private Not(RegexpNode node)
  {
    _node = node;
  }

  /**
   * Factory for a negation of node.
   */
  static Not create(RegexpNode node)
  {
    return new Not(node);
  }

  /**
   * @return offset + 1 when the wrapped node does not match, otherwise -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    boolean innerMatched = _node.match(string, strlen, offset, state) >= 0;

    return innerMatched ? -1 : offset + 1;
  }
}
/**
 * Alternation node. Alternatives are kept as a singly linked chain of Or
 * links: each link holds one alternative in _left and the rest of the chain
 * in _right (null on the last link). Alternatives are tried in chain order.
 */
final static class Or extends RegexpNode {
  // alternative held by this link
  private final RegexpNode _left;
  // remaining alternatives, or null when this is the last link
  private Or _right;

  private Or(RegexpNode left, Or right)
  {
    _left = left;
    _right = right;
  }

  /**
   * Builds an alternation of left and right, flattening nested Or nodes
   * into a single chain.
   */
  static Or create(RegexpNode left, RegexpNode right)
  {
    if (left instanceof Or)
      return ((Or) left).append(right);
    else if (right instanceof Or)
      return new Or(left, (Or) right);
    else
      return new Or(left, new Or(right, null));
  }

  /**
   * Appends right to the end of this chain and returns this link.
   */
  private Or append(RegexpNode right)
  {
    if (_right != null)
      _right = _right.append(right);
    else if (right instanceof Or)
      _right = (Or) right;
    else
      _right = new Or(right, null);

    return this;
  }

  /**
   * Copies this link and, recursively, the rest of the chain.
   */
  @Override
  RegexpNode copyImpl(HashMap state)
  {
    RegexpNode left = _left.copy(state);
    RegexpNode right = null;

    if (_right != null) {
      right = _right.copy(state);
    }

    Or copy = new Or(left, (Or) right);

    return copy;
  }

  /**
   * Minimum length is the smallest minimum length of any alternative.
   */
  @Override
  int minLength()
  {
    if (_right != null)
      return Math.min(_left.minLength(), _right.minLength());
    else
      return _left.minLength();
  }

  /**
   * Returns the first char shared by every alternative, or -1 when the
   * alternatives disagree.
   */
  @Override
  int firstChar()
  {
    if (_right == null)
      return _left.firstChar();

    int leftChar = _left.firstChar();
    int rightChar = _right.firstChar();

    if (leftChar == rightChar)
      return leftChar;
    else
      return -1;
  }

  /**
   * Passes firstSet through every alternative in turn and returns the
   * result of the last one.
   */
  @Override
  boolean []firstSet(boolean []firstSet)
  {
    if (_right == null)
      return _left.firstSet(firstSet);

    firstSet = _left.firstSet(firstSet);
    firstSet = _right.firstSet(firstSet);

    return firstSet;
  }

  /**
   * True only when every alternative in the chain is anchored at the
   * beginning.
   */
  @Override
  boolean isAnchorBegin()
  {
    // The last link has no _right; it must answer for its own alternative
    // only. Requiring a non-null _right here would make the whole chain
    // report false regardless of its alternatives.
    if (_right == null)
      return _left.isAnchorBegin();

    return _left.isAnchorBegin() && _right.isAnchorBegin();
  }

  /**
   * Tries each alternative in order at offset.
   *
   * @return the result of the first alternative that matches, or -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    for (Or ptr = this; ptr != null; ptr = ptr._right) {
      int value = ptr._left.match(string, strlen, offset, state);

      if (value >= 0)
        return value;
    }

    return -1;
  }

  @Override
  protected void toString(StringBuilder sb, Map map)
  {
    if (toStringAdd(sb, map))
      return;

    sb.append(toStringName());
    sb.append("[");
    _left.toString(sb, map);

    for (Or ptr = _right; ptr != null; ptr = ptr._right) {
      sb.append(",");
      ptr._left.toString(sb, map);
    }

    sb.append("]");
  }

  @Override
  public String toString()
  {
    StringBuilder sb = new StringBuilder();

    sb.append("Or[");
    sb.append(_left);

    for (Or ptr = _right; ptr != null; ptr = ptr._right) {
      sb.append(",");
      sb.append(ptr._left);
    }

    sb.append("]");

    return sb.toString();
  }
}
/**
 * Loop that repeats its body between _min and _max times and then matches
 * the continuation exactly once, from wherever the repetition stopped; it
 * never retries with fewer iterations.
 */
static class PossessiveLoop extends RegexpNode {
  private RegexpNode _node;
  private RegexpNode _next = N_END;

  private int _min;
  private int _max;

  /**
   * Creates a bare loop for copyImpl; the body is assigned afterwards.
   */
  private PossessiveLoop(int min, int max)
  {
    _min = min;
    _max = max;
  }

  PossessiveLoop(RegexpNode node, int min, int max)
  {
    _node = node.getHead();

    _min = min;
    _max = max;
  }

  /**
   * Copies the loop with its body and continuation; the copy is registered
   * in state before the children are copied.
   */
  @Override
  RegexpNode copyImpl(HashMap state)
  {
    PossessiveLoop duplicate = new PossessiveLoop(_min, _max);
    state.put(this, duplicate);

    RegexpNode bodyCopy = _node.copy(state);
    RegexpNode nextCopy = _next.copy(state);

    duplicate._node = bodyCopy;
    duplicate._next = nextCopy;

    return duplicate;
  }

  /**
   * Appends next to the continuation after the loop.
   *
   * @throws NullPointerException if next is null
   */
  @Override
  RegexpNode concat(RegexpNode next)
  {
    if (next == null)
      throw new NullPointerException();

    _next = (_next != null) ? _next.concat(next) : next;

    return this;
  }

  /**
   * For {0,1} reuses this loop with its minimum lowered to zero; any other
   * range wraps this node in a new LoopHead.
   */
  @Override
  RegexpNode createLoop(Regcomp parser, int min, int max)
  {
    if (min != 0 || max != 1)
      return new LoopHead(parser, this, min, max);

    _min = 0;

    return this;
  }

  //
  // match functions
  //

  /**
   * @return the continuation's result after the repetition, or -1 when a
   * mandatory iteration fails
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    RegexpNode body = _node;
    int required = _min;
    int limit = _max;

    int count = 0;

    // mandatory iterations
    while (count < required) {
      offset = body.match(string, strlen, offset, state);

      if (offset < 0)
        return -1;

      count++;
    }

    // optional iterations: stop at the first failure or empty iteration
    while (count < limit) {
      int end = body.match(string, strlen, offset, state);

      if (end < 0 || end == offset)
        break;

      offset = end;
      count++;
    }

    return _next.match(string, strlen, offset, state);
  }

  @Override
  public String toString()
  {
    return "PossessiveLoop[" + _min + ", "
           + _max + ", " + _node + ", " + _next + "]";
  }
}
//
// Shared matcher instances keyed by java.lang.Character general-category
// constants. Each PROP_Xx has a negated PROP_NOT_Xx counterpart.
//

// C group: control, format, unassigned, private-use and surrogate
static final PropC PROP_C = new PropC();
static final PropNotC PROP_NOT_C = new PropNotC();
static final Prop PROP_Cc = new Prop(Character.CONTROL);
static final PropNot PROP_NOT_Cc = new PropNot(Character.CONTROL);
static final Prop PROP_Cf = new Prop(Character.FORMAT);
static final PropNot PROP_NOT_Cf = new PropNot(Character.FORMAT);
static final Prop PROP_Cn = new Prop(Character.UNASSIGNED);
static final PropNot PROP_NOT_Cn = new PropNot(Character.UNASSIGNED);
static final Prop PROP_Co = new Prop(Character.PRIVATE_USE);
static final PropNot PROP_NOT_Co = new PropNot(Character.PRIVATE_USE);
static final Prop PROP_Cs = new Prop(Character.SURROGATE);
static final PropNot PROP_NOT_Cs = new PropNot(Character.SURROGATE);
// L group: lowercase, modifier, other, titlecase and uppercase letters
static final PropL PROP_L = new PropL();
static final PropNotL PROP_NOT_L = new PropNotL();
static final Prop PROP_Ll = new Prop(Character.LOWERCASE_LETTER);
static final PropNot PROP_NOT_Ll = new PropNot(Character.LOWERCASE_LETTER);
static final Prop PROP_Lm = new Prop(Character.MODIFIER_LETTER);
static final PropNot PROP_NOT_Lm = new PropNot(Character.MODIFIER_LETTER);
static final Prop PROP_Lo = new Prop(Character.OTHER_LETTER);
static final PropNot PROP_NOT_Lo = new PropNot(Character.OTHER_LETTER);
static final Prop PROP_Lt = new Prop(Character.TITLECASE_LETTER);
static final PropNot PROP_NOT_Lt = new PropNot(Character.TITLECASE_LETTER);
static final Prop PROP_Lu = new Prop(Character.UPPERCASE_LETTER);
static final PropNot PROP_NOT_Lu = new PropNot(Character.UPPERCASE_LETTER);
// M group: combining-spacing, enclosing and non-spacing marks
static final PropM PROP_M = new PropM();
static final PropNotM PROP_NOT_M = new PropNotM();
static final Prop PROP_Mc = new Prop(Character.COMBINING_SPACING_MARK);
static final PropNot PROP_NOT_Mc
= new PropNot(Character.COMBINING_SPACING_MARK);
static final Prop PROP_Me = new Prop(Character.ENCLOSING_MARK);
static final PropNot PROP_NOT_Me = new PropNot(Character.ENCLOSING_MARK);
static final Prop PROP_Mn = new Prop(Character.NON_SPACING_MARK);
static final PropNot PROP_NOT_Mn = new PropNot(Character.NON_SPACING_MARK);
// N group: decimal-digit, letter and other numbers
static final PropN PROP_N = new PropN();
static final PropNotN PROP_NOT_N = new PropNotN();
static final Prop PROP_Nd = new Prop(Character.DECIMAL_DIGIT_NUMBER);
static final PropNot PROP_NOT_Nd
= new PropNot(Character.DECIMAL_DIGIT_NUMBER);
static final Prop PROP_Nl = new Prop(Character.LETTER_NUMBER);
static final PropNot PROP_NOT_Nl = new PropNot(Character.LETTER_NUMBER);
static final Prop PROP_No = new Prop(Character.OTHER_NUMBER);
static final PropNot PROP_NOT_No = new PropNot(Character.OTHER_NUMBER);
// P group: connector, dash, end, final-quote, initial-quote, other and
// start punctuation
static final PropP PROP_P = new PropP();
static final PropNotP PROP_NOT_P = new PropNotP();
static final Prop PROP_Pc = new Prop(Character.CONNECTOR_PUNCTUATION);
static final PropNot PROP_NOT_Pc
= new PropNot(Character.CONNECTOR_PUNCTUATION);
static final Prop PROP_Pd = new Prop(Character.DASH_PUNCTUATION);
static final PropNot PROP_NOT_Pd = new PropNot(Character.DASH_PUNCTUATION);
static final Prop PROP_Pe = new Prop(Character.END_PUNCTUATION);
static final PropNot PROP_NOT_Pe = new PropNot(Character.END_PUNCTUATION);
static final Prop PROP_Pf = new Prop(Character.FINAL_QUOTE_PUNCTUATION);
static final PropNot PROP_NOT_Pf
= new PropNot(Character.FINAL_QUOTE_PUNCTUATION);
static final Prop PROP_Pi = new Prop(Character.INITIAL_QUOTE_PUNCTUATION);
static final PropNot PROP_NOT_Pi
= new PropNot(Character.INITIAL_QUOTE_PUNCTUATION);
static final Prop PROP_Po = new Prop(Character.OTHER_PUNCTUATION);
static final PropNot PROP_NOT_Po = new PropNot(Character.OTHER_PUNCTUATION);
static final Prop PROP_Ps = new Prop(Character.START_PUNCTUATION);
static final PropNot PROP_NOT_Ps = new PropNot(Character.START_PUNCTUATION);
// S group: currency, modifier, math and other symbols
static final PropS PROP_S = new PropS();
static final PropNotS PROP_NOT_S = new PropNotS();
static final Prop PROP_Sc = new Prop(Character.CURRENCY_SYMBOL);
static final PropNot PROP_NOT_Sc = new PropNot(Character.CURRENCY_SYMBOL);
static final Prop PROP_Sk = new Prop(Character.MODIFIER_SYMBOL);
static final PropNot PROP_NOT_Sk = new PropNot(Character.MODIFIER_SYMBOL);
static final Prop PROP_Sm = new Prop(Character.MATH_SYMBOL);
static final PropNot PROP_NOT_Sm = new PropNot(Character.MATH_SYMBOL);
static final Prop PROP_So = new Prop(Character.OTHER_SYMBOL);
static final PropNot PROP_NOT_So = new PropNot(Character.OTHER_SYMBOL);
// Z group: line, paragraph and space separators
static final PropZ PROP_Z = new PropZ();
static final PropNotZ PROP_NOT_Z = new PropNotZ();
static final Prop PROP_Zl = new Prop(Character.LINE_SEPARATOR);
static final PropNot PROP_NOT_Zl = new PropNot(Character.LINE_SEPARATOR);
static final Prop PROP_Zp = new Prop(Character.PARAGRAPH_SEPARATOR);
static final PropNot PROP_NOT_Zp
= new PropNot(Character.PARAGRAPH_SEPARATOR);
static final Prop PROP_Zs = new Prop(Character.SPACE_SEPARATOR);
static final PropNot PROP_NOT_Zs = new PropNot(Character.SPACE_SEPARATOR);
/**
 * Matches one char whose Character.getType() value equals the configured
 * category.
 */
private static class Prop extends AbstractCharNode {
  private final int _category;

  Prop(int category)
  {
    _category = category;
  }

  /**
   * @return offset + 1 when the char at offset has the category, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (strlen <= offset)
      return -1;

    int type = Character.getType(string.charAt(offset));

    return type == _category ? offset + 1 : -1;
  }
}
/**
 * Matches one char whose Character.getType() value differs from the
 * configured category.
 */
private static class PropNot extends AbstractCharNode {
  private final int _category;

  PropNot(int category)
  {
    _category = category;
  }

  /**
   * @return offset + 1 when the char at offset is not in the category,
   * else -1 (also -1 when offset is at or past strlen)
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (strlen <= offset)
      return -1;

    int type = Character.getType(string.charAt(offset));

    return type != _category ? offset + 1 : -1;
  }
}
/**
 * Matches one char whose category is control, format, unassigned,
 * private-use or surrogate.
 */
static class PropC extends AbstractCharNode {
  /**
   * @return offset + 1 on a match, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (strlen <= offset)
      return -1;

    switch (Character.getType(string.charAt(offset))) {
    case Character.CONTROL:
    case Character.FORMAT:
    case Character.UNASSIGNED:
    case Character.PRIVATE_USE:
    case Character.SURROGATE:
      return offset + 1;

    default:
      return -1;
    }
  }
}
/**
 * Matches one char whose category is none of control, format, unassigned,
 * private-use or surrogate.
 */
static class PropNotC extends AbstractCharNode {
  /**
   * @return offset + 1 on a match, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (strlen <= offset)
      return -1;

    switch (Character.getType(string.charAt(offset))) {
    case Character.CONTROL:
    case Character.FORMAT:
    case Character.UNASSIGNED:
    case Character.PRIVATE_USE:
    case Character.SURROGATE:
      return -1;

    default:
      return offset + 1;
    }
  }
}
/**
 * Matches one char whose category is a lowercase, modifier, other,
 * titlecase or uppercase letter.
 */
static class PropL extends AbstractCharNode {
  /**
   * @return offset + 1 on a match, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (strlen <= offset)
      return -1;

    switch (Character.getType(string.charAt(offset))) {
    case Character.LOWERCASE_LETTER:
    case Character.MODIFIER_LETTER:
    case Character.OTHER_LETTER:
    case Character.TITLECASE_LETTER:
    case Character.UPPERCASE_LETTER:
      return offset + 1;

    default:
      return -1;
    }
  }
}
/**
 * Matches one char whose category is not any of the five letter
 * categories.
 */
static class PropNotL extends AbstractCharNode {
  /**
   * @return offset + 1 on a match, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (strlen <= offset)
      return -1;

    switch (Character.getType(string.charAt(offset))) {
    case Character.LOWERCASE_LETTER:
    case Character.MODIFIER_LETTER:
    case Character.OTHER_LETTER:
    case Character.TITLECASE_LETTER:
    case Character.UPPERCASE_LETTER:
      return -1;

    default:
      return offset + 1;
    }
  }
}
/**
 * Matches one char whose category is a combining-spacing, enclosing or
 * non-spacing mark.
 */
static class PropM extends AbstractCharNode {
  /**
   * @return offset + 1 on a match, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (strlen <= offset)
      return -1;

    switch (Character.getType(string.charAt(offset))) {
    case Character.COMBINING_SPACING_MARK:
    case Character.ENCLOSING_MARK:
    case Character.NON_SPACING_MARK:
      return offset + 1;

    default:
      return -1;
    }
  }
}
/**
 * Matches one char whose category is not any of the three mark categories.
 */
static class PropNotM extends AbstractCharNode {
  /**
   * @return offset + 1 on a match, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (strlen <= offset)
      return -1;

    switch (Character.getType(string.charAt(offset))) {
    case Character.COMBINING_SPACING_MARK:
    case Character.ENCLOSING_MARK:
    case Character.NON_SPACING_MARK:
      return -1;

    default:
      return offset + 1;
    }
  }
}
/**
 * Matches one char whose category is a decimal-digit, letter or other
 * number.
 */
static class PropN extends AbstractCharNode {
  /**
   * @return offset + 1 on a match, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (strlen <= offset)
      return -1;

    switch (Character.getType(string.charAt(offset))) {
    case Character.DECIMAL_DIGIT_NUMBER:
    case Character.LETTER_NUMBER:
    case Character.OTHER_NUMBER:
      return offset + 1;

    default:
      return -1;
    }
  }
}
/**
 * Matches one char whose category is not any of the three number
 * categories.
 */
static class PropNotN extends AbstractCharNode {
  /**
   * @return offset + 1 on a match, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (strlen <= offset)
      return -1;

    switch (Character.getType(string.charAt(offset))) {
    case Character.DECIMAL_DIGIT_NUMBER:
    case Character.LETTER_NUMBER:
    case Character.OTHER_NUMBER:
      return -1;

    default:
      return offset + 1;
    }
  }
}
/**
 * Matches one char whose category is connector, dash, end, final-quote,
 * initial-quote, other or start punctuation.
 */
static class PropP extends AbstractCharNode {
  /**
   * @return offset + 1 on a match, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (strlen <= offset)
      return -1;

    switch (Character.getType(string.charAt(offset))) {
    case Character.CONNECTOR_PUNCTUATION:
    case Character.DASH_PUNCTUATION:
    case Character.END_PUNCTUATION:
    case Character.FINAL_QUOTE_PUNCTUATION:
    case Character.INITIAL_QUOTE_PUNCTUATION:
    case Character.OTHER_PUNCTUATION:
    case Character.START_PUNCTUATION:
      return offset + 1;

    default:
      return -1;
    }
  }
}
/**
 * Matches one char whose category is not any of the seven punctuation
 * categories.
 */
static class PropNotP extends AbstractCharNode {
  /**
   * @return offset + 1 on a match, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (strlen <= offset)
      return -1;

    switch (Character.getType(string.charAt(offset))) {
    case Character.CONNECTOR_PUNCTUATION:
    case Character.DASH_PUNCTUATION:
    case Character.END_PUNCTUATION:
    case Character.FINAL_QUOTE_PUNCTUATION:
    case Character.INITIAL_QUOTE_PUNCTUATION:
    case Character.OTHER_PUNCTUATION:
    case Character.START_PUNCTUATION:
      return -1;

    default:
      return offset + 1;
    }
  }
}
/**
 * Matches one char whose category is a currency, modifier, math or other
 * symbol.
 */
static class PropS extends AbstractCharNode {
  /**
   * @return offset + 1 on a match, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (strlen <= offset)
      return -1;

    switch (Character.getType(string.charAt(offset))) {
    case Character.CURRENCY_SYMBOL:
    case Character.MODIFIER_SYMBOL:
    case Character.MATH_SYMBOL:
    case Character.OTHER_SYMBOL:
      return offset + 1;

    default:
      return -1;
    }
  }
}
/**
 * Matches one char whose category is not any of the four symbol
 * categories.
 */
static class PropNotS extends AbstractCharNode {
  /**
   * @return offset + 1 on a match, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (strlen <= offset)
      return -1;

    switch (Character.getType(string.charAt(offset))) {
    case Character.CURRENCY_SYMBOL:
    case Character.MODIFIER_SYMBOL:
    case Character.MATH_SYMBOL:
    case Character.OTHER_SYMBOL:
      return -1;

    default:
      return offset + 1;
    }
  }
}
/**
 * Matches one char whose category is a line, paragraph or space separator.
 */
static class PropZ extends AbstractCharNode {
  /**
   * @return offset + 1 on a match, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (strlen <= offset)
      return -1;

    switch (Character.getType(string.charAt(offset))) {
    case Character.LINE_SEPARATOR:
    case Character.PARAGRAPH_SEPARATOR:
    case Character.SPACE_SEPARATOR:
      return offset + 1;

    default:
      return -1;
    }
  }
}
/**
 * Matches one char whose category is not any of the three separator
 * categories.
 */
static class PropNotZ extends AbstractCharNode {
  /**
   * @return offset + 1 on a match, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (strlen <= offset)
      return -1;

    switch (Character.getType(string.charAt(offset))) {
    case Character.LINE_SEPARATOR:
    case Character.PARAGRAPH_SEPARATOR:
    case Character.SPACE_SEPARATOR:
      return -1;

    default:
      return offset + 1;
    }
  }
}
/**
 * Node that re-enters the node assigned through setTop and, on success,
 * resets the begin position recorded for its group.
 */
static class Recursive extends RegexpNode {
  private final int _group;
  private RegexpNode _top;

  Recursive(int group)
  {
    _group = group;
  }

  /**
   * Sets the node this recursion re-enters.
   */
  void setTop(RegexpNode top)
  {
    _top = top;
  }

  /**
   * Matches the top node at offset. On success the group's begin is set
   * back to the value it had before the call when that value was
   * non-negative, otherwise to offset.
   *
   * @return the result of the top node's match
   */
  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    // read before recursing: the nested match may overwrite it
    int savedBegin = state.getBegin(_group);

    int end = _top.match(string, length, offset, state);

    if (end < 0)
      return end;

    state.setBegin(_group, savedBegin >= 0 ? savedBegin : offset);

    return end;
  }
}
/**
 * Recursion node that carries a group number and delegates matching to the
 * node assigned through setTop.
 */
static class GroupNumberRecursive extends RegexpNode {
  private final int _group;
  private RegexpNode _top;

  GroupNumberRecursive(int group)
  {
    _group = group;
  }

  /**
   * Returns the group number given at construction.
   */
  int getGroup()
  {
    return _group;
  }

  /**
   * Sets the node that match delegates to.
   */
  void setTop(RegexpNode top)
  {
    _top = top;
  }

  /**
   * @return the result of matching the assigned top node at offset
   */
  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    return _top.match(string, length, offset, state);
  }
}
/**
 * Recursion node that carries a group name and delegates matching to the
 * node assigned through setTop.
 */
static class GroupNameRecursive extends RegexpNode {
  private final StringValue _name;
  private RegexpNode _top;

  GroupNameRecursive(StringValue name)
  {
    _name = name;
  }

  /**
   * Returns the group name given at construction.
   */
  StringValue getGroup()
  {
    return _name;
  }

  /**
   * Sets the node that match delegates to.
   */
  void setTop(RegexpNode top)
  {
    _top = top;
  }

  /**
   * @return the result of matching the assigned top node at offset
   */
  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    return _top.match(string, length, offset, state);
  }
}
/**
 * Marks its group as finalized in the match state and then matches the
 * wrapped node.
 */
static class Subroutine extends RegexpNode {
  private final int _group;
  private final RegexpNode _node;

  Subroutine(int group, RegexpNode node)
  {
    _group = group;
    _node = node;
  }

  /**
   * @return the result of matching the wrapped node at offset
   */
  @Override
  int match(StringValue string, int length, int offset, RegexpState state)
  {
    // the flag is set before the wrapped node runs, whatever its outcome
    state.setFinalized(_group, true);

    return _node.match(string, length, offset, state);
  }
}
/**
 * Character-class node. Chars below 128 are looked up in a boolean table;
 * everything else is looked up, as a code point, in an IntSet.
 */
static class Set extends AbstractCharNode {
  // membership table indexed by char value, consulted for chars below 128
  private final boolean []_asciiSet;
  // membership for code points of 128 and above
  private final IntSet _range;

  Set(boolean []set, IntSet range)
  {
    _asciiSet = set;
    _range = range;
  }

  /**
   * Matches one character at offset against the set. A high surrogate
   * followed by a low surrogate is combined into a single code point and
   * both chars are consumed; any other char is tested by its own value and
   * only that char is consumed.
   *
   * @return the offset after the matched character, or -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (strlen <= offset)
      return -1;

    char ch = string.charAt(offset++);

    if (ch < 128)
      return _asciiSet[ch] ? offset : -1;

    int codePoint = ch;

    if (Character.isHighSurrogate(ch) && offset < strlen) {
      char low = string.charAt(offset);

      // advance past the second char only when it really completes the pair
      if (Character.isLowSurrogate(low)) {
        codePoint = Character.toCodePoint(ch, low);
        offset++;
      }
    }

    return _range.contains(codePoint) ? offset : -1;
  }
}
/**
 * Negated character-class node: matches one char that is not a member of
 * the set. Chars below 128 are looked up in a boolean table, other chars
 * in an IntSet.
 */
static class NotSet extends AbstractCharNode {
  private final boolean []_asciiSet;
  private final IntSet _range;

  NotSet(boolean []set, IntSet range)
  {
    _asciiSet = set;
    _range = range;
  }

  /**
   * @return offset + 1 when the char at offset is outside the set, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (offset >= strlen)
      return -1;

    char ch = string.charAt(offset);

    boolean isMember;

    if (ch < 128)
      isMember = _asciiSet[ch];
    else
      isMember = _range.contains(ch);

    return isMember ? -1 : offset + 1;
  }
}
/**
 * Literal string node: matches an exact, case-sensitive sequence of chars.
 */
static final class StringNode extends RegexpNode {
  private final char []_buffer;
  private final int _length;

  /**
   * Copies the literal out of value.
   *
   * @throws IllegalStateException if value is empty
   */
  StringNode(CharBuffer value)
  {
    int len = value.length();

    _length = len;
    _buffer = new char[len];

    if (len == 0)
      throw new IllegalStateException("empty string");

    System.arraycopy(value.getBuffer(), 0, _buffer, 0, _buffer.length);
  }

  /**
   * Uses the first length chars of buffer as the literal; the array is
   * stored as given, not copied.
   *
   * @throws IllegalStateException if length is zero
   */
  StringNode(char []buffer, int length)
  {
    _length = length;
    _buffer = buffer;

    if (length == 0)
      throw new IllegalStateException("empty string");
  }

  StringNode(char ch)
  {
    _length = 1;
    _buffer = new char[] { ch };
  }

  /**
   * A quantifier binds to the last char only: a one-char literal becomes a
   * CharLoop, a longer one is split into its prefix followed by a loop over
   * the last char.
   */
  @Override
  RegexpNode createLoop(Regcomp parser, int min, int max)
  {
    if (_length == 1)
      return new CharLoop(this, min, max);

    int last = _length - 1;
    char lastChar = _buffer[last];

    RegexpNode prefix = new StringNode(_buffer, last);
    RegexpNode loop = new CharNode(lastChar).createLoop(parser, min, max);

    return prefix.concat(loop);
  }

  /**
   * Ungreedy counterpart of createLoop.
   */
  @Override
  RegexpNode createLoopUngreedy(Regcomp parser, int min, int max)
  {
    if (_length == 1)
      return new CharUngreedyLoop(this, min, max);

    int last = _length - 1;
    char lastChar = _buffer[last];

    RegexpNode prefix = new StringNode(_buffer, last);
    RegexpNode loop
      = new CharNode(lastChar).createLoopUngreedy(parser, min, max);

    return prefix.concat(loop);
  }

  /**
   * Possessive counterpart of createLoop; a one-char literal defers to the
   * superclass.
   */
  @Override
  RegexpNode createPossessiveLoop(int min, int max)
  {
    if (_length == 1)
      return super.createPossessiveLoop(min, max);

    int last = _length - 1;
    char lastChar = _buffer[last];

    RegexpNode prefix = new StringNode(_buffer, last);
    RegexpNode loop = new CharNode(lastChar).createPossessiveLoop(min, max);

    return prefix.concat(loop);
  }

  //
  // optim functions
  //

  @Override
  int minLength()
  {
    return _length;
  }

  /**
   * @return the first char of the literal, or -1 if there is none
   */
  @Override
  int firstChar()
  {
    return _length > 0 ? _buffer[0] : -1;
  }

  /**
   * Marks the literal's first char in firstSet.
   *
   * @return firstSet, or null when it is null or the char does not fit
   */
  @Override
  boolean []firstSet(boolean []firstSet)
  {
    if (firstSet == null || _length <= 0)
      return null;

    char first = _buffer[0];

    if (first >= firstSet.length)
      return null;

    firstSet[first] = true;

    return firstSet;
  }

  @Override
  String prefix()
  {
    return new String(_buffer, 0, _length);
  }

  //
  // match function
  //

  /**
   * @return offset + length of the literal when the subject contains the
   * literal at offset, else -1
   */
  @Override
  final int match(StringValue string,
                  int strlen,
                  int offset,
                  RegexpState state)
  {
    boolean found = string.regionMatches(offset, _buffer, 0, _length);

    return found ? offset + _length : -1;
  }

  @Override
  protected void toString(StringBuilder sb, Map map)
  {
    sb.append(toStringName())
      .append("[")
      .append(_buffer, 0, _length)
      .append("]");
  }
}
/**
 * Literal string node that matches its chars without regard to case.
 */
static class StringIgnoreCase extends RegexpNode {
  private final char []_buffer;
  private final int _length;

  /**
   * Copies the literal out of value.
   *
   * @throws IllegalStateException if value is empty
   */
  StringIgnoreCase(CharBuffer value)
  {
    int len = value.length();

    _length = len;
    _buffer = new char[len];

    if (len == 0)
      throw new IllegalStateException("empty string");

    System.arraycopy(value.getBuffer(), 0, _buffer, 0, _buffer.length);
  }

  /**
   * Uses the first length chars of buffer as the literal; the array is
   * stored as given, not copied.
   *
   * @throws IllegalStateException if length is zero
   */
  StringIgnoreCase(char []buffer, int length)
  {
    _length = length;
    _buffer = buffer;

    if (length == 0)
      throw new IllegalStateException("empty string");
  }

  StringIgnoreCase(char ch)
  {
    _length = 1;
    _buffer = new char[] { ch };
  }

  /**
   * A quantifier binds to the last char only: a one-char literal becomes a
   * CharLoop, a longer one is split into its prefix followed by a loop over
   * a one-char case-insensitive literal.
   */
  @Override
  RegexpNode createLoop(Regcomp parser, int min, int max)
  {
    if (_length == 1)
      return new CharLoop(this, min, max);

    int last = _length - 1;
    char lastChar = _buffer[last];

    RegexpNode prefix = new StringIgnoreCase(_buffer, last);
    RegexpNode single = new StringIgnoreCase(new char[] { lastChar }, 1);

    return prefix.concat(single.createLoop(parser, min, max));
  }

  /**
   * Ungreedy counterpart of createLoop.
   */
  @Override
  RegexpNode createLoopUngreedy(Regcomp parser, int min, int max)
  {
    if (_length == 1)
      return new CharUngreedyLoop(this, min, max);

    int last = _length - 1;
    char lastChar = _buffer[last];

    RegexpNode prefix = new StringIgnoreCase(_buffer, last);
    RegexpNode single = new StringIgnoreCase(new char[] { lastChar }, 1);

    return prefix.concat(single.createLoopUngreedy(parser, min, max));
  }

  /**
   * Possessive counterpart of createLoop; a one-char literal defers to the
   * superclass.
   */
  @Override
  RegexpNode createPossessiveLoop(int min, int max)
  {
    if (_length == 1)
      return super.createPossessiveLoop(min, max);

    int last = _length - 1;
    char lastChar = _buffer[last];

    RegexpNode prefix = new StringIgnoreCase(_buffer, last);
    RegexpNode single = new StringIgnoreCase(new char[] { lastChar }, 1);

    return prefix.concat(single.createPossessiveLoop(min, max));
  }

  //
  // optim functions
  //

  @Override
  int minLength()
  {
    return _length;
  }

  /**
   * @return the first char when its lower-case and upper-case forms are
   * the same, otherwise -1
   */
  @Override
  int firstChar()
  {
    if (_length <= 0)
      return -1;

    char first = _buffer[0];

    if (Character.toLowerCase(first) != Character.toUpperCase(first))
      return -1;

    return first;
  }

  /**
   * Marks both case forms of the first char in firstSet.
   *
   * @return firstSet, or null when it is null or either form does not fit
   */
  @Override
  boolean []firstSet(boolean []firstSet)
  {
    if (_length <= 0 || firstSet == null)
      return null;

    char lower = Character.toLowerCase(_buffer[0]);
    char upper = Character.toUpperCase(_buffer[0]);

    if (lower >= firstSet.length || upper >= firstSet.length)
      return null;

    firstSet[lower] = true;
    firstSet[upper] = true;

    return firstSet;
  }

  @Override
  String prefix()
  {
    return new String(_buffer, 0, _length);
  }

  //
  // match function
  //

  /**
   * @return offset + length of the literal when the subject contains the
   * literal at offset ignoring case, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    boolean found
      = string.regionMatchesIgnoreCase(offset, _buffer, 0, _length);

    return found ? offset + _length : -1;
  }
}
// Shared zero-width position nodes; each returns the offset unchanged on
// success and -1 otherwise.
// succeeds when offset equals state._start
static final StringBegin STRING_BEGIN = new StringBegin();
// succeeds when offset equals strlen
static final StringEnd STRING_END = new StringEnd();
// succeeds when offset equals state._first
static final StringFirst STRING_FIRST = new StringFirst();
// succeeds at strlen, or at a '\n' that is the last char of the string
static final StringNewline STRING_NEWLINE = new StringNewline();
/**
 * Zero-width node that succeeds only when offset equals state._start.
 */
private static class StringBegin extends RegexpNode {
  /**
   * @return offset when it equals state._start, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    return offset == state._start ? offset : -1;
  }
}
/**
 * Zero-width node that succeeds only when offset equals strlen.
 */
private static class StringEnd extends RegexpNode {
  /**
   * @return offset when it equals strlen, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    return offset == strlen ? offset : -1;
  }
}
/**
 * Zero-width node that succeeds only when offset equals state._first.
 */
private static class StringFirst extends RegexpNode {
  /**
   * @return offset when it equals state._first, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    return offset == state._first ? offset : -1;
  }
}
/**
 * Zero-width node that succeeds at strlen, or immediately before a '\n'
 * that is the last char of the string.
 */
private static class StringNewline extends RegexpNode {
  /**
   * @return offset on success, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    if (offset == strlen)
      return offset;

    boolean atFinalNewline = string.charAt(offset) == '\n'
                             && offset + 1 == string.length();

    return atFinalNewline ? offset : -1;
  }
}
// Shared zero-width word-boundary nodes. WORD succeeds when exactly one of
// the char before offset and the char at offset is a RegexpSet.WORD char;
// NOT_WORD succeeds in the opposite case.
static final Word WORD = new Word();
static final NotWord NOT_WORD = new NotWord();
/**
 * Zero-width word-boundary node: succeeds when exactly one of the char
 * before offset and the char at offset is a RegexpSet.WORD char. Positions
 * at or before state._start, or at or past strlen, count as non-word.
 */
private static class Word extends RegexpNode {
  /**
   * @return offset at a word boundary, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    boolean wordBefore = state._start < offset
                         && RegexpSet.WORD.match(string.charAt(offset - 1));
    boolean wordAfter = offset < strlen
                        && RegexpSet.WORD.match(string.charAt(offset));

    return wordBefore != wordAfter ? offset : -1;
  }
}
/**
 * Zero-width non-boundary node: succeeds when the char before offset and
 * the char at offset are both RegexpSet.WORD chars or both are not.
 * Positions at or before state._start, or at or past strlen, count as
 * non-word.
 */
private static class NotWord extends RegexpNode {
  /**
   * @return offset when it is not a word boundary, else -1
   */
  @Override
  int match(StringValue string, int strlen, int offset, RegexpState state)
  {
    boolean wordBefore = state._start < offset
                         && RegexpSet.WORD.match(string.charAt(offset - 1));
    boolean wordAfter = offset < strlen
                        && RegexpSet.WORD.match(string.charAt(offset));

    return wordBefore == wordAfter ? offset : -1;
  }
}
// Assigns ANY_CHAR (declared outside this section) a new AsciiNotSet
// instance when the class is initialized.
static {
ANY_CHAR = new AsciiNotSet();
}
}