org.joni.ArrayCompiler Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of joni Show documentation
Show all versions of joni Show documentation
Java port of Oniguruma: http://www.geocities.jp/kosako3/oniguruma
that uses byte arrays directly instead of java Strings and chars
The newest version!
/*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to
* use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is furnished to do
* so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package org.joni;
import static org.joni.BitStatus.bsAt;
import static org.joni.Option.isDynamic;
import static org.joni.Option.isIgnoreCase;
import static org.joni.Option.isMultiline;
import static org.joni.ast.QuantifierNode.isRepeatInfinite;
import org.jcodings.constants.CharacterType;
import org.joni.ast.AnchorNode;
import org.joni.ast.BackRefNode;
import org.joni.ast.CClassNode;
import org.joni.ast.CTypeNode;
import org.joni.ast.CallNode;
import org.joni.ast.ListNode;
import org.joni.ast.EncloseNode;
import org.joni.ast.Node;
import org.joni.ast.QuantifierNode;
import org.joni.ast.StringNode;
import org.joni.constants.internal.AnchorType;
import org.joni.constants.internal.EncloseType;
import org.joni.constants.internal.NodeType;
import org.joni.constants.internal.OPCode;
import org.joni.constants.internal.OPSize;
import org.joni.constants.internal.TargetInfo;
final class ArrayCompiler extends Compiler {
// Instruction buffer that opcodes and operands are appended to; published as regex.code in finish().
private int[]code;
// Index of the next free slot in `code` (number of slots written so far).
private int codeLength;
// String templates registered through addTemplate(); published as regex.templates in finish().
private byte[][]templates;
// Number of entries of `templates` currently in use.
private int templateNum;
/**
 * Creates a compiler bound to the given analyser; all state is handed to the superclass.
 *
 * @param analyser the analyser passed through to {@code Compiler}
 */
ArrayCompiler(Analyser analyser) {
    super(analyser);
}
/**
 * Allocates the initial instruction buffer and resets the write position.
 * When string templates are enabled a fixed 8 slots are reserved; otherwise the
 * buffer holds twice the analysed span (end - begin) plus two slots.
 */
@Override
protected final void prepare() {
    final int initialSize;
    if (Config.USE_STRING_TEMPLATES) {
        initialSize = 8;
    } else {
        final int span = analyser.getEnd() - analyser.getBegin();
        initialSize = span * 2 + 2;
    }
    code = new int[initialSize];
    codeLength = 0;
}
/**
 * Terminates the instruction stream and publishes the compiled artefacts on {@code regex}.
 * Appends END and FINISH, copies the buffer, its length and the template table to the
 * regex, selects the default matcher factory, and finally lets any pending
 * unset-address list patch the regex before discarding that list.
 */
@Override
protected final void finish() {
    // trailer: END followed by FINISH (the latter marks the stack bottom)
    addOpcode(OPCode.END);
    addOpcode(OPCode.FINISH);

    // hand the results over to the regex
    regex.code = code;
    regex.codeLength = codeLength;
    regex.templates = templates;
    regex.templateNum = templateNum;
    regex.factory = MatcherFactory.DEFAULT;

    if (!Config.USE_SUBEXP_CALL || analyser.env.unsetAddrList == null) {
        return;
    }
    analyser.env.unsetAddrList.fix(regex);
    analyser.env.unsetAddrList = null;
}
/**
 * Emits code for an alternation list.
 * A first pass sums the compiled length of every alternative (plus one PUSH and one
 * JUMP for each alternative that has a successor) to find the position just past the
 * whole alternation. The second pass emits, for every alternative except the last,
 * a PUSH whose offset skips the alternative and its trailing JUMP, then the
 * alternative itself, then a JUMP to that end position.
 *
 * @param node head of the alternation list
 */
@Override
protected void compileAltNode(ListNode node) {
    // pass 1: total size of the alternation
    int total = 0;
    ListNode alt = node;
    while (true) {
        total += compileLengthTree(alt.value);
        if (alt.tail == null) {
            break;
        }
        total += OPSize.PUSH + OPSize.JUMP;
        alt = alt.tail;
    }

    final int goal = codeLength + total; /* goal position */

    // pass 2: emit each alternative
    alt = node;
    while (true) {
        final int bodyLen = compileLengthTree(alt.value);
        if (alt.tail != null) {
            regex.requireStack = true;
            addOpcodeRelAddr(OPCode.PUSH, bodyLen + OPSize.JUMP);
        }
        compileTree(alt.value);
        if (alt.tail == null) {
            break;
        }
        // relative offset measured from the slot following this JUMP
        addOpcodeRelAddr(OPCode.JUMP, goal - (codeLength + OPSize.JUMP));
        alt = alt.tail;
    }
}
/**
 * Tells whether an exact-string opcode is one of the "N" variants, i.e. one that
 * addCompileString follows with an explicit length operand.
 *
 * @param op the opcode to classify
 * @return true for EXACTN, EXACTMB2N, EXACTMB3N, EXACTMBN, EXACTN_IC and EXACTN_IC_SB
 */
private boolean isNeedStrLenOpExact(int op) {
    switch (op) {
    case OPCode.EXACTN:
    case OPCode.EXACTMB2N:
    case OPCode.EXACTMB3N:
    case OPCode.EXACTMBN:
    case OPCode.EXACTN_IC:
    case OPCode.EXACTN_IC_SB:
        return true;
    default:
        return false;
    }
}
/**
 * Tells whether the given opcode may be emitted in string-template form.
 * Currently this is exactly the set of opcodes accepted by isNeedStrLenOpExact.
 *
 * @param op the opcode to classify
 * @return the result of {@code isNeedStrLenOpExact(op)}
 */
private boolean opTemplated(int op) {
    return isNeedStrLenOpExact(op);
}
/**
 * Picks the exact-match opcode for a run of characters that all have the same byte width.
 *
 * @param mbLength   byte width of one character in the run (must be non-zero; used as a divisor)
 * @param byteLength total byte length of the run
 * @param ignoreCase whether a case-insensitive opcode is wanted
 * @return the selected OPCode constant
 */
private int selectStrOpcode(int mbLength, int byteLength, boolean ignoreCase) {
    // number of characters, rounding a partial trailing character up
    final int charCount = (byteLength + mbLength - 1) / mbLength;

    if (ignoreCase) {
        // the _SB variants are chosen when the encoding supplies a lower-case table
        final boolean hasLowerTable = enc.toLowerCaseTable() != null;
        if (charCount == 1) {
            return hasLowerTable ? OPCode.EXACT1_IC_SB : OPCode.EXACT1_IC;
        }
        return hasLowerTable ? OPCode.EXACTN_IC_SB : OPCode.EXACTN_IC;
    }

    if (mbLength == 1) {
        switch (charCount) {
        case 1:  return OPCode.EXACT1;
        case 2:  return OPCode.EXACT2;
        case 3:  return OPCode.EXACT3;
        case 4:  return OPCode.EXACT4;
        case 5:  return OPCode.EXACT5;
        default: return OPCode.EXACTN;
        }
    }

    if (mbLength == 2) {
        switch (charCount) {
        case 1:  return OPCode.EXACTMB2N1;
        case 2:  return OPCode.EXACTMB2N2;
        case 3:  return OPCode.EXACTMB2N3;
        default: return OPCode.EXACTMB2N;
        }
    }

    return mbLength == 3 ? OPCode.EXACTMB3N : OPCode.EXACTMBN;
}
/**
 * Compiles {@code node}, bracketing it with a null-check start/end pair when
 * {@code emptyInfo} is non-zero.
 * The start and end instructions carry the same id, taken from regex.numNullCheck
 * before it is incremented. Which end opcode is used depends on {@code emptyInfo};
 * for a non-zero value that matches none of the three TargetInfo constants only the
 * id operand is appended after the body.
 *
 * @param node      subtree to compile
 * @param emptyInfo one of the TargetInfo values, or 0 for "no check"
 */
private void compileTreeEmptyCheck(Node node, int emptyInfo) {
    if (emptyInfo == 0) {
        compileTree(node);
        return;
    }

    final int checkId = regex.numNullCheck;
    regex.requireStack = true;
    addOpcode(OPCode.NULL_CHECK_START);
    addMemNum(checkId); /* NULL CHECK ID */
    regex.numNullCheck++;

    compileTree(node);

    if (emptyInfo == TargetInfo.IS_EMPTY) {
        addOpcode(OPCode.NULL_CHECK_END);
    } else if (emptyInfo == TargetInfo.IS_EMPTY_MEM) {
        addOpcode(OPCode.NULL_CHECK_END_MEMST);
    } else if (emptyInfo == TargetInfo.IS_EMPTY_REC) {
        addOpcode(OPCode.NULL_CHECK_END_MEMST_PUSH);
    }
    addMemNum(checkId); /* NULL CHECK ID */
}
/**
 * Computes how many code slots addCompileString uses for one same-width run.
 * {@code bytes} and {@code p} are accepted for signature symmetry with
 * addCompileString but are not read here.
 *
 * @param bytes      source bytes (unused)
 * @param p          start offset of the run (unused)
 * @param mbLength   byte width of one character
 * @param byteLength total byte length of the run
 * @param ignoreCase whether the run is matched case-insensitively
 * @return the slot count for the run
 */
private int addCompileStringlength(byte[]bytes, int p, int mbLength, int byteLength, boolean ignoreCase) {
    final int opcode = selectStrOpcode(mbLength, byteLength, ignoreCase);
    final boolean templated = Config.USE_STRING_TEMPLATES && opTemplated(opcode);

    int size = OPSize.OPCODE;
    if (templated) {
        // string length, template index, template string pointer
        size += OPSize.LENGTH + OPSize.INDEX + OPSize.INDEX;
    } else {
        size += isNeedStrLenOpExact(opcode) ? OPSize.LENGTH : 0;
        size += byteLength;
    }
    // EXACTMBN carries one more length operand (the character width)
    return opcode == OPCode.EXACTMBN ? size + OPSize.LENGTH : size;
}
/**
 * Emits the exact-match instruction for one same-width run of characters.
 * Layout: opcode, then the character width (EXACTMBN only), then a length operand for
 * the "N" opcodes (a byte count for the case-insensitive ones, a character count
 * otherwise), then either the raw bytes or, in template mode, the template index and
 * start offset, with the backing array registered as a template.
 *
 * @param bytes      source bytes
 * @param p          start offset of the run within {@code bytes}
 * @param mbLength   byte width of one character
 * @param byteLength total byte length of the run
 * @param ignoreCase whether the run is matched case-insensitively
 */
@Override
protected final void addCompileString(byte[]bytes, int p, int mbLength, int byteLength, boolean ignoreCase) {
    final int opcode = selectStrOpcode(mbLength, byteLength, ignoreCase);
    addOpcode(opcode);

    if (opcode == OPCode.EXACTMBN) {
        addLength(mbLength);
    }

    if (isNeedStrLenOpExact(opcode)) {
        final boolean lengthInBytes = opcode == OPCode.EXACTN_IC || opcode == OPCode.EXACTN_IC_SB;
        addLength(lengthInBytes ? byteLength : byteLength / mbLength);
    }

    if (!Config.USE_STRING_TEMPLATES || !opTemplated(opcode)) {
        addBytes(bytes, p, byteLength);
        return;
    }
    addInt(templateNum);
    addInt(p);
    addTemplate(bytes);
}
/**
 * Computes the code size of a string node.
 * The string is walked character by character and split into runs of equal
 * character width; an ambiguous (case-folding) string is always kept as one run.
 * The sizes reported by addCompileStringlength for each run are summed.
 *
 * @param node a StringNode
 * @return total slot count, or 0 for an empty string
 */
private int compileLengthStringNode(Node node) {
    final StringNode str = (StringNode)node;
    if (str.length() <= 0) {
        return 0;
    }

    final boolean ambig = str.isAmbig();
    final byte[] bytes = str.bytes;
    final int end = str.end;

    int runStart = str.p;
    int runCharLen = enc.length(bytes, runStart, end);
    int runBytes = runCharLen;
    int total = 0;

    for (int pos = runStart + runCharLen; pos < end; ) {
        final int charLen = enc.length(bytes, pos, end);
        if (ambig || charLen == runCharLen) {
            runBytes += charLen;
        } else {
            // width changed: close the current run and open a new one here
            total += addCompileStringlength(bytes, runStart, runCharLen, runBytes, ambig);
            runStart = pos;
            runBytes = charLen;
            runCharLen = charLen;
        }
        pos += charLen;
    }

    return total + addCompileStringlength(bytes, runStart, runCharLen, runBytes, ambig);
}
/**
 * Computes the code size of a raw string node, treating every byte as a
 * single-byte character and never ignoring case.
 *
 * @param sn the raw string node
 * @return slot count, or 0 for an empty string
 */
private int compileLengthStringRawNode(StringNode sn) {
    final int byteCount = sn.length();
    if (byteCount <= 0) {
        return 0;
    }
    return addCompileStringlength(sn.bytes, sn.p, 1 /*sb*/, byteCount, false);
}
/**
 * Appends a code-range buffer: first its used length, then that many ints of range data.
 *
 * @param mbuf the code-range buffer to emit
 */
private void addMultiByteCClass(CodeRangeBuffer mbuf) {
    final int used = mbuf.getUsed();
    addLength(used);
    addInts(mbuf.getCodeRange(), used);
}
/**
 * Computes the code size of a character-class node.
 * Without a multibyte range buffer the size is opcode + bit set. With one, the bit set
 * is omitted when the encoding's minimum length exceeds 1 or the bit set is empty, and
 * a length operand plus the buffer's used ints are added.
 *
 * @param cc the character class
 * @return slot count
 */
private int compileLengthCClassNode(CClassNode cc) {
    if (cc.mbuf == null) {
        return OPSize.OPCODE + BitSet.BITSET_SIZE;
    }
    final boolean rangesOnly = enc.minLength() > 1 || cc.bs.isEmpty();
    final int head = rangesOnly ? OPSize.OPCODE : OPSize.OPCODE + BitSet.BITSET_SIZE;
    return head + OPSize.LENGTH + cc.mbuf.getUsed();
}
/**
 * Emits a character-class instruction in one of three shapes:
 * bit set only (CCLASS), code ranges only (CCLASS_MB), or bit set followed by code
 * ranges (CCLASS_MIX). Each shape has a _NOT opcode used when the class is negated.
 *
 * @param cc the character class to emit
 */
@Override
protected void compileCClassNode(CClassNode cc) {
    if (cc.mbuf == null) {
        addOpcode(cc.isNot() ? OPCode.CCLASS_NOT : OPCode.CCLASS);
        addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset
        return;
    }

    if (enc.minLength() > 1 || cc.bs.isEmpty()) {
        addOpcode(cc.isNot() ? OPCode.CCLASS_MB_NOT : OPCode.CCLASS_MB);
    } else {
        addOpcode(cc.isNot() ? OPCode.CCLASS_MIX_NOT : OPCode.CCLASS_MIX);
        // the mixed form stores the bit set ahead of the ranges
        addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset
    }
    addMultiByteCClass(cc.mbuf);
}
/**
 * Emits the opcode for a character-type node. Only the WORD type is supported;
 * any other type is reported through newInternalException(PARSER_BUG).
 * The opcode is chosen from the four combinations of negation and ASCII range.
 *
 * @param node the character-type node
 */
@Override
protected void compileCTypeNode(CTypeNode node) {
    if (node.ctype != CharacterType.WORD) {
        newInternalException(PARSER_BUG);
        return; // not reached
    }

    final int opcode;
    if (node.not) {
        opcode = node.asciiRange ? OPCode.ASCII_NOT_WORD : OPCode.NOT_WORD;
    } else {
        opcode = node.asciiRange ? OPCode.ASCII_WORD : OPCode.WORD;
    }
    addOpcode(opcode);
}
/**
 * Emits the any-character opcode, using the multiline variant when the regex
 * options have the multiline flag set.
 */
@Override
protected void compileAnyCharNode() {
    final int opcode = isMultiline(regex.options) ? OPCode.ANYCHAR_ML : OPCode.ANYCHAR;
    addOpcode(opcode);
}
/**
 * Emits a CALL instruction whose target address is not yet known.
 * The position of the address slot is recorded on the node's unset-address list
 * together with the call target, and a placeholder 0 is written into the slot.
 *
 * @param node the call node
 */
@Override
protected void compileCallNode(CallNode node) {
    addOpcode(OPCode.CALL);
    // codeLength now points at the address operand that follows the opcode
    node.unsetAddrList.add(codeLength, node.target);
    addAbsAddr(0); /*dummy addr.*/
}
/**
 * Emits the instruction for a back-reference node.
 * <ul>
 *   <li>Nest-level references (when enabled): BACKREF_WITH_LEVEL, the ignore-case option
 *       bit, the nest level, the group count, then the group numbers.</li>
 *   <li>A single group: BACKREFN_IC + group number when ignoring case; otherwise the
 *       operand-less BACKREF1 / BACKREF2 for groups 1 and 2, or BACKREFN + group number.</li>
 *   <li>Several groups: BACKREF_MULTI or BACKREF_MULTI_IC, the group count, then the
 *       group numbers.</li>
 * </ul>
 * Group numbers are written from the last array element to the first.
 *
 * @param node the back-reference node
 */
@Override
protected void compileBackrefNode(BackRefNode node) {
    final BackRefNode br = node;

    if (Config.USE_BACKREF_WITH_LEVEL && br.isNestLevel()) {
        addOpcode(OPCode.BACKREF_WITH_LEVEL);
        addOption(regex.options & Option.IGNORECASE);
        addLength(br.nestLevel);
        addLength(br.backNum);
        for (int i = br.backNum - 1; i >= 0; i--) addMemNum(br.back[i]);
        return;
    }

    final boolean ignoreCase = isIgnoreCase(regex.options);

    if (br.backNum == 1) {
        final int group = br.back[0];
        if (ignoreCase) {
            addOpcode(OPCode.BACKREFN_IC);
            addMemNum(group);
            return;
        }
        switch (group) {
        case 1:
            addOpcode(OPCode.BACKREF1);
            break;
        case 2:
            addOpcode(OPCode.BACKREF2);
            break;
        default:
            addOpcode(OPCode.BACKREFN);
            // the group number is an operand, so emit it as a mem-num like every other path
            addMemNum(group);
            break;
        } // switch
        return;
    }

    addOpcode(ignoreCase ? OPCode.BACKREF_MULTI_IC : OPCode.BACKREF_MULTI);
    addLength(br.backNum);
    for (int i = br.backNum - 1; i >= 0; i--) addMemNum(br.back[i]);
}
// Minimum number of slots by which the repeat-range tables are created or grown.
private static final int REPEAT_RANGE_ALLOC = 8;

/**
 * Records the bounds of repeat {@code id} in regex.repeatRangeLo / regex.repeatRangeHi,
 * creating or enlarging the tables as needed.
 * The tables grow by at least REPEAT_RANGE_ALLOC slots and always far enough to hold
 * {@code id}, so an id that is not the next sequential one no longer overruns them.
 * An infinite upper bound is stored as 0x7fffffff.
 *
 * @param id    index of the repeat (non-negative)
 * @param lower lower repeat bound
 * @param upper upper repeat bound, possibly the "infinite" marker
 */
private void entryRepeatRange(int id, int lower, int upper) {
    if (regex.repeatRangeLo == null) {
        final int size = Math.max(REPEAT_RANGE_ALLOC, id + 1);
        regex.repeatRangeLo = new int[size];
        regex.repeatRangeHi = new int[size];
    } else if (id >= regex.repeatRangeLo.length) {
        final int loSize = Math.max(regex.repeatRangeLo.length + REPEAT_RANGE_ALLOC, id + 1);
        int[]tmp = new int[loSize];
        System.arraycopy(regex.repeatRangeLo, 0, tmp, 0, regex.repeatRangeLo.length);
        regex.repeatRangeLo = tmp;

        final int hiSize = Math.max(regex.repeatRangeHi.length + REPEAT_RANGE_ALLOC, id + 1);
        tmp = new int[hiSize];
        System.arraycopy(regex.repeatRangeHi, 0, tmp, 0, regex.repeatRangeHi.length);
        regex.repeatRangeHi = tmp;
    }
    regex.repeatRangeLo[id] = lower;
    regex.repeatRangeHi[id] = isRepeatInfinite(upper) ? 0x7fffffff : upper;
}
/**
 * Emits a counted-repeat construct around the quantifier's target:
 * REPEAT (or REPEAT_NG) with the repeat id and a relative address spanning the body
 * plus the closing increment, then the body (with optional null check), then a
 * REPEAT_INC-family opcode carrying the same id. The _SG increment variants are used
 * when sub-expression calls are present or the quantifier is inside another repeat.
 *
 * @param qn        the quantifier
 * @param targetLen length of the body, including any null-check overhead
 * @param emptyInfo empty-match information forwarded to compileTreeEmptyCheck
 */
private void compileRangeRepeatNode(QuantifierNode qn, int targetLen, int emptyInfo) {
    regex.requireStack = true;

    // take the id before the body is compiled: nested repeats allocate their own
    final int repeatId = regex.numRepeat;
    if (qn.greedy) {
        addOpcode(OPCode.REPEAT);
    } else {
        addOpcode(OPCode.REPEAT_NG);
    }
    addMemNum(repeatId); /* OP_REPEAT ID */
    regex.numRepeat++;
    addRelAddr(targetLen + OPSize.REPEAT_INC);

    entryRepeatRange(repeatId, qn.lower, qn.upper);
    compileTreeEmptyCheck(qn.target, emptyInfo);

    final boolean useSgVariant = (Config.USE_SUBEXP_CALL && regex.numCall > 0) || qn.isInRepeat();
    final int incOpcode;
    if (useSgVariant) {
        incOpcode = qn.greedy ? OPCode.REPEAT_INC_SG : OPCode.REPEAT_INC_NG_SG;
    } else {
        incOpcode = qn.greedy ? OPCode.REPEAT_INC : OPCode.REPEAT_INC_NG;
    }
    addOpcode(incOpcode);
    addMemNum(repeatId); /* OP_REPEAT ID */
}
// Code-size threshold used by the non-CEC quantifier paths: a quantifier body is
// unrolled inline only while the unrolled size stays at or below this value.
private static final int QUANTIFIER_EXPAND_LIMIT_SIZE = 50;
/**
 * Tells whether a state-check number is active.
 *
 * @param ckn the state-check number (0 means "none")
 * @return true when {@code ckn} is positive
 */
private static boolean cknOn(int ckn) {
    return ckn > 0;
}
/**
 * Computes the code size of a quantifier for the state-check ("CEC") code path.
 * The branch structure parallels compileCECQuantifierNode; each branch sums the
 * OPSize constants of the instructions emitted there plus the target's compiled length.
 *
 * @param qn the quantifier
 * @return slot count
 */
private int compileCECLengthQuantifierNode(QuantifierNode qn) {
    boolean infinite = isRepeatInfinite(qn.upper);
    int emptyInfo = qn.targetEmptyInfo;
    int tlen = compileLengthTree(qn.target);
    // state-check number: only honoured when the regex has any such checks at all
    int ckn = regex.numCombExpCheck > 0 ? qn.combExpCheckNum : 0;
    // extra operand size for the state-check number, when one is active
    int cklen = cknOn(ckn) ? OPSize.STATE_CHECK_NUM : 0;
    /* anychar repeat */
    if (qn.target.getType() == NodeType.CANY) {
        if (qn.greedy && infinite) {
            // `lower` mandatory copies of the target followed by a single star opcode
            if (qn.nextHeadExact != null && !cknOn(ckn)) {
                return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower + cklen;
            } else {
                return OPSize.ANYCHAR_STAR + tlen * qn.lower + cklen;
            }
        }
    }
    // target length including the null-check bracket, if one will be emitted
    int modTLen;
    if (emptyInfo != 0) {
        modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
    } else {
        modTLen = tlen;
    }
    int len;
    if (infinite && qn.lower <= 1) {
        // '*' / '+' (and their non-greedy forms): a PUSH/JUMP loop
        if (qn.greedy) {
            if (qn.lower == 1) {
                len = OPSize.JUMP;
            } else {
                len = 0;
            }
            len += OPSize.PUSH + cklen + modTLen + OPSize.JUMP;
        } else {
            if (qn.lower == 0) {
                len = OPSize.JUMP;
            } else {
                len = 0;
            }
            len += modTLen + OPSize.PUSH + cklen;
        }
    } else if (qn.upper == 0) {
        // zero repetitions: the body is kept (behind a JUMP) only when it is referred to
        if (qn.isRefered) { /* /(?..){0}/ */
            len = OPSize.JUMP + tlen;
        } else {
            len = 0;
        }
    } else if (qn.upper == 1 && qn.greedy) {
        // '?' or exactly-once
        if (qn.lower == 0) {
            if (cknOn(ckn)) {
                len = OPSize.STATE_CHECK_PUSH + tlen;
            } else {
                len = OPSize.PUSH + tlen;
            }
        } else {
            len = tlen;
        }
    } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */
        len = OPSize.PUSH + cklen + OPSize.JUMP + tlen;
    } else {
        // general counted repeat: REPEAT header (opcode, rel. address, id) + body + REPEAT_INC
        len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM;
        if (cknOn(ckn)) {
            len += OPSize.STATE_CHECK;
        }
    }
    return len;
}
/**
 * Emits code for a quantifier on the state-check ("CEC") code path
 * (CEC presumably abbreviates the combination-explosion check tracked by
 * regex.numCombExpCheck - confirm against the project documentation).
 * <p>
 * Shapes produced, in order of precedence:
 * <ol>
 *   <li>any-char star: {@code lower} copies of the target, then one ANYCHAR*_STAR
 *       opcode (peek-next form when a following literal is known and no state check
 *       is active, state-check form when one is);</li>
 *   <li>infinite with lower &lt;= 1: a PUSH/JUMP loop around the null-checked body;</li>
 *   <li>upper == 0: nothing, or a JUMP over the body when the body is referred to;</li>
 *   <li>greedy with upper == 1: optional PUSH followed by the body;</li>
 *   <li>non-greedy '??': PUSH, JUMP, body;</li>
 *   <li>anything else: a counted repeat via compileRangeRepeatNode.</li>
 * </ol>
 * When a state-check number is active the PUSH-style opcodes are replaced by their
 * STATE_CHECK variants, which carry that number as an extra operand.
 *
 * @param qn the quantifier
 */
@Override
protected void compileCECQuantifierNode(QuantifierNode qn) {
    regex.requireStack = true;
    boolean infinite = isRepeatInfinite(qn.upper);
    int emptyInfo = qn.targetEmptyInfo;
    int tlen = compileLengthTree(qn.target);
    // state-check number: only honoured when the regex has any such checks at all
    int ckn = regex.numCombExpCheck > 0 ? qn.combExpCheckNum : 0;

    if (qn.isAnyCharStar()) {
        compileTreeNTimes(qn.target, qn.lower);
        if (qn.nextHeadExact != null && !cknOn(ckn)) {
            if (isMultiline(regex.options)) {
                addOpcode(OPCode.ANYCHAR_ML_STAR_PEEK_NEXT);
            } else {
                addOpcode(OPCode.ANYCHAR_STAR_PEEK_NEXT);
            }
            // No state-check operand here: this branch is entered only when ckn is off,
            // so the former nested "if (cknOn(ckn))" could never fire and was removed.
            StringNode sn = (StringNode)qn.nextHeadExact;
            addBytes(sn.bytes, sn.p, 1); // first byte of the literal that follows
            return;
        } else {
            if (isMultiline(regex.options)) {
                if (cknOn(ckn)) {
                    addOpcode(OPCode.STATE_CHECK_ANYCHAR_ML_STAR);
                } else {
                    addOpcode(OPCode.ANYCHAR_ML_STAR);
                }
            } else {
                if (cknOn(ckn)) {
                    addOpcode(OPCode.STATE_CHECK_ANYCHAR_STAR);
                } else {
                    addOpcode(OPCode.ANYCHAR_STAR);
                }
            }
            if (cknOn(ckn)) {
                addStateCheckNum(ckn);
            }
            return;
        }
    }

    // target length including the null-check bracket, if one will be emitted
    int modTLen;
    if (emptyInfo != 0) {
        modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
    } else {
        modTLen = tlen;
    }

    if (infinite && qn.lower <= 1) {
        if (qn.greedy) {
            if (qn.lower == 1) {
                // '+': jump over the PUSH so the body runs once unconditionally
                addOpcodeRelAddr(OPCode.JUMP, cknOn(ckn) ? OPSize.STATE_CHECK_PUSH :
                                                           OPSize.PUSH);
            }
            if (cknOn(ckn)) {
                addOpcode(OPCode.STATE_CHECK_PUSH);
                addStateCheckNum(ckn);
                addRelAddr(modTLen + OPSize.JUMP);
            } else {
                addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP);
            }
            compileTreeEmptyCheck(qn.target, emptyInfo);
            // loop back to the PUSH
            addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + (cknOn(ckn) ?
                                                                     OPSize.STATE_CHECK_PUSH :
                                                                     OPSize.PUSH)));
        } else {
            if (qn.lower == 0) {
                // non-greedy '*?': skip the body first
                addOpcodeRelAddr(OPCode.JUMP, modTLen);
            }
            compileTreeEmptyCheck(qn.target, emptyInfo);
            if (cknOn(ckn)) {
                addOpcode(OPCode.STATE_CHECK_PUSH_OR_JUMP);
                addStateCheckNum(ckn);
                addRelAddr(-(modTLen + OPSize.STATE_CHECK_PUSH_OR_JUMP));
            } else {
                addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH));
            }
        }
    } else if (qn.upper == 0) {
        if (qn.isRefered) { /* /(?..){0}/ */
            // body must exist for references, but is jumped over in-line
            addOpcodeRelAddr(OPCode.JUMP, tlen);
            compileTree(qn.target);
        } // otherwise nothing is emitted
    } else if (qn.upper == 1 && qn.greedy) {
        if (qn.lower == 0) {
            if (cknOn(ckn)) {
                addOpcode(OPCode.STATE_CHECK_PUSH);
                addStateCheckNum(ckn);
                addRelAddr(tlen);
            } else {
                addOpcodeRelAddr(OPCode.PUSH, tlen);
            }
        }
        compileTree(qn.target);
    } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0){ /* '??' */
        if (cknOn(ckn)) {
            addOpcode(OPCode.STATE_CHECK_PUSH);
            addStateCheckNum(ckn);
            addRelAddr(OPSize.JUMP);
        } else {
            addOpcodeRelAddr(OPCode.PUSH, OPSize.JUMP);
        }
        addOpcodeRelAddr(OPCode.JUMP, tlen);
        compileTree(qn.target);
    } else {
        compileRangeRepeatNode(qn, modTLen, emptyInfo);
        if (cknOn(ckn)) {
            addOpcode(OPCode.STATE_CHECK);
            addStateCheckNum(ckn);
        }
    }
}
/**
 * Computes the code size of a quantifier for the code path without state checks.
 * Each branch sums the OPSize constants of the instructions used for that quantifier
 * shape plus the required number of copies of the target's compiled length.
 *
 * @param qn the quantifier
 * @return slot count
 */
private int compileNonCECLengthQuantifierNode(QuantifierNode qn) {
    boolean infinite = isRepeatInfinite(qn.upper);
    int emptyInfo = qn.targetEmptyInfo;
    int tlen = compileLengthTree(qn.target);
    /* anychar repeat */
    if (qn.target.getType() == NodeType.CANY) {
        if (qn.greedy && infinite) {
            // `lower` mandatory copies of the target followed by a single star opcode
            if (qn.nextHeadExact != null) {
                return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower;
            } else {
                return OPSize.ANYCHAR_STAR + tlen * qn.lower;
            }
        }
    }
    // target length including the null-check bracket, if one will be emitted
    int modTLen;
    if (emptyInfo != 0) {
        modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
    } else {
        modTLen = tlen;
    }
    int len;
    if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
        // unbounded repeat: mandatory part is unrolled unless a single copy is already
        // over the expand limit, in which case a JUMP into the loop is used instead
        if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
            len = OPSize.JUMP;
        } else {
            len = tlen * qn.lower;
        }
        if (qn.greedy) {
            // the loop head opcode depends on which literal hint is available
            if (qn.headExact != null) {
                len += OPSize.PUSH_OR_JUMP_EXACT1 + modTLen + OPSize.JUMP;
            } else if (qn.nextHeadExact != null) {
                len += OPSize.PUSH_IF_PEEK_NEXT + modTLen + OPSize.JUMP;
            } else {
                len += OPSize.PUSH + modTLen + OPSize.JUMP;
            }
        } else {
            len += OPSize.JUMP + modTLen + OPSize.PUSH;
        }
    } else if (qn.upper == 0 && qn.isRefered) { /* /(?..){0}/ */
        len = OPSize.JUMP + tlen;
    } else if (!infinite && qn.greedy &&
              (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE )) {
        // small bounded greedy repeat: fully unrolled, optional copies each behind a PUSH
        len = tlen * qn.lower;
        len += (OPSize.PUSH + tlen) * (qn.upper - qn.lower);
    } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */
        len = OPSize.PUSH + OPSize.JUMP + tlen;
    } else {
        // general counted repeat: REPEAT header (opcode, rel. address, id) + body + REPEAT_INC
        len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM;
    }
    return len;
}
@Override
protected void compileNonCECQuantifierNode(QuantifierNode qn) {
regex.requireStack = true;
boolean infinite = isRepeatInfinite(qn.upper);
int emptyInfo = qn.targetEmptyInfo;
int tlen = compileLengthTree(qn.target);
if (qn.isAnyCharStar()) {
compileTreeNTimes(qn.target, qn.lower);
if (qn.nextHeadExact != null) {
if (isMultiline(regex.options)) {
addOpcode(OPCode.ANYCHAR_ML_STAR_PEEK_NEXT);
} else {
addOpcode(OPCode.ANYCHAR_STAR_PEEK_NEXT);
}
StringNode sn = (StringNode)qn.nextHeadExact;
addBytes(sn.bytes, sn.p, 1);
return;
} else {
if (isMultiline(regex.options)) {
addOpcode(OPCode.ANYCHAR_ML_STAR);
} else {
addOpcode(OPCode.ANYCHAR_STAR);
}
return;
}
}
int modTLen;
if (emptyInfo != 0) {
modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
} else {
modTLen = tlen;
}
if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
if (qn.greedy) {
if (qn.headExact != null) {
addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_OR_JUMP_EXACT1);
} else if (qn.nextHeadExact != null) {
addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_IF_PEEK_NEXT);
} else {
addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH);
}
} else {
addOpcodeRelAddr(OPCode.JUMP, OPSize.JUMP);
}
} else {
compileTreeNTimes(qn.target, qn.lower);
}
if (qn.greedy) {
if (qn.headExact != null) {
addOpcodeRelAddr(OPCode.PUSH_OR_JUMP_EXACT1, modTLen + OPSize.JUMP);
StringNode sn = (StringNode)qn.headExact;
addBytes(sn.bytes, sn.p, 1);
compileTreeEmptyCheck(qn.target, emptyInfo);
addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_OR_JUMP_EXACT1));
} else if (qn.nextHeadExact != null) {
addOpcodeRelAddr(OPCode.PUSH_IF_PEEK_NEXT, modTLen + OPSize.JUMP);
StringNode sn = (StringNode)qn.nextHeadExact;
addBytes(sn.bytes, sn.p, 1);
compileTreeEmptyCheck(qn.target, emptyInfo);
addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_IF_PEEK_NEXT));
} else {
addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP);
compileTreeEmptyCheck(qn.target, emptyInfo);
addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH));
}
} else {
addOpcodeRelAddr(OPCode.JUMP, modTLen);
compileTreeEmptyCheck(qn.target, emptyInfo);
addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH));
}
} else if (qn.upper == 0 && qn.isRefered) { /* /(?..){0}/ */
addOpcodeRelAddr(OPCode.JUMP, tlen);
compileTree(qn.target);
} else if (!infinite && qn.greedy &&
(qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
int n = qn.upper - qn.lower;
compileTreeNTimes(qn.target, qn.lower);
for (int i=0; i= code.length) {
int length = code.length << 1;
while (length <= size) length <<= 1;
int[]tmp = new int[length];
System.arraycopy(code, 0, tmp, 0, code.length);
code = tmp;
}
}
/**
 * Appends one int to the instruction buffer, doubling the buffer first when it is full.
 *
 * @param i the value to append
 */
private void addInt(int i) {
    if (code.length <= codeLength) {
        final int[] grown = new int[code.length << 1];
        System.arraycopy(code, 0, grown, 0, code.length);
        code = grown;
    }
    code[codeLength++] = i;
}
/**
 * Overwrites the slot at {@code offset} of the regex's code array with {@code i}.
 *
 * @param i      the value to store
 * @param offset index of the slot to overwrite
 */
void setInt(int i, int offset) {
    // NOTE(review): ensure() appears to grow/replace this compiler's `code` field, while the
    // write below targets regex.code. If ensure() ever reallocates, regex.code would still be
    // the old, smaller array - confirm callers only pass offsets already inside regex.code.
    ensure(offset);
    regex.code[offset] = i;
}
/**
 * Appends {@code length} bytes starting at {@code bytes[p]}, one byte per int slot.
 * Each byte is widened to int as-is, so values above 0x7f are stored as negative ints.
 *
 * @param bytes  source array
 * @param p      index of the first byte to copy
 * @param length number of bytes to copy
 */
private void addBytes(byte[]bytes, int p ,int length) {
    ensure(codeLength + length);
    for (int src = p, stop = p + length; src < stop; src++) {
        code[codeLength++] = bytes[src];
    }
}
/**
 * Appends the first {@code length} elements of {@code ints} to the instruction buffer.
 *
 * @param ints   source array
 * @param length number of leading elements to copy
 */
private void addInts(int[]ints, int length) {
    // make room first, then bulk-copy and advance the write position
    ensure(codeLength + length);
    System.arraycopy(ints, 0, code, codeLength, length);
    codeLength += length;
}
/** Appends an opcode; stored as a single int via addInt. */
private void addOpcode(int opcode) {
    addInt(opcode);
}
/** Appends a state-check number operand; stored as a single int via addInt. */
private void addStateCheckNum(int num) {
    addInt(num);
}
/** Appends a relative-address operand; stored as a single int via addInt. */
private void addRelAddr(int addr) {
    addInt(addr);
}
/** Appends an absolute-address operand; stored as a single int via addInt. */
private void addAbsAddr(int addr) {
    addInt(addr);
}
/** Appends a length operand; stored as a single int via addInt. */
private void addLength(int length) {
    addInt(length);
}
/** Appends a memory/group-number (or repeat / null-check id) operand; stored as a single int via addInt. */
private void addMemNum(int num) {
    addInt(num);
}
/** Appends an option-flags operand; stored as a single int via addInt. */
private void addOption(int option) {
    addInt(option);
}
/**
 * Appends an opcode immediately followed by its relative-address operand.
 *
 * @param opcode the opcode
 * @param addr   the relative address operand
 */
private void addOpcodeRelAddr(int opcode, int addr) {
    addOpcode(opcode);
    addRelAddr(addr);
}
/**
 * Appends an opcode immediately followed by an option-flags operand.
 *
 * @param opcode the opcode
 * @param option the option flags operand
 */
private void addOpcodeOption(int opcode, int option) {
    addOpcode(opcode);
    addOption(option);
}
/**
 * Registers a byte array in the template table.
 * The table is created with two slots on first use and doubled whenever it is full.
 * The array reference itself is stored; no copy is made.
 *
 * @param bytes the template to register
 */
private void addTemplate(byte[]bytes) {
    final int used = templateNum;
    if (used == 0) {
        templates = new byte[2][];
    } else if (templates.length == used) {
        final byte[][] grown = new byte[used * 2][];
        System.arraycopy(templates, 0, grown, 0, used);
        templates = grown;
    }
    templates[templateNum++] = bytes;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy