All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.joni.ArrayCompiler Maven / Gradle / Ivy

Go to download

Java port of Oniguruma: http://www.geocities.jp/kosako3/oniguruma that uses byte arrays directly instead of java Strings and chars

The newest version!
/*
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is furnished to do
 * so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package org.joni;

import static org.joni.BitStatus.bsAt;
import static org.joni.Option.isDynamic;
import static org.joni.Option.isIgnoreCase;
import static org.joni.Option.isMultiline;
import static org.joni.ast.QuantifierNode.isRepeatInfinite;

import org.jcodings.constants.CharacterType;
import org.joni.ast.AnchorNode;
import org.joni.ast.BackRefNode;
import org.joni.ast.CClassNode;
import org.joni.ast.CTypeNode;
import org.joni.ast.CallNode;
import org.joni.ast.ListNode;
import org.joni.ast.EncloseNode;
import org.joni.ast.Node;
import org.joni.ast.QuantifierNode;
import org.joni.ast.StringNode;
import org.joni.constants.internal.AnchorType;
import org.joni.constants.internal.EncloseType;
import org.joni.constants.internal.NodeType;
import org.joni.constants.internal.OPCode;
import org.joni.constants.internal.OPSize;
import org.joni.constants.internal.TargetInfo;

final class ArrayCompiler extends Compiler {
    private int[]code;
    private int codeLength;

    private byte[][]templates;
    private int templateNum;

    ArrayCompiler(Analyser analyser) {
        super(analyser);
    }

    @Override
    protected final void prepare() {
        int codeSize = Config.USE_STRING_TEMPLATES ? 8 : ((analyser.getEnd() - analyser.getBegin()) * 2 + 2);
        code = new int[codeSize];
        codeLength = 0;
    }

    @Override
    protected final void finish() {
        addOpcode(OPCode.END);
        addOpcode(OPCode.FINISH); // for stack bottom

        regex.code = code;
        regex.codeLength = codeLength;
        regex.templates = templates;
        regex.templateNum = templateNum;
        regex.factory = MatcherFactory.DEFAULT;

        if (Config.USE_SUBEXP_CALL && analyser.env.unsetAddrList != null) {
            analyser.env.unsetAddrList.fix(regex);
            analyser.env.unsetAddrList = null;
        }
    }

    @Override
    protected void compileAltNode(ListNode node) {
        ListNode aln = node;
        int len = 0;

        do {
            len += compileLengthTree(aln.value);
            if (aln.tail != null) {
                len += OPSize.PUSH + OPSize.JUMP;
            }
        } while ((aln = aln.tail) != null);

        int pos = codeLength + len;  /* goal position */

        aln = node;
        do {
            len = compileLengthTree(aln.value);
            if (aln.tail != null) {
                regex.requireStack = true;
                addOpcodeRelAddr(OPCode.PUSH, len + OPSize.JUMP);
            }
            compileTree(aln.value);
            if (aln.tail != null) {
                len = pos - (codeLength + OPSize.JUMP);
                addOpcodeRelAddr(OPCode.JUMP, len);
            }
        } while ((aln = aln.tail) != null);
    }

    private boolean isNeedStrLenOpExact(int op) {
        return  op == OPCode.EXACTN         ||
                op == OPCode.EXACTMB2N      ||
                op == OPCode.EXACTMB3N      ||
                op == OPCode.EXACTMBN       ||
                op == OPCode.EXACTN_IC      ||
                op == OPCode.EXACTN_IC_SB;
    }

    private boolean opTemplated(int op) {
        return isNeedStrLenOpExact(op);
    }

    private int selectStrOpcode(int mbLength, int byteLength, boolean ignoreCase) {
        int op;
        int strLength = (byteLength + mbLength - 1) / mbLength;

        if (ignoreCase) {
            switch(strLength) {
            case 1: op = enc.toLowerCaseTable() != null ? OPCode.EXACT1_IC_SB : OPCode.EXACT1_IC; break;
            default:op = enc.toLowerCaseTable() != null ? OPCode.EXACTN_IC_SB : OPCode.EXACTN_IC; break;
            } // switch
        } else {
            switch (mbLength) {
            case 1:
                switch (strLength) {
                case 1: op = OPCode.EXACT1; break;
                case 2: op = OPCode.EXACT2; break;
                case 3: op = OPCode.EXACT3; break;
                case 4: op = OPCode.EXACT4; break;
                case 5: op = OPCode.EXACT5; break;
                default:op = OPCode.EXACTN; break;
                } // inner switch
                break;
            case 2:
                switch (strLength) {
                case 1: op = OPCode.EXACTMB2N1; break;
                case 2: op = OPCode.EXACTMB2N2; break;
                case 3: op = OPCode.EXACTMB2N3; break;
                default:op = OPCode.EXACTMB2N;  break;
                } // inner switch
                break;
            case 3:
                op = OPCode.EXACTMB3N;
                break;
            default:
                op = OPCode.EXACTMBN;
            } // switch
        }
        return op;
    }

    private void compileTreeEmptyCheck(Node node, int emptyInfo) {
        int savedNumNullCheck = regex.numNullCheck;

        if (emptyInfo != 0) {
            regex.requireStack = true;
            addOpcode(OPCode.NULL_CHECK_START);
            addMemNum(regex.numNullCheck); /* NULL CHECK ID */
            regex.numNullCheck++;
        }

        compileTree(node);

        if (emptyInfo != 0) {
            switch(emptyInfo) {
            case TargetInfo.IS_EMPTY:
                addOpcode(OPCode.NULL_CHECK_END);
                break;
            case TargetInfo.IS_EMPTY_MEM:
                addOpcode(OPCode.NULL_CHECK_END_MEMST);
                break;
            case TargetInfo.IS_EMPTY_REC:
                addOpcode(OPCode.NULL_CHECK_END_MEMST_PUSH);
                break;
            } // switch

            addMemNum(savedNumNullCheck); /* NULL CHECK ID */
        }
    }

    private int addCompileStringlength(byte[]bytes, int p, int mbLength, int byteLength, boolean ignoreCase) {
        int op = selectStrOpcode(mbLength, byteLength, ignoreCase);
        int len = OPSize.OPCODE;

        if (Config.USE_STRING_TEMPLATES && opTemplated(op)) {
            // string length, template index, template string pointer
            len += OPSize.LENGTH + OPSize.INDEX + OPSize.INDEX;
        } else {
            if (isNeedStrLenOpExact(op)) len += OPSize.LENGTH;
            len += byteLength;
        }
        if (op == OPCode.EXACTMBN) len += OPSize.LENGTH;
        return len;
    }

    @Override
    protected final void addCompileString(byte[]bytes, int p, int mbLength, int byteLength, boolean ignoreCase) {
        int op = selectStrOpcode(mbLength, byteLength, ignoreCase);
        addOpcode(op);

        if (op == OPCode.EXACTMBN) addLength(mbLength);

        if (isNeedStrLenOpExact(op)) {
            if (op == OPCode.EXACTN_IC || op == OPCode.EXACTN_IC_SB) {
                addLength(byteLength);
            } else {
                addLength(byteLength / mbLength);
            }
        }

        if (Config.USE_STRING_TEMPLATES && opTemplated(op)) {
            addInt(templateNum);
            addInt(p);
            addTemplate(bytes);
        } else {
            addBytes(bytes, p, byteLength);
        }
    }

    private int compileLengthStringNode(Node node) {
        StringNode sn = (StringNode)node;
        if (sn.length() <= 0) return 0;
        boolean ambig = sn.isAmbig();

        int p, prev;
        p = prev = sn.p;
        int end = sn.end;
        byte[]bytes = sn.bytes;
        int prevLen = enc.length(bytes, p, end);
        p += prevLen;
        int blen = prevLen;
        int rlen = 0;

        while (p < end) {
            int len = enc.length(bytes, p, end);
            if (len == prevLen || ambig) {
                blen += len;
            } else {
                int r = addCompileStringlength(bytes, prev, prevLen, blen, ambig);
                rlen += r;
                prev = p;
                blen = len;
                prevLen = len;
            }
            p += len;
        }
        int r = addCompileStringlength(bytes, prev, prevLen, blen, ambig);
        rlen += r;
        return rlen;
    }

    private int compileLengthStringRawNode(StringNode sn) {
        if (sn.length() <= 0) return 0;
        return addCompileStringlength(sn.bytes, sn.p, 1 /*sb*/, sn.length(), false);
    }

    private void addMultiByteCClass(CodeRangeBuffer mbuf) {
        addLength(mbuf.getUsed());
        addInts(mbuf.getCodeRange(), mbuf.getUsed());
    }

    private int compileLengthCClassNode(CClassNode cc) {
        int len;
        if (cc.mbuf == null) {
            len = OPSize.OPCODE + BitSet.BITSET_SIZE;
        } else {
            if (enc.minLength() > 1 || cc.bs.isEmpty()) {
                len = OPSize.OPCODE;
            } else {
                len = OPSize.OPCODE + BitSet.BITSET_SIZE;
            }

            len += OPSize.LENGTH + cc.mbuf.getUsed();
        }
        return len;
    }

    @Override
    protected void compileCClassNode(CClassNode cc) {
        if (cc.mbuf == null) {
            if (cc.isNot()) {
                addOpcode(OPCode.CCLASS_NOT);
            } else {
                addOpcode(OPCode.CCLASS);
            }
            addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset
        } else {
            if (enc.minLength() > 1 || cc.bs.isEmpty()) {
                if (cc.isNot()) {
                    addOpcode(OPCode.CCLASS_MB_NOT);
                } else {
                    addOpcode(OPCode.CCLASS_MB);
                }
                addMultiByteCClass(cc.mbuf);
            } else {
                if (cc.isNot()) {
                    addOpcode(OPCode.CCLASS_MIX_NOT);
                } else {
                    addOpcode(OPCode.CCLASS_MIX);
                }
                // store the bit set and mbuf themself!
                addInts(cc.bs.bits, BitSet.BITSET_SIZE); // add_bitset
                addMultiByteCClass(cc.mbuf);
            }
        }
    }

    @Override
    protected void compileCTypeNode(CTypeNode node) {
        CTypeNode cn = node;
        int op;
        switch (cn.ctype) {
        case CharacterType.WORD:
            if (cn.not) {
                if (cn.asciiRange) {
                    op = OPCode.ASCII_NOT_WORD;
                } else {
                    op = OPCode.NOT_WORD;
                }
            } else {
                if (cn.asciiRange) {
                    op = OPCode.ASCII_WORD;
                } else {
                    op = OPCode.WORD;
                }
            }
            break;

        default:
            newInternalException(PARSER_BUG);
            return; // not reached
        } // inner switch
        addOpcode(op);
    }

    @Override
    protected void compileAnyCharNode() {
        if (isMultiline(regex.options)) {
            addOpcode(OPCode.ANYCHAR_ML);
        } else {
            addOpcode(OPCode.ANYCHAR);
        }
    }

    @Override
    protected void compileCallNode(CallNode node) {
        addOpcode(OPCode.CALL);
        node.unsetAddrList.add(codeLength, node.target);
        addAbsAddr(0); /*dummy addr.*/
    }

    @Override
    protected void compileBackrefNode(BackRefNode node) {
        BackRefNode br = node;
        if (Config.USE_BACKREF_WITH_LEVEL && br.isNestLevel()) {
            addOpcode(OPCode.BACKREF_WITH_LEVEL);
            addOption(regex.options & Option.IGNORECASE);
            addLength(br.nestLevel);
            // !goto add_bacref_mems;!
            addLength(br.backNum);
            for (int i=br.backNum-1; i>=0; i--) addMemNum(br.back[i]);
            return;
        } else { // USE_BACKREF_AT_LEVEL
            if (br.backNum == 1) {
                if (isIgnoreCase(regex.options)) {
                    addOpcode(OPCode.BACKREFN_IC);
                    addMemNum(br.back[0]);
                } else {
                    switch (br.back[0]) {
                    case 1:
                        addOpcode(OPCode.BACKREF1);
                        break;
                    case 2:
                        addOpcode(OPCode.BACKREF2);
                        break;
                    default:
                        addOpcode(OPCode.BACKREFN);
                        addOpcode(br.back[0]);
                        break;
                    } // switch
                }
            } else {
                if (isIgnoreCase(regex.options)) {
                    addOpcode(OPCode.BACKREF_MULTI_IC);
                } else {
                    addOpcode(OPCode.BACKREF_MULTI);
                }
                // !add_bacref_mems:!
                addLength(br.backNum);
                for (int i=br.backNum-1; i>=0; i--) addMemNum(br.back[i]);
            }
        }
    }

    private static final int REPEAT_RANGE_ALLOC = 8;
    private void entryRepeatRange(int id, int lower, int upper) {
        if (regex.repeatRangeLo == null) {
            regex.repeatRangeLo = new int[REPEAT_RANGE_ALLOC];
            regex.repeatRangeHi = new int[REPEAT_RANGE_ALLOC];
        } else if (id >= regex.repeatRangeLo.length){
            int[]tmp = new int[regex.repeatRangeLo.length + REPEAT_RANGE_ALLOC];
            System.arraycopy(regex.repeatRangeLo, 0, tmp, 0, regex.repeatRangeLo.length);
            regex.repeatRangeLo = tmp;
            tmp = new int[regex.repeatRangeHi.length + REPEAT_RANGE_ALLOC];
            System.arraycopy(regex.repeatRangeHi, 0, tmp, 0, regex.repeatRangeHi.length);
            regex.repeatRangeHi = tmp;
        }

        regex.repeatRangeLo[id] = lower;
        regex.repeatRangeHi[id] = isRepeatInfinite(upper) ? 0x7fffffff : upper;
    }

    private void compileRangeRepeatNode(QuantifierNode qn, int targetLen, int emptyInfo) {
        regex.requireStack = true;
        int numRepeat = regex.numRepeat;
        addOpcode(qn.greedy ? OPCode.REPEAT : OPCode.REPEAT_NG);
        addMemNum(numRepeat); /* OP_REPEAT ID */
        regex.numRepeat++;
        addRelAddr(targetLen + OPSize.REPEAT_INC);

        entryRepeatRange(numRepeat, qn.lower, qn.upper);

        compileTreeEmptyCheck(qn.target, emptyInfo);

        if ((Config.USE_SUBEXP_CALL && regex.numCall > 0) || qn.isInRepeat()) {
            addOpcode(qn.greedy ? OPCode.REPEAT_INC_SG : OPCode.REPEAT_INC_NG_SG);
        } else {
            addOpcode(qn.greedy ? OPCode.REPEAT_INC : OPCode.REPEAT_INC_NG);
        }

        addMemNum(numRepeat); /* OP_REPEAT ID */
    }

    private static final int QUANTIFIER_EXPAND_LIMIT_SIZE   = 50; // was 50

    private static boolean cknOn(int ckn) {
        return ckn > 0;
    }

    private int compileCECLengthQuantifierNode(QuantifierNode qn) {
        boolean infinite = isRepeatInfinite(qn.upper);
        int emptyInfo = qn.targetEmptyInfo;

        int tlen = compileLengthTree(qn.target);
        int ckn = regex.numCombExpCheck > 0 ? qn.combExpCheckNum : 0;
        int cklen = cknOn(ckn) ? OPSize.STATE_CHECK_NUM : 0;

        /* anychar repeat */
        if (qn.target.getType() == NodeType.CANY) {
            if (qn.greedy && infinite) {
                if (qn.nextHeadExact != null && !cknOn(ckn)) {
                    return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower + cklen;
                } else {
                    return OPSize.ANYCHAR_STAR + tlen * qn.lower + cklen;
                }
            }
        }

        int modTLen;
        if (emptyInfo != 0) {
            modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
        } else {
            modTLen = tlen;
        }

        int len;
        if (infinite && qn.lower <= 1) {
            if (qn.greedy) {
                if (qn.lower == 1) {
                    len = OPSize.JUMP;
                } else {
                    len = 0;
                }
                len += OPSize.PUSH + cklen + modTLen + OPSize.JUMP;
            } else {
                if (qn.lower == 0) {
                    len = OPSize.JUMP;
                } else {
                    len = 0;
                }
                len += modTLen + OPSize.PUSH + cklen;
            }
        } else if (qn.upper == 0) {
            if (qn.isRefered) { /* /(?..){0}/ */
                len = OPSize.JUMP + tlen;
            } else {
                len = 0;
            }
        } else if (qn.upper == 1 && qn.greedy) {
            if (qn.lower == 0) {
                if (cknOn(ckn)) {
                    len = OPSize.STATE_CHECK_PUSH + tlen;
                } else {
                    len = OPSize.PUSH + tlen;
                }
            } else {
                len = tlen;
            }
        } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */
            len = OPSize.PUSH + cklen + OPSize.JUMP + tlen;
        } else {
            len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM;

            if (cknOn(ckn)) {
                len += OPSize.STATE_CHECK;
            }
        }
        return len;
    }

    @Override
    protected void compileCECQuantifierNode(QuantifierNode qn) {
        regex.requireStack = true;
        boolean infinite = isRepeatInfinite(qn.upper);
        int emptyInfo = qn.targetEmptyInfo;

        int tlen = compileLengthTree(qn.target);

        int ckn = regex.numCombExpCheck > 0 ? qn.combExpCheckNum : 0;

        if (qn.isAnyCharStar()) {
            compileTreeNTimes(qn.target, qn.lower);
            if (qn.nextHeadExact != null && !cknOn(ckn)) {
                if (isMultiline(regex.options)) {
                    addOpcode(OPCode.ANYCHAR_ML_STAR_PEEK_NEXT);
                } else {
                    addOpcode(OPCode.ANYCHAR_STAR_PEEK_NEXT);
                }
                if (cknOn(ckn)) {
                    addStateCheckNum(ckn);
                }
                StringNode sn = (StringNode)qn.nextHeadExact;
                addBytes(sn.bytes, sn.p, 1);
                return;
            } else {
                if (isMultiline(regex.options)) {
                    if (cknOn(ckn)) {
                        addOpcode(OPCode.STATE_CHECK_ANYCHAR_ML_STAR);
                    } else {
                        addOpcode(OPCode.ANYCHAR_ML_STAR);
                    }
                } else {
                    if (cknOn(ckn)) {
                        addOpcode(OPCode.STATE_CHECK_ANYCHAR_STAR);
                    } else {
                        addOpcode(OPCode.ANYCHAR_STAR);
                    }
                }
                if (cknOn(ckn)) {
                    addStateCheckNum(ckn);
                }
                return;
            }
        }

        int modTLen;
        if (emptyInfo != 0) {
            modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
        } else {
            modTLen = tlen;
        }
        if (infinite && qn.lower <= 1) {
            if (qn.greedy) {
                if (qn.lower == 1) {
                    addOpcodeRelAddr(OPCode.JUMP, cknOn(ckn) ? OPSize.STATE_CHECK_PUSH :
                                                                     OPSize.PUSH);
                }
                if (cknOn(ckn)) {
                    addOpcode(OPCode.STATE_CHECK_PUSH);
                    addStateCheckNum(ckn);
                    addRelAddr(modTLen + OPSize.JUMP);
                } else {
                    addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP);
                }
                compileTreeEmptyCheck(qn.target, emptyInfo);
                addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + (cknOn(ckn) ?
                                                                               OPSize.STATE_CHECK_PUSH :
                                                                               OPSize.PUSH)));
            } else {
                if (qn.lower == 0) {
                    addOpcodeRelAddr(OPCode.JUMP, modTLen);
                }
                compileTreeEmptyCheck(qn.target, emptyInfo);
                if (cknOn(ckn)) {
                    addOpcode(OPCode.STATE_CHECK_PUSH_OR_JUMP);
                    addStateCheckNum(ckn);
                    addRelAddr(-(modTLen + OPSize.STATE_CHECK_PUSH_OR_JUMP));
                } else {
                    addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH));
                }
            }
        } else if (qn.upper == 0) {
            if (qn.isRefered) { /* /(?..){0}/ */
                addOpcodeRelAddr(OPCode.JUMP, tlen);
                compileTree(qn.target);
            } // else r=0 ???
        } else if (qn.upper == 1 && qn.greedy) {
            if (qn.lower == 0) {
                if (cknOn(ckn)) {
                    addOpcode(OPCode.STATE_CHECK_PUSH);
                    addStateCheckNum(ckn);
                    addRelAddr(tlen);
                } else {
                    addOpcodeRelAddr(OPCode.PUSH, tlen);
                }
            }
            compileTree(qn.target);
        } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0){ /* '??' */
            if (cknOn(ckn)) {
                addOpcode(OPCode.STATE_CHECK_PUSH);
                addStateCheckNum(ckn);
                addRelAddr(OPSize.JUMP);
            } else {
                addOpcodeRelAddr(OPCode.PUSH, OPSize.JUMP);
            }

            addOpcodeRelAddr(OPCode.JUMP, tlen);
            compileTree(qn.target);
        } else {
            compileRangeRepeatNode(qn, modTLen, emptyInfo);
            if (cknOn(ckn)) {
                addOpcode(OPCode.STATE_CHECK);
                addStateCheckNum(ckn);
            }
        }
    }

    private int compileNonCECLengthQuantifierNode(QuantifierNode qn) {
        boolean infinite = isRepeatInfinite(qn.upper);
        int emptyInfo = qn.targetEmptyInfo;

        int tlen = compileLengthTree(qn.target);

        /* anychar repeat */
        if (qn.target.getType() == NodeType.CANY) {
            if (qn.greedy && infinite) {
                if (qn.nextHeadExact != null) {
                    return OPSize.ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower;
                } else {
                    return OPSize.ANYCHAR_STAR + tlen * qn.lower;
                }
            }
        }

        int modTLen;
        if (emptyInfo != 0) {
            modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
        } else {
            modTLen = tlen;
        }

        int len;
        if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
            if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
                len = OPSize.JUMP;
            } else {
                len = tlen * qn.lower;
            }

            if (qn.greedy) {
                if (qn.headExact != null) {
                    len += OPSize.PUSH_OR_JUMP_EXACT1 + modTLen + OPSize.JUMP;
                } else if (qn.nextHeadExact != null) {
                    len += OPSize.PUSH_IF_PEEK_NEXT + modTLen + OPSize.JUMP;
                } else {
                    len += OPSize.PUSH + modTLen + OPSize.JUMP;
                }
            } else {
                len += OPSize.JUMP + modTLen + OPSize.PUSH;
            }

        } else if (qn.upper == 0 && qn.isRefered) { /* /(?..){0}/ */
            len = OPSize.JUMP + tlen;
        } else if (!infinite && qn.greedy &&
                  (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE )) {
            len = tlen * qn.lower;
            len += (OPSize.PUSH + tlen) * (qn.upper - qn.lower);
        } else if (!qn.greedy && qn.upper == 1 && qn.lower == 0) { /* '??' */
            len = OPSize.PUSH + OPSize.JUMP + tlen;
        } else {
            len = OPSize.REPEAT_INC + modTLen + OPSize.OPCODE + OPSize.RELADDR + OPSize.MEMNUM;
        }
        return len;
    }

    @Override
    protected void compileNonCECQuantifierNode(QuantifierNode qn) {
        regex.requireStack = true;
        boolean infinite = isRepeatInfinite(qn.upper);
        int emptyInfo = qn.targetEmptyInfo;

        int tlen = compileLengthTree(qn.target);

        if (qn.isAnyCharStar()) {
            compileTreeNTimes(qn.target, qn.lower);
            if (qn.nextHeadExact != null) {
                if (isMultiline(regex.options)) {
                    addOpcode(OPCode.ANYCHAR_ML_STAR_PEEK_NEXT);
                } else {
                    addOpcode(OPCode.ANYCHAR_STAR_PEEK_NEXT);
                }
                StringNode sn = (StringNode)qn.nextHeadExact;
                addBytes(sn.bytes, sn.p, 1);
                return;
            } else {
                if (isMultiline(regex.options)) {
                    addOpcode(OPCode.ANYCHAR_ML_STAR);
                } else {
                    addOpcode(OPCode.ANYCHAR_STAR);
                }
                return;
            }
        }

        int modTLen;
        if (emptyInfo != 0) {
            modTLen = tlen + (OPSize.NULL_CHECK_START + OPSize.NULL_CHECK_END);
        } else {
            modTLen = tlen;
        }
        if (infinite && (qn.lower <= 1 || tlen * qn.lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
            if (qn.lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
                if (qn.greedy) {
                    if (qn.headExact != null) {
                        addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_OR_JUMP_EXACT1);
                    } else if (qn.nextHeadExact != null) {
                        addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH_IF_PEEK_NEXT);
                    } else {
                        addOpcodeRelAddr(OPCode.JUMP, OPSize.PUSH);
                    }
                } else {
                    addOpcodeRelAddr(OPCode.JUMP, OPSize.JUMP);
                }
            } else {
                compileTreeNTimes(qn.target, qn.lower);
            }

            if (qn.greedy) {
                if (qn.headExact != null) {
                    addOpcodeRelAddr(OPCode.PUSH_OR_JUMP_EXACT1, modTLen + OPSize.JUMP);
                    StringNode sn = (StringNode)qn.headExact;
                    addBytes(sn.bytes, sn.p, 1);
                    compileTreeEmptyCheck(qn.target, emptyInfo);
                    addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_OR_JUMP_EXACT1));
                } else if (qn.nextHeadExact != null) {
                    addOpcodeRelAddr(OPCode.PUSH_IF_PEEK_NEXT, modTLen + OPSize.JUMP);
                    StringNode sn = (StringNode)qn.nextHeadExact;
                    addBytes(sn.bytes, sn.p, 1);
                    compileTreeEmptyCheck(qn.target, emptyInfo);
                    addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH_IF_PEEK_NEXT));
                } else {
                    addOpcodeRelAddr(OPCode.PUSH, modTLen + OPSize.JUMP);
                    compileTreeEmptyCheck(qn.target, emptyInfo);
                    addOpcodeRelAddr(OPCode.JUMP, -(modTLen + OPSize.JUMP + OPSize.PUSH));
                }
            } else {
                addOpcodeRelAddr(OPCode.JUMP, modTLen);
                compileTreeEmptyCheck(qn.target, emptyInfo);
                addOpcodeRelAddr(OPCode.PUSH, -(modTLen + OPSize.PUSH));
            }
        } else if (qn.upper == 0 && qn.isRefered) { /* /(?..){0}/ */
            addOpcodeRelAddr(OPCode.JUMP, tlen);
            compileTree(qn.target);
        } else if (!infinite && qn.greedy &&
                  (qn.upper == 1 || (tlen + OPSize.PUSH) * qn.upper <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
            int n = qn.upper - qn.lower;
            compileTreeNTimes(qn.target, qn.lower);

            for (int i=0; i= code.length) {
            int length = code.length << 1;
            while (length <= size) length <<= 1;
            int[]tmp = new int[length];
            System.arraycopy(code, 0, tmp, 0, code.length);
            code = tmp;
        }
    }

    private void addInt(int i) {
        if (codeLength >= code.length) {
            int[]tmp = new int[code.length << 1];
            System.arraycopy(code, 0, tmp, 0, code.length);
            code = tmp;
        }
        code[codeLength++] = i;
    }

    void setInt(int i, int offset) {
        ensure(offset);
        regex.code[offset] = i;
    }

    private void addBytes(byte[]bytes, int p ,int length) {
        ensure(codeLength + length);
        int end = p + length;

        while (p < end) code[codeLength++] = bytes[p++];
    }

    private void addInts(int[]ints, int length) {
        ensure(codeLength + length);
        System.arraycopy(ints, 0, code, codeLength, length);
        codeLength += length;
    }

    private void addOpcode(int opcode) {
        addInt(opcode);
    }

    private void addStateCheckNum(int num) {
        addInt(num);
    }

    private void addRelAddr(int addr) {
        addInt(addr);
    }

    private void addAbsAddr(int addr) {
        addInt(addr);
    }

    private void addLength(int length) {
        addInt(length);
    }

    private void addMemNum(int num) {
        addInt(num);
    }

    private void addOption(int option) {
        addInt(option);
    }

    private void addOpcodeRelAddr(int opcode, int addr) {
        addOpcode(opcode);
        addRelAddr(addr);
    }

    private void addOpcodeOption(int opcode, int option) {
        addOpcode(opcode);
        addOption(option);
    }

    private void addTemplate(byte[]bytes) {
        if (templateNum == 0) {
            templates = new byte[2][];
        } else if (templateNum == templates.length) {
            byte[][]tmp = new byte[templateNum * 2][];
            System.arraycopy(templates, 0, tmp, 0, templateNum);
            templates = tmp;
        }
        templates[templateNum++] = bytes;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy