net.sf.saxon.regex.REMatcher Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of Saxon-HE Show documentation
The XSLT and XQuery Processor
There is a newer version: 12.5
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2022 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Originally part of Apache's Jakarta project (downloaded January 2012),
 * this file has been extensively modified for integration into Saxon by
 * Michael Kay, Saxonica.
 */

package net.sf.saxon.regex;


import net.sf.saxon.str.*;
import net.sf.saxon.z.IntIterator;
import net.sf.saxon.z.IntPredicateProxy;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;


/**
 * RE is an efficient, lightweight regular expression evaluator/matcher
 * class. Regular expressions are pattern descriptions which enable
 * sophisticated matching of strings.  In addition to being able to
 * match a string against a pattern, you can also extract parts of the
 * match.  This is especially useful in text parsing! Details on the
 * syntax of regular expression patterns are given below.
 * To compile a regular expression (RE), you can simply construct an RE
 * matcher object from the string specification of the pattern, like this:
 * <pre>
 *  RE r = new RE("a*b");
 * </pre>
 * Once you have done this, you can call either of the RE.match methods to
 * perform matching on a String.  For example:
 * <pre>
 *  boolean matched = r.match("aaaab");
 * </pre>
 * will cause the boolean matched to be set to true because the
 * pattern "a*b" matches the string "aaaab".
 * If you were interested in the number of a's which matched the
 * first part of our example expression, you could change the expression to
 * "(a*)b".  Then when you compiled the expression and matched it against
 * something like "xaaaab", you would get results like this:
 * <pre>
 *  RE r = new RE("(a*)b");                  // Compile expression
 *  boolean matched = r.match("xaaaab");     // Match against "xaaaab"
 *
 *  String wholeExpr = r.getParen(0);        // wholeExpr will be 'aaaab'
 *  String insideParens = r.getParen(1);     // insideParens will be 'aaaa'
 *
 *  int startWholeExpr = r.getParenStart(0); // startWholeExpr will be index 1
 *  int endWholeExpr = r.getParenEnd(0);     // endWholeExpr will be index 6
 *  int lenWholeExpr = r.getParenLength(0);  // lenWholeExpr will be 5
 *
 *  int startInside = r.getParenStart(1);    // startInside will be index 1
 *  int endInside = r.getParenEnd(1);        // endInside will be index 5
 *  int lenInside = r.getParenLength(1);     // lenInside will be 4
 * </pre>
 * You can also refer to the contents of a parenthesized expression
 * within a regular expression itself.  This is called a
 * 'backreference'.  The first backreference in a regular expression is
 * denoted by \1, the second by \2 and so on.  So the expression:
 * <pre>
 *  ([0-9]+)=\1
 * </pre>
 * will match any string of the form n=n (like 0=0 or 2=2).
 * The full regular expression syntax accepted by RE is as defined in the XSD 1.1
 * specification, modified by the XPath 2.0 or 3.0 specifications.
 * Line terminators
 * A line terminator is a one- or two-character sequence that marks
 * the end of a line of the input character sequence. The following
 * are recognized as line terminators:
 * <ul>
 * <li>A newline (line feed) character ('\n'),</li>
 * <li>A carriage-return character followed immediately by a newline character ("\r\n"),</li>
 * <li>A standalone carriage-return character ('\r'),</li>
 * <li>A next-line character ('\u0085'),</li>
 * <li>A line-separator character ('\u2028'), or</li>
 * <li>A paragraph-separator character ('\u2029').</li>
 * </ul>
 * RE runs programs compiled by the RECompiler class.  But the RE
 * matcher class does not include the actual regular expression compiler
 * for reasons of efficiency.  In fact, if you want to pre-compile one
 * or more regular expressions, the 'recompile' class can be invoked
 * from the command line to produce compiled output like this:
 * <pre>
 *    // Pre-compiled regular expression "a*b"
 *    char[] re1Instructions =
 *    {
 *        0x007c, 0x0000, 0x001a, 0x007c, 0x0000, 0x000d, 0x0041,
 *        0x0001, 0x0004, 0x0061, 0x007c, 0x0000, 0x0003, 0x0047,
 *        0x0000, 0xfff6, 0x007c, 0x0000, 0x0003, 0x004e, 0x0000,
 *        0x0003, 0x0041, 0x0001, 0x0004, 0x0062, 0x0045, 0x0000,
 *        0x0000,
 *    };
 *
 *
 *    REProgram re1 = new REProgram(re1Instructions);
 * </pre>
 * You can then construct a regular expression matcher (RE) object from
 * the pre-compiled expression re1 and thus avoid the overhead of
 * compiling the expression at runtime. If you require more dynamic
 * regular expressions, you can construct a single RECompiler object and
 * re-use it to compile each expression. Similarly, you can change the
 * program run by a given matcher object at any time. However, RE and
 * RECompiler are not threadsafe (for efficiency reasons, and because
 * requiring thread safety in this class is deemed to be a rare
 * requirement), so you will need to construct a separate compiler or
 * matcher object for each thread (unless you do thread synchronization
 * yourself). Once an expression has been compiled into an REProgram object, that
 * REProgram can be safely shared across multiple threads and RE objects.
 * ISSUES:
 * 
 * Not *all* possibilities are considered for greediness when backreferences
 * are involved (as POSIX suggests should be the case).  The POSIX RE
 * "(ac*)c*d[ac]*\1", when matched against "acdacaa" should yield a match
 * of acdacaa where \1 is "a".  This is not the case in this RE package,
 * and actually Perl doesn't go to this extent either!  Until someone
 * actually complains about this, I'm not sure it's worth "fixing".
 * If it ever is fixed, test #137 in RETest.txt should be updated.
 * 
 * This library is based on the Apache Jakarta regex library as downloaded
 * on 3 January 2012. Changes have been made to make the grammar and semantics conform to XSD
 * and XPath rules; these changes are listed in source code comments in the
 * RECompiler source code module.
 *
 * @author Jonathan Locke
 * @author Tobias Schäfer
 * @author Michael Kay
 * @see RECompiler
 */
public class REMatcher {

    // Limits
    static final int MAX_PAREN = 16;              // Default number of paren pairs, used when the program does not supply its own count

    // State of current program
    REProgram program;                            // Compiled regular expression 'program'
    UnicodeString search;                           // The string being matched against
    History history = new History();              // NOTE(review): not referenced in this class; presumably used by the Operation implementations
    int maxParen = MAX_PAREN;                     // Size used when allocating the backreference arrays in matchAt()

    // Parenthesized subexpressions
    State _captureState = new State();            // Current capture-group boundaries; replaced with a fresh State on every call to match()

    // Backreferences
    int[] startBackref;                 // Array of backref starts; allocated in matchAt() only when the program has backreferences
    int[] endBackref;                   // Array of backref ends; allocated in matchAt() only when the program has backreferences

    Operation operation;                          // Root operation of the program, copied from the program in setProgram()
    boolean anchoredMatch;                        // Set by matchAt(): true if the current match must extend to the end of the input


    /**
     * Creates a matcher that will execute the supplied pre-compiled
     * regular expression program.
     *
     * @param program the compiled regular expression program to run
     * @see RECompiler
     */
    public REMatcher(REProgram program) {
        this.setProgram(program);
    }

    /**
     * Installs the regular expression program that this matcher will run.
     * If the program is null, or reports a paren count of -1, the paren limit
     * falls back to {@link #MAX_PAREN} and the current operation is left as it was.
     *
     * @param program Regular expression program compiled by RECompiler.
     * @see RECompiler
     * @see REProgram
     */
    public void setProgram(REProgram program) {
        this.program = program;
        final boolean hasParenCount = program != null && program.maxParens != -1;
        if (!hasParenCount) {
            // Nothing usable to copy from the program: use the default limit
            this.maxParen = MAX_PAREN;
            return;
        }
        this.operation = program.operation;
        this.maxParen = program.maxParens;
    }

    /**
     * Gets the regular expression program most recently installed in this matcher.
     *
     * @return the current program (as passed to {@link #setProgram}), possibly null
     * @see #setProgram
     */
    public REProgram getProgram() {
        return this.program;
    }

    /**
     * Reports how many parenthesized subexpressions are recorded in the current
     * capture state. Group 0 (the whole match) is included in the count.
     *
     * @return the paren count held by the current capture state
     */
    public int getParenCount() {
        final State current = _captureState;
        return current.parenCount;
    }

    /**
     * Extracts the text captured by a parenthesized subexpression after a
     * successful match.
     *
     * @param which number of the subexpression (0 is the whole match)
     * @return the captured substring of the search string, or null if the group
     *         number is not below the current paren count or the group has no
     *         recorded start position
     */
    public UnicodeString getParen(int which) {
        if (which >= _captureState.parenCount) {
            return null;
        }
        final int from = getParenStart(which);
        if (from < 0) {
            // A negative start marks a group that was never set
            return null;
        }
        return search.substring(from, getParenEnd(which));
    }

    /**
     * Looks up the recorded start position of a captured group.
     *
     * @param which number of the subexpression
     * @return the start index in the search string, or -1 if the group number
     *         lies beyond the currently allocated start array
     */
    public final int getParenStart(int which) {
        final int[] starts = _captureState.startn;
        return which < starts.length ? starts[which] : -1;
    }

    /**
     * Looks up the recorded end position of a captured group.
     *
     * @param which number of the subexpression
     * @return the end index in the search string, or -1 if the group number
     *         lies beyond the currently allocated end array
     */
    public final int getParenEnd(int which) {
        final int[] ends = _captureState.endn;
        return which < ends.length ? ends[which] : -1;
    }

    /**
     * Records the start position of a captured group, growing the start array
     * (by repeated doubling, new slots initialised to -1) if the group number
     * does not yet fit.
     *
     * @param which Which paren level
     * @param i     Index in input array
     */
    protected final void setParenStart(int which, int i) {
        int[] starts = _captureState.startn;
        while (which >= starts.length) {
            final int oldLength = starts.length;
            starts = Arrays.copyOf(starts, oldLength * 2);
            // Newly added slots must read as "unset"
            Arrays.fill(starts, oldLength, starts.length, -1);
        }
        _captureState.startn = starts;
        starts[which] = i;
    }

    /**
     * Records the end position of a captured group, growing the end array
     * (by repeated doubling, new slots initialised to -1) if the group number
     * does not yet fit.
     *
     * @param which Which paren level
     * @param i     Index in input array
     */
    protected final void setParenEnd(int which, int i) {
        int[] ends = _captureState.endn;
        while (which >= ends.length) {
            final int oldLength = ends.length;
            ends = Arrays.copyOf(ends, oldLength * 2);
            // Newly added slots must read as "unset"
            Arrays.fill(ends, oldLength, ends.length, -1);
        }
        _captureState.endn = ends;
        ends[which] = i;
    }

    /**
     * Empties every captured group (and backreference, if the backreference arrays
     * exist) whose start position is at or beyond the given position. A group is
     * emptied by setting its end equal to its start.
     *
     * @param pos the specified position
     */

    protected void clearCapturedGroupsBeyond(int pos) {
        final int[] groupStarts = _captureState.startn;
        final int[] groupEnds = _captureState.endn;
        for (int g = 0; g < groupStarts.length; g++) {
            final int begin = groupStarts[g];
            if (begin >= pos) {
                groupEnds[g] = begin;
            }
        }
        if (startBackref == null) {
            // Backreference arrays were never allocated
            return;
        }
        for (int b = 0; b < startBackref.length; b++) {
            final int begin = startBackref[b];
            if (begin >= pos) {
                endBackref[b] = begin;
            }
        }
    }

    /**
     * Match the current regular expression program against the current
     * input string, starting at index i of the input string.  This method
     * is only meant for internal use.
     *
     * <p>For an unanchored match, the first end position produced by the program
     * is accepted. For an anchored match, candidate end positions are examined
     * in turn and only one that coincides with the end of the input string is
     * accepted; a match that stops short of the end is not a match.</p>
     *
     * <p>On success, group 0 is set to span from {@code i} to the accepted end
     * position. On failure the paren count is reset to zero.</p>
     *
     * @param i        The input string index to start matching at
     * @param anchored true if the regex must match all characters up to the end of the string
     * @return True if the input matched the expression
     */
    protected boolean matchAt(int i, boolean anchored) {
        // Initialize start pointer, paren cache and paren count
        _captureState.parenCount = 1;
        anchoredMatch = anchored;
        setParenStart(0, i);

        // Allocate backref arrays (unless optimizations indicate otherwise)
        if ((program.optimizationFlags & REProgram.OPT_HASBACKREFS) != 0) {
            startBackref = new int[maxParen];
            endBackref = new int[maxParen];
        }

        // Match against string
        IntIterator iter = operation.iterateMatches(this, i);
        if (anchored) {
            // Keep asking for alternative end positions until one reaches the end of the input
            final int inputEnd = search.length32();
            while (iter.hasNext()) {
                int idx = iter.next();
                if (idx == inputEnd) {
                    setParenEnd(0, idx);
                    return true;
                }
            }
        } else if (iter.hasNext()) {
            // Unanchored: the first candidate end position wins
            setParenEnd(0, iter.next());
            return true;
        }

        // Didn't match
        _captureState.parenCount = 0;
        return false;
    }

    /**
     * Tests whether the regex matches the supplied string as a whole, that is,
     * starting at position 0 with the match anchored at the end. The supplied
     * string becomes the matcher's current search string.
     *
     * @param search the string to be matched
     * @return true if the regex matches the whole string
     */

    public boolean isAnchoredMatch(UnicodeString search) {
        this.search = search;
        final int startPosition = 0;
        return matchAt(startPosition, true);
    }

    /**
     * Searches the supplied string for the first match of the current regular
     * expression program, trying successive start positions from index {@code i}
     * onwards. The capture state is reset before searching; after a successful
     * match the group boundaries can be read with {@link #getParenStart},
     * {@link #getParenEnd} and {@link #getParen}.
     *
     * <p>Three shortcuts are used to reduce the number of start positions tried:
     * a program flagged as containing a beginning-of-line anchor is only tried at
     * the start position and after each newline (or only at position 0 when not in
     * multi-line mode); a program with a known initial character class is only
     * tried where the next character satisfies it; and a program with a known
     * literal prefix is only tried where that prefix occurs.</p>
     *
     * @param search String to match against; must not be null
     * @param i      Index to start searching at
     * @return True if string matched
     * @throws NullPointerException if {@code search} is null
     */
    public boolean match(UnicodeString search, int i) {
        //System.err.println("Matching '" + search + "'");
        Objects.requireNonNull(search);
        // Save string to search (NOTE(review): tidy() presumably returns an equivalent string
        // in a normalised internal representation - confirm against UnicodeString)
        this.search = search.tidy();

        // Clear the captured group state
        _captureState = new State();

        // Can we optimize the search by looking for new lines?
        if ((program.optimizationFlags & REProgram.OPT_HASBOL) == REProgram.OPT_HASBOL) {
            // Non multi-line matching with BOL: Must match at '0' index
            if (!program.flags.isMultiLine()) {
                return i == 0 && checkPreconditions(i) && matchAt(i, false);
            }

            // Multi-line matching with BOL: Seek to next line
            // First try the start position itself, then the position following each newline
            int nl = i;
            if (matchAt(nl, false)) {
                return true;
            }
            while (true) {
                // Position just after the next newline; this becomes 0 if indexOf reports -1
                // (presumably its "not found" result - confirm), which ends the search below
                nl = (int)search.indexOf('\n', nl) + 1;
                if (nl >= search.length() || nl <= 0) {
                    return false; // "^" does not match a NL at the end of the string
                } else {
                    if (matchAt(nl, false)) {
                        return true;
                    }
                }
            }
        }

        // Is the string long enough to match?
        int actualLength = search.length32() - i;
        if (actualLength < program.minimumLength) {
            return false;
        }

        // Can we optimize the search by looking for a prefix string?
        if (program.prefix == null) {
            if (program.initialCharClass != null) {
                // no prefix known; but the first character must match a predicate
                IntPredicateProxy pred = program.initialCharClass;
                // Only positions holding an actual character are considered (i < length)
                for (; !(i >= search.length32()); i++) {
                    if (pred.test(search.codePointAt(i))) {
                        if (matchAt(i, false)) {
                            return true;
                        }
                    }
                }
                return false;
            }
            // Check the preconditions
            if (!checkPreconditions(i)) {
                return false;
            }
            // Unprefixed matching must try for a match at each character
            // The loop runs while i <= length, so a match is also attempted at the
            // end-of-string position (where only a zero-length match is possible)
            for (; !(i - 1 >= search.length32()); i++) {
                // Try a match at index i
                if (matchAt(i, false)) {
                    return true;
                }
            }
            return false;
        } else {
            // Prefix-anchored matching is possible
            UnicodeString prefix = program.prefix;
            int prefixLength = prefix.length32();
            boolean ignoreCase = program.flags.isCaseIndependent();
            // Only consider positions where the whole prefix fits (i + prefixLength <= length)
            for (; !(i + prefixLength - 1 >= search.length()); i++) {
                boolean prefixOK = true;
                if (ignoreCase) {
                    // Compare the prefix code point by code point, treating case variants as equal
                    for (int j = i, k = 0; k < prefixLength; j++, k++) {
                        if (!equalCaseBlind(search.codePointAt(j), prefix.codePointAt(k))) {
                            prefixOK = false;
                            break;
                        }
                    }
                } else {
                    // Exact code point comparison
                    for (int j = i, k=0; k < prefixLength; j++, k++) {
                        if (search.codePointAt(j) != prefix.codePointAt(k)) {
                            prefixOK = false;
                            break;
                        }
                    }
                }

                // See if the whole prefix string matched
                if (prefixOK) {
                    // We matched the full prefix at position i, so try a full match there
                    if (matchAt(i, false)) {
                        return true;
                    }
                }
            }
            return false;
        }
    }

    /**
     * Checks every precondition attached to the program. A precondition with a
     * fixed position must match at exactly that position; any other precondition
     * must match at some position from the later of {@code start} and the
     * precondition's minimum position, up to the end of the search string.
     *
     * @param start the start position for matching preconditions
     * @return true if all preconditions are satisfied, false as soon as one is not
     */

    private boolean checkPreconditions(int start) {
        for (RegexPrecondition condition : program.preconditions) {
            final int fixed = condition.fixedPosition;
            if (fixed != -1) {
                // Fixed-position precondition: a single probe decides it
                if (!condition.operation.iterateMatches(this, fixed).hasNext()) {
                    return false;
                }
                continue;
            }
            // Floating precondition: scan forward for any position where it holds
            boolean satisfied = false;
            for (int p = Math.max(start, condition.minPosition); p < search.length(); p++) {
                if (condition.operation.iterateMatches(this, p).hasNext()) {
                    satisfied = true;
                    break;
                }
            }
            if (!satisfied) {
                return false;
            }
        }
        return true;
    }

    /**
     * Searches a Java String for a match of the current regular expression
     * program, starting from the beginning of the string.
     *
     * @param search String to match against
     * @return True if string matched
     */
    public boolean match(String search) {
        final int fromStart = 0;
        return match(StringView.of(search).tidy(), fromStart);
    }

    /**
     * Splits a string into a list of strings on regular expression boundaries.
     * This function works the same way as the Perl function of the same name.
     * Given a regular expression of "[ab]+" and a string to split of
     * "xyzzyababbayyzabbbab123", the result would be the list of strings
     * "[xyzzy, yyz, 123]".
     * <p>Please note that the first string in the resulting list may be an empty
     * string. This happens when the very first character of input string is
     * matched by the pattern. The text following the last match is always
     * added as the final entry, even when it is empty.</p>
     * <p>If a match ends at the position where the search for it began (a
     * zero-length match at that position), the single character at that position
     * is emitted as a token and the search resumes one position further on, so
     * that the loop always makes progress.</p>
     *
     * @param s String to split on this regular expression
     * @return List of the substrings lying between matches, in order
     */
    public List<UnicodeString> split(UnicodeString s) {
        // Tokens found so far
        List<UnicodeString> v = new ArrayList<>();

        // Start at position 0 and search the whole string
        int pos = 0;
        int len = s.length32();

        // Try a match at each position
        while (pos < len && match(s, pos)) {
            // Get start of match
            int start = getParenStart(0);

            // Get end of match
            int newpos = getParenEnd(0);

            // Check if no progress was made
            if (newpos == pos) {
                // Zero-length match at pos: emit one character and step over it
                v.add(s.substring(pos, start + 1));
                newpos++;
            } else {
                v.add(s.substring(pos, start));
            }

            // Move to new position
            pos = newpos;
        }

        // Push remainder even if it's empty
        UnicodeString remainder = s.substring(pos, len);
        v.add(remainder);

        // Return the list
        return v;
    }

    /**
     * Replaces every match of this regular expression in a string with a
     * replacement string.
     * Given a regular expression of "a*b", an input string of
     * "aaaabfooaaabgarplyaaabwackyb" and the replacement string "-", the
     * result would be "-foo-garply-wacky-".
     *
     * <p>Unless the program's flags indicate a literal replacement, the replacement
     * string is interpreted as follows:</p>
     * <ul>
     * <li>{@code \\} and {@code \$} insert a literal backslash or dollar sign; a
     * backslash followed by anything else is an error.</li>
     * <li>{@code $} must be followed by a digit and inserts the corresponding captured
     * group ({@code $0} is the whole match). When the regular expression has at most
     * nine capturing groups only a single digit is read, and a reference to a
     * non-existent group inserts nothing. Otherwise further digits are consumed for as
     * long as the resulting number does not exceed the number of capturing groups.</li>
     * <li>A group that did not capture anything inserts nothing.</li>
     * </ul>
     *
     * @param in          String to substitute within
     * @param replacement String to substitute for matches of this regular expression
     * @return The input string with each match of the current regular expression
     *         replaced by the (expanded) replacement string; if the regular
     *         expression does not match at any position, the original string is
     *         returned unchanged.
     * @throws RESyntaxException if the replacement string contains an invalid escape,
     *         or a {@code $} that is not followed by a digit
     */
    public UnicodeString replace(UnicodeString in, UnicodeString replacement) {
        // String to return
        UnicodeString result = EmptyUnicodeString.getInstance();

        // Start at position 0 and search the whole string
        int pos = 0;
        int len = in.length32();

        // firstMatch stays true until a match is found; it doubles as the "no matches" indicator
        boolean firstMatch = true;
        // simpleReplacement is true when the replacement can be appended verbatim
        boolean simpleReplacement = false;

        // Try a match at each position
        while (pos < len && match(in, pos)) {
            // Append chars from input string before match
            result = result.concat(in.substring(pos, getParenStart(0)));

            if (firstMatch) {
                // A literal-flagged program never interprets '$' or '\' in the replacement
                simpleReplacement = program.flags.isLiteral();
                firstMatch = false;
            }

            if (!simpleReplacement) {
                // Process references to captured substrings
                int maxCapture = program.maxParens - 1;
                // Assume the replacement has no special characters until one is seen; if none
                // is found, later matches take the verbatim branch below
                simpleReplacement = true;
                for (int i = 0; i < replacement.length(); i++) {
                    int ch = replacement.codePointAt(i);
                    if (ch == '\\') {
                        simpleReplacement = false;
                        // Consume the character following the backslash
                        int index = ++i;
                        ch = replacement.codePointAt(index);
                        if (ch == '\\' || ch == '$') {
                            result = result.concat(BMPString.of("" + (char) ch));
                        } else {
                            // NOTE(review): ch is an int, so the message shows the numeric code point, not the character
                            throw new RESyntaxException("Invalid escape '" + ch + "' in replacement string");
                        }
                    } else if (ch == '$') {
                        simpleReplacement = false;
                        // Consume the first digit of the group reference
                        int index = ++i;
                        ch = replacement.codePointAt(index);
                        if (!(ch >= '0' && ch <= '9')) {
                            throw new RESyntaxException("$ in replacement string must be followed by a digit");
                        }
                        int n = ch - '0';
                        if (maxCapture <= 9) {
                            // Single-digit references only; a reference beyond the last group inserts nothing
                            if (maxCapture >= n) {
                                UnicodeString captured = getParen(n);
                                if (captured != null) {
                                    result = result.concat(captured);
                                }
                            }
                        } else {
                            // Multi-digit references: absorb further digits while the number stays a valid group
                            while (true) {
                                if (++i >= replacement.length()) {
                                    break;
                                }
                                ch = replacement.codePointAt(i);
                                if (ch >= '0' && ch <= '9') {
                                    int m = n * 10 + (ch - '0');
                                    if (m > maxCapture) {
                                        // This digit is not part of the reference: push it back
                                        i--;
                                        break;
                                    } else {
                                        n = m;
                                    }
                                } else {
                                    // Not a digit: push it back for the outer loop to handle
                                    i--;
                                    break;
                                }
                            }
                            UnicodeString captured = getParen(n);
                            if (captured != null) {
                                result = result.concat(captured);
                            }
                        }
                    } else {
                        // Ordinary character: copy through
                        result = result.concat(new UnicodeChar(ch));
                    }
                }

            } else {
                // Append substitution without processing backreferences
                result = result.concat(replacement);
            }

            // Move forward, skipping past match
            int newpos = getParenEnd(0);

            // We always want to make progress!
            if (newpos == pos) {
                newpos++;
            }

            // Try new position
            pos = newpos;

        }

        // If no matches were found, return the input unchanged
        if (firstMatch) {
            return in;
        }

        // If there's remaining input, append it
        result = result.concat(in.substring(pos, len));

        // Return the accumulated result (NOTE(review): economize() presumably converts it to a
        // compact representation - confirm against UnicodeString)
        return result.economize();
    }

    /**
     * Replaces every match of this regular expression in a string with the
     * result of applying a function to the matched substring.
     * Given a regular expression of "a*b", an input string of
     * "aaaabfooaaabgarplyaaabwackyb" and a function that always returns "-",
     * the result would be "-foo-garply-wacky-".
     *
     * <p>The value returned by the function is inserted as-is: it is not scanned
     * for {@code $n} group references or backslash escapes.</p>
     *
     * @param in          String to substitute within
     * @param replacer    Function to process each matching substring and return a replacement
     * @return The input string with each match of the current regular expression
     * replaced by the value the function returns for it (if this regular
     * expression doesn't match at any position, the result has the same
     * content as the input).
     */
    public UnicodeString replaceWith(UnicodeString in, Function<UnicodeString, UnicodeString> replacer) {
        // Accumulates the result
        UnicodeBuilder sb = new UnicodeBuilder();

        // Start at position 0 and search the whole string
        int pos = 0;
        int len = in.length32();

        // Try a match at each position
        while (pos < len && match(in, pos)) {
            // Read the match boundaries once, before invoking the callback, so that the
            // positions used below cannot be affected by anything the callback does
            final int matchStart = getParenStart(0);
            final int matchEnd = getParenEnd(0);

            // Append chars from input string before match
            for (long i = pos; i < matchStart; i++) {
                sb.append(in.codePointAt(i));
            }

            // Append whatever the callback supplies for this match
            UnicodeString replacement = replacer.apply(in.substring(matchStart, matchEnd));
            IntIterator iter = replacement.codePoints();
            while (iter.hasNext()) {
                sb.append(iter.next());
            }

            // Move forward, skipping past match
            int newpos = matchEnd;

            // We always want to make progress!
            if (newpos == pos) {
                newpos++;
            }

            // Try new position
            pos = newpos;

        }

        // If there's remaining input, append it
        for (int i = pos; i < len; i++) {
            sb.append(in.codePointAt(i));
        }

        // Return the built string
        return sb.toUnicodeString();
    }


    /**
     * Tells whether the search string holds a line feed character at a given position.
     *
     * @param i the position of the character to be tested
     * @return true if the character at position i of the search string is '\n'
     */
    boolean isNewline(int i) {
        final int codePoint = search.codePointAt(i);
        return codePoint == '\n';
    }

    /**
     * Tests two code points for equality, ignoring case. The code points are equal
     * if they are identical, or if the first appears among the case variants
     * reported for the second.
     *
     * @param c1 first character to compare.
     * @param c2 second character to compare.
     * @return true if the first character is equal to the second ignoring case.
     */
    boolean equalCaseBlind(int c1, int c2) {
        if (c1 != c2) {
            for (int variant : CaseVariants.getCaseVariants(c2)) {
                if (variant == c1) {
                    return true;
                }
            }
            return false;
        }
        return true;
    }

    /**
     * Takes a snapshot of the current capture state.
     *
     * @return an independent copy of the current capture state
     */
    public State captureState() {
        final State snapshot = new State(_captureState);
        return snapshot;
    }

    /**
     * Restores the capture state from a previously taken snapshot. The snapshot
     * itself is copied, so it can be reused for further resets.
     *
     * @param state the capture state to restore
     */
    public void resetState(State state) {
        final State restored = new State(state);
        _captureState = restored;
    }

    /**
     * Holds the boundaries of the captured groups for a match in progress.
     */
    public static class State {
        /** Initial number of slots in the start and end arrays. */
        private static final int INITIAL_SLOTS = 3;

        int parenCount;                     // Number of subexpressions matched (num open parens + 1)
        int[] startn;                       // Start position of each sub-expression, -1 if unset
        int[] endn;                         // End position of each sub-expression, -1 if unset

        /**
         * Creates an empty state: no groups matched, and all slots unset.
         */
        public State() {
            parenCount = 0;
            startn = new int[INITIAL_SLOTS];
            Arrays.fill(startn, -1);
            endn = new int[INITIAL_SLOTS];
            Arrays.fill(endn, -1);
        }

        /**
         * Creates an independent copy of another state.
         *
         * @param s the state to copy
         */
        public State(State s) {
            parenCount = s.parenCount;
            startn = s.startn.clone();
            endn = s.endn.clone();
        }
    }
}