net.pwall.util.ParseText Maven / Gradle / Ivy

Go to download
/*
 * @(#) ParseText.java
 *
 * javautil Java Utility Library
 * Copyright (c) 2013, 2014, 2017, 2020 Peter Wall
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

package net.pwall.util;

import java.io.IOException;
import java.util.Collection;
import java.util.function.IntPredicate;

/**
 * A class to assist with text parsing.  A {@code ParseText} object contains a string of text to
 * be examined (in the form of a {@link CharSequence}), an index denoting the current parse
 * position and a start index pointing to the start of the last matched or skipped characters.
 *
 * The class includes a number of "{@code match}" operations, which return {@code true} if
 * the text matches the parameter(s) to the operation.  On a successful match, these operations
 * set the start index to the position of the first character matched, and increment the index
 * past the matched characters.  If the match is not successful, the index and start index are
 * not modified.
 *
 * For use following a successful match operation, there are several "{@code getResult}"
 * operations that will extract the matched characters from the text in a variety of forms
 * ({@code int}, {@code long}, {@link String} etc.).
 *
 * The class takes text in the form of a {@link CharSequence} rather than a {@link String},
 * and some implementations of {@link CharSequence} allow modification of the contents.  The
 * results of any method of this class are undefined in the case of concurrent modification of
 * the text.
 *
 * @author Peter Wall
 */
public class ParseText {

    // The character sequence being parsed; assigned in setText(CharSequence, int).
    private CharSequence text;
    // The current parse position (an offset into text).
    private int index;
    // The offset of the first character of the last matched or skipped sequence.
    private int start;

    /**
     * Construct a {@code ParseText} object with the given text and initial index value.
     *
     * @param   text    the text
     * @param   index   the initial index
     * @throws  NullPointerException if the text is {@code null}
     * @throws  StringIndexOutOfBoundsException if the index is negative or beyond the end of
     *          the text
     */
    public ParseText(CharSequence text, int index) {
        // setText performs the null check and the index validation
        this.setText(text, index);
    }

    /**
     * Construct a {@code ParseText} object with the given text and an initial index value of 0.
     *
     * @param   text    the text
     * @throws          NullPointerException if the text is {@code null}
     */
    public ParseText(CharSequence text) {
        // parsing begins at the first character of the text
        this.setText(text, 0);
    }

    /**
     * Set the text and the index within the text.  The start index is set to the same value as
     * the index.
     *
     * @param   text    the text
     * @param   index   the index
     * @return          the {@code ParseText} object (for chaining purposes)
     * @throws  NullPointerException if the text is {@code null}
     * @throws  StringIndexOutOfBoundsException if the index is outside the bounds of the text
     */
    public ParseText setText(CharSequence text, int index) {
        if (text == null)
            throw new NullPointerException("ParseText data invalid");
        // Validate the index against the NEW text before touching any state, so that a
        // rejected call leaves the previous text, index and start index intact (previously
        // the text was replaced first, leaving the old index paired with the new text).
        if (index < 0 || index > text.length())
            throw new StringIndexOutOfBoundsException("ParseText index invalid");
        this.text = text;
        // still routed through setIndex so that the index is stored in one place only
        setIndex(index);
        start = index;
        return this;
    }

    /**
     * Set the text.  The index and start index are set to zero.
     *
     * @param   text    the text
     * @return          the {@code ParseText} object (for chaining purposes)
     * @throws  NullPointerException if the text is {@code null}
     */
    public ParseText setText(CharSequence text) {
        // delegate with index 0; the two-argument form also sets the start index
        this.setText(text, 0);
        return this;
    }

    /**
     * Get the entire text from the {@code ParseText} object (as a {@link CharSequence}).
     *
     * @return  the text
     */
    public CharSequence getText() {
        // the sequence itself is returned, not a copy
        return this.text;
    }

    /**
     * Get the length of the entire text.
     *
     * @return  the text length
     */
    public int getTextLength() {
        final int textLength = text.length();
        return textLength;
    }

    /**
     * Test whether the {@code ParseText} object is exhausted (the index has reached the end of
     * the text).
     *
     * @return  {@code true} if the index has reached the end of the text
     */
    public boolean isExhausted() {
        // exhausted once no character remains at or after the index
        return text.length() <= index;
    }

    /**
     * Get the current index (the offset within the text).
     *
     * @return  the index
     */
    public int getIndex() {
        return this.index;
    }

    /**
     * Set the index to a specified value.
     *
     * @param   index   the new index
     * @return          the {@code ParseText} object (for chaining purposes)
     * @throws  StringIndexOutOfBoundsException if the index is outside the bounds of the text
     */
    public ParseText setIndex(int index) {
        // the position just past the last character (index == length) is allowed
        boolean inBounds = index >= 0 && index <= text.length();
        if (!inBounds)
            throw new StringIndexOutOfBoundsException("ParseText index invalid");
        this.index = index;
        return this;
    }

    /**
     * Indicate successful match.  This is intended to be used by derived classes.
     *
     * @param   i   the index at end of match
     * @return      {@code true} to indicate successful match
     */
    protected boolean matchSuccess(int i) {
        // the match began at the current position and ends at i
        this.start = this.index;
        this.index = i;
        return true;
    }

    /**
     * Get the character at the current index and increment the index.
     *
     * @return  the current character
     * @throws  StringIndexOutOfBoundsException if the index is at or beyond end of string
     */
    public char getChar() {
        final int pos = index;
        // the start index is updated before the bounds check, so it moves even when exhausted
        start = pos;
        if (pos >= text.length())
            throw new StringIndexOutOfBoundsException("ParseText exhausted");
        index = pos + 1;
        return text.charAt(pos);
    }

    /**
     * Get the Unicode code point at the current character index and increment the index past
     * the code point.
     *
     * @return  the code point
     * @throws  StringIndexOutOfBoundsException if the index is at or beyond end of string
     */
    public int getCodePoint() {
        start = index;
        if (index >= text.length())
            throw new StringIndexOutOfBoundsException("ParseText exhausted");
        char first = text.charAt(index++);
        // a surrogate pair needs a high surrogate with a low surrogate directly after it;
        // anything else is returned as a single char value
        if (!Character.isHighSurrogate(first) || index >= text.length())
            return first;
        char second = text.charAt(index);
        if (!Character.isLowSurrogate(second))
            return first;
        index++;
        return Character.toCodePoint(first, second);
    }

    /**
     * Extract a string from the text, bounded by the given start and end offsets.
     *
     * @param   from    the start offset
     * @param   to      the end offset
     * @return  the specified string
     * @throws  IndexOutOfBoundsException if the start and end offsets are invalid
     */
    public String getString(int from, int to) {
        // bounds checking is left to the underlying CharSequence
        CharSequence slice = text.subSequence(from, to);
        return slice.toString();
    }

    /**
     * Get the start index (the index of the start of the last matched sequence).
     *
     * @return  the start index
     */
    public int getStart() {
        return this.start;
    }

    /**
     * Set the start index (the index of the start of the last matched sequence).  The start
     * index must be less than or equal to the index (current position).
     *
     * @param   start   the new start index
     * @return          the {@code ParseText} object (for chaining purposes)
     * @throws          StringIndexOutOfBoundsException if the start index is invalid
     */
    public ParseText setStart(int start) {
        // the start index may not be negative and may not lie beyond the current position
        boolean valid = start >= 0 && start <= index;
        if (!valid)
            throw new StringIndexOutOfBoundsException("ParseText start index invalid");
        this.start = start;
        return this;
    }

    // Overflow guards for getInt: a running value above MAX_INT_DIV_10, or equal to it with a
    // next digit above MAX_INT_MOD_10, cannot take another decimal digit without exceeding
    // Integer.MAX_VALUE.
    private static final int MAX_INT_DIV_10 = Integer.MAX_VALUE / 10;
    private static final int MAX_INT_MOD_10 = Integer.MAX_VALUE % 10;

    /**
     * Get the result of the last match operation as an {@code int}.
     *
     * @return  the result of the last match as an {@code int} (always positive)
     * @throws  NumberFormatException if the start and end indices do not describe a valid
     *          {@code int}
     */
    public int getResultInt() {
        // the last match spans from the start index up to (not including) the index
        final int from = start;
        final int to = index;
        return getInt(from, to);
    }

    /**
     * Get an {@code int} from the text.
     *
     * @param   from    the start offset
     * @param   to      the end offset
     * @return  the {@code int} (always positive)
     * @throws  NumberFormatException if the start and end indices do not describe a valid
     *          {@code int}
     * @throws  IndexOutOfBoundsException if the start and end indices are not contained within
     *          the text
     */
    public int getInt(int from, int to) {
        // an empty (or inverted) range does not describe a number
        if (from >= to)
            throw new NumberFormatException();
        int value = 0;
        int pos = from;
        while (pos < to) {
            int digit = convertDecDigit(text.charAt(pos++));
            // check before multiplying: would value * 10 + digit exceed Integer.MAX_VALUE?
            boolean overflow = value > MAX_INT_DIV_10 ||
                    (value == MAX_INT_DIV_10 && digit > MAX_INT_MOD_10);
            if (overflow)
                throw new NumberFormatException();
            value = value * 10 + digit;
        }
        return value;
    }

    // Overflow guards for getLong: a running value above MAX_LONG_DIV_10, or equal to it with
    // a next digit above MAX_LONG_MOD_10, cannot take another decimal digit without exceeding
    // Long.MAX_VALUE.
    private static final long MAX_LONG_DIV_10 = Long.MAX_VALUE / 10;
    private static final int MAX_LONG_MOD_10 = (int)(Long.MAX_VALUE % 10);

    /**
     * Get the result of the last match operation as a {@code long}.
     *
     * @return  the result of the last match as a {@code long} (always positive)
     * @throws  NumberFormatException if the start and end indices do not describe a valid
     *          {@code long}
     */
    public long getResultLong() {
        // the last match spans from the start index up to (not including) the index
        final int from = start;
        final int to = index;
        return getLong(from, to);
    }

    /**
     * Get a {@code long} from the text.
     *
     * @param   from    the start offset
     * @param   to      the end offset
     * @return  the {@code long} (always positive)
     * @throws  NumberFormatException if the start and end indices do not describe a valid
     *          {@code long}
     * @throws  IndexOutOfBoundsException if the start and end indices are not contained within
     *          the text
     */
    public long getLong(int from, int to) {
        // an empty (or inverted) range does not describe a number
        if (from >= to)
            throw new NumberFormatException();
        long value = 0;
        int pos = from;
        while (pos < to) {
            int digit = convertDecDigit(text.charAt(pos++));
            // check before multiplying: would value * 10 + digit exceed Long.MAX_VALUE?
            boolean overflow = value > MAX_LONG_DIV_10 ||
                    (value == MAX_LONG_DIV_10 && digit > MAX_LONG_MOD_10);
            if (overflow)
                throw new NumberFormatException();
            value = value * 10 + digit;
        }
        return value;
    }

    /**
     * Convert a decimal digit to the integer value of the digit.  This method may be overridden
     * to provide for different definitions of a decimal digit.  If this method is overridden it
     * may be necessary to override {@link #isDigit(char)} as well.
     *
     * @param ch    the decimal digit
     * @return      the integer value (0 - 9)
     * @throws      NumberFormatException if the digit is not valid
     */
    public int convertDecDigit(char ch) {
        // distance from '0'; only 0 to 9 corresponds to an ASCII decimal digit
        int value = ch - '0';
        if (value < 0 || value > 9)
            throw new NumberFormatException();
        return value;
    }

    /**
     * Get the result of the last match operation as an {@code int}, treating the digits as
     * hexadecimal.
     *
     * @return  the result of the last match as an {@code int} (always positive)
     * @throws  NumberFormatException if the start and end indices do not describe a valid
     *          {@code int}
     */
    public int getResultHexInt() {
        // the last match spans from the start index up to (not including) the index
        final int from = start;
        final int to = index;
        return getHexInt(from, to);
    }

    // The top five bits of an int: if any is set, shifting left by 4 to take another hex digit
    // would lose bits or set the sign bit (checked in getHexInt).
    private static final int MAX_INT_MASK = 0xF8 << 24;

    /**
     * Get an {@code int} from the text, treating the digits as hexadecimal.
     *
     * @param   from    the start offset
     * @param   to      the end offset
     * @return  the hexadecimal {@code int} (always positive)
     * @throws  NumberFormatException if the start and end indices do not describe a valid
     *          {@code int}
     * @throws  IndexOutOfBoundsException if the start and end indices are not contained within
     *          the text
     */
    public int getHexInt(int from, int to) {
        // an empty (or inverted) range does not describe a number
        if (from >= to)
            throw new NumberFormatException();
        int value = 0;
        int pos = from;
        while (pos < to) {
            // any of the top five bits set means another digit would not fit in a positive int
            if ((value & MAX_INT_MASK) != 0)
                throw new NumberFormatException();
            int digit = convertHexDigit(text.charAt(pos++));
            value = (value << 4) | digit;
        }
        return value;
    }

    /**
     * Get the result of the last match operation as a {@code long}, treating the digits as
     * hexadecimal.
     *
     * @return  the result of the last match as a {@code long} (always positive)
     * @throws  NumberFormatException if the start and end indices do not describe a valid
     *          {@code long}
     */
    public long getResultHexLong() {
        // the last match spans from the start index up to (not including) the index
        final int from = start;
        final int to = index;
        return getHexLong(from, to);
    }

    // The top five bits of a long: if any is set, shifting left by 4 to take another hex digit
    // would lose bits or set the sign bit (checked in getHexLong).
    private static final long MAX_LONG_MASK = ((long)0xF8) << 56;

    /**
     * Get a {@code long} from the text, treating the digits as hexadecimal.
     *
     * @param   from    the start offset
     * @param   to      the end offset
     * @return  the hexadecimal {@code long} (always positive)
     * @throws  NumberFormatException if the start and end indices do not describe a valid
     *          {@code long}
     * @throws  IndexOutOfBoundsException if the start and end indices are not contained within
     *          the text
     */
    public long getHexLong(int from, int to) {
        // an empty (or inverted) range does not describe a number
        if (from >= to)
            throw new NumberFormatException();
        long value = 0;
        int pos = from;
        while (pos < to) {
            // any of the top five bits set means another digit would not fit in a positive long
            if ((value & MAX_LONG_MASK) != 0)
                throw new NumberFormatException();
            int digit = convertHexDigit(text.charAt(pos++));
            value = (value << 4) | digit;
        }
        return value;
    }

    /**
     * Convert a hexadecimal digit to the integer value of the digit.  This method may be
     * overridden to provide for different definitions of a hexadecimal digit.  If this method
     * is overridden it may be necessary to override {@link #isHexDigit(char)} as well.
     *
     * @param   ch      the hexadecimal digit
     * @return          the integer value (0 - 15)
     * @throws          NumberFormatException if the digit is not valid
     */
    public int convertHexDigit(char ch) {
        int value;
        if (ch >= '0' && ch <= '9')
            value = ch - '0';
        else if (ch >= 'A' && ch <= 'F')
            value = 10 + (ch - 'A');    // upper-case letters A-F map to 10-15
        else if (ch >= 'a' && ch <= 'f')
            value = 10 + (ch - 'a');    // lower-case letters a-f map to 10-15
        else
            throw new NumberFormatException();
        return value;
    }

    /**
     * Get the length of the result of the last match operation.
     *
     * @return  the length of the result of the last match
     */
    public int getResultLength() {
        // number of characters between the start index and the current index
        final int matchedLength = index - start;
        return matchedLength;
    }

    /**
     * Get the result of the last match operation (or the first character of a longer match) as
     * a single character.
     *
     * @return  the first character of the result of the last match
     */
    public char getResultChar() {
        // only the character at the start index is returned, whatever the match length
        return this.text.charAt(this.start);
    }

    /**
     * Get the result of the last match operation as a {@link CharSequence}.
     *
     * @return  the result of the last match
     */
    public CharSequence getResultSequence() {
        // a SubSequence over [start, index) is returned rather than text.subSequence(...)
        CharSequence result = new SubSequence(text, start, index);
        return result;
    }

    /**
     * Get the result of the last match operation as a {@link String}.
     *
     * @return  the result of the last match
     */
    public String getResultString() {
        CharSequence matched = text.subSequence(start, index);
        return matched.toString();
    }

    /**
     * Copy the result character sequence to a {@link StringBuilder}.  This is equivalent to but
     * more efficient than
     * <pre>
     *     sb.append(xxx.getResultSequence());
     * </pre>
     * because it avoids the creation of an intermediate object.
     *
     * @param   sb      the {@link StringBuilder}
     * @return          the {@link StringBuilder} (for chaining purposes)
     */
    public StringBuilder appendResultTo(StringBuilder sb) {
        // append the matched range directly from the text
        sb.append(text, start, index);
        return sb;
    }

    /**
     * Copy the result character sequence to an {@link Appendable}.  This is equivalent to but
     * more efficient than
     * <pre>
     *     a.append(xxx.getResultSequence());
     * </pre>
     * because it avoids the creation of an intermediate object.
     *
     * @param   a       the {@link Appendable}
     * @return          the {@link Appendable} (for chaining purposes)
     * @throws  IOException if thrown by the {@link Appendable}
     */
    public Appendable appendResultTo(Appendable a) throws IOException {
        // the value returned by the Appendable itself is passed back to the caller
        Appendable result = a.append(text, start, index);
        return result;
    }

    /**
     * Test whether the text has at least the specified number of characters left after the
     * index.
     *
     * @param   len     the number of characters required
     * @return  {@code true} if that number of characters are available
     */
    public boolean available(int len) {
        // Compare against the remaining length instead of computing index + len: that sum
        // wraps to a negative value for a very large len and would wrongly report true.
        // The index never exceeds the text length, so the subtraction cannot overflow.
        return len <= text.length() - index;
    }

    /**
     * Get the length of the entire text.
     *
     * @return  the length of the text
     */
    public int length() {
        return this.text.length();
    }

    /**
     * Get a specific character from the text.
     *
     * @param   index   the index of the character
     * @return  the character at the specified index
     * @throws  IndexOutOfBoundsException if the index is negative or beyond the end of the text
     */
    public char charAt(int index) {
        // the parameter is an absolute offset; the current parse position is not involved
        return this.text.charAt(index);
    }

    /**
     * Match the current character in the text against a given Unicode code point.  Following a
     * successful match the start index will point to the matched code point and the index will
     * be incremented past it.
     *
     * @param   cp      the code point to match against
     * @return  {@code true} if the code point in the text matches the given character
     */
    public boolean match(int cp) {
        int len = text.length();
        if (index >= len)
            return false;
        char ch = text.charAt(index);
        int next = index + 1;
        // Decode the code point at the index the same way getCodePoint() does: a high
        // surrogate followed by a low surrogate forms one supplementary code point; any other
        // char (including an unpaired surrogate) stands for itself.  Previously an unpaired
        // high surrogate could never be matched, although an unpaired low surrogate could.
        int found = ch;
        if (Character.isHighSurrogate(ch) && next < len) {
            char ch2 = text.charAt(next);
            if (Character.isLowSurrogate(ch2)) {
                found = Character.toCodePoint(ch, ch2);
                next++;
            }
        }
        if (found != cp)
            return false;
        // state is only modified on a successful match
        start = index;
        index = next;
        return true;
    }

    /**
     * Match the current character in the text against a given character.  Following a
     * successful match the start index will point to the matched character and the index will
     * be incremented past it.
     *
     * @param   ch      the character to match against
     * @return  {@code true} if the character in the text matches the given character
     */
    public boolean match(char ch) {
        boolean matches = index < text.length() && text.charAt(index) == ch;
        if (!matches)
            return false;
        // record where the match began, then step past the matched character
        start = index;
        index++;
        return true;
    }

    /**
     * Match the current character in the text against a given character, ignoring case.
     * Following a successful match the start index will point to the matched character and the
     * index will be incremented past it.
     *
     * @param ch  the character to match against
     * @return    {@code true} if the character in the text matches the given character
     */
    public boolean matchIgnoreCase(char ch) {
        boolean matches = index < text.length() && equalIgnoreCase(text.charAt(index), ch);
        if (!matches)
            return false;
        // record where the match began, then step past the matched character
        start = index;
        index++;
        return true;
    }

    /**
     * Test characters for equality, ignoring case.
     *
     * @param   a       the first character
     * @param   b       the second character
     * @return          {@code true} if the characters are equal, ignoring case
     */
    private static boolean equalIgnoreCase(char a, char b) {
        if (a == b)
            return true;
        // convert b to lower case when a is lower case, otherwise to upper case, then compare
        char converted = Character.isLowerCase(a) ?
                Character.toLowerCase(b) : Character.toUpperCase(b);
        return a == converted;
    }

    /**
     * Match the current character in the text against a given character range.  Following a
     * successful match the start index will point to the matched character and the index will
     * be incremented past it.
     *
     * @param from the low character in the range to match against
     * @param to   the high character in the range to match against (inclusive)
     * @return     {@code true} if the character in the text falls in the given range
     */
    public boolean matchRange(char from, char to) {
        if (index >= text.length())
            return false;
        char current = text.charAt(index);
        // both ends of the range are inclusive
        boolean inRange = from <= current && current <= to;
        if (!inRange)
            return false;
        start = index;
        index++;
        return true;
    }

    /**
     * Match the current character in the text against any of the characters in a given
     * {@link String}.  Following a successful match the start index will point to the matched
     * character and the index will be incremented past it.
     *
     * @param str   the characters to match against (as a {@link String})
     * @return      {@code true} if the character in the text matches any of the characters in
     *              the string
     */
    public boolean matchAnyOf(String str) {
        // the string is only consulted when there is a character left to test
        if (index < text.length() && str.indexOf(text.charAt(index)) >= 0) {
            start = index;
            index++;
            return true;
        }
        return false;
    }

    /**
     * Match the current character in the text against any of the characters in a given array.
     * Following a successful match the start index will point to the matched character and the
     * index will be incremented past it.
     *
     * @param array the characters to match against (as an array or varargs list)
     * @return      {@code true} if the character in the text matches any of the characters in
     *              the array
     * @throws      IllegalArgumentException if the array is empty
     */
    public boolean matchAnyOf(char ... array) {
        if (array.length == 0)
            throw new IllegalArgumentException("Array must not be empty");
        if (index >= text.length())
            return false;
        char current = text.charAt(index);
        for (char candidate : array) {
            if (candidate == current) {
                // record where the match began, then step past the matched character
                start = index;
                index++;
                return true;
            }
        }
        return false;
    }

    /**
     * Match the characters at the index against a given {@link CharSequence} ({@link String},
     * {@link StringBuilder} etc.).  Following a successful match the start index will point to
     * the first character of the matched sequence and the index will be incremented past it.
     *
     * @param target    the target {@link CharSequence}
     * @return          {@code true} if the characters in the text at the index match the target
     */
    public boolean match(CharSequence target) {
        final int targetLength = target.length();
        final int end = index + targetLength;
        // not enough text left to hold the whole target
        if (end > text.length())
            return false;
        for (int offset = 0; offset < targetLength; offset++) {
            if (text.charAt(index + offset) != target.charAt(offset))
                return false;
        }
        start = index;
        index = end;
        return true;
    }

    /**
     * Match the characters at the index against a given {@link CharSequence} ({@link String},
     * {@link StringBuilder} etc.), checking that the character following the match is not part
     * of a name.  Following a successful match the start index will point to
     * the first character of the matched sequence and the index will be incremented past it.
     *
     * @param target    the target {@link CharSequence}
     * @return          {@code true} if the characters in the text at the index match the target
     */
    public boolean matchName(CharSequence target) {
        final int targetLength = target.length();
        final int end = index + targetLength;
        // not enough text left to hold the whole target
        if (end > text.length())
            return false;
        for (int offset = 0; offset < targetLength; offset++) {
            if (text.charAt(index + offset) != target.charAt(offset))
                return false;
        }
        // reject the match when the character after it is a name continuation character
        if (end < text.length() && isNameContinuation(text.charAt(end)))
            return false;
        start = index;
        index = end;
        return true;
    }

    /**
     * Match the characters at the index against any of an array of {@link CharSequence}
     * ({@link String}, {@link StringBuilder} etc.) objects.  Following a successful match the
     * start index will point to the first character of the matched sequence and the index will
     * be incremented past it.
     *
     * @param array     the array (or varargs list) of {@link CharSequence}
     * @return          {@code true} if the characters in the text at the index match any of the
     *                  entries in the array
     * @throws      IllegalArgumentException if the array is empty
     */
    public boolean matchAnyOf(CharSequence ... array) {
        if (array.length == 0)
            throw new IllegalArgumentException("Array must not be empty");
        // entries are tried in order; the first one that matches wins
        for (int k = 0; k < array.length; k++) {
            if (match(array[k]))
                return true;
        }
        return false;
    }

    /**
     * Match the characters at the index against any of a {@link Collection} of
     * {@link CharSequence} ({@link String}, {@link StringBuilder} etc.) objects.  Following a
     * successful match the start index will point to the first character of the matched
     * sequence and the index will be incremented past it.
     *
     * @param collection    the {@link Collection} of {@link CharSequence}s
     * @return              {@code true} if the characters in the text at the index match any of
     *                      the entries in the collection
     */
    public boolean matchAnyOf(Collection<? extends CharSequence> collection) {
        // The element type must be declared: iterating a raw Collection yields Object, which
        // cannot be assigned to the CharSequence loop variable.  The wildcard accepts a
        // collection of any CharSequence implementation (String, StringBuilder etc.).
        // Entries are tried in iteration order; the first one that matches wins.
        for (CharSequence str : collection)
            if (match(str))
                return true;
        return false;
    }

    /**
     * Match the characters at the index against a given {@link CharSequence} ({@link String},
     * {@link StringBuilder} etc.), ignoring case.  Following a successful match the start index
     * will point to the first character of the matched sequence and the index will be
     * incremented past it.
     *
     * @param target    the target {@link CharSequence}
     * @return          {@code true} if the characters in the text at the index match the target
     */
    public boolean matchIgnoreCase(CharSequence target) {
        final int targetLength = target.length();
        final int end = index + targetLength;
        // not enough text left to hold the whole target
        if (end > text.length())
            return false;
        for (int offset = 0; offset < targetLength; offset++) {
            if (!equalIgnoreCase(text.charAt(index + offset), target.charAt(offset)))
                return false;
        }
        start = index;
        index = end;
        return true;
    }

    /**
     * Match the characters at the index as decimal digits, with a given minimum number of
     * digits and an optional maximum.
     *
     * @param maxDigits the maximum number digits to match (or 0 to indicate no limit)
     * @param minDigits the minimum number digits for a successful match
     * @return          {@code true} if the characters in the text at the index are decimal
     *                  digits (subject to the specified minimum and maximum number of digits)
     */
    public boolean matchDec(int maxDigits, int minDigits) {
        int i = index;
        int stopper = text.length();
        // Apply the limit only when it is shorter than the remaining text.  Comparing against
        // the remaining length avoids computing i + maxDigits unconditionally: that sum
        // overflows to a negative value for a very large maxDigits, which stopped the scan
        // before the first digit and made the match fail.
        if (maxDigits > 0 && maxDigits < stopper - i)
            stopper = i + maxDigits;
        while (i < stopper && isDigit(text.charAt(i)))
            i++;
        // state is only modified when enough digits were found
        if (i - index < minDigits)
            return false;
        start = index;
        index = i;
        return true;
    }

    /**
     * Match the characters at the index as decimal digits, with a minimum of 1 digit and an
     * optional maximum.
     *
     * @param maxDigits the maximum number digits to match (or 0 to indicate no limit)
     * @return          {@code true} if the characters in the text at the index are decimal
     *                  digits (subject to the specified maximum number of digits)
     */
    public boolean matchDec(int maxDigits) {
        final int minDigits = 1;    // at least one digit is required
        return matchDec(maxDigits, minDigits);
    }

    /**
     * Match the characters at the index as decimal digits, with a minimum of 1 digit and no
     * maximum.
     *
     * @return  {@code true} if the characters in the text at the index are decimal digits
     */
    public boolean matchDec() {
        final int noLimit = 0;      // a maximum of 0 means no upper limit
        final int minDigits = 1;    // at least one digit is required
        return matchDec(noLimit, minDigits);
    }

    /**
     * Match the characters at the index as a fixed number of decimal digits.
     *
     * @param numDigits the number of digits expected
     * @return          {@code true} if the characters in the text at the index are decimal
     *                  digits
     */
    public boolean matchDecFixed(int numDigits) {
        // the same count is used as both the maximum and the minimum
        return this.matchDec(numDigits, numDigits);
    }

    /**
     * Match the characters at the index as hexadecimal digits, with a given minimum number of
     * digits and an optional maximum.
     *
     * @param maxDigits the maximum number digits to match (or 0 to indicate no limit)
     * @param minDigits the minimum number digits for a successful match
     * @return          {@code true} if the characters in the text at the index are hexadecimal
     *                  digits (subject to the specified minimum and maximum number of digits)
     */
    public boolean matchHex(int maxDigits, int minDigits) {
        int i = index;
        int stopper = text.length();
        // Apply the limit only when it is shorter than the remaining text.  Comparing against
        // the remaining length avoids computing i + maxDigits unconditionally: that sum
        // overflows to a negative value for a very large maxDigits, which stopped the scan
        // before the first digit and made the match fail.
        if (maxDigits > 0 && maxDigits < stopper - i)
            stopper = i + maxDigits;
        while (i < stopper && isHexDigit(text.charAt(i)))
            i++;
        // state is only modified when enough digits were found
        if (i - index < minDigits)
            return false;
        start = index;
        index = i;
        return true;
    }

    /**
     * Match the characters at the index as hexadecimal digits, with a minimum of 1 digit and an
     * optional maximum.
     *
     * @param maxDigits the maximum number digits to match (or 0 to indicate no limit)
     * @return          {@code true} if the characters in the text at the index are hexadecimal
     *                  digits (subject to the specified maximum number of digits)
     */
    public boolean matchHex(int maxDigits) {
        final int minDigits = 1;    // at least one digit is required
        return matchHex(maxDigits, minDigits);
    }

    /**
     * Match the characters at the index as hexadecimal digits, with a minimum of 1 digit and no
     * maximum.
     *
     * @return  {@code true} if the characters in the text at the index are hexadecimal digits
     */
    public boolean matchHex() {
        final int noLimit = 0;      // a maximum of 0 means no upper limit
        final int minDigits = 1;    // at least one digit is required
        return matchHex(noLimit, minDigits);
    }

    /**
     * Match a fixed number of hexadecimal digits at the index.  The given count is passed to
     * {@link #matchHex(int, int)} as both the maximum and the minimum number of digits.
     *
     * @param numDigits the number of digits expected
     * @return          {@code true} if the characters in the text at the index are hexadecimal
     *                  digits
     */
    public boolean matchHexFixed(int numDigits) {
        final int required = numDigits;
        return matchHex(required, required);
    }

    /**
     * Undo the effect of the last match operation by moving the index back to the start index.
     *
     * @return      the {@code ParseText} object (for chaining purposes)
     */
    public ParseText revert() {
        this.index = this.start;
        return this;
    }

    /**
     * Move the index back to the beginning of the text.  Only the index is changed; the start
     * index keeps its current value.
     *
     * @return      the {@code ParseText} object (for chaining purposes)
     */
    public ParseText reset() {
        this.index = 0;
        return this;
    }

    /**
     * Advance the index by n characters.  The start index is first set to the current index,
     * then the new index is applied through {@code setIndex}.
     *
     * @param n     the amount to add to the index
     * @return      the {@code ParseText} object (for chaining purposes)
     * @throws      StringIndexOutOfBoundsException if the result index would be beyond the end
     *              of the text
     */
    public ParseText skip(int n) {
        final int origin = index;
        start = origin;
        setIndex(origin + n);
        return this;
    }

    /**
     * Move the index back by n characters.  The new index is applied through {@code setIndex};
     * the start index is not modified by this method.
     *
     * @param n     the amount to subtract from the index
     * @return      the {@code ParseText} object (for chaining purposes)
     * @throws      StringIndexOutOfBoundsException if the result index would be negative
     */
    public ParseText back(int n) {
        final int target = index - n;
        setIndex(target);
        return this;
    }

    /**
     * Advance the index to the next occurrence of the given character, leaving the index
     * positioned at that character.  If the character does not occur, the index is left at the
     * end of the text.  The start index is set to the position from which the scan began.
     *
     * @param   ch      the stopper character
     * @return          the {@code ParseText} object (for chaining purposes)
     */
    public ParseText skipTo(char ch) {
        int pos = index;
        start = pos;
        for (int end = text.length(); pos < end; pos++) {
            if (text.charAt(pos) == ch)
                break;
        }
        index = pos;
        return this;
    }

    /**
     * Advance the index to the next occurrence of any one of the given characters, leaving the
     * index positioned at the character found.  If none of the characters occurs, the index is
     * left at the end of the text.  The start index is set to the position from which the scan
     * began.
     *
     * @param   array   the array (or varargs list) of possible stopper characters
     * @return          the {@code ParseText} object (for chaining purposes)
     * @throws      IllegalArgumentException if the array is empty
     */
    public ParseText skipToAnyOf(char ... array) {
        if (array.length == 0)
            throw new IllegalArgumentException("Array must not be empty");
        int pos = index;
        start = pos;
        boolean found = false;
        while (!found && pos < text.length()) {
            char current = text.charAt(pos);
            for (char candidate : array) {
                if (current == candidate) {
                    found = true;
                    break;
                }
            }
            // only move on when the current character is not a stopper
            if (!found)
                pos++;
        }
        index = pos;
        return this;
    }

    /**
     * Advance the index to the next occurrence of any one of the characters in the given
     * sequence, leaving the index positioned at the stopper character found.  If none of the
     * characters occurs, the index is left at the end of the text.  The start index is set to
     * the position from which the scan began.
     *
     * @param   stoppers    a {@link CharSequence} of possible stopper characters
     * @return              the {@code ParseText} object (for chaining purposes)
     * @throws              IllegalArgumentException if the {@link CharSequence} is empty
     */
    public ParseText skipToAnyOf(CharSequence stoppers) {
        if (stoppers.length() == 0)
            throw new IllegalArgumentException("String must not be empty");
        int pos = index;
        start = pos;
        boolean found = false;
        while (!found && pos < text.length()) {
            char current = text.charAt(pos);
            for (int k = 0; k < stoppers.length(); k++) {
                if (current == stoppers.charAt(k)) {
                    found = true;
                    break;
                }
            }
            // only move on when the current character is not a stopper
            if (!found)
                pos++;
        }
        index = pos;
        return this;
    }

    /**
     * Advance the index to the next occurrence of the stopper sequence, leaving the index
     * positioned at the first character of that sequence.  If the sequence does not occur in
     * the remaining text, the index is left at the end of the text.  The start index is set to
     * the position from which the scan began.
     *
     * @param   target  the stopper sequence
     * @return          the {@code ParseText} object (for chaining purposes)
     */
    public ParseText skipTo(CharSequence target) {
        final int targetLength = target.length();
        int pos = index;
        start = pos;
        // last position at which the whole target could still fit in the text
        final int lastCandidate = text.length() - targetLength;
        boolean found = false;
        while (pos <= lastCandidate) {
            int matched = 0;
            while (matched < targetLength && text.charAt(pos + matched) == target.charAt(matched))
                matched++;
            if (matched == targetLength) {
                found = true;
                break;
            }
            pos++;
        }
        index = found ? pos : text.length();
        return this;
    }

    /**
     * Match one or more space characters (as defined by {@link #isSpace(char)}) at the index.
     * On success the start index is set to the first space and the index is moved past the
     * last one; if there is no space at the index neither value is changed.
     *
     * @return  {@code true} if the characters in the text at the index are one or more spaces
     */
    public boolean matchSpaces() {
        final int len = text.length();
        final int begin = index;
        int pos = begin;
        while (pos < len && isSpace(text.charAt(pos)))
            pos++;
        if (pos == begin)
            return false;
        start = begin;
        index = pos;
        return true;
    }

    /**
     * Advance the index past zero or more consecutive characters for which the given comparison
     * function returns {@code true}.  The start index is set to the position from which the
     * scan began.
     *
     * @param   comparison  the comparison function
     * @return              the {@code ParseText} object (for chaining purposes)
     */
    public ParseText skipPast(IntPredicate comparison) {
        int pos = index;
        start = pos;
        for (int end = text.length(); pos < end; pos++) {
            if (!comparison.test(text.charAt(pos)))
                break;
        }
        index = pos;
        return this;
    }

    /**
     * Advance the index past zero or more space characters (as defined by
     * {@link #isSpace(char)}).  The start index is set to the position from which the scan
     * began.
     *
     * @return      the {@code ParseText} object (for chaining purposes)
     */
    public ParseText skipSpaces() {
        int pos = index;
        start = pos;
        for (int end = text.length(); pos < end; pos++) {
            if (!isSpace(text.charAt(pos)))
                break;
        }
        index = pos;
        return this;
    }

    /**
     * Advance the index to the next character for which the given comparison function returns
     * {@code true}, leaving the index positioned at that character.  If no character satisfies
     * the function, the index is left at the end of the text.  The start index is set to the
     * position from which the scan began.
     *
     * @param   comparison  the comparison function
     * @return              the {@code ParseText} object (for chaining purposes)
     */
    public ParseText skipTo(IntPredicate comparison) {
        int pos = index;
        start = pos;
        for (int end = text.length(); pos < end; pos++) {
            if (comparison.test(text.charAt(pos)))
                break;
        }
        index = pos;
        return this;
    }

    /**
     * Advance the index to the next space character (as defined by {@link #isSpace(char)}),
     * leaving the index positioned at that space.  If there is no space in the remaining text,
     * the index is left at the end of the text.  The start index is set to the position from
     * which the scan began.
     *
     * @return  the {@code ParseText} object (for chaining purposes)
     */
    public ParseText skipToSpace() {
        int pos = index;
        start = pos;
        for (int end = text.length(); pos < end; pos++) {
            if (isSpace(text.charAt(pos)))
                break;
        }
        index = pos;
        return this;
    }

    /**
     * Move the index directly to the end of the text.  The start index is set to the index
     * value in effect before the move.
     *
     * @return  the {@code ParseText} object (for chaining purposes)
     */
    public ParseText skipToEnd() {
        final int origin = index;
        start = origin;
        index = text.length();
        return this;
    }

    /**
     * Match a name at the index.  A name is one character accepted by
     * {@link #isNameStart(char)} followed by zero or more characters accepted by
     * {@link #isNameContinuation(char)}.  On success the start index is set to the first
     * character of the name and the index is moved past its last character; otherwise neither
     * value is changed.
     *
     * @return  {@code true} if the characters in the text at the index constitute a name
     */
    public boolean matchName() {
        final int len = text.length();
        final int begin = index;
        if (begin < len && isNameStart(text.charAt(begin))) {
            start = begin;
            int pos = begin + 1;
            while (pos < len && isNameContinuation(text.charAt(pos)))
                pos++;
            index = pos;
            return true;
        }
        return false;
    }

    /**
     * Unescape a string from the index, using the provided {@link CharUnmapper} and stopping
     * at the given stopper character or at the end of the text.  The index is left positioned
     * at the stopper character so that the caller can test whether the stopper was present.
     * The start index is set to the position from which the scan began, so a call to
     * {@link #getResultString()} or {@link #getResultSequence()} following this method will
     * return the section of the text prior to unescaping.
     *
     * @param   charUnmapper    the {@link CharUnmapper}
     * @param   stopper         the stopper character (e.g. the closing quote)
     * @return  the unescaped string
     * @throws  IllegalArgumentException if thrown by the {@link CharUnmapper}
     */
    public String unescape(CharUnmapper charUnmapper, char stopper) {
        final int len = text.length();
        int pos = index;
        start = pos;
        // created only when the first escape sequence is seen; until then the result is simply
        // a slice of the original text and no copying is needed
        StringBuilder unescaped = null;
        while (pos < len) {
            char current = text.charAt(pos);
            if (current == stopper)
                break;
            if (charUnmapper.isEscape(text, pos)) {
                if (unescaped == null) {
                    unescaped = new StringBuilder();
                    unescaped.append(text, start, pos);
                }
                // unmap appends the decoded character(s) and returns the amount to advance
                pos += charUnmapper.unmap(unescaped, text, pos);
            }
            else {
                if (unescaped != null)
                    unescaped.append(current);
                pos++;
            }
        }
        index = pos;
        if (unescaped == null)
            return text.subSequence(start, pos).toString();
        return unescaped.toString();
    }

    /**
     * Test whether the given character is a space.  The default definition accepts the space
     * character, tab, line feed and carriage return.  This method may be overridden to provide
     * for different definitions of a space.
     *
     * @param ch    the character
     * @return      {@code true} if the character is a space
     */
    public boolean isSpace(char ch) {
        switch (ch) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            return true;
        default:
            return false;
        }
    }

    /**
     * Test whether the given character is a digit.  The default definition accepts only the
     * characters {@code '0'} to {@code '9'}.  This method may be overridden to provide for
     * different definitions of a digit.  If this method is overridden it may be necessary to
     * override {@link #convertDecDigit(char)} as well.
     *
     * @param ch    the character
     * @return      {@code true} if the character is a digit
     */
    public boolean isDigit(char ch) {
        int offset = ch - '0';
        return offset >= 0 && offset <= 9;
    }

    /**
     * Test whether the given character is a hexadecimal digit.  The default definition accepts
     * {@code '0'} to {@code '9'}, {@code 'A'} to {@code 'F'} and {@code 'a'} to {@code 'f'}.
     * This method may be overridden to provide for different definitions of a hex digit.  If
     * this method is overridden it may be necessary to override {@link #convertHexDigit(char)}
     * as well.
     *
     * @param ch    the character
     * @return      {@code true} if the character is a hexadecimal digit
     */
    public boolean isHexDigit(char ch) {
        if (ch >= '0' && ch <= '9')
            return true;
        if (ch >= 'A' && ch <= 'F')
            return true;
        return ch >= 'a' && ch <= 'f';
    }

    /**
     * Test whether the given character is the start character of a name.  The default
     * definition accepts the letters {@code 'A'} to {@code 'Z'} and {@code 'a'} to
     * {@code 'z'}, the underscore and the dollar sign.  This method may be overridden to
     * provide for different definitions of a name.
     *
     * @param ch    the character
     * @return      {@code true} if the character is a name start character
     */
    public boolean isNameStart(char ch) {
        if (ch >= 'A' && ch <= 'Z')
            return true;
        if (ch >= 'a' && ch <= 'z')
            return true;
        return ch == '_' || ch == '$';
    }

    /**
     * Test whether the given character is a continuation character of a name.  The default
     * definition accepts any character accepted by {@link #isNameStart(char)} and the
     * characters {@code '0'} to {@code '9'}.  This method may be overridden to provide for
     * different definitions of a name.
     *
     * @param ch    the character
     * @return      {@code true} if the character is a name continuation character
     */
    public boolean isNameContinuation(char ch) {
        if (isNameStart(ch))
            return true;
        return ch >= '0' && ch <= '9';
    }

    /**
     * Create a {@link String} representation of the {@code ParseText} object.  This is
     * primarily intended for debugging purposes; the resulting string consists of the text
     * enclosed in square brackets, with a "{@code ~}" inserted at the {@code start} position
     * and a "{@code ^}" at the {@code index} position (when both fall at the same position the
     * "{@code ~}" comes first).
     *
     * @return      the {@link String} representation of the object
     */
    @Override
    public String toString() {
        final int len = text.length();
        // room for the text plus the two brackets and the two position markers
        StringBuilder out = new StringBuilder(len + 4);
        out.append('[');
        // iterate one position beyond the last character so that markers at the end of the
        // text are also emitted
        for (int pos = 0; pos <= len; pos++) {
            if (pos == start)
                out.append('~');
            if (pos == index)
                out.append('^');
            if (pos < len)
                out.append(text.charAt(pos));
        }
        out.append(']');
        return out.toString();
    }

    /**
     * Compare the {@code ParseText} with another object for equality.  Two {@code ParseText}
     * objects are equal when they have the same index, the same start index and texts
     * containing the same sequence of characters.
     *
     * @param o     the other object
     * @return      {@code true} if the other object is a {@code ParseText} and the objects
     *              are equal
     */
    @Override
    public boolean equals(Object o) {
        if (o == this)
            return true;
        if (!(o instanceof ParseText))
            return false;
        ParseText other = (ParseText)o;
        final int len = text.length();
        if (index != other.index || start != other.start || len != other.text.length())
            return false;
        // the texts are compared by content, since CharSequence does not define equals
        for (int pos = 0; pos < len; pos++) {
            if (text.charAt(pos) != other.text.charAt(pos))
                return false;
        }
        return true;
    }

    /**
     * Compute the hash code for this object.  The value is derived from the index, the start
     * index and the characters of the text, i.e. the same properties that are compared by
     * {@link #equals(Object)}, so equal objects always have equal hash codes.
     *
     * @return      the hash code
     */
    @Override
    public int hashCode() {
        // A position-sensitive polynomial hash is used instead of a plain sum, so that texts
        // which are permutations of one another, or objects whose index and start values are
        // swapped, do not all collide on the same hash code.
        int result = 31 * index + start;
        for (int i = 0, n = text.length(); i < n; i++)
            result = 31 * result + text.charAt(i);
        return result;
    }

}