All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.pwall.util.ParseText Maven / Gradle / Ivy

The newest version!
/*
 * @(#) ParseText.java
 *
 * javautil Java Utility Library
 * Copyright (c) 2013, 2014, 2017, 2020 Peter Wall
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

package net.pwall.util;

import java.io.IOException;
import java.util.Collection;
import java.util.function.IntPredicate;

/**
 * A class to assist with text parsing.  A {@code ParseText} object contains a string of text to
 * be examined (in the form of a {@link CharSequence}), an index denoting the current parse
 * position and a start index pointing to the start of the last matched or skipped characters.
 *
 * <p>The class includes a number of "{@code match}" operations, which return {@code true} if
 * the text matches the parameter(s) to the operation.  On a successful match, these operations
 * set the start index to the position of the first character matched, and increment the index
 * past the matched characters.  If the match is not successful, the index and start index are
 * not modified.</p>
 *
 * <p>For use following a successful match operation, there are several "{@code getResult}"
 * operations that will extract the matched characters from the text in a variety of forms
 * ({@code int}, {@code long}, {@link String} etc.).</p>
 *
 * <p>The class takes text in the form of a {@link CharSequence} rather than a {@link String},
 * and some implementations of {@link CharSequence} allow modification of the contents.  The
 * results of any method of this class are undefined in the case of concurrent modification of
 * the text.</p>
* * @author Peter Wall */ public class ParseText { private CharSequence text; private int index; private int start; /** * Construct a {@code ParseText} object with the given text and initial index value. * * @param text the text * @param index the initial index * @throws NullPointerException if the text is {@code null} * @throws StringIndexOutOfBoundsException if the index is negative or beyond the end of * the text */ public ParseText(CharSequence text, int index) { setText(text, index); } /** * Construct a {@code ParseText} object with the given text and an initial index value 0f 0. * * @param text the text * @throws NullPointerException if the text is {@code null} */ public ParseText(CharSequence text) { setText(text, 0); } /** * Set the text and the index within the text. The start index is set to the same value as * the index. * * @param text the text * @param index the index * @return the {@code ParseText} object (for chaining purposes) * @throws NullPointerException if the text is {@code null} * @throws StringIndexOutOfBoundsException if the index is outside the bounds of the text */ public ParseText setText(CharSequence text, int index) { if (text == null) throw new NullPointerException("ParseText data invalid"); this.text = text; setIndex(index); start = index; return this; } /** * Set the text. The index and start index are set to zero. * * @param text the text * @return the {@code ParseText} object (for chaining purposes) * @throws NullPointerException if the text is {@code null} */ public ParseText setText(CharSequence text) { setText(text, 0); return this; } /** * Get the entire text from the {@code ParseText} object (as a {@link CharSequence}). * * @return the text */ public CharSequence getText() { return text; } /** * Get the length of the entire text. * * @return the text length */ public int getTextLength() { return text.length(); } /** * Test whether the {@code ParseText} object is exhausted (the index has reached the end of * the text). 
* * @return {@code true} if the index has reached the end of the text */ public boolean isExhausted() { return index >= text.length(); } /** * Get the current index (the offset within the text). * * @return the index */ public int getIndex() { return index; } /** * Set the index to a specified value. * * @param index the new index * @return the {@code ParseText} object (for chaining purposes) * @throws StringIndexOutOfBoundsException if the index is outside the bounds of the text */ public ParseText setIndex(int index) { if (index < 0 || index > text.length()) throw new StringIndexOutOfBoundsException("ParseText index invalid"); this.index = index; return this; } /** * Indicate successful match. This is intended to be used by derived classes. * * @param i the index at end of match * @return {@code true} to indicate successful match */ protected boolean matchSuccess(int i) { start = index; index = i; return true; } /** * Get the character at the current index and increment the index. * * @return the current character * @throws StringIndexOutOfBoundsException if the index is at or beyond end of string */ public char getChar() { start = index; if (index >= text.length()) throw new StringIndexOutOfBoundsException("ParseText exhausted"); return text.charAt(index++); } /** * Get the Unicode code point at the current character index and increment the index past * the code point. * * @return the code point * @throws StringIndexOutOfBoundsException if the index is at or beyond end of string */ public int getCodePoint() { start = index; if (index >= text.length()) throw new StringIndexOutOfBoundsException("ParseText exhausted"); char ch = text.charAt(index++); if (Character.isHighSurrogate(ch) && index < text.length()) { char ch2 = text.charAt(index); if (Character.isLowSurrogate(ch2)) { index++; return (Character.toCodePoint(ch, ch2)); } } return ch; } /** * Extract a string from the text, bounded by the given start and end offsets. 
* * @param from the start offset * @param to the end offset * @return the specified string * @throws IndexOutOfBoundsException if the start and end offsets are invalid */ public String getString(int from, int to) { return text.subSequence(from, to).toString(); } /** * Get the start index (the index of the start of the last matched sequence). * * @return the start index */ public int getStart() { return start; } /** * Set the start index (the index of the start of the last matched sequence). The start * index must be less than or equal to the index (current position). * * @param start the new start index * @return the {@code ParseText} object (for chaining purposes) * @throws StringIndexOutOfBoundsException if the start index is invalid */ public ParseText setStart(int start) { if (start < 0 || start > index) throw new StringIndexOutOfBoundsException("ParseText start index invalid"); this.start = start; return this; } private static final int MAX_INT_DIV_10 = Integer.MAX_VALUE / 10; private static final int MAX_INT_MOD_10 = Integer.MAX_VALUE % 10; /** * Get the result of the last match operation as an {@code int}. * * @return the result of the last match as an {@code int} (always positive) * @throws NumberFormatException if the start and end indices do not describe a valid * {@code int} */ public int getResultInt() { return getInt(start, index); } /** * Get an {@code int} from the text. 
* * @param from the start offset * @param to the end offset * @return the {@code int} (always positive) * @throws NumberFormatException if the start and end indices do not describe a valid * {@code int} * @throws IndexOutOfBoundsException if the start and end indices are not contained within * the text */ public int getInt(int from, int to) { if (to <= from) throw new NumberFormatException(); int result = 0; for (int i = from; i < to; i++) { int n = convertDecDigit(text.charAt(i)); if (result > MAX_INT_DIV_10 || result == MAX_INT_DIV_10 && n > MAX_INT_MOD_10) throw new NumberFormatException(); result = result * 10 + n; } return result; } private static final long MAX_LONG_DIV_10 = Long.MAX_VALUE / 10; private static final int MAX_LONG_MOD_10 = (int)(Long.MAX_VALUE % 10); /** * Get the result of the last match operation as a {@code long}. * * @return the result of the last match as a {@code long} (always positive) * @throws NumberFormatException if the start and end indices do not describe a valid * {@code long} */ public long getResultLong() { return getLong(start, index); } /** * Get a {@code long} from the text. * * @param from the start offset * @param to the end offset * @return the {@code long} (always positive) * @throws NumberFormatException if the start and end indices do not describe a valid * {@code long} * @throws IndexOutOfBoundsException if the start and end indices are not contained within * the text */ public long getLong(int from, int to) { if (to <= from) throw new NumberFormatException(); long result = 0; for (int i = from; i < to; i++) { int n = convertDecDigit(text.charAt(i)); if (result > MAX_LONG_DIV_10 || result == MAX_LONG_DIV_10 && n > MAX_LONG_MOD_10) throw new NumberFormatException(); result = result * 10 + n; } return result; } /** * Convert a decimal digit to the integer value of the digit. This method may be overridden * to provide for different definitions of a decimal digit. 
If this method is overridden it * may be necessary to override {@link #isDigit(char)} as well. * * @param ch the decimal digit * @return the integer value (0 - 9) * @throws NumberFormatException if the digit is not valid */ public int convertDecDigit(char ch) { if (ch >= '0' && ch <= '9') return ch - '0'; throw new NumberFormatException(); } /** * Get the result of the last match operation as an {@code int}, treating the digits as * hexadecimal. * * @return the result of the last match as an {@code int} (always positive) * @throws NumberFormatException if the start and end indices do not describe a valid * {@code int} */ public int getResultHexInt() { return getHexInt(start, index); } private static final int MAX_INT_MASK = 0xF8 << 24; /** * Get an {@code int} from the text, treating the digits as hexadecimal. * * @param from the start offset * @param to the end offset * @return the hexadecimal {@code int} (always positive) * @throws NumberFormatException if the start and end indices do not describe a valid * {@code int} * @throws IndexOutOfBoundsException if the start and end indices are not contained within * the text */ public int getHexInt(int from, int to) { if (to <= from) throw new NumberFormatException(); int result = 0; for (int i = from; i < to; i++) { if ((result & MAX_INT_MASK) != 0) throw new NumberFormatException(); result = result << 4 | convertHexDigit(text.charAt(i)); } return result; } /** * Get the result of the last match operation as a {@code long}, treating the digits as * hexadecimal. * * @return the result of the last match as a {@code long} (always positive) * @throws NumberFormatException if the start and end indices do not describe a valid * {@code long} */ public long getResultHexLong() { return getHexLong(start, index); } private static final long MAX_LONG_MASK = ((long)0xF8) << 56; /** * Get a {@code long} from the text, treating the digits as hexadecimal. 
* * @param from the start offset * @param to the end offset * @return the hexadecimal {@code long} (always positive) * @throws NumberFormatException if the start and end indices do not describe a valid * {@code long} * @throws IndexOutOfBoundsException if the start and end indices are not contained within * the text */ public long getHexLong(int from, int to) { if (to <= from) throw new NumberFormatException(); long result = 0; for (int i = from; i < to; i++) { if ((result & MAX_LONG_MASK) != 0) throw new NumberFormatException(); result = result << 4 | convertHexDigit(text.charAt(i)); } return result; } /** * Convert a hexadecimal digit to the integer value of the digit. This method may be * overridden to provide for different definitions of a hexadecimal digit. If this method * is overridden it may be necessary to override {@link #isHexDigit(char)} as well. * * @param ch the hexadecimal digit * @return the integer value (0 - 15) * @throws NumberFormatException if the digit is not valid */ public int convertHexDigit(char ch) { if (ch >= '0' && ch <= '9') return ch - '0'; if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10; if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10; throw new NumberFormatException(); } /** * Get the length of the result of the last match operation. * * @return the length of the result of the last match */ public int getResultLength() { return index - start; } /** * Get the result of the last match operation (or the first character of a longer match) as * a single character. * * @return the first character of the result of the last match */ public char getResultChar() { return text.charAt(start); } /** * Get the result of the last match operation as a {@link CharSequence}. * * @return the result of the last match */ public CharSequence getResultSequence() { // return seq.subSequence(start, index); return new SubSequence(text, start, index); } /** * Get the result of the last match operation as a {@link String}. 
*
     * @return  the result of the last match
     */
    public String getResultString() {
        CharSequence matched = text.subSequence(start, index);
        return matched.toString();
    }

    /**
     * Copy the result character sequence to a {@link StringBuilder}.  This is equivalent to but
     * more efficient than
     * <pre>
     *     sb.append(xxx.getResultSequence());
     * </pre>
* because it avoids the creation of an intermediate object.
     *
     * @param   sb      the {@link StringBuilder}
     * @return  the {@link StringBuilder} (for chaining purposes)
     */
    public StringBuilder appendResultTo(StringBuilder sb) {
        sb.append(text, start, index);
        return sb;
    }

    /**
     * Copy the result character sequence to an {@link Appendable}.  This is equivalent to but
     * more efficient than
     * <pre>
     *     a.append(xxx.getResultSequence());
     * </pre>
* because it avoids the creation of an intermediate object. * * @param a the {@link Appendable} * @return the {@link Appendable} (for chaining purposes) * @throws IOException if thrown by the {@link Appendable} */ public Appendable appendResultTo(Appendable a) throws IOException { return a.append(text, start, index); } /** * Test whether the text has at least the specified number of characters left after the * index. * * @param len the number of characters required * @return {@code true} if that number of characters are available */ public boolean available(int len) { return index + len <= text.length(); } /** * Get the length of the entire text. * * @return the length of the text */ public int length() { return text.length(); } /** * Get a specific character from the text. * * @param index the index of the character * @return the character at the specified index * @throws IndexOutOfBoundsException if the index is negative or beyond the end of the text */ public char charAt(int index) { return text.charAt(index); } /** * Match the current character in the text against a given Unicode code point. Following a * successful match the start index will point to the matched code point and the index will * be incremented past it. * * @param cp the code point to match against * @return {@code true} if the code point in the text matches the given character */ public boolean match(int cp) { int i = index; if (i >= text.length()) return false; char ch = text.charAt(i++); if (Character.isHighSurrogate(ch)) { if (i >= text.length()) return false; char ch2 = text.charAt(i++); if (!Character.isLowSurrogate(ch2)) return false; if (Character.toCodePoint(ch, ch2) != cp) return false; } else if (ch != cp) return false; start = index; index = i; return true; } /** * Match the current character in the text against a given character. Following a * successful match the start index will point to the matched character and the index will * be incremented past it. 
* * @param ch the character to match against * @return {@code true} if the character in the text matches the given character */ public boolean match(char ch) { if (index >= text.length() || text.charAt(index) != ch) return false; start = index++; return true; } /** * Match the current character in the text against a given character, ignoring case. * Following a successful match the start index will point to the matched character and the * index will be incremented past it. * * @param ch the character to match against * @return {@code true} if the character in the text matches the given character */ public boolean matchIgnoreCase(char ch) { if (index >= text.length() || !equalIgnoreCase(text.charAt(index), ch)) return false; start = index++; return true; } /** * Test characters for equality, ignoring case. * * @param a the first character * @param b the second character * @return {@code true} if the characters are equal, ignoring case */ private static boolean equalIgnoreCase(char a, char b) { return a == b || a == (Character.isLowerCase(a) ? Character.toLowerCase(b) : Character.toUpperCase(b)); } /** * Match the current character in the text against a given character range. Following a * successful match the start index will point to the matched character and the index will * be incremented past it. * * @param from the low character in the range to match against * @param to the high character in the range to match against (inclusive) * @return {@code true} if the character in the text falls in the given range */ public boolean matchRange(char from, char to) { if (index >= text.length()) return false; char ch = text.charAt(index); if (ch < from || ch > to) return false; start = index++; return true; } /** * Match the current character in the text against any of the characters in a given * {@link String}. Following a successful match the start index will point to the matched * character and the index will be incremented past it. 
* * @param str the characters to match against (as a {@link String}) * @return {@code true} if the character in the text matches any of the characters in * the string */ public boolean matchAnyOf(String str) { if (index >= text.length()) return false; if (str.indexOf(text.charAt(index)) < 0) return false; start = index++; return true; } /** * Match the current character in the text against any of the characters in a given array. * Following a successful match the start index will point to the matched character and the * index will be incremented past it. * * @param array the characters to match against (as an array or varargs list) * @return {@code true} if the character in the text matches any of the characters in * the array * @throws IllegalArgumentException if the array is empty */ public boolean matchAnyOf(char ... array) { if (array.length == 0) throw new IllegalArgumentException("Array must not be empty"); if (index >= text.length()) return false; char ch = text.charAt(index); for (int i = 0, n = array.length; i < n; i++) { if (ch == array[i]) { start = index++; return true; } } return false; } /** * Match the characters at the index against a given {@link CharSequence} ({@link String}, * {@link StringBuilder} etc.). Following a successful match the start index will point to * the first character of the matched sequence and the index will be incremented past it. * * @param target the target {@link CharSequence} * @return {@code true} if the characters in the text at the index match the target */ public boolean match(CharSequence target) { int len = target.length(); if (index + len > text.length()) return false; int i = index; int j = 0; for (; len > 0; len--) if (text.charAt(i++) != target.charAt(j++)) return false; start = index; index = i; return true; } /** * Match the characters at the index against a given {@link CharSequence} ({@link String}, * {@link StringBuilder} etc.), checking that the character following the match is not part * of a name. 
Following a successful match the start index will point to * the first character of the matched sequence and the index will be incremented past it. * * @param target the target {@link CharSequence} * @return {@code true} if the characters in the text at the index match the target */ public boolean matchName(CharSequence target) { int len = target.length(); if (index + len > text.length()) return false; int i = index; int j = 0; for (; len > 0; len--) if (text.charAt(i++) != target.charAt(j++)) return false; if (i < text.length() && isNameContinuation(text.charAt(i))) return false; start = index; index = i; return true; } /** * Match the characters at the index against any of an array of {@link CharSequence} * ({@link String}, {@link StringBuilder} etc.) objects. Following a successful match the * start index will point to the first character of the matched sequence and the index will * be incremented past it. * * @param array the array (or varargs list) of {@link CharSequence} * @return {@code true} if the characters in the text at the index match any of the * entries in the array * @throws IllegalArgumentException if the array is empty */ public boolean matchAnyOf(CharSequence ... array) { if (array.length == 0) throw new IllegalArgumentException("Array must not be empty"); for (CharSequence str : array) if (match(str)) return true; return false; } /** * Match the characters at the index against any of a {@link Collection} of * {@link CharSequence} ({@link String}, {@link StringBuilder} etc.) objects. Following a * successful match the start index will point to the first character of the matched * sequence and the index will be incremented past it. 
* * @param collection the {@link Collection} of {@link CharSequence}s * @return {@code true} if the characters in the text at the index match any of * the entries in the collection */ public boolean matchAnyOf(Collection collection) { for (CharSequence str : collection) if (match(str)) return true; return false; } /** * Match the characters at the index against a given {@link CharSequence} ({@link String}, * {@link StringBuilder} etc.), ignoring case. Following a successful match the start index * will point to the first character of the matched sequence and the index will be * incremented past it. * * @param target the target {@link CharSequence} * @return {@code true} if the characters in the text at the index match the target */ public boolean matchIgnoreCase(CharSequence target) { int len = target.length(); if (index + len > text.length()) return false; int i = index; int j = 0; for (; len > 0; len--) if (!equalIgnoreCase(text.charAt(i++), target.charAt(j++))) return false; start = index; index = i; return true; } /** * Match the characters at the index as decimal digits, with a given minimum number of * digits and an optional maximum. * * @param maxDigits the maximum number digits to match (or 0 to indicate no limit) * @param minDigits the minimum number digits for a successful match * @return {@code true} if the characters in the text at the index are decimal * digits (subject to the specified minimum and maximum number of digits) */ public boolean matchDec(int maxDigits, int minDigits) { int i = index; int stopper = text.length(); if (maxDigits > 0) stopper = Math.min(stopper, i + maxDigits); while (i < stopper && isDigit(text.charAt(i))) i++; if (i - index < minDigits) return false; start = index; index = i; return true; } /** * Match the characters at the index as decimal digits, with a minimum of 1 digit and an * optional maximum. 
* * @param maxDigits the maximum number digits to match (or 0 to indicate no limit) * @return {@code true} if the characters in the text at the index are decimal * digits (subject to the specified maximum number of digits) */ public boolean matchDec(int maxDigits) { return matchDec(maxDigits, 1); } /** * Match the characters at the index as decimal digits, with a minimum of 1 digit and no * maximum. * * @return {@code true} if the characters in the text at the index are decimal digits */ public boolean matchDec() { return matchDec(0, 1); } /** * Match the characters at the index as a fixed number of decimal digits. * * @param numDigits the number of digits expected * @return {@code true} if the characters in the text at the index are decimal * digits */ public boolean matchDecFixed(int numDigits) { return matchDec(numDigits, numDigits); } /** * Match the characters at the index as hexadecimal digits, with a given minimum number of * digits and an optional maximum. * * @param maxDigits the maximum number digits to match (or 0 to indicate no limit) * @param minDigits the minimum number digits for a successful match * @return {@code true} if the characters in the text at the index are hexadecimal * digits (subject to the specified minimum and maximum number of digits) */ public boolean matchHex(int maxDigits, int minDigits) { int i = index; int stopper = text.length(); if (maxDigits > 0) stopper = Math.min(stopper, i + maxDigits); while (i < stopper && isHexDigit(text.charAt(i))) i++; if (i - index < minDigits) return false; start = index; index = i; return true; } /** * Match the characters at the index as hexadecimal digits, with a minimum of 1 digit and an * optional maximum. 
* * @param maxDigits the maximum number digits to match (or 0 to indicate no limit) * @return {@code true} if the characters in the text at the index are hexadecimal * digits (subject to the specified maximum number of digits) */ public boolean matchHex(int maxDigits) { return matchHex(maxDigits, 1); } /** * Match the characters at the index as hexadecimal digits, with a minimum of 1 digit and no * maximum. * * @return {@code true} if the characters in the text at the index are hexadecimal digits */ public boolean matchHex() { return matchHex(0, 1); } /** * Match the characters at the index as a fixed number of hexadecimal digits. * * @param numDigits the number of digits expected * @return {@code true} if the characters in the text at the index are hexadecimal * digits */ public boolean matchHexFixed(int numDigits) { return matchHex(numDigits, numDigits); } /** * Undo the effect of the last match operation. * * @return the {@code ParseText} object (for chaining purposes) */ public ParseText revert() { index = start; return this; } /** * Reset the index to the start of the text. * * @return the {@code ParseText} object (for chaining purposes) */ public ParseText reset() { index = 0; return this; } /** * Increment the index by n. * * @param n the amount to add to the index * @return the {@code ParseText} object (for chaining purposes) * @throws StringIndexOutOfBoundsException if the result index would be beyond the end * of the text */ public ParseText skip(int n) { start = index; setIndex(index + n); return this; } /** * Decrement the index by n. * * @param n the amount to subtract from the index * @return the {@code ParseText} object (for chaining purposes) * @throws StringIndexOutOfBoundsException if the result index would be negative */ public ParseText back(int n) { setIndex(index - n); return this; } /** * Increment the index to the next occurrence of the given character. The index is left * positioned at the matched character. 
* * @param ch the stopper character * @return the {@code ParseText} object (for chaining purposes) */ public ParseText skipTo(char ch) { int i = index; start = i; while (i < text.length() && text.charAt(i) != ch) i++; index = i; return this; } /** * Increment the index to the next occurrence of any of the given characters. The index is * left positioned at the matched character. * * @param array the array (or varargs list) of possible stopper characters * @return the {@code ParseText} object (for chaining purposes) * @throws IllegalArgumentException if the array is empty */ public ParseText skipToAnyOf(char ... array) { if (array.length == 0) throw new IllegalArgumentException("Array must not be empty"); int i = index; start = i; outer: while (i < text.length()) { char ch = text.charAt(i); for (int j = 0; j < array.length; j++) if (ch == array[j]) break outer; i++; } index = i; return this; } /** * Increment the index to the next occurrence of any of the given characters. The index is * left positioned at the stopper character. * * @param stoppers a {@link CharSequence} of possible stopper characters * @return the {@code ParseText} object (for chaining purposes) * @throws IllegalArgumentException if the {@link CharSequence} is empty */ public ParseText skipToAnyOf(CharSequence stoppers) { if (stoppers.length() == 0) throw new IllegalArgumentException("String must not be empty"); int i = index; start = i; outer: while (i < text.length()) { char ch = text.charAt(i); for (int j = 0; j < stoppers.length(); j++) if (ch == stoppers.charAt(j)) break outer; i++; } index = i; return this; } /** * Increment the index to the next occurrence of the stopper sequence. The index is left * positioned at the stopper sequence. 
* * @param target the stopper sequence * @return the {@code ParseText} object (for chaining purposes) */ public ParseText skipTo(CharSequence target) { int len = target.length(); int i = index; start = i; int stopper = text.length() - len; outer: for (;;) { if (i > stopper) { i = text.length(); break; } int j = 0; for (;;) { if (j >= len) break outer; if (text.charAt(i + j) != target.charAt(j)) break; j++; } i++; } index = i; return this; } /** * Match the characters at the index as spaces. * * @return {@code true} if the characters in the text at the index are one or more spaces */ public boolean matchSpaces() { int i = index; int len = text.length(); if (i >= len || !isSpace(text.charAt(i))) return false; start = i; do { i++; } while (i < len && isSpace(text.charAt(i))); index = i; return true; } /** * Increment the index past any characters matching a given comparison function. * * @param comparison the comparison function * @return the {@code ParseText} object (for chaining purposes) */ public ParseText skipPast(IntPredicate comparison) { int i = index; start = i; int len = text.length(); while (i < len && comparison.test(text.charAt(i))) i++; index = i; return this; } /** * Increment the index past zero or more spaces. * * @return the {@code ParseText} object (for chaining purposes) */ public ParseText skipSpaces() { int i = index; start = i; int len = text.length(); while (i < len && isSpace(text.charAt(i))) i++; index = i; return this; } /** * Increment the index past any characters matching a given comparison function. * * @param comparison the comparison function * @return the {@code ParseText} object (for chaining purposes) */ public ParseText skipTo(IntPredicate comparison) { int i = index; start = i; int len = text.length(); while (i < len && !comparison.test(text.charAt(i))) i++; index = i; return this; } /** * Increment the index to the next space. 
* * @return the {@code ParseText} object (for chaining purposes) */ public ParseText skipToSpace() { int i = index; start = i; int len = text.length(); while (i < len && !isSpace(text.charAt(i))) i++; index = i; return this; } /** * Increment the index directly to the end of the text. * * @return the {@code ParseText} object (for chaining purposes) */ public ParseText skipToEnd() { start = index; index = text.length(); return this; } /** * Match the characters at the index as a name, where a name is defined as starting with a * character that matches {@link #isNameStart(char)}, followed by zero or more characters * that match {@link #isNameContinuation(char)}. * * @return {@code true} if the characters in the text at the index constitute a name */ public boolean matchName() { int i = index; int len = text.length(); if (i >= len || !isNameStart(text.charAt(i))) return false; start = i; do { ++i; } while (i < len && isNameContinuation(text.charAt(i))); index = i; return true; } /** * Unescape a string from the index, using the provided {@link CharUnmapper} and stopping on * the given character stopper value. The index is left positioned at the stopper character * so that the caller can test whether the stopper was present. A call to * {@link #getResultString()} or {@link #getResultSequence()} following this method will * return the section of the text prior to unescaping. * * @param charUnmapper the {@link CharUnmapper} * @param stopper the stopper character (e.g. 
the closing quote) * @return the unescaped string * @throws IllegalArgumentException if thrown by the {@link CharUnmapper} */ public String unescape(CharUnmapper charUnmapper, char stopper) { int i = index; start = i; int len = text.length(); while (i < len) { char ch = text.charAt(i); if (ch == stopper) break; if (charUnmapper.isEscape(text, i)) { StringBuilder sb = new StringBuilder(); sb.append(text, start, i); i += charUnmapper.unmap(sb, text, i); while (i < len) { ch = text.charAt(i); if (ch == stopper) break; if (charUnmapper.isEscape(text, i)) i += charUnmapper.unmap(sb, text, i); else { sb.append(ch); i++; } } index = i; return sb.toString(); } i++; } index = i; return text.subSequence(start, i).toString(); } /** * Test whether the given character is a space. This method may be overridden to provide * for different definitions of a space. * * @param ch the character * @return {@code true} if the character is a space */ public boolean isSpace(char ch) { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'; } /** * Test whether the given character is a digit. This method may be overridden to provide * for different definitions of a digit. If this method is overridden it may be necessary * to override {@link #convertDecDigit(char)} as well. * * @param ch the character * @return {@code true} if the character is a digit */ public boolean isDigit(char ch) { return ch >= '0' && ch <= '9'; } /** * Test whether the given character is a hexadecimal digit. This method may be overridden * to provide for different definitions of a hex digit. If this method is overridden it may * be necessary to override {@link #convertHexDigit(char)} as well. * * @param ch the character * @return {@code true} if the character is a hexadecimal digit */ public boolean isHexDigit(char ch) { return ch >= '0' && ch <= '9' || ch >= 'A' && ch <= 'F' || ch >= 'a' && ch <= 'f'; } /** * Test whether the given character is the start character of a name. 
This method may be * overridden to provide for different definitions of a name. * * @param ch the character * @return {@code true} if the character is a name start character */ public boolean isNameStart(char ch) { return ch >= 'A' && ch <= 'Z' || ch >= 'a' && ch <= 'z' || ch == '_' || ch == '$'; } /** * Test whether the given character is a continuation character of a name. This method may * be overridden to provide for different definitions of a name. * * @param ch the character * @return {@code true} if the character is a name continuation character */ public boolean isNameContinuation(char ch) { return isNameStart(ch) || ch >= '0' && ch <= '9'; } /** * Create a {@link String} representation of the {@code ParseText} object. This is * primarily intended for debugging purposes; the resulting string consists of the text * enclosed in square brackets, with a "{@code ~}" indicating the {@code start} position and * a "{@code ^}" for the {@code index}. * * @return the {@link String} representation of the object */ @Override public String toString() { int n = text.length(); StringBuilder sb = new StringBuilder(n + 4); sb.append('['); int i = 0; for (;;) { if (i == start) sb.append('~'); if (i == index) sb.append('^'); if (i >= n) break; sb.append(text.charAt(i++)); } sb.append(']'); return sb.toString(); } /** * Compare the {@code ParseText} with another object for equality. * * @param o the other object * @return {@code true} if the the other object is a {@code ParseText} and the objects * are equal */ @Override public boolean equals(Object o) { if (this == o) return true; if (!(o instanceof ParseText)) return false; ParseText pt = (ParseText)o; if (text.length() != pt.text.length() || index != pt.index || start != pt.start) return false; for (int i = 0; i < text.length(); i++) if (text.charAt(i) != pt.text.charAt(i)) return false; return true; } /** * Compute the hash code for this object (for completeness). 
*
     * @return  the hash code
     */
    @Override
    public int hashCode() {
        // simple additive hash over the text length, both positions and every character
        int hash = text.length();
        hash += index;
        hash += start;
        int pos = 0;
        while (pos < text.length()) {
            hash += text.charAt(pos);
            pos++;
        }
        return hash;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy