// de.unkrig.commons.text.parser.AbstractParser (Maven / Gradle / Ivy)
/*
* de.unkrig.commons - A general-purpose Java class library
*
* Copyright (c) 2012, Arno Unkrig
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
* following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
* following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
* following disclaimer in the documentation and/or other materials provided with the distribution.
* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package de.unkrig.commons.text.parser;
import java.util.Arrays;
import de.unkrig.commons.lang.protocol.ProducerWhichThrows;
import de.unkrig.commons.nullanalysis.Nullable;
import de.unkrig.commons.text.scanner.AbstractScanner.Token;
import de.unkrig.commons.text.scanner.ScanException;
/**
* The base class for implementing parsers. Typically, you would declare methods named 'parse...()' which invoke
* each other and the 'peek...()', 'read...()' and 'peekRead...()' methods to parse a document.
*
* @param The enumerator representing the scanner's token types
*/
public
class AbstractParser> {
/**
* The source of tokens that are processed by this parser.
*/
protected final ProducerWhichThrows extends Token, ? extends ScanException> scanner;
/**
* One token read-ahead. Value {@code null} means that (A) no token was currently read-ahead, or (B) the scanner is
* at end-of-input.
*/
@Nullable private Token current;
/**
* @param scanner Its {@code toString()} method returns a human-readable indication of the scanner location
*/
public
AbstractParser(ProducerWhichThrows extends Token, ? extends ScanException> scanner) {
this.scanner = scanner;
}
// PEEK METHODS
/**
* Checks the next token, but does not consume it.
*
* @return The next token, or {@code null} if the scanner is at end-of-input
*/
@Nullable public Token
peek() throws ParseException {
if (this.current == null) this.current = this.produceToken();
return this.current;
}
/**
* Checks the next token, but does not consume it.
*
* @return The next token's text, or {@code null} if the next token's type is not tokenType, or if the
* scanner is at end-of-input
*/
@Nullable public String
peek(TT tokenType) throws ParseException {
this.peek();
return this.current != null && this.current.type == tokenType ? this.current.text : null;
}
/**
* Checks the next token, but does not consume it.
*
* @return Whether the next token's text equals text, or the scanner is not at end-of-input and
* text is {@code null}
*/
public boolean
peek(@Nullable String text) throws ParseException {
this.peek();
Token current = this.current;
return current == null ? text == null : current.text.equals(text);
}
/**
* Checks the next token, but does not consume it.
*
* An element of tokenTypeOrText matches iff:
*
*
* - the element equals the next token's type, or
* - the element equals the next token's text, or
* - the element is {@code null} and the scanner is at end-of-input
*
*
* @return The index of the first element of tokenTypeOrText that matches, or {@code -1}
*/
public int
peek(Object... tokenTypeOrText) throws ParseException {
Token c = this.peek();
for (int i = 0; i < tokenTypeOrText.length; i++) {
Object ttot = tokenTypeOrText[i];
if (c == null ? ttot == null : c.type.equals(ttot) || c.text.equals(ttot)) return i;
}
return -1;
}
// PEEK READ METHODS
/**
* Checks the next token and consumes it if its type is tokenType.
*
* @return The text of the next token, or {@code null} if the next token's type is not tokenType, or if
* the scanner is at end-of-input
*/
@Nullable public String
peekRead(TT tokenType) throws ParseException {
Token c = this.peek();
if (c == null || c.type != tokenType) return null;
final String result = c.text;
this.current = null;
return result;
}
/**
* Checks the next token and consumes it if it matches.
*
* @return Whether the next token's text equals text, or the scanner is at end-of-input and
* text is {@code null}
*/
public boolean
peekRead(@Nullable String text) throws ParseException {
Token c = this.peek();
if (c == null) return text == null;
if (!c.text.equals(text)) return false;
this.current = null;
return true;
}
/**
* Checks the next token and consumes it if it matches.
*
* An element of texts matches iff:
*
*
* - the element equals the next token's text, or
* - the element is {@code null} and the scanner is at end-of-input
*
*
* @return The index of the first element of texts that matches, or {@code -1}
*/
public int
peekRead(String... texts) throws ParseException {
Token c = this.peek();
if (c == null) return -1;
for (int i = 0; i < texts.length; i++) {
if (c.text.equals(texts[i])) {
this.current = null;
return i;
}
}
return -1;
}
/**
* Checks the next token and consumes it if its text equals the return value of {@link Object#toString()
* toString()} of one of the values.
*
* @return The matched value, or {@code null} if none match, or the scanner is at end-of-input
*/
@Nullable public > T
peekReadEnum(T... values) throws ParseException {
Token c = this.peek();
if (c == null) return null;
for (T value : values) {
if (c.text.equals(value.toString())) {
this.current = null;
return value;
}
}
return null;
}
/**
* Checks the next token and consumes it if its type is one of the tokenTypes.
*
* @return The matched token, or {@code null} iff none match, or the scanner is at end-of-input
*/
@Nullable public Token
peekRead(TT... tokenTypes) throws ParseException {
Token c = this.peek();
if (c == null) return null;
for (TT tokenType : tokenTypes) {
if (c.type == tokenType) {
this.current = null;
return c;
}
}
return null;
}
// READ METHODS
/**
* Consumes the next token.
*
* @return The next token
* @throws ParseException The scanner is at end-of-input
*/
public Token
read() throws ParseException {
if (this.current != null) {
final Token result = this.current;
this.current = null;
return result;
}
Token result = this.produceToken();
if (result == null) throw new ParseException("Unexpected end of input");
return result;
}
/**
* Consumes the next token.
*
* @return The next token's text
* @throws ParseException The next token's type is not tokenType
* @throws ParseException The scanner is at end-of-input
*/
public String
read(TT tokenType) throws ParseException {
Token result = this.read();
if (result.type != tokenType) {
throw new ParseException("'" + tokenType + "' expected instead of \"" + result + "\"");
}
return result.text;
}
/**
* Consumes the next token.
*
* @throws ParseException The next token's text does not equal text
* @throws ParseException The scanner is at end-of-input
* @see #eoi()
*/
public void
read(String text) throws ParseException {
Token t = this.read();
if (!t.text.equals(text)) throw new ParseException("'" + text + "' expected instead of \"" + t + "\"");
}
/**
* Consumes the next token.
*
* An element of tokenTypeOrText matches iff:
*
*
* - the element is {@code null} and the scanner is at end-of-input
* - the element equals the next token's type, or
* - the element equals the next token's text, or
*
*
* @return The index of the first element of tokenTypeOrText that matches
* @throws ParseException Neither the next token's type nor its text equals any of tokenTypeOrText
* @throws ParseException The scanner is at end-of-input
*/
public int
read(Object... tokenTypeOrText) throws ParseException {
Token t = this.peek();
if (t == null) {
for (int i = 0; i < tokenTypeOrText.length; i++) {
Object ttot = tokenTypeOrText[i];
if (ttot == null) return i;
}
throw new ParseException(
"One of " + Arrays.toString(tokenTypeOrText) + " expected instead of end-of-input"
);
}
for (int i = 0; i < tokenTypeOrText.length; i++) {
Object ttot = tokenTypeOrText[i];
if (ttot != null && t.type.equals(ttot) || t.text.equals(ttot)) {
this.current = null;
return i;
}
}
switch (tokenTypeOrText.length) {
case 0:
throw new ParseException("One of [none] expected instead of \"" + t + "\"");
case 1:
throw new ParseException(
AbstractParser.tokenTypeOrTextToString(tokenTypeOrText[0])
+ " expected instead of \""
+ t
+ "\""
);
default:
{
StringBuilder sb = new StringBuilder();
for (int i = 0;;) {
sb.append(AbstractParser.tokenTypeOrTextToString(tokenTypeOrText[i]));
if (++i == tokenTypeOrText.length) break;
sb.append(i == tokenTypeOrText.length - 1 ? " or " : ", ");
}
throw new ParseException("One of " + sb + " expected instead of \"" + t + "\"");
}
}
}
private static String
tokenTypeOrTextToString(@Nullable Object o) {
return o == null ? "end-of-input" : o instanceof String ? ('"' + (String) o + '"') : String.valueOf(o);
}
/**
* Modifies this parser such that t will appear as the "next token" before the actual next
* token.
*
* This operation is only permitted iff the next token has not been read-ahead.
*
*
* @throws IllegalStateException Another token has already been read-ahead, either by one of the {@code peek()}
* operations, or an unsuccessful {@code peekRead()} operation
*/
public void
unread(Token t) {
if (this.current != null) {
throw new IllegalStateException((
"Cannot unread \""
+ t
+ "\" because the next token, \""
+ this.current
+ "\", has already been read-ahead"
));
}
this.current = t;
}
/**
* Asserts that the scanner is at end-of-input.
*
* @throws ParseException Iff the scanner is not at end-of-input
*/
public void
eoi() throws ParseException {
Token t = this.peek();
if (t != null) throw new ParseException("Expected end-of-input instead of \"" + t + "\"");
}
@Nullable private Token
produceToken() throws ParseException {
try {
return this.scanner.produce();
} catch (ScanException se) {
throw new ParseException(se);
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy