All Downloads are FREE. Search and download functionalities are using the official Maven repository.

xjs.serialization.parser.TokenParser Maven / Gradle / Ivy

There is a newer version: 0.36
Show newest version
package xjs.serialization.parser;

import org.jetbrains.annotations.ApiStatus;
import xjs.core.JsonArray;
import xjs.core.JsonContainer;
import xjs.core.JsonValue;
import xjs.exception.SyntaxException;
import xjs.serialization.token.ContainerToken;
import xjs.serialization.token.Token;
import xjs.serialization.token.TokenType;
import xjs.serialization.token.TokenStream;
import xjs.serialization.util.BufferedStack;

import java.util.ArrayList;

/**
 * A basic parser type for processing {@link Token tokens} into JSON data.
 *
 * 

The {@link TokenParser} class should be safe to work with both {@link * ContainerToken containerized token streams} and regular {@link TokenStream * token streams}. * *

The basic procedure for using it with {@link ContainerToken container * tokens} is as follows: * *

    *
  • * Use {@link #read()} to pull the next value out of the * current iterator. Its output will be written into {@link * #current}. If the iterator does not yield a value, * {@link #EMPTY_VALUE} will be written instead. This guards * against null safety problems downstream. *
  • *
  • * Implementors may check to see if any tokens are left in the input * by calling {@link #isEndOfContainer()}. Be aware that * the exact implementation of this method may change at a later date. * The method itself should always be safe to call and accurate. *
  • *
  • * When working with {@link ContainerToken container tokens}, * implementors have the option to call {@link #push()} * to open the current token and push its iterator onto the stack, and * {@link #pop()} to pop the current iterator out of the * stack. *
  • *
* *

For non-containerized inputs, most of the containerized methods accept * an optional parameter specifying the opening or closing character. For * example, to pop the current container off the stack without * swapping iterators, call {@link #pop(char)}. * * Todo: most of these methods are missing direct unit test coverage */ public abstract class TokenParser implements ValueParser { /** * Represents that no values are left in the input. */ protected static final TokenStream EMPTY_VALUE = new ContainerToken("", 0, 0, 0, 0, 0, TokenType.OPEN, new ArrayList<>()); /** * An iterator which always returns empty, representing the end of input. */ protected static final TokenStream.Itr EMPTY_ITERATOR = EMPTY_VALUE.iterator(); /** * Direct access to the data stack used by this parser. This indirection is * designed to alleviate pressure from implementors by housing information * which they will always need. It is optimized to minimize the performance * impact. */ protected final BufferedStack.OfTwo< TokenStream.Itr, JsonContainer> stack; /** * The very root container of the input. Implementors may need access to * this information for analysis on how the stream was generated. * * @apiNote Experimental--this may be replaced with a parent * value in the future. */ @ApiStatus.Experimental protected final TokenStream root; /** * A reference to the input text. Callers must be aware that, when using * lazily-evaluated {@link TokenStream token streams}, this sequence * may not represent the full input until parsing is complete. * *

Additionally, this reference may point to a different text body * depending on the current iterator. For any token stream where sub-streams * have been generated, this reference may point to that generated source. * *

In other words, the indices of this reference may only correspond * the current token or any tokens in the current token stream. Authors * should avoid doing any manual inspection of this text if generated * streams are to be supported. */ protected CharSequence reference; /** * Houses any formatting data for the current value. As with {@link #stack}, * this value is designed to alleviate pressure from downstream implementors. * It may come with a subtle performance penalty. */ protected JsonContainer formatting; /** * The current source of tokens. */ protected TokenStream.Itr iterator; /** * The most recent token in the stream. */ protected Token current; /** * Represents any lines skipped since the last significant token. This value * can be used to infer formatting information in the output. */ protected int linesSkipped; /** * Constructs a new Parser when given a root stream of tokens. * *

Implementors should be aware that all tokens within this * stream are assumed to be in order by index. The API * makes no safety guarantees otherwise. * * @param root The root container representing the input. */ protected TokenParser(final TokenStream root) { this.stack = BufferedStack.ofTwo(); this.root = root; // Reference can be mutable and may expand lazily this.reference = root.reference; this.formatting = new JsonArray(); this.iterator = root.iterator(); this.current = root; } /** * Advanced the iterator a single time. If the iterator has * no values to return, yields a dummy value instead. */ protected void read() { final TokenStream.Itr itr = this.iterator; if (itr == EMPTY_ITERATOR) { return; } if (itr.hasNext()) { this.current = this.iterator.next(); } else { this.current = EMPTY_VALUE; } } /** * Pushes the current token onto the stack, if it is a {@link TokenStream}. * * @return true, if the method was able to push the container. */ protected boolean push() { if (this.iterator.getParent() == this.current) { return false; } if (this.current instanceof TokenStream) { this.stack.push(this.iterator, this.formatting); this.iterator = ((TokenStream) this.current).iterator(); this.formatting = new JsonArray(); this.reference = this.iterator.getReference(); return true; } return false; } /** * Pops the current iterator and formatting information out of the stack. * * @return true, if the method was able to pop the container. */ protected boolean pop() { this.expectEndOfContainer(); if (this.stack.isEmpty()) { this.iterator = EMPTY_ITERATOR; return false; } this.stack.pop(); this.iterator = this.stack.getFirst(); this.formatting = this.stack.getSecond(); this.reference = this.iterator.getReference(); return true; } /** * Begins recursion into a new container. Takes care of reading any initial * whitespace at the top of the container. * * @return true, if there are more tokens to parse. */ protected boolean open() { this.push(); if (this.isEndOfContainer()) { return false; } this.read(); this.readWhitespace(); return true; } /** * Closes the current container and pops one frame of the stack, resetting * the formatting and iterator to that of the previous level. * * @param container The current container being parsed. * @param The type of container being parsed. * @return The input container, with formatting applied. */ protected T close( final T container) { this.setTrailing(); this.takeFormatting(container); this.pop(); this.read(); return container; } /** * Variant of {@link #push()} for non-containerized input streams. * *

This method assumes that a matching opener has been found. * * @param opener The opening character of the container. * @throws SyntaxException if the opener is not found. * @see #push() */ protected void push(final char opener) { this.expect(opener); this.stack.push(null, this.formatting); this.formatting = new JsonArray(); } /** * Variant of {@link #pop()} for non-containerized input streams. * *

This method assumes that a matching closer has been found. * * @param closer The closing character of the container. * @throws SyntaxException if the closer is not found. * @see #pop() */ protected void pop(final char closer) { this.expectEndOfContainer(closer); this.stack.pop(); this.formatting = this.stack.getSecond(); } /** * Variant of {@link #open()} for non-containerized input streams. * * @param opener The opening character for this container. * @param closer The closing character for this container. * @return true if there are more tokens to parse. * @see #open() */ protected boolean open(final char opener, final char closer) { this.push(opener); this.readWhitespace(); return !this.isEndOfContainer(closer); } /** * Variant of {@link #close(JsonContainer)} for non-containerized * input streams. * * @param container The container being closed and formatted. * @param closer The closing character for this type of container. * @param The type of container being closed. * @return The input container, with formatting applied. * @see #close(JsonContainer) */ protected T close( final T container, final char closer) { this.setTrailing(); this.takeFormatting(container); this.pop(closer); this.read(); return container; } /** * Invokes the {@link #read()} method, if the current token * matches the given symbol. * * @param symbol The expected symbol at this position. * @return true, if the symbol matches {@link #current}. */ protected boolean readIf(final char symbol) { if (this.current.isSymbol(symbol)) { this.read(); return true; } return false; } /** * Specialized variant of {@link #readIf} designed for newline * characters. This method additionally takes care of flagging * the current line as skipped, if applicable. * * @return true, if the current token is a newline. */ protected boolean readNl() { if (this.current.type() == TokenType.BREAK) { this.read(); this.flagLineAsSkipped(); return true; } return false; } /** * Flags the current newline symbol as being skipped. Implementors * may use this to perform additional actions when lines are skipped. * *

For example, in some situations, the line may need to be * appended to a comment or stored in some other variable. */ protected void flagLineAsSkipped() { this.linesSkipped++; } /** * Indicates whether the current iterator has reached the end of input. * * @return true, if the iterator has finished. */ protected boolean isEndOfContainer() { return this.current == EMPTY_VALUE; } /** * Indicates whether the parser has reached the end of input. * * @return true, if the parser has reached end of input. */ protected boolean isEndOfText() { return this.stack.isEmpty() && !this.iterator.hasNext(); } /** * Variant of {@link #isEndOfContainer()} for non-containerized input * streams. * * @param closer The symbol indicating the end of this container. * @return true, if the parser has reached this token. * @see #isEndOfContainer() */ protected boolean isEndOfContainer(final char closer) { return this.current.isSymbol(closer); } /** * If applicable, consumes all whitespace at this point. */ protected void readWhitespace() { this.readWhitespace(true, true); } /** * Variant of {@link #readWhitespace}, indicating whether * to reset the {@link #linesSkipped lines skipped} counter. * * @param resetLinesSkipped Whether to reset the counter. */ protected void readWhitespace(final boolean resetLinesSkipped) { this.readWhitespace(resetLinesSkipped, true); } /** * Variant of {@link #readWhitespace(boolean)} which does not * consume newline characters. For regular {@link TokenStream * token streams}, this is essentially the same as capturing * comment data. * */ protected void readLineWhitespace() { this.readWhitespace(false, false); } /** * Verbose variant of {@link #readWhitespace} which specifies * both whether to reset the line counter and whether to read * newline characters. * * @param resetLinesSkipped Whether to reset the counter. * @param nl Whether to consume newline characters. */ protected void readWhitespace( final boolean resetLinesSkipped, final boolean nl) { if (resetLinesSkipped) { this.linesSkipped = 0; } final TokenStream.Itr itr = this.iterator; if (itr == EMPTY_ITERATOR) { return; } Token t = this.current; int peekAmount = 0; while (t != EMPTY_VALUE) { if (!this.consumeWhitespace(t, nl)) { break; } t = itr.peek(++peekAmount, EMPTY_VALUE); } this.current = t; itr.skip(peekAmount); } /** * Processes any metadata skipped by {@link #readWhitespace}. * * @param t The token being skipped. * @param nl Whether to consume newline characters, if applicable. * @return true, if the token was consumed. */ protected boolean consumeWhitespace(final Token t, final boolean nl) { if (nl && t.type() == TokenType.BREAK) { this.flagLineAsSkipped(); return true; } return false; } /** * Gets the actual offset of any text in the {@link #reference}. * * @param start The inclusive start index (e.g. of a line). * @param offset The expected offset. * @return The index after skipping offset. */ protected int getActualOffset(final int start, final int offset) { for (int i = start; i < start + offset; i++) { if (!this.isLineWhitespace(this.reference.charAt(i))) { return i; } } return start + offset; } /** * Indicates whether the current character represents non-newline * whitespace. * * @param c The character being evaluated. * @return true, if the character is line whitespace. */ protected boolean isLineWhitespace(final char c) { return c == ' ' || c == '\r' || c == '\t'; } /** * Skips any and all characters until the given symbol, a newline character, or * the end of the file is found. * * @param symbol The symbol being researched. Can be \u0000 for none. * @param nl Whether to stop when reaching a newline character. * @param eof Whether to tolerate end of input. * @return The number of tokens skipped. */ protected int skipTo(final char symbol, final boolean nl, final boolean eof) { final TokenStream.Itr itr = this.iterator; if (!itr.hasNext()) { if (eof) return 0; throw this.expectedSymbolOrNL(symbol, nl); } Token t = itr.peek(); Token lastRecorded = this.current; int peekAmount = 1; while (t != null) { // newlines would only be inside of containers for values if (t.isSymbol(symbol) || (nl && t.type() == TokenType.BREAK)) { itr.skip(peekAmount - 1); this.current = lastRecorded; return peekAmount - 1; } else if (!this.consumeWhitespace(t, false)) { lastRecorded = t; } t = itr.peek(++peekAmount); } if (eof) { if (peekAmount > 1) { itr.skip(peekAmount - 1); this.current = lastRecorded; } return peekAmount; } throw this.expectedSymbolOrNL(symbol, nl); } /** * Stores any data above the current value as formatting. */ protected void setAbove() { this.formatting.setLinesAbove(this.takeLinesSkipped()); } /** * Stores any data between the current key and value * as formatting. */ protected void setBetween() { this.formatting.setLinesBetween(this.takeLinesSkipped()); } /** * Stores any data at the end of the current container as * formatting. */ protected void setTrailing() { this.formatting.setLinesTrailing(this.takeLinesSkipped()); } /** * Reads whitespace and consumes it as formatting above any * given value. */ protected void readAbove() { this.readWhitespace(false); this.setAbove(); } /** * Reads whitespace and a separator, consuming any formatting * before the value. * * @param kvSeparator The expected separator, e.g. : * or =. */ protected void readBetween(final char kvSeparator) { this.readWhitespace(); this.expect(kvSeparator); this.readWhitespace(); this.setBetween(); } /** * Reads whitespace after a value, until the end of the line, and * consumes it as formatting, if applicable. */ protected void readAfter() { this.readLineWhitespace(); } /** * Reads whitespace at the bottom of the file and consumes it as * formatting, if applicable. */ protected void readBottom() { this.readWhitespace(false); this.expectEndOfText(); } /** * Returns the current number of {@link #linesSkipped lines skipped}, * resetting the counter to 0. * * @return The current number of lines skipped. */ protected int takeLinesSkipped() { final int skipped = this.linesSkipped; this.linesSkipped = 0; return skipped; } /** * Transfers any formatting data into the given value. * * @param value The value being formatted. * @param The type of value being formatted. * @return The input, value. */ protected T takeFormatting(final T value) { value.setDefaultMetadata(this.formatting); this.clearFormatting(); return value; } /** * Resets the {@link #formatting formatting data} to its default * state. */ protected void clearFormatting() { this.formatting.setLinesTrailing(-1) .setLinesAbove(-1) .setLinesBetween(-1) .setComments(null); } /** * Expects a symbol at the current position, or else throws a syntax * exception. * * @param expected The expected symbol at this position. */ protected void expect(final char expected) { if (!this.readIf(expected)) { throw this.expected(expected); } } /** * Throws a syntax exception if more tokens are found in the input. */ protected void expectEndOfText() { if (!this.isEndOfText()) { throw this.unexpected(this.current.type() + " before end of file"); } } /** * Indicates that either a specific symbol or newline character was * expected at this position. * * @param symbol The expected symbol. * @param nl Whether a newline character would have been accepted. * @return The exception to be thrown. */ protected SyntaxException expectedSymbolOrNL( final char symbol, final boolean nl) { if (nl) { return this.expected("'" + symbol + "' or new line"); } return this.expected(symbol); } /** * Throws an exception if there are more tokens left in the current * container. */ protected void expectEndOfContainer() { if (!this.isEndOfContainer()) { throw this.tokensInContainer(); } } /** * Variant of {@link #expectEndOfContainer()} for non-containerized * input streams. * * @param closer The closing character for this type of container. */ protected void expectEndOfContainer(final char closer) { if (!this.isEndOfContainer(closer)) { throw this.tokensInContainer(); } } /** * Indicates that unexpected tokens were found before the end of the * current container. * * @return The exception to be thrown. */ protected SyntaxException tokensInContainer() { return this.unexpected(this.current.type() + " before end of container (missing delimiter?)"); } /** * Generates a generic "expected" message exception for a specific * symbol. * * @param expected The symbol expected at this position. * @return The exception to be thrown. */ protected SyntaxException expected(final char expected) { return SyntaxException.expected( expected, this.current.line(), this.current.offset()); } /** * Generates a generic "expected" message exception when given * a description of the expected tokens. * * @param expected A description of what was expected. * @return The exception to be thrown. */ protected SyntaxException expected(final String expected) { return SyntaxException.expected( expected, this.current.line(), this.current.offset()); } /** * Generates a generic "unexpected" message exception for a specific * symbol. * * @param unexpected The symbol not expected at this position. * @return The exception to be thrown. */ protected SyntaxException unexpected(final char unexpected) { return SyntaxException.unexpected( unexpected, this.current.line(), this.current.offset()); } /** * Generates a generic "unexpected" message exception when given * a description of the unexpected tokens. * * @param unexpected A description of what was not expected. * @return The exception to be thrown. */ protected SyntaxException unexpected(final String unexpected) { return SyntaxException.unexpected( unexpected, this.current.line(), this.current.offset()); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy