xjs.serialization.util.PositionTrackingReader Maven / Gradle / Ivy
Show all versions of xjs-core Show documentation
package xjs.serialization.util;
import xjs.comments.CommentStyle;
import xjs.exception.SyntaxException;
import xjs.serialization.token.CommentToken;
import java.io.*;
import java.nio.charset.StandardCharsets;
/**
* A highly optimized counterpart to {@link Reader} which reduces the amount of
* read operations to be performed on the underlying input stream. This
* implementation is not thread-safe. It deliberately deviates from
* the contract of {@link Reader}. In particular, it does not flush or close
* the wrapped reader, nor does it ensure that the wrapped reader is open.
*/
public abstract class PositionTrackingReader implements Closeable {
protected static final int DEFAULT_BUFFER_SIZE = 1024;
protected static final int MIN_BUFFER_SIZE = 8;
/**
* The index of the current character, starting at 0.
*/
public int index;
/**
* The line number, starting at 0.
*/
public int line;
/**
* The column of the current character, starting at 0.
*/
public int column;
/**
* The number of lines read since the last time whitespace was skipped.
*
* Note: This API may change in the future.
*/
public int linesSkipped;
/**
* The current character, or else -1.
*/
public int current;
protected StringBuilder capture;
protected int captureStart;
protected PositionTrackingReader() {
this.index = -1;
this.line = 0;
this.column = -1;
this.captureStart = -1;
}
/**
* Generates an efficient reader optimized for existing strings.
*
* @param s The full text being parsed.
* @return A new reader for parsers and tokenizers.
*/
public static PositionTrackingReader fromString(final String s) {
return new DirectStringReader(s);
}
/**
* Generates a reader optimized for {@link InputStream}s and disk IO.
*
*
Unlike {@link Reader}, users should be aware that read calls
* are not synchronized. This utility is not thread safe,
* does not flush the underlying {@link InputStream}, and deliberately
* dodges some of the {@link Reader} API contract.
*
* @param is The source of characters being iterated over.
* @return A new reader for parsers and tokenizers.
* @throws IOException If the initial read operation fails.
*/
public static PositionTrackingReader fromIs(final InputStream is) throws IOException {
return new DirectInputStreamReader(is, DEFAULT_BUFFER_SIZE, false);
}
/**
* Variant of {@link #fromIs(InputStream)} specifying whether to capture
* the full text of the input. This text can be returned at any point
* by calling {@link #getFullText}. Callers should be aware that the
* text output will change as the reader progresses.
*
* @param is The source of characters being iterated over.
* @param captureFullText Whether to preserve the full text input.
* @return A new reader for parsers and tokenizers.
* @throws IOException If the initial read operation fails.
*/
public static PositionTrackingReader fromIs(
final InputStream is, final boolean captureFullText) throws IOException {
return new DirectInputStreamReader(is, DEFAULT_BUFFER_SIZE, captureFullText);
}
/**
* Variant of {@link #fromIs(InputStream, boolean)} specifying the size
* of the buffer used by the reader.
*
* @param is The source of characters being iterated over.
* @param size The size of the underlying character buffer.
* @param captureFullText Whether to preserve the full text input.
* @return A new reader for parsers and tokenizers.
* @throws IOException If the initial read operation fails.
*/
public static PositionTrackingReader fromIs(
final InputStream is, final int size, final boolean captureFullText) throws IOException {
if (size < MIN_BUFFER_SIZE) {
throw new IllegalArgumentException("buffer size < " + MIN_BUFFER_SIZE);
}
return new DirectInputStreamReader(is, size, captureFullText);
}
/**
* Returns the full text of the input, or as much as has been read up
* to this point.
*
*
Users should be aware that the return value of this method may
* mutate over time.
*
* @return The full text of the input.
* @throws UnsupportedOperationException If the reader is not configured
* to capture the full text input.
*/
public abstract CharSequence getFullText();
protected abstract void appendToCapture();
protected abstract String slice();
/**
* Advances the reader by a single character.
*
* @throws IOException If the underlying reader throws an exception.
*/
public abstract void read() throws IOException;
/**
* Advances the reader by a single character, if the current
* character equals the input.
*
* @param c The expected character at this position.
* @return True, if the current character matches the input.
* @throws IOException If the underlying reader throws an exception.
*/
public boolean readIf(final char c) throws IOException {
if (this.current != c) {
return false;
}
this.read();
return true;
}
/**
* Advances the reader by a single character, if the current
* character equals the input. If the input does not match,
* a syntax exception will be throws.
*
* @param c The expected character at this position.
* @throws IOException If the underlying reader throws an exception.
*/
public void expect(final char c) throws IOException {
if (!this.readIf(c)) {
throw this.expected(c);
}
}
/**
* Advances the reader to the very end of the input.
*
* @return The full text input to the reader.
* @throws IOException If the underlying reader throws an exception.
*/
public String readToEnd() throws IOException {
do {
this.read();
} while (!this.isEndOfText());
return this.getFullText().toString();
}
/**
* Marks the current position as the beginning of a capture.
*
*
The end of this capture can be exposed by calling {@link #endCapture}.
*/
public void startCapture() {
if (this.capture == null) {
this.capture = new StringBuilder();
}
this.captureStart = this.index;
}
/**
* Excludes a sequence of characters from the capture output.
*/
public void pauseCapture() {
this.appendToCapture();
this.captureStart = -1;
}
/**
* Terminates the capture, generating new string value from the contents.
*
* @return The full text of the capture buffer.
*/
public String endCapture() {
return this.endCapture(this.index);
}
/**
* Terminates the capture, generating new string value from the contents.
*
* @param idx The last index to include in the capture.
* @return The full text of the capture buffer.
*/
public String endCapture(final int idx) {
final String captured;
if (this.capture.length() > 0) {
this.appendToCapture();
if (idx < this.index) {
this.capture.setLength(
this.capture.length() - (this.index - idx));
}
captured = this.capture.toString();
this.capture.setLength(0);
} else {
final int tmp = this.index;
this.index = idx;
captured = this.slice();
this.index = tmp;
}
this.captureStart = -1;
return captured;
}
/**
* Skips all whitespace from the current point.
*
* @throws IOException If the underlying reader throws an exception.
*/
public void skipWhitespace() throws IOException {
this.skipWhitespace(true);
}
/**
* Skips whitespace and optionally resets the {@link #linesSkipped}
* counter to 0.
*
* @param reset Whether to reset the {@link #linesSkipped} counter.
* @throws IOException If the underlying reader throws an exception.
*/
public void skipWhitespace(final boolean reset) throws IOException {
if (reset) {
this.linesSkipped = 0;
}
while (this.isWhitespace()) {
this.read();
}
}
/**
* Skips all whitespace without advancing to the next line.
*
* @throws IOException If the underlying reader throws an exception.
*/
public void skipLineWhitespace() throws IOException {
while (this.isLineWhitespace()) {
this.read();
}
}
/**
* Skips the given number of whitespace characters.
*
* @param offset The offset (e.g. expected column) to read to.
* @return The number of whitespace characters skipped.
* @throws IOException If the underlying reader throws an exception.
*/
public int skipToOffset(final int offset) throws IOException {
for (int i = 0; i < offset; i++) {
if (!this.isLineWhitespace()) {
return i + 1;
}
this.read();
}
return offset;
}
/**
* Skips all characters until the next new line or end of input.
*
* @return The last index of a non-whitespace character.
* @throws IOException If the underlying reader throws an exception.
*/
public int skipToNL() throws IOException {
int last = this.index;
while (this.current != '\n' && this.current != -1) {
if (!this.isLineWhitespace()) {
last = this.index;
}
this.read();
}
return last;
}
/**
* Advances the reader if the current character is a digit.
*
* @return true, if the current character is a digit.
* @throws IOException If the underlying reader throws an exception.
*/
public boolean readDigit() throws IOException {
if (!this.isDigit()) {
return false;
}
this.read();
return true;
}
/**
* Reads all possible digits from the current point.
*
* @throws IOException If the underlying reader throws an exception.
*/
public void readAllDigits() throws IOException {
while (this.isDigit()) {
this.read();
}
}
/**
* Reads a formatted number from the current index. If the reader
* encounters a syntax error, a {@link SyntaxException} will be
* thrown.
*
* @return The parsed number.
* @throws IOException If the underlying reader throws an exception.
*/
public double readNumber() throws IOException {
this.startCapture();
this.readIf('-');
final int firstDigit = this.current;
if (!this.readDigit()) {
throw this.expected("digit");
}
if (firstDigit != '0') {
this.readAllDigits();
}
this.readDecimal();
this.readExponent();
return Double.parseDouble(this.endCapture());
}
/**
* Reads a decimal value as part of a number. If a decimal
* is found but further digits are not, a {@link
* SyntaxException} will be thrown.
*
* @return true, if a decimal was found.
* @throws IOException If the underlying reader throws an exception.
*/
public boolean readDecimal() throws IOException {
if (!this.readIf('.')) {
return false;
}
if (!this.readDigit()) {
throw this.expected("digit");
}
this.readAllDigits();
return true;
}
/**
* Reads an exponent value as part of a number. If an exponent
* indicator is found, but further digits are not, a
* {@link SyntaxException} will be thrown.
*
* @return true, if an exponent was found.
* @throws IOException If the underlying reader throws an exception.
*/
public boolean readExponent() throws IOException {
if (!this.readIf('e') && !this.readIf('E')) {
return false;
}
if (!this.readIf('+')) {
this.readIf('-');
}
if (!this.readDigit()) {
throw this.expected("digit");
}
this.readAllDigits();
return true;
}
/**
* Reads a single or double-quoted string from the current index.
* If the reader encounters a syntax error, a {@link SyntaxException}
* will be thrown.
*
* @param quote The type of quote being read.
* @return The parsed contents of this string.
* @throws IOException If the underlying reader throws an exception.
*/
public String readQuoted(final char quote) throws IOException {
this.read();
this.startCapture();
while (this.current != quote) {
if (this.current == '\\') {
this.pauseCapture();
this.readEscape(quote);
this.startCapture();
} else if (this.current < 0x20) {
throw this.expected("valid string character");
} else {
this.read();
}
}
final String string = this.endCapture();
this.read();
return string;
}
protected void readEscape(final char quote) throws IOException {
this.read();
if (this.current == quote) {
this.capture.append(quote);
this.read();
return;
}
switch (this.current) {
case '"':
case '/':
case '\\':
this.capture.append((char) this.current);
break;
case 'b':
this.capture.append('\b');
break;
case 'f':
this.capture.append('\f');
break;
case 'n':
this.capture.append('\n');
break;
case 'r':
this.capture.append('\r');
break;
case 't':
this.capture.append('\t');
break;
case 'u':
final char[] hexChars = new char[4];
for(int i = 0; i < 4; i++) {
this.read();
if (!isHexDigit()) {
throw this.expected("hexadecimal digit");
}
hexChars[i] = (char) this.current;
}
this.capture.append((char) Integer.parseInt(new String(hexChars), 16));
break;
default:
throw this.expected("valid escape sequence");
}
this.read();
}
/**
* Reads a standard multiline string in XJS or Hjson format.
*
* @param readQuotes Whether to read the first three quotes of the token.
* @return The parsed text of the token.
* @throws IOException If the underlying reader throws an exception.
*/
public String readMulti(final boolean readQuotes) throws IOException {
if (readQuotes) {
this.expect('\'');
this.expect('\'');
this.expect('\'');
}
final StringBuilder sb = new StringBuilder();
final int offset = this.column - 3;
this.skipLineWhitespace();
if (this.current == '\n') {
this.read();
this.skipToOffset(offset);
}
int triple = 0;
while (true) {
if (this.current < 0) {
throw this.expected("end of multiline string (''')");
} else if (this.current == '\'') {
triple++;
this.read();
if (triple == 3) {
if (sb.length() > 0 && sb.charAt(sb.length() - 1) == '\n') {
sb.deleteCharAt(sb.length() - 1);
}
return sb.toString();
} else {
continue;
}
} else {
while (triple > 0) {
sb.append('\'');
triple--;
}
}
if (this.current == '\n') {
sb.append('\n');
this.read();
this.skipToOffset(offset);
} else {
if (this.current != '\r') {
sb.append((char) this.current);
}
this.read();
}
}
}
public CommentToken readLineComment() throws IOException {
final int s = this.index - 1;
final int o = this.column - 1;
this.expect('/');
if (this.readIf('/')) {
return this.readSingleComment(CommentStyle.LINE_DOC, s, o);
}
return this.readSingleComment(CommentStyle.LINE, s, o);
}
public CommentToken readHashComment() throws IOException {
this.expect('#');
return this.readSingleComment(
CommentStyle.HASH, this.index - 1, this.column - 1);
}
private CommentToken readSingleComment(
final CommentStyle type, final int s, final int o) throws IOException {
if (this.isLineWhitespace()) {
this.read();
}
this.startCapture();
final int e = this.skipToNL() + 1;
return new CommentToken(s, e, this.line, o, type, this.endCapture(e));
}
public CommentToken readBlockComment() throws IOException {
final int s = this.index - 1;
final int o = this.column - 1;
this.expect('*');
if (this.readIf('*')) {
return this.readMultiComment(CommentStyle.MULTILINE_DOC, s, o);
}
return this.readMultiComment(CommentStyle.BLOCK, s, o);
}
private CommentToken readMultiComment(
final CommentStyle type, final int s, final int o) throws IOException {
final int line = this.line;
this.skipLineWhitespace();
this.readIf('\n');
final CharSequence reference = this.getFullText();
final StringBuilder output = new StringBuilder();
while (true) {
if (this.current == -1) {
throw this.expected("end of comment (*/)");
}
final int lineStart = this.skipToBlockLineStart();
if (lineStart == -1) {
this.expect('/');
break;
}
this.appendLine(reference, output, lineStart);
if (this.readIf('/')) {
break;
}
if (this.current != -1) {
this.read();
}
}
final int len = output.length();
if (len > 0 && output.charAt(len - 1) == '\n') {
output.setLength(len - 1);
}
return new CommentToken(
s, this.index, line, this.line, o, type, output.toString());
}
protected int skipToBlockLineStart() throws IOException {
this.skipLineWhitespace();
if (this.current == '*') {
this.read();
if (this.current == '/') {
return -1;
}
if (this.isLineWhitespace()) {
this.read();
}
}
return this.index;
}
protected void appendLine(
final CharSequence reference, final StringBuilder output, final int lineStart) throws IOException {
int lastChar = this.index;
while (this.current != -1) {
if (this.current == '\n') {
output.append(reference, lineStart, lastChar + 1);
if (this.index > lineStart) {
output.append('\n');
}
return;
} else if (this.current == '*') {
this.read();
if (this.current == '/') {
output.append(reference, lineStart, lastChar + 1);
return;
}
lastChar = this.index - 1;
continue;
} else if (!this.isWhitespace()) {
lastChar = this.index;
}
this.read();
}
}
/**
* Returns whether the current character is any form of non line break
* character.
*
* @return true, if the current character is line whitespace.
*/
public boolean isLineWhitespace() {
return this.current == ' ' || this.current == '\t' || this.current == '\r';
}
/**
* Returns whether the current character is any kind of
* whitespace character.
*
* @return true, if the current character is whitespace.
*/
public boolean isWhitespace() {
return this.current == ' '
|| this.current == '\t'
|| this.current == '\n'
|| this.current == '\r';
}
/**
* Returns whether the current character is a digit.
*
* @return true, if the current character is a digit.
*/
public boolean isDigit() {
return this.current >= '0' && this.current <= '9';
}
/**
* Returns whether the current character is a hex digit.
*
* @return true, if the current character is a hex digit.
*/
public boolean isHexDigit() {
return this.current >= '0' && this.current <= '9'
|| this.current >= 'a' && this.current <= 'f'
|| this.current >= 'A' && this.current <= 'F';
}
/**
* Returns whether the reader has run out of text to read.
*
* @return true, if the reader has run out of text to read.
*/
public boolean isEndOfText() {
return this.current == -1;
}
/**
* Returns a syntax exception indicating that a character was
* expected at the current position.
*
* @param expected The expected character.
* @return A new syntax exception.
*/
public SyntaxException expected(final char expected) {
return SyntaxException.expected(expected, this.line, this.column);
}
/**
* Returns a syntax exception indicating that something
* was expected.
*
* @param expected A description of the expected characters.
* @return A new syntax exception.
*/
public SyntaxException expected(final String expected) {
return SyntaxException.expected(expected, this.line, this.column);
}
/**
* Returns a syntax exception indicating that the current character
* was not expected.
*
* @return A new syntax exception.
*/
public SyntaxException unexpected() {
return this.unexpected((char) this.current);
}
/**
* Returns a syntax exception indicating that a character was
* not expected at the current position.
*
* @param unexpected The expected character.
* @return A new syntax exception.
*/
public SyntaxException unexpected(final char unexpected) {
return SyntaxException.unexpected(unexpected, this.line, this.column);
}
/**
* Returns a syntax exception indicating that something was not
* expected at the current position.
*
* @param unexpected A description of the unexpected characters.
* @return A new syntax exception.
*/
public SyntaxException unexpected(final String unexpected) {
return SyntaxException.unexpected(unexpected, this.line, this.column);
}
private static class DirectInputStreamReader extends PositionTrackingReader {
final InputStreamReader reader;
final char[] buffer;
StringBuilder out;
int bufferIndex;
int fill;
DirectInputStreamReader(
final InputStream is, final int size, final boolean captureFullText) throws IOException {
this.reader = new InputStreamReader(is, StandardCharsets.UTF_8);
this.buffer = new char[size];
this.bufferIndex = 0;
this.fill = 0;
if (captureFullText) this.out = new StringBuilder();
this.read();
}
@Override
public CharSequence getFullText() {
if (this.out == null) {
throw new UnsupportedOperationException("output not configured");
}
return this.out;
}
public void startCapture() {
if (this.capture == null) {
this.capture = new StringBuilder();
}
this.captureStart = this.bufferIndex - 1;
}
@Override
protected void appendToCapture() {
final int end = this.current == -1 ? this.bufferIndex : this.bufferIndex - 1;
this.capture.append(this.buffer, this.captureStart, end - this.captureStart);
}
@Override
protected String slice() {
final int end = this.current == -1 ? this.bufferIndex : this.bufferIndex - 1;
return new String(this.buffer, this.captureStart, end - this.captureStart);
}
public void read() throws IOException {
if (this.bufferIndex == this.fill) {
if (this.captureStart != -1) {
this.bufferIndex++; // Force the last index into capture
this.appendToCapture();
this.captureStart = 0;
}
this.fill = this.reader.read(this.buffer, 0, this.buffer.length);
this.bufferIndex = 0;
if (this.fill == -1) {
this.index++;
this.current = -1;
return;
}
if (this.out != null) {
this.out.append(this.buffer, 0, this.fill);
}
}
if (this.current == '\n') {
this.line++;
this.linesSkipped++;
this.column = -1;
}
this.index++;
this.column++;
this.current = this.buffer[this.bufferIndex++];
}
@Override
public void close() throws IOException {
this.reader.close();
}
}
private static class DirectStringReader extends PositionTrackingReader {
final String s;
DirectStringReader(final String s) {
this.s = s;
this.read();
}
@Override
public String getFullText() {
return this.s;
}
@Override
protected void appendToCapture() {
this.capture.append(this.s, this.captureStart, this.index);
}
@Override
protected String slice() {
return this.s.substring(this.captureStart, this.index);
}
@Override
public void read() {
if (this.index == this.s.length() - 1) {
this.index = this.s.length();
this.current = -1;
return;
}
if (this.current == '\n') {
this.line++;
this.linesSkipped++;
this.column = -1;
}
this.column++;
this.current = this.s.charAt(++this.index);
}
@Override
public void close() {}
}
}