org.apache.juneau.parser.ParserReader Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of juneau-all
Apache Juneau UberJar
There is a newer version: 9.0.1
// ***************************************************************************************************************************
// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file *
// * distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file        *
// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance            *
// * with the License.  You may obtain a copy of the License at                                                              *
// *                                                                                                                         *
// *  http://www.apache.org/licenses/LICENSE-2.0                                                                             *
// *                                                                                                                         *
// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an  *
// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the        *
// * specific language governing permissions and limitations under the License.                                              *
// ***************************************************************************************************************************
package org.apache.juneau.parser;

import java.io.*;

import org.apache.juneau.internal.*;

/**
 * Similar to a {@link java.io.PushbackReader} with a pushback buffer of 1 character.
 *
 * <p>
 * Code is optimized to work with a 1 character buffer.
 *
 * <p>
 * Additionally keeps track of the current line and column number, and provides the ability to set mark points and
 * capture characters from the previous mark point.
 *
 * <p>
 * <b>Warning:</b>  Not thread safe.
 */
public class ParserReader extends Reader implements Positionable {

	/** Size of the internal character buffer when reading from a stream (and the cap when reading from a string). */
	private static final int DEFAULT_BUFF_SIZE = 1024;

	/** Maximum number of already-read characters retained at the front of the buffer on refill (when not marking). */
	private static final int RETAINED_ON_REFILL = 10;

	/**
	 * Placeholder (ASCII DEL, <code>\u007F</code>) written over characters removed from the marked region.
	 * These "holes" are squeezed out by {@link #getMarked(int, int)}.
	 */
	private static final char HOLE = 127;

	/** Wrapped reader */
	protected final Reader r;

	private char[] buff;       // Internal character buffer
	private int line = 1;      // Current line number
	private int column;        // Current column number
	private int iCurrent = 0;  // Current pointer into character buffer
	private int iMark = -1;    // Mark position in buffer (-1 when not marking)
	private int iEnd = 0;      // One past the last good character position in the buffer
	private boolean endReached, holesExist;
	private final boolean unbuffered;

	/**
	 * Constructor.
	 *
	 * <p>
	 * Registers this reader on the pipe as its {@link Positionable}.
	 * If the pipe's reader is itself a {@link ParserReader}, its wrapped reader is used directly.
	 *
	 * @param pipe The parser input.
	 * @throws IOException If the input could not be retrieved from the pipe.
	 */
	public ParserReader(ParserPipe pipe) throws IOException {
		this.unbuffered = pipe.unbuffered;
		if (pipe.isString()) {
			String in = pipe.getInputAsString();
			this.r = new CharSequenceReader(in);
			// No point in allocating more buffer than the string can fill.
			this.buff = new char[Math.min(in.length(), DEFAULT_BUFF_SIZE)];
		} else {
			Reader _r = pipe.getReader();
			if (_r instanceof ParserReader)
				this.r = ((ParserReader)_r).r;
			else
				this.r = _r;
			this.buff = new char[DEFAULT_BUFF_SIZE];
		}
		pipe.setPositionable(this);
	}

	/**
	 * Reads a single character.
	 *
	 * <p>
	 * Note that this method does NOT process extended unicode characters (i.e. characters above 0x10000), but rather
	 * returns them as two chars.
	 * Use {@link #readCodePoint()} to ensure proper handling of extended unicode.
	 *
	 * <p>
	 * Updates the tracked line/column position: a newline increments the line and resets the column to zero; any
	 * other character increments the column.
	 *
	 * @return The character read, or -1 if the end of the stream has been reached.
	 * @throws IOException If a problem occurred trying to read from the reader.
	 */
	@Override /* Reader */
	public final int read() throws IOException {
		int c = readFromBuff();
		if (c == -1)
			return -1;
		if (c == '\n') {
			line++;
			column = 0;
		} else {
			column++;
		}
		return c;
	}

	/**
	 * Same as {@link #read()} but skips over any whitespace characters.
	 *
	 * @return The first non-whitespace character, or -1 if the end of stream reached.
	 * @throws IOException If a problem occurred trying to read from the reader.
	 */
	public final int readSkipWs() throws IOException {
		while (true) {
			int c = read();
			if (c == -1 || ! Character.isWhitespace(c))
				return c;
		}
	}

	/**
	 * Same as {@link #read()} but detects and combines extended unicode characters (characters above 0x10000).
	 *
	 * <p>
	 * If a high surrogate is not followed by a low surrogate, the high surrogate is returned as-is and the following
	 * character is pushed back so that it is returned by the next read.
	 *
	 * @return The character read, or -1 if the end of the stream has been reached.
	 * @throws IOException If a problem occurred trying to read from the reader.
	 */
	public final int readCodePoint() throws IOException {
		int c = read();

		// Characters that take up 2 chars.
		if (c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_HIGH_SURROGATE) {
			int low = read();
			if (low >= Character.MIN_LOW_SURROGATE && low <= Character.MAX_LOW_SURROGATE)
				c = Character.toCodePoint((char)c, (char)low);
			else if (low != -1)
				unread();  // Unpaired high surrogate: don't swallow the character that followed it.
		}

		return c;
	}

	/**
	 * Returns the next character from the internal buffer, refilling it from {@link #read(char[], int, int)}
	 * when exhausted.
	 *
	 * <p>
	 * On refill, either the marked region (when marking) or up to the last {@value #RETAINED_ON_REFILL} characters
	 * (when not marking) are moved to the front of the buffer so that they remain available.
	 *
	 * @return The next character, or -1 if the end of the stream has been reached.
	 * @throws IOException If a problem occurred trying to read from the reader.
	 */
	private final int readFromBuff() throws IOException {
		while (iCurrent >= iEnd) {
			if (endReached)
				return -1;

			// If there's still space at the end of this buffer, fill it.
			// Make sure there's at least 2 character spaces free for extended unicode characters.
			if (iEnd+1 < buff.length) {
				// iCurrent never exceeds iEnd, so inside this loop the two are equal and we append at the end.
				int x = read(buff, iCurrent, buff.length-iEnd);
				if (x == -1) {
					endReached = true;
					return -1;
				}
				iEnd += x;

			} else {
				// If we're currently marking, then we want to copy from the current mark point
				// to the beginning of the buffer and then fill in the remainder of buffer.
				if (iMark >= 0) {

					// If we're marking from the beginning of the array, we double the size of the
					// buffer.  This isn't likely to occur often.
					if (iMark == 0) {
						char[] buff2 = new char[buff.length<<1];
						System.arraycopy(buff, 0, buff2, 0, buff.length);
						buff = buff2;

					// Otherwise, we copy what's currently marked to the beginning of the buffer.
					} else {
						int copyBuff = iMark;
						System.arraycopy(buff, copyBuff, buff, 0, buff.length - copyBuff);
						iCurrent -= copyBuff;
						iMark -= copyBuff;
					}
					int expected = buff.length - iCurrent;

					int x = read(buff, iCurrent, expected);
					if (x == -1) {
						endReached = true;
						iEnd = iCurrent;
						return -1;
					}
					iEnd = iCurrent + x;
				} else {
					// Copy the last few chars in the buffer to the beginning of the buffer.
					int copyBuff = Math.min(iCurrent, RETAINED_ON_REFILL);
					System.arraycopy(buff, iCurrent-copyBuff, buff, 0, copyBuff);

					// Number of characters we expect to copy on the next read.
					int expected = buff.length - copyBuff;
					int x = read(buff, copyBuff, expected);
					iCurrent = copyBuff;
					if (x == -1) {
						endReached = true;
						iEnd = iCurrent;
						return -1;
					}
					iEnd = iCurrent + x;
				}
			}
		}
		return buff[iCurrent++];
	}

	/**
	 * Start buffering the calls to read() so that the text can be gathered from the mark point on calling
	 * {@link #getMarked()}.
	 */
	public final void mark() {
		iMark = iCurrent;
	}

	/**
	 * Peeks the next character in the stream.
	 *
	 * <p>
	 * This is equivalent to doing a {@code read()} followed by an {@code unread()}.
	 *
	 * @return The peeked character, or -1 if the end of the stream has been reached.
	 * @throws IOException If a problem occurred trying to read from the reader.
	 */
	public final int peek() throws IOException {
		int c = read();
		if (c != -1)
			unread();
		return c;
	}

	/**
	 * Same as {@link #peek()} but skips over any whitespace characters.
	 *
	 * <p>
	 * Leading whitespace is consumed; only the first non-whitespace character is pushed back.
	 *
	 * @return The peeked character, or -1 if the end of the stream has been reached.
	 * @throws IOException If a problem occurred trying to read from the reader.
	 */
	public final int peekSkipWs() throws IOException {
		while (true) {
			int c = read();
			if (c == -1)
				return -1;
			if (! Character.isWhitespace(c)) {
				unread();
				return c;
			}
		}
	}

	/**
	 * Read the specified number of characters off the stream.
	 *
	 * <p>
	 * If the end of the stream is reached first, only the characters read so far are returned.
	 *
	 * @param num The number of characters to read.
	 * @return The characters packaged as a String.
	 * @throws IOException If a problem occurred trying to read from the reader.
	 */
	public final String read(int num) throws IOException {
		char[] c = new char[num];
		for (int i = 0; i < num; i++) {
			int c2 = read();
			if (c2 == -1)
				return new String(c, 0, i);
			c[i] = (char)c2;
		}
		return new String(c);
	}

	/**
	 * Pushes the last read character back into the stream.
	 *
	 * <p>
	 * The position is rolled back as well: if the column is zero the line number is decremented, otherwise the
	 * column is decremented.
	 *
	 * @return This object (for method chaining).
	 * @throws IOException If there is no previously-read character in the buffer to push back.
	 */
	public ParserReader unread() throws IOException {
		if (iCurrent <= 0)
			throw new IOException("Buffer underflow.");
		iCurrent--;
		if (column == 0)
			line--;
		else
			column--;
		return this;
	}

	/**
	 * No-op.
	 *
	 * <p>
	 * Input readers are closed in the {@link ParserPipe} class.
	 *
	 * @throws IOException Never thrown by this implementation.
	 */
	@Override /* Reader */
	public void close() throws IOException {
		// No-op
	}

	/**
	 * Returns the characters read since the last call to {@link #mark()} as a string, and clears the mark.
	 *
	 * @return The characters read since the mark point.
	 */
	public final String getMarked() {
		return getMarked(0, 0);
	}

	/**
	 * Same as {@link #getMarked()} except allows you to specify offsets into the buffer.
	 *
	 * <p>
	 * For example, to return the marked string, but trim the first and last characters, call the following:
	 * <p class='bcode'>
	 * 	getMarked(1, -1);
	 * </p>
	 *
	 * @param offsetStart The offset of the start position.
	 * @param offsetEnd The offset of the end position.
	 * @return The characters read since the mark point, adjusted by the specified offsets.
	 */
	public final String getMarked(int offsetStart, int offsetEnd) {
		int offset = 0;

		// Holes are \u007F 'delete' characters that we need to get rid of now.
		// Compact the marked region in place, shifting kept characters left over the holes.
		if (holesExist) {
			for (int i = iMark; i < iCurrent; i++) {
				char c = buff[i];
				if (c == HOLE)
					offset++;
				else
					buff[i-offset] = c;
			}
			holesExist = false;
		}
		int start = iMark + offsetStart, len = iCurrent - iMark + offsetEnd - offsetStart - offset;
		String s = new String(buff, start, len);
		iMark = -1;
		return s;
	}

	/**
	 * Trims off the last character in the marking buffer.
	 *
	 * <p>
	 * Useful for removing escape characters from sequences.
	 *
	 * @return This object (for method chaining).
	 */
	public final ParserReader delete() {
		return delete(1);
	}

	/**
	 * Trims off the specified number of last characters in the marking buffer.
	 *
	 * <p>
	 * Useful for removing escape characters from sequences.
	 * The characters are overwritten with hole markers which are removed when {@link #getMarked(int, int)} is called.
	 *
	 * @param count The number of characters to delete.
	 * @return This object (for method chaining).
	 */
	public final ParserReader delete(int count) {
		for (int i = 0; i < count; i++)
			buff[iCurrent-i-1] = HOLE;
		holesExist = true;
		return this;
	}

	/**
	 * Replaces the last <code>offset</code> characters in the marking buffer with the specified character.
	 *
	 * <p>
	 * <code>offset</code> must be at least 1 for normal characters, and 2 for extended
	 * unicode characters in order for the replacement to fit into the buffer.
	 *
	 * @param c The new character.
	 * @param offset The number of most-recently-read characters to replace.
	 * @return This object (for method chaining).
	 * @throws IOException If <code>offset</code> is too small for the replacement to fit.
	 */
	public final ParserReader replace(int c, int offset) throws IOException {
		if (c < 0x10000) {
			if (offset < 1)
				throw new IOException("Buffer underflow.");
			buff[iCurrent-offset] = (char)c;
		} else {
			if (offset < 2)
				throw new IOException("Buffer underflow.");
			// Encode as a surrogate pair occupying two buffer positions.
			c -= 0x10000;
			buff[iCurrent-offset] = (char)(0xd800 + (c >> 10));
			buff[iCurrent-offset+1] = (char)(0xdc00 + (c & 0x3ff));
			offset--;
		}
		// Fill in the gap with DEL characters.
		for (int i = 1; i < offset; i++)
			buff[iCurrent-i] = HOLE;
		holesExist |= (offset > 1);
		return this;
	}

	/**
	 * Replace the last read character in the buffer with the specified character.
	 *
	 * @param c The new character.
	 * @return This object (for method chaining).
	 * @throws IOException Declared for consistency with {@link #replace(int, int)}.
	 */
	public final ParserReader replace(char c) throws IOException {
		return replace(c, 1);
	}

	/**
	 * Subclasses can override this method to provide additional filtering.
	 *
	 * <p>
	 * Default implementation simply calls the same method on the underlying reader.
	 * In unbuffered mode, at most one character is requested per call.
	 *
	 * @param cbuf The destination buffer.
	 * @param off The offset at which to start storing characters.
	 * @param len The maximum number of characters to read.
	 * @return The number of characters read, or -1 if the end of the stream has been reached.
	 * @throws IOException If a problem occurred trying to read from the reader.
	 */
	@Override /* Reader */
	public int read(char[] cbuf, int off, int len) throws IOException {
		return unbuffered ? r.read(cbuf, off, 1) : r.read(cbuf, off, len);
	}

	/**
	 * Returns the current line and column of this reader.
	 *
	 * @return A new {@link Position} built from the current line and column numbers.
	 */
	@Override /* Positionable */
	public Position getPosition() {
		return new Position(line, column);
	}
}