All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.juneau.parser.ParserReader Maven / Gradle / Ivy

There is a newer version: 9.0.1
Show newest version
// ***************************************************************************************************************************
// * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.  See the NOTICE file *
// * distributed with this work for additional information regarding copyright ownership.  The ASF licenses this file        *
// * to you under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance            *
// * with the License.  You may obtain a copy of the License at                                                              *
// *                                                                                                                         *
// *  http://www.apache.org/licenses/LICENSE-2.0                                                                             *
// *                                                                                                                         *
// * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an  *
// * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the License for the        *
// * specific language governing permissions and limitations under the License.                                              *
// ***************************************************************************************************************************
package org.apache.juneau.parser;

import java.io.*;

import org.apache.juneau.internal.*;

/**
 * Similar to a {@link java.io.PushbackReader} with a pushback buffer of 1 character.
 *
 * 

* Code is optimized to work with a 1 character buffer. * *

* Additionally keeps track of current line and column number, and provides the ability to set mark points and capture * characters from the previous mark point. * *

* Warning: Not thread safe. */ public class ParserReader extends Reader implements Positionable { /** Wrapped reader */ protected final Reader r; private char[] buff; // Internal character buffer private int line = 1; // Current line number private int column; // Current column number private int iCurrent = 0; // Current pointer into character buffer private int iMark = -1; // Mark position in buffer private int iEnd = 0; // The last good character position in the buffer private boolean endReached, holesExist; private final boolean unbuffered; /** * Constructor. * * @param pipe The parser input. * @throws IOException */ public ParserReader(ParserPipe pipe) throws IOException { this.unbuffered = pipe.unbuffered; if (pipe.isString()) { String in = pipe.getInputAsString(); this.r = new CharSequenceReader(in); this.buff = new char[in.length() < 1024 ? in.length() : 1024]; } else { Reader _r = pipe.getReader(); if (_r instanceof ParserReader) this.r = ((ParserReader)_r).r; else this.r = _r; this.buff = new char[1024]; } pipe.setPositionable(this); } /** * Reads a single character. * *

* Note that this method does NOT process extended unicode characters (i.e. characters above 0x10000), but rather * returns them as two chars. * Use {@link #readCodePoint()} to ensure proper handling of extended unicode. * * @return The character read, or -1 if the end of the stream has been reached. * @throws IOException If a problem occurred trying to read from the reader. */ @Override /* Reader */ public final int read() throws IOException { int c = readFromBuff(); if (c == -1) return -1; if (c == '\n') { line++; column = 0; } else { column++; } return c; } /** * Same as {@link #read()} but skips over any whitespace characters. * * @return The first non-whitespace character, or -1 if the end of stream reached. * @throws IOException */ public final int readSkipWs() throws IOException { while (true) { int c = read(); if (c == -1 || ! Character.isWhitespace(c)) return c; } } /** * Same as {@link #read()} but detects and combines extended unicode characters (characters above 0x10000). * * @return The character read, or -1 if the end of the stream has been reached. * @throws IOException If a problem occurred trying to read from the reader. */ public final int readCodePoint() throws IOException { int c = read(); // Characters that take up 2 chars. if (c >= 0xd800 && c <= 0xdbff) { int low = read(); if (low >= 0xdc00 && low <= 0xdfff) c = 0x10000 + ((c - 0xd800) << 10) + (low - 0xdc00); } return c; } private final int readFromBuff() throws IOException { while (iCurrent >= iEnd) { if (endReached) return -1; // If there's still space at the end of this buffer, fill it. // Make sure there's at least 2 character spaces free for extended unicode characters. //if (false) { if (iEnd+1 < buff.length) { int x = read(buff, iCurrent, buff.length-iEnd); if (x == -1) { endReached = true; return -1; } iEnd += x; } else { // If we're currently marking, then we want to copy from the current mark point // to the beginning of the buffer and then fill in the remainder of buffer. 
if (iMark >= 0) { // If we're marking from the beginning of the array, we double the size of the // buffer. This isn't likely to occur often. if (iMark == 0) { char[] buff2 = new char[buff.length<<1]; System.arraycopy(buff, 0, buff2, 0, buff.length); buff = buff2; // Otherwise, we copy what's currently marked to the beginning of the buffer. } else { int copyBuff = iMark; System.arraycopy(buff, copyBuff, buff, 0, buff.length - copyBuff); iCurrent -= copyBuff; iMark -= copyBuff; } int expected = buff.length - iCurrent; int x = read(buff, iCurrent, expected); if (x == -1) { endReached = true; iEnd = iCurrent; return -1; } iEnd = iCurrent + x; } else { // Copy the last 10 chars in the buffer to the beginning of the buffer. int copyBuff = Math.min(iCurrent, 10); System.arraycopy(buff, iCurrent-copyBuff, buff, 0, copyBuff); // Number of characters we expect to copy on the next read. int expected = buff.length - copyBuff; int x = read(buff, copyBuff, expected); iCurrent = copyBuff; if (x == -1) { endReached = true; iEnd = iCurrent; return -1; } iEnd = iCurrent + x; } } } return buff[iCurrent++]; } /** * Start buffering the calls to read() so that the text can be gathered from the mark point on calling {@code getFromMarked()}. */ public final void mark() { iMark = iCurrent; } /** * Peeks the next character in the stream. * *

* This is equivalent to doing a {@code read()} followed by an {@code unread()}. * * @return The peeked character, or (char)-1 if the end of the stream has been reached. * @throws IOException If a problem occurred trying to read from the reader. */ public final int peek() throws IOException { int c = read(); if (c != -1) unread(); return c; } /** * Same as {@link #peek()} but skips over any whitespace characters. * *

* This is equivalent to doing a {@code read()} followed by an {@code unread()}. * * @return The peeked character, or (char)-1 if the end of the stream has been reached. * @throws IOException If a problem occurred trying to read from the reader. */ public final int peekSkipWs() throws IOException { while(true) { int c = read(); boolean isWs = Character.isWhitespace(c); if (c != -1 && ! isWs) unread(); if (! isWs) return c; } } /** * Read the specified number of characters off the stream. * * @param num The number of characters to read. * @return The characters packaged as a String. * @throws IOException If a problem occurred trying to read from the reader. */ public final String read(int num) throws IOException { char[] c = new char[num]; for (int i = 0; i < num; i++) { int c2 = read(); if (c2 == -1) return new String(c, 0, i); c[i] = (char)c2; } return new String(c); } /** * Pushes the last read character back into the stream. * * @return This object (for method chaining). * @throws IOException If a problem occurred trying to read from the reader. */ public ParserReader unread() throws IOException { if (iCurrent <= 0) throw new IOException("Buffer underflow."); iCurrent--; if (column == 0) line--; else column--; return this; } /** * No-op. * *

* Input readers are closed in the {@link ParserPipe} class. * * @throws IOException If a problem occurred trying to read from the reader. */ @Override /* Reader */ public void close() throws IOException { // No-op } /** * Returns the contents of the reusable character buffer as a string, and resets the buffer for next usage. * * @return The contents of the reusable character buffer as a string. */ public final String getMarked() { return getMarked(0, 0); } /** * Same as {@link #getMarked()} except allows you to specify offsets into the buffer. * *

* For example, to return the marked string, but trim the first and last characters, call the following: *

* getFromMarked(1, -1); *

* * @param offsetStart The offset of the start position. * @param offsetEnd The offset of the end position. * @return The contents of the reusable character buffer as a string. */ public final String getMarked(int offsetStart, int offsetEnd) { int offset = 0; // Holes are \u00FF 'delete' characters that we need to get rid of now. if (holesExist) { for (int i = iMark; i < iCurrent; i++) { char c = buff[i]; if (c == 127) offset++; else buff[i-offset] = c; } holesExist = false; } int start = iMark + offsetStart, len = iCurrent - iMark + offsetEnd - offsetStart - offset; String s = new String(buff, start, len); iMark = -1; return s; } /** * Trims off the last character in the marking buffer. * *

* Useful for removing escape characters from sequences. * * @return This object (for method chaining). */ public final ParserReader delete() { return delete(1); } /** * Trims off the specified number of last characters in the marking buffer. * Useful for removing escape characters from sequences. * * @param count The number of characters to delete. * @return This object (for method chaining). */ public final ParserReader delete(int count) { for (int i = 0; i < count; i++) buff[iCurrent-i-1] = 127; holesExist = true; return this; } /** * Replaces the last character in the marking buffer with the specified character. * *

* offset must be at least 1 for normal characters, and 2 for extended * unicode characters in order for the replacement to fit into the buffer. * * @param c The new character. * @param offset The offset. * @return This object (for method chaining). * @throws IOException */ public final ParserReader replace(int c, int offset) throws IOException { if (c < 0x10000) { if (offset < 1) throw new IOException("Buffer underflow."); buff[iCurrent-offset] = (char)c; } else { if (offset < 2) throw new IOException("Buffer underflow."); c -= 0x10000; buff[iCurrent-offset] = (char)(0xd800 + (c >> 10)); buff[iCurrent-offset+1] = (char)(0xdc00 + (c & 0x3ff)); offset--; } // Fill in the gap with DEL characters. for (int i = 1; i < offset; i++) buff[iCurrent-i] = 127; holesExist |= (offset > 1); return this; } /** * Replace the last read character in the buffer with the specified character. * * @param c The new character. * @return This object (for method chaining). * @throws IOException */ public final ParserReader replace(char c) throws IOException { return replace(c, 1); } /** * Subclasses can override this method to provide additional filtering. * *

* Default implementation simply calls the same method on the underlying reader. */ @Override /* Reader */ public int read(char[] cbuf, int off, int len) throws IOException { return unbuffered ? r.read(cbuf, off, 1) : r.read(cbuf, off, len); } @Override /* Positionable */ public Position getPosition() { return new Position(line, column); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy