org.apache.jena.atlas.io.PeekReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jena-base Show documentation
Show all versions of jena-base Show documentation
This module contains non-RDF library code and the common system runtime.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.atlas.io;
import static org.apache.jena.atlas.io.IO.EOF;
import static org.apache.jena.atlas.io.IO.UNSET;
import java.io.*;
import org.apache.jena.atlas.AtlasException;
import org.apache.jena.atlas.lib.Chars;
/**
* Parsing-centric reader. This class is not thread safe.
* @see PeekInputStream
*/
public final class PeekReader extends Reader {
    // Remember to apply fixes to PeekInputStream as well.

    // Buffering is done by a CharStream - does it make a difference?
    // Yes. A lot (Java6).

    // Using a Reader here seems to have zero cost or benefit but CharStream
    // allows fast String handling.
    private final CharStream source;

    /** Initial capacity of the pushback buffer; the buffer doubles when it fills up. */
    private static final int PUSHBACK_SIZE = 10;
    // NUL. Declared as a byte holding the constant (char)0; the declaration is
    // kept as-is so any package-level user continues to compile.
    static final byte CHAR0 = (char)0;

    // Pushback stack: [0] is the oldest pushed-back character.
    private char[] pushbackChars;
    // Index of the most recently pushed-back character (the next one to be
    // returned by a read). -1 => no pushed-back characters.
    private int idxPushback;

    // Next character to return when reading forwards from the source.
    // UNSET until the first peek/read triggers init().
    private int currChar = UNSET;

    // Number of characters consumed from the source (pushback reads excluded).
    private long posn;

    public static final int INIT_LINE = 1;
    public static final int INIT_COL = 1;

    private long colNum;
    private long lineNum;

    // ---- static construction methods.

    /**
     * Wrap a {@link Reader} in a PeekReader using the default buffer size.
     * If {@code r} is already a PeekReader it is returned unchanged.
     */
    public static PeekReader make(Reader r) {
        if ( r instanceof PeekReader pr )
            return pr;
        return make(r, CharStreamBuffered.CB_SIZE);
    }

    /**
     * Wrap a {@link Reader} in a new PeekReader with the given buffer size.
     * Unlike {@link #make(Reader)}, this always creates a new PeekReader.
     */
    public static PeekReader make(Reader r, int bufferSize) {
        // It is worth our own buffering even if a BufferedReader
        // because of the synchronized on one char reads in BufferedReader.
        return new PeekReader(new CharStreamBuffered(r, bufferSize));
    }

    /** Make PeekReader where the input is UTF8 : BOM is removed */
    public static PeekReader makeUTF8(InputStream in) {
        // This is the best route to make a PeekReader because it avoids
        // chances of wrong charset for a Reader say.
        //
        // An alternative route (InputStreamBuffered + InStreamUTF8 passed
        // straight to the constructor) was measured as a bit slower - reason
        // unknown - so the Reader route, which adds reader-level buffering,
        // is the one used.
        Reader r = IO.asUTF8(in);
        PeekReader pr = make(r);
        // Skip a leading BOM, if any. Note this forces the first read of the input.
        if ( pr.peekChar() == Chars.BOM )
            pr.readChar();
        return pr;
    }

    /** Make PeekReader where the input is ASCII */
    public static PeekReader makeASCII(InputStream in) {
        Reader r = IO.asASCII(in);
        return make(r);
    }

    /** Make a PeekReader directly over a {@link CharStream}. */
    public static PeekReader make(CharStream r) {
        return new PeekReader(r);
    }

    /** Make a PeekReader that reads the characters of a string. */
    public static PeekReader readString(String string) {
        return new PeekReader(new CharStreamSequence(string));
    }

    /**
     * Open a file as UTF-8 (see {@link #makeUTF8(InputStream)}).
     *
     * @throws AtlasException if the file can not be opened; the underlying
     *         {@link FileNotFoundException} is attached as the cause.
     */
    public static PeekReader open(String filename) {
        final InputStream in;
        try {
            in = new FileInputStream(filename);
        } catch (FileNotFoundException ex) {
            // Keep the cause: it distinguishes "missing" from "is a directory",
            // "permission denied", etc.
            throw new AtlasException("File not found: " + filename, ex);
        }
        try {
            return makeUTF8(in);
        } catch (RuntimeException ex) {
            // makeUTF8 reads ahead to check for a BOM. If that fails, nobody
            // else holds the stream, so release the file handle here.
            try {
                in.close();
            } catch (IOException ex2) {
                ex.addSuppressed(ex2);
            }
            throw ex;
        }
    }

    private PeekReader(CharStream stream) {
        this.source = stream;
        this.pushbackChars = new char[PUSHBACK_SIZE];
        this.idxPushback = -1;

        this.colNum = INIT_COL;
        this.lineNum = INIT_LINE;
        this.posn = 0;
    }

    /** Current line number; starts at {@link #INIT_LINE} and increases after each {@code '\n'} read. */
    public long getLineNum() {
        return lineNum;
    }

    /** Current column number; starts at {@link #INIT_COL} and is reset to it after each {@code '\n'} read. */
    public long getColNum() {
        return colNum;
    }

    /** Number of characters read from the source so far; reading pushed-back characters is not counted. */
    public long getPosition() {
        return posn;
    }

    // ---- Do not access currChar except with peekChar/setCurrChar.

    /**
     * Return the character the next read will return, without consuming it.
     * Pushed-back characters take precedence over the source.
     *
     * @return the next character, or EOF at end of input.
     */
    public final int peekChar() {
        if ( idxPushback >= 0 )
            return pushbackChars[idxPushback];

        // If not started ... delayed initialization.
        if ( currChar == UNSET )
            init();
        return currChar;
    }

    // And the correct way to read the currChar is to call peekChar.
    private final void setCurrChar(int ch) {
        currChar = ch;
    }

    /**
     * Read and consume the next character.
     *
     * @return the character, or EOF at end of input.
     */
    public final int readChar() {
        return nextChar();
    }

    /**
     * Push back a character : does not alter underlying position, line or
     * column counts. Pushed-back characters are read back most-recent-first.
     * EOF and UNSET are rejected as illegal characters to push back.
     */
    public final void pushbackChar(int ch) {
        unreadChar(ch);
    }

    // Reader operations

    /** Close the underlying character stream. */
    @Override
    public final void close() throws IOException {
        source.closeStream();
    }

    @Override
    public final int read() throws IOException {
        // readChar() already yields EOF at end of input.
        return readChar();
    }

    /**
     * Read up to {@code len} characters into {@code cbuf} starting at {@code off}.
     *
     * @return the number of characters read, 0 if {@code len} is zero, or EOF
     *         (-1) if the input is already at its end.
     * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not
     *         describe a range inside {@code cbuf}.
     */
    @Override
    public final int read(char[] cbuf, int off, int len) throws IOException {
        // Fail fast on a bad range, before consuming any input.
        java.util.Objects.checkFromIndexSize(off, len, cbuf.length);
        // Reader contract: a zero-length read returns 0, even at end of input.
        if ( len == 0 )
            return 0;
        if ( eof() )
            return EOF;
        // Note - we need to preserve line count
        // Single char ops are reasonably efficient.
        for ( int i = 0 ; i < len ; i++ ) {
            int ch = readChar();
            if ( ch == EOF )
                return (i == 0) ? EOF : i;
            cbuf[i + off] = (char)ch;
        }
        return len;
    }

    /** Test for end of input. This peeks, so it may trigger the first read of the source. */
    public final boolean eof() {
        return peekChar() == EOF;
    }

    // ----------------
    // The methods below are the only ones to manipulate the character buffers.
    // Other methods may read the state of variables.

    private final void unreadChar(int ch) {
        // The push back buffer is in the order where [0] is the oldest.
        // Pushing back does not alter the line number, column number or
        // position count, nor does reading a pushed-back character.

        // Validate before touching the buffer. IO.exception is expected to throw.
        if ( ch == EOF || ch == UNSET )
            IO.exception("Illegal character to push back: " + ch);

        // idxPushback is the index of the newest entry, so the slot about to be
        // written is idxPushback+1. Grow when that slot is beyond the array.
        // (Testing idxPushback alone let the write below run off the end of
        // the array once the buffer was exactly full.)
        if ( idxPushback + 1 >= pushbackChars.length ) {
            // Enlarge pushback buffer.
            char[] pushbackChars2 = new char[pushbackChars.length * 2];
            System.arraycopy(pushbackChars, 0, pushbackChars2, 0, pushbackChars.length);
            pushbackChars = pushbackChars2;
        }

        idxPushback++;
        pushbackChars[idxPushback] = (char)ch;
    }

    // Delayed initialization: fetch the first character from the source.
    private final void init() {
        advanceAndSet();
        if ( currChar == UNSET )
            setCurrChar(EOF);
    }

    // Move the source on one character and make it the current (peek) character.
    private final void advanceAndSet() {
        int ch = source.advance();
        setCurrChar(ch);
    }

    // Invariant:
    // peekChar() is pushbackChars[idxPushback] when there is pushback,
    // otherwise currChar (the last value returned by source.advance()).

    /**
     * Return the next character, moving on one place and resetting the peek
     * character
     */
    private final int nextChar() {
        int ch = peekChar();
        if ( ch == EOF )
            return EOF;

        if ( idxPushback >= 0 ) {
            // Pop from the pushback stack; position/line/column are not changed.
            char ch2 = pushbackChars[idxPushback];
            idxPushback--;
            return ch2;
        }

        posn++;

        if ( ch == '\n' ) {
            lineNum++;
            colNum = INIT_COL;
        } else
            colNum++;

        advanceAndSet();
        return ch;
    }
}