org.apache.jena.atlas.io.PeekReader Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.atlas.io ;
import static org.apache.jena.atlas.io.IO.EOF ;
import static org.apache.jena.atlas.io.IO.UNSET ;
import java.io.* ;
import org.apache.jena.atlas.AtlasException ;
import org.apache.jena.atlas.lib.Chars ;
/**
 * Parsing-centric reader: a character source with one character of lookahead
 * ({@link #peekChar()}), a growable pushback stack
 * ({@link #pushbackChar(int)}) and line / column / position tracking.
 * This class is not thread safe.
 *
 * @see PeekInputStream
 */
public final class PeekReader extends Reader {
    // Remember to apply fixes to PeekInputStream as well.

    // Buffering is done by a CharStream - does it make a difference?
    // Yes. A lot (Java6).

    // Using a Reader here seems to have zero cost or benefit but CharStream
    // allows fast String handling.
    private final CharStream source ;

    // Initial capacity of the pushback stack; it is doubled on demand.
    private static final int PUSHBACK_SIZE = 10 ;
    static final byte CHAR0 = (char)0 ;

    private char[] pushbackChars ;
    // Index into pushbackChars of the most recently pushed-back character
    // (the top of the stack, i.e. the next one to be read).
    // -1 => none.
    private int idxPushback ;

    // Next character to return when reading forwards.
    // UNSET until the first peek/read triggers the delayed initialization.
    private int currChar = UNSET ;

    // Count of characters consumed from the source (pushback re-reads excluded).
    private long posn ;

    /** Line number reported before any newline has been read. */
    public static final int INIT_LINE = 1 ;
    /** Column number reported at the start of every line. */
    public static final int INIT_COL = 1 ;

    private long colNum ;
    private long lineNum ;

    // ---- static construction methods.

    /**
     * Wrap a {@link Reader} as a PeekReader using the default buffer size.
     *
     * @param r the reader to wrap
     * @return {@code r} itself when it already is a PeekReader, otherwise a
     *         new PeekReader reading from {@code r}
     */
    public static PeekReader make(Reader r) {
        if ( r instanceof PeekReader )
            return (PeekReader)r ;
        return make(r, CharStreamBuffered.CB_SIZE) ;
    }

    /**
     * Wrap a {@link Reader} as a PeekReader with an explicit buffer size.
     *
     * @param r the reader to wrap
     * @param bufferSize buffer size handed to the {@code CharStreamBuffered}
     * @return a new PeekReader reading from {@code r}
     */
    public static PeekReader make(Reader r, int bufferSize) {
        // It is worth our own buffering even if a BufferedReader
        // because of the synchronized on one char reads in BufferedReader.
        return new PeekReader(new CharStreamBuffered(r, bufferSize)) ;
    }

    /**
     * Make PeekReader where the input is UTF8 : BOM is removed.
     * <p>
     * The BOM, when present, is consumed with {@link #readChar()}, so it is
     * counted in the position and column numbers.
     *
     * @param in the byte stream to decode
     * @return a PeekReader positioned after any leading BOM
     */
    public static PeekReader makeUTF8(InputStream in) {
        // This is the best route to make a PeekReader because it avoids
        // chances of wrong charset for a Reader say.
        // make(Reader) adds reader-level buffering. The alternative of
        // decoding directly (InputStreamBuffered + InStreamUTF8) was noted
        // as being a bit slower - reason unknown - and is not used.
        Reader r = IO.asUTF8(in) ;
        PeekReader pr = make(r) ;
        // Skip BOM.
        if ( pr.peekChar() == Chars.BOM )
            pr.readChar() ;
        return pr ;
    }

    /**
     * Make PeekReader where the input is ASCII.
     *
     * @param in the byte stream to decode
     * @return a new PeekReader over {@code in}
     */
    public static PeekReader makeASCII(InputStream in) {
        Reader r = IO.asASCII(in) ;
        return make(r) ;
    }

    /**
     * Make a PeekReader directly over a {@code CharStream}; no extra
     * buffering is added here.
     *
     * @param r the character source
     * @return a new PeekReader over {@code r}
     */
    public static PeekReader make(CharStream r) {
        return new PeekReader(r) ;
    }

    /**
     * Make a PeekReader over the characters of a string.
     *
     * @param string the text to read
     * @return a new PeekReader over {@code string}
     */
    public static PeekReader readString(String string) {
        return new PeekReader(new CharStreamSequence(string)) ;
    }

    /**
     * Open a file and read it as UTF-8 (see {@link #makeUTF8(InputStream)}).
     *
     * @param filename name of the file to open
     * @return a PeekReader over the file contents
     * @throws AtlasException if the file can not be opened; the underlying
     *         {@link FileNotFoundException} is attached as the cause
     */
    public static PeekReader open(String filename) {
        try {
            InputStream in = new FileInputStream(filename) ;
            return makeUTF8(in) ;
        } catch (FileNotFoundException ex) {
            // Keep the original exception: it carries the OS-level reason
            // (missing file, is a directory, permission denied, ...).
            throw new AtlasException("File not found: " + filename, ex) ;
        }
    }

    private PeekReader(CharStream stream) {
        this.source = stream ;
        this.pushbackChars = new char[PUSHBACK_SIZE] ;
        this.idxPushback = -1 ;

        this.colNum = INIT_COL ;
        this.lineNum = INIT_LINE ;
        this.posn = 0 ;
    }

    /** @return the current line number; starts at {@link #INIT_LINE} and is incremented for each {@code '\n'} read */
    public long getLineNum() {
        return lineNum ;
    }

    /** @return the current column number; reset to {@link #INIT_COL} after each {@code '\n'} read */
    public long getColNum() {
        return colNum ;
    }

    /** @return the number of characters consumed from the source; pushed-back characters are not counted again */
    public long getPosition() {
        return posn ;
    }

    // ---- Do not access currChar except with peekChar/setCurrChar.

    /**
     * Look at the next character without consuming it.
     *
     * @return the most recently pushed-back character if there is one,
     *         otherwise the next character of the source, or {@code EOF}
     */
    public final int peekChar() {
        if ( idxPushback >= 0 )
            return pushbackChars[idxPushback] ;

        // If not started ... delayed initialization.
        if ( currChar == UNSET )
            init() ;
        return currChar ;
    }

    // And the correct way to read the currChar is to call peekChar.
    private final void setCurrChar(int ch) {
        currChar = ch ;
    }

    /**
     * Consume and return the next character.
     *
     * @return the character read, or {@code EOF} at end of input
     */
    public final int readChar() {
        return nextChar() ;
    }

    /**
     * Push back a character : does not alter underlying position, line or
     * column counts. Characters are returned in reverse order of pushback
     * (last pushed, first read).
     *
     * @param ch the character to push back; {@code EOF} and {@code UNSET}
     *        are rejected via {@code IO.exception}
     */
    public final void pushbackChar(int ch) {
        unreadChar(ch) ;
    }

    // Reader operations

    /** Close the underlying character source. */
    @Override
    public final void close() throws IOException {
        source.closeStream() ;
    }

    /**
     * Read a single character.
     *
     * @return the character read, or {@code EOF} at end of input
     */
    @Override
    public final int read() throws IOException {
        if ( eof() )
            return EOF ;
        return readChar() ;
    }

    /**
     * Read up to {@code len} characters into {@code cbuf} starting at
     * {@code off}, keeping the line / column / position counts up to date.
     *
     * @param cbuf destination buffer
     * @param off offset in {@code cbuf} of the first character stored
     * @param len maximum number of characters to read
     * @return the number of characters read, 0 if {@code len} is zero, or
     *         {@code EOF} if the input is exhausted
     * @throws IndexOutOfBoundsException if {@code off} or {@code len} is
     *         negative, or {@code len} exceeds {@code cbuf.length - off}
     */
    @Override
    public final int read(char[] cbuf, int off, int len) throws IOException {
        // Validate up front so that a bad request neither consumes input
        // nor returns a negative length.
        if ( off < 0 || len < 0 || len > cbuf.length - off )
            throw new IndexOutOfBoundsException("off=" + off + ", len=" + len + ", cbuf.length=" + cbuf.length) ;
        // Reader contract: a zero-length read returns 0, even at end of input.
        if ( len == 0 )
            return 0 ;
        if ( eof() )
            return EOF ;
        // Note - we need to preserve line count
        // Single char ops are reasonably efficient.
        for (int i = 0; i < len; i++) {
            int ch = readChar() ;
            if ( ch == EOF )
                return (i == 0) ? EOF : i ;
            cbuf[i + off] = (char)ch ;
        }
        return len ;
    }

    /** @return true if there is no further character to read */
    public final boolean eof() {
        return peekChar() == EOF ;
    }

    // ----------------
    // The methods below are the only ones to manipulate the character buffers.
    // Other methods may read the state of variables.

    private final void unreadChar(int ch) {
        // The push back buffer is in the order where [0] is the oldest.
        // Does not alter the line number, column number or position count,
        // nor does reading a pushback character.

        if ( ch == EOF || ch == UNSET )
            IO.exception("Illegal character to push back: " + ch) ;

        // idxPushback is the index of the current top, so the slot about to
        // be written is idxPushback+1: grow when that slot does not exist.
        if ( idxPushback + 1 >= pushbackChars.length ) {
            // Enlarge pushback buffer.
            char[] pushbackChars2 = new char[pushbackChars.length * 2] ;
            System.arraycopy(pushbackChars, 0, pushbackChars2, 0, pushbackChars.length) ;
            pushbackChars = pushbackChars2 ;
        }

        idxPushback++ ;
        pushbackChars[idxPushback] = (char)ch ;
    }

    // Delayed initialization: load the first character from the source.
    private final void init() {
        advanceAndSet() ;
        // Guard against the source handing back UNSET, which would make
        // every later peek re-run init().
        if ( currChar == UNSET )
            setCurrChar(EOF) ;
    }

    // Fetch the next character from the source and make it the lookahead.
    private final void advanceAndSet() {
        int ch = source.advance() ;
        setCurrChar(ch) ;
    }

    // Invariant:
    // the peek character is pushbackChars[idxPushback] when idxPushback >= 0,
    // otherwise it is currChar.

    /**
     * Return the next character, moving on one place and resetting the peek
     * character
     */
    private final int nextChar() {
        int ch = peekChar() ;

        if ( ch == EOF )
            return EOF ;

        if ( idxPushback >= 0 ) {
            // Pop the pushback stack; counts were already updated when the
            // character was first read from the source.
            char ch2 = pushbackChars[idxPushback] ;
            idxPushback-- ;
            return ch2 ;
        }

        posn++ ;

        if ( ch == '\n' ) {
            lineNum++ ;
            colNum = INIT_COL ;
        } else
            colNum++ ;

        advanceAndSet() ;
        return ch ;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy