
src-lib.atlas.io.PeekReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tdb Show documentation
Show all versions of tdb Show documentation
TDB is a storage subsystem for Jena and ARQ
/*
* (c) Copyright 2007, 2008, 2009 Hewlett-Packard Development Company, LP
* All rights reserved.
* [See end of file]
*/
package atlas.io;
import java.io.IOException ;
import java.io.InputStream ;
import java.io.Reader ;
import com.hp.hpl.jena.shared.JenaException ;
import com.hp.hpl.jena.util.FileUtils ;
/** Parsing-centric reader.
* Faster than using BufferedReader, sometimes a lot faster, when
* tokenizing is the critical performance point.
*
* Supports a line and column
* count. Initially, line = 1, col = 1. Columns go 1..N
*
* This class is not thread safe.
* @see BufferingWriter
*/
public abstract class PeekReader extends Reader
{
// Does buffering here instead of using a BufferedReader help?
// YES. A lot (Java6).
// Possibly because BufferedReader internally uses synchronized,
// even on getting a singe character. Thisis not only an unnecessary cost
// but also possibly because it stops the JIT doing a better job.
// **** read(char[]) is a loop of single char operations.
private static final int PUSHBACK_SIZE = 10 ;
static final byte CHAR0 = (char)0 ;
static final int EOF = -1 ;
static final int UNSET = -2 ;
private char[] pushbackChars ;
private int idxPushback ; // Index into pushbackChars: points to next pushBack. -1 => none.
private int currChar = UNSET ; // Next character to return when reading forwards.
private long posn ;
public static final int INIT_LINE = 1 ;
public static final int INIT_COL = 1 ;
private long colNum ;
private long lineNum ;
// ---- static construction methods.
public static PeekReader make(Reader r)
{
if ( r instanceof PeekReader )
return (PeekReader)r ;
// if ( r instanceof BufferedReader )
// Log.warn(PeekReader.class, "BufferedReader passed to PeekReader") ;
return new PeekReaderSource(r) ;
}
public static PeekReader make(Reader r, int bufferSize)
{
if ( r instanceof PeekReader )
return (PeekReader)r ;
return new PeekReaderSource(r, bufferSize) ;
}
public static PeekReader makeUTF8(InputStream in)
{
Reader r = FileUtils.asUTF8(in) ;
return make(r) ;
}
public static PeekReader make(String string)
{
return new PeekReaderCharSequence(string) ;
}
protected PeekReader()
{
this.pushbackChars = new char[PUSHBACK_SIZE] ;
this.idxPushback = -1 ;
this.colNum = INIT_COL ;
this.lineNum = INIT_LINE ;
this.posn = 0 ;
// We start at character "-1", i.e. just before the file starts.
// Advance always so that the peek character is valid (is character 0)
// Returns the character before the file starts (i.e. UNSET).
}
// public static PeekReader test(String x) { return new PeekReaderSource(new StringReader(x)) ; }
//
// // A bit slow in that it copies out of the string into the intermediate char array.
// // But happens in one block operations when less than buffer size.
// static PeekReader make(String x, int buffSize)
// { return new PeekReaderSource(new StringReader(x), buffSize) ; }
public long getLineNum() { return lineNum; }
public long getColNum() { return colNum; }
public long getPosition() { return posn; }
//---- Do not access currChar except with peekChar/setCurrChar.
public int peekChar()
{
if ( idxPushback >= 0 )
return pushbackChars[idxPushback] ;
// If not started ... delayed initialization.
if ( currChar == UNSET )
init() ;
return currChar ;
}
// And the correct way to read the currChar is to call peekChar.
private void setCurrChar(int ch)
{
currChar = ch ;
}
public int readChar() { return nextChar() ; }
/** push back a character : does not alter underlying position, line or column counts*/
public void pushbackChar(int ch) { unreadChar(ch) ; }
// Reader operations
@Override
public void close() throws IOException
{
closeInput() ;
}
@Override
public int read() throws IOException
{
if ( eof() )
return EOF ;
int x = readChar() ;
return x ;
}
@Override
public int read(char[] cbuf, int off, int len) throws IOException
{
if ( eof() )
return EOF ;
// Note - need to preserve line count, so single char ops are reasonably efficient.
for ( int i = 0 ; i < len ; i++ )
{
int ch = readChar() ;
if ( ch == EOF )
return (i==0)? EOF : i ;
cbuf[i+off] = (char)ch ;
}
return len ;
}
public final boolean eof() { return peekChar() == EOF ; }
//protected abstract void init() ;
protected abstract int advance() ;
protected abstract void closeInput() ;
// ----------------
// The methods below are the only ones to manipulate the character buffers.
// Other methods may read the state of variables.
private void unreadChar(int ch)
{
// The push back buffer is in the order where [0] is the oldest.
// Does not alter the line number, column number or position count.
if ( idxPushback >= pushbackChars.length )
{
// Enlarge pushback buffer.
char[] pushbackChars2 = new char[pushbackChars.length*2] ;
System.arraycopy(pushbackChars2, 0, pushbackChars2, 0, pushbackChars.length) ;
pushbackChars = pushbackChars2 ;
//throw new JenaException("Pushback buffer overflow") ;
}
if ( ch == EOF || ch == UNSET )
throw new JenaException("Illegal character to push back: "+ch) ;
idxPushback++ ;
pushbackChars[idxPushback] = (char)ch ;
}
private void init()
{
advanceAndSet() ;
if ( currChar == UNSET )
setCurrChar(EOF) ;
}
private void advanceAndSet()
{
int ch = advance() ;
setCurrChar(ch) ;
}
// /** Move forward one character and return it.
// * Ensure the buffer is not empty, or boolean eof is set
// */
// private int _advance()
// {
// if ( idx >= buffLen )
// // Points outsize the array. Refill it
// _fillArray() ;
//
// // Advance one character.
// if ( buffLen >= 0 )
// {
// char ch = chars[idx] ;
// // Advance the lookahead character
// idx++ ;
// return ch ;
// }
// else
// // Buffer empty, end of stream.
// return EOF ;
// }
//
// private int _fillArray()
// {
// int x = source.fill(chars) ;
// idx = 0 ;
// buffLen = x ; // Maybe -1
// return x ;
// }
// Invariants.
// currChar is either chars[idx-1] or pushbackChars[idxPushback]
/** Return the next character, moving on one place and resetting the peek character */
private int nextChar()
{
int ch = peekChar() ;
if ( ch == EOF )
return EOF ;
if ( idxPushback >= 0 )
{
ch = replayPushback() ;
return ch ;
}
posn++ ;
if (ch == '\n')
{
lineNum++;
colNum = INIT_COL ;
}
else
colNum++;
advanceAndSet() ;
return ch ;
}
private int replayPushback()
{
if ( idxPushback >=0 )
{
// Simple case : theer are more charcaters in the pushback buffer after this one.
char ch2 = pushbackChars[idxPushback] ;
idxPushback-- ;
return ch2 ;
}
throw new IllegalStateException() ;
// // Push back buffer empty.
// // Next char is from chars[] which must have yielded a
// // character and idx >= 1 or the stream was zero chars (idx <= 0)
// int nextCurrChar = EOF ;
//
// if ( idx-1 >= 0 )
// // Had been a read.
// nextCurrChar = chars[idx-1] ;
// setCurrChar(nextCurrChar) ;
}
}
/*
* (c) Copyright 2007, 2008, 2009 Hewlett-Packard Development Company, LP
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
© 2015 - 2025 Weber Informatics LLC | Privacy Policy