All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.jena.atlas.io.PeekReader Maven / Gradle / Ivy

There is a newer version: 5.2.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.atlas.io;

import static org.apache.jena.atlas.io.IO.EOF;
import static org.apache.jena.atlas.io.IO.UNSET;

import java.io.*;

import org.apache.jena.atlas.AtlasException;
import org.apache.jena.atlas.lib.Chars;

/**
 * Parsing-centric reader. This class is not thread safe.
 * @see PeekInputStream
 */

public final class PeekReader extends Reader {
    // Remember to apply fixes to PeekInputStream as well.

    // Buffering is done by a CharStream - does it make difference?
    // Yes. A lot (Java6).

    // Using a Reader here seems to have zero cost or benefit but CharStream
    // allows fast String handling.
    private final CharStream source;

    // Initial capacity of the pushback stack; unreadChar doubles it on demand.
    private static final int PUSHBACK_SIZE = 10;
    // Declared as a byte; the (char) cast of the constant 0 narrows implicitly.
    static final byte        CHAR0         = (char)0;

    // Pushback stack: [0] is the oldest entry, [idxPushback] the most recent.
    private char[]           pushbackChars;
    // Index of the most recently pushed-back character (the next one to be
    // returned by peekChar/readChar); -1 => pushback stack is empty.
    private int              idxPushback;

    // Next character to return when reading forwards.
    // UNSET until the first peek/read triggers init().
    private int              currChar      = UNSET;
    // Count of characters consumed from the source (pushback reads excluded).
    private long             posn;

    public static final int  INIT_LINE     = 1;
    public static final int  INIT_COL      = 1;

    private long             colNum;
    private long             lineNum;

    // ---- static construction methods.

    /**
     * Wrap a {@link Reader} as a PeekReader using the default buffer size.
     * If {@code r} is already a PeekReader it is returned unchanged.
     */
    public static PeekReader make(Reader r) {
        if ( r instanceof PeekReader pr )
            return pr;
        return make(r, CharStreamBuffered.CB_SIZE);
    }

    /**
     * Wrap a {@link Reader} as a PeekReader with buffering of the given size.
     *
     * @param r          the character source
     * @param bufferSize size passed to the {@code CharStreamBuffered} wrapper
     */
    public static PeekReader make(Reader r, int bufferSize) {
        // It is worth our own buffering even if a BufferedReader
        // because of the synchronized on one char reads in BufferedReader.
        return new PeekReader(new CharStreamBuffered(r, bufferSize));
    }

    /** Make PeekReader where the input is UTF8 : BOM is removed */
    public static PeekReader makeUTF8(InputStream in) {
        // This is the best route to make a PeekReader because it avoids
        // chances of wrong charset for a Reader say.
        // make(Reader) adds reader-level buffering. The alternative of decoding
        // directly (InputStreamBuffered + InStreamUTF8) was found to be a bit
        // slower - reason unknown - so it is not used.
        PeekReader pr = make(IO.asUTF8(in));
        // Skip a leading BOM, if present.
        if ( pr.peekChar() == Chars.BOM )
            pr.readChar();
        return pr;
    }

    /** Make PeekReader where the input is ASCII */
    public static PeekReader makeASCII(InputStream in) {
        Reader r = IO.asASCII(in);
        return make(r);
    }

    /** Make a PeekReader directly over a {@code CharStream}. */
    public static PeekReader make(CharStream r) {
        return new PeekReader(r);
    }

    /** Make a PeekReader that reads the characters of {@code string}. */
    public static PeekReader readString(String string) {
        return new PeekReader(new CharStreamSequence(string));
    }

    /**
     * Open a file as UTF-8 (leading BOM removed).
     *
     * @param filename path of the file to read
     * @throws AtlasException if the file cannot be found; the underlying
     *         {@link FileNotFoundException} is attached as the cause
     */
    public static PeekReader open(String filename) {
        final InputStream in;
        try {
            in = new FileInputStream(filename);
        } catch (FileNotFoundException ex) {
            // Keep the original exception as the cause for diagnostics.
            throw new AtlasException("File not found: " + filename, ex);
        }
        try {
            return makeUTF8(in);
        } catch (RuntimeException | Error ex) {
            // makeUTF8 peeks the first character (BOM check); if that fails
            // the stream would otherwise never be closed.
            try {
                in.close();
            } catch (IOException closeEx) {
                ex.addSuppressed(closeEx);
            }
            throw ex;
        }
    }

    private PeekReader(CharStream stream) {
        this.source = stream;
        this.pushbackChars = new char[PUSHBACK_SIZE];
        this.idxPushback = -1;

        this.colNum = INIT_COL;
        this.lineNum = INIT_LINE;
        this.posn = 0;
    }

    /** Current line number; starts at {@link #INIT_LINE}. */
    public long getLineNum() {
        return lineNum;
    }

    /** Current column number; starts at {@link #INIT_COL} and resets after each '\n'. */
    public long getColNum() {
        return colNum;
    }

    /** Number of characters consumed from the source; pushed-back characters are not counted. */
    public long getPosition() {
        return posn;
    }

    // ---- Do not access currChar except with peekChar/setCurrChar.
    /**
     * Return the next character without consuming it.
     * The most recently pushed-back character, if any, takes precedence over
     * the source. Returns {@code EOF} once the source is exhausted.
     */
    public final int peekChar() {
        if ( idxPushback >= 0 )
            return pushbackChars[idxPushback];

        // If not started ... delayed initialization.
        if ( currChar == UNSET )
            init();
        return currChar;
    }

    // And the correct way to read the currChar is to call peekChar.
    private final void setCurrChar(int ch) {
        currChar = ch;
    }

    /** Consume and return the next character, or {@code EOF} at end of input. */
    public final int readChar() {
        return nextChar();
    }

    /**
     * push back a character : does not alter underlying position, line or
     * column counts
     */
    public final void pushbackChar(int ch) {
        unreadChar(ch);
    }

    // Reader operations
    @Override
    public final void close() throws IOException {
        source.closeStream();
    }

    @Override
    public final int read() throws IOException {
        if ( eof() )
            return EOF;
        return readChar();
    }

    /**
     * Read up to {@code len} characters into {@code cbuf} starting at {@code off}.
     *
     * @return the number of characters read, 0 if {@code len} is zero, or
     *         {@code EOF} if the end of input has been reached
     * @throws IndexOutOfBoundsException if {@code off} or {@code len} is negative,
     *         or {@code len} is greater than {@code cbuf.length - off}
     */
    @Override
    public final int read(char[] cbuf, int off, int len) throws IOException {
        // Validate up front so that a bad range cannot consume input
        // before failing part-way through the copy.
        if ( off < 0 || len < 0 || len > cbuf.length - off )
            throw new IndexOutOfBoundsException("off=" + off + ", len=" + len + ", cbuf.length=" + cbuf.length);
        // Reader contract: a zero-length request reads nothing and returns 0,
        // even at end of input.
        if ( len == 0 )
            return 0;
        if ( eof() )
            return EOF;
        // Note - we need to preserve line count
        // Single char ops are reasonably efficient.
        for (int i = 0; i < len; i++) {
            int ch = readChar();
            if ( ch == EOF )
                return (i == 0) ? EOF : i;
            cbuf[i + off] = (char)ch;
        }
        return len;
    }

    /** True if there is no further character to read (including pushed-back characters). */
    public final boolean eof() {
        return peekChar() == EOF;
    }

    // ----------------
    // The methods below are the only ones to manipulate the character buffers.
    // Other methods may read the state of variables.

    private final void unreadChar(int ch) {
        // The push back buffer is in the order where [0] is the oldest.
        // Does not alter the line number, column number or position count
        // nor does reading a pushback character.

        // idxPushback is the index of the last occupied slot, so the slot about
        // to be written is idxPushback+1; grow when that slot does not exist.
        if ( idxPushback + 1 >= pushbackChars.length ) {
            // Enlarge pushback buffer.
            char[] pushbackChars2 = new char[pushbackChars.length * 2];
            System.arraycopy(pushbackChars, 0, pushbackChars2, 0, pushbackChars.length);
            pushbackChars = pushbackChars2;
        }
        if ( ch == EOF || ch == UNSET )
            IO.exception("Illegal character to push back: " + ch);

        idxPushback++;
        pushbackChars[idxPushback] = (char)ch;
    }

    // Delayed start: fetch the first character from the source.
    private final void init() {
        advanceAndSet();
        if ( currChar == UNSET )
            setCurrChar(EOF);
    }

    // Move the source on one character and record it as the current (peek) character.
    private final void advanceAndSet() {
        int ch = source.advance();
        setCurrChar(ch);
    }

    // Invariant.
    // The peek character is pushbackChars[idxPushback] when the pushback stack
    // is non-empty (idxPushback >= 0), otherwise currChar.

    /**
     * Return the next character, moving on one place and resetting the peek
     * character
     */
    private final int nextChar() {
        int ch = peekChar();

        if ( ch == EOF )
            return EOF;

        if ( idxPushback >= 0 ) {
            // Pop from the pushback stack; position, line and column are untouched.
            char ch2 = pushbackChars[idxPushback];
            idxPushback--;
            return ch2;
        }

        posn++;

        if ( ch == '\n' ) {
            lineNum++;
            colNum = INIT_COL;
        } else
            colNum++;

        advanceAndSet();
        return ch;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy