All Downloads are FREE. Search and download functionalities are using the official Maven repository.

htsjdk.tribble.readers.LongLineBufferedReader Maven / Gradle / Ivy

There is a newer version: 4.3.0
Show newest version
/*
 * The MIT License
 *
 * Copyright (c) 2013 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

package htsjdk.tribble.readers;


import java.io.IOException;
import java.io.Reader;

/**
 * A variant of {@link java.io.BufferedReader} with improved performance reading files with long lines.
 * 
 * This class is almost identical to BufferedReader, but it retains a single line buffer for accumulating characters in a line, and allows
 * its size to grow. Conversely, {@link java.io.BufferedReader} assumes each new line will be approximately 80 characters and its
 * performance suffers when that is not the case.
 * 
 * Unlike {@link java.io.BufferedReader}, this class is not thread safe.
 *
 * @author mccowan
 */
public class LongLineBufferedReader extends Reader {

    private Reader in;

    private char cb[];
    private int nChars, nextChar;

    private static final int INVALIDATED = -2;
    private static final int UNMARKED = -1;
    private int markedChar = UNMARKED;
    private int readAheadLimit = 0; /* Valid only when markedChar > 0 */

    /**
     * If the next character is a line feed, skip it
     */
    private boolean skipLF = false;

    /**
     * The skipLF flag when the mark was set
     */
    private boolean markedSkipLF = false;

    private static final int DEFAULT_CHAR_BUFFER_SIZE = 8192;
    private static final int DEFAULT_EXPECTED_LINE_LENGTH = 80;

    private StringBuilder lineBuffer = new StringBuilder(DEFAULT_EXPECTED_LINE_LENGTH);

    /**
     * Creates a buffering character-input stream that uses an input buffer of
     * the specified size.
     *
     * @param in A Reader
     * @param sz Input-buffer size
     * @throws IllegalArgumentException If sz is <= 0
     */
    public LongLineBufferedReader(Reader in, int sz) {
        super(in);
        if (sz <= 0)
            throw new IllegalArgumentException("Buffer size <= 0");
        this.in = in;
        cb = new char[sz];
        nextChar = nChars = 0;
    }

    /**
     * Creates a buffering character-input stream that uses a default-sized
     * input buffer.
     *
     * @param in A Reader
     */
    public LongLineBufferedReader(Reader in) {
        this(in, DEFAULT_CHAR_BUFFER_SIZE);
    }

    /**
     * Checks to make sure that the stream has not been closed
     */
    private void ensureOpen() throws IOException {
        if (in == null)
            throw new IOException("Stream closed");
    }

    /**
     * Fills the input buffer, taking the mark into account if it is valid.
     */
    private void fill() throws IOException {
        int dst;
        if (markedChar <= UNMARKED) {
        /* No mark */
            dst = 0;
        } else {
        /* Marked */
            int delta = nextChar - markedChar;
            if (delta >= readAheadLimit) {
        /* Gone past read-ahead limit: Invalidate mark */
                markedChar = INVALIDATED;
                readAheadLimit = 0;
                dst = 0;
            } else {
                if (readAheadLimit <= cb.length) {
		    /* Shuffle in the current buffer */
                    System.arraycopy(cb, markedChar, cb, 0, delta);
                    markedChar = 0;
                    dst = delta;
                } else {
		    /* Reallocate buffer to accommodate read-ahead limit */
                    char ncb[] = new char[readAheadLimit];
                    System.arraycopy(cb, markedChar, ncb, 0, delta);
                    cb = ncb;
                    markedChar = 0;
                    dst = delta;
                }
                nextChar = nChars = delta;
            }
        }

        int n;
        do {
            n = in.read(cb, dst, cb.length - dst);
        } while (n == 0);
        if (n > 0) {
            nChars = dst + n;
            nextChar = dst;
        }
    }

    /**
     * Reads a single character.
     *
     * @return The character read, as an integer in the range
     *         0 to 65535 (0x00-0xffff), or -1 if the
     *         end of the stream has been reached
     * @throws IOException If an I/O error occurs
     */
    @Override
    public int read() throws IOException {
        synchronized (lock) {
            ensureOpen();
            for (; ; ) {
                if (nextChar >= nChars) {
                    fill();
                    if (nextChar >= nChars)
                        return -1;
                }
                if (skipLF) {
                    skipLF = false;
                    if (cb[nextChar] == '\n') {
                        nextChar++;
                        continue;
                    }
                }
                return cb[nextChar++];
            }
        }
    }

    /**
     * Reads characters into a portion of an array, reading from the underlying
     * stream if necessary.
     */
    private int read1(char[] cbuf, int off, int len) throws IOException {
        if (nextChar >= nChars) {
	    /* If the requested length is at least as large as the buffer, and
	       if there is no mark/reset activity, and if line feeds are not
	       being skipped, do not bother to copy the characters into the
	       local buffer.  In this way buffered streams will cascade
	       harmlessly. */
            if (len >= cb.length && markedChar <= UNMARKED && !skipLF) {
                return in.read(cbuf, off, len);
            }
            fill();
        }
        if (nextChar >= nChars) return -1;
        if (skipLF) {
            skipLF = false;
            if (cb[nextChar] == '\n') {
                nextChar++;
                if (nextChar >= nChars)
                    fill();
                if (nextChar >= nChars)
                    return -1;
            }
        }
        int n = Math.min(len, nChars - nextChar);
        System.arraycopy(cb, nextChar, cbuf, off, n);
        nextChar += n;
        return n;
    }

    /**
     * Reads characters into a portion of an array.
     * 

*

This method implements the general contract of the corresponding * {@link Reader#read(char[], int, int) read} method of the * {@link Reader} class. As an additional convenience, it * attempts to read as many characters as possible by repeatedly invoking * the read method of the underlying stream. This iterated * read continues until one of the following conditions becomes * true:

    *

    *

  • The specified number of characters have been read, *

    *

  • The read method of the underlying stream returns * -1, indicating end-of-file, or *

    *

  • The ready method of the underlying stream * returns false, indicating that further input requests * would block. *

    *

If the first read on the underlying stream returns * -1 to indicate end-of-file then this method returns * -1. Otherwise this method returns the number of characters * actually read. *

*

Subclasses of this class are encouraged, but not required, to * attempt to read as many characters as possible in the same fashion. *

*

Ordinarily this method takes characters from this stream's character * buffer, filling it from the underlying stream as necessary. If, * however, the buffer is empty, the mark is not valid, and the requested * length is at least as large as the buffer, then this method will read * characters directly from the underlying stream into the given array. * Thus redundant BufferedReaderTwos will not copy data * unnecessarily. * * @param cbuf Destination buffer * @param off Offset at which to start storing characters * @param len Maximum number of characters to read * @return The number of characters read, or -1 if the end of the * stream has been reached * @throws IOException If an I/O error occurs */ @Override public int read(char cbuf[], int off, int len) throws IOException { synchronized (lock) { ensureOpen(); if ((off < 0) || (off > cbuf.length) || (len < 0) || ((off + len) > cbuf.length) || ((off + len) < 0)) { throw new IndexOutOfBoundsException(); } else if (len == 0) { return 0; } int n = read1(cbuf, off, len); if (n <= 0) return n; while ((n < len) && in.ready()) { int n1 = read1(cbuf, off + n, len - n); if (n1 <= 0) break; n += n1; } return n; } } /** * Reads a line of text. A line is considered to be terminated by any one * of a line feed ('\n'), a carriage return ('\r'), or a carriage return * followed immediately by a linefeed. * * @param ignoreLF If true, the next '\n' will be skipped * @return A String containing the contents of the line, not including * any line-termination characters, or null if the end of the * stream has been reached * @throws IOException If an I/O error occurs * @see java.io.LineNumberReader#readLine() */ String readLine(boolean ignoreLF) throws IOException { int startChar; lineBuffer.setLength(0); synchronized (lock) { ensureOpen(); boolean omitLF = ignoreLF || skipLF; bufferLoop: for (; ; ) { if (nextChar >= nChars) fill(); if (nextChar >= nChars) { /* EOF */ if (lineBuffer != null && lineBuffer.length() > 0) return lineBuffer.toString(); else return null; } boolean eol = false; char c = 0; int i; /* Skip a leftover '\n', if necessary */ if (omitLF && (cb[nextChar] == '\n')) nextChar++; skipLF = false; omitLF = false; charLoop: for (i = nextChar; i < nChars; i++) { c = cb[i]; if ((c == '\n') || (c == '\r')) { eol = true; break charLoop; } } startChar = nextChar; nextChar = i; if (eol) { String str; lineBuffer.append(cb, startChar, i - startChar); str = lineBuffer.toString(); nextChar++; if (c == '\r') { skipLF = true; } return str; } lineBuffer.append(cb, startChar, i - startChar); } } } /** * Reads a line of text. A line is considered to be terminated by any one * of a line feed ('\n'), a carriage return ('\r'), or a carriage return * followed immediately by a linefeed. * * @return A String containing the contents of the line, not including * any line-termination characters, or null if the end of the * stream has been reached * @throws IOException If an I/O error occurs */ public String readLine() throws IOException { return readLine(false); } /** * Skips characters. * * @param n The number of characters to skip * @return The number of characters actually skipped * @throws IllegalArgumentException If n is negative. * @throws IOException If an I/O error occurs */ @Override public long skip(long n) throws IOException { if (n < 0L) { throw new IllegalArgumentException("skip value is negative"); } synchronized (lock) { ensureOpen(); long r = n; while (r > 0) { if (nextChar >= nChars) fill(); if (nextChar >= nChars) /* EOF */ break; if (skipLF) { skipLF = false; if (cb[nextChar] == '\n') { nextChar++; } } long d = nChars - nextChar; if (r <= d) { nextChar += r; r = 0; break; } else { r -= d; nextChar = nChars; } } return n - r; } } /** * Tells whether this stream is ready to be read. A buffered character * stream is ready if the buffer is not empty, or if the underlying * character stream is ready. * * @throws IOException If an I/O error occurs */ @Override public boolean ready() throws IOException { synchronized (lock) { ensureOpen(); /* * If newline needs to be skipped and the next char to be read * is a newline character, then just skip it right away. */ if (skipLF) { /* Note that in.ready() will return true if and only if the next * read on the stream will not block. */ if (nextChar >= nChars && in.ready()) { fill(); } if (nextChar < nChars) { if (cb[nextChar] == '\n') nextChar++; skipLF = false; } } return (nextChar < nChars) || in.ready(); } } /** * Tells whether this stream supports the mark() operation, which it does. */ @Override public boolean markSupported() { return true; } /** * Marks the present position in the stream. Subsequent calls to reset() * will attempt to reposition the stream to this point. * * @param readAheadLimit Limit on the number of characters that may be * read while still preserving the mark. An attempt * to reset the stream after reading characters * up to this limit or beyond may fail. * A limit value larger than the size of the input * buffer will cause a new buffer to be allocated * whose size is no smaller than limit. * Therefore large values should be used with care. * @throws IllegalArgumentException If readAheadLimit is < 0 * @throws IOException If an I/O error occurs */ @Override public void mark(int readAheadLimit) throws IOException { if (readAheadLimit < 0) { throw new IllegalArgumentException("Read-ahead limit < 0"); } synchronized (lock) { ensureOpen(); this.readAheadLimit = readAheadLimit; markedChar = nextChar; markedSkipLF = skipLF; } } /** * Resets the stream to the most recent mark. * * @throws IOException If the stream has never been marked, * or if the mark has been invalidated */ @Override public void reset() throws IOException { synchronized (lock) { ensureOpen(); if (markedChar < 0) throw new IOException((markedChar == INVALIDATED) ? "Mark invalid" : "Stream not marked"); nextChar = markedChar; skipLF = markedSkipLF; } } @Override public void close() throws IOException { synchronized (lock) { if (in == null) return; in.close(); in = null; cb = null; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy