
org.terrier.utility.LookAheadReader Maven / Gradle / Ivy
The newest version!
/*
* Terrier - Terabyte Retriever
* Webpage: http://terrier.org
* Contact: terrier{a.}dcs.gla.ac.uk
* University of Glasgow - School of Computing Science
* http://www.gla.ac.uk/
*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is LookAheadReader.java.
*
* The Original Code is Copyright (C) 2004-2020 the University of Glasgow.
* All Rights Reserved.
*
* Contributor(s):
* Craig Macdonald (original author)
* Vassilis Plachouras
*/
package org.terrier.utility;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.zip.GZIPInputStream;
/**
* Implements a Reader that encapsulates another stream, but only up to the
* point where a pre-defined end marker is identified in the stream. The Reader
* will then report end of file, and refuse to return any more characters from the
* stream. Suppose that we create an instance of a LookAheadReader with the
* end marker END. For the following input:
* a b c d END e f g...
* the LookAheadReader will stop after reading the string END. Note that the
* end marker is consumed from the parent stream but is never returned to the caller.
*
* @author Craig Macdonald, Vassilis Plachouras
* @see org.terrier.utility.LookAheadStream
*/
public class LookAheadReader extends Reader {
/** The parent stream that this object is looking ahead in; read() pulls its characters from here. */
private final Reader ParentStream;
/** The end marker, held as a character array, that the parent stream is scanned for. */
private final char[] EndMarker;
/** Length of the end marker in characters (cached copy of EndMarker.length). */
private final int MarkerLen;
/**
 * How many characters are currently held in the read ahead buffer.
 * While read() is scanning the parent stream this is also the number of
 * leading end marker characters matched so far.
 */
private int BufLen = 0;
/** Index in the read ahead buffer of the next character that read() will hand back. */
private int BufIndex = 0;
/** The read ahead buffer; the constructor allocates one slot per end marker character. */
private final char[] Buffer;
/**
 * Have we reached the end of the file. Set by read() when it reports the end of
 * the stream (end marker found, or parent stream ended); once set, read() returns -1.
 */
private boolean EOF = false;
/**
 * Creates an instance of a LookAheadReader that will read from the
 * given stream until the end marker is found.
 * @param parent Reader the stream used for reading the input.
 * @param endMarker String the marker which signifies the end of the stream.
 * @throws NullPointerException if parent or endMarker is null.
 * @throws IllegalArgumentException if endMarker is empty, as there would be
 * nothing to scan the stream for.
 */
public LookAheadReader(Reader parent, String endMarker) {
    // Fail here, with a clear message, rather than on the first read().
    if (parent == null)
        throw new NullPointerException("parent");
    if (endMarker == null)
        throw new NullPointerException("endMarker");
    if (endMarker.length() == 0)
        throw new IllegalArgumentException("endMarker must not be empty");
    this.ParentStream = parent;
    this.EndMarker = endMarker.toCharArray();
    this.MarkerLen = EndMarker.length;
    // One slot per marker character: a partial match plus the character that
    // breaks it never exceeds the marker length.
    this.Buffer = new char[MarkerLen];
}
/** Set once the parent stream has returned -1, so that it is not asked for more characters. */
private boolean ParentExhausted = false;
/**
 * Read a character from the parent stream, first checking that
 * it does not form part of the end marker.
 * <p>
 * Characters that might be the start of the end marker are held back in the
 * read ahead buffer until the match either completes (the stream then ends,
 * and the marker itself is never returned) or fails (the held characters are
 * then returned one per call). Held characters are re-checked on every call,
 * so a marker that starts inside a failed partial match is still found, e.g.
 * the marker END in the input EEND. If the parent stream ends in the middle
 * of a partial match, the held characters are returned before -1 is.
 * @return int the code of the read character, or -1 if the end of
 * the stream has been reached.
 * @throws IOException if there is any error while reading from the stream.
 */
@Override
public int read() throws IOException {
    if (EOF)
        return -1;
    // Characters left over from an earlier call were matched against the marker
    // from a start position that has since been returned to the caller, so
    // match them again starting from their own first character.
    int matched = 0;
    while (matched < BufLen && Buffer[BufIndex + matched] == EndMarker[matched])
        matched++;
    boolean isPrefix = (matched == BufLen);
    if (isPrefix && BufLen < MarkerLen && !ParentExhausted) {
        // New characters are appended at slot BufLen, so the pending
        // characters must start at slot 0.
        if (BufIndex > 0) {
            System.arraycopy(Buffer, BufIndex, Buffer, 0, BufLen);
            BufIndex = 0;
        }
        // Only look ahead while the pending characters could still grow into the marker.
        while (isPrefix && BufLen < MarkerLen) {
            final int c = ParentStream.read();
            if (c == -1) {
                ParentExhausted = true;
                break;
            }
            final char cc = (char) c;
            isPrefix = (cc == EndMarker[BufLen]);
            Buffer[BufLen++] = cc;
        }
    }
    if (isPrefix && BufLen == MarkerLen) {
        // The pending characters spell the whole end marker: swallow it and stop.
        EOF = true;
        return -1;
    }
    if (BufLen == 0) {
        // Nothing is pending, which can only happen once the parent stream has ended.
        EOF = true;
        return -1;
    }
    // The first pending character cannot start the marker (or the parent stream
    // ended before the marker could complete), so it is safe to return.
    BufLen--;
    return Buffer[BufIndex++];
}
/**
* Read characters into an array. This method will read 100 characters or the array length,
* and until the end of the stream is reached.
* NB: Uses read() internally.
* @param cbuf cbuf - Destination buffer
* @return The number of characters read, or -1 if the end of the stream has been reached.
* @throws IOException If an I/O error occurs
*/
public int read(char[] cbuf) throws IOException {
if (EOF)
return -1;
int ReadSize = 100;
if (ReadSize > cbuf.length)
ReadSize = cbuf.length;
int i=0;
for(;iNB:Implemented in terms of read().
* @param cbuf Destination buffer
* @param offset Offset at which to start storing characters
* @param len Maximum number of characters to read
* @return The number of characters read, or -1 if the end of the stream has been reached
* @throws IOException If an I/O error occurs
*/
public int read(char[] cbuf, int offset, int len) throws IOException {
//System.out.print("offset="+offset+ " len="+len);
if (EOF)
return -1;
int i=0;
for(;i© 2015 - 2025 Weber Informatics LLC | Privacy Policy