All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.terrier.utility.LookAheadReader Maven / Gradle / Ivy

The newest version!
/*
 * Terrier - Terabyte Retriever 
 * Webpage: http://terrier.org 
 * Contact: terrier{a.}dcs.gla.ac.uk
 * University of Glasgow - School of Computing Science
 * http://www.gla.ac.uk/
 * 
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is LookAheadReader.java.
 *
 * The Original Code is Copyright (C) 2004-2020 the University of Glasgow.
 * All Rights Reserved.
 *
 * Contributor(s):
 *   Craig Macdonald  (original author)
 *   Vassilis Plachouras 
 */
package org.terrier.utility;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.zip.GZIPInputStream;
/**
 * Implements a Reader that encapsulates another stream, but only up to the
 * point at which a pre-defined end marker is identified in the stream. The
 * Reader then reports end-of-file and refuses to return any more characters
 * from the stream. Suppose that we create an instance of a LookAheadReader
 * with the end marker END. For the following input:
 * a b c d END e f g...
 * the LookAheadReader will stop after reading the string END. Note that the
 * end marker is consumed from the parent stream and is not returned to the caller.
 * 
 * @author Craig Macdonald, Vassilis Plachouras
  * @see org.terrier.utility.LookAheadStream
 */
public class LookAheadReader extends Reader {
	/** The parent stream that this object reads from and looks ahead in. */
	private final Reader ParentStream;
	/** The end marker, as a character array, that the stream is scanned for. */
	private final char[] EndMarker;
	/** Length of the end marker, in characters. */
	private final int MarkerLen;
	/** Number of characters currently held in the read-ahead buffer and not yet returned. */
	private int BufLen = 0;
	/** Position in {@link #Buffer} of the next buffered character to return. */
	private int BufIndex = 0;
	/** The read-ahead buffer; the constructor sizes it to the length of the end marker. */
	private final char[] Buffer;
	/**
	 * Set once the end marker has been matched or the parent stream has
	 * reported end-of-stream; read() returns -1 from then on.
	 */
	private boolean EOF = false;
	/**
	 * Creates an instance of a LookAheadReader that will read from the 
	 * given stream until the end marker is found.
	 * @param parent Reader the stream used for reading the input/
	 * @param endMarker String the marker which signifies the end of the stream. 
	 */
	//
	public LookAheadReader(Reader parent, String endMarker) {
		this.ParentStream = parent;
		this.EndMarker = endMarker.toCharArray();
		MarkerLen = EndMarker.length;
		Buffer = new char[endMarker.length()];
		BufLen = 0;
	}
	/**
	 * Reads a single character from the parent stream, first checking that
	 * it does not form part of the end marker.
	 * <p>
	 * Characters that look like the beginning of the end marker are held back
	 * in the read-ahead buffer until it is known whether the marker is complete.
	 * If the match fails, the first held-back character is returned and the
	 * remaining ones are re-examined from the start of the marker on the next
	 * call, so a marker that overlaps a failed partial match (e.g. marker
	 * <tt>END</tt> in the input <tt>EEND</tt>) is still detected. If the parent
	 * stream ends while a partial match is held back, those characters are
	 * returned to the caller before the end of stream is reported.
	 * @return int the code of the read character, or -1 if the end of
	 *		 the stream has been reached.
	 * @throws IOException if there is any error while reading from the stream.
	 */
	@Override
	public int read() throws IOException {
		if (EOF)
			return -1;
		// Move any pending characters to the front of the buffer, so that
		// Buffer[i] can be compared directly against EndMarker[i].
		if (BufIndex > 0) {
			System.arraycopy(Buffer, BufIndex, Buffer, 0, BufLen);
			BufIndex = 0;
		}
		int matched = 0;
		while (true) {
			// how many of the pending characters agree with the marker?
			while (matched < BufLen && Buffer[matched] == EndMarker[matched])
				matched++;
			if (matched < BufLen)
				break; // mismatch: the marker cannot start at Buffer[0]
			if (BufLen == MarkerLen) {
				// the whole marker has been seen: swallow it and stop
				EOF = true;
				return -1;
			}
			// everything pending is a proper prefix of the marker: look further ahead
			final int c = ParentStream.read();
			if (c == -1) {
				if (BufLen == 0) {
					EOF = true;
					return -1;
				}
				break; // parent exhausted: hand back the held-back characters
			}
			Buffer[BufLen++] = (char) c;
		}
		BufLen--;
		return Buffer[BufIndex++];
	}
	/** 
	 * Read characters into an array. This method will read 100 characters or the array length, 
	 * and until the end of the stream is reached.
	 * NB: Uses read() internally.
	 * @param cbuf cbuf - Destination buffer
	 * @return The number of characters read, or -1 if the end of the stream has been reached.
	 * @throws IOException If an I/O error occurs
	 */
	public int read(char[] cbuf) throws IOException {
		if (EOF)
			return -1;
		int ReadSize = 100;
		if (ReadSize > cbuf.length)
			ReadSize = cbuf.length;
		int i=0;
		for(;iNB:Implemented in terms of read().
	 * @param cbuf Destination buffer
	 * @param offset Offset at which to start storing characters
	 * @param len Maximum number of characters to read
	 * @return The number of characters read, or -1 if the end of the stream has been reached
	 * @throws IOException If an I/O error occurs
	 */
	public int read(char[] cbuf, int offset, int len) throws IOException {
		//System.out.print("offset="+offset+ " len="+len);
		if (EOF)
			return -1;
		int i=0;
		for(;i© 2015 - 2025 Weber Informatics LLC | Privacy Policy