All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.terrier.utility.LookAheadStreamCaseInsensitive Maven / Gradle / Ivy

The newest version!
/*
 * Terrier - Terabyte Retriever
 * Webpage: http://terrier.org
 * Contact: terrier{a.}dcs.gla.ac.uk
 * University of Glasgow - School of Computing Science
 * http://www.gla.ac.uk/
 *
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is LookAheadStreamCaseInsensitive.java.
 *
 * The Original Code is Copyright (C) 2004-2020 the University of Glasgow.
 * All Rights Reserved.
 *
 * Contributor(s):
 *   Craig Macdonald  (original author)
 *   Vassilis Plachouras 
 */
package org.terrier.utility;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.Locale;
/** Version of LookAheadStream that is case-insensitive. This version assumes that the characters
  * that form the end-of-stream markers are single bytes, from the default character set. Use with care.
  * <p>
  * The marker is upper-cased once at construction time using {@link Locale#ROOT}, and every
  * byte read from the wrapped stream is upper-cased with {@link Character#toUpperCase(char)}
  * before being compared with the marker. The bytes handed back to the caller keep their
  * original case.
  * @author Craig Macdonald
  * @since 2.1
  */
public class LookAheadStreamCaseInsensitive extends LookAheadStream
{
	/** Create a LookAheadStream that is case insensitive. The upper-cased marker is
	  * passed to the two-argument superclass constructor.
	  * @param parent The InputStream to wrap
	  * @param endMarker the marker at which to give EOF
	  */
	public LookAheadStreamCaseInsensitive(InputStream parent, String endMarker) {
		// Locale.ROOT keeps the folding independent of the JVM's default locale
		// (e.g. a Turkish locale would turn 'i' into a dotted capital I), matching
		// the locale-independent Character.toUpperCase used in read().
		super(parent, endMarker.toUpperCase(Locale.ROOT));
	}

	/** Create a LookAheadStream that is case insensitive. The upper-cased marker and
	  * the encoding name are passed to the three-argument superclass constructor.
	  * @param parent The InputStream to wrap
	  * @param endMarker the marker at which to give EOF
	  * @param encoding name for encoding
	  * @throws UnsupportedEncodingException as declared by the superclass constructor,
	  *       presumably when the named encoding is not supported
	  */
	public LookAheadStreamCaseInsensitive(InputStream parent, String endMarker, String encoding) throws UnsupportedEncodingException {
		// See the other constructor for why Locale.ROOT is used.
		super(parent, endMarker.toUpperCase(Locale.ROOT), encoding);
	}

	/**
	 * Read a character from the parent stream, first checking that
	 * it doesn't form part of the end marker.
	 * <p>
	 * Bytes that match a prefix of the marker (ignoring case) are held back in the
	 * buffer. If the whole marker is matched, end of stream is reported and the marker
	 * bytes are never returned. If the match fails, or the parent stream ends before
	 * the marker is complete, the held-back bytes are returned one per call in their
	 * original case.
	 * <p>
	 * NOTE(review): when a partial match fails, all held-back bytes are released
	 * without being re-examined, so a marker that begins inside them is not detected
	 * (e.g. the input {@code <</DOC>} with the marker {@code </DOC>}).
	 * @return int the code of the read character, or -1 if the end of
	 *       the stream has been reached.
	 * @throws IOException if there is any error while reading from the stream.
	 */
	@Override
	public int read() throws IOException {
		if (EOF)
			return -1;
		// Drain bytes held back by an earlier, failed marker match.
		if (BufLen > 0) {
			BufLen--;
			return Buffer[BufIndex++];
		}
		while (true) {
			final int c = ParentStream.read();
			if (c == -1) {
				if (BufLen == 0) {
					EOF = true;
					return -1;
				}
				// The parent ended in the middle of a partial marker match: those
				// bytes are ordinary content, so hand them out instead of dropping
				// them. Once they are drained, the next call reads the parent again,
				// sees -1 with an empty buffer and reports end of stream.
				BufIndex = 0;
				break;
			}
			// Compare before storing: BufLen is also the position within the marker.
			final boolean matchesMarker = Character.toUpperCase((char) c) == EndMarker[BufLen];
			Buffer[BufLen++] = c;
			if (!matchesMarker) {
				// Not the marker after all: release everything buffered so far.
				BufIndex = 0;
				break;
			}
			if (BufLen == MarkerLen) {
				// Complete marker seen: swallow it and signal end of stream.
				EOF = true;
				return -1;
			}
		}
		BufLen--;
		return Buffer[BufIndex++];
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy