All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.htmlparser.lexer.Source Maven / Gradle / Ivy

// HTMLParser Library $Name: v1_5 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Source.java,v $
// $Author: derrickoswald $
// $Date: 2005/05/15 11:49:04 $
// $Revision: 1.20 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.lexer;

import org.htmlparser.util.ParserException;

import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;

/**
 * A buffered source of characters.
 * A Source is very similar to a Reader, like:
 * 
 * new InputStreamReader (connection.getInputStream (), charset)
 * 
* It differs from the above, in three ways: *
    *
  • the fetching of bytes may be asynchronous
  • *
  • the character set may be changed, which resets the input stream
  • *
  • characters may be requested more than once, so in general they * will be buffered
  • *
*/ public abstract class Source extends Reader implements Serializable { /** * Return value when the source is exhausted. * Has a value of {@value}. */ public static final int EOF = -1; /** * Get the encoding being used to convert characters. * @return The current encoding. */ public abstract String getEncoding (); /** * Set the encoding to the given character set. * If the current encoding is the same as the requested encoding, * this method is a no-op. Otherwise any subsequent characters read from * this source will have been decoded using the given character set.

* If characters have already been consumed from this source, it is expected * that an exception will be thrown if the characters read so far would * be different if the encoding being set was used from the start. * @param character_set The character set to use to convert characters. * @exception ParserException If a character mismatch occurs between * characters already provided and those that would have been returned * had the new character set been in effect from the beginning. An * exception is also thrown if the character set is not recognized. */ public abstract void setEncoding (String character_set) throws ParserException; // // Reader overrides // /** * Does nothing. * It's supposed to close the source, but use {@link #destroy} instead. * @exception IOException not used * @see #destroy */ public abstract void close () throws IOException; /** * Read a single character. * This method will block until a character is available, * an I/O error occurs, or the source is exhausted. * @return The character read, as an integer in the range 0 to 65535 * (0x00-0xffff), or {@link #EOF} if the source is exhausted. * @exception IOException If an I/O error occurs. */ public abstract int read () throws IOException; /** * Read characters into a portion of an array. This method will block * until some input is available, an I/O error occurs, or the source is * exhausted. * @param cbuf Destination buffer * @param off Offset at which to start storing characters * @param len Maximum number of characters to read * @return The number of characters read, or {@link #EOF} if the source is * exhausted. * @exception IOException If an I/O error occurs. */ public abstract int read (char[] cbuf, int off, int len) throws IOException; /** * Read characters into an array. * This method will block until some input is available, an I/O error occurs, * or the source is exhausted. * @param cbuf Destination buffer. * @return The number of characters read, or {@link #EOF} if the source is * exhausted. * @exception IOException If an I/O error occurs. */ public abstract int read (char[] cbuf) throws IOException; /** * Tell whether this source is ready to be read. * @return true if the next read() is guaranteed not to block * for input, false otherwise. * Note that returning false does not guarantee that the next read will block. * @exception IOException If an I/O error occurs. */ public abstract boolean ready () throws IOException; /** * Reset the source. * Repositions the read point to begin at zero. */ public abstract void reset (); /** * Tell whether this source supports the mark() operation. * @return true if and only if this source supports the mark * operation. */ public abstract boolean markSupported (); /** * Mark the present position. * Subsequent calls to {@link #reset} * will attempt to reposition the source to this point. Not all * sources support the mark() operation. * @param readAheadLimit The minimum number of characters that can be read * before this mark becomes invalid. * @exception IOException If an I/O error occurs. */ public abstract void mark (int readAheadLimit) throws IOException; /** * Skip characters. * This method will block until some characters are available, * an I/O error occurs, or the source is exhausted. * Note: n is treated as an int * @param n The number of characters to skip. * @return The number of characters actually skipped * @exception IOException If an I/O error occurs. */ public abstract long skip (long n) throws IOException; // // Methods not in your Daddy's Reader // /** * Undo the read of a single character. * @exception IOException If the source is closed or no characters have * been read. */ public abstract void unread () throws IOException; /** * Retrieve a character again. * @param offset The offset of the character. * @return The character at offset. * @exception IOException If the source is closed or the offset is beyond * {@link #offset()}. */ public abstract char getCharacter (int offset) throws IOException; /** * Retrieve characters again. * @param array The array of characters. * @param offset The starting position in the array where characters are to be placed. * @param start The starting position, zero based. * @param end The ending position * (exclusive, i.e. the character at the ending position is not included), * zero based. * @exception IOException If the source is closed or the start or end is * beyond {@link #offset()}. */ public abstract void getCharacters (char[] array, int offset, int start, int end) throws IOException; /** * Retrieve a string comprised of characters already read. * @param offset The offset of the first character. * @param length The number of characters to retrieve. * @return A string containing the length characters at offset. * @exception IOException If the source is closed. */ public abstract String getString (int offset, int length) throws IOException; /** * Append characters already read into a StringBuffer. * @param buffer The buffer to append to. * @param offset The offset of the first character. * @param length The number of characters to retrieve. * @exception IOException If the source is closed or the offset or * (offset + length) is beyond {@link #offset()}. */ public abstract void getCharacters (StringBuilder buffer, int offset, int length) throws IOException; /** * Close the source. * Once a source has been closed, further {@link #read() read}, * {@link #ready ready}, {@link #mark mark}, {@link #reset reset}, * {@link #skip skip}, {@link #unread unread}, * {@link #getCharacter getCharacter} or {@link #getString getString} * invocations will throw an IOException. * Closing a previously-closed source, however, has no effect. * @exception IOException If an I/O error occurs. */ public abstract void destroy () throws IOException; /** * Get the position (in characters). * @return The number of characters that have already been read, or * {@link #EOF} if the source is closed. */ public abstract int offset (); /** * Get the number of available characters. * @return The number of characters that can be read without blocking. */ public abstract int available (); }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy