// org.htmlparser.lexer.Stream -- source listing taken from the bboss-htmlparser artifact page.
// HTMLParser Library $Name: v1_5 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Stream.java,v $
// $Author: derrickoswald $
// $Date: 2005/05/15 11:49:04 $
// $Revision: 1.14 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
package org.htmlparser.lexer;
import java.io.IOException;
import java.io.InputStream;
/**
 * Provides for asynchronous fetching from a stream.
 * <p>
 * Every byte read from the underlying stream is kept in an internal buffer
 * until {@link #close} is called, so any previously read position can be
 * revisited with <code>mark</code>/<code>reset</code>.
 * The buffer is filled either on demand by {@link #read()} (a synchronous
 * fill) or continuously by executing {@link #run} on another thread.
 * <p>
 * {@link #fill} and {@link #close} are synchronized on this object;
 * {@link #read()}, {@link #available}, {@link #mark} and {@link #reset}
 * are not, see the comment in {@link #read()} for the rationale.
 */
public class Stream extends InputStream implements Runnable
{
    /**
     * The number of calls to fill.
     * Note: to be removed.
     */
    public int fills = 0;

    /**
     * The number of reallocations.
     * Note: to be removed.
     */
    public int reallocations = 0;

    /**
     * The number of synchronous (blocking) fills.
     * Note: to be removed.
     */
    public int synchronous = 0;

    /**
     * An initial buffer size.
     */
    protected static final int BUFFER_SIZE = 4096;

    /**
     * Return value when no more characters are left.
     */
    protected static final int EOF = -1;

    /**
     * The underlying stream.
     * Set to <code>null</code> once it has been exhausted or closed.
     */
    protected volatile InputStream mIn;

    /**
     * The bytes read so far.
     * <code>null</code> until the first fill and after {@link #close}.
     */
    public volatile byte[] mBuffer;

    /**
     * The number of valid bytes in the buffer.
     */
    public volatile int mLevel;

    /**
     * The offset of the next byte returned by read().
     */
    protected int mOffset;

    /**
     * The content length from the HTTP header.
     * Zero means the length is unknown.
     */
    protected int mContentLength;

    /**
     * The bookmark, or -1 if <code>mark</code> has not been called.
     */
    protected int mMark;

    /**
     * Construct a stream with no assumptions about the number of bytes available.
     * @param in The input stream to use.
     */
    public Stream (InputStream in)
    {
        this (in, 0);
    }

    /**
     * Construct a stream to read the given number of bytes.
     * @param in The input stream to use.
     * @param bytes The maximum number of bytes to read.
     * This should be set to the ContentLength from the HTTP header.
     * A negative or zero value indicates an unknown number of bytes.
     */
    public Stream (InputStream in, int bytes)
    {
        mIn = in;
        mBuffer = null;
        mLevel = 0;
        mOffset = 0;
        mContentLength = bytes < 0 ? 0 : bytes;
        mMark = -1;
    }

    /**
     * Detach and close the underlying stream.
     * The reference is cleared <em>before</em> <code>close()</code> is
     * invoked, so that a failing close cannot leave an unusable stream
     * attached to this object.
     * Callers must hold the monitor of this object.
     * @exception IOException If the underlying stream close() throws one.
     */
    private void closeUnderlying ()
        throws
            IOException
    {
        InputStream in;

        in = mIn;
        mIn = null;
        if (null != in)
            in.close ();
    }

    /**
     * Fetch more bytes from the underlying stream.
     * Has no effect if the underlying stream has been drained.
     * @param force If <code>true</code>, an attempt is made to read from the
     * underlying stream, even if bytes are available. If <code>false</code>,
     * a read of the underlying stream will not occur if there are already
     * bytes available.
     * @return <code>true</code> if not at the end of the input stream.
     * @exception IOException If the underlying stream read(), available()
     * or close() throws one.
     */
    protected synchronized boolean fill (boolean force)
        throws
            IOException
    {
        int size;
        byte[] buffer;
        int read;
        boolean ret;

        ret = false;
        if (null != mIn) // mIn goes null when it's been sucked dry
        {
            if (!force)
            {   // check for change of state while waiting on the monitor in a synchronous call
                if (0 != available ())
                    return (true);
                synchronous++;
            }

            // get some buffer space
            if (0 == mContentLength)
            {   // unknown content length... keep doubling
                if (null == mBuffer)
                {
                    mBuffer = new byte[Math.max (BUFFER_SIZE, mIn.available ())];
                    buffer = mBuffer;
                }
                else
                {
                    // grow when less than half an initial buffer of room remains
                    if (mBuffer.length - mLevel < BUFFER_SIZE / 2)
                        buffer = new byte[Math.max (mBuffer.length * 2, mBuffer.length + mIn.available ())];
                    else
                        buffer = mBuffer;
                }
                size = buffer.length - mLevel;
            }
            else
            {   // known content length... allocate once
                size = mContentLength - mLevel;
                if (null == mBuffer)
                    mBuffer = new byte[size];
                buffer = mBuffer;
            }

            // read into the end of the 'new' buffer
            read = mIn.read (buffer, mLevel, size);
            if (-1 == read)
                closeUnderlying ();
            else
            {
                if (mBuffer != buffer)
                {   // copy the bytes previously read, then publish the bigger buffer
                    System.arraycopy (mBuffer, 0, buffer, 0, mLevel);
                    mBuffer = buffer;
                    reallocations++;
                }
                // mLevel is only raised after the bytes are in the published buffer
                mLevel += read;
                if ((0 != mContentLength) && (mLevel == mContentLength))
                    closeUnderlying (); // everything promised has arrived
                ret = true;
                fills++;
            }
        }

        return (ret);
    }

    //
    // Runnable interface
    //

    /**
     * Continually read the underlying stream until exhausted.
     * @see java.lang.Thread#run()
     */
    public void run ()
    {
        boolean filled;

        do
        {   // keep hammering the socket with no delay, it's metered upstream
            try
            {
                filled = fill (true);
            }
            catch (IOException ioe)
            {
                ioe.printStackTrace ();
                // exit the thread if there is a problem,
                // let the synchronous reader find out about it
                filled = false;
            }
        }
        while (filled);
    }

    //
    // InputStream overrides
    //

    /**
     * Reads the next byte of data from the input stream. The value byte is
     * returned as an <code>int</code> in the range <code>0</code> to
     * <code>255</code>. If no byte is available because the end of the stream
     * has been reached, the value <code>-1</code> is returned. This method
     * blocks until input data is available, the end of the stream is detected,
     * or an exception is thrown.
     * @return The next byte of data, or <code>-1</code> if the end of the
     * stream is reached.
     * @exception IOException If an I/O error occurs.
     */
    public int read () throws IOException
    {
        int ret;

        // The following is unsynchronized code.
        // Some would argue that unsynchronized access isn't thread safe
        // but I think I can rationalize it in this case...
        // The two volatile members are mLevel and mBuffer (besides mIn).
        // If (mOffset >= mLevel) turns false after the test, fill is
        // superfluously called, but it's synchronized and figures it out.
        // (mOffset < mLevel) only goes more true by the operation of the
        // background thread, it increases the value of mLevel
        // and volatile int access is atomic.
        // If mBuffer changes by the operation of the background thread,
        // the array pointed to can only be bigger than the previous buffer,
        // and hence no array bounds exception can be raised.
        if (0 == available ())
            fill (false);
        if (0 != available ())
            ret = mBuffer[mOffset++] & 0xff;
        else
            ret = EOF;

        return (ret);
    }

    /**
     * Returns the number of bytes that can be read (or skipped over) from
     * this input stream without blocking by the next caller of a method for
     * this input stream. The next caller might be the same thread or
     * another thread.
     * Here that is the number of buffered bytes not yet returned by
     * <code>read</code>.
     * @return The number of bytes that can be read from this input stream
     * without blocking.
     * @exception IOException Never thrown here. Declared by the superclass.
     */
    public int available () throws IOException
    {
        return (mLevel - mOffset);
    }

    /**
     * Closes this input stream and releases any system resources associated
     * with the stream.
     * The buffer, read position, content length and mark are always
     * reset and the underlying stream is always detached, even when closing
     * the underlying stream fails.
     * @exception IOException If closing the underlying stream throws one.
     */
    public synchronized void close () throws IOException
    {
        try
        {
            closeUnderlying ();
        }
        finally
        {
            mBuffer = null;
            mLevel = 0;
            mOffset = 0;
            mContentLength = 0;
            mMark = -1;
        }
    }

    /**
     * Repositions this stream to the position at the time the
     * <code>mark</code> method was last called on this input stream.
     * <p>
     * Because all bytes read so far are retained in the buffer, the bytes
     * read since the most recent call to <code>mark</code> (or since the
     * start of the stream, if <code>mark</code> has not been called) are
     * resupplied to subsequent callers of the <code>read</code> method,
     * followed by any bytes that otherwise would have been the next input
     * data as of the time of the call to <code>reset</code>.
     *
     * @exception IOException Never thrown. Just for subclassers.
     * @see java.io.InputStream#mark(int)
     * @see java.io.IOException
     */
    public void reset () throws IOException
    {
        if (-1 != mMark)
            mOffset = mMark;
        else
            mOffset = 0;
    }

    /**
     * Tests if this input stream supports the <code>mark</code> and
     * <code>reset</code> methods. Whether or not <code>mark</code> and
     * <code>reset</code> are supported is an invariant property of a
     * particular input stream instance. The <code>markSupported</code> method
     * of <code>InputStream</code> returns <code>false</code>; this class
     * supports both.
     *
     * @return <code>true</code>.
     * @see java.io.InputStream#mark(int)
     * @see java.io.InputStream#reset()
     */
    public boolean markSupported ()
    {
        return (true);
    }

    /**
     * Marks the current position in this input stream. A subsequent call to
     * the <code>reset</code> method repositions this stream at the last marked
     * position so that subsequent reads re-read the same bytes.
     * <p>
     * In the general contract of <code>mark</code> the <code>readlimit</code>
     * argument tells the input stream how many bytes may be read before the
     * mark position gets invalidated. This implementation ignores it and
     * only records the current read offset.
     *
     * @param readlimit Not used.
     * @see java.io.InputStream#reset()
     */
    public void mark (int readlimit)
    {
        mMark = mOffset;
    }
}