// src.java.net.htmlparser.jericho.StreamedText Maven / Gradle / Ivy
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.1
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.
package net.htmlparser.jericho;
import java.io.*;
import java.nio.*;
/**
* Implements a buffered window into a stream of characters.
*
* Unless the buffer is explicitly {@linkplain #setBuffer(char[]) set}, it expands automatically as further characters are fetched from the stream.
*
* The {@link #setMinRequiredBufferBegin(int)} method can be used to inform the <code>StreamedText</code> object that characters up to a specified
* position are no longer required, allowing more characters to be fetched without the need to increase the buffer size.
*/
final class StreamedText implements CharSequence {
// source of characters; null when the instance was constructed from in-memory text.
private final Reader reader;
// window of buffered characters; buffer[0] corresponds to stream position bufferBegin.
private char[] buffer;
// true when the buffer was allocated internally (setBuffer(null)); a non-expandable buffer causes a BufferOverflowException instead of growing.
private boolean expandableBuffer;
private int bufferBegin=0; // the stream position of the first character held in the buffer. all text before it has been discarded.
private int readerPos=0; // the next position into which text will be loaded from the reader stream. must be >=bufferBegin and <=bufferBegin+buffer.length, except if one of the "text" argument constructors was used, in which case =Integer.MAX_VALUE.
private int minRequiredBufferBegin=0; // the minimum pos that must be kept in buffer. always >=bufferBegin.
// position just past the last character of the text; remains Integer.MAX_VALUE until the reader reports end of stream, or is the text length when constructed from in-memory text.
private int end=Integer.MAX_VALUE;
// size of the buffer allocated when no explicit buffer is supplied.
public static int INITIAL_EXPANDABLE_BUFFER_SIZE=8192; // same default as StAX
/**
 * Constructs a new <code>StreamedText</code> object that loads characters from the specified reader into the specified buffer.
 *
 * @param reader  the source of characters.
 * @param buffer  the buffer to use, or <code>null</code> to allocate an expandable buffer (see {@link #setBuffer(char[])}).
 */
public StreamedText(final Reader reader, final char[] buffer) {
this.reader=reader;
setBuffer(buffer);
}
/**
 * Constructs a new <code>StreamedText</code> object that loads characters from the specified reader into an internally allocated, expandable buffer.
 *
 * @param reader  the source of characters.
 */
public StreamedText(final Reader reader) {
this(reader,null);
}
/**
 * Constructs a new <code>StreamedText</code> object over text that is already fully held in memory.
 * <p>
 * No reader is used: the read position is set to <code>Integer.MAX_VALUE</code> so that no position ever triggers a read,
 * and the end of the text is known immediately.
 *
 * @param text  the array holding the text, used directly as the (non-expandable) buffer.
 * @param length  the number of characters of the array that make up the text.
 */
private StreamedText(final char[] text, final int length) {
    this.reader=null;
    this.readerPos=Integer.MAX_VALUE;
    this.end=length;
    this.buffer=text;
    this.expandableBuffer=false;
}
/**
 * Constructs a new <code>StreamedText</code> object over the entire contents of the specified character array.
 * <p>
 * The array is used directly as the buffer; it is not copied.
 *
 * @param text  the array containing the text.
 */
public StreamedText(final char[] text) {
this(text,text.length);
}
public StreamedText(final CharBuffer text) {
this(text.array(),text.length());
}
/**
 * Constructs a new <code>StreamedText</code> object over the specified character sequence.
 * <p>
 * The sequence is first converted to a character array by <code>toCharArray</code>
 * (defined elsewhere in this class; presumably a copy of the sequence's characters).
 *
 * @param text  the character sequence containing the text.
 */
public StreamedText(final CharSequence text) {
this(toCharArray(text));
}
/**
 * Sets the buffer into which characters are loaded.
 * <p>
 * Passing <code>null</code> allocates a new buffer of {@link #INITIAL_EXPANDABLE_BUFFER_SIZE} characters and marks it as expandable.
 * Passing an array uses that array as a fixed (non-expandable) buffer.
 *
 * @param buffer  the buffer to use, or <code>null</code> for an internally allocated expandable buffer.
 * @return this <code>StreamedText</code> object.
 */
public StreamedText setBuffer(char[] buffer) {
    if (buffer==null) {
        // no buffer supplied: allocate our own and allow it to grow
        this.buffer=new char[INITIAL_EXPANDABLE_BUFFER_SIZE];
        expandableBuffer=true;
        return this;
    }
    // caller-supplied buffers are never resized
    this.buffer=buffer;
    expandableBuffer=false;
    return this;
}
/**
 * Indicates whether the buffer was allocated internally and is therefore expandable.
 *
 * @return <code>true</code> if the buffer is expandable, <code>false</code> if it was explicitly supplied.
 */
public boolean hasExpandableBuffer() {
return expandableBuffer;
}
/**
 * Returns the character at the specified position of the text stream.
 * <p>
 * If the position has not yet been loaded, characters are first read from the stream up to and including that position.
 *
 * @param pos  the position of the character in the text stream.
 * @return the character at the specified position.
 */
public char charAt(final int pos) {
    if (readerPos<=pos) {
        // position not loaded yet
        readToPosition(pos);
    }
    checkPos(pos);
    final int bufferIndex=pos-bufferBegin;
    return buffer[bufferIndex];
}
public void setMinRequiredBufferBegin(final int minRequiredBufferBegin) {
if (minRequiredBufferBegin
* This method returns Integer.MAX_VALUE until an attempt is made to access a position past the end of the stream.
*
* @return the length of the text stream.
*/
public int length() {
    // end keeps its initial value of Integer.MAX_VALUE until the end of the stream has been detected
    final boolean endKnown=(end!=Integer.MAX_VALUE);
    if (endKnown) return end;
    throw new IllegalStateException("Length of streamed text cannot be determined until end of file has been reached");
}
/**
 * Returns the end position of the text, without throwing an exception if it is not yet known.
 *
 * @return the end position of the text, or <code>Integer.MAX_VALUE</code> if the end of the stream has not yet been reached.
 */
public int getEnd() {
return end;
}
/**
 * Ensures that the specified range of positions is loaded into the buffer and lies within the text.
 *
 * @param begin  the begin position of the range, inclusive.
 * @param end  the end position of the range, exclusive.
 * @throws IndexOutOfBoundsException if <code>begin</code> is not a valid buffered position or <code>end</code> is past the end of the text.
 */
private void prepareBufferRange(final int begin, final int end) {
    final int lastRequiredPos=end-1;
    // readerPos is the NEXT position to be loaded, so position readerPos itself is not yet in the buffer (same test as charAt)
    if (lastRequiredPos>=readerPos) readToPosition(lastRequiredPos);
    checkPos(begin);
    if (end>this.end) throw new IndexOutOfBoundsException();
}
/**
 * Writes the specified range of the text to the specified writer.
 *
 * @param writer  the destination of the characters.
 * @param begin  the begin position, inclusive.
 * @param end  the end position, exclusive.
 * @throws IOException if the writer throws it.
 */
public void writeTo(final Writer writer, final int begin, final int end) throws IOException {
    prepareBufferRange(begin,end);
    // offsets are computed only after the range has been prepared, as loading may move the buffer window
    final int offset=begin-bufferBegin;
    final int count=end-begin;
    writer.write(buffer,offset,count);
}
/**
 * Returns a new string that is a substring of this text.
 * <p>
 * The substring begins at the specified <code>begin</code> position and extends to the character at position <code>end - 1</code>.
 * Thus the length of the substring is <code>end-begin</code>.
 *
 * @param begin  the begin position, inclusive.
 * @param end  the end position, exclusive.
 * @return a new string that is a substring of this text.
 */
public String substring(final int begin, final int end) {
    prepareBufferRange(begin,end);
    // offsets are computed only after the range has been prepared, as loading may move the buffer window
    final int offset=begin-bufferBegin;
    final int count=end-begin;
    return new String(buffer,offset,count);
}
/**
 * Returns a new character sequence that is a subsequence of this sequence.
 * <p>
 * The returned <code>CharSequence</code> is only guaranteed to be valid as long as no further operations are performed on this <code>StreamedText</code> object.
 * Any subsequent method call could invalidate the underlying buffer used by the <code>CharSequence</code>.
 *
 * @param begin  the begin position, inclusive.
 * @param end  the end position, exclusive.
 * @return a new character sequence that is a subsequence of this sequence.
 */
public CharSequence subSequence(final int begin, final int end) {
// This has not been benchmarked. It is possible that returning substring(begin,end) results in faster code even though it requires more memory allocation.
return getCharBuffer(begin,end);
}
/**
 * Returns a <code>CharBuffer</code> that wraps the part of the internal buffer holding the specified range of the text.
 * <p>
 * The characters are not copied, so the returned object shares the internal buffer of this <code>StreamedText</code>.
 *
 * @param begin  the begin position, inclusive.
 * @param end  the end position, exclusive.
 * @return a <code>CharBuffer</code> view of the specified range.
 */
public CharBuffer getCharBuffer(final int begin, final int end) {
    prepareBufferRange(begin,end);
    // offsets are computed only after the range has been prepared, as loading may move the buffer window
    final int offset=begin-bufferBegin;
    final int count=end-begin;
    return CharBuffer.wrap(buffer,offset,count);
}
/**
 * Not supported: this method always throws an exception.
 *
 * @return never returns normally.
 * @throws UnsupportedOperationException always.
 */
public String toString() {
throw new UnsupportedOperationException("Streamed text can not be converted to a string");
}
/**
 * Returns a string describing the current buffer size and the internal position markers, for debugging purposes.
 *
 * @return a string containing the buffer size, <code>bufferBegin</code>, <code>minRequiredBufferBegin</code> and <code>readerPos</code>.
 */
public String getDebugInfo() {
    final String sizeInfo="Buffer size: \""+buffer.length+"\"";
    final String positionInfo=", bufferBegin="+bufferBegin+", minRequiredBufferBegin="+minRequiredBufferBegin+", readerPos="+readerPos;
    return sizeInfo+positionInfo;
}
/**
 * Returns the internal buffer itself (not a copy).
 *
 * @return the internal character buffer.
 */
public char[] getBuffer() {
return buffer;
}
/**
 * Returns the position in the text stream of the first character held in the buffer.
 *
 * @return the stream position corresponding to index 0 of the buffer.
 */
public int getBufferBegin() {
return bufferBegin;
}
/**
 * Checks that the specified position is still held in the buffer and lies before the end of the text.
 *
 * @param pos  the position to check.
 * @throws IndexOutOfBoundsException if the position is before the beginning of the buffer or at or past the end of the text.
 */
private void checkPos(final int pos) {
    // hopefully inlined by the compiler
    if (pos<bufferBegin || pos>=end) throw new IndexOutOfBoundsException();
}
/**
 * Returns the first position that cannot be held in the buffer at its current size while the minimum required position is retained.
 * <p>
 * Reading to this position or beyond requires the buffer to be expanded, which fails with a <code>BufferOverflowException</code> if the buffer is not expandable.
 *
 * @return <code>minRequiredBufferBegin</code> plus the current buffer length.
 */
public int getBufferOverflowPosition() {
return minRequiredBufferBegin+buffer.length;
}
/**
 * Reads characters from the reader until the specified position has been loaded into the buffer, or the end of the stream is reached.
 * <p>
 * If the end of the stream is reached, <code>end</code> is set to the position just past the last character read.
 *
 * @param pos  the position that must be loaded.
 * @throws BufferOverflowException if the position can not be held in the buffer together with <code>minRequiredBufferBegin</code> and the buffer is not expandable.
 * @throws RuntimeException wrapping any <code>IOException</code> raised while reading.
 */
private void readToPosition(final int pos) {
try {
// pos lies beyond the window currently covered by the buffer
if (pos>=bufferBegin+buffer.length) {
// even if everything before minRequiredBufferBegin were dropped, pos would not fit
if (pos>=minRequiredBufferBegin+buffer.length) {
if (!expandableBuffer) throw new BufferOverflowException(); // unfortunately BufferOverflowException doesn't accept a message argument, otherwise it would include the message "StreamedText buffer too small to keep positions "+minRequiredBufferBegin+" and "+pos+" simultaneously"
// request room for every position from minRequiredBufferBegin to pos inclusive
expandBuffer(pos-minRequiredBufferBegin+1);
}
// defined outside this view; presumably drops text before minRequiredBufferBegin and advances bufferBegin - verify
discardUsedText();
}
while (readerPos<=pos) {
// ask the reader to fill all remaining space in the buffer, starting at the slot for readerPos
final int charCount=reader.read(buffer,readerPos-bufferBegin,bufferBegin+buffer.length-readerPos);
if (charCount==-1) {
// end of stream: record where the text ends and stop reading
end=readerPos;
break;
}
readerPos+=charCount;
}
} catch (IOException ex) {
// callers such as charAt can not declare checked exceptions, so rethrow unchecked
throw new RuntimeException(ex);
}
}
private void expandBuffer(final int minSize) throws IOException {
int newSize=buffer.length*2;
if (newSize