
vuegwt.shaded.net.htmlparser.jericho.StreamedText Maven / Gradle / Ivy
Show all versions of vue-gwt-processors Show documentation
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.4
// Copyright (C) 2004-2013 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// 3. The Apache License version 2.0,
// included in this distribution in the file licence-apache-2.0.html
// or available at http://www.apache.org/licenses/LICENSE-2.0.html
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.
package net.htmlparser.jericho;
import java.io.*;
import java.nio.*;
/**
* Implements a buffered window into a stream of characters.
* <p>
* Unless the buffer is explicitly {@linkplain #setBuffer(char[]) set}, it expands automatically as further characters are fetched from the stream.
* <p>
* The {@link #setMinRequiredBufferBegin(int)} method can be used to inform the <code>StreamedText</code> object that characters up to a specified
* position are no longer required, allowing more characters to be fetched without the need to increase the buffer size.
*/
final class StreamedText implements CharSequence {
// Source of characters; null when this object was built from in-memory text via the private (char[],int) constructor.
private final Reader reader;
// Window of characters currently held; the character at position p is stored at buffer[p-bufferBegin].
private char[] buffer;
// True when the buffer was allocated by setBuffer(null) rather than supplied by the caller.
private boolean expandableBuffer;
private int bufferBegin=0; // the current position of the first character of the buffer. all text before it has been discarded.
private int readerPos=0; // the next position into which text will be loaded from the reader stream. must be >=bufferBegin and <=bufferBegin+buffer.length, except if one of the "text" argument constructors was used, in which case =Integer.MAX_VALUE.
private int minRequiredBufferBegin=0; // the minimum pos that must be kept in buffer. always >=bufferBegin.
// Position of the end of the text. Stays Integer.MAX_VALUE until the reader reports end of stream, or is set directly from the known text length.
private int end=Integer.MAX_VALUE;
private boolean atEndOfStream=false; // This flag is set when charAt(int) reaches the end of stream. We can't just check for charAt returning END_OF_STREAM because some source documents actually contain the U+FFFF character.
public static final char END_OF_STREAM='\uFFFF'; // character returned when end of stream is encountered, but calling code should always check atEndOfStream().
// Size of the buffer allocated by setBuffer(null). NOTE(review): this is a non-final public static, so it can be reassigned at runtime.
public static int INITIAL_EXPANDABLE_BUFFER_SIZE=8192; // same default as StAX
/**
 * Constructs a <code>StreamedText</code> that fetches its characters from the specified reader.
 *
 * @param reader the source of characters.
 * @param buffer the buffer to use, or <code>null</code> to have an expandable buffer allocated by {@link #setBuffer(char[])}.
 */
public StreamedText(final Reader reader, final char[] buffer) {
    this.reader = reader;
    // Buffer selection (caller-supplied versus internally allocated) is delegated entirely to setBuffer.
    this.setBuffer(buffer);
}
/**
 * Constructs a <code>StreamedText</code> over the specified reader without supplying a buffer.
 * A <code>null</code> buffer is passed on, which makes {@link #setBuffer(char[])} allocate an expandable one.
 *
 * @param reader the source of characters.
 */
public StreamedText(final Reader reader) {
    this(reader, null);
}
// Wraps text that is already in memory: the array is used directly as the (fixed) buffer and no reader is involved.
// readerPos is pinned to Integer.MAX_VALUE, the value documented on that field for the "text" argument constructors.
private StreamedText(final char[] text, final int length) {
    this.reader = null;
    this.buffer = text;
    this.expandableBuffer = false;
    this.readerPos = Integer.MAX_VALUE;
    this.end = length; // the end of the text is known up front
}
/**
 * Constructs a <code>StreamedText</code> over an in-memory character array, treating the whole array as the text.
 * The array is used directly rather than copied.
 *
 * @param text the characters of the text.
 */
public StreamedText(final char[] text) {
    this(text, text.length);
}
public StreamedText(final CharBuffer text) {
this(text.array(),text.length());
}
/**
 * Constructs a <code>StreamedText</code> over the specified character sequence.
 * The sequence is first converted to a character array by <code>toCharArray</code>.
 *
 * @param text the text.
 */
public StreamedText(final CharSequence text) {
    this(toCharArray(text));
}
/**
 * Sets the buffer used to hold the window of characters.
 * <p>
 * A non-null argument is used as a fixed buffer. A <code>null</code> argument allocates a new buffer of
 * {@link #INITIAL_EXPANDABLE_BUFFER_SIZE} characters and marks it as expandable.
 *
 * @param buffer the buffer to use, or <code>null</code> for an internally allocated expandable buffer.
 * @return this <code>StreamedText</code> object.
 */
public StreamedText setBuffer(char[] buffer) {
    final boolean allocateInternally = (buffer == null);
    this.buffer = allocateInternally ? new char[INITIAL_EXPANDABLE_BUFFER_SIZE] : buffer;
    this.expandableBuffer = allocateInternally;
    return this;
}
/**
 * Indicates whether the current buffer was allocated internally as an expandable buffer.
 *
 * @return <code>true</code> if the buffer is expandable, <code>false</code> if it was supplied by the caller.
 */
public boolean hasExpandableBuffer() {
    return this.expandableBuffer;
}
/**
 * Returns the character at the specified position, first reading from the stream if that position has not been loaded yet.
 * <p>
 * Because a document may legitimately contain the {@link #END_OF_STREAM} character, callers should confirm
 * the end of stream with {@link #atEndOfStream()} rather than rely on the returned value alone.
 *
 * @param pos the position of the character.
 * @return the character at the specified position, or {@link #END_OF_STREAM} if the end of stream has been reached.
 */
public char charAt(final int pos) {
    if (pos >= readerPos) {
        readToPosition(pos);
    }
    if (checkPos(pos)) {
        return buffer[pos - bufferBegin];
    }
    // Record the condition so that atEndOfStream() can report it.
    atEndOfStream = true;
    return END_OF_STREAM;
}
/**
 * Reports whether a preceding {@link #charAt(int)} call flagged the end of stream, and clears the flag.
 * <p>
 * The flag is reset on every call in case further <code>charAt</code> calls are made, so a second consecutive call returns <code>false</code>.
 *
 * @return <code>true</code> if the end-of-stream flag was set when this method was called.
 */
public final boolean atEndOfStream() {
    final boolean wasFlagged = atEndOfStream;
    atEndOfStream = false;
    return wasFlagged;
}
public void setMinRequiredBufferBegin(final int minRequiredBufferBegin) {
if (minRequiredBufferBegin
* This method returns Integer.MAX_VALUE until an attempt is made to access a position past the end of the stream.
*
* @return the length of the text stream.
*/
public int length() {
    // The end position holds the Integer.MAX_VALUE placeholder until the real end is known.
    // Unlike getEnd(), this method never hands that placeholder to the caller; it throws instead.
    if (end != Integer.MAX_VALUE) {
        return end;
    }
    throw new IllegalStateException("Length of streamed text cannot be determined until end of file has been reached");
}
/**
 * Returns the end position of the text as currently known, without any check.
 *
 * @return the end position, or <code>Integer.MAX_VALUE</code> if it has not been determined yet.
 */
public int getEnd() {
    return this.end;
}
// Makes sure every position in [begin,end) has been fetched from the stream before the caller touches the buffer.
// Returns false if the end of stream was reached before the end position; whatever checkPos(begin) does for an
// unavailable begin position (return false or throw) is passed through unchanged.
private boolean prepareBufferRange(final int begin, final int end) {
    final int lastRequiredPos = end - 1;
    // readerPos is the NEXT position to be loaded, so position readerPos itself is not in the buffer yet.
    // The comparison must therefore be >= (as in charAt); using > skipped the read when lastRequiredPos==readerPos.
    if (lastRequiredPos >= readerPos) {
        readToPosition(lastRequiredPos);
    }
    return checkPos(begin) && end <= this.end;
}
// not used
/**
 * Writes the characters in the specified range to the specified writer.
 *
 * @param writer the destination.
 * @param begin the begin position, inclusive.
 * @param end the end position, exclusive.
 * @throws IOException if the writer fails.
 * @throws IndexOutOfBoundsException if the range could not be made available in the buffer.
 */
public void writeTo(final Writer writer, final int begin, final int end) throws IOException {
    final boolean rangeAvailable = prepareBufferRange(begin, end);
    if (rangeAvailable) {
        final int offset = begin - bufferBegin;
        writer.write(buffer, offset, end - begin);
        return;
    }
    throw new IndexOutOfBoundsException();
}
/**
 * Returns a new string that is a substring of this text.
 * <p>
 * The substring begins at the specified <code>begin</code> position and extends to the character at position <code>end - 1</code>.
 * Thus the length of the substring is <code>end-begin</code>.
 *
 * @param begin the begin position, inclusive.
 * @param end the end position, exclusive.
 * @return a new string that is a substring of this text.
 * @throws IndexOutOfBoundsException if the range could not be made available in the buffer.
 */
public String substring(final int begin, final int end) {
    // A single prepareBufferRange call is enough; the previous extra call only fed an unused local.
    if (!prepareBufferRange(begin, end)) {
        throw new IndexOutOfBoundsException();
    }
    return new String(buffer, begin - bufferBegin, end - begin);
}
/**
 * Returns a new character sequence that is a subsequence of this sequence.
 * <p>
 * The returned <code>CharSequence</code> is only guaranteed to be valid as long as no further operations are performed on this <code>StreamedText</code> object.
 * Any subsequent method call could invalidate the underlying buffer used by the <code>CharSequence</code>.
 *
 * @param begin the begin position, inclusive.
 * @param end the end position, exclusive.
 * @return a new character sequence that is a subsequence of this sequence.
 */
public CharSequence subSequence(final int begin, final int end) {
    // Not benchmarked: returning substring(begin,end) might be faster even though it allocates more memory.
    final CharSequence view = getCharBuffer(begin, end);
    return view;
}
/**
 * Returns a <code>CharBuffer</code> that wraps the part of the internal buffer holding the specified range.
 * No characters are copied, so the result shares storage with this object.
 *
 * @param begin the begin position, inclusive.
 * @param end the end position, exclusive.
 * @return a buffer wrapping the characters in the specified range.
 * @throws IndexOutOfBoundsException if the range could not be made available in the buffer.
 */
public CharBuffer getCharBuffer(final int begin, final int end) {
    final boolean rangeAvailable = prepareBufferRange(begin, end);
    if (!rangeAvailable) {
        throw new IndexOutOfBoundsException();
    }
    final int offset = begin - bufferBegin;
    final int count = end - begin;
    return CharBuffer.wrap(buffer, offset, count);
}
/**
 * Always throws, because this object does not support conversion to a string.
 *
 * @throws UnsupportedOperationException always.
 */
public String toString() {
    final String message = "Streamed text can not be converted to a string";
    throw new UnsupportedOperationException(message);
}
/**
 * Returns a one-line summary of the buffer state for diagnostic purposes.
 *
 * @return a string listing the buffer size, <code>bufferBegin</code>, <code>minRequiredBufferBegin</code> and <code>readerPos</code>.
 */
public String getDebugInfo() {
    final StringBuilder info = new StringBuilder();
    info.append("Buffer size: \"").append(buffer.length);
    info.append("\", bufferBegin=").append(bufferBegin);
    info.append(", minRequiredBufferBegin=").append(minRequiredBufferBegin);
    info.append(", readerPos=").append(readerPos);
    return info.toString();
}
/**
 * Returns the internal buffer array itself, not a copy.
 *
 * @return the current buffer.
 */
public char[] getBuffer() {
    return this.buffer;
}
/**
 * Returns the text position that corresponds to index 0 of the buffer.
 *
 * @return the position of the first character held in the buffer.
 */
public int getBufferBegin() {
    return this.bufferBegin;
}
private boolean checkPos(final int pos) {
// hopefully inlined by the compiler
if (pos=bufferBegin+buffer.length) {
if (pos>=minRequiredBufferBegin+buffer.length) {
if (!expandableBuffer) throw new BufferOverflowException(); // unfortunately BufferOverflowException doesn't accept a message argument, otherwise it would include the message "StreamedText buffer too small to keep positions "+minRequiredBufferBegin+" and "+pos+" simultaneously"
expandBuffer(pos-minRequiredBufferBegin+1);
}
discardUsedText();
if (pos>=end) return; // don't continue on to throw IndexOutOfBoundsException
}
while (readerPos<=pos) {
final int charCount=reader.read(buffer,readerPos-bufferBegin,bufferBegin+buffer.length-readerPos);
if (charCount==-1) {
end=readerPos;
return;
}
readerPos+=charCount;
}
} catch (IOException ex) {
throw new RuntimeException(ex);
}
}
private void expandBuffer(final int minSize) throws IOException {
int newSize=buffer.length*2;
if (newSize