javolution.xml.internal.stream.XMLStreamReaderImpl Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of javolution-core-java-msftbx
Show all versions of javolution-core-java-msftbx
Only the Java Core part of Javolution library, with slight modifications for use in MSFTBX.
The newest version!
/*
* Javolution - Java(TM) Solution for Real-Time and Embedded Systems
* Copyright (C) 2012 - Javolution (http://javolution.org/)
* All rights reserved.
*
* Permission to use, copy, modify, and distribute this software is
* freely granted, provided that this notice is preserved.
*/
package javolution.xml.internal.stream;
import javolution.io.UTF8StreamReader;
import javolution.lang.Realtime;
import javolution.text.CharArray;
import javolution.xml.sax.Attributes;
import javolution.xml.stream.*;
import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Map;
/**
* {@link XMLStreamReader} implementation.
*
* This implementation returns all contiguous character data in a single
* chunk (always coalescing). It is non-validating (DTD is returned
* unparsed). Although, users may define custom entities mapping using
* the {@link #setEntities} method (e.g. after parsing/resolving
* external entities).
*/
@Realtime
public final class XMLStreamReaderImpl implements XMLStreamReader {
/**
* Holds the textual representation for events.
*/
static final String[] NAMES_OF_EVENTS = new String[] { "UNDEFINED",
"START_ELEMENT", "END_ELEMENT", "PROCESSING_INSTRUCTIONS",
"CHARACTERS", "COMMENT", "SPACE", "START_DOCUMENT", "END_DOCUMENT",
"ENTITY_REFERENCE", "ATTRIBUTE", "DTD", "CDATA", "NAMESPACE",
"NOTATION_DECLARATION", "ENTITY_DECLARATION" };
/**
* Holds the reader buffer capacity.
*/
static final int READER_BUFFER_CAPACITY = 4096;
/**
* Holds the prolog if any.
*/
CharArray _prolog;
/**
* Holds the current index in the character buffer.
*/
private int _readIndex;
/**
* Number of characters read from reader
*/
private int _readCount;
/**
* Holds the data buffer for CharSequence produced by this parser.
*/
private char[] _data = new char[READER_BUFFER_CAPACITY * 2];
/**
* Holds the current index of the data buffer (_data).
*/
private int _index;
/**
* Holds the current element nesting.
*/
private int _depth;
/**
* Holds qualified name (include prefix).
*/
private CharArray _qName;
/**
* Holds element prefix separator index.
*/
private int _prefixSep;
/**
* Holds attribute qualified name.
*/
private CharArray _attrQName;
/**
* Holds attribute prefix separator index.
*/
private int _attrPrefixSep;
/**
* Holds attribute value.
*/
private CharArray _attrValue;
/**
* Holds current event type
*/
private int _eventType = START_DOCUMENT;
/**
* Indicates if event type is START_TAG, and tag is empty, i.e. {@code <tag/>}.
*/
private boolean _isEmpty;
/**
* Indicates if characters are pending for potential coalescing.
*/
boolean _charactersPending = false;
/**
* Holds the start index for the current state within _data array.
*/
private int _start;
/**
* Holds the parser state.
*/
private int _state = STATE_CHARACTERS;
/**
* Holds the current text.
*/
private CharArray _text;
/**
* Holds the reader input source ({@code null} when unused).
*/
private Reader _reader;
/**
* Holds the character buffer used for reading.
*/
private final char[] _readBuffer = new char[READER_BUFFER_CAPACITY];
/** Holds the first 16 bytes of the input stream, used for BOM detection. */
private final byte[] _bomBuffer = new byte[BOM_BUF_LEN];
private int _bomBufferRead = 0;
private BOM _bom = BOM.NONE;
private final static int BOM_BUF_LEN = 16;
/**
* Holds the start offset in the character buffer (due to auto detection
* of encoding).
*/
private int _startOffset; // Byte Order Mark count.
/**
* Holds the location object.
*/
private final LocationImpl _location = new LocationImpl();
/**
* Holds the namespace stack.
*/
private final NamespacesImpl _namespaces = new NamespacesImpl();
/**
* Holds the current attributes.
*/
private final AttributesImpl _attributes = new AttributesImpl(_namespaces);
/**
* Holds working stack (by nesting level).
*/
private CharArray[] _elemStack = new CharArray[16];
/**
* Holds stream encoding if known.
*/
private Charset _encoding;
/**
* Holds the entities.
*/
private final EntitiesImpl _entities = new EntitiesImpl();
/**
* Holds the reader for input streams.
*/
private final UTF8StreamReader _utf8StreamReader = new UTF8StreamReader();
/**
* Holds the factory (if any)
*/
private final XMLInputFactoryImpl _factory;
/**
 * Creates a stand-alone reader which is not associated with any factory.
 */
public XMLStreamReaderImpl() {
    this((XMLInputFactoryImpl) null);
}
/**
 * Creates a reader bound to the specified factory.
 *
 * @param factory the factory to which this reader is handed back on
 *        {@link #reset()}, or {@code null} if there is none.
 */
XMLStreamReaderImpl(XMLInputFactoryImpl factory) {
    this._factory = factory;
}
/**
 * Indicates if the specified encoding designates UTF-8 (or its ASCII subset).
 * The comparison is exact: only the five spellings listed below are accepted.
 *
 * @param encoding the encoding identifier (must not be {@code null}).
 * @return {@code true} if the identifier is one of the accepted spellings.
 */
private static boolean isUTF8(Object encoding) {
    final String[] acceptedNames = { "utf-8", "UTF-8", "ASCII", "utf8", "UTF8" };
    for (String acceptedName : acceptedNames) {
        if (encoding.equals(acceptedName)) {
            return true;
        }
    }
    return false;
}
/**
 * Sets the input stream source for this XML stream reader without stating
 * an encoding. Delegates to {@link #setInput(InputStream, String)} with a
 * {@code null} encoding.
 *
 * @param in the input source with unknown encoding.
 * @throws XMLStreamException if the two-argument variant fails.
 */
public void setInput(InputStream in) throws XMLStreamException {
    final String unknownEncoding = null;
    setInput(in, unknownEncoding);
}
/**
 * Sets the input stream source and encoding for this XML stream reader.
 *
 * The encoding is chosen as follows: a byte order mark found at the start
 * of the stream wins; otherwise the specified encoding is used; otherwise
 * UTF-8 is assumed.
 *
 * @param in the input source.
 * @param encoding the associated encoding, or {@code null} if unknown.
 * @throws XMLStreamException if the encoding name is illegal or unsupported,
 *         or if the stream cannot be read.
 */
public void setInput(InputStream in, String encoding) throws XMLStreamException {
    // First try to detect the encoding from a byte order mark.
    detectBom(in);
    if (BOM.NONE.equals(_bom)) {
        if (encoding == null) { // No encoding provided and no BOM detected.
            _encoding = StandardCharsets.UTF_8; // Assume UTF-8.
        } else { // Try to parse the provided encoding.
            try {
                _encoding = Charset.forName(encoding);
            } catch (IllegalArgumentException e) {
                throw new XMLStreamException("Unknown or unsupported encoding provided", e);
            }
        }
    } else { // Encoding detected from the BOM, use it.
        _encoding = Charset.forName(_bom.toString());
    }
    // BOM detection consumed the first bytes of the stream. The bytes which
    // follow the BOM are pushed back in front of the remaining stream, so that
    // a single decoder sees one contiguous byte sequence. Only the bytes which
    // were actually read are replayed (the buffer may be partially filled), and
    // a multi-byte character straddling the end of the buffer stays intact.
    InputStream source = in;
    final int bomLength = _bom.bytes.length;
    if (_bomBufferRead > bomLength) {
        final InputStream peeked = new ByteArrayInputStream(_bomBuffer,
                bomLength, _bomBufferRead - bomLength);
        source = new SequenceInputStream(peeked, in);
    }
    Reader reader;
    if (StandardCharsets.UTF_8.equals(_encoding)) { // Use our fast UTF-8 Reader.
        reader = _utf8StreamReader.setInput(source);
    } else {
        reader = new InputStreamReader(source, _encoding);
    }
    setInput(reader);
}
/**
 * Reads the first bytes of the specified stream into the BOM buffer and
 * records which byte order mark (if any) they start with.
 *
 * On return the number of bytes consumed from the stream is held by
 * {@code _bomBufferRead} (0 for an empty stream) and the detected mark
 * by {@code _bom} ({@code BOM.NONE} when nothing matched).
 *
 * @param input the stream to inspect.
 * @throws XMLStreamException if the stream cannot be read.
 */
private void detectBom(InputStream input) throws XMLStreamException {
    _bom = BOM.NONE;
    // A single read() may return fewer bytes than requested even when more
    // are coming, so keep reading until the buffer is full or the stream ends.
    int filled = 0;
    try {
        while (filled < _bomBuffer.length) {
            final int count = input.read(_bomBuffer, filled, _bomBuffer.length - filled);
            if (count <= 0) {
                break;
            }
            filled += count;
        }
    } catch (IOException e) {
        throw new XMLStreamException("Error reading the first " + _bomBuffer.length + " bytes from input stream for BOM detection", e);
    }
    _bomBufferRead = filled;
    if (filled == 0) {
        return; // Nothing to match against.
    }
    // Longest marks first, so that a mark which is a prefix of a longer one
    // cannot shadow it.
    final BOM[] boms = BOM.values();
    Arrays.sort(boms, new Comparator<BOM>() {
        @Override
        public int compare(BOM o1, BOM o2) {
            return Integer.compare(o2.bytes.length, o1.bytes.length);
        }
    });
    // Try to match the known BOMs against the bytes read. Marks longer than
    // the available bytes are skipped individually; shorter ones are still tried.
    for (BOM bom : boms) {
        if (bom.bytes.length > filled) {
            continue;
        }
        boolean matches = true;
        for (int i = 0; i < bom.bytes.length; i++) {
            if (_bomBuffer[i] != bom.bytes[i]) {
                matches = false;
                break;
            }
        }
        if (matches) {
            _bom = bom;
            break;
        }
    }
}
/**
 * Sets the reader input source for this XML stream reader.
 * This method reads the prolog (if any).
 *
 * @param reader the input source reader.
 * @throws IllegalStateException if a previous reader has not been closed or reset.
 * @throws XMLStreamException if the reader cannot be read or the prolog is malformed.
 * @see javolution.io.UTF8StreamReader
 * @see javolution.io.UTF8ByteBufferReader
 * @see javolution.io.CharSequenceReader
 */
public void setInput(Reader reader) throws XMLStreamException {
    if (_reader != null)
        throw new IllegalStateException("Reader not closed or reset");
    _reader = reader;
    try { // Reads prolog (if there)
        int readCount = reader.read(_readBuffer, _startOffset,
                _readBuffer.length - _startOffset);
        _readCount = (readCount >= 0) ? readCount + _startOffset
                : _startOffset;
        // Six characters ("<?xml" plus the space) are inspected, hence at
        // least six must have been read; otherwise index 5 holds stale data.
        if ((_readCount >= 6) && (_readBuffer[0] == '<')
                && (_readBuffer[1] == '?') && (_readBuffer[2] == 'x')
                && (_readBuffer[3] == 'm') && (_readBuffer[4] == 'l')
                && (_readBuffer[5] == ' ')) { // Prolog detected.
            next(); // Processing instruction.
            _prolog = this.getPIData();
            _index = _prolog.offset() + _prolog.length(); // Keep prolog.
            _start = _index; // Default state.
            _eventType = START_DOCUMENT; // Resets to START_DOCUMENT.
        }
    } catch (IOException e) {
        throw new XMLStreamException(e);
    }
}
/**
 * Returns the current nesting depth. Outside the root element the depth
 * is 0. It is incremented by 1 when a start tag is reached and it is
 * decremented AFTER the end tag event has been observed.
 * [code]
 * (before root)        0
 * &lt;root&gt;               1
 *   sometext           1
 *   &lt;child&gt;            2
 *   &lt;/child&gt;           2
 * &lt;/root&gt;              1
 * (after root)         0 [/code]
 *
 * @return the nesting depth.
 */
public int getDepth() {
    return this._depth;
}
/**
 * Returns the qualified name (prefix included) of the current element event.
 *
 * @return the qualified name.
 * @throws IllegalStateException if the current event is neither
 *         START_ELEMENT nor END_ELEMENT.
 */
public CharArray getQName() {
    final boolean onElement = (_eventType == XMLStreamConstants.START_ELEMENT)
            || (_eventType == XMLStreamConstants.END_ELEMENT);
    if (onElement) {
        return _qName;
    }
    throw new IllegalStateException(
            "Not a start element or an end element");
}
/**
 * Returns the qualified name recorded on the element stack for the
 * specified nesting level (e.g. to build the XPath of the current element).
 *
 * @param depth the nesting level.
 * @return the qualified name of the element at the specified level.
 * @throws IllegalArgumentException if depth > getDepth()
 */
public CharArray getQName(int depth) {
    if (depth <= this.getDepth()) {
        return _elemStack[depth];
    }
    throw new IllegalArgumentException();
}
/**
 * Returns the attributes of the current start element (SAX2-like view).
 *
 * @return the current attributes.
 * @throws IllegalStateException if the current event is not START_ELEMENT.
 */
public Attributes getAttributes() {
    if (_eventType == XMLStreamConstants.START_ELEMENT) {
        return _attributes;
    }
    throw new IllegalStateException("Not a start element");
}
/**
 * Defines a custom entities to replacement text mapping for this reader.
 * For example:[code]
 * FastMap HTML_ENTITIES = new FastMap();
 * HTML_ENTITIES.put("nbsp", " ");
 * HTML_ENTITIES.put("copy", "©");
 * HTML_ENTITIES.put("eacute", "é");
 * ...
 * XMLStreamReaderImpl reader = new XMLStreamReaderImpl();
 * reader.setEntities(HTML_ENTITIES);
 * [/code]
 * The mapping may be replaced at any time (e.g. once the DTD has been
 * read and all external entity references are resolved).
 *
 * @param entities the entities to replacement texts mapping
 *        (both must be {@code CharSequence} instances).
 */
public void setEntities(Map entities) {
    final Map mapping = entities;
    _entities.setEntitiesMapping(mapping);
}
/**
 * Returns a short description of this reader: the name of the current
 * event followed by the current location.
 *
 * @return the textual representation of the current state.
 */
public String toString() {
    final String eventName = NAMES_OF_EVENTS[_eventType];
    final StringBuilder description = new StringBuilder("XMLStreamReader - State: ");
    description.append(eventName);
    description.append(", Location: ");
    description.append(_location.toString());
    return description.toString();
}
/**
 * Advances the parser to the next parsing event and returns its type
 * (implements the XMLStreamReader interface).
 *
 * Before any input is read, the state left by the previous event is cleared:
 * a START_ELEMENT flagged as empty is followed by a synthetic END_ELEMENT
 * without consuming input; after an END_ELEMENT (when the depth is positive)
 * the namespace scope is popped, the depth is decremented, the data buffer
 * indices are rewound to the offset of the closed element's name and the
 * pooled CharArray instances handed out since that name are released.
 * The reader loop then takes one character at a time from the read buffer,
 * appends it to the data buffer and dispatches on the parser state.
 *
 * @return the type of the event which is now current.
 * @throws XMLStreamException if the input is malformed, ends unexpectedly
 *         or cannot be read.
 */
public int next() throws XMLStreamException {
// Clears previous state.
if (_eventType == START_ELEMENT) {
if (_isEmpty) { // Previous empty tag, generates END_TAG automatically.
_isEmpty = false;
return _eventType = END_ELEMENT;
}
} else if (_eventType == END_ELEMENT) {
// we might be reading from the middle of a document
if (_depth > 0) {
_namespaces.pop();
CharArray startElem = _elemStack[_depth--];
_start = _index = startElem.offset();
while (_seqs[--_seqsIndex] != startElem) { // Recycles CharArray instances.
}
}
}
// Reader loop.
while (true) {
// Main character reading block.
if ((_readIndex >= _readCount) && isEndOfStream())
return _eventType; // END_DOCUMENT or CHARACTERS.
char c = _readBuffer[_readIndex++];
_location._totalCharsRead++;
if (c <= '&')
c = (c == '&') ? replaceEntity()
: (c < ' ') ? handleEndOfLine(c) : c;
_data[_index++] = c;
// Main processing.
//
switch (_state) {
case STATE_CHARACTERS:
while (true) { // Read characters data all at once.
if (c == '<') {
int length = _index - _start - 1;
if (length > 0) {
if (_charactersPending) {
_text.setArray(_data, _text.offset(),
_text.length() + length); // Coalescing.
} else {
_text = newSeq(_start, length);
_charactersPending = true;
}
_start = _index - 1; // Keeps '<' as part of markup.
}
_state = STATE_MARKUP;
break;
}
// Local character reading block.
if ((_readIndex >= _readCount) && isEndOfStream())
return _eventType;
c = _readBuffer[_readIndex++];
_location._totalCharsRead++;
if (c <= '&')
c = (c == '&') ? replaceEntity()
: (c < ' ') ? handleEndOfLine(c) : c;
_data[_index++] = c;
}
break;
case STATE_CDATA:
while (true) { // Reads CDATA all at once.
if ((c == '>') && (_index - _start >= 3)
&& (_data[_index - 2] == ']')
&& (_data[_index - 3] == ']')) {
_index -= 3;
int length = _index - _start;
if (length > 0) { // Not empty.
if (_charactersPending) {
_text.setArray(_data, _text.offset(),
_text.length() + length); // Coalescing.
} else {
_text = newSeq(_start, length);
_charactersPending = true;
}
}
_start = _index;
_state = STATE_CHARACTERS;
break;
}
// Local character reading block.
if (_readIndex >= _readCount)
reloadBuffer();
c = _readBuffer[_readIndex++];
_location._totalCharsRead++;
if (c < ' ')
c = handleEndOfLine(c);
_data[_index++] = c;
}
break;
case STATE_DTD:
if (c == '>') {
_text = newSeq(_start, _index - _start);
_index = _start; // Do not keep DTD.
_state = STATE_CHARACTERS;
return _eventType = DTD;
} else if (c == '[') {
_state = STATE_DTD_INTERNAL;
}
break;
case STATE_DTD_INTERNAL:
if (c == ']') {
_state = STATE_DTD;
}
break;
case STATE_MARKUP: // Starts with '<'
if (_index - _start == 2) {
if (c == '/') {
_start = _index = _index - 2;
_state = STATE_CLOSE_TAGxREAD_ELEM_NAME;
_prefixSep = -1;
if (_charactersPending) { // Flush characters event.
_charactersPending = false;
return _eventType = CHARACTERS;
}
} else if (c == '?') {
_start = _index = _index - 2;
_state = STATE_PI;
if (_charactersPending) { // Flush characters event.
_charactersPending = false;
return _eventType = CHARACTERS;
}
} else if (c != '!') { // Element tag (first letter).
_location._lastStartTagPos = _location._totalCharsRead - 2;
_data[_start] = c;
_index = _start + 1;
_state = STATE_OPEN_TAGxREAD_ELEM_NAME;
_prefixSep = -1;
if (_charactersPending) { // Flush character event.
_charactersPending = false;
return _eventType = CHARACTERS;
}
}
} else if ((_index - _start == 4)
&& (_data[_start + 1] == '!')
&& (_data[_start + 2] == '-')
&& (_data[_start + 3] == '-')) {
// NOTE(review): this listing appears to have lost text between the next two
// lines (presumably everything from the comment-open marker to the
// comment-close marker was stripped during extraction — TODO confirm against
// the complete source). As shown: the sequence created right after
// "_start = _index" is always empty; nothing visible ever assigns
// STATE_COMMENT, STATE_CDATA or STATE_DTD to _state; there is no
// "case STATE_COMMENT" label; and the closing brace which follows the
// character reading block below ends the switch, leaving the later case
// labels outside of it.
_start = _index = _index - 4; // Removes
_text = newSeq(_start, _index - _start);
_state = STATE_CHARACTERS;
_index = _start; // Do not keep comments.
return _eventType = COMMENT;
}
// Local character reading block.
if (_readIndex >= _readCount)
reloadBuffer();
c = _readBuffer[_readIndex++];
_location._totalCharsRead++;
if (c < ' ')
c = handleEndOfLine(c);
_data[_index++] = c;
}
case STATE_PI:
if ((c == '>') && (_index - _start >= 2)
&& (_data[_index - 2] == '?')) {
_index -= 2; // Removes ?>
_text = newSeq(_start, _index - _start);
_state = STATE_CHARACTERS;
_index = _start; // Do not keep processing instructions.
return _eventType = PROCESSING_INSTRUCTION;
}
break;
// OPEN_TAG:
case STATE_OPEN_TAGxREAD_ELEM_NAME:
_attributes.reset();
_namespaces.push();
while (true) { // Read element name all at once.
if (c < '@') { // Else avoid multiple checks.
if (c == '>') {
_qName = newSeq(_start, --_index - _start);
_start = _index;
_state = STATE_CHARACTERS;
processStartTag();
_isEmpty = false;
return _eventType = START_ELEMENT;
} else if (c == '/') {
_qName = newSeq(_start, --_index - _start);
_start = _index;
_state = STATE_OPEN_TAGxEMPTY_TAG;
break;
} else if (c == ':') {
_prefixSep = _index - 1;
} else if (c <= ' ') {
_qName = newSeq(_start, --_index - _start);
_state = STATE_OPEN_TAGxELEM_NAME_READ;
break;
}
}
if (_readIndex >= _readCount)
reloadBuffer();
c = _data[_index++] = _readBuffer[_readIndex++];
_location._totalCharsRead++;
}
break;
case STATE_OPEN_TAGxELEM_NAME_READ:
if (c == '>') {
_start = --_index;
_state = STATE_CHARACTERS;
processStartTag();
_isEmpty = false;
return _eventType = START_ELEMENT;
} else if (c == '/') {
_state = STATE_OPEN_TAGxEMPTY_TAG;
} else if (c > ' ') {
_start = _index - 1; // Includes current character.
_attrPrefixSep = -1;
_state = STATE_OPEN_TAGxREAD_ATTR_NAME;
}
break;
case STATE_OPEN_TAGxREAD_ATTR_NAME:
while (true) { // Read attribute name all at once.
if (c < '@') { // Else avoid multiple checks.
if (c <= ' ') {
_attrQName = newSeq(_start, --_index - _start);
_state = STATE_OPEN_TAGxATTR_NAME_READ;
break;
} else if (c == '=') {
_attrQName = newSeq(_start, --_index - _start);
_state = STATE_OPEN_TAGxEQUAL_READ;
break;
} else if (c == ':') {
_attrPrefixSep = _index - 1;
}
}
if (_readIndex >= _readCount)
reloadBuffer();
_data[_index++] = c = _readBuffer[_readIndex++];
_location._totalCharsRead++;
}
break;
case STATE_OPEN_TAGxATTR_NAME_READ:
if (c == '=') {
--_index;
_state = STATE_OPEN_TAGxEQUAL_READ;
} else if (c > ' ') { throw new XMLStreamException(
"'=' expected", _location); }
break;
case STATE_OPEN_TAGxEQUAL_READ:
if (c == '\'') {
_start = --_index;
_state = STATE_OPEN_TAGxREAD_ATTR_VALUE_SIMPLE_QUOTE;
} else if (c == '\"') {
_start = --_index;
_state = STATE_OPEN_TAGxREAD_ATTR_VALUE_DOUBLE_QUOTE;
} else if (c > ' ') { throw new XMLStreamException(
"Quotes expected", _location); }
break;
case STATE_OPEN_TAGxREAD_ATTR_VALUE_SIMPLE_QUOTE:
while (true) { // Read attribute value all at once.
if (c == '\'') {
_attrValue = newSeq(_start, --_index - _start);
processAttribute();
_state = STATE_OPEN_TAGxELEM_NAME_READ;
break;
}
// Local character reading block.
if (_readIndex >= _readCount)
reloadBuffer();
c = _readBuffer[_readIndex++];
_location._totalCharsRead++;
if (c == '&')
c = replaceEntity();
_data[_index++] = c;
}
break;
case STATE_OPEN_TAGxREAD_ATTR_VALUE_DOUBLE_QUOTE:
while (true) { // Read attribute value all at once.
if (c == '\"') {
_attrValue = newSeq(_start, --_index - _start);
processAttribute();
_state = STATE_OPEN_TAGxELEM_NAME_READ;
break;
}
// Local character reading block.
if (_readIndex >= _readCount)
reloadBuffer();
c = _readBuffer[_readIndex++];
_location._totalCharsRead++;
if (c == '&')
c = replaceEntity();
_data[_index++] = c;
}
break;
case STATE_OPEN_TAGxEMPTY_TAG:
if (c == '>') {
_start = --_index;
_state = STATE_CHARACTERS;
processStartTag();
_isEmpty = true;
return _eventType = START_ELEMENT;
} else {
throw new XMLStreamException("'>' expected", _location);
}
// CLOSE_TAG:
case STATE_CLOSE_TAGxREAD_ELEM_NAME:
while (true) { // Element name can be read all at once.
if (c < '@') { // Else avoid multiple checks.
if (c == '>') {
_qName = newSeq(_start, --_index - _start);
_start = _index;
_state = STATE_CHARACTERS;
_eventType = END_ELEMENT;
processEndTag();
return _eventType;
} else if (c == ':') {
_prefixSep = _index - 1;
} else if (c <= ' ') {
_qName = newSeq(_start, --_index - _start);
_state = STATE_CLOSE_TAGxELEM_NAME_READ;
break;
}
}
if (_readIndex >= _readCount)
reloadBuffer();
c = _data[_index++] = _readBuffer[_readIndex++];
_location._totalCharsRead++;
}
break;
case STATE_CLOSE_TAGxELEM_NAME_READ:
if (c == '>') {
_start = --_index;
_state = STATE_CHARACTERS;
_eventType = END_ELEMENT;
processEndTag();
return _eventType;
} else if (c > ' ') { throw new XMLStreamException(
"'>' expected", _location); }
break;
default:
throw new XMLStreamException("State unknown: " + _state,
_location);
}
}
}
// Defines parsing states (keep values close together to avoid lookup).
// NOTE(review): presumably the dense numbering is meant to let the switch
// on _state compile to a jump table — confirm.
// NOTE(review): replaceEntity() tests for STATE_COMMENT, but no
// "case STATE_COMMENT" label is visible in next() as listed here —
// verify against the complete source.
private static final int STATE_CHARACTERS = 1;
private static final int STATE_MARKUP = 2;
private static final int STATE_COMMENT = 3;
private static final int STATE_PI = 4;
private static final int STATE_CDATA = 5;
private static final int STATE_OPEN_TAGxREAD_ELEM_NAME = 6;
private static final int STATE_OPEN_TAGxELEM_NAME_READ = 7;
private static final int STATE_OPEN_TAGxREAD_ATTR_NAME = 8;
private static final int STATE_OPEN_TAGxATTR_NAME_READ = 9;
private static final int STATE_OPEN_TAGxEQUAL_READ = 10;
private static final int STATE_OPEN_TAGxREAD_ATTR_VALUE_SIMPLE_QUOTE = 11;
private static final int STATE_OPEN_TAGxREAD_ATTR_VALUE_DOUBLE_QUOTE = 12;
private static final int STATE_OPEN_TAGxEMPTY_TAG = 13;
private static final int STATE_CLOSE_TAGxREAD_ELEM_NAME = 14;
private static final int STATE_CLOSE_TAGxELEM_NAME_READ = 15;
private static final int STATE_DTD = 16;
private static final int STATE_DTD_INTERNAL = 17;
/**
 * Refills the read buffer from the reader.
 *
 * The characters consumed so far are first folded into the location
 * counters and the read index is rewound. Afterwards the data buffer is
 * grown until it can hold the characters just read.
 *
 * @throws XMLStreamException if no input has been set, if the reader
 *         fails, or if the stream ends while an element is still open or
 *         the parser is not in the characters state.
 */
private void reloadBuffer() throws XMLStreamException {
    if (_reader == null) {
        throw new XMLStreamException("Input not specified");
    }
    final int consumed = _readIndex;
    _location._column += consumed;
    _location._charactersRead += consumed;
    _readIndex = 0;
    try {
        _readCount = _reader.read(_readBuffer, 0, _readBuffer.length);
        final boolean endOfStream = (_readCount <= 0);
        final boolean insideMarkupOrElement = (_depth != 0)
                || (_state != STATE_CHARACTERS);
        if (endOfStream && insideMarkupOrElement) {
            throw new XMLUnexpectedEndOfDocumentException("Unexpected end of document", _location);
        }
    } catch (IOException e) {
        throw new XMLStreamException(e);
    }
    // Potential overflow: make room for what has just been read.
    while (_data.length <= (_index + _readCount)) {
        increaseDataBuffer();
    }
}
/**
 * Detects the end of the stream, reloading the read buffer if it has been
 * fully consumed.
 *
 * @return {@code true} if the end of the stream has been reached and the
 *         event type (CHARACTERS or END_DOCUMENT) has been set;
 *         {@code false} otherwise.
 * @throws XMLStreamException if END_DOCUMENT had already been reported or
 *         if the buffer cannot be reloaded.
 */
private boolean isEndOfStream() throws XMLStreamException {
    if (_readIndex >= _readCount) {
        reloadBuffer();
    }
    if (_readCount > 0) {
        return false; // More characters are available.
    }
    // Here _state == STATE_CHARACTERS (otherwise reloadBuffer() has thrown).
    if (_eventType == END_DOCUMENT) {
        throw new XMLStreamException(
                "End document has already been reached");
    }
    final int trailing = _index - _start;
    if (trailing <= 0) {
        _eventType = END_DOCUMENT;
        return true;
    }
    // Flushes trailing characters.
    if (_charactersPending) {
        final int coalescedLength = _text.length() + trailing;
        _text.setArray(_data, _text.offset(), coalescedLength);
    } else {
        _text = newSeq(_start, trailing);
    }
    _start = _index;
    _eventType = CHARACTERS;
    return true;
}
/**
 * Normalizes end of lines as per XML Spec. 2.11 and keeps the line/column
 * counters up to date. A carriage return becomes a line feed; when it is
 * immediately followed by a line feed, that line feed is consumed as well.
 *
 * @param c the potential end of line character (any character below space).
 * @return the replacement character.
 * @throws XMLStreamException if the character is U+0000 or if the buffer
 *         cannot be reloaded.
 */
private char handleEndOfLine(char c) throws XMLStreamException {
    char normalized = c;
    if (normalized == '\r') {
        if (_readIndex >= _readCount) {
            reloadBuffer();
        }
        final boolean lineFeedFollows = (_readIndex < _readCount)
                && (_readBuffer[_readIndex] == '\n');
        if (lineFeedFollows) { // Consumes the line feed of the pair.
            _readIndex++;
            _location._totalCharsRead++;
        }
        normalized = '\n';
    }
    if (normalized == '\n') {
        _location._line++;
        _location._column = -_readIndex; // column = 0
        return normalized;
    }
    if (normalized == '\u0000') {
        throw new XMLStreamException(
                "Illegal XML character U+0000", _location);
    }
    return normalized;
}
/**
 * Replaces an entity reference if the current state allows it. The
 * reference is copied to the data buffer up to its ';', replaced in place
 * by the entities mapping, and the data index is moved right after the
 * replacement text.
 *
 * @return the next character after the text replacement or '&' if no
 *         replacement took place.
 * @throws XMLStreamException if a character not above space is met before
 *         the closing ';' or if the buffer cannot be reloaded.
 */
private char replaceEntity() throws XMLStreamException {
    switch (_state) {
        case STATE_COMMENT:
        case STATE_PI:
        case STATE_CDATA:
            return '&'; // (&2.4)
        default:
            break;
    }
    final int entityStart = _index; // Index of first replacement character.
    _data[_index++] = '&';
    char ch;
    do {
        if (_readIndex >= _readCount) {
            reloadBuffer();
        }
        ch = _data[_index++] = _readBuffer[_readIndex++];
        _location._totalCharsRead++;
        if ((ch != ';') && (ch <= ' ')) {
            throw new XMLStreamException("';' expected", _location);
        }
    } while (ch != ';');
    // Ensures that the replacement string holds in the data buffer.
    while (entityStart + _entities.getMaxLength() >= _data.length) {
        increaseDataBuffer();
    }
    // Replaces the entity and repositions the index after the replacement.
    final int replacementLength = _entities.replaceEntity(_data, entityStart,
            _index - entityStart);
    _index = entityStart + replacementLength;
    // Local character reading block.
    if (_readIndex >= _readCount) {
        reloadBuffer();
    }
    final char following = _readBuffer[_readIndex++];
    _location._totalCharsRead++;
    if (following == '&') { // Another entity right away.
        return replaceEntity();
    }
    return following;
}
/**
 * Processes the attribute just read: {@code xmlns} and {@code xmlns:*}
 * attributes update the namespace bindings, any other attribute is added
 * to the current attributes.
 */
private void processAttribute() throws XMLStreamException {
    if (_attrPrefixSep >= 0) { // Prefixed attribute.
        final int qNameStart = _attrQName.offset();
        final int qNameLength = _attrQName.length();
        final CharArray prefix = newSeq(qNameStart, _attrPrefixSep - qNameStart);
        final CharArray localName = newSeq(_attrPrefixSep + 1,
                qNameStart + qNameLength - _attrPrefixSep - 1);
        if (isXMLNS(prefix)) { // Namespace association.
            _namespaces.setPrefix(localName, _attrValue);
        } else {
            _attributes.addAttribute(localName, prefix, _attrQName, _attrValue);
        }
        return;
    }
    // No prefix.
    if (isXMLNS(_attrQName)) { // Sets default namespace.
        _namespaces.setPrefix(_namespaces._defaultNsPrefix, _attrValue);
    } else {
        _attributes.addAttribute(_attrQName, null, _attrQName, _attrValue);
    }
}
/**
 * Indicates if the specified character sequence is exactly "xmlns".
 *
 * @param chars the sequence to test.
 * @return {@code true} if it holds the five characters x, m, l, n, s.
 */
private static boolean isXMLNS(CharArray chars) {
    final String keyword = "xmlns";
    if (chars.length() != keyword.length()) {
        return false;
    }
    for (int i = 0; i < keyword.length(); i++) {
        if (chars.charAt(i) != keyword.charAt(i)) {
            return false;
        }
    }
    return true;
}
/**
 * Checks that the end tag just read closes the element recorded at the
 * current depth.
 *
 * @throws XMLStreamException if the names differ.
 */
private void processEndTag() throws XMLStreamException {
    final CharArray openedName = _elemStack[_depth];
    if (_qName.equals(openedName)) {
        return;
    }
    throw new XMLUnexpectedEndTagException("Unexpected end tag for " + _qName,
            _location);
}
/**
 * Enters the element just read: increments the depth (growing the element
 * stack when needed) and records the qualified name at that depth.
 */
private void processStartTag() throws XMLStreamException {
    _depth++;
    if (_depth >= _elemStack.length) {
        increaseStack();
    }
    _elemStack[_depth] = _qName;
}
/**
 * Resets this reader to its freshly constructed state so that it can be
 * reused with a new input, then hands it back to its factory (if any).
 * Implements Reusable.
 */
public void reset() {
    // Resets all members (alphabetically ordered).
    _attributes.reset();
    _attrPrefixSep = 0;
    _attrQName = null;
    _attrValue = null;
    _bom = BOM.NONE;
    _bomBufferRead = 0;
    _charactersPending = false;
    _depth = 0;
    _encoding = null;
    _entities.reset();
    _eventType = START_DOCUMENT;
    _index = 0;
    _isEmpty = false;
    _location.reset();
    _namespaces.reset();
    _prefixSep = 0; // Same value as for a new instance.
    _prolog = null;
    _qName = null; // Do not keep a view on the previous document.
    _readCount = 0;
    _reader = null;
    _readIndex = 0;
    _seqsIndex = 0;
    _start = 0;
    _startOffset = 0;
    _state = STATE_CHARACTERS;
    _text = null; // Do not keep a view on the previous document.
    _utf8StreamReader.reset();
    // Recycles if factory produced.
    if (_factory != null) {
        _factory.recycle(this);
    }
}
/**
 * Returns a character sequence from the pool, set to the specified range
 * of the data buffer. A new pool entry is created when the pool is exhausted.
 *
 * @param offset the start of the range within the data buffer.
 * @param length the length of the range.
 * @return the pooled sequence, as returned by its setArray call.
 */
private CharArray newSeq(int offset, int length) {
    final CharArray pooled;
    if (_seqsIndex < _seqsCapacity) {
        pooled = _seqs[_seqsIndex++];
    } else {
        pooled = newSeq2();
    }
    return pooled.setArray(_data, offset, length);
}
/**
 * Slow path of newSeq: adds one entry to the pool, then takes the entry
 * at the current pool index.
 */
private CharArray newSeq2() {
    _createSeqLogic.run();
    final CharArray created = _seqs[_seqsIndex];
    _seqsIndex++;
    return created;
}
/**
 * Appends one new CharArray to the pool, doubling the pool array first
 * when it is full.
 */
private final Runnable _createSeqLogic = new Runnable() {
    public void run() {
        final boolean poolFull = (_seqsCapacity >= _seqs.length);
        if (poolFull) { // Resizes.
            _seqs = Arrays.copyOf(_seqs, _seqs.length * 2);
        }
        final CharArray fresh = new CharArray();
        _seqs[_seqsCapacity] = fresh;
        _seqsCapacity++;
    }
};
private CharArray[] _seqs = new CharArray[256];
private int _seqsIndex;
private int _seqsCapacity;
/**
 * Doubles the capacity of the internal data buffer, keeping its content.
 *
 * Note: The character data at any nesting level is discarded only when
 * moving to the outer nesting level (due to coalescing). This accumulation
 * may cause the data buffer to be resized if numerous elements at the same
 * nesting level are separated by spaces or indentation.
 */
private void increaseDataBuffer() {
    final char[] enlarged = Arrays.copyOf(_data, _data.length * 2);
    javolution.context.LogContext.info(new CharArray(
            "XMLStreamReaderImpl: Data buffer increased to " + enlarged.length));
    _data = enlarged;
}
/**
 * Doubles the capacity of the element stack, keeping its content.
 */
private void increaseStack() {
    final CharArray[] enlarged = Arrays.copyOf(_elemStack, _elemStack.length * 2);
    javolution.context.LogContext.info(new CharArray(
            "XMLStreamReaderImpl: CharArray stack increased to "
                    + enlarged.length));
    _elemStack = enlarged;
}
/**
 * This inner class represents the parser location. Column and character
 * offset are stored relative to the start of the read buffer: the
 * enclosing reader's read index is added when they are queried.
 */
public final class LocationImpl implements Location {
    /** Column of the read buffer start (negative after a line break). */
    int _column;
    /** Zero-based line index. */
    int _line;
    /** Characters consumed in previously loaded buffers. */
    int _charactersRead;
    /** Value recorded by the parser when an element start tag begins. */
    long _lastStartTagPos;
    /** Count of characters taken from the read buffer. */
    long _totalCharsRead;

    public int getLineNumber() {
        return 1 + _line;
    }

    public int getColumnNumber() {
        return XMLStreamReaderImpl.this._readIndex + _column;
    }

    public int getCharacterOffset() {
        return XMLStreamReaderImpl.this._readIndex + _charactersRead;
    }

    public long getLastStartTagPos() {
        return this._lastStartTagPos;
    }

    public long getTotalCharsRead() {
        return this._totalCharsRead;
    }

    public int getBomLength() {
        final byte[] bomBytes = XMLStreamReaderImpl.this._bom.bytes;
        return bomBytes.length;
    }

    public String getPublicId() {
        return null; // Not available.
    }

    public String getSystemId() {
        return null; // Not available.
    }

    public String toString() {
        final StringBuilder text = new StringBuilder("Line ");
        text.append(getLineNumber());
        text.append(", Column ");
        text.append(getColumnNumber());
        return text.toString();
    }

    public void reset() {
        _totalCharsRead = 0;
        _lastStartTagPos = -1;
        _charactersRead = 0;
        _column = 0;
        _line = 0;
    }
}
//////////////////////////////////////////
// Implements XMLStreamReader Interface //
//////////////////////////////////////////
/**
 * Checks that the current event has the specified type and, when given,
 * the specified namespace URI and local name
 * (implements the XMLStreamReader interface).
 *
 * @param type the expected event type.
 * @param namespaceURI the expected namespace URI, or {@code null} to skip the check.
 * @param localName the expected local name, or {@code null} to skip the check.
 * @throws XMLStreamException if any of the checks fails.
 */
public void require(int type, CharSequence namespaceURI,
        CharSequence localName) throws XMLStreamException {
    if (type != _eventType) {
        throw new XMLStreamException("Expected event: "
                + NAMES_OF_EVENTS[type] + ", found event: "
                + NAMES_OF_EVENTS[_eventType]);
    }
    if (namespaceURI != null) {
        if (!getNamespaceURI().equals(namespaceURI)) {
            throw new XMLStreamException("Expected namespace URI: "
                    + namespaceURI + ", found: " + getNamespaceURI());
        }
    }
    if (localName != null) {
        if (!getLocalName().equals(localName)) {
            throw new XMLStreamException("Expected local name: " + localName
                    + ", found: " + getLocalName());
        }
    }
}
/**
 * Reads the text content of the current element up to its end tag
 * (implements the XMLStreamReader interface; derived from the interface
 * specification code). Comments and processing instructions are skipped.
 *
 * @return the element text, or an empty sequence if there is none.
 * @throws XMLStreamException if the current event is not START_ELEMENT,
 *         if a nested element or any unexpected event is met, or if the
 *         document ends before the end tag.
 */
public CharArray getElementText() throws XMLStreamException {
    if (getEventType() != XMLStreamConstants.START_ELEMENT) {
        throw new XMLStreamException(
                "Parser must be on START_ELEMENT to read next text",
                getLocation());
    }
    CharArray content = null;
    for (int event = next(); event != XMLStreamConstants.END_ELEMENT; event = next()) {
        if (event == XMLStreamConstants.CHARACTERS) {
            if (content == null) {
                content = getText();
            } else { // Merge (adjacent text, comments and PI are not kept).
                final int mergedLength = content.length() + getText().length();
                content.setArray(_data, content.offset(), mergedLength);
            }
            continue;
        }
        if (event == XMLStreamConstants.PROCESSING_INSTRUCTION
                || event == XMLStreamConstants.COMMENT) {
            continue; // Skips (not kept).
        }
        if (event == XMLStreamConstants.END_DOCUMENT) {
            throw new XMLUnexpectedEndOfDocumentException(
                    "Unexpected end of document when reading element text content",
                    getLocation());
        }
        if (event == XMLStreamConstants.START_ELEMENT) {
            throw new XMLStreamException(
                    "Element text content may not contain START_ELEMENT",
                    getLocation());
        }
        throw new XMLStreamException("Unexpected event type "
                + NAMES_OF_EVENTS[event], getLocation());
    }
    if (content == null) {
        return newSeq(0, 0);
    }
    return content;
}
/**
 * Returns the value of the specified property
 * (implements the XMLStreamReader interface). Only the coalescing flag
 * (always true) and the entities mapping are supported.
 *
 * @param name the property name.
 * @return the property value.
 * @throws IllegalArgumentException if the property is not supported.
 */
public Object getProperty(String name) throws IllegalArgumentException {
    if (name.equals(XMLInputFactory.IS_COALESCING)) {
        return Boolean.TRUE;
    }
    if (name.equals(XMLInputFactory.ENTITIES)) {
        return _entities.getEntitiesMapping();
    }
    throw new IllegalArgumentException("Property: " + name
            + " not supported");
}
/**
 * Closes this reader by resetting it
 * (implements the XMLStreamReader interface).
 */
public void close() throws XMLStreamException {
    this.reset();
}
/**
 * Returns the number of attributes of the current start element.
 * Fails with the exception built by illegalState if the current event is
 * not START_ELEMENT.
 */
public int getAttributeCount() {
    if (_eventType == XMLStreamConstants.START_ELEMENT) {
        return _attributes.getLength();
    }
    throw illegalState("Not a start element");
}
/**
 * Returns the local name of the attribute at the specified index.
 * Fails with the exception built by illegalState if the current event is
 * not START_ELEMENT.
 */
public CharArray getAttributeLocalName(int index) {
    if (_eventType == XMLStreamConstants.START_ELEMENT) {
        return _attributes.getLocalName(index);
    }
    throw illegalState("Not a start element");
}
/**
 * Returns the namespace URI bound to the prefix of the attribute at the
 * specified index, as resolved by the namespace stack.
 * Fails with the exception built by illegalState if the current event is
 * not START_ELEMENT.
 */
public CharArray getAttributeNamespace(int index) {
    if (_eventType == XMLStreamConstants.START_ELEMENT) {
        final CharArray attributePrefix = _attributes.getPrefix(index);
        return _namespaces.getNamespaceURINullAllowed(attributePrefix);
    }
    throw illegalState("Not a start element");
}
/**
 * Returns the prefix of the attribute at the specified index.
 * Fails with the exception built by illegalState if the current event is
 * not START_ELEMENT.
 */
public CharArray getAttributePrefix(int index) {
    if (_eventType == XMLStreamConstants.START_ELEMENT) {
        return _attributes.getPrefix(index);
    }
    throw illegalState("Not a start element");
}
/**
 * Returns the type of the attribute at the specified position.
 *
 * @param index the attribute position, handed as-is to the attribute list.
 * @return the type reported by the attribute list.
 * @throws IllegalStateException if the current event is not a
 *         START_ELEMENT.
 */
public CharArray getAttributeType(int index) {
    if (_eventType == XMLStreamConstants.START_ELEMENT) {
        return _attributes.getType(index);
    }
    throw illegalState("Not a start element");
}
/**
 * Returns the value of the attribute identified by namespace URI and
 * local name.
 *
 * @param uri the namespace URI, or {@code null} to look the attribute up
 *        by {@code localName} only.
 * @param localName the name handed to the attribute list.
 * @return the value reported by the attribute list.
 * @throws IllegalStateException if the current event is not a
 *         START_ELEMENT.
 */
public CharArray getAttributeValue(CharSequence uri, CharSequence localName) {
    if (_eventType != XMLStreamConstants.START_ELEMENT) {
        throw illegalState("Not a start element");
    }
    if (uri == null) {
        // No namespace given: single-argument lookup.
        return _attributes.getValue(localName);
    }
    return _attributes.getValue(uri, localName);
}
/**
 * Returns the value of the attribute at the specified position.
 *
 * @param index the attribute position, handed as-is to the attribute list.
 * @return the value reported by the attribute list.
 * @throws IllegalStateException if the current event is not a
 *         START_ELEMENT.
 */
public CharArray getAttributeValue(int index) {
    final boolean onStartElement =
            (_eventType == XMLStreamConstants.START_ELEMENT);
    if (onStartElement) {
        return _attributes.getValue(index);
    }
    throw illegalState("Not a start element");
}
/**
 * Returns the value of the {@code encoding} attribute read from the
 * prolog.
 *
 * @return the attribute value, or {@code null} when the prolog lookup
 *         finds nothing.
 */
public CharArray getCharacterEncodingScheme() {
    final CharArray declaredEncoding = readPrologAttribute(ENCODING);
    return declaredEncoding;
}
/**
 * Name of the prolog attribute looked up by
 * {@link #getCharacterEncodingScheme()}.
 */
private static final CharArray ENCODING = new CharArray("encoding");
/**
 * Returns the name of the encoding currently held by this reader.
 *
 * @return the result of {@code name()} on the reader's encoding.
 */
public String getEncoding() {
    final String encodingName = _encoding.name();
    return encodingName;
}
/**
 * Returns the type of the event this reader is currently positioned on.
 *
 * @return the current event type code.
 */
public int getEventType() {
    return this._eventType;
}
/**
 * Returns the local part of the current element name.
 *
 * When no prefix separator was recorded the qualified name itself is
 * returned; otherwise a new sequence covering the characters after the
 * separator up to the end of the qualified name is returned.
 *
 * @return the local name of the current element.
 * @throws IllegalStateException if the current event is neither a
 *         START_ELEMENT nor an END_ELEMENT.
 */
public CharArray getLocalName() {
    final boolean onElement =
            (_eventType == XMLStreamConstants.START_ELEMENT)
            || (_eventType == XMLStreamConstants.END_ELEMENT);
    if (!onElement) {
        throw illegalState("Not a start or end element");
    }
    if (_prefixSep < 0) {
        return _qName;
    }
    // The separator index is absolute, so the local name begins right
    // after it and ends where the qualified name ends.
    final int localStart = _prefixSep + 1;
    final int qNameEnd = _qName.offset() + _qName.length();
    return newSeq(localStart, qNameEnd - localStart);
}
/**
 * Returns the location object held by this reader.
 *
 * @return the reader's location instance.
 */
public LocationImpl getLocation() {
    return this._location;
}
/**
 * Returns the namespace count recorded by the namespace context for the
 * current depth.
 *
 * @return the count stored at the current depth.
 * @throws IllegalStateException if the current event is neither a
 *         START_ELEMENT nor an END_ELEMENT.
 */
public int getNamespaceCount() {
    final boolean onElement =
            (_eventType == XMLStreamConstants.START_ELEMENT)
            || (_eventType == XMLStreamConstants.END_ELEMENT);
    if (onElement) {
        return _namespaces._namespacesCount[_depth];
    }
    throw illegalState("Not a start or end element");
}
/**
 * Returns the namespace prefix stored at the specified position.
 *
 * @param index the position, used directly as an index into the
 *        namespace context's prefix table.
 * @return the prefix stored at that position.
 * @throws IllegalStateException if the current event is neither a
 *         START_ELEMENT nor an END_ELEMENT.
 */
public CharArray getNamespacePrefix(int index) {
    if (!((_eventType == XMLStreamConstants.START_ELEMENT)
            || (_eventType == XMLStreamConstants.END_ELEMENT))) {
        throw illegalState("Not a start or end element");
    }
    final CharArray[] prefixTable = _namespaces._prefixes;
    return prefixTable[index];
}
/**
 * Returns the namespace URI bound to the specified prefix, as resolved by
 * the namespace context.
 *
 * @param prefix the prefix handed to the namespace context.
 * @return the result of the namespace context lookup.
 * @throws IllegalStateException if the current event is neither a
 *         START_ELEMENT nor an END_ELEMENT.
 */
public CharArray getNamespaceURI(CharSequence prefix) {
    final boolean onElement =
            (XMLStreamConstants.START_ELEMENT == _eventType)
            || (XMLStreamConstants.END_ELEMENT == _eventType);
    if (onElement) {
        return _namespaces.getNamespaceURI(prefix);
    }
    throw illegalState("Not a start or end element");
}
/**
 * Returns the namespace URI stored at the specified position.
 *
 * @param index the position, used directly as an index into the
 *        namespace context's URI table.
 * @return the namespace URI stored at that position.
 * @throws IllegalStateException if the current event is neither a
 *         START_ELEMENT nor an END_ELEMENT.
 */
public CharArray getNamespaceURI(int index) {
    if (!((_eventType == XMLStreamConstants.START_ELEMENT)
            || (_eventType == XMLStreamConstants.END_ELEMENT))) {
        throw illegalState("Not a start or end element");
    }
    final CharArray[] uriTable = _namespaces._namespaces;
    return uriTable[index];
}
/**
 * Returns the namespace context held by this reader.
 *
 * @return the reader's namespace context instance.
 */
public NamespaceContext getNamespaceContext() {
    return this._namespaces;
}
/**
 * Returns the namespace URI of the current element, resolved from its
 * prefix through the namespace context's null-tolerant lookup.
 *
 * @return the result of the namespace lookup for the current prefix.
 * @throws IllegalStateException if the current event is neither a
 *         START_ELEMENT nor an END_ELEMENT (raised by the prefix accessor).
 */
public CharArray getNamespaceURI() {
    final CharArray elementPrefix = getPrefix();
    return _namespaces.getNamespaceURINullAllowed(elementPrefix);
}
/**
 * Returns the prefix of the current element name.
 *
 * @return a new sequence running from the start of the qualified name up
 *         to (excluding) the prefix separator, or {@code null} when no
 *         separator was recorded.
 * @throws IllegalStateException if the current event is neither a
 *         START_ELEMENT nor an END_ELEMENT.
 */
public CharArray getPrefix() {
    final boolean onElement =
            (_eventType == XMLStreamConstants.START_ELEMENT)
            || (_eventType == XMLStreamConstants.END_ELEMENT);
    if (!onElement) {
        throw illegalState("Not a start or end element");
    }
    if (_prefixSep >= 0) {
        final int qNameStart = _qName.offset();
        return newSeq(qNameStart, _prefixSep - qNameStart);
    }
    return null;
}
/**
 * Returns the data part of the current processing instruction, i.e. the
 * characters that follow the first space of the instruction text.
 *
 * @return a new sequence covering the characters after the first space up
 *         to the end of the text; an empty sequence when the text
 *         contains no space (target only).
 * @throws IllegalStateException if the current event is not a
 *         PROCESSING_INSTRUCTION.
 */
public CharArray getPIData() {
    if (_eventType != XMLStreamConstants.PROCESSING_INSTRUCTION)
        throw illegalState("Not a processing instruction");
    final int textStart = _text.offset();
    final int textEnd = textStart + _text.length();
    // The search result is treated as relative to the start of the text
    // (the text offset is added to obtain a buffer position).
    final int separator = _text.indexOf(' ');
    if (separator < 0) // No separator: the instruction carries no data.
        return newSeq(textEnd, 0);
    final int dataStart = textStart + separator + 1;
    // The length must be measured against the end of the text, not against
    // the text length alone, since dataStart is a buffer position.
    CharArray piData = newSeq(dataStart, textEnd - dataStart);
    return piData;
}
/**
 * Returns the target of the current processing instruction, i.e. the
 * characters that precede the first space of the instruction text.
 *
 * @return a new sequence starting at the beginning of the text and ending
 *         before the first space; the whole text when it contains no
 *         space.
 * @throws IllegalStateException if the current event is not a
 *         PROCESSING_INSTRUCTION.
 */
public CharArray getPITarget() {
    if (_eventType != XMLStreamConstants.PROCESSING_INSTRUCTION)
        throw illegalState("Not a processing instruction");
    // The search result is treated as relative to the start of the text,
    // so it is directly the number of characters in the target.
    final int separator = _text.indexOf(' ');
    final int targetLength = (separator < 0) ? _text.length() : separator;
    CharArray piTarget = newSeq(_text.offset(), targetLength);
    return piTarget;
}
/**
 * Returns the text of the current event.
 *
 * @return the reader's current text sequence.
 * @throws IllegalStateException if the current event is not one of
 *         CHARACTERS, COMMENT or DTD.
 */
public CharArray getText() {
    final boolean textual =
            (_eventType == XMLStreamConstants.CHARACTERS)
            || (_eventType == XMLStreamConstants.COMMENT)
            || (_eventType == XMLStreamConstants.DTD);
    if (textual) {
        return _text;
    }
    throw illegalState("Not a text event");
}
/**
 * Returns the character array backing the text of the current event.
 *
 * The array is returned as-is; the text accessors for start and length
 * report which part of it belongs to the current text.
 *
 * @return the backing array of the current text.
 * @throws IllegalStateException if the current event is not a text event.
 */
public char[] getTextCharacters() {
    final CharArray currentText = getText();
    return currentText.array();
}
/**
 * Copies characters of the current text into the specified array.
 *
 * At most {@code length} characters are copied, and never more than the
 * number of characters remaining in the text from {@code sourceStart}.
 *
 * @param sourceStart the index, relative to the start of the text, of the
 *        first character to copy.
 * @param target the destination array.
 * @param targetStart the index in {@code target} of the first character
 *        written.
 * @param length the maximum number of characters to copy.
 * @return the number of characters actually copied.
 * @throws IndexOutOfBoundsException if {@code sourceStart} is negative or
 *         greater than the text length, or if the copy does not fit the
 *         destination (as reported by {@code System.arraycopy}).
 * @throws IllegalStateException if the current event is not a text event.
 * @throws XMLStreamException declared by the signature.
 */
public int getTextCharacters(int sourceStart, char[] target,
        int targetStart, int length) throws XMLStreamException {
    CharArray text = getText();
    // Characters left in the text once sourceStart has been skipped.
    int available = text.length() - sourceStart;
    if ((sourceStart < 0) || (available < 0))
        throw new IndexOutOfBoundsException("sourceStart: " + sourceStart
                + ", text length: " + text.length());
    // Bounding by the remaining characters (not the full text length)
    // prevents copying data located past the end of the text.
    int copyLength = Math.min(length, available);
    System.arraycopy(text.array(), sourceStart + text.offset(), target,
            targetStart, copyLength);
    return copyLength;
}
/**
 * Returns the number of characters in the text of the current event.
 *
 * @return the length of the current text.
 * @throws IllegalStateException if the current event is not a text event.
 */
public int getTextLength() {
    final CharArray currentText = getText();
    return currentText.length();
}
/**
 * Returns the offset at which the text of the current event starts.
 *
 * @return the offset of the current text.
 * @throws IllegalStateException if the current event is not a text event.
 */
public int getTextStart() {
    final CharArray currentText = getText();
    return currentText.offset();
}
/**
 * Returns the value of the {@code version} attribute read from the
 * prolog.
 *
 * @return the attribute value, or {@code null} when the prolog lookup
 *         finds nothing.
 */
public CharArray getVersion() {
    final CharArray declaredVersion = readPrologAttribute(VERSION);
    return declaredVersion;
}
/**
 * Name of the prolog attribute looked up by {@link #getVersion()}.
 */
private static final CharArray VERSION = new CharArray("version");
/**
 * Indicates whether the document is declared standalone.
 *
 * @return {@code false} when the prolog declares {@code standalone="no"};
 *         {@code true} otherwise, including when the prolog carries no
 *         standalone declaration.
 */
public boolean isStandalone() {
    CharArray standalone = readPrologAttribute(STANDALONE);
    // An explicit "no" is the only declaration that makes the document
    // non-standalone; an absent declaration keeps the existing default.
    return (standalone != null) ? !standalone.equals("no") : true;
}
/**
 * Indicates whether a {@code standalone} attribute could be read from the
 * prolog.
 *
 * @return {@code true} if the prolog lookup returned a value;
 *         {@code false} otherwise.
 */
public boolean standaloneSet() {
    final CharArray declaration = readPrologAttribute(STANDALONE);
    return declaration != null;
}
/**
 * Name of the prolog attribute looked up by {@link #isStandalone()} and
 * {@link #standaloneSet()}.
 */
private static final CharArray STANDALONE = new CharArray("standalone");
/**
 * Indicates whether the current event carries a name.
 *
 * @return {@code true} if the current event is a START_ELEMENT or an
 *         END_ELEMENT; {@code false} otherwise.
 */
public boolean hasName() {
    if (_eventType == XMLStreamConstants.START_ELEMENT) {
        return true;
    }
    return _eventType == XMLStreamConstants.END_ELEMENT;
}
/**
 * Indicates whether more events may be read.
 *
 * @return {@code false} once the current event is END_DOCUMENT;
 *         {@code true} otherwise.
 * @throws XMLStreamException declared by the signature; not raised by
 *         this body.
 */
public boolean hasNext() throws XMLStreamException {
    final boolean documentEnded =
            (_eventType == XMLStreamConstants.END_DOCUMENT);
    return !documentEnded;
}
/**
 * Indicates whether the current event has non-empty text.
 *
 * @return {@code true} if the current event is CHARACTERS, COMMENT or DTD
 *         and the current text is not empty; {@code false} otherwise.
 */
public boolean hasText() {
    final boolean textual =
            (_eventType == XMLStreamConstants.CHARACTERS)
            || (_eventType == XMLStreamConstants.COMMENT)
            || (_eventType == XMLStreamConstants.DTD);
    if (!textual) {
        // The text is only inspected for text events.
        return false;
    }
    return _text.length() > 0;
}
/**
 * Indicates whether the attribute at the specified position has a value.
 *
 * @param index the attribute position, handed as-is to the attribute list.
 * @return {@code true} if the attribute list reports a non-null value for
 *         that position; {@code false} otherwise.
 * @throws IllegalStateException if the current event is not a
 *         START_ELEMENT.
 */
public boolean isAttributeSpecified(int index) {
    if (_eventType != XMLStreamConstants.START_ELEMENT)
        // Same helper as the other attribute accessors, so the message
        // also names the actual current event.
        throw illegalState("Not a start element");
    return _attributes.getValue(index) != null;
}
/**
 * Indicates whether the current event is a CHARACTERS event.
 *
 * @return {@code true} if the current event type is CHARACTERS.
 */
public boolean isCharacters() {
    return XMLStreamConstants.CHARACTERS == this._eventType;
}
/**
 * Indicates whether the current event is an END_ELEMENT event.
 *
 * @return {@code true} if the current event type is END_ELEMENT.
 */
public boolean isEndElement() {
    return XMLStreamConstants.END_ELEMENT == this._eventType;
}
/**
 * Indicates whether the current event is a START_ELEMENT event.
 *
 * @return {@code true} if the current event type is START_ELEMENT.
 */
public boolean isStartElement() {
    return XMLStreamConstants.START_ELEMENT == this._eventType;
}
/**
 * Indicates whether the current event is a CHARACTERS event whose text
 * consists of whitespace characters only.
 *
 * @return {@code true} for a CHARACTERS event in which every character is
 *         a whitespace (vacuously true for an empty text); {@code false}
 *         for any other event or as soon as one non-whitespace character
 *         is found.
 */
public boolean isWhiteSpace() {
    if (!isCharacters()) {
        return false;
    }
    final char[] buffer = _text.array();
    final int first = _text.offset();
    final int limit = first + _text.length();
    for (int pos = first; pos < limit; pos++) {
        if (!isWhiteSpace(buffer[pos])) {
            return false;
        }
    }
    return true;
}
// Whitespaces according to XML 1.1 Specification.
/**
 * Indicates whether the specified character is one of the four
 * whitespace characters: space, tab, carriage return or line feed.
 *
 * @param c the character to test.
 * @return {@code true} if {@code c} is a whitespace character.
 */
private static boolean isWhiteSpace(char c) {
    switch (c) {
    case ' ': // 0x20
    case '\t': // 0x9
    case '\r': // 0xD
    case '\n': // 0xA
        return true;
    default:
        return false;
    }
}
/**
 * Advances to the next START_ELEMENT or END_ELEMENT event.
 *
 * COMMENT, PROCESSING_INSTRUCTION and DTD events are skipped, as well as
 * CHARACTERS events made of whitespace only.
 *
 * @return the type of the event reached (START_ELEMENT or END_ELEMENT).
 * @throws XMLStreamException if an event of any other type is found
 *         first; the exception carries the reader location.
 */
public int nextTag() throws XMLStreamException {
    int eventType = next();
    while (eventType == XMLStreamConstants.COMMENT
            || eventType == XMLStreamConstants.PROCESSING_INSTRUCTION
            || eventType == XMLStreamConstants.DTD
            || (eventType == XMLStreamConstants.CHARACTERS && isWhiteSpace())) {
        eventType = next();
    }
    if (eventType != XMLStreamConstants.START_ELEMENT
            && eventType != XMLStreamConstants.END_ELEMENT)
        // The location is attached, as for the other stream exceptions
        // raised by this reader.
        throw new XMLStreamException("Tag expected (but found "
                + NAMES_OF_EVENTS[_eventType] + ")", getLocation());
    return eventType;
}
/**
 * Builds (without throwing) an illegal state exception whose message is
 * the specified text followed by the name of the current event in
 * parentheses.
 *
 * @param msg the leading part of the exception message.
 * @return the exception, to be thrown by the caller.
 */
private IllegalStateException illegalState(String msg) {
    final String currentEventName = NAMES_OF_EVENTS[_eventType];
    final String detail = msg + " (" + currentEventName + ")";
    return new IllegalStateException(detail);
}
/**
 * Reads the value of the specified attribute from the prolog.
 *
 * The first occurrence of the name is located in the prolog; from there
 * the scan looks for an equal sign, then for an opening single or double
 * quote, then for the matching closing quote.
 *
 * @param name the name of the attribute to search for.
 * @return a new sequence covering the characters between the quotes, or
 *         {@code null} if there is no prolog, the name is not found, or
 *         the scan reaches the end of the prolog before a quoted value is
 *         complete.
 */
private final CharArray readPrologAttribute(CharSequence name) {
    if (_prolog == null)
        return null;
    final int found = _prolog.indexOf(name);
    if (found < 0)
        return null;
    final int limit = _prolog.offset() + _prolog.length();
    // The search result is relative to the prolog start; convert it to a
    // buffer position just past the name.
    int pos = found + _prolog.offset() + name.length();

    // Step 1: skip everything up to and including the equal sign.
    while ((pos < limit) && (_prolog.array()[pos] != '=')) {
        pos++;
    }
    pos++;

    // Step 2: skip everything up to and including the opening quote.
    char quote = 0;
    while (pos < limit) {
        final char c = _prolog.array()[pos++];
        if ((c == '"') || (c == '\'')) {
            quote = c;
            break;
        }
    }
    if (quote == 0)
        return null; // End of prolog reached without any quoted value.

    // Step 3: the value runs up to the matching closing quote.
    final int valueStart = pos;
    for (; pos < limit; pos++) {
        if (_prolog.array()[pos] == quote)
            return newSeq(valueStart, pos - valueStart);
    }
    return null; // Unterminated value.
}
}