All Downloads are FREE. Search and download functionalities are using the official Maven repository.

javolution.xml.internal.stream.XMLStreamReaderImpl Maven / Gradle / Ivy

The newest version!
/*
 * Javolution - Java(TM) Solution for Real-Time and Embedded Systems
 * Copyright (C) 2012 - Javolution (http://javolution.org/)
 * All rights reserved.
 * 
 * Permission to use, copy, modify, and distribute this software is
 * freely granted, provided that this notice is preserved.
 */
package javolution.xml.internal.stream;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.Map;
import javolution.io.UTF8StreamReader;
import javolution.lang.Realtime;
import javolution.text.CharArray;
import javolution.xml.sax.Attributes;
import javolution.xml.stream.Location;
import javolution.xml.stream.NamespaceContext;
import javolution.xml.stream.XMLInputFactory;
import javolution.xml.stream.XMLStreamConstants;
import javolution.xml.stream.XMLStreamException;
import javolution.xml.stream.XMLStreamReader;

/**
 * {@link XMLStreamReader} implementation.
 *     
 * This implementation returns all contiguous character data in a single
 * chunk (always coalescing). It is non-validating (the DTD is returned 
 * unparsed); however, users may define custom entity mappings using 
 * the {@link #setEntities} method (e.g. after parsing/resolving 
 * external entities).
 */
@Realtime
public final class XMLStreamReaderImpl implements XMLStreamReader {

    /**
     * Holds the textual representation for events; indexed by the event
     * type code (see {@link #toString()}).
     */
    static final String[] NAMES_OF_EVENTS = new String[] { "UNDEFINED",
            "START_ELEMENT", "END_ELEMENT", "PROCESSING_INSTRUCTIONS",
            "CHARACTERS", "COMMENT", "SPACE", "START_DOCUMENT", "END_DOCUMENT",
            "ENTITY_REFERENCE", "ATTRIBUTE", "DTD", "CDATA", "NAMESPACE",
            "NOTATION_DECLARATION", "ENTITY_DECLARATION" };

    /**
     * Holds the reader buffer capacity (size of the read buffer, in
     * characters).
     */
    static final int READER_BUFFER_CAPACITY = 4096;

    /**
     * Holds the prolog if any (set when the input reader is assigned and
     * the input starts with an XML declaration).
     */
    CharArray _prolog;

    /**
     * Holds the current index in the character buffer (next position of 
     * the read buffer to be consumed).
     */
    private int _readIndex;

    /**
     * Number of characters read from the reader and available in the 
     * read buffer.
     */
    private int _readCount;

    /**
     * Holds the data buffer for CharSequence produced by this parser.
     */
    private char[] _data = new char[READER_BUFFER_CAPACITY * 2];

    /**
     * Holds the current index of the data buffer (_data), i.e. the next
     * position to be written.
     */
    private int _index;

    /**
     * Holds the current element nesting.
     */
    private int _depth;

    /**
     * Holds the element qualified name (includes the prefix).
     */
    private CharArray _qName;

    /**
     * Holds element prefix separator index (-1 if no prefix).
     */
    private int _prefixSep;

    /**
     * Holds attribute qualified name.
     */
    private CharArray _attrQName;

    /**
     * Holds attribute prefix separator index (-1 if no prefix).
     */
    private int _attrPrefixSep;

    /**
     * Holds attribute value.
     */
    private CharArray _attrValue;

    /**
     * Holds the current event type.
     */
    private int _eventType = START_DOCUMENT;

    /**
     * Indicates if the event type is START_ELEMENT and the tag is empty, 
     * i.e. &lt;sometag/&gt; (the next event is then an automatically 
     * generated END_ELEMENT).
     */
    private boolean _isEmpty;

    /**
     * Indicates if characters are pending for potential coalescing.
     */
    boolean _charactersPending = false;

    /**
     * Holds the start index for the current state within _data array.
     */
    private int _start;

    /**
     * Holds the parser state (one of the STATE_* constants).
     */
    private int _state = STATE_CHARACTERS;

    /**
     * Holds the current text.
     */
    private CharArray _text;

    /** 
     * Holds the reader input source (null when unused).
     */
    private Reader _reader;

    /**
     * Holds the character buffer used for reading.
     */
    private final char[] _readBuffer = new char[READER_BUFFER_CAPACITY];

    /**
     * Holds the start offset in the character buffer (due to auto detection
     * of encoding); the first read of a new reader stores its characters
     * from this position onward.
     */
    private int _startOffset; // Byte Order Mark count.

    /**
     * Holds the location object.
     */
    private final LocationImpl _location = new LocationImpl();

    /**
     * Holds the namespace stack.
     */
    private final NamespacesImpl _namespaces = new NamespacesImpl();

    /**
     * Holds the current attributes.
     */
    private final AttributesImpl _attributes = new AttributesImpl(_namespaces);

    /**
     * Holds working stack (by nesting level).
     */
    private CharArray[] _elemStack = new CharArray[16];

    /**
     * Holds stream encoding if known.
     */
    private String _encoding;

    /**
     * Holds the entities.
     */
    private final EntitiesImpl _entities = new EntitiesImpl();

    /**
     * Holds the reader for input streams (used for UTF-8 encoded streams).
     */
    private final UTF8StreamReader _utf8StreamReader = new UTF8StreamReader();

    /**
     * Holds the factory (if any); null for readers created through the 
     * default constructor.
     */
    private final XMLInputFactoryImpl _factory;

    /** 
     * Default constructor; creates a reader which is not associated with
     * any factory.
     */
    public XMLStreamReaderImpl() {
        this(null);
    }

    /** 
     * Factory-based constructor.
     *
     * @param factory the factory creating this reader, or null if none.
     */
    XMLStreamReaderImpl(XMLInputFactoryImpl factory) {
        _factory = factory;
    }

    /**
     * Sets the input stream source for this XML stream reader when the
     * encoding is unknown. The encoding is first auto-detected from the
     * stream; if the XML prolog then declares a different (non UTF-8
     * compatible) encoding, the reader is replaced by one using the
     * declared encoding.
     *
     * @param  in the input source with unknown encoding.
     * @throws XMLStreamException if the input cannot be read or the 
     *         encoding is not supported.
     */
    public void setInput(InputStream in) throws XMLStreamException {
        setInput(in, detectEncoding(in));
        final CharArray declared = getCharacterEncodingScheme();

        // Nothing declared in the prolog: keeps the detected encoding.
        if (declared == null)
            return;

        // Declared encoding is the one already in use.
        if (declared.equals(_encoding))
            return;

        // Both are handled by the UTF-8 reader: no need to switch.
        if (isUTF8(declared) && isUTF8(_encoding))
            return;

        // Changes reader (keep characters already read).
        final int alreadyRead = _readCount;
        reset();
        _startOffset = alreadyRead;
        setInput(in, declared.toString());
    }

    // Indicates if the specified encoding name is one of the names handled
    // by the fast UTF-8 reader ("ASCII" included). Comparison is exact
    // (case-sensitive) and delegated to encoding.equals(...).
    private static boolean isUTF8(Object encoding) {
        if (encoding.equals("utf-8"))
            return true;
        if (encoding.equals("UTF-8"))
            return true;
        if (encoding.equals("ASCII"))
            return true;
        if (encoding.equals("utf8"))
            return true;
        return encoding.equals("UTF8");
    }

    /**
     * Sets the input stream source and encoding for this XML stream reader.
     * UTF-8 compatible encodings are decoded by the internal UTF-8 stream
     * reader; any other encoding is decoded through a standard 
     * {@link InputStreamReader}.
     *
     * @param in the input source.
     * @param encoding the associated encoding.
     * @throws XMLStreamException if the encoding is not supported or the
     *         prolog cannot be read.
     */
    public void setInput(InputStream in, String encoding)
            throws XMLStreamException {
        _encoding = encoding;
        if (isUTF8(encoding)) { // Use our fast UTF-8 Reader.
            setInput(_utf8StreamReader.setInput(in));
            return;
        }
        try {
            setInput(new InputStreamReader(in, encoding));
        } catch (UnsupportedEncodingException unsupported) {
            throw new XMLStreamException(unsupported);
        }
    }

    /**
     * Sets the reader input source for this XML stream reader. 
     * This method performs a first read into the read buffer and, if the
     * input starts with an XML declaration (<code>&lt;?xml </code>), parses
     * it and keeps it as the prolog; the event type stays 
     * START_DOCUMENT afterwards.
     *
     * @param  reader the input source reader.
     * @throws IllegalStateException if a reader is already set (not closed
     *         or reset).
     * @throws XMLStreamException if an I/O error occurs or the prolog is
     *         malformed.
     * @see    javolution.io.UTF8StreamReader
     * @see    javolution.io.UTF8ByteBufferReader
     * @see    javolution.io.CharSequenceReader
     */
    public void setInput(Reader reader) throws XMLStreamException {
        if (_reader != null)
            throw new IllegalStateException("Reader not closed or reset");
        _reader = reader;
        try { // Reads prolog (if there)
            int readCount = reader.read(_readBuffer, _startOffset,
                    _readBuffer.length - _startOffset);
            _readCount = (readCount >= 0) ? readCount + _startOffset
                    : _startOffset;
            // Six characters are inspected (indices 0 to 5), hence at least
            // six must have been read; otherwise index 5 would hold stale
            // buffer content.
            if ((_readCount >= 6) && (_readBuffer[0] == '<')
                    && (_readBuffer[1] == '?') && (_readBuffer[2] == 'x')
                    && (_readBuffer[3] == 'm') && (_readBuffer[4] == 'l')
                    && (_readBuffer[5] == ' ')) { // Prolog detected.
                next(); // Processing instruction.
                _prolog = this.getPIData();
                _index = _prolog.offset() + _prolog.length(); // Keep prolog.
                _start = _index; // Default state.
                _eventType = START_DOCUMENT; // Resets to START_DOCUMENT.
            }
        } catch (IOException e) {
            throw new XMLStreamException(e);
        }
    }

    /** 
     * Returns the current depth of the element. Outside the root element, 
     * the depth is 0. The depth is incremented by 1 when a start tag is
     * reached. The depth is decremented AFTER the end tag event was observed.
     * [code]
     * &lt;!-- outside --&gt;     0
     * &lt;root&gt;               1
     *    sometext          1
     *     &lt;foobar&gt;         2
     *     &lt;/foobar&gt;        2
     * &lt;/root&gt;              1
     * &lt;!-- outside --&gt;     0 [/code]
     * 
     * @return the nesting depth.
     */
    public int getDepth() {
        return _depth;
    }

    /**
     * Returns the qualified name (prefix included) of the element for the
     * current START_ELEMENT or END_ELEMENT event.
     * 
     * @return the qualified name.
     * @throws IllegalStateException if this not a START_ELEMENT or END_ELEMENT.
     */
    public CharArray getQName() {
        final boolean onElement = (_eventType == XMLStreamConstants.START_ELEMENT)
                || (_eventType == XMLStreamConstants.END_ELEMENT);
        if (!onElement)
            throw new IllegalStateException(
                    "Not a start element or an end element");
        return _qName;
    }

    /**
     * Returns the qualified name of the element at the specified level.
     * This method can be used to retrieve the XPath of the current element.
     *
     * @param depth the nesting level of the element, in the range 
     *        <code>[0, getDepth()]</code>.
     * @return the qualified name of the element at the specified level
     *         (the entry of the element stack for that level).
     * @throws IllegalArgumentException if <code>depth &lt; 0</code> or
     *         <code>depth &gt; getDepth()</code>
     */
    public CharArray getQName(int depth) {
        // Negative levels are rejected explicitly; they would otherwise
        // surface as an ArrayIndexOutOfBoundsException.
        if ((depth < 0) || (depth > this.getDepth()))
            throw new IllegalArgumentException("Depth " + depth
                    + " out of range [0, " + this.getDepth() + "]");
        return _elemStack[depth];
    }

    /**
     * Returns the attributes (SAX2-Like) of the current start element.
     *
     * @return the attributes of the current element.
     * @throws IllegalStateException if not a START_ELEMENT.
     */
    public Attributes getAttributes() {
        if (_eventType == XMLStreamConstants.START_ELEMENT)
            return _attributes;
        throw new IllegalStateException("Not a start element");
    }

    /**
     * Defines a custom entities to replacement text mapping for this reader.
     * For example:[code]
     *     FastMap HTML_ENTITIES = new FastMap();
     *     HTML_ENTITIES.put("nbsp", " ");
     *     HTML_ENTITIES.put("copy", "©");
     *     HTML_ENTITIES.put("eacute", "é");
     *     ...
     *     XMLStreamReaderImpl reader = new XMLStreamReaderImpl();
     *     reader.setEntities(HTML_ENTITIES);
     * [/code]
     * The entities mapping may be changed dynamically (e.g. 
     * after reading the DTD and all external entities references are resolved).
     * The mapping is handed over as-is to this reader's entities holder.
     * 
     * @param entities the entities to replacement texts mapping 
     *        (both must be CharSequence instances).
     */
    public void setEntities(Map entities) {
        _entities.setEntitiesMapping(entities);
    }

    /**
     * Returns the textual representation of this reader current state:
     * the name of the current event followed by the current location.
     * 
     * @return the textual representation of the current state.
     */
    @Override
    public String toString() {
        final String eventName = NAMES_OF_EVENTS[_eventType];
        final String where = _location.toString();
        return "XMLStreamReader - State: " + eventName + ", Location: "
                + where;
    }

    // Implements XMLStreamReader Interface.
    public int next() throws XMLStreamException {

        // Clears previous state.
        if (_eventType == START_ELEMENT) {
            if (_isEmpty) { // Previous empty tag, generates END_TAG automatically.
                _isEmpty = false;
                return _eventType = END_ELEMENT;
            }
        } else if (_eventType == END_ELEMENT) {
            _namespaces.pop();
            CharArray startElem = _elemStack[_depth--];
            _start = _index = startElem.offset();
            while (_seqs[--_seqsIndex] != startElem) { // Recycles CharArray instances.
            }
        }
        // Reader loop.
        while (true) {

            // Main character reading block.
            if ((_readIndex >= _readCount) && isEndOfStream())
                return _eventType; // END_DOCUMENT or CHARACTERS.
            char c = _readBuffer[_readIndex++];
            if (c <= '&')
                c = (c == '&') ? replaceEntity()
                        : (c < ' ') ? handleEndOfLine(c) : c;
            _data[_index++] = c;

            // Main processing.
            //
            switch (_state) {

                case STATE_CHARACTERS:
                    while (true) { // Read characters data all at once.

                        if (c == '<') {
                            int length = _index - _start - 1;
                            if (length > 0) {
                                if (_charactersPending) {
                                    _text.setArray(_data, _text.offset(),
                                            _text.length() + length); // Coalescing.
                                } else {
                                    _text = newSeq(_start, length);
                                    _charactersPending = true;
                                }
                                _start = _index - 1; // Keeps '<' as part of markup.
                            }
                            _state = STATE_MARKUP;
                            break;
                        }

                        // Local character reading block.
                        if ((_readIndex >= _readCount) && isEndOfStream())
                            return _eventType;
                        c = _readBuffer[_readIndex++];
                        if (c <= '&')
                            c = (c == '&') ? replaceEntity()
                                    : (c < ' ') ? handleEndOfLine(c) : c;
                        _data[_index++] = c;
                    }
                    break;

                case STATE_CDATA:
                    while (true) { // Reads CDATA all at once.

                        if ((c == '>') && (_index - _start >= 3)
                                && (_data[_index - 2] == ']')
                                && (_data[_index - 3] == ']')) {
                            _index -= 3;
                            int length = _index - _start;
                            if (length > 0) { // Not empty.
                                if (_charactersPending) {
                                    _text.setArray(_data, _text.offset(),
                                            _text.length() + length); // Coalescing.
                                } else {
                                    _text = newSeq(_start, length);
                                    _charactersPending = true;
                                }
                            }
                            _start = _index;
                            _state = STATE_CHARACTERS;
                            break;
                        }

                        // Local character reading block.
                        if (_readIndex >= _readCount)
                            reloadBuffer();
                        c = _readBuffer[_readIndex++];
                        if (c < ' ')
                            c = handleEndOfLine(c);
                        _data[_index++] = c;
                    }
                    break;

                case STATE_DTD:
                    if (c == '>') {
                        _text = newSeq(_start, _index - _start);
                        _index = _start; // Do not keep DTD.
                        _state = STATE_CHARACTERS;
                        return _eventType = DTD;
                    } else if (c == '[') {
                        _state = STATE_DTD_INTERNAL;
                    }
                    break;

                case STATE_DTD_INTERNAL:
                    if (c == ']') {
                        _state = STATE_DTD;
                    }
                    break;

                case STATE_MARKUP: // Starts with '<'
                    if (_index - _start == 2) {
                        if (c == '/') {
                            _start = _index = _index - 2;
                            _state = STATE_CLOSE_TAGxREAD_ELEM_NAME;
                            _prefixSep = -1;
                            if (_charactersPending) { // Flush characters event.
                                _charactersPending = false;
                                return _eventType = CHARACTERS;
                            }
                        } else if (c == '?') {
                            _start = _index = _index - 2;
                            _state = STATE_PI;
                            if (_charactersPending) { // Flush characters event.
                                _charactersPending = false;
                                return _eventType = CHARACTERS;
                            }
                        } else if (c != '!') { // Element tag (first letter).
                            _data[_start] = c;
                            _index = _start + 1;
                            _state = STATE_OPEN_TAGxREAD_ELEM_NAME;
                            _prefixSep = -1;
                            if (_charactersPending) { // Flush character event.
                                _charactersPending = false;
                                return _eventType = CHARACTERS;
                            }
                        }
                    } else if ((_index - _start == 4)
                            && (_data[_start + 1] == '!')
                            && (_data[_start + 2] == '-')
                            && (_data[_start + 3] == '-')) {
                        _start = _index = _index - 4; // Removes 
                            _text = newSeq(_start, _index - _start);
                            _state = STATE_CHARACTERS;
                            _index = _start; // Do not keep comments.
                            return _eventType = COMMENT;
                        }

                        // Local character reading block.
                        if (_readIndex >= _readCount)
                            reloadBuffer();
                        c = _readBuffer[_readIndex++];
                        if (c < ' ')
                            c = handleEndOfLine(c);
                        _data[_index++] = c;
                    }

                case STATE_PI:
                    if ((c == '>') && (_index - _start >= 2)
                            && (_data[_index - 2] == '?')) {
                        _index -= 2; // Removes ?>
                        _text = newSeq(_start, _index - _start);
                        _state = STATE_CHARACTERS;
                        _index = _start; // Do not keep processing instructions.
                        return _eventType = PROCESSING_INSTRUCTION;
                    }
                    break;

                // OPEN_TAG:
                case STATE_OPEN_TAGxREAD_ELEM_NAME:
                    _attributes.reset();
                    _namespaces.push();
                    while (true) { // Read element name all at once.

                        if (c < '@') { // Else avoid multiple checks.
                            if (c == '>') {
                                _qName = newSeq(_start, --_index - _start);
                                _start = _index;
                                _state = STATE_CHARACTERS;
                                processStartTag();
                                _isEmpty = false;
                                return _eventType = START_ELEMENT;
                            } else if (c == '/') {
                                _qName = newSeq(_start, --_index - _start);
                                _start = _index;
                                _state = STATE_OPEN_TAGxEMPTY_TAG;
                                break;
                            } else if (c == ':') {
                                _prefixSep = _index - 1;
                            } else if (c <= ' ') {
                                _qName = newSeq(_start, --_index - _start);
                                _state = STATE_OPEN_TAGxELEM_NAME_READ;
                                break;
                            }
                        }

                        if (_readIndex >= _readCount)
                            reloadBuffer();
                        c = _data[_index++] = _readBuffer[_readIndex++];
                    }
                    break;

                case STATE_OPEN_TAGxELEM_NAME_READ:
                    if (c == '>') {
                        _start = --_index;
                        _state = STATE_CHARACTERS;
                        processStartTag();
                        _isEmpty = false;
                        return _eventType = START_ELEMENT;
                    } else if (c == '/') {
                        _state = STATE_OPEN_TAGxEMPTY_TAG;
                    } else if (c > ' ') {
                        _start = _index - 1; // Includes current character.
                        _attrPrefixSep = -1;
                        _state = STATE_OPEN_TAGxREAD_ATTR_NAME;
                    }
                    break;

                case STATE_OPEN_TAGxREAD_ATTR_NAME:
                    while (true) { // Read attribute name all at once.

                        if (c < '@') { // Else avoid multiple checks.
                            if (c <= ' ') {
                                _attrQName = newSeq(_start, --_index - _start);
                                _state = STATE_OPEN_TAGxATTR_NAME_READ;
                                break;
                            } else if (c == '=') {
                                _attrQName = newSeq(_start, --_index - _start);
                                _state = STATE_OPEN_TAGxEQUAL_READ;
                                break;
                            } else if (c == ':') {
                                _attrPrefixSep = _index - 1;
                            }
                        }

                        if (_readIndex >= _readCount)
                            reloadBuffer();
                        _data[_index++] = c = _readBuffer[_readIndex++];
                    }
                    break;

                case STATE_OPEN_TAGxATTR_NAME_READ:
                    if (c == '=') {
                        --_index;
                        _state = STATE_OPEN_TAGxEQUAL_READ;
                    } else if (c > ' ') { throw new XMLStreamException(
                            "'=' expected", _location); }
                    break;

                case STATE_OPEN_TAGxEQUAL_READ:
                    if (c == '\'') {
                        _start = --_index;
                        _state = STATE_OPEN_TAGxREAD_ATTR_VALUE_SIMPLE_QUOTE;
                    } else if (c == '\"') {
                        _start = --_index;
                        _state = STATE_OPEN_TAGxREAD_ATTR_VALUE_DOUBLE_QUOTE;
                    } else if (c > ' ') { throw new XMLStreamException(
                            "Quotes expected", _location); }
                    break;

                case STATE_OPEN_TAGxREAD_ATTR_VALUE_SIMPLE_QUOTE:
                    while (true) { // Read attribute value all at once.

                        if (c == '\'') {
                            _attrValue = newSeq(_start, --_index - _start);
                            processAttribute();
                            _state = STATE_OPEN_TAGxELEM_NAME_READ;
                            break;
                        }

                        // Local character reading block.
                        if (_readIndex >= _readCount)
                            reloadBuffer();
                        c = _readBuffer[_readIndex++];
                        if (c == '&')
                            c = replaceEntity();
                        _data[_index++] = c;
                    }
                    break;

                case STATE_OPEN_TAGxREAD_ATTR_VALUE_DOUBLE_QUOTE:
                    while (true) { // Read attribute value all at once.

                        if (c == '\"') {
                            _attrValue = newSeq(_start, --_index - _start);
                            processAttribute();
                            _state = STATE_OPEN_TAGxELEM_NAME_READ;
                            break;
                        }

                        // Local character reading block.
                        if (_readIndex >= _readCount)
                            reloadBuffer();
                        c = _readBuffer[_readIndex++];
                        if (c == '&')
                            c = replaceEntity();
                        _data[_index++] = c;
                    }
                    break;

                case STATE_OPEN_TAGxEMPTY_TAG:
                    if (c == '>') {
                        _start = --_index;
                        _state = STATE_CHARACTERS;
                        processStartTag();
                        _isEmpty = true;
                        return _eventType = START_ELEMENT;
                    } else {
                        throw new XMLStreamException("'>' expected", _location);
                    }

                    // CLOSE_TAG:
                case STATE_CLOSE_TAGxREAD_ELEM_NAME:
                    while (true) { // Element name can be read all at once.

                        if (c < '@') { // Else avoid multiple checks.
                            if (c == '>') {
                                _qName = newSeq(_start, --_index - _start);
                                _start = _index;
                                _state = STATE_CHARACTERS;
                                processEndTag();
                                return _eventType = END_ELEMENT;
                            } else if (c == ':') {
                                _prefixSep = _index - 1;
                            } else if (c <= ' ') {
                                _qName = newSeq(_start, --_index - _start);
                                _state = STATE_CLOSE_TAGxELEM_NAME_READ;
                                break;
                            }
                        }

                        if (_readIndex >= _readCount)
                            reloadBuffer();
                        c = _data[_index++] = _readBuffer[_readIndex++];
                    }
                    break;

                case STATE_CLOSE_TAGxELEM_NAME_READ:
                    if (c == '>') {
                        _start = --_index;
                        _state = STATE_CHARACTERS;
                        processEndTag();
                        return _eventType = END_ELEMENT;
                    } else if (c > ' ') { throw new XMLStreamException(
                            "'>' expected", _location); }
                    break;

                default:
                    throw new XMLStreamException("State unknown: " + _state,
                            _location);
            }
        }
    }

    // Defines parsing states (keep values close together to avoid lookup).

    // Reading character data (initial state).
    private static final int STATE_CHARACTERS = 1;

    // A '<' has been read; the kind of markup is not determined yet.
    private static final int STATE_MARKUP = 2;

    // Comment state (entity references are not replaced in this state).
    private static final int STATE_COMMENT = 3;

    // Within a processing instruction (until "?>").
    private static final int STATE_PI = 4;

    // Within a CDATA section (until "]]>").
    private static final int STATE_CDATA = 5;

    // Start tag: reading the element name.
    private static final int STATE_OPEN_TAGxREAD_ELEM_NAME = 6;

    // Start tag: element name read; expecting an attribute name, '/' or '>'.
    private static final int STATE_OPEN_TAGxELEM_NAME_READ = 7;

    // Start tag: reading an attribute name.
    private static final int STATE_OPEN_TAGxREAD_ATTR_NAME = 8;

    // Start tag: attribute name read; expecting '='.
    private static final int STATE_OPEN_TAGxATTR_NAME_READ = 9;

    // Start tag: '=' read; expecting the opening quote of the value.
    private static final int STATE_OPEN_TAGxEQUAL_READ = 10;

    // Start tag: reading an attribute value delimited by single quotes.
    private static final int STATE_OPEN_TAGxREAD_ATTR_VALUE_SIMPLE_QUOTE = 11;

    // Start tag: reading an attribute value delimited by double quotes.
    private static final int STATE_OPEN_TAGxREAD_ATTR_VALUE_DOUBLE_QUOTE = 12;

    // Start tag: '/' read (empty element); '>' is expected next.
    private static final int STATE_OPEN_TAGxEMPTY_TAG = 13;

    // End tag: reading the element name.
    private static final int STATE_CLOSE_TAGxREAD_ELEM_NAME = 14;

    // End tag: element name read; expecting '>'.
    private static final int STATE_CLOSE_TAGxELEM_NAME_READ = 15;

    // Within the DTD declaration (until '>').
    private static final int STATE_DTD = 16;

    // Within the internal subset of the DTD (between '[' and ']').
    private static final int STATE_DTD_INTERNAL = 17;

    /**
     * Reloads the read buffer from the reader. The location is advanced by
     * the number of characters consumed from the previous buffer content,
     * the read index is reset and the data buffer is enlarged as necessary
     * to hold the characters just read.
     * 
     * @throws XMLStreamException if no input is specified, if an I/O error
     *         occurs, or if the end of stream is reached while inside an 
     *         element or in the middle of some markup.
     */
    private void reloadBuffer() throws XMLStreamException {
        if (_reader == null)
            throw new XMLStreamException("Input not specified");

        // Accounts for the characters consumed so far.
        final int consumed = _readIndex;
        _location._column += consumed;
        _location._charactersRead += consumed;
        _readIndex = 0;

        try {
            _readCount = _reader.read(_readBuffer, 0, _readBuffer.length);
            final boolean exhausted = (_readCount <= 0);
            final boolean incomplete = (_depth != 0)
                    || (_state != STATE_CHARACTERS);
            if (exhausted && incomplete)
                throw new XMLStreamException("Unexpected end of document",
                        _location);
        } catch (IOException ioError) {
            throw new XMLStreamException(ioError);
        }

        // Potential overflow of the data buffer.
        while (_data.length <= (_index + _readCount)) {
            increaseDataBuffer();
        }
    }

    /**
     * Detects end of stream (reloading the read buffer if it has been
     * fully consumed). At end of stream, trailing characters (if any) are
     * flushed as a CHARACTERS event; otherwise the event type is set to
     * END_DOCUMENT.
     * 
     * @return true if end of stream has being reached
     *         and the event type (CHARACTERS or END_DOCUMENT) has been set;
     *         false otherwise.
     * @throws XMLStreamException if the end of document has already been
     *         reported or the buffer cannot be reloaded.
     */
    private boolean isEndOfStream() throws XMLStreamException {
        if (_readIndex >= _readCount)
            reloadBuffer();
        if (_readCount > 0)
            return false; // Characters are available.

        // _state == STATE_CHARACTERS (otherwise reloadBuffer() exception)
        if (_eventType == END_DOCUMENT)
            throw new XMLStreamException(
                    "End document has already been reached");

        final int trailing = _index - _start;
        if (trailing <= 0) { // Nothing left to report.
            _eventType = END_DOCUMENT;
            return true;
        }

        // Flushes trailing characters.
        if (_charactersPending) {
            _text.setArray(_data, _text.offset(), _text.length()
                    + trailing); // Coalescing.
        } else {
            _text = newSeq(_start, trailing);
        }
        _start = _index;
        _eventType = CHARACTERS;
        return true;
    }

    /**
     * Handles end of line as per XML Spec. 2.11: a carriage return, 
     * alone or followed by a line feed, is replaced by a single line feed.
     * The location (line/column) is updated on each line feed.
     * 
     * @param c the potential end of line character.
     * @return the replacement character for end of line.
     * @throws XMLStreamException if the character is U+0000 or the read 
     *         buffer cannot be reloaded.
     */
    private char handleEndOfLine(char c) throws XMLStreamException {
        char normalized = c;
        if (normalized == 0xD) { // Replaces #xD with #xA
            // A following #xA belongs to the same line break: it is 
            // skipped so that #xD#xA yields a single #xA.
            if (_readIndex >= _readCount)
                reloadBuffer();
            final boolean lineFeedFollows = (_readIndex < _readCount)
                    && (_readBuffer[_readIndex] == 0xA);
            if (lineFeedFollows)
                _readIndex++;
            normalized = (char) 0xA;
        }
        if (normalized == 0xA) {
            _location._line++;
            _location._column = -_readIndex; // column = 0
            return normalized;
        }
        if (normalized == 0x0)
            throw new XMLStreamException("Illegal XML character U+0000",
                    _location);
        return normalized;
    }

    /**
     * Replaces an entity if the current state allows it (no replacement
     * within comments, processing instructions and CDATA sections).
     * The entity reference is copied to the data buffer up to its ';'
     * terminator, then overwritten by its replacement text.
     * 
     * @return the next character after the text replacement or '&' if no
     *         replacement took place.
     * @throws XMLStreamException if the entity reference is not terminated
     *         by ';' or the read buffer cannot be reloaded.
     */
    private char replaceEntity() throws XMLStreamException {
        final boolean replacementDisabled = (_state == STATE_COMMENT)
                || (_state == STATE_PI) || (_state == STATE_CDATA);
        if (replacementDisabled)
            return '&'; // (&2.4)

        final int entityStart = _index; // Index of first replacement character.
        _data[_index++] = '&';

        // Copies the entity reference (up to and including ';').
        char read;
        do {
            if (_readIndex >= _readCount)
                reloadBuffer();
            read = _data[_index++] = _readBuffer[_readIndex++];
            if ((read != ';') && (read <= ' '))
                throw new XMLStreamException("';' expected", _location);
        } while (read != ';');

        // Ensures that the replacement string holds in the data buffer.
        while (entityStart + _entities.getMaxLength() >= _data.length) {
            increaseDataBuffer();
        }

        // Replaces the entity.
        final int replacementLength = _entities.replaceEntity(_data,
                entityStart, _index - entityStart);
        _index = entityStart + replacementLength;

        // Returns the next character after entity unless ampersand
        // (consecutive entity references are replaced in turn).
        if (_readIndex >= _readCount)
            reloadBuffer();
        final char following = _readBuffer[_readIndex++];
        if (following == '&')
            return replaceEntity();
        return following;
    }

    /**
     * Processes the attribute just read: <code>xmlns</code> and
     * <code>xmlns:*</code> attributes are registered as namespace
     * declarations, any other attribute is added to the attributes list.
     */
    private void processAttribute() throws XMLStreamException {
        if (_attrPrefixSep < 0) { // Unprefixed attribute.
            if (!isXMLNS(_attrQName)) {
                _attributes.addAttribute(_attrQName, null, _attrQName,
                        _attrValue);
                return;
            }
            // Default namespace declaration.
            _namespaces.setPrefix(_namespaces._defaultNsPrefix, _attrValue);
            return;
        }

        // Prefixed attribute: splits the qualified name around the separator.
        final int qNameStart = _attrQName.offset();
        final int qNameEnd = qNameStart + _attrQName.length();
        final CharArray prefix = newSeq(qNameStart, _attrPrefixSep
                - qNameStart);
        final int localStart = _attrPrefixSep + 1;
        final CharArray localName = newSeq(localStart, qNameEnd - localStart);

        if (!isXMLNS(prefix)) {
            _attributes.addAttribute(localName, prefix, _attrQName,
                    _attrValue);
            return;
        }
        // Namespace association (xmlns:localName="value").
        _namespaces.setPrefix(localName, _attrValue);
    }

    /**
     * Indicates if the specified characters are exactly "xmlns".
     *
     * @param chars the character sequence to test.
     * @return <code>true</code> if the sequence equals "xmlns".
     */
    private static boolean isXMLNS(CharArray chars) {
        final String xmlns = "xmlns";
        if (chars.length() != xmlns.length()) {
            return false;
        }
        for (int i = 0; i < xmlns.length(); i++) {
            if (chars.charAt(i) != xmlns.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Verifies that the end tag just read matches the element recorded on
     * the stack for the current depth.
     *
     * @throws XMLStreamException if the names differ.
     */
    private void processEndTag() throws XMLStreamException {
        final CharArray openedElement = _elemStack[_depth];
        if (_qName.equals(openedElement)) {
            return;
        }
        throw new XMLStreamException("Unexpected end tag for " + _qName,
                _location);
    }

    /**
     * Moves one nesting level deeper and records the qualified name of the
     * element just opened (growing the stack when it is full).
     */
    private void processStartTag() throws XMLStreamException {
        _depth++;
        if (_depth >= _elemStack.length) {
            increaseStack();
        }
        _elemStack[_depth] = _qName;
    }

    /**
     * Implements Reusable: restores this reader to its initial state
     * (START_DOCUMENT event, characters state, no input) and hands it back
     * to its factory when it was factory produced.
     */
    public void reset() {
        // References.
        _attrQName = null;
        _attrValue = null;
        _encoding = null;
        _prolog = null;
        _reader = null;

        // Indices, counters and flags.
        _attrPrefixSep = 0;
        _depth = 0;
        _index = 0;
        _readCount = 0;
        _readIndex = 0;
        _seqsIndex = 0;
        _start = 0;
        _startOffset = 0;
        _charactersPending = false;
        _isEmpty = false;

        // Parsing state.
        _eventType = START_DOCUMENT;
        _state = STATE_CHARACTERS;

        // Helper objects.
        _attributes.reset();
        _entities.reset();
        _location.reset();
        _namespaces.reset();
        _utf8StreamReader.reset();

        // Recycles if factory produced.
        if (_factory != null) {
            _factory.recycle(this);
        }
    }

    /**
     * Returns a character sequence from the pool, set to view the specified
     * range of the data buffer.
     *
     * @param offset the index of the first character in the data buffer.
     * @param length the number of characters.
     * @return the pooled sequence (a new one is created when the pool is
     *         exhausted).
     */
    private CharArray newSeq(int offset, int length) {
        final CharArray seq;
        if (_seqsIndex < _seqsCapacity) {
            seq = _seqs[_seqsIndex++];
        } else {
            seq = newSeq2();
        }
        return seq.setArray(_data, offset, length);
    }

    /**
     * Adds one sequence to the pool and returns the pool entry at the
     * current index (which is then advanced).
     */
    private CharArray newSeq2() {
        _createSeqLogic.run();
        final int slot = _seqsIndex++;
        return _seqs[slot];
    }

    /**
     * Logic appending one new sequence to the pool; the pool array is
     * doubled first when it has no free slot left.
     */
    private final Runnable _createSeqLogic = new Runnable() {

        public void run() {
            final int poolSize = _seqs.length;
            if (_seqsCapacity >= poolSize) { // Pool array is full, doubles it.
                final CharArray[] grown = new CharArray[poolSize * 2];
                System.arraycopy(_seqs, 0, grown, 0, poolSize);
                _seqs = grown;
            }
            final CharArray created = new CharArray();
            _seqs[_seqsCapacity++] = created;
        }

    };

    // Pool of reusable character sequences (array doubled by _createSeqLogic
    // when full).
    private CharArray[] _seqs = new CharArray[256];

    // Index of the next pooled sequence handed out by newSeq (set back to 0
    // by reset()).
    private int _seqsIndex;

    // Number of pool slots actually holding a sequence.
    private int _seqsCapacity;

    /**
     * Doubles the capacity of the internal data buffer, preserving its
     * content (the resize is logged at info level).
     *
     * Note: The character data at any nesting level is discarded
     *       only when moving to outer nesting level (due to coalescing).
     *       This accumulation may cause resize of the data buffer if
     *       numerous elements at the same nesting level are separated by
     *       spaces or indentation.
     */
    private void increaseDataBuffer() {
        final int newCapacity = _data.length * 2;
        final char[] larger = new char[newCapacity];
        javolution.context.LogContext.info(new CharArray(
                "XMLStreamReaderImpl: Data buffer increased to " + newCapacity));
        System.arraycopy(_data, 0, larger, 0, _data.length);
        _data = larger;
    }

    /**
     * Doubles the capacity of the elements stack, preserving its content
     * (the resize is logged at info level).
     */
    private void increaseStack() {
        final int newCapacity = _elemStack.length * 2;
        final CharArray[] larger = new CharArray[newCapacity];
        javolution.context.LogContext.info(new CharArray(
                "XMLStreamReaderImpl: CharArray stack increased to "
                        + newCapacity));
        System.arraycopy(_elemStack, 0, larger, 0, _elemStack.length);
        _elemStack = larger;
    }

    /**
     * This inner class represents the parser location. Column and character
     * offset are stored relative to the reader's current read index and
     * combined with it when queried.
     */
    private final class LocationImpl implements Location {

        /** Column base; the reported column is this value plus the read index. */
        int _column;

        /** Zero-based line index (reported one-based). */
        int _line;

        /** Character count base; the reported offset adds the read index. */
        int _charactersRead;

        /** Returns the one-based line number. */
        public int getLineNumber() {
            return 1 + _line;
        }

        /** Returns the column base plus the current read index. */
        public int getColumnNumber() {
            return _readIndex + _column;
        }

        /** Returns the characters-read base plus the current read index. */
        public int getCharacterOffset() {
            return _readIndex + _charactersRead;
        }

        /** Always <code>null</code> (not available). */
        public String getPublicId() {
            return null;
        }

        /** Always <code>null</code> (not available). */
        public String getSystemId() {
            return null;
        }

        /** Returns "Line L, Column C" for the current location. */
        public String toString() {
            final StringBuilder text = new StringBuilder();
            text.append("Line ").append(getLineNumber());
            text.append(", Column ").append(getColumnNumber());
            return text.toString();
        }

        /** Sets line, column and characters-read counters back to zero. */
        public void reset() {
            _charactersRead = 0;
            _column = 0;
            _line = 0;
        }

    }

    //////////////////////////////////////////
    // Implements XMLStreamReader Interface //
    //////////////////////////////////////////
    /**
     * Implements XMLStreamReader Interface: checks that the current event
     * has the specified type and, when the corresponding argument is not
     * <code>null</code>, the specified namespace URI and local name.
     *
     * @param type the expected event type.
     * @param namespaceURI the expected namespace URI or <code>null</code>
     *        to skip the check.
     * @param localName the expected local name or <code>null</code> to skip
     *        the check.
     * @throws XMLStreamException if any of the checks fails.
     */
    public void require(int type, CharSequence namespaceURI,
            CharSequence localName) throws XMLStreamException {
        if (_eventType != type) {
            // An unknown type is reported by value instead of failing with
            // an index error while building the message.
            final String expected = ((type >= 0) && (type < NAMES_OF_EVENTS.length))
                    ? NAMES_OF_EVENTS[type] : String.valueOf(type);
            throw new XMLStreamException("Expected event: " + expected
                    + ", found event: " + NAMES_OF_EVENTS[_eventType]);
        }
        if (namespaceURI != null) {
            // The namespace lookup may yield null; treated as a mismatch
            // rather than dereferenced.
            final CharArray actualURI = getNamespaceURI();
            if ((actualURI == null) || !actualURI.equals(namespaceURI)) {
                throw new XMLStreamException("Expected namespace URI: "
                        + namespaceURI + ", found: " + actualURI);
            }
        }
        if (localName != null) {
            final CharArray actualName = getLocalName();
            if (!actualName.equals(localName)) {
                throw new XMLStreamException("Expected local name: "
                        + localName + ", found: " + actualName);
            }
        }
    }

    /**
     * Implements XMLStreamReader Interface: reads the text content of the
     * current element up to its END_ELEMENT (derived from the interface
     * specification code). Comments and processing instructions are
     * skipped; successive CHARACTERS events are merged by extending the
     * first text sequence.
     *
     * @return the element text (an empty sequence if there is none).
     * @throws XMLStreamException if the current event is not START_ELEMENT,
     *         or if a nested element, the end of document or any other
     *         unexpected event is met.
     */
    public CharArray getElementText() throws XMLStreamException {
        if (getEventType() != XMLStreamConstants.START_ELEMENT) {
            throw new XMLStreamException(
                    "Parser must be on START_ELEMENT to read next text",
                    getLocation());
        }
        CharArray content = null;
        for (int event = next(); event != XMLStreamConstants.END_ELEMENT; event = next()) {
            if (event == XMLStreamConstants.CHARACTERS) {
                if (content == null) {
                    content = getText();
                } else { // Merge (adjacent text, comments and PI are not kept).
                    content.setArray(_data, content.offset(), content.length()
                            + getText().length());
                }
                continue;
            }
            if ((event == XMLStreamConstants.PROCESSING_INSTRUCTION)
                    || (event == XMLStreamConstants.COMMENT)) {
                continue; // Skips (not kept).
            }
            if (event == XMLStreamConstants.END_DOCUMENT) {
                throw new XMLStreamException(
                        "Unexpected end of document when reading element text content",
                        getLocation());
            }
            if (event == XMLStreamConstants.START_ELEMENT) {
                throw new XMLStreamException(
                        "Element text content may not contain START_ELEMENT",
                        getLocation());
            }
            throw new XMLStreamException("Unexpected event type "
                    + NAMES_OF_EVENTS[event], getLocation());
        }
        if (content == null) {
            return newSeq(0, 0);
        }
        return content;
    }

    /**
     * Implements XMLStreamReader Interface: returns the value of the
     * specified property. Only {@link XMLInputFactory#IS_COALESCING}
     * (always <code>Boolean.TRUE</code>) and {@link XMLInputFactory#ENTITIES}
     * (the current entities mapping) are supported.
     *
     * @param name the property name.
     * @return the property value.
     * @throws IllegalArgumentException if the property is not supported.
     */
    public Object getProperty(String name) throws IllegalArgumentException {
        if (name.equals(XMLInputFactory.IS_COALESCING)) {
            return Boolean.TRUE;
        }
        if (name.equals(XMLInputFactory.ENTITIES)) {
            return _entities.getEntitiesMapping();
        }
        throw new IllegalArgumentException("Property: " + name
                + " not supported");
    }

    /**
     * Implements XMLStreamReader Interface: closing this reader resets it
     * (see {@link #reset()}).
     */
    public void close() throws XMLStreamException {
        this.reset();
    }

    /**
     * Returns the number of attributes of the current start element.
     *
     * @throws IllegalStateException if the current event is not
     *         START_ELEMENT.
     */
    public int getAttributeCount() {
        if (_eventType == XMLStreamConstants.START_ELEMENT) {
            return _attributes.getLength();
        }
        throw illegalState("Not a start element");
    }

    /**
     * Returns the local name of the attribute at the specified index.
     *
     * @param index the attribute index.
     * @throws IllegalStateException if the current event is not
     *         START_ELEMENT.
     */
    public CharArray getAttributeLocalName(int index) {
        if (_eventType == XMLStreamConstants.START_ELEMENT) {
            return _attributes.getLocalName(index);
        }
        throw illegalState("Not a start element");
    }

    /**
     * Returns the namespace URI looked up from the prefix of the attribute
     * at the specified index.
     *
     * @param index the attribute index.
     * @throws IllegalStateException if the current event is not
     *         START_ELEMENT.
     */
    public CharArray getAttributeNamespace(int index) {
        if (_eventType == XMLStreamConstants.START_ELEMENT) {
            final CharArray attributePrefix = _attributes.getPrefix(index);
            return _namespaces.getNamespaceURINullAllowed(attributePrefix);
        }
        throw illegalState("Not a start element");
    }

    /**
     * Returns the prefix of the attribute at the specified index.
     *
     * @param index the attribute index.
     * @throws IllegalStateException if the current event is not
     *         START_ELEMENT.
     */
    public CharArray getAttributePrefix(int index) {
        if (_eventType == XMLStreamConstants.START_ELEMENT) {
            return _attributes.getPrefix(index);
        }
        throw illegalState("Not a start element");
    }

    /**
     * Returns the type of the attribute at the specified index.
     *
     * @param index the attribute index.
     * @throws IllegalStateException if the current event is not
     *         START_ELEMENT.
     */
    public CharArray getAttributeType(int index) {
        if (_eventType == XMLStreamConstants.START_ELEMENT) {
            return _attributes.getType(index);
        }
        throw illegalState("Not a start element");
    }

    /**
     * Returns the value of the attribute having the specified local name
     * and, when <code>uri</code> is not <code>null</code>, the specified
     * namespace URI.
     *
     * @param uri the namespace URI or <code>null</code> to look up by
     *        local name only.
     * @param localName the attribute local name.
     * @throws IllegalStateException if the current event is not
     *         START_ELEMENT.
     */
    public CharArray getAttributeValue(CharSequence uri, CharSequence localName) {
        if (_eventType != XMLStreamConstants.START_ELEMENT) {
            throw illegalState("Not a start element");
        }
        if (uri == null) {
            return _attributes.getValue(localName);
        }
        return _attributes.getValue(uri, localName);
    }

    /**
     * Returns the value of the attribute at the specified index.
     *
     * @param index the attribute index.
     * @throws IllegalStateException if the current event is not
     *         START_ELEMENT.
     */
    public CharArray getAttributeValue(int index) {
        if (_eventType == XMLStreamConstants.START_ELEMENT) {
            return _attributes.getValue(index);
        }
        throw illegalState("Not a start element");
    }

    /**
     * Returns the value of the "encoding" attribute of the prolog, or
     * <code>null</code> if there is no prolog or no such attribute.
     */
    public CharArray getCharacterEncodingScheme() {
        final CharArray declaredEncoding = readPrologAttribute(ENCODING);
        return declaredEncoding;
    }

    /** Name of the prolog attribute read by getCharacterEncodingScheme. */
    private static final CharArray ENCODING = new CharArray("encoding");

    /**
     * Returns the encoding recorded by this reader (<code>null</code> after
     * a reset).
     */
    public String getEncoding() {
        return this._encoding;
    }

    /**
     * Returns the type of the current event (START_DOCUMENT after a reset).
     */
    public int getEventType() {
        return this._eventType;
    }

    /**
     * Returns the local name of the current element: the qualified name
     * itself when it has no prefix, otherwise the part following the
     * prefix separator.
     *
     * @throws IllegalStateException if the current event is neither
     *         START_ELEMENT nor END_ELEMENT.
     */
    public CharArray getLocalName() {
        final boolean onElement = (_eventType == XMLStreamConstants.START_ELEMENT)
                || (_eventType == XMLStreamConstants.END_ELEMENT);
        if (!onElement) {
            throw illegalState("Not a start or end element");
        }
        if (_prefixSep < 0) {
            return _qName;
        }
        final int localStart = _prefixSep + 1;
        return newSeq(localStart, _qName.offset() + _qName.length()
                - _prefixSep - 1);
    }

    /**
     * Returns the location object of this reader; the same instance is
     * returned on every call and reflects the current read position.
     */
    public Location getLocation() {
        return this._location;
    }

    /**
     * Returns the namespace count recorded for the current nesting depth.
     *
     * @throws IllegalStateException if the current event is neither
     *         START_ELEMENT nor END_ELEMENT.
     */
    public int getNamespaceCount() {
        final boolean onElement = (_eventType == XMLStreamConstants.START_ELEMENT)
                || (_eventType == XMLStreamConstants.END_ELEMENT);
        if (!onElement) {
            throw illegalState("Not a start or end element");
        }
        return _namespaces._namespacesCount[_depth];
    }

    /**
     * Returns the namespace prefix stored at the specified index.
     *
     * @param index the index in the namespaces prefix table.
     * @throws IllegalStateException if the current event is neither
     *         START_ELEMENT nor END_ELEMENT.
     */
    public CharArray getNamespacePrefix(int index) {
        final boolean onElement = (_eventType == XMLStreamConstants.START_ELEMENT)
                || (_eventType == XMLStreamConstants.END_ELEMENT);
        if (!onElement) {
            throw illegalState("Not a start or end element");
        }
        return _namespaces._prefixes[index];
    }

    /**
     * Returns the namespace URI associated with the specified prefix.
     *
     * @param prefix the namespace prefix.
     * @throws IllegalStateException if the current event is neither
     *         START_ELEMENT nor END_ELEMENT.
     */
    public CharArray getNamespaceURI(CharSequence prefix) {
        final boolean onElement = (_eventType == XMLStreamConstants.START_ELEMENT)
                || (_eventType == XMLStreamConstants.END_ELEMENT);
        if (!onElement) {
            throw illegalState("Not a start or end element");
        }
        return _namespaces.getNamespaceURI(prefix);
    }

    /**
     * Returns the namespace URI stored at the specified index.
     *
     * @param index the index in the namespaces table.
     * @throws IllegalStateException if the current event is neither
     *         START_ELEMENT nor END_ELEMENT.
     */
    public CharArray getNamespaceURI(int index) {
        final boolean onElement = (_eventType == XMLStreamConstants.START_ELEMENT)
                || (_eventType == XMLStreamConstants.END_ELEMENT);
        if (!onElement) {
            throw illegalState("Not a start or end element");
        }
        return _namespaces._namespaces[index];
    }

    /**
     * Returns the namespaces object of this reader (the same instance on
     * every call).
     */
    public NamespaceContext getNamespaceContext() {
        return this._namespaces;
    }

    /**
     * Returns the namespace URI looked up from the prefix of the current
     * element (the prefix may be <code>null</code>).
     *
     * @throws IllegalStateException if the current event is neither
     *         START_ELEMENT nor END_ELEMENT (raised by {@link #getPrefix()}).
     */
    public CharArray getNamespaceURI() {
        final CharArray elementPrefix = getPrefix();
        return _namespaces.getNamespaceURINullAllowed(elementPrefix);
    }

    /**
     * Returns the prefix of the current element, or <code>null</code> when
     * its qualified name has no prefix.
     *
     * @throws IllegalStateException if the current event is neither
     *         START_ELEMENT nor END_ELEMENT.
     */
    public CharArray getPrefix() {
        final boolean onElement = (_eventType == XMLStreamConstants.START_ELEMENT)
                || (_eventType == XMLStreamConstants.END_ELEMENT);
        if (!onElement) {
            throw illegalState("Not a start or end element");
        }
        if (_prefixSep < 0) {
            return null;
        }
        final int qNameStart = _qName.offset();
        return newSeq(qNameStart, _prefixSep - qNameStart);
    }

    /**
     * Returns the data part of the current processing instruction, i.e.
     * the text following the first space of the instruction text.
     *
     * @return the data, or an empty sequence when the instruction text
     *         holds no space (target only).
     * @throws IllegalStateException if the current event is not
     *         PROCESSING_INSTRUCTION.
     */
    public CharArray getPIData() {
        if (_eventType != XMLStreamConstants.PROCESSING_INSTRUCTION)
            throw illegalState("Not a processing instruction");
        // Position of the separator relative to the start of the text
        // (the original code adds the text offset to obtain a buffer index).
        final int separator = _text.indexOf(' ');
        if (separator < 0) // No data.
            return newSeq(_text.offset() + _text.length(), 0);
        final int dataStart = separator + 1;
        // The length is relative to the text, only the offset is absolute.
        return newSeq(_text.offset() + dataStart, _text.length() - dataStart);
    }

    /**
     * Returns the target of the current processing instruction, i.e. the
     * text preceding the first space of the instruction text.
     *
     * @return the target (the whole instruction text when it holds no
     *         space).
     * @throws IllegalStateException if the current event is not
     *         PROCESSING_INSTRUCTION.
     */
    public CharArray getPITarget() {
        if (_eventType != XMLStreamConstants.PROCESSING_INSTRUCTION)
            throw illegalState("Not a processing instruction");
        // Position of the separator relative to the start of the text; it
        // is also the target length (the text offset must not be added).
        final int separator = _text.indexOf(' ');
        final int targetLength = (separator < 0) ? _text.length() : separator;
        return newSeq(_text.offset(), targetLength);
    }

    /**
     * Returns the text of the current CHARACTERS, COMMENT or DTD event.
     *
     * @throws IllegalStateException if the current event is none of these.
     */
    public CharArray getText() {
        final boolean textEvent = (_eventType == XMLStreamConstants.CHARACTERS)
                || (_eventType == XMLStreamConstants.COMMENT)
                || (_eventType == XMLStreamConstants.DTD);
        if (textEvent) {
            return _text;
        }
        throw illegalState("Not a text event");
    }

    /**
     * Returns the array backing the current text; the text itself starts
     * at {@link #getTextStart()} and holds {@link #getTextLength()}
     * characters.
     *
     * @throws IllegalStateException if the current event is not a text
     *         event (raised by {@link #getText()}).
     */
    public char[] getTextCharacters() {
        final CharArray currentText = getText();
        return currentText.array();
    }

    /**
     * Copies characters of the current text into the specified array.
     *
     * @param sourceStart the index, relative to the start of the text, of
     *        the first character to copy.
     * @param target the destination array.
     * @param targetStart the start index in the destination array.
     * @param length the maximum number of characters to copy.
     * @return the number of characters actually copied (never more than
     *         what remains of the text after <code>sourceStart</code>).
     * @throws IndexOutOfBoundsException if <code>sourceStart</code> is
     *         negative or greater than the text length, or if the copy
     *         does not fit in <code>target</code>.
     * @throws IllegalStateException if the current event is not a text
     *         event (raised by {@link #getText()}).
     */
    public int getTextCharacters(int sourceStart, char[] target,
            int targetStart, int length) throws XMLStreamException {
        CharArray text = getText();
        // Only the characters located after sourceStart may be copied;
        // bounding by the full text length would read past the text.
        final int remaining = text.length() - sourceStart;
        if ((sourceStart < 0) || (remaining < 0))
            throw new IndexOutOfBoundsException("sourceStart: " + sourceStart
                    + ", text length: " + text.length());
        int copyLength = Math.min(length, remaining);
        System.arraycopy(text.array(), sourceStart + text.offset(), target,
                targetStart, copyLength);
        return copyLength;
    }

    /**
     * Returns the number of characters of the current text.
     *
     * @throws IllegalStateException if the current event is not a text
     *         event (raised by {@link #getText()}).
     */
    public int getTextLength() {
        final CharArray currentText = getText();
        return currentText.length();
    }

    /**
     * Returns the offset of the current text within its backing array.
     *
     * @throws IllegalStateException if the current event is not a text
     *         event (raised by {@link #getText()}).
     */
    public int getTextStart() {
        final CharArray currentText = getText();
        return currentText.offset();
    }

    /**
     * Returns the value of the "version" attribute of the prolog, or
     * <code>null</code> if there is no prolog or no such attribute.
     */
    public CharArray getVersion() {
        final CharArray declaredVersion = readPrologAttribute(VERSION);
        return declaredVersion;
    }

    /** Name of the prolog attribute read by getVersion. */
    private static final CharArray VERSION = new CharArray("version");

    /**
     * Indicates if the document is standalone according to the
     * "standalone" attribute of the prolog.
     *
     * @return <code>false</code> if the prolog declares
     *         <code>standalone="no"</code>; <code>true</code> otherwise
     *         (including when the attribute or the prolog is absent).
     */
    public boolean isStandalone() {
        CharArray standalone = readPrologAttribute(STANDALONE);
        // A declared "no" means not standalone (the test used to be
        // inverted, reporting "no" as standalone and "yes" as not).
        return (standalone != null) ? !standalone.equals("no") : true;
    }

    /**
     * Indicates if the prolog holds a "standalone" attribute.
     */
    public boolean standaloneSet() {
        final CharArray declared = readPrologAttribute(STANDALONE);
        return declared != null;
    }

    /** Name of the prolog attribute holding the standalone declaration. */
    private static final CharArray STANDALONE = new CharArray("standalone");

    /**
     * Indicates if the current event has a name, i.e. is START_ELEMENT or
     * END_ELEMENT.
     */
    public boolean hasName() {
        if (_eventType == XMLStreamConstants.START_ELEMENT) {
            return true;
        }
        return _eventType == XMLStreamConstants.END_ELEMENT;
    }

    /**
     * Indicates if there are more events, i.e. the current event is not
     * END_DOCUMENT.
     */
    public boolean hasNext() throws XMLStreamException {
        final boolean documentEnded = (_eventType == XMLStreamConstants.END_DOCUMENT);
        return !documentEnded;
    }

    /**
     * Indicates if the current event is a CHARACTERS, COMMENT or DTD event
     * holding a non-empty text.
     */
    public boolean hasText() {
        final boolean textEvent = (_eventType == XMLStreamConstants.CHARACTERS)
                || (_eventType == XMLStreamConstants.COMMENT)
                || (_eventType == XMLStreamConstants.DTD);
        // The text is only inspected for text events.
        return textEvent && (_text.length() > 0);
    }

    /**
     * Indicates if the attribute at the specified index has a value.
     *
     * @param index the attribute index.
     * @return <code>true</code> if the attribute value is not
     *         <code>null</code>.
     * @throws IllegalStateException if the current event is not
     *         START_ELEMENT.
     */
    public boolean isAttributeSpecified(int index) {
        // Same exception helper as the other attribute accessors, so the
        // message also reports the current event.
        if (_eventType != XMLStreamConstants.START_ELEMENT)
            throw illegalState("Not a start element");
        return _attributes.getValue(index) != null;
    }

    /** Indicates if the current event is CHARACTERS. */
    public boolean isCharacters() {
        return XMLStreamConstants.CHARACTERS == _eventType;
    }

    /** Indicates if the current event is END_ELEMENT. */
    public boolean isEndElement() {
        return XMLStreamConstants.END_ELEMENT == _eventType;
    }

    /** Indicates if the current event is START_ELEMENT. */
    public boolean isStartElement() {
        return XMLStreamConstants.START_ELEMENT == _eventType;
    }

    /**
     * Indicates if the current event is a CHARACTERS event whose text
     * consists only of whitespace characters (an empty text qualifies).
     */
    public boolean isWhiteSpace() {
        if (!isCharacters()) {
            return false;
        }
        final char[] chars = _text.array();
        final int first = _text.offset();
        final int limit = first + _text.length();
        for (int i = first; i < limit; i++) {
            if (!isWhiteSpace(chars[i])) {
                return false;
            }
        }
        return true;
    }

    /**
     * Indicates if the specified character is a whitespace according to
     * the XML 1.1 Specification (space, tab, carriage return or line feed).
     */
    private static boolean isWhiteSpace(char c) {
        switch (c) {
            case 0x20:
            case 0x9:
            case 0xD:
            case 0xA:
                return true;
            default:
                return false;
        }
    }

    /**
     * Moves to the next START_ELEMENT or END_ELEMENT, skipping comments,
     * processing instructions, DTD and whitespace-only character events.
     *
     * @return the event type reached (START_ELEMENT or END_ELEMENT).
     * @throws XMLStreamException if any other event is met first.
     */
    public int nextTag() throws XMLStreamException {
        int eventType = next();
        for (;;) {
            final boolean skippable = (eventType == XMLStreamConstants.COMMENT)
                    || (eventType == XMLStreamConstants.PROCESSING_INSTRUCTION)
                    || (eventType == XMLStreamConstants.DTD)
                    || ((eventType == XMLStreamConstants.CHARACTERS) && isWhiteSpace());
            if (!skippable) {
                break;
            }
            eventType = next();
        }
        final boolean onTag = (eventType == XMLStreamConstants.START_ELEMENT)
                || (eventType == XMLStreamConstants.END_ELEMENT);
        if (onTag) {
            return eventType;
        }
        throw new XMLStreamException("Tag expected (but found "
                + NAMES_OF_EVENTS[_eventType] + ")");
    }

    /**
     * Creates (without throwing) an illegal state exception whose message
     * is the specified one followed by the name of the current event in
     * parentheses.
     *
     * @param msg the beginning of the exception message.
     * @return the exception to throw.
     */
    private IllegalStateException illegalState(String msg) {
        final String currentEvent = NAMES_OF_EVENTS[_eventType];
        return new IllegalStateException(msg + " (" + currentEvent + ")");
    }

    /**
     * Autodetects the encoding from the first bytes of the specified
     * stream (see http://en.wikipedia.org/wiki/UTF-16). Bytes consumed
     * here that belong to the document (as opposed to a byte order mark)
     * are stored as characters at the start of the read buffer and
     * accounted for in <code>_startOffset</code>.
     *
     * @param input the stream to read the leading bytes from.
     * @return the name of the charset to decode the remaining bytes with.
     * @throws XMLStreamException if the stream ends prematurely or cannot
     *         be read.
     */
    private String detectEncoding(InputStream input) throws XMLStreamException {
        final int byte0 = readDetectionByte(input);
        if (byte0 == '<') {
            // '<' alone cannot tell UTF-8 from UTF-16 little endian without
            // byte order mark (3C 00). The next byte is peeked only when
            // the stream can be rewound, a UTF-8 byte cannot be pushed back
            // otherwise.
            if (input.markSupported()) {
                int peeked;
                try {
                    input.mark(1);
                    peeked = input.read();
                    if (peeked != 0) {
                        input.reset();
                    }
                } catch (IOException e) {
                    throw new XMLStreamException(e);
                }
                if (peeked == 0) { // UTF-16 LITTLE ENDIAN
                    _readBuffer[_startOffset++] = '<';
                    return "UTF-16LE";
                }
            }
            // UTF-8 or compatible encoding.
            _readBuffer[_startOffset++] = '<';
            return "UTF-8";
        }

        final int byte1 = readDetectionByte(input);
        if ((byte0 == 0) && (byte1 == '<')) { // UTF-16 BIG ENDIAN
            _readBuffer[_startOffset++] = '<';
            return "UTF-16BE";
        }
        if ((byte0 == 0xFF) && (byte1 == 0xFE)) { // BOM for UTF-16 LITTLE ENDIAN
            // The mark has been consumed: the byte order must be explicit,
            // plain "UTF-16" decodes as big endian when no mark is found.
            return "UTF-16LE";
        }
        if ((byte0 == 0xFE) && (byte1 == 0xFF)) { // BOM for UTF-16 BIG ENDIAN
            return "UTF-16";
        }
        if ((byte0 == 0xEF) && (byte1 == 0xBB)) { // Possible BOM for UTF-8
            final int byte2 = readDetectionByte(input);
            if (byte2 == 0xBF) {
                return "UTF-8"; // Mark skipped.
            }
            _readBuffer[_startOffset++] = (char) byte0;
            _readBuffer[_startOffset++] = (char) byte1;
            _readBuffer[_startOffset++] = (char) byte2;
            return "UTF-8";
        }
        // Encoding unknown (or no prolog) assumes UTF-8.
        // NOTE(review): the two bytes are stored as characters as is, which
        // looks correct for ASCII only - confirm.
        _readBuffer[_startOffset++] = (char) byte0;
        _readBuffer[_startOffset++] = (char) byte1;
        return "UTF-8";
    }

    // Reads one byte for encoding detection, failing on end of stream.
    private static int readDetectionByte(InputStream input)
            throws XMLStreamException {
        int value;
        try {
            value = input.read();
        } catch (IOException e) {
            throw new XMLStreamException(e);
        }
        if (value == -1)
            throw new XMLStreamException("Premature End-Of-File");
        return value;
    }

    /**
     * Reads the value of the specified attribute from the prolog: after
     * the first occurrence of the name, looks for '=', then for an opening
     * simple or double quote, then for the matching closing quote.
     *
     * @param name the attribute name.
     * @return the text between the quotes, or <code>null</code> if there
     *         is no prolog, the name is not found or the value is not
     *         properly terminated.
     */
    private final CharArray readPrologAttribute(CharSequence name) {
        if (_prolog == null) {
            return null;
        }
        final int found = _prolog.indexOf(name);
        if (found < 0) {
            return null;
        }
        final char[] chars = _prolog.array();
        final int limit = _prolog.offset() + _prolog.length();
        int pos = _prolog.offset() + found + name.length();

        // Moves right after the '=' sign.
        while ((pos < limit) && (chars[pos++] != '=')) {
            // Skips.
        }

        // Moves right after the opening quote (simple or double).
        char quote = 0;
        while (pos < limit) {
            final char c = chars[pos++];
            if ((c == '"') || (c == '\'')) {
                quote = c;
                break;
            }
        }
        if (quote == 0) {
            return null;
        }

        // Looks for the matching closing quote.
        final int valueOffset = pos;
        while (pos < limit) {
            if (chars[pos++] == quote) {
                return newSeq(valueOffset, pos - valueOffset - 1);
            }
        }
        return null;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy