com.fasterxml.aalto.async.AsyncByteScanner Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of aalto-xml Show documentation
Ultra-high performance non-blocking XML processor (Stax/Stax2, SAX/SAX2)
There is a newer version: 1.3.3
package com.fasterxml.aalto.async;

import java.io.IOException;

import javax.xml.stream.XMLStreamException;

import com.fasterxml.aalto.AsyncInputFeeder;
import com.fasterxml.aalto.AsyncXMLStreamReader;
import com.fasterxml.aalto.impl.ErrorConsts;
import com.fasterxml.aalto.in.ByteBasedPNameTable;
import com.fasterxml.aalto.in.ByteBasedScanner;
import com.fasterxml.aalto.in.PName;
import com.fasterxml.aalto.in.ReaderConfig;
import com.fasterxml.aalto.util.CharsetNames;
import com.fasterxml.aalto.util.DataUtil;
import com.fasterxml.aalto.util.XmlCharTypes;

public abstract class AsyncByteScanner
    extends ByteBasedScanner
    implements AsyncInputFeeder
{
    protected final static int EVENT_INCOMPLETE = AsyncXMLStreamReader.EVENT_INCOMPLETE;

    /*
    /**********************************************************************
    /* State consts
    /**********************************************************************
     */

    /**
     * Default starting state for many events/contexts -- nothing has been
     * seen so far, no event incomplete. Not used for all event types.
     */
    protected final static int STATE_DEFAULT = 0;

    // // // States for prolog/epilog major state:

    /**
     * State before the document proper, when an xml declaration may
     * still be encountered.
     */
    protected final static int STATE_PROLOG_INITIAL = 1; // State before document when we may get xml declaration
    /**
     * State in which a less-than sign has been seen
     */
    protected final static int STATE_PROLOG_SEEN_LT = 2; // "<" seen after xml declaration
    protected final static int STATE_PROLOG_DECL = 3; // "'
    protected final static int STATE_DTD_BEFORE_IDS = 6; // before "PUBLIC" or "SYSTEM" token
    protected final static int STATE_DTD_PUBLIC_OR_SYSTEM = 7; // parsing "PUBLIC" or "SYSTEM"
    protected final static int STATE_DTD_AFTER_PUBLIC = 8; // "PUBLIC" found, need space
    protected final static int STATE_DTD_AFTER_SYSTEM = 9; // "SYSTEM" found, need space
    protected final static int STATE_DTD_BEFORE_PUBLIC_ID = 10; // after "PUBLIC", space, need quoted public id
    protected final static int STATE_DTD_PUBLIC_ID = 11; // parsing public ID
    protected final static int STATE_DTD_AFTER_PUBLIC_ID = 12; // public ID parsed, need space
    protected final static int STATE_DTD_BEFORE_SYSTEM_ID = 13; // about to parse quoted system id
    protected final static int STATE_DTD_SYSTEM_ID = 14; // parsing system ID
    protected final static int STATE_DTD_AFTER_SYSTEM_ID = 15; // after system ID, optional space, '>' or int subset
    protected final static int STATE_DTD_INT_SUBSET = 16; // parsing internal subset

    protected final static int STATE_DTD_EXPECT_CLOSING_GT = 50; // ']' gotten that should be followed by '>'

    // For CHARACTERS, default is the basic (and only)

    // just seen "&"
    protected final static int STATE_TEXT_AMP = 4;
    // just seen "&#"
//    protected final static int STATE_TEXT_AMP_AND_HASH = 5;
    // seen '&' and partial name:
    protected final static int STATE_TEXT_AMP_NAME = 6;

    // For comments, STATE_DEFAULT means "
    protected final static int STATE_PI_AFTER_TARGET_WS = 2; // "
    /**
     * NOTE: non-final due to xml declaration handling occurring later.
     */
    protected XmlCharTypes _charTypes;
    
    /**
     * For now, symbol table contains prefixed names. In future it is
     * possible that they may be split into prefixes and local names?
     *
     * NOTE: non-final for async scanners
     */
    protected ByteBasedPNameTable _symbols;

    /**
     * This buffer is used for name parsing. Will be expanded if/as
     * needed; 32 ints can hold names 128 ascii chars long.
     */
    protected int[] _quadBuffer = new int[32];

    /*
    /**********************************************************************
    /* General state tracking
    /**********************************************************************
     */

    /**
     * Due to asynchronous nature of parsing, we may know what
     * event we are trying to parse, even if it's not yet
     * complete. Type of that event is stored here.
     */
    protected int _nextEvent = EVENT_INCOMPLETE;

    /**
     * In addition to the event type, there is need for additional
     * state information
     */
    protected int _state;
    
    /**
     * For token/state combinations that are 'shared' between
     * events (or embedded in them), this is where the surrounding
     * event state is retained.
     */
    protected int _surroundingEvent = EVENT_INCOMPLETE;

    /**
     * There are some multi-byte combinations that must be handled
     * as a unit: CR+LF linefeeds, multi-byte UTF-8 characters, and
     * multi-character end markers for comments and PIs.
     * Since they can be split across input buffer
     * boundaries, first byte(s) may need to be temporarily stored.
     *

     * If so, this int will store byte(s), in little-endian format
     * (that is, first pending byte is at 0x000000FF, second [if any]
     * at 0x0000FF00, and third at 0x00FF0000). This can be
     * (and is) used to figure out actual number of bytes pending,
     * for multi-byte (UTF-8) character decoding.
     *

     * Note: it is assumed that if value is 0, there is no data.
     * Thus, if 0 needed to be added pending, it has to be masked.
     */
    protected int _pendingInput = 0;

    /**
     * Flag that is set when calling application indicates that there will
     * be no more input to parse.
     */
    protected boolean _endOfInput = false;

    /*
    /**********************************************************************
    /* Name/entity parsing state
    /**********************************************************************
     */

    /**
     * Number of complete quads parsed for current name (quads
     * themselves are stored in {@link #_quadBuffer}).
     */
    protected int _quadCount;

    /**
     * Bytes parsed for the current, incomplete, quad
     */
    protected int _currQuad;

    /**
     * Number of bytes pending/buffered, stored in {@link #_currQuad}
     */
    protected int _currQuadBytes = 0;

    /**
     * Entity value accumulated so far
     */
    protected int _entityValue = 0;
    
    /*
    /**********************************************************************
    /* (Start) element parsing state
    /**********************************************************************
     */

    protected boolean _elemAllNsBound;

    protected boolean _elemAttrCount;

    protected byte _elemAttrQuote;

    protected PName _elemAttrName;

    /**
     * Pointer for the next character of currently being parsed value
     * within attribute value buffer
     */
    protected int _elemAttrPtr;

    /**
     * Pointer for the next character of currently being parsed namespace
     * URI for the current namespace declaration
     */
    protected int _elemNsPtr;

    /*
    /**********************************************************************
    /* Other state
    /**********************************************************************
     */

    /**
     * Flag that indicates whether we are inside a declaration during parsing
     * of internal DTD subset.
     */
    protected boolean _inDtdDeclaration;

    /*
    /**********************************************************************
    /* Life-cycle
    /**********************************************************************
     */
    
    /**
     * Creates a scanner for the given configuration. Only the superclass
     * is initialized here; {@code _charTypes} and {@code _symbols} are
     * deliberately left unassigned until {@link #_activateEncoding()} (or
     * xml declaration handling) assigns them.
     *
     * @param cfg Reader configuration, passed on to the superclass
     */
    protected AsyncByteScanner(ReaderConfig cfg) {
        super(cfg);
        // 03-Apr-2018, tatu: Can not yet fetch `_charTypes` or `_symbols` since we
        //   do not necessarily know actual encoding from XML declaration
//        _charTypes = cfg.getCharTypes();
//        _symbols = cfg.getBBSymbols();
    }

    /**
     * Called once the encoding is known for certain -- either from the XML
     * declaration or, lacking one, from defaults -- to fetch the character
     * type tables and the symbol table from the configuration.
     *<p>
     * Only the first call has any effect: once a symbol table has been
     * assigned, later calls leave state untouched.
     *
     * @since 1.1.1
     */
    protected void _activateEncoding() {
        // 04-Apr-2018, tatu: enforcing a single call gets tricky, so instead
        //    the first call simply sticks
        if (_symbols != null) {
            return;
        }
        _charTypes = _config.getCharTypes();
        _symbols = _config.getBBSymbols();
    }

    /**
     * Marks the input as finished by setting {@code _endOfInput}; no
     * parsing is triggered by this call itself.
     */
    @Override
    public void endOfInput() {
        _endOfInput = true;
    }

    /**
     * Releases buffers held by the superclass and, if the symbol table
     * reports that it may be dirty, hands it back to the configuration
     * via {@code updateBBSymbols}.
     *<p>
     * The symbol table is only assigned by {@link #_activateEncoding()}, so
     * it is still null if buffers are released before the encoding has been
     * determined (for example when the reader is closed before any content
     * was processed). In that case there is nothing to hand back.
     */
    @Override
    protected void _releaseBuffers()
    {
        super._releaseBuffers();
        final ByteBasedPNameTable symbols = _symbols;
        // null check: encoding (and thus symbol table) may never have been activated
        if ((symbols != null) && symbols.maybeDirty()) {
            _config.updateBBSymbols(symbols);
        }
    }

    /**
     * Since the async scanner has no access to whatever passes content,
     * there is no input source in the same sense as with a blocking scanner,
     * and there is nothing to close. But we can at least mark input
     * as having ended, which is done by setting {@code _endOfInput}.
     *
     * @throws IOException declared by the overridden signature; nothing in
     *   this implementation throws it
     */
    @Override
    protected void _closeSource() throws IOException
    {
        // nothing to do, we are done.
        _endOfInput = true;
    }

    /*
    /**********************************************************************
    /* Shared helper methods
    /**********************************************************************
     */

    /**
     * Checks that the text builder currently holds one of the two
     * recognized xml version strings and, if so, stores it in the
     * configuration; otherwise reports an input problem.
     *
     * @throws XMLStreamException via {@code reportInputProblem} if the
     *   version is neither "1.0" nor "1.1"
     */
    protected void verifyAndSetXmlVersion() throws XMLStreamException
    {
        String recognized = null;
        if (_textBuilder.equalsString("1.0")) {
            recognized = "1.0";
        } else if (_textBuilder.equalsString("1.1")) {
            recognized = "1.1";
        }
        if (recognized == null) {
            reportInputProblem("Unrecognized XML version '"+_textBuilder.contentsAsString()+"' (expected '1.0' or '1.1')");
            return;
        }
        _config.setXmlVersion(recognized);
    }

    /**
     * Normalizes the encoding name currently held by the text builder,
     * verifies that it is one of the encodings this scanner accepts
     * (UTF-8, US-ASCII or ISO-8859-1 constants of {@link CharsetNames}),
     * records it in the configuration and re-fetches the character type
     * tables so that they match the declared encoding.
     *
     * @throws XMLStreamException via {@code reportInputProblem} if the
     *   declared encoding is not one of the accepted ones
     */
    protected void verifyAndSetXmlEncoding() throws XMLStreamException
    {
        final String enc = CharsetNames.normalize(_textBuilder.contentsAsString());
        // Compare by value, not identity: correctness must not depend on
        // normalize() happening to return the very same String instances.
        final boolean supported = CharsetNames.CS_UTF8.equals(enc)
                || CharsetNames.CS_US_ASCII.equals(enc)
                || CharsetNames.CS_ISO_LATIN1.equals(enc);
        if (!supported) {
            // List every accepted encoding (the check above accepts Latin-1 as well)
            reportInputProblem("Unsupported encoding '"+enc+"': only "
                    +CharsetNames.CS_UTF8+", "+CharsetNames.CS_US_ASCII+" and "
                    +CharsetNames.CS_ISO_LATIN1+" supported by async parser");
        }
        // 03-Apr-2018, tatu: Need to overwrite default (UTF-8) if declared otherwise.
        //    And besides changing configs need to force use of new symbol tables, too...
        _config.setXmlEncoding(enc);
        if (enc != null) {
            _config.setActualEncoding(enc);
        }
        _charTypes = _config.getCharTypes();
    }

    /**
     * Interprets the text builder contents as the value of the
     * "standalone" pseudo-attribute and stores the matching Boolean in
     * the configuration; any value other than "yes" or "no" is reported
     * as an input problem.
     *
     * @throws XMLStreamException via {@code reportInputProblem} for an
     *   unrecognized value
     */
    protected void verifyAndSetXmlStandalone() throws XMLStreamException
    {
        final Boolean standalone;
        if (_textBuilder.equalsString("yes")) {
            standalone = Boolean.TRUE;
        } else if (_textBuilder.equalsString("no")) {
            standalone = Boolean.FALSE;
        } else {
            reportInputProblem("Invalid standalone value '"+_textBuilder.contentsAsString()+"': can only use 'yes' and 'no'");
            return;
        }
        _config.setXmlStandalone(standalone);
    }

    /**
     * Stores the current text builder contents as the public id.
     * NOTE(review): despite the name, no verification is performed here;
     * presumably characters are checked while they are being parsed -- confirm.
     */
    protected void verifyAndSetPublicId() throws XMLStreamException {
        _publicId = _textBuilder.contentsAsString();
    }

    /**
     * Stores the current text builder contents as the system id.
     * NOTE(review): despite the name, no verification is performed here.
     */
    protected void verifyAndSetSystemId() throws XMLStreamException {
        _systemId = _textBuilder.contentsAsString();
    }

    /*
    /**********************************************************************
    /* Content accessors for less performance-critical sections
    /**********************************************************************
     */
    
    protected abstract byte _currentByte() throws XMLStreamException;
    protected abstract byte _nextByte() throws XMLStreamException;
    protected abstract byte _prevByte() throws XMLStreamException;

    /*
    /**********************************************************************
    /* Abstract methods for subclasses to implement wrt prolog/epilog
    /**********************************************************************
     */

    protected abstract int handlePI() throws XMLStreamException;
    protected abstract boolean handleDTDInternalSubset(boolean init) throws XMLStreamException;
    protected abstract int handleComment() throws XMLStreamException;
    protected abstract int handleStartElementStart(byte b) throws XMLStreamException;
    protected abstract int handleStartElement() throws XMLStreamException;

    protected abstract PName parsePName() throws XMLStreamException;
    protected abstract PName parseNewName(byte b) throws XMLStreamException;

    protected abstract boolean asyncSkipSpace() throws XMLStreamException;
    protected abstract boolean handlePartialCR() throws XMLStreamException;

    /*
    /**********************************************************************
    /* Second-level parsing; character content (in tree)
    /**********************************************************************
     */

    /**
     * Completes the current token by dispatching to the type-specific
     * finish method, after first clearing the "token incomplete" flag.
     * Token types without a finish method result in an internal error
     * being raised through {@link ErrorConsts#throwInternalError()}.
     */
    @Override
    protected final void finishToken() throws XMLStreamException
    {
        _tokenIncomplete = false;
        switch (_currToken) {
        case PROCESSING_INSTRUCTION:
            finishPI();
            return;
        case CHARACTERS:
            finishCharacters();
            return;
        case COMMENT:
            finishComment();
            return;
        case SPACE:
            finishSpace();
            return;
        case DTD:
            // true -> get text
            finishDTD(true);
            return;
        case CDATA:
            finishCData();
            return;
        }
        // any other token type is unexpected here
        ErrorConsts.throwInternalError();
    }

    /**
     * Method called to initialize state for CHARACTERS event, after
     * just a single byte has been seen. What needs to be done next
     * depends on whether coalescing mode is set or not: if it is not
     * set, just a single character needs to be decoded, after which
     * current event will be incomplete, but defined as CHARACTERS.
     * In coalescing mode, the whole content must be read before
     * current event can be defined. The reason for difference is
     * that when XMLStreamReader.next() returns, no
     * blocking can occur when calling other methods.
     *
     * @return Event type detected; either CHARACTERS, if at least
     *   one full character was decoded (and can be returned),
     *   EVENT_INCOMPLETE if not (part of a multi-byte character
     *   split across input buffer boundary)
     */
    protected abstract int startCharacters(byte b) throws XMLStreamException;

    protected abstract boolean handleAttrValue() throws XMLStreamException;

    protected abstract boolean handleNsDecl() throws XMLStreamException;

    /*
    /**********************************************************************
    /* Abstract methods from base class, parsing
    /**********************************************************************
     */


    /**
     * Not applicable in this class: unconditionally calls
     * {@link #throwInternal()}.
     */
    @Override
    protected void finishCData() throws XMLStreamException
    {
        // N/A
        throwInternal();
    }

    /**
     * Not applicable in this class: unconditionally calls
     * {@link #throwInternal()}.
     */
    @Override
    protected void finishComment() throws XMLStreamException
    {
        // N/A
        throwInternal();
    }

    /**
     * Not applicable in this class: unconditionally calls
     * {@link #throwInternal()}.
     *
     * @param copyContents ignored by this implementation
     */
    @Override
    protected void finishDTD(boolean copyContents) throws XMLStreamException
    {
        // N/A
        throwInternal();
    }

    /**
     * Not applicable in this class: unconditionally calls
     * {@link #throwInternal()}.
     */
    @Override
    protected void finishPI() throws XMLStreamException
    {
        // N/A
        throwInternal();
    }

    /**
     * Not applicable in this class: unconditionally calls
     * {@link #throwInternal()}.
     */
    @Override
    protected void finishSpace() throws XMLStreamException
    {
        // N/A
        throwInternal();
    }

    // // token-skip methods

    /**
     * @return True if the whole characters segment was successfully
     *   skipped; false if not
     */
    @Override
    protected abstract boolean skipCharacters()
        throws XMLStreamException;

    /**
     * Not expected to be invoked on this class: unconditionally calls
     * {@link #throwInternal()}.
     */
    @Override
    protected void skipCData() throws XMLStreamException
    {
        // should never be called
        throwInternal();
    }

    /**
     * Not expected to be invoked on this class: unconditionally calls
     * {@link #throwInternal()}.
     */
    @Override
    protected void skipComment() throws XMLStreamException
    {
        // should never be called
        throwInternal();
    }

    /**
     * Not expected to be invoked on this class: unconditionally calls
     * {@link #throwInternal()}.
     */
    @Override
    protected void skipPI() throws XMLStreamException
    {
        // should never be called
        throwInternal();
    }

    /**
     * Not expected to be invoked on this class: unconditionally calls
     * {@link #throwInternal()}.
     */
    @Override
    protected void skipSpace() throws XMLStreamException
    {
        // should never be called
        throwInternal();
    }

    /**
     * Not expected to be invoked on this class: unconditionally calls
     * {@link #throwInternal()}, which throws.
     *
     * @return nothing in practice; the trailing return only satisfies the compiler
     */
    @Override
    protected boolean loadMore() throws XMLStreamException
    {
        // should never get called
        throwInternal();
        return false; // never gets here
    }
    
    @Override
    protected abstract void finishCharacters() throws XMLStreamException;

    /*
    /**********************************************************************
    /* Internal methods, name decoding
    /**********************************************************************
     */

    /**
     * Resolves the byte sequence collected so far (complete quads in
     * {@link #_quadBuffer}, plus the trailing partial quad passed in)
     * into a {@link PName}: first by looking it up in the symbol table
     * and, if it is not found there, by adding it through
     * {@link #addPName}.
     *<p>
     * The byte that terminated the name has already been consumed by the
     * caller's scan, so the input pointer is moved back by one first.
     *
     * @param lastQuad Word with last 0 to 3 bytes of the PName; not included
     *   in the quad array
     * @param lastByteCount Number of bytes contained in lastQuad; 0 to 3.
     *
     * @return Name found from, or added to, the symbol table
     */
    protected final PName findPName(int lastQuad, int lastByteCount) throws XMLStreamException
    {
        // un-read the byte that ended the name
        --_inputPtr;
        int quadLen = _quadCount;
        // An empty trailing quad means the real last quad is the final
        // complete one in the buffer: take it back out.
        if (lastByteCount == 0) {
            lastQuad = _quadBuffer[--quadLen];
            lastByteCount = 4;
        }

        // Case 1: at most 4 bytes, everything is in 'lastQuad'
        if (quadLen == 0) {
            final int hash1 = ByteBasedPNameTable.calcHash(lastQuad);
            final PName hit = _symbols.findSymbol(hash1, lastQuad, 0);
            if (hit != null) {
                return hit;
            }
            // not seen before: use the array-based add for simplicity
            _quadBuffer[0] = lastQuad;
            return addPName(_symbols, hash1, _quadBuffer, 1, lastByteCount);
        }

        // Case 2: one complete quad plus the last one
        if (quadLen < 2) {
            final int headQuad = _quadBuffer[0];
            final int hash2 = ByteBasedPNameTable.calcHash(headQuad, lastQuad);
            final PName hit = _symbols.findSymbol(hash2, headQuad, lastQuad);
            if (hit != null) {
                return hit;
            }
            _quadBuffer[1] = lastQuad;
            return addPName(_symbols, hash2, _quadBuffer, 2, lastByteCount);
        }

        // Case 3: three quads or more. Last quad is not yet in the array,
        // so append it (doubling the buffer if it is full).
        if (quadLen >= _quadBuffer.length) {
            _quadBuffer = DataUtil.growArrayBy(_quadBuffer, _quadBuffer.length);
        }
        _quadBuffer[quadLen] = lastQuad;
        final int totalQuads = quadLen + 1;
        final int hashN = ByteBasedPNameTable.calcHash(_quadBuffer, totalQuads);
        final PName hit = _symbols.findSymbol(hashN, _quadBuffer, totalQuads);
        if (hit != null) {
            return hit;
        }
        return addPName(_symbols, hashN, _quadBuffer, totalQuads, lastByteCount);
    }

    /**
     * Adds a new name to the given symbol table by delegating to
     * {@code addUTFPName}, supplying this scanner's current
     * {@code _charTypes}.
     *
     * @param symbols Symbol table to add the name to
     * @param hash Hash code calculated for the name
     * @param quads Buffer that contains the name bytes, packed in ints
     * @param qlen Number of entries of {@code quads} in use
     * @param lastQuadBytes Number of bytes in the last used entry
     *
     * @return Value returned by {@code addUTFPName}
     */
    protected final PName addPName(ByteBasedPNameTable symbols,
            int hash, int[] quads, int qlen, int lastQuadBytes)
        throws XMLStreamException
    {
        return addUTFPName(symbols, _charTypes, hash, quads, qlen, lastQuadBytes);
    }

    /*
    /**********************************************************************
    /* Internal methods, input validation
    /**********************************************************************
     */

    /**
     * Validates a character obtained from a character entity (via
     * {@code verifyXmlChar}) and appends it to the text buffer. Values
     * that do not fit in 16 bits are appended as a surrogate pair.
     *
     * @param charFromEntity Code point decoded from the entity
     */
    protected void verifyAndAppendEntityCharacter(int charFromEntity) throws XMLStreamException
    {
        verifyXmlChar(charFromEntity);
        // Fits in a single char? Then no surrogates are needed
        if ((charFromEntity >> 16) == 0) {
            _textBuilder.append((char) charFromEntity);
            return;
        }
        // Otherwise split into high and low surrogate
        final int offset = charFromEntity - 0x10000;
        _textBuilder.append((char) (0xD800 | (offset >> 10)));
        _textBuilder.append((char) (0xDC00 | (offset & 0x3FF)));
    }

    /**
     * Checks whether a character is allowed within a Public ID literal.
     *
     * @param c A character
     * @return true if the character is valid for use in the Public ID
     * of an XML doctype declaration
     *
     * @see "http://www.w3.org/TR/xml/#NT-PubidLiteral"
     */
    protected boolean validPublicIdChar(int c) {
        // Alphanumerics; note that '@' directly precedes 'A' so it is
        // covered by the upper-case range
        if ((c >= 'a' && c <= 'z') || (c >= '@' && c <= 'Z') || (c >= '0' && c <= '9')) {
            return true;
        }
        switch (c) {
        // white space
        case 0xA:
        case 0xD:
        case 0x20:
        // punctuation
        case '!':
        case '#':
        case '$':
        case '%':
        case '\'':
        case '(':
        case ')':
        case '*':
        case '+':
        case ',':
        case '-':
        case '.':
        case '/':
        case ':':
        case ';':
        case '=':
        case '?':
        case '_':
            return true;
        default:
            return false;
        }
    }

    /*
    /**********************************************************************
    /* Internal methods, error handling
    /**********************************************************************
     */

    /**
     * Converts a byte into a character code for use in error messages.
     *<p>
     * Proper multi-byte decoding is not implemented yet (TBI), so the byte
     * is interpreted on its own. It is treated as unsigned, so that bytes
     * in the range 0x80 - 0xFF yield codes 128 - 255 instead of negative
     * values produced by sign extension.
     *
     * @param b Byte that triggered the error
     * @return Unsigned value of the byte, 0 to 255
     */
    @Override
    protected int decodeCharForError(byte b) throws XMLStreamException {
        // !!! TBI: real decoding of multi-byte characters
        return b & 0xFF;
    }

    /**
     * Reports an input problem if the given processing instruction target
     * is an unprefixed name that equals "xml" in any combination of
     * upper and lower case.
     *
     * @param targetName Target name of the processing instruction
     */
    protected void checkPITargetName(PName targetName) throws XMLStreamException
    {
        final String local = targetName.getLocalName();
        final boolean spellsXml = (local.length() == 3) && local.equalsIgnoreCase("xml");
        if (spellsXml && !targetName.hasPrefix()) {
            reportInputProblem(ErrorConsts.ERR_WF_PI_XML_TARGET);
        }
    }

    /**
     * Signals that a code path that should be unreachable was executed.
     *
     * @return never returns normally; the int return type lets callers
     *   write {@code return throwInternal();}
     * @throws IllegalStateException always
     */
    protected int throwInternal() {
        throw new IllegalStateException("Internal error: should never execute this code path");
    }

    /**
     * Variant of {@code reportInvalidOther(int)} that first moves the
     * input pointer to the given position, then delegates.
     *
     * @param mask Passed through unchanged to {@code reportInvalidOther(int)}
     * @param ptr Value to assign to {@code _inputPtr} before reporting
     */
    protected void reportInvalidOther(int mask, int ptr) throws XMLStreamException
    {
        _inputPtr = ptr;
        reportInvalidOther(mask);
    }

    /*
    /**********************************************************************
    /* Shared implementation for handling XML prolog; less performance
    /* sensitive so need not inline access
    /**********************************************************************
     */

    /**
     * Advances parsing while outside of the root element (prolog or
     * epilog), resuming from whatever event/state the previous call left
     * in {@code _nextEvent}, {@code _state} and {@code _pendingInput}.
     *<p>
     * Overall flow: if the previous event was fully returned, state is
     * reset; if the type of the next event is not yet known, input is
     * scanned (xml declaration detection, white space skipping, then the
     * character after "&lt;") to determine it; once known, the matching
     * handler method is invoked.
     *
     * @param isProlog Passed on to error reporting helpers; when false, the
     *   start of an element is reported via {@code reportPrologUnexpElement}
     *
     * @return Value returned by the handler that was dispatched to;
     *   {@code EVENT_INCOMPLETE} when more input is needed before the event
     *   can be determined; or {@code TOKEN_EOI} if input ran out between
     *   tokens and end of input has been signalled
     */
    @Override
    public final int nextFromProlog(boolean isProlog) throws XMLStreamException
    {
        // Had fully complete event? Need to reset state etc:
        if (_currToken != EVENT_INCOMPLETE) {
            // First: keep track of where event started
            setStartLocation();

            // yet one more special case: after START_DOCUMENT need to check things...
            if (_currToken == START_DOCUMENT) {
                _currToken = EVENT_INCOMPLETE;
                // NOTE(review): a non-null name here presumably means that what
                // looked like an xml declaration was really a PI whose target
                // has already been parsed -- confirm against xml declaration code
                if (_tokenName != null) {
                    _nextEvent = PROCESSING_INSTRUCTION;
                    _state = STATE_PI_AFTER_TARGET;
                    checkPITargetName(_tokenName);
                    return handlePI();
                }
            } else {
                _currToken = _nextEvent = EVENT_INCOMPLETE;
                _state = STATE_DEFAULT;
            }
        }
        // From here on _currToken is EVENT_INCOMPLETE, so "return _currToken"
        // below means "need more input".

        // Ok, do we know which event it will be?
        if (_nextEvent == EVENT_INCOMPLETE) { // nope
            // The very first thing: XML declaration handling
            if (_state == STATE_PROLOG_INITIAL) {
                if (_inputPtr >= _inputEnd) {
                    return _currToken;
                }
                // Ok: see if we have what looks like XML declaration; process:
                if (_pendingInput != 0) { // already parsing (potential) XML declaration
                    Boolean b = startXmlDeclaration(); // is or may be XML declaration, so:
                    if (b == null) { // not yet known; bail out
                        return EVENT_INCOMPLETE;
                    }
                    if (b == Boolean.FALSE) { // no real XML declaration; synthesize one
                        return _startDocumentNoXmlDecl();
                    }
                    return handleXmlDeclaration();
                }
                if (_currentByte() == BYTE_LT) { // first byte, see if it could be XML declaration
                    ++_inputPtr;
                    _pendingInput = PENDING_STATE_XMLDECL_LT;
                    Boolean b = startXmlDeclaration(); // is or may be XML declaration, so:
                    if (b == null) {
                        return EVENT_INCOMPLETE;
                    }
                    if (b == Boolean.FALSE) { // no real XML declaration; synthesize one
                        return _startDocumentNoXmlDecl();
                    }
                    return handleXmlDeclaration();
                }
                // can't be XML declaration
                _state = STATE_DEFAULT;
                return _startDocumentNoXmlDecl();
            }

            // First: did we have a lone CR at the end of the buffer?
            if (_pendingInput != 0) { // yup
                if (!handlePartialCR()) {
                    return _currToken;
                }
            }
            // Skip white space until '<' is seen (or input runs out)
            while (_state == STATE_DEFAULT) {
                if (_inputPtr >= _inputEnd) { // no more input available
                    if (_endOfInput) { // for good? That may be fine
                        setStartLocation();
                        return TOKEN_EOI;
                    }
                    return _currToken;
                }
                byte b = _nextByte();

                // Really should get white space or '<'... anything else is
                // pretty much an error.
                if (b == BYTE_LT) { // root element, comment, proc instr?
                    _state = STATE_PROLOG_SEEN_LT;
                    break;
                }
                if (b == BYTE_SPACE || b == BYTE_CR
                    || b == BYTE_LF || b == BYTE_TAB) {
                    // Prolog/epilog ws is to be skipped, not part of Infoset
                    if (!asyncSkipSpace()) { // ran out of input?
                        if (_endOfInput) { // for good? That may be fine
                            setStartLocation();
                            return TOKEN_EOI;
                        }
                        return _currToken;
                    }
                } else {
                    reportPrologUnexpChar(isProlog, decodeCharForError(b), null);
                }
            }
            // '<' seen: the next byte decides between declaration, PI and element
            if (_state == STATE_PROLOG_SEEN_LT) {
                if (_inputPtr >= _inputEnd) {
                    return _currToken;
                }
                byte b = _nextByte();
                if (b == BYTE_EXCL) { // comment or DOCTYPE declaration?
                    _state = STATE_PROLOG_DECL;
                    return handlePrologDeclStart(isProlog);
                }
                if (b == BYTE_QMARK) { // PI
                    _nextEvent = PROCESSING_INSTRUCTION;
                    _state = STATE_DEFAULT;
                    return handlePI();
                }
                // An end tag is never legal here, and no element may start
                // when not in prolog
                if (b == BYTE_SLASH || !isProlog) {
                    reportPrologUnexpElement(isProlog, b);
                }
                return handleStartElementStart(b);
            }
            if (_state == STATE_PROLOG_DECL) {
                return handlePrologDeclStart(isProlog);
            }
            // should never have anything else...
            return throwInternal();
        }

        // At this point, we do know the event type
        switch (_nextEvent) {
        case START_ELEMENT:
            return handleStartElement();
        case START_DOCUMENT:
            return handleXmlDeclaration();
        case PROCESSING_INSTRUCTION:
            return handlePI();
        case COMMENT:
            return handleComment();
        case DTD:
            return handleDTD();
        }
        return throwInternal(); // should never get here
    }

    /**
     * Called once it is known that the document does NOT begin with an
     * xml declaration: activates the (default) encoding and records
     * START_DOCUMENT as the current token.
     *
     * @return Always START_DOCUMENT
     */
    protected int _startDocumentNoXmlDecl() throws XMLStreamException
    {
        // 03-Apr-2018, tatu: with no declaration to consult, encoding
        //   can be finalized right away
        _activateEncoding();
        return (_currToken = START_DOCUMENT);
    }

    private final int handlePrologDeclStart(boolean isProlog) throws XMLStreamException
    {
        if (_inputPtr >= _inputEnd) { // nothing we can do?
            return EVENT_INCOMPLETE;
        }
        byte b = _nextByte();
        // So far, we have seen "= _inputEnd) {
           return null;
       }
       if (_pendingInput == PENDING_STATE_XMLDECL_LT) { // "<" at start of doc
            if (_currentByte() != BYTE_QMARK) { // some other 
                _pendingInput = 0;
                _state = STATE_PROLOG_SEEN_LT;
                return Boolean.FALSE;
            }
            ++_inputPtr;
            _pendingInput = PENDING_STATE_XMLDECL_LTQ;
            if (_inputPtr >= _inputEnd) {
                return null;
            }
       }
       if (_pendingInput == PENDING_STATE_XMLDECL_LTQ) { // "= _inputEnd) {
                    break;
                }
                // fall through
            case STATE_XMLDECL_BEFORE_VERSION:
                if (!asyncSkipSpace()) { // not enough input
                    break;
                }
                if ((_tokenName = _parseNewXmlDeclName(_nextByte())) == null) { // incomplete
                    _state = STATE_XMLDECL_VERSION;
                    break;
                }
                if (!_tokenName.hasPrefixedName("version")) {
                    reportInputProblem("Unexpected keyword '"+_tokenName.getPrefixedName()+"' in XML declaration: expected 'version'");
                }
                _state = STATE_XMLDECL_AFTER_VERSION;
                continue main_loop;
            case STATE_XMLDECL_VERSION: // "= _inputEnd) {
                    break;
                }
                // fall through
            case STATE_XMLDECL_AFTER_VERSION: // "= _inputEnd) {
                    break;
                }
                // fall through
            case STATE_XMLDECL_VERSION_EQ: // "= _inputEnd || !parseXmlDeclAttr(buf, 0)) {
                        _state = STATE_XMLDECL_VERSION_VALUE;
                        break;
                    }
                }
                verifyAndSetXmlVersion();
                _state = STATE_XMLDECL_AFTER_VERSION_VALUE;
                continue main_loop;
    
            case STATE_XMLDECL_VERSION_VALUE: // parsing version value
                if (!parseXmlDeclAttr(_textBuilder.getBufferWithoutReset(), _textBuilder.getCurrentLength())) {
                    _state = STATE_XMLDECL_VERSION_VALUE;
                    break;
                }
                verifyAndSetXmlVersion();
                _state = STATE_XMLDECL_AFTER_VERSION_VALUE;
                if (_inputPtr >= _inputEnd) {
                    break;
                }
                // fall through
                
            case STATE_XMLDECL_AFTER_VERSION_VALUE: // version got; need space or '?'
                {
                    byte b = _nextByte();
                    if (b == BYTE_QMARK) {
                        _state = STATE_XMLDECL_ENDQ;
                        continue main_loop;
                    }
                    if (b == BYTE_SPACE || b == BYTE_CR || b == BYTE_LF || b == BYTE_TAB) {
                        _state = STATE_XMLDECL_BEFORE_ENCODING;
                    } else {
                        reportPrologUnexpChar(true, decodeCharForError(b), " (expected space after version value in xml declaration)");
                    }
                }
                if (_inputPtr >= _inputEnd) {
                    break;
                }
                // fall through
                
            case STATE_XMLDECL_BEFORE_ENCODING: // version, value, space got, need '?' or 'e'
                if (!asyncSkipSpace()) { // not enough input
                    break;
                }
                {
                    byte b = _nextByte();
                    if (b == BYTE_QMARK) {
                        _state = STATE_XMLDECL_ENDQ;
                        continue main_loop;
                    }
                    if ((_tokenName = _parseNewXmlDeclName(b)) == null) { // incomplete
                        _state = STATE_XMLDECL_ENCODING;
                        break;
                    }
                    // Can actually also get "standalone" instead...
                    if (_tokenName.hasPrefixedName("encoding")) {
                        _state = STATE_XMLDECL_AFTER_ENCODING;
                    } else if (_tokenName.hasPrefixedName("standalone")) {
                        _state = STATE_XMLDECL_AFTER_STANDALONE;
                        continue main_loop;
                    } else {
                        reportInputProblem("Unexpected keyword '"+_tokenName.getPrefixedName()+"' in XML declaration: expected 'encoding'");
                    }
                }
                continue main_loop;
    
            case STATE_XMLDECL_ENCODING: // parsing "encoding"
                if ((_tokenName = _parseXmlDeclName()) == null) { // incomplete
                    break;
                }
                // Can actually also get "standalone" instead...
                if (_tokenName.hasPrefixedName("encoding")) {
                    _state = STATE_XMLDECL_AFTER_ENCODING;
                } else if (_tokenName.hasPrefixedName("standalone")) {
                    _state = STATE_XMLDECL_AFTER_STANDALONE;
                    continue main_loop;
                } else {
                    reportInputProblem("Unexpected keyword '"+_tokenName.getPrefixedName()+"' in XML declaration: expected 'encoding'");
                }
                if (_inputPtr >= _inputEnd) {
                    break;
                }
                // fall through
            case STATE_XMLDECL_AFTER_ENCODING: // got "encoding"; must get ' ' or '='
                if (!asyncSkipSpace()) { // not enough input
                    break;
                }
                {
                    byte b = _nextByte();
                    if (b != BYTE_EQ) {
                        reportPrologUnexpChar(true, decodeCharForError(b), " (expected '=' after 'encoding' in xml declaration)");
                    }
                }
                _state = STATE_XMLDECL_ENCODING_EQ;
                if (_inputPtr >= _inputEnd) {
                    break;
                }
                // fall through
            case STATE_XMLDECL_ENCODING_EQ: // "encoding="
                if (!asyncSkipSpace()) { // skip space, if any
                    break;
                }
                _elemAttrQuote = _nextByte();
                if (_elemAttrQuote != BYTE_QUOT && _elemAttrQuote != BYTE_APOS) {
                    reportPrologUnexpChar(true, decodeCharForError(_elemAttrQuote), " (expected '\"' or ''' in xml declaration for encoding value)");
                }
                _state = STATE_XMLDECL_ENCODING_VALUE;
                {
                    char[] buf = _textBuilder.resetWithEmpty();
                    if (_inputPtr >= _inputEnd || !parseXmlDeclAttr(buf, 0)) {
                        _state = STATE_XMLDECL_ENCODING_VALUE;
                        break;
                    }
                }
                verifyAndSetXmlEncoding();
                _state = STATE_XMLDECL_AFTER_ENCODING_VALUE;
                break;
    
            case STATE_XMLDECL_ENCODING_VALUE: // parsing encoding value
                if (!parseXmlDeclAttr(_textBuilder.getBufferWithoutReset(), _textBuilder.getCurrentLength())) {
                    _state = STATE_XMLDECL_ENCODING_VALUE;
                    break;
                }
                verifyAndSetXmlEncoding();
                _state = STATE_XMLDECL_AFTER_ENCODING_VALUE;
                if (_inputPtr >= _inputEnd) {
                    break;
                }
                // fall through
                
            case STATE_XMLDECL_AFTER_ENCODING_VALUE: // encoding+value gotten; need space or '?'
                {
                    byte b = _nextByte();
                    if (b == BYTE_QMARK) {
                        _state = STATE_XMLDECL_ENDQ;
                        continue main_loop;
                    }
                    if (b == BYTE_SPACE || b == BYTE_CR || b == BYTE_LF || b == BYTE_TAB) {
                        _state = STATE_XMLDECL_BEFORE_STANDALONE;
                    } else {
                        reportPrologUnexpChar(true, decodeCharForError(b), " (expected space after encoding value in xml declaration)");
                    }
                }
                if (_inputPtr >= _inputEnd) {
                    break;
                }
                // fall through
            
            case STATE_XMLDECL_BEFORE_STANDALONE: // after encoding+value+space; get '?' or 's'
                if (!asyncSkipSpace()) { // not enough input
                    break;
                }
                {
                    byte b = _nextByte();
                    if (b == BYTE_QMARK) {
                        _state = STATE_XMLDECL_ENDQ;
                        continue main_loop;
                    }
                    if ((_tokenName = _parseNewXmlDeclName(b)) == null) { // incomplete
                        _state = STATE_XMLDECL_STANDALONE;
                        break;
                    }
                    if (!_tokenName.hasPrefixedName("standalone")) {
                        reportInputProblem("Unexpected keyword '"+_tokenName.getPrefixedName()+"' in XML declaration: expected 'standalone'");
                    }
                }
                _state = STATE_XMLDECL_AFTER_STANDALONE;
                continue main_loop;
    
            case STATE_XMLDECL_STANDALONE: // parsing "standalone"
                if ((_tokenName = _parseXmlDeclName()) == null) { // incomplete
                    break;
                }
                if (!_tokenName.hasPrefixedName("standalone")) {
                    reportInputProblem("Unexpected keyword 'encoding' in XML declaration: expected 'standalone'");
                }
                _state = STATE_XMLDECL_AFTER_STANDALONE;
                if (_inputPtr >= _inputEnd) {
                    break;
                }
                // fall through
            case STATE_XMLDECL_AFTER_STANDALONE: // got "standalone"; must get ' ' or '='
                if (!asyncSkipSpace()) { // not enough input
                    break;
                }
                {
                    byte b = _nextByte();
                    if (b != BYTE_EQ) {
                        reportPrologUnexpChar(true, decodeCharForError(b), " (expected '=' after 'standalone' in xml declaration)");
                    }
                }
                _state = STATE_XMLDECL_STANDALONE_EQ;
                if (_inputPtr >= _inputEnd) {
                    break;
                }
                // fall through
            case STATE_XMLDECL_STANDALONE_EQ: // "standalone="
                if (!asyncSkipSpace()) { // skip space, if any
                    break;
                }
                _elemAttrQuote = _nextByte();
                if (_elemAttrQuote != BYTE_QUOT && _elemAttrQuote != BYTE_APOS) {
                    reportPrologUnexpChar(true, decodeCharForError(_elemAttrQuote), " (expected '\"' or ''' in xml declaration for standalone value)");
                }
                {
                    char[] buf = _textBuilder.resetWithEmpty();
                    if (_inputPtr >= _inputEnd || !parseXmlDeclAttr(buf, 0)) {
                        _state = STATE_XMLDECL_STANDALONE_VALUE;
                        break;
                    }
                }
                verifyAndSetXmlStandalone();
                _state = STATE_XMLDECL_AFTER_STANDALONE_VALUE;
                continue main_loop;
    
            case STATE_XMLDECL_STANDALONE_VALUE: // encoding+value gotten; need space or '?'
    
                if (!parseXmlDeclAttr(_textBuilder.getBufferWithoutReset(), _textBuilder.getCurrentLength())) {
                    _state = STATE_XMLDECL_STANDALONE_VALUE;
                    break;
                }
                verifyAndSetXmlStandalone();
                _state = STATE_XMLDECL_AFTER_STANDALONE_VALUE;
                if (_inputPtr >= _inputEnd) {
                    break;
                }
                // fall through
            case STATE_XMLDECL_AFTER_STANDALONE_VALUE: // encoding+value gotten; need space or '?'
                if (!asyncSkipSpace()) { // skip space, if any
                    break;
                }
                if (_nextByte() != BYTE_QMARK) {
                    reportPrologUnexpChar(true, decodeCharForError(_prevByte()), " (expected '?>' to end xml declaration)");
                }
                _state = STATE_XMLDECL_ENDQ;
                if (_inputPtr >= _inputEnd) {
                    break;
                }
                // fall through
    
            case STATE_XMLDECL_ENDQ:
                // Better clear up decoded name, to avoid later problems (would be taken as PI)
                _tokenName = null;
                _state = STATE_DEFAULT;
                _nextEvent = EVENT_INCOMPLETE;
                if (_nextByte() != BYTE_GT) {
                    reportPrologUnexpChar(true, decodeCharForError(_prevByte()), " (expected '>' to end xml declaration)");
                }
                // 03-Apr-2018, tatu: Finally! Done with XML declaration, we know the encoding for sure.
                _activateEncoding();
                return START_DOCUMENT;
    
            default:
                throwInternal();
            }
        }

        return EVENT_INCOMPLETE;
    }
    
    /**
     * Resumable state machine that decodes a DOCTYPE declaration from whatever
     * input is currently buffered. Progress is tracked in {@code _state} so the
     * method can be re-entered when more input has been fed.
     *<p>
     * States are visited roughly in this order: keyword {@code DOCTYPE}, white space,
     * root name, optional {@code PUBLIC}/{@code SYSTEM} keyword with its quoted
     * identifier(s), optional internal subset and finally the closing '&gt;'.
     *
     * @return {@code DTD} once the closing '&gt;' has been consumed;
     *   {@code EVENT_INCOMPLETE} when one of the explicit early exits is taken;
     *   otherwise (main loop ran out of input) the value of {@code _currToken}
     *
     * @throws XMLStreamException if the declaration is malformed
     */
    private int handleDTD() throws XMLStreamException
    {
        // First: left-over CRs?
        if (_pendingInput == PENDING_STATE_CR) {
            if (!handlePartialCR()) {
                return EVENT_INCOMPLETE;
            }
        }
        // Internal subset is handled outside of the main loop since it has its own
        // resumable handler; only proceed once that reports completion
        if (_state == STATE_DTD_INT_SUBSET) {
            if (handleDTDInternalSubset(false)) { // got it!
                _state = STATE_DTD_EXPECT_CLOSING_GT;
            } else {
                return EVENT_INCOMPLETE;
            }
        }
        
        main_loop:
        while (_inputPtr < _inputEnd) {
            switch (_state) {
            case STATE_DEFAULT: // seen 'D'
                _tokenName = parseNewName(BYTE_D);
                if (_tokenName == null) {
                    _state = STATE_DTD_DOCTYPE;
                    return EVENT_INCOMPLETE;
                }
                if (!"DOCTYPE".equals(_tokenName.getPrefixedName())) {
                    reportPrologProblem(true, "expected 'DOCTYPE'");
                }
                _state = STATE_DTD_AFTER_DOCTYPE;
                continue main_loop;
            case STATE_DTD_DOCTYPE: // in the middle of "DOCTYPE" keyword
                _tokenName = parsePName();
                if (_tokenName == null) {
                    _state = STATE_DTD_DOCTYPE;
                    return EVENT_INCOMPLETE;
                }
                if (!"DOCTYPE".equals(_tokenName.getPrefixedName())) {
                    reportPrologProblem(true, "expected 'DOCTYPE'");
                }
                // Keyword is complete: record that before possibly bailing out, so that
                // re-entry does not try to continue decoding an already finished name
                _state = STATE_DTD_AFTER_DOCTYPE;
                if (_inputPtr >= _inputEnd) {
                    break;
                }
                // fall through
            case STATE_DTD_AFTER_DOCTYPE:
                {
                    byte b = _nextByte();
                    if (b == BYTE_SPACE || b == BYTE_CR || b == BYTE_LF || b == BYTE_TAB) {
                        _state = STATE_DTD_BEFORE_ROOT_NAME;
                    } else {
                        reportPrologUnexpChar(true, decodeCharForError(b), " (expected space after 'DOCTYPE')");
                    }
                }
                // fall through (ok to skip bounds checks, async-skip does it)
            case STATE_DTD_BEFORE_ROOT_NAME:
                if (!asyncSkipSpace()) { // not enough input
                    break;
                }
                if ((_tokenName = parseNewName(_nextByte())) == null) { // incomplete
                    _state = STATE_DTD_ROOT_NAME;
                    break;
                }
                // Root name fully decoded: must NOT go through STATE_DTD_ROOT_NAME, which
                // would resume name decoding using stale partial-quad state
                _state = STATE_DTD_AFTER_ROOT_NAME;
                continue main_loop;
            case STATE_DTD_ROOT_NAME: // in the middle of root name
                if ((_tokenName = parsePName()) == null) { // incomplete
                    break;
                }
                _state = STATE_DTD_AFTER_ROOT_NAME;
                if (_inputPtr >= _inputEnd) {
                    break;
                }
                // fall through
            case STATE_DTD_AFTER_ROOT_NAME:
                {
                    byte b = _nextByte();
                    if (b == BYTE_GT) {
                        _state = STATE_DEFAULT;
                        _nextEvent = EVENT_INCOMPLETE;
                        return DTD;
                    }
                    if (b == BYTE_SPACE || b == BYTE_CR || b == BYTE_LF || b == BYTE_TAB) {
                        _state = STATE_DTD_BEFORE_IDS;
                    } else {
                        reportPrologUnexpChar(true, decodeCharForError(b), " (expected space after root name in DOCTYPE declaration)");
                    }
                }
                // fall through (ok to skip bounds checks, async-skip does it)
            case STATE_DTD_BEFORE_IDS:
                if (!asyncSkipSpace()) { // not enough input
                    break;
                }
                {
                    byte b = _nextByte();
                    if (b == BYTE_GT) {
                        _state = STATE_DEFAULT;
                        _nextEvent = EVENT_INCOMPLETE;
                        return DTD;
                    }
                    PName name;
                    if ((name = parseNewName(b)) == null) {
                        _state = STATE_DTD_PUBLIC_OR_SYSTEM;
                        break;
                    }
                    String str = name.getPrefixedName();
                    if ("PUBLIC".equals(str)) {
                        _state = STATE_DTD_AFTER_PUBLIC;
                    } else if ("SYSTEM".equals(str)) {
                        _state = STATE_DTD_AFTER_SYSTEM;
                    } else {
                        reportPrologProblem(true, "unexpected token '"+str+"': expected either PUBLIC or SYSTEM");
                    }
                }
                continue main_loop;
    
            case STATE_DTD_PUBLIC_OR_SYSTEM: // in the middle of "PUBLIC" or "SYSTEM"
                {
                    PName name;
                    if ((name = parsePName()) == null) {
                        _state = STATE_DTD_PUBLIC_OR_SYSTEM;
                        break;
                    }
                    String str = name.getPrefixedName();
                    if ("PUBLIC".equals(str)) {
                        _state = STATE_DTD_AFTER_PUBLIC;
                    } else if ("SYSTEM".equals(str)) {
                        _state = STATE_DTD_AFTER_SYSTEM;
                    } else {
                        reportPrologProblem(true, "unexpected token '"+str+"': expected either PUBLIC or SYSTEM");
                    }
                }
                continue main_loop;
                    
            case STATE_DTD_AFTER_PUBLIC: 
                {
                    byte b = _nextByte();
                    if (b == BYTE_SPACE || b == BYTE_CR || b == BYTE_LF || b == BYTE_TAB) {
                        _state = STATE_DTD_BEFORE_PUBLIC_ID;
                    } else {
                        reportPrologUnexpChar(true, decodeCharForError(b), " (expected space after PUBLIC keyword)");
                    }
                }
                continue main_loop;
    
            case STATE_DTD_AFTER_SYSTEM: 
                {
                    byte b = _nextByte();
                    if (b == BYTE_SPACE || b == BYTE_CR || b == BYTE_LF || b == BYTE_TAB) {
                        _state = STATE_DTD_BEFORE_SYSTEM_ID;
                    } else {
                        reportPrologUnexpChar(true, decodeCharForError(b), " (expected space after SYSTEM keyword)");
                    }
                }
                continue main_loop;
    
            case STATE_DTD_BEFORE_PUBLIC_ID: 
                if (!asyncSkipSpace()) {
                    break;
                }
                // Opening quote is remembered so that the id parser knows what closes the literal
                _elemAttrQuote = _nextByte();
                if (_elemAttrQuote != BYTE_QUOT && _elemAttrQuote != BYTE_APOS) {
                    reportPrologUnexpChar(true, decodeCharForError(_elemAttrQuote), " (expected '\"' or ''' for PUBLIC ID)");
                }
                {
                    char[] buf = _textBuilder.resetWithEmpty();
                    if (_inputPtr >= _inputEnd || !parseDtdId(buf, 0, false)) {
                        _state = STATE_DTD_PUBLIC_ID;
                        break;
                    }
                }
                verifyAndSetPublicId();
                _state = STATE_DTD_AFTER_PUBLIC_ID;
                continue main_loop;
    
            case STATE_DTD_PUBLIC_ID: // in the middle of public id; continue appending
                if (!parseDtdId(_textBuilder.getBufferWithoutReset(), _textBuilder.getCurrentLength(), false)) {
                    break;
                }
                verifyAndSetPublicId();
                _state = STATE_DTD_AFTER_PUBLIC_ID;
                if (_inputPtr >= _inputEnd) {
                    break;
                }
                // fall through
            case STATE_DTD_AFTER_PUBLIC_ID: 
                {
                    byte b = _nextByte();
                    if (b == BYTE_SPACE || b == BYTE_CR || b == BYTE_LF || b == BYTE_TAB) {
                        _state = STATE_DTD_BEFORE_SYSTEM_ID;
                    } else {
                        reportPrologUnexpChar(true, decodeCharForError(b), " (expected space after PUBLIC ID)");
                    }
                }
                // fall through (ok to skip bounds checks, async-skip does it)
    
            case STATE_DTD_BEFORE_SYSTEM_ID: 
                if (!asyncSkipSpace()) {
                    break;
                }
                _elemAttrQuote = _nextByte();
                if (_elemAttrQuote != BYTE_QUOT && _elemAttrQuote != BYTE_APOS) {
                    reportPrologUnexpChar(true, decodeCharForError(_elemAttrQuote), " (expected '\"' or ''' for SYSTEM ID)");
                }
                {
                    char[] buf = _textBuilder.resetWithEmpty();
                    if (_inputPtr >= _inputEnd || !parseDtdId(buf, 0, true)) {
                        _state = STATE_DTD_SYSTEM_ID;
                        break;
                    }
                }
                verifyAndSetSystemId();
                _state = STATE_DTD_AFTER_SYSTEM_ID;
                continue main_loop;

            case STATE_DTD_SYSTEM_ID: // in the middle of system id; continue appending
                if (!parseDtdId(_textBuilder.getBufferWithoutReset(), _textBuilder.getCurrentLength(), true)) {
                    break;
                }
                verifyAndSetSystemId();
                _state = STATE_DTD_AFTER_SYSTEM_ID;
                if (_inputPtr >= _inputEnd) {
                    break;
                }
                // fall through
    
            case STATE_DTD_AFTER_SYSTEM_ID:
                if (!asyncSkipSpace()) {
                    break;
                }
                {
                    byte b = _nextByte();
                    if (b == BYTE_GT) {
                        _state = STATE_DEFAULT;
                        _nextEvent = EVENT_INCOMPLETE;
                        return DTD;
                    }
                    if (b != BYTE_LBRACKET) {
                        // report the byte actually seen, not the quote char of the preceding literal
                        reportPrologUnexpChar(true, decodeCharForError(b), " (expected either '[' for internal subset, or '>' to end DOCTYPE)");
                    }
                }
                _state = STATE_DTD_INT_SUBSET;
                if (handleDTDInternalSubset(true)) {
                    _state = STATE_DTD_EXPECT_CLOSING_GT;
                } else {
                    return EVENT_INCOMPLETE;
                }
                // fall through
                
            case STATE_DTD_EXPECT_CLOSING_GT:
                if (!asyncSkipSpace()) {
                    break;
                }
                {
                    byte b = _nextByte();
                    if (b != BYTE_GT) {
                        // decode like every other call site instead of passing a raw (signed) byte
                        reportPrologUnexpChar(true, decodeCharForError(b), "expected '>' to end DTD");
                    }
                }
                _state = STATE_DEFAULT;
                _nextEvent = EVENT_INCOMPLETE;
                return DTD;
            default:
                throwInternal();
            }
        }
        // NOTE(review): presumably _currToken is EVENT_INCOMPLETE while an event is
        // still being decoded -- confirm against the caller
        return _currToken;
    }

    /**
     * Copies bytes of a quoted PUBLIC or SYSTEM identifier into the text builder
     * until the closing quote (as recorded in {@code _elemAttrQuote}) is seen or
     * the currently available input runs out. Bytes are appended with a plain
     * byte-to-char cast.
     *
     * @param outputBuffer Current segment of the text builder to append to
     * @param outputPtr Offset within {@code outputBuffer} of the next free slot
     * @param system {@code true} when decoding a SYSTEM identifier (no character
     *    check applied here); {@code false} for a PUBLIC identifier, whose
     *    characters are checked with {@code validPublicIdChar}
     *
     * @return {@code true} if the closing quote was found; {@code false} if input
     *    ended first, in which case the partial value stays in the text builder
     *
     * @throws XMLStreamException if a PUBLIC identifier contains an invalid character
     */
    private final boolean parseDtdId(char[] outputBuffer, int outputPtr, boolean system) throws XMLStreamException
    {
        final int quote = (int) _elemAttrQuote;
        while (_inputPtr < _inputEnd) {
            int ch = _nextByte() & 0xFF;
            if (ch == quote) {
                _textBuilder.setCurrentLength(outputPtr);
                return true;
            }
            // Character check only applies to PUBLIC identifiers, so the message can
            // name the kind directly
            if (!system && !validPublicIdChar(ch)) {
                reportPrologUnexpChar(true, decodeCharForError((byte) ch), " (not valid in PUBLIC ID)");
            }
            if (outputPtr >= outputBuffer.length) {
                outputBuffer = _textBuilder.finishCurrentSegment();
                outputPtr = 0;
            }
            outputBuffer[outputPtr++] = (char) ch;
        }
        // Out of input: remember how much has been collected so a later call can resume
        _textBuilder.setCurrentLength(outputPtr);
        return false;
    }

    // // // NOTE: specialized versions of `parsePName` and `parseNewName`, used
    // // //  for decoding the `xml` target and the pseudo-attribute names of the
    // // //  XML declaration. The tricky part is that this code runs before any
    // // //  encoding declaration has been processed, so it is essentially part
    // // //  of bootstrapping.
    
    /**
     * Begins decoding of a name within the XML declaration, given its first byte,
     * and then hands off to {@link #_parseXmlDeclName()} for the remaining bytes.
     *
     * @param b First byte of the name
     *
     * @return Decoded name, or {@code null} if the name is not yet complete
     *
     * @throws XMLStreamException if {@code b} can not start a name
     */
    private final PName _parseNewXmlDeclName(byte b) throws XMLStreamException
    {
        final int firstChar = b & 0xFF;
        // 'A' is the lowest acceptable start char (':' would only be allowed in non-ns mode)
        if (firstChar < INT_A) {
            throwUnexpectedChar(firstChar, "; expected a name start character");
        }
        // Seed decoding state: one byte held in the partial quad, no complete quads yet
        _currQuadBytes = 1;
        _currQuad = firstChar;
        _quadCount = 0;
        return _parseXmlDeclName();
    }

    /**
     * Continues decoding a name within the XML declaration, resuming from the state
     * held in {@code _currQuad} (bytes collected for the current quad),
     * {@code _currQuadBytes} (how many bytes that quad holds, 0 to 3) and
     * {@code _quadBuffer}/{@code _quadCount} (completed 4-byte quads).
     *<p>
     * Name bytes are packed four to an int, earlier bytes in higher-order positions.
     * A byte below 'A' (65) that is not one of '-', '.', '0'-'9' or ':' ends the
     * name, at which point lookup is delegated to {@code _findXmlDeclName}.
     *
     * @return Decoded name; or {@code null} if input ran out before the end of the
     *    name was seen (state has then been saved for a later call)
     */
    private final PName _parseXmlDeclName() throws XMLStreamException
    {
        int q = _currQuad;

        while (true) {
            int i;

            // Jump to the position within the quad where decoding left off; each
            // case handles one byte and falls through to the next
            switch (_currQuadBytes) {
            case 0:
                if (_inputPtr >= _inputEnd) {
                    return null; // all pointers have been set
                }
                q = _nextByte() & 0xFF;
                // Since name char validity is checked later on, only do quickie lookup
                if (q < 65) { // 'A'
                    // below 45 ('-'), above 58 (':') or 47 ('/'): not a name char
                    if (q < 45 || q > 58 || q == 47) {
                        return _findXmlDeclName(q, 0);
                    }
                }
                // fall through
            case 1:
                if (_inputPtr >= _inputEnd) { // need to store pointers
                    _currQuad = q;
                    _currQuadBytes = 1;
                    return null;
                }
                i = _nextByte() & 0xFF;
                if (i < 65) { // 'A'
                    if (i < 45 || i > 58 || i == 47) {
                        return _findXmlDeclName(q, 1);
                    }
                }
                // append byte as the new lowest-order byte of the quad
                q = (q << 8) | i;
                // fall through
            case 2:
                if (_inputPtr >= _inputEnd) { // need to store pointers
                    _currQuad = q;
                    _currQuadBytes = 2;
                    return null;
                }
                i = _nextByte() & 0xFF;
                if (i < 65) { // 'A'
                    if (i < 45 || i > 58 || i == 47) {
                        return _findXmlDeclName(q, 2);
                    }
                }
                q = (q << 8) | i;
                // fall through
            case 3:
                if (_inputPtr >= _inputEnd) { // need to store pointers
                    _currQuad = q;
                    _currQuadBytes = 3;
                    return null;
                }
                i = _nextByte() & 0xFF;
                if (i < 65) { // 'A'
                    if (i < 45 || i > 58 || i == 47) {
                        return _findXmlDeclName(q, 3);
                    }
                }
                q = (q << 8) | i;
            }

            // If we get this far, need to add full quad into result array and update state
            if (_quadCount == 0) { // first quad
                _quadBuffer[0] = q;
                _quadCount = 1;
            } else {
                if (_quadCount >= _quadBuffer.length) { // let's just double?
                    _quadBuffer = DataUtil.growArrayBy(_quadBuffer, _quadBuffer.length);
                }
                _quadBuffer[_quadCount++] = q;
            }
            // Start a fresh quad on the next iteration
            _currQuadBytes = 0;
        }
    }

    /**
     * Resolves the name whose bytes have been collected into quads, first trying
     * the fixed set of names known to {@code AsyncXmlDeclHelper} and, failing that,
     * delegating to the general {@code findPName} lookup.
     *
     * @param lastQuad Contents of the last, possibly partial, quad; ignored when
     *    {@code lastByteCount} is 0
     * @param lastByteCount Number of bytes held in {@code lastQuad}; 0 means the
     *    last complete quad in {@code _quadBuffer} is the final one
     *
     * @return Name that matches the collected bytes
     */
    protected final PName _findXmlDeclName(int lastQuad, int lastByteCount) throws XMLStreamException
    {
        int fullQuads = _quadCount;
        // With an empty trailing quad, the last complete quad takes its place.
        // NOTE: 'lastByteCount' is deliberately left as is (not bumped to 4), since
        // the original value may still need to be passed on when delegating below.
        if (lastByteCount == 0) {
            fullQuads -= 1;
            lastQuad = _quadBuffer[fullQuads];
        }

        // Very likely one of the short pseudo-attribute names, so try those first;
        // the helper only covers names of up to three quads
        final PName known;
        if (fullQuads == 0) { // 4 bytes or less; only 'lastQuad' is in use
            known = AsyncXmlDeclHelper.find(lastQuad);
        } else if (fullQuads == 1) {
            known = AsyncXmlDeclHelper.find(_quadBuffer[0], lastQuad);
        } else if (fullQuads == 2) {
            known = AsyncXmlDeclHelper.find(_quadBuffer[0], _quadBuffer[1], lastQuad);
        } else {
            known = null;
        }

        if (known == null) {
            // Most likely a processing instruction rather than an XML declaration. There
            // are a few ways to deal with that; for now finalize symbol table etc, delegate
            _activateEncoding();
            return findPName(lastQuad, lastByteCount);
        }
        // The byte that ended the name was read but not used: push it back
        --_inputPtr;
        return known;
    }

    /**
     * Attempts to decode the value of an XML declaration pseudo-attribute, up to
     * the closing quote recorded in {@code _elemAttrQuote}. Decoding is coarse:
     * no linefeeds or entities can occur, so only a rough character range check
     * is made here and exact validity is left to be verified afterwards.
     *<p>
     * NOTE: pseudo-attribute values are required to be 7-bit ASCII, which is why
     * a plain byte-to-char cast is sufficient.
     *
     * @param outputBuffer Current segment of the text builder to append to
     * @param outputPtr Offset within {@code outputBuffer} of the next free slot
     *
     * @return True if the whole value (through the closing quote) was decoded;
     *    false if input ran out first, with the partial value kept in the text builder
     */
    protected boolean parseXmlDeclAttr(char[] outputBuffer, int outputPtr) throws XMLStreamException
    {
        final int closingQuote = (int) _elemAttrQuote;
        char[] segment = outputBuffer;
        int used = outputPtr;
        boolean complete = false;

        while (_inputPtr < _inputEnd) {
            final int c = _nextByte() & 0xFF;
            if (c == closingQuote) {
                complete = true;
                break;
            }
            // not an exact check, but accepts every legal (valid) character
            final boolean acceptable = (c > INT_SPACE) && (c <= INT_z);
            if (!acceptable) {
                reportPrologUnexpChar(true, decodeCharForError((byte) c), " (not valid in XML pseudo-attribute values)");
            }
            if (used >= segment.length) {
                segment = _textBuilder.finishCurrentSegment();
                used = 0;
            }
            segment[used++] = (char) c;
        }
        // Whether done or out of input, record how much has been collected
        _textBuilder.setCurrentLength(used);
        return complete;
    }
}