// src.java.com.ctc.wstx.sr.StreamScanner Maven / Gradle / Ivy
//
// Go to download
/* Woodstox XML processor
 *
 * Copyright (c) 2004- Tatu Saloranta, [email protected]
 *
 * Licensed under the License specified in file LICENSE, included with
 * the source code.
 * You may not use this file except in compliance with the License.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.ctc.wstx.sr;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URL;
import java.text.MessageFormat;

import javax.xml.stream.Location;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLReporter;
import javax.xml.stream.XMLResolver;
import javax.xml.stream.XMLStreamException;

import org.codehaus.stax2.XMLReporter2;
import org.codehaus.stax2.XMLStreamLocation2;
import org.codehaus.stax2.validation.XMLValidationProblem;

import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.cfg.InputConfigFlags;
import com.ctc.wstx.cfg.ParsingErrorMsgs;
import com.ctc.wstx.cfg.XmlConsts;
import com.ctc.wstx.ent.EntityDecl;
import com.ctc.wstx.exc.*;
import com.ctc.wstx.io.*;
import com.ctc.wstx.util.ExceptionUtil;
import com.ctc.wstx.util.SymbolTable;
import com.ctc.wstx.util.TextBuffer;

/**
 * Abstract base class that defines some basic functionality that all
 * Woodstox reader classes (main XML reader, DTD reader) extend from.
 */

public abstract class StreamScanner
    extends WstxInputData
    implements InputProblemReporter,
        InputConfigFlags, ParsingErrorMsgs
{

    // // // Some well-known chars:

    /**
     * Last (highest) char code of the three, LF, CR and NULL
     * (that is, the code of CR, 13).
     */
    public final static char CHAR_CR_LF_OR_NULL = (char) 13;

    /**
     * Same numeric value as {@link #CHAR_CR_LF_OR_NULL}, declared as an int.
     */
    public final static int INT_CR_LF_OR_NULL = 13;

    /**
     * Character that allows quick check of whether a char can potentially
     * be some kind of markup, WRT input stream processing;
     * has to contain linefeeds, {@code &}, {@code <} and {@code >}
     * ({@code >} only matters when quoting text, as part of {@code ]]>}).
     * Defined as the char code immediately following {@code >}.
     */
    protected final static char CHAR_FIRST_PURE_TEXT = (char) ('>' + 1);


    /**
     * First character in Unicode (ie one with lowest id) that is legal
     * as part of a local name (all valid name chars minus ':'). Used
     * for doing quick check for local name end; usually name ends in
     * a whitespace or equals sign.
     */
    protected final static char CHAR_LOWEST_LEGAL_LOCALNAME_CHAR = '-';

    /*
    ////////////////////////////////////////////////////
    // Character validity constants, structs
    ////////////////////////////////////////////////////
     */

    /**
     * We will only use validity array for first 256 characters, mostly
     * because after those characters it's easier to do fairly simple
     * block checks.
     */
    private final static int VALID_CHAR_COUNT = 0x100;

    // Marker: character is not valid in a name (also the default value of
    // array entries that are never explicitly assigned)
    private final static byte NAME_CHAR_INVALID_B = (byte) 0;
    // Marker: character is valid both as the first and as a later name char
    private final static byte NAME_CHAR_ALL_VALID_B = (byte) 1;
    // Marker: character is valid in a name, but not as its first character
    private final static byte NAME_CHAR_VALID_NONFIRST_B = (byte) -1;

    // Name-character validity markers, indexed by char code (0 - 0xFF);
    // populated by the static initializer that follows
    private final static byte[] sCharValidity = new byte[VALID_CHAR_COUNT];

    static {
        /* First, since all valid-as-first chars are also valid-as-other chars,
         * we'll initialize common chars:
         */
        sCharValidity['_'] = NAME_CHAR_ALL_VALID_B;
        for (int i = 0, last = ('z' - 'a'); i <= last; ++i) {
            sCharValidity['A' + i] = NAME_CHAR_ALL_VALID_B;
            sCharValidity['a' + i] = NAME_CHAR_ALL_VALID_B;
        }
        /* Latin-1 letters: XML 1.0 allows 0xC0-0xD6, 0xD8-0xF6 and 0xF8-0xFF
         * as name start characters. The loop has to run through 0xFF
         * (inclusive); stopping short would leave 0xF6 and 0xF8-0xFF marked
         * as invalid. The two exceptions within the range (0xD7,
         * multiplication sign, and 0xF7, division sign) are reverted below.
         */
        for (int i = 0xC0; i <= 0xFF; ++i) { // not all are fully valid, but
            sCharValidity[i] = NAME_CHAR_ALL_VALID_B;
        }
        // ... now we can 'revert' ones not fully valid:
        sCharValidity[0xD7] = NAME_CHAR_INVALID_B;
        sCharValidity[0xF7] = NAME_CHAR_INVALID_B;

        /* And then we can proceed with ones only valid-as-other.
         */
        sCharValidity['-'] = NAME_CHAR_VALID_NONFIRST_B;
        sCharValidity['.'] = NAME_CHAR_VALID_NONFIRST_B;
        sCharValidity[0xB7] = NAME_CHAR_VALID_NONFIRST_B;
        for (int i = '0'; i <= '9'; ++i) {
            sCharValidity[i] = NAME_CHAR_VALID_NONFIRST_B;
        }
    }

    /**
     * Size of the public identifier validity table: public identifiers
     * only use the 7-bit ascii range.
     */
    private final static int VALID_PUBID_CHAR_COUNT = 0x80;
    private final static byte[] sPubidValidity = new byte[VALID_PUBID_CHAR_COUNT];
//    private final static byte PUBID_CHAR_INVALID_B = (byte) 0;
    private final static byte PUBID_CHAR_VALID_B = (byte) 1;
    static {
        // Ascii letters (both cases) and digits
        for (char upper = 'A', lower = 'a'; upper <= 'Z'; ++upper, ++lower) {
            sPubidValidity[upper] = PUBID_CHAR_VALID_B;
            sPubidValidity[lower] = PUBID_CHAR_VALID_B;
        }
        for (char digit = '0'; digit <= '9'; ++digit) {
            sPubidValidity[digit] = PUBID_CHAR_VALID_B;
        }

        // 3 main white space types (LF, CR, space) are valid
        final String whitespace = "\n\r ";
        for (int i = 0, len = whitespace.length(); i < len; ++i) {
            sPubidValidity[whitespace.charAt(i)] = PUBID_CHAR_VALID_B;
        }

        // And many of punctuation/separator ascii chars too:
        final String punctuation = "-'()+,./:=?;!*#@$_%";
        for (int i = 0, len = punctuation.length(); i < len; ++i) {
            sPubidValidity[punctuation.charAt(i)] = PUBID_CHAR_VALID_B;
        }
    }

    /*
    ////////////////////////////////////////////////////
    // Basic configuration
    ////////////////////////////////////////////////////
     */

    /**
     * Copy of the configuration object passed by the factory.
     * Contains immutable settings for this reader (or in case
     * of DTD parsers, reader that uses it)
     */
    protected final ReaderConfig mConfig;

    // // // Various extracted settings:

    /**
     * If true, Reader is namespace aware, and should do basic checks
     * (usually enforcing limitations on having colons in names).
     * Set by the constructor from the CFG_NAMESPACE_AWARE bit of the
     * configuration flags.
     */
    protected final boolean mCfgNsEnabled;

    // Extracted standard on/off settings:

    /**
     * Set by the constructor from the CFG_REPLACE_ENTITY_REFS bit of the
     * configuration flags.
     *<p>
     * note: left non-final on purpose: sub-class may need to modify
     * the default value after construction.
     */
    protected boolean mCfgReplaceEntities;

    /*
    ////////////////////////////////////////////////////
    // Symbol handling, if applicable
    ////////////////////////////////////////////////////
     */

    /**
     * Symbol table; obtained from the reader configuration by the
     * constructor.
     */
    final SymbolTable mSymbols;

    /**
     * Local full name for the event, if it has one (note: element events
     * do NOT use this variable; those names are stored in element stack):
     * target for processing instructions.
     *<p>
     * Currently used for proc. instr. target, and entity name (at least
     * when current entity reference is null).
     *<p>
     * Note: this variable is generally not cleared, since it comes from
     * a symbol table, ie. this won't be the only reference.
     */
    protected String mCurrName;

    /*
    ////////////////////////////////////////////////////
    // Input handling
    ////////////////////////////////////////////////////
     */

    /**
     * Currently active input source; contains link to parent (nesting) input
     * sources, if any.
     */
    protected WstxInputSource mInput;

    /**
     * Top-most input source this reader can use; due to input source
     * chaining, this is not necessarily the root of all input; for example,
     * external DTD subset reader's root input still has original document
     * input as its parent.
     */
    protected final WstxInputSource mRootInput;

    /**
     * Custom resolver used to handle external entities that are to be expanded
     * by this reader (external param/general entity expander)
     */
    XMLResolver mEntityResolver = null;

    /**
     * This is the current depth of the input stack (same as what input
     * element stack would return as its depth).
     * It is used to enforce input scope constraints for nesting of
     * elements (for xml reader) and dtd declaration (for dtd reader)
     * with regards to input block (entity expansion) boundaries.
     *<p>
     * Basically this value is compared to {@link #mInputTopDepth}, which
     * indicates what was the depth at the point where the currently active
     * input scope/block was started.
     */
    protected int mCurrDepth = 0;

    /**
     * Depth at the point where the currently active input scope/block was
     * started; see {@link #mCurrDepth}.
     */
    protected int mInputTopDepth = 0;

    /**
     * Flag that indicates whether linefeeds in the input data are to
     * be normalized or not.
     * Xml specs mandate that the line feeds are only normalized
     * when they are from the external entities (main doc, external
     * general/parsed entities), so normalization has to be
     * suppressed when expanding internal general/parsed entities.
     */
    protected boolean mNormalizeLFs;

    /*
    ////////////////////////////////////////////////////
    // Buffer(s) for local name(s) and text content
    ////////////////////////////////////////////////////
     */

    /**
     * Temporary buffer used if local name can not be just directly
     * constructed from input buffer (name is on a boundary or such).
     * Initially null.
     */
    protected char[] mNameBuffer = null;

    /*
    ////////////////////////////////////////////////////
    // Information about starting location of event
    // Reader is pointing to; updated on-demand
    ////////////////////////////////////////////////////
     */

    // // // Location info at point when current token was started

    /**
     * Total number of characters read before start of current token.
     * Since big (gigabyte-sized) inputs are possible, this needs to be
     * a long, unlike pointers and sizes related to in-memory buffers.
     */
    protected long mTokenInputTotal = 0; 

    /**
     * Input row on which current token starts, 1-based
     */
    protected int mTokenInputRow = 1;

    /**
     * Column on input row that current token starts; 0-based (although
     * in the end it'll be converted to 1-based)
     */
    protected int mTokenInputCol = 0;

    /*
    ////////////////////////////////////////////////////
    // XML document information (from doc decl if one
    // was found) common to all entities (main xml
    // document, external DTD subset)
    ////////////////////////////////////////////////////
     */

    /**
     * Input stream encoding, if known (passed in, or determined by
     * auto-detection); null if not.
     */
    String mDocInputEncoding = null;

    /**
     * Character encoding from xml declaration, if any; null if no
     * declaration, or it didn't specify encoding.
     */
    String mDocXmlEncoding = null;

    /**
     * XML version as declared by the document; one of constants
     * from {@link XmlConsts} (like {@link XmlConsts#XML_V_10}).
     * Starts out as {@link XmlConsts#XML_V_UNKNOWN}.
     */
    protected int mDocXmlVersion = XmlConsts.XML_V_UNKNOWN;

    /*
    ////////////////////////////////////////////////////
    // Life-cycle
    ////////////////////////////////////////////////////
     */

    /**
     * Constructor used when creating a complete new (main-level) reader:
     * one that does not share its input buffers or state with another
     * reader.
     *
     * @param input Input source to read from; also recorded as the
     *   root-level input source
     * @param cfg Reader configuration; symbol table and configuration
     *   flags are extracted from it
     * @param res Resolver to use for external entities
     */
    protected StreamScanner(WstxInputSource input, ReaderConfig cfg,
                            XMLResolver res)
    {
        super();

        // Configuration, and settings extracted from it
        mConfig = cfg;
        mSymbols = cfg.getSymbols();
        final int flags = cfg.getConfigFlags();
        mCfgNsEnabled = (flags & CFG_NAMESPACE_AWARE) != 0;
        mCfgReplaceEntities = (flags & CFG_REPLACE_ENTITY_REFS) != 0;
        mEntityResolver = res;

        // Input source we start with is also the root-level one
        mRootInput = input;
        mInput = input;

        // Nothing buffered yet; linefeeds are normalized by default
        mNormalizeLFs = true;
        mInputBuffer = null;
        mInputPtr = 0;
        mInputEnd = 0;
    }

    /*
    ////////////////////////////////////////////////////
    // Package API
    ////////////////////////////////////////////////////
     */

    /**
     * Returns the location of the last character this reader returned;
     * in other words, the location "one less" than the one currently
     * pointed to.
     *
     * @return Location object constructed by the current input source
     */
    protected WstxInputLocation getLastCharLocation()
    {
        final WstxInputSource src = mInput;
        return src.getLocation(mCurrInputProcessed + mInputPtr - 1,
                mCurrInputRow, mInputPtr - mCurrInputRowStart);
    }

    /**
     * @return Source URL as reported by the currently active input source
     */
    protected URL getSource() {
        final URL src = mInput.getSource();
        return src;
    }

    /**
     * @return System identifier as reported by the currently active
     *   input source
     */
    protected String getSystemId() {
        final String sysId = mInput.getSystemId();
        return sysId;
    }

    /*
    ///////////////////////////////////////////////////////
    // Partial LocationInfo implementation (not implemented
    // by this base class, but is by some sub-classes)
    ///////////////////////////////////////////////////////
     */

    /**
     * Returns location of last properly parsed token; as per StAX specs,
     * apparently needs to be the end of current event, which is the same
     * as the start of the following event (or EOF if that's next).
     *<p>
     * Left abstract here; it is up to sub-classes to implement.
     *
     * @return Location as described above
     */
    public abstract Location getLocation();

    /**
     * Returns the location at which the current token started, based on
     * the recorded token start offset, row and column.
     *
     * @return Start location, constructed by the current input source
     */
    public XMLStreamLocation2 getStartLocation()
    {
        // Column is tracked 0-based, but reported 1-based
        final int column = mTokenInputCol + 1;
        return mInput.getLocation(mTokenInputTotal, mTokenInputRow, column);
    }

    /**
     * Returns the location the reader currently points to (that is, of
     * the next character to read), with a 1-based column.
     *
     * @return Current location, constructed by the current input source
     */
    public XMLStreamLocation2 getCurrentLocation()
    {
        final WstxInputSource src = mInput;
        return src.getLocation(mCurrInputProcessed + mInputPtr,
                mCurrInputRow, mInputPtr - mCurrInputRowStart + 1);
    }

    /*
    ////////////////////////////////////////////////////
    // InputProblemReporter implementation
    ////////////////////////////////////////////////////
     */

    /**
     * Constructs a well-formedness exception for the given message, and
     * either throws it right away or hands it back to the caller.
     *
     * @param msg Error message
     * @param deferErrors If true, exception is returned instead of thrown
     *
     * @return Constructed exception (only when <code>deferErrors</code>
     *   is true)
     *
     * @throws WstxException Constructed exception, when
     *   <code>deferErrors</code> is false
     */
    public WstxException throwWfcException(String msg, boolean deferErrors)
        throws WstxException
    {
        final WstxException problem = constructWfcException(msg);
        if (deferErrors) {
            return problem;
        }
        throw problem;
    }

    /**
     * Throws a parse error with given message, used as is (no message
     * arguments).
     *
     * @param msg Error message
     */
    public void throwParseError(String msg) throws XMLStreamException
    {
        final Object noArg = null;
        throwParseError(msg, noArg, noArg);
    }

    /**
     * Throws generic parse error with specified message and current parsing
     * location. Message is only run through {@link MessageFormat} if at
     * least one of the arguments is non-null; otherwise it is used as is.
     *<p>
     * Note: public access only because core code in other packages needs
     * to access it.
     *
     * @param format Message, or message format pattern
     * @param arg First format argument (may be null)
     * @param arg2 Second format argument (may be null)
     */
    public void throwParseError(String format, Object arg, Object arg2)
        throws XMLStreamException
    {
        final String msg;
        if (arg == null && arg2 == null) {
            msg = format;
        } else {
            msg = MessageFormat.format(format, new Object[] { arg, arg2 });
        }
        throw constructWfcException(msg);
    }

    /**
     * Reports a problem via the configured {@link XMLReporter}, using the
     * location of the last character read; does nothing if no reporter
     * has been configured.
     *
     * @param probType Problem type identifier to pass to the reporter
     * @param format Message to report; treated as a {@link MessageFormat}
     *   pattern only if at least one of the arguments is non-null
     * @param arg First format argument (may be null)
     * @param arg2 Second format argument (may be null)
     */
    public void reportProblem(String probType, String format, Object arg, Object arg2)
        throws XMLStreamException
    {
        XMLReporter rep = mConfig.getXMLReporter();
        if (rep != null) {
            /* Only format when there is something to substitute: running a
             * plain message through MessageFormat would strip single quotes
             * and fail on unbalanced curly braces. This is also how the
             * overload that takes a Location behaves.
             */
            String msg = (arg != null || arg2 != null) ?
                MessageFormat.format(format, new Object[] { arg, arg2 }) : format;
            _reportProblem(rep, probType, msg, null);
        }
    }

    /**
     * Reports a problem at the given location via the configured
     * {@link XMLReporter}; does nothing if no reporter has been configured.
     *
     * @param loc Location to report (passed on as is)
     * @param probType Problem type identifier to pass to the reporter
     * @param format Message to report; treated as a {@link MessageFormat}
     *   pattern only if at least one of the arguments is non-null
     * @param arg First format argument (may be null)
     * @param arg2 Second format argument (may be null)
     */
    public void reportProblem(Location loc, String probType,
                              String format, Object arg, Object arg2)
        throws XMLStreamException
    {
        final XMLReporter rep = mConfig.getXMLReporter();
        if (rep == null) {
            return;
        }
        final String msg;
        if (arg == null && arg2 == null) {
            msg = format;
        } else {
            msg = MessageFormat.format(format, new Object[] { arg, arg2 });
        }
        _reportProblem(rep, probType, msg, loc);
    }

    /**
     * Wraps given message in an error-severity validation problem and
     * passes it on to the reporter.
     *
     * @param rep Reporter to use
     * @param probType Problem type identifier
     * @param msg Message to report
     * @param loc Location of the problem; if null, location of the last
     *   character read is used
     */
    protected void _reportProblem(XMLReporter rep, String probType, String msg, Location loc)
        throws XMLStreamException
    {
        final Location where = (loc != null) ? loc : getLastCharLocation();
        final XMLValidationProblem prob = new XMLValidationProblem(where, msg,
                XMLValidationProblem.SEVERITY_ERROR, probType);
        _reportProblem(rep, prob);
    }

    /**
     * Passes given problem to the reporter, after filling in location
     * and type if they are missing.
     *
     * @param rep Reporter to use; if null, nothing is done
     * @param prob Problem to report; may get its location and/or type set
     */
    protected void _reportProblem(XMLReporter rep, XMLValidationProblem prob)
        throws XMLStreamException
    {
        if (rep == null) {
            return;
        }
        Location loc = prob.getLocation();
        if (loc == null) {
            loc = getLastCharLocation();
            prob.setLocation(loc);
        }
        // Backwards-compatibility fix: add non-null type, if missing:
        if (prob.getType() == null) {
            prob.setType(ErrorConsts.WT_VALIDATION);
        }
        // [WSTX-154]: exceptions thrown by reporter are NOT to be caught and dropped
        // [WTSX-157]: XMLReporter2 gets the problem object itself
        if (!(rep instanceof XMLReporter2)) {
            rep.report(prob.getMessage(), prob.getType(), prob, loc);
            return;
        }
        ((XMLReporter2) rep).report(prob);
    }

    /**
     * Reports given validation problem: problems more severe than
     * {@link XMLValidationProblem#SEVERITY_ERROR} always result in an
     * immediate exception; others are passed to the configured
     * {@link XMLReporter}, if there is one.
     *<p>
     * Note: this is the base implementation used for implementing
     * ValidationContext
     *
     * @param prob Problem to report
     */
    public void reportValidationProblem(XMLValidationProblem prob)
        throws XMLStreamException
    {
        // !!! TBI: Fail-fast vs. deferred modes?
        /* 27-May-2008, TSa: [WSTX-153] As per Stax javadocs for
         *   XMLReporter, both warnings and non-fatal errors (which
         *   includes all validation errors) should be reported via
         *   XMLReporter interface, and only fatals should cause an
         *   immediate stream exception (by-passing reporter)
         */
        if (prob.getSeverity() > XMLValidationProblem.SEVERITY_ERROR) {
            throw WstxValidationException.create(prob);
        }
        final XMLReporter rep = mConfig.getXMLReporter();
        if (rep != null) {
            _reportProblem(rep, prob);
            return;
        }
        /* If no reporter, regular non-fatal errors are to be reported
         * as exceptions as well, for backwards compatibility
         */
        if (prob.getSeverity() >= XMLValidationProblem.SEVERITY_ERROR) {
            throw WstxValidationException.create(prob);
        }
    }

    /**
     * Reports a validation problem with given message and severity,
     * located at the last character read.
     *
     * @param msg Problem message
     * @param severity Severity to assign to the problem
     */
    public void reportValidationProblem(String msg, int severity)
        throws XMLStreamException
    {
        final XMLValidationProblem prob =
            new XMLValidationProblem(getLastCharLocation(), msg, severity);
        reportValidationProblem(prob);
    }

    /**
     * Reports an error-severity validation problem with given message,
     * located at the last character read.
     *
     * @param msg Problem message
     */
    public void reportValidationProblem(String msg)
        throws XMLStreamException
    {
        final XMLValidationProblem prob = new XMLValidationProblem(
                getLastCharLocation(), msg, XMLValidationProblem.SEVERITY_ERROR);
        reportValidationProblem(prob);
    }

    /**
     * Reports a validation problem with given message at given location.
     *
     * @param loc Location of the problem
     * @param msg Problem message
     */
    public void reportValidationProblem(Location loc, String msg)
        throws XMLStreamException
    {
        final XMLValidationProblem prob = new XMLValidationProblem(loc, msg);
        reportValidationProblem(prob);
    }

    /**
     * Reports a validation problem whose message is constructed by
     * formatting given pattern with given arguments (using
     * {@link MessageFormat}).
     *
     * @param format Message format pattern
     * @param arg First format argument
     * @param arg2 Second format argument
     */
    public void reportValidationProblem(String format, Object arg, Object arg2)
        throws XMLStreamException
    {
        final String msg = MessageFormat.format(format, new Object[] { arg, arg2 });
        reportValidationProblem(msg);
    }

    /*
    ////////////////////////////////////////////////////
    // Other error reporting methods
    ////////////////////////////////////////////////////
     */

    /**
     * Constructs (but does not throw) a parsing exception with given
     * message, located at the last character read.
     *
     * @param msg Error message
     *
     * @return Newly constructed exception
     */
    protected WstxException constructWfcException(String msg)
    {
        final WstxException problem =
            new WstxParsingException(msg, getLastCharLocation());
        return problem;
    }

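    /* (Describes the disabled method _constructTypeException below;
     * kept as a regular comment so that it does not get picked up as
     * documentation for the next enabled method.)
     *
     * Construct and return a {@link XMLStreamException} to throw
     * as a result of a failed Typed Access operation (but one not
     * caused by a Well-Formedness Constraint or Validation Constraint
     * problem)
     */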
    /*
    protected WstxException _constructTypeException(String msg)
    {
        // Hmmh. Should there be a distinct sub-type?
        return new WstxParsingException(msg, getLastCharLocation());
    }
    */

    /**
     * Wraps given I/O exception in a stream exception (without throwing it).
     *
     * @param ioe Exception to wrap
     *
     * @return Newly constructed wrapper exception
     */
    protected WstxException constructFromIOE(IOException ioe)
    {
        final WstxException wrapped = new WstxIOException(ioe);
        return wrapped;
    }

    /**
     * Constructs (but does not throw) the exception used for reporting
     * a null character in input, located at the last character read.
     *
     * @return Newly constructed exception
     */
    protected WstxException constructNullCharException()
    {
        final String msg = "Illegal character (NULL, unicode 0) encountered: not valid in any content";
        return new WstxUnexpectedCharException(msg, getLastCharLocation(), CHAR_NULL);
    }

    /**
     * Throws an exception indicating that an unexpected character was
     * encountered, located at the last character read.
     *
     * @param i Character encountered (cast to a char)
     * @param msg Text appended after the character description in
     *   the exception message
     */
    protected void throwUnexpectedChar(int i, String msg)
        throws WstxException
    {
        final char unexpected = (char) i;
        throw new WstxUnexpectedCharException(
                "Unexpected character "+getCharDesc(unexpected)+msg,
                getLastCharLocation(), unexpected);
    }

    /**
     * Throws the exception used for reporting a null character in input.
     */
    protected void throwNullChar()
        throws WstxException
    {
        final WstxException problem = constructNullCharException();
        throw problem;
    }

    /**
     * Throws (immediately, without deferring) an exception indicating
     * that an illegal character was encountered.
     *
     * @param i Character encountered
     */
    protected void throwInvalidSpace(int i)
        throws WstxException
    {
        final boolean defer = false;
        throwInvalidSpace(i, defer);
    }

    /**
     * Constructs an exception indicating that an illegal character was
     * encountered, and either throws it or hands it back to the caller.
     * Null character gets its own, more specific exception.
     *
     * @param i Character encountered (cast to a char)
     * @param deferErrors If true, exception is returned instead of thrown
     *
     * @return Constructed exception (only when <code>deferErrors</code>
     *   is true)
     */
    protected WstxException throwInvalidSpace(int i, boolean deferErrors)
        throws WstxException
    {
        final char illegal = (char) i;
        final WstxException problem;
        if (illegal != CHAR_NULL) {
            String desc = "Illegal character ("+getCharDesc(illegal)+")";
            if (mXml11) {
                desc = desc + " [note: in XML 1.1, it could be included via entity expansion]";
            }
            problem = new WstxUnexpectedCharException(desc, getLastCharLocation(), illegal);
        } else {
            problem = constructNullCharException();
        }
        if (deferErrors) {
            return problem;
        }
        throw problem;
    }

    /**
     * Throws an exception indicating unexpected end of input, located
     * at the last character read.
     *
     * @param msg Text to append to the base message; if null, nothing
     *   is appended
     */
    protected void throwUnexpectedEOF(String msg)
        throws WstxException
    {
        final String suffix = (msg != null) ? msg : "";
        throw new WstxEOFException("Unexpected EOF" + suffix,
                getLastCharLocation());
    }

    /**
     * Like {@link #throwUnexpectedEOF}, except that only the end of an
     * input block is indicated. Used when reading a token that can not
     * span input block boundaries (ie. can not continue past end of an
     * entity expansion).
     *
     * @param msg Text to append to the base message; if null, nothing
     *   is appended
     */
    protected void throwUnexpectedEOB(String msg)
        throws WstxException
    {
        final String suffix = (msg != null) ? msg : "";
        throw new WstxEOFException("Unexpected end of input block" + suffix,
                getLastCharLocation());
    }

    /**
     * Wraps given I/O exception in a stream exception, and throws that.
     *
     * @param ioe Exception to wrap
     */
    protected void throwFromIOE(IOException ioe)
        throws WstxException
    {
        final WstxIOException wrapped = new WstxIOException(ioe);
        throw wrapped;
    }

    /**
     * Rethrows given stream exception as a {@link WstxException}: as is
     * if it already is one, otherwise wrapped in a new one (with the
     * original set as the cause).
     *
     * @param strex Exception to rethrow
     */
    protected void throwFromStrE(XMLStreamException strex)
        throws WstxException
    {
        if (!(strex instanceof WstxException)) {
            final WstxException wrapped = new WstxException(strex);
            ExceptionUtil.setInitCause(wrapped, strex);
            throw wrapped;
        }
        throw (WstxException) strex;
    }

    /**
     * Method called to report an error in cases where signature of the
     * caller only allows runtime exceptions to be thrown.
     *
     * @param e Exception to report; stream exceptions are first passed
     *   to {@link WstxLazyException#throwLazily}
     */
    protected void throwLazyError(Exception e)
    {
        final boolean isStreamProblem = (e instanceof XMLStreamException);
        if (isStreamProblem) {
            WstxLazyException.throwLazily((XMLStreamException) e);
        }
        ExceptionUtil.throwRuntimeException(e);
    }

    /**
     * @param type Token type to describe
     *
     * @return Description of the token type, as given by
     *   {@link ErrorConsts#tokenTypeDesc}
     */
    protected String tokenTypeDesc(int type)
    {
        final String desc = ErrorConsts.tokenTypeDesc(type);
        return desc;
    }

    /*
    ////////////////////////////////////////////////////
    // Input buffer handling
    ////////////////////////////////////////////////////
     */

    /**
     * Accessor for the currently active input source.
     *<p>
     * Note: public only because some implementations are on different
     * package.
     *
     * @return Currently active input source
     */
    public final WstxInputSource getCurrentInput() {
        final WstxInputSource current = mInput;
        return current;
    }

    /**
     * @return Number of characters between the current input pointer
     *   and the end of the buffered input
     */
    protected final int inputInBuffer() {
        final int remaining = mInputEnd - mInputPtr;
        return remaining;
    }

    /**
     * Returns the next input character, advancing the input pointer;
     * loads more input (see {@link #loadMore()}) if the buffer has been
     * exhausted.
     *
     * @return Next character; or -1 if no more input could be loaded
     */
    protected final int getNext()
        throws XMLStreamException
    {
        if (mInputPtr >= mInputEnd && !loadMore()) {
            return -1;
        }
        return (int) mInputBuffer[mInputPtr++];
    }

    /**
     * Like {@link #getNext}, except that the input pointer is left
     * where it is.
     *<p>
     * Note: this method only peeks within current input source;
     * it does not close it and check nested input source (if any).
     * This is necessary when checking keywords, since they can never
     * cross input block boundary.
     *
     * @return Next character; or -1 if current input source has no
     *   more content
     */
    protected final int peekNext()
        throws XMLStreamException
    {
        if (mInputPtr >= mInputEnd && !loadMoreFromCurrent()) {
            return -1;
        }
        return (int) mInputBuffer[mInputPtr];
    }

    /**
     * Returns the next input character, advancing the input pointer;
     * if the buffer has been exhausted, more input is loaded via
     * {@link #loadMore(String)}.
     *
     * @param errorMsg Message passed to {@link #loadMore(String)}, for
     *   use in the exception thrown if no more input is available
     *
     * @return Next character
     */
    protected final char getNextChar(String errorMsg)
        throws XMLStreamException
    {
        if (mInputPtr >= mInputEnd) {
            loadMore(errorMsg);
        }
        final char next = mInputBuffer[mInputPtr++];
        return next;
    }

    /**
     * Like {@link #getNextChar}, except that no characters are read from
     * parent input source(s) if the current input source has run out of
     * content. This is often needed to prevent "runaway" content,
     * such as comments that start in an entity but do not have matching
     * close marker inside entity; XML specification specifically states
     * such markup is not legal.
     *
     * @param errorMsg Message passed to
     *   {@link #loadMoreFromCurrent(String)}, for use in the exception
     *   thrown if current input source has no more content
     *
     * @return Next character
     */
    protected final char getNextCharFromCurrent(String errorMsg)
        throws XMLStreamException
    {
        if (mInputPtr >= mInputEnd) {
            loadMoreFromCurrent(errorMsg);
        }
        final char next = mInputBuffer[mInputPtr++];
        return next;
    }

    /**
     * Method that will skip through zero or more white space characters,
     * and return either the character following white space, or -1 to
     * indicate EOF (end of the outermost input source).
     * Linefeeds skipped are accounted for in row tracking; other
     * characters at or below space (except for tab) are reported as
     * illegal.
     *
     * @return First non-white-space character; or -1 if input ended
     */
    protected final int getNextAfterWS()
        throws XMLStreamException
    {
        while (true) {
            if (mInputPtr >= mInputEnd && !loadMore()) {
                return -1;
            }
            final char c = mInputBuffer[mInputPtr++];
            if (c > CHAR_SPACE) { // not white space, we are done
                return (int) c;
            }
            if (c == '\n' || c == '\r') { // linefeed
                skipCRLF(c);
            } else if (c != CHAR_SPACE && c != '\t') {
                throwInvalidSpace(c);
            }
        }
    }

    /**
     * Skips through zero or more white space characters, and returns
     * the character that follows. Linefeeds skipped are accounted for
     * in row tracking; other characters at or below space (except for
     * tab) are reported as illegal.
     *
     * @param errorMsg Message passed to {@link #loadMore(String)}, for
     *   use in the exception thrown if input ends
     *
     * @return First non-white-space character
     */
    protected final char getNextCharAfterWS(String errorMsg)
        throws XMLStreamException
    {
        while (true) {
            if (mInputPtr >= mInputEnd) {
                loadMore(errorMsg);
            }
            final char c = mInputBuffer[mInputPtr++];
            if (c > CHAR_SPACE) { // not white space, we are done
                return c;
            }
            if (c == '\n' || c == '\r') { // linefeed
                skipCRLF(c);
            } else if (c != CHAR_SPACE && c != '\t') {
                throwInvalidSpace(c);
            }
        }
    }

    /**
     * Reads the next character from the current input source, and then
     * skips any white space starting with it.
     *
     * @param errorMsg Message for the exception thrown if current input
     *   source runs out of content
     *
     * @return First non-white-space character
     */
    protected final char getNextInCurrAfterWS(String errorMsg)
        throws XMLStreamException
    {
        final char first = getNextCharFromCurrent(errorMsg);
        return getNextInCurrAfterWS(errorMsg, first);
    }

    /**
     * Skips white space, starting with given (already read) character
     * and continuing within the current input source only.
     *
     * @param errorMsg Message passed to
     *   {@link #loadMoreFromCurrent(String)}, for use in the exception
     *   thrown if current input source runs out of content
     * @param c First character to check
     *
     * @return First non-white-space character (which may be
     *   <code>c</code> itself)
     */
    protected final char getNextInCurrAfterWS(String errorMsg, char c)
        throws XMLStreamException
    {
        for (char curr = c; ; curr = mInputBuffer[mInputPtr++]) {
            if (curr > CHAR_SPACE) { // not white space, we are done
                return curr;
            }
            if (curr == '\n' || curr == '\r') { // linefeed
                skipCRLF(curr);
            } else if (curr != CHAR_SPACE && curr != '\t') {
                throwInvalidSpace(curr);
            }
            // Need more input before reading the next char?
            if (mInputPtr >= mInputEnd) {
                loadMoreFromCurrent(errorMsg);
            }
        }
    }

    /**
     * Method called when a linefeed character (CR or LF) has been spotted
     * in input; if it is a CR, checks if next char is LF, and if so, skips
     * it. Note that next character has to come from the current input
     * source, to qualify; it can never come from another (nested) input
     * source. In all cases row number and row start offset are updated.
     *
     * @param c Linefeed character that was read
     *
     * @return True, if passed in char is '\r' and next one is '\n'.
     */
    protected final boolean skipCRLF(char c) 
        throws XMLStreamException
    {
        final boolean sawCrLf = (c == '\r') && (peekNext() == '\n');
        if (sawCrLf) {
            ++mInputPtr;
        }
        // Either way, a new row starts at the current position
        markLF();
        return sawCrLf;
    }

    /**
     * Records start of a new input row at the current input pointer.
     */
    protected final void markLF() {
        markLF(mInputPtr);
    }

    /**
     * Records start of a new input row at the given offset.
     *
     * @param inputPtr Offset to record as the start of the new row
     */
    protected final void markLF(int inputPtr) {
        mCurrInputRowStart = inputPtr;
        ++mCurrInputRow;
    }

    /**
     * Method to push back the last character read, by moving the input
     * pointer back by one; can only be called once, that is, no more
     * than one char can be guaranteed to be successfully returned.
     */
    protected final void pushback() {
        mInputPtr -= 1;
    }

    /*
    ////////////////////////////////////////////////////
    // Sub-class overridable input handling methods
    ////////////////////////////////////////////////////
     */

    /**
     * Method called when an entity has been expanded (new input source
     * has been created). Needs to initialize location information and change
     * active input source.
     *
     * @param newInput Input source to make the active one
     * @param isExt Whether the new input comes from an external entity;
     *   determines whether linefeeds are to be normalized
     * @param entityId Name of the entity being expanded (not used by
     *   this implementation)
     */
    protected void initInputSource(WstxInputSource newInput, boolean isExt,
                                   String entityId)
        throws XMLStreamException
    {
        mInput = newInput;
        // Empty buffer ensures new input gets read next time input is needed
        mInputEnd = 0;
        mInputPtr = 0;

        // Input location is reset too, to be accurate for error reporting etc.
        mInputTopDepth = mCurrDepth;
        newInput.initInputLocation(this, mCurrDepth);

        /* 21-Feb-2006, TSa: Linefeeds are NOT normalized when expanding
         *   internal entities (XML, 2.11)
         */
        mNormalizeLFs = isExt;
    }

    /**
     * Method that will try to read one or more characters from currently
     * open input sources; closing input sources if necessary.
     *<p>
     * When the active input source has nothing more to give, it is
     * closed, and (unless it is the root input) its parent is made the
     * active input source, with its context restored; this is repeated
     * until there is content in the buffer, or the root input has
     * been exhausted.
     *
     * @return true if reading succeeded (or may succeed), false if
     *   we reached EOF.
     *
     * @throws XMLStreamException If reading or closing the input source
     *   fails with an I/O problem (see {@link #constructFromIOE}), or an
     *   entity nesting problem gets reported
     */
    protected boolean loadMore()
        throws XMLStreamException
    {
        WstxInputSource input = mInput;
        do {
            /* Need to make sure offsets are properly updated for error
             * reporting purposes, and do this now while previous amounts
             * are still known.
             */
            mCurrInputProcessed += mInputEnd;
            mCurrInputRowStart -= mInputEnd;
            int count;
            try {
                count = input.readInto(this);
                if (count > 0) {
                    return true;
                }
                // Nothing (more) available from this source: done with it
                input.close();
            } catch (IOException ioe) {
                throw constructFromIOE(ioe);
            }
            if (input == mRootInput) {
                /* Note: no need to check entity/input nesting in this
                 * particular case, since it will be handled by higher level
                 * parsing code (results in an unexpected EOF)
                 */
                return false;
            }
            WstxInputSource parent = input.getParent();
            if (parent == null) { // sanity check!
                throwNullParent(input);
            }
            /* 13-Feb-2006, TSa: Ok, do we violate a proper nesting constraints
             *   with this input block closure?
             */
            if (mCurrDepth != input.getScopeId()) {
                handleIncompleteEntityProblem(input);
            }

            // Switch back to the parent input source
            mInput = input = parent;
            input.restoreContext(this);
            mInputTopDepth = input.getScopeId();
            /* 21-Feb-2006, TSa: Since linefeed normalization needs to be
             *   suppressed for internal entity expansion, we may need to
             *   change the state...
             */
            if (!mNormalizeLFs) {
                mNormalizeLFs = !input.fromInternalEntity();
            }
            // Maybe there are leftovers from that input in buffer now?
        } while (mInputPtr >= mInputEnd);

        return true;
    }

    /**
     * Variant of {@link #loadMore()} that reports running out of input
     * through <code>throwUnexpectedEOF</code> instead of returning false.
     *
     * @param errorMsg Message passed on to the EOF reporting method
     *
     * @return Always true, when the method returns normally
     */
    protected final boolean loadMore(String errorMsg)
        throws XMLStreamException
    {
        final boolean gotMore = loadMore();
        if (!gotMore) {
            throwUnexpectedEOF(errorMsg);
        }
        return true;
    }

    /**
     * Tries to read more content from the currently active input source
     * only; unlike {@link #loadMore()}, it neither closes the source nor
     * falls back to its parent.
     *
     * @return true if the current source produced at least one character
     */
    protected boolean loadMoreFromCurrent()
        throws XMLStreamException
    {
        // Offsets must account for the buffer that was just used up
        mCurrInputProcessed += mInputEnd;
        mCurrInputRowStart -= mInputEnd;
        try {
            return mInput.readInto(this) > 0;
        } catch (IOException ie) {
            throw constructFromIOE(ie);
        }
    }

    /**
     * Variant of {@link #loadMoreFromCurrent()} that reports an exhausted
     * current source through <code>throwUnexpectedEOB</code> instead of
     * returning false.
     *
     * @param errorMsg Message passed on to the reporting method
     *
     * @return Always true, when the method returns normally
     */
    protected final boolean loadMoreFromCurrent(String errorMsg)
        throws XMLStreamException
    {
        final boolean gotMore = loadMoreFromCurrent();
        if (!gotMore) {
            throwUnexpectedEOB(errorMsg);
        }
        return true;
    }

    /**
     * Method called to make sure current main-level input buffer has at
     * least specified number of characters available consecutively,
     * without having to call {@link #loadMore}. It can only be called
     * when input comes from main-level buffer; further, call can shift
     * content in input buffer, so caller has to flush any data still
     * pending. In short, caller has to know exactly what it's doing. :-)
     *<p>
     * Note: only the current input source is consulted -- if it can not
     * fulfill the request, a failure is indicated; no other input source
     * is tried.
     *
     * @param minAmount Number of characters that need to be available,
     *   starting from the current input pointer
     *
     * @return true if there's now enough data; false if not (EOF)
     */
    protected boolean ensureInput(int minAmount)
        throws XMLStreamException
    {
        // Fast path: the buffer already holds enough unread characters
        if (mInputEnd - mInputPtr >= minAmount) {
            return true;
        }
        // Otherwise the input source gets to try to read more
        try {
            return mInput.readMore(this, minAmount);
        } catch (IOException ie) {
            throw constructFromIOE(ie);
        }
    }

    /**
     * Closes the currently active input source and then each of its
     * ancestors, up to and including the root input source. The active
     * source ({@code mInput}) is moved to the parent after each nested
     * source has been closed.
     *
     * @param force If true, sources are closed using
     *   <code>closeCompletely()</code>; otherwise using
     *   <code>close()</code>
     */
    protected void closeAllInput(boolean force)
        throws XMLStreamException
    {
        WstxInputSource curr = mInput;
        while (true) {
            try {
                if (force) {
                    curr.closeCompletely();
                } else {
                    curr.close();
                }
            } catch (IOException ie) {
                throw constructFromIOE(ie);
            }
            // Root was the last one to close
            if (curr == mRootInput) {
                return;
            }
            final WstxInputSource parent = curr.getParent();
            if (parent == null) { // sanity check!
                throwNullParent(curr);
            }
            mInput = curr = parent;
        }
    }

    /**
     * Sanity-check failure handler, called when a non-root input source
     * turns out to have no parent.
     *
     * @param curr Input source whose parent was null (not included in
     *   the exception message)
     *
     * @throws IllegalStateException always
     */
    protected void throwNullParent(WstxInputSource curr)
    {
        final String msg = ErrorConsts.ERR_INTERNAL;
        throw new IllegalStateException(msg);
    }

    /*
    ////////////////////////////////////////////////////
    // Entity resolution
    ////////////////////////////////////////////////////
     */

    /**
     * Method that tries to resolve a character entity, or (if caller so
     * specifies), a pre-defined internal entity (lt, gt, amp, apos, quot).
     * It will succeed iff:
     * <ul>
     *  <li>Entity in question is a simple character entity (either one of
     *    5 pre-defined ones, or using decimal/hex notation), AND
     *   </li>
     *  <li>Entity fits completely inside current input buffer.
     *   </li>
     * </ul>
     * If so, character value of entity is returned. Character 0 is returned
     * otherwise; if so, caller needs to do full resolution.
     *<p>
     * The input pointer ({@code mInputPtr}) is only advanced when the
     * reference was resolved (or just before an illegal character is
     * reported); when null char is returned it is left untouched.
     *<p>
     * Note: On entry we are guaranteed there are at least 3 more characters
     * in this buffer; otherwise we shouldn't be called.
     *
     * @param checkStd If true, will check pre-defined internal entities
     *   (gt, lt, amp, apos, quot); if false, will only check actual
     *   character entities.
     *
     * @return (Valid) character value, if entity is a character reference,
     *   and could be resolved from current input buffer (does not span
     *   buffer boundary); null char (code 0) if not (either non-char
     *   entity, or spans input buffer boundary).
     */
    protected char resolveSimpleEntity(boolean checkStd)
        throws XMLStreamException
    {
        // Work on local copies; mInputPtr is only updated on success/error
        char[] buf = mInputBuffer;
        int ptr = mInputPtr;
        char c = buf[ptr++];

        // Numeric reference?
        if (c == '#') {
            c = buf[ptr++];
            int value = 0;
            int inputLen = mInputEnd;
            if (c == 'x') { // hex
                // Loop ends either at ';' or when the buffer runs out
                while (ptr < inputLen) {
                    c = buf[ptr++];
                    if (c == ';') {
                        break;
                    }
                    value = value << 4;
                    if (c <= '9' && c >= '0') {
                        value += (c - '0');
                    } else if (c >= 'a' && c <= 'f') {
                        value += (10 + (c - 'a'));
                    } else if (c >= 'A' && c <= 'F') {
                        value += (10 + (c - 'A'));
                    } else {
                        mInputPtr = ptr; // so error points to correct char
                        throwUnexpectedChar(c, "; expected a hex digit (0-9a-fA-F).");
                    }
                    /* Need to check for overflow; easiest to do right as
                     * it happens...
                     */
                    if (value > MAX_UNICODE_CHAR) {
                        reportUnicodeOverflow();
                    }
                }
            } else { // numeric (decimal)
                // 'c' already holds the first char after '#'
                while (c != ';') {
                    if (c <= '9' && c >= '0') {
                        value = (value * 10) + (c - '0');
                        // Overflow?
                        if (value > MAX_UNICODE_CHAR) {
                            reportUnicodeOverflow();
                        }
                    } else {
                        mInputPtr = ptr; // so error points to correct char
                        throwUnexpectedChar(c, "; expected a decimal number.");
                    }
                    // Out of buffered input: 'c' stays a digit, so the
                    // check below falls through to the null-char return
                    if (ptr >= inputLen) {
                        break;
                    }
                    c = buf[ptr++];
                }
            }
            /* We get here either if we got it all, OR if we ran out of
             * input in current buffer.
             */
            if (c == ';') { // got the full thing
                mInputPtr = ptr;
                return checkAndExpandChar(value);
            }

            /* If we ran out of input, need to just fall back, gets
             * resolved via 'full' resolution mechanism.
             */
        } else if (checkStd) {
            /* Caller may not want to resolve these quite yet...
             * (when it wants separate events for non-char entities)
             */
            /* Note: first 3 chars can be read without bounds checks (see
             * the guarantee in method comment); chars beyond that are
             * checked against mInputEnd.
             */
            if (c == 'a') { // amp or apos?
                c = buf[ptr++];
                
                if (c == 'm') { // amp?
                    if (buf[ptr++] == 'p') {
                        if (ptr < mInputEnd && buf[ptr++] == ';') {
                            mInputPtr = ptr;
                            return '&';
                        }
                    }
                } else if (c == 'p') { // apos?
                    if (buf[ptr++] == 'o') {
                        int len = mInputEnd;
                        if (ptr < len && buf[ptr++] == 's') {
                            if (ptr < len && buf[ptr++] == ';') {
                                mInputPtr = ptr;
                                return '\'';
                            }
                        }
                    }
                }
            } else if (c == 'g') { // gt?
                if (buf[ptr++] == 't' && buf[ptr++] == ';') {
                    mInputPtr = ptr;
                    return '>';
                }
            } else if (c == 'l') { // lt?
                if (buf[ptr++] == 't' && buf[ptr++] == ';') {
                    mInputPtr = ptr;
                    return '<';
                }
            } else if (c == 'q') { // quot?
                if (buf[ptr++] == 'u' && buf[ptr++] == 'o') {
                    int len = mInputEnd;
                    if (ptr < len && buf[ptr++] == 't') {
                        if (ptr < len && buf[ptr++] == ';') {
                            mInputPtr = ptr;
                            return '"';
                        }
                    }
                }
            }
        }
        // Not resolvable here; mInputPtr has not been moved
        return CHAR_NULL;
    }

    /**
     * Method called to resolve character entities, and only character
     * entities (except that pre-defined char entities -- amp, apos, lt,
     * gt, quot -- MAY be "char entities" in this sense, depending on
     * arguments).
     * Otherwise it is to return the null char; if so,
     * the input pointer will point to the same point as when method
     * entered (char after ampersand), plus the ampersand itself is
     * guaranteed to be in the input buffer (so caller can just push it
     * back if necessary).
     *<p>
     * Most often this method is called when reader is not to expand
     * non-char entities automatically, but to return them as separate
     * events.
     *<p>
     * Main complication here is that we need to do 5-char lookahead. This
     * is problematic if chars are on input buffer boundary. This is ok
     * for the root level input buffer, but not for some nested buffers.
     * However, according to XML specs, such split entities are actually
     * illegal... so we can throw an exception in those cases.
     *
     * @param checkStd If true, will check pre-defined internal entities
     *   (gt, lt, amp, apos, quot) as character entities; if false, will only
     *   check actual 'real' character entities.
     *
     * @return (Valid) character value, if entity is a character reference
     *   (or, with <code>checkStd</code>, a pre-defined entity); null char
     *   (code 0) if not.
     */
    protected char resolveCharOnlyEntity(boolean checkStd)
        throws XMLStreamException
    {
        // Number of chars available AFTER the ampersand
        int avail = mInputEnd - mInputPtr;
        if (avail < 6) {
            // split entity, or buffer boundary
            /* Don't want to lose leading '&' (in case we can not expand
             * the entity), so let's push it back first
             */
            --mInputPtr;
            /* Shortest valid reference would be 3 chars ('&a;'); which
             * would only be legal from an expanded entity...
             */
            if (!ensureInput(6)) {
                // (count here still includes the pushed-back '&')
                if (inputInBuffer() < 3) {
                    throwUnexpectedEOF(SUFFIX_IN_ENTITY_REF);
                }
            }
            // ... and now we can move pointer back as well:
            ++mInputPtr;
            /* Important: count has to be re-calculated only now, so that
             * it does NOT include the ampersand. Otherwise the bounds
             * checks below would be off by one, and could allow reading
             * one char past the valid content of the buffer.
             */
            avail = mInputEnd - mInputPtr;
        }

        /* Ok, now we have one more character to check, and that's enough
         * to determine type decisively.
         */
        char c = mInputBuffer[mInputPtr];

        // A char reference?
        if (c == '#') { // yup
            ++mInputPtr;
            return resolveCharEnt();
        }

        // nope... except may be a pre-def?
        if (checkStd) {
            if (c == 'a') {
                char d = mInputBuffer[mInputPtr+1];
                if (d == 'm') { // "amp;"
                    if (avail >= 4
                        && mInputBuffer[mInputPtr+2] == 'p'
                        && mInputBuffer[mInputPtr+3] == ';') {
                        mInputPtr += 4;
                        return '&';
                    }
                } else if (d == 'p') { // "apos;"
                    if (avail >= 5
                        && mInputBuffer[mInputPtr+2] == 'o'
                        && mInputBuffer[mInputPtr+3] == 's'
                        && mInputBuffer[mInputPtr+4] == ';') {
                        mInputPtr += 5;
                        return '\'';
                    }
                }
            } else if (c == 'l') { // "lt;"
                if (avail >= 3
                    && mInputBuffer[mInputPtr+1] == 't'
                    && mInputBuffer[mInputPtr+2] == ';') {
                    mInputPtr += 3;
                    return '<';
                }
            } else if (c == 'g') { // "gt;"
                if (avail >= 3
                    && mInputBuffer[mInputPtr+1] == 't'
                    && mInputBuffer[mInputPtr+2] == ';') {
                    mInputPtr += 3;
                    return '>';
                }
            } else if (c == 'q') { // "quot;"
                if (avail >= 5
                    && mInputBuffer[mInputPtr+1] == 'u'
                    && mInputBuffer[mInputPtr+2] == 'o'
                    && mInputBuffer[mInputPtr+3] == 't'
                    && mInputBuffer[mInputPtr+4] == ';') {
                    mInputPtr += 5;
                    return '"';
                }
            }
        }
        // Not a char entity; pointer is still at char after ampersand
        return CHAR_NULL;
    }

    /**
     * Reverse of {@link #resolveCharOnlyEntity}; will only resolve entity
     * if it is NOT a character entity (or pre-defined 'generic' entity;
     * amp, apos, lt, gt or quot). Only used in cases where entities
     * are to be separately returned unexpanded (in non-entity-replacing
     * mode); which means it's never called from dtd handler.
     *<p>
     * When null is returned because reference is a character or
     * pre-defined entity, input pointer is left pointing to the char
     * after ampersand. Otherwise entity name is parsed (and stored as the
     * current name), and declaration is looked up using
     * {@link #findEntity}.
     *
     * @return Declaration found for the general entity referenced; null
     *   if reference is to a character or pre-defined entity, or if
     *   {@link #findEntity} returns null
     */
    protected EntityDecl resolveNonCharEntity()
        throws XMLStreamException
    {
        // Number of chars available AFTER the ampersand
        int avail = mInputEnd - mInputPtr;
        if (avail < 6) {
            // split entity, or buffer boundary
            /* Don't want to lose leading '&' (in case we can not expand
             * the entity), so let's push it back first
             */
            --mInputPtr;

            /* Shortest valid reference would be 3 chars ('&a;'); which
             * would only be legal from an expanded entity...
             */
            if (!ensureInput(6)) {
                // (count here still includes the pushed-back '&')
                if (inputInBuffer() < 3) {
                    throwUnexpectedEOF(SUFFIX_IN_ENTITY_REF);
                }
            }
            // ... and now we can move pointer back as well:
            ++mInputPtr;
            /* Important: count has to be re-calculated only now, so that
             * it does NOT include the ampersand. Otherwise the bounds
             * checks below would be off by one, and could allow reading
             * one char past the valid content of the buffer.
             */
            avail = mInputEnd - mInputPtr;
        }

        // We don't care about char entities:
        char c = mInputBuffer[mInputPtr];
        if (c == '#') {
            return null;
        }

        /* 19-Aug-2004, TSa: Need special handling for pre-defined
         *   entities; they are not counted as 'real' general parsed
         *   entities, but more as character entities...
         */

        /* Note: buffer may hold fewer than 5 chars after the ampersand
         * (short entity expansion), so every lookahead is guarded by
         * 'avail'.
         */
        if (c == 'a') {
            char d = mInputBuffer[mInputPtr+1];
            if (d == 'm') { // "amp;"
                if (avail >= 4
                    && mInputBuffer[mInputPtr+2] == 'p'
                    && mInputBuffer[mInputPtr+3] == ';') {
                    return null;
                }
            } else if (d == 'p') { // "apos;"
                if (avail >= 5
                    && mInputBuffer[mInputPtr+2] == 'o'
                    && mInputBuffer[mInputPtr+3] == 's'
                    && mInputBuffer[mInputPtr+4] == ';') {
                    return null;
                }
            }
        } else if (c == 'l') { // "lt;"
            if (avail >= 3
                && mInputBuffer[mInputPtr+1] == 't'
                && mInputBuffer[mInputPtr+2] == ';') {
                return null;
            }
        } else if (c == 'g') { // "gt;"
            if (avail >= 3
                && mInputBuffer[mInputPtr+1] == 't'
                && mInputBuffer[mInputPtr+2] == ';') {
                return null;
            }
        } else if (c == 'q') { // "quot;"
            if (avail >= 5
                && mInputBuffer[mInputPtr+1] == 'u'
                && mInputBuffer[mInputPtr+2] == 'o'
                && mInputBuffer[mInputPtr+3] == 't'
                && mInputBuffer[mInputPtr+4] == ';') {
                return null;
            }
        }

        // Otherwise, let's just parse in generic way:
        ++mInputPtr; // since we already read the first letter
        String id = parseEntityName(c);
        mCurrName = id;

        return findEntity(id, null);
    }

    /**
     * Method that does full resolution of an entity reference, be it
     * character entity, internal entity or external entity, including
     * updating of input buffers. Numeric character references and the
     * 5 pre-defined entities resolve to a single character that is
     * returned; anything else is expanded by changing the input source,
     * which is indicated by returning the null character (code 0).
     *
     * @param allowExt If true, is allowed to expand external entities
     *   (expanding text); if false, is not (expanding attribute value).
     *
     * @return Either single-character replacement (which is NOT to be
     *    reparsed), or null char (0) to indicate expansion is done via
     *    input source.
     */
    protected char fullyResolveEntity(boolean allowExt)
        throws XMLStreamException
    {
        final char first = getNextCharFromCurrent(SUFFIX_IN_ENTITY_REF);

        // Numeric character reference?
        if (first == '#') {
            return resolveCharEnt();
        }

        final String name = parseEntityName(first);

        /* 16-May-2004, TSa: Should custom entities (or ones defined in
         *   int/ext subset) override pre-defined settings for these?
         */
        // One of the pre-defined entities, perhaps?
        switch (name.charAt(0)) {
        case 'a': // amp or apos?
            if (name.equals("amp")) {
                return '&';
            }
            if (name.equals("apos")) {
                return '\'';
            }
            break;
        case 'g': // gt?
            if (name.equals("gt")) {
                return '>';
            }
            break;
        case 'l': // lt?
            if (name.equals("lt")) {
                return '<';
            }
            break;
        case 'q': // quot?
            if (name.equals("quot")) {
                return '"';
            }
            break;
        default:
            break;
        }

        // Regular general entity: expansion switches the input source
        expandEntity(name, allowExt, null);
        return CHAR_NULL;
    }

    /**
     * Helper method that will try to expand a parsed entity (parameter or
     * generic entity), looked up by its name.
     *<p>
     * note: called by sub-classes (dtd parser), needs to be protected.
     *
     * @param id Name of the entity being expanded
     * @param allowExt Whether external entities can be expanded or not; if
     *   not, and the entity to expand would be external one, an exception
     *   will be thrown
     * @param extraArg Opaque argument passed as is to {@link #findEntity}
     *
     * @return Declaration of the entity that was expanded; null if no
     *   declaration was found
     */
    protected EntityDecl expandEntity(String id, boolean allowExt,
                                      Object extraArg)
        throws XMLStreamException
    {
        mCurrName = id;

        final EntityDecl decl = findEntity(id, extraArg);
        if (decl != null) {
            expandEntity(decl, allowExt);
            return decl;
        }

        /* 30-Sep-2005, TSa: As per [WSTX-5], a missing declaration is only
         *   a problem if entity has to be resolved (otherwise it's just
         *   best-effort, and null is ok)
         */
        /* 02-Oct-2005, TSa: Plus, [WSTX-4] adds "undeclared entity
         *    resolver"
         */
        if (mCfgReplaceEntities) {
            expandUnresolvedEntity(id);
        }
        return null;
    }

    /**
     * Expands given entity declaration: verifies that the expansion is
     * legal (no recursion, entity is parsed, external entities allowed
     * and supported), saves state of the current input source, and then
     * makes the expanded entity the active input source.
     *<p>
     * note: defined as private for documentation, ie. it's just called
     * from within this class (not sub-classes), from one specific method
     * (see above)
     *
     * @param ed Entity to be expanded
     * @param allowExt Whether external entities are allowed or not.
     */
    private void expandEntity(EntityDecl ed, boolean allowExt)
        throws XMLStreamException
    {
        final String name = ed.getName();

        // Very first thing: would expanding this lead to infinite recursion?
        if (mInput.isOrIsExpandedFrom(name)) {
            throwRecursionError(name);
        }

        /* Unparsed entities should not be referred to from attribute
         * values or text content (except via notation mechanism, but
         * that's not parsed here)
         */
        if (!ed.isParsed()) {
            throwParseError("Illegal reference to unparsed external entity \"{0}\"", name, null);
        }

        // 28-Jun-2004, TSa: Do we support external entity expansion?
        final boolean external = ed.isExternal();
        if (external && !allowExt) { // never ok in attribute value...
            throwParseError("Encountered a reference to external parsed entity \"{0}\" when expanding attribute value: not legal as per XML 1.0/1.1 #3.1", name, null);
        }
        if (external && !mConfig.willSupportExternalEntities()) {
            throwParseError("Encountered a reference to external entity \"{0}\", but stream reader has feature \"{1}\" disabled",
                            name, XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES);
        }

        // Current context gets a chance to save its state first
        final WstxInputSource parent = mInput;
        parent.saveContext(this);

        WstxInputSource expanded = null;
        try {
            expanded = ed.expand(parent, mEntityResolver, mConfig, mDocXmlVersion);
        } catch (FileNotFoundException fex) {
            /* Caught and re-reported just to get a more meaningful
             * description (with input source position etc)
             */
            throwParseError("(was {0}) {1}", fex.getClass().getName(), fex.getMessage());
        } catch (IOException ioe) {
            throw constructFromIOE(ioe);
        }

        // And from now on, input is to come from the new input source
        initInputSource(expanded, external, name);
    }

    /**
     * Handles a reference to an entity for which no declaration was
     * found: if an "undeclared entity resolver" has been configured and
     * it can produce an input source, that source becomes the active
     * input (treated as external); otherwise
     * {@link #handleUndeclaredEntity} is called.
     *<p>
     * note: only called from the local expandEntity() method
     *
     * @param id Name of the undeclared entity
     */
    private void expandUnresolvedEntity(String id)
        throws XMLStreamException
    {
        final XMLResolver resolver = mConfig.getUndeclaredEntityResolver();
        if (resolver == null) {
            handleUndeclaredEntity(id);
            return;
        }

        /* Recursion only needs to be checked if there is any chance
         * that entity might get resolved by the special resolver (it
         * must have been resolved this way earlier, too...)
         */
        if (mInput.isOrIsExpandedFrom(id)) {
            throwRecursionError(id);
        }

        final WstxInputSource parent = mInput;
        parent.saveContext(this);

        // 05-Feb-2006, TSa: If xmlVersion not explicitly known, defaults to 1.0
        final int xmlVersion = (mDocXmlVersion == XmlConsts.XML_V_UNKNOWN)
            ? XmlConsts.XML_V_10 : mDocXmlVersion;

        final WstxInputSource resolved;
        try {
            // null, null -> no public or system ids
            resolved = DefaultInputResolver.resolveEntityUsing
                (parent, id, null, null, resolver, mConfig, xmlVersion);
        } catch (IOException ioe) {
            throw constructFromIOE(ioe);
        }

        if (resolved == null) {
            handleUndeclaredEntity(id);
            return;
        }
        // true -> is external
        initInputSource(resolved, true, id);
    }

    /*
    ////////////////////////////////////////////////////
    // Abstract methods for sub-classes to implement
    ////////////////////////////////////////////////////
     */

    /**
     * Abstract method for sub-classes to implement, for finding
     * a declared general or parsed entity.
     *<p>
     * Callers within this class treat a null return value as "no
     * declaration found" (see {@link #expandEntity(String, boolean, Object)}).
     *
     * @param id Identifier of the entity to find
     * @param arg Optional argument passed from caller; needed by DTD
     *    reader.
     *
     * @return Declaration of the entity, if one was found; null if not
     */
    protected abstract EntityDecl findEntity(String id, Object arg)
        throws XMLStreamException;

    /**
     * This method gets called if a declaration for an entity was not
     * found in entity expanding mode (enabled by default for xml reader,
     * always enabled for dtd reader), and it could not be resolved using
     * the "undeclared entity resolver" either (none configured, or it
     * did not return an input source).
     *
     * @param id Name of the undeclared entity
     */
    protected abstract void handleUndeclaredEntity(String id)
        throws XMLStreamException;

    /**
     * Method called by {@link #loadMore()} when a nested input source is
     * being closed at a nesting depth that differs from the scope id
     * recorded for that input source; that is, when proper nesting
     * constraint for the input block is violated.
     *
     * @param closing Input source that has been exhausted and is being
     *   closed
     */
    protected abstract void handleIncompleteEntityProblem(WstxInputSource closing)
        throws XMLStreamException;

    /**
     * Method called when a character entity needs to expand to a pair
     * of 16-bit surrogate characters. The exact mechanism to relay
     * this information back is different for some use cases (specifically,
     * when dealing with internal entity expansion).
     *
     * @param first First character of the surrogate pair
     * @param second Second character of the surrogate pair
     *
     * @return Character for the caller to use; which one is up to the
     *   implementation (NOTE(review): not determinable from this class
     *   alone -- confirm against sub-classes)
     */
    protected abstract char handleExpandedSurrogate(char first, char second);

    /*
    ////////////////////////////////////////////////////
    // Basic tokenization
    ////////////////////////////////////////////////////
     */

    /**
     * Method that will parse name token (roughly equivalent to XML specs;
     * although bit lenient for more efficient handling); either uri prefix,
     * or local name.
     *<p>
     * Much of complexity in this method has to do with the intention to
     * try to avoid any character copies: as long as the whole name is
     * within the input buffer, it can be looked up from the symbol table
     * directly. If name may continue past the end of the buffer, rest of
     * the work is handed to {@link #parseLocalName2}, which has to copy.
     *<p>
     * One thing to note is that String returned has been canonicalized
     * and (if necessary) added to symbol table. It can thus be compared
     * against other such (usually id) Strings, with simple equality operator.
     *
     * @param c First character of the name; not yet checked for validity.
     *   If it is not a name start character, problem is reported via
     *   <code>throwUnexpectedChar</code>.
     *
     * @return Canonicalized name String
     */
    protected String parseLocalName(char c)
        throws XMLStreamException
    {
        /* Has to start with letter, or '_' (etc); ':' is not allowed as
         * it is taken as namespace separator. No use optimizing heavily,
         * since it's very likely to be a valid char...
         */
        if (!isNameStartChar(c)) {
            if (c == ':') {
                throwUnexpectedChar(c, " (missing namespace prefix?)");
            }
            throwUnexpectedChar(c, " (expected a name start character)");
        }

        final char[] buf = mInputBuffer;
        final int end = mInputEnd;
        final int start = mInputPtr - 1; // first char was already read
        int hash = (int) c;
        int ptr = mInputPtr;

        // Then zero or more name chars may follow
        for (; ptr < end; ++ptr) {
            final char ch = buf[ptr];
            if (ch < CHAR_LOWEST_LEGAL_LOCALNAME_CHAR || !isNameChar(ch)) {
                // Name ended within the buffer: no copying needed
                mInputPtr = ptr;
                return mSymbols.findSymbol(mInputBuffer, start, ptr - start, hash);
            }
            hash = (hash * 31) + (int) ch;
        }

        /* Ran out of buffered input; name may continue past buffer end,
         * so need to continue with part 2 (separate method, as this is
         * not as common as having it all in buffer)
         */
        mInputPtr = ptr;
        return parseLocalName2(start, hash);
    }

    /**
     * Second part of name token parsing; called when name can continue
     * past input buffer end (so only part was read before calling this
     * method to read the rest). The part already seen is copied to the
     * name buffer, and following name characters are appended to it.
     *<p>
     * Note that this isn't heavily optimized, on assumption it's not
     * called very often.
     *
     * @param start Offset of the first character of the name within the
     *   current input buffer
     * @param hash Hash code calculated over the characters seen so far
     *
     * @return Canonicalized name String
     */
    protected String parseLocalName2(int start, int hash)
        throws XMLStreamException
    {
        // Number of name chars collected so far (initially, the buffered part)
        int len = mInputEnd - start;
        // Let's assume fairly short names
        char[] nameBuf = getNameBuffer(len+8);

        if (len > 0) {
            System.arraycopy(mInputBuffer, start, nameBuf, 0, len);
        }

        // note: names can not cross input block (entity) boundaries, so
        // only the current input source is asked for more
        while (mInputPtr < mInputEnd || loadMoreFromCurrent()) {
            final char ch = mInputBuffer[mInputPtr];
            if (ch < CHAR_LOWEST_LEGAL_LOCALNAME_CHAR || !isNameChar(ch)) {
                break;
            }
            ++mInputPtr;
            if (len >= nameBuf.length) {
                mNameBuffer = nameBuf = expandBy50Pct(nameBuf);
            }
            nameBuf[len++] = ch;
            hash = (hash * 31) + (int) ch;
        }
        // Still need to canonicalize the name:
        return mSymbols.findSymbol(nameBuf, 0, len, hash);
    }

    /**
     * Method that will parse 'full' name token; what full means depends on
     * whether reader is namespace aware or not. If it is, full name means
     * local name with no namespace prefix (PI target, entity/notation name);
     * if not, name can contain arbitrary number of colons. Note that
     * element and attribute names are NOT parsed here, so actual namespace
     * prefix separation can be handled properly there.
     *<p>
     * Similar to {@link #parseLocalName}, much of complexity stems from
     * trying to avoid copying name characters from input buffer.
     *<p>
     * Note that returned String will be canonicalized, similar to
     * {@link #parseLocalName}, but without separating prefix/local name.
     *<p>
     * This variant reads the first character of the name itself, and
     * then delegates to {@link #parseFullName(char)}.
     *
     * @return Canonicalized name String
     */
    protected String parseFullName()
        throws XMLStreamException
    {
        if (mInputPtr >= mInputEnd) {
            // NOTE(review): return value is ignored, so if current input
            // has no more content, the read below is not guarded -- confirm
            // callers guarantee more input is available
            loadMoreFromCurrent();
        }
        final char first = mInputBuffer[mInputPtr++];
        return parseFullName(first);
    }

    /**
     * Parses a 'full' name token, given its first character, which has
     * already been read from the input buffer. In namespace-aware mode
     * any colon encountered is reported via
     * <code>throwNsColonException</code>; in non-namespace mode colons
     * are accepted as part of the name. If name may continue past the
     * end of the input buffer, rest of it is handled by
     * {@link #parseFullName2}.
     *
     * @param c First character of the name; not yet checked for validity
     *
     * @return Canonicalized name String
     */
    protected String parseFullName(char c)
        throws XMLStreamException
    {
        // First char has special handling:
        if (!isNameStartChar(c)) {
            if (c != ':') {
                if (c <= CHAR_SPACE) {
                    throwUnexpectedChar(c, " (missing name?)");
                }
                throwUnexpectedChar(c, " (expected a name start character)");
            } else if (mCfgNsEnabled) { // no name.... generally an error:
                throwNsColonException(parseFNameForError());
            }
            // (and in non-NS mode, leading colon is actually fine)
        }

        final int end = mInputEnd;
        final int start = mInputPtr - 1; // to account for the first char
        int hash = (int) c;
        int ptr = mInputPtr;

        // Then zero or more name chars may follow
        for (; ptr < end; ++ptr) {
            c = mInputBuffer[ptr];
            if (c == ':') { // colon only allowed in non-NS mode
                if (mCfgNsEnabled) {
                    mInputPtr = ptr;
                    throwNsColonException(new String(mInputBuffer, start, ptr - start) + parseFNameForError());
                }
            } else if (c < CHAR_LOWEST_LEGAL_LOCALNAME_CHAR || !isNameChar(c)) {
                // Name ended within the buffer: no copying needed
                mInputPtr = ptr;
                return mSymbols.findSymbol(mInputBuffer, start, ptr - start, hash);
            }
            hash = (hash * 31) + (int) c;
        }

        /* Ran out of buffered input; name may continue past buffer end,
         * so need to continue with part 2 (separate method, as this is
         * not as common as having it all in buffer)
         */
        mInputPtr = ptr;
        return parseFullName2(start, hash);
    }

    /**
     * Second part of full name parsing; used when the name being scanned
     * reached the end of the buffered input. Characters scanned so far
     * are copied into the name buffer, after which the rest of the name is
     * read one character at a time, loading more input from the current
     * input source as needed.
     *
     * @param start Offset within the input buffer of the first character
     *    of the name
     * @param hash Hash value calculated over the characters scanned so far
     *
     * @return Canonicalized name String, as returned by the symbol table
     */
    protected String parseFullName2(int start, int hash)
        throws XMLStreamException
    {
        // Number of name characters already scanned (up to buffer end)
        int ptr = mInputEnd - start;
        // Let's assume fairly short names
        char[] outBuf = getNameBuffer(ptr+8);

        if (ptr > 0) {
            System.arraycopy(mInputBuffer, start, outBuf, 0, ptr);
        }

        int outLen = outBuf.length;
        while (true) {
            /* 06-Sep-2004, TSa: Name tokens are not allowed to continue
             *   past entity expansion ranges... that is, all characters
             *   have to come from the same input source. Thus, let's only
             *   load things from same input level
             */
            if (mInputPtr >= mInputEnd) {
                if (!loadMoreFromCurrent()) {
                    break;
                }
            }
            char c = mInputBuffer[mInputPtr];
            if (c == ':') { // colon only allowed in non-NS mode
                if (mCfgNsEnabled) {
                    /* Input pointer still points at the colon, so
                     * parseFNameForError() will include it; appending it
                     * separately would report the name with "::".
                     */
                    throwNsColonException(new String(outBuf, 0, ptr) + parseFNameForError());
                }
            } else if (c < CHAR_LOWEST_LEGAL_LOCALNAME_CHAR) {
                break;
            } else if (!isNameChar(c)) {
                break;
            }
            ++mInputPtr;

            if (ptr >= outLen) {
                mNameBuffer = outBuf = expandBy50Pct(outBuf);
                outLen = outBuf.length;
            }
            outBuf[ptr++] = c;
            hash = (hash * 31) + (int) c;
        }

        // Still need to canonicalize the name:
        return mSymbols.findSymbol(outBuf, 0, ptr, hash);
    }

    /**
     * Method called to read in full name, including unlimited number of
     * namespace separators (':'), for the purpose of displaying name in
     * an error message. Won't do any further validations, and parsing
     * is not optimized: main need is just to get more meaningful error
     * messages.
     */
    protected String parseFNameForError()
        throws XMLStreamException
    {
        StringBuffer name = new StringBuffer(100);
        for (;;) {
            char ch;

            if (mInputPtr >= mInputEnd) {
                // Running out of input is not an error here; the name
                // collected so far is simply returned
                int next = getNext();
                if (next < 0) {
                    break;
                }
                ch = (char) next;
            } else {
                ch = mInputBuffer[mInputPtr++];
            }
            boolean partOfName = (ch == ':') || isNameChar(ch);
            if (!partOfName) {
                // Push back the character that ended the name
                --mInputPtr;
                break;
            }
            name.append(ch);
        }
        return name.toString();
    }

    /**
     * Parses the name of an entity reference, and verifies that it is
     * immediately followed by a semicolon that comes from the same input
     * source.
     *
     * @param c First character of the entity name
     *
     * @return Name of the referenced entity, as returned by
     *    {@link #parseFullName(char)}
     */
    protected final String parseEntityName(char c)
        throws XMLStreamException
    {
        final String entityName = parseFullName(c);
        // The terminating semi-colon has to come from the same input source:
        if (mInputPtr >= mInputEnd && !loadMoreFromCurrent()) {
            throwParseError("Missing semicolon after reference for entity \"{0}\"", entityName, null);
        }
        final char terminator = mInputBuffer[mInputPtr++];
        if (terminator != ';') {
            throwUnexpectedChar(terminator, "; expected a semi-colon after the reference for entity '"+entityName+"'");
        }
        return entityName;
    }
    
    /**
     * Note: does not check for number of colons, amongst other things.
     * Main idea is to skip through what superficially seems like a valid
     * id, nothing more. This is only done when really skipping through
     * something we do not care about at all: not even whether names/ids
     * would be valid (for example, when ignoring internal DTD subset).
     *
     * @return Length of skipped name.
     */
    protected int skipFullName(char c)
        throws XMLStreamException
    {
        if (!isNameStartChar(c)) {
            // Not a name at all: push the character back, nothing skipped
            --mInputPtr;
            return 0;
        }

        // First character counted; then zero or more name chars (or colons)
        int skipped = 1;
        for (;; ++skipped) {
            char next;
            if (mInputPtr < mInputEnd) {
                next = mInputBuffer[mInputPtr++];
            } else {
                next = getNextChar(SUFFIX_EOF_EXP_NAME);
            }
            if (next != ':' && !isNameChar(next)) {
                // Note: the character that ended the name is not pushed back
                return skipped;
            }
        }
    }

    /**
     * Simple parsing method that parses system ids, which are generally
     * used in entities (from DOCTYPE declaration to internal/external
     * subsets).
     *<p>
     * NOTE: returned String is not canonicalized, on assumption that
     * external ids may be longish, and are not shared all that often, as
     * they are generally just used for resolving paths, if anything.
     *<p>
     * Also note that this method is not heavily optimized, as it's not
     * likely to be a bottleneck for parsing.
     */
    protected final String parseSystemId(char quoteChar, boolean convertLFs,
                                         String errorMsg)
        throws XMLStreamException
    {
        char[] buf = getNameBuffer(-1);
        int ptr = 0;

        while (true) {
            char c = (mInputPtr < mInputEnd) ?
                mInputBuffer[mInputPtr++] : getNextChar(errorMsg);
            if (c == quoteChar) {
                break;
            }
            /* ??? 14-Jun-2004, TSa: Should we normalize linefeeds or not?
             *   It seems like we should, for all input... so that's the way it
             *   works.
             */
            if (c == '\n') {
                markLF();
            } else if (c == '\r') {
                if (peekNext() == '\n') {
                    ++mInputPtr;
                    if (!convertLFs) {
                        /* The only tricky thing; need to preserve 2-char LF; need to
                         * output one char from here, then can fall back to default:
                         */
                        if (ptr >= buf.length) {
                            buf = expandBy50Pct(buf);
                        }
                        buf[ptr++] = '\r';
                    }
                    c = '\n';
                } else if (convertLFs) {
                    c = '\n';
                }
                /* Lone CR and CR+LF end a line just like a bare LF does,
                 * so the line break has to be recorded for them as well
                 * (after the whole sequence has been consumed).
                 */
                markLF();
            }

            // Other than that, let's just append it:
            if (ptr >= buf.length) {
                buf = expandBy50Pct(buf);
            }
            buf[ptr++] = c;
        }

        return (ptr == 0) ? "" : new String(buf, 0, ptr);
    }

    /**
     * Simple parsing method that parses public ids, which are generally
     * used in entities (from DOCTYPE declaration to internal/external
     * subsets).
     *<p>
     * As per xml specs, the contents are actually normalized.
     *<p>
     * NOTE: returned String is not canonicalized, on assumption that
     * external ids may be longish, and are not shared all that often, as
     * they are generally just used for resolving paths, if anything.
     *<p>
     * Also note that this method is not heavily optimized, as it's not
     * likely to be a bottleneck for parsing.
     */
    protected final String parsePublicId(char quoteChar, String errorMsg)
        throws XMLStreamException
    {
        char[] buf = getNameBuffer(-1);
        int ptr = 0;
        // Set when white space has been seen, but not yet output
        boolean spaceToAdd = false;

        while (true) {
            char c = (mInputPtr < mInputEnd) ?
                mInputBuffer[mInputPtr++] : getNextChar(errorMsg);
            if (c == quoteChar) {
                break;
            }
            // All white space is just recorded as a pending space:
            if (c == '\n') {
                markLF();
                spaceToAdd = true;
                continue;
            }
            if (c == '\r') {
                if (peekNext() == '\n') {
                    ++mInputPtr;
                }
                // CR and CR+LF are line breaks too; record them like LF
                markLF();
                spaceToAdd = true;
                continue;
            }
            if (c == CHAR_SPACE) {
                spaceToAdd = true;
                continue;
            }
            // Verify it's a legal pubid char (see XML spec, #13, from 2.3)
            if ((c >= VALID_PUBID_CHAR_COUNT)
                || sPubidValidity[c] != PUBID_CHAR_VALID_B) {
                throwUnexpectedChar(c, " in public identifier");
            }

            /* Space-normalization means scrapping leading and trailing
             * white space, and coalescing remaining ws into single spaces.
             * At this point 'c' is known to be non-space: pending white
             * space is either forgotten (nothing output yet, ie. leading
             * space) or output as a single space.
             */
            if (spaceToAdd) {
                spaceToAdd = false;
                if (ptr > 0) {
                    if (ptr >= buf.length) {
                        buf = expandBy50Pct(buf);
                    }
                    buf[ptr++] = CHAR_SPACE;
                }
            }
            if (ptr >= buf.length) {
                buf = expandBy50Pct(buf);
            }
            buf[ptr++] = c;
        }

        return (ptr == 0) ? "" : new String(buf, 0, ptr);
    }

    /**
     * Method that reads characters from input and appends them to the
     * given text buffer, until the specified end character is found.
     * The end character itself is consumed, but not appended.
     *
     * @param tb Buffer that the characters read are appended to
     * @param endChar Character that ends the segment to read
     * @param convertLFs If true, CR and CR+LF sequences are appended as
     *    a single LF; if false, they are appended as they were read
     * @param errorMsg Message passed on to the methods that load more
     *    input, for the case where input ends before the end character
     */
    protected final void parseUntil(TextBuffer tb, char endChar, boolean convertLFs,
                                    String errorMsg)
        throws XMLStreamException
    {
        // Let's first ensure we have some data in there...
        if (mInputPtr >= mInputEnd) {
            loadMore(errorMsg);
        }
        while (true) {
            // Let's loop consecutive 'easy' spans:
            char[] inputBuf = mInputBuffer;
            int inputLen = mInputEnd;
            int ptr = mInputPtr;
            int startPtr = ptr;
            while (ptr < inputLen) {
                char c = inputBuf[ptr++];
                if (c == endChar) {
                    int thisLen = ptr - startPtr - 1;
                    if (thisLen > 0) {
                        tb.append(inputBuf, startPtr, thisLen);
                    }
                    mInputPtr = ptr;
                    return;
                }
                if (c == '\n') {
                    mInputPtr = ptr; // markLF() requires this
                    markLF();
                } else if (c == '\r') {
                    if (!convertLFs && ptr < inputLen) {
                        // No conversion, and next char is available:
                        // CR (and possible LF) just stay within the span
                        if (inputBuf[ptr] == '\n') {
                            ++ptr;
                        }
                        mInputPtr = ptr;
                        markLF();
                    } else {
                        // Need to flush the span before CR, and handle
                        // the linefeed separately:
                        int thisLen = ptr - startPtr - 1;
                        if (thisLen > 0) {
                            tb.append(inputBuf, startPtr, thisLen);
                        }
                        mInputPtr = ptr;
                        c = getNextChar(errorMsg);
                        if (c != '\n') {
                            --mInputPtr; // pushback
                            tb.append(convertLFs ? '\n' : '\r');
                        } else {
                            if (convertLFs) {
                                tb.append('\n');
                            } else {
                                tb.append('\r');
                                tb.append('\n');
                            }
                        }
                        /* getNextChar() may have loaded more input, in
                         * which case the buffer and its end have changed:
                         * cached copies must be refreshed along with the
                         * pointers, or the scan below would use a stale
                         * end offset (and possibly a stale buffer).
                         */
                        inputBuf = mInputBuffer;
                        inputLen = mInputEnd;
                        startPtr = ptr = mInputPtr;
                        markLF();
                    }
                }
            }
            // Ran out of buffered input; flush what we have, get more
            int thisLen = ptr - startPtr;
            if (thisLen > 0) {
                tb.append(inputBuf, startPtr, thisLen);
            }
            loadMore(errorMsg);
            // (local state gets re-initialized at the top of the loop)
        }
    }

    /*
    //////////////////////////////////////////
    // Internal methods
    //////////////////////////////////////////
     */

    /**
     * Method that parses the numeric part of a character reference: either
     * hexadecimal (if the first character read is 'x') or decimal; in both
     * cases up to and including the terminating semicolon. The resulting
     * code is then passed to {@link #checkAndExpandChar}.
     *
     * @return Character returned by {@link #checkAndExpandChar} for the
     *    parsed character code
     */
    private char resolveCharEnt()
        throws XMLStreamException
    {
        int code = 0;
        char ch = getNextChar(SUFFIX_IN_ENTITY_REF);

        if (ch != 'x') { // numeric (decimal)
            while (ch != ';') {
                if (ch < '0' || ch > '9') {
                    throwUnexpectedChar(ch, "; expected a decimal number.");
                } else {
                    code = (code * 10) + (ch - '0');
                    // Overflow?
                    if (code > MAX_UNICODE_CHAR) {
                        reportUnicodeOverflow();
                    }
                }
                if (mInputPtr < mInputEnd) {
                    ch = mInputBuffer[mInputPtr++];
                } else {
                    ch = getNextCharFromCurrent(SUFFIX_IN_ENTITY_REF);
                }
            }
            return checkAndExpandChar(code);
        }

        // hex
        for (;;) {
            if (mInputPtr < mInputEnd) {
                ch = mInputBuffer[mInputPtr++];
            } else {
                ch = getNextCharFromCurrent(SUFFIX_IN_ENTITY_REF);
            }
            if (ch == ';') {
                break;
            }
            code <<= 4;
            if (ch >= '0' && ch <= '9') {
                code += (ch - '0');
            } else if (ch >= 'a' && ch <= 'f') {
                code += 10 + (ch - 'a');
            } else if (ch >= 'A' && ch <= 'F') {
                code += 10 + (ch - 'A');
            } else {
                throwUnexpectedChar(ch, "; expected a hex digit (0-9a-fA-F).");
            }
            // Overflow?
            if (code > MAX_UNICODE_CHAR) {
                reportUnicodeOverflow();
            }
        }
        return checkAndExpandChar(code);
    }

    /**
     * Method that will handle expansion of a single character entity.
     * It will first check the general validity of the character (checking
     * characters that are never valid in any context); and if that succeeds,
     * will deal with splitting high-order characters (> 0xFFFF) into
     * surrogate pair as necessary.
     */
    private final char checkAndExpandChar(int value)
        throws XMLStreamException
    {
        if (value < 0xD800) {
            // Below surrogate range only control characters are suspect
            if (value < 32) {
                if (value == 0) {
                    throwParseError("Invalid character reference: null character not allowed in XML content.");
                }
                // XML 1.1 allows most other chars; 1.0 only tab, LF and CR:
                boolean xml10Whitespace =
                    (value == 0x9 || value == 0xA || value == 0xD);
                if (!mXml11 && !xml10Whitespace) {
                    reportIllegalChar(value);
                }
            }
            return (char) value;
        }

        if (value < 0xE000) { // no surrogates via entity expansion
            reportIllegalChar(value);
        }
        if (value > 0xFFFF) {
            /* 24-Jan-2006, TSa: "high" Unicode chars do not fit in a
             *   single char; they are split into a surrogate pair, which
             *   is then handed over to handleExpandedSurrogate()
             */
            // Within valid range at all?
            if (value > MAX_UNICODE_CHAR) {
                reportUnicodeOverflow();
            }
            int offset = value - 0x10000;
            char highSurrogate = (char) ((offset >> 10)  + 0xD800);
            char lowSurrogate = (char) ((offset & 0x3FF)  + 0xDC00);

            return handleExpandedSurrogate(highSurrogate, lowSurrogate);
        }
        if (value >= 0xFFFE) { // 0xFFFE and 0xFFFF are illegal too
            reportIllegalChar(value);
        }
        // Ok, fine as is
        return (char) value;
    }

    /**
     * Method for accessing the shared name buffer, allocating or
     * re-allocating it if necessary. Note that when a bigger buffer is
     * allocated, contents of the old one are NOT copied over.
     *
     * @param minSize Minimum size caller needs; buffer returned will be
     *    longer than this. Negative value can be used when caller has no
     *    specific requirement.
     *
     * @return Name buffer to use (also stored as the current name buffer)
     */
    protected final char[] getNameBuffer(int minSize)
    {
        char[] current = mNameBuffer;

        if (current == null) {
            // Nothing allocated yet: start with at least 64 chars
            int initialLen = (minSize > 48) ? (minSize+16) : 64;
            current = new char[initialLen];
            mNameBuffer = current;
            return current;
        }

        int oldLen = current.length;
        if (minSize < oldLen) { // existing one has at least one char extra
            return current;
        }
        // Too small: grow by 50%, or more if even that is not enough
        int grownLen = oldLen + (oldLen >> 1);
        int newLen = (minSize >= grownLen) ? (minSize+16) : grownLen;
        current = new char[newLen];
        mNameBuffer = current;
        return current;
    }
    
    /**
     * Method that allocates a new array that is about 50% longer than the
     * given one, and copies contents of the given array to the beginning
     * of the new one.
     *<p>
     * The new array is always at least one element longer than the
     * original: without this, arrays of length 0 or 1 would not grow at
     * all (half of their length rounds down to zero), and a caller that
     * expands in order to append would overflow the result.
     *
     * @param buf Array to expand; not modified
     *
     * @return Newly allocated, longer array with contents of
     *    <code>buf</code> copied over
     */
    protected final char[] expandBy50Pct(char[] buf)
    {
        int len = buf.length;
        int growth = Math.max(1, len >> 1);
        char[] newBuf = new char[len + growth];
        System.arraycopy(buf, 0, newBuf, 0, len);
        return newBuf;
    }

    /**
     * Method called to throw an exception indicating that a name that
     * should not be namespace-qualified (PI target, entity/notation name)
     * is one, and reader is namespace aware.
     */
    private void throwNsColonException(String name)
        throws XMLStreamException
    {
        // Offending name gets substituted for the {0} placeholder
        final String msgTemplate = "Illegal name \"{0}\" (PI target, entity/notation name): can not contain a colon (XML Namespaces 1.0#6)";
        throwParseError(msgTemplate, name, null);
    }

    /**
     * Method called to report a parse error for an entity whose expansion
     * refers back to the entity itself.
     *
     * @param entityName Name of the entity, included in the error message
     */
    private void throwRecursionError(String entityName)
        throws XMLStreamException
    {
        final String msgTemplate = "Illegal entity expansion: entity \"{0}\" expands itself recursively.";
        throwParseError(msgTemplate, entityName, null);
    }

    /**
     * Method called to report a parse error for a character reference
     * whose value is above the highest allowed character code; the limit
     * is included in the message in hexadecimal.
     */
    private void reportUnicodeOverflow()
        throws XMLStreamException
    {
        final String maxAllowedHex = Integer.toHexString(MAX_UNICODE_CHAR);
        throwParseError("Illegal character entity: value higher than max allowed (0x{0})", maxAllowedHex, null);
    }

    /**
     * Method called to report a parse error for a character reference
     * that expands to a character that is not allowed; the character
     * code is included in the message in hexadecimal.
     *
     * @param value Code of the offending character
     */
    private void reportIllegalChar(int value)
        throws XMLStreamException
    {
        // Note: the closing parenthesis used to be missing from the message
        throwParseError("Illegal character entity: expansion character (code 0x{0})", Integer.toHexString(value), null);
    }
}