All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.fasterxml.aalto.in.XmlScanner Maven / Gradle / Ivy

There is a newer version: 1.3.3
Show newest version
package com.fasterxml.aalto.in;

import java.io.*;
import java.util.ArrayList;
import java.util.Iterator;

import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;

import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.namespace.QName;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;

import org.codehaus.stax2.XMLStreamLocation2;
import org.codehaus.stax2.typed.Base64Variant;
import org.codehaus.stax2.typed.TypedArrayDecoder;
import org.codehaus.stax2.typed.TypedValueDecoder;
import org.codehaus.stax2.typed.TypedXMLStreamException;
import org.codehaus.stax2.ri.typed.CharArrayBase64Decoder;

import com.fasterxml.aalto.WFCException;
import com.fasterxml.aalto.impl.*;
import com.fasterxml.aalto.util.*;

/**
 * This is the abstract base class for all scanner implementations,
 * defining operations the actual parser requires from the low-level
 * scanners.
 * Scanners are encoding and input type (byte, char / stream, block)
 * specific, so there are many implementations.
 */
public abstract class XmlScanner
    implements XmlConsts, XMLStreamConstants, NamespaceContext
{

    // // // Constants:

    /**
     * String that identifies CDATA section (after "<![" prefix)
     */
    final protected String CDATA_STR = "CDATA[";

    /**
     * This token type signifies end-of-input, in cases where it can be
     * returned. In other cases, an exception may be thrown.
     */
    public final static int TOKEN_EOI = -1;

    /**
     * This constant defines the highest Unicode character allowed
     * in XML content.
     */
    protected final static int MAX_UNICODE_CHAR = 0x10FFFF;

    protected final static int INT_NULL = 0;
    protected final static int INT_CR = (int) '\r';
    protected final static int INT_LF = (int) '\n';
    protected final static int INT_TAB = (int) '\t';
    protected final static int INT_SPACE = 0x0020;

    protected final static int INT_HYPHEN = (int) '-';
    protected final static int INT_QMARK = (int) '?';
    protected final static int INT_AMP = (int) '&';
    protected final static int INT_LT = (int) '<';
    protected final static int INT_GT = (int) '>';
    protected final static int INT_QUOTE = (int) '"';
    protected final static int INT_APOS = (int) '\'';
    protected final static int INT_EXCL = (int) '!';
    protected final static int INT_COLON = (int) ':';
    protected final static int INT_LBRACKET = (int) '[';
    protected final static int INT_RBRACKET = (int) ']';
    protected final static int INT_SLASH = (int) '/';
    protected final static int INT_EQ = (int) '=';

    protected final static int INT_A = (int) 'A';
    protected final static int INT_F = (int) 'F';

    protected final static int INT_a = (int) 'a';
    protected final static int INT_f = (int) 'f';
    protected final static int INT_z = (int) 'z';

    protected final static int INT_0 = (int) '0';
    protected final static int INT_9 = (int) '9';
    
    // // // Config for bound PName cache:

    /**
     * Let's activate cache quite soon, no need to wait for hundreds
     * of misses; just try to avoid cache construction if all we get
     * is soap envelope element or such.
     */
    private final static int BIND_MISSES_TO_ACTIVATE_CACHE = 10;

    /**
     * Size of the bind cache can be reasonably small, and should
     * still get high enough hit rate
     */
    private final static int BIND_CACHE_SIZE = 0x40;

    private final static int BIND_CACHE_MASK = 0x3F;

    /*
    /**********************************************************************
    /* Configuration
    /**********************************************************************
     */

    protected final ReaderConfig _config;

    /**
     * Whether validity checks (wrt. name and text characters)
     * and normalization (linefeeds) is to be
     * done using xml 1.1 rules, or basic xml 1.0 rules. Default
     * is 1.0.
     */
    protected final boolean _xml11;

    protected final boolean _cfgCoalescing;

    /* Note: non-final since it may need to be disabled after
     * construction.
     */
    protected boolean _cfgLazyParsing;

    /*
    /**********************************************************************
    /* Tokenization state
    /**********************************************************************
     */

    protected int _currToken = START_DOCUMENT;

    protected boolean _tokenIncomplete = false;

    /**
     * Number of START_ELEMENT events returned for which no END_ELEMENT
     * has been returned; including current event.
     */
    protected int _depth = 0;

    /**
     * Textual content of the current event
     */
    protected final TextBuilder _textBuilder;

    /**
     * Flag set to indicate that an entity is pending
     */
    protected boolean _entityPending = false;

    /*
    /**********************************************************************
    /* Name/String handling
    /**********************************************************************
     */

    /**
     * Similarly, need a char buffer for actual String construction
     * (in future, could perhaps use StringBuilder?). It is used
     * for holding things like names (element, attribute), and
     * attribute values.
     */
    protected char[] _nameBuffer = null;

    /**
     * Current name associated with the token, if any. Name of the
     * current element, target of processing instruction, or name
     * of an unexpanded entity.
     */
    protected PName _tokenName = null;

    /*
    /**********************************************************************
    /* Element information
    /**********************************************************************
     */

    /**
     * Flag that is used if the current state is START_ELEMENT
     * or END_ELEMENT, to indicate if the underlying physical
     * tag is a so-called empty tag (one ending with "/>")
     */
    protected boolean _isEmptyTag = false;

    /**
     * Information about the current element on the stack
     */
    protected ElementScope _currElem;

    /**
     * Public id of the current event (DTD), if any.
     */
    protected String _publicId;

    /**
     * System id of the current event (DTD), if any.
     */
    protected String _systemId;

    /*
    /**********************************************************************
    /* Namespace binding
    /**********************************************************************
     */

    /**
     * Pointer to the last namespace declaration encountered. Because of backwards
     * linking, it also serves as the head of the linked list of all active
     * namespace declarations starting from the most recent one.
     */
    protected NsDeclaration _lastNsDecl = null;

    /**
     * This is a temporary state variable, valid during START_ELEMENT
     * event. For those events, contains number of namespace declarations
     * available. For END_ELEMENT, this count is computed on the fly.
     */
    protected int _currNsCount = 0;

    /**
     * Default namespace binding is a per-document singleton, like
     * explicit bindings, and used for elements (never for attributes).
     */
    protected NsBinding _defaultNs = NsBinding.createDefaultNs();

    /**
     * Array containing all prefix bindings needed within the current
     * document, so far (if any). These bindings are not in a particular
     * order, and they specifically do NOT represent actual namespace
     * declarations parsed from xml content.
     */
    protected NsBinding[] _nsBindings;

    protected int _nsBindingCount = 0;

    /**
     * Although unbound pname instances can be easily and safely reused,
     * bound ones are per-document. However, it makes sense to try to
     * reuse them too; at least using a minimal static cache, activate
     * only after certain number of cache misses (to avoid overhead for
     * tiny documents, or documents with few or no namespace prefixes).
     */
    protected PName[] _nsBindingCache = null;

    protected int _nsBindMisses = 0;

    /*
    /**********************************************************************
    /* Support for non-transient NamespaceContext
    /**********************************************************************
     */

    /**
     * Last returned {@link NamespaceContext}, created for a call
     * to {@link #getNonTransientNamespaceContext}, iff this would
     * still be a valid context.
     */
    protected FixedNsContext _lastNsContext = FixedNsContext.EMPTY_CONTEXT;

    /*
    /**********************************************************************
    /* Attribute info
    /**********************************************************************
     */

    protected final AttributeCollector _attrCollector;

    protected int _attrCount = 0;

    /*
    /**********************************************************************
    /* Minimal location info for all impls
    /**********************************************************************
     */

    /**
     * Number of bytes that were read and processed before the contents
     * of the current buffer; used for calculating absolute offsets.
     */
    protected long _pastBytesOrChars;

    /**
     * The row on which the character to read next is on. Note that
     * it is 0-based, so API will generally add one to it before
     * returning the value
     */
    protected int _currRow;

    /**
     * Offset used to calculate the column value given current input
     * buffer pointer. May be negative, if the first character of the
     * row was contained within an earlier buffer.
     */
    protected int _rowStartOffset;

    /**
     * Offset (in chars or bytes) at start of current token
     */
    protected long _startRawOffset;

    /**
     * Current row at start of current (last returned) token
     */
    protected long _startRow = -1L;

    /**
     * Current column at start of current (last returned) token
     */
    protected long _startColumn = -1L;
    
    /*
    /**********************************************************************
    /* Life-cycle
    /**********************************************************************
     */

    protected XmlScanner(ReaderConfig cfg)
    {
        _config = cfg;

        _cfgCoalescing = cfg.willCoalesceText();
        _cfgLazyParsing = cfg.willParseLazily();
        _xml11 = cfg.isXml11();
        _textBuilder = TextBuilder.createRecyclableBuffer(_config);
        _attrCollector = new AttributeCollector(cfg);
        _nameBuffer = cfg.allocSmallCBuffer(ReaderConfig.DEFAULT_SMALL_BUFFER_LEN);
        _currRow = 0;
    }

    /**
     * Method called at point when the parsing process has ended (either
     * by encountering end of the input, or via explicit close), and
     * buffers can and should be released.
     *
     * @param forceCloseSource True if the underlying input source is
     *   to be closed, independent of whether auto-close has been set
     *   to true via configuration (or if the scanner manages the input
     *   source)
     */
    public final void close(boolean forceCloseSource)
        throws XMLStreamException
    {
        _releaseBuffers();
        if (forceCloseSource || _config.willAutoCloseInput()) {
            try {
                _closeSource();
            } catch (IOException ioe) {
                throw new IoStreamException(ioe);
            }
        }
    }

    protected void _releaseBuffers()
    {
        _textBuilder.recycle(true);
        if (_nameBuffer != null) {
            char[] buf = _nameBuffer;
            _nameBuffer = null;
            _config.freeSmallCBuffer(buf);
        }
    }

    protected abstract void _closeSource() throws IOException;

    /*
    /**********************************************************************
    /* Package access methods, needed by SAX impl
    /**********************************************************************
     */

    public ReaderConfig getConfig() { return _config; }

    public AttributeCollector getAttrCollector() { return _attrCollector; }

    /*
    /**********************************************************************
    /* Public scanner interface, iterating
    /**********************************************************************
     */

    // // // First, main iteration methods

    public abstract int nextFromProlog(boolean isProlog) throws XMLStreamException;

    public abstract int nextFromTree() throws XMLStreamException;

    /**
     * This method is called to ensure that the current token/event has been
     * completely parsed, such that we have all the data needed to return
     * it (textual content, PI data, comment text etc)
     */
    protected abstract void finishToken() throws XMLStreamException;

    /**
     * This method is called to essentially skip remaining of the
     * current token (data of PI etc)
     *
     * @return True If by skipping we also figured out following event
     *   type (and assigned its type to _currToken); false if that remains
     *   to be done
     */
    protected final boolean skipToken()
        throws XMLStreamException
    {
        _tokenIncomplete = false;
        switch (_currToken) {
        case PROCESSING_INSTRUCTION:
            skipPI();
            break;
        case CHARACTERS:
            if (skipCharacters()) { // encountered an entity
                // _tokenName already set, just need to set curr token
                _currToken = ENTITY_REFERENCE;
                return true;
            } 
            if (_cfgCoalescing) {
                if (skipCoalescedText()) { // encountered an entity
                    _currToken = ENTITY_REFERENCE;
                    return true;
                }
            }
            break;
        case COMMENT:
            skipComment();
            break;
        case SPACE:
            skipSpace();
            break;
        case CDATA:
            skipCData();
            if (_cfgCoalescing) {
                skipCoalescedText();
                if (_entityPending) { // encountered an entity
                    _currToken = ENTITY_REFERENCE;
                    return true;
                }
            }
            break;
        case DTD:
            finishDTD(false); // false -> skip subset text
            break;
        default:
            throw new Error("Internal error, unexpected incomplete token type "+ErrorConsts.tokenTypeDesc(_currToken));
        }
        return false;
    }

    /*
    /**********************************************************************
    /* Public scanner interface, location access
    /**********************************************************************
     */
    
    /**
     * @return Current input location
     */
    public abstract XMLStreamLocation2 getCurrentLocation();

    public final XMLStreamLocation2 getStartLocation()
    {
        // !!! TODO: deal with impedance wrt int/long (flaw in Stax API)
        int row = (int) _startRow;
        int col = (int) _startColumn;
        return LocationImpl.fromZeroBased(_config.getPublicId(), _config.getSystemId(),
                _startRawOffset, row, col);
    }

    public abstract long getStartingByteOffset();
    public abstract long getStartingCharOffset();
    public abstract long getEndingByteOffset() throws XMLStreamException;
    public abstract long getEndingCharOffset() throws XMLStreamException;
    
    public XMLStreamLocation2 getEndLocation() throws XMLStreamException
    {
        // Have to complete the token to know the ending location...
        if (_tokenIncomplete) {
            finishToken();
        }
        return getCurrentLocation();
    }

    public final int getCurrentLineNr() {
        return _currRow+1;
    }

    public abstract int getCurrentColumnNr();

    public final String getInputSystemId() {
        return _config.getSystemId();
    }

    public final String getInputPublicId() {
        return _config.getPublicId();
    }

    /*
    /**********************************************************************
    /* Public scanner interface, other methods
    /**********************************************************************
     */

    public final boolean hasEmptyStack() {
        return (_depth == 0);
    }

    public final int getDepth() { return _depth; }

    public final boolean isEmptyTag() { return _isEmptyTag; }

    /*
    /**********************************************************************
    /* Data accessors, names:
    /**********************************************************************
     */

    public final PName getName() {
        return _tokenName;
    }

    public final QName getQName() {
        return _tokenName.constructQName(_defaultNs);
    }

    public final String getDTDPublicId() {
        return _publicId;
    }

    public final String getDTDSystemId() {
        return _systemId;
    }

    /*
    /**********************************************************************
    /* Data accessors, (element) text:
    /**********************************************************************
     */

    public final String getText() throws XMLStreamException
    {
        if (_tokenIncomplete) {
            finishToken();
        }
        return _textBuilder.contentsAsString();
    }

    public final int getTextLength()
        throws XMLStreamException
    {
        if (_tokenIncomplete) {
            finishToken();
        }
        return _textBuilder.size();
    }

    public final char[] getTextCharacters()
        throws XMLStreamException
    {
        if (_tokenIncomplete) {
            finishToken();
        }
        return _textBuilder.getTextBuffer();
    }

    public final int getTextCharacters(int srcStart, char[] target, int targetStart, int len)
        throws XMLStreamException
    {
        if (_tokenIncomplete) {
            finishToken();
        }
        return _textBuilder.contentsToArray(srcStart, target, targetStart, len);
    }

    public final int getText(Writer w, boolean preserveContents)
        throws XMLStreamException
    {
        if (_tokenIncomplete) {
            finishToken();
        }
        /* !!! Preserve or not, we'll hold the contents in memory.
         *   Could be improved if necessary.
         */
        try {
            return _textBuilder.rawContentsTo(w);
        } catch (IOException ioe) {
            throw new IoStreamException(ioe);
        }
    }

    public final boolean isTextWhitespace()
        throws XMLStreamException
    {
        if (_tokenIncomplete) {
            finishToken();
        }
        return _textBuilder.isAllWhitespace();
    }

    /**
     * Method called by the stream reader to decode space-separated tokens
     * that are part of the current text event, using given decoder.
     *
     * @param reset If true, need to tell text buffer to reset its decoding
     *   state; if false, shouldn't
     */
    public final int decodeElements(TypedArrayDecoder tad, boolean reset)
        throws XMLStreamException
    {
        if (_tokenIncomplete) {
            finishToken();
        }

        try {
            return _textBuilder.decodeElements(tad, reset);
        } catch (TypedXMLStreamException tex) {
            // Need to add location?
            Location loc = getCurrentLocation();
            String lexical = tex.getLexical();
            IllegalArgumentException iae = (IllegalArgumentException)tex.getCause();
            throw new TypedXMLStreamException(lexical, tex.getMessage(), loc, iae);
        }
    }

    /**
     * Method called by the stream reader to reset given base64 decoder
     * with data from the current text event.
     */
    public final void resetForDecoding(Base64Variant v, CharArrayBase64Decoder dec, boolean firstChunk)
        throws XMLStreamException
    {
        if (_tokenIncomplete) {
            finishToken();
        }
        _textBuilder.resetForBinaryDecode(v, dec, firstChunk);
    }

    /*
    /**********************************************************************
    /* Data accessors, firing SAX events
    /**********************************************************************
     */

    public void fireSaxStartElement(ContentHandler h, Attributes attrs)
        throws SAXException
    {
        if (h != null) {
            // First; any ns declarations?
            NsDeclaration nsDecl = _lastNsDecl;
            /* 17-Sep-2006, tatus: There is disparity between START/END_ELEMENT;
             *   with START_ELEMENT, _depth is one higher than that of ns
             *   declarations; with END_ELEMENT, the same
             */
            int level = _depth-1;
            while (nsDecl != null && nsDecl.getLevel() == level) {
                String prefix = nsDecl.getPrefix();
                String uri = nsDecl.getCurrNsURI();
                h.startPrefixMapping((prefix == null) ? "" : prefix, uri);
                nsDecl = nsDecl.getPrev();
            }

            // Then start-elem event itself:
            PName n = getName();
            String uri = n.getNsUri();
            // Sax requires "" (not null) for ns uris...
            h.startElement((uri == null) ? "" : uri,
                           n.getLocalName(), n.getPrefixedName(),
                           attrs);
        }
    }

    public void fireSaxEndElement(ContentHandler h)
        throws SAXException
    {
        if (h != null) {
            /* Order of events is reversed (wrt. start-element): first
             * the end tag event, then unbound prefixes
             */
            // End element:
            PName n = getName();
            String uri = n.getNsUri();
            // Sax requires "" (not null) for ns uris...
            h.endElement((uri == null) ? "" : uri, n.getLocalName(), n.getPrefixedName());
            // Then, any expiring ns declarations?
            NsDeclaration nsDecl = _lastNsDecl;
            /* 17-Sep-2006, tatus: There is disparity between START/END_ELEMENT;
             *   with START_ELEMENT, _depth is one higher than that of ns
             *   declarations; with END_ELEMENT, the same
             */
            int level = _depth;
            while (nsDecl != null && nsDecl.getLevel() == level) {
                String prefix = nsDecl.getPrefix();
                h.endPrefixMapping((prefix == null) ? "" : prefix);
                nsDecl = nsDecl.getPrev();
            }
        }
    }

    public void fireSaxCharacterEvents(ContentHandler h)
        throws XMLStreamException, SAXException
    {
        if (h != null) {
            if (_tokenIncomplete) {
                finishToken();
            }
            _textBuilder.fireSaxCharacterEvents(h);
        }
    }

    public void fireSaxSpaceEvents(ContentHandler h)
        throws XMLStreamException, SAXException
    {
        if (h != null) {
            if (_tokenIncomplete) {
                finishToken();
            }
            _textBuilder.fireSaxSpaceEvents(h);
        }
    }

    public void fireSaxCommentEvent(LexicalHandler h)
        throws XMLStreamException, SAXException
    {
        if (h != null) {
            if (_tokenIncomplete) {
                finishToken();
            }
            _textBuilder.fireSaxCommentEvent(h);
        }
    }

    public void fireSaxPIEvent(ContentHandler h)
        throws XMLStreamException, SAXException
    {
        if (h != null) {
            if (_tokenIncomplete) {
                finishToken();
            }
            h.processingInstruction(_tokenName.getLocalName(), getText());
        }
    }

    /*
    /**********************************************************************
    /* Data accessors, attributes:
    /**********************************************************************
     */

    public final int getAttrCount() {
        return _attrCount;
    }

	public final String getAttrLocalName(int index)
    {
        // Note: caller checks indices:
        return _attrCollector.getName(index).getLocalName();
    }

    public final QName getAttrQName(int index)
    {
        // Note: caller checks indices:
        return _attrCollector.getQName(index);
    }

    public final String getAttrPrefixedName(int index)
    {
        // Note: caller checks indices:
        return _attrCollector.getName(index).getPrefixedName();
    }

    public final String getAttrNsURI(int index)
    {
        // Note: caller checks indices:
        return _attrCollector.getName(index).getNsUri();
    }

    public final String getAttrPrefix(int index)
    {
        // Note: caller checks indices:
        return _attrCollector.getName(index).getPrefix();
    }

    public final String getAttrValue(int index)
    {
        // Note: caller checks indices
        return _attrCollector.getValue(index);
    }

    public final String getAttrValue(String nsURI, String localName)
    {
        /* Collector may not be reset if there are no attributes,
         * need to check if any could be found first:
         */
        if (_attrCount < 1) {
            return null;
        }
        return _attrCollector.getValue(nsURI, localName);
    }

    public final void decodeAttrValue(int index, TypedValueDecoder tvd)
        throws XMLStreamException
    {
        _attrCollector.decodeValue(index, tvd);
    }

    /**
     * Method called to decode the attribute value that consists of
     * zero or more space-separated tokens.
     * Decoding is done using the decoder provided.
     * @return Number of tokens decoded
     */
    public final int decodeAttrValues(int index, TypedArrayDecoder tad)
        throws XMLStreamException
    {
        return _attrCollector.decodeValues(index, tad, this);
    }

    public final byte[] decodeAttrBinaryValue(int index, Base64Variant v, CharArrayBase64Decoder dec)
        throws XMLStreamException
    {
        return _attrCollector.decodeBinaryValue(index, v, dec, this);
    }

    public final int findAttrIndex(String nsURI, String localName)
    {
        /* Collector may not be reset if there are no attributes,
         * need to check if any could be found first:
         */
        if (_attrCount < 1) {
            return -1;
        }
        return _attrCollector.findIndex(nsURI, localName);
    }

    public final String getAttrType(int index)
    {
        // Note: caller checks indices:
        // !!! TBI
        return "CDATA";
    }

    public final boolean isAttrSpecified(int index)
    {
        // !!! TBI
        // (for now works ok as we don't handle DTD info, no attr value defaults)
        return true;
    }

    /*
    /**********************************************************************
    /* Data accessors, namespace declarations:
    /**********************************************************************
     */

    public final int getNsCount()
    {
        if (_currToken == START_ELEMENT) {
            return _currNsCount;
        }
        return (_lastNsDecl == null) ? 0 : _lastNsDecl.countDeclsOnLevel(_depth);
    }

    public final String getNamespacePrefix(int index)
    {
        return findCurrNsDecl(index).getBinding().mPrefix;
    }

    public final String getNamespaceURI(int index)
    {
        return findCurrNsDecl(index).getBinding().mURI;
    }

    private NsDeclaration findCurrNsDecl(int index)
    {
        NsDeclaration nsDecl = _lastNsDecl;
        /* 17-Sep-2006, tatu: There is disparity between START/END_ELEMENT;
         *   with START_ELEMENT, _depth is one higher than that of ns
         *   declarations; with END_ELEMENT, the same
         */
        int level = _depth;
        int count;
        // 20-Jan-2011, tatu: Hmmh... since declarations are in reverse order should we reorder?
        if (_currToken == START_ELEMENT) {
            count = _currNsCount - 1 - index;
            --level;
        } else {
            count = index;
        }

        while (nsDecl != null && nsDecl.getLevel() == level) {
            if (count == 0) {
                return nsDecl;
            }
            --count;
            nsDecl = nsDecl.getPrev();
        }
        reportInvalidNsIndex(index);
        return null; // never gets here
    }

    // Part of NamespaceContext impl below
    //public final String getNsUri(String prefix)

    public final String getNamespaceURI()
    {
        String uri = _tokenName.getNsUri();
        // Null means it uses the default ns:
        return (uri == null) ? _defaultNs.mURI : uri;
    }

    public final NamespaceContext getNonTransientNamespaceContext()
    {
        _lastNsContext = _lastNsContext.reuseOrCreate(_lastNsDecl);
        return _lastNsContext;
    }

    /*
    /**********************************************************************
    /* NamespaceContext implementation
    /**********************************************************************
     */

    @Override
    public String getNamespaceURI(String prefix)
    {
        if (prefix == null) {
            throw new IllegalArgumentException(ErrorConsts.ERR_NULL_ARG);
        }
        if (prefix.length() == 0) { // default namespace?
            // Need to check if it's null, too, to convert
            String uri = _defaultNs.mURI;
            return (uri == null) ? "" : uri;
        }
        // xml, xmlns?
        if (prefix.equals(XMLConstants.XML_NS_PREFIX)) {
            return XMLConstants.XML_NS_URI;
        }
        if (prefix.equals(XMLConstants.XMLNS_ATTRIBUTE)) {
            return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
        }
        // Nope, a specific other prefix
        NsDeclaration nsDecl = _lastNsDecl;
        while (nsDecl != null) {
            if (nsDecl.hasPrefix(prefix)) {
                return nsDecl.getCurrNsURI();
            }
            nsDecl = nsDecl.getPrev();
        }
        return null;
    }

    @Override
    public String getPrefix(String nsURI)
    {
        /* As per JDK 1.5 JavaDocs, null is illegal; but no mention
         * about empty String (""). But that should 
         */
        if (nsURI == null) {
            throw new IllegalArgumentException(ErrorConsts.ERR_NULL_ARG);
        }
        if (nsURI.equals(XMLConstants.XML_NS_URI)) {
            return XMLConstants.XML_NS_PREFIX;
        }
        if (nsURI.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
            return XMLConstants.XMLNS_ATTRIBUTE;
        }
        // First: does the default namespace bind to the URI?
        if (nsURI.equals(_defaultNs.mURI)) {
            return "";
        }
        /* Need to loop twice; first find a prefix, then ensure it's
         * not masked by a later declaration
         */
        main_loop:
        for (NsDeclaration nsDecl = _lastNsDecl; nsDecl != null;
             nsDecl = nsDecl.getPrev()) {
            if (nsDecl.hasNsURI(nsURI)) {
                // Ok: but is prefix masked?
                String prefix = nsDecl.getPrefix();
                // Plus, default ns wouldn't do (since current one was already checked)
                if (prefix != null) {
                    for (NsDeclaration decl2 = _lastNsDecl; decl2 != nsDecl;
                         decl2 = decl2.getPrev()) {
                        if (decl2.hasPrefix(prefix)) {
                            continue main_loop;
                        }
                    }
                    return prefix;
                }
            }
        }
        return null;
    }

    @Override
    public Iterator getPrefixes(String nsURI)
    {
        if (nsURI == null) {
            throw new IllegalArgumentException(ErrorConsts.ERR_NULL_ARG);
        }
        if (nsURI.equals(XMLConstants.XML_NS_URI)) {
            return new SingletonIterator(XMLConstants.XML_NS_PREFIX);
        }
        if (nsURI.equals(XMLConstants.XMLNS_ATTRIBUTE_NS_URI)) {
            return new SingletonIterator(XMLConstants.XMLNS_ATTRIBUTE);
        }
        ArrayList l = null;

        // First, the default ns?
        if (nsURI.equals(_defaultNs.mURI)) {
            l = new ArrayList();
            l.add("");
        }

        main_loop:
        for (NsDeclaration nsDecl = _lastNsDecl; nsDecl != null;
             nsDecl = nsDecl.getPrev()) {
            if (nsDecl.hasNsURI(nsURI)) {
                // Ok: but is prefix masked?
                String prefix = nsDecl.getPrefix();
                // Plus, default ns wouldn't do (since current one was already checked)
                if (prefix != null) {
                    for (NsDeclaration decl2 = _lastNsDecl; decl2 != nsDecl;
                         decl2 = decl2.getPrev()) {
                        if (decl2.hasPrefix(prefix)) {
                            continue main_loop;
                        }
                    }
                    if (l == null) {
                        l = new ArrayList();
                    }
                    l.add(prefix);
                }
            }
        }

        if (l == null) {
            return EmptyIterator.getInstance();
        }
        if (l.size() == 1) {
            return new SingletonIterator(l.get(0));
        }
        return l.iterator();
    }

    /*
    /**********************************************************************
    /* Abstract methods for sub-classes to implement
    /**********************************************************************
     */

    // // token-finish methods

    protected abstract void finishCharacters()
        throws XMLStreamException;

    protected abstract void finishCData()
        throws XMLStreamException;

    protected abstract void finishComment()
        throws XMLStreamException;

    protected abstract void finishDTD(boolean copyContents)
        throws XMLStreamException;

    protected abstract void finishPI()
        throws XMLStreamException;

    protected abstract void finishSpace()
        throws XMLStreamException;

    // // token-skip methods

    /**
     * @return True, if an unexpanded entity was encountered (and
     *   is now pending)
     */
    protected abstract boolean skipCharacters()
        throws XMLStreamException;

    protected abstract void skipCData()
        throws XMLStreamException;

    protected abstract void skipComment()
        throws XMLStreamException;

    protected abstract void skipPI()
        throws XMLStreamException;

    protected abstract void skipSpace()
        throws XMLStreamException;

    /**
     * Secondary skip method called after primary text segment
     * has been skipped, and we are in coalescing mode.
     *
     * @return True, if an unexpanded entity was encountered (and
     *   is now pending)
     */
    protected abstract boolean skipCoalescedText()
        throws XMLStreamException;

    // // Raw input access:

    protected abstract boolean loadMore()
        throws XMLStreamException;

    /*
    /**********************************************************************
    /* Basic namespace binding methods
    /**********************************************************************
     */

    /**
     * This method is called to find/create a fully qualified (bound)
     * name (element / attribute), for a name with prefix. For non-prefixed
     * names this method will not get called
     */
    protected final PName bindName(PName name, String prefix)
    {
        // First, do we have a cache, to perhaps find bound name from?
        if (_nsBindingCache != null) {
            PName cn = _nsBindingCache[name.unboundHashCode() & BIND_CACHE_MASK];
            if (cn != null && cn.unboundEquals(name)) {
                return cn;
            }
        }

        // If no cache, or not found there, need to first find binding
        for (int i = 0, len = _nsBindingCount; i < len; ++i) {
            NsBinding b = _nsBindings[i];
            if (b.mPrefix != prefix) { // prefixes are canonicalized
                continue;
            }
            // Ok, match!
            // Can we bubble prefix closer to the head?
            if (i > 0) {
                _nsBindings[i] = _nsBindings[i-1];
                _nsBindings[i-1] = b;
            }
            // Plus, should we cache it?
            PName bn = name.createBoundName(b);
            if (_nsBindingCache == null) {
                if (++_nsBindMisses < BIND_MISSES_TO_ACTIVATE_CACHE) {
                    return bn;
                }
                _nsBindingCache = new PName[BIND_CACHE_SIZE];
            }
            _nsBindingCache[bn.unboundHashCode() & BIND_CACHE_MASK] = bn;
            return bn;
        }

        // If not even binding, need to create that first

        // No match; perhaps "xml"? But is "xmlns" legal to use too?
        if (prefix == "xml") {
            return name.createBoundName(NsBinding.XML_BINDING);
        }
        /* Nope. Need to create a new binding. For such entries, let's
         * not try caching, yet, but let's note it as a miss
         */
        ++_nsBindMisses;
        NsBinding b = new NsBinding(prefix);
        if (_nsBindingCount == 0) {
            _nsBindings = new NsBinding[16];
        } else if (_nsBindingCount >= _nsBindings.length) {
            _nsBindings = (NsBinding[]) DataUtil.growAnyArrayBy(_nsBindings, _nsBindings.length);
        }
        _nsBindings[_nsBindingCount] = b;
        ++_nsBindingCount;
        return name.createBoundName(b);
    }

    /**
     * Method called when a namespace declaration needs to find the
     * binding object (essentially a per-prefix-per-document canonical
     * container object)
     */
    protected final NsBinding findOrCreateBinding(String prefix)
        throws XMLStreamException
    {
        // !!! TODO: switch to hash at size N?

        // TEST only (for ns-soap.xml):
        //int MAX = (_nsBindingCount > 8) ? 8 : _nsBindingCount;
        //for (int i = 0; i < MAX; ++i) {

        for (int i = 0, len = _nsBindingCount; i < len; ++i) {
            NsBinding b = _nsBindings[i];
            if (b.mPrefix == prefix) { // prefixes are interned
                if (i > 0) { // let's do bubble it up a notch... can speed things up
                    _nsBindings[i] = _nsBindings[i-1];
                    _nsBindings[i-1] = b;
                }
                return b;
            }
        }

        if (prefix == "xml") {
            return NsBinding.XML_BINDING;
        }
        if (prefix == "xmlns") {
            return NsBinding.XMLNS_BINDING;
        }
        // Nope. Need to create a new binding
        NsBinding b = new NsBinding(prefix);
        if (_nsBindingCount == 0) {
            _nsBindings = new NsBinding[16];
        } else if (_nsBindingCount >= _nsBindings.length) {
            _nsBindings = (NsBinding[]) DataUtil.growAnyArrayBy(_nsBindings, _nsBindings.length);
        }
        _nsBindings[_nsBindingCount] = b;
        ++_nsBindingCount;
        return b;
    }

    /**
     * Method called when we are ready to bind a declared namespace.
     */
    protected final void bindNs(PName name, String uri)
        throws XMLStreamException
    {
        NsBinding ns;
        String prefix = name.getPrefix();

        if (prefix == null) { // default ns
            ns = _defaultNs;
        } else {
            prefix = name.getLocalName();
            ns = findOrCreateBinding(prefix);
            if (ns.isImmutable()) { // xml, xmlns
                checkImmutableBinding(prefix, uri);
            }
        }

        /* 28-Oct-2006, tatus: Also need to ensure that neither
         *   xml nor xmlns-bound namespaces are bound to any
         *   other prefixes. Since we know that URIs are intern()ed,
         *   can just do identity comparison
         */
        if (!ns.isImmutable()) {
            if (uri == XMLConstants.XML_NS_URI) {
                reportIllegalNsDecl("xml", XMLConstants.XML_NS_URI);
            } else if (uri == XMLConstants.XMLNS_ATTRIBUTE_NS_URI) {
                reportIllegalNsDecl("xmlns", XMLConstants.XMLNS_ATTRIBUTE_NS_URI);
            }
        }
        // Already declared in current scope?
        if (_lastNsDecl != null && _lastNsDecl.alreadyDeclared(prefix, _depth)) {
            reportDuplicateNsDecl(prefix);
        }
        _lastNsDecl = new NsDeclaration(ns, uri, _lastNsDecl, _depth);
    }

    /**
     * Method called when an immutable ns prefix (xml, xmlns) is 
     * encountered.
     */
    protected final void checkImmutableBinding(String prefix, String uri)
        throws XMLStreamException
    {
        if (prefix != "xml" || !uri.equals(XMLConstants.XML_NS_URI)) {
            reportIllegalNsDecl(prefix);
        }
    }

    /*
    /**********************************************************************
    /* Helper methods for sub-classes, input data
    /**********************************************************************
     */

    /**
     * Method that tries to load at least one more byte into buffer;
     * and if that fails, throws an appropriate EOI exception.
     */
    protected final void loadMoreGuaranteed()
        throws XMLStreamException
    {
        if (!loadMore()) {
            reportInputProblem("Unexpected end-of-input when trying to parse "+ErrorConsts.tokenTypeDesc(_currToken));
        }
    }

    protected final void loadMoreGuaranteed(int tt) throws XMLStreamException
    {
        if (!loadMore()) {
            reportInputProblem("Unexpected end-of-input when trying to parse "+ErrorConsts.tokenTypeDesc(tt));
        }
    }

    /*
    /**********************************************************************
    /* Helper methods for sub-classes, character validity checks
    /**********************************************************************
     */

    protected final void verifyXmlChar(int value) throws XMLStreamException
    {
        // Ok, and then need to check result is a valid XML content char:
        if (value >= 0xD800) { // note: checked for overflow earlier
            if (value < 0xE000) { // no surrogates via entity expansion
                reportInvalidXmlChar(value);
            }
            if (value == 0xFFFE || value == 0xFFFF) {
                reportInvalidXmlChar(value);
            }
        } else if (value < 32) {
            // XML 1.1 allows most other chars; 1.0 does not:
            if (value != INT_LF && value != INT_CR && value != INT_TAB) {
                if (!_xml11 || value == 0) {
                    reportInvalidXmlChar(value);
                }
            }
        }
    }
    
    /*
    /**********************************************************************
    /* Helper methods for sub-classes, error reporting
    /**********************************************************************
     */

    protected void reportInputProblem(String msg)
        throws XMLStreamException
    {
        /* 29-Mar-2008, tatus: Not sure if these are all Well-Formedness
         *   Constraint (WFC) violations? They should be... ?
         */
        throw new WFCException(msg, getCurrentLocation());
    }

    /**
     * Method called when a call to expand an entity within attribute
     * value fails to expand it.
     */
    protected void reportUnexpandedEntityInAttr(PName name, boolean isNsDecl)
        throws XMLStreamException
    {
        reportInputProblem("Unexpanded ENTITY_REFERENCE ("+_tokenName+") in "
                           +(isNsDecl ? "namespace declaration" : "attribute value"));
    }

    protected void reportPrologUnexpElement(boolean isProlog, int ch)
        throws XMLStreamException
    {
        if (ch < 0) { // just to be safe, in case caller passed signed byte
            ch &= 0x7FFFF;
        }
        if (ch == '/') { // end element
            if (isProlog) {
                reportInputProblem("Unexpected end element in prolog: malformed XML document, expected root element");
            }
            reportInputProblem("Unexpected end element in epilog: malformed XML document (unbalanced start/end tags?)");
        }

        // Otherwise, likely start element. But check for invalid white space for funsies
        if (ch < 32) {
            String type = isProlog ? ErrorConsts.SUFFIX_IN_PROLOG : ErrorConsts.SUFFIX_IN_EPILOG;
            throwUnexpectedChar(ch, "Unrecognized directive "+type);
        }
        reportInputProblem("Second root element in content: malformed XML document, only one allowed");
    }
    
    protected void reportPrologUnexpChar(boolean isProlog, int ch, String msg)
        throws XMLStreamException
    {
        String fullMsg = isProlog ? ErrorConsts.SUFFIX_IN_PROLOG : ErrorConsts.SUFFIX_IN_EPILOG;
        if (msg == null) {
            if (ch == '&') {
                throwUnexpectedChar(ch, fullMsg+"; no entities allowed");
            }
        } else {
            fullMsg += msg;
        }
        throwUnexpectedChar(ch, fullMsg);
    }

    protected void reportPrologProblem(boolean isProlog, String msg)
        throws XMLStreamException
    {
        String prefix = isProlog ? ErrorConsts.SUFFIX_IN_PROLOG : ErrorConsts.SUFFIX_IN_EPILOG;
        reportInputProblem(prefix+": "+msg);
    }
    
    protected void reportTreeUnexpChar(int ch, String msg)
        throws XMLStreamException
    {
        String fullMsg = ErrorConsts.SUFFIX_IN_TREE;
        if (msg != null) {
            fullMsg += msg;
        }
        throwUnexpectedChar(ch, fullMsg);
    }

    protected void reportInvalidNameChar(int ch, int index)
        throws XMLStreamException
    { 
        if (ch == INT_COLON) {
            reportInputProblem("Invalid colon in name: at most one colon allowed in element/attribute names, and none in PI target or entity names");
        }
        if (index == 0) {
            reportInputProblem("Invalid name start character (0x"
                           +Integer.toHexString(ch)+")");
        }
        reportInputProblem("Invalid name character (0x"
                           +Integer.toHexString(ch)+")");
    }

    protected void reportInvalidXmlChar(int ch)
        throws XMLStreamException
    {
        if (ch == 0) {
            reportInputProblem("Invalid null character");
        }
        if (ch < 32) {
            reportInputProblem("Invalid white space character (0x"
                               +Integer.toHexString(ch)+")");
        }
        reportInputProblem("Invalid xml content character (0x"
                           +Integer.toHexString(ch)+")");
    }

    protected void reportEofInName(char[] cbuf, int clen)
        throws XMLStreamException
    {
        reportInputProblem("Unexpected end-of-input in name (parsing "+ErrorConsts.tokenTypeDesc(_currToken)+")");
    }

    /**
     * Called when there's an unexpected char after PI target (non-ws,
     * not part of {@code '?>'} end marker
     */
    protected void reportMissingPISpace(int ch)
        throws XMLStreamException
    {
        throwUnexpectedChar(ch, ": expected either white space, or closing '?>'");
    }

    protected void reportDoubleHyphenInComments()
        throws XMLStreamException
    {
        reportInputProblem("String '--' not allowed in comment (missing '>'?)");
    }

    protected void reportMultipleColonsInName()
        throws XMLStreamException
    {
        reportInputProblem("Multiple colons not allowed in names");
    }

    protected void reportEntityOverflow()
        throws XMLStreamException
    {
        reportInputProblem("Illegal character entity: value higher than max allowed (0x"+Integer.toHexString(MAX_UNICODE_CHAR)+")");
    }

    protected void reportInvalidNsIndex(int index)
    {
        /* 24-Jun-2006, tatus: Stax API doesn't specify what (if anything)
         *   should be thrown. Ref. Impl. throws IndexOutOfBounds, which
         *   makes sense; could also throw IllegalArgumentException.
         */
        throw new IndexOutOfBoundsException("Illegal namespace declaration index, "+index+", current START_ELEMENT/END_ELEMENT has "+getNsCount()+" declarations");
    }

    protected void reportUnboundPrefix(PName name, boolean isAttr)
        throws XMLStreamException
    {
        reportInputProblem("Unbound namespace prefix '"+name.getPrefix()+"' (for "+(isAttr ? "attribute" : "element")+" name '"+name.getPrefixedName()+"')");
    }


    protected void reportDuplicateNsDecl(String prefix)
        throws XMLStreamException
    {
        if (prefix == null) {
            reportInputProblem("Duplicate namespace declaration for the default namespace");
        } else {
            reportInputProblem("Duplicate namespace declaration for prefix '"+prefix+"'");
        }
    }

    protected void reportIllegalNsDecl(String prefix)
        throws XMLStreamException
    {
        reportInputProblem("Illegal namespace declaration: can not re-bind prefix '"+prefix+"'");
    }

    protected void reportIllegalNsDecl(String prefix, String uri)
        throws XMLStreamException
    {
        reportInputProblem("Illegal namespace declaration: can not bind URI '"+uri+"' to prefix other than '"+prefix+"'");
    }

    protected void reportUnexpectedEndTag(String expName)
        throws XMLStreamException
    {
        reportInputProblem("Unexpected end tag: expected ");
    }

    // Thrown when ']]>' found in text content
    protected void reportIllegalCDataEnd() throws XMLStreamException
    {
        reportInputProblem("String ']]>' not allowed in textual content, except as the end marker of CDATA section");
    }

    protected void throwUnexpectedChar(int i, String msg) throws XMLStreamException
    {
        // But first, let's check illegals
        if (i < 32 && i != '\r' && i != '\n' && i != '\t') {
            throwInvalidSpace(i);
        }
        char c = (char) i;
        String excMsg = "Unexpected character "+XmlChars.getCharDesc(c)+msg;
        reportInputProblem(excMsg);
    }

    protected void throwNullChar() throws XMLStreamException
    {
        reportInputProblem("Illegal character (NULL, unicode 0) encountered: not valid in any content");
    }

    protected char handleInvalidXmlChar(int i) throws XMLStreamException
    {
        final IllegalCharHandler iHandler = _config.getIllegalCharHandler();
    	
        if (iHandler != null) {
    		    return iHandler.convertIllegalChar(i);
        }
    	
        char c = (char) i;
        if (c == CHAR_NULL) {
            throwNullChar();
        }

        String msg = "Illegal XML character ("+XmlChars.getCharDesc(c)+")";
        if (_xml11) {
            if (i < INT_SPACE) {
                msg += " [note: in XML 1.1, it could be included via entity expansion]";
            }
        }
        reportInputProblem(msg);
        
        //will not reach this block
        return (char) i;
    }

    protected void throwInvalidSpace(int i)
        throws XMLStreamException
    {
        char c = (char) i;
        if (c == CHAR_NULL) {
            throwNullChar();
        }
        String msg = "Illegal character ("+XmlChars.getCharDesc(c)+")";
        if (_xml11) {
            if (i < INT_SPACE) {
                msg += " [note: in XML 1.1, it could be included via entity expansion]";
            }
        }
        reportInputProblem(msg);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy