// src.java.com.ctc.wstx.sr.AttributeCollector Maven / Gradle / Ivy
//
// Go to download
/* Woodstox XML processor
 *
 * Copyright (c) 2004- Tatu Saloranta, [email protected]
 *
 * Licensed under the License specified in file LICENSE, included with
 * the source code.
 * You may not use this file except in compliance with the License.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.ctc.wstx.sr;

import java.io.IOException;

import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import javax.xml.namespace.QName;

import org.codehaus.stax2.ri.typed.CharArrayBase64Decoder;
import org.codehaus.stax2.ri.typed.ValueDecoderFactory;
import org.codehaus.stax2.typed.Base64Variant;
import org.codehaus.stax2.typed.TypedArrayDecoder;
import org.codehaus.stax2.typed.TypedValueDecoder;
import org.codehaus.stax2.typed.TypedXMLStreamException;

import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.sw.XmlWriter;
import com.ctc.wstx.util.StringUtil;
import com.ctc.wstx.util.StringVector;
import com.ctc.wstx.util.TextBuilder;

/**
 * Shared base class that defines API stream reader uses to communicate
 * with the attribute collector implementation, independent of whether it's
 * operating in namespace-aware or non-namespace modes.
 * Collector class is used to build up attribute lists; for the most part
 * will just hold references to few specialized {@link TextBuilder}s that
 * are used to create efficient semi-shared value Strings.
 */
public abstract class AttributeCollector
{
    /**
     * Character code of the ASCII space; the token-splitting loops of
     * this class treat any character with code at or below this value
     * as white space.
     */
    final static int INT_SPACE = 0x0020;

    /**
     * Threshold value that indicates the minimum length for list instances
     * that need a Map structure, for fast attribute access by fully-qualified
     * name.
     */
    protected final static int LONG_ATTR_LIST_LEN = 4;

    /**
     * Expected typical maximum number of attributes for any element;
     * chosen to minimize need to resize, while trying not to waste space.
     * Dynamically grown; better not to set too high to avoid excessive
     * overhead for small attribute-less documents.
     * Passed as the constructor argument of the lazily allocated
     * buffers in {@link #allocBuffers}.
     */
    protected final static int EXP_ATTR_COUNT = 12;

    /**
     * This value is used to indicate that we shouldn't keep track
     * of index of xml:id attribute -- generally done when Xml:id
     * support is disabled
     */
    protected final static int XMLID_IX_DISABLED = -2;

    /**
     * Marker value for {@link #mXmlIdAttrIndex} when xml:id tracking is
     * enabled but no xml:id attribute has been recorded for the current
     * element.
     */
    protected final static int XMLID_IX_NONE = -1;

    /*
    //////////////////////////////////////////
    // Collected attribute information:
    //////////////////////////////////////////
     */

    /**
     * Actual number of attributes collected, including attributes
     * added via default values. Used as the upper bound for index
     * checks in the accessors of this class.
     */
    protected int mAttrCount;

    /**
     * Number of attribute values actually parsed, not including
     * ones created via default value expansion. Equal to or less than
     * {@link #mAttrCount}.
     */
    protected int mNonDefCount;

    /**
     * TextBuilder into which values of all attributes are appended
     * to, including default valued ones (defaults are added after
     * explicit ones).
     * Constructed lazily, if and when needed (not needed
     * for short attribute-less docs); see {@link #allocBuffers}.
     */
    protected TextBuilder mValueBuffer = null;

    /**
     * Vector in which attribute names are added; exact number of elements
     * per attribute depends on whether namespace support is enabled or
     * not (non-namespace mode only needs one entry; namespace mode two,
     * one for prefix, one for local name).
     * Constructed lazily, if and when needed (not needed
     * for short attribute-less docs); see {@link #allocBuffers}.
     */
    protected StringVector mAttrNames = null;

    /**
     * Index of "xml:id" attribute, if one exists for the current
     * element; {@link #XMLID_IX_NONE} if none. Initialized by the
     * constructor to {@link #XMLID_IX_DISABLED} when the configuration
     * indicates xml:id typing is not to be done.
     */
    protected int mXmlIdAttrIndex;

    /*
    //////////////////////////////////////////
    // Resolved (derived) attribute information:
    //////////////////////////////////////////
     */

    /**
     * Array in which attribute value Strings are added, first time they
     * are requested. Values are first added to mValueBuffer,
     * from which a String is created, and finally substring created as
     * needed and added to this array.
     * Allocated on demand by {@link #getValue(int)} and
     * {@link #setNormalizedValue} if still null at that point.
     */
    protected String[] mAttrValues = null;

    /*
    //////////////////////////////////////////////////////////////
    // Information that defines "Map-like" data structure used for
    // quick access to attribute values by fully-qualified name
    //////////////////////////////////////////////////////////////
     */

    /**
     * Encoding of a data structure that contains mapping from
     * attribute names to attribute index in main attribute name arrays.
     *
     * Data structure contains two separate areas; main hash area (with
     * size mAttrHashSize), and remaining spillover area
     * that follows hash area up until (but not including)
     * mAttrSpillEnd index.
     * Main hash area only contains indexes (index+1; 0 signifying empty slot)
     * to actual attributes; spillover area has both hash and index for
     * any spilled entry. Spilled entries are simply stored in order
     * added, and need to be searched using linear search. In case of both
     * primary hash hits and spills, eventual comparison with the local
     * name needs to be done with actual name array.
     */
    protected int[] mAttrMap = null;

    /**
     * Size of hash area in mAttrMap; generally at least 20%
     * more than number of attributes (mAttrCount).
     */
    protected int mAttrHashSize;

    /**
     * Pointer to the int slot right after the last spill entry, in
     * mAttrMap array.
     */
    protected int mAttrSpillEnd;

    /*
    ///////////////////////////////////////////////
    // Life-cycle:
    ///////////////////////////////////////////////
     */

    /**
     * Sets up the xml:id tracking state based on reader configuration:
     * tracking starts out as "no xml:id seen" if xml:id typing is to be
     * done, and is marked as disabled otherwise.
     *
     * @param cfg Reader configuration from which the xml:id typing
     *   setting is read
     */
    protected AttributeCollector(ReaderConfig cfg)
    {
        if (cfg.willDoXmlIdTyping()) {
            mXmlIdAttrIndex = XMLID_IX_NONE;
        } else {
            mXmlIdAttrIndex = XMLID_IX_DISABLED;
        }
    }

    /**
     * Method called to allow reusing of collector, usually right before
     * starting collecting attributes for a new start tag.
     * Abstract here; which pieces of state get cleared is up to the
     * concrete sub-class.
     */
    protected abstract void reset();

    /*
    ///////////////////////////////////////////////
    // Public accessors (for stream reader)
    ///////////////////////////////////////////////
     */

    /**
     * Abstract accessor for the number of namespace declarations of the
     * current element; implemented by sub-classes.
     *
     * @return Number of namespace declarations collected, including
     *   possible default namespace declaration
     */
    protected abstract int getNsCount();

    /**
     * Abstract accessor implemented by sub-classes.
     * NOTE(review): presumably returns the prefix of the namespace
     * declaration at given index, with index ranging over declarations
     * counted by {@link #getNsCount} -- confirm against implementations.
     */
    public abstract String getNsPrefix(int index);

    /**
     * Abstract accessor implemented by sub-classes.
     * NOTE(review): presumably returns the URI of the namespace
     * declaration at given index, with index ranging over declarations
     * counted by {@link #getNsCount} -- confirm against implementations.
     */
    public abstract String getNsURI(int index);

    // // // Direct access to attribute/NS prefixes/localnames/URI

    /**
     * @return Total number of attributes collected for the current
     *   element (current value of {@link #mAttrCount}), which by that
     *   field's contract includes attributes added via default values
     */
    public final int getCount()
    {
        return this.mAttrCount;
    }

    /**
     * Accessor for the count of explicitly specified attributes
     * (current value of {@link #mNonDefCount}).
     *
     * @return Number of attributes that were explicitly specified; may
     *  be less than the total count due to attributes created using
     *  attribute default values
     */
    public int getSpecifiedCount()
    {
        return this.mNonDefCount;
    }

    /**
     * Abstract accessor implemented by sub-classes.
     * NOTE(review): presumably returns the namespace prefix of the
     * attribute at given index -- confirm against implementations.
     */
    public abstract String getPrefix(int index);

    /**
     * Abstract accessor implemented by sub-classes.
     * NOTE(review): presumably returns the local name of the
     * attribute at given index -- confirm against implementations.
     */
    public abstract String getLocalName(int index);

    /**
     * Abstract accessor implemented by sub-classes.
     * NOTE(review): presumably returns the namespace URI of the
     * attribute at given index -- confirm against implementations.
     */
    public abstract String getURI(int index);

    /**
     * Abstract accessor implemented by sub-classes; the result is also
     * used by {@link #throwDupAttr} to name the attribute in its
     * error message.
     * NOTE(review): presumably returns the qualified name of the
     * attribute at given index -- confirm against implementations.
     */
    public abstract QName getQName(int index);

    /**
     * Returns the value of the attribute at given index as a String.
     * The String is fetched from the shared value buffer on first
     * access, and cached in {@link #mAttrValues} for later calls.
     *<p>
     * Note: this method lives in the base class, and is final, for
     * performance reasons: JIT may then be able to inline it even when
     * called through a base class reference, which matters as this is
     * likely the most frequently called method of collector instances.
     *
     * @param index Index of the attribute; validated against
     *   {@link #mAttrCount}
     *
     * @return Value of the attribute
     *
     * @throws IllegalArgumentException (via {@link #throwIndex}) if the
     *   index is out of range
     */
    public final String getValue(int index)
    {
        final boolean inRange = (index >= 0) && (index < mAttrCount);
        if (!inRange) {
            throwIndex(index);
        }
        // Sub-classes' resolveXxx() methods are expected to have (re)sized
        // the cache array; so here it is either missing or big enough
        if (mAttrValues == null) {
            mAttrValues = new String[mAttrCount];
        }
        final String cached = mAttrValues[index];
        if (cached != null) {
            return cached;
        }
        // First access: materialize from the buffer, remember for reuse
        final String fresh = mValueBuffer.getEntry(index);
        mAttrValues[index] = fresh;
        return fresh;
    }

    /**
     * Abstract by-name lookup implemented by sub-classes.
     * NOTE(review): presumably returns the value of the attribute with
     * given namespace URI and local name; behavior when there is no
     * match is not visible here -- confirm against implementations.
     */
    public abstract String getValue(String nsURI, String localName);

    /**
     * Checks whether the attribute at given index falls within the
     * first {@link #mNonDefCount} attributes, that is, the ones that
     * were parsed rather than created via default value expansion.
     * No range check is done on the index.
     *
     * @param index Index of the attribute to check
     *
     * @return True if index is below the count of parsed attributes
     */
    public final boolean isSpecified(int index)
    {
        return mNonDefCount > index;
    }

    /**
     * @return Current value of {@link #mXmlIdAttrIndex}: index of the
     *   xml:id attribute if one has been recorded, or one of the marker
     *   values {@link #XMLID_IX_NONE} / {@link #XMLID_IX_DISABLED}
     */
    public final int getXmlIdAttrIndex()
    {
        return this.mXmlIdAttrIndex;
    }

    /*
    //////////////////////////////////////////////////////
    // Type-safe accessors to support TypedXMLStreamReader
    //////////////////////////////////////////////////////
     */

    /**
     * Decodes the complete value of the attribute at given index as
     * one typed value, using the decoder passed in.
     * Leading and trailing white space is stripped before the decoder
     * is called; if nothing remains, decoder's
     * <code>handleEmptyValue()</code> is called instead.
     *
     * @param index Index of the attribute; validated against
     *   {@link #mAttrCount}
     * @param tvd Decoder that receives the trimmed character range
     *
     * @throws IllegalArgumentException (via {@link #throwIndex}) if the
     *   index is out of range
     */
    public final void decodeValue(int index, TypedValueDecoder tvd)
        throws IllegalArgumentException
    {
        if (!(index >= 0 && index < mAttrCount)) {
            throwIndex(index);
        }
        // Working on the raw char array should beat going via a String,
        // even if one has already been constructed
        final char[] chars = mValueBuffer.getCharBuffer();
        int first = mValueBuffer.getOffset(index);
        final int limit = mValueBuffer.getOffset(index+1);

        // Strip leading white space
        while (first < limit && StringUtil.isSpace(chars[first])) {
            ++first;
        }
        if (first >= limit) { // empty, or nothing but white space
            tvd.handleEmptyValue();
            return;
        }
        // Strip trailing white space; char at 'first' is known non-space
        int last = limit - 1;
        while (last > first && StringUtil.isSpace(chars[last])) {
            --last;
        }
        tvd.decode(chars, first, last+1);
    }

    /**
     * Decodes the value of the attribute at given index as a sequence
     * of zero or more white space separated tokens, passing each token
     * to the array decoder given.
     *
     * @param index Index of the attribute; validated against
     *   {@link #mAttrCount}
     * @param tad Decoder to pass tokens to
     * @param rep Reporter used for location information if decoding
     *   fails
     *
     * @return Number of tokens decoded
     *
     * @throws XMLStreamException If a token can not be decoded
     */
    public final int decodeValues(int index, TypedArrayDecoder tad,
                                   InputProblemReporter rep)
        throws XMLStreamException
    {
        if (!(index >= 0 && index < mAttrCount)) {
            throwIndex(index);
        }
        // Raw chars are faster than String; and since tokenization skips
        // white space anyway, no separate trimming is needed
        final char[] chars = mValueBuffer.getCharBuffer();
        final int from = mValueBuffer.getOffset(index);
        final int to = mValueBuffer.getOffset(index+1);
        return decodeValues(tad, rep, chars, from, to);
    }

    /**
     * Decodes the value of the attribute at given index as base64
     * encoded binary content, using the decoder and variant passed in.
     *
     * @param v Base64 variant passed on to the decoder
     * @param index Index of the attribute; validated against
     *   {@link #mAttrCount}
     * @param dec Decoder that is initialized with the value's character
     *   range, and then asked to decode it completely
     * @param rep Reporter used for location information if decoding
     *   fails
     *
     * @return Result of decoder's <code>decodeCompletely()</code>
     *
     * @throws XMLStreamException ({@link TypedXMLStreamException}) if the
     *   decoder rejects the content with an
     *   {@link IllegalArgumentException}
     */
    public final byte[] decodeBinary(Base64Variant v, int index, CharArrayBase64Decoder dec,
                                     InputProblemReporter rep)
        throws XMLStreamException
    {
        if (!(index >= 0 && index < mAttrCount)) {
            throwIndex(index);
        }
        // Even if a String existed for the value, decoding straight from
        // the char array is the faster route
        final char[] chars = mValueBuffer.getCharBuffer();
        final int from = mValueBuffer.getOffset(index);
        final int count = mValueBuffer.getOffset(index+1) - from;
        dec.init(v, true, chars, from, count, null);
        try {
            return dec.decodeCompletely();
        } catch (IllegalArgumentException iae) {
            // Unchecked decoding problem has to surface as a checked
            // stream exception, with the raw value included
            throw new TypedXMLStreamException(new String(chars, from, count),
                                              iae.getMessage(), rep.getLocation(), iae);
        }
    }

    /**
     * Tokenizes the given character range on white space (any character
     * at or below {@link #INT_SPACE}) and passes each token to the
     * array decoder.
     * When the decoder returns true from <code>decodeValue</code>
     * (taken here to mean it has no more room), {@link #checkExpand}
     * is tried; decoding stops early if expansion is not possible.
     *
     * @param tad Decoder to pass tokens to
     * @param rep Reporter used for location information if decoding
     *   fails
     * @param buf Buffer that contains the characters to tokenize
     * @param ptr Offset of the first character to process
     * @param end Offset one past the last character to process
     *
     * @return Number of tokens passed to the decoder
     *
     * @throws XMLStreamException ({@link TypedXMLStreamException}) if the
     *   decoder rejects a token with an {@link IllegalArgumentException};
     *   the exception carries exactly the offending token as its
     *   lexical value
     */
    private final int decodeValues(TypedArrayDecoder tad,
                                   InputProblemReporter rep,
                                   final char[] buf, int ptr, final int end)
        throws XMLStreamException
    {
        int start = ptr;
        /* Exclusive end of the token currently being decoded. Kept outside
         * the loop so that the error path can quote the token itself: 'ptr'
         * has by then been advanced one past the token end, and using it
         * would add a trailing character to the reported value, or even
         * point beyond the end of the buffer.
         */
        int tokenEnd = ptr;
        int count = 0;

        try {
            decode_loop:
            while (ptr < end) {
                // First, any space to skip?
                while (buf[ptr] <= INT_SPACE) {
                    if (++ptr >= end) {
                        break decode_loop;
                    }
                }
                // Then let's figure out non-space char (token)
                start = ptr;
                ++ptr;
                while (ptr < end && buf[ptr] > INT_SPACE) {
                    ++ptr;
                }
                tokenEnd = ptr;
                ++ptr; // to skip trailing space (or, beyond end)
                // Ok, decode... any more room?
                ++count;
                if (tad.decodeValue(buf, start, tokenEnd)) {
                    if (!checkExpand(tad)) {
                        break;
                    }
                }
            }
        } catch (IllegalArgumentException iae) {
            // Need to convert to a checked stream exception
            Location loc = rep.getLocation();
            String lexical = new String(buf, start, tokenEnd - start);
            throw new TypedXMLStreamException(lexical, iae.getMessage(), loc, iae);
        }
        return count;
    }

    /**
     * String-based variant of the tokenizing decode method: splits the
     * given attribute value on white space (any character at or below
     * {@link #INT_SPACE}) and passes each token, as a String, to the
     * array decoder. When the decoder returns true from
     * <code>decodeValue</code>, {@link #checkExpand} is tried, and
     * decoding stops early if expansion is not possible.
     *<p>
     * Note: not currently called from within this class.
     *
     * @param tad Decoder to pass tokens to
     * @param rep Reporter used for location information if decoding
     *   fails
     * @param attrValue Attribute value to tokenize
     *
     * @return Number of tokens passed to the decoder
     *
     * @throws XMLStreamException ({@link TypedXMLStreamException}) if the
     *   decoder rejects a token with an {@link IllegalArgumentException}
     */
    private final int decodeValues(TypedArrayDecoder tad,
                                   InputProblemReporter rep,
                                   String attrValue)
        throws XMLStreamException
    {
        final int len = attrValue.length();
        int pos = 0;
        int decoded = 0;
        // Most recently extracted token; reported if decoding fails
        String token = null;

        try {
            while (pos < len) {
                // White space between tokens is simply skipped
                if (attrValue.charAt(pos) <= INT_SPACE) {
                    ++pos;
                    continue;
                }
                // Token starts here; find where it ends
                final int tokenStart = pos;
                do {
                    ++pos;
                } while (pos < len && attrValue.charAt(pos) > INT_SPACE);
                token = attrValue.substring(tokenStart, pos);
                ++pos; // step over the delimiter (or, past the end)
                ++decoded;
                if (tad.decodeValue(token) && !checkExpand(tad)) {
                    break;
                }
            }
        } catch (IllegalArgumentException iae) {
            // Unchecked decoding problem has to surface as a checked
            // stream exception
            final Location where = rep.getLocation();
            throw new TypedXMLStreamException(token, iae.getMessage(), where, iae);
        }
        return decoded;
    }

    /**
     * Helper that tries to expand the result buffer of the given array
     * decoder. Expansion is only possible for decoders that extend
     * {@link ValueDecoderFactory.BaseArrayDecoder}; the type check is
     * a bit ugly, but simpler than separately typed instances, and the
     * method is called rarely enough for the cost of instanceof not
     * to matter.
     *
     * @param tad Decoder to try to expand
     *
     * @return True if decoder was asked to expand; false if it is not
     *   of the expandable type
     */
    private final boolean checkExpand(TypedArrayDecoder tad)
    {
        if (!(tad instanceof ValueDecoderFactory.BaseArrayDecoder)) {
            return false;
        }
        ValueDecoderFactory.BaseArrayDecoder expandable = (ValueDecoderFactory.BaseArrayDecoder) tad;
        expandable.expand();
        return true;
    }

    /*
    ///////////////////////////////////////////////
    // Accessors for accessing helper objects
    ///////////////////////////////////////////////
     */

    /**
     * Abstract accessor for a helper builder, implemented by sub-classes.
     * NOTE(review): presumably returns the builder into which the URI of
     * a default namespace declaration is collected -- confirm against
     * implementations.
     */
    public abstract TextBuilder getDefaultNsBuilder();

    /**
     * Abstract accessor for a helper builder, implemented by sub-classes.
     * NOTE(review): presumably returns the builder into which the URI of
     * the namespace declaration for given prefix (passed as
     * <code>localName</code>) is collected -- confirm against
     * implementations.
     */
    public abstract TextBuilder getNsBuilder(String localName);

    /**
     * Abstract accessor for a helper builder, implemented by sub-classes.
     * NOTE(review): presumably registers an attribute with given prefix
     * and local name, and returns the builder into which its value is to
     * be appended -- confirm against implementations.
     */
    public abstract TextBuilder getAttrBuilder(String attrPrefix, String attrLocalName);

    /**
     * Method needed by event builder code; called to build a non-transient
     * attribute container to use by a start element event.
     * Abstract here; construction of the container is left to sub-classes.
     */
    public abstract ElemAttrs buildAttrOb();

    /*
    ///////////////////////////////////////////////
    // Validation methods:
    ///////////////////////////////////////////////
     */

    /**
     * Low-level accessor that gives direct access to the underlying
     * 'raw' attribute value buffer ({@link #mValueBuffer}). Meant for
     * attribute validation code, which may need it for certain types
     * of attributes; generally only id and idref/idrefs ones.
     *
     * @return Shared value buffer; null if it has not been allocated yet
     */
    public final TextBuilder getAttrBuilder() {
        return this.mValueBuffer;
    }

    /**
     * Low-level mutator that stores given String directly as the cached
     * value of the attribute at given index, allocating the cache array
     * first if need be. Meant for attribute validation code that wants
     * to handle the whole validation and normalization process by
     * itself; generally only used for id and idref/idrefs attributes,
     * as those values are usually normalized.
     * No range check is done on the index.
     *
     * @param index Index of the attribute to set value of
     * @param value Normalized value to store
     */
    public final void setNormalizedValue(int index, String value)
    {
        String[] values = mAttrValues;
        if (values == null) {
            values = new String[mAttrCount];
            mAttrValues = values;
        }
        values[index] = value;
    }

    /*
    ///////////////////////////////////////////////
    // Package/core methods:
    ///////////////////////////////////////////////
     */

    /**
     * Helper called by accessors when an attribute index is out of
     * range; builds the error message and throws the exception.
     *
     * @param index The offending index
     *
     * @throws IllegalArgumentException Always
     */
    protected void throwIndex(int index)
    {
        final String msg = "Invalid index "+index+"; current element has only "+getCount()+" attributes";
        throw new IllegalArgumentException(msg);
    }

    /**
     * Accessor for the vector that holds attribute names
     * ({@link #mAttrNames}); called by the {@link InputElementStack}
     * instance that "owns" this attribute collector.
     *
     * @return Name vector; null if it has not been allocated yet
     */
    public final StringVector getNameList()
    {
        return this.mAttrNames;
    }

    /**
     * Method that basically serializes the specified (read-in) attribute
     * using Writers provided. Serialization is done by
     * writing out (fully-qualified) name
     * of the attribute, followed by the equals sign and quoted value.
     * Abstract here; implemented by sub-classes.
     *
     * @param index Index of the attribute to write
     * @param xw Writer to use for output
     */
    public abstract void writeAttribute(int index, XmlWriter xw)
        throws IOException, XMLStreamException;

    /**
     * Method called to construct the buffers that are allocated lazily:
     * the value buffer and the attribute name vector. Each one is only
     * created if it does not exist yet, so repeated calls are harmless.
     */
    protected final void allocBuffers()
    {
        final boolean needValueBuffer = (mValueBuffer == null);
        if (needValueBuffer) {
            mValueBuffer = new TextBuilder(EXP_ATTR_COUNT);
        }
        final boolean needNames = (mAttrNames == null);
        if (needNames) {
            mAttrNames = new StringVector(EXP_ATTR_COUNT);
        }
    }

    /*
    ///////////////////////////////////////////////
    // Internal methods:
    ///////////////////////////////////////////////
     */


    /**
     * Method that can be used to get the specified attribute value,
     * by getting it written using Writer passed in. Can potentially
     * save one String allocation, since no (temporary) Strings need
     * to be created.
     */
    /*
    protected final void writeValue(int index, Writer w)
        throws IOException
    {
        mValueBuffer.getEntry(index, w);
    }
    */

    /**
     * Creates a bigger copy of the given array: the result has twice
     * the capacity of the original (or a small minimum capacity, if the
     * original is empty), with the existing entries copied to the same
     * indexes and the remaining slots left null.
     *
     * @param old Array to grow; not modified
     *
     * @return Newly allocated array, strictly longer than
     *   <code>old</code>, that starts with the contents of
     *   <code>old</code>
     */
    protected static String[] resize(String[] old) {
        final int len = old.length;
        // Doubling zero would not grow anything; so need a minimum size
        final int newLen = (len == 0) ? 4 : (len << 1);
        String[] result = new String[newLen];
        System.arraycopy(old, 0, result, 0, len);
        return result;
    }

    /**
     * Helper called to report a duplicate attribute: builds a message
     * that includes the qualified name of the attribute at given index,
     * and passes it to the problem reporter as a parse error.
     *
     * @param rep Reporter to report the problem through
     * @param index Index of the duplicate attribute
     *
     * @throws XMLStreamException As thrown by the reporter
     */
    protected void throwDupAttr(InputProblemReporter rep, int index)
        throws XMLStreamException
    {
        final String msg = "Duplicate attribute '"+getQName(index)+"'.";
        rep.throwParseError(msg);
    }
}