All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.java.com.ctc.wstx.sr.AttributeCollector Maven / Gradle / Ivy

/* Woodstox XML processor
 *
 * Copyright (c) 2004- Tatu Saloranta, [email protected]
 *
 * Licensed under the License specified in file LICENSE, included with
 * the source code.
 * You may not use this file except in compliance with the License.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.ctc.wstx.sr;

import java.io.IOException;

import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;
import javax.xml.namespace.QName;

import org.codehaus.stax2.ri.typed.CharArrayBase64Decoder;
import org.codehaus.stax2.ri.typed.ValueDecoderFactory;
import org.codehaus.stax2.typed.Base64Variant;
import org.codehaus.stax2.typed.TypedArrayDecoder;
import org.codehaus.stax2.typed.TypedValueDecoder;
import org.codehaus.stax2.typed.TypedXMLStreamException;

import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.sw.XmlWriter;
import com.ctc.wstx.util.StringUtil;
import com.ctc.wstx.util.StringVector;
import com.ctc.wstx.util.TextBuilder;

/**
 * Shared base class that defines API stream reader uses to communicate
 * with the attribute collector implementation, independent of whether it's
 * operating in namespace-aware or non-namespace modes.
 * Collector class is used to build up attribute lists; for the most part
 * will just hold references to few specialized {@link TextBuilder}s that
 * are used to create efficient semi-shared value Strings.
 */
public abstract class AttributeCollector
{
    final static int INT_SPACE = 0x0020;

    /**
     * Threshold value that indicates minimum length for lists instances
     * that need a Map structure, for fast attribute access by fully-qualified
     * name.
     */
    protected final static int LONG_ATTR_LIST_LEN = 4;

    /**
     * Expected typical maximum number of attributes for any element;
     * chosen to minimize need to resize, while trying not to waste space.
     * Dynamically grown; better not to set too high to avoid excessive
     * overhead for small attribute-less documents.
     */
    protected final static int EXP_ATTR_COUNT = 12;

    /**
     * This value is used to indicate that we shouldn't keep track
     * of index of xml:id attribute -- generally done when Xml:id
     * support is disabled
     */
    protected final static int XMLID_IX_DISABLED = -2;

    protected final static int XMLID_IX_NONE = -1;

    /*
    //////////////////////////////////////////
    // Collected attribute information:
    //////////////////////////////////////////
     */

    /**
     * Actual number of attributes collected, including attributes
     * added via default values.
     */
    protected int mAttrCount;

    /**
     * Number of attribute values actually parsed, not including
     * ones created via default value expansion. Equal to or less than
     * {@link #mAttrCount}.
     */
    protected int mNonDefCount;

    /**
     * TextBuilder into which values of all attributes are appended
     * to, including default valued ones (defaults are added after
     * explicit ones).
     * Constructed lazily, if and when needed (not needed
     * for short attribute-less docs)
     */
    protected TextBuilder mValueBuffer = null;

    /**
     * Vector in which attribute names are added; exact number of elements
     * per attribute depends on whether namespace support is enabled or
     * not (non-namespace mode only needs one entry; namespace mode two,
     * one for prefix, one for local name).
     * Constructed lazily, if and when needed (not needed
     * for short attribute-less docs)
     */
    protected StringVector mAttrNames = null;

    /**
     * Index of "xml:id" attribute, if one exists for the current
     * element; {@link #XMLID_IX_NONE} if none.
     */
    protected int mXmlIdAttrIndex;

    /*
    //////////////////////////////////////////
    // Resolved (derived) attribute information:
    //////////////////////////////////////////
     */

    /**
     * Array in which attribute value Strings are added, first time they
     * are requested. Values are first added to mValueBuffer,
     * from which a String is created, and finally substring created as
     * needed and added to this array.
     */
    protected String[] mAttrValues = null;

    /*
    //////////////////////////////////////////////////////////////
    // Information that defines "Map-like" data structure used for
    // quick access to attribute values by fully-qualified name
    //////////////////////////////////////////////////////////////
     */

    /**
     * Encoding of a data structure that contains mapping from
     * attribute names to attribute index in main attribute name arrays.
     *

* Data structure contains two separate areas; main hash area (with * size mAttrHashSize), and remaining spillover area * that follows hash area up until (but not including) * mAttrSpillEnd index. * Main hash area only contains indexes (index+1; 0 signifying empty slot) * to actual attributes; spillover area has both hash and index for * any spilled entry. Spilled entries are simply stored in order * added, and need to be searched using linear search. In case of both * primary hash hits and spills, eventual comparison with the local * name needs to be done with actual name array. */ protected int[] mAttrMap = null; /** * Size of hash area in mAttrMap; generally at least 20% * more than number of attributes (mAttrCount). */ protected int mAttrHashSize; /** * Pointer to int slot right after last spill entr, in * mAttrMap array. */ protected int mAttrSpillEnd; /* /////////////////////////////////////////////// // Life-cycle: /////////////////////////////////////////////// */ protected AttributeCollector(ReaderConfig cfg) { mXmlIdAttrIndex = cfg.willDoXmlIdTyping() ? XMLID_IX_NONE : XMLID_IX_DISABLED; } /** * Method called to allow reusing of collector, usually right before * starting collecting attributes for a new start tag. */ protected abstract void reset(); /* /////////////////////////////////////////////// // Public accesors (for stream reader) /////////////////////////////////////////////// */ /** * @return Number of namespace declarations collected, including * possible default namespace declaration */ protected abstract int getNsCount(); public abstract String getNsPrefix(int index); public abstract String getNsURI(int index); // // // Direct access to attribute/NS prefixes/localnames/URI public final int getCount() { return mAttrCount; } /** * @return Number of attributes that were explicitly specified; may * be less than the total count due to attributes created using * attribute default values */ public int getSpecifiedCount() { return mNonDefCount; } public abstract String getPrefix(int index); public abstract String getLocalName(int index); public abstract String getURI(int index); public abstract QName getQName(int index); /** *

* Note: the main reason this method is defined at this level, and * made final, is performance. JIT may be able to fully inline this * method, even when reference is via this base class. This is important * since this is likely to be the most often called method of the * collector instances. */ public final String getValue(int index) { if (index < 0 || index >= mAttrCount) { throwIndex(index); } /* Note: array has been properly (re)sized by sub-classes * resolveXxx() method, so it's either null or properly sized * by now */ if (mAttrValues == null) { mAttrValues = new String[mAttrCount]; } String str = mAttrValues[index]; if (str == null) { str = mValueBuffer.getEntry(index); mAttrValues[index] = str; } return str; } public abstract String getValue(String nsURI, String localName); public final boolean isSpecified(int index) { return (index < mNonDefCount); } public final int getXmlIdAttrIndex() { return mXmlIdAttrIndex; } /* ////////////////////////////////////////////////////// // Type-safe accessors to support TypedXMLStreamReader ////////////////////////////////////////////////////// */ /** * Method called to decode the whole attribute value as a single * typed value. * Decoding is done using the decoder provided. */ public final void decodeValue(int index, TypedValueDecoder tvd) throws IllegalArgumentException { if (index < 0 || index >= mAttrCount) { throwIndex(index); } /* Should be faster to pass the char array even if we might * have a String */ // Either way, need to trim before passing: char[] buf = mValueBuffer.getCharBuffer(); int start = mValueBuffer.getOffset(index); int end = mValueBuffer.getOffset(index+1); while (true) { if (start >= end) { tvd.handleEmptyValue(); return; } if (!StringUtil.isSpace(buf[start])) { break; } ++start; } // Trailing space? while (--end > start && StringUtil.isSpace(buf[end])) { } tvd.decode(buf, start, end+1); } /** * Method called to decode the attribute value that consists of * zero or more space-separated tokens. * Decoding is done using the decoder provided. * @return Number of tokens decoded */ public final int decodeValues(int index, TypedArrayDecoder tad, InputProblemReporter rep) throws XMLStreamException { if (index < 0 || index >= mAttrCount) { throwIndex(index); } // Char[] faster than String... and no need to trim here: return decodeValues(tad, rep, mValueBuffer.getCharBuffer(), mValueBuffer.getOffset(index), mValueBuffer.getOffset(index+1)); } public final byte[] decodeBinary(Base64Variant v, int index, CharArrayBase64Decoder dec, InputProblemReporter rep) throws XMLStreamException { if (index < 0 || index >= mAttrCount) { throwIndex(index); } /* No point in trying to use String representation, even if one * available, faster to process from char[] */ char[] cbuf = mValueBuffer.getCharBuffer(); int offset = mValueBuffer.getOffset(index); int len = mValueBuffer.getOffset(index+1) - offset; dec.init(v, true, cbuf, offset, len, null); try { return dec.decodeCompletely(); } catch (IllegalArgumentException iae) { // Need to convert to a checked stream exception String lexical = new String(cbuf, offset, len); throw new TypedXMLStreamException(lexical, iae.getMessage(), rep.getLocation(), iae); } } private final int decodeValues(TypedArrayDecoder tad, InputProblemReporter rep, final char[] buf, int ptr, final int end) throws XMLStreamException { int start = ptr; int count = 0; try { decode_loop: while (ptr < end) { // First, any space to skip? while (buf[ptr] <= INT_SPACE) { if (++ptr >= end) { break decode_loop; } } // Then let's figure out non-space char (token) start = ptr; ++ptr; while (ptr < end && buf[ptr] > INT_SPACE) { ++ptr; } int tokenEnd = ptr; ++ptr; // to skip trailing space (or, beyond end) // Ok, decode... any more room? ++count; if (tad.decodeValue(buf, start, tokenEnd)) { if (!checkExpand(tad)) { break; } } } } catch (IllegalArgumentException iae) { // Need to convert to a checked stream exception Location loc = rep.getLocation(); String lexical = new String(buf, start, (ptr-start)); throw new TypedXMLStreamException(lexical, iae.getMessage(), loc, iae); } return count; } private final int decodeValues(TypedArrayDecoder tad, InputProblemReporter rep, String attrValue) throws XMLStreamException { int ptr = 0; int start = 0; final int end = attrValue.length(); String lexical = null; int count = 0; try { decode_loop: while (ptr < end) { // First, any space to skip? while (attrValue.charAt(ptr) <= INT_SPACE) { if (++ptr >= end) { break decode_loop; } } // Then let's figure out non-space char (token) start = ptr; ++ptr; while (ptr < end && attrValue.charAt(ptr) > INT_SPACE) { ++ptr; } int tokenEnd = ptr; ++ptr; // to skip trailing space (or, beyond end) // And there we have it lexical = attrValue.substring(start, tokenEnd); ++count; if (tad.decodeValue(lexical)) { if (!checkExpand(tad)) { break; } } } } catch (IllegalArgumentException iae) { // Need to convert to a checked stream exception Location loc = rep.getLocation(); throw new TypedXMLStreamException(lexical, iae.getMessage(), loc, iae); } return count; } /** * Internal method used to see if we can expand the buffer that * the array decoder has. Bit messy, but simpler than having * separately typed instances; and called rarely so that performance * downside of instanceof is irrelevant. */ private final boolean checkExpand(TypedArrayDecoder tad) { if (tad instanceof ValueDecoderFactory.BaseArrayDecoder) { ((ValueDecoderFactory.BaseArrayDecoder) tad).expand(); return true; } return false; } /* /////////////////////////////////////////////// // Accessors for accessing helper objects /////////////////////////////////////////////// */ public abstract TextBuilder getDefaultNsBuilder(); public abstract TextBuilder getNsBuilder(String localName); public abstract TextBuilder getAttrBuilder(String attrPrefix, String attrLocalName); /** * Method needed by event builder code; called to build a non-transient * attribute container to use by a start element event. */ public abstract ElemAttrs buildAttrOb(); /* /////////////////////////////////////////////// // Validation methods: /////////////////////////////////////////////// */ /** * Low-level accessor method that attribute validation code may call * for certain types of attributes; generally only for id and idref/idrefs * attributes. It returns the underlying 'raw' attribute value buffer * for direct access. */ public final TextBuilder getAttrBuilder() { return mValueBuffer; } /** * Low-level mutator method that attribute validation code may call * for certain types of attributes, when it wants to handle the whole * validation and normalization process by itself. It is generally * only called for id and idref/idrefs attributes, as those values * are usually normalized. */ public final void setNormalizedValue(int index, String value) { if (mAttrValues == null) { mAttrValues = new String[mAttrCount]; } mAttrValues[index] = value; } /* /////////////////////////////////////////////// // Package/core methods: /////////////////////////////////////////////// */ protected void throwIndex(int index) { throw new IllegalArgumentException("Invalid index "+index+"; current element has only "+getCount()+" attributes"); } /** * Method called by {@link InputElementStack} instance that "owns" this * attribute collector; */ public final StringVector getNameList() { return mAttrNames; } /** * Method that basically serializes the specified (read-in) attribute * using Writers provided. Serialization is done by * writing out (fully-qualified) name * of the attribute, followed by the equals sign and quoted value. */ public abstract void writeAttribute(int index, XmlWriter xw) throws IOException, XMLStreamException; /** * Method called to initialize buffers that need not be immediately * initialized */ protected final void allocBuffers() { if (mValueBuffer == null) { mValueBuffer = new TextBuilder(EXP_ATTR_COUNT); } if (mAttrNames == null) { mAttrNames = new StringVector(EXP_ATTR_COUNT); } } /* /////////////////////////////////////////////// // Internal methods: /////////////////////////////////////////////// */ /** * Method that can be used to get the specified attribute value, * by getting it written using Writer passed in. Can potentially * save one String allocation, since no (temporary) Strings need * to be created. */ /* protected final void writeValue(int index, Writer w) throws IOException { mValueBuffer.getEntry(index, w); } */ protected static String[] resize(String[] old) { int len = old.length; String[] result = new String[len]; System.arraycopy(old, 0, result, 0, len); return result; } protected void throwDupAttr(InputProblemReporter rep, int index) throws XMLStreamException { rep.throwParseError("Duplicate attribute '"+getQName(index)+"'."); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy