All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.fasterxml.aalto.in.ReaderConfig Maven / Gradle / Ivy

There is a newer version: 1.3.3
Show newest version
package com.fasterxml.aalto.in;

import java.lang.ref.SoftReference;
import java.util.*;

import javax.xml.stream.*;

import org.codehaus.stax2.XMLInputFactory2;

import com.fasterxml.aalto.impl.CommonConfig;
import com.fasterxml.aalto.util.*;

/**
 * This is the shared configuration object passed by the factory to reader,
 * and by reader to whoever needs it (scanners at least).
 */
public final class ReaderConfig
    extends CommonConfig
{
    public final static int DEFAULT_SMALL_BUFFER_LEN = 60;

    public final static int DEFAULT_CHAR_BUFFER_LEN = 4000;

    public final static int STANDALONE_UNKNOWN = 0;
    public final static int STANDALONE_YES = 1;
    public final static int STANDALONE_NO = 2;

    // Standard Stax flags:
    final static int F_NS_AWARE = 0x0001;
    final static int F_COALESCING = 0x0002;
    final static int F_DTD_AWARE = 0x0004;
    final static int F_DTD_VALIDATING = 0x0008;
    final static int F_EXPAND_ENTITIES = 0x0010;

    // Standard Stax2 flags:
    final static int F_LAZY_PARSING = 0x0100;
    final static int F_INTERN_NAMES = 0x0200;
    final static int F_INTERN_NS_URIS = 0x0400;
    final static int F_REPORT_CDATA = 0x0800;
    final static int F_PRESERVE_LOCATION = 0x1000;
    final static int F_AUTO_CLOSE_INPUT = 0x2000;

    // Custom flags:

    /**
     * These are the default settigs for XMLInputFactory.
     */
    final static int DEFAULT_FLAGS =
        F_NS_AWARE
        | F_DTD_AWARE
        | F_EXPAND_ENTITIES
        | F_LAZY_PARSING
        // by default we do intern names, ns uris...
        | F_INTERN_NAMES
        | F_INTERN_NS_URIS
        // and will report CDATA as such (and not as CHARACTERS)
        | F_REPORT_CDATA
        | F_PRESERVE_LOCATION
        ;

    private final static HashMap sProperties;
    static {
        sProperties = new HashMap();
        /* 28-Oct-2006, tatus: Let's recognize it, but not allow to be
         *   disabled. Can/needs to be changed if we'll support it.
         */
        sProperties.put(XMLInputFactory.IS_NAMESPACE_AWARE,
                      Boolean.TRUE);
        sProperties.put(XMLInputFactory.IS_VALIDATING, 
                      //Boolean.FALSE);
                      new Integer(F_DTD_VALIDATING));
        sProperties.put(XMLInputFactory.IS_COALESCING, Integer.valueOf(F_COALESCING));
        sProperties.put(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Integer.valueOf(F_EXPAND_ENTITIES));
        sProperties.put(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        sProperties.put(XMLInputFactory.SUPPORT_DTD, Integer.valueOf(F_DTD_AWARE));
        sProperties.put(XMLInputFactory.REPORTER, null);
        sProperties.put(XMLInputFactory.RESOLVER, null);
        sProperties.put(XMLInputFactory.ALLOCATOR, null);

        // // // Stax2:
        sProperties.put(XMLInputFactory2.P_LAZY_PARSING, Integer.valueOf(F_LAZY_PARSING));
        sProperties.put(XMLInputFactory2.P_INTERN_NAMES, Integer.valueOf(F_INTERN_NAMES));
        sProperties.put(XMLInputFactory2.P_INTERN_NS_URIS, Integer.valueOf(F_INTERN_NS_URIS));
        sProperties.put(XMLInputFactory2.P_AUTO_CLOSE_INPUT, Integer.valueOf(F_AUTO_CLOSE_INPUT));
        sProperties.put(XMLInputFactory2.P_PRESERVE_LOCATION, Integer.valueOf(F_PRESERVE_LOCATION));

        // (ones with fixed defaults)

        /* Should we ever support this property? For now, we really shouldn't
         * report white space in prolog/epilog, as it's not really part
         * of document content.
         */
        sProperties.put(XMLInputFactory2.P_REPORT_PROLOG_WHITESPACE, Boolean.FALSE);
        sProperties.put(XMLInputFactory2.P_REPORT_CDATA, Integer.valueOf(F_REPORT_CDATA));

        sProperties.put(XMLInputFactory2.P_PRESERVE_LOCATION, Boolean.TRUE);

        // !!! Not really implemented, but let's recognize it
        sProperties.put(XMLInputFactory2.P_DTD_OVERRIDE, null);

        // Custom ones?
    }

    /**
     * A single encoding context instance is shared between all ReaderConfig
     * instances created for readers by an input factory. It is used
     * for sharing symbol tables.
     */
    private final EncodingContext mEncCtxt;

    /**
     * For efficient access by qualified name, as well as uniqueness
     * checks, namespace URIs need to be canonicalized.
     */
    private final UriCanonicalizer mCanonicalizer;

    private final String mPublicId;
    private final String mSystemId;

    /**
     * Encoding passed in as external information, possibly from source
     * from which xml content was gained from (for example, as an HTTP
     * header, or file metadata).
     */
    private final String mExtEncoding;

    /**
     * Name of the actual encoding that input was found to be in (if any
     * -- can't be determined if a Reader was passed in).
     */
    private String mActualEncoding = null;

    private String mXmlDeclVersion = null;
    private String mXmlDeclEncoding = null;
    private int mXmlDeclStandalone = STANDALONE_UNKNOWN;

    private XMLReporter mReporter;
    private XMLResolver mResolver;

    private IllegalCharHandler illegalCharHandler;
    /*
    /**********************************************************************
    /* Buffer recycling:
    /**********************************************************************
     */

    /**
     * This ThreadLocal contains a {@link SoftRerefence}
     * to a {@link BufferRecycler} used to provide a low-cost
     * buffer recycling between Reader instances.
     */
    final static ThreadLocal> _recyclerRef = new ThreadLocal>();

    /**
     * This is the actually container of the recyclable buffers. It
     * is obtained via ThreadLocal/SoftReference combination, if one
     * exists, when Config instance is created. If one does not
     * exist, it will created first time a buffer is returned.
     */
    protected BufferRecycler _currRecycler = null;

    /*
    /**********************************************************************
    /* Life-cycle
    /**********************************************************************
     */

    private ReaderConfig(String publicId, String systemId, String extEnc,
            EncodingContext encCtxt, int flags, int flagMods,
            XMLReporter rep, XMLResolver res,
            UriCanonicalizer canonicalizer)
    {
        super(flags, flagMods);
        mPublicId = publicId;
        mSystemId = systemId;
        mExtEncoding = extEnc;

        /* Ok, let's then see if we can find a buffer recycler. Since they
         * are lazily constructed, and since GC may just flush them out
         * on its whims, it's possible we might not find one. That's ok;
         * we can reconstruct one if and when we are to return one or more
         * buffers.
         */
        SoftReference ref = _recyclerRef.get();
        if (ref != null) {
            _currRecycler = ref.get();
        }
        mEncCtxt = encCtxt;
        _flags = flags;
        _flagMods = flagMods;
        mReporter = rep;
        mResolver = res;
        mCanonicalizer = canonicalizer;
    }

    public ReaderConfig()
    {
        this(null, null, null, new EncodingContext(), DEFAULT_FLAGS, 0,
             null, null,
             new UriCanonicalizer());
    }

    public void setActualEncoding(String actualEnc)
    {
        mActualEncoding = actualEnc;
    }

    public void setXmlDeclInfo(int version, String xmlDeclEnc, String standalone)
    {
        if (version == XmlConsts.XML_V_10) {
            mXmlDeclVersion = XmlConsts.XML_V_10_STR;
        } else if (version == XmlConsts.XML_V_11) {
            mXmlDeclVersion = XmlConsts.XML_V_11_STR;
        } else {
            mXmlDeclVersion = null;
        }
        mXmlDeclEncoding = xmlDeclEnc;
        if (standalone == XmlConsts.XML_SA_YES) {
            mXmlDeclStandalone = STANDALONE_YES;
        } else if (standalone == XmlConsts.XML_SA_NO) {
            mXmlDeclStandalone = STANDALONE_NO;
        } else {
            mXmlDeclStandalone = STANDALONE_UNKNOWN;
        }
    }

    public final void setXmlVersion(String version) {
        mXmlDeclVersion = version;
    }

    public final void setXmlEncoding(String enc) {
        mXmlDeclEncoding = enc;
    }

    public final void setXmlStandalone(Boolean b) {
        if (b == null) {
            mXmlDeclStandalone = STANDALONE_UNKNOWN;
        } else {
            mXmlDeclStandalone = b.booleanValue() ? STANDALONE_YES : STANDALONE_NO;
        }
    }
    
    /*
    /**********************************************************************
    /* Additional configuration setters
    /**********************************************************************
     */

    // // Stax:

    public void setXMLReporter(XMLReporter r) {
        mReporter = r;
    }

    public void setXMLResolver(XMLResolver r) {
        mResolver = r;
    }

    // // Stax2:

    public void doCoalesceText(boolean state) {
        setFlag(F_COALESCING, state);
    }

    // // Stax1.0
    
    public void doAutoCloseInput(boolean state) {
        setFlag(F_AUTO_CLOSE_INPUT, state);
    }

    public void doPreserveLocation(boolean state) {
        setFlag(F_PRESERVE_LOCATION, state);
    }

    public void doParseLazily(boolean state) {
        setFlag(F_LAZY_PARSING, state);
    }

    public void doReportCData(boolean state) {
        setFlag(F_REPORT_CDATA, state);
    }

    /*
    /**********************************************************************
    /* Common accessors from CommonConfig
    /**********************************************************************
     */

    public ReaderConfig createNonShared(String publicId, String systemId, String extEnc)
    {
        return new ReaderConfig(publicId, systemId, extEnc, mEncCtxt,
                _flags, _flagMods,
                mReporter, mResolver, mCanonicalizer);
    }

    @Override
    public String getExternalEncoding() { return mExtEncoding; }
    @Override
    public String getActualEncoding() { return mActualEncoding; }

    @Override
    public boolean isXml11() {
        return false;
    }

    /*
    /**********************************************************************
    /* Implementation of abstract methods
    /**********************************************************************
     */

    protected int findPropertyId(String propName)
    {
        Integer I = (Integer) sProperties.get(propName);
        return (I == null) ? -1 : I.intValue();
    }

    /*
    /**********************************************************************
    /* Standard accessors, configurable properties
    /**********************************************************************
     */

    @Override
    public final Object getProperty(String name, boolean isMandatory)
    {
        Object ob = sProperties.get(name);
        if (ob == null) {
            // Might still have it though
            if (sProperties.containsKey(name)) {
                return null;
            }
            return super.getProperty(name, isMandatory);
        }
        if (ob instanceof Boolean) {
            return ((Boolean) ob).booleanValue();
        }
        if (!(ob instanceof Integer)) {
            throw new RuntimeException("Internal error: unrecognized property value type: "+ob.getClass().getName());
        }
        int f = ((Integer) ob).intValue();
        return hasFlag(f);
    }

    @Override
    public boolean setProperty(String name, Object value)
    {
        Object ob = sProperties.get(name);
        if (ob == null) {
            // Might still have it though
            if (sProperties.containsKey(name)) {
                return false;
            }
            return super.setProperty(name, value);
        }
        if (ob instanceof Boolean) { // immutable
            return false;
        }
        if (!(ob instanceof Integer)) {
            throw new RuntimeException("Internal error");
        }
        int f = ((Integer) ob).intValue();
        boolean state = ((Boolean) value).booleanValue();
        setFlag(f, state);
        return true;
    }

    @Override
    public boolean isPropertySupported(String propName)
    {
        return sProperties.containsKey(propName)
            || super.isPropertySupported(propName);
    }

    public XMLReporter getXMLReporter() { return mReporter; }
    public XMLResolver getXMLResolver() { return mResolver; }


    // // // Stax standard properties

    public boolean willExpandEntities() {
        return hasFlag(F_EXPAND_ENTITIES);
    }

    public boolean willCoalesceText() {
        return hasFlag(F_COALESCING);
    }

    public boolean willSupportNamespaces() {
        return true;
    }

    // // // Stax2 standard properties

    public boolean willParseLazily() {
        return hasFlag(F_LAZY_PARSING);
    }

    public boolean willInternNames() { return hasFlag(F_INTERN_NAMES); }

    public boolean willInternNsURIs() { return hasFlag(F_INTERN_NS_URIS); }

    public boolean willReportCData() { return hasFlag(F_REPORT_CDATA); }

    public boolean willPreserveLocation() { return hasFlag(F_PRESERVE_LOCATION); }

    public boolean willAutoCloseInput() { return hasFlag(F_AUTO_CLOSE_INPUT); }

    // // // Support for things that must be explicitly enabled

    public boolean hasInternNamesBeenEnabled() { return hasExplicitFlag(F_INTERN_NAMES); }

    public boolean hasInternNsURIsBeenEnabled() { return hasExplicitFlag(F_INTERN_NS_URIS); }


    /*
    /**********************************************************************
    /* Accessors, detected properties
    /**********************************************************************
     */

    // // // Input source information

    public String getPublicId() { return mPublicId; }
    public String getSystemId() { return mSystemId; }

    // // // XML declaration info

    public String getXmlDeclVersion() { return mXmlDeclVersion; }
    public String getXmlDeclEncoding() { return mXmlDeclEncoding; }
    public int getXmlDeclStandalone() { return mXmlDeclStandalone; }

    /*
    /**********************************************************************
    /* Stax2 additions
    /**********************************************************************
     */

    // // // Profile mutators:

    /**
     * Method to call to make Reader created conform as closely to XML
     * standard as possible, doing all checks and transformations mandated
     * (linefeed conversions, attr value normalizations).
     * See {@link XMLInputFactory2#configureForXmlConformance} for
     * required settings for standard StAX/StAX2 properties.
     *

* Notes: Does NOT change 'performance' settings (buffer sizes, * DTD caching, coalescing, interning, accurate location info). */ public void configureForXmlConformance() { // // StAX 1.0 settings //doSupportNamespaces(true); //doSupportDTDs(true); //doSupportExternalEntities(true); //doReplaceEntityRefs(true); // // Stax2 additional settings } /** * Method to call to make Reader created be as "convenient" to use * as possible; ie try to avoid having to deal with some of things * like segmented text chunks. This may incur some slight performance * penalties, but should not affect XML conformance. * See {@link XMLInputFactory2#configureForConvenience} for * required settings for standard StAX/StAX2 properties. */ public void configureForConvenience() { // StAX (1.0) settings: doCoalesceText(true); //doReplaceEntityRefs(true); // StAX2: //doReportCData(false); //doReportPrologWhitespace(false); /* Also, knowing exact locations is nice esp. for error * reporting purposes */ doPreserveLocation(true); } /** * Method to call to make the Reader created be as fast as possible reading * documents, especially for long-running processes where caching is * likely to help. *

* See {@link XMLInputFactory2#configureForSpeed} for * required settings for standard StAX/StAX2 properties. */ public void configureForSpeed() { // StAX (1.0): doCoalesceText(false); // StAX2: doPreserveLocation(false); //doReportPrologWhitespace(false); //doInternNames(true); // this is a NOP //doInternNsURIs(true); } /** * Method to call to minimize the memory usage of the stream/event reader; * both regarding Objects created, and the temporary memory usage during * parsing. * This generally incurs some performance penalties, due to using * smaller input buffers. *

* See {@link XMLInputFactory2#configureForLowMemUsage} for * required settings for standard StAX/StAX2 properties. */ public void configureForLowMemUsage() { // StAX (1.0) doCoalesceText(false); // StAX2: doPreserveLocation(false); // can reduce temporary mem usage } /** * Method to call to make Reader try to preserve as much of input * formatting as possible, so that round-tripping would be as lossless * as possible. *

* See {@link XMLInputFactory2#configureForLowMemUsage} for * required settings for standard StAX/StAX2 properties. */ public void configureForRoundTripping() { // StAX (1.0) doCoalesceText(false); //doReplaceEntityRefs(false); // StAX2: //doReportCData(true); //doReportPrologWhitespace(true); } /* /********************************************************************** /* Canonicalization support /********************************************************************** */ public String canonicalizeURI(char[] buf, int uriLen) { return mCanonicalizer.canonicalizeURI(buf, uriLen); } /* /********************************************************************** /* Buffer recycling: /********************************************************************** */ public char[] allocSmallCBuffer(int minSize) { //System.err.println("DEBUG: cfg, allocCSmall: "+mCurrRecycler); if (_currRecycler != null) { char[] result = _currRecycler.getSmallCBuffer(minSize); if (result != null) { return result; } } // Nope; no recycler, or it has no suitable buffers, let's create: return new char[minSize]; } public void freeSmallCBuffer(char[] buffer) { //System.err.println("DEBUG: cfg, freeCSmall: "+buffer); // Need to create (and assign) the buffer? if (_currRecycler == null) { _currRecycler = createRecycler(); } _currRecycler.returnSmallCBuffer(buffer); } public char[] allocMediumCBuffer(int minSize) { //System.err.println("DEBUG: cfg, allocCMed: "+mCurrRecycler); if (_currRecycler != null) { char[] result = _currRecycler.getMediumCBuffer(minSize); if (result != null) { return result; } } return new char[minSize]; } public void freeMediumCBuffer(char[] buffer) { //System.err.println("DEBUG: cfg, freeCMed: "+buffer); if (_currRecycler == null) { _currRecycler = createRecycler(); } _currRecycler.returnMediumCBuffer(buffer); } public char[] allocFullCBuffer(int minSize) { //System.err.println("DEBUG: cfg, allocCFull: "+mCurrRecycler); if (_currRecycler != null) { char[] result = _currRecycler.getFullCBuffer(minSize); if (result != null) { return result; } } return new char[minSize]; } public void freeFullCBuffer(char[] buffer) { //System.err.println("DEBUG: cfg, freeCFull: "+buffer); // Need to create (and assign) the buffer? if (_currRecycler == null) { _currRecycler = createRecycler(); } _currRecycler.returnFullCBuffer(buffer); } public byte[] allocFullBBuffer(int minSize) { //System.err.println("DEBUG: cfg, allocBFull: "+mCurrRecycler); if (_currRecycler != null) { byte[] result = _currRecycler.getFullBBuffer(minSize); if (result != null) { return result; } } return new byte[minSize]; } public void freeFullBBuffer(byte[] buffer) { //System.err.println("DEBUG: cfg, freeBFull: "+buffer); // Need to create (and assign) the buffer? if (_currRecycler == null) { _currRecycler = createRecycler(); } _currRecycler.returnFullBBuffer(buffer); } //static int Counter = 0; private BufferRecycler createRecycler() { BufferRecycler recycler = new BufferRecycler(); // No way to reuse/reset SoftReference, have to create new always: //System.err.println("DEBUG: RefCount: "+(++Counter)); _recyclerRef.set(new SoftReference(recycler)); return recycler; } /* /********************************************************************** /* Symbol table reusing, character types /********************************************************************** */ public ByteBasedPNameTable getBBSymbols() { if (mActualEncoding == CharsetNames.CS_UTF8) { return mEncCtxt.getUtf8Symbols(); } if (mActualEncoding == CharsetNames.CS_ISO_LATIN1) { return mEncCtxt.getLatin1Symbols(); } if (mActualEncoding == CharsetNames.CS_US_ASCII) { return mEncCtxt.getAsciiSymbols(); } throw new Error("Internal error, unknown encoding '"+mActualEncoding+"'"); } public CharBasedPNameTable getCBSymbols() { return mEncCtxt.getSymbols(); } public void updateBBSymbols(ByteBasedPNameTable sym) { if (mActualEncoding == CharsetNames.CS_UTF8) { mEncCtxt.updateUtf8Symbols(sym); } else if (mActualEncoding == CharsetNames.CS_ISO_LATIN1) { mEncCtxt.updateLatin1Symbols(sym); } else if (mActualEncoding == CharsetNames.CS_US_ASCII) { mEncCtxt.updateAsciiSymbols(sym); } else { throw new Error("Internal error, unknown encoding '"+mActualEncoding+"'"); } } public void updateCBSymbols(CharBasedPNameTable sym) { mEncCtxt.updateSymbols(sym); } public XmlCharTypes getCharTypes() { if (mActualEncoding == CharsetNames.CS_UTF8) { return InputCharTypes.getUtf8CharTypes(); } if (mActualEncoding == CharsetNames.CS_ISO_LATIN1) { return InputCharTypes.getLatin1CharTypes(); } if (mActualEncoding == CharsetNames.CS_US_ASCII) { return InputCharTypes.getAsciiCharTypes(); } throw new Error("Internal error, unknown encoding '"+mActualEncoding+"'"); } /* /********************************************************************** /* Helper classes /********************************************************************** */ /** * This is a simple container class that is used to encapsulate * per-factory encoding-dependant information like symbol tables. */ final static class EncodingContext { ByteBasedPNameTable mUtf8Table; ByteBasedPNameTable mLatin1Table; ByteBasedPNameTable mAsciiTable; /** * If there is no encoding to worry about, we only need a single * symbol table. */ CharBasedPNameTable mGeneralTable; EncodingContext() { } public synchronized ByteBasedPNameTable getUtf8Symbols() { if (mUtf8Table == null) { mUtf8Table = new ByteBasedPNameTable(64); } return new ByteBasedPNameTable(mUtf8Table); } public synchronized void updateUtf8Symbols(ByteBasedPNameTable sym) { mUtf8Table.mergeFromChild(sym); } public synchronized ByteBasedPNameTable getLatin1Symbols() { if (mLatin1Table == null) { mLatin1Table = new ByteBasedPNameTable(64); } return new ByteBasedPNameTable(mLatin1Table); } public synchronized void updateLatin1Symbols(ByteBasedPNameTable sym) { mLatin1Table.mergeFromChild(sym); } public synchronized ByteBasedPNameTable getAsciiSymbols() { if (mAsciiTable == null) { mAsciiTable = new ByteBasedPNameTable(64); } return new ByteBasedPNameTable(mAsciiTable); } public synchronized void updateAsciiSymbols(ByteBasedPNameTable sym) { mAsciiTable.mergeFromChild(sym); } public synchronized CharBasedPNameTable getSymbols() { if (mGeneralTable == null) { mGeneralTable = new CharBasedPNameTable(64); } return new CharBasedPNameTable(mGeneralTable); } public synchronized void updateSymbols(CharBasedPNameTable sym) { mGeneralTable.mergeFromChild(sym); } } public void setIllegalCharHandler(IllegalCharHandler illegalCharHandler) { this.illegalCharHandler = illegalCharHandler; } public IllegalCharHandler getIllegalCharHandler() { return this.illegalCharHandler; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy