// com.ctc.wstx.api.ReaderConfig (Maven / Gradle / Ivy)
package com.ctc.wstx.api;
import java.lang.ref.SoftReference;
import java.net.URL;
import java.util.*;
import javax.xml.stream.*;
import org.codehaus.stax2.XMLInputFactory2; // for property consts
import org.codehaus.stax2.XMLStreamProperties; // for property consts
import org.codehaus.stax2.validation.DTDValidationSchema;
import com.ctc.wstx.api.WstxInputProperties;
import com.ctc.wstx.cfg.InputConfigFlags;
import com.ctc.wstx.dtd.DTDEventListener;
import com.ctc.wstx.ent.IntEntity;
import com.ctc.wstx.ent.EntityDecl;
import com.ctc.wstx.io.BufferRecycler;
import com.ctc.wstx.util.ArgUtil;
import com.ctc.wstx.util.DataUtil;
import com.ctc.wstx.util.SymbolTable;
/**
* Simple configuration container class; passed by reader factory to reader
* instance created.
*
* In addition to its main task as a configuration container, this class
* also acts as a wrapper around simple buffer recycling functionality.
* The reason is that while conceptually this is a separate concern,
* there are enough commonalities with the life-cycle of this object to
* make this a very convenient place to add that functionality...
* (that is: conceptually this is not right, but from pragmatic viewpoint
* it just makes sense)
*/
public final class ReaderConfig
extends CommonConfig
implements InputConfigFlags
{
/*
////////////////////////////////////////////////
// Constants for reader properties:
////////////////////////////////////////////////
*/
// Integer ids for the configuration properties this class understands.
// The static initializer of sProperties maps each external property name
// to one of these ids, and getProperty(int) switches on them.
// // First, standard StAX properties:
// Simple flags:
final static int PROP_COALESCE_TEXT = 1;
final static int PROP_NAMESPACE_AWARE = 2;
final static int PROP_REPLACE_ENTITY_REFS = 3;
final static int PROP_SUPPORT_EXTERNAL_ENTITIES = 4;
final static int PROP_VALIDATE_AGAINST_DTD = 5;
final static int PROP_SUPPORT_DTD = 6;
// Object type properties
// (PROP_EVENT_ALLOCATOR is the only id declared public)
public final static int PROP_EVENT_ALLOCATOR = 7;
final static int PROP_WARNING_REPORTER = 8;
final static int PROP_XML_RESOLVER = 9;
// // Then StAX2 standard properties:
// Simple flags:
final static int PROP_INTERN_NS_URIS = 20;
final static int PROP_INTERN_NAMES = 21;
final static int PROP_REPORT_CDATA = 22;
final static int PROP_REPORT_PROLOG_WS = 23;
final static int PROP_PRESERVE_LOCATION = 24;
final static int PROP_AUTO_CLOSE_INPUT = 25;
// Enum / Object type properties:
// NOTE(review): ids 26 and 27 are also used by PROP_INTERN_NAMES_EXPLICIT
// and PROP_INTERN_NS_URIS_EXPLICIT, declared further down in this class;
// confirm those two never need to share a switch with the ids below.
final static int PROP_SUPPORT_XMLID = 26; // shared with WriterConfig
final static int PROP_DTD_OVERRIDE = 27;
// // // Constants for additional Wstx properties:
// Simple flags:
// Note: these were included pre-4.0, are deprecated in 4.0
// (ids 40 and 41 are kept here, commented out, so they are not reused)
//final static int PROP_NORMALIZE_LFS = 40;
//final static int PROP_NORMALIZE_ATTR_VALUES = 41;
final static int PROP_CACHE_DTDS = 42;
final static int PROP_CACHE_DTDS_BY_PUBLIC_ID = 43;
final static int PROP_LAZY_PARSING = 44;
final static int PROP_SUPPORT_DTDPP = 45;
// Object type properties:
final static int PROP_INPUT_BUFFER_LENGTH = 50;
//final static int PROP_TEXT_BUFFER_LENGTH = 51;
final static int PROP_MIN_TEXT_SEGMENT = 52;
final static int PROP_CUSTOM_INTERNAL_ENTITIES = 53;
final static int PROP_DTD_RESOLVER = 54;
final static int PROP_ENTITY_RESOLVER = 55;
final static int PROP_UNDECLARED_ENTITY_RESOLVER = 56;
final static int PROP_BASE_URL = 57;
final static int PROP_INPUT_PARSING_MODE = 58;
/*
////////////////////////////////////////////////
// Limits for numeric properties
////////////////////////////////////////////////
*/
/**
* Smallest input buffer length accepted by {@link #setInputBufferLength};
* smaller requests are raised to this value.
* Need to set a minimum size, since there are some limitations to
* smallest consecutive block that can be used.
*/
final static int MIN_INPUT_BUFFER_LENGTH = 8; // 8 chars, i.e. 16 bytes
/**
* Let's allow caching of just a dozen DTDs... shouldn't really
* matter, how many DTDs does one really use?
* Returned by {@link #getDtdCacheSize} when the J2ME subset is not in use.
*/
final static int DTD_CACHE_SIZE_J2SE = 12;
/**
* Value returned by {@link #getDtdCacheSize} when the J2ME subset is in use.
*/
final static int DTD_CACHE_SIZE_J2ME = 5;
/*
////////////////////////////////////////////////
// Default values for custom properties:
////////////////////////////////////////////////
*/
/**
* By default, let's require minimum of 64 chars to be delivered
* as shortest partial (piece of) text (CDATA, text) segment;
* same for both J2ME subset and full readers. Prevents tiniest
* runts from getting passed.
* Both default factory methods pass this value to the constructor, and
* {@link #configureForLowMemUsage} restores it.
*/
final static int DEFAULT_SHORTEST_TEXT_SEGMENT = 64;
/**
* Default config flags are converted from individual settings,
* to conform to StAX 1.0 specifications.
* Flags that are deliberately left out of the defaults are kept below as
* commented-out lines, next to the explanation of why they are disabled.
*/
final static int DEFAULT_FLAGS_FULL =
0
// First, default settings StAX specs dictate:
| CFG_NAMESPACE_AWARE
// Coalescing to be disabled
//| CFG_COALESCE_TEXT
| CFG_REPLACE_ENTITY_REFS
| CFG_SUPPORT_EXTERNAL_ENTITIES
| CFG_SUPPORT_DTD
// and then custom setting defaults:
// name interning and namespace URI interning
| CFG_INTERN_NAMES
| CFG_INTERN_NS_URIS
// we will also accurately report CDATA, by default
| CFG_REPORT_CDATA
/* 20-Jan-2006, TSa: As per discussions on stax-builders list
* (and input from xml experts), 4.0 will revert to "do not
* report SPACE events outside root element by default"
* settings. Conceptually this is what xml specification
* implies should be done: there is no content outside of
* the element tree, including any ignorable content, just
* processing instructions and comments.
*/
//| CFG_REPORT_PROLOG_WS
/* but enable DTD caching (if they are handled):
* (... maybe J2ME subset shouldn't do it?)
*/
| CFG_CACHE_DTDS
/* 29-Mar-2006, TSa: But note, no caching by public-id, due
* to problems with cases where public-id/system-id were
* inconsistently used, leading to problems.
* (hence CFG_CACHE_DTDS_BY_PUBLIC_ID is not included here)
*/
/* by default, let's also allow lazy parsing, since it tends
* to improve performance
*/
| CFG_LAZY_PARSING
/* and also make Event objects preserve location info...
* can be turned off for maximum performance
*/
| CFG_PRESERVE_LOCATION
// As per Stax 1.0 specs, we can not enable this by default:
//| CFG_AUTO_CLOSE_INPUT
/* Also, let's enable dtd++ support (shouldn't hurt with non-dtd++
* dtds)
*/
| CFG_SUPPORT_DTDPP
/* Regarding Xml:id, let's enable typing by default, but not
* uniqueness validity checks: latter will be taken care of
* by DTD validation if enabled, otherwise needs to be explicitly
* enabled
*/
| CFG_XMLID_TYPING
// | CFG_XMLID_UNIQ_CHECKS
;
/**
* For now defaults for J2ME flags can be identical to 'full' set;
* differences are in buffer sizes (compare the input buffer lengths
* passed by {@link #createJ2MEDefaults} and {@link #createFullDefaults}).
*/
final static int DEFAULT_FLAGS_J2ME = DEFAULT_FLAGS_FULL;
// // //
/**
 * Lookup table from externally visible property names to the integer
 * property ids declared in this class; having an int id allows property
 * handling to use a <code>switch</code>. Filled exactly once, by the
 * static initializer that follows.
 */
final static HashMap sProperties = new HashMap(64); // we have about 40 entries

static {
    // Standard StAX: on/off features
    _mapPropertyId(XMLInputFactory.IS_COALESCING, PROP_COALESCE_TEXT);
    _mapPropertyId(XMLInputFactory.IS_NAMESPACE_AWARE, PROP_NAMESPACE_AWARE);
    _mapPropertyId(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, PROP_REPLACE_ENTITY_REFS);
    _mapPropertyId(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, PROP_SUPPORT_EXTERNAL_ENTITIES);
    _mapPropertyId(XMLInputFactory.IS_VALIDATING, PROP_VALIDATE_AGAINST_DTD);
    _mapPropertyId(XMLInputFactory.SUPPORT_DTD, PROP_SUPPORT_DTD);

    // Standard StAX: pluggable components
    _mapPropertyId(XMLInputFactory.ALLOCATOR, PROP_EVENT_ALLOCATOR);
    _mapPropertyId(XMLInputFactory.REPORTER, PROP_WARNING_REPORTER);
    _mapPropertyId(XMLInputFactory.RESOLVER, PROP_XML_RESOLVER);

    // Properties introduced by StAX2:
    _mapPropertyId(XMLInputFactory2.P_INTERN_NAMES, PROP_INTERN_NAMES);
    _mapPropertyId(XMLInputFactory2.P_INTERN_NS_URIS, PROP_INTERN_NS_URIS);
    _mapPropertyId(XMLInputFactory2.P_REPORT_CDATA, PROP_REPORT_CDATA);
    _mapPropertyId(XMLInputFactory2.P_REPORT_PROLOG_WHITESPACE, PROP_REPORT_PROLOG_WS);
    _mapPropertyId(XMLInputFactory2.P_PRESERVE_LOCATION, PROP_PRESERVE_LOCATION);
    _mapPropertyId(XMLInputFactory2.P_AUTO_CLOSE_INPUT, PROP_AUTO_CLOSE_INPUT);
    _mapPropertyId(XMLInputFactory2.XSP_SUPPORT_XMLID, PROP_SUPPORT_XMLID);
    _mapPropertyId(XMLInputFactory2.P_DTD_OVERRIDE, PROP_DTD_OVERRIDE);

    // Non-standard on/off flags:
    _mapPropertyId(WstxInputProperties.P_CACHE_DTDS, PROP_CACHE_DTDS);
    _mapPropertyId(WstxInputProperties.P_CACHE_DTDS_BY_PUBLIC_ID, PROP_CACHE_DTDS_BY_PUBLIC_ID);
    _mapPropertyId(XMLInputFactory2.P_LAZY_PARSING, PROP_LAZY_PARSING);
    _mapPropertyId(WstxInputProperties.P_SUPPORT_DTDPP, PROP_SUPPORT_DTDPP);

    // Non-standard properties that are not simple flags:
    _mapPropertyId(WstxInputProperties.P_INPUT_BUFFER_LENGTH, PROP_INPUT_BUFFER_LENGTH);
    _mapPropertyId(WstxInputProperties.P_MIN_TEXT_SEGMENT, PROP_MIN_TEXT_SEGMENT);
    _mapPropertyId(WstxInputProperties.P_CUSTOM_INTERNAL_ENTITIES, PROP_CUSTOM_INTERNAL_ENTITIES);
    _mapPropertyId(WstxInputProperties.P_DTD_RESOLVER, PROP_DTD_RESOLVER);
    _mapPropertyId(WstxInputProperties.P_ENTITY_RESOLVER, PROP_ENTITY_RESOLVER);
    _mapPropertyId(WstxInputProperties.P_UNDECLARED_ENTITY_RESOLVER, PROP_UNDECLARED_ENTITY_RESOLVER);
    _mapPropertyId(WstxInputProperties.P_BASE_URL, PROP_BASE_URL);
    _mapPropertyId(WstxInputProperties.P_INPUT_PARSING_MODE, PROP_INPUT_PARSING_MODE);
}

/**
 * Adds one name-to-id entry to {@link #sProperties}; only used by the
 * static initializer above.
 *
 * @param propName External name of the property
 * @param propId Integer id the name is to be mapped to
 */
private static void _mapPropertyId(String propName, int propId)
{
    sProperties.put(propName, DataUtil.Integer(propId));
}
/*
//////////////////////////////////////////////////////////
// Current config state:
//////////////////////////////////////////////////////////
*/
/**
* Whether this configuration was built for the J2ME subset; fixed at
* construction. Within this class it selects the DTD cache size.
*/
final boolean mIsJ2MESubset;
/**
* Symbol table passed to the constructor: null for instances created by
* the default factory methods, the caller-supplied table for instances
* created through {@link #createNonShared}.
*/
final SymbolTable mSymbols;
/**
* Bitset that contains state of on/off properties; initialized
* to defaults, but can be set/cleared.
*/
int mConfigFlags;
/**
* Bitset that indicates explicit changes to {@link #mConfigFlags}
* through calls; empty bit means that the corresponding property
* has its default value, set bit that an explicit call has been
* made.
*/
int mConfigFlagMods;
/**
* 13-Nov-2008, tatus: Need to be able to keep track of whether
* name-interning has been explicitly enabled/disabled or not
* (not if it's whatever defaults we have)
*<p>
* NOTE(review): the values 26 and 27 are also assigned to
* PROP_SUPPORT_XMLID and PROP_DTD_OVERRIDE earlier in this class, and
* neither constant below is referenced in the part of the class
* reviewed; confirm whether they are still needed and whether the
* duplicate ids are intentional.
*/
final static int PROP_INTERN_NAMES_EXPLICIT = 26;
final static int PROP_INTERN_NS_URIS_EXPLICIT = 27;
/**
* Input buffer length; {@link #setInputBufferLength} never lets it drop
* below MIN_INPUT_BUFFER_LENGTH.
*/
int mInputBufferLen;
/**
* Value reported by {@link #getShortestReportedTextSegment}.
*/
int mMinTextSegmentLen;
/**
* Base URL to use as the resolution context for relative entity
* references
*/
URL mBaseURL = null;
/**
* Parsing mode can be changed from the default xml compliant
* behavior to one of alternate modes (fragment processing,
* multiple document processing).
*/
WstxInputProperties.ParsingMode mParsingMode =
WstxInputProperties.PARSING_MODE_DOCUMENT;
/**
* This boolean flag is set if the input document requires
* xml 1.1 (or above) compliant processing: default is xml 1.0
* compliant. Note that unlike most other properties, this
* does not come from configuration settings, but from processed
* document itself. It is cleared again by {@link #resetState} and is
* not copied by {@link #createNonShared}.
*/
boolean mXml11 = false;
/*
//////////////////////////////////////////////////////////
// Common configuration objects
//////////////////////////////////////////////////////////
*/
/** Reporter returned by getXMLReporter(); assigned by setXMLReporter(). */
XMLReporter mReporter;
/** Resolver returned by getDtdResolver(); assigned by setDtdResolver() and setXMLResolver(). */
XMLResolver mDtdResolver = null;
/**
* Resolver returned by both getEntityResolver() and getXMLResolver();
* assigned by setEntityResolver() and setXMLResolver().
*/
XMLResolver mEntityResolver = null;
/*
//////////////////////////////////////////////////////////
// More special(ized) configuration objects
//////////////////////////////////////////////////////////
*/
// The three rarely used settings listed below are not separate fields;
// together with the DTD override they live in slots of mSpecialProperties:
//Map mCustomEntities;
//XMLResolver mUndeclaredEntityResolver;
//DTDEventListener mDTDEventListener;
/**
* Slot array for the specialized configuration objects, indexed by the
* SP_IX_ constants below; null until something is stored (presumably by
* _setSpecialProperty, which is not visible in this part of the file).
* {@link #createNonShared} gives each copy its own array.
*/
Object[] mSpecialProperties = null;
// Presumably the length used when the slot array is allocated -- TODO confirm
private final static int SPEC_PROC_COUNT = 4;
// Slot holding the Map of custom internal entities
private final static int SP_IX_CUSTOM_ENTITIES = 0;
// Slot holding the XMLResolver for undeclared entities
private final static int SP_IX_UNDECL_ENT_RESOLVER = 1;
// Slot holding the DTDEventListener
private final static int SP_IX_DTD_EVENT_LISTENER = 2;
// Slot holding the DTDValidationSchema used as DTD override
private final static int SP_IX_DTD_OVERRIDE = 3;
/*
//////////////////////////////////////////////////////////
// Buffer recycling:
//////////////////////////////////////////////////////////
*/
/**
* This <code>ThreadLocal</code> contains a {@link SoftReference}
* to a {@link BufferRecycler} used to provide a low-cost
* buffer recycling between Reader instances.
*/
final static ThreadLocal mRecyclerRef = new ThreadLocal();
/**
* This is the actual container of the recyclable buffers. It
* is obtained via ThreadLocal/SoftReference combination, if one
* exists, when Config instance is created. If one does not
* exist, it will be created the first time a buffer is returned.
*/
BufferRecycler mCurrRecycler = null;
/*
//////////////////////////////////////////////////////////
// Life-cycle
//////////////////////////////////////////////////////////
*/
/**
 * Sole constructor; callers go through {@link #createJ2MEDefaults},
 * {@link #createFullDefaults} or {@link #createNonShared}.
 *
 * @param j2meSubset Whether the configuration is for the J2ME subset
 * @param symbols Symbol table to expose through {@link #getSymbols}; may be null
 * @param configFlags Initial state of the on/off flags
 * @param configFlagMods Bits marking which flags were explicitly changed
 * @param inputBufLen Initial input buffer length
 * @param minTextSegmentLen Initial shortest reported text segment length
 */
private ReaderConfig(boolean j2meSubset, SymbolTable symbols,
        int configFlags, int configFlagMods,
        int inputBufLen,
        int minTextSegmentLen)
{
    // Fixed identity of this configuration:
    mSymbols = symbols;
    mIsJ2MESubset = j2meSubset;

    // Numeric settings:
    mMinTextSegmentLen = minTextSegmentLen;
    mInputBufferLen = inputBufLen;

    // Flag state, and the record of which flags were explicitly touched:
    mConfigFlagMods = configFlagMods;
    mConfigFlags = configFlags;

    /* Finally, try to pick up a buffer recycler registered for the
     * current thread. Recyclers are created lazily and are only softly
     * referenced, so there may be none (never created, or already
     * reclaimed by GC). That is fine: one gets created when the first
     * buffer is handed back.
     */
    final Object held = mRecyclerRef.get();
    if (held != null) {
        mCurrRecycler = (BufferRecycler) ((SoftReference) held).get();
    }
}
/**
 * Factory method for the default configuration of the J2ME subset.
 * Uses slightly smaller buffers than the full version, on the
 * assumption that lower memory usage is desirable there.
 *
 * @return New configuration with J2ME default flags, no symbol table
 *   and no explicitly modified flags
 */
public static ReaderConfig createJ2MEDefaults()
{
    // 2000 chars, i.e. a 4k input buffer
    final int inputBufferChars = 2000;
    return new ReaderConfig(true, null, DEFAULT_FLAGS_J2ME, 0,
            inputBufferChars, DEFAULT_SHORTEST_TEXT_SEGMENT);
}
/**
 * Factory method for the default configuration of the full
 * (non-J2ME) version; uses a somewhat larger input buffer to achieve
 * better overall performance.
 *
 * @return New configuration with the full default flags, no symbol
 *   table and no explicitly modified flags
 */
public static ReaderConfig createFullDefaults()
{
    // 4000 chars, i.e. an 8k input buffer
    final int inputBufferChars = 4000;
    return new ReaderConfig(false, null, DEFAULT_FLAGS_FULL, 0,
            inputBufferChars, DEFAULT_SHORTEST_TEXT_SEGMENT);
}
/**
 * Creates an independent copy of this configuration that uses the
 * given symbol table. Flags, buffer lengths, reporter, resolvers, base
 * URL and parsing mode are carried over; the special-property slots
 * are copied into a fresh array (the slot values themselves are
 * shared). The xml 1.1 state is not carried over.
 *
 * @param sym Symbol table for the copy; currently not checked for null
 * @return The new configuration instance
 */
public ReaderConfig createNonShared(SymbolTable sym)
{
    // No exception is thrown for a null symbol table (open question
    // left by the original author).
    final ReaderConfig copy = new ReaderConfig(mIsJ2MESubset, sym,
            mConfigFlags, mConfigFlagMods,
            mInputBufferLen, mMinTextSegmentLen);

    copy.mParsingMode = mParsingMode;
    copy.mBaseURL = mBaseURL;
    copy.mEntityResolver = mEntityResolver;
    copy.mDtdResolver = mDtdResolver;
    copy.mReporter = mReporter;

    final Object[] slots = mSpecialProperties;
    if (slots != null) {
        // Separate array, so later changes to either side stay private
        final Object[] slotCopy = new Object[slots.length];
        System.arraycopy(slots, 0, slotCopy, 0, slots.length);
        copy.mSpecialProperties = slotCopy;
    }
    return copy;
}
/**
* Unlike name suggests there is also some limited state information
* associated with the config object. If these objects are reused,
* that state needs to be reset between reuses, to avoid carrying
* over incorrect state.
*<p>
* Configuration settings (flags, buffer lengths, resolvers) are left
* untouched by this method.
*/
public void resetState()
{
// Currently, only xml 1.0 vs 1.1 state is stored here:
mXml11 = false;
}
/*
//////////////////////////////////////////////////////////
// Implementation of abstract methods
//////////////////////////////////////////////////////////
*/
/**
 * Translates a property name into the matching integer property id,
 * using the {@link #sProperties} lookup table.
 *
 * @param propName Name of the property to look up
 * @return Id the name is mapped to; -1 if the name is not known
 */
protected int findPropertyId(String propName)
{
    final Object id = sProperties.get(propName);
    if (id == null) {
        return -1;
    }
    return ((Integer) id).intValue();
}
/*
//////////////////////////////////////////////////////////
// Public API, accessors
//////////////////////////////////////////////////////////
*/
// // // Accessors for immutable configuration:
/**
* @return Symbol table given to the constructor; null for instances
*   created by the default factory methods
*/
public SymbolTable getSymbols() { return mSymbols; }
/**
 * Returns the DTD cache size, which currently depends only on whether
 * the J2ME subset is in use. In future this property could/should be
 * made configurable?
 *
 * @return {@link #DTD_CACHE_SIZE_J2ME} for the J2ME subset,
 *   {@link #DTD_CACHE_SIZE_J2SE} otherwise
 */
public int getDtdCacheSize() {
    if (mIsJ2MESubset) {
        return DTD_CACHE_SIZE_J2ME;
    }
    return DTD_CACHE_SIZE_J2SE;
}
// // // "Raw" accessors for on/off properties:
/**
* @return The complete bitset of on/off flags (combination of CFG_
*   constants), as currently set
*/
public int getConfigFlags() { return mConfigFlags; }
// // // Standard StAX on/off property accessors
// Each accessor asks _hasConfigFlag() about one CFG_ flag; getProperty(int)
// reports the same values as Booleans under the property id named below.
/** Value of PROP_COALESCE_TEXT (XMLInputFactory.IS_COALESCING); not part of the default flags. */
public boolean willCoalesceText() {
return _hasConfigFlag(CFG_COALESCE_TEXT);
}
/** Value of PROP_NAMESPACE_AWARE (XMLInputFactory.IS_NAMESPACE_AWARE); part of the default flags. */
public boolean willSupportNamespaces() {
return _hasConfigFlag(CFG_NAMESPACE_AWARE);
}
/** Value of PROP_REPLACE_ENTITY_REFS (XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES); part of the default flags. */
public boolean willReplaceEntityRefs() {
return _hasConfigFlag(CFG_REPLACE_ENTITY_REFS);
}
/** Value of PROP_SUPPORT_EXTERNAL_ENTITIES (XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES); part of the default flags. */
public boolean willSupportExternalEntities() {
return _hasConfigFlag(CFG_SUPPORT_EXTERNAL_ENTITIES);
}
/** Value of PROP_SUPPORT_DTD (XMLInputFactory.SUPPORT_DTD); part of the default flags. */
public boolean willSupportDTDs() {
return _hasConfigFlag(CFG_SUPPORT_DTD);
}
/** Value of PROP_VALIDATE_AGAINST_DTD (XMLInputFactory.IS_VALIDATING); not part of the default flags. */
public boolean willValidateWithDTD() {
return _hasConfigFlag(CFG_VALIDATE_AGAINST_DTD);
}
// // // Stax2 on/off property accessors
// Each accessor asks _hasConfigFlag() about one CFG_ flag.
/** Value of PROP_REPORT_CDATA (XMLInputFactory2.P_REPORT_CDATA); part of the default flags. */
public boolean willReportCData() {
return _hasConfigFlag(CFG_REPORT_CDATA);
}
/** State of CFG_LAZY_PARSING; part of the default flags. */
public boolean willParseLazily() {
return _hasConfigFlag(CFG_LAZY_PARSING);
}
/** Value of PROP_INTERN_NAMES (XMLInputFactory2.P_INTERN_NAMES); part of the default flags. */
public boolean willInternNames() {
return _hasConfigFlag(CFG_INTERN_NAMES);
}
/** Value of PROP_INTERN_NS_URIS (XMLInputFactory2.P_INTERN_NS_URIS); part of the default flags. */
public boolean willInternNsURIs() {
return _hasConfigFlag(CFG_INTERN_NS_URIS);
}
/** Value of PROP_PRESERVE_LOCATION (XMLInputFactory2.P_PRESERVE_LOCATION); part of the default flags. */
public boolean willPreserveLocation() {
return _hasConfigFlag(CFG_PRESERVE_LOCATION);
}
/** Value of PROP_AUTO_CLOSE_INPUT (XMLInputFactory2.P_AUTO_CLOSE_INPUT); not part of the default flags. */
public boolean willAutoCloseInput() {
return _hasConfigFlag(CFG_AUTO_CLOSE_INPUT);
}
// // // Woodstox on/off property accessors
// Each accessor asks _hasConfigFlag() about one CFG_ flag.
/** Value of PROP_REPORT_PROLOG_WS (XMLInputFactory2.P_REPORT_PROLOG_WHITESPACE); not part of the default flags. */
public boolean willReportPrologWhitespace() {
return _hasConfigFlag(CFG_REPORT_PROLOG_WS);
}
/** Value of PROP_CACHE_DTDS (WstxInputProperties.P_CACHE_DTDS); part of the default flags. */
public boolean willCacheDTDs() {
return _hasConfigFlag(CFG_CACHE_DTDS);
}
/** State of CFG_CACHE_DTDS_BY_PUBLIC_ID; not part of the default flags. */
public boolean willCacheDTDsByPublicId() {
return _hasConfigFlag(CFG_CACHE_DTDS_BY_PUBLIC_ID);
}
/** State of CFG_XMLID_TYPING; part of the default flags. */
public boolean willDoXmlIdTyping() {
return _hasConfigFlag(CFG_XMLID_TYPING);
}
/** State of CFG_XMLID_UNIQ_CHECKS; not part of the default flags. */
public boolean willDoXmlIdUniqChecks() {
return _hasConfigFlag(CFG_XMLID_UNIQ_CHECKS);
}
/** State of CFG_SUPPORT_DTDPP; part of the default flags. */
public boolean willSupportDTDPP() {
return _hasConfigFlag(CFG_SUPPORT_DTDPP);
}
/** @return Configured input buffer length (see {@link #setInputBufferLength}) */
public int getInputBufferLength() { return mInputBufferLen; }
/** @return Configured shortest text segment length (see {@link #setShortestReportedTextSegment}) */
public int getShortestReportedTextSegment() { return mMinTextSegmentLen; }
/**
 * Returns the custom internal entities that have been configured.
 *
 * @return {@link Collections#EMPTY_MAP} if no entity map has been
 *   stored; otherwise a new HashMap with the same entries as the
 *   stored map, so that callers can not modify the configuration
 *   through the returned Map
 */
public Map getCustomInternalEntities()
{
    final Map stored = (Map) _getSpecialProperty(SP_IX_CUSTOM_ENTITIES);
    if (stored == null) {
        return Collections.EMPTY_MAP;
    }
    // Defensive copy, sized from the number of stored entries
    final int count = stored.size();
    final HashMap copy = new HashMap(count + (count >> 2), 0.81f);
    for (Iterator entries = stored.entrySet().iterator(); entries.hasNext(); ) {
        final Map.Entry entry = (Map.Entry) entries.next();
        final Object name = entry.getKey();
        // The cast only documents (and asserts) the value type
        final EntityDecl decl = (EntityDecl) entry.getValue();
        copy.put(name, decl);
    }
    return copy;
}
/**
 * Looks up a single custom internal entity by name.
 *
 * @param id Name of the entity to find
 * @return Matching entity declaration; null if no custom entities are
 *   configured, or none is stored under the given name
 */
public EntityDecl findCustomInternalEntity(String id)
{
    final Map stored = (Map) _getSpecialProperty(SP_IX_CUSTOM_ENTITIES);
    return (stored == null) ? null : (EntityDecl) stored.get(id);
}
/** @return Reporter set with setXMLReporter(), if any */
public XMLReporter getXMLReporter() { return mReporter; }
/**
* @return The entity resolver. setXMLResolver() assigns the same resolver
*   to both the entity and the DTD slot; if the two are later changed
*   separately, this method keeps reporting the entity resolver.
*/
public XMLResolver getXMLResolver() { return mEntityResolver; }
/** @return Resolver last assigned by setDtdResolver() or setXMLResolver() */
public XMLResolver getDtdResolver() { return mDtdResolver; }
/** @return Resolver last assigned by setEntityResolver() or setXMLResolver() */
public XMLResolver getEntityResolver() { return mEntityResolver; }
/** @return Resolver kept in the "undeclared entity resolver" special-property slot */
public XMLResolver getUndeclaredEntityResolver() {
return (XMLResolver) _getSpecialProperty(SP_IX_UNDECL_ENT_RESOLVER);
}
/** @return Base URL set with setBaseURL(); null unless one has been set */
public URL getBaseURL() { return mBaseURL; }
/** @return Current parsing mode; PARSING_MODE_DOCUMENT unless changed */
public WstxInputProperties.ParsingMode getInputParsingMode() {
return mParsingMode;
}
/** @return True if the parsing mode is PARSING_MODE_DOCUMENTS (identity comparison) */
public boolean inputParsingModeDocuments() {
return mParsingMode == WstxInputProperties.PARSING_MODE_DOCUMENTS;
}
/** @return True if the parsing mode is PARSING_MODE_FRAGMENT (identity comparison) */
public boolean inputParsingModeFragment() {
return mParsingMode == WstxInputProperties.PARSING_MODE_FRAGMENT;
}
/**
* @return True if the input well-formedness and validation checks
* should be done according to xml 1.1 specification; false if
* xml 1.0 specification. The value is whatever was last passed to
* {@link #enableXml11}, or false after {@link #resetState}.
*/
public boolean isXml11() {
return mXml11;
}
/** @return Listener kept in the "DTD event listener" special-property slot */
public DTDEventListener getDTDEventListener() {
return (DTDEventListener) _getSpecialProperty(SP_IX_DTD_EVENT_LISTENER);
}
/** @return Schema kept in the "DTD override" special-property slot; reported by getProperty() for PROP_DTD_OVERRIDE */
public DTDValidationSchema getDTDOverride() {
return (DTDValidationSchema) _getSpecialProperty(SP_IX_DTD_OVERRIDE);
}
/**
* Special accessor to use to verify whether name interning has
* explicitly been enabled; true if a call has been made to set
* it to true; false otherwise (default, or set to false).
*<p>
* NOTE(review): the check itself is done by _hasExplicitConfigFlag(),
* which is not visible in this part of the file; the contract above is
* the one stated by the original author.
*/
public boolean hasInternNamesBeenEnabled() {
return _hasExplicitConfigFlag(CFG_INTERN_NAMES);
}
/**
* Same kind of check as {@link #hasInternNamesBeenEnabled}, but for the
* namespace URI interning flag (CFG_INTERN_NS_URIS).
*/
public boolean hasInternNsURIsBeenEnabled() {
return _hasExplicitConfigFlag(CFG_INTERN_NS_URIS);
}
/*
//////////////////////////////////////////////////////////
// Simple mutators
//////////////////////////////////////////////////////////
*/
/**
 * Turns on the given flag bit(s) and records that they have been
 * explicitly changed.
 *
 * @param flag One CFG_ constant, or several combined with bitwise OR
 */
public void setConfigFlag(int flag) {
    // The two-argument overload sets the bits and marks them as modified
    setConfigFlag(flag, true);
}
/**
 * Turns off the given flag bit(s) and records that they have been
 * explicitly changed.
 *
 * @param flag One CFG_ constant, or several combined with bitwise OR
 */
public void clearConfigFlag(int flag) {
    // The two-argument overload clears the bits and marks them as modified
    setConfigFlag(flag, false);
}
// // // Mutators for standard StAX properties
// Each mutator sets (state == true) or clears (state == false) one CFG_
// flag through setConfigFlag(int, boolean), which also marks the flag as
// explicitly modified.
/** Enables or disables CFG_COALESCE_TEXT. */
public void doCoalesceText(boolean state) {
setConfigFlag(CFG_COALESCE_TEXT, state);
}
/** Enables or disables CFG_NAMESPACE_AWARE. */
public void doSupportNamespaces(boolean state) {
setConfigFlag(CFG_NAMESPACE_AWARE, state);
}
/** Enables or disables CFG_REPLACE_ENTITY_REFS. */
public void doReplaceEntityRefs(boolean state) {
setConfigFlag(CFG_REPLACE_ENTITY_REFS, state);
}
/** Enables or disables CFG_SUPPORT_EXTERNAL_ENTITIES. */
public void doSupportExternalEntities(boolean state) {
setConfigFlag(CFG_SUPPORT_EXTERNAL_ENTITIES, state);
}
/** Enables or disables CFG_SUPPORT_DTD. */
public void doSupportDTDs(boolean state) {
setConfigFlag(CFG_SUPPORT_DTD, state);
}
/** Enables or disables CFG_VALIDATE_AGAINST_DTD. */
public void doValidateWithDTD(boolean state) {
setConfigFlag(CFG_VALIDATE_AGAINST_DTD, state);
}
// // // Mutators for Woodstox-specific properties
// Same pattern as the standard StAX mutators: each call sets or clears one
// CFG_ flag through setConfigFlag(int, boolean), which also marks the flag
// as explicitly modified.
/** Enables or disables CFG_INTERN_NAMES. */
public void doInternNames(boolean state) {
setConfigFlag(CFG_INTERN_NAMES, state);
}
/** Enables or disables CFG_INTERN_NS_URIS. */
public void doInternNsURIs(boolean state) {
setConfigFlag(CFG_INTERN_NS_URIS, state);
}
/** Enables or disables CFG_REPORT_PROLOG_WS. */
public void doReportPrologWhitespace(boolean state) {
setConfigFlag(CFG_REPORT_PROLOG_WS, state);
}
/** Enables or disables CFG_REPORT_CDATA. */
public void doReportCData(boolean state) {
setConfigFlag(CFG_REPORT_CDATA, state);
}
/** Enables or disables CFG_CACHE_DTDS. */
public void doCacheDTDs(boolean state) {
setConfigFlag(CFG_CACHE_DTDS, state);
}
/** Enables or disables CFG_CACHE_DTDS_BY_PUBLIC_ID. */
public void doCacheDTDsByPublicId(boolean state) {
setConfigFlag(CFG_CACHE_DTDS_BY_PUBLIC_ID, state);
}
/** Enables or disables CFG_LAZY_PARSING. */
public void doParseLazily(boolean state) {
setConfigFlag(CFG_LAZY_PARSING, state);
}
/** Enables or disables CFG_XMLID_TYPING. */
public void doXmlIdTyping(boolean state) {
setConfigFlag(CFG_XMLID_TYPING, state);
}
/** Enables or disables CFG_XMLID_UNIQ_CHECKS. */
public void doXmlIdUniqChecks(boolean state) {
setConfigFlag(CFG_XMLID_UNIQ_CHECKS, state);
}
/** Enables or disables CFG_PRESERVE_LOCATION. */
public void doPreserveLocation(boolean state) {
setConfigFlag(CFG_PRESERVE_LOCATION, state);
}
/** Enables or disables CFG_AUTO_CLOSE_INPUT. */
public void doAutoCloseInput(boolean state) {
setConfigFlag(CFG_AUTO_CLOSE_INPUT, state);
}
/** Enables or disables CFG_SUPPORT_DTDPP. */
public void doSupportDTDPP(boolean state) {
setConfigFlag(CFG_SUPPORT_DTDPP, state);
}
/**
 * Sets the input buffer length, enforcing a lower bound: the minimum
 * is needed so that the longest consecutive text span (xml
 * declaration etc.) is guaranteed to be available.
 *
 * @param value Requested length; values below
 *   {@link #MIN_INPUT_BUFFER_LENGTH} are replaced by that minimum
 */
public void setInputBufferLength(int value)
{
    mInputBufferLen = Math.max(value, MIN_INPUT_BUFFER_LENGTH);
}
/**
* Sets the value reported by {@link #getShortestReportedTextSegment}.
* The value is stored as given; unlike {@link #setInputBufferLength},
* no lower bound is applied here.
*/
public void setShortestReportedTextSegment(int value) {
mMinTextSegmentLen = value;
}
/**
 * Replaces the set of custom internal entities with entities built
 * from the given Map. The configuration never keeps a reference to the
 * Map itself: a new internal Map of entity declarations is created.
 * Replacement text passed as a <code>char[]</code> is copied as well,
 * so that later changes to the caller's array can not alter the
 * configured entity (consistent with the defensive copy made by
 * {@link #getCustomInternalEntities}).
 *
 * @param m Map from entity name (String) to replacement text. A value
 *   may be null (empty replacement text), a <code>char[]</code>, or any
 *   other Object, in which case its <code>toString()</code> result is
 *   used. A null or empty Map results in an empty entity set.
 * @throws ClassCastException if a key of the Map is not a String
 */
public void setCustomInternalEntities(Map m)
{
    Map entMap;
    if (m == null || m.isEmpty()) {
        entMap = Collections.EMPTY_MAP;
    } else {
        int len = m.size();
        entMap = new HashMap(len + (len >> 1), 0.75f);
        Iterator it = m.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry me = (Map.Entry) it.next();
            Object val = me.getValue();
            char[] ch;
            if (val == null) {
                ch = DataUtil.getEmptyCharArray();
            } else if (val instanceof char[]) {
                // Copy: the array is mutable and still owned by the caller
                ch = (char[]) ((char[]) val).clone();
            } else {
                // Probably String, but let's just ensure that
                ch = val.toString().toCharArray();
            }
            String name = (String) me.getKey();
            entMap.put(name, IntEntity.create(name, ch));
        }
    }
    _setSpecialProperty(SP_IX_CUSTOM_ENTITIES, entMap);
}
/** Sets the reporter returned by {@link #getXMLReporter}. */
public void setXMLReporter(XMLReporter r) {
mReporter = r;
}
/**
* Sets both the entity resolver and the DTD resolver to the given
* resolver.
* Note: for better granularity, you should call {@link #setEntityResolver}
* and {@link #setDtdResolver} instead.
*/
public void setXMLResolver(XMLResolver r) {
mEntityResolver = r;
mDtdResolver = r;
}
/** Sets only the DTD resolver; the entity resolver is left as is. */
public void setDtdResolver(XMLResolver r) {
mDtdResolver = r;
}
/** Sets only the entity resolver; the DTD resolver is left as is. */
public void setEntityResolver(XMLResolver r) {
mEntityResolver = r;
}
/** Stores the resolver in the "undeclared entity resolver" special-property slot. */
public void setUndeclaredEntityResolver(XMLResolver r) {
_setSpecialProperty(SP_IX_UNDECL_ENT_RESOLVER, r);
}
/** Sets the base URL returned by {@link #getBaseURL}. */
public void setBaseURL(URL baseURL) { mBaseURL = baseURL; }
/** Sets the parsing mode; the value is stored as given, without checks. */
public void setInputParsingMode(WstxInputProperties.ParsingMode mode) {
mParsingMode = mode;
}
/**
* Method called to enable or disable 1.1 compliant processing; if
* disabled, defaults to xml 1.0 compliant processing.
*/
public void enableXml11(boolean state) {
mXml11 = state;
}
/** Stores the listener in the "DTD event listener" special-property slot. */
public void setDTDEventListener(DTDEventListener l) {
_setSpecialProperty(SP_IX_DTD_EVENT_LISTENER, l);
}
/** Stores the schema in the "DTD override" special-property slot. */
public void setDTDOverride(DTDValidationSchema schema) {
_setSpecialProperty(SP_IX_DTD_OVERRIDE, schema);
}
/*
/////////////////////////////////////////////////////
// Profile mutators:
/////////////////////////////////////////////////////
*/
/**
 * Profile that makes created readers conform to the XML standard as
 * closely as possible, doing all checks and transformations it
 * mandates (linefeed conversions, attribute value normalization).
 * See {@link XMLInputFactory2#configureForXmlConformance} for the
 * settings required for standard StAX/StAX2 properties.
 *<p>
 * No additional Woodstox-specific settings are changed.
 *<p>
 * Note: does NOT change 'performance' settings (buffer sizes,
 * DTD caching, coalescing, interning, accurate location info).
 */
public void configureForXmlConformance()
{
    // StAX 1.0: namespaces, DTDs, external entities, entity expansion
    setConfigFlag(CFG_NAMESPACE_AWARE
            | CFG_SUPPORT_DTD
            | CFG_SUPPORT_EXTERNAL_ENTITIES
            | CFG_REPLACE_ENTITY_REFS);
    // StAX2: better enable full xml:id checks (typing and uniqueness)
    setConfigFlag(CFG_XMLID_TYPING | CFG_XMLID_UNIQ_CHECKS);
    // Woodstox-specific: nothing to change
}
/**
 * Profile that makes created readers as "convenient" to use as
 * possible; that is, tries to spare callers from dealing with things
 * like segmented text chunks. This may incur some slight performance
 * penalties, but should not affect XML conformance.
 * See {@link XMLInputFactory2#configureForConvenience} for the
 * settings required for standard StAX/StAX2 properties.
 *<p>
 * In addition to the standard settings, the following Woodstox-specific
 * setting is also changed:
 *<ul>
 * <li>Disable <code>XMLInputFactory2.P_LAZY_PARSING</code> (to allow for
 *   synchronous error notification, by forcing full XML events to be
 *   completely parsed when reader's next() is called)
 *  </li>
 *</ul>
 */
public void configureForConvenience()
{
    /* Switched on: text coalescing and entity expansion (StAX 1.0),
     * plus exact location information (StAX2), which is nice to have
     * especially for error reporting.
     */
    setConfigFlag(CFG_COALESCE_TEXT
            | CFG_REPLACE_ENTITY_REFS
            | CFG_PRESERVE_LOCATION);
    /* Switched off: separate CDATA and prolog white space reporting
     * (StAX2), and lazy parsing (Woodstox), so that errors get reported
     * in timely manner -- once again at potential expense of performance.
     */
    clearConfigFlag(CFG_REPORT_CDATA
            | CFG_REPORT_PROLOG_WS
            | CFG_LAZY_PARSING);
}
/**
 * Profile that makes created readers as fast as possible at reading
 * documents, especially for long-running processes where caching is
 * likely to help.
 * See {@link XMLInputFactory2#configureForSpeed} for the settings
 * required for standard StAX/StAX2 properties.
 *<p>
 * In addition to the standard settings, the following Woodstox-specific
 * settings are also changed:
 *<ul>
 * <li>Enable <code>P_CACHE_DTDS</code>.
 *  </li>
 * <li>Enable <code>XMLInputFactory2.P_LAZY_PARSING</code> (can improve
 *   performance especially when skipping text segments)
 *  </li>
 * <li>Disable Xml:id uniqueness checks (and leave typing as is)
 *  </li>
 * <li>Set lowish value for <code>P_MIN_TEXT_SEGMENT</code>, to allow
 *   reader to optimize the segment length it uses (and possibly avoid
 *   one copy operation in the process)
 *  </li>
 * <li>Increase <code>P_INPUT_BUFFER_LENGTH</code> a bit from default,
 *   to allow for longer consecutive read operations; also reduces cases
 *   where partial text segments are on input buffer boundaries.
 *  </li>
 *</ul>
 */
public void configureForSpeed()
{
    /* Switched off: coalescing (StAX 1.0); location tracking, prolog
     * white space reporting and xml:id uniqueness checks (StAX2).
     */
    clearConfigFlag(CFG_COALESCE_TEXT
            | CFG_PRESERVE_LOCATION
            | CFG_REPORT_PROLOG_WS
            | CFG_XMLID_UNIQ_CHECKS);
    /* Switched on: namespace URI interning (StAX2); DTD caching and
     * lazy parsing (Woodstox). Name interning is not touched here
     * (the original author notes that enabling it would be a no-op).
     */
    setConfigFlag(CFG_INTERN_NS_URIS
            | CFG_CACHE_DTDS
            | CFG_LAZY_PARSING);

    /* Letting the reader decide sizes of text segments should let it
     * optimize better, thus a low minimum. The value only matters when
     * text is at a buffer boundary, or when an entity prevents using
     * consecutive chars from the input buffer.
     */
    setShortestReportedTextSegment(16);
    // 8000 chars, i.e. a 16k input buffer
    setInputBufferLength(8000);
}
/**
 * Profile that minimizes the memory usage of the stream/event reader,
 * both regarding Objects created and the temporary memory used during
 * parsing. This generally incurs some performance penalties, due to
 * using smaller input buffers.
 * See {@link XMLInputFactory2#configureForLowMemUsage} for the
 * settings required for standard StAX/StAX2 properties.
 *<p>
 * In addition to the standard settings, the following Woodstox-specific
 * settings are also changed:
 *<ul>
 * <li>Disable <code>P_CACHE_DTDS</code>
 *  </li>
 * <li>Enable <code>P_LAZY_PARSING</code>
 *  </li>
 * <li>Reset <code>P_MIN_TEXT_SEGMENT</code> to the (somewhat low)
 *   default value.
 *  </li>
 * <li>Reduce <code>P_INPUT_BUFFER_LENGTH</code> a bit from the default
 *  </li>
 *</ul>
 */
public void configureForLowMemUsage()
{
    /* Switched off: coalescing (StAX 1.0); location tracking, which can
     * reduce temporary memory usage (StAX2); DTD caching and xml:id
     * uniqueness checks, both of which would increase memory usage.
     */
    clearConfigFlag(CFG_COALESCE_TEXT
            | CFG_PRESERVE_LOCATION
            | CFG_CACHE_DTDS
            | CFG_XMLID_UNIQ_CHECKS);
    // Switched on: lazy parsing, which can reduce temporary memory usage
    setConfigFlag(CFG_LAZY_PARSING);

    // Back to the default minimum segment length:
    setShortestReportedTextSegment(ReaderConfig.DEFAULT_SHORTEST_TEXT_SEGMENT);
    // 512 chars, i.e. a 1k input buffer
    setInputBufferLength(512);
    // Text buffer need not be huge, as we do not coalesce
}
/**
 * Profile that makes created readers preserve as much of the input
 * formatting as possible, so that round-tripping would be as lossless
 * as possible.
 * See {@link XMLInputFactory2#configureForRoundTripping} for the
 * settings required for standard StAX/StAX2 properties.
 *<p>
 * In addition to the standard settings, the following Woodstox-specific
 * setting is also changed:
 *<ul>
 * <li>Increase <code>P_MIN_TEXT_SEGMENT</code> to the maximum value so
 *   that all original text segment chunks are reported without
 *   segmentation (but without coalescing with adjacent CDATA segments)
 *  </li>
 *</ul>
 */
public void configureForRoundTripping()
{
    // Switched off (StAX 1.0): coalescing and entity expansion
    clearConfigFlag(CFG_COALESCE_TEXT | CFG_REPLACE_ENTITY_REFS);
    // Switched on (StAX2): CDATA and prolog white space reporting
    setConfigFlag(CFG_REPORT_CDATA | CFG_REPORT_PROLOG_WS);

    // Woodstox: effectively prevents reporting of partial segments
    setShortestReportedTextSegment(Integer.MAX_VALUE);
}
/*
/////////////////////////////////////////////////////
// Buffer recycling:
/////////////////////////////////////////////////////
*/
/**
 * Obtains a "small" char buffer: first asks the current recycler (if
 * any) for one, passing the minimum size along; if that yields
 * nothing, allocates a new array of exactly <code>minSize</code> chars.
 *
 * @param minSize Minimum size requested
 * @return Recycled buffer if the recycler had one to offer; a newly
 *   allocated buffer otherwise
 */
public char[] allocSmallCBuffer(int minSize)
{
    final BufferRecycler recycler = mCurrRecycler;
    final char[] recycled = (recycler == null)
        ? null : recycler.getSmallCBuffer(minSize);
    // No recycler, or nothing suitable in it: allocate
    return (recycled != null) ? recycled : new char[minSize];
}
/**
 * Hands a "small" char buffer back for recycling, creating (and
 * registering) the recycler first if there is none yet.
 *
 * @param buffer Buffer that is no longer needed by the caller
 */
public void freeSmallCBuffer(char[] buffer)
{
    BufferRecycler recycler = mCurrRecycler;
    if (recycler == null) {
        // First buffer to be returned: time to create the recycler
        recycler = createRecycler();
        mCurrRecycler = recycler;
    }
    recycler.returnSmallCBuffer(buffer);
}
/**
 * Obtains a "medium" char buffer: first asks the current recycler (if
 * any) for one, passing the minimum size along; if that yields
 * nothing, allocates a new array of exactly <code>minSize</code> chars.
 *
 * @param minSize Minimum size requested
 * @return Recycled buffer if the recycler had one to offer; a newly
 *   allocated buffer otherwise
 */
public char[] allocMediumCBuffer(int minSize)
{
    final BufferRecycler recycler = mCurrRecycler;
    final char[] recycled = (recycler == null)
        ? null : recycler.getMediumCBuffer(minSize);
    return (recycled != null) ? recycled : new char[minSize];
}
/**
 * Hands a "medium" char buffer back for recycling, creating (and
 * registering) the recycler first if there is none yet.
 *
 * @param buffer Buffer that is no longer needed by the caller
 */
public void freeMediumCBuffer(char[] buffer)
{
    BufferRecycler recycler = mCurrRecycler;
    if (recycler == null) {
        recycler = createRecycler();
        mCurrRecycler = recycler;
    }
    recycler.returnMediumCBuffer(buffer);
}
/**
 * Obtains a "full" char buffer: first asks the current recycler (if
 * any) for one, passing the minimum size along; if that yields
 * nothing, allocates a new array of exactly <code>minSize</code> chars.
 *
 * @param minSize Minimum size requested
 * @return Recycled buffer if the recycler had one to offer; a newly
 *   allocated buffer otherwise
 */
public char[] allocFullCBuffer(int minSize)
{
    final BufferRecycler recycler = mCurrRecycler;
    final char[] recycled = (recycler == null)
        ? null : recycler.getFullCBuffer(minSize);
    return (recycled != null) ? recycled : new char[minSize];
}
/**
 * Hands a "full" char buffer back for recycling, creating (and
 * registering) the recycler first if there is none yet.
 *
 * @param buffer Buffer that is no longer needed by the caller
 */
public void freeFullCBuffer(char[] buffer)
{
    BufferRecycler recycler = mCurrRecycler;
    if (recycler == null) {
        // First buffer to be returned: time to create the recycler
        recycler = createRecycler();
        mCurrRecycler = recycler;
    }
    recycler.returnFullCBuffer(buffer);
}
/**
 * Obtains a "full" byte buffer: first asks the current recycler (if
 * any) for one, passing the minimum size along; if that yields
 * nothing, allocates a new array of exactly <code>minSize</code> bytes.
 *
 * @param minSize Minimum size requested
 * @return Recycled buffer if the recycler had one to offer; a newly
 *   allocated buffer otherwise
 */
public byte[] allocFullBBuffer(int minSize)
{
    final BufferRecycler recycler = mCurrRecycler;
    final byte[] recycled = (recycler == null)
        ? null : recycler.getFullBBuffer(minSize);
    return (recycled != null) ? recycled : new byte[minSize];
}
/**
 * Hands a "full" byte buffer back for recycling, creating (and
 * registering) the recycler first if there is none yet.
 *
 * @param buffer Buffer that is no longer needed by the caller
 */
public void freeFullBBuffer(byte[] buffer)
{
    BufferRecycler recycler = mCurrRecycler;
    if (recycler == null) {
        // First buffer to be returned: time to create the recycler
        recycler = createRecycler();
        mCurrRecycler = recycler;
    }
    recycler.returnFullBBuffer(buffer);
}
// Debug counter: its only use in the code visible here is a commented-out
// trace statement in createRecycler().
// NOTE(review): looks unused otherwise -- confirm before removing.
static int Counter = 0;
/**
 * Creates a new buffer recycler and registers it, wrapped in a
 * {@link SoftReference}, in the {@link #mRecyclerRef} thread local.
 * Assigning the result to {@link #mCurrRecycler} is left to the caller.
 *
 * @return The newly created recycler
 */
private BufferRecycler createRecycler()
{
    final BufferRecycler fresh = new BufferRecycler();
    // A SoftReference can not be re-pointed, so a new one is needed each time
    //System.err.println("DEBUG: RefCount: "+(++Counter));
    final SoftReference holder = new SoftReference(fresh);
    mRecyclerRef.set(holder);
    return fresh;
}
/*
/////////////////////////////////////////////////////
// Internal methods:
/////////////////////////////////////////////////////
*/
/**
 * Turns given configuration flag bit(s) on or off, and additionally
 * records that the flag has been explicitly modified.
 *
 * @param flag Bit mask of the flag(s) to change
 * @param state True to set the flag(s), false to clear
 */
private void setConfigFlag(int flag, boolean state)
{
    // Regardless of the new state, flag now counts as explicitly modified
    mConfigFlagMods |= flag;
    mConfigFlags = state ? (mConfigFlags | flag) : (mConfigFlags & ~flag);
}
/**
 * Returns the current value of the property identified by given
 * internal property id.
 *<p>
 * On/off settings are returned as {@link Boolean}s, the two numeric
 * settings via <code>DataUtil.Integer</code>, and the rest as
 * whatever the matching accessor returns.
 *
 * @param id Internal id (one of <code>PROP_xxx</code> constants) of
 *   the property to access
 *
 * @return Value of the property; always null for the event allocator
 *   property, which is not handled by this class
 *
 * @throws IllegalStateException If there is no handler for the id
 */
public Object getProperty(int id)
{
    switch (id) {

    // // // Standard Stax 1.0 properties, flags:

    case PROP_COALESCE_TEXT:
        return Boolean.valueOf(willCoalesceText());
    case PROP_NAMESPACE_AWARE:
        return Boolean.valueOf(willSupportNamespaces());
    case PROP_REPLACE_ENTITY_REFS:
        return Boolean.valueOf(willReplaceEntityRefs());
    case PROP_SUPPORT_EXTERNAL_ENTITIES:
        return Boolean.valueOf(willSupportExternalEntities());
    case PROP_VALIDATE_AGAINST_DTD:
        return Boolean.valueOf(willValidateWithDTD());
    case PROP_SUPPORT_DTD:
        return Boolean.valueOf(willSupportDTDs());

    // // // Standard Stax 1.0 properties, objects:

    case PROP_WARNING_REPORTER:
        return getXMLReporter();
    case PROP_XML_RESOLVER:
        return getXMLResolver();
    case PROP_EVENT_ALLOCATOR:
        // Not really supported at this level, so nothing to return
        return null;

    // // // Stax2 properties:

    case PROP_REPORT_PROLOG_WS:
        return Boolean.valueOf(willReportPrologWhitespace());
    case PROP_REPORT_CDATA:
        return Boolean.valueOf(willReportCData());
    case PROP_INTERN_NAMES:
        return Boolean.valueOf(willInternNames());
    case PROP_INTERN_NS_URIS:
        return Boolean.valueOf(willInternNsURIs());
    case PROP_PRESERVE_LOCATION:
        return Boolean.valueOf(willPreserveLocation());
    case PROP_AUTO_CLOSE_INPUT:
        return Boolean.valueOf(willAutoCloseInput());
    case PROP_DTD_OVERRIDE:
        return getDTDOverride();

    // // // Woodstox custom properties, flags:

    case PROP_CACHE_DTDS:
        return Boolean.valueOf(willCacheDTDs());
    case PROP_CACHE_DTDS_BY_PUBLIC_ID:
        return Boolean.valueOf(willCacheDTDsByPublicId());
    case PROP_LAZY_PARSING:
        return Boolean.valueOf(willParseLazily());
    case PROP_SUPPORT_XMLID:
        // Two flag bits are mapped to a three-valued setting
        if (_hasConfigFlag(CFG_XMLID_TYPING)) {
            if (_hasConfigFlag(CFG_XMLID_UNIQ_CHECKS)) {
                return XMLStreamProperties.XSP_V_XMLID_FULL;
            }
            return XMLStreamProperties.XSP_V_XMLID_TYPING;
        }
        return XMLStreamProperties.XSP_V_XMLID_NONE;

    // // // Woodstox custom properties, object values:

    case PROP_INPUT_BUFFER_LENGTH:
        return DataUtil.Integer(getInputBufferLength());
    case PROP_MIN_TEXT_SEGMENT:
        return DataUtil.Integer(getShortestReportedTextSegment());
    case PROP_CUSTOM_INTERNAL_ENTITIES:
        return getCustomInternalEntities();
    case PROP_DTD_RESOLVER:
        return getDtdResolver();
    case PROP_ENTITY_RESOLVER:
        return getEntityResolver();
    case PROP_UNDECLARED_ENTITY_RESOLVER:
        return getUndeclaredEntityResolver();
    case PROP_BASE_URL:
        return getBaseURL();
    case PROP_INPUT_PARSING_MODE:
        return getInputParsingMode();

    default: // sanity check, should never happen
        throw new IllegalStateException("Internal error: no handler for property with internal id "+id+".");
    }
}
/**
 * Changes the value of the property identified by given internal
 * property id.
 *<p>
 * On/off and numeric settings are converted from the passed-in value
 * using <code>ArgUtil</code>; object-valued settings are cast directly
 * to the expected type, so a value of a wrong type fails on that cast.
 *
 * @param propName Public name of the property; passed along to the
 *   <code>ArgUtil</code> conversion methods
 * @param id Internal id (one of <code>PROP_xxx</code> constants) of
 *   the property to change
 * @param value New value for the property
 *
 * @return True if the property was handled here; false for the event
 *   allocator property, which caller has to deal with
 *
 * @throws IllegalArgumentException If the value is not one of legal
 *   xml:id support settings, or can not be converted to a base URL
 * @throws IllegalStateException If there is no handler for the id
 */
public boolean setProperty(String propName, int id, Object value)
{
    switch (id) {

    // // // Standard (Stax 1.0) properties, flags:

    case PROP_COALESCE_TEXT:
        doCoalesceText(ArgUtil.convertToBoolean(propName, value));
        return true;
    case PROP_NAMESPACE_AWARE:
        doSupportNamespaces(ArgUtil.convertToBoolean(propName, value));
        return true;
    case PROP_REPLACE_ENTITY_REFS:
        doReplaceEntityRefs(ArgUtil.convertToBoolean(propName, value));
        return true;
    case PROP_SUPPORT_EXTERNAL_ENTITIES:
        doSupportExternalEntities(ArgUtil.convertToBoolean(propName, value));
        return true;
    case PROP_SUPPORT_DTD:
        doSupportDTDs(ArgUtil.convertToBoolean(propName, value));
        return true;

    // // // Then ones that can be dispatched:

    case PROP_VALIDATE_AGAINST_DTD:
        doValidateWithDTD(ArgUtil.convertToBoolean(propName, value));
        return true;
    case PROP_WARNING_REPORTER:
        setXMLReporter((XMLReporter) value);
        return true;
    case PROP_XML_RESOLVER:
        setXMLResolver((XMLResolver) value);
        return true;
    case PROP_EVENT_ALLOCATOR:
        // Not really supported here; caller is told to handle it
        return false;

    // // // Stax2 properties, flags:

    case PROP_INTERN_NS_URIS:
        doInternNsURIs(ArgUtil.convertToBoolean(propName, value));
        return true;
    case PROP_INTERN_NAMES:
        doInternNames(ArgUtil.convertToBoolean(propName, value));
        return true;
    case PROP_REPORT_CDATA:
        doReportCData(ArgUtil.convertToBoolean(propName, value));
        return true;
    case PROP_REPORT_PROLOG_WS:
        doReportPrologWhitespace(ArgUtil.convertToBoolean(propName, value));
        return true;
    case PROP_PRESERVE_LOCATION:
        doPreserveLocation(ArgUtil.convertToBoolean(propName, value));
        return true;
    case PROP_AUTO_CLOSE_INPUT:
        doAutoCloseInput(ArgUtil.convertToBoolean(propName, value));
        return true;

    // // // Stax2 properties, enum/object types:

    case PROP_SUPPORT_XMLID:
        {
            // Three-valued setting is stored as two separate flag bits
            final boolean enableTyping;
            final boolean enableUniqChecks;

            if (XMLStreamProperties.XSP_V_XMLID_FULL.equals(value)) {
                enableTyping = true;
                enableUniqChecks = true;
            } else if (XMLStreamProperties.XSP_V_XMLID_TYPING.equals(value)) {
                enableTyping = true;
                enableUniqChecks = false;
            } else if (XMLStreamProperties.XSP_V_XMLID_NONE.equals(value)) {
                enableTyping = false;
                enableUniqChecks = false;
            } else {
                throw new IllegalArgumentException
                    ("Illegal argument ('"+value+"') to set property "
                     +XMLStreamProperties.XSP_SUPPORT_XMLID+" to: has to be one of '"
                     +XMLStreamProperties.XSP_V_XMLID_NONE+"', '"+XMLStreamProperties.XSP_V_XMLID_TYPING+"' or '"+XMLStreamProperties.XSP_V_XMLID_FULL+"'"
                     );
            }
            setConfigFlag(CFG_XMLID_TYPING, enableTyping);
            setConfigFlag(CFG_XMLID_UNIQ_CHECKS, enableUniqChecks);
            return true;
        }
    case PROP_DTD_OVERRIDE:
        setDTDOverride((DTDValidationSchema) value);
        return true;

    // // // Woodstox specific, flags:

    case PROP_CACHE_DTDS:
        doCacheDTDs(ArgUtil.convertToBoolean(propName, value));
        return true;
    case PROP_CACHE_DTDS_BY_PUBLIC_ID:
        doCacheDTDsByPublicId(ArgUtil.convertToBoolean(propName, value));
        return true;
    case PROP_LAZY_PARSING:
        doParseLazily(ArgUtil.convertToBoolean(propName, value));
        return true;

    // // // Woodstox specific, enum/object:

    case PROP_INPUT_BUFFER_LENGTH:
        setInputBufferLength(ArgUtil.convertToInt(propName, value, 1));
        return true;
    case PROP_MIN_TEXT_SEGMENT:
        setShortestReportedTextSegment(ArgUtil.convertToInt(propName, value, 1));
        return true;
    case PROP_CUSTOM_INTERNAL_ENTITIES:
        setCustomInternalEntities((Map) value);
        return true;
    case PROP_DTD_RESOLVER:
        setDtdResolver((XMLResolver) value);
        return true;
    case PROP_ENTITY_RESOLVER:
        setEntityResolver((XMLResolver) value);
        return true;
    case PROP_UNDECLARED_ENTITY_RESOLVER:
        setUndeclaredEntityResolver((XMLResolver) value);
        return true;
    case PROP_BASE_URL:
        {
            /* Value need not be a URL per se: anything else (except
             * for null, which is passed as is) is converted via its
             * String representation.
             */
            URL base = null;
            if (value instanceof URL) {
                base = (URL) value;
            } else if (value != null) {
                try {
                    base = new URL(value.toString());
                } catch (Exception e) { // MalformedURLException actually...
                    throw new IllegalArgumentException(e.getMessage(), e);
                }
            }
            setBaseURL(base);
            return true;
        }
    case PROP_INPUT_PARSING_MODE:
        setInputParsingMode((WstxInputProperties.ParsingMode) value);
        return true;

    default: // sanity check, should never happen
        throw new IllegalStateException("Internal error: no handler for property with internal id "+id+".");
    }
}
/**
 * Checks whether given configuration flag is currently set.
 *
 * @param flag Bit mask of the flag(s) to check
 *
 * @return True if at least one of the bits in the mask is set in the
 *   current configuration flags
 */
protected boolean _hasConfigFlag(int flag)
{
    return 0 != (flag & mConfigFlags);
}
/**
 * Stricter variant of {@link #_hasConfigFlag}: it is not enough for
 * the flag to be set, it must also have been marked as explicitly
 * modified (which is what <code>setConfigFlag</code> does whenever it
 * changes a flag).
 *
 * @param flag Bit mask of the flag(s) to check
 *
 * @return True if the flag is both set and marked as explicitly
 *   modified; false otherwise
 */
protected boolean _hasExplicitConfigFlag(int flag)
{
    if ((mConfigFlagMods & flag) == 0) {
        // Never explicitly touched, so the current state does not matter
        return false;
    }
    return _hasConfigFlag(flag);
}
/**
 * Accessor for entries of the lazily allocated "special properties"
 * array.
 *
 * @param ix Index of the entry to access
 *
 * @return Value stored at the index; null if the array has not been
 *   allocated yet
 */
private final Object _getSpecialProperty(int ix)
{
    return (mSpecialProperties == null) ? null : mSpecialProperties[ix];
}
/**
 * Mutator for entries of the "special properties" array; the array
 * itself (with <code>SPEC_PROC_COUNT</code> entries) is allocated on
 * the first call.
 *
 * @param ix Index of the entry to change
 * @param value Value to store at the index
 */
private final void _setSpecialProperty(int ix, Object value)
{
    Object[] props = mSpecialProperties;
    if (props == null) {
        // First special property to be set: allocate the storage
        props = new Object[SPEC_PROC_COUNT];
        mSpecialProperties = props;
    }
    props[ix] = value;
}
}