All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ctc.wstx.api.WstxInputProperties Maven / Gradle / Ivy

Go to download

Woodstox is a high-performance XML processor that implements Stax (JSR-173), SAX2 and Stax2 APIs

There is a newer version: 7.1.0
Show newest version
package com.ctc.wstx.api;

import javax.xml.stream.XMLResolver;

import org.codehaus.stax2.XMLInputFactory2;

/**
 * Class that contains constant for property names used to configure
 * cursor and event readers produced by Wstx implementation of
 * {@link javax.xml.stream.XMLInputFactory}.
 *

* TODO: * * - CHECK_CHAR_VALIDITY (separate for white spaces?) * - CATALOG_RESOLVER? (or at least, ENABLE_CATALOGS) */ public final class WstxInputProperties { /** * Constants used when no DTD handling is done, and we do not know the * 'real' type of an attribute. Seems like CDATA is the safe choice. */ public final static String UNKNOWN_ATTR_TYPE = "CDATA"; /* /////////////////////////////////////////////////////////////////////// // Simple on/off settings: /////////////////////////////////////////////////////////////////////// */ // // // Normalization: /** * Feature that controls whether linefeeds are normalized into * canonical linefeed as mandated by xml specification. *

* Note that disabling this property (from its default enabled * state) will result in non-conforming XML processing. It may * be useful for use cases where changes to input content should * be minimized. *

* Note: this property was initially removed from Woodstox 4.0, * but was reintroduced in 4.0.8 due to user request. */ public final static String P_NORMALIZE_LFS = "com.ctc.wstx.normalizeLFs"; //public final static String P_NORMALIZE_ATTR_VALUES = "com.ctc.wstx.normalizeAttrValues"; // // // XML character validation: /** * Whether readers will verify that characters in text content are fully * valid XML characters (not just Unicode). If true, will check * that they are valid (including white space); if false, will not * check. *

* Note that this property will NOT have effect on all encoding problems, * specifically: *

    *
  • UTF-8 decoder will still report invalid UTF-8 byte sequences (and same * for other character encodings). *
  • *
  • XML Name character rules follow separate validation which will not be affected *
  • *
*

* Turning this option off may improve parsing performance; leaving * it on guarantees compatibility with XML 1.0 specification regarding character * validity rules. */ public final static String P_VALIDATE_TEXT_CHARS = "com.ctc.wstx.validateTextChars"; /** * Allow all XML 1.1 characters escapes even if input document is described as XML 1.0 * (in addition to ones allowed in 1.l0). *

* Since this is non-standard option (that is, deviates from XML specification), it is * disabled by default. * * @since 5.2 */ public final static String P_ALLOW_XML11_ESCAPED_CHARS_IN_XML10 = "com.ctc.wstx.allowXml11EscapedCharsInXml10"; // // // Caching: /** * Whether readers will try to cache parsed external DTD subsets or not. */ public final static String P_CACHE_DTDS = "com.ctc.wstx.cacheDTDs"; /** * Whether reader is to cache DTDs (when caching enabled) based on public id * or not: if not, system id will be primarily used. Although theoretically * public IDs should be unique, and should be good caching keys, sometimes * broken documents use 'wrong' public IDs, and such by default caching keys * are based on system id only. */ public final static String P_CACHE_DTDS_BY_PUBLIC_ID = "com.ctc.wstx.cacheDTDsByPublicId"; // // // Enabling/disabling lazy/incomplete parsing /** * Whether stream readers are allowed to do lazy parsing, meaning * to parse minimal part of the event when * {@link javax.xml.stream.XMLStreamReader#next} is called, and only parse the rest * as needed (or skip remainder of no extra information is needed). * Alternative to lazy parsing is called "eager parsing", and is * what most xml parsers use by default. *

* Enabling lazy parsing can improve performance for tasks where * number of textual events are skipped. The downside is that * not all well-formedness problems are reported when * {@link javax.xml.stream.XMLStreamReader#next} is called, but only when the * rest of event are read or skipped. *

* Default value for Woodstox is such that lazy parsing is * enabled. * * @deprecated As of Woodstox 4.0 use * {@link XMLInputFactory2#P_LAZY_PARSING} instead (from * Stax2 extension API, v3.0) */ @Deprecated public final static String P_LAZY_PARSING = XMLInputFactory2.P_LAZY_PARSING; // // // API behavior (for backwards compatibility) /** * This read-only property indicates whether null is returned for default name space prefix; * Boolean.TRUE indicates it does, Boolean.FALSE that it does not. *

* Default value for 4.1 is 'false'; this will most likely change for 5.0 since * Stax API actually specifies null to be used. * * @since 4.1.2 */ public final static String P_RETURN_NULL_FOR_DEFAULT_NAMESPACE = "com.ctc.wstx.returnNullForDefaultNamespace"; // // // Enabling/disabling support for dtd++ /** * Whether the Reader will recognized DTD++ extensions when parsing * DTD subsets. *

* Note: not implemented by Woodstox. * * @deprecated Never implement, let's phase this out (deprecated in 4.2) */ @Deprecated public final static String P_SUPPORT_DTDPP = "com.ctc.wstx.supportDTDPP"; /** * Whether the Reader will treat character references as entities while parsing * XML documents. */ public static final String P_TREAT_CHAR_REFS_AS_ENTS = "com.ctc.wstx.treatCharRefsAsEnts"; // // // Enabling alternate mode for parsing XML fragments instead // // // of full documents // Automatic W3C Schema support? /* * Whether W3C Schema hint attributes are recognized within document, * and used to locate Schema to use for validation. */ //public final static String P_AUTOMATIC_W3C_SCHEMA = 0x00100000; /* /////////////////////////////////////////////////////////////////////// // More complex settings /////////////////////////////////////////////////////////////////////// */ // // // Buffer sizes; /** * Size of input buffer (in chars), to use for reading XML content * from input stream/reader. */ public final static String P_INPUT_BUFFER_LENGTH = "com.ctc.wstx.inputBufferLength"; // // // Constraints on sizes of text segments parsed: /** * Property to specify shortest non-complete text segment (part of * CDATA section or text content) that parser is allowed to return, * if not required to coalesce text. */ public final static String P_MIN_TEXT_SEGMENT = "com.ctc.wstx.minTextSegment"; // // // Other size constraints (4.2+) /** * Maximum number of attributes allowed for single XML element. * @since 4.2 */ public final static String P_MAX_ATTRIBUTES_PER_ELEMENT = "com.ctc.wstx.maxAttributesPerElement"; /** * Maximum length of of individual attribute values (in characters) * @since 4.2 */ public final static String P_MAX_ATTRIBUTE_SIZE = "com.ctc.wstx.maxAttributeSize"; /** * Maximum number of child elements for any given element. * @since 4.2 */ public final static String P_MAX_CHILDREN_PER_ELEMENT = "com.ctc.wstx.maxChildrenPerElement"; /** * Maximum number of all elements in a single document. * @since 4.2 */ public final static String P_MAX_ELEMENT_COUNT = "com.ctc.wstx.maxElementCount"; /** * Maximum level of nesting of XML elements, starting with root element. * @since 4.2 */ public final static String P_MAX_ELEMENT_DEPTH = "com.ctc.wstx.maxElementDepth"; /** * Maximum length of input document, in characters. * @since 4.2 */ public final static String P_MAX_CHARACTERS = "com.ctc.wstx.maxCharacters"; /** * Maximum length of individual text (cdata) segments in input, in characters. * @since 4.2 */ public final static String P_MAX_TEXT_LENGTH = "com.ctc.wstx.maxTextLength"; // and more size constraints (4.3+) /** * Maximum number of total (general parsed) entity expansions within input. * * @since 4.3 */ public final static String P_MAX_ENTITY_COUNT = "com.ctc.wstx.maxEntityCount"; /** * Maximum depth of nested (general parsed) entity expansions. * * @since 4.3 */ public final static String P_MAX_ENTITY_DEPTH = "com.ctc.wstx.maxEntityDepth"; // // // Entity handling /** * Property of type {@link java.util.Map}, that defines explicit set of * internal (generic) entities that will define of override any entities * defined in internal or external subsets; except for the 5 pre-defined * entities (lt, gt, amp, apos, quot). Can be used to explicitly define * entities that would normally come from a DTD. */ public final static String P_CUSTOM_INTERNAL_ENTITIES = "com.ctc.wstx.customInternalEntities"; /** * Property of type {@link XMLResolver}, that * will allow overriding of default DTD and external parameter entity * resolution. */ public final static String P_DTD_RESOLVER = "com.ctc.wstx.dtdResolver"; /** * Property of type {@link XMLResolver}, that * will allow overriding of default external general entity * resolution. Note that using this property overrides settings done * using {@link javax.xml.stream.XMLInputFactory#RESOLVER} (and vice versa). */ public final static String P_ENTITY_RESOLVER = "com.ctc.wstx.entityResolver"; /** * Property of type {@link XMLResolver}, that * will allow graceful handling of references to undeclared (general) * entities. */ public final static String P_UNDECLARED_ENTITY_RESOLVER = "com.ctc.wstx.undeclaredEntityResolver"; /** * Property of type {@link java.net.URL}, that will allow specifying * context URL to use when resolving relative references, for the * main-level entities (external DTD subset, references from the internal * DTD subset). */ public final static String P_BASE_URL = "com.ctc.wstx.baseURL"; // // // Alternate parsing modes /** * Three-valued property (one of * {@link #PARSING_MODE_DOCUMENT}, * {@link #PARSING_MODE_FRAGMENT} or * {@link #PARSING_MODE_DOCUMENTS}; default being the document mode) * that can be used to handle "non-standard" XML content. The default * mode (PARSING_MODE_DOCUMENT) allows parsing of only * well-formed XML documents, but the other two modes allow more lenient * parsing. Fragment mode allows parsing of XML content that does not * have a single root element (can have zero or more), nor can have * XML or DOCTYPE declarations: this may be useful if parsing a subset * of a full XML document. Multi-document * (PARSING_MODE_DOCUMENTS) mode on the other hand allows * parsing of a stream that contains multiple consequtive well-formed * documents, with possibly multiple XML and DOCTYPE declarations. *

* The main difference from the API perspective is that in first two * modes, START_DOCUMENT and END_DOCUMENT are used as usual (as the first * and last events returned), whereas the multi-document mode can return * multiple pairs of these events: although it is still true that the * first event (one cursor points to when reader is instantiated or * returned by the event reader), there may be intervening pairs that * signal boundary between two adjacent enclosed documents. */ public final static String P_INPUT_PARSING_MODE = "com.ctc.wstx.fragmentMode"; // // // DTD defaulting, overriding /* /////////////////////////////////////////////////////////////////////// // Helper classes, values enumerations /////////////////////////////////////////////////////////////////////// */ public final static ParsingMode PARSING_MODE_DOCUMENT = new ParsingMode(); public final static ParsingMode PARSING_MODE_FRAGMENT = new ParsingMode(); public final static ParsingMode PARSING_MODE_DOCUMENTS = new ParsingMode(); /** * Inner class used for creating type-safe enumerations (prior to JDK 1.5). */ public final static class ParsingMode { ParsingMode() { } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy