All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.xerces.impl.XMLDocumentFragmentScannerImpl Maven / Gradle / Ivy

Go to download

Xerces2 is the next generation of high performance, fully compliant XML parsers in the Apache Xerces family. This new version of Xerces introduces the Xerces Native Interface (XNI), a complete framework for building parser components and configurations that is extremely modular and easy to program. The Apache Xerces2 parser is the reference implementation of XNI but other parser components, configurations, and parsers can be written using the Xerces Native Interface. For complete design and implementation documents, refer to the XNI Manual. Xerces2 is a fully conforming XML Schema 1.0 processor. A partial experimental implementation of the XML Schema 1.1 Structures and Datatypes Working Drafts (December 2009) and an experimental implementation of the XML Schema Definition Language (XSD): Component Designators (SCD) Candidate Recommendation (January 2010) are provided for evaluation. For more information, refer to the XML Schema page. Xerces2 also provides a complete implementation of the Document Object Model Level 3 Core and Load/Save W3C Recommendations and provides a complete implementation of the XML Inclusions (XInclude) W3C Recommendation. It also provides support for OASIS XML Catalogs v1.1. Xerces2 is able to parse documents written according to the XML 1.1 Recommendation, except that it does not yet provide an option to enable normalization checking as described in section 2.13 of this specification. It also handles namespaces according to the XML Namespaces 1.1 Recommendation, and will correctly serialize XML 1.1 documents if the DOM level 3 load/save APIs are in use.

There is a newer version: 2.12.2
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.xerces.impl;

import java.io.CharConversionException;
import java.io.EOFException;
import java.io.IOException;

import org.apache.xerces.impl.io.MalformedByteSequenceException;
import org.apache.xerces.impl.msg.XMLMessageFormatter;
import org.apache.xerces.util.AugmentationsImpl;
import org.apache.xerces.util.XMLAttributesImpl;
import org.apache.xerces.util.XMLChar;
import org.apache.xerces.util.XMLStringBuffer;
import org.apache.xerces.util.XMLSymbols;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XMLDocumentHandler;
import org.apache.xerces.xni.XMLResourceIdentifier;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLComponent;
import org.apache.xerces.xni.parser.XMLComponentManager;
import org.apache.xerces.xni.parser.XMLConfigurationException;
import org.apache.xerces.xni.parser.XMLDocumentScanner;
import org.apache.xerces.xni.parser.XMLInputSource;

/**
 * This class is responsible for scanning the structure and content
 * of document fragments. The scanner acts as the source for the 
 * document information which is communicated to the document handler.
 * 

* This component requires the following features and properties from the * component manager that uses it: *

    *
  • http://xml.org/sax/features/validation
  • *
  • http://apache.org/xml/features/scanner/notify-char-refs
  • *
  • http://apache.org/xml/features/scanner/notify-builtin-refs
  • *
  • http://apache.org/xml/properties/internal/symbol-table
  • *
  • http://apache.org/xml/properties/internal/error-reporter
  • *
  • http://apache.org/xml/properties/internal/entity-manager
  • *
* * @xerces.internal * * @author Glenn Marcy, IBM * @author Andy Clark, IBM * @author Arnaud Le Hors, IBM * @author Eric Ye, IBM * * @version $Id: XMLDocumentFragmentScannerImpl.java 572055 2007-09-02 17:55:43Z mrglavas $ */ public class XMLDocumentFragmentScannerImpl extends XMLScanner implements XMLDocumentScanner, XMLComponent, XMLEntityHandler { // // Constants // // scanner states /** Scanner state: start of markup. */ protected static final int SCANNER_STATE_START_OF_MARKUP = 1; /** Scanner state: comment. */ protected static final int SCANNER_STATE_COMMENT = 2; /** Scanner state: processing instruction. */ protected static final int SCANNER_STATE_PI = 3; /** Scanner state: DOCTYPE. */ protected static final int SCANNER_STATE_DOCTYPE = 4; /** Scanner state: root element. */ protected static final int SCANNER_STATE_ROOT_ELEMENT = 6; /** Scanner state: content. */ protected static final int SCANNER_STATE_CONTENT = 7; /** Scanner state: reference. */ protected static final int SCANNER_STATE_REFERENCE = 8; /** Scanner state: end of input. */ protected static final int SCANNER_STATE_END_OF_INPUT = 13; /** Scanner state: terminated. */ protected static final int SCANNER_STATE_TERMINATED = 14; /** Scanner state: CDATA section. */ protected static final int SCANNER_STATE_CDATA = 15; /** Scanner state: Text declaration. */ protected static final int SCANNER_STATE_TEXT_DECL = 16; // feature identifiers /** Feature identifier: namespaces. */ protected static final String NAMESPACES = Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE; /** Feature identifier: notify built-in refereces. */ protected static final String NOTIFY_BUILTIN_REFS = Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE; // property identifiers /** Property identifier: entity resolver. */ protected static final String ENTITY_RESOLVER = Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY; // recognized features and properties /** Recognized features. */ private static final String[] RECOGNIZED_FEATURES = { NAMESPACES, VALIDATION, NOTIFY_BUILTIN_REFS, NOTIFY_CHAR_REFS, }; /** Feature defaults. */ private static final Boolean[] FEATURE_DEFAULTS = { null, null, Boolean.FALSE, Boolean.FALSE, }; /** Recognized properties. */ private static final String[] RECOGNIZED_PROPERTIES = { SYMBOL_TABLE, ERROR_REPORTER, ENTITY_MANAGER, ENTITY_RESOLVER, }; /** Property defaults. */ private static final Object[] PROPERTY_DEFAULTS = { null, null, null, null, }; // debugging /** Debug scanner state. */ private static final boolean DEBUG_SCANNER_STATE = false; /** Debug dispatcher. */ private static final boolean DEBUG_DISPATCHER = false; /** Debug content dispatcher scanning. */ protected static final boolean DEBUG_CONTENT_SCANNING = false; // // Data // // protected data /** Document handler. */ protected XMLDocumentHandler fDocumentHandler; /** Entity stack. */ protected int[] fEntityStack = new int[4]; /** Markup depth. */ protected int fMarkupDepth; /** Scanner state. */ protected int fScannerState; /** SubScanner state: inside scanContent method. */ protected boolean fInScanContent = false; /** has external dtd */ protected boolean fHasExternalDTD; /** Standalone. */ protected boolean fStandalone; /** True if [Entity Declared] is a VC; false if it is a WFC. */ protected boolean fIsEntityDeclaredVC; /** External subset resolver. **/ protected ExternalSubsetResolver fExternalSubsetResolver; // element information /** Current element. */ protected QName fCurrentElement; /** Element stack. */ protected final ElementStack fElementStack = new ElementStack(); // other info /** Document system identifier. * REVISIT: So what's this used for? - NG * protected String fDocumentSystemId; ******/ // features /** Notify built-in references. */ protected boolean fNotifyBuiltInRefs = false; // dispatchers /** Active dispatcher. */ protected Dispatcher fDispatcher; /** Content dispatcher. */ protected final Dispatcher fContentDispatcher = createContentDispatcher(); // temporary variables /** Element QName. */ protected final QName fElementQName = new QName(); /** Attribute QName. */ protected final QName fAttributeQName = new QName(); /** Element attributes. */ protected final XMLAttributesImpl fAttributes = new XMLAttributesImpl(); /** String. */ protected final XMLString fTempString = new XMLString(); /** String. */ protected final XMLString fTempString2 = new XMLString(); /** Array of 3 strings. */ private final String[] fStrings = new String[3]; /** String buffer. */ private final XMLStringBuffer fStringBuffer = new XMLStringBuffer(); /** String buffer. */ private final XMLStringBuffer fStringBuffer2 = new XMLStringBuffer(); /** Another QName. */ private final QName fQName = new QName(); /** Single character array. */ private final char[] fSingleChar = new char[1]; /** * Saw spaces after element name or between attributes. * * This is reserved for the case where scanning of a start element spans * several methods, as is the case when scanning the start of a root element * where a DTD external subset may be read after scanning the element name. */ private boolean fSawSpace; /** Reusable Augmentations. */ private Augmentations fTempAugmentations = null; // // Constructors // /** Default constructor. */ public XMLDocumentFragmentScannerImpl() {} // () // // XMLDocumentScanner methods // /** * Sets the input source. * * @param inputSource The input source. * * @throws IOException Thrown on i/o error. */ public void setInputSource(XMLInputSource inputSource) throws IOException { fEntityManager.setEntityHandler(this); fEntityManager.startEntity("$fragment$", inputSource, false, true); //fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); } // setInputSource(XMLInputSource) /** * Scans a document. * * @param complete True if the scanner should scan the document * completely, pushing all events to the registered * document handler. A value of false indicates that * that the scanner should only scan the next portion * of the document and return. A scanner instance is * permitted to completely scan a document if it does * not support this "pull" scanning model. * * @return True if there is more to scan, false otherwise. */ public boolean scanDocument(boolean complete) throws IOException, XNIException { // reset entity scanner fEntityScanner = fEntityManager.getEntityScanner(); // keep dispatching "events" fEntityManager.setEntityHandler(this); do { if (!fDispatcher.dispatch(complete)) { return false; } } while (complete); // return success return true; } // scanDocument(boolean):boolean // // XMLComponent methods // /** * Resets the component. The component can query the component manager * about any features and properties that affect the operation of the * component. * * @param componentManager The component manager. * * @throws SAXException Thrown by component on initialization error. * For example, if a feature or property is * required for the operation of the component, the * component manager may throw a * SAXNotRecognizedException or a * SAXNotSupportedException. */ public void reset(XMLComponentManager componentManager) throws XMLConfigurationException { super.reset(componentManager); // other settings //fDocumentSystemId = null; // sax features fAttributes.setNamespaces(fNamespaces); // initialize vars fMarkupDepth = 0; fCurrentElement = null; fElementStack.clear(); fHasExternalDTD = false; fStandalone = false; fIsEntityDeclaredVC = false; fInScanContent = false; // setup dispatcher setScannerState(SCANNER_STATE_CONTENT); setDispatcher(fContentDispatcher); if (fParserSettings) { // parser settings have changed. reset them. // xerces features try { fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS); } catch (XMLConfigurationException e) { fNotifyBuiltInRefs = false; } // xerces properties try { Object resolver = componentManager.getProperty(ENTITY_RESOLVER); fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ? (ExternalSubsetResolver) resolver : null; } catch (XMLConfigurationException e) { fExternalSubsetResolver = null; } } } // reset(XMLComponentManager) /** * Returns a list of feature identifiers that are recognized by * this component. This method may return null if no features * are recognized by this component. */ public String[] getRecognizedFeatures() { return (String[])(RECOGNIZED_FEATURES.clone()); } // getRecognizedFeatures():String[] /** * Sets the state of a feature. This method is called by the component * manager any time after reset when a feature changes state. *

* Note: Components should silently ignore features * that do not affect the operation of the component. * * @param featureId The feature identifier. * @param state The state of the feature. * * @throws SAXNotRecognizedException The component should not throw * this exception. * @throws SAXNotSupportedException The component should not throw * this exception. */ public void setFeature(String featureId, boolean state) throws XMLConfigurationException { super.setFeature(featureId, state); // Xerces properties if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length(); if (suffixLength == Constants.NOTIFY_BUILTIN_REFS_FEATURE.length() && featureId.endsWith(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) { fNotifyBuiltInRefs = state; } } } // setFeature(String,boolean) /** * Returns a list of property identifiers that are recognized by * this component. This method may return null if no properties * are recognized by this component. */ public String[] getRecognizedProperties() { return (String[])(RECOGNIZED_PROPERTIES.clone()); } // getRecognizedProperties():String[] /** * Sets the value of a property. This method is called by the component * manager any time after reset when a property changes value. *

* Note: Components should silently ignore properties * that do not affect the operation of the component. * * @param propertyId The property identifier. * @param value The value of the property. * * @throws SAXNotRecognizedException The component should not throw * this exception. * @throws SAXNotSupportedException The component should not throw * this exception. */ public void setProperty(String propertyId, Object value) throws XMLConfigurationException { super.setProperty(propertyId, value); // Xerces properties if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) { fEntityManager = (XMLEntityManager)value; return; } if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? (ExternalSubsetResolver) value : null; return; } } } // setProperty(String,Object) /** * Returns the default state for a feature, or null if this * component does not want to report a default value for this * feature. * * @param featureId The feature identifier. * * @since Xerces 2.2.0 */ public Boolean getFeatureDefault(String featureId) { for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { if (RECOGNIZED_FEATURES[i].equals(featureId)) { return FEATURE_DEFAULTS[i]; } } return null; } // getFeatureDefault(String):Boolean /** * Returns the default state for a property, or null if this * component does not want to report a default value for this * property. * * @param propertyId The property identifier. * * @since Xerces 2.2.0 */ public Object getPropertyDefault(String propertyId) { for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { return PROPERTY_DEFAULTS[i]; } } return null; } // getPropertyDefault(String):Object // // XMLDocumentSource methods // /** * setDocumentHandler * * @param documentHandler */ public void setDocumentHandler(XMLDocumentHandler documentHandler) { fDocumentHandler = documentHandler; } // setDocumentHandler(XMLDocumentHandler) /** Returns the document handler */ public XMLDocumentHandler getDocumentHandler(){ return fDocumentHandler; } // // XMLEntityHandler methods // /** * This method notifies of the start of an entity. The DTD has the * pseudo-name of "[dtd]" parameter entity names start with '%'; and * general entities are just specified by their name. * * @param name The name of the entity. * @param identifier The resource identifier. * @param encoding The auto-detected IANA encoding name of the entity * stream. This value will be null in those situations * where the entity encoding is not auto-detected (e.g. * internal entities or a document entity that is * parsed from a java.io.Reader). * @param augs Additional information that may include infoset augmentations * * @throws XNIException Thrown by handler to signal an error. */ public void startEntity(String name, XMLResourceIdentifier identifier, String encoding, Augmentations augs) throws XNIException { // keep track of this entity before fEntityDepth is increased if (fEntityDepth == fEntityStack.length) { int[] entityarray = new int[fEntityStack.length * 2]; System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length); fEntityStack = entityarray; } fEntityStack[fEntityDepth] = fMarkupDepth; super.startEntity(name, identifier, encoding, augs); // WFC: entity declared in external subset in standalone doc if(fStandalone && fEntityManager.isEntityDeclInExternalSubset(name)) { reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE", new Object[]{name}); } // call handler if (fDocumentHandler != null && !fScanningAttribute) { if (!name.equals("[xml]")) { fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs); } } } // startEntity(String,XMLResourceIdentifier,String) /** * This method notifies the end of an entity. The DTD has the pseudo-name * of "[dtd]" parameter entity names start with '%'; and general entities * are just specified by their name. * * @param name The name of the entity. * @param augs Additional information that may include infoset augmentations * * @throws XNIException Thrown by handler to signal an error. */ public void endEntity(String name, Augmentations augs) throws XNIException { // flush possible pending output buffer - see scanContent if (fInScanContent && fStringBuffer.length != 0 && fDocumentHandler != null) { fDocumentHandler.characters(fStringBuffer, null); fStringBuffer.length = 0; // make sure we know it's been flushed } super.endEntity(name, augs); // make sure markup is properly balanced if (fMarkupDepth != fEntityStack[fEntityDepth]) { reportFatalError("MarkupEntityMismatch", null); } // call handler if (fDocumentHandler != null && !fScanningAttribute) { if (!name.equals("[xml]")) { fDocumentHandler.endGeneralEntity(name, augs); } } } // endEntity(String) // // Protected methods // // dispatcher factory methods /** Creates a content dispatcher. */ protected Dispatcher createContentDispatcher() { return new FragmentContentDispatcher(); } // createContentDispatcher():Dispatcher // scanning methods /** * Scans an XML or text declaration. *

*

     * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
     * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
     * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
     * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
     *                 | ('"' ('yes' | 'no') '"'))
     *
     * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
     * 
* * @param scanningTextDecl True if a text declaration is to * be scanned instead of an XML * declaration. */ protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) throws IOException, XNIException { // scan decl super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); fMarkupDepth--; // pseudo-attribute values String version = fStrings[0]; String encoding = fStrings[1]; String standalone = fStrings[2]; // set standalone fStandalone = standalone != null && standalone.equals("yes"); fEntityManager.setStandalone(fStandalone); // set version on reader fEntityScanner.setXMLVersion(version); // call handler if (fDocumentHandler != null) { if (scanningTextDecl) { fDocumentHandler.textDecl(version, encoding, null); } else { fDocumentHandler.xmlDecl(version, encoding, standalone, null); } } // set encoding on reader if (encoding != null && !fEntityScanner.fCurrentEntity.isEncodingExternallySpecified()) { fEntityScanner.setEncoding(encoding); } } // scanXMLDeclOrTextDecl(boolean) /** * Scans a processing data. This is needed to handle the situation * where a document starts with a processing instruction whose * target name starts with "xml". (e.g. xmlfoo) * * @param target The PI target * @param data The string to fill in with the data */ protected void scanPIData(String target, XMLString data) throws IOException, XNIException { super.scanPIData(target, data); fMarkupDepth--; // call handler if (fDocumentHandler != null) { fDocumentHandler.processingInstruction(target, data, null); } } // scanPIData(String) /** * Scans a comment. *

*

     * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
     * 
*

* Note: Called after scanning past '<!--' */ protected void scanComment() throws IOException, XNIException { scanComment(fStringBuffer); fMarkupDepth--; // call handler if (fDocumentHandler != null) { fDocumentHandler.comment(fStringBuffer, null); } } // scanComment() /** * Scans a start element. This method will handle the binding of * namespace information and notifying the handler of the start * of the element. *

*

     * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
     * [40] STag ::= '<' Name (S Attribute)* S? '>'
     * 
*

* Note: This method assumes that the leading * '<' character has been consumed. *

* Note: This method uses the fElementQName and * fAttributes variables. The contents of these variables will be * destroyed. The caller should copy important information out of * these variables before calling this method. * * @return True if element is empty. (i.e. It matches * production [44]. */ protected boolean scanStartElement() throws IOException, XNIException { if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanStartElement()"); // name if (fNamespaces) { fEntityScanner.scanQName(fElementQName); } else { String name = fEntityScanner.scanName(); fElementQName.setValues(null, name, name, null); } String rawname = fElementQName.rawname; // push element stack fCurrentElement = fElementStack.pushElement(fElementQName); // attributes boolean empty = false; fAttributes.removeAllAttributes(); do { // spaces boolean sawSpace = fEntityScanner.skipSpaces(); // end tag? int c = fEntityScanner.peekChar(); if (c == '>') { fEntityScanner.scanChar(); break; } else if (c == '/') { fEntityScanner.scanChar(); if (!fEntityScanner.skipChar('>')) { reportFatalError("ElementUnterminated", new Object[]{rawname}); } empty = true; break; } else if (!isValidNameStartChar(c) || !sawSpace) { // Second chance. Check if this character is a high // surrogate of a valid name start character. if (!isValidNameStartHighSurrogate(c) || !sawSpace) { reportFatalError("ElementUnterminated", new Object[] { rawname }); } } // attributes scanAttribute(fAttributes); } while (true); // call handler if (fDocumentHandler != null) { if (empty) { //decrease the markup depth.. fMarkupDepth--; // check that this element was opened in the same entity if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { reportFatalError("ElementEntityMismatch", new Object[]{fCurrentElement.rawname}); } fDocumentHandler.emptyElement(fElementQName, fAttributes, null); //pop the element off the stack.. fElementStack.popElement(fElementQName); } else { fDocumentHandler.startElement(fElementQName, fAttributes, null); } } if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanStartElement(): "+empty); return empty; } // scanStartElement():boolean /** * Scans the name of an element in a start or empty tag. * * @see #scanStartElement() */ protected void scanStartElementName () throws IOException, XNIException { // name if (fNamespaces) { fEntityScanner.scanQName(fElementQName); } else { String name = fEntityScanner.scanName(); fElementQName.setValues(null, name, name, null); } // Must skip spaces here because the DTD scanner // would consume them at the end of the external subset. fSawSpace = fEntityScanner.skipSpaces(); } // scanStartElementName() /** * Scans the remainder of a start or empty tag after the element name. * * @see #scanStartElement * @return True if element is empty. */ protected boolean scanStartElementAfterName() throws IOException, XNIException { String rawname = fElementQName.rawname; // push element stack fCurrentElement = fElementStack.pushElement(fElementQName); // attributes boolean empty = false; fAttributes.removeAllAttributes(); do { // end tag? int c = fEntityScanner.peekChar(); if (c == '>') { fEntityScanner.scanChar(); break; } else if (c == '/') { fEntityScanner.scanChar(); if (!fEntityScanner.skipChar('>')) { reportFatalError("ElementUnterminated", new Object[]{rawname}); } empty = true; break; } else if (!isValidNameStartChar(c) || !fSawSpace) { // Second chance. Check if this character is a high // surrogate of a valid name start character. if (!isValidNameStartHighSurrogate(c) || !fSawSpace) { reportFatalError("ElementUnterminated", new Object[] { rawname }); } } // attributes scanAttribute(fAttributes); // spaces fSawSpace = fEntityScanner.skipSpaces(); } while (true); // call handler if (fDocumentHandler != null) { if (empty) { //decrease the markup depth.. fMarkupDepth--; // check that this element was opened in the same entity if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { reportFatalError("ElementEntityMismatch", new Object[]{fCurrentElement.rawname}); } fDocumentHandler.emptyElement(fElementQName, fAttributes, null); //pop the element off the stack.. fElementStack.popElement(fElementQName); } else { fDocumentHandler.startElement(fElementQName, fAttributes, null); } } if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanStartElementAfterName(): "+empty); return empty; } // scanStartElementAfterName() /** * Scans an attribute. *

*

     * [41] Attribute ::= Name Eq AttValue
     * 
*

* Note: This method assumes that the next * character on the stream is the first character of the attribute * name. *

* Note: This method uses the fAttributeQName and * fQName variables. The contents of these variables will be * destroyed. * * @param attributes The attributes list for the scanned attribute. */ protected void scanAttribute(XMLAttributes attributes) throws IOException, XNIException { if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanAttribute()"); // name if (fNamespaces) { fEntityScanner.scanQName(fAttributeQName); } else { String name = fEntityScanner.scanName(); fAttributeQName.setValues(null, name, name, null); } // equals fEntityScanner.skipSpaces(); if (!fEntityScanner.skipChar('=')) { reportFatalError("EqRequiredInAttribute", new Object[]{fCurrentElement.rawname,fAttributeQName.rawname}); } fEntityScanner.skipSpaces(); // content int oldLen = attributes.getLength(); int attrIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); // WFC: Unique Att Spec if (oldLen == attributes.getLength()) { reportFatalError("AttributeNotUnique", new Object[]{fCurrentElement.rawname, fAttributeQName.rawname}); } // Scan attribute value and return true if the un-normalized and normalized value are the same boolean isSameNormalizedAttr = scanAttributeValue(fTempString, fTempString2, fAttributeQName.rawname, fIsEntityDeclaredVC, fCurrentElement.rawname); attributes.setValue(attrIndex, fTempString.toString()); // If the non-normalized and normalized value are the same, avoid creating a new string. if (!isSameNormalizedAttr) { attributes.setNonNormalizedValue(attrIndex, fTempString2.toString()); } attributes.setSpecified(attrIndex, true); if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanAttribute()"); } // scanAttribute(XMLAttributes) /** * Scans element content. * * @return Returns the next character on the stream. */ protected int scanContent() throws IOException, XNIException { XMLString content = fTempString; int c = fEntityScanner.scanContent(content); if (c == '\r') { // happens when there is the character reference fEntityScanner.scanChar(); fStringBuffer.clear(); fStringBuffer.append(fTempString); fStringBuffer.append((char)c); content = fStringBuffer; c = -1; } if (fDocumentHandler != null && content.length > 0) { fDocumentHandler.characters(content, null); } if (c == ']' && fTempString.length == 0) { fStringBuffer.clear(); fStringBuffer.append((char)fEntityScanner.scanChar()); // remember where we are in case we get an endEntity before we // could flush the buffer out - this happens when we're parsing an // entity which ends with a ] fInScanContent = true; // // We work on a single character basis to handle cases such as: // ']]]>' which we might otherwise miss. // if (fEntityScanner.skipChar(']')) { fStringBuffer.append(']'); while (fEntityScanner.skipChar(']')) { fStringBuffer.append(']'); } if (fEntityScanner.skipChar('>')) { reportFatalError("CDEndInContent", null); } } if (fDocumentHandler != null && fStringBuffer.length != 0) { fDocumentHandler.characters(fStringBuffer, null); } fInScanContent = false; c = -1; } return c; } // scanContent():int /** * Scans a CDATA section. *

* Note: This method uses the fTempString and * fStringBuffer variables. * * @param complete True if the CDATA section is to be scanned * completely. * * @return True if CDATA is completely scanned. */ protected boolean scanCDATASection(boolean complete) throws IOException, XNIException { // call handler if (fDocumentHandler != null) { fDocumentHandler.startCDATA(null); } while (true) { fStringBuffer.clear(); if (!fEntityScanner.scanData("]]", fStringBuffer)) { if (fDocumentHandler != null && fStringBuffer.length > 0) { fDocumentHandler.characters(fStringBuffer, null); } int brackets = 0; while (fEntityScanner.skipChar(']')) { brackets++; } if (fDocumentHandler != null && brackets > 0) { fStringBuffer.clear(); if (brackets > XMLEntityManager.DEFAULT_BUFFER_SIZE) { // Handle large sequences of ']' int chunks = brackets / XMLEntityManager.DEFAULT_BUFFER_SIZE; int remainder = brackets % XMLEntityManager.DEFAULT_BUFFER_SIZE; for (int i = 0; i < XMLEntityManager.DEFAULT_BUFFER_SIZE; i++) { fStringBuffer.append(']'); } for (int i = 0; i < chunks; i++) { fDocumentHandler.characters(fStringBuffer, null); } if (remainder != 0) { fStringBuffer.length = remainder; fDocumentHandler.characters(fStringBuffer, null); } } else { for (int i = 0; i < brackets; i++) { fStringBuffer.append(']'); } fDocumentHandler.characters(fStringBuffer, null); } } if (fEntityScanner.skipChar('>')) { break; } if (fDocumentHandler != null) { fStringBuffer.clear(); fStringBuffer.append("]]"); fDocumentHandler.characters(fStringBuffer, null); } } else { if (fDocumentHandler != null) { fDocumentHandler.characters(fStringBuffer, null); } int c = fEntityScanner.peekChar(); if (c != -1 && isInvalidLiteral(c)) { if (XMLChar.isHighSurrogate(c)) { fStringBuffer.clear(); scanSurrogates(fStringBuffer); if (fDocumentHandler != null) { fDocumentHandler.characters(fStringBuffer, null); } } else { reportFatalError("InvalidCharInCDSect", new Object[]{Integer.toString(c,16)}); fEntityScanner.scanChar(); } } } } fMarkupDepth--; // call handler if (fDocumentHandler != null) { fDocumentHandler.endCDATA(null); } return true; } // scanCDATASection(boolean):boolean /** * Scans an end element. *

*

     * [42] ETag ::= '</' Name S? '>'
     * 
*

* Note: This method uses the fElementQName variable. * The contents of this variable will be destroyed. The caller should * copy the needed information out of this variable before calling * this method. * * @return The element depth. */ protected int scanEndElement() throws IOException, XNIException { if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanEndElement()"); fElementStack.popElement(fElementQName) ; // Take advantage of the fact that next string _should_ be "fElementQName.rawName", //In scanners most of the time is consumed on checks done for XML characters, we can // optimize on it and avoid the checks done for endElement, //we will also avoid symbol table lookup - [email protected] // this should work both for namespace processing true or false... //REVISIT: if the string is not the same as expected.. we need to do better error handling.. //We can skip this for now... In any case if the string doesn't match -- document is not well formed. if (!fEntityScanner.skipString(fElementQName.rawname)) { reportFatalError("ETagRequired", new Object[]{fElementQName.rawname}); } // end fEntityScanner.skipSpaces(); if (!fEntityScanner.skipChar('>')) { reportFatalError("ETagUnterminated", new Object[]{fElementQName.rawname}); } fMarkupDepth--; //we have increased the depth for two markup "<" characters fMarkupDepth--; // check that this element was opened in the same entity if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { reportFatalError("ElementEntityMismatch", new Object[]{fCurrentElement.rawname}); } // call handler if (fDocumentHandler != null ) { fDocumentHandler.endElement(fElementQName, null); } return fMarkupDepth; } // scanEndElement():int /** * Scans a character reference. *

*

     * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
     * 
*/ protected void scanCharReference() throws IOException, XNIException { fStringBuffer2.clear(); int ch = scanCharReferenceValue(fStringBuffer2, null); fMarkupDepth--; if (ch != -1) { // call handler if (fDocumentHandler != null) { if (fNotifyCharRefs) { fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null); } Augmentations augs = null; if (fValidation && ch <= 0x20) { if (fTempAugmentations != null) { fTempAugmentations.removeAllItems(); } else { fTempAugmentations = new AugmentationsImpl(); } augs = fTempAugmentations; augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE); } fDocumentHandler.characters(fStringBuffer2, augs); if (fNotifyCharRefs) { fDocumentHandler.endGeneralEntity(fCharRefLiteral, null); } } } } // scanCharReference() /** * Scans an entity reference. * * @throws IOException Thrown if i/o error occurs. * @throws XNIException Thrown if handler throws exception upon * notification. */ protected void scanEntityReference() throws IOException, XNIException { // name String name = fEntityScanner.scanName(); if (name == null) { reportFatalError("NameRequiredInReference", null); return; } // end if (!fEntityScanner.skipChar(';')) { reportFatalError("SemicolonRequiredInReference", new Object []{name}); } fMarkupDepth--; // handle built-in entities if (name == fAmpSymbol) { handleCharacter('&', fAmpSymbol); } else if (name == fLtSymbol) { handleCharacter('<', fLtSymbol); } else if (name == fGtSymbol) { handleCharacter('>', fGtSymbol); } else if (name == fQuotSymbol) { handleCharacter('"', fQuotSymbol); } else if (name == fAposSymbol) { handleCharacter('\'', fAposSymbol); } // start general entity else if (fEntityManager.isUnparsedEntity(name)) { reportFatalError("ReferenceToUnparsedEntity", new Object[]{name}); } else { if (!fEntityManager.isDeclaredEntity(name)) { if (fIsEntityDeclaredVC) { if (fValidation) fErrorReporter.reportError( XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared", new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR); } else { reportFatalError("EntityNotDeclared", new Object[]{name}); } } fEntityManager.startEntity(name, false); } } // scanEntityReference() // utility methods /** * Calls document handler with a single character resulting from * built-in entity resolution. * * @param c * @param entity built-in name */ private void handleCharacter(char c, String entity) throws XNIException { if (fDocumentHandler != null) { if (fNotifyBuiltInRefs) { fDocumentHandler.startGeneralEntity(entity, null, null, null); } fSingleChar[0] = c; fTempString.setValues(fSingleChar, 0, 1); fDocumentHandler.characters(fTempString, null); if (fNotifyBuiltInRefs) { fDocumentHandler.endGeneralEntity(entity, null); } } } // handleCharacter(char) /** * Handles the end element. This method will make sure that * the end element name matches the current element and notify * the handler about the end of the element and the end of any * relevent prefix mappings. *

* Note: This method uses the fQName variable. * The contents of this variable will be destroyed. * * @param element The element. * * @return The element depth. * * @throws XNIException Thrown if the handler throws a SAX exception * upon notification. * */ // REVISIT: need to remove this method. It's not called anymore, because // the handling is done when the end tag is scanned. - SG protected int handleEndElement(QName element, boolean isEmpty) throws XNIException { fMarkupDepth--; // check that this element was opened in the same entity if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { reportFatalError("ElementEntityMismatch", new Object[]{fCurrentElement.rawname}); } // make sure the elements match QName startElement = fQName; fElementStack.popElement(startElement); if (element.rawname != startElement.rawname) { reportFatalError("ETagRequired", new Object[]{startElement.rawname}); } // bind namespaces if (fNamespaces) { element.uri = startElement.uri; } // call handler if (fDocumentHandler != null && !isEmpty) { fDocumentHandler.endElement(element, null); } return fMarkupDepth; } // callEndElement(QName,boolean):int // helper methods /** * Sets the scanner state. * * @param state The new scanner state. */ protected final void setScannerState(int state) { fScannerState = state; if (DEBUG_SCANNER_STATE) { System.out.print("### setScannerState: "); System.out.print(getScannerStateName(state)); System.out.println(); } } // setScannerState(int) /** * Sets the dispatcher. * * @param dispatcher The new dispatcher. */ protected final void setDispatcher(Dispatcher dispatcher) { fDispatcher = dispatcher; if (DEBUG_DISPATCHER) { System.out.print("%%% setDispatcher: "); System.out.print(getDispatcherName(dispatcher)); System.out.println(); } } // // Private methods // /** Returns the scanner state name. */ protected String getScannerStateName(int state) { switch (state) { case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE"; case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT"; case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP"; case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT"; case SCANNER_STATE_PI: return "SCANNER_STATE_PI"; case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT"; case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE"; case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT"; case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED"; case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA"; case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL"; } return "??? ("+state+')'; } // getScannerStateName(int):String /** Returns the dispatcher name. */ public String getDispatcherName(Dispatcher dispatcher) { if (DEBUG_DISPATCHER) { if (dispatcher != null) { String name = dispatcher.getClass().getName(); int index = name.lastIndexOf('.'); if (index != -1) { name = name.substring(index + 1); index = name.lastIndexOf('$'); if (index != -1) { name = name.substring(index + 1); } } return name; } } return "null"; } // getDispatcherName():String // // Classes // /** * Element stack. This stack operates without synchronization, error * checking, and it re-uses objects instead of throwing popped items * away. * * @author Andy Clark, IBM */ protected static class ElementStack { // // Data // /** The stack data. */ protected QName[] fElements; /** The size of the stack. */ protected int fSize; // // Constructors // /** Default constructor. */ public ElementStack() { fElements = new QName[10]; for (int i = 0; i < fElements.length; i++) { fElements[i] = new QName(); } } // () // // Public methods // /** * Pushes an element on the stack. *

* Note: The QName values are copied into the * stack. In other words, the caller does not orphan * the element to the stack. Also, the QName object returned * is not orphaned to the caller. It should be * considered read-only. * * @param element The element to push onto the stack. * * @return Returns the actual QName object that stores the */ public QName pushElement(QName element) { if (fSize == fElements.length) { QName[] array = new QName[fElements.length * 2]; System.arraycopy(fElements, 0, array, 0, fSize); fElements = array; for (int i = fSize; i < fElements.length; i++) { fElements[i] = new QName(); } } fElements[fSize].setValues(element); return fElements[fSize++]; } // pushElement(QName):QName /** * Pops an element off of the stack by setting the values of * the specified QName. *

* Note: The object returned is not * orphaned to the caller. Therefore, the caller should consider * the object to be read-only. */ public void popElement(QName element) { element.setValues(fElements[--fSize]); } // popElement(QName) /** Clears the stack without throwing away existing QName objects. */ public void clear() { fSize = 0; } // clear() } // class ElementStack /** * This interface defines an XML "event" dispatching model. Classes * that implement this interface are responsible for scanning parts * of the XML document and dispatching callbacks. * * @xerces.internal * * @author Glenn Marcy, IBM */ protected interface Dispatcher { // // Dispatcher methods // /** * Dispatch an XML "event". * * @param complete True if this dispatcher is intended to scan * and dispatch as much as possible. * * @return True if there is more to dispatch either from this * or a another dispatcher. * * @throws IOException Thrown on i/o error. * @throws XNIException Thrown on parse error. */ public boolean dispatch(boolean complete) throws IOException, XNIException; } // interface Dispatcher /** * Dispatcher to handle content scanning. * * @author Andy Clark, IBM * @author Eric Ye, IBM */ protected class FragmentContentDispatcher implements Dispatcher { // // Dispatcher methods // /** * Dispatch an XML "event". * * @param complete True if this dispatcher is intended to scan * and dispatch as much as possible. * * @return True if there is more to dispatch either from this * or a another dispatcher. * * @throws IOException Thrown on i/o error. * @throws XNIException Thrown on parse error. */ public boolean dispatch(boolean complete) throws IOException, XNIException { try { boolean again; do { again = false; switch (fScannerState) { case SCANNER_STATE_CONTENT: { if (fEntityScanner.skipChar('<')) { setScannerState(SCANNER_STATE_START_OF_MARKUP); again = true; } else if (fEntityScanner.skipChar('&')) { setScannerState(SCANNER_STATE_REFERENCE); again = true; } else { do { int c = scanContent(); if (c == '<') { fEntityScanner.scanChar(); setScannerState(SCANNER_STATE_START_OF_MARKUP); break; } else if (c == '&') { fEntityScanner.scanChar(); setScannerState(SCANNER_STATE_REFERENCE); break; } else if (c != -1 && isInvalidLiteral(c)) { if (XMLChar.isHighSurrogate(c)) { // special case: surrogates fStringBuffer.clear(); if (scanSurrogates(fStringBuffer)) { // call handler if (fDocumentHandler != null) { fDocumentHandler.characters(fStringBuffer, null); } } } else { reportFatalError("InvalidCharInContent", new Object[] { Integer.toString(c, 16)}); fEntityScanner.scanChar(); } } } while (complete); } break; } case SCANNER_STATE_START_OF_MARKUP: { fMarkupDepth++; if (fEntityScanner.skipChar('/')) { if (scanEndElement() == 0) { if (elementDepthIsZeroHook()) { return true; } } setScannerState(SCANNER_STATE_CONTENT); } else if (isValidNameStartChar(fEntityScanner.peekChar())) { scanStartElement(); setScannerState(SCANNER_STATE_CONTENT); } else if (fEntityScanner.skipChar('!')) { if (fEntityScanner.skipChar('-')) { if (!fEntityScanner.skipChar('-')) { reportFatalError("InvalidCommentStart", null); } setScannerState(SCANNER_STATE_COMMENT); again = true; } else if (fEntityScanner.skipString("[CDATA[")) { setScannerState(SCANNER_STATE_CDATA); again = true; } else if (!scanForDoctypeHook()) { reportFatalError("MarkupNotRecognizedInContent", null); } } else if (fEntityScanner.skipChar('?')) { setScannerState(SCANNER_STATE_PI); again = true; } else if (isValidNameStartHighSurrogate(fEntityScanner.peekChar())) { scanStartElement(); setScannerState(SCANNER_STATE_CONTENT); } else { reportFatalError("MarkupNotRecognizedInContent", null); setScannerState(SCANNER_STATE_CONTENT); } break; } case SCANNER_STATE_COMMENT: { scanComment(); setScannerState(SCANNER_STATE_CONTENT); break; } case SCANNER_STATE_PI: { scanPI(); setScannerState(SCANNER_STATE_CONTENT); break; } case SCANNER_STATE_CDATA: { scanCDATASection(complete); setScannerState(SCANNER_STATE_CONTENT); break; } case SCANNER_STATE_REFERENCE: { fMarkupDepth++; // NOTE: We need to set the state beforehand // because the XMLEntityHandler#startEntity // callback could set the state to // SCANNER_STATE_TEXT_DECL and we don't want // to override that scanner state. setScannerState(SCANNER_STATE_CONTENT); if (fEntityScanner.skipChar('#')) { scanCharReference(); } else { scanEntityReference(); } break; } case SCANNER_STATE_TEXT_DECL: { // scan text decl if (fEntityScanner.skipString("





© 2015 - 2024 Weber Informatics LLC | Privacy Policy