All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.xerces.impl.XMLDocumentFragmentScannerImpl Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.xerces.impl;

import java.io.CharConversionException;
import java.io.EOFException;
import java.io.IOException;

import org.apache.xerces.impl.io.MalformedByteSequenceException;
import org.apache.xerces.impl.msg.XMLMessageFormatter;
import org.apache.xerces.util.AugmentationsImpl;
import org.apache.xerces.util.XMLAttributesImpl;
import org.apache.xerces.util.XMLChar;
import org.apache.xerces.util.XMLStringBuffer;
import org.apache.xerces.util.XMLSymbols;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XMLDocumentHandler;
import org.apache.xerces.xni.XMLResourceIdentifier;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLComponent;
import org.apache.xerces.xni.parser.XMLComponentManager;
import org.apache.xerces.xni.parser.XMLConfigurationException;
import org.apache.xerces.xni.parser.XMLDocumentScanner;
import org.apache.xerces.xni.parser.XMLInputSource;

/**
 * This class is responsible for scanning the structure and content
 * of document fragments. The scanner acts as the source for the 
 * document information which is communicated to the document handler.
 * 

* This component requires the following features and properties from the * component manager that uses it: *

    *
  • http://xml.org/sax/features/validation
  • *
  • http://apache.org/xml/features/scanner/notify-char-refs
  • *
  • http://apache.org/xml/features/scanner/notify-builtin-refs
  • *
  • http://apache.org/xml/properties/internal/symbol-table
  • *
  • http://apache.org/xml/properties/internal/error-reporter
  • *
  • http://apache.org/xml/properties/internal/entity-manager
  • *
* * @xerces.internal * * @author Glenn Marcy, IBM * @author Andy Clark, IBM * @author Arnaud Le Hors, IBM * @author Eric Ye, IBM * * @version $Id: XMLDocumentFragmentScannerImpl.java 572055 2007-09-02 17:55:43Z mrglavas $ */ public class XMLDocumentFragmentScannerImpl extends XMLScanner implements XMLDocumentScanner, XMLComponent, XMLEntityHandler { // // Constants // // scanner states /** Scanner state: start of markup. */ protected static final int SCANNER_STATE_START_OF_MARKUP = 1; /** Scanner state: comment. */ protected static final int SCANNER_STATE_COMMENT = 2; /** Scanner state: processing instruction. */ protected static final int SCANNER_STATE_PI = 3; /** Scanner state: DOCTYPE. */ protected static final int SCANNER_STATE_DOCTYPE = 4; /** Scanner state: root element. */ protected static final int SCANNER_STATE_ROOT_ELEMENT = 6; /** Scanner state: content. */ protected static final int SCANNER_STATE_CONTENT = 7; /** Scanner state: reference. */ protected static final int SCANNER_STATE_REFERENCE = 8; /** Scanner state: end of input. */ protected static final int SCANNER_STATE_END_OF_INPUT = 13; /** Scanner state: terminated. */ protected static final int SCANNER_STATE_TERMINATED = 14; /** Scanner state: CDATA section. */ protected static final int SCANNER_STATE_CDATA = 15; /** Scanner state: Text declaration. */ protected static final int SCANNER_STATE_TEXT_DECL = 16; // feature identifiers /** Feature identifier: namespaces. */ protected static final String NAMESPACES = Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE; /** Feature identifier: notify built-in refereces. */ protected static final String NOTIFY_BUILTIN_REFS = Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE; // property identifiers /** Property identifier: entity resolver. */ protected static final String ENTITY_RESOLVER = Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY; // recognized features and properties /** Recognized features. */ private static final String[] RECOGNIZED_FEATURES = { NAMESPACES, VALIDATION, NOTIFY_BUILTIN_REFS, NOTIFY_CHAR_REFS, }; /** Feature defaults. */ private static final Boolean[] FEATURE_DEFAULTS = { null, null, Boolean.FALSE, Boolean.FALSE, }; /** Recognized properties. */ private static final String[] RECOGNIZED_PROPERTIES = { SYMBOL_TABLE, ERROR_REPORTER, ENTITY_MANAGER, ENTITY_RESOLVER, }; /** Property defaults. */ private static final Object[] PROPERTY_DEFAULTS = { null, null, null, null, }; // debugging /** Debug scanner state. */ private static final boolean DEBUG_SCANNER_STATE = false; /** Debug dispatcher. */ private static final boolean DEBUG_DISPATCHER = false; /** Debug content dispatcher scanning. */ protected static final boolean DEBUG_CONTENT_SCANNING = false; // // Data // // protected data /** Document handler. */ protected XMLDocumentHandler fDocumentHandler; /** Entity stack. */ protected int[] fEntityStack = new int[4]; /** Markup depth. */ protected int fMarkupDepth; /** Scanner state. */ protected int fScannerState; /** SubScanner state: inside scanContent method. */ protected boolean fInScanContent = false; /** has external dtd */ protected boolean fHasExternalDTD; /** Standalone. */ protected boolean fStandalone; /** True if [Entity Declared] is a VC; false if it is a WFC. */ protected boolean fIsEntityDeclaredVC; /** External subset resolver. **/ protected ExternalSubsetResolver fExternalSubsetResolver; // element information /** Current element. */ protected QName fCurrentElement; /** Element stack. */ protected final ElementStack fElementStack = new ElementStack(); // other info /** Document system identifier. * REVISIT: So what's this used for? - NG * protected String fDocumentSystemId; ******/ // features /** Notify built-in references. */ protected boolean fNotifyBuiltInRefs = false; // dispatchers /** Active dispatcher. */ protected Dispatcher fDispatcher; /** Content dispatcher. */ protected final Dispatcher fContentDispatcher = createContentDispatcher(); // temporary variables /** Element QName. */ protected final QName fElementQName = new QName(); /** Attribute QName. */ protected final QName fAttributeQName = new QName(); /** Element attributes. */ protected final XMLAttributesImpl fAttributes = new XMLAttributesImpl(); /** String. */ protected final XMLString fTempString = new XMLString(); /** String. */ protected final XMLString fTempString2 = new XMLString(); /** Array of 3 strings. */ private final String[] fStrings = new String[3]; /** String buffer. */ private final XMLStringBuffer fStringBuffer = new XMLStringBuffer(); /** String buffer. */ private final XMLStringBuffer fStringBuffer2 = new XMLStringBuffer(); /** Another QName. */ private final QName fQName = new QName(); /** Single character array. */ private final char[] fSingleChar = new char[1]; /** * Saw spaces after element name or between attributes. * * This is reserved for the case where scanning of a start element spans * several methods, as is the case when scanning the start of a root element * where a DTD external subset may be read after scanning the element name. */ private boolean fSawSpace; /** Reusable Augmentations. */ private Augmentations fTempAugmentations = null; // // Constructors // /** Default constructor. */ public XMLDocumentFragmentScannerImpl() {} // () // // XMLDocumentScanner methods // /** * Sets the input source. * * @param inputSource The input source. * * @throws IOException Thrown on i/o error. */ public void setInputSource(XMLInputSource inputSource) throws IOException { fEntityManager.setEntityHandler(this); fEntityManager.startEntity("$fragment$", inputSource, false, true); //fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); } // setInputSource(XMLInputSource) /** * Scans a document. * * @param complete True if the scanner should scan the document * completely, pushing all events to the registered * document handler. A value of false indicates that * that the scanner should only scan the next portion * of the document and return. A scanner instance is * permitted to completely scan a document if it does * not support this "pull" scanning model. * * @return True if there is more to scan, false otherwise. */ public boolean scanDocument(boolean complete) throws IOException, XNIException { // reset entity scanner fEntityScanner = fEntityManager.getEntityScanner(); // keep dispatching "events" fEntityManager.setEntityHandler(this); do { if (!fDispatcher.dispatch(complete)) { return false; } } while (complete); // return success return true; } // scanDocument(boolean):boolean // // XMLComponent methods // /** * Resets the component. The component can query the component manager * about any features and properties that affect the operation of the * component. * * @param componentManager The component manager. * * @throws SAXException Thrown by component on initialization error. * For example, if a feature or property is * required for the operation of the component, the * component manager may throw a * SAXNotRecognizedException or a * SAXNotSupportedException. */ public void reset(XMLComponentManager componentManager) throws XMLConfigurationException { super.reset(componentManager); // other settings //fDocumentSystemId = null; // sax features fAttributes.setNamespaces(fNamespaces); // initialize vars fMarkupDepth = 0; fCurrentElement = null; fElementStack.clear(); fHasExternalDTD = false; fStandalone = false; fIsEntityDeclaredVC = false; fInScanContent = false; // setup dispatcher setScannerState(SCANNER_STATE_CONTENT); setDispatcher(fContentDispatcher); if (fParserSettings) { // parser settings have changed. reset them. // xerces features try { fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS); } catch (XMLConfigurationException e) { fNotifyBuiltInRefs = false; } // xerces properties try { Object resolver = componentManager.getProperty(ENTITY_RESOLVER); fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ? (ExternalSubsetResolver) resolver : null; } catch (XMLConfigurationException e) { fExternalSubsetResolver = null; } } } // reset(XMLComponentManager) /** * Returns a list of feature identifiers that are recognized by * this component. This method may return null if no features * are recognized by this component. */ public String[] getRecognizedFeatures() { return (String[])(RECOGNIZED_FEATURES.clone()); } // getRecognizedFeatures():String[] /** * Sets the state of a feature. This method is called by the component * manager any time after reset when a feature changes state. *

* Note: Components should silently ignore features * that do not affect the operation of the component. * * @param featureId The feature identifier. * @param state The state of the feature. * * @throws SAXNotRecognizedException The component should not throw * this exception. * @throws SAXNotSupportedException The component should not throw * this exception. */ public void setFeature(String featureId, boolean state) throws XMLConfigurationException { super.setFeature(featureId, state); // Xerces properties if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length(); if (suffixLength == Constants.NOTIFY_BUILTIN_REFS_FEATURE.length() && featureId.endsWith(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) { fNotifyBuiltInRefs = state; } } } // setFeature(String,boolean) /** * Returns a list of property identifiers that are recognized by * this component. This method may return null if no properties * are recognized by this component. */ public String[] getRecognizedProperties() { return (String[])(RECOGNIZED_PROPERTIES.clone()); } // getRecognizedProperties():String[] /** * Sets the value of a property. This method is called by the component * manager any time after reset when a property changes value. *

* Note: Components should silently ignore properties * that do not affect the operation of the component. * * @param propertyId The property identifier. * @param value The value of the property. * * @throws SAXNotRecognizedException The component should not throw * this exception. * @throws SAXNotSupportedException The component should not throw * this exception. */ public void setProperty(String propertyId, Object value) throws XMLConfigurationException { super.setProperty(propertyId, value); // Xerces properties if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) { fEntityManager = (XMLEntityManager)value; return; } if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? (ExternalSubsetResolver) value : null; return; } } } // setProperty(String,Object) /** * Returns the default state for a feature, or null if this * component does not want to report a default value for this * feature. * * @param featureId The feature identifier. * * @since Xerces 2.2.0 */ public Boolean getFeatureDefault(String featureId) { for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { if (RECOGNIZED_FEATURES[i].equals(featureId)) { return FEATURE_DEFAULTS[i]; } } return null; } // getFeatureDefault(String):Boolean /** * Returns the default state for a property, or null if this * component does not want to report a default value for this * property. * * @param propertyId The property identifier. * * @since Xerces 2.2.0 */ public Object getPropertyDefault(String propertyId) { for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { return PROPERTY_DEFAULTS[i]; } } return null; } // getPropertyDefault(String):Object // // XMLDocumentSource methods // /** * setDocumentHandler * * @param documentHandler */ public void setDocumentHandler(XMLDocumentHandler documentHandler) { fDocumentHandler = documentHandler; } // setDocumentHandler(XMLDocumentHandler) /** Returns the document handler */ public XMLDocumentHandler getDocumentHandler(){ return fDocumentHandler; } // // XMLEntityHandler methods // /** * This method notifies of the start of an entity. The DTD has the * pseudo-name of "[dtd]" parameter entity names start with '%'; and * general entities are just specified by their name. * * @param name The name of the entity. * @param identifier The resource identifier. * @param encoding The auto-detected IANA encoding name of the entity * stream. This value will be null in those situations * where the entity encoding is not auto-detected (e.g. * internal entities or a document entity that is * parsed from a java.io.Reader). * @param augs Additional information that may include infoset augmentations * * @throws XNIException Thrown by handler to signal an error. */ public void startEntity(String name, XMLResourceIdentifier identifier, String encoding, Augmentations augs) throws XNIException { // keep track of this entity before fEntityDepth is increased if (fEntityDepth == fEntityStack.length) { int[] entityarray = new int[fEntityStack.length * 2]; System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length); fEntityStack = entityarray; } fEntityStack[fEntityDepth] = fMarkupDepth; super.startEntity(name, identifier, encoding, augs); // WFC: entity declared in external subset in standalone doc if(fStandalone && fEntityManager.isEntityDeclInExternalSubset(name)) { reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE", new Object[]{name}); } // call handler if (fDocumentHandler != null && !fScanningAttribute) { if (!name.equals("[xml]")) { fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs); } } } // startEntity(String,XMLResourceIdentifier,String) /** * This method notifies the end of an entity. The DTD has the pseudo-name * of "[dtd]" parameter entity names start with '%'; and general entities * are just specified by their name. * * @param name The name of the entity. * @param augs Additional information that may include infoset augmentations * * @throws XNIException Thrown by handler to signal an error. */ public void endEntity(String name, Augmentations augs) throws XNIException { // flush possible pending output buffer - see scanContent if (fInScanContent && fStringBuffer.length != 0 && fDocumentHandler != null) { fDocumentHandler.characters(fStringBuffer, null); fStringBuffer.length = 0; // make sure we know it's been flushed } super.endEntity(name, augs); // make sure markup is properly balanced if (fMarkupDepth != fEntityStack[fEntityDepth]) { reportFatalError("MarkupEntityMismatch", null); } // call handler if (fDocumentHandler != null && !fScanningAttribute) { if (!name.equals("[xml]")) { fDocumentHandler.endGeneralEntity(name, augs); } } } // endEntity(String) // // Protected methods // // dispatcher factory methods /** Creates a content dispatcher. */ protected Dispatcher createContentDispatcher() { return new FragmentContentDispatcher(); } // createContentDispatcher():Dispatcher // scanning methods /** * Scans an XML or text declaration. *

*

     * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
     * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
     * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
     * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
     *                 | ('"' ('yes' | 'no') '"'))
     *
     * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
     * 
* * @param scanningTextDecl True if a text declaration is to * be scanned instead of an XML * declaration. */ protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) throws IOException, XNIException { // scan decl super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); fMarkupDepth--; // pseudo-attribute values String version = fStrings[0]; String encoding = fStrings[1]; String standalone = fStrings[2]; // set standalone fStandalone = standalone != null && standalone.equals("yes"); fEntityManager.setStandalone(fStandalone); // set version on reader fEntityScanner.setXMLVersion(version); // call handler if (fDocumentHandler != null) { if (scanningTextDecl) { fDocumentHandler.textDecl(version, encoding, null); } else { fDocumentHandler.xmlDecl(version, encoding, standalone, null); } } // set encoding on reader if (encoding != null && !fEntityScanner.fCurrentEntity.isEncodingExternallySpecified()) { fEntityScanner.setEncoding(encoding); } } // scanXMLDeclOrTextDecl(boolean) /** * Scans a processing data. This is needed to handle the situation * where a document starts with a processing instruction whose * target name starts with "xml". (e.g. xmlfoo) * * @param target The PI target * @param data The string to fill in with the data */ protected void scanPIData(String target, XMLString data) throws IOException, XNIException { super.scanPIData(target, data); fMarkupDepth--; // call handler if (fDocumentHandler != null) { fDocumentHandler.processingInstruction(target, data, null); } } // scanPIData(String) /** * Scans a comment. *

*

     * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
     * 
*

* Note: Called after scanning past '<!--' */ protected void scanComment() throws IOException, XNIException { scanComment(fStringBuffer); fMarkupDepth--; // call handler if (fDocumentHandler != null) { fDocumentHandler.comment(fStringBuffer, null); } } // scanComment() /** * Scans a start element. This method will handle the binding of * namespace information and notifying the handler of the start * of the element. *

*

     * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
     * [40] STag ::= '<' Name (S Attribute)* S? '>'
     * 
*

* Note: This method assumes that the leading * '<' character has been consumed. *

* Note: This method uses the fElementQName and * fAttributes variables. The contents of these variables will be * destroyed. The caller should copy important information out of * these variables before calling this method. * * @return True if element is empty. (i.e. It matches * production [44]. */ protected boolean scanStartElement() throws IOException, XNIException { if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanStartElement()"); // name if (fNamespaces) { fEntityScanner.scanQName(fElementQName); } else { String name = fEntityScanner.scanName(); fElementQName.setValues(null, name, name, null); } String rawname = fElementQName.rawname; // push element stack fCurrentElement = fElementStack.pushElement(fElementQName); // attributes boolean empty = false; fAttributes.removeAllAttributes(); do { // spaces boolean sawSpace = fEntityScanner.skipSpaces(); // end tag? int c = fEntityScanner.peekChar(); if (c == '>') { fEntityScanner.scanChar(); break; } else if (c == '/') { fEntityScanner.scanChar(); if (!fEntityScanner.skipChar('>')) { reportFatalError("ElementUnterminated", new Object[]{rawname}); } empty = true; break; } else if (!isValidNameStartChar(c) || !sawSpace) { // Second chance. Check if this character is a high // surrogate of a valid name start character. if (!isValidNameStartHighSurrogate(c) || !sawSpace) { reportFatalError("ElementUnterminated", new Object[] { rawname }); } } // attributes scanAttribute(fAttributes); } while (true); // call handler if (fDocumentHandler != null) { if (empty) { //decrease the markup depth.. fMarkupDepth--; // check that this element was opened in the same entity if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { reportFatalError("ElementEntityMismatch", new Object[]{fCurrentElement.rawname}); } fDocumentHandler.emptyElement(fElementQName, fAttributes, null); //pop the element off the stack.. fElementStack.popElement(fElementQName); } else { fDocumentHandler.startElement(fElementQName, fAttributes, null); } } if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanStartElement(): "+empty); return empty; } // scanStartElement():boolean /** * Scans the name of an element in a start or empty tag. * * @see #scanStartElement() */ protected void scanStartElementName () throws IOException, XNIException { // name if (fNamespaces) { fEntityScanner.scanQName(fElementQName); } else { String name = fEntityScanner.scanName(); fElementQName.setValues(null, name, name, null); } // Must skip spaces here because the DTD scanner // would consume them at the end of the external subset. fSawSpace = fEntityScanner.skipSpaces(); } // scanStartElementName() /** * Scans the remainder of a start or empty tag after the element name. * * @see #scanStartElement * @return True if element is empty. */ protected boolean scanStartElementAfterName() throws IOException, XNIException { String rawname = fElementQName.rawname; // push element stack fCurrentElement = fElementStack.pushElement(fElementQName); // attributes boolean empty = false; fAttributes.removeAllAttributes(); do { // end tag? int c = fEntityScanner.peekChar(); if (c == '>') { fEntityScanner.scanChar(); break; } else if (c == '/') { fEntityScanner.scanChar(); if (!fEntityScanner.skipChar('>')) { reportFatalError("ElementUnterminated", new Object[]{rawname}); } empty = true; break; } else if (!isValidNameStartChar(c) || !fSawSpace) { // Second chance. Check if this character is a high // surrogate of a valid name start character. if (!isValidNameStartHighSurrogate(c) || !fSawSpace) { reportFatalError("ElementUnterminated", new Object[] { rawname }); } } // attributes scanAttribute(fAttributes); // spaces fSawSpace = fEntityScanner.skipSpaces(); } while (true); // call handler if (fDocumentHandler != null) { if (empty) { //decrease the markup depth.. fMarkupDepth--; // check that this element was opened in the same entity if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { reportFatalError("ElementEntityMismatch", new Object[]{fCurrentElement.rawname}); } fDocumentHandler.emptyElement(fElementQName, fAttributes, null); //pop the element off the stack.. fElementStack.popElement(fElementQName); } else { fDocumentHandler.startElement(fElementQName, fAttributes, null); } } if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanStartElementAfterName(): "+empty); return empty; } // scanStartElementAfterName() /** * Scans an attribute. *

*

     * [41] Attribute ::= Name Eq AttValue
     * 
*

* Note: This method assumes that the next * character on the stream is the first character of the attribute * name. *

* Note: This method uses the fAttributeQName and * fQName variables. The contents of these variables will be * destroyed. * * @param attributes The attributes list for the scanned attribute. */ protected void scanAttribute(XMLAttributes attributes) throws IOException, XNIException { if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanAttribute()"); // name if (fNamespaces) { fEntityScanner.scanQName(fAttributeQName); } else { String name = fEntityScanner.scanName(); fAttributeQName.setValues(null, name, name, null); } // equals fEntityScanner.skipSpaces(); if (!fEntityScanner.skipChar('=')) { reportFatalError("EqRequiredInAttribute", new Object[]{fCurrentElement.rawname,fAttributeQName.rawname}); } fEntityScanner.skipSpaces(); // content int oldLen = attributes.getLength(); int attrIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); // WFC: Unique Att Spec if (oldLen == attributes.getLength()) { reportFatalError("AttributeNotUnique", new Object[]{fCurrentElement.rawname, fAttributeQName.rawname}); } // Scan attribute value and return true if the un-normalized and normalized value are the same boolean isSameNormalizedAttr = scanAttributeValue(fTempString, fTempString2, fAttributeQName.rawname, fIsEntityDeclaredVC, fCurrentElement.rawname); attributes.setValue(attrIndex, fTempString.toString()); // If the non-normalized and normalized value are the same, avoid creating a new string. if (!isSameNormalizedAttr) { attributes.setNonNormalizedValue(attrIndex, fTempString2.toString()); } attributes.setSpecified(attrIndex, true); if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanAttribute()"); } // scanAttribute(XMLAttributes) /** * Scans element content. * * @return Returns the next character on the stream. */ protected int scanContent() throws IOException, XNIException { XMLString content = fTempString; int c = fEntityScanner.scanContent(content); if (c == '\r') { // happens when there is the character reference fEntityScanner.scanChar(); fStringBuffer.clear(); fStringBuffer.append(fTempString); fStringBuffer.append((char)c); content = fStringBuffer; c = -1; } if (fDocumentHandler != null && content.length > 0) { fDocumentHandler.characters(content, null); } if (c == ']' && fTempString.length == 0) { fStringBuffer.clear(); fStringBuffer.append((char)fEntityScanner.scanChar()); // remember where we are in case we get an endEntity before we // could flush the buffer out - this happens when we're parsing an // entity which ends with a ] fInScanContent = true; // // We work on a single character basis to handle cases such as: // ']]]>' which we might otherwise miss. // if (fEntityScanner.skipChar(']')) { fStringBuffer.append(']'); while (fEntityScanner.skipChar(']')) { fStringBuffer.append(']'); } if (fEntityScanner.skipChar('>')) { reportFatalError("CDEndInContent", null); } } if (fDocumentHandler != null && fStringBuffer.length != 0) { fDocumentHandler.characters(fStringBuffer, null); } fInScanContent = false; c = -1; } return c; } // scanContent():int /** * Scans a CDATA section. *

* Note: This method uses the fTempString and * fStringBuffer variables. * * @param complete True if the CDATA section is to be scanned * completely. * * @return True if CDATA is completely scanned. */ protected boolean scanCDATASection(boolean complete) throws IOException, XNIException { // call handler if (fDocumentHandler != null) { fDocumentHandler.startCDATA(null); } while (true) { fStringBuffer.clear(); if (!fEntityScanner.scanData("]]", fStringBuffer)) { if (fDocumentHandler != null && fStringBuffer.length > 0) { fDocumentHandler.characters(fStringBuffer, null); } int brackets = 0; while (fEntityScanner.skipChar(']')) { brackets++; } if (fDocumentHandler != null && brackets > 0) { fStringBuffer.clear(); if (brackets > XMLEntityManager.DEFAULT_BUFFER_SIZE) { // Handle large sequences of ']' int chunks = brackets / XMLEntityManager.DEFAULT_BUFFER_SIZE; int remainder = brackets % XMLEntityManager.DEFAULT_BUFFER_SIZE; for (int i = 0; i < XMLEntityManager.DEFAULT_BUFFER_SIZE; i++) { fStringBuffer.append(']'); } for (int i = 0; i < chunks; i++) { fDocumentHandler.characters(fStringBuffer, null); } if (remainder != 0) { fStringBuffer.length = remainder; fDocumentHandler.characters(fStringBuffer, null); } } else { for (int i = 0; i < brackets; i++) { fStringBuffer.append(']'); } fDocumentHandler.characters(fStringBuffer, null); } } if (fEntityScanner.skipChar('>')) { break; } if (fDocumentHandler != null) { fStringBuffer.clear(); fStringBuffer.append("]]"); fDocumentHandler.characters(fStringBuffer, null); } } else { if (fDocumentHandler != null) { fDocumentHandler.characters(fStringBuffer, null); } int c = fEntityScanner.peekChar(); if (c != -1 && isInvalidLiteral(c)) { if (XMLChar.isHighSurrogate(c)) { fStringBuffer.clear(); scanSurrogates(fStringBuffer); if (fDocumentHandler != null) { fDocumentHandler.characters(fStringBuffer, null); } } else { reportFatalError("InvalidCharInCDSect", new Object[]{Integer.toString(c,16)}); fEntityScanner.scanChar(); } } } } fMarkupDepth--; // call handler if (fDocumentHandler != null) { fDocumentHandler.endCDATA(null); } return true; } // scanCDATASection(boolean):boolean /** * Scans an end element. *

*

     * [42] ETag ::= '</' Name S? '>'
     * 
*

* Note: This method uses the fElementQName variable. * The contents of this variable will be destroyed. The caller should * copy the needed information out of this variable before calling * this method. * * @return The element depth. */ protected int scanEndElement() throws IOException, XNIException { if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanEndElement()"); fElementStack.popElement(fElementQName) ; // Take advantage of the fact that next string _should_ be "fElementQName.rawName", //In scanners most of the time is consumed on checks done for XML characters, we can // optimize on it and avoid the checks done for endElement, //we will also avoid symbol table lookup - [email protected] // this should work both for namespace processing true or false... //REVISIT: if the string is not the same as expected.. we need to do better error handling.. //We can skip this for now... In any case if the string doesn't match -- document is not well formed. if (!fEntityScanner.skipString(fElementQName.rawname)) { reportFatalError("ETagRequired", new Object[]{fElementQName.rawname}); } // end fEntityScanner.skipSpaces(); if (!fEntityScanner.skipChar('>')) { reportFatalError("ETagUnterminated", new Object[]{fElementQName.rawname}); } fMarkupDepth--; //we have increased the depth for two markup "<" characters fMarkupDepth--; // check that this element was opened in the same entity if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { reportFatalError("ElementEntityMismatch", new Object[]{fCurrentElement.rawname}); } // call handler if (fDocumentHandler != null ) { fDocumentHandler.endElement(fElementQName, null); } return fMarkupDepth; } // scanEndElement():int /** * Scans a character reference. *

*

     * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
     * 
*/ protected void scanCharReference() throws IOException, XNIException { fStringBuffer2.clear(); int ch = scanCharReferenceValue(fStringBuffer2, null); fMarkupDepth--; if (ch != -1) { // call handler if (fDocumentHandler != null) { if (fNotifyCharRefs) { fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null); } Augmentations augs = null; if (fValidation && ch <= 0x20) { if (fTempAugmentations != null) { fTempAugmentations.removeAllItems(); } else { fTempAugmentations = new AugmentationsImpl(); } augs = fTempAugmentations; augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE); } fDocumentHandler.characters(fStringBuffer2, augs); if (fNotifyCharRefs) { fDocumentHandler.endGeneralEntity(fCharRefLiteral, null); } } } } // scanCharReference() /** * Scans an entity reference. * * @throws IOException Thrown if i/o error occurs. * @throws XNIException Thrown if handler throws exception upon * notification. */ protected void scanEntityReference() throws IOException, XNIException { // name String name = fEntityScanner.scanName(); if (name == null) { reportFatalError("NameRequiredInReference", null); return; } // end if (!fEntityScanner.skipChar(';')) { reportFatalError("SemicolonRequiredInReference", new Object []{name}); } fMarkupDepth--; // handle built-in entities if (name == fAmpSymbol) { handleCharacter('&', fAmpSymbol); } else if (name == fLtSymbol) { handleCharacter('<', fLtSymbol); } else if (name == fGtSymbol) { handleCharacter('>', fGtSymbol); } else if (name == fQuotSymbol) { handleCharacter('"', fQuotSymbol); } else if (name == fAposSymbol) { handleCharacter('\'', fAposSymbol); } // start general entity else if (fEntityManager.isUnparsedEntity(name)) { reportFatalError("ReferenceToUnparsedEntity", new Object[]{name}); } else { if (!fEntityManager.isDeclaredEntity(name)) { if (fIsEntityDeclaredVC) { if (fValidation) fErrorReporter.reportError( XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared", new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR); } else { reportFatalError("EntityNotDeclared", new Object[]{name}); } } fEntityManager.startEntity(name, false); } } // scanEntityReference() // utility methods /** * Calls document handler with a single character resulting from * built-in entity resolution. * * @param c * @param entity built-in name */ private void handleCharacter(char c, String entity) throws XNIException { if (fDocumentHandler != null) { if (fNotifyBuiltInRefs) { fDocumentHandler.startGeneralEntity(entity, null, null, null); } fSingleChar[0] = c; fTempString.setValues(fSingleChar, 0, 1); fDocumentHandler.characters(fTempString, null); if (fNotifyBuiltInRefs) { fDocumentHandler.endGeneralEntity(entity, null); } } } // handleCharacter(char) /** * Handles the end element. This method will make sure that * the end element name matches the current element and notify * the handler about the end of the element and the end of any * relevent prefix mappings. *

* Note: This method uses the fQName variable. * The contents of this variable will be destroyed. * * @param element The element. * * @return The element depth. * * @throws XNIException Thrown if the handler throws a SAX exception * upon notification. * */ // REVISIT: need to remove this method. It's not called anymore, because // the handling is done when the end tag is scanned. - SG protected int handleEndElement(QName element, boolean isEmpty) throws XNIException { fMarkupDepth--; // check that this element was opened in the same entity if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { reportFatalError("ElementEntityMismatch", new Object[]{fCurrentElement.rawname}); } // make sure the elements match QName startElement = fQName; fElementStack.popElement(startElement); if (element.rawname != startElement.rawname) { reportFatalError("ETagRequired", new Object[]{startElement.rawname}); } // bind namespaces if (fNamespaces) { element.uri = startElement.uri; } // call handler if (fDocumentHandler != null && !isEmpty) { fDocumentHandler.endElement(element, null); } return fMarkupDepth; } // callEndElement(QName,boolean):int // helper methods /** * Sets the scanner state. * * @param state The new scanner state. */ protected final void setScannerState(int state) { fScannerState = state; if (DEBUG_SCANNER_STATE) { System.out.print("### setScannerState: "); System.out.print(getScannerStateName(state)); System.out.println(); } } // setScannerState(int) /** * Sets the dispatcher. * * @param dispatcher The new dispatcher. */ protected final void setDispatcher(Dispatcher dispatcher) { fDispatcher = dispatcher; if (DEBUG_DISPATCHER) { System.out.print("%%% setDispatcher: "); System.out.print(getDispatcherName(dispatcher)); System.out.println(); } } // // Private methods // /** Returns the scanner state name. */ protected String getScannerStateName(int state) { switch (state) { case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE"; case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT"; case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP"; case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT"; case SCANNER_STATE_PI: return "SCANNER_STATE_PI"; case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT"; case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE"; case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT"; case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED"; case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA"; case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL"; } return "??? ("+state+')'; } // getScannerStateName(int):String /** Returns the dispatcher name. */ public String getDispatcherName(Dispatcher dispatcher) { if (DEBUG_DISPATCHER) { if (dispatcher != null) { String name = dispatcher.getClass().getName(); int index = name.lastIndexOf('.'); if (index != -1) { name = name.substring(index + 1); index = name.lastIndexOf('$'); if (index != -1) { name = name.substring(index + 1); } } return name; } } return "null"; } // getDispatcherName():String // // Classes // /** * Element stack. This stack operates without synchronization, error * checking, and it re-uses objects instead of throwing popped items * away. * * @author Andy Clark, IBM */ protected static class ElementStack { // // Data // /** The stack data. */ protected QName[] fElements; /** The size of the stack. */ protected int fSize; // // Constructors // /** Default constructor. */ public ElementStack() { fElements = new QName[10]; for (int i = 0; i < fElements.length; i++) { fElements[i] = new QName(); } } // () // // Public methods // /** * Pushes an element on the stack. *

* Note: The QName values are copied into the * stack. In other words, the caller does not orphan * the element to the stack. Also, the QName object returned * is not orphaned to the caller. It should be * considered read-only. * * @param element The element to push onto the stack. * * @return Returns the actual QName object that stores the */ public QName pushElement(QName element) { if (fSize == fElements.length) { QName[] array = new QName[fElements.length * 2]; System.arraycopy(fElements, 0, array, 0, fSize); fElements = array; for (int i = fSize; i < fElements.length; i++) { fElements[i] = new QName(); } } fElements[fSize].setValues(element); return fElements[fSize++]; } // pushElement(QName):QName /** * Pops an element off of the stack by setting the values of * the specified QName. *

* Note: The object returned is not * orphaned to the caller. Therefore, the caller should consider * the object to be read-only. */ public void popElement(QName element) { element.setValues(fElements[--fSize]); } // popElement(QName) /** Clears the stack without throwing away existing QName objects. */ public void clear() { fSize = 0; } // clear() } // class ElementStack /** * This interface defines an XML "event" dispatching model. Classes * that implement this interface are responsible for scanning parts * of the XML document and dispatching callbacks. * * @xerces.internal * * @author Glenn Marcy, IBM */ protected interface Dispatcher { // // Dispatcher methods // /** * Dispatch an XML "event". * * @param complete True if this dispatcher is intended to scan * and dispatch as much as possible. * * @return True if there is more to dispatch either from this * or a another dispatcher. * * @throws IOException Thrown on i/o error. * @throws XNIException Thrown on parse error. */ public boolean dispatch(boolean complete) throws IOException, XNIException; } // interface Dispatcher /** * Dispatcher to handle content scanning. * * @author Andy Clark, IBM * @author Eric Ye, IBM */ protected class FragmentContentDispatcher implements Dispatcher { // // Dispatcher methods // /** * Dispatch an XML "event". * * @param complete True if this dispatcher is intended to scan * and dispatch as much as possible. * * @return True if there is more to dispatch either from this * or a another dispatcher. * * @throws IOException Thrown on i/o error. * @throws XNIException Thrown on parse error. */ public boolean dispatch(boolean complete) throws IOException, XNIException { try { boolean again; do { again = false; switch (fScannerState) { case SCANNER_STATE_CONTENT: { if (fEntityScanner.skipChar('<')) { setScannerState(SCANNER_STATE_START_OF_MARKUP); again = true; } else if (fEntityScanner.skipChar('&')) { setScannerState(SCANNER_STATE_REFERENCE); again = true; } else { do { int c = scanContent(); if (c == '<') { fEntityScanner.scanChar(); setScannerState(SCANNER_STATE_START_OF_MARKUP); break; } else if (c == '&') { fEntityScanner.scanChar(); setScannerState(SCANNER_STATE_REFERENCE); break; } else if (c != -1 && isInvalidLiteral(c)) { if (XMLChar.isHighSurrogate(c)) { // special case: surrogates fStringBuffer.clear(); if (scanSurrogates(fStringBuffer)) { // call handler if (fDocumentHandler != null) { fDocumentHandler.characters(fStringBuffer, null); } } } else { reportFatalError("InvalidCharInContent", new Object[] { Integer.toString(c, 16)}); fEntityScanner.scanChar(); } } } while (complete); } break; } case SCANNER_STATE_START_OF_MARKUP: { fMarkupDepth++; if (fEntityScanner.skipChar('/')) { if (scanEndElement() == 0) { if (elementDepthIsZeroHook()) { return true; } } setScannerState(SCANNER_STATE_CONTENT); } else if (isValidNameStartChar(fEntityScanner.peekChar())) { scanStartElement(); setScannerState(SCANNER_STATE_CONTENT); } else if (fEntityScanner.skipChar('!')) { if (fEntityScanner.skipChar('-')) { if (!fEntityScanner.skipChar('-')) { reportFatalError("InvalidCommentStart", null); } setScannerState(SCANNER_STATE_COMMENT); again = true; } else if (fEntityScanner.skipString("[CDATA[")) { setScannerState(SCANNER_STATE_CDATA); again = true; } else if (!scanForDoctypeHook()) { reportFatalError("MarkupNotRecognizedInContent", null); } } else if (fEntityScanner.skipChar('?')) { setScannerState(SCANNER_STATE_PI); again = true; } else if (isValidNameStartHighSurrogate(fEntityScanner.peekChar())) { scanStartElement(); setScannerState(SCANNER_STATE_CONTENT); } else { reportFatalError("MarkupNotRecognizedInContent", null); setScannerState(SCANNER_STATE_CONTENT); } break; } case SCANNER_STATE_COMMENT: { scanComment(); setScannerState(SCANNER_STATE_CONTENT); break; } case SCANNER_STATE_PI: { scanPI(); setScannerState(SCANNER_STATE_CONTENT); break; } case SCANNER_STATE_CDATA: { scanCDATASection(complete); setScannerState(SCANNER_STATE_CONTENT); break; } case SCANNER_STATE_REFERENCE: { fMarkupDepth++; // NOTE: We need to set the state beforehand // because the XMLEntityHandler#startEntity // callback could set the state to // SCANNER_STATE_TEXT_DECL and we don't want // to override that scanner state. setScannerState(SCANNER_STATE_CONTENT); if (fEntityScanner.skipChar('#')) { scanCharReference(); } else { scanEntityReference(); } break; } case SCANNER_STATE_TEXT_DECL: { // scan text decl if (fEntityScanner.skipString("





© 2015 - 2024 Weber Informatics LLC | Privacy Policy