org.apache.xerces.impl.XMLDocumentFragmentScannerImpl Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.xerces.impl;
import java.io.CharConversionException;
import java.io.EOFException;
import java.io.IOException;
import org.apache.xerces.impl.io.MalformedByteSequenceException;
import org.apache.xerces.impl.msg.XMLMessageFormatter;
import org.apache.xerces.util.AugmentationsImpl;
import org.apache.xerces.util.XMLAttributesImpl;
import org.apache.xerces.util.XMLChar;
import org.apache.xerces.util.XMLStringBuffer;
import org.apache.xerces.util.XMLSymbols;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.QName;
import org.apache.xerces.xni.XMLAttributes;
import org.apache.xerces.xni.XMLDocumentHandler;
import org.apache.xerces.xni.XMLResourceIdentifier;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.parser.XMLComponent;
import org.apache.xerces.xni.parser.XMLComponentManager;
import org.apache.xerces.xni.parser.XMLConfigurationException;
import org.apache.xerces.xni.parser.XMLDocumentScanner;
import org.apache.xerces.xni.parser.XMLInputSource;
/**
* This class is responsible for scanning the structure and content
* of document fragments. The scanner acts as the source for the
* document information which is communicated to the document handler.
*
* This component requires the following features and properties from the
* component manager that uses it:
*
* - http://xml.org/sax/features/validation
* - http://apache.org/xml/features/scanner/notify-char-refs
* - http://apache.org/xml/features/scanner/notify-builtin-refs
* - http://apache.org/xml/properties/internal/symbol-table
* - http://apache.org/xml/properties/internal/error-reporter
* - http://apache.org/xml/properties/internal/entity-manager
*
*
* @xerces.internal
*
* @author Glenn Marcy, IBM
* @author Andy Clark, IBM
* @author Arnaud Le Hors, IBM
* @author Eric Ye, IBM
*
* @version $Id: XMLDocumentFragmentScannerImpl.java 572055 2007-09-02 17:55:43Z mrglavas $
*/
public class XMLDocumentFragmentScannerImpl
extends XMLScanner
implements XMLDocumentScanner, XMLComponent, XMLEntityHandler {
//
// Constants
//
// scanner states
//
// Integer codes identifying what the scanner expects next; reset() passes
// SCANNER_STATE_CONTENT to setScannerState(). The numbering is not
// contiguous: 5 and 9-12 do not appear in this list.
// NOTE(review): the missing values are presumably reserved for, or used by,
// related scanner classes -- confirm before assigning them a new meaning.
/** Scanner state: start of markup. */
protected static final int SCANNER_STATE_START_OF_MARKUP = 1;
/** Scanner state: comment. */
protected static final int SCANNER_STATE_COMMENT = 2;
/** Scanner state: processing instruction. */
protected static final int SCANNER_STATE_PI = 3;
/** Scanner state: DOCTYPE. */
protected static final int SCANNER_STATE_DOCTYPE = 4;
/** Scanner state: root element. */
protected static final int SCANNER_STATE_ROOT_ELEMENT = 6;
/** Scanner state: content. */
protected static final int SCANNER_STATE_CONTENT = 7;
/** Scanner state: reference. */
protected static final int SCANNER_STATE_REFERENCE = 8;
/** Scanner state: end of input. */
protected static final int SCANNER_STATE_END_OF_INPUT = 13;
/** Scanner state: terminated. */
protected static final int SCANNER_STATE_TERMINATED = 14;
/** Scanner state: CDATA section. */
protected static final int SCANNER_STATE_CDATA = 15;
/** Scanner state: Text declaration. */
protected static final int SCANNER_STATE_TEXT_DECL = 16;
// feature identifiers
/** Feature identifier: namespaces. */
protected static final String NAMESPACES =
Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE;
/** Feature identifier: notify built-in references. */
protected static final String NOTIFY_BUILTIN_REFS =
Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE;
// property identifiers
/** Property identifier: entity resolver. */
protected static final String ENTITY_RESOLVER =
Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;
// recognized features and properties
//
// The "recognized" arrays and their "defaults" arrays are index-parallel:
// getFeatureDefault() and getPropertyDefault() look an identifier up in the
// first array and return the entry at the same index of the second. Keep
// both arrays of a pair the same length and in the same order.
// NOTE(review): VALIDATION, NOTIFY_CHAR_REFS, SYMBOL_TABLE, ERROR_REPORTER
// and ENTITY_MANAGER are not declared in this part of the file; presumably
// they are inherited from XMLScanner -- confirm.
/** Recognized features; also returned (as a copy) by getRecognizedFeatures(). */
private static final String[] RECOGNIZED_FEATURES = {
NAMESPACES,
VALIDATION,
NOTIFY_BUILTIN_REFS,
NOTIFY_CHAR_REFS,
};
/**
* Feature defaults, index-parallel to RECOGNIZED_FEATURES. A null entry
* means no default is reported for that feature.
*/
private static final Boolean[] FEATURE_DEFAULTS = {
null,
null,
Boolean.FALSE,
Boolean.FALSE,
};
/** Recognized properties; also returned (as a copy) by getRecognizedProperties(). */
private static final String[] RECOGNIZED_PROPERTIES = {
SYMBOL_TABLE,
ERROR_REPORTER,
ENTITY_MANAGER,
ENTITY_RESOLVER,
};
/**
* Property defaults, index-parallel to RECOGNIZED_PROPERTIES. A null entry
* means no default is reported for that property.
*/
private static final Object[] PROPERTY_DEFAULTS = {
null,
null,
null,
null,
};
// debugging
//
// Compile-time switches; all are false, so code guarded by them is inactive.
/** Debug scanner state. */
private static final boolean DEBUG_SCANNER_STATE = false;
/** Debug dispatcher. */
private static final boolean DEBUG_DISPATCHER = false;
/**
* Debug content dispatcher scanning. When true, the start-element and
* attribute scanning methods print entry/exit traces to System.out.
*/
protected static final boolean DEBUG_CONTENT_SCANNING = false;
//
// Data
//
// protected data
/** Document handler. May be null; every handler call in this class is guarded by a null check. */
protected XMLDocumentHandler fDocumentHandler;
/**
* Entity stack. Entry i holds the value of fMarkupDepth recorded by
* startEntity() for the entity at depth i; the array is doubled by
* startEntity() when it is full.
*/
protected int[] fEntityStack = new int[4];
/**
* Markup depth. Decremented by the scan methods of this class as each
* markup construct is completed, and compared against fEntityStack to
* detect markup that is not balanced within a single entity.
*/
protected int fMarkupDepth;
/** Scanner state. */
protected int fScannerState;
/**
* SubScanner state: inside scanContent method. Set while scanContent() is
* holding ']' characters in its string buffer so that endEntity() can
* flush them to the handler.
*/
protected boolean fInScanContent = false;
/** has external dtd */
protected boolean fHasExternalDTD;
/** Standalone. Set by scanXMLDeclOrTextDecl() to true only when the standalone pseudo-attribute is "yes". */
protected boolean fStandalone;
/** True if [Entity Declared] is a VC; false if it is a WFC. */
protected boolean fIsEntityDeclaredVC;
/**
* External subset resolver. Holds the entity-resolver property value when
* that value is an ExternalSubsetResolver; otherwise null.
**/
protected ExternalSubsetResolver fExternalSubsetResolver;
// element information
/** Current element: the QName returned by the most recent fElementStack.pushElement() call. */
protected QName fCurrentElement;
/** Element stack. */
protected final ElementStack fElementStack = new ElementStack();
// other info
/** Document system identifier.
* REVISIT: So what's this used for? - NG
* protected String fDocumentSystemId;
******/
// features
/** Notify built-in references. */
protected boolean fNotifyBuiltInRefs = false;
// dispatchers
/** Active dispatcher; scanDocument() calls its dispatch method. */
protected Dispatcher fDispatcher;
/**
* Content dispatcher. Obtained from createContentDispatcher() while the
* instance fields are being initialized, and installed as the active
* dispatcher by reset().
*/
protected final Dispatcher fContentDispatcher = createContentDispatcher();
// temporary variables
/** Element QName. */
protected final QName fElementQName = new QName();
/** Attribute QName. */
protected final QName fAttributeQName = new QName();
/** Element attributes. */
protected final XMLAttributesImpl fAttributes = new XMLAttributesImpl();
/** String. Receives scanned content and normalized attribute values. */
protected final XMLString fTempString = new XMLString();
/** String. Receives the non-normalized attribute value in scanAttribute(). */
protected final XMLString fTempString2 = new XMLString();
/**
* Array of 3 strings. Filled by the XML/text declaration scan and read as
* [0] version, [1] encoding, [2] standalone.
*/
private final String[] fStrings = new String[3];
/** String buffer. Shared by scanContent(), scanComment() and scanCDATASection(). */
private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();
/** String buffer. Used by scanCharReference() for the referenced character(s). */
private final XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
/** Another QName. */
private final QName fQName = new QName();
/** Single character array. */
private final char[] fSingleChar = new char[1];
/**
* Saw spaces after element name or between attributes.
*
* This is reserved for the case where scanning of a start element spans
* several methods, as is the case when scanning the start of a root element
* where a DTD external subset may be read after scanning the element name.
*/
private boolean fSawSpace;
/** Reusable Augmentations. Created on first use by scanCharReference() and cleared before each reuse. */
private Augmentations fTempAugmentations = null;
//
// Constructors
//
/** Default constructor; performs no work beyond the field initializers. */
public XMLDocumentFragmentScannerImpl() {
    // intentionally empty
} // <init>()
//
// XMLDocumentScanner methods
//
/**
 * Sets the input source: registers this scanner as the entity handler
 * of the entity manager and then starts the "$fragment$" entity on the
 * supplied source.
 *
 * @param inputSource The input source.
 *
 * @throws IOException Thrown on i/o error.
 */
public void setInputSource(XMLInputSource inputSource) throws IOException {
    final String fragmentEntityName = "$fragment$";

    fEntityManager.setEntityHandler(this);
    fEntityManager.startEntity(fragmentEntityName, inputSource, false, true);
} // setInputSource(XMLInputSource)
/**
 * Scans a document by repeatedly asking the active dispatcher to
 * dispatch the next event(s).
 *
 * @param complete True if the scanner should scan the document
 *                 completely, pushing all events to the registered
 *                 document handler. A value of false indicates that
 *                 the scanner should only scan the next portion of
 *                 the document and return. A scanner instance is
 *                 permitted to completely scan a document if it does
 *                 not support this "pull" scanning model.
 *
 * @return True if there is more to scan, false otherwise.
 */
public boolean scanDocument(boolean complete)
    throws IOException, XNIException {

    // pick up the current entity scanner and make sure entity
    // notifications come back to this object
    fEntityScanner = fEntityManager.getEntityScanner();
    fEntityManager.setEntityHandler(this);

    // Dispatch at least once. In "complete" mode keep going until the
    // dispatcher reports that nothing is left. fDispatcher is re-read on
    // every round because a dispatch may install a different dispatcher.
    boolean moreToScan = fDispatcher.dispatch(complete);
    while (moreToScan && complete) {
        moreToScan = fDispatcher.dispatch(complete);
    }
    return moreToScan;

} // scanDocument(boolean):boolean
//
// XMLComponent methods
//
/**
 * Resets the component. The component can query the component manager
 * about any features and properties that affect the operation of the
 * component.
 *
 * Per-document state is always cleared and the content dispatcher is
 * re-installed. The notify-built-in-refs feature and the entity
 * resolver property are re-read only when fParserSettings is set.
 *
 * @param componentManager The component manager.
 *
 * @throws XMLConfigurationException Declared for the call to the
 *         superclass reset. Configuration exceptions raised while
 *         reading the feature and property queried here are caught
 *         and replaced by defaults (false / null).
 */
public void reset(XMLComponentManager componentManager)
    throws XMLConfigurationException {

    super.reset(componentManager);

    // sax features
    fAttributes.setNamespaces(fNamespaces);

    // per-document state
    fMarkupDepth = 0;
    fCurrentElement = null;
    fElementStack.clear();
    fHasExternalDTD = false;
    fStandalone = false;
    fIsEntityDeclaredVC = false;
    fInScanContent = false;

    // setup dispatcher
    setScannerState(SCANNER_STATE_CONTENT);
    setDispatcher(fContentDispatcher);

    // nothing else to do unless the parser settings have changed
    if (!fParserSettings) {
        return;
    }

    // xerces features
    boolean notifyBuiltIn;
    try {
        notifyBuiltIn = componentManager.getFeature(NOTIFY_BUILTIN_REFS);
    }
    catch (XMLConfigurationException e) {
        notifyBuiltIn = false;
    }
    fNotifyBuiltInRefs = notifyBuiltIn;

    // xerces properties
    ExternalSubsetResolver subsetResolver = null;
    try {
        Object candidate = componentManager.getProperty(ENTITY_RESOLVER);
        if (candidate instanceof ExternalSubsetResolver) {
            subsetResolver = (ExternalSubsetResolver) candidate;
        }
    }
    catch (XMLConfigurationException e) {
        subsetResolver = null;
    }
    fExternalSubsetResolver = subsetResolver;

} // reset(XMLComponentManager)
/**
 * Returns the feature identifiers recognized by this component.
 * A fresh copy is returned on every call, so callers cannot modify
 * the internal table.
 */
public String[] getRecognizedFeatures() {
    final String[] copy = (String[]) RECOGNIZED_FEATURES.clone();
    return copy;
} // getRecognizedFeatures():String[]
/**
 * Sets the state of a feature. This method is called by the component
 * manager any time after reset when a feature changes state.
 *
 * After delegating to the superclass, the only feature handled here is
 * the Xerces notify-built-in-refs feature; any other identifier is
 * ignored by this method.
 *
 * @param featureId The feature identifier.
 * @param state     The state of the feature.
 *
 * @throws XMLConfigurationException Declared for the superclass call.
 */
public void setFeature(String featureId, boolean state)
    throws XMLConfigurationException {

    super.setFeature(featureId, state);

    // only Xerces features are of interest below
    if (!featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) {
        return;
    }

    // compare the part after the prefix without building a new string
    final int suffixLength =
        featureId.length() - Constants.XERCES_FEATURE_PREFIX.length();
    final String notifySuffix = Constants.NOTIFY_BUILTIN_REFS_FEATURE;
    if (suffixLength == notifySuffix.length()
        && featureId.endsWith(notifySuffix)) {
        fNotifyBuiltInRefs = state;
    }

} // setFeature(String,boolean)
/**
 * Returns the property identifiers recognized by this component.
 * A fresh copy is returned on every call, so callers cannot modify
 * the internal table.
 */
public String[] getRecognizedProperties() {
    final String[] copy = (String[]) RECOGNIZED_PROPERTIES.clone();
    return copy;
} // getRecognizedProperties():String[]
/**
 * Sets the value of a property. This method is called by the component
 * manager any time after reset when a property changes value.
 *
 * After delegating to the superclass, two Xerces properties are handled
 * here: the entity manager (stored as given) and the entity resolver
 * (stored only when it is an ExternalSubsetResolver, otherwise the
 * external subset resolver is cleared). Other identifiers are ignored
 * by this method.
 *
 * @param propertyId The property identifier.
 * @param value      The value of the property.
 *
 * @throws XMLConfigurationException Declared for the superclass call.
 */
public void setProperty(String propertyId, Object value)
    throws XMLConfigurationException {

    super.setProperty(propertyId, value);

    // only Xerces properties are of interest below
    if (!propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
        return;
    }

    // compare the part after the prefix without building a new string
    final int suffixLength =
        propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length();

    if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length()
        && propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) {
        fEntityManager = (XMLEntityManager) value;
    }
    else if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length()
        && propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) {
        if (value instanceof ExternalSubsetResolver) {
            fExternalSubsetResolver = (ExternalSubsetResolver) value;
        }
        else {
            fExternalSubsetResolver = null;
        }
    }

} // setProperty(String,Object)
/**
 * Returns the default state for a feature, or null if this
 * component does not want to report a default value for this
 * feature (either because the feature is not in the recognized list
 * or because its default entry is null).
 *
 * @param featureId The feature identifier.
 *
 * @since Xerces 2.2.0
 */
public Boolean getFeatureDefault(String featureId) {
    // RECOGNIZED_FEATURES and FEATURE_DEFAULTS are index-parallel
    int index = 0;
    while (index < RECOGNIZED_FEATURES.length) {
        if (RECOGNIZED_FEATURES[index].equals(featureId)) {
            return FEATURE_DEFAULTS[index];
        }
        index++;
    }
    return null;
} // getFeatureDefault(String):Boolean
/**
 * Returns the default state for a property, or null if this
 * component does not want to report a default value for this
 * property (either because the property is not in the recognized list
 * or because its default entry is null).
 *
 * @param propertyId The property identifier.
 *
 * @since Xerces 2.2.0
 */
public Object getPropertyDefault(String propertyId) {
    // RECOGNIZED_PROPERTIES and PROPERTY_DEFAULTS are index-parallel
    int index = 0;
    while (index < RECOGNIZED_PROPERTIES.length) {
        if (RECOGNIZED_PROPERTIES[index].equals(propertyId)) {
            return PROPERTY_DEFAULTS[index];
        }
        index++;
    }
    return null;
} // getPropertyDefault(String):Object
//
// XMLDocumentSource methods
//
/**
 * Registers the handler that receives the document events produced by
 * this scanner.
 *
 * @param documentHandler The document handler; stored as given.
 */
public void setDocumentHandler(XMLDocumentHandler documentHandler) {
    this.fDocumentHandler = documentHandler;
} // setDocumentHandler(XMLDocumentHandler)
/**
 * Returns the document handler.
 *
 * @return The handler last passed to setDocumentHandler, or null if
 *         none has been set.
 */
public XMLDocumentHandler getDocumentHandler() {
    return this.fDocumentHandler;
} // getDocumentHandler():XMLDocumentHandler
//
// XMLEntityHandler methods
//
/**
 * This method notifies of the start of an entity. The DTD has the
 * pseudo-name of "[dtd]"; parameter entity names start with '%'; and
 * general entities are just specified by their name.
 *
 * The current markup depth is recorded for the new entity, the
 * superclass is notified, the standalone well-formedness constraint is
 * checked, and finally the document handler is told about the entity
 * unless an attribute value is being scanned or the entity is "[xml]".
 *
 * @param name       The name of the entity.
 * @param identifier The resource identifier.
 * @param encoding   The auto-detected IANA encoding name of the entity
 *                   stream. This value will be null in those situations
 *                   where the entity encoding is not auto-detected (e.g.
 *                   internal entities or a document entity that is
 *                   parsed from a java.io.Reader).
 * @param augs       Additional information that may include infoset
 *                   augmentations.
 *
 * @throws XNIException Thrown by handler to signal an error.
 */
public void startEntity(String name,
                        XMLResourceIdentifier identifier,
                        String encoding, Augmentations augs) throws XNIException {

    // keep track of this entity before fEntityDepth is increased,
    // doubling the stack first if it is full
    final int capacity = fEntityStack.length;
    if (fEntityDepth == capacity) {
        final int[] grown = new int[capacity * 2];
        System.arraycopy(fEntityStack, 0, grown, 0, capacity);
        fEntityStack = grown;
    }
    fEntityStack[fEntityDepth] = fMarkupDepth;

    super.startEntity(name, identifier, encoding, augs);

    // WFC: entity declared in external subset in standalone doc
    if (fStandalone && fEntityManager.isEntityDeclInExternalSubset(name)) {
        reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE",
                         new Object[]{name});
    }

    // call handler
    final boolean notifyHandler = fDocumentHandler != null
        && !fScanningAttribute
        && !name.equals("[xml]");
    if (notifyHandler) {
        fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs);
    }

} // startEntity(String,XMLResourceIdentifier,String)
/**
 * This method notifies the end of an entity. The DTD has the pseudo-name
 * of "[dtd]"; parameter entity names start with '%'; and general entities
 * are just specified by their name.
 *
 * Any text still buffered by scanContent is flushed first, then the
 * superclass is notified, the markup balance of the entity is verified,
 * and finally the document handler is told about the end of the entity
 * unless an attribute value is being scanned or the entity is "[xml]".
 *
 * @param name The name of the entity.
 * @param augs Additional information that may include infoset
 *             augmentations.
 *
 * @throws XNIException Thrown by handler to signal an error.
 */
public void endEntity(String name, Augmentations augs) throws XNIException {

    // flush possible pending output buffer - see scanContent
    final boolean pendingText = fInScanContent && fStringBuffer.length != 0;
    if (pendingText && fDocumentHandler != null) {
        fDocumentHandler.characters(fStringBuffer, null);
        // mark the buffer as flushed
        fStringBuffer.length = 0;
    }

    super.endEntity(name, augs);

    // the markup depth must be back at the value recorded by startEntity
    if (fEntityStack[fEntityDepth] != fMarkupDepth) {
        reportFatalError("MarkupEntityMismatch", null);
    }

    // call handler
    final boolean notifyHandler = fDocumentHandler != null
        && !fScanningAttribute
        && !name.equals("[xml]");
    if (notifyHandler) {
        fDocumentHandler.endGeneralEntity(name, augs);
    }

} // endEntity(String)
//
// Protected methods
//
// dispatcher factory methods
/**
 * Creates a content dispatcher.
 *
 * Note: this factory is invoked from the initializer of the
 * fContentDispatcher field, i.e. while the instance is still being
 * constructed.
 *
 * @return A new FragmentContentDispatcher.
 */
protected Dispatcher createContentDispatcher() {
    final Dispatcher contentDispatcher = new FragmentContentDispatcher();
    return contentDispatcher;
} // createContentDispatcher():Dispatcher
// scanning methods
/**
 * Scans an XML or text declaration.
 *
 * <pre>
 * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
 *                 | ('"' ('yes' | 'no') '"'))
 *
 * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
 * </pre>
 *
 * The scanned pseudo-attributes are applied in this order: standalone
 * flag, XML version on the entity scanner, handler notification, and
 * last the encoding on the entity scanner.
 *
 * @param scanningTextDecl True if a text declaration is to
 *                         be scanned instead of an XML
 *                         declaration.
 */
protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl)
    throws IOException, XNIException {

    // scan decl; the results are delivered through fStrings
    super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
    fMarkupDepth--;

    // pseudo-attribute values
    final String version = fStrings[0];
    final String encoding = fStrings[1];
    final String standalone = fStrings[2];

    // set standalone (an absent pseudo-attribute counts as "not standalone")
    fStandalone = "yes".equals(standalone);
    fEntityManager.setStandalone(fStandalone);

    // set version on reader
    fEntityScanner.setXMLVersion(version);

    // call handler
    if (fDocumentHandler != null) {
        if (!scanningTextDecl) {
            fDocumentHandler.xmlDecl(version, encoding, standalone, null);
        }
        else {
            fDocumentHandler.textDecl(version, encoding, null);
        }
    }

    // set encoding on reader, unless no encoding was declared or the
    // entity's encoding was specified externally
    if (encoding == null) {
        return;
    }
    if (!fEntityScanner.fCurrentEntity.isEncodingExternallySpecified()) {
        fEntityScanner.setEncoding(encoding);
    }

} // scanXMLDeclOrTextDecl(boolean)
/**
 * Scans the data of a processing instruction and reports the
 * instruction to the document handler. This is needed to handle the
 * situation where a document starts with a processing instruction whose
 * target name starts with "xml". (e.g. xmlfoo)
 *
 * @param target The PI target
 * @param data   The string to fill in with the data
 */
protected void scanPIData(String target, XMLString data)
    throws IOException, XNIException {

    super.scanPIData(target, data);

    // the processing instruction is complete
    fMarkupDepth--;

    // call handler
    final XMLDocumentHandler handler = fDocumentHandler;
    if (handler != null) {
        handler.processingInstruction(target, data, null);
    }

} // scanPIData(String)
/**
 * Scans a comment and reports its text to the document handler.
 *
 * <pre>
 * [15] Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
 * </pre>
 *
 * Note: Called after scanning past '&lt;!--'. The comment text is
 * collected in the shared fStringBuffer.
 */
protected void scanComment() throws IOException, XNIException {

    scanComment(fStringBuffer);

    // the comment is complete
    fMarkupDepth--;

    // call handler
    final XMLDocumentHandler handler = fDocumentHandler;
    if (handler != null) {
        handler.comment(fStringBuffer, null);
    }

} // scanComment()
/**
 * Scans a start element. This method will handle the binding of
 * namespace information and notifying the handler of the start
 * of the element.
 *
 * <pre>
 * [44] EmptyElemTag ::= '&lt;' Name (S Attribute)* S? '/>'
 * [40] STag ::= '&lt;' Name (S Attribute)* S? '>'
 * </pre>
 *
 * Note: This method assumes that the leading
 * '&lt;' character has been consumed.
 *
 * Note: This method uses the fElementQName and
 * fAttributes variables. The contents of these variables will be
 * destroyed. The caller should copy important information out of
 * these variables before calling this method.
 *
 * Note: For an empty element the markup depth is decremented, the
 * entity-boundary check is made and the element is popped off the
 * element stack whether or not a document handler is registered. Only
 * the handler notification itself depends on the handler being present,
 * so the scanner's bookkeeping stays consistent when there is no
 * handler.
 *
 * @return True if element is empty. (i.e. It matches
 *         production [44].)
 */
protected boolean scanStartElement()
    throws IOException, XNIException {
    if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanStartElement()");

    // name
    if (fNamespaces) {
        fEntityScanner.scanQName(fElementQName);
    }
    else {
        String name = fEntityScanner.scanName();
        fElementQName.setValues(null, name, name, null);
    }
    String rawname = fElementQName.rawname;

    // push element stack
    fCurrentElement = fElementStack.pushElement(fElementQName);

    // attributes
    boolean empty = false;
    fAttributes.removeAllAttributes();
    do {
        // spaces
        boolean sawSpace = fEntityScanner.skipSpaces();

        // end tag?
        int c = fEntityScanner.peekChar();
        if (c == '>') {
            fEntityScanner.scanChar();
            break;
        }
        else if (c == '/') {
            fEntityScanner.scanChar();
            if (!fEntityScanner.skipChar('>')) {
                reportFatalError("ElementUnterminated",
                                 new Object[]{rawname});
            }
            empty = true;
            break;
        }
        else if (!isValidNameStartChar(c) || !sawSpace) {
            // Second chance. Check if this character is a high
            // surrogate of a valid name start character.
            if (!isValidNameStartHighSurrogate(c) || !sawSpace) {
                reportFatalError("ElementUnterminated",
                                 new Object[] { rawname });
            }
        }

        // attributes
        scanAttribute(fAttributes);

    } while (true);

    if (empty) {
        // An empty element is opened and closed by the same tag, so the
        // depth/stack bookkeeping must happen here, independently of
        // whether anybody is listening.

        // decrease the markup depth..
        fMarkupDepth--;

        // check that this element was opened in the same entity
        if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
            reportFatalError("ElementEntityMismatch",
                             new Object[]{fCurrentElement.rawname});
        }

        // call handler
        if (fDocumentHandler != null) {
            fDocumentHandler.emptyElement(fElementQName, fAttributes, null);
        }

        // pop the element off the stack..
        fElementStack.popElement(fElementQName);
    }
    else if (fDocumentHandler != null) {
        // call handler
        fDocumentHandler.startElement(fElementQName, fAttributes, null);
    }

    if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanStartElement(): "+empty);
    return empty;

} // scanStartElement():boolean
/**
 * Scans the name of an element in a start or empty tag into
 * fElementQName and skips any white space that follows it, recording
 * in fSawSpace whether any was seen.
 *
 * @see #scanStartElement()
 */
protected void scanStartElementName()
    throws IOException, XNIException {

    // name
    if (!fNamespaces) {
        final String elementName = fEntityScanner.scanName();
        fElementQName.setValues(null, elementName, elementName, null);
    }
    else {
        fEntityScanner.scanQName(fElementQName);
    }

    // Must skip spaces here because the DTD scanner
    // would consume them at the end of the external subset.
    fSawSpace = fEntityScanner.skipSpaces();

} // scanStartElementName()
/**
 * Scans the remainder of a start or empty tag after the element name.
 * The element name is expected in fElementQName and fSawSpace is
 * expected to tell whether white space followed the name.
 *
 * Note: For an empty element the markup depth is decremented, the
 * entity-boundary check is made and the element is popped off the
 * element stack whether or not a document handler is registered. Only
 * the handler notification itself depends on the handler being present,
 * so the scanner's bookkeeping stays consistent when there is no
 * handler.
 *
 * @see #scanStartElement
 * @return True if element is empty.
 */
protected boolean scanStartElementAfterName()
    throws IOException, XNIException {

    String rawname = fElementQName.rawname;

    // push element stack
    fCurrentElement = fElementStack.pushElement(fElementQName);

    // attributes
    boolean empty = false;
    fAttributes.removeAllAttributes();
    do {
        // end tag?
        int c = fEntityScanner.peekChar();
        if (c == '>') {
            fEntityScanner.scanChar();
            break;
        }
        else if (c == '/') {
            fEntityScanner.scanChar();
            if (!fEntityScanner.skipChar('>')) {
                reportFatalError("ElementUnterminated",
                                 new Object[]{rawname});
            }
            empty = true;
            break;
        }
        else if (!isValidNameStartChar(c) || !fSawSpace) {
            // Second chance. Check if this character is a high
            // surrogate of a valid name start character.
            if (!isValidNameStartHighSurrogate(c) || !fSawSpace) {
                reportFatalError("ElementUnterminated",
                                 new Object[] { rawname });
            }
        }

        // attributes
        scanAttribute(fAttributes);

        // spaces
        fSawSpace = fEntityScanner.skipSpaces();

    } while (true);

    if (empty) {
        // An empty element is opened and closed by the same tag, so the
        // depth/stack bookkeeping must happen here, independently of
        // whether anybody is listening.

        // decrease the markup depth..
        fMarkupDepth--;

        // check that this element was opened in the same entity
        if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
            reportFatalError("ElementEntityMismatch",
                             new Object[]{fCurrentElement.rawname});
        }

        // call handler
        if (fDocumentHandler != null) {
            fDocumentHandler.emptyElement(fElementQName, fAttributes, null);
        }

        // pop the element off the stack..
        fElementStack.popElement(fElementQName);
    }
    else if (fDocumentHandler != null) {
        // call handler
        fDocumentHandler.startElement(fElementQName, fAttributes, null);
    }

    if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanStartElementAfterName(): "+empty);
    return empty;

} // scanStartElementAfterName()
/**
 * Scans an attribute and adds it to the given attribute list.
 *
 * <pre>
 * [41] Attribute ::= Name Eq AttValue
 * </pre>
 *
 * Note: This method assumes that the next
 * character on the stream is the first character of the attribute
 * name.
 *
 * Note: This method uses the fAttributeQName, fTempString and
 * fTempString2 variables. The contents of these variables will be
 * destroyed.
 *
 * @param attributes The attributes list for the scanned attribute.
 */
protected void scanAttribute(XMLAttributes attributes)
    throws IOException, XNIException {
    if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanAttribute()");

    // name
    if (!fNamespaces) {
        final String attrName = fEntityScanner.scanName();
        fAttributeQName.setValues(null, attrName, attrName, null);
    }
    else {
        fEntityScanner.scanQName(fAttributeQName);
    }

    // equals, optionally surrounded by white space
    fEntityScanner.skipSpaces();
    if (!fEntityScanner.skipChar('=')) {
        reportFatalError("EqRequiredInAttribute",
                         new Object[]{fCurrentElement.rawname,fAttributeQName.rawname});
    }
    fEntityScanner.skipSpaces();

    // register the attribute; the value is filled in below
    final int countBefore = attributes.getLength();
    final int index = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null);

    // WFC: Unique Att Spec -- the list did not grow, so an attribute of
    // this name was already present
    if (attributes.getLength() == countBefore) {
        reportFatalError("AttributeNotUnique",
                         new Object[]{fCurrentElement.rawname,
                                      fAttributeQName.rawname});
    }

    // Scan the value: fTempString receives the normalized value and
    // fTempString2 the non-normalized one. The result tells whether the
    // two are the same.
    final boolean sameAfterNormalization = scanAttributeValue(fTempString, fTempString2,
                                                              fAttributeQName.rawname,
                                                              fIsEntityDeclaredVC,
                                                              fCurrentElement.rawname);
    attributes.setValue(index, fTempString.toString());

    // Only store a separate non-normalized value when it differs; this
    // avoids creating a second string in the common case.
    if (!sameAfterNormalization) {
        attributes.setNonNormalizedValue(index, fTempString2.toString());
    }
    attributes.setSpecified(index, true);

    if (DEBUG_CONTENT_SCANNING) System.out.println("<<< scanAttribute()");
} // scanAttribute(XMLAttributes)
/**
 * Scans element content and passes the character data found to the
 * document handler.
 *
 * Note: This method uses the fTempString and fStringBuffer variables.
 *
 * @return The next character on the stream, or -1 when this method has
 *         itself consumed that character (a carriage return or a run
 *         of ']' characters).
 */
protected int scanContent() throws IOException, XNIException {

    XMLString text = fTempString;
    int next = fEntityScanner.scanContent(text);

    if (next == '\r') {
        // happens when there is the character reference &#13;
        // -- consume the carriage return and append it to the text
        fEntityScanner.scanChar();
        fStringBuffer.clear();
        fStringBuffer.append(fTempString);
        fStringBuffer.append((char) next);
        text = fStringBuffer;
        next = -1;
    }

    if (fDocumentHandler != null && text.length > 0) {
        fDocumentHandler.characters(text, null);
    }

    // Anything other than a ']' at the very start of the scanned text
    // is left for the caller to deal with.
    if (next != ']' || fTempString.length != 0) {
        return next;
    }

    fStringBuffer.clear();
    fStringBuffer.append((char) fEntityScanner.scanChar());

    // remember where we are in case we get an endEntity before we
    // could flush the buffer out - this happens when we're parsing an
    // entity which ends with a ]
    fInScanContent = true;

    //
    // We work on a single character basis to handle cases such as:
    // ']]]>' which we might otherwise miss.
    //
    boolean sawSecondBracket = false;
    while (fEntityScanner.skipChar(']')) {
        fStringBuffer.append(']');
        sawSecondBracket = true;
    }
    // "]]>" is not allowed in content
    if (sawSecondBracket && fEntityScanner.skipChar('>')) {
        reportFatalError("CDEndInContent", null);
    }

    if (fDocumentHandler != null && fStringBuffer.length != 0) {
        fDocumentHandler.characters(fStringBuffer, null);
    }
    fInScanContent = false;

    return -1;

} // scanContent():int
/**
* Scans a CDATA section, reporting startCDATA, the character data and
* endCDATA to the document handler (when one is registered).
*
* Note: This method uses the fTempString and
* fStringBuffer variables.
*
* Note: The section is always scanned to its end in a single call; the
* complete argument is not consulted by this implementation.
*
* @param complete True if the CDATA section is to be scanned
* completely.
*
* @return True if CDATA is completely scanned. This implementation
* always returns true.
*/
protected boolean scanCDATASection(boolean complete)
throws IOException, XNIException {
// call handler
if (fDocumentHandler != null) {
fDocumentHandler.startCDATA(null);
}
while (true) {
fStringBuffer.clear();
// NOTE(review): scanData appears to return false once the "]]"
// delimiter has been found and consumed, and true when it stopped
// for another reason -- confirm against the entity scanner.
if (!fEntityScanner.scanData("]]", fStringBuffer)) {
// report the data collected in front of the delimiter
if (fDocumentHandler != null && fStringBuffer.length > 0) {
fDocumentHandler.characters(fStringBuffer, null);
}
// count (and consume) any further ']' characters
int brackets = 0;
while (fEntityScanner.skipChar(']')) {
brackets++;
}
// report those extra brackets as character data
if (fDocumentHandler != null && brackets > 0) {
fStringBuffer.clear();
if (brackets > XMLEntityManager.DEFAULT_BUFFER_SIZE) {
// Handle large sequences of ']'
// The buffer is filled once with DEFAULT_BUFFER_SIZE brackets and
// reported "chunks" times; the rest is reported by shortening the
// same buffer to "remainder" characters.
int chunks = brackets / XMLEntityManager.DEFAULT_BUFFER_SIZE;
int remainder = brackets % XMLEntityManager.DEFAULT_BUFFER_SIZE;
for (int i = 0; i < XMLEntityManager.DEFAULT_BUFFER_SIZE; i++) {
fStringBuffer.append(']');
}
for (int i = 0; i < chunks; i++) {
fDocumentHandler.characters(fStringBuffer, null);
}
if (remainder != 0) {
fStringBuffer.length = remainder;
fDocumentHandler.characters(fStringBuffer, null);
}
}
else {
for (int i = 0; i < brackets; i++) {
fStringBuffer.append(']');
}
fDocumentHandler.characters(fStringBuffer, null);
}
}
// a '>' right after the brackets ends the section
if (fEntityScanner.skipChar('>')) {
break;
}
// otherwise the "]]" delimiter was ordinary data: report it
if (fDocumentHandler != null) {
fStringBuffer.clear();
fStringBuffer.append("]]");
fDocumentHandler.characters(fStringBuffer, null);
}
}
else {
// report what has been collected so far
if (fDocumentHandler != null) {
fDocumentHandler.characters(fStringBuffer, null);
}
// look at the character that follows
int c = fEntityScanner.peekChar();
if (c != -1 && isInvalidLiteral(c)) {
if (XMLChar.isHighSurrogate(c)) {
// scan the surrogate pair and report it as data
fStringBuffer.clear();
scanSurrogates(fStringBuffer);
if (fDocumentHandler != null) {
fDocumentHandler.characters(fStringBuffer, null);
}
}
else {
// report the invalid character, then skip over it
reportFatalError("InvalidCharInCDSect",
new Object[]{Integer.toString(c,16)});
fEntityScanner.scanChar();
}
}
}
}
// the CDATA section is complete
fMarkupDepth--;
// call handler
if (fDocumentHandler != null) {
fDocumentHandler.endCDATA(null);
}
return true;
} // scanCDATASection(boolean):boolean
/**
 * Scans an end element.
 *
 * <pre>
 * [42] ETag ::= '&lt;/' Name S? '>'
 * </pre>
 *
 * Note: This method uses the fElementQName variable.
 * The contents of this variable will be destroyed. The caller should
 * copy the needed information out of this variable before calling
 * this method.
 *
 * @return The markup depth after the end tag has been processed.
 */
protected int scanEndElement() throws IOException, XNIException {
    if (DEBUG_CONTENT_SCANNING) System.out.println(">>> scanEndElement()");

    // The element being closed is the one on top of the stack.
    fElementStack.popElement(fElementQName);

    // Rather than scanning a name (with its per-character XML name
    // checks and symbol table lookup), simply try to skip the raw name
    // that is expected here. This works with namespace processing on or
    // off. If the text does not match, the document is not well-formed.
    // REVISIT: better error handling when the name does not match.
    if (!fEntityScanner.skipString(fElementQName.rawname)) {
        reportFatalError("ETagRequired", new Object[]{fElementQName.rawname});
    }

    // end: optional white space followed by '>'
    fEntityScanner.skipSpaces();
    if (!fEntityScanner.skipChar('>')) {
        reportFatalError("ETagUnterminated",
                         new Object[]{fElementQName.rawname});
    }

    // the depth was increased for two markup "<" characters: the one of
    // the start tag and the one of this end tag
    fMarkupDepth -= 2;

    // check that this element was opened in the same entity
    if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
        reportFatalError("ElementEntityMismatch",
                         new Object[]{fCurrentElement.rawname});
    }

    // call handler
    final XMLDocumentHandler handler = fDocumentHandler;
    if (handler != null) {
        handler.endElement(fElementQName, null);
    }

    return fMarkupDepth;

} // scanEndElement():int
/**
 * Scans a character reference and reports the resulting character
 * data to the document handler.
 * <pre>
 * [66] CharRef ::= '&amp;#' [0-9]+ ';' | '&amp;#x' [0-9a-fA-F]+ ';'
 * </pre>
 * <p>
 * The scanned text is accumulated in fStringBuffer2. Nothing is
 * reported when the value scan yields -1 or when no document handler
 * is registered.
 *
 * @throws IOException  Thrown on i/o error.
 * @throws XNIException Thrown if the handler throws an exception upon
 *                      notification.
 */
protected void scanCharReference()
    throws IOException, XNIException {

    fStringBuffer2.clear();
    final int value = scanCharReferenceValue(fStringBuffer2, null);
    fMarkupDepth--;

    // nothing to deliver, or nobody to deliver it to
    if (value == -1 || fDocumentHandler == null) {
        return;
    }

    if (fNotifyCharRefs) {
        fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null);
    }

    // When validating, flag references to characters at or below 0x20
    // so that downstream components can tell this text came from a
    // character reference.
    Augmentations augs = null;
    if (fValidation && value <= 0x20) {
        // re-use the cached augmentations object, creating it lazily
        if (fTempAugmentations == null) {
            fTempAugmentations = new AugmentationsImpl();
        }
        else {
            fTempAugmentations.removeAllItems();
        }
        augs = fTempAugmentations;
        augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE);
    }

    fDocumentHandler.characters(fStringBuffer2, augs);

    if (fNotifyCharRefs) {
        fDocumentHandler.endGeneralEntity(fCharRefLiteral, null);
    }

} // scanCharReference()
/**
 * Scans an entity reference: the entity name followed by ';'.
 * <p>
 * The five built-in entities (amp, lt, gt, quot, apos) are delivered
 * to the document handler as a single character. A reference to an
 * unparsed entity is reported as a fatal error. Any other name is
 * handed to the entity manager to be started; if the name is not
 * declared, an error is reported first.
 *
 * @throws IOException  Thrown if i/o error occurs.
 * @throws XNIException Thrown if handler throws exception upon
 *                      notification.
 */
protected void scanEntityReference() throws IOException, XNIException {

    // name
    final String entityName = fEntityScanner.scanName();
    if (entityName == null) {
        reportFatalError("NameRequiredInReference", null);
        return;
    }

    // terminating ';'
    if (!fEntityScanner.skipChar(';')) {
        reportFatalError("SemicolonRequiredInReference", new Object[]{entityName});
    }
    fMarkupDepth--;

    // handle built-in entities
    // NOTE(review): names are compared by reference, which presumably
    // relies on scanName() returning symbol table strings -- confirm.
    if (entityName == fAmpSymbol) {
        handleCharacter('&', fAmpSymbol);
        return;
    }
    if (entityName == fLtSymbol) {
        handleCharacter('<', fLtSymbol);
        return;
    }
    if (entityName == fGtSymbol) {
        handleCharacter('>', fGtSymbol);
        return;
    }
    if (entityName == fQuotSymbol) {
        handleCharacter('"', fQuotSymbol);
        return;
    }
    if (entityName == fAposSymbol) {
        handleCharacter('\'', fAposSymbol);
        return;
    }

    // unparsed entities may not be referenced here
    if (fEntityManager.isUnparsedEntity(entityName)) {
        reportFatalError("ReferenceToUnparsedEntity", new Object[]{entityName});
        return;
    }

    // start general entity
    if (!fEntityManager.isDeclaredEntity(entityName)) {
        if (!fIsEntityDeclaredVC) {
            reportFatalError("EntityNotDeclared", new Object[]{entityName});
        }
        else if (fValidation) {
            // only an ordinary error in this case, and only when validating
            fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                                       "EntityNotDeclared",
                                       new Object[]{entityName},
                                       XMLErrorReporter.SEVERITY_ERROR);
        }
    }
    // reached for undeclared names as well, once error reporting returns
    fEntityManager.startEntity(entityName, false);

} // scanEntityReference()
// utility methods
/**
 * Reports the single character produced by resolving a built-in
 * entity to the document handler. Does nothing when no document
 * handler is registered.
 *
 * @param c      The character the built-in entity stands for.
 * @param entity The built-in entity name; passed to the general entity
 *               start/end callbacks when fNotifyBuiltInRefs is set.
 *
 * @throws XNIException Thrown if the handler throws an exception upon
 *                      notification.
 */
private void handleCharacter(char c, String entity) throws XNIException {
    if (fDocumentHandler == null) {
        return;
    }

    final boolean notify = fNotifyBuiltInRefs;
    if (notify) {
        fDocumentHandler.startGeneralEntity(entity, null, null, null);
    }

    // wrap the character in the re-usable one-element buffer
    fSingleChar[0] = c;
    fTempString.setValues(fSingleChar, 0, 1);
    fDocumentHandler.characters(fTempString, null);

    if (notify) {
        fDocumentHandler.endGeneralEntity(entity, null);
    }
} // handleCharacter(char,String)
/**
 * Handles the end element. This method makes sure that the end
 * element name matches the element on top of the element stack and
 * notifies the handler about the end of the element.
 * <p>
 * <strong>Note:</strong> This method uses the fQName variable.
 * The contents of this variable will be destroyed.
 *
 * @param element The element named by the end tag.
 * @param isEmpty True if the handler must not be notified; the
 *                endElement callback is skipped in that case.
 *
 * @return The element depth (the value of fMarkupDepth after it has
 *         been decremented).
 *
 * @throws XNIException Thrown if the handler throws a SAX exception
 *                      upon notification.
 */
// REVISIT: need to remove this method. It's not called anymore, because
// the handling is done when the end tag is scanned. - SG
protected int handleEndElement(QName element, boolean isEmpty)
    throws XNIException {

    fMarkupDepth--;

    // the element must have been opened in the entity that closes it
    final int entityStartDepth = fEntityStack[fEntityDepth - 1];
    if (fMarkupDepth < entityStartDepth) {
        reportFatalError("ElementEntityMismatch",
                         new Object[]{fCurrentElement.rawname});
    }

    // pop the matching start element into the shared fQName scratch object
    final QName opened = fQName;
    fElementStack.popElement(opened);

    // make sure the elements match (reference comparison of the names)
    if (opened.rawname != element.rawname) {
        reportFatalError("ETagRequired",
                         new Object[]{opened.rawname});
    }

    // bind namespaces: the end tag takes the uri of the start tag
    if (fNamespaces) {
        element.uri = opened.uri;
    }

    // call handler
    if (!isEmpty && fDocumentHandler != null) {
        fDocumentHandler.endElement(element, null);
    }

    return fMarkupDepth;

} // handleEndElement(QName,boolean):int
// helper methods
/**
 * Sets the scanner state. When DEBUG_SCANNER_STATE is enabled, the
 * name of the new state is also printed to standard output.
 *
 * @param state The new scanner state.
 */
protected final void setScannerState(int state) {

    fScannerState = state;

    if (!DEBUG_SCANNER_STATE) {
        return;
    }
    // prefix first, then the state name followed by a line break
    System.out.print("### setScannerState: ");
    System.out.println(getScannerStateName(state));

} // setScannerState(int)
/**
 * Sets the dispatcher. When DEBUG_DISPATCHER is enabled, the name of
 * the new dispatcher is also printed to standard output.
 *
 * @param dispatcher The new dispatcher.
 */
protected final void setDispatcher(Dispatcher dispatcher) {

    fDispatcher = dispatcher;

    if (!DEBUG_DISPATCHER) {
        return;
    }
    // prefix first, then the dispatcher name followed by a line break
    System.out.print("%%% setDispatcher: ");
    System.out.println(getDispatcherName(dispatcher));

} // setDispatcher(Dispatcher)
//
// Debugging helper methods
//
/**
 * Returns the scanner state name.
 *
 * @param state A scanner state value.
 *
 * @return The name of the matching SCANNER_STATE_* constant, or
 *         "??? (state)" for a value this method does not know.
 */
protected String getScannerStateName(int state) {

    String label;
    switch (state) {
        case SCANNER_STATE_DOCTYPE:
            label = "SCANNER_STATE_DOCTYPE";
            break;
        case SCANNER_STATE_ROOT_ELEMENT:
            label = "SCANNER_STATE_ROOT_ELEMENT";
            break;
        case SCANNER_STATE_START_OF_MARKUP:
            label = "SCANNER_STATE_START_OF_MARKUP";
            break;
        case SCANNER_STATE_COMMENT:
            label = "SCANNER_STATE_COMMENT";
            break;
        case SCANNER_STATE_PI:
            label = "SCANNER_STATE_PI";
            break;
        case SCANNER_STATE_CONTENT:
            label = "SCANNER_STATE_CONTENT";
            break;
        case SCANNER_STATE_REFERENCE:
            label = "SCANNER_STATE_REFERENCE";
            break;
        case SCANNER_STATE_END_OF_INPUT:
            label = "SCANNER_STATE_END_OF_INPUT";
            break;
        case SCANNER_STATE_TERMINATED:
            label = "SCANNER_STATE_TERMINATED";
            break;
        case SCANNER_STATE_CDATA:
            label = "SCANNER_STATE_CDATA";
            break;
        case SCANNER_STATE_TEXT_DECL:
            label = "SCANNER_STATE_TEXT_DECL";
            break;
        default:
            // unknown value: echo it back so it can be identified
            label = "??? (" + state + ')';
            break;
    }
    return label;

} // getScannerStateName(int):String
/**
 * Returns the dispatcher name.
 * <p>
 * The name is derived from the dispatcher's class name: the package
 * prefix is removed and, if a '$' remains, everything up to and
 * including the last '$'. The string "null" is returned when the
 * dispatcher is null, and also whenever DEBUG_DISPATCHER is disabled.
 *
 * @param dispatcher The dispatcher to name; may be null.
 *
 * @return The short dispatcher name, or "null".
 */
public String getDispatcherName(Dispatcher dispatcher) {

    if (!DEBUG_DISPATCHER || dispatcher == null) {
        return "null";
    }

    String name = dispatcher.getClass().getName();

    // strip the package prefix
    final int lastDot = name.lastIndexOf('.');
    if (lastDot != -1) {
        name = name.substring(lastDot + 1);

        // strip the enclosing class names
        final int lastDollar = name.lastIndexOf('$');
        if (lastDollar != -1) {
            name = name.substring(lastDollar + 1);
        }
    }
    return name;

} // getDispatcherName(Dispatcher):String
//
// Classes
//
/**
 * Element stack. This stack operates without synchronization, error
 * checking, and it re-uses objects instead of throwing popped items
 * away.
 *
 * @author Andy Clark, IBM
 */
protected static class ElementStack {

    //
    // Data
    //

    /** The stack data. Every slot holds a pre-allocated QName. */
    protected QName[] fElements;

    /** The size of the stack. */
    protected int fSize;

    //
    // Constructors
    //

    /** Default constructor. Allocates ten pre-populated slots. */
    public ElementStack() {
        fElements = new QName[10];
        populate(fElements, 0);
    } // <init>()

    //
    // Public methods
    //

    /**
     * Pushes an element on the stack. The backing array is doubled
     * when it is full.
     * <p>
     * <strong>Note:</strong> The QName values are copied into the
     * stack. In other words, the caller does <em>not</em> orphan
     * the element to the stack. Also, the QName object returned
     * is <em>not</em> orphaned to the caller. It should be
     * considered read-only.
     *
     * @param element The element to push onto the stack.
     *
     * @return Returns the actual QName object that stores the
     *         pushed values on the stack.
     */
    public QName pushElement(QName element) {
        if (fSize == fElements.length) {
            // grow: keep the existing QName objects, add fresh ones behind them
            final QName[] grown = new QName[fElements.length * 2];
            System.arraycopy(fElements, 0, grown, 0, fSize);
            fElements = grown;
            populate(fElements, fSize);
        }
        final QName slot = fElements[fSize];
        slot.setValues(element);
        fSize++;
        return slot;
    } // pushElement(QName):QName

    /**
     * Pops an element off of the stack by setting the values of
     * the specified QName.
     * <p>
     * <strong>Note:</strong> The values are copied out of the stack;
     * the stack keeps its own QName object for re-use. No underflow
     * check is made, so the stack must not be empty when this method
     * is called.
     *
     * @param element The QName that receives the popped values.
     */
    public void popElement(QName element) {
        fSize--;
        element.setValues(fElements[fSize]);
    } // popElement(QName)

    /** Clears the stack without throwing away existing QName objects. */
    public void clear() {
        fSize = 0;
    } // clear()

    //
    // Private methods
    //

    /** Stores a new QName in every slot from index <code>from</code> onwards. */
    private static void populate(QName[] slots, int from) {
        for (int slot = from; slot < slots.length; slot++) {
            slots[slot] = new QName();
        }
    } // populate(QName[],int)

} // class ElementStack
/**
 * This interface defines an XML "event" dispatching model. Classes
 * that implement this interface are responsible for scanning parts
 * of the XML document and dispatching callbacks.
 *
 * @xerces.internal
 *
 * @author Glenn Marcy, IBM
 */
protected interface Dispatcher {

    //
    // Dispatcher methods
    //

    /**
     * Dispatch an XML "event".
     *
     * @param complete True if this dispatcher is intended to scan
     *                 and dispatch as much as possible.
     *
     * @return True if there is more to dispatch, either from this
     *         dispatcher or from another one.
     *
     * @throws IOException  Thrown on i/o error.
     * @throws XNIException Thrown on parse error.
     */
    boolean dispatch(boolean complete)
        throws IOException, XNIException;

} // interface Dispatcher
/**
* Dispatcher to handle content scanning.
*
* @author Andy Clark, IBM
* @author Eric Ye, IBM
*/
protected class FragmentContentDispatcher
implements Dispatcher {
//
// Dispatcher methods
//
/**
* Dispatch an XML "event".
*
* @param complete True if this dispatcher is intended to scan
* and dispatch as much as possible.
*
* @return True if there is more to dispatch either from this
* or a another dispatcher.
*
* @throws IOException Thrown on i/o error.
* @throws XNIException Thrown on parse error.
*/
public boolean dispatch(boolean complete)
throws IOException, XNIException {
try {
boolean again;
do {
again = false;
switch (fScannerState) {
case SCANNER_STATE_CONTENT: {
if (fEntityScanner.skipChar('<')) {
setScannerState(SCANNER_STATE_START_OF_MARKUP);
again = true;
}
else if (fEntityScanner.skipChar('&')) {
setScannerState(SCANNER_STATE_REFERENCE);
again = true;
}
else {
do {
int c = scanContent();
if (c == '<') {
fEntityScanner.scanChar();
setScannerState(SCANNER_STATE_START_OF_MARKUP);
break;
}
else if (c == '&') {
fEntityScanner.scanChar();
setScannerState(SCANNER_STATE_REFERENCE);
break;
}
else if (c != -1 && isInvalidLiteral(c)) {
if (XMLChar.isHighSurrogate(c)) {
// special case: surrogates
fStringBuffer.clear();
if (scanSurrogates(fStringBuffer)) {
// call handler
if (fDocumentHandler != null) {
fDocumentHandler.characters(fStringBuffer, null);
}
}
}
else {
reportFatalError("InvalidCharInContent",
new Object[] {
Integer.toString(c, 16)});
fEntityScanner.scanChar();
}
}
} while (complete);
}
break;
}
case SCANNER_STATE_START_OF_MARKUP: {
fMarkupDepth++;
if (fEntityScanner.skipChar('/')) {
if (scanEndElement() == 0) {
if (elementDepthIsZeroHook()) {
return true;
}
}
setScannerState(SCANNER_STATE_CONTENT);
}
else if (isValidNameStartChar(fEntityScanner.peekChar())) {
scanStartElement();
setScannerState(SCANNER_STATE_CONTENT);
}
else if (fEntityScanner.skipChar('!')) {
if (fEntityScanner.skipChar('-')) {
if (!fEntityScanner.skipChar('-')) {
reportFatalError("InvalidCommentStart",
null);
}
setScannerState(SCANNER_STATE_COMMENT);
again = true;
}
else if (fEntityScanner.skipString("[CDATA[")) {
setScannerState(SCANNER_STATE_CDATA);
again = true;
}
else if (!scanForDoctypeHook()) {
reportFatalError("MarkupNotRecognizedInContent",
null);
}
}
else if (fEntityScanner.skipChar('?')) {
setScannerState(SCANNER_STATE_PI);
again = true;
}
else if (isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
scanStartElement();
setScannerState(SCANNER_STATE_CONTENT);
}
else {
reportFatalError("MarkupNotRecognizedInContent",
null);
setScannerState(SCANNER_STATE_CONTENT);
}
break;
}
case SCANNER_STATE_COMMENT: {
scanComment();
setScannerState(SCANNER_STATE_CONTENT);
break;
}
case SCANNER_STATE_PI: {
scanPI();
setScannerState(SCANNER_STATE_CONTENT);
break;
}
case SCANNER_STATE_CDATA: {
scanCDATASection(complete);
setScannerState(SCANNER_STATE_CONTENT);
break;
}
case SCANNER_STATE_REFERENCE: {
fMarkupDepth++;
// NOTE: We need to set the state beforehand
// because the XMLEntityHandler#startEntity
// callback could set the state to
// SCANNER_STATE_TEXT_DECL and we don't want
// to override that scanner state.
setScannerState(SCANNER_STATE_CONTENT);
if (fEntityScanner.skipChar('#')) {
scanCharReference();
}
else {
scanEntityReference();
}
break;
}
case SCANNER_STATE_TEXT_DECL: {
// scan text decl
if (fEntityScanner.skipString("