org.htmlunit.cyberneko.xerces.parsers.AbstractDOMParser Maven / Gradle / Ivy

Go to download
/*
 * Copyright (c) 2017-2024 Ronald Brill
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.htmlunit.cyberneko.xerces.parsers;

import java.io.CharConversionException;
import java.io.IOException;
import java.util.Stack;

import org.htmlunit.cyberneko.xerces.dom.AttrImpl;
import org.htmlunit.cyberneko.xerces.dom.CoreDocumentImpl;
import org.htmlunit.cyberneko.xerces.dom.DOMMessageFormatter;
import org.htmlunit.cyberneko.xerces.dom.DocumentImpl;
import org.htmlunit.cyberneko.xerces.dom.EntityImpl;
import org.htmlunit.cyberneko.xerces.dom.EntityReferenceImpl;
import org.htmlunit.cyberneko.xerces.dom.TextImpl;
import org.htmlunit.cyberneko.xerces.util.ErrorHandlerWrapper;
import org.htmlunit.cyberneko.xerces.util.SAXMessageFormatter;
import org.htmlunit.cyberneko.xerces.xni.Augmentations;
import org.htmlunit.cyberneko.xerces.xni.NamespaceContext;
import org.htmlunit.cyberneko.xerces.xni.QName;
import org.htmlunit.cyberneko.xerces.xni.XMLAttributes;
import org.htmlunit.cyberneko.xerces.xni.XMLLocator;
import org.htmlunit.cyberneko.xerces.xni.XMLString;
import org.htmlunit.cyberneko.xerces.xni.XNIException;
import org.htmlunit.cyberneko.xerces.xni.parser.XMLConfigurationException;
import org.htmlunit.cyberneko.xerces.xni.parser.XMLErrorHandler;
import org.htmlunit.cyberneko.xerces.xni.parser.XMLInputSource;
import org.htmlunit.cyberneko.xerces.xni.parser.XMLParseException;
import org.htmlunit.cyberneko.xerces.xni.parser.XMLParserConfiguration;
import org.w3c.dom.Attr;
import org.w3c.dom.CDATASection;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.EntityReference;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.Text;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.LocatorImpl;

/**
 * This is the base class of all DOM parsers. It implements the XNI callback
 * methods to create the DOM tree. After a successful parse of an XML document,
 * the DOM Document object can be queried using the getDocument
 * method. The actual pipeline is defined in parser configuration.
 *
 * @author Arnaud Le Hors, IBM
 * @author Andy Clark, IBM
 * @author Elena Litani, IBM
 */
public class AbstractDOMParser extends AbstractXMLDocumentParser {

    /** Feature id: namespace processing (SAX feature prefix + namespaces feature name). */
    protected static final String NAMESPACES = Constants.SAX_FEATURE_PREFIX + Constants.NAMESPACES_FEATURE;

    /** Feature id: keep entity reference nodes in the generated tree. */
    protected static final String CREATE_ENTITY_REF_NODES = Constants.XERCES_FEATURE_PREFIX
            + Constants.CREATE_ENTITY_REF_NODES_FEATURE;

    /** Feature id: add comment nodes to the generated tree. */
    protected static final String INCLUDE_COMMENTS_FEATURE = Constants.XERCES_FEATURE_PREFIX
            + Constants.INCLUDE_COMMENTS_FEATURE;

    /** Feature id: create CDATA section nodes instead of plain text nodes. */
    protected static final String CREATE_CDATA_NODES_FEATURE = Constants.XERCES_FEATURE_PREFIX
            + Constants.CREATE_CDATA_NODES_FEATURE;

    /** Feature id: add ignorable whitespace to the generated tree. */
    protected static final String INCLUDE_IGNORABLE_WHITESPACE = Constants.XERCES_FEATURE_PREFIX
            + Constants.INCLUDE_IGNORABLE_WHITESPACE;

    /** Features this parser registers with the parser configuration. */
    private static final String[] RECOGNIZED_FEATURES = {
        NAMESPACES,
        CREATE_ENTITY_REF_NODES,
        INCLUDE_COMMENTS_FEATURE,
        CREATE_CDATA_NODES_FEATURE,
        INCLUDE_IGNORABLE_WHITESPACE};

    /** Properties this parser registers with the parser configuration (none). */
    private static final String[] RECOGNIZED_PROPERTIES = {};

    /** Compile-time switch: when true, the event callbacks print a trace line to System.out. */
    private static final boolean DEBUG_EVENTS = false;

    /** Cached state of {@link #CREATE_ENTITY_REF_NODES}; refreshed in {@link #reset()}. */
    protected boolean fCreateEntityRefNodes;

    /** Cached state of {@link #INCLUDE_IGNORABLE_WHITESPACE}; refreshed in {@link #reset()}. */
    protected boolean fIncludeIgnorableWhitespace;

    /** Cached state of {@link #INCLUDE_COMMENTS_FEATURE}; refreshed in {@link #reset()}. */
    protected boolean fIncludeComments;

    /** Cached state of {@link #CREATE_CDATA_NODES_FEATURE}; refreshed in {@link #reset()}. */
    protected boolean fCreateCDATANodes;

    /** The document being built; created in startDocument and cleared in {@link #reset()}. */
    protected Document fDocument;

    /** The same document viewed as the built-in implementation type; null when none is in use. */
    protected CoreDocumentImpl fDocumentImpl;

    /**
     * The document class to instantiate in startDocument; null selects the built-in DocumentImpl.
     * NOTE(review): declared as a raw type here - the generic parameter may have been lost; confirm.
     */
    protected Class fDocumentClass;

    /** The document type node created by doctypeDecl, if any. */
    protected DocumentType fDocumentType;

    /** The node that newly created children are appended to. */
    protected Node fCurrentNode;
    /** The CDATA section currently receiving character data, or null. */
    protected CDATASection fCurrentCDATASection;
    /** The entity declaration matching the general entity being processed, or null. */
    protected EntityImpl fCurrentEntityDecl;

    /** Buffer that collects character data arriving in several chunks. */
    protected final XMLString fStringBuffer = new XMLString();

    /** Cached state of {@link #NAMESPACES}; refreshed in {@link #reset()}. */
    protected boolean fNamespaceAware;

    /** True between startCDATA and endCDATA. */
    protected boolean fInCDATASection;

    /** True once the first chunk of character data has been stored in a text node. */
    protected boolean fFirstChunk = false;

    // additional parser state

    /**
     * Base uri stack; within this part of the file it is only emptied by {@link #reset()}.
     * NOTE(review): declared as a raw type here - the element type may have been lost; confirm.
     */
    protected final Stack fBaseURIStack = new Stack<>();

    /** Reusable QName that receives each attribute name in startElement. */
    private final QName fAttrQName = new QName();

    /** Document locator handed to startDocument; read again in endDocument. */
    private XMLLocator fLocator;

    // Default constructor.
    protected AbstractDOMParser(final XMLParserConfiguration config, final Class documentClass) {
        super(config);

        // add recognized features
        parserConfiguration_.addRecognizedFeatures(RECOGNIZED_FEATURES);

        // set default values
        parserConfiguration_.setFeature(CREATE_ENTITY_REF_NODES, true);
        parserConfiguration_.setFeature(INCLUDE_IGNORABLE_WHITESPACE, true);
        parserConfiguration_.setFeature(INCLUDE_COMMENTS_FEATURE, true);
        parserConfiguration_.setFeature(CREATE_CDATA_NODES_FEATURE, true);

        // add recognized properties
        parserConfiguration_.addRecognizedProperties(RECOGNIZED_PROPERTIES);

        setDocumentClass(documentClass);
    }

    /**
     * Selects the document class that is instantiated when a document starts.
     * Passing null makes the parser fall back to its built-in document
     * implementation.
     *
     * @param documentClass the document class to use when constructing the DOM
     *                      tree, may be null
     */
    protected void setDocumentClass(final Class documentClass) {
        this.fDocumentClass = documentClass;
    }

    /**
     * Gives access to the document built by the parser.
     *
     * @return the current DOM document; null after {@link #reset()} until a new
     *         document has been started
     */
    public Document getDocument() {
        return this.fDocument;
    }

    /**
     * Resets the parser state: re-reads the feature flags from the parser
     * configuration and drops every reference to the previously built document.
     *
     * @throws XNIException Thrown on initialization error.
     */
    @Override
    public void reset() throws XNIException {
        super.reset();

        // get feature state
        fCreateEntityRefNodes = parserConfiguration_.getFeature(CREATE_ENTITY_REF_NODES);

        fIncludeIgnorableWhitespace = parserConfiguration_.getFeature(INCLUDE_IGNORABLE_WHITESPACE);

        fNamespaceAware = parserConfiguration_.getFeature(NAMESPACES);

        fIncludeComments = parserConfiguration_.getFeature(INCLUDE_COMMENTS_FEATURE);

        fCreateCDATANodes = parserConfiguration_.getFeature(CREATE_CDATA_NODES_FEATURE);

        // reset dom information
        fDocument = null;
        fDocumentImpl = null;
        fDocumentType = null;
        fCurrentNode = null;
        // drop the entity declaration of the previous parse as well; otherwise the old
        // node stays reachable and a later textDecl() call could still modify it
        fCurrentEntityDecl = null;

        // reset string buffer
        fStringBuffer.clear();

        // reset state information
        fInCDATASection = false;
        fFirstChunk = false;
        fCurrentCDATASection = null;

        fBaseURIStack.removeAllElements();
    }

    /**
     * Notifies the start of a general entity. An entity reference node is
     * created, appended to the current node and made the new current node.
     * <p>
     * Note: This method is not called for entity references appearing as part
     * of attribute values.
     *
     * @param name     The name of the general entity.
     * @param encoding The auto-detected IANA encoding name of the entity stream.
     *                 This value will be null in those situations where the
     *                 entity encoding is not auto-detected (e.g. internal
     *                 entities or a document entity that is parsed from a
     *                 java.io.Reader).
     * @param augs     Additional information that may include infoset
     *                 augmentations
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    @Override
    public void startGeneralEntity(final String name, final String encoding, final Augmentations augs) throws XNIException {
        if (DEBUG_EVENTS) {
            System.out.println("==>startGeneralEntity (" + name + ")");
        }

        setCharacterData(true);
        final EntityReference reference = fDocument.createEntityReference(name);

        if (fDocumentImpl != null) {
            // REVISIT: baseURI/actualEncoding
            // this part relies on the built-in DOM implementation classes
            final EntityReferenceImpl referenceImpl = (EntityReferenceImpl) reference;

            if (fDocumentType != null) {
                // remember the matching declaration and record the detected encoding on it
                fCurrentEntityDecl = (EntityImpl) fDocumentType.getEntities().getNamedItem(name);
                if (fCurrentEntityDecl != null) {
                    fCurrentEntityDecl.setInputEncoding(encoding);
                }
            }

            // the children are filled in by the following parser events, so the
            // reference is flagged as not needing a children sync
            referenceImpl.needsSyncChildren(false);
        }

        fCurrentNode.appendChild(reference);
        fCurrentNode = reference;
    }

    /**
     * Notifies of the presence of a TextDecl line in an entity. If present, this
     * method will be called immediately following the startEntity call.
     * <p>
     * Note: This method will never be called for the document entity; it is
     * only called for external general entities referenced in document content.
     * <p>
     * Note: This method is not called for entity references appearing as part
     * of attribute values.
     * <p>
     * The values are stored on the current entity declaration; without one the
     * call does nothing.
     *
     * @param version  The XML version, or null if not specified.
     * @param encoding The IANA encoding name of the entity.
     * @param augs     Additional information that may include infoset augmentations
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    @Override
    public void textDecl(final String version, final String encoding, final Augmentations augs) throws XNIException {
        if (fCurrentEntityDecl == null) {
            return;
        }

        fCurrentEntityDecl.setXmlEncoding(encoding);
        // an absent version leaves the value of the declaration untouched
        if (version != null) {
            fCurrentEntityDecl.setXmlVersion(version);
        }
    }

    /**
     * A comment. Appends a comment node to the current node unless comments
     * are excluded by the include-comments feature.
     *
     * @param text The text in the comment.
     * @param augs Additional information that may include infoset augmentations
     *
     * @throws XNIException Thrown by application to signal an error.
     */
    @Override
    public void comment(final XMLString text, final Augmentations augs) throws XNIException {
        if (fIncludeComments) {
            final Comment commentNode = fDocument.createComment(text.toString());
            // close the running character data state before the new node is attached
            setCharacterData(false);
            fCurrentNode.appendChild(commentNode);
        }
    }

    /**
     * A processing instruction. Processing instructions consist of a target name
     * and, optionally, text data. The data is only meaningful to the application.
     * <p>
     * Typically, a processing instruction's data will contain a series of
     * pseudo-attributes. These pseudo-attributes follow the form of element
     * attributes but are not parsed or presented to the application as anything
     * other than text. The application is responsible for parsing the data.
     * <p>
     * The instruction is appended to the current node as a processing
     * instruction node.
     *
     * @param target The target.
     * @param data   The data; it is converted with toString() without a null check.
     * @param augs   Additional information that may include infoset augmentations
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    @Override
    public void processingInstruction(final String target, final XMLString data, final Augmentations augs) throws XNIException {
        if (DEBUG_EVENTS) {
            System.out.println("==>processingInstruction (" + target + ")");
        }

        final String content = data.toString();
        final ProcessingInstruction instruction = fDocument.createProcessingInstruction(target, content);
        // close the running character data state before the new node is attached
        setCharacterData(false);
        fCurrentNode.appendChild(instruction);
    }

    /**
     * The start of the document. Creates the document object (the built-in
     * implementation, or an instance of the configured document class), turns
     * strict error checking off while the tree is built, records the input
     * encoding and the document URI, and makes the document the current node.
     *
     * @param locator          The document locator, may be null. When present
     *                         its expanded system id becomes the document URI.
     * @param encoding         The auto-detected IANA encoding name of the entity
     *                         stream. This value will be null in those situations
     *                         where the entity encoding is not auto-detected (e.g.
     *                         internal entities or a document entity that is parsed
     *                         from a java.io.Reader).
     * @param namespaceContext The namespace context in effect at the start of this
     *                         document. This object represents the current context.
     *                         Implementors of this class are responsible for
     *                         copying the namespace bindings from the current
     *                         context (and its parent contexts) if that information
     *                         is important.
     * @param augs             Additional information that may include infoset
     *                         augmentations
     *
     * @throws XNIException     Thrown by handler to signal an error.
     * @throws RuntimeException if the configured document class cannot be
     *                          instantiated or set up; the original failure is
     *                          attached as the cause
     */
    @Override
    public void startDocument(final XMLLocator locator, final String encoding, final NamespaceContext namespaceContext, final Augmentations augs) throws XNIException {

        fLocator = locator;
        if (fDocumentClass == null) {
            fDocument = new DocumentImpl();
            fDocumentImpl = (CoreDocumentImpl) fDocument;
            // REVISIT: when DOM Level 3 is REC rely on Document.support
            // instead of specific class
            // set DOM error checking off
            fDocumentImpl.setStrictErrorChecking(false);
            // set actual encoding
            fDocumentImpl.setInputEncoding(encoding);
            // set documentURI; the locator is optional (the custom class branch and
            // endDocument() guard against null too)
            if (locator != null) {
                fDocumentImpl.setDocumentURI(locator.getExpandedSystemId());
            }
        }
        else {
            // use specified document class
            try {
                fDocument = fDocumentClass.newInstance();
                fDocumentImpl = (CoreDocumentImpl) fDocument;

                // REVISIT: when DOM Level 3 is REC rely on
                // Document.support instead of specific class
                // set DOM error checking off
                fDocumentImpl.setStrictErrorChecking(false);
                // set actual encoding
                fDocumentImpl.setInputEncoding(encoding);
                // set documentURI
                if (locator != null) {
                    fDocumentImpl.setDocumentURI(locator.getExpandedSystemId());
                }
            }
            catch (final Exception e) {
                // keep the original failure as cause, otherwise the reason is lost
                throw new RuntimeException(DOMMessageFormatter.formatMessage(DOMMessageFormatter.DOM_DOMAIN,
                        "CannotCreateDocumentClass", new Object[] {fDocumentClass.getSimpleName()}), e);
            }
        }
        fCurrentNode = fDocument;
    }

    /**
     * Notifies of the presence of an XMLDecl line in the document. If present, this
     * method will be called immediately following the startDocument call.
     * The values are copied to the document when the built-in document
     * implementation is in use; otherwise the call does nothing.
     *
     * @param version    The XML version.
     * @param encoding   The IANA encoding name of the document, or null if not
     *                   specified.
     * @param standalone The standalone value, or null if not specified.
     * @param augs       Additional information that may include infoset
     *                   augmentations
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    @Override
    public void xmlDecl(final String version, final String encoding, final String standalone, final Augmentations augs) throws XNIException {
        // REVISIT: when DOM Level 3 is REC rely on Document.support
        // instead of specific class
        if (fDocumentImpl == null) {
            return;
        }

        if (version != null) {
            fDocumentImpl.setXmlVersion(version);
        }
        fDocumentImpl.setXmlEncoding(encoding);

        // only the literal "yes" marks the document as standalone
        final boolean isStandalone = "yes".equals(standalone);
        fDocumentImpl.setXmlStandalone(isStandalone);
    }

    /**
     * Notifies of the presence of the DOCTYPE line in the document. A document
     * type node is created and appended to the current node when the built-in
     * document implementation is in use; otherwise the call does nothing.
     *
     * @param rootElement The name of the root element.
     * @param publicId    The public identifier if an external DTD or null if the
     *                    external DTD is specified using SYSTEM.
     * @param systemId    The system identifier if an external DTD, null otherwise.
     * @param augs        Additional information that may include infoset
     *                    augmentations
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    @Override
    public void doctypeDecl(final String rootElement, final String publicId, final String systemId, final Augmentations augs)
            throws XNIException {
        if (fDocumentImpl == null) {
            return;
        }

        fDocumentType = fDocumentImpl.createDocumentType(rootElement, publicId, systemId);
        fCurrentNode.appendChild(fDocumentType);
    }

    /**
     * The start of an element. If the document specifies the start element by using
     * an empty tag, then the startElement method will immediately be followed by
     * the endElement method, with no intervening methods.
     * <p>
     * Creates the element node together with its attribute nodes, appends the
     * element to the current node and makes it the new current node.
     *
     * @param element    The name of the element.
     * @param attributes The element attributes.
     * @param augs       Additional information that may include infoset
     *                   augmentations
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    @Override
    public void startElement(final QName element, final XMLAttributes attributes, final Augmentations augs) throws XNIException {
        if (DEBUG_EVENTS) {
            System.out.println("==>startElement (" + element.getRawname() + ")");
        }

        final Element el = createElementNode(element);
        final int attrCount = attributes.getLength();
        boolean seenSchemaDefault = false;
        for (int i = 0; i < attrCount; i++) {
            attributes.getName(i, fAttrQName);
            final Attr attr = createAttrNode(fAttrQName);

            final String attrValue = attributes.getValue(i);

            attr.setValue(attrValue);
            final boolean specified = attributes.isSpecified(i);
            final String attrUri = fAttrQName.getUri();
            // Take special care of schema defaulted attributes. Calling the
            // non-namespace aware setAttributeNode() method could overwrite
            // another attribute with the same local name.
            // The namespace URI is compared by value; a reference comparison is only
            // correct as long as every URI string is guaranteed to be interned.
            if (!specified && (seenSchemaDefault || (attrUri != null
                    && !NamespaceContext.XMLNS_URI.equals(attrUri) && fAttrQName.getPrefix() == null))) {
                el.setAttributeNodeNS(attr);
                seenSchemaDefault = true;
            }
            else {
                el.setAttributeNode(attr);
            }
            // NOTE: The specified value MUST be set after you set
            // the node value because that turns the "specified"
            // flag to "true" which may overwrite a "false"
            // value from the attribute list. -Ac
            if (fDocumentImpl != null) {
                final AttrImpl attrImpl = (AttrImpl) attr;

                // DTD
                // For DOM Level 3 TypeInfo, the type name must
                // be null if this attribute has not been declared
                // in the DTD.
                attrImpl.setType(null);

                attrImpl.setSpecified(specified);
                // REVISIT: Handle entities in attribute value.
            }
        }
        setCharacterData(false);

        fCurrentNode.appendChild(el);
        fCurrentNode = el;
    }

    /**
     * An empty element. Handled as a start element event that is directly
     * followed by the matching end element event.
     *
     * @param element    The name of the element.
     * @param attributes The element attributes.
     * @param augs       Additional information that may include infoset
     *                   augmentations
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    @Override
    public void emptyElement(final QName element, final XMLAttributes attributes, final Augmentations augs) throws XNIException {
        // open ...
        this.startElement(element, attributes, augs);
        // ... and close it again right away
        this.endElement(element, augs);
    }

    /**
     * Character content. Inside a CDATA section (with CDATA node creation
     * enabled) the text goes into the current CDATA section node. Otherwise the
     * first chunk is stored in a new text node and every directly following
     * chunk is collected in the string buffer.
     *
     * @param text The content.
     * @param augs Additional information that may include infoset augmentations
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    @Override
    public void characters(final XMLString text, final Augmentations augs) throws XNIException {
        if (DEBUG_EVENTS) {
            System.out.println("==>characters(): " + text.toString());
        }

        if (fInCDATASection && fCreateCDATANodes) {
            if (fCurrentCDATASection != null) {
                // a CDATA section node is already open, simply extend it
                fCurrentCDATASection.appendData(text.toString());
                return;
            }

            // first chunk of this section: create the node and step into it
            fCurrentCDATASection = fDocument.createCDATASection(text.toString());
            fCurrentNode.appendChild(fCurrentCDATASection);
            fCurrentNode = fCurrentCDATASection;
            return;
        }

        // if type is union (XML Schema) it is possible that we receive
        // character call with empty data
        if (text.length() == 0) {
            return;
        }

        final Node lastChild = fCurrentNode.getLastChild();
        if (lastChild == null || lastChild.getNodeType() != Node.TEXT_NODE) {
            // nothing to continue, start a new text node with this chunk
            fFirstChunk = true;
            final Text created = fDocument.createTextNode(text.toString());
            fCurrentNode.appendChild(created);
            return;
        }

        // continuation of an existing text node: on the second chunk move the
        // data of the node into the string buffer
        if (fFirstChunk) {
            if (fDocumentImpl == null) {
                fStringBuffer.append(((Text) lastChild).getData());
                lastChild.setNodeValue(null);
            }
            else {
                fStringBuffer.append(((TextImpl) lastChild).removeData());
            }
            fFirstChunk = false;
        }
        // the text is known to be non-empty at this point (see check above)
        fStringBuffer.append(text);
    }

    /**
     * Ignorable whitespace. For this method to be called, the document source must
     * have some way of determining that the text containing only whitespace
     * characters should be considered ignorable. For example, the validator can
     * determine if a length of whitespace characters in the document are ignorable
     * based on the element content model.
     * <p>
     * The whitespace is dropped when the include-ignorable-whitespace feature is
     * off. Otherwise it is appended to a trailing text node of the current node,
     * or stored in a new text node.
     *
     * @param text The ignorable whitespace.
     * @param augs Additional information that may include infoset augmentations
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    @Override
    public void ignorableWhitespace(final XMLString text, final Augmentations augs) throws XNIException {
        if (!fIncludeIgnorableWhitespace) {
            return;
        }

        final Node lastChild = fCurrentNode.getLastChild();
        final boolean endsWithText = lastChild != null && lastChild.getNodeType() == Node.TEXT_NODE;
        if (endsWithText) {
            ((Text) lastChild).appendData(text.toString());
            return;
        }

        final Text whitespace = fDocument.createTextNode(text.toString());
        if (fDocumentImpl != null) {
            // only the built-in text implementation can carry the ignorable flag
            ((TextImpl) whitespace).setIgnorableWhitespace(true);
        }
        fCurrentNode.appendChild(whitespace);
    }

    /**
     * The end of an element. The parent of the current node becomes the new
     * current node.
     *
     * @param element The name of the element.
     * @param augs    Additional information that may include infoset augmentations
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    @Override
    public void endElement(final QName element, final Augmentations augs) throws XNIException {
        if (DEBUG_EVENTS) {
            System.out.println("==>endElement (" + element.getRawname() + ")");
        }

        // close the running character data state first, then step up one level
        setCharacterData(false);
        final Node enclosing = fCurrentNode.getParentNode();
        fCurrentNode = enclosing;
    }

    /**
     * The start of a CDATA section. Marks the parser as being inside a CDATA
     * section.
     *
     * @param augs Additional information that may include infoset augmentations
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    @Override
    public void startCDATA(final Augmentations augs) throws XNIException {
        fInCDATASection = true;

        if (!fCreateCDATANodes) {
            // without dedicated CDATA nodes the content is handled like normal text
            return;
        }
        setCharacterData(false);
    }

    /**
     * The end of a CDATA section. Leaves the CDATA state and, if a CDATA
     * section node was created, steps back to the parent of that node.
     *
     * @param augs Additional information that may include infoset augmentations
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    @Override
    public void endCDATA(final Augmentations augs) throws XNIException {
        fInCDATASection = false;

        if (fCurrentCDATASection == null) {
            // no node was created for this section, the current node did not change
            return;
        }

        fCurrentNode = fCurrentNode.getParentNode();
        fCurrentCDATASection = null;
    }

    /**
     * The end of the document. Stores the encoding reported by the locator (if
     * there is one) on the document, turns strict error checking back on and
     * clears the current node.
     *
     * @param augs Additional information that may include infoset augmentations
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    @Override
    public void endDocument(final Augmentations augs) throws XNIException {
        // REVISIT: when DOM Level 3 is REC rely on Document.support
        // instead of specific class
        final CoreDocumentImpl builtDocument = fDocumentImpl;
        if (builtDocument != null) {
            final XMLLocator documentLocator = fLocator;
            if (documentLocator != null) {
                // the actual encoding
                builtDocument.setInputEncoding(documentLocator.getEncoding());
            }
            // error checking was switched off while the tree was built
            builtDocument.setStrictErrorChecking(true);
        }

        fCurrentNode = null;
    }

    /**
     * This method notifies the end of a general entity.
     * <p>
     * If the entity has a declaration in the document type that has no children
     * yet, deep copies of the children of the current node are appended to that
     * declaration. If entity reference nodes are not wanted, the children of the
     * current node are moved in front of it in its parent, the current node is
     * removed and the parent becomes the current node.
     * <p>
     * Note: This method is not called for entity references
     * appearing as part of attribute values.
     *
     * @param name The name of the entity.
     * @param augs Additional information that may include infoset augmentations
     *
     * @throws XNIException Thrown by handler to signal an error.
     */
    @Override
    public void endGeneralEntity(final String name, final Augmentations augs) throws XNIException {
        if (DEBUG_EVENTS) {
            System.out.println("==>endGeneralEntity: (" + name + ")");
        }

        setCharacterData(true);

        if (fDocumentType != null) {
            // get current entity declaration
            final NamedNodeMap entities = fDocumentType.getEntities();
            fCurrentEntityDecl = (EntityImpl) entities.getNamedItem(name);
            if (fCurrentEntityDecl != null) {
                // fill the declaration only once: copy the children of the
                // current node if the declaration is still empty
                if (fCurrentEntityDecl.getFirstChild() == null) {
                    Node child = fCurrentNode.getFirstChild();
                    while (child != null) {
                        final Node copy = child.cloneNode(true);
                        fCurrentEntityDecl.appendChild(copy);
                        child = child.getNextSibling();
                    }
                }
                fCurrentEntityDecl = null;
            }

        }

        if (!fCreateEntityRefNodes) {
            // move entity reference children to the list of
            // siblings of its parent and remove entity reference
            final NodeList children = fCurrentNode.getChildNodes();
            final Node parent = fCurrentNode.getParentNode();
            // the number of children is captured before any of them is moved
            final int length = children.getLength();
            if (length > 0) {

                // get previous sibling of the entity reference
                Node node = fCurrentNode.getPreviousSibling();
                // normalize text nodes: a leading text child is merged into a
                // preceding text sibling instead of being moved
                final Node child = children.item(0);
                if (node != null && node.getNodeType() == Node.TEXT_NODE && child.getNodeType() == Node.TEXT_NODE) {
                    ((Text) node).appendData(child.getNodeValue());
                    fCurrentNode.removeChild(child);

                }
                else {
                    node = parent.insertBefore(child, fCurrentNode);
                    handleBaseURI(node);
                }

                // the first child has been removed or moved above; each pass
                // moves the child that is at index 0 at that moment
                for (int i = 1; i < length; i++) {
                    node = parent.insertBefore(children.item(0), fCurrentNode);
                    handleBaseURI(node);
                }
            }
            // drop the entity reference node and continue in its parent
            parent.removeChild(fCurrentNode);
            fCurrentNode = parent;
        }
    }

    /**
     * Records base URI information for a node. For an element without an
     * xml:base attribute, that attribute is added when the base URI of the
     * current node is known and differs from the document URI. For a
     * processing instruction the base URI of the current node is only read;
     * nothing is stored. Does nothing unless the built-in document
     * implementation is in use.
     *
     * @param node the node
     */
    protected final void handleBaseURI(final Node node) {
        // REVISIT: remove dependency on our implementation when
        // DOM L3 becomes REC
        if (fDocumentImpl == null) {
            return;
        }

        final short kind = node.getNodeType();

        if (kind == Node.PROCESSING_INSTRUCTION_NODE) {
            // the value is looked up but not used any further
            fCurrentNode.getBaseURI();
            return;
        }
        if (kind != Node.ELEMENT_NODE) {
            return;
        }

        final String xmlNamespace = "http://www.w3.org/XML/1998/namespace";
        final Element elem = (Element) node;

        // an element that already has an xml:base attribute is left alone
        final Attr existing;
        if (fNamespaceAware) {
            existing = elem.getAttributeNodeNS(xmlNamespace, "base");
        }
        else {
            existing = elem.getAttributeNode("xml:base");
        }
        if (existing != null) {
            return;
        }

        // retrieve the baseURI from the current node (the entity reference)
        final String base = fCurrentNode.getBaseURI();
        if (base == null || base.equals(fDocumentImpl.getDocumentURI())) {
            return;
        }

        if (fNamespaceAware) {
            elem.setAttributeNS(xmlNamespace, "xml:base", base);
        }
        else {
            elem.setAttribute("xml:base", base);
        }
    }

    /**
     * Creates the element node for the given name. Subclasses can override
     * this method to create element nodes in other ways.
     *
     * @param element the qualified name of the element
     * @return the new element node
     */
    protected Element createElementNode(final QName element) {
        if (!fNamespaceAware) {
            return fDocument.createElement(element.getRawname());
        }

        if (fDocumentImpl == null) {
            return fDocument.createElementNS(element.getUri(), element.getRawname());
        }

        // the built-in implementation accepts the local part as well, which
        // allows it to reuse the strings we have here
        return fDocumentImpl.createElementNS(element.getUri(), element.getRawname(), element.getLocalpart());
    }

    /**
     * Creates the attribute node for the given qualified name.
     * Subclasses can override this method to create attribute nodes in other ways.
     *
     * @param attrQName the qualified name of the attribute to create
     * @return the newly created attribute
     */
    protected Attr createAttrNode(final QName attrQName) {
        if (!fNamespaceAware) {
            // without namespace support only the raw name is used
            return fDocument.createAttribute(attrQName.getRawname());
        }

        if (fDocumentImpl == null) {
            return fDocument.createAttributeNS(attrQName.getUri(), attrQName.getRawname());
        }

        // if we are using the xerces DOM implementation, call our
        // own factory method to reuse the strings we have here.
        return fDocumentImpl.createAttributeNS(attrQName.getUri(), attrQName.getRawname(), attrQName.getLocalpart());
    }

    /*
     * When the first characters() call is received, the data is stored in a new
     * Text node. If another chunk of data arrives right after the first
     * characters() call, the data from the Text node followed by the new
     * characters is appended to fStringBuffer and the text node data is set to
     * empty.
     *
     * This function is called when the state changes and the buffered data must
     * be written to the last child of the current node.
     *
     * Note: if a DOMFilter is set, you must make sure that fFirstChunk is set to
     * true whenever a Node is skipped or removed, otherwise some data can be lost.
     */
    protected void setCharacterData(final boolean sawChars) {
        // remember the character data state for the next chunk
        fFirstChunk = sawChars;

        // if we have data in the buffer we must have created
        // a text node already.
        final Node lastChild = fCurrentNode.getLastChild();
        if (lastChild == null || fStringBuffer.length() <= 0) {
            // the buffer is left untouched in this case
            return;
        }

        // REVISIT: should this check be performed?
        if (lastChild.getNodeType() == Node.TEXT_NODE) {
            final String data = fStringBuffer.toString();
            if (fDocumentImpl == null) {
                ((Text) lastChild).setData(data);
            }
            else {
                ((TextImpl) lastChild).replaceData(data);
            }
        }

        // reset string buffer (also when the last child is not a text node)
        fStringBuffer.clear();
    }

    /**
     * Parses the input source specified by the given system identifier.
     * <p>
     * This method is equivalent to the following:
     *
     * <pre>
     * parse(new InputSource(systemId));
     * </pre>
     *
     * XNI exceptions raised while parsing are converted: a wrapped
     * {@link SAXException} or {@link IOException} is rethrown as is, everything
     * else is reported as a {@link SAXException} (a {@link SAXParseException}
     * carrying the location when the parser itself detected the problem).
     *
     * @param systemId The system identifier (URI).
     *
     * @exception org.xml.sax.SAXException Throws exception on SAX error.
     * @exception java.io.IOException      Throws exception on i/o error.
     */
    public void parse(final String systemId) throws SAXException, IOException {

        // parse document
        final XMLInputSource source = new XMLInputSource(null, systemId, null);
        try {
            parse(source);
        }

        // wrap XNI exceptions as SAX exceptions
        catch (final XMLParseException e) {
            final Exception ex = e.getException();
            if (ex == null || ex instanceof CharConversionException) {
                // must be a parser exception; mine it for locator info and throw
                // a SAXParseException
                final LocatorImpl locatorImpl = new LocatorImpl();
                locatorImpl.setPublicId(e.getPublicId());
                locatorImpl.setSystemId(e.getExpandedSystemId());
                locatorImpl.setLineNumber(e.getLineNumber());
                locatorImpl.setColumnNumber(e.getColumnNumber());
                throw (ex == null) ? new SAXParseException(e.getMessage(), locatorImpl)
                        : new SAXParseException(e.getMessage(), locatorImpl, ex);
            }
            if (ex instanceof SAXException) {
                // why did we create an XMLParseException?
                throw (SAXException) ex;
            }
            if (ex instanceof IOException) {
                throw (IOException) ex;
            }
            throw new SAXException(ex);
        }
        catch (final XNIException e) {
            // the failure is reported to the caller through the exception thrown
            // below; nothing is printed to stderr here
            final Exception ex = e.getException();
            if (ex == null) {
                throw new SAXException(e.getMessage());
            }
            if (ex instanceof SAXException) {
                throw (SAXException) ex;
            }
            if (ex instanceof IOException) {
                throw (IOException) ex;
            }
            throw new SAXException(ex);
        }

    }

    /**
     * Parses the document described by the given SAX input source.
     * <p>
     * The public id, system id, byte stream, character stream and encoding of the
     * input source are copied into an {@code XMLInputSource} which is then parsed.
     * XNI exceptions raised while parsing are converted: a wrapped
     * {@link SAXException} or {@link IOException} is rethrown as is, everything
     * else is reported as a {@link SAXException} (a {@link SAXParseException}
     * carrying the location when the parser itself detected the problem).
     *
     * @param inputSource the input source
     *
     * @exception org.xml.sax.SAXException on error
     * @exception java.io.IOException      on error
     */
    public void parse(final InputSource inputSource) throws SAXException, IOException {
        try {
            // translate the SAX input source into its XNI counterpart
            final XMLInputSource source = new XMLInputSource(inputSource.getPublicId(), inputSource.getSystemId(), null);
            source.setByteStream(inputSource.getByteStream());
            source.setCharacterStream(inputSource.getCharacterStream());
            source.setEncoding(inputSource.getEncoding());
            parse(source);
        }
        catch (final XMLParseException parseException) {
            final Exception cause = parseException.getException();

            // no cause (or a character conversion problem) means the parser itself
            // failed; mine the exception for locator info and report a SAXParseException
            if (cause == null || cause instanceof CharConversionException) {
                final LocatorImpl location = new LocatorImpl();
                location.setPublicId(parseException.getPublicId());
                location.setSystemId(parseException.getExpandedSystemId());
                location.setLineNumber(parseException.getLineNumber());
                location.setColumnNumber(parseException.getColumnNumber());

                if (cause == null) {
                    throw new SAXParseException(parseException.getMessage(), location);
                }
                throw new SAXParseException(parseException.getMessage(), location, cause);
            }

            // unwrap exceptions the caller already expects
            if (cause instanceof SAXException) {
                throw (SAXException) cause;
            }
            if (cause instanceof IOException) {
                throw (IOException) cause;
            }
            throw new SAXException(cause);
        }
        catch (final XNIException xniException) {
            final Exception cause = xniException.getException();
            if (cause == null) {
                throw new SAXException(xniException.getMessage());
            }

            // unwrap exceptions the caller already expects
            if (cause instanceof SAXException) {
                throw (SAXException) cause;
            }
            if (cause instanceof IOException) {
                throw (IOException) cause;
            }
            throw new SAXException(cause);
        }
    }

    /**
     * Allow an application to register an error event handler.
     *
     * <p>
     * If the application does not register an error handler, all error events
     * reported by the SAX parser will be silently ignored; however, normal
     * processing may not continue. It is highly recommended that all SAX
     * applications implement an error handler to avoid unexpected bugs.
     * </p>
     *
     * <p>
     * Applications may register a new or different handler in the middle of a
     * parse, and the SAX parser must begin using the new handler immediately.
     * </p>
     *
     * <p>
     * If the configuration already holds an {@code ErrorHandlerWrapper}, that
     * wrapper is updated in place; otherwise a new wrapper is installed. An
     * {@code XMLConfigurationException} raised by the configuration is ignored.
     * NOTE(review): no explicit null check is done in this method; whether a null
     * handler is rejected depends on {@code ErrorHandlerWrapper} - confirm.
     * </p>
     *
     * @param errorHandler The error handler.
     * @see #getErrorHandler
     */
    public void setErrorHandler(final ErrorHandler errorHandler) {
        try {
            final XMLErrorHandler current = (XMLErrorHandler) parserConfiguration_.getProperty(ERROR_HANDLER);
            if (!(current instanceof ErrorHandlerWrapper)) {
                // no wrapper registered so far, install a fresh one
                parserConfiguration_.setProperty(ERROR_HANDLER, new ErrorHandlerWrapper(errorHandler));
                return;
            }

            // reuse the wrapper that is already registered
            ((ErrorHandlerWrapper) current).setErrorHandler(errorHandler);
        }
        catch (final XMLConfigurationException e) {
            // do nothing
        }
    }

    /**
     * Return the current error handler.
     * <p>
     * The handler is taken from the {@code ErrorHandlerWrapper} stored in the
     * parser configuration. If the configuration holds no such wrapper, or
     * reading the property raises an {@code XMLConfigurationException}, null is
     * returned.
     *
     * @return The current error handler, or null if none has been registered.
     * @see #setErrorHandler
     */
    public ErrorHandler getErrorHandler() {
        try {
            final XMLErrorHandler current = (XMLErrorHandler) parserConfiguration_.getProperty(ERROR_HANDLER);
            // instanceof is false for null, so no separate null check is needed
            if (current instanceof ErrorHandlerWrapper) {
                return ((ErrorHandlerWrapper) current).getErrorHandler();
            }
        }
        catch (final XMLConfigurationException e) {
            // do nothing, fall through to the default below
        }
        return null;
    }

    /**
     * Set the state of any feature in a SAX2 parser. The parser might not recognize
     * the feature, and if it does recognize it, it might not be able to fulfill the
     * request.
     * <p>
     * The call is forwarded to the parser configuration; an
     * {@code XMLConfigurationException} raised there is translated into the
     * matching SAX exception.
     *
     * @param featureId The unique identifier (URI) of the feature.
     * @param state     The requested state of the feature (true or false).
     *
     * @exception SAXNotRecognizedException If the requested feature is not known.
     * @exception SAXNotSupportedException  If the requested feature is known, but
     *                                      the requested state is not supported.
     */
    public void setFeature(final String featureId, final boolean state) throws SAXNotRecognizedException, SAXNotSupportedException {
        try {
            parserConfiguration_.setFeature(featureId, state);
        }
        catch (final XMLConfigurationException e) {
            // the identifier reported by the configuration goes into the message
            final Object[] messageArgs = {e.getIdentifier()};
            final boolean notRecognized = e.getType() == XMLConfigurationException.NOT_RECOGNIZED;

            if (notRecognized) {
                throw new SAXNotRecognizedException(SAXMessageFormatter.formatMessage("feature-not-recognized", messageArgs));
            }
            throw new SAXNotSupportedException(SAXMessageFormatter.formatMessage("feature-not-supported", messageArgs));
        }
    }

    /**
     * Query the state of a feature.
     * <p>
     * Query the current state of any feature in a SAX2 parser. The parser might not
     * recognize the feature. The call is forwarded to the parser configuration; an
     * {@code XMLConfigurationException} raised there is translated into the
     * matching SAX exception.
     *
     * @param featureId The unique identifier (URI) of the feature being queried.
     * @return The current state of the feature.
     * @exception org.xml.sax.SAXNotRecognizedException If the requested feature is
     *                                                  not known.
     * @exception SAXNotSupportedException              If the requested feature is
     *                                                  known but not supported.
     */
    public boolean getFeature(final String featureId) throws SAXNotRecognizedException, SAXNotSupportedException {
        try {
            return parserConfiguration_.getFeature(featureId);
        }
        catch (final XMLConfigurationException e) {
            // the identifier reported by the configuration goes into the message
            final Object[] messageArgs = {e.getIdentifier()};
            final boolean notRecognized = e.getType() == XMLConfigurationException.NOT_RECOGNIZED;

            if (notRecognized) {
                throw new SAXNotRecognizedException(SAXMessageFormatter.formatMessage("feature-not-recognized", messageArgs));
            }
            throw new SAXNotSupportedException(SAXMessageFormatter.formatMessage("feature-not-supported", messageArgs));
        }
    }

    /**
     * Set the value of any property in a SAX2 parser. The parser might not
     * recognize the property, and if it does recognize it, it might not support the
     * requested value.
     * <p>
     * The call is forwarded to the parser configuration; an
     * {@code XMLConfigurationException} raised there is translated into the
     * matching SAX exception.
     *
     * @param propertyId The unique identifier (URI) of the property being set.
     * @param value      The value to which the property is being set.
     *
     * @exception SAXNotRecognizedException If the requested property is not known.
     * @exception SAXNotSupportedException  If the requested property is known, but
     *                                      the requested value is not supported.
     */
    public void setProperty(final String propertyId, final Object value)
            throws SAXNotRecognizedException, SAXNotSupportedException {
        try {
            parserConfiguration_.setProperty(propertyId, value);
        }
        catch (final XMLConfigurationException e) {
            // the identifier reported by the configuration goes into the message
            final Object[] messageArgs = {e.getIdentifier()};
            final boolean notRecognized = e.getType() == XMLConfigurationException.NOT_RECOGNIZED;

            if (notRecognized) {
                throw new SAXNotRecognizedException(SAXMessageFormatter.formatMessage("property-not-recognized", messageArgs));
            }
            throw new SAXNotSupportedException(SAXMessageFormatter.formatMessage("property-not-supported", messageArgs));
        }
    }

    /**
     * Returns the {@code XMLParserConfiguration} held by this parser.
     *
     * @return this parser's XMLParserConfiguration.
     */
    public XMLParserConfiguration getXMLParserConfiguration() {
        return parserConfiguration_;
    }
}