org.htmlunit.html.parser.neko.HtmlUnitNekoDOMBuilder Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of xlt Show documentation
XLT (Xceptance LoadTest) is an extensive load and performance test tool developed and maintained by Xceptance.
There is a newer version: 8.4.1
/*
 * Copyright (c) 2002-2024 Gargoyle Software Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.htmlunit.html.parser.neko;

import static org.htmlunit.BrowserVersionFeatures.HTML_ATTRIBUTE_LOWER_CASE;
import static org.htmlunit.BrowserVersionFeatures.HTML_COMMAND_TAG;
import static org.htmlunit.BrowserVersionFeatures.HTML_ISINDEX_TAG;
import static org.htmlunit.BrowserVersionFeatures.HTML_MAIN_TAG;
import static org.htmlunit.BrowserVersionFeatures.META_X_UA_COMPATIBLE;

import java.io.IOException;
import java.io.StringReader;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Locale;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.tuple.Triple;
import org.htmlunit.BrowserVersion;
import org.htmlunit.ObjectInstantiationException;
import org.htmlunit.WebClient;
import org.htmlunit.WebResponse;
import org.htmlunit.cyberneko.HTMLConfiguration;
import org.htmlunit.cyberneko.HTMLElements;
import org.htmlunit.cyberneko.HTMLEventInfo;
import org.htmlunit.cyberneko.HTMLScanner;
import org.htmlunit.cyberneko.HTMLTagBalancingListener;
import org.htmlunit.cyberneko.util.FastHashMap;
import org.htmlunit.cyberneko.xerces.parsers.AbstractSAXParser;
import org.htmlunit.cyberneko.xerces.xni.Augmentations;
import org.htmlunit.cyberneko.xerces.xni.QName;
import org.htmlunit.cyberneko.xerces.xni.XMLAttributes;
import org.htmlunit.cyberneko.xerces.xni.XMLString;
import org.htmlunit.cyberneko.xerces.xni.XNIException;
import org.htmlunit.cyberneko.xerces.xni.parser.XMLInputSource;
import org.htmlunit.cyberneko.xerces.xni.parser.XMLParserConfiguration;
import org.htmlunit.html.DomComment;
import org.htmlunit.html.DomDocumentType;
import org.htmlunit.html.DomElement;
import org.htmlunit.html.DomNode;
import org.htmlunit.html.DomText;
import org.htmlunit.html.ElementFactory;
import org.htmlunit.html.Html;
import org.htmlunit.html.HtmlBody;
import org.htmlunit.html.HtmlElement;
import org.htmlunit.html.HtmlForm;
import org.htmlunit.html.HtmlHiddenInput;
import org.htmlunit.html.HtmlHtml;
import org.htmlunit.html.HtmlImage;
import org.htmlunit.html.HtmlMeta;
import org.htmlunit.html.HtmlPage;
import org.htmlunit.html.HtmlSvg;
import org.htmlunit.html.HtmlTable;
import org.htmlunit.html.HtmlTableRow;
import org.htmlunit.html.HtmlTemplate;
import org.htmlunit.html.ScriptElement;
import org.htmlunit.html.SubmittableElement;
import org.htmlunit.html.XHtmlPage;
import org.htmlunit.html.parser.HTMLParser;
import org.htmlunit.html.parser.HTMLParserDOMBuilder;
import org.htmlunit.html.parser.HTMLParserListener;
import org.htmlunit.javascript.host.html.HTMLBodyElement;
import org.htmlunit.javascript.host.html.HTMLDocument;
import org.htmlunit.util.StringUtils;
import org.w3c.dom.Node;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;

/**
 * INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.
 *
 * The parser and DOM builder. This class subclasses Xerces's AbstractSAXParser and implements
 * the ContentHandler interface. Thus all parser APIs are kept private. The ContentHandler methods
 * consume SAX events to build the page DOM.
 *
 * @author Christian Sell
 * @author David K. Taylor
 * @author Chris Erskine
 * @author Ahmed Ashour
 * @author Marc Guillemot
 * @author Ethan Glasser-Camp
 * @author Sudhan Moghe
 * @author Ronald Brill
 * @author Frank Danek
 * @author Carsten Steul
 * @author Ronny Shapiro
 * @author Atsushi Nakagawa
 */
final class HtmlUnitNekoDOMBuilder extends AbstractSAXParser
        implements ContentHandler, LexicalHandler, HTMLTagBalancingListener, HTMLParserDOMBuilder {

    // cache Neko Elements for performance and memory efficiency
    private static final FastHashMap, HTMLElements>
                HTMLELEMENTS_CACHE = new FastHashMap<>();

    // Pre-populates HTMLELEMENTS_CACHE with one HTMLElements instance for each of the
    // eight combinations of the COMMAND / ISINDEX / MAIN feature flags.
    static {
        // continue short code enumeration
        final short isIndexShortCode = HTMLElements.UNKNOWN + 1;

        final short commandShortCode = isIndexShortCode + 1;
        final short mainShortCode = commandShortCode + 1;

        // isIndex is special - we have to add it here because all browsers move it to
        // the body (even if it is not supported)
        final HTMLElements.Element isIndex = new HTMLElements.Element(isIndexShortCode, "ISINDEX",
                HTMLElements.Element.CONTAINER, HTMLElements.BODY, null);
        final HTMLElements.Element isIndexSupported = new HTMLElements.Element(isIndexShortCode, "ISINDEX",
                HTMLElements.Element.BLOCK, HTMLElements.BODY, new short[] {isIndexShortCode});

        final HTMLElements.Element command = new HTMLElements.Element(commandShortCode, "COMMAND",
                HTMLElements.Element.EMPTY, new short[] {HTMLElements.BODY, HTMLElements.HEAD}, null);
        final HTMLElements.Element main = new HTMLElements.Element(mainShortCode, "MAIN",
                HTMLElements.Element.INLINE, HTMLElements.BODY, null);

        // key layout: (COMMAND_TAG, ISINDEX_TAG, MAIN_TAG)
        Triple<Boolean, Boolean, Boolean> key;
        HTMLElements value;

        // !COMMAND_TAG !ISINDEX_TAG !MAIN_TAG
        key = Triple.of(Boolean.FALSE, Boolean.FALSE, Boolean.FALSE);
        value = new HTMLElements();
        value.setElement(isIndex);
        HTMLELEMENTS_CACHE.put(key, value);

        // !COMMAND_TAG !ISINDEX_TAG MAIN_TAG
        key = Triple.of(Boolean.FALSE, Boolean.FALSE, Boolean.TRUE);
        value = new HTMLElements();
        value.setElement(main);
        value.setElement(isIndex);
        HTMLELEMENTS_CACHE.put(key, value);

        // !COMMAND_TAG ISINDEX_TAG !MAIN_TAG
        key = Triple.of(Boolean.FALSE, Boolean.TRUE, Boolean.FALSE);
        value = new HTMLElements();
        value.setElement(isIndexSupported);
        HTMLELEMENTS_CACHE.put(key, value);

        // !COMMAND_TAG ISINDEX_TAG MAIN_TAG
        key = Triple.of(Boolean.FALSE, Boolean.TRUE, Boolean.TRUE);
        value = new HTMLElements();
        value.setElement(isIndexSupported);
        value.setElement(main);
        HTMLELEMENTS_CACHE.put(key, value);

        // COMMAND_TAG !ISINDEX_TAG !MAIN_TAG
        key = Triple.of(Boolean.TRUE, Boolean.FALSE, Boolean.FALSE);
        value = new HTMLElements();
        value.setElement(command);
        value.setElement(isIndex);
        HTMLELEMENTS_CACHE.put(key, value);

        // COMMAND_TAG !ISINDEX_TAG MAIN_TAG
        key = Triple.of(Boolean.TRUE, Boolean.FALSE, Boolean.TRUE);
        value = new HTMLElements();
        value.setElement(command);
        value.setElement(isIndex);
        value.setElement(main);
        HTMLELEMENTS_CACHE.put(key, value);

        // COMMAND_TAG ISINDEX_TAG !MAIN_TAG
        key = Triple.of(Boolean.TRUE, Boolean.TRUE, Boolean.FALSE);
        value = new HTMLElements();
        value.setElement(command);
        value.setElement(isIndexSupported);
        HTMLELEMENTS_CACHE.put(key, value);

        // COMMAND_TAG ISINDEX_TAG MAIN_TAG
        key = Triple.of(Boolean.TRUE, Boolean.TRUE, Boolean.TRUE);
        value = new HTMLElements();
        value.setElement(command);
        value.setElement(isIndexSupported);
        value.setElement(main);
        HTMLELEMENTS_CACHE.put(key, value);
    }

    /** State of the head element: really parsed, synthesized by the parser, or not seen yet. */
    private enum HeadParsed { YES, SYNTHESIZED, NO }

    private final HTMLParser htmlParser_;
    private final HtmlPage page_;

    private Locator locator_;
    /** The currently open nodes; the head of the deque is the innermost one. */
    private final Deque<DomNode> stack_ = new ArrayDeque<>();

    /** Did the snippet try to overwrite the start node? */
    private boolean snippetStartNodeOverwritten_;
    /** Size of {@link #stack_} right after construction. */
    private final int initialSize_;
    private DomNode currentNode_;
    private final boolean createdByJavascript_;
    /** Character data collected since the last flush by handleCharacters(). */
    private final XMLString characters_ = new XMLString();
    private HtmlUnitNekoDOMBuilder.HeadParsed headParsed_ = HeadParsed.NO;
    private HtmlElement body_;
    private boolean lastTagWasSynthesized_;
    /** The form that currently takes ownership of submittable elements, if any. */
    private HtmlForm consumingForm_;
    private boolean formEndingIsAdjusting_;
    private boolean insideSvg_;

    private static final String FEATURE_AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations";
    private static final String FEATURE_PARSE_NOSCRIPT
        = "http://cyberneko.org/html/features/parse-noscript-content";

    /**
     * Feeds additional HTML into the running parse; the content is evaluated by the
     * parser configuration as part of the document currently being parsed.
     * The page is told about the start and the end of this (inline snippet) parsing.
     * @param html the HTML content to push
     */
    @Override
    public void pushInputString(final String html) {
        page_.registerParsingStart();
        page_.registerInlineSnippetParsingStart();
        try {
            final WebResponse response = page_.getWebResponse();
            final Charset charset = response.getContentCharset();
            final String pageUrl = response.getWebRequest().getUrl().toString();

            final XMLInputSource snippetSource =
                    new XMLInputSource(null, pageUrl, null, new StringReader(html), charset.name());
            final HTMLConfiguration configuration = (HTMLConfiguration) parserConfiguration_;
            configuration.evaluateInputSource(snippetSource);
        }
        finally {
            // always undo both registrations, even if the evaluation failed
            page_.registerParsingEnd();
            page_.registerInlineSnippetParsingEnd();
        }
    }

    /**
     * Creates a new builder for parsing the specified response contents.
     * @param htmlParser the parser that provides the element factories
     * @param node the location at which to insert the new content
     * @param url the page's URL
     * @param htmlContent the HTML content; handed to the error handler if a parser listener is registered
     * @param createdByJavascript if true the (script) tag was created by javascript
     */
    HtmlUnitNekoDOMBuilder(final HTMLParser htmlParser,
            final DomNode node, final URL url, final String htmlContent, final boolean createdByJavascript) {
        super(createConfiguration(node.getPage().getWebClient().getBrowserVersion()));

        htmlParser_ = htmlParser;
        createdByJavascript_ = createdByJavascript;
        page_ = (HtmlPage) node.getPage();

        // start at the given node and put all its ancestors on the stack
        currentNode_ = node;
        for (final Node ancestor : currentNode_.getAncestors()) {
            stack_.push((DomNode) ancestor);
        }
        // remember the stack depth we started with
        initialSize_ = stack_.size();

        final WebClient client = page_.getWebClient();
        final HTMLParserListener parserListener = client.getHTMLParserListener();
        final boolean hasListener = parserListener != null;
        if (hasListener) {
            parserConfiguration_.setErrorHandler(new HtmlUnitNekoHTMLErrorHandler(parserListener, url, htmlContent));
        }

        try {
            setFeature(FEATURE_AUGMENTATIONS, true);
            if (!client.getBrowserVersion().hasFeature(HTML_ATTRIBUTE_LOWER_CASE)) {
                setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change");
            }
            // errors are only reported if somebody listens
            setFeature("http://cyberneko.org/html/features/report-errors", hasListener);
            setFeature(FEATURE_PARSE_NOSCRIPT, !client.isJavaScriptEnabled());
            setFeature(HTMLScanner.ALLOW_SELFCLOSING_IFRAME, false);

            setContentHandler(this);
            setLexicalHandler(this); //comments and CDATA
        }
        catch (final SAXException e) {
            throw new ObjectInstantiationException("unable to create HTML parser", e);
        }
    }

    /**
     * Builds the parser configuration for the simulated browser by looking up the
     * cached element definitions matching the browser's COMMAND / ISINDEX / MAIN tag features.
     * @param browserVersion the simulated browser
     * @return the configuration
     */
    private static XMLParserConfiguration createConfiguration(final BrowserVersion browserVersion) {
        final boolean commandTag = browserVersion.hasFeature(HTML_COMMAND_TAG);
        final boolean isIndexTag = browserVersion.hasFeature(HTML_ISINDEX_TAG);
        final boolean mainTag = browserVersion.hasFeature(HTML_MAIN_TAG);

        final HTMLElements cachedElements = HTMLELEMENTS_CACHE.get(Triple.of(commandTag, isIndexTag, mainTag));
        return new HTMLConfiguration(cachedElements);
    }

    /**
     * {@inheritDoc}
     *
     * The locator is stored and later used to set start and end locations.
     */
    @Override
    public void setDocumentLocator(final Locator locator) {
        this.locator_ = locator;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void startDocument() throws SAXException {
        // intentionally empty: this builder takes no action for this event
    }

    /**
     * {@inheritDoc}
     *
     * Remembers whether the tag was synthesized and then delegates to the superclass.
     */
    @Override
    public void startElement(final QName element, final XMLAttributes attributes, final Augmentations augs)
        throws XNIException {
        // keep only the synthesized flag because augs might change
        final boolean synthesized = isSynthesized(augs);
        lastTagWasSynthesized_ = synthesized;

        super.startElement(element, attributes, augs);
    }

    /**
     * {@inheritDoc}
     *
     * Creates the DOM element for the started tag, attaches it to the tree and makes it
     * the current node. Pending character data is flushed first. The html/body/head tags
     * get special treatment while parsing a snippet or if the head was already parsed.
     */
    @Override
    public void startElement(String namespaceURI, final String localName, final String qName, final Attributes atts)
        throws SAXException {

        // set by endElement(); the flag is consumed and this start tag is skipped
        if (snippetStartNodeOverwritten_) {
            snippetStartNodeOverwritten_ = false;
            return;
        }
        // text collected so far belongs in front of the new element
        handleCharacters();

        final String tagLower = org.htmlunit.util.StringUtils.toRootLowerCase(localName);
        if (page_.isParsingHtmlSnippet() && ("html".equals(tagLower) || "body".equals(tagLower))) {
            // we have to push the current node on the stack to make sure
            // the endElement call is able to remove a node from the stack
            // NOTE(review): endElement() returns early for html/body while parsing a snippet,
            // so this entry does not seem to be popped there - confirm this is intended
            stack_.push(currentNode_);
            return;
        }

        if ("head".equals(tagLower)) {
            if (headParsed_ == HeadParsed.YES || page_.isParsingHtmlSnippet()) {
                // we have to push the current node on the stack to make sure
                // the endElement call is able to remove a node from the stack
                stack_.push(currentNode_);
                return;
            }

            // remember if this head is a real one or was synthesized by the parser
            headParsed_ = lastTagWasSynthesized_ ? HeadParsed.SYNTHESIZED : HeadParsed.YES;
        }

        if (namespaceURI != null) {
            namespaceURI = namespaceURI.trim();
        }

        // If we're adding a body element, keep track of any temporary synthetic ones
        // that we may have had to create earlier (for document.write(), for example).
        HtmlBody oldBody = null;
        if ("body".equals(qName) && page_.getBody() instanceof HtmlBody) {
            oldBody = (HtmlBody) page_.getBody();
        }

        // Add the new node.
        // the XHTML namespace is dropped for pages that are not XHtmlPages
        if (!(page_ instanceof XHtmlPage) && Html.XHTML_NAMESPACE.equals(namespaceURI)) {
            namespaceURI = null;
        }

        final ElementFactory factory =
                htmlParser_.getElementFactory(page_, namespaceURI, qName, insideSvg_, false);
        // elements created by the svg factory always get the svg namespace
        if (factory == HtmlUnitNekoHtmlParser.SVG_FACTORY) {
            namespaceURI = Html.SVG_NAMESPACE;
        }
        final DomElement newElement = factory.createElementNS(page_, namespaceURI, qName, atts, true);
        newElement.setStartLocation(locator_.getLineNumber(), locator_.getColumnNumber());

        // parse can't replace everything as it does not buffer elements while parsing
        addNodeToRightParent(currentNode_, newElement);

        // reset again by endElement() when the svg end tag arrives
        if (newElement instanceof HtmlSvg) {
            insideSvg_ = true;
        }

        // Forms own elements simply by enclosing source-wise rather than DOM parent-child relationship
        // Forms without a </form> will keep consuming forever
        if (newElement instanceof HtmlForm) {
            consumingForm_ = (HtmlForm) newElement;
            formEndingIsAdjusting_ = false;
        }
        else if (consumingForm_ != null) {
            // If the current form enclosed a suitable element
            if (newElement instanceof SubmittableElement) {
                // Let these be owned by the form
                if (((HtmlElement) newElement).getEnclosingForm() != consumingForm_) {
                    ((HtmlElement) newElement).setOwningForm(consumingForm_);
                }
            }
        }

        // If we had an old synthetic body and we just added a real body element, quietly
        // remove the old body and move its children to the real body element we just added.
        if (oldBody != null) {
            oldBody.quietlyRemoveAndMoveChildrenTo(newElement);
        }

        if (!insideSvg_ && "body".equals(tagLower)) {
            // remember the body; see getBody() and ignoredStartElement()
            body_ = (HtmlElement) newElement;
        }
        else if (newElement instanceof HtmlMeta && page_.hasFeature(META_X_UA_COMPATIBLE)) {
            // <meta http-equiv="X-UA-Compatible" content="IE=n"> forces the document mode,
            // limited to the numeric version of the simulated browser
            final HtmlMeta meta = (HtmlMeta) newElement;
            if ("X-UA-Compatible".equals(meta.getHttpEquivAttribute())) {
                final String content = meta.getContentAttribute();
                if (content.startsWith("IE=")) {
                    final String mode = content.substring(3).trim();
                    final int version = page_.getWebClient().getBrowserVersion().getBrowserVersionNumeric();
                    try {
                        int value = Integer.parseInt(mode);
                        if (value > version) {
                            value = version;
                        }
                        ((HTMLDocument) page_.getScriptableObject()).forceDocumentMode(value);
                    }
                    catch (final Exception e) {
                        // ignore
                    }
                }
            }
        }
        else if (createdByJavascript_ && newElement instanceof ScriptElement) {
            final ScriptElement script = (ScriptElement) newElement;
            script.markAsCreatedByDomParser();
        }

        // the new element is now the innermost open node
        currentNode_ = newElement;
        stack_.push(currentNode_);
    }

    /**
     * Attaches the new element to the parent it belongs to; for malformed HTML code this is
     * not necessarily the current node. The method tries to emulate the behavior of Firefox.
     * @param currentNode the node that is currently open
     * @param newElement the element to add
     */
    private void addNodeToRightParent(final DomNode currentNode, final DomElement newElement) {
        final String parentName = currentNode.getNodeName();
        final String childName = newElement.getNodeName();

        // table parts have to end up below the matching table structure element
        if (isTableChild(childName)) {
            appendChild("table".equals(parentName) ? currentNode : findElementOnStack("table"), newElement);
            return;
        }
        if ("tr".equals(childName)) {
            appendChild(isTableChild(parentName)
                            ? currentNode
                            : findElementOnStack("tbody", "thead", "tfoot"),
                        newElement);
            return;
        }
        if (isTableCell(childName)) {
            appendChild("tr".equals(parentName) ? currentNode : findElementOnStack("tr"), newElement);
            return;
        }

        // everything else is (with some exceptions) not allowed directly inside a table structure
        final boolean parentIsTableStructure =
                "table".equals(parentName) || isTableChild(parentName) || "tr".equals(parentName);
        if (parentIsTableStructure) {
            final boolean parentIsColgroup = "colgroup".equals(parentName);
            // scripts, forms, and styles are exempt
            final boolean exemptTag = "script".equals(childName)
                    || "form".equals(childName)
                    || "style".equals(childName);

            final boolean staysHere = "template".equals(childName)
                    || (!parentIsColgroup && exemptTag)
                    || ("col".equals(childName) && parentIsColgroup)
                    || "caption".equals(parentName)
                    || newElement instanceof HtmlHiddenInput;

            if (staysHere) {
                currentNode.appendChild(newElement);
            }
            else {
                // move before the table
                findElementOnStack("table").insertBefore(newElement);
            }
            return;
        }

        if (formEndingIsAdjusting_ && "form".equals(parentName)) {
            // We cater to HTMLTagBalancer's shortcomings by moving this node out of the <form>
            appendChild(currentNode.getParentNode(), newElement);
            return;
        }

        // the regular case
        appendChild(currentNode, newElement);
    }

    /**
     * Looks for the first node on the stack (starting at the top) that has one of the given names.
     * @param searchedElementNames the node names to look for
     * @return the first matching node; the top of the stack (maybe null) if nothing matches
     */
    private DomNode findElementOnStack(final String... searchedElementNames) {
        for (final DomNode candidate : stack_) {
            if (ArrayUtils.contains(searchedElementNames, candidate.getNodeName())) {
                return candidate;
            }
        }

        // this is surely wrong but at least it won't throw a NPE
        return stack_.peek();
    }

    /**
     * @param nodeName the node name to check (may be null)
     * @return true if the name is one of thead, tbody, tfoot, caption or colgroup
     */
    private static boolean isTableChild(final String nodeName) {
        if (nodeName == null) {
            return false;
        }

        switch (nodeName) {
            case "thead":
            case "tbody":
            case "tfoot":
            case "caption":
            case "colgroup":
                return true;
            default:
                return false;
        }
    }

    /**
     * @param nodeName the node name to check (may be null)
     * @return true if the name is td or th
     */
    private static boolean isTableCell(final String nodeName) {
        // the length check is a cheap filter before comparing the strings
        return nodeName != null
                && nodeName.length() == 2
                && ("td".equals(nodeName) || "th".equals(nodeName));
    }

    /**
     * {@inheritDoc}
     *
     * Remembers whether the end tag was synthesized and then delegates to the superclass.
     */
    @Override
    public void endElement(final QName element, final Augmentations augs)
        throws XNIException {
        // keep only the synthesized flag because augs might change
        final boolean synthesized = isSynthesized(augs);
        lastTagWasSynthesized_ = synthesized;

        super.endElement(element, augs);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void endElement(final String namespaceURI, final String localName, final String qName)
        throws SAXException {

        final String tagLower = org.htmlunit.util.StringUtils.toRootLowerCase(localName);

        handleCharacters();

        if (page_.isParsingHtmlSnippet()) {
            if ("html".equals(tagLower) || "body".equals(tagLower)) {
                return;
            }
            if (stack_.size() == initialSize_) {
                // a  inside a 
 is valid for innerHTML processing
                // see HTMLParser2Test for more cases
                snippetStartNodeOverwritten_ = !"p".equals(tagLower);
                return;
            }
        }

        if ("svg".equals(tagLower)) {
            insideSvg_ = false;
        }

        // this only avoids a problem when the stack is empty here
        // but for this case we made the problem before - the balancing
        // is broken already
        if (stack_.isEmpty()) {
            return;
        }

        final DomNode previousNode = stack_.pop(); //remove currentElement from stack
        previousNode.setEndLocation(locator_.getLineNumber(), locator_.getColumnNumber());

        if ("form".equals(tagLower) && !lastTagWasSynthesized_) {
            // We get here if the 
 was on the same DOM tree depth as the  that started it,
            // otherwise HTMLTagBalancer gives us the end through ignoredEndElement()
            consumingForm_ = null;
        }

        if (!stack_.isEmpty()) {
            currentNode_ = stack_.peek();
        }

        final boolean postponed = page_.isParsingInlineHtmlSnippet();
        previousNode.onAllChildrenAddedToPage(postponed);
    }

    /**
     * {@inheritDoc}
     *
     * The characters are only collected here; handleCharacters() turns them into a text node.
     */
    @Override
    public void characters(final char[] ch, final int start, final int length) throws SAXException {
        this.characters_.append(ch, start, length);
    }

    /**
     * {@inheritDoc}
     *
     * Whitespace is collected in the same buffer as regular character data.
     */
    @Override
    public void ignorableWhitespace(final char[] ch, final int start, final int length) throws SAXException {
        this.characters_.append(ch, start, length);
    }

    /**
     * Turns the character data collected so far into a text node and attaches it to the
     * current element; the buffer is empty afterwards.
     */
    private void handleCharacters() {
        if (characters_.length() == 0) {
            // nothing collected
            return;
        }

        if (currentNode_ instanceof HtmlHtml) {
            // In HTML, the <html> node only has two possible children:
            // the <head> and the <body>; any text is ignored.
            characters_.clear();
            return;
        }

        final String text = characters_.toString();
        final DomText newTextNode = new DomText(page_, text);
        characters_.clear();

        if (org.apache.commons.lang3.StringUtils.isBlank(text)) {
            // whitespace only text always goes to the current node
            appendChild(currentNode_, newTextNode);
            return;
        }

        // malformed HTML: <table><tr>some text</tr></table> => text comes before the table
        if (currentNode_ instanceof HtmlTableRow) {
            final HtmlTable table = ((HtmlTableRow) currentNode_).getEnclosingTable();
            // may be null when called from Range.createContextualFragment
            if (table != null) {
                placeTextBeforeTable(table, newTextNode, text);
            }
            return;
        }
        if (currentNode_ instanceof HtmlTable) {
            placeTextBeforeTable((HtmlTable) currentNode_, newTextNode, text);
            return;
        }

        if (currentNode_ instanceof HtmlImage) {
            // the text is attached to the parent of the image
            currentNode_.getParentNode().appendChild(newTextNode);
            return;
        }

        // Use the normal behavior: append a text node for the accumulated text.
        appendChild(currentNode_, newTextNode);
    }

    /**
     * Places the text in front of the table: the text is added to a text node directly
     * preceding the table if there is one, otherwise the new text node is inserted.
     */
    private static void placeTextBeforeTable(final HtmlTable table, final DomText textNode, final String text) {
        final DomNode previous = table.getPreviousSibling();
        if (previous instanceof DomText) {
            final DomText existing = (DomText) previous;
            existing.setTextContent(existing.getWholeText() + text);
        }
        else {
            table.insertBefore(textNode);
        }
    }

    /**
     * {@inheritDoc}
     *
     * Flushes the remaining character data and stores the end location of the page.
     */
    @Override
    public void endDocument() throws SAXException {
        handleCharacters();

        final int line = locator_.getLineNumber();
        final int column = locator_.getColumnNumber();
        page_.setEndLocation(line, column);
    }

    /** {@inheritDoc} */
    @Override
    public void startPrefixMapping(final String prefix, final String uri) throws SAXException {
        // intentionally empty: this builder takes no action for this event
    }

    /** {@inheritDoc} */
    @Override
    public void endPrefixMapping(final String prefix) throws SAXException {
        // intentionally empty: this builder takes no action for this event
    }

    /** {@inheritDoc} */
    @Override
    public void processingInstruction(final String target, final String data) throws SAXException {
        // intentionally empty: this builder takes no action for this event
    }

    /** {@inheritDoc} */
    @Override
    public void skippedEntity(final String name) throws SAXException {
        // intentionally empty: this builder takes no action for this event
    }

    // LexicalHandler methods

    /**
     * {@inheritDoc}
     *
     * Flushes pending character data and appends a comment node to the current node.
     */
    @Override
    public void comment(final char[] ch, final int start, final int length) {
        // text in front of the comment has to be added first
        handleCharacters();

        final String commentText = new String(ch, start, length);
        appendChild(currentNode_, new DomComment(page_, commentText));
    }

    /** {@inheritDoc} */
    @Override
    public void endCDATA() {
        // intentionally empty: this builder takes no action for this event
    }

    /** {@inheritDoc} */
    @Override
    public void endDTD() {
        // intentionally empty: this builder takes no action for this event
    }

    /** {@inheritDoc} */
    @Override
    public void endEntity(final String name) {
        // intentionally empty: this builder takes no action for this event
    }

    /** {@inheritDoc} */
    @Override
    public void startCDATA() {
        // intentionally empty: this builder takes no action for this event
    }

    /**
     * {@inheritDoc}
     *
     * Creates the document type node, registers it at the page and appends it to the page.
     */
    @Override
    public void startDTD(final String name, final String publicId, final String systemId) {
        final DomDocumentType documentType = new DomDocumentType(page_, name, publicId, systemId);
        page_.setDocumentType(documentType);

        // handed over as plain Node (like before)
        final Node documentTypeNode = documentType;
        page_.appendChild(documentTypeNode);
    }

    /** {@inheritDoc} */
    @Override
    public void startEntity(final String name) {
        // intentionally empty: this builder takes no action for this event
    }

    /**
     * {@inheritDoc}
     *
     * Only ignored form end tags are of interest here: the consuming form is reset and,
     * unless a table is found on the stack first, the builder starts to move the following
     * nodes out of the form (see addNodeToRightParent).
     */
    @Override
    public void ignoredEndElement(final QName element, final Augmentations augs) {
        // HTMLTagBalancer brings us here if </form> was found in the source on a different
        // DOM tree depth (either above or below) to the <form> that started it
        if (!"form".equals(element.getLocalpart()) || consumingForm_ == null) {
            return;
        }
        consumingForm_ = null;

        // The </form> just goes missing if we hit a table first (really? just tables?)
        final boolean tableComesFirst = findElementOnStack("table", "form") instanceof HtmlTable;
        if (!tableComesFirst) {
            /*
             * This </form> was ignored by HTMLTagBalancer as it generates its own
             * </form> at the end of the depth with the starting <form>.
             * e.g. This:
             * | <form>
             * |   <div>
             * |     </form> <!--ignored by HTMLTagBalancer-->
             * |   </div>
             * |   <input>
             *
             * is turned into:
             * | <form>
             * |   <div>
             * |   </div>
             * |   <input>
             * | </form> <!--synthesized by HTMLTagBalancer-->
             *
             * but this isn't suitable for us because </form> shouldn't be ignored but
             * rather moved directly behind the tree it's in to instead become:
             * | <form>
             * |   <div>
             * |   </div>
             * | </form>
             * | <input>
             */
            // We cater for this by moving out nodes such as the <input> in the above
            // diagram out of the form
            formEndingIsAdjusting_ = true;
        }
    }

    /**
     * {@inheritDoc}
     *
     * When multiple html/body elements are encountered, the attributes of the discarded
     * elements are used when not previously defined.
     */
    @Override
    public void ignoredStartElement(final QName elem, final XMLAttributes attrs, final Augmentations augs) {
        // without attributes or a known body there is nothing to copy
        if (attrs == null || body_ == null) {
            return;
        }

        final String localPart = elem.getLocalpart();
        // both names of interest have four characters
        if (localPart == null || localPart.length() != 4) {
            return;
        }

        switch (localPart.toLowerCase(Locale.ROOT)) {
            case "body":
                copyAttributes(body_, attrs);
                break;
            case "html":
                copyAttributes((DomElement) body_.getParentNode(), attrs);
                break;
            default:
                break;
        }
    }

    /**
     * Copies the attributes the target element does not have so far; attribute names are
     * converted to lower case. For attributes starting with "on" an event handler is created
     * if the javascript engine is enabled and the target is backed by a HTMLBodyElement.
     * @param to the element that receives the attributes
     * @param attrs the attributes to copy from
     */
    private static void copyAttributes(final DomElement to, final XMLAttributes attrs) {
        for (int index = 0, count = attrs.getLength(); index < count; index++) {
            final String name = StringUtils.toRootLowerCase(attrs.getLocalName(index));
            if (to.getAttributes().getNamedItem(name) != null) {
                // already defined - the existing value wins
                continue;
            }

            final String attributeValue = attrs.getValue(index);
            to.setAttribute(name, attributeValue);

            final boolean needsEventHandler = name.startsWith("on")
                    && to.getPage().getWebClient().isJavaScriptEngineEnabled()
                    && to.getScriptableObject() instanceof HTMLBodyElement;
            if (needsEventHandler) {
                final HTMLBodyElement jsBody = to.getScriptableObject();
                jsBody.createEventHandlerFromAttribute(name, attributeValue);
            }
        }
    }

    /**
     * {@inheritDoc}
     *
     * During the parsing this builder is registered as the DOM builder of the page;
     * the previously registered builder is restored when the parsing is done.
     */
    @Override
    public void parse(final XMLInputSource inputSource) throws XNIException, IOException {
        final HTMLParserDOMBuilder previousBuilder = page_.getDOMBuilder();
        page_.setDOMBuilder(this);
        try {
            super.parse(inputSource);
        }
        finally {
            // restore the builder even if the parsing failed
            page_.setDOMBuilder(previousBuilder);
        }
    }

    /**
     * @return the body element seen during parsing; null if there was none so far
     */
    HtmlElement getBody() {
        return this.body_;
    }

    /**
     * @param augs the augmentations of the event (may be null)
     * @return true if the augmentations are a HTMLEventInfo flagged as synthesized
     */
    private static boolean isSynthesized(final Augmentations augs) {
        if (!(augs instanceof HTMLEventInfo)) {
            return false;
        }
        final HTMLEventInfo eventInfo = (HTMLEventInfo) augs;
        return eventInfo.isSynthesized();
    }

    /**
     * Appends the child to the parent; for a template the child is appended
     * to the content of the template instead.
     * @param parent the node to append to
     * @param child the node to append
     */
    private static void appendChild(final DomNode parent, final DomNode child) {
        final DomNode target = parent instanceof HtmlTemplate
                ? ((HtmlTemplate) parent).getContent()
                : parent;
        target.appendChild(child);
    }
}