org.enhydra.xml.xmlc.html.parsers.HTMLDocBuilder Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of xmlc-all-runtime
Enhydra XMLC compiler.
The newest version!
/*
 * Enhydra Java Application Server Project
 * 
 * The contents of this file are subject to the Enhydra Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License on
 * the Enhydra web site ( http://www.enhydra.org/ ).
 * 
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 
 * the License for the specific terms governing rights and limitations
 * under the License.
 * 
 * The Initial Developer of the Enhydra Application Server is Lutris
 * Technologies, Inc. The Enhydra Application Server and portions created
 * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
 * All Rights Reserved.
 * 
 * Contributor(s):
 * 
 * $Id: HTMLDocBuilder.java,v 1.2 2005/01/26 08:29:24 jkjome Exp $
 */

package org.enhydra.xml.xmlc.html.parsers;

import java.util.HashSet;
import java.util.Locale;

import org.enhydra.xml.xmlc.XMLCError;
import org.enhydra.xml.xmlc.XMLCException;
import org.enhydra.xml.xmlc.dom.XMLCDocument;
import org.enhydra.xml.xmlc.dom.XMLCDomFactory;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.html.HTMLDocument;
import org.xml.sax.InputSource;

/**
 * Class used by HTML parser to build a DOM.
 * 
 * The document builder functions assume they are being called in the order the
 * document is parsed.  They keep a current node where new child nodes are
 * appended.
 */
public class HTMLDocBuilder {
    /**
     * XMLC Document object.
     */
    private final XMLCDocument fXmlcDoc;

    /**
     * Factory for creating the document.
     */
    private final XMLCDomFactory fDomFactory;

    /**
     * The document.
     */
    private final HTMLDocument fDocument;

    /**
     * Have we got the parser callback for the document element.
     * This is used to determine where to insert comments, since the
     * document element pre-exists.
     */
    private boolean fGotDocElement;

    /**
     * The current node that is being constructed.  This functions as a stack
     * during document construction: pushing is done by descending to a new
     * child, popping by moving to the parent node.
     */
    private Node fCurrentNode;

    /**
     * Elements that have already been closed by fixUnrecognizedTagNesting.
     * Created lazily on the first call to that method.  Kept as a raw
     * collection since this file does not use generics.
     */
    private HashSet fClosedUnrecognizedElements = null;

    /**
     * Constructor.  Creates the XMLCDocument object and the HTML document,
     * and makes the document the current node.
     *
     * @param domFactory Factory used to create the document.
     * @param input Input source being parsed; if it specifies an encoding,
     *  the encoding is recorded in the XMLC document.
     * @throws XMLCException If the factory creates a document that is not
     *  an HTMLDocument.
     */
    public HTMLDocBuilder(XMLCDomFactory domFactory,
                          InputSource input) throws XMLCException {
        fXmlcDoc = new XMLCDocument(domFactory);
        fDomFactory = domFactory;
        Document doc = fXmlcDoc.createDocument(null, null);
        if (!(doc instanceof HTMLDocument)) {
            throw new XMLCException("DOM factory ("
                                    + fDomFactory.getClass().getName()
                                    + ") created a document that was not a HTMLDocument, got "
                                    + doc.getClass().getName());
        }
        fDocument = (HTMLDocument)doc;
        fCurrentNode = fDocument;

        String encoding = input.getEncoding();
        if (encoding != null) {
            fXmlcDoc.setEncoding(encoding);
        }
    }

    /**
     * Generate error about a method being called that should not
     * be called before the document is created.
     *
     * @throws XMLCError Always.
     */
    private void docNotCreatedError() {
        throw new XMLCError("Bug: parser event on document contents occured before document is created");
    }

    /**
     * Get the XMLC document associated with this object.
     *
     * @return The XMLC document created by the constructor.
     */
    public XMLCDocument getXMLCDocument() {
        return fXmlcDoc;
    }

    /**
     * Determine if an element name is a frameset-only element
     * (<code>frameset</code> or <code>noframes</code>, compared without
     * regard to case).
     */
    private boolean isFrameSetElement(String tagName) {
        return tagName.equalsIgnoreCase("frameset")
            || tagName.equalsIgnoreCase("noframes");

    }

    /**
     * Start a new Element.  The new element is appended to the current node
     * and becomes the current node.  The <code>html</code> element is not
     * created, since the document element already exists; it just becomes
     * the current node.
     *
     * @param tagName Name of the element being started.
     */
    public void startElement(String tagName) {
        // Document element already exists.  Compare without regard to case,
        // as is done for the frameset elements, so that a differently-cased
        // name does not result in a second document element being created.
        if (tagName.equalsIgnoreCase("html")) {
            fCurrentNode = fDocument.getDocumentElement();
            fGotDocElement = true;
        } else {
            Element element = fDocument.createElement(tagName);
            fCurrentNode.appendChild(element);
            fCurrentNode = element;
        }

        if (isFrameSetElement(tagName)) {
            fXmlcDoc.setIsHtmlFrameSet();
        }
    }
    
    /**
     * Add an attribute to the element on the top of the
     * stack.
     *
     * @param name Attribute name.
     * @param value Attribute value.
     * @throws XMLCError If the node on the top of the stack is not an
     *  element.
     */
    public void addAttribute(String name, String value) {
        // Report a broken parse stack the same way finishElement does,
        // rather than failing with a ClassCastException.
        if (!(fCurrentNode instanceof Element)) {
            throw new XMLCError("DOM node top of stack not a element for attribute \""
                                + name + "\"");
        }
        ((Element)fCurrentNode).setAttribute(name, value);
    }

    /**
     * Finish the element being constructed.  The parent of the element
     * becomes the current node.
     *
     * @throws XMLCError If the stack is empty or the node on the top of the
     *  stack is not an element.
     */
    public void finishElement() {
        if (fCurrentNode == null) {
            throw new XMLCError("node stack underflow; malformed document");
        }
        if (!(fCurrentNode instanceof Element)) {
            throw new XMLCError("DOM node top of stack not a element for end tag");
        }
        fCurrentNode = fCurrentNode.getParentNode();
    }

    /**
     * Add a Text node as the last child of the current node.
     *
     * @param data Contents of the text node.
     */
    public void addTextNode(String data) {
        if (fDocument == null) {
            docNotCreatedError();
        }
        fCurrentNode.appendChild(fDocument.createTextNode(data));
    }

    /**
     * Add a Comment node.  A comment encountered at the document level
     * before the document element has been seen is inserted before the
     * document element; otherwise it is appended to the current node.
     *
     * @param data Contents of the comment.
     */
    public void addComment(String data) {
        Comment comment = fDocument.createComment(data);
        // Handle insertion before document element (current should always
        // be document, but we might be handling some invalid node).
        if ((!fGotDocElement) && (fCurrentNode == fDocument)) {
            fCurrentNode.insertBefore(comment, fDocument.getDocumentElement());
        } else {
            fCurrentNode.appendChild(comment);
        }
    }

    /**
     * Get the node on the top of the stack during parsing.
     * FIXME: Added to work around bugs in the swing parser.
     *
     * @return The current node.
     */
    public Node getCurrentNode() {
        return fCurrentNode;
    }

    /**
     * Pop the current node off of the stack.  This is *only* used
     * during error recover from a broken parser.
     * FIXME: Added to work around bugs in the swing parser.
     *
     * @throws XMLCError If the stack is already empty.
     */
    public void popCurrentNode() {
        if (fCurrentNode == null) {
            throw new XMLCError("node stack underflow; malformed document");
        }
        fCurrentNode = fCurrentNode.getParentNode();
    }

    /**
     * Recursive part of findUnrecognizedTag.  Checks the children of
     * <code>parent</code>, then repeats the search one level up the tree.
     *
     * @param tagNameUpper Upper-case tag name to compare to node names.
     * @param parent Node whose children are searched.
     * @return The matching node that has not already been closed, or null
     *  if none is found.
     */
    private Node recursiveFindUnrecognizedTag(String tagNameUpper,
                                              Node parent) {
        // Search right to left.
        for (Node child = parent.getLastChild(); child != null;
             child = child.getPreviousSibling()) {
            if (child.getNodeName().equals(tagNameUpper)
                && !fClosedUnrecognizedElements.contains(child)) {
                return child;  // Found it!
            }
        }
        
        // Search up the tree.
        Node grandParent = parent.getParentNode();
        if (grandParent != null) {
            return recursiveFindUnrecognizedTag(tagNameUpper, grandParent);
        } else {
            return null;
        }
    }

    /**
     * Find the element for an unrecognized tag.  This searches up the parse
     * stack, starting with the node on the top of the stack.  At each level
     * the children of the node are searched from right to left for an
     * element with the specified name that has not already been closed.
     *
     * @param tagNameUpper Upper-case name of the tag being closed.
     * @return The element created for the opening tag.
     * @throws XMLCException If no matching element is found.
     * @throws XMLCError If the matching element already has children.
     */
    private Node findUnrecognizedTag(String tagNameUpper) throws XMLCException {
        Node openingElement = null;
        if (fCurrentNode != null) {
            openingElement = recursiveFindUnrecognizedTag(tagNameUpper,
                                                          fCurrentNode);
        }
        if (openingElement == null) {
            throw new XMLCException("could not find matching opening tag for </"
                                    + tagNameUpper + ">");
        }
        if (openingElement.getFirstChild() != null) {
            throw new XMLCError("attempt to fix nesting for <" + tagNameUpper
                                + "> found a node that already has children");
        }
        return openingElement;
    }

    /**
     * Make nodes to the right of an element the element's children.
     * Appending a sibling removes it from its old position, so the loop
     * ends when the element has no following sibling.
     */
    private void makeRightSiblingsChildren(Node openingElement) {
        Node sibling;
        while ((sibling = openingElement.getNextSibling()) != null) {
            openingElement.appendChild(sibling);
        }
    }

    /**
     * Used to correct nesting when handling an unknown tag.  This is called
     * when the end tag is encountered. The tree is walked backwards from the
     * top of the stack to find the element pushed for the open tag.  All of
     * the siblings to the right of that element are moved to be children of
     * the element.  The stack is popped back until the parent of the
     * element being closed is on top.  This was put in to support the
     * swing parser.
     *
     * @param tagName Name of the unrecognized tag being closed.
     * @throws XMLCException If no element matching the tag is found.
     */
    public void fixUnrecognizedTagNesting(String tagName)
        throws XMLCException {
        // Use a fixed locale so the result does not depend on the default
        // locale (e.g. Turkish maps 'i' to a dotted capital I, which would
        // never match a node name).
        String tagNameUpper = tagName.toUpperCase(Locale.ENGLISH);
        if (fClosedUnrecognizedElements == null) {
            fClosedUnrecognizedElements = new HashSet();
        }

        // Find and correct
        Node openingElement = findUnrecognizedTag(tagNameUpper);
        makeRightSiblingsChildren(openingElement);
        fClosedUnrecognizedElements.add(openingElement);

        // Clean up the stack
        Node openingParent = openingElement.getParentNode();
        while (fCurrentNode != openingParent) {
            popCurrentNode();
        }
    }
}