All downloads are free. Search and download functionality uses the official Maven repository.

org.enhydra.xml.xmlc.html.parsers.HTMLDocBuilder Maven / Gradle / Ivy

The newest version!
/*
 * Enhydra Java Application Server Project
 * 
 * The contents of this file are subject to the Enhydra Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License on
 * the Enhydra web site ( http://www.enhydra.org/ ).
 * 
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 
 * the License for the specific terms governing rights and limitations
 * under the License.
 * 
 * The Initial Developer of the Enhydra Application Server is Lutris
 * Technologies, Inc. The Enhydra Application Server and portions created
 * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
 * All Rights Reserved.
 * 
 * Contributor(s):
 * 
 * $Id: HTMLDocBuilder.java,v 1.2 2005/01/26 08:29:24 jkjome Exp $
 */

package org.enhydra.xml.xmlc.html.parsers;

import java.util.HashSet;

import org.enhydra.xml.xmlc.XMLCError;
import org.enhydra.xml.xmlc.XMLCException;
import org.enhydra.xml.xmlc.dom.XMLCDocument;
import org.enhydra.xml.xmlc.dom.XMLCDomFactory;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.html.HTMLDocument;
import org.xml.sax.InputSource;

/**
 * Class used by HTML parser to build a DOM.
 * 

* The document builder functions assume they are being called in the order the * document is parsed. They keep a current node where new child nodes are * appended. */ public class HTMLDocBuilder { /** * XMLC Document object. */ private XMLCDocument fXmlcDoc; /** * Factory for creating the document. */ private XMLCDomFactory fDomFactory; /** * The document. */ private HTMLDocument fDocument; /** * Have we got the parser callback for the document element. * This is used to determine where to insert comments, since the * document element pre-exists. */ private boolean fGotDocElement; /** * The current node that is being constructed. This functions as a stack * during document construction. */ private Node fCurrentNode; /** * Table used to determine what tags have been closed by * fixUnrecognizedTagNesting. */ private HashSet fClosedUnrecognizedElements = null; /** * Constructor. Creates XMLCDocument object. */ public HTMLDocBuilder(XMLCDomFactory domFactory, InputSource input) throws XMLCException { fXmlcDoc = new XMLCDocument(domFactory); fDomFactory = domFactory; Document doc = fXmlcDoc.createDocument(null, null); if (!(doc instanceof HTMLDocument)) { throw new XMLCException("DOM factory (" + fDomFactory.getClass().getName() + ") created a document that was not a HTMLDocument, got " + doc.getClass().getName()); } fDocument = (HTMLDocument)doc; fCurrentNode = fDocument; String encoding = input.getEncoding(); if (encoding != null) { fXmlcDoc.setEncoding(encoding); } } /** * Generate error about a method being called that should * be called before the document is created. */ private void docNotCreatedError() { throw new XMLCError("Bug: parser event on document contents occured before document is created"); } /** * Get the XMLC document associated with this object. */ public XMLCDocument getXMLCDocument() { return fXmlcDoc; } /** * Determine if an element name is a frameset-only element. 
*/ private boolean isFrameSetElement(String tagName) { return tagName.equalsIgnoreCase("frameset") || tagName.equalsIgnoreCase("noframes"); } /** * Start a new Element. */ public void startElement(String tagName) { // Document element already exists if (tagName.equals("html")) { fCurrentNode = fDocument.getDocumentElement(); fGotDocElement = true; } else { Element element = fDocument.createElement(tagName); fCurrentNode.appendChild(element); fCurrentNode = element; } if (isFrameSetElement(tagName)) { fXmlcDoc.setIsHtmlFrameSet(); } } /** * Add an attribute to the element on the top of the * stack. */ public void addAttribute(String name, String value) { ((Element)fCurrentNode).setAttribute(name, value); } /** * Finish the element being constructed. */ public void finishElement() { if (fCurrentNode == null) { throw new XMLCError("node stack underflow; malformed document"); } if (!(fCurrentNode instanceof Element)) { throw new XMLCError("DOM node top of stack not a element for end tag"); } fCurrentNode = fCurrentNode.getParentNode(); } /** * Add a Text node. */ public void addTextNode(String data) { if (fDocument == null) { docNotCreatedError(); } fCurrentNode.appendChild(fDocument.createTextNode(data)); } /** * Add a Comment node. */ public void addComment(String data) { Comment comment = fDocument.createComment(data); // Handle insertion before document element (current should always // be document, but we might be handling some invalid node). if ((!fGotDocElement) && (fCurrentNode == fDocument)) { fCurrentNode.insertBefore(comment, fDocument.getDocumentElement()); } else { fCurrentNode.appendChild(comment); } } /** * Get the node on the top of the stack during parsing. * FIXME: Added to work around bugs in the swing parser. */ public Node getCurrentNode() { return fCurrentNode; } /** * Pop the current node off of the stack. This is *only* used * during error recover from a broken parser. * FIXME: Added to work around bugs in the swing parser. 
*/ public void popCurrentNode() { fCurrentNode = fCurrentNode.getParentNode(); } /** * Recursive part of findUnrecognizedTag */ private Node recursiveFindUnrecognizedTag(String tagNameUpper, Node parent) { // Search right to left. for (Node child = parent.getLastChild(); child != null; child = child.getPreviousSibling()) { if (child.getNodeName().equals(tagNameUpper) && !fClosedUnrecognizedElements.contains(child)) { return child; // Found it! } } // Search up the tree. Node grandParent = parent.getParentNode(); if (grandParent != null) { return recursiveFindUnrecognizedTag(tagNameUpper, grandParent); } else { return null; } } /** * Find the element for an unrecognized tag. This searches up the parse * stack, looking at the siblings of each node on the stack. This starts * with the parent of the top of the stack, and searches its children from * right to left. Thus the first node checked is node on the top of the * stack. */ private Node findUnrecognizedTag(String tagNameUpper) throws XMLCException { Node openingElement = null; if (fCurrentNode != null) { openingElement = recursiveFindUnrecognizedTag(tagNameUpper, fCurrentNode); } if (openingElement == null) { throw new XMLCException("could not find matching opening tag for "); } if (openingElement.getFirstChild() != null) { throw new XMLCError("attempt to fix nesting for found a node that already has children"); } return openingElement; } /** * Make nodes to the right of an element the element's children. */ private void makeRightSiblingsChildren(Node openingElement) { Node parent = openingElement.getParentNode(); Node sibling; while ((sibling = openingElement.getNextSibling()) != null) { openingElement.appendChild(sibling); } } /** * Used to correct nesting when handling an unknown tag. This is called * when the end tag is encountered. The tree is walked backwards from the * top of the stack to find the element pushed for the open tag. 
All of * the siblings to the right of that element are moved to be children of * the element. The stack is popped back until the parent of the * element being closed is on top. This was put in to support the * swing parser. */ public void fixUnrecognizedTagNesting(String tagName) throws XMLCException { String tagNameUpper = tagName.toUpperCase(); if (fClosedUnrecognizedElements == null) { fClosedUnrecognizedElements = new HashSet(); } // Find and correct Node openingElement = findUnrecognizedTag(tagNameUpper); makeRightSiblingsChildren(openingElement); fClosedUnrecognizedElements.add(openingElement); // Clean up the stack Node openingParent = openingElement.getParentNode(); while (fCurrentNode != openingParent) { popCurrentNode(); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy