All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.enhydra.xml.lazydom.html.LazyHTMLDocument Maven / Gradle / Ivy

The newest version!
/*
 * Enhydra Java Application Server Project
 * 
 * The contents of this file are subject to the Enhydra Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License on
 * the Enhydra web site ( http://www.enhydra.org/ ).
 * 
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 
 * the License for the specific terms governing rights and limitations
 * under the License.
 * 
 * The Initial Developer of the Enhydra Application Server is Lutris
 * Technologies, Inc. The Enhydra Application Server and portions created
 * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
 * All Rights Reserved.
 * 
 * Contributor(s):
 * 
 * $Id: LazyHTMLDocument.java,v 1.4 2005/01/26 08:29:24 jkjome Exp $
 */
/*
 * The Apache Software License, Version 1.1
 *
 *
 * Copyright (c) 1999,2000 The Apache Software Foundation.  All rights 
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer. 
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:  
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Xerces" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written 
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation and was
 * originally based on software copyright (c) 1999, International
 * Business Machines, Inc., http://www.apache.org.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */
package org.enhydra.xml.lazydom.html;

import java.io.StringWriter;
import java.lang.reflect.Constructor;
import java.util.HashMap;
import java.util.Locale;

import org.enhydra.xml.dom.DOMAccess;
import org.enhydra.xml.lazydom.LazyDocument;
import org.enhydra.xml.lazydom.LazyElement;
import org.enhydra.xml.lazydom.LazyNode;
import org.enhydra.xml.lazydom.TemplateDOM;
import org.w3c.dom.Attr;
import org.w3c.dom.DOMException;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.html.HTMLCollection;
import org.w3c.dom.html.HTMLDocument;
import org.w3c.dom.html.HTMLElement;
import org.w3c.dom.html.HTMLTitleElement;

/*
 * LazyDOM: This is a modified version of org.apache.html.dom.HTMLDocumentImpl
 * modified to implement the LazyDOM.  While most of the HTMLElement classes
 * are created automatically using a sed script, this was a bit complex for
 * this class, so it was done by hand.  A diff with the Xerces class should
 * make an upgrade of this class easy.
 */


/**
 * Implements an HTML document. Provides access to the top level element in the
 * document, its body and title.
 * 

* Several methods create new nodes of all basic types (comment, text, element, * etc.). These methods create new nodes but do not place them in the document * tree. The nodes may be placed in the document tree using {@link * org.w3c.dom.Node#appendChild} or {@link org.w3c.dom.Node#insertBefore}, or * they may be placed in some other document tree. *

* Note: <FRAMESET> documents are not supported at the moment, neither * are direct document writing ({@link #open}, {@link #write}) and HTTP attribute * methods ({@link #getURL}, {@link #getCookie}). * * * @version $Revision: 1.4 $ $Date: 2005/01/26 08:29:24 $ * @author Assaf Arkin * @see org.w3c.dom.html.HTMLDocument */ public class LazyHTMLDocument extends LazyDocument implements HTMLDocument { /** * Holds {@link HTMLCollectionImpl} object with live collection of all * anchors in document. This reference is on demand only once. */ private HTMLCollectionImpl _anchors; /** * Holds {@link HTMLCollectionImpl} object with live collection of all * forms in document. This reference is on demand only once. */ private HTMLCollectionImpl _forms; /** * Holds {@link HTMLCollectionImpl} object with live collection of all * images in document. This reference is on demand only once. */ private HTMLCollectionImpl _images; /** * Holds {@link HTMLCollectionImpl} object with live collection of all * links in document. This reference is on demand only once. */ private HTMLCollectionImpl _links; /** * Holds {@link HTMLCollectionImpl} object with live collection of all * applets in document. This reference is on demand only once. */ private HTMLCollectionImpl _applets; /** * Holds string writer used by direct manipulation operation ({@link #open}. * {@link #write}, etc) to write new contents into the document and parse * that text into a document tree. */ private StringWriter _writer; /** * Holds names and classes of HTML element type constructors. When * an element with a particular tag name is created, the matching * {@link java.lang.reflect.Constructor} is used to create the element * object. For example, <A> matches the constructor for * {@link HTMLAnchorElementImpl}. This static table is shared across all * HTML documents. 
* * @see #createElement */ private static HashMap _elementConstHTML; private static HashMap _tmpElementConstHTML; /** * Signature used to locate constructor of HTML element classes. This * static array is shared across all HTML documents. * * @see #createElement */ private static final Class[] _elemClassSigHTML = new Class[] { LazyHTMLDocument.class, LazyElement.class, String.class }; /** * @see Document#getImplementation */ public DOMImplementation getImplementation() { return LazyHTMLDOMImplementation.getDOMImplementation(); } /** * Find the direct child element of a node given its name. */ private Node getDirectChildElement(String name, Node root) { for (Node child = root.getFirstChild(); child != null; child = child.getNextSibling()) { if (child.getNodeName().equals(name)) { return child; } } return null; } public synchronized Element getDocumentElement() { // Enhydra modified: Original Xerces code tried to reorder nodes to // make things right, which moved around comments to weird locations. // Throwing an error would be more appropriate, but we were afraid of // breaking existing code, so just get the node. Element html = (Element)getDirectChildElement("HTML", this); if (html == null) { // Create, HTML element must exist as a child of the document. html = new HTMLHtmlElementImpl(this, null, "HTML"); appendChild(html); } return html; } /** * Obtains the <HEAD> element in the document, creating one if does * not exist before. The <HEAD> element is the first element in the * <HTML> in the document. The <HTML> element is obtained by * calling {@link #getDocumentElement}. If the element does not exist, one * is created. *

* Called by {@link #getTitle}, {@link #setTitle}, {@link #getBody} and * {@link #setBody} to assure the document has the <HEAD> element * correctly placed. * * @return The <HEAD> element */ public synchronized HTMLElement getHead() { // Enhydra modified: Original Xerces code tried to reorder nodes to // make things right, which moved around comments to weird locations. // Throwing an error would be more appropriate, but we were afraid of // breaking existing code, so just get the node. // Search for HEAD under HTML element. Element html = getDocumentElement(); HTMLElement head = (HTMLElement)getDirectChildElement("HEAD", html); if (head == null) { // Head does not exist, create a new one. head = new HTMLHeadElementImpl(this, null, "HEAD"); html.insertBefore(head, html.getFirstChild()); } return head; } public synchronized String getTitle() { // Enhydra modified: Original Xerces code is some what strange, it // called getElementsByTagName() twice, but only used the second // result. We assume it' a direct child of HEAD (although more // error checking might be better). HTMLTitleElement title = (HTMLTitleElement)getDirectChildElement("TITLE", getHead()); if (title == null) { return ""; // No TITLE found, return an empty string. } else { return title.getText(); } } public synchronized void setTitle(String newTitle) { // Enhydra modified: Original Xerces code used getElementsByTagName() // to find the title. We assume it' a direct child of HEAD (although // more error checking might be better). HTMLElement head = getHead(); HTMLTitleElement title = (HTMLTitleElement)getDirectChildElement("TITLE", head); if (title == null) { title = new HTMLTitleElementImpl(this, null, "TITLE"); } title.setText(newTitle); } /** * Find a BODY or FRAMESET element. 
*/ private HTMLElement findBody(Element html) { HTMLElement body = (HTMLElement)getDirectChildElement("BODY", html); if (body == null) { body = (HTMLElement)getDirectChildElement("FRAMESET", html); } return body; } public synchronized HTMLElement getBody() { // Enhydra modified: Original Xerces code tried to reorder nodes to // make things right, which moved around comments to weird locations. // Throwing an error would be more appropriate, but we were afraid of // breaking existing code, so just get the node. // Find BODY or FRAMESET Element html = getDocumentElement(); HTMLElement body = findBody(html); if (body == null) { // Create new body, and place it a the end of the HTML element. body = new HTMLBodyElementImpl(this, null, "BODY"); html.appendChild(body); } return body; } public synchronized void setBody(HTMLElement newBody) { // Enhydra modified: Original Xerces code tried to reorder nodes to // make things right, which moved around comments to weird locations. // Throwing an error would be more appropriate, but we were afraid of // breaking existing code, so just get the node. 
// Find BODY or FRAMESET Element html = getDocumentElement(); HTMLElement body = findBody(html); if (body == null) { html.appendChild(newBody); } else { html.replaceChild(newBody, body); } } public synchronized Element getElementById( String elementId ) { return getElementById( elementId, this ); } public NodeList getElementsByName( String elementName ) { return new NameNodeListImpl( this, elementName ); } public final NodeList getElementsByTagName( String tagName ) { return super.getElementsByTagName( tagName.toUpperCase() ); } public final NodeList getElementsByTagNameNS( String namespaceURI, String localName ) { if ( namespaceURI != null && namespaceURI.length() > 0 ) return super.getElementsByTagNameNS( namespaceURI, localName.toUpperCase() ); else return super.getElementsByTagName( localName.toUpperCase() ); } public Element createElementNS( String namespaceURI, String qualifiedName ) { if ( namespaceURI == null || namespaceURI.length() == 0 ) return createElement( qualifiedName ); else return super.createElementNS( namespaceURI, qualifiedName ); } public Element createElement( LazyElement template, String tagName ) throws DOMException { Constructor cnst; // First, make sure tag name is all upper case, next get the associated // element class. If no class is found, generate a generic HTML element. // Do so also if an unexpected exception occurs. tagName = tagName.toUpperCase(); cnst = (Constructor) _elementConstHTML.get( tagName ); if ( cnst != null ) { // Get the constructor for the element. The signature specifies an // owner document and a tag name. Use the constructor to instantiate // a new object and return it. 
try { return (Element) cnst.newInstance( new Object[] { this, template, tagName } ); } catch ( Exception except ) { Throwable thrw; if ( except instanceof java.lang.reflect.InvocationTargetException ) thrw = ( (java.lang.reflect.InvocationTargetException) except ).getTargetException(); else thrw = except; throw new IllegalStateException( "HTM15 Tag '" + tagName + "' associated with an Element class that failed to construct.\n" + tagName); } } return new LazyHTMLElement( this, template, tagName ); } /** * Creates an Attribute having this Document as its OwnerDoc. * Overrides {@link org.enhydra.apache.xerces.dom.DocumentImpl#createAttribute} and returns * and attribute whose name is lower case. * * @param name The name of the attribute * @return An attribute whose name is all lower case * @throws DOMException(INVALID_NAME_ERR) if the attribute name * is not acceptable */ public Attr createAttribute( String name ) throws DOMException { return super.createAttribute( name.toLowerCase() ); } public String getReferrer() { // Information not available on server side. return null; } public String getDomain() { // Information not available on server side. return null; } public String getURL() { // Information not available on server side. return null; } public String getCookie() { // Information not available on server side. return null; } public void setCookie( String cookie ) { // Information not available on server side. 
} public HTMLCollection getImages() { // For more information see HTMLCollection#collectionMatch if ( _images == null ) _images = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.IMAGE ); return _images; } public HTMLCollection getApplets() { // For more information see HTMLCollection#collectionMatch if ( _applets == null ) _applets = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.APPLET ); return _applets; } public HTMLCollection getLinks() { // For more information see HTMLCollection#collectionMatch if ( _links == null ) _links = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.LINK ); return _links; } public HTMLCollection getForms() { // For more information see HTMLCollection#collectionMatch if ( _forms == null ) _forms = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.FORM ); return _forms; } public HTMLCollection getAnchors() { // For more information see HTMLCollection#collectionMatch if ( _anchors == null ) _anchors = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.ANCHOR ); return _anchors; } public void open() { // When called an in-memory is prepared. The document tree is still // accessible the old way, until this writer is closed. if ( _writer == null ) _writer = new StringWriter(); } public void close() { // ! NOT IMPLEMENTED, REQUIRES PARSER ! if ( _writer != null ) { _writer = null; } } public void write( String text ) { // Write a string into the in-memory writer. if ( _writer != null ) _writer.write( text ); } public void writeln( String text ) { // Write a line into the in-memory writer. if ( _writer != null ) _writer.write( text + "\n" ); } public Node cloneNode( boolean deep ) { LazyHTMLDocument clone; LazyNode node; clone = new LazyHTMLDocument(); if ( deep ) { node = (LazyNode) getFirstChild(); while ( node != null ) { clone.appendChild( clone.importNode( node, true ) ); node = (LazyNode) node.getNextSibling(); } } return clone; } /** * Recursive method retreives an element by its id attribute. 
* This is LazyDOM aware and will not expand elements during search. * * @param elementId The id value to look for * @return The node in which to look for */ private Element getElementById(String elementId, Node node){ Node child; Element element; child = DOMAccess.accessFirstChild(this, node); while (child != null) { if (child instanceof Element) { element = (Element)child; Attr attr = DOMAccess.accessAttribute(this, (Element)child, null, "id"); if ((attr != null) && elementId.equals(DOMAccess.accessAttributeValue(this, attr))) { return DOMAccess.getExpandedElement(this, element); } // Recurse element = getElementById(elementId, child); if (element != null) { return DOMAccess.getExpandedElement(this, element); } } child = DOMAccess.accessNextSibling(this, child); } return null; } /** * Called by the constructor to populate the element constructor list * (see {@link #_elementConstHTML}). Will be called multiple times but * populate the list * only the first time. Replacement for static * constructor. */ private static void populateElementTypes() { // This class looks like it is due to some strange // (read: inconsistent) JVM bugs. // Initially all this code was placed in the static constructor, // but that caused some early JVMs (1.1) to go mad, and if a // class could not be found (as happened during development), // the JVM would die. // Bertrand Delacretaz pointed out // several configurations where HTMLAnchorElementImpl.class // failed, forcing me to revert back to Class.forName(). 
if ( _elementConstHTML != null ) return; _tmpElementConstHTML = new HashMap( 63 ); populateElementType( "A", "HTMLAnchorElementImpl" ); populateElementType( "APPLET", "HTMLAppletElementImpl" ); populateElementType( "AREA", "HTMLAreaElementImpl" ); populateElementType( "BASE", "HTMLBaseElementImpl" ); populateElementType( "BASEFONT", "HTMLBaseFontElementImpl" ); populateElementType( "BLOCKQUOTE", "HTMLQuoteElementImpl" ); populateElementType( "BODY", "HTMLBodyElementImpl" ); populateElementType( "BR", "HTMLBRElementImpl" ); populateElementType( "BUTTON", "HTMLButtonElementImpl" ); populateElementType( "DEL", "HTMLModElementImpl" ); populateElementType( "DIR", "HTMLDirectoryElementImpl" ); populateElementType( "DIV", "HTMLDivElementImpl" ); populateElementType( "DL", "HTMLDListElementImpl" ); populateElementType( "FIELDSET", "HTMLFieldSetElementImpl" ); populateElementType( "FONT", "HTMLFontElementImpl" ); populateElementType( "FORM", "HTMLFormElementImpl" ); populateElementType( "FRAME","HTMLFrameElementImpl" ); populateElementType( "FRAMESET", "HTMLFrameSetElementImpl" ); populateElementType( "HEAD", "HTMLHeadElementImpl" ); populateElementType( "H1", "HTMLHeadingElementImpl" ); populateElementType( "H2", "HTMLHeadingElementImpl" ); populateElementType( "H3", "HTMLHeadingElementImpl" ); populateElementType( "H4", "HTMLHeadingElementImpl" ); populateElementType( "H5", "HTMLHeadingElementImpl" ); populateElementType( "H6", "HTMLHeadingElementImpl" ); populateElementType( "HR", "HTMLHRElementImpl" ); populateElementType( "HTML", "HTMLHtmlElementImpl" ); populateElementType( "IFRAME", "HTMLIFrameElementImpl" ); populateElementType( "IMG", "HTMLImageElementImpl" ); populateElementType( "INPUT", "HTMLInputElementImpl" ); populateElementType( "INS", "HTMLModElementImpl" ); populateElementType( "ISINDEX", "HTMLIsIndexElementImpl" ); populateElementType( "LABEL", "HTMLLabelElementImpl" ); populateElementType( "LEGEND", "HTMLLegendElementImpl" ); populateElementType( "LI", 
"HTMLLIElementImpl" ); populateElementType( "LINK", "HTMLLinkElementImpl" ); populateElementType( "MAP", "HTMLMapElementImpl" ); populateElementType( "MENU", "HTMLMenuElementImpl" ); populateElementType( "META", "HTMLMetaElementImpl" ); populateElementType( "OBJECT", "HTMLObjectElementImpl" ); populateElementType( "OL", "HTMLOListElementImpl" ); populateElementType( "OPTGROUP", "HTMLOptGroupElementImpl" ); populateElementType( "OPTION", "HTMLOptionElementImpl" ); populateElementType( "P", "HTMLParagraphElementImpl" ); populateElementType( "PARAM", "HTMLParamElementImpl" ); populateElementType( "PRE", "HTMLPreElementImpl" ); populateElementType( "Q", "HTMLQuoteElementImpl" ); populateElementType( "SCRIPT", "HTMLScriptElementImpl" ); populateElementType( "SELECT", "HTMLSelectElementImpl" ); populateElementType( "STYLE", "HTMLStyleElementImpl" ); populateElementType( "TABLE", "HTMLTableElementImpl" ); populateElementType( "CAPTION", "HTMLTableCaptionElementImpl" ); populateElementType( "TD", "HTMLTableCellElementImpl" ); populateElementType( "TH", "HTMLTableCellElementImpl" ); populateElementType( "COL", "HTMLTableColElementImpl" ); populateElementType( "COLGROUP", "HTMLTableColElementImpl" ); populateElementType( "TR", "HTMLTableRowElementImpl" ); populateElementType( "TBODY", "HTMLTableSectionElementImpl" ); populateElementType( "THEAD", "HTMLTableSectionElementImpl" ); populateElementType( "TFOOT", "HTMLTableSectionElementImpl" ); populateElementType( "TEXTAREA", "HTMLTextAreaElementImpl" ); populateElementType( "TITLE", "HTMLTitleElementImpl" ); populateElementType( "UL", "HTMLUListElementImpl" ); _elementConstHTML = _tmpElementConstHTML; } private static void populateElementType(String tagName, String className ) { try { Class cl = Class.forName( "org.enhydra.xml.lazydom.html." 
+ className ); _tmpElementConstHTML.put( tagName, cl.getConstructor( _elemClassSigHTML )); } catch ( ClassNotFoundException except ) { new RuntimeException( "HTM019 OpenXML Error: Could not find class " + className + " implementing HTML element " + tagName + "\n" + className + "\t" + tagName); } catch ( NoSuchMethodException except ) { new RuntimeException( "HTM019 OpenXML Error: Could not find constructor for class " + className + " implementing HTML element " + tagName + "\n" + className + "\t" + tagName); } } /** * Constructor with no template. */ public LazyHTMLDocument() { super(); populateElementTypes(); } /** * LazyDOM: Constructor with TemplateDOM. */ public LazyHTMLDocument(TemplateDOM templateDOM) { super(null, templateDOM); populateElementTypes(); } /** * LazyDOM: standard createElement method, passes null template element. */ public Element createElement(String tagName) throws DOMException { return createElement(null, tagName); } /* * Lazy DOM override to pick up HTML elements. */ public LazyElement createElement(int nodeId) throws DOMException { LazyElement template = (LazyElement)getTemplateNode(nodeId); return (LazyElement)createElement(template, template.getNodeName()); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy