
// org.enhydra.xml.lazydom.html.LazyHTMLDocument
/*
* Enhydra Java Application Server Project
*
* The contents of this file are subject to the Enhydra Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License on
* the Enhydra web site ( http://www.enhydra.org/ ).
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific terms governing rights and limitations
* under the License.
*
* The Initial Developer of the Enhydra Application Server is Lutris
* Technologies, Inc. The Enhydra Application Server and portions created
* by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
* All Rights Reserved.
*
* Contributor(s):
*
* $Id: LazyHTMLDocument.java,v 1.4 2005/01/26 08:29:24 jkjome Exp $
*/
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999,2000 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.enhydra.xml.lazydom.html;
import java.io.StringWriter;
import java.lang.reflect.Constructor;
import java.util.HashMap;
import org.enhydra.xml.dom.DOMAccess;
import org.enhydra.xml.lazydom.LazyDocument;
import org.enhydra.xml.lazydom.LazyElement;
import org.enhydra.xml.lazydom.LazyNode;
import org.enhydra.xml.lazydom.TemplateDOM;
import org.w3c.dom.Attr;
import org.w3c.dom.DOMException;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.html.HTMLCollection;
import org.w3c.dom.html.HTMLDocument;
import org.w3c.dom.html.HTMLElement;
import org.w3c.dom.html.HTMLTitleElement;
/*
* LazyDOM: This is a modified version of org.apache.html.dom.HTMLDocumentImpl
* modified to implement the LazyDOM. While most of the HTMLElement classes
* are created automatically using a sed script, this was a bit complex for
* this class, so it was done by hand. A diff with the Xerces class should
* make an upgrade of this class easy.
*/
/**
* Implements an HTML document. Provides access to the top level element in the
* document, its body and title.
*
* Several methods create new nodes of all basic types (comment, text, element,
* etc.). These methods create new nodes but do not place them in the document
* tree. The nodes may be placed in the document tree using {@link
* org.w3c.dom.Node#appendChild} or {@link org.w3c.dom.Node#insertBefore}, or
* they may be placed in some other document tree.
*
* <b>Note:</b> &lt;FRAMESET&gt; documents are not supported at the moment, neither
* are direct document writing ({@link #open}, {@link #write}) and HTTP attribute
* methods ({@link #getURL}, {@link #getCookie}).
*
*
* @version $Revision: 1.4 $ $Date: 2005/01/26 08:29:24 $
* @author Assaf Arkin
* @see org.w3c.dom.html.HTMLDocument
*/
public class LazyHTMLDocument
extends LazyDocument
implements HTMLDocument
{
/**
* Cached {@link HTMLCollectionImpl} over the anchors of this document.
* Created on the first call to {@link #getAnchors} and reused afterwards.
*/
private HTMLCollectionImpl _anchors;
/**
* Cached {@link HTMLCollectionImpl} over the forms of this document.
* Created on the first call to {@link #getForms} and reused afterwards.
*/
private HTMLCollectionImpl _forms;
/**
* Cached {@link HTMLCollectionImpl} over the images of this document.
* Created on the first call to {@link #getImages} and reused afterwards.
*/
private HTMLCollectionImpl _images;
/**
* Cached {@link HTMLCollectionImpl} over the links of this document.
* Created on the first call to {@link #getLinks} and reused afterwards.
*/
private HTMLCollectionImpl _links;
/**
* Cached {@link HTMLCollectionImpl} over the applets of this document.
* Created on the first call to {@link #getApplets} and reused afterwards.
*/
private HTMLCollectionImpl _applets;
/**
* Holds the string writer used by the direct manipulation operations
* ({@link #open}, {@link #write}, etc.). It is created by {@link #open} and
* discarded by {@link #close}; it is <code>null</code> while no writer is
* open. Parsing of the written text into a document tree is not
* implemented (see {@link #close}).
*/
private StringWriter _writer;
/**
* Holds names and classes of HTML element type constructors. When
* an element with a particular tag name is created, the matching
* {@link java.lang.reflect.Constructor} is used to create the element
* object. For example, &lt;A&gt; matches the constructor for
* {@link HTMLAnchorElementImpl}. Keys are upper-case tag names. This
* static table is shared across all HTML documents and stays
* <code>null</code> until {@link #populateElementTypes} has finished.
*
* @see #createElement
*/
private static HashMap _elementConstHTML;
// Scratch table filled by populateElementType(); populateElementTypes()
// assigns it to _elementConstHTML once every entry has been registered.
private static HashMap _tmpElementConstHTML;
/**
* Signature used to locate the constructor of HTML element classes:
* (owner document, template element, tag name). This static array is
* shared across all HTML documents.
*
* @see #createElement
*/
private static final Class[] _elemClassSigHTML =
new Class[] { LazyHTMLDocument.class, LazyElement.class, String.class };
/**
 * Returns the DOM implementation associated with this document, as
 * supplied by {@link LazyHTMLDOMImplementation#getDOMImplementation}.
 *
 * @return the DOM implementation for LazyDOM HTML documents
 * @see Document#getImplementation
 */
public DOMImplementation getImplementation() {
    final DOMImplementation implementation =
        LazyHTMLDOMImplementation.getDOMImplementation();
    return implementation;
}
/**
 * Locates the first direct child of <code>root</code> whose node name is
 * exactly <code>name</code> (the comparison is case-sensitive).
 *
 * @param name node name to look for
 * @param root node whose immediate children are scanned
 * @return the first matching child, or <code>null</code> if none matches
 */
private Node getDirectChildElement(String name,
                                   Node root) {
    Node candidate = root.getFirstChild();
    // Walk the sibling chain until a match is found or the chain ends.
    while (candidate != null && !candidate.getNodeName().equals(name)) {
        candidate = candidate.getNextSibling();
    }
    return candidate;
}
/**
 * Returns the HTML element of this document, creating and appending one
 * when the document has no direct child named "HTML".
 *
 * @return the HTML element, never <code>null</code>
 */
public synchronized Element getDocumentElement() {
    // Enhydra modified: the original Xerces code reordered nodes to make
    // things right, which moved comments to weird locations. Throwing an
    // error would be more appropriate, but that risked breaking existing
    // code, so the node is simply looked up.
    Node existing = getDirectChildElement("HTML", this);
    if (existing != null) {
        return (Element)existing;
    }
    // The HTML element must exist as a child of the document: create it.
    Element created = new HTMLHtmlElementImpl(this, null, "HTML");
    appendChild(created);
    return created;
}
/**
 * Obtains the &lt;HEAD&gt; element of the document, creating one if it does
 * not exist yet. The element is searched among the direct children of the
 * &lt;HTML&gt; element returned by {@link #getDocumentElement}; a newly
 * created &lt;HEAD&gt; is inserted as the first child of &lt;HTML&gt;.
 *
 * Used by {@link #getTitle} and {@link #setTitle}.
 *
 * @return The &lt;HEAD&gt; element
 */
public synchronized HTMLElement getHead() {
    // Enhydra modified: the original Xerces code reordered nodes to make
    // things right, which moved comments to weird locations. Throwing an
    // error would be more appropriate, but that risked breaking existing
    // code, so the node is simply looked up.
    Element html = getDocumentElement();
    Node found = getDirectChildElement("HEAD", html);
    if (found != null) {
        return (HTMLElement)found;
    }
    // No HEAD under HTML: create one and put it in front of everything else.
    HTMLElement created = new HTMLHeadElementImpl(this, null, "HEAD");
    html.insertBefore(created, html.getFirstChild());
    return created;
}
/**
 * Returns the text of the TITLE element that is a direct child of HEAD.
 *
 * @return the title text, or an empty string when HEAD has no TITLE child
 */
public synchronized String getTitle() {
    // Enhydra modified: the original Xerces code called
    // getElementsByTagName() twice but only used the second result. Here
    // TITLE is assumed to be a direct child of HEAD (more error checking
    // might be better).
    Node titleNode = getDirectChildElement("TITLE", getHead());
    return (titleNode == null)
        ? ""
        : ((HTMLTitleElement)titleNode).getText();
}
/**
 * Sets the document title. If HEAD has no direct TITLE child, a new TITLE
 * element is created and appended to HEAD before its text is set.
 *
 * @param newTitle the new title text
 */
public synchronized void setTitle(String newTitle) {
    // Enhydra modified: the original Xerces code used getElementsByTagName()
    // to find the title. Here TITLE is assumed to be a direct child of HEAD
    // (more error checking might be better).
    HTMLElement head = getHead();
    HTMLTitleElement title
        = (HTMLTitleElement)getDirectChildElement("TITLE", head);
    if (title == null) {
        title = new HTMLTitleElementImpl(this, null, "TITLE");
        // Attach the new element; otherwise the title would be set on a
        // detached node and never become part of the document.
        head.appendChild(title);
    }
    title.setText(newTitle);
}
/**
 * Finds the BODY element among the direct children of <code>html</code>,
 * falling back to a FRAMESET element when there is no BODY.
 *
 * @param html the HTML element to search under
 * @return the BODY or FRAMESET element, or <code>null</code> if neither exists
 */
private HTMLElement findBody(Element html) {
    Node match = getDirectChildElement("BODY", html);
    if (match == null) {
        match = getDirectChildElement("FRAMESET", html);
    }
    return (HTMLElement)match;
}
/**
 * Returns the BODY (or FRAMESET) element of the document. When neither
 * exists, a new BODY element is created and appended to the HTML element.
 *
 * @return the body element, never <code>null</code>
 */
public synchronized HTMLElement getBody() {
    // Enhydra modified: the original Xerces code reordered nodes to make
    // things right, which moved comments to weird locations. Throwing an
    // error would be more appropriate, but that risked breaking existing
    // code, so the node is simply looked up.
    Element html = getDocumentElement();
    HTMLElement existing = findBody(html);
    if (existing != null) {
        return existing;
    }
    // Nothing found: create a new body and place it at the end of HTML.
    HTMLElement created = new HTMLBodyElementImpl(this, null, "BODY");
    html.appendChild(created);
    return created;
}
/**
 * Installs <code>newBody</code> as the body of the document. An existing
 * BODY or FRAMESET element is replaced; otherwise the new body is appended
 * to the HTML element.
 *
 * @param newBody the element to use as the document body
 */
public synchronized void setBody(HTMLElement newBody) {
    // Enhydra modified: the original Xerces code reordered nodes to make
    // things right, which moved comments to weird locations. Throwing an
    // error would be more appropriate, but that risked breaking existing
    // code, so the node is simply looked up.
    Element html = getDocumentElement();
    HTMLElement current = findBody(html);
    if (current != null) {
        html.replaceChild(newBody, current);
        return;
    }
    html.appendChild(newBody);
}
/**
 * Returns the element whose <code>id</code> attribute equals
 * <code>elementId</code>, searching the whole document tree.
 *
 * @param elementId the id value to look for
 * @return the matching element, or <code>null</code> if there is none
 */
public synchronized Element getElementById( String elementId )
{
    // Start the recursive search at the document node itself.
    Element match = getElementById( elementId, this );
    return match;
}
/**
 * Returns a node list built by {@link NameNodeListImpl} for this document
 * and the given name.
 *
 * @param elementName the name to match
 * @return a new node list for <code>elementName</code>
 */
public NodeList getElementsByName( String elementName )
{
    NodeList byName = new NameNodeListImpl( this, elementName );
    return byName;
}
/**
 * Returns the elements with the given tag name. The name is converted to
 * upper case before the lookup is delegated to the superclass.
 *
 * @param tagName the tag name to match, in any case
 * @return the node list returned by the superclass for the upper-cased name
 */
public final NodeList getElementsByTagName( String tagName )
{
    // Use a fixed locale: with the default locale the conversion is
    // locale-sensitive (e.g. Turkish maps "i" to a dotted capital I), which
    // would make names such as "title" miss their upper-case counterparts.
    return super.getElementsByTagName( tagName.toUpperCase( java.util.Locale.ENGLISH ) );
}
/**
 * Namespace-aware variant of {@link #getElementsByTagName}. The local name
 * is converted to upper case; when no namespace URI is given the lookup
 * falls back to the non-namespace search of the superclass.
 *
 * @param namespaceURI the namespace URI, may be <code>null</code> or empty
 * @param localName    the local name to match, in any case
 * @return the node list returned by the superclass
 */
public final NodeList getElementsByTagNameNS( String namespaceURI,
                                              String localName )
{
    // Fixed locale so the result does not depend on the JVM default locale
    // (e.g. Turkish maps "i" to a dotted capital I).
    String upperName = localName.toUpperCase( java.util.Locale.ENGLISH );
    if ( namespaceURI != null && namespaceURI.length() > 0 )
        return super.getElementsByTagNameNS( namespaceURI, upperName );
    else
        return super.getElementsByTagName( upperName );
}
/**
 * Creates an element for a qualified name. With a non-empty namespace URI
 * the superclass creates the element; otherwise the request is handled by
 * {@link #createElement(String)}.
 *
 * @param namespaceURI  the namespace URI, may be <code>null</code> or empty
 * @param qualifiedName the qualified name of the element
 * @return the new element
 */
public Element createElementNS( String namespaceURI, String qualifiedName )
{
    boolean hasNamespace = namespaceURI != null && namespaceURI.length() != 0;
    if ( hasNamespace ) {
        return super.createElementNS( namespaceURI, qualifiedName );
    }
    return createElement( qualifiedName );
}
/**
 * Creates an element for the given tag name, optionally backed by a
 * template element. The tag name is upper-cased and looked up in the
 * element constructor table; a registered constructor is invoked with
 * (this document, template, tag name). Tags without a registered
 * constructor yield a generic {@link LazyHTMLElement}.
 *
 * @param template the template element handed to the element constructor,
 *                 may be <code>null</code>
 * @param tagName  the tag name, in any case
 * @return the new element
 * @throws IllegalStateException if the registered element class fails to
 *         construct; the underlying failure is attached as the cause
 */
public Element createElement( LazyElement template, String tagName )
    throws DOMException
{
    // Upper-case with a fixed locale so the table lookup does not depend on
    // the JVM default locale (e.g. Turkish maps "i" to a dotted capital I).
    tagName = tagName.toUpperCase( java.util.Locale.ENGLISH );
    Constructor cnst = (Constructor) _elementConstHTML.get( tagName );
    if ( cnst == null )
    {
        // No specific class registered: fall back to a generic HTML element.
        return new LazyHTMLElement( this, template, tagName );
    }
    try
    {
        // The constructor signature is (owner document, template, tag name).
        return (Element) cnst.newInstance( new Object[] { this, template, tagName } );
    }
    catch ( Exception except )
    {
        // Unwrap reflective invocation failures so the cause is the real error.
        Throwable thrw = except;
        if ( except instanceof java.lang.reflect.InvocationTargetException )
            thrw = ( (java.lang.reflect.InvocationTargetException) except ).getTargetException();
        IllegalStateException failure = new IllegalStateException( "HTM15 Tag '" + tagName + "' associated with an Element class that failed to construct.\n" + tagName);
        // Keep the original failure reachable instead of discarding it.
        failure.initCause( thrw );
        throw failure;
    }
}
/**
 * Creates an Attribute having this Document as its OwnerDoc.
 * Delegates to the superclass with the attribute name converted to
 * lower case.
 *
 * @param name The name of the attribute
 * @return An attribute whose name is all lower case
 * @throws DOMException(INVALID_NAME_ERR) if the attribute name
 * is not acceptable
 */
public Attr createAttribute( String name )
    throws DOMException
{
    // Fixed locale: the default-locale conversion is locale-sensitive (e.g.
    // Turkish maps "I" to a dotless i), which would corrupt names like "ID".
    return super.createAttribute( name.toLowerCase( java.util.Locale.ENGLISH ) );
}
/**
 * Returns the referrer of the document.
 *
 * @return always <code>null</code>; this information is not available on
 *         the server side
 */
public String getReferrer()
{
    return null;
}
/**
 * Returns the domain of the document.
 *
 * @return always <code>null</code>; this information is not available on
 *         the server side
 */
public String getDomain()
{
    return null;
}
/**
 * Returns the URL of the document.
 *
 * @return always <code>null</code>; this information is not available on
 *         the server side
 */
public String getURL()
{
    return null;
}
/**
 * Returns the cookie of the document.
 *
 * @return always <code>null</code>; this information is not available on
 *         the server side
 */
public String getCookie()
{
    return null;
}
/**
 * Accepts a cookie value and ignores it; cookies are not available on the
 * server side.
 *
 * @param cookie the cookie value (unused)
 */
public void setCookie( String cookie )
{
    // Intentionally empty: information not available on server side.
}
/**
 * Returns the collection of images, built over the element returned by
 * {@link #getBody}. The collection object is created on first use and
 * cached.
 *
 * @return the cached image collection
 */
public HTMLCollection getImages()
{
    // For more information see HTMLCollection#collectionMatch
    HTMLCollectionImpl images = _images;
    if ( images == null )
    {
        images = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.IMAGE );
        _images = images;
    }
    return images;
}
/**
 * Returns the collection of applets, built over the element returned by
 * {@link #getBody}. The collection object is created on first use and
 * cached.
 *
 * @return the cached applet collection
 */
public HTMLCollection getApplets()
{
    // For more information see HTMLCollection#collectionMatch
    HTMLCollectionImpl applets = _applets;
    if ( applets == null )
    {
        applets = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.APPLET );
        _applets = applets;
    }
    return applets;
}
/**
 * Returns the collection of links, built over the element returned by
 * {@link #getBody}. The collection object is created on first use and
 * cached.
 *
 * @return the cached link collection
 */
public HTMLCollection getLinks()
{
    // For more information see HTMLCollection#collectionMatch
    HTMLCollectionImpl links = _links;
    if ( links == null )
    {
        links = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.LINK );
        _links = links;
    }
    return links;
}
/**
 * Returns the collection of forms, built over the element returned by
 * {@link #getBody}. The collection object is created on first use and
 * cached.
 *
 * @return the cached form collection
 */
public HTMLCollection getForms()
{
    // For more information see HTMLCollection#collectionMatch
    HTMLCollectionImpl forms = _forms;
    if ( forms == null )
    {
        forms = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.FORM );
        _forms = forms;
    }
    return forms;
}
/**
 * Returns the collection of anchors, built over the element returned by
 * {@link #getBody}. The collection object is created on first use and
 * cached.
 *
 * @return the cached anchor collection
 */
public HTMLCollection getAnchors()
{
    // For more information see HTMLCollection#collectionMatch
    HTMLCollectionImpl anchors = _anchors;
    if ( anchors == null )
    {
        anchors = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.ANCHOR );
        _anchors = anchors;
    }
    return anchors;
}
/**
 * Prepares an in-memory writer for {@link #write} and {@link #writeln}.
 * Calling it while a writer is already open keeps the existing writer.
 * The document tree stays accessible the old way until the writer is
 * closed.
 */
public void open()
{
    if ( _writer != null )
    {
        return; // already open, keep what has been written so far
    }
    _writer = new StringWriter();
}
/**
 * Discards the in-memory writer created by {@link #open}. The written
 * text is not parsed into the document.
 */
public void close()
{
    // ! NOT IMPLEMENTED, REQUIRES PARSER !
    // Dropping the reference is all that happens; doing so when no writer
    // is open leaves the field null as before.
    _writer = null;
}
/**
 * Writes a string into the in-memory writer. Does nothing when the
 * document has not been opened with {@link #open}.
 *
 * @param text the text to write
 */
public void write( String text )
{
    if ( _writer == null )
    {
        return;
    }
    _writer.write( text );
}
/**
 * Writes a string followed by a newline into the in-memory writer. Does
 * nothing when the document has not been opened with {@link #open}.
 *
 * @param text the text to write
 */
public void writeln( String text )
{
    if ( _writer == null )
    {
        return;
    }
    String line = text + "\n";
    _writer.write( line );
}
/**
 * Creates a new {@link LazyHTMLDocument} as a copy of this one. For a deep
 * clone every direct child of this document is imported (deeply) into the
 * new document and appended to it; a shallow clone is an empty document.
 *
 * @param deep whether the children are copied as well
 * @return the new document
 */
public Node cloneNode( boolean deep )
{
    LazyHTMLDocument copy = new LazyHTMLDocument();
    if ( !deep ) {
        return copy;
    }
    for ( LazyNode child = (LazyNode) getFirstChild();
          child != null;
          child = (LazyNode) child.getNextSibling() ) {
        copy.appendChild( copy.importNode( child, true ) );
    }
    return copy;
}
/**
 * Recursively searches the descendants of <code>node</code> for an element
 * whose <code>id</code> attribute equals <code>elementId</code>. Children,
 * siblings and attributes are reached through {@link DOMAccess}; per the
 * original note this keeps the search LazyDOM aware so that elements are
 * not expanded while searching. Only the element that is returned goes
 * through {@link DOMAccess#getExpandedElement}.
 *
 * @param elementId The <code>id</code> value to look for
 * @param node The node whose descendants are searched
 * @return The matching element, or <code>null</code> if none is found
 */
private Element getElementById(String elementId,
                               Node node){
    for (Node child = DOMAccess.accessFirstChild(this, node);
         child != null;
         child = DOMAccess.accessNextSibling(this, child)) {
        if (!(child instanceof Element)) {
            continue; // only elements carry an id or have element children
        }
        Element candidate = (Element)child;
        Attr idAttr = DOMAccess.accessAttribute(this, candidate, null, "id");
        if ((idAttr != null)
            && elementId.equals(DOMAccess.accessAttributeValue(this, idAttr))) {
            return DOMAccess.getExpandedElement(this, candidate);
        }
        // Not this element: descend into its subtree.
        Element nested = getElementById(elementId, child);
        if (nested != null) {
            return DOMAccess.getExpandedElement(this, nested);
        }
    }
    return null;
}
/**
 * Called by the constructors to populate the element constructor table
 * (see {@link #_elementConstHTML}). It is called for every new document
 * but populates the table only the first time. Replacement for a static
 * initializer.
 *
 * The method is synchronized on the class because it mutates shared static
 * state: without the lock, two documents constructed concurrently could
 * both fill the same scratch map, and a partially filled table could be
 * published.
 */
private static synchronized void populateElementTypes()
{
    // This method looks the way it does because of some strange
    // (read: inconsistent) JVM bugs.
    // Initially all this code was placed in the static initializer,
    // but that caused some early JVMs (1.1) to go mad, and if a
    // class could not be found (as happened during development),
    // the JVM would die.
    // Bertrand Delacretaz pointed out
    // several configurations where HTMLAnchorElementImpl.class
    // failed, forcing a revert back to Class.forName().
    if ( _elementConstHTML != null )
        return;
    // Fill a scratch table first; it is published only when complete.
    _tmpElementConstHTML = new HashMap( 63 );
    populateElementType( "A", "HTMLAnchorElementImpl" );
    populateElementType( "APPLET", "HTMLAppletElementImpl" );
    populateElementType( "AREA", "HTMLAreaElementImpl" );
    populateElementType( "BASE", "HTMLBaseElementImpl" );
    populateElementType( "BASEFONT", "HTMLBaseFontElementImpl" );
    populateElementType( "BLOCKQUOTE", "HTMLQuoteElementImpl" );
    populateElementType( "BODY", "HTMLBodyElementImpl" );
    populateElementType( "BR", "HTMLBRElementImpl" );
    populateElementType( "BUTTON", "HTMLButtonElementImpl" );
    populateElementType( "DEL", "HTMLModElementImpl" );
    populateElementType( "DIR", "HTMLDirectoryElementImpl" );
    populateElementType( "DIV", "HTMLDivElementImpl" );
    populateElementType( "DL", "HTMLDListElementImpl" );
    populateElementType( "FIELDSET", "HTMLFieldSetElementImpl" );
    populateElementType( "FONT", "HTMLFontElementImpl" );
    populateElementType( "FORM", "HTMLFormElementImpl" );
    populateElementType( "FRAME","HTMLFrameElementImpl" );
    populateElementType( "FRAMESET", "HTMLFrameSetElementImpl" );
    populateElementType( "HEAD", "HTMLHeadElementImpl" );
    populateElementType( "H1", "HTMLHeadingElementImpl" );
    populateElementType( "H2", "HTMLHeadingElementImpl" );
    populateElementType( "H3", "HTMLHeadingElementImpl" );
    populateElementType( "H4", "HTMLHeadingElementImpl" );
    populateElementType( "H5", "HTMLHeadingElementImpl" );
    populateElementType( "H6", "HTMLHeadingElementImpl" );
    populateElementType( "HR", "HTMLHRElementImpl" );
    populateElementType( "HTML", "HTMLHtmlElementImpl" );
    populateElementType( "IFRAME", "HTMLIFrameElementImpl" );
    populateElementType( "IMG", "HTMLImageElementImpl" );
    populateElementType( "INPUT", "HTMLInputElementImpl" );
    populateElementType( "INS", "HTMLModElementImpl" );
    populateElementType( "ISINDEX", "HTMLIsIndexElementImpl" );
    populateElementType( "LABEL", "HTMLLabelElementImpl" );
    populateElementType( "LEGEND", "HTMLLegendElementImpl" );
    populateElementType( "LI", "HTMLLIElementImpl" );
    populateElementType( "LINK", "HTMLLinkElementImpl" );
    populateElementType( "MAP", "HTMLMapElementImpl" );
    populateElementType( "MENU", "HTMLMenuElementImpl" );
    populateElementType( "META", "HTMLMetaElementImpl" );
    populateElementType( "OBJECT", "HTMLObjectElementImpl" );
    populateElementType( "OL", "HTMLOListElementImpl" );
    populateElementType( "OPTGROUP", "HTMLOptGroupElementImpl" );
    populateElementType( "OPTION", "HTMLOptionElementImpl" );
    populateElementType( "P", "HTMLParagraphElementImpl" );
    populateElementType( "PARAM", "HTMLParamElementImpl" );
    populateElementType( "PRE", "HTMLPreElementImpl" );
    populateElementType( "Q", "HTMLQuoteElementImpl" );
    populateElementType( "SCRIPT", "HTMLScriptElementImpl" );
    populateElementType( "SELECT", "HTMLSelectElementImpl" );
    populateElementType( "STYLE", "HTMLStyleElementImpl" );
    populateElementType( "TABLE", "HTMLTableElementImpl" );
    populateElementType( "CAPTION", "HTMLTableCaptionElementImpl" );
    populateElementType( "TD", "HTMLTableCellElementImpl" );
    populateElementType( "TH", "HTMLTableCellElementImpl" );
    populateElementType( "COL", "HTMLTableColElementImpl" );
    populateElementType( "COLGROUP", "HTMLTableColElementImpl" );
    populateElementType( "TR", "HTMLTableRowElementImpl" );
    populateElementType( "TBODY", "HTMLTableSectionElementImpl" );
    populateElementType( "THEAD", "HTMLTableSectionElementImpl" );
    populateElementType( "TFOOT", "HTMLTableSectionElementImpl" );
    populateElementType( "TEXTAREA", "HTMLTextAreaElementImpl" );
    populateElementType( "TITLE", "HTMLTitleElementImpl" );
    populateElementType( "UL", "HTMLUListElementImpl" );
    // Publish the fully populated table.
    _elementConstHTML = _tmpElementConstHTML;
}
/**
 * Registers the constructor of one HTML element class in the scratch
 * table under the given tag name. The class is loaded by name from the
 * <code>org.enhydra.xml.lazydom.html</code> package and must offer a
 * constructor matching {@link #_elemClassSigHTML}.
 *
 * Registration is best effort: if the class or its constructor cannot be
 * found, the tag is left unregistered, and {@link #createElement} then
 * produces a generic HTML element for it. (The previous code built a
 * RuntimeException in these cases but never threw it, so nothing was ever
 * reported; that dead code is removed and the behavior made explicit.)
 *
 * @param tagName   upper-case tag name used as the table key
 * @param className simple name of the implementing class
 */
private static void populateElementType(String tagName, String className )
{
    try {
        Class cl = Class.forName( "org.enhydra.xml.lazydom.html." + className );
        _tmpElementConstHTML.put( tagName, cl.getConstructor( _elemClassSigHTML ));
    } catch ( ClassNotFoundException ignored ) {
        // Deliberately ignored: tagName stays unregistered and falls back to
        // the generic element class.
    } catch ( NoSuchMethodException ignored ) {
        // Deliberately ignored: tagName stays unregistered and falls back to
        // the generic element class.
    }
}
/**
 * Creates an HTML document that is not backed by a template. The shared
 * element constructor table is populated if that has not happened yet.
 */
public LazyHTMLDocument()
{
    // The no-argument superclass constructor runs implicitly first.
    populateElementTypes();
}
/**
 * LazyDOM: creates an HTML document backed by the given template DOM. The
 * shared element constructor table is populated if that has not happened
 * yet.
 *
 * @param templateDOM the template DOM passed on to the superclass
 */
public LazyHTMLDocument(TemplateDOM templateDOM)
{
    super(null, templateDOM);
    populateElementTypes();
}
/**
 * LazyDOM: the standard DOM <code>createElement</code>. Delegates to
 * {@link #createElement(LazyElement, String)} with a null template element.
 *
 * @param tagName the tag name of the element to create
 * @return the new element
 */
public Element createElement(String tagName)
    throws DOMException
{
    Element created = createElement(null, tagName);
    return created;
}
/*
 * Lazy DOM override to pick up HTML elements: the template node for the
 * given node id is fetched, and an element is created from it using the
 * template's own node name as the tag name.
 */
public LazyElement createElement(int nodeId)
    throws DOMException
{
    final LazyElement templateElement = (LazyElement)getTemplateNode(nodeId);
    final String tagName = templateElement.getNodeName();
    return (LazyElement)createElement(templateElement, tagName);
}
}