// org.enhydra.apache.html.dom.HTMLCollectionImpl Maven / Gradle / Ivy
/*
* The Apache Software License, Version 1.1
*
*
* Copyright (c) 1999,2000 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation and was
* originally based on software copyright (c) 1999, International
* Business Machines, Inc., http://www.apache.org. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.enhydra.apache.html.dom;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.html.HTMLAnchorElement;
import org.w3c.dom.html.HTMLAppletElement;
import org.w3c.dom.html.HTMLAreaElement;
import org.w3c.dom.html.HTMLCollection;
import org.w3c.dom.html.HTMLElement;
import org.w3c.dom.html.HTMLFormElement;
import org.w3c.dom.html.HTMLImageElement;
import org.w3c.dom.html.HTMLObjectElement;
import org.w3c.dom.html.HTMLOptionElement;
import org.w3c.dom.html.HTMLTableCellElement;
import org.w3c.dom.html.HTMLTableRowElement;
import org.w3c.dom.html.HTMLTableSectionElement;
/**
* Implements {@link org.w3c.dom.html.HTMLCollection} to traverse any named
* elements on a {@link org.w3c.dom.html.HTMLDocument}. The element type to
* look for is identified in the constructor by code. This collection is not
* optimized for traversing large trees.
*
* The collection has to meet two requirements: it has to be live, and it has
* to traverse depth first and always return results in that order. As such,
* using an object container (such as {@link java.util.Vector}) is expensive on
* insert/remove operations. Instead, the collection has been implemented using
* three traversing functions. As a result, operations on large documents will
* result in traversal of the entire document tree and consume a considerable
* amount of time.
*
* Note that synchronization on the traversed document cannot be achieved.
* The document itself cannot be locked, and locking each traversed node is
* likely to lead to a deadlock condition. Therefore, there is a chance of the
* document being changed as results are fetched; in all likelihood, the results
* might be outdated, but not erroneous.
*
*
* @version $Revision: 1.2 $ $Date: 2005/01/26 08:28:44 $
* @author Assaf Arkin
* @see org.w3c.dom.html.HTMLCollection
*/
class HTMLCollectionImpl
implements HTMLCollection
{
/**
* Request collection of all anchors in document: {@code <A>} elements that
* have a {@code name} attribute.
*/
static final short ANCHOR = 1;
/**
* Request collection of all forms in document: {@code <FORM>} elements.
*/
static final short FORM = 2;
/**
* Request collection of all images in document: {@code <IMG>} elements.
*/
static final short IMAGE = 3;
/**
* Request collection of all applets in document: {@code <APPLET>} and
* {@code <OBJECT>} elements ({@code <OBJECT>} must contain an applet).
*/
static final short APPLET = 4;
/**
* Request collection of all links in document: {@code <A>} and
* {@code <AREA>} elements (must have a {@code href} attribute).
*/
static final short LINK = 5;
/**
* Request collection of all options in selection: {@code <OPTION>} elements
* in {@code <SELECT>} or {@code <OPTGROUP>}.
*/
static final short OPTION = 6;
/**
* Request collection of all rows in table: {@code <TR>} elements in table or
* table section.
*/
static final short ROW = 7;
/**
* Request collection of all form elements: {@code <INPUT>}, {@code <BUTTON>},
* {@code <SELECT>}, {@code <TEXT>} and {@code <TEXTAREA>} elements inside
* form {@code <FORM>}.
*/
static final short ELEMENT = 8;
/**
* Request collection of all areas in map: {@code <AREA>} elements in
* {@code <MAP>} (non recursive). Negative codes such as this one make
* {@link #recurse} return false, so the traversal does not descend into
* child elements.
*/
static final short AREA = -1;
/**
* Request collection of all table bodies in table: {@code <TBODY>} elements
* in table {@code <TABLE>} (non recursive).
*/
static final short TBODY = -2;
/**
* Request collection of all cells in row: {@code <TD>} elements in
* {@code <TR>} (non recursive).
*/
static final short CELL = -3;
/**
* Indicates what this collection is looking for. Holds one of the enumerated
* codes (e.g. {@link #ANCHOR}, {@link #CELL}) and is used by
* {@link #collectionMatch} and {@link #recurse}. Set by the constructor and
* determines the collection's use for its lifetime.
*/
private short _lookingFor;
/**
* This is the top level element underneath which the collection exists.
* All traversals start from this element's first child.
*/
private Element _topLevel;
/**
 * Creates a collection that retrieves elements of the kind identified by
 * {@code lookingFor} from the document portion rooted at {@code topLevel}.
 *
 * @param topLevel The element underneath which the collection exists
 * @param lookingFor Code indicating what elements to look for
 * @throws NullPointerException If {@code topLevel} is null
 */
HTMLCollectionImpl( HTMLElement topLevel, short lookingFor )
{
    // Reject a missing root up front; every traversal starts from it.
    if ( topLevel == null )
    {
        throw new NullPointerException( "HTM011 Argument 'topLevel' is null." );
    }
    this._topLevel = topLevel;
    this._lookingFor = lookingFor;
}
/**
 * Returns the number of elements in the collection. The count is computed
 * on every call by walking the tree below the top-level element, so this
 * method might traverse the entire document tree.
 *
 * @return Length of the collection
 */
public final int getLength()
{
    // Delegate to the recursive counter, starting at the collection root.
    final Element root = _topLevel;
    return getLength( root );
}
/**
 * Retrieves the node at the given position in the collection. Positions
 * follow tree order (depth-first traversal). This method might traverse
 * the entire document tree.
 *
 * @param index The zero-based index of the node to return
 * @return The specified node or null if no such node found
 * @throws IllegalArgumentException If {@code index} is negative
 */
public final Node item( int index )
{
    if ( index >= 0 )
    {
        // The recursive search counts this mutable index down as it passes
        // matching elements.
        CollectionIndex remaining = new CollectionIndex( index );
        return item( _topLevel, remaining );
    }
    throw new IllegalArgumentException( "HTM012 Argument 'index' is negative." );
}
/**
 * Retrieves the first node in the collection that carries the given name.
 * The name comparison is performed by {@link #collectionMatch}, whose
 * contract is a case sensitive match against the id attribute of each
 * element. The tree is traversed in depth-first order, and this method
 * might traverse the entire document tree.
 *
 * @param name The name of the node to return
 * @return The specified node or null if no such node found
 * @throws NullPointerException If {@code name} is null
 */
public final Node namedItem( String name )
{
    if ( name != null )
    {
        // Delegate to the recursive search, starting at the collection root.
        return namedItem( _topLevel, name );
    }
    throw new NullPointerException( "HTM013 Argument 'name' is null." );
}
/**
 * Counts the elements of the requested type found under the given element.
 * The method calls itself for child elements, passing the child as the new
 * top level.
 *
 * @param topLevel Top level element from which to scan
 * @return Number of matching elements
 */
private int getLength( Element topLevel )
{
    synchronized ( topLevel )
    {
        int count = 0;
        // Visit the children in document order.
        for ( Node child = topLevel.getFirstChild(); child != null; child = child.getNextSibling() )
        {
            // Only elements (HTML or XML) can match or contain matches.
            if ( !( child instanceof Element ) )
            {
                continue;
            }
            Element elem = (Element) child;
            if ( collectionMatch( elem, null ) )
            {
                // A matching element is counted and not descended into.
                count++;
            }
            else if ( recurse() )
            {
                // Non-matching elements are searched only when this
                // collection recurses through the tree.
                count += getLength( elem );
            }
        }
        return count;
    }
}
/**
 * Finds the element at the given position among the elements of the
 * requested type under the given element. The method calls itself for
 * child elements, passing the child as the new top level.
 *
 * The remaining index has to survive across recursive calls: every
 * matching element that is passed over decrements it. Since integers are
 * passed by value, the counter is carried in a mutable
 * {@link CollectionIndex} shared by all levels of the recursion.
 *
 * @param topLevel Top level element from which to scan
 * @param index The remaining index of the item to retrieve; decremented
 *              for each matching element that is skipped
 * @return The matching element at that index, or null if this subtree
 *         does not contain it
 * @see CollectionIndex
 */
private Node item( Element topLevel, CollectionIndex index )
{
    synchronized ( topLevel )
    {
        // Visit the children in document order, counting the index down
        // until a matching element is reached with the index at zero.
        for ( Node child = topLevel.getFirstChild(); child != null; child = child.getNextSibling() )
        {
            // Only elements (HTML or XML) can match or contain matches.
            if ( !( child instanceof Element ) )
            {
                continue;
            }
            Element elem = (Element) child;
            if ( collectionMatch( elem, null ) )
            {
                if ( index.isZero() )
                {
                    return child;
                }
                index.decrement();
            }
            else if ( recurse() )
            {
                // Non-matching elements are searched only when this
                // collection recurses through the tree.
                Node found = item( elem, index );
                if ( found != null )
                {
                    return found;
                }
            }
        }
        return null;
    }
}
/**
 * Finds the first element of the requested type under the given element
 * that {@link #collectionMatch} accepts for the given name. The method
 * calls itself for child elements, passing the child as the new top level.
 *
 * @param topLevel Top level element from which to scan
 * @param name The named element to look for
 * @return The first named element found, or null if there is none
 */
private Node namedItem( Element topLevel, String name )
{
    synchronized ( topLevel )
    {
        // Visit the children in document order.
        for ( Node child = topLevel.getFirstChild(); child != null; child = child.getNextSibling() )
        {
            // Only elements (HTML or XML) can match or contain matches.
            if ( !( child instanceof Element ) )
            {
                continue;
            }
            Element elem = (Element) child;
            if ( collectionMatch( elem, name ) )
            {
                return child;
            }
            if ( recurse() )
            {
                // Non-matching elements are searched only when this
                // collection recurses through the tree.
                Node found = namedItem( elem, name );
                if ( found != null )
                {
                    return found;
                }
            }
        }
        // All children examined without finding a match.
        return null;
    }
}
/**
 * Tells the scanning methods whether to descend into non-matching elements.
 * When looking for elements in the document, recursing is needed to traverse
 * the full document tree. When looking inside a specific element (e.g. for a
 * cell inside a row), recursing can lead to erroneous results. The decision
 * is encoded in the sign of the lookup code: positive codes recurse,
 * all others do not.
 *
 * @return True if methods should recurse to traverse entire tree
 */
protected boolean recurse()
{
    final boolean searchesWholeTree = _lookingFor > 0;
    return searchesWholeTree;
}
/**
* Determines if current element matches based on what we're looking for.
* The element is passed along with an optional identifier name. If the
* element is the one we're looking for, return true. If the name is also
* specified, the name must match the {@code id} attribute
* (match {@code name} first for anchors).
*
* @param elem The current element
* @param name The identifier name or null
* @return The element matches what we're looking for
*/
protected boolean collectionMatch( Element elem, String name )
{
boolean match;
synchronized ( elem )
{
// Begin with no matching. Depending on what we're looking for,
// attempt to match based on the element type. This is the quickest
// way to match involving only a cast. Do the expensive string
// comparison later on.
match = false;
switch ( _lookingFor )
{
case ANCHOR:
// Anchor is an element with a 'name' attribute. Otherwise, it's
// just a link.
match = ( elem instanceof HTMLAnchorElement ) &&
elem.getAttribute( "name" ).length() > 0;
break;
case FORM:
// Any