// org.apache.html.dom.HTMLCollectionImpl Maven / Gradle / Ivy
// Show all versions of xercesImpl Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.html.dom;
import java.io.Serializable;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.html.HTMLAnchorElement;
import org.w3c.dom.html.HTMLAppletElement;
import org.w3c.dom.html.HTMLAreaElement;
import org.w3c.dom.html.HTMLCollection;
import org.w3c.dom.html.HTMLElement;
import org.w3c.dom.html.HTMLFormElement;
import org.w3c.dom.html.HTMLImageElement;
import org.w3c.dom.html.HTMLObjectElement;
import org.w3c.dom.html.HTMLOptionElement;
import org.w3c.dom.html.HTMLTableCellElement;
import org.w3c.dom.html.HTMLTableRowElement;
import org.w3c.dom.html.HTMLTableSectionElement;
/**
* Implements {@link org.w3c.dom.html.HTMLCollection} to traverse any named
* elements on a {@link org.w3c.dom.html.HTMLDocument}. The type of element to
* look for is identified by a code passed to the constructor. This collection
* is not optimized for traversing large trees.
*
* The collection has to meet two requirements: it has to be live, and it has
* to traverse depth first and always return results in that order. As such,
* using an object container (such as {@link java.util.Vector}) is expensive on
* insert/remove operations. Instead, the collection has been implemented using
* three traversing functions. As a result, operations on large documents will
* result in traversal of the entire document tree and consume a considerable
* amount of time.
*
* Note that synchronization on the traversed document cannot be achieved.
* The document itself cannot be locked, and locking each traversed node is
* likely to lead to a deadlock condition. Therefore, there is a chance of the
* document being changed as results are fetched; in all likelihood, the results
* might be outdated, but not erroneous.
*
* @xerces.internal
*
* @version $Revision: 1035042 $ $Date: 2010-11-14 19:41:49 +0100 (Sun, 14 Nov 2010) $
* @author Assaf Arkin
* @see org.w3c.dom.html.HTMLCollection
*/
class HTMLCollectionImpl
implements HTMLCollection, Serializable
{
// Stream version identifier for Java serialization (the class implements Serializable).
private static final long serialVersionUID = 9112122196669185082L;
/**
* Request collection of all anchors in document: {@code <A>} elements that
* have a {@code name} attribute.
*/
static final short ANCHOR = 1;
/**
* Request collection of all forms in document: {@code <FORM>} elements.
*/
static final short FORM = 2;
/**
* Request collection of all images in document: {@code <IMG>} elements.
*/
static final short IMAGE = 3;
/**
* Request collection of all applets in document: {@code <APPLET>} and
* {@code <OBJECT>} elements ({@code <OBJECT>} must contain an applet).
*/
static final short APPLET = 4;
/**
* Request collection of all links in document: {@code <A>} and
* {@code <AREA>} elements (must have a {@code href} attribute).
*/
static final short LINK = 5;
/**
* Request collection of all options in selection: {@code <OPTION>} elements
* in {@code <SELECT>} or {@code <OPTGROUP>}.
*/
static final short OPTION = 6;
/**
* Request collection of all rows in table: {@code <TR>} elements in table or
* table section.
*/
static final short ROW = 7;
/**
* Request collection of all form elements: {@code <INPUT>}, {@code <BUTTON>},
* {@code <SELECT>}, and {@code <TEXTAREA>} elements inside form
* {@code <FORM>}.
*/
static final short ELEMENT = 8;
/**
* Request collection of all areas in map: {@code <AREA>} element in
* {@code <MAP>} (non recursive).
*/
static final short AREA = -1;
/**
* Request collection of all table bodies in table: {@code <TBODY>} element
* in table {@code <TABLE>} (non recursive).
*/
static final short TBODY = -2;
/**
* Request collection of all cells in row: {@code <TD>} and {@code <TH>}
* elements in {@code <TR>} (non recursive).
*/
static final short CELL = -3;
/**
* Indicates what this collection is looking for. Holds one of the enumerated
* values above and is used by {@link #collectionMatch}. Set by the
* constructor; it determines the collection's use for its lifetime. Codes
* greater than zero make {@link #recurse} return true, so the traversal
* descends into non-matching elements; the negative codes do not.
*/
private short _lookingFor;
/**
* This is the top level element underneath which the collection exists.
*/
private Element _topLevel;
/**
 * Creates a collection that retrieves elements of the requested kind
 * ({@code lookingFor}) from the given portion of the document
 * ({@code topLevel}).
 *
 * @param topLevel The element underneath which the collection exists
 * @param lookingFor Code indicating what elements to look for
 * @throws NullPointerException If {@code topLevel} is null
 */
HTMLCollectionImpl( HTMLElement topLevel, short lookingFor )
{
    // Reject a missing root up front; every traversal starts from it.
    if ( topLevel == null )
    {
        throw new NullPointerException( "HTM011 Argument 'topLevel' is null." );
    }
    _lookingFor = lookingFor;
    _topLevel = topLevel;
}
/**
 * Counts the elements that currently belong to this collection. The count
 * is recomputed on every call by scanning beneath the top-level element,
 * so this method might traverse the entire document tree.
 *
 * @return Length of the collection
 */
public final int getLength()
{
    // Hand the collection root to the recursive counter.
    final Element root = _topLevel;
    return getLength( root );
}
/**
 * Fetches the node at the given position in the collection. Positions
 * follow tree order (depth-first traversal). This method might traverse
 * the entire document tree.
 *
 * @param index The index of the node to return
 * @return The specified node or null if no such node found
 * @throws IllegalArgumentException If {@code index} is negative
 */
public final Node item( int index )
{
    if ( index >= 0 )
    {
        // The recursive search needs a mutable holder so the remaining
        // count survives across nested calls.
        final CollectionIndex remaining = new CollectionIndex( index );
        return item( _topLevel, remaining );
    }
    throw new IllegalArgumentException( "HTM012 Argument 'index' is negative." );
}
/**
 * Looks up a node in the collection by name and returns the first match
 * found in depth-first order. The comparison itself is performed by
 * {@link #collectionMatch}, which is documented to test the name against
 * the {@code id} attribute (and {@code name} first for anchors). This
 * method might traverse the entire document tree.
 *
 * @param name The name of the node to return
 * @return The specified node or null if no such node found
 * @throws NullPointerException If {@code name} is null
 */
public final Node namedItem( String name )
{
    if ( name != null )
    {
        // Start the recursive search at the collection root.
        return namedItem( _topLevel, name );
    }
    throw new NullPointerException( "HTM013 Argument 'name' is null." );
}
/**
 * Recursively counts the elements beneath {@code topLevel} that belong to
 * this collection. A matching element is counted and not descended into;
 * a non-matching element is descended into only when {@link #recurse}
 * allows it.
 *
 * @param topLevel Top level element from which to scan
 * @return Number of elements
 */
private int getLength( Element topLevel )
{
    synchronized ( topLevel )
    {
        int count = 0;
        // Walk the direct children in document order.
        for ( Node child = topLevel.getFirstChild(); child != null; child = child.getNextSibling() )
        {
            // Only elements (HTML or XML) can match or contain matches.
            if ( ! ( child instanceof Element ) )
            {
                continue;
            }
            final Element candidate = (Element) child;
            if ( collectionMatch( candidate, null ) )
            {
                count++;
            }
            else if ( recurse() )
            {
                count += getLength( candidate );
            }
        }
        return count;
    }
}
/**
 * Recursively locates the element at a given position among the matching
 * elements beneath {@code topLevel}.
 *
 * The remaining position is shared between nested calls through a
 * {@link CollectionIndex}: it is decremented for every matching element
 * that is passed over, and the first matching element seen while it is
 * zero is returned. A plain {@code int} could not be used because it is
 * passed by value.
 *
 * @param topLevel Top level element from which to scan
 * @param index Holder for the number of matches still to skip
 * @return The element at the requested position, or null if the scan
 *  beneath {@code topLevel} ends first
 * @see CollectionIndex
 */
private Node item( Element topLevel, CollectionIndex index )
{
    synchronized ( topLevel )
    {
        // Walk the direct children in document order.
        for ( Node child = topLevel.getFirstChild(); child != null; child = child.getNextSibling() )
        {
            // Only elements (HTML or XML) can match or contain matches.
            if ( ! ( child instanceof Element ) )
            {
                continue;
            }
            final Element candidate = (Element) child;
            if ( collectionMatch( candidate, null ) )
            {
                // Nothing left to skip: this is the requested element.
                if ( index.isZero() )
                {
                    return child;
                }
                index.decrement();
            }
            else if ( recurse() )
            {
                final Node found = item( candidate, index );
                if ( found != null )
                {
                    return found;
                }
            }
        }
    }
    return null;
}
/**
 * Recursively searches beneath {@code topLevel} for the first element
 * that {@link #collectionMatch} accepts for the given name. Non-matching
 * elements are descended into only when {@link #recurse} allows it.
 *
 * @param topLevel Top level element from which to scan
 * @param name The named element to look for
 * @return The first named element found, or null if there is none
 */
private Node namedItem( Element topLevel, String name )
{
    synchronized ( topLevel )
    {
        // Walk the direct children in document order.
        for ( Node child = topLevel.getFirstChild(); child != null; child = child.getNextSibling() )
        {
            // Only elements (HTML or XML) can match or contain matches.
            if ( ! ( child instanceof Element ) )
            {
                continue;
            }
            final Element candidate = (Element) child;
            if ( collectionMatch( candidate, name ) )
            {
                return child;
            }
            if ( recurse() )
            {
                final Node found = namedItem( candidate, name );
                if ( found != null )
                {
                    return found;
                }
            }
        }
        // Every child was examined without finding a match.
        return null;
    }
}
/**
 * Tells the scanning methods whether they should descend into elements
 * that did not match. When looking for elements in the document,
 * recursing is needed to traverse the full document tree. When looking
 * inside a specific element (e.g. for a cell inside a row), recursing can
 * lead to erroneous results.
 *
 * @return True if methods should recurse to traverse entire tree
 */
protected boolean recurse()
{
    // Collection codes above zero recurse; the negative codes do not.
    final boolean descend = _lookingFor > 0;
    return descend;
}
/**
* Determines if current element matches based on what we're looking for.
* The element is passed along with an optional identifier name. If the
* element is the one we're looking for, return true. If the name is also
* specified, the name must match the {@code id} attribute
* (match {@code name} first for anchors).
*
* @param elem The current element
* @param name The identifier name or null
* @return The element matches what we're looking for
*/
protected boolean collectionMatch( Element elem, String name )
{
boolean match;
synchronized ( elem )
{
// Begin with no matching. Depending on what we're looking for,
// attempt to match based on the element type. This is the quickest
// way to match involving only a cast. Do the expensive string
// comparison later on.
match = false;
switch ( _lookingFor )
{
case ANCHOR:
// Anchor is an element with a 'name' attribute. Otherwise, it's
// just a link.
match = ( elem instanceof HTMLAnchorElement ) &&
elem.getAttribute( "name" ).length() > 0;
break;
case FORM:
// Any