All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.html.dom.HTMLDocumentImpl Maven / Gradle / Ivy

There is a newer version: 0.4.3
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.html.dom;

import java.io.StringWriter;
import java.lang.reflect.Constructor;
import java.util.Hashtable;
import java.util.Locale;

import org.apache.xerces.dom.DocumentImpl;
import org.apache.xerces.dom.ElementImpl;
import org.w3c.dom.Attr;
import org.w3c.dom.DOMException;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.UserDataHandler;
import org.w3c.dom.html.HTMLBodyElement;
import org.w3c.dom.html.HTMLCollection;
import org.w3c.dom.html.HTMLDocument;
import org.w3c.dom.html.HTMLElement;
import org.w3c.dom.html.HTMLFrameSetElement;
import org.w3c.dom.html.HTMLHeadElement;
import org.w3c.dom.html.HTMLHtmlElement;
import org.w3c.dom.html.HTMLTitleElement;

/**
 * Implements an HTML document. Provides access to the top level element in the
 * document, its body and title.
 * 

* Several methods create new nodes of all basic types (comment, text, element, * etc.). These methods create new nodes but do not place them in the document * tree. The nodes may be placed in the document tree using {@link * org.w3c.dom.Node#appendChild} or {@link org.w3c.dom.Node#insertBefore}, or * they may be placed in some other document tree. *

* Note: <FRAMESET> documents are not supported at the moment, neither * are direct document writing ({@link #open}, {@link #write}) and HTTP attribute * methods ({@link #getURL}, {@link #getCookie}). * * @xerces.internal * * @version $Revision: 1029415 $ $Date: 2010-10-31 13:02:22 -0400 (Sun, 31 Oct 2010) $ * @author Assaf Arkin * @see org.w3c.dom.html.HTMLDocument */ public class HTMLDocumentImpl extends DocumentImpl implements HTMLDocument { private static final long serialVersionUID = 4285791750126227180L; /** * Holds HTMLCollectionImpl object with live collection of all * anchors in document. This reference is on demand only once. */ private HTMLCollectionImpl _anchors; /** * Holds HTMLCollectionImpl object with live collection of all * forms in document. This reference is on demand only once. */ private HTMLCollectionImpl _forms; /** * Holds HTMLCollectionImpl object with live collection of all * images in document. This reference is on demand only once. */ private HTMLCollectionImpl _images; /** * Holds HTMLCollectionImpl object with live collection of all * links in document. This reference is on demand only once. */ private HTMLCollectionImpl _links; /** * Holds HTMLCollectionImpl object with live collection of all * applets in document. This reference is on demand only once. */ private HTMLCollectionImpl _applets; /** * Holds string writer used by direct manipulation operation ({@link #open}. * {@link #write}, etc) to write new contents into the document and parse * that text into a document tree. */ private StringWriter _writer; /** * Holds names and classes of HTML element types. When an element with a * particular tag name is created, the matching {@link java.lang.Class} * is used to create the element object. For example, <A> matches * {@link HTMLAnchorElementImpl}. This static table is shared across all * HTML documents. * * @see #createElement */ private static Hashtable _elementTypesHTML; /** * Signature used to locate constructor of HTML element classes. This * static array is shared across all HTML documents. * * @see #createElement */ private static final Class[] _elemClassSigHTML = new Class[] { HTMLDocumentImpl.class, String.class }; /** */ public HTMLDocumentImpl() { super(); populateElementTypes(); } public synchronized Element getDocumentElement() { Node html; Node child; Node next; // The document element is the top-level HTML element of the HTML // document. Only this element should exist at the top level. // If the HTML element is found, all other elements that might // precede it are placed inside the HTML element. html = getFirstChild(); while ( html != null ) { if ( html instanceof HTMLHtmlElement ) { // REVISIT: [Q] Why is this code even here? In fact, the // original code is in error because it will // try to move ALL nodes to be children of the // HTML tag. This is not the intended behavior // for comments and processing instructions // outside the root element; it will throw a // hierarchy request error exception for doctype // nodes; *and* this code shouldn't even be // needed because the parser should never build // a document that contains more than a single // root element, anyway! -Ac /*** synchronized ( html ) { child = getFirstChild(); while ( child != null && child != html ) { next = child.getNextSibling(); html.appendChild( child ); child = next; } } /***/ return (HTMLElement) html; } html = html.getNextSibling(); } // HTML element must exist. Create a new element and dump the // entire contents of the document into it in the same order as // they appear now. html = new HTMLHtmlElementImpl( this, "HTML" ); child = getFirstChild(); while ( child != null ) { next = child.getNextSibling(); html.appendChild( child ); child = next; } appendChild( html ); return (HTMLElement) html; } /** * Obtains the <HEAD> element in the document, creating one if does * not exist before. The <HEAD> element is the first element in the * <HTML> in the document. The <HTML> element is obtained by * calling {@link #getDocumentElement}. If the element does not exist, one * is created. *

* Called by {@link #getTitle}, {@link #setTitle}, {@link #getBody} and * {@link #setBody} to assure the document has the <HEAD> element * correctly placed. * * @return The <HEAD> element */ public synchronized HTMLElement getHead() { Node head; Node html; Node child; Node next; // Call getDocumentElement() to get the HTML element that is also the // top-level element in the document. Get the first element in the // document that is called HEAD. Work with that. html = getDocumentElement(); synchronized ( html ) { head = html.getFirstChild(); while ( head != null && ! ( head instanceof HTMLHeadElement ) ) head = head.getNextSibling(); // HEAD exists but might not be first element in HTML: make sure // it is and return it. if ( head != null ) { synchronized ( head ) { child = html.getFirstChild(); while ( child != null && child != head ) { next = child.getNextSibling(); head.insertBefore( child, head.getFirstChild() ); child = next; } } return (HTMLElement) head; } // Head does not exist, create a new one, place it at the top of the // HTML element and return it. head = new HTMLHeadElementImpl( this, "HEAD" ); html.insertBefore( head, html.getFirstChild() ); } return (HTMLElement) head; } public synchronized String getTitle() { HTMLElement head; NodeList list; Node title; // Get the HEAD element and look for the TITLE element within. // When found, make sure the TITLE is a direct child of HEAD, // and return the title's text (the Text node contained within). head = getHead(); list = head.getElementsByTagName( "TITLE" ); if ( list.getLength() > 0 ) { title = list.item( 0 ); return ( (HTMLTitleElement) title ).getText(); } // No TITLE found, return an empty string. return ""; } public synchronized void setTitle( String newTitle ) { HTMLElement head; NodeList list; Node title; // Get the HEAD element and look for the TITLE element within. // When found, make sure the TITLE is a direct child of HEAD, // and set the title's text (the Text node contained within). head = getHead(); list = head.getElementsByTagName( "TITLE" ); if ( list.getLength() > 0 ) { title = list.item( 0 ); if ( title.getParentNode() != head ) head.appendChild( title ); ( (HTMLTitleElement) title ).setText( newTitle ); } else { // No TITLE found, create a new element and place it at the end // of the HEAD element. title = new HTMLTitleElementImpl( this, "TITLE" ); ( (HTMLTitleElement) title ).setText( newTitle ); head.appendChild( title ); } } public synchronized HTMLElement getBody() { Node html; Node head; Node body; Node child; Node next; // Call getDocumentElement() to get the HTML element that is also the // top-level element in the document. Get the first element in the // document that is called BODY. Work with that. html = getDocumentElement(); head = getHead(); synchronized ( html ) { body = head.getNextSibling(); while ( body != null && ! ( body instanceof HTMLBodyElement ) && ! ( body instanceof HTMLFrameSetElement ) ) body = body.getNextSibling(); // BODY/FRAMESET exists but might not be second element in HTML // (after HEAD): make sure it is and return it. if ( body != null ) { synchronized ( body ) { child = head.getNextSibling(); while ( child != null && child != body ) { next = child.getNextSibling(); body.insertBefore( child, body.getFirstChild() ); child = next; } } return (HTMLElement) body; } // BODY does not exist, create a new one, place it in the HTML element // right after the HEAD and return it. body = new HTMLBodyElementImpl( this, "BODY" ); html.appendChild( body ); } return (HTMLElement) body; } public synchronized void setBody( HTMLElement newBody ) { Node html; Node body; Node head; Node child; NodeList list; synchronized ( newBody ) { // Call getDocumentElement() to get the HTML element that is also the // top-level element in the document. Get the first element in the // document that is called BODY. Work with that. html = getDocumentElement(); head = getHead(); synchronized ( html ) { list = this.getElementsByTagName( "BODY" ); if ( list.getLength() > 0 ) { // BODY exists but might not follow HEAD in HTML. If not, // make it so and replce it. Start with the HEAD and make // sure the BODY is the first element after the HEAD. body = list.item( 0 ); synchronized ( body ) { child = head; while ( child != null ) { if ( child instanceof Element ) { if ( child != body ) html.insertBefore( newBody, child ); else html.replaceChild( newBody, body ); return; } child = child.getNextSibling(); } html.appendChild( newBody ); } return; } // BODY does not exist, place it in the HTML element // right after the HEAD. html.appendChild( newBody ); } } } public synchronized Element getElementById( String elementId ) { Element idElement = super.getElementById(elementId); if (idElement != null) { return idElement; } return getElementById( elementId, this ); } public NodeList getElementsByName( String elementName ) { return new NameNodeListImpl( this, elementName ); } public final NodeList getElementsByTagName( String tagName ) { return super.getElementsByTagName( tagName.toUpperCase(Locale.ENGLISH) ); } public final NodeList getElementsByTagNameNS( String namespaceURI, String localName ) { if ( namespaceURI != null && namespaceURI.length() > 0 ) { return super.getElementsByTagNameNS( namespaceURI, localName.toUpperCase(Locale.ENGLISH) ); } return super.getElementsByTagName( localName.toUpperCase(Locale.ENGLISH) ); } /** * Xerces-specific constructor. "localName" is passed in, so we don't need * to create a new String for it. * * @param namespaceURI The namespace URI of the element to * create. * @param qualifiedName The qualified name of the element type to * instantiate. * @param localpart The local name of the element to instantiate. * @return Element A new Element object with the following attributes: * @throws DOMException INVALID_CHARACTER_ERR: Raised if the specified * name contains an invalid character. */ public Element createElementNS(String namespaceURI, String qualifiedName, String localpart) throws DOMException { return createElementNS(namespaceURI, qualifiedName); } public Element createElementNS( String namespaceURI, String qualifiedName ) { if ( namespaceURI == null || namespaceURI.length() == 0 ) { return createElement( qualifiedName ); } return super.createElementNS( namespaceURI, qualifiedName ); } public Element createElement( String tagName ) throws DOMException { Class elemClass; Constructor cnst; // First, make sure tag name is all upper case, next get the associated // element class. If no class is found, generate a generic HTML element. // Do so also if an unexpected exception occurs. tagName = tagName.toUpperCase(Locale.ENGLISH); elemClass = (Class) _elementTypesHTML.get( tagName ); if ( elemClass != null ) { // Get the constructor for the element. The signature specifies an // owner document and a tag name. Use the constructor to instantiate // a new object and return it. try { cnst = elemClass.getConstructor( _elemClassSigHTML ); return (Element) cnst.newInstance( new Object[] { this, tagName } ); } catch ( Exception except ) { /* Throwable thrw; if ( except instanceof java.lang.reflect.InvocationTargetException ) thrw = ( (java.lang.reflect.InvocationTargetException) except ).getTargetException(); else thrw = except; System.out.println( "Exception " + thrw.getClass().getName() ); System.out.println( thrw.getMessage() ); */ throw new IllegalStateException( "HTM15 Tag '" + tagName + "' associated with an Element class that failed to construct.\n" + tagName); } } return new HTMLElementImpl( this, tagName ); } /** * Creates an Attribute having this Document as its OwnerDoc. * Overrides {@link DocumentImpl#createAttribute} and returns * and attribute whose name is lower case. * * @param name The name of the attribute * @return An attribute whose name is all lower case * @throws DOMException(INVALID_NAME_ERR) if the attribute name * is not acceptable */ public Attr createAttribute( String name ) throws DOMException { return super.createAttribute( name.toLowerCase(Locale.ENGLISH) ); } public String getReferrer() { // Information not available on server side. return null; } public String getDomain() { // Information not available on server side. return null; } public String getURL() { // Information not available on server side. return null; } public String getCookie() { // Information not available on server side. return null; } public void setCookie( String cookie ) { // Information not available on server side. } public HTMLCollection getImages() { // For more information see HTMLCollection#collectionMatch if ( _images == null ) _images = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.IMAGE ); return _images; } public HTMLCollection getApplets() { // For more information see HTMLCollection#collectionMatch if ( _applets == null ) _applets = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.APPLET ); return _applets; } public HTMLCollection getLinks() { // For more information see HTMLCollection#collectionMatch if ( _links == null ) _links = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.LINK ); return _links; } public HTMLCollection getForms() { // For more information see HTMLCollection#collectionMatch if ( _forms == null ) _forms = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.FORM ); return _forms; } public HTMLCollection getAnchors() { // For more information see HTMLCollection#collectionMatch if ( _anchors == null ) _anchors = new HTMLCollectionImpl( getBody(), HTMLCollectionImpl.ANCHOR ); return _anchors; } public void open() { // When called an in-memory is prepared. The document tree is still // accessible the old way, until this writer is closed. if ( _writer == null ) _writer = new StringWriter(); } public void close() { // ! NOT IMPLEMENTED, REQUIRES PARSER ! if ( _writer != null ) { _writer = null; } } public void write( String text ) { // Write a string into the in-memory writer. if ( _writer != null ) _writer.write( text ); } public void writeln( String text ) { // Write a line into the in-memory writer. if ( _writer != null ) _writer.write( text + "\n" ); } public Node cloneNode( boolean deep ) { HTMLDocumentImpl newdoc = new HTMLDocumentImpl(); callUserDataHandlers(this, newdoc, UserDataHandler.NODE_CLONED); cloneNode(newdoc, deep); return newdoc; } /* (non-Javadoc) * @see CoreDocumentImpl#canRenameElements() */ protected boolean canRenameElements(String newNamespaceURI, String newNodeName, ElementImpl el) { if (el.getNamespaceURI() != null) { // element is not HTML: // can be renamed if not changed to HTML return newNamespaceURI != null; } // check whether a class change is required Class newClass = (Class) _elementTypesHTML.get(newNodeName.toUpperCase(Locale.ENGLISH)); Class oldClass = (Class) _elementTypesHTML.get(el.getTagName()); return newClass == oldClass; } /** * Recursive method retreives an element by its id attribute. * Called by {@link #getElementById(String)}. * * @param elementId The id value to look for * @return The node in which to look for */ private Element getElementById( String elementId, Node node ) { Node child; Element result; child = node.getFirstChild(); while ( child != null ) { if ( child instanceof Element ) { if ( elementId.equals( ( (Element) child ).getAttribute( "id" ) ) ) return (Element) child; result = getElementById( elementId, child ); if ( result != null ) return result; } child = child.getNextSibling(); } return null; } /** * Called by the constructor to populate the element types list (see {@link * #_elementTypesHTML}). Will be called multiple times but populate the list * only the first time. Replacement for static constructor. */ private synchronized static void populateElementTypes() { // This class looks like it is due to some strange // (read: inconsistent) JVM bugs. // Initially all this code was placed in the static constructor, // but that caused some early JVMs (1.1) to go mad, and if a // class could not be found (as happened during development), // the JVM would die. // Bertrand Delacretaz pointed out // several configurations where HTMLAnchorElementImpl.class // failed, forcing me to revert back to Class.forName(). if ( _elementTypesHTML != null ) return; _elementTypesHTML = new Hashtable( 63 ); populateElementType( "A", "HTMLAnchorElementImpl" ); populateElementType( "APPLET", "HTMLAppletElementImpl" ); populateElementType( "AREA", "HTMLAreaElementImpl" ); populateElementType( "BASE", "HTMLBaseElementImpl" ); populateElementType( "BASEFONT", "HTMLBaseFontElementImpl" ); populateElementType( "BLOCKQUOTE", "HTMLQuoteElementImpl" ); populateElementType( "BODY", "HTMLBodyElementImpl" ); populateElementType( "BR", "HTMLBRElementImpl" ); populateElementType( "BUTTON", "HTMLButtonElementImpl" ); populateElementType( "DEL", "HTMLModElementImpl" ); populateElementType( "DIR", "HTMLDirectoryElementImpl" ); populateElementType( "DIV", "HTMLDivElementImpl" ); populateElementType( "DL", "HTMLDListElementImpl" ); populateElementType( "FIELDSET", "HTMLFieldSetElementImpl" ); populateElementType( "FONT", "HTMLFontElementImpl" ); populateElementType( "FORM", "HTMLFormElementImpl" ); populateElementType( "FRAME","HTMLFrameElementImpl" ); populateElementType( "FRAMESET", "HTMLFrameSetElementImpl" ); populateElementType( "HEAD", "HTMLHeadElementImpl" ); populateElementType( "H1", "HTMLHeadingElementImpl" ); populateElementType( "H2", "HTMLHeadingElementImpl" ); populateElementType( "H3", "HTMLHeadingElementImpl" ); populateElementType( "H4", "HTMLHeadingElementImpl" ); populateElementType( "H5", "HTMLHeadingElementImpl" ); populateElementType( "H6", "HTMLHeadingElementImpl" ); populateElementType( "HR", "HTMLHRElementImpl" ); populateElementType( "HTML", "HTMLHtmlElementImpl" ); populateElementType( "IFRAME", "HTMLIFrameElementImpl" ); populateElementType( "IMG", "HTMLImageElementImpl" ); populateElementType( "INPUT", "HTMLInputElementImpl" ); populateElementType( "INS", "HTMLModElementImpl" ); populateElementType( "ISINDEX", "HTMLIsIndexElementImpl" ); populateElementType( "LABEL", "HTMLLabelElementImpl" ); populateElementType( "LEGEND", "HTMLLegendElementImpl" ); populateElementType( "LI", "HTMLLIElementImpl" ); populateElementType( "LINK", "HTMLLinkElementImpl" ); populateElementType( "MAP", "HTMLMapElementImpl" ); populateElementType( "MENU", "HTMLMenuElementImpl" ); populateElementType( "META", "HTMLMetaElementImpl" ); populateElementType( "OBJECT", "HTMLObjectElementImpl" ); populateElementType( "OL", "HTMLOListElementImpl" ); populateElementType( "OPTGROUP", "HTMLOptGroupElementImpl" ); populateElementType( "OPTION", "HTMLOptionElementImpl" ); populateElementType( "P", "HTMLParagraphElementImpl" ); populateElementType( "PARAM", "HTMLParamElementImpl" ); populateElementType( "PRE", "HTMLPreElementImpl" ); populateElementType( "Q", "HTMLQuoteElementImpl" ); populateElementType( "SCRIPT", "HTMLScriptElementImpl" ); populateElementType( "SELECT", "HTMLSelectElementImpl" ); populateElementType( "STYLE", "HTMLStyleElementImpl" ); populateElementType( "TABLE", "HTMLTableElementImpl" ); populateElementType( "CAPTION", "HTMLTableCaptionElementImpl" ); populateElementType( "TD", "HTMLTableCellElementImpl" ); populateElementType( "TH", "HTMLTableCellElementImpl" ); populateElementType( "COL", "HTMLTableColElementImpl" ); populateElementType( "COLGROUP", "HTMLTableColElementImpl" ); populateElementType( "TR", "HTMLTableRowElementImpl" ); populateElementType( "TBODY", "HTMLTableSectionElementImpl" ); populateElementType( "THEAD", "HTMLTableSectionElementImpl" ); populateElementType( "TFOOT", "HTMLTableSectionElementImpl" ); populateElementType( "TEXTAREA", "HTMLTextAreaElementImpl" ); populateElementType( "TITLE", "HTMLTitleElementImpl" ); populateElementType( "UL", "HTMLUListElementImpl" ); } private static void populateElementType( String tagName, String className ) { try { _elementTypesHTML.put( tagName, ObjectFactory.findProviderClass("org.apache.html.dom." + className, HTMLDocumentImpl.class.getClassLoader(), true) ); } catch ( Exception except ) { throw new RuntimeException( "HTM019 OpenXML Error: Could not find or execute class " + className + " implementing HTML element " + tagName + "\n" + className + "\t" + tagName); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy