All Downloads are FREE. Search and download functionalities are using the official Maven repository.

cat.inspiracio.html.HTMLDocumentImp Maven / Gradle / Ivy

/*
Copyright 2015 Alexander Bunkenburg 

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cat.inspiracio.html;

import static java.util.Locale.ENGLISH;
import static org.w3c.dom.UserDataHandler.NODE_CLONED;

import java.io.Serializable;
import java.io.StringWriter;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.xerces.dom.ChildNode;
import org.apache.xerces.dom.DocumentImpl;
import org.apache.xerces.dom.ElementImpl;
import org.w3c.dom.Attr;
import org.w3c.dom.DOMException;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import cat.inspiracio.dom.HTMLCollection;
import cat.inspiracio.dom.HTMLCollectionImp;

/** The class is public only so that we can extend it. */
public class HTMLDocumentImp extends DocumentImpl implements HTMLDocument, Serializable{
	private static final long serialVersionUID = 4414078004022787530L;
	
	// state -----------------------------------
    
	/** Implementation that createElement(Class) delegates to; assigned in the constructor and by setImplementation. */
	private HTMLDOMImplementation implementation;
	
    // NOTE(review): no reads or writes of the following fields are visible in the
    // intact parts of this file; presumably they back the corresponding
    // HTMLDocument accessors -- confirm against the complete source.
    private String cookie;
    private String domain;
	private Date lastModified;
	private Location location;
	private String referrer;

	// construction --------------------------
	
	/** Deprecated no-argument constructor: always throws.
	 * 
	 * Protected: outside this package, only subclasses can reach it.
	 * 
	 * @throws UnsupportedOperationException always
	 * @deprecated Use new HTMLDocumentImp(HTMLDOMImplementation) */
	@Deprecated
	protected HTMLDocumentImp(){
		//UnsupportedOperationException is a RuntimeException, so existing
		//handlers for RuntimeException keep working.
		throw new UnsupportedOperationException("deprecated");
	}
	
	/** Creates an empty document bound to the given implementation.
	 * 
	 * Protected: outside this package, only subclasses can instantiate.
	 * The new document starts off with no elements at all.
	 * 
	 * @param i the implementation, kept for later use by createElement(Class) */
	protected HTMLDocumentImp(HTMLDOMImplementation i){
		//The super-class no-argument constructor runs implicitly.
		this.implementation=i;
	}
	
	/** Replaces the implementation that this document delegates to
	 * in createElement(Class).
	 * @param i the new implementation */
	void setImplementation(HTMLDOMImplementation i){
		this.implementation=i;
	}
	
	/** Inserts a child node, adopting it into this document first.
	 * 
	 * Adoption comes first so that elements created with a different owner
	 * document, or with no owner document at all (such as from "new Image()"
	 * in javascript), can still be inserted.
	 * 
	 * @param newChild the node to insert
	 * @param refChild the node before which to insert; if null, inserts at end
	 * @return the result of the super-class insertBefore
	 * @throws DOMException propagated from adoptNode or from the super-class insertion
	 * 
	 * @see org.w3c.dom.Node#insertBefore(org.w3c.dom.Node, org.w3c.dom.Node)
	 */
	@Override public Node insertBefore(Node newChild, Node refChild) throws DOMException{
		adoptNode(newChild);
		Node inserted=super.insertBefore(newChild, refChild);
		return inserted;
	}
	
	/** Appends a child node at the end of this document's children.
	 * 
	 * The intent is that the child is adopted before it is appended, so that
	 * elements created with a different owner document or with no owner
	 * document (such as from "new Image()" in javascript) can be appended.
	 * 
	 * @param newChild the node to append
	 * @return the result of the super-class appendChild
	 * @throws DOMException propagated from the super-class
	 * @see org.w3c.dom.Node#appendChild(org.w3c.dom.Node)
	 */
	@Override public Node appendChild(Node newChild) throws DOMException {
		//No explicit adoptNode(newChild) here: super.appendChild() calls
		//insertBefore(n,r), which already adopts the node.
		Node appended=super.appendChild(newChild);
		return appended;
	}

	// ---------------------------------------
	
	//Overwrite these methods because in super-class they instantiate elements,
	//and we want to instantiate our own implementations of the elements.
	
	/** Gets the root html element. Does not create elements.
	 * @return the document element, or null if there is no html element yet
	 * @throws ClassCastException if the document element is not an HTMLHtmlElement */
	@Override public HTMLHtmlElement getDocumentElement(){
		//docElement is inherited from CoreDocumentImpl.
		//The cast may be too strong: it makes toString() fail for
		//documents that have a wrong root element.
		HTMLHtmlElement root=(HTMLHtmlElement)docElement;
		return root;
	}
    
	/** Gets the html element. Does not create elements.
	 * @return the document element, or null if there is no html element yet */
	@Override public HTMLHtmlElement getHtml(){
		HTMLHtmlElement html=(HTMLHtmlElement)docElement;
		return html;
	}
    
    /** Installs the given html element as the document element.
     * 
     * Only works while there is no document element yet; replacing an
     * existing document element is not implemented.
     * 
     * @param html the new document element; the casts below require it to be
     *   an ElementImpl, a ChildNode and an HTMLHtmlElementImp
     * @throws UnsupportedOperationException if the document already has a document element */
    private void setDocumentElement(HTMLHtmlElement html){
        if(docElement!=null)
            throw new UnsupportedOperationException("replacing the document element is not implemented");
        docElement=(ElementImpl) html;
        firstChild=(ChildNode)html;
        //NOTE(review): presumably marks the element as attached to an owner; confirm in HTMLHtmlElementImp.
        ((HTMLHtmlElementImp)html).setOwned(true);
    }

    /** Looks up the head element. Does not create elements.
     * @return the first element with tag "head", or null if there is none yet */
    @Override public HTMLHeadElement getHead(){
        HTMLElement head=getElementByTagName("head");
        return (HTMLHeadElement)head;
    }

	/** Gets the text within the first title element within head element, or null. 
	 * Does not create elements. */
    @Override public String getTitle(){
        HTMLHeadElement head=getHead();
        if(head==null)
        	return null;
        NodeList list=head.getElementsByTagName("title");
        if(0 0 ) {
                    // BODY exists but might not follow HEAD in HTML. If not,
                    // make it so and replace it. Start with the HEAD and make
                    // sure the BODY is the first element after the HEAD.
                    Node body = list.item( 0 );
                    synchronized ( body ){
                        Node child = head;
                        while ( child != null ){
                            if ( child instanceof Element){
                                if ( child != body )
                                    html.insertBefore( newBody, child );
                                else
                                    html.replaceChild( newBody, body );
                                return;
                            }
                            child = child.getNextSibling();
                        }
                        html.appendChild( newBody );
                    }
                    return;
                }
                // BODY does not exist, place it in the HTML element
                // right after the HEAD.
                html.appendChild( newBody );
            }
        }
    }

	@Override public HTMLCollection getImages(){
		NodeList nodes=getElementsByTagName("img");
		HTMLCollectionImpimages=new HTMLCollectionImp<>();
		for(int i=0; i getLinks(){
		HTMLCollectionImpcollection=new HTMLCollectionImp<>();
		HTMLCollection all=getAll();		
		for(int i=0; i getForms(){
		NodeList nodes=getElementsByTagName("form");
		HTMLCollectionImpcollection=new HTMLCollectionImp<>();
		for(int i=0; i
//			nu.validator.htmlparser
//			htmlparser
//			1.4
//		
// (In order to update to
// 
//			nu.validator
//			htmlparser
//			1.4.16
//		,
// you have to find the new correct line numbers, or find a different trick.)
// The elements , ,  may be fake in order to make a complete document.
// Detect whether they are faked or really in the source.
// Fake element creation is in class nu.validator.htmlparser.impl.TreeBuilder:
//
//	
//	In 4789, method appendHtmlElementToDocumentAndPush(), called from
//		1093 
//		1372 
//		2730 
//		3786 <--maybe fake
//
//	
//	In 4797, appendToCurrentNodeAndPushHeadElement(HtmlAttributes), called from
//		1113 
//		1381 
//		2773 
//		3803  <--maybe fake
//
//	
//	In 4817, appendToCurrentNodeAndPushBodyElement(), called from
//		1174 <-- fake
//		1405 
//		2886 <-- fake
//		3846 <-- I think fake
		boolean fake=false;
		if("html".equals(tag)){
			StackTraceElement target=new StackTraceElement(
					"nu.validator.htmlparser.impl.TreeBuilder", 
					"appendHtmlElementToDocumentAndPush", null, 4789);
			StackTraceElement caller=caller(target);
			fake=line(caller, 1093, 2730, 1372);//3786 may be fake
		}
		else if("head".equals(tag)){
			StackTraceElement target=new StackTraceElement(
					"nu.validator.htmlparser.impl.TreeBuilder", 
					"appendToCurrentNodeAndPushHeadElement", null, 4797);
			StackTraceElement caller=caller(target);
			fake=line(caller, 1113, 1381, 2773);//3803 may be fake
		}
		else if("body".equals(tag)){
			StackTraceElement target=new StackTraceElement(
					"nu.validator.htmlparser.impl.TreeBuilder", 
					"appendToCurrentNodeAndPushBodyElement", null, 4817);
			StackTraceElement caller=caller(target);
			fake=line(caller, 1174, 1405, 2886, 3846);
		}
		if(fake)
			element.setAttribute("fake", "true");
		
		return element;
	}

	/** Tells whether a stack trace element sits on one of the given lines.
	 * @param e the stack trace element; may be null
	 * @param lines the line numbers to compare against
	 * @return true if e is not null and its line number is one of lines; otherwise false */
	private boolean line(StackTraceElement e, int... lines){
		if(e==null)
			return false;
		int actual=e.getLineNumber();
		boolean found=false;
		for(int k=0; k<lines.length && !found; k++)
			found= lines[k]==actual;
		return found;
	}
	
	/** Returns who called the identified method, or null. 
	 * @param t use method name and line number */
	private StackTraceElement caller(StackTraceElement t){
		StackTraceElement[]stack=new Exception().fillInStackTrace().getStackTrace();
		String method=t.getMethodName();
		int line=t.getLineNumber();
		for(int i=0; i A subinterface of HTMLElement that represents one html element
	 * @param c The class object
	 * @return A fresh instance of the element
	 * @throws UnsupportedOperationException Not (yet) implemented for this interface.
	 * */
	@Override public  T createElement(Class c){
		//Delegates to the HTMLDOMImplementation held in the implementation field,
		//passing this document along with the requested class.
		return implementation.createElement(this, c);
	}
	
	// Object --------------------------------
	
	/** Formats the whole document as a string. Not efficient.
	 * @return the text that a DocumentWriter writes for this document
	 * @throws RuntimeException wrapping any exception raised while writing */
	@Override public String toString(){
		StringWriter buffer=new StringWriter();
		try{
			new DocumentWriter(buffer).document(this);
		}
		catch(Exception e){
			throw new RuntimeException(e);
		}
		return buffer.toString();
	}

	// accessors -----------------------------

    /** Finds the element with the given id.
     * 
     * Asks the super-class lookup first; if that yields nothing, falls back
     * to a recursive search of the tree for a matching "id" attribute.
     * 
     * @param id the id value to look for
     * @return the element, or null if neither lookup finds one */
    @Override public synchronized HTMLElement getElementById(String id){
        Element registered=super.getElementById(id);
        if(registered==null)
            return getElementById(id, this);
        return (HTMLElement)registered;
    }

    /**
     * Searches the descendants of a node, depth-first and in document order,
     * for the first element whose "id" attribute equals the given value.
     * Called by {@link #getElementById(String)}.
     *
     * @param elementId The id value to look for
     * @param node The node below which to look; the node itself is not tested
     * @return The first matching element, or null if there is none
     */
    private HTMLElement getElementById( String elementId, Node node ){
        for ( Node current = node.getFirstChild(); current != null; current = current.getNextSibling() ){
            // Only elements can carry an id or contain further elements.
            if ( !( current instanceof Element ) )
                continue;
            Element element = (Element) current;
            if ( elementId.equals( element.getAttribute( "id" ) ) )
                return (HTMLElement) current;
            HTMLElement nested = getElementById( elementId, current );
            if ( nested != null )
                return nested;
        }
        return null;
    }

	/** Gets one element out of those with the given tag.
	 * @param tag the tag name, as accepted by getElementsByTagName(String)
	 * @param index position in the list returned by getElementsByTagName(tag)
	 * @return the element at that position, or null if there is no such element */
	HTMLElement getElementByTagName(String tag, int index){
		return (HTMLElement)getElementsByTagName(tag).item(index);
	}
	
	/** Convenience: gets the first element with that tag.
	 * @param tag the tag name
	 * @return the first such element, or null if there are none */
	@Override public HTMLElement getElementByTagName(String tag){
		final int first=0;
		return getElementByTagName(tag, first);
	}
	
	/** Finds elements by tag name; the argument is lower-cased first.
	 * @param tag Will be treated as lower-case (converted with the ENGLISH locale).
	 * @return the result of the super-class lookup for the lower-cased tag */
	@Override public final NodeList getElementsByTagName( String tag ){
		String lower=tag.toLowerCase(ENGLISH);
		return super.getElementsByTagName(lower);
	}

    /** @param key will be converted to lower-case */
    @Override public final NodeList getElementsByTagNameNS( String namespace, String key){
    	key=key.toLowerCase(ENGLISH);
    	if ( namespace != null && 0 getEmbeds(){
		NodeList nodes=getElementsByTagName("embed");
		HTMLCollectionImpcollection=new HTMLCollectionImp<>();
		for(int i=0; i getPlugins(){return getEmbeds();}
	
	@Override public HTMLCollection getScripts(){
		NodeList nodes=getElementsByTagName("script");
		HTMLCollectionImpcollection=new HTMLCollectionImp<>();
		for(int i=0; i -----------------------
    
    /** Gets an element by name.
     * @param name the name to look for
     * @return the first element that getElementsByName(name) yields, or null if it yields none */
	@Override public HTMLElement get(String name){
		NodeList matches=getElementsByName(name);
		return matches.getLength()==0 ? null : (HTMLElement)matches.item(0);
	}
	
	/** Has the document got an element with this name? */
	@Override public boolean has(String name){
		NodeList list=getElementsByName(name);
		return 0 getElementsByName(String name){
		
		HTMLCollectionImpelements=new HTMLCollectionImp<>();
		if(name==null || 0==name.length())
			return elements;
		
		//iterate over all elements
		HTMLCollection all=getAll();
		for(HTMLElement e : all){
			if(name.equals(e.getName()))
				elements.add(e);
		}
		return elements;
	}
	
	/** Builds a fresh HTMLAllCollectionImp and fills it by passing the
	 * document element to its addAll.
	 * NOTE(review): presumably this gathers every element of the document,
	 * since getElementsByName iterates over the result as "all elements" -- confirm.
	 * @return the newly built collection */
	private HTMLCollection getAll(){
		HTMLAllCollectionImp collected=new HTMLAllCollectionImp();
		HTMLHtmlElement root=getDocumentElement();
		collected.addAll(root);
		return collected;
	}

	/** Not implemented.
	 * @return never returns normally
	 * @throws UnsupportedOperationException always */
	@Override public EventHandler getOnreadystatechange(){
		throw new UnsupportedOperationException();
	}
	
	/** Not implemented.
	 * @param h ignored
	 * @throws UnsupportedOperationException always */
	@Override public void setOnreadystatechange(EventHandler h){
		throw new UnsupportedOperationException();
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy