All downloads are free. The search and download functionality uses the official Maven repository.

cat.inspiracio.html.DocumentRecurser Maven / Gradle / Ivy

Go to download

HTML-parser provides a parser for HTML 5 that produces an HTML 5 document object model. It aims to be a Java implementation of http://www.w3.org/TR/html5/. It is intended for use on the server, so it does not implement features that are only relevant in the client, such as event handling. It can be used from JavaScript via Java's scripting library.

The newest version!
/*
Copyright 2016 Alexander Bunkenburg 

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cat.inspiracio.html;

import org.w3c.dom.Attr;
import org.w3c.dom.CDATASection;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.Entity;
import org.w3c.dom.EntityReference;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Notation;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.Text;

/**
 * Recurses over a document. Subclasses can do interesting things on the way.
 *
 * <p>There is a method for each kind of object in a document. The "recursing
 * methods" walk the tree and dispatch; the "working methods" are the hooks
 * that subclasses override. Most hooks return the recurser itself so that
 * calls can be chained.
 *
 * <p>All methods declare {@code Exception} so that overriding methods can
 * throw anything. Alternative: parametrise the exception type. But then there
 * can be only one exception type.
 *
 * @param <T> the type of the outcome, returned by {@link #close(Document)}
 */
public class DocumentRecurser<T> {

    public DocumentRecurser() {}

    // Recursing methods -----------------------------------------

    /**
     * Processes a document and returns the result.
     * This method calls
     * <pre>
     * open(d);
     * doctype(d.getDoctype());
     * documentURI(d.getDocumentURI());
     * Element e=d.getDocumentElement();
     * if(e!=null)
     *     element(e);
     * return close(d);
     * </pre>
     *
     * @param d the document
     * @return the outcome, as produced by {@link #close(Document)}
     * @throws Exception something wrong
     */
    public T document(Document d) throws Exception {
        open(d);
        doctype(d.getDoctype());
        documentURI(d.getDocumentURI());
        Element e = d.getDocumentElement();
        if (e != null) {
            element(e);
        }
        return close(d);
    }

    /**
     * Processes an element.
     * This method calls
     * <pre>
     * open(e);
     * nodes(e.getChildNodes());
     * close(e);
     * </pre>
     *
     * @param e the element
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> element(Element e) throws Exception {
        open(e);
        nodes(e.getChildNodes());
        return close(e);
    }

    /**
     * Processes the attributes of an element.
     * This method calls {@code attribute(a)} for each.
     *
     * @param e the element
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> attributes(Element e) throws Exception {
        NamedNodeMap as = e.getAttributes();
        for (int i = 0; i < as.getLength(); i++) {
            Attr a = (Attr) as.item(i);
            attribute(a);
        }
        return this;
    }

    /**
     * Processes an attribute.
     * This method calls {@code attribute(a.getName(), a.getValue())}.
     *
     * @param a the attribute
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> attribute(Attr a) throws Exception {
        String key = a.getName(); // qualified name, not a.getLocalName()
        String value = a.getValue();
        return attribute(key, value);
    }

    /**
     * Processes a Text.
     * This method calls {@code text(t.getData())}.
     *
     * @param t the text
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> text(Text t) throws Exception {
        String s = t.getData();
        return text(s);
    }

    /**
     * Processes a Comment.
     * This method calls {@code comment(c.getData())}.
     *
     * @param c the comment
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> comment(Comment c) throws Exception {
        String s = c.getData();
        return comment(s);
    }

    /**
     * Processes a CData section.
     * This method calls {@code cdata(cd.getData())}.
     *
     * @param cd the CData
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> cdata(CDATASection cd) throws Exception {
        String s = cd.getData();
        return cdata(s);
    }

    /**
     * Processes a list of nodes.
     * This method calls {@code node(n)} for each.
     *
     * @param ns the node list
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> nodes(NodeList ns) throws Exception {
        for (int i = 0; i < ns.getLength(); i++) {
            Node n = ns.item(i);
            node(n);
        }
        return this;
    }

    /**
     * Processes a node.
     * This method looks at the node type and calls the processing method
     * for that type. Nodes of any other type are ignored.
     *
     * @param n the node
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> node(Node n) throws Exception {
        //Discriminate by node type, not by subclass,
        //because for example CDATASection extends Text (confusingly).
        short type = n.getNodeType();
        switch (type) {
            case Node.ATTRIBUTE_NODE:
                attribute((Attr) n);
                break;
            case Node.CDATA_SECTION_NODE:
                cdata((CDATASection) n);
                break;
            case Node.COMMENT_NODE:
                comment((Comment) n);
                break;
            case Node.DOCUMENT_FRAGMENT_NODE:
                fragment((DocumentFragment) n);
                break;
            case Node.DOCUMENT_NODE:
                // The outcome of a nested document is discarded here.
                document((Document) n);
                break;
            case Node.DOCUMENT_TYPE_NODE:
                doctype((DocumentType) n);
                break;
            case Node.ELEMENT_NODE:
                element((Element) n);
                break;
            case Node.ENTITY_NODE:
                entity((Entity) n);
                break;
            case Node.ENTITY_REFERENCE_NODE:
                entityReference((EntityReference) n);
                break;
            case Node.NOTATION_NODE:
                notation((Notation) n);
                break;
            case Node.PROCESSING_INSTRUCTION_NODE:
                processingInstruction((ProcessingInstruction) n);
                break;
            case Node.TEXT_NODE:
                text((Text) n);
                break;
        }
        return this;
    }

    // Working methods -------------------------------------------

    /**
     * Prepares processing a document.
     * This method does nothing.
     *
     * @param d the document
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> open(Document d) throws Exception {
        return this;
    }

    /**
     * Processes the doctype.
     * This method does nothing.
     *
     * @param type the type; {@link #document(Document)} passes
     *     {@code d.getDoctype()} without a null check
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> doctype(DocumentType type) throws Exception {
        return this;
    }

    /**
     * Processes the document URI.
     * This method does nothing.
     *
     * @param uri the document URI; {@link #document(Document)} passes
     *     {@code d.getDocumentURI()} without a null check
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> documentURI(String uri) throws Exception {
        return this;
    }

    /**
     * Finishes processing the document and returns the result.
     * This method does nothing and returns null.
     *
     * @param d the document
     * @return the outcome; {@code null} in this default implementation
     * @throws Exception something wrong
     */
    protected T close(Document d) throws Exception {
        return null;
    }

    /**
     * Processes the opening tag of an element.
     * This method just calls {@code attributes(e)}.
     * When you override, you must call {@code attributes(e)} yourself.
     *
     * @param e the element
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> open(Element e) throws Exception {
        return attributes(e);
    }

    /**
     * Processes the closing tag of an element.
     * This method does nothing.
     *
     * @param e the element
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> close(Element e) throws Exception {
        return this;
    }

    /**
     * Processes an attribute.
     * This method does nothing.
     *
     * @param key the key
     * @param value the value
     * @return this, for fluent style
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> attribute(String key, String value) throws Exception {
        return this;
    }

    /**
     * Processes a text.
     * This method does nothing.
     *
     * @param s the text
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> text(String s) throws Exception {
        return this;
    }

    /**
     * Processes a cdata section.
     * This method does nothing.
     *
     * @param s the CData text
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> cdata(String s) throws Exception {
        return this;
    }

    /**
     * Processes a comment.
     * This method does nothing.
     *
     * @param s the comment text
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> comment(String s) throws Exception {
        return this;
    }

    /**
     * Processes an entity.
     * This method does nothing.
     *
     * @param e the entity
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> entity(Entity e) throws Exception {
        return this;
    }

    /**
     * Processes an entity reference.
     * This method does nothing.
     *
     * @param er the entity reference
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> entityReference(EntityReference er) throws Exception {
        return this;
    }

    /**
     * Processes a notation node.
     * This method does nothing.
     *
     * @param n the notation
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> notation(Notation n) throws Exception {
        return this;
    }

    /**
     * Processes a processing instruction.
     * This method does nothing.
     *
     * @param i the processing instruction
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> processingInstruction(ProcessingInstruction i) throws Exception {
        return this;
    }

    /**
     * Processes a document fragment.
     * This method processes the child nodes of the fragment.
     *
     * @param f the document fragment
     * @return the recurser
     * @throws Exception something wrong
     */
    protected DocumentRecurser<T> fragment(DocumentFragment f) throws Exception {
        return nodes(f.getChildNodes());
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy