cat.inspiracio.html.DocumentRecurser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of html-parser Show documentation
Show all versions of html-parser Show documentation
HTML-parser provides a parser for HTML 5 that produces
an HTML 5 document object model.
It aims to be a Java-implementation of
http://www.w3.org/TR/html5/.
It is for use in the server. It does not implement features
that are relevant in the client, like event handling.
It is for use from JavaScript, via Java's scripting library.
The newest version!
/*
Copyright 2016 Alexander Bunkenburg
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cat.inspiracio.html;
import org.w3c.dom.Attr;
import org.w3c.dom.CDATASection;
import org.w3c.dom.Comment;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentFragment;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.Entity;
import org.w3c.dom.EntityReference;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Notation;
import org.w3c.dom.ProcessingInstruction;
import org.w3c.dom.Text;
/** Recurses over a document. Subclasses can do interesting things on the way.
*
* There is a method for each of the objects in a document.
*
* All methods declare Exception so that they can throw anything.
* Alternative: parametrise exception type. But then there can be only one
* exception type.
*
* @param <T> the type of the outcome, returned by close(Document).
*/
public class DocumentRecurser {
/** Creates a recurser. The constructor takes no arguments and its body is empty. */
public DocumentRecurser(){}
// Recursing methods -----------------------------------------
/** Processes a document and returns the result.
* This method calls
* <pre>
* open(d);
* doctype(d.getDoctype());
* documentURI(d.getDocumentURI());
* Element e=d.getDocumentElement();
* if(e!=null)
*     element(e);
* return close(d);
* </pre>
*
* @param d the document
* @return the outcome
* @throws Exception something wrong */
public T document(Document d) throws Exception {
    // Fixed sequence: open, doctype, document URI, root element, close.
    open(d);

    final DocumentType type = d.getDoctype();
    doctype(type);

    final String uri = d.getDocumentURI();
    documentURI(uri);

    // The root element is only visited when the document actually has one.
    final Element root = d.getDocumentElement();
    if (root != null) {
        element(root);
    }

    // Whatever close(d) yields is the outcome of the whole traversal.
    return close(d);
}
/** Processes an element.
* This method calls
* <pre>
* open(e);
* nodes(e.getChildNodes());
* close(e);
* </pre>
*
* @param e the element
* @return the recurser
* @throws Exception something wrong */
protected DocumentRecurser element(Element e) throws Exception {
    // Opening step for the element itself.
    open(e);

    // Hand the element's child list to nodes(...).
    final NodeList children = e.getChildNodes();
    nodes(children);

    // The value produced by close(e) is what the caller receives.
    return close(e);
}
/** Processes the attributes of an element.
* This method calls {@code attribute(a)} for each attribute.
* @param e the element
* @return the recurser
* @throws Exception something wrong */
protected DocumentRecurser attributes(Element e) throws Exception{
NamedNodeMap as=e.getAttributes();
for(int i=0; iattribute(a.getName(), a.getValue())
* @param a the attribute
* @return the recurser
* @throws Exception something wrong */
protected DocumentRecursertext(t.getData()).* @param t the text * @return the recurser * @throws Exception something wrong */ protected DocumentRecurser
comment(c.getData()).* @param c the comment * @return the recurser * @throws Exception something wrong */ protected DocumentRecurser
cdata(cd.getData()).* @param cd the CData * @return the recurser * @throws Exception something wrong */ protected DocumentRecurser
node(n)for each. * @param ns the node list * @return the recurser * @throws Exception something wrong */ protected DocumentRecurser
attributes(e).* When you override, you must call attributes(e) yourself. * @param e the element * @return the recurser * @throws Exception something wrong */ protected DocumentRecurser
© 2015 - 2025 Weber Informatics LLC | Privacy Policy