org.htmlparser.visitors.NodeVisitor Maven / Gradle / Ivy
// HTMLParser Library $Name: v1_5 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Somik Raha
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/visitors/NodeVisitor.java,v $
// $Author: derrickoswald $
// $Date: 2005/04/24 17:48:27 $
// $Revision: 1.39 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
package org.htmlparser.visitors;
import org.htmlparser.Remark;
import org.htmlparser.Text;
import org.htmlparser.Tag;
/**
* The base class for the 'Visitor' pattern.
* Classes that wish to use visitAllNodesWith()
will subclass
* this class and provide implementations for methods they are interested in
* processing.
* The operation of visitAllNodesWith()
is to call
* beginParsing()
, then visitXXX()
according to the
* types of nodes encountered in depth-first order and finally
* finishedParsing()
.
* Typical code to print all the link tags:
*
* import org.htmlparser.Parser;
* import org.htmlparser.Tag;
* import org.htmlparser.Text;
* import org.htmlparser.util.ParserException;
* import org.htmlparser.visitors.NodeVisitor;
*
* public class MyVisitor extends NodeVisitor
* {
* public MyVisitor ()
* {
* }
*
* public void visitTag (Tag tag)
* {
* System.out.println ("\n" + tag.getTagName () + tag.getStartPosition ());
* }
*
* public void visitStringNode (Text string)
* {
* System.out.println (string);
* }
*
* public static void main (String[] args) throws ParserException
* {
* Parser parser = new Parser ("http://cbc.ca");
* Visitor visitor = new MyVisitor ();
* parser.visitAllNodesWith (visitor);
* }
* }
*
* If you want to handle more than one tag type with the same visitor
* you will need to check the tag type in the visitTag method. You can
* do that by either checking the tag name:
*
* public void visitTag (Tag tag)
* {
* if (tag.getName ().equals ("BODY"))
* ... do something with the BODY tag
* else if (tag.getName ().equals ("FRAME"))
* ... do something with the FRAME tag
* }
*
* or you can use instanceof
if all the tags you want to handle
* have a {@link org.htmlparser.PrototypicalNodeFactory#registerTag registered}
* tag (i.e. they are generated by the NodeFactory):
*
* public void visitTag (Tag tag)
* {
* if (tag instanceof BodyTag)
* {
* BodyTag body = (BodyTag)tag;
* ... do something with body
* }
* else if (tag instanceof FrameTag)
* {
* FrameTag frame = (FrameTag)tag;
* ... do something with frame
* }
* else // other specific tags and generic TagNode objects
* {
* }
* }
*/
public abstract class NodeVisitor
{
private boolean mRecurseChildren;
private boolean mRecurseSelf;
/**
* Creates a node visitor that recurses itself and it's children.
*/
public NodeVisitor ()
{
this (true);
}
/**
* Creates a node visitor that recurses itself and it's children
* only if recurseChildren
is true
.
* @param recurseChildren If true
, the visitor will
* visit children, otherwise only the top level nodes are recursed.
*/
public NodeVisitor (boolean recurseChildren)
{
this (recurseChildren, true);
}
/**
* Creates a node visitor that recurses itself only if
* recurseSelf
is true
and it's children
* only if recurseChildren
is true
.
* @param recurseChildren If true
, the visitor will
* visit children, otherwise only the top level nodes are recursed.
* @param recurseSelf If true
, the visitor will
* visit the top level node.
*/
public NodeVisitor (boolean recurseChildren, boolean recurseSelf)
{
mRecurseChildren = recurseChildren;
mRecurseSelf = recurseSelf;
}
/**
* Override this method if you wish to do special
* processing prior to the start of parsing.
*/
public void beginParsing ()
{
}
/**
* Called for each Tag
visited.
* @param tag The tag being visited.
*/
public void visitTag (Tag tag)
{
}
/**
* Called for each Tag
visited that is an end tag.
* @param tag The end tag being visited.
*/
public void visitEndTag (Tag tag)
{
}
/**
* Called for each StringNode
visited.
* @param string The string node being visited.
*/
public void visitStringNode (Text string)
{
}
/**
* Called for each RemarkNode
visited.
* @param remark The remark node being visited.
*/
public void visitRemarkNode (Remark remark)
{
}
/**
* Override this method if you wish to do special
* processing upon completion of parsing.
*/
public void finishedParsing ()
{
}
/**
* Depth traversal predicate.
* @return true
if children are to be visited.
*/
public boolean shouldRecurseChildren ()
{
return (mRecurseChildren);
}
/**
* Self traversal predicate.
* @return true
if a node itself is to be visited.
*/
public boolean shouldRecurseSelf ()
{
return (mRecurseSelf);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy