All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.htmlparser.beans.LinkBean Maven / Gradle / Ivy

// HTMLParser Library $Name: v1_5 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/beans/LinkBean.java,v $
// $Author: derrickoswald $
// $Date: 2005/05/15 11:49:03 $
// $Revision: 1.32 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.beans;

import java.beans.PropertyChangeListener;
import java.beans.PropertyChangeSupport;
import java.io.Serializable;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;

import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.EncodingChangeException;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

/**
 * Extract links from a URL.
 */
public class LinkBean extends Object implements Serializable
{
    /**
     * Property name in event where the URL contents changes.
     */
    public static final String PROP_LINKS_PROPERTY = "links";

    /**
     * Property name in event where the URL changes.
     */
    public static final String PROP_URL_PROPERTY = "URL";

    /**
     * Bound property support.
     */
    protected PropertyChangeSupport mPropertySupport;

    /**
     * The strings extracted from the URL.
     */
    protected URL[] mLinks;

    /**
     * The parser used to extract strings.
     */
    protected Parser mParser;

    /** Creates new LinkBean */
    public LinkBean ()
    {
        mPropertySupport = new PropertyChangeSupport (this);
        mLinks = null;
        mParser = new Parser ();
    }

    //
    // internals
    //

    /**
     * Internal routine to extract all the links from the parser.
     * @return A list of all links on the page as URLs.
     * @exception ParserException If the parse fails.
     */
    protected URL[] extractLinks () throws ParserException
    {
        NodeFilter filter;
        NodeList list;
        List vector;
        LinkTag link;
        URL[] ret;

        mParser.reset ();
        filter = new NodeClassFilter (LinkTag.class);
        try
        {
            list = mParser.extractAllNodesThatMatch (filter);
        }
        catch (EncodingChangeException ece)
        {
            mParser.reset ();
            list = mParser.extractAllNodesThatMatch (filter);
        }
        vector = new ArrayList();
        for (int i = 0; i < list.size (); i++)
            try
            {
                link = (LinkTag)list.elementAt (i);
                vector.add(new URL (link.getLink ()));
            }
            catch (MalformedURLException murle)
            {
                //vector.remove (i);
                //i--;
            }
        ret = new URL[vector.size ()];
        for(int i = 0;  i < vector.size(); i++)
        	ret[i] = (URL)vector.get(i);

        return (ret);
    }

    /**
     * Determine if two arrays of URL's are the same.
     * @param array1 One array of URL's
     * @param array2 Another array of URL's
     * @return true if the URL's match in number and value,
     * false otherwise.
     */
    protected boolean equivalent (URL[] array1, URL[] array2)
    {
        boolean ret;

        ret = false;
        if ((null == array1) && (null == array2))
            ret = true;
        else if ((null != array1) && (null != array2))
            if (array1.length == array2.length)
            {
                ret = true;
                for (int i = 0; i < array1.length && ret; i++)
                    if (!(array1[i] == array2[i]))
                        ret = false;
            }

        return (ret);
    }

    //
    // Property change support.
    //

    /**
     * Add a PropertyChangeListener to the listener list.
     * The listener is registered for all properties.
     * @param listener The PropertyChangeListener to be added.
     */
    public void addPropertyChangeListener (PropertyChangeListener listener)
    {
        mPropertySupport.addPropertyChangeListener (listener);
    }

    /**
     * Remove a PropertyChangeListener from the listener list.
     * This removes a registered PropertyChangeListener.
     * @param listener The PropertyChangeListener to be removed.
     */
    public void removePropertyChangeListener (PropertyChangeListener listener)
    {
        mPropertySupport.removePropertyChangeListener (listener);
    }

    //
    // Properties
    //

    /**
     * Refetch the URL contents.
     */
    private void setLinks ()
    {
        String url;
        URL[] urls;
        URL[] oldValue;

        url = getURL ();
        if (null != url)
            try
            {
                urls = extractLinks ();
                if (!equivalent (mLinks, urls))
                {
                    oldValue = mLinks;
                    mLinks = urls;
                    mPropertySupport.firePropertyChange (
                        PROP_LINKS_PROPERTY, oldValue, mLinks);
                }
            }
            catch (ParserException hpe)
            {
                mLinks = null;
            }
    }

    /**
     * Getter for property links.
     * @return Value of property links.
     */
    public URL[] getLinks ()
    {
        if (null == mLinks)
            try
            {
                mLinks = extractLinks ();
                mPropertySupport.firePropertyChange (
                    PROP_LINKS_PROPERTY, null, mLinks);
            }
            catch (ParserException hpe)
            {
                mLinks = null;
            }

        return (mLinks);
    }


    /**
     * Getter for property URL.
     * @return Value of property URL.
     */
    public String getURL ()
    {
        return (mParser.getURL ());
    }

    /**
     * Setter for property URL.
     * @param url New value of property URL.
     */
    public void setURL (String url)
    {
        String old;

        old = getURL ();
        if (((null == old) && (null != url)) || ((null != old)
            && !old.equals (url)))
        {
            try
            {
                mParser.setURL (url);
                mPropertySupport.firePropertyChange (
                    PROP_URL_PROPERTY, old, getURL ());
                setLinks ();
            }
            catch (ParserException hpe)
            {
                // failed... now what
            }
        }
    }

    /**
     * Getter for property Connection.
     * @return Value of property Connection.
     */
    public URLConnection getConnection ()
    {
        return (mParser.getConnection ());
    }

    /**
     * Setter for property Connection.
     * @param connection New value of property Connection.
     */
    public void setConnection (URLConnection connection)
    {
        try
        {
            mParser.setConnection (connection);
            setLinks ();
        }
        catch (ParserException hpe)
        {
            // failed... now what
        }
    }

    /**
     * Unit test.
     * @param args Pass arg[0] as the URL to process.
     */
    public static void main (String[] args)
    {
        if (0 >= args.length)
            System.out.println ("Usage: java -classpath htmlparser.jar"
                + " org.htmlparser.beans.LinkBean ");
        else
        {
            LinkBean lb = new LinkBean ();
            lb.setURL (args[0]);
            URL[] urls = lb.getLinks ();
            for (int i = 0; i < urls.length; i++)
                System.out.println (urls[i]);
        }
    }
}






© 2015 - 2025 Weber Informatics LLC | Privacy Policy