com.sittinglittleduck.DirBuster.HTMLparse Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of dirbuster Show documentation
DirBuster is a multi-threaded Java application designed to brute-force directory and file names on web/application servers. It is often the case that what looks like a web server in its default installation state is actually not, and has pages and applications hidden within. DirBuster attempts to find these.
The newest version!
/*
 * HTMLparse.java
 *
 * Copyright 2007 James Fisher
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
 *///TODO convert this over to a thread, so it does not tie up the workers :)
package com.sittinglittleduck.DirBuster;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;
import java.util.Vector;

import net.htmlparser.jericho.Attribute;
import net.htmlparser.jericho.Attributes;
import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.Source;

/**
 * Worker thread that parses returned HTML pages and extracts further directories and files
 * from them.
 * <p>
 * Each iteration takes one {@code HTMLparseWorkUnit} from {@code manager.parseQueue}, resolves
 * the configured tag/attribute pairs against the URL of the page they came from, breaks every
 * same-host path into its cumulative prefixes and puts a new {@code WorkUnit} on
 * {@code manager.workQueue} for each prefix that has not been handled yet.
 * <p>
 * The thread ends when {@link #stopWorking()} is called or when it is interrupted.
 *
 * @author james
 */
public class HTMLparse extends Thread
{

    private Manager manager;
    // volatile: written by this thread, read by other threads through isWorking()
    volatile boolean working;
    // volatile: written by the thread calling stopWorking(), read by this thread in run()
    private volatile boolean continueWorking = true;

    /** Creates a new instance of HTMLparse */
    public HTMLparse()
    {

        manager = Manager.getInstance();
    }

    /**
     * Asks the thread to stop: clears the loop flag and interrupts any blocking call
     * (queue take/put or sleep) the thread is currently in.
     */
    public void stopWorking()
    {
        continueWorking = false;
        this.interrupt();

    }

    /**
     * Main loop: take a parse unit, extract the items it links to and queue work for them.
     * Returns when {@link #stopWorking()} has been called or the thread is interrupted.
     */
    @Override
    public void run()
    {
        try
        {
            while(continueWorking)
            {
                working = false;
                HTMLparseWorkUnit parseUnit = manager.parseQueue.take();
                working = true;

                String html = parseUnit.getHtmlToParse();
                WorkUnit work = parseUnit.getWorkUnit();

                //both the text and the work unit it came from are needed below
                if(html == null || work == null || html.equals(""))
                {
                    continue;
                }

                if(Config.debug)
                {
                    System.out.println("DEBUG HTMLParser: Parsing text from " + work.getWork().toString());
                    System.out.println("DEBUG HTMLParser: text - " + html);
                }

                //refresh the manager reference before each parse
                manager = Manager.getInstance();

                Vector<String> foundItems = collectItems(html, work);
                queueFoundItems(foundItems, work);
            }
        }
        catch(InterruptedException ex)
        {
            //interrupted while blocked or sleeping: fall through and let the thread end
        }
        finally
        {
            //never report a finished thread as still working
            working = false;
        }
    }

    /**
     * Scans the html for every configured tag/attribute pair and collects the path prefixes
     * of all links that point at the same host as the page being parsed.
     *
     * @param html text of the page to parse
     * @param work work unit the page was fetched for; its URL is the base for relative links
     * @return the distinct items found, in the order they were first seen
     * @throws InterruptedException if the thread is interrupted while pausing
     */
    private Vector<String> collectItems(String html, WorkUnit work) throws InterruptedException
    {
        Vector<String> seenPaths = new Vector<String>(50, 10);
        Vector<String> foundItems = new Vector<String>(20, 10);

        //create the source
        Source source = new Source(html);

        Vector<?> elementsToParse = manager.getElementsToParse();

        //loop through all the things we wish to parse
        for(int z = 0; z < elementsToParse.size(); z++)
        {
            HTMLelementToParse elementToParse = (HTMLelementToParse) elementsToParse.elementAt(z);

            for(Iterator<?> i = source.getAllElements(elementToParse.getTag()).iterator(); i.hasNext();)
            {
                Element element = (Element) i.next();
                Attributes attributes = element.getAttributes();
                if(attributes == null)
                {
                    continue;
                }
                Attribute attr = attributes.get(elementToParse.getAttr());
                if(attr == null || attr.getValue() == null)
                {
                    continue;
                }

                try
                {
                    //creates a fully qualified URL, based on the page we have just tested
                    URL tempURL = new URL(work.getWork(), attr.getValue());
                    String urlString = tempURL.getPath();

                    //check it is not already there and the link is from the same host
                    if(!seenPaths.contains(urlString) && tempURL.getHost().equalsIgnoreCase(work.getWork().getHost()))
                    {
                        //remember the path so repeated links are only split once;
                        //only same-host paths are recorded, so an off-site link can
                        //never hide a same-host link with the same path
                        seenPaths.addElement(urlString);

                        Vector<String> found = processURL(tempURL);
                        for(int a = 0; a < found.size(); a++)
                        {
                            String item = found.elementAt(a);
                            if(!foundItems.contains(item))
                            {
                                foundItems.addElement(item);
                            }
                        }
                    }
                }
                catch(MalformedURLException ignored)
                {
                    //the attribute did not hold a usable URL, skip it
                }
            }

            //pause between each tag/attribute pair
            Thread.sleep(100);
        }//end of for loop for elements

        return foundItems;
    }

    /**
     * Creates a work unit for every found item that is not filtered out by extension and
     * has not been handled before, and puts it on the manager's work queue.
     *
     * @param foundItems items returned by {@link #collectItems(String, WorkUnit)}
     * @param work work unit of the parsed page; supplies protocol, host and port
     * @throws InterruptedException if the thread is interrupted while pausing or queueing
     */
    private void queueFoundItems(Vector<String> foundItems, WorkUnit work) throws InterruptedException
    {
        for(int a = 0; a < foundItems.size(); a++)
        {
            String founditem = foundItems.elementAt(a);

            if(hasExtensionToMiss(founditem))
            {
                continue;
            }

            //check if the found item has already been processed
            if(manager.hasLinkBeenDone(founditem))
            {
                continue;
            }

            //get base case for item
            BaseCase baseCase = findBaseCasePoint(founditem);
            if(baseCase == null)
            {
                continue;
            }

            String method;
            if(manager.getAuto() && !baseCase.useContentAnalysisMode() && !baseCase.isUseRegexInstead())
            {
                method = "HEAD";
            }
            else
            {
                method = "GET";
            }

            try
            {
                //create work unit, so item can be added to the queue
                // ZAP: Added port - otherwise will fail on non standard ports
                WorkUnit workUnit = new WorkUnit(
                        new URL(work.getWork().getProtocol(), work.getWork().getHost(), work.getWork().getPort(), founditem),
                        founditem.endsWith("/"), method, baseCase, null);

                //add item to the work queue to be tested
                if(manager.addParsedLink(founditem))
                {
                    //increment the counter for the amount of work done
                    manager.addParsedLinksProcessed();
                    manager.workQueue.put(workUnit);
                }
            }
            catch(MalformedURLException ex)
            {
                ex.printStackTrace();
            }
        }
    }

    /**
     * Tells whether the item ends with one of the extensions listed in
     * {@code manager.extsToMiss}.
     *
     * @param founditem item to check
     * @return true if the item ends with "." followed by one of those extensions
     */
    private boolean hasExtensionToMiss(String founditem)
    {
        for(int b = 0; b < manager.extsToMiss.size(); b++)
        {
            if(founditem.endsWith("." + (String) manager.extsToMiss.elementAt(b)))
            {
                return true;
            }
        }
        return false;
    }

    /**
     * Splits up the URL found into its cumulative path prefixes, e.g. the path
     * {@code /a/b.html} yields {@code /}, {@code /a/} and {@code /a/b.html}.
     *
     * @param url url to be processed
     * @return the prefixes, shortest first
     * @throws InterruptedException if the thread is interrupted during the short pause
     */
    private Vector<String> processURL(URL url) throws InterruptedException
    {
        Vector<String> foundItems = new Vector<String>(10, 10);

        String toProcess = url.getPath();
        boolean noFile = toProcess.endsWith("/");
        String[] split = toProcess.split("/");

        StringBuilder found = new StringBuilder();

        for(int a = 0; a < split.length; a++)
        {
            found.append(split[a]);

            //every element is a directory, except the last one when the path names a file
            boolean isTrailingFile = a == (split.length - 1) && !noFile;
            if(!isTrailingFile)
            {
                found.append('/');
            }

            foundItems.addElement(found.toString());
        }
        Thread.sleep(10);

        return foundItems;
    }

    /**
     * Generates the base case for the location the item lives in.
     *
     * @param item path of the found item
     * @return the base case, or null if it could not be generated because of an I/O or URL error
     * @throws InterruptedException if the thread is interrupted during the pause
     */
    private BaseCase findBaseCasePoint(String item) throws InterruptedException
    {

        try
        {
            //a single character item is handled as the root directory
            if(item.length() == 1)
            {
                return GenBaseCase.genBaseCase(manager.getFirstPartOfURL() + "/", true, null);
            }
            String[] array = item.split("/");

            //everything up to, but not including, the last element
            StringBuilder baseItem = new StringBuilder();
            for(int a = 0; a < array.length - 1; a++)
            {
                baseItem.append(array[a]).append('/');
            }

            boolean isDir = item.endsWith("/");
            String fileExtention = null;
            if(!isDir)
            {
                //extension is whatever follows the first dot of the file name
                String file = array[array.length - 1];
                int loc = file.indexOf(".");
                fileExtention = (loc != -1) ? file.substring(loc + 1) : "";
            }

            Thread.sleep(100);

            return GenBaseCase.genBaseCase(manager.getFirstPartOfURL() + baseItem, isDir, fileExtention);
        }
        catch(IOException ex)
        {
            //also covers MalformedURLException
            ex.printStackTrace();
        }

        return null;
    }

    /**
     * @return true while the thread is processing a parse unit, false while it is waiting
     *         for one or after it has ended
     */
    public boolean isWorking()
    {
        return working;
    }
}