All downloads are free. Search and download functionality uses the official Maven repository.

com.antbrains.urlcrawler.crawler.Fetcher Maven / Gradle / Ivy

package com.antbrains.urlcrawler.crawler;

import java.util.HashMap;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;

import org.apache.log4j.Logger;

import com.antbrains.httpclientfetcher.HttpClientFetcher;
import com.antbrains.urlcrawler.db.CrawlTask;
import com.google.gson.Gson; 

/**
 * Worker thread that repeatedly takes a {@link CrawlTask} from a task queue,
 * passes it to a {@link FetcherAndExtractor} together with an
 * {@link HttpClientFetcher}, and then puts the same task object onto a
 * result queue.
 *
 * <p>The loop ends once {@link #stopMe()} has been called or the thread is
 * interrupted.
 */
public class Fetcher extends Thread{
	protected static Logger logger=Logger.getLogger(Fetcher.class);

	/**
	 * How long a single poll of the task queue may block, in seconds. This is
	 * also the longest an idle worker takes to notice {@link #stopMe()}.
	 */
	private static final long POLL_TIMEOUT_SECONDS=3;

	HttpClientFetcher fetcher;
	// Element type restored: run() assigns the polled element directly to a
	// CrawlTask, which only compiles when the queue is parameterized.
	BlockingQueue<CrawlTask> taskQueue;
	BlockingQueue<CrawlTask> resultQueue;
	FetcherAndExtractor fae;

	/**
	 * Creates a worker; the thread is not started here.
	 *
	 * @param fetcher     HTTP fetcher handed to {@code fae} for every task
	 * @param taskQueue   queue the worker polls for tasks
	 * @param resultQueue queue each task is put onto after processing
	 * @param fae         component whose {@code processTask} is invoked per task
	 */
	public Fetcher(HttpClientFetcher fetcher, BlockingQueue<CrawlTask> taskQueue,
			BlockingQueue<CrawlTask> resultQueue, FetcherAndExtractor fae){
		this.fetcher=fetcher;
		this.taskQueue=taskQueue;
		this.resultQueue=resultQueue;
		this.fae=fae;
	}

	/** Stop flag; volatile so a write from another thread is seen by the run loop. */
	private volatile boolean bStop;

	/**
	 * Asks the worker to stop. The flag is checked between tasks, so a task
	 * already being processed is not cut short by this call.
	 */
	public void stopMe(){
		bStop=true;
	}

	/**
	 * Polls the task queue until stopped. An empty poll just re-checks the
	 * stop flag. An interrupt is treated as a request to shut down: the
	 * interrupt status is restored and the loop exits.
	 */
	@Override
	public void run(){
		while(!bStop){
			try {
				CrawlTask task = this.taskQueue.poll(POLL_TIMEOUT_SECONDS, TimeUnit.SECONDS);
				if(task==null) continue;
				this.doWork(task);
			} catch (InterruptedException e) {
				// Restore the flag for whoever inspects this thread afterwards,
				// and leave the loop instead of silently spinning on.
				Thread.currentThread().interrupt();
				logger.warn("Fetcher interrupted, stopping", e);
				return;
			}
		}
	}

	/**
	 * Processes one task and publishes it on the result queue.
	 *
	 * @param task the task to process; the same object is put on the result queue
	 * @throws InterruptedException if interrupted while waiting to put the
	 *         task on the result queue; the task is then not enqueued
	 */
	private void doWork(CrawlTask task) throws InterruptedException{
		fae.processTask(fetcher, task);
		this.resultQueue.put(task);
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy