
// com.antbrains.urlcrawler.crawler.Fetcher (Maven / Gradle / Ivy)
package com.antbrains.urlcrawler.crawler;
import java.util.HashMap;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
import org.apache.log4j.Logger;
import com.antbrains.httpclientfetcher.HttpClientFetcher;
import com.antbrains.urlcrawler.db.CrawlTask;
import com.google.gson.Gson;
/**
 * Worker thread that drains a queue of {@link CrawlTask}s.
 *
 * <p>Each task taken from {@code taskQueue} is handed to
 * {@link FetcherAndExtractor#processTask} together with this worker's
 * {@link HttpClientFetcher}, and is then placed on {@code resultQueue}.
 * The loop ends when {@link #stopMe()} has been called or when the thread
 * is interrupted.
 */
public class Fetcher extends Thread{
    protected static Logger logger=Logger.getLogger(Fetcher.class);

    /** How long a single poll of the task queue waits before re-checking the stop flag. */
    private static final long POLL_TIMEOUT_SECONDS = 3;

    HttpClientFetcher fetcher;
    BlockingQueue<CrawlTask> taskQueue;
    BlockingQueue<CrawlTask> resultQueue;
    FetcherAndExtractor fae;

    /** Set by {@link #stopMe()}; volatile so the worker thread sees the update. */
    private volatile boolean bStop;

    /**
     * Creates a worker; the thread is not started by the constructor.
     *
     * @param fetcher     HTTP fetcher passed to {@code fae} for every task
     * @param taskQueue   queue the worker takes pending tasks from
     * @param resultQueue queue the worker puts each task on after processing
     * @param fae         component that processes each task
     */
    public Fetcher(HttpClientFetcher fetcher, BlockingQueue<CrawlTask> taskQueue,
            BlockingQueue<CrawlTask> resultQueue, FetcherAndExtractor fae){
        this.fetcher=fetcher;
        this.taskQueue=taskQueue;
        this.resultQueue=resultQueue;
        this.fae=fae;
    }

    /**
     * Asks the worker to stop. The request is noticed the next time the loop
     * condition is evaluated, i.e. after the current task or the current poll
     * (at most {@value #POLL_TIMEOUT_SECONDS} seconds) finishes.
     */
    public void stopMe(){
        bStop=true;
    }

    /**
     * Polls the task queue and processes tasks until stopped or interrupted.
     */
    @Override
    public void run(){
        while(!bStop && !Thread.currentThread().isInterrupted()){
            try {
                // Bounded wait so that a stop request is noticed even when the queue is empty.
                CrawlTask task = this.taskQueue.poll(POLL_TIMEOUT_SECONDS, TimeUnit.SECONDS);
                if(task==null) {
                    continue;
                }
                this.doWork(task);
            } catch (InterruptedException e) {
                // Restore the flag for code further up the stack and leave the loop;
                // continuing would make the next poll() throw again immediately.
                Thread.currentThread().interrupt();
                logger.warn("Fetcher interrupted while waiting for a task; stopping");
                break;
            }
        }
    }

    /**
     * Processes one task and publishes it on the result queue.
     *
     * <p>If the thread is interrupted while waiting for space on the result
     * queue, the task is not published; the interrupt flag is restored so
     * that {@link #run()} terminates.
     *
     * @param task the task to process
     */
    private void doWork(CrawlTask task){
        fae.processTask(fetcher, task);
        try {
            this.resultQueue.put(task);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.warn("Fetcher interrupted while publishing a processed task; result not queued");
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy