cn.wanghaomiao.seimi.def.BaseSeimiCrawler Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of SeimiCrawler Show documentation
Show all versions of SeimiCrawler Show documentation
一个支持分布式的可以高效开发且可以高效运行的爬虫框架。设计思想上融合了spring与scrapy的优点。
package cn.wanghaomiao.seimi.def;
import cn.wanghaomiao.seimi.core.SeimiCrawler;
import cn.wanghaomiao.seimi.core.SeimiQueue;
import cn.wanghaomiao.seimi.struct.Request;
import org.apache.http.client.CookieStore;
import org.apache.http.impl.client.BasicCookieStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Abstract base implementation of {@link SeimiCrawler} that holds the state
 * shared by concrete crawlers: the request queue, a cookie store, a logger
 * and the crawler name.
 *
 * @author 汪浩淼 [email protected]
 * @since 2015/8/17.
 */
public abstract class BaseSeimiCrawler implements SeimiCrawler {

    /** Queue that receives requests pushed by this crawler; unset until {@link #setQueue(SeimiQueue)} is called. */
    protected SeimiQueue queue;

    /** Name written onto every pushed request; unset until {@link #setCrawlerName(String)} is called. */
    protected String crawlerName;

    /** Cookie store handed out by {@link #getCookieStore()}; starts as a freshly constructed {@link BasicCookieStore}. */
    protected CookieStore cookieStore = new BasicCookieStore();

    /** Logger obtained for the runtime class of this instance, so subclasses log under their own class. */
    protected Logger logger = LoggerFactory.getLogger(getClass());

    // ------------------------------------------------------------------
    // SeimiCrawler contract
    // ------------------------------------------------------------------

    /**
     * Returns the cookie store currently held by this crawler.
     *
     * @return the value of the {@code cookieStore} field
     */
    @Override
    public CookieStore getCookieStore() {
        return this.cookieStore;
    }

    /**
     * Returns the User-Agent string of this crawler.
     *
     * @return the fixed value {@code "SeimiCrawler/JsoupXpath"}
     */
    @Override
    public String getUserAgent() {
        return "SeimiCrawler/JsoupXpath";
    }

    // ------------------------------------------------------------------
    // Request submission
    // ------------------------------------------------------------------

    /**
     * Stamps the request with this crawler's name and then pushes it onto the queue.
     *
     * <p>No null checks are performed here. NOTE(review): presumably the queue and
     * crawler name are assigned through the setters before this is called; confirm
     * against the code that wires crawlers up.
     *
     * @param request the request to submit
     */
    protected void push(Request request) {
        // The name must be set first so the queued request already carries it.
        request.setCrawlerName(this.crawlerName);
        this.queue.push(request);
    }

    // ------------------------------------------------------------------
    // Accessors
    // ------------------------------------------------------------------

    /**
     * Returns the name of this crawler.
     *
     * @return the value of the {@code crawlerName} field
     */
    public String getCrawlerName() {
        return this.crawlerName;
    }

    /**
     * Assigns the name that {@link #push(Request)} writes onto outgoing requests.
     *
     * @param crawlerName the crawler name to store
     */
    public void setCrawlerName(String crawlerName) {
        this.crawlerName = crawlerName;
    }

    /**
     * Assigns the queue that {@link #push(Request)} submits requests to.
     *
     * @param queue the queue to store
     */
    public void setQueue(SeimiQueue queue) {
        this.queue = queue;
    }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy