All Downloads are FREE. Search and download functionalities are using the official Maven repository.

cn.wanghaomiao.seimi.def.BaseSeimiCrawler Maven / Gradle / Ivy

Go to download

一个支持分布式的可以高效开发且可以高效运行的爬虫框架。设计思想上融合了spring与scrapy的优点。

There is a newer version: 2.1.4
Show newest version
package cn.wanghaomiao.seimi.def;

import cn.wanghaomiao.seimi.core.SeimiCrawler;
import cn.wanghaomiao.seimi.core.SeimiQueue;
import cn.wanghaomiao.seimi.struct.Request;
import org.apache.http.client.CookieStore;
import org.apache.http.impl.client.BasicCookieStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * @author 汪浩淼 [email protected]
 * @since 2015/8/17.
 */
public abstract class BaseSeimiCrawler implements SeimiCrawler {

    protected SeimiQueue queue;
    protected CookieStore cookieStore = new BasicCookieStore();
    protected Logger logger = LoggerFactory.getLogger(getClass());
    protected String crawlerName;

    protected void push(Request request){
        request.setCrawlerName(crawlerName);
        queue.push(request);
    }

    @Override
    public String getUserAgent() {
        return "SeimiCrawler/JsoupXpath";
    }
    @Override
    public CookieStore getCookieStore() {
        return cookieStore;
    }

    public void setQueue(SeimiQueue queue) {
        this.queue = queue;
    }

    public void setCrawlerName(String crawlerName) {
        this.crawlerName = crawlerName;
    }

    public String getCrawlerName() {
        return crawlerName;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy