All downloads are free. The search and download functionality uses the official Maven repository.

mtons.spider.fetcher.impl.ListFetcher Maven / Gradle / Ivy

package mtons.spider.fetcher.impl;

import org.apache.log4j.Logger;
import org.jsoup.select.Elements;
import mtons.spider.fetcher.Fetcher;
import mtons.spider.http.Request;
import mtons.spider.http.Response;
import mtons.spider.http.Style;
import mtons.spider.kit.Kit;
import mtons.spider.selector.Selectable;

/**
 * List-page fetcher.
 *
 * <p>Implements the {@code Fetcher} interface by intercepting {@link #accept(Response)} and
 * dispatching on the style of the originating request:
 * <ul>
 *   <li>{@code Style.DETAIL} responses are handed to {@link #onAccept(Response)};</li>
 *   <li>{@code Style.LIST} responses are scanned for detail-page links via
 *       {@link #parseDetail(Response)}, using the selector supplied by {@link #onDetail()}.</li>
 * </ul>
 * Subclasses provide {@link #onAccept(Response)} and {@link #onDetail()}.
 *
 * Created by langhsu on 16/6/15.
 */
public abstract class ListFetcher extends FetcherSupport implements Fetcher {
    /** Logger shared under the {@code spider.fetcher} category; visible to subclasses. */
    protected Logger logger = Logger.getLogger("spider.fetcher");

    /**
     * Returns the stub name of this fetcher.
     *
     * @return the constant {@code "default"}
     */
    @Override
    public String getStub() {
        return "default";
    }

    /**
     * Dispatches a response according to the style of the request that produced it.
     *
     * <p>The two checks are intentionally independent: the style is re-read after
     * {@link #onAccept(Response)} returns, exactly as in the original implementation.
     *
     * @param response the response to process
     */
    @Override
    public void accept(Response response) {
        if (response.getRequest().getStyle() == Style.DETAIL) {
            onAccept(response);
        }

        if (response.getRequest().getStyle() == Style.LIST) {
            parseDetail(response);
        }
    }

    /**
     * Handles a detail-page response; implemented by the concrete business class.
     *
     * @param response the response to process
     */
    public abstract void onAccept(Response response);

    /**
     * Extracts detail-page links from a list-page response and appends a
     * {@code Style.DETAIL} request for each of them to the response's next requests.
     *
     * <p>Elements are selected with the {@link Selectable} returned by {@link #onDetail()}.
     * An element is skipped when its raw {@code href} is rejected by
     * {@code Kit.isNotEmptyUrl}, or when its absolute form ({@code abs:href}) is empty,
     * which jsoup returns when the link cannot be resolved to an absolute URL.
     * If {@link #onDetail()} returns {@code null}, nothing is parsed.
     *
     * @param response the list-page response
     */
    public final void parseDetail(Response response) {
        Selectable selectable = onDetail();
        if (selectable == null) {
            // A subclass without a link rule should not crash the whole fetch.
            logger.warn("onDetail() returned null, skip parsing detail links");
            return;
        }

        Elements els = selectable.accept(response.getDocument());
        if (els == null || els.isEmpty()) {
            return;
        }

        els.forEach(el -> {
            String href = el.attr("href");
            logger.debug("view - " + href);
            if (!Kit.isNotEmptyUrl(href)) {
                return;
            }

            // The request is built from the absolute URL, so validate that one too:
            // jsoup yields "" for abs:href when the link cannot be made absolute.
            String absoluteHref = el.attr("abs:href");
            if (absoluteHref == null || absoluteHref.isEmpty()) {
                return;
            }

            Request request = Request.get(Style.DETAIL, absoluteHref);
            request.cloneFrom(response.getRequest());
            response.addNextRequest(request);
        });
    }

    /**
     * Supplies the rule used to select detail-page links from a list page.
     *
     * <p>Example:
     * <pre>
     * new Selectable() {
     *     public Elements accept(Document document) {
     *         return document.select("#id");
     *     }
     * }
     * </pre>
     *
     * @return the selector used by {@link #parseDetail(Response)}
     */
    public abstract Selectable onDetail();

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy