All Downloads are FREE. Search and download functionalities are using the official Maven repository.

mtons.spider.fetcher.impl.PaginationFetcher Maven / Gradle / Ivy

package mtons.spider.fetcher.impl;

import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import mtons.spider.http.Request;
import mtons.spider.http.Response;
import mtons.spider.http.Style;
import mtons.spider.kit.Kit;
import mtons.spider.selector.Selectable;

/**
 * List fetcher that can follow "next page" links.
 *
 * <p>For a response to a {@code Style.LIST} request, the detail links are
 * parsed first and then the next-page link (located by the rule returned from
 * {@link #onNext()}) is queued as another {@code Style.LIST} request.
 *
 * Created by langhsu on 16/6/15.
 */
public abstract class PaginationFetcher extends ListFetcher {

    /**
     * Returns the stub name used by this fetcher.
     *
     * @return the constant string {@code "default"}
     */
    @Override
    public String getStub() {
        return "default";
    }

    /**
     * Dispatches a response according to the style of the request that produced it.
     *
     * <ul>
     *   <li>{@code Style.DETAIL}: the response is handed to {@code onAccept}.</li>
     *   <li>{@code Style.LIST}: {@code parseDetail} is invoked, followed by
     *       {@link #parseNext(Response)}.</li>
     * </ul>
     *
     * NOTE(review): {@code onAccept} and {@code parseDetail} are not declared in this
     * class; presumably they are inherited from {@code ListFetcher} - confirm.
     *
     * @param response the response to process
     */
    @Override
    public void accept(Response response) {
        Request request = response.getRequest();
        if (request.getStyle() == Style.DETAIL) {
            onAccept(response);
        }

        if (request.getStyle() == Style.LIST) {
            parseDetail(response);
            parseNext(response);
        }
    }

    /**
     * Locates the next-page link in the response document and, when it is usable,
     * attaches a follow-up {@code Style.LIST} request to the response.
     *
     * <p>Nothing is queued when {@link #onNext()} returns {@code null}, when the
     * selector matches no element, when the raw {@code href} is rejected by
     * {@code Kit.isNotEmptyUrl}, or when the link cannot be resolved to an
     * absolute URL.
     *
     * @param response the list-page response to inspect
     */
    public void parseNext(Response response) {
        Selectable selectable = onNext();
        if (selectable == null) {
            // No pagination rule supplied: there is nothing to follow.
            return;
        }

        Elements els = selectable.accept(response.getDocument());
        if (els == null || els.isEmpty()) {
            return;
        }

        // Only the first matched element is treated as the "next" button.
        Element btn = els.first();
        String href = btn.attr("href");
        logger.debug("next >>> " + href);
        if (!Kit.isNotEmptyUrl(href)) {
            return;
        }

        // "abs:href" yields an empty string when the link cannot be resolved
        // against the document's base URI; never queue a request with an empty URL.
        String absoluteHref = btn.attr("abs:href");
        if (absoluteHref.isEmpty()) {
            return;
        }

        Request request = Request.get(Style.LIST, absoluteHref);
        request.cloneFrom(response.getRequest());
        response.addNextRequest(request);
    }

    /**
     * Supplies the rule used to locate the next-page link.
     *
     * <p>Example:
     * <pre>
     * new Selectable() {
     *     public Elements accept(Document document) {
     *         return document.select("#page").select("a:containsOwn(下一页)");
     *     }
     * }
     * </pre>
     * (The text {@code 下一页} in the example selector means "next page".)
     *
     * @return the selector for the next-page link; a {@code null} result makes
     *         {@link #parseNext(Response)} skip pagination
     */
    public abstract Selectable onNext();

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy