
// mtons.spider.fetcher.impl.PaginationFetcher (Maven / Gradle / Ivy artifact listing header)
package mtons.spider.fetcher.impl;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import mtons.spider.http.Request;
import mtons.spider.http.Response;
import mtons.spider.http.Style;
import mtons.spider.kit.Kit;
import mtons.spider.selector.Selectable;
/**
 * List fetcher that can follow "next page" links.
 *
 * <p>Subclasses provide the rule for locating the next-page link through
 * {@link #onNext()}; this class turns the matched link into a follow-up
 * list request attached to the current response.
 *
 * Created by langhsu on 16/6/15.
 */
public abstract class PaginationFetcher extends ListFetcher {

    /**
     * Returns the stub identifier of this fetcher.
     *
     * @return the constant string {@code "default"}
     */
    @Override
    public String getStub() {
        return "default";
    }

    /**
     * Dispatches a response according to the style of the request that produced it.
     *
     * <p>For {@code Style.DETAIL} requests the response is passed to {@code onAccept}.
     * For {@code Style.LIST} requests {@code parseDetail} is called first, then
     * {@link #parseNext(Response)}. {@code onAccept} and {@code parseDetail} are not
     * defined in this class (presumably inherited from {@code ListFetcher}).
     *
     * @param response the response to process
     */
    @Override
    public void accept(Response response) {
        Request request = response.getRequest();
        if (request.getStyle() == Style.DETAIL) {
            onAccept(response);
        }
        if (request.getStyle() == Style.LIST) {
            parseDetail(response);
            parseNext(response);
        }
    }

    /**
     * Locates the next-page link in the response document and, when it is usable,
     * adds a new {@code Style.LIST} request for it to the response.
     *
     * <p>Nothing is added when {@link #onNext()} returns {@code null}, when the
     * selector matches no element, when the raw {@code href} is rejected by
     * {@code Kit.isNotEmptyUrl}, or when the link cannot be resolved to an
     * absolute URL.
     *
     * @param response the list-page response to scan for a next-page link
     */
    public void parseNext(Response response) {
        Selectable selectable = onNext();
        if (selectable == null) {
            // No pagination rule supplied: treat as "no next page" instead of failing.
            return;
        }

        Elements els = selectable.accept(response.getDocument());
        if (els == null || els.isEmpty()) {
            return;
        }

        // Only the first matched element is considered to be the next-page link.
        Element btn = els.first();
        String href = btn.attr("href");
        logger.debug("next >>> " + href);
        if (!Kit.isNotEmptyUrl(href)) {
            return;
        }

        // jsoup yields an empty string for "abs:" attributes when the URL cannot be
        // made absolute (e.g. missing base URI); never enqueue an empty URL.
        String absoluteUrl = btn.attr("abs:href");
        if (absoluteUrl == null || absoluteUrl.isEmpty()) {
            return;
        }

        Request request = Request.get(Style.LIST, absoluteUrl);
        // Carry over settings from the request that produced this response.
        request.cloneFrom(response.getRequest());
        response.addNextRequest(request);
    }

    /**
     * Supplies the rule used to find the next-page link.
     *
     * <p>Example (the selector text 下一页 means "next page"):
     *
     * <pre>
     * new Selectable() {
     *     public Elements accept(Document document) {
     *         return document.select("#page").select("a:containsOwn(下一页)");
     *     }
     * }
     * </pre>
     *
     * @return the selector that picks the next-page link element(s) from a document
     */
    public abstract Selectable onNext();
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy