us.codecraft.webmagic.scheduler.Scheduler Maven / Gradle / Ivy
The newest version!
package us.codecraft.webmagic.scheduler;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
/**
 * Manages the URLs of a crawl.
 * <p>
 * Implement this interface to control:
 * <ul>
 * <li>which URLs are queued for fetching</li>
 * <li>how duplicate URLs are removed</li>
 * </ul>
 *
 * @author [email protected]
 * @since 0.1.0
 */
public interface Scheduler {

    /**
     * Adds a request (a URL to fetch) to this scheduler.
     *
     * @param request the request to add
     * @param task    the spider task
     */
    void push(Request request, Task task);

    /**
     * Retrieves a request to crawl.
     * <p>
     * NOTE(review): what is returned when nothing is available is not
     * specified by this interface -- confirm against the implementations.
     *
     * @param task the spider task
     * @return the request to crawl
     */
    Request poll(Task task);
}