
// Source listing: us.codecraft.webmagic.scheduler.QueueScheduler (Maven / Gradle / Ivy)
package us.codecraft.webmagic.scheduler;
import org.apache.http.annotation.ThreadSafe;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
/**
 * Basic in-memory {@link Scheduler} implementation.
 *
 * <p>Pending requests are held in an unbounded {@link LinkedBlockingQueue} and handed
 * out in FIFO order. Duplicate detection is not done here; it is inherited from
 * {@link DuplicateRemovedScheduler} (presumably via its configured duplicate remover --
 * see that class for the exact strategy).
 *
 * <p>The {@code task} argument is ignored by the queue operations in this class: all
 * tasks using the same scheduler instance share one queue.
 *
 * @author code4crafter@gmail.com
 * @since 0.1.0
 */
@ThreadSafe
public class QueueScheduler extends DuplicateRemovedScheduler implements MonitorableScheduler {

    /**
     * Requests waiting to be fetched. Typed as {@code BlockingQueue<Request>} so that
     * {@link #poll(Task)} can return its head without an unchecked cast.
     */
    private final BlockingQueue<Request> queue = new LinkedBlockingQueue<Request>();

    /**
     * Appends the request to the tail of the pending queue.
     *
     * <p>NOTE(review): the method name implies the parent class calls this only after the
     * request has passed its duplicate check -- confirm in {@code DuplicateRemovedScheduler}.
     *
     * @param request the request to enqueue; must not be {@code null}
     *                ({@link LinkedBlockingQueue} rejects null elements)
     * @param task    unused by this implementation
     */
    @Override
    public void pushWhenNoDuplicate(Request request, Task task) {
        // The queue is unbounded, so add() cannot fail for lack of capacity.
        queue.add(request);
    }

    /**
     * Removes and returns the oldest pending request without blocking.
     *
     * @param task unused by this implementation
     * @return the head of the queue, or {@code null} if no request is pending
     */
    @Override
    public synchronized Request poll(Task task) {
        return queue.poll();
    }

    /**
     * Returns the number of requests currently waiting in the queue.
     *
     * @param task unused by this implementation
     * @return the current queue size
     */
    @Override
    public int getLeftRequestsCount(Task task) {
        return queue.size();
    }

    /**
     * Returns the total request count as reported by the duplicate remover.
     *
     * @param task the task passed through to the duplicate remover
     * @return the value of {@code getDuplicateRemover().getTotalRequestsCount(task)}
     */
    @Override
    public int getTotalRequestsCount(Task task) {
        return getDuplicateRemover().getTotalRequestsCount(task);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy