
// us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover (Maven / Gradle / Ivy)
package us.codecraft.webmagic.scheduler.component;
import com.google.common.collect.Sets;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
/**
 * A {@link DuplicateRemover} that records every URL it is asked about in an in-memory set.
 *
 * <p>The set is backed by a {@link ConcurrentHashMap}, so adding a URL is an atomic
 * check-and-insert. One set is kept per instance; the {@code task} arguments exist to
 * satisfy the interface and are not consulted by this implementation.
 *
 * @author [email protected]
 */
public class HashSetDuplicateRemover implements DuplicateRemover {

    /** URLs seen so far. Explicit type arguments avoid raw-type / unchecked warnings. */
    private final Set<String> urls = Sets.newSetFromMap(new ConcurrentHashMap<String, Boolean>());

    /**
     * Records the request's URL and reports whether it had been recorded before.
     *
     * @param request the request whose URL (as returned by {@link #getUrl(Request)}) is checked
     * @param task    unused by this implementation
     * @return {@code true} if the URL was already present, {@code false} if this call added it
     */
    @Override
    public boolean isDuplicate(Request request, Task task) {
        // Set.add returns false when the element was already present, i.e. a duplicate.
        return !urls.add(getUrl(request));
    }

    /**
     * Extracts the key used for duplicate detection. Subclasses may override this to
     * compare requests on something other than the plain URL.
     *
     * @param request the request to derive the key from
     * @return the value of {@code request.getUrl()}
     */
    protected String getUrl(Request request) {
        return request.getUrl();
    }

    /**
     * Forgets every URL recorded so far.
     *
     * @param task unused by this implementation
     */
    @Override
    public void resetDuplicateCheck(Task task) {
        urls.clear();
    }

    /**
     * Returns the number of distinct URLs currently recorded.
     *
     * @param task unused by this implementation
     * @return the current size of the URL set
     */
    @Override
    public int getTotalRequestsCount(Task task) {
        return urls.size();
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy