All downloads are free. The search and download features use the official Maven repository.

us.codecraft.webmagic.scheduler.component.HashSetDuplicateRemover Maven / Gradle / Ivy

There is a newer version: 1.0.2
Show newest version
package us.codecraft.webmagic.scheduler.component;

import com.google.common.collect.Sets;
import us.codecraft.webmagic.Request;
import us.codecraft.webmagic.Task;

import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

/**
 * {@link DuplicateRemover} that remembers every URL it has been asked about in
 * an in-memory set backed by a {@link ConcurrentHashMap}.
 *
 * <p>All tasks share the same set: the {@code task} argument of every method
 * is ignored by this implementation.
 *
 * @author [email protected]
 */
public class HashSetDuplicateRemover implements DuplicateRemover {

    /** URLs seen so far; a set view over a {@link ConcurrentHashMap}. */
    private final Set<String> urls = Sets.newSetFromMap(new ConcurrentHashMap<String, Boolean>());

    /**
     * Checks whether the request's URL has been seen before and, as a side
     * effect, records it as seen.
     *
     * @param request the request whose URL (as returned by {@link #getUrl(Request)}) is checked
     * @param task    ignored by this implementation
     * @return {@code true} if the URL was already recorded, {@code false} if
     *         this call recorded it for the first time
     */
    @Override
    public boolean isDuplicate(Request request, Task task) {
        // Set.add returns false when the element was already present, so a
        // single call both tests for and registers the URL.
        return !urls.add(getUrl(request));
    }

    /**
     * Extracts the key used for duplicate detection. Subclasses may override
     * this to use a different key for a request.
     *
     * <p>NOTE(review): the backing {@link ConcurrentHashMap} rejects
     * {@code null} keys, so a {@code null} result here makes
     * {@link #isDuplicate(Request, Task)} throw {@link NullPointerException}.
     *
     * @param request the request to derive the key from
     * @return the request's URL
     */
    protected String getUrl(Request request) {
        return request.getUrl();
    }

    /**
     * Forgets every recorded URL.
     *
     * @param task ignored by this implementation; the whole set is cleared
     */
    @Override
    public void resetDuplicateCheck(Task task) {
        urls.clear();
    }

    /**
     * Returns the number of distinct URLs currently recorded.
     *
     * @param task ignored by this implementation
     * @return the size of the recorded-URL set
     */
    @Override
    public int getTotalRequestsCount(Task task) {
        return urls.size();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy