All downloads are free. Search and download functionality uses the official Maven repository.

org.archive.modules.deciderules.NotMatchesStatusCodeDecideRule Maven / Gradle / Ivy

Go to download

This project contains some of the configurable modules used within the Heritrix application to crawl the web. The modules in this project can be used in applications other than Heritrix, however.

There is a newer version: 3.6.0
Show newest version
package org.archive.modules.deciderules;

import org.archive.modules.CrawlURI;


/**
 * Provides a rule that returns "true" for any CrawlURI whose fetch status
 * code does not fall within the configured inclusive range. For instance,
 * with the range 400 to 499 the rule returns "true" for every URI whose
 * status code is not a "client error" code, and "false" for 400, 499 and
 * everything in between.
 *
 * @author cmiles74
 */
public class NotMatchesStatusCodeDecideRule extends MatchesStatusCodeDecideRule {

    /**
     * Sets the upper bound (inclusive) of the status code range.
     *
     * @param statusCode highest status code still considered inside the range
     */
    public void setUpperBound(Integer statusCode) {
        kp.put("upperBound", statusCode);
    }

    /**
     * Returns the upper bound (inclusive) of the status code range.
     *
     * @return Integer Status code, or null if the "upperBound" lookup yields
     *         no value
     */
    public Integer getUpperBound() {
        // Casting a null reference is legal in Java, so a missing value
        // simply comes back as null without a separate check.
        return (Integer) kp.get("upperBound");
    }

    /**
     * Returns "true" if the provided CrawlURI has a fetch status that does not
     * fall within this instance's specified range. Both bounds are inclusive:
     * a status equal to the lower or the upper bound is inside the range and
     * therefore yields "false".
     *
     * @param uri the CrawlURI whose fetch status is examined
     * @return true If the CrawlURI has a fetch status outside the specified
     *         range
     * @throws NullPointerException if a bound needed for the comparison has
     *         not been set
     */
    @Override
    protected boolean evaluate(CrawlURI uri) {
        int statusCode = uri.getFetchStatus();

        // Strict comparisons on purpose: the bounds themselves belong to the
        // inclusive range, so only values strictly below the lower bound or
        // strictly above the upper bound count as "not matching".
        return statusCode < getLowerBound().intValue()
                || statusCode > getUpperBound().intValue();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy