![JAR search and dependency download from the Maven repository](/logo.png)
fr.whimtrip.ext.jwhtscrapper.service.base.HttpManagerClient Maven / Gradle / Ivy
Show all versions of whimtrip-ext-scrapper Show documentation
package fr.whimtrip.ext.jwhtscrapper.service.base;
import fr.whimtrip.ext.jwhtscrapper.exception.RequestMaxRetriesReachedException;
import fr.whimtrip.ext.jwhtscrapper.exception.RequestTimeoutException;
import fr.whimtrip.ext.jwhtscrapper.exception.ScrapperUnsupportedException;
import fr.whimtrip.ext.jwhtscrapper.intfr.HttpMetrics;
import fr.whimtrip.ext.jwhtscrapper.intfr.ProxyFinder;
import fr.whimtrip.ext.jwhtscrapper.service.holder.HttpManagerConfig;
import org.asynchttpclient.BoundRequestBuilder;
import org.jetbrains.annotations.NotNull;
/**
 * Part of project jwht-scrapper
 * Created on 28/07/18
 *
 * <p>
 * Contract for the HTTP client used to prepare and execute requests.
 * An implementing class of this interface should be able to deal
 * with all of the requirements set up by {@link HttpManagerConfig}.
 * Please see each method's javadoc in order to understand and correctly
 * apply all the requirements.
 * </p>
 *
 * @author Louis-wht
 * @since 1.0.0
 */
public interface HttpManagerClient {
/**
 * <p>
 * This method should prepare a GET request and return it as a
 * {@link BoundRequestBuilder}. The elements to be prepared by
 * any implementing class are the following:
 * </p>
 * <ul>
 *     <li>The eventual proxy to use if proxies are enabled</li>
 *     <li>The default headers to use</li>
 *     <li>The default cookies to use</li>
 *     <li>The rotating User Agent header if enabled</li>
 *     <li>The url to send the request to</li>
 * </ul>
 *
 * @param url the url to prepare
 * @return the prepared {@link BoundRequestBuilder} ready to be executed.
 */
BoundRequestBuilder prepareGet(String url);
/**
 * <p>
 * This method should prepare a POST request and return it as a
 * {@link BoundRequestBuilder}. The elements to be prepared by
 * any implementing class are the following:
 * </p>
 * <ul>
 *     <li>The eventual proxy to use if proxies are enabled</li>
 *     <li>The default headers to use</li>
 *     <li>The default cookies to use</li>
 *     <li>The default POST fields to use</li>
 *     <li>The rotating User Agent header if enabled</li>
 *     <li>The url to send the request to</li>
 * </ul>
 *
 * @param url the url to prepare
 * @return the prepared {@link BoundRequestBuilder} ready to be executed.
 */
BoundRequestBuilder preparePost(String url);
/**
 * <p>
 * This method should call {@link #getResponse(BoundRequestBuilder, boolean)}
 * with {@link HttpManagerConfig#followRedirections} value as the second
 * parameter.
 * </p>
 *
 * @param req the prepared request to get a response for.
 * @return a string of the response body.
 * @throws RequestMaxRetriesReachedException when the request timed out / failed on every
 *                                           attempt made
 *                                           ({@link HttpManagerConfig#getMaxRequestRetries()}
 *                                           will give the number of requests attempted
 *                                           before throwing this exception).
 *                                           NOTE(review): this tag previously named
 *                                           {@link RequestTimeoutException}, which is not
 *                                           what the signature declares; confirm how the
 *                                           two exception types relate.
 */
String getResponse(BoundRequestBuilder req) throws RequestMaxRetriesReachedException;
/**
 * <p>
 * This method should perform an HTTP request with the prepared
 * {@link BoundRequestBuilder} request. This should respect all
 * conditions given in {@link HttpManagerConfig}. This includes:
 * </p>
 * <ul>
 *     <li>
 *         Each consecutive request, even when they are performed in
 *         separate threads, must be synchronized on the same unique
 *         method so that they wait a minimum time between each request,
 *         as specified in {@link HttpManagerConfig#getAwaitBetweenRequests()}.
 *     </li>
 *     <li>
 *         Respect the given proxy change rate {@link HttpManagerConfig#getProxyChangeRate()}.
 *     </li>
 *     <li>Respect the timeout given by {@link HttpManagerConfig#getTimeout()}.</li>
 *     <li>Rotate User Agent header if required by {@link HttpManagerConfig#rotateUserAgent()}.</li>
 *     <li>
 *         Retry timed out requests at least and not more than:
 *         {@link HttpManagerConfig#getMaxRequestRetries()}
 *     </li>
 *     <li>
 *         Use proxies if required by {@link HttpManagerConfig#useProxy()} with the
 *         provided {@link ProxyFinder}.
 *     </li>
 *     <li>
 *         Connect (TCP Connect) to the url before making the actual HTTP request
 *         if required by {@link HttpManagerConfig#connectToProxyBeforeRequest()}.
 *     </li>
 *     <li>
 *         Follow HTTP 301 and 302 redirections if required by the {@code followRedirections}
 *         parameter of this method. It should follow redirections at most once, except if
 *         {@link HttpManagerConfig#allowInfiniteRedirections()} is enabled.
 *     </li>
 * </ul>
 *
 * @param req the prepared request to get a response for.
 * @param followRedirections to stipulate if HTTP redirections should be followed.
 * @return a string of the response body.
 * @throws RequestMaxRetriesReachedException when the request timed out / failed on every
 *                                           attempt made
 *                                           ({@link HttpManagerConfig#getMaxRequestRetries()}
 *                                           will give the number of requests attempted
 *                                           before throwing this exception).
 *                                           NOTE(review): this tag previously named
 *                                           {@link RequestTimeoutException}, which is not
 *                                           what the signature declares; confirm how the
 *                                           two exception types relate.
 */
String getResponse(BoundRequestBuilder req, boolean followRedirections) throws RequestMaxRetriesReachedException;
/**
 * @return the current Http metrics {@link HttpMetrics} of the current instance of
 *         the HttpManagerClient.
 * @throws ScrapperUnsupportedException If your implementation does not support returning
 *                                      HttpMetrics.
 */
@NotNull
HttpMetrics getHttpMetrics() throws ScrapperUnsupportedException;
/**
 * <p>
 * Removes the context attached to the given request.
 * The default implementation uses a map of requests mapped to
 * their respective context. This context needs to be wiped
 * out once the request is properly finished, meaning that all
 * retry operations, warning sign catches... must have been
 * handled first.
 * </p>
 *
 * @param req the request to remove the context for.
 */
void removeContext(@NotNull final BoundRequestBuilder req);
}