/*
 * fr.whimtrip.ext.jwhtscrapper.service.base.AutomaticInnerScrapperClient Maven / Gradle / Ivy
 *
 * Go to download
 * Show more of this group Show more artifacts with this name
 * Show all versions of whimtrip-ext-scrapper Show documentation
 * Fully featured highly pluggable and customizable Java scrapping framework
 * The newest version!
 */
package fr.whimtrip.ext.jwhtscrapper.service.base;

import fr.whimtrip.ext.jwhtscrapper.exception.ScrapperAlreadyFinishedException;
import fr.whimtrip.ext.jwhtscrapper.exception.ScrapperAlreadyStartedException;
import fr.whimtrip.ext.jwhtscrapper.exception.ScrapperUnsupportedException;
import fr.whimtrip.ext.jwhtscrapper.intfr.AutomaticScrapperClient;
import fr.whimtrip.ext.jwhtscrapper.intfr.HttpMetrics;
import fr.whimtrip.ext.jwhtscrapper.intfr.ScrapperHelper;
import fr.whimtrip.ext.jwhtscrapper.intfr.ScrappingStats;
import fr.whimtrip.ext.jwhtscrapper.service.holder.RequestsScrappingContext;
import fr.whimtrip.ext.jwhtscrapper.service.holder.ScrappingContext;

import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.FutureTask;

/**
 * Part of project jwht-scrapper
 * Created on 07/08/18
 *
 * 
 *     Inner Client class that should perform parralelizing of scraps to be
 *     started in multiple threads while filling threads with new scraps when
 *     terminated so that the accounted max number of parrallel threads is
 *     always reached until the queue of scraps unstarted is not empty.
 * 
 *
 * 
 *     It should also be able to accept new elements to scrap in a thread
 *     safe manner.
 * 
 *
 * 
 *     The implementing class should also be able to stop all current running tasks
 *     and terminate the scrapping process.
 * 
 *
 * 
 *     Finally, returning running tasks, scrapping context, current {@link HttpMetrics}
 *     and {@link ScrappingStats} should also be part of the implementing class processing
 *     unit.
 * 
 *
 * @param  Parent Type
 * @param  Model on which response body will be mapped
 * @author Louis-wht
 * @since 1.0.0
 */
public interface AutomaticInnerScrapperClient {


    /**
     * 

     *     Main method of this processing unit. It should be thread safe
     *     to ensure {@link ScrapperAlreadyStartedException} is thrown
     *     correctly and no two scraps are started from two separate
     *     threads on the same client, leading to further potential leaks
     *     and other uncontrolled exceptions.
     * 
     * 
     *     This method should empty the queue of tasks to be run while not
     *     starting more than the maximum number of threads allowed by the
     *     inner {@link ScrappingContext}. Therefore it should replace
     *     periodically finished threads with new scraps to run.
     * 
     * 
     *     This method might be synchronous running the whole scrap before
     *     returning its result. It should basically be wrapped into a
     *     {@link AutomaticScrapperClient} that will handle asynchronous
     *     thread start, terminations messages and new scraps queuing.
     * 
     * @return the results of the scrapping process which is the list of objects
     *         returned by each scrap from the method {@link ScrapperHelper#returnResult(Object, Object)}.
     *
     * @throws InterruptedException if the one of the scrapping gets interrupted.
     *                              Will typically happen when {@link #terminate()}
     *                              is called.
     * @throws ExecutionException when one of the scrapping failed with an uncaught exception
     *                            if request context {@link RequestsScrappingContext#isThrowExceptions()}
     *                            returns true.
     * @throws ScrapperAlreadyStartedException if called twice. Each single client
     *                                         should be able to start a scrap only
     *                                         once in its lifetime.
     */
    List scrap() throws InterruptedException, ExecutionException, ScrapperAlreadyStartedException;


    /**
     * 
     *     This method will add new elements to the queue of scraps to perform
     *     in separate new threads and should be synchronized.
     * 
     * @param objectsToScrap the list of new elements to scrap.
     * @throws ScrapperAlreadyFinishedException when the scrap is already ternminated
     *                                          or the queue of scraps to run has been
     *                                          emptied.
     */
    void addObjectsToScrap(List objectsToScrap) throws ScrapperAlreadyFinishedException;


    /**
     * 

     *     This method should interrupt the running tasks and stop the current scrapping
     *     process so that no other remaining task can be run afterward. This is definitive
     *     and can't be rolled back. This method should warranty thread safe access.
     * 
     *
     */
    void terminate();


    /**
     * @return the current running tasks.
     */
    List> getRunningTasks();


    /**
     * @return the {@link ScrappingContext} used to build and drive this
     *         {@link AutomaticInnerScrapperClient}
     */
    ScrappingContext> getContext();


    /**
     * @see HttpMetrics
     * @return the current {@link HttpMetrics}.
     * @throws ScrapperUnsupportedException if the underlying {@link HttpManagerClient}
     *                                      or other processing unit used does not support
     *                                      returning valid Http Metrics.
     */
    HttpMetrics getHttpMetrics() throws ScrapperUnsupportedException;


    /**
     * @see ScrappingStats
     * @return the current {@link ScrappingStats}.
     */
    ScrappingStats getScrapingStats();



}