package fr.whimtrip.ext.jwhtscrapper.intfr;
import fr.whimtrip.ext.jwhtscrapper.exception.ScrapFailedException;
import fr.whimtrip.ext.jwhtscrapper.exception.ScrapNotFinishedException;
import fr.whimtrip.ext.jwhtscrapper.exception.ScrapperAlreadyFinishedException;
import fr.whimtrip.ext.jwhtscrapper.exception.ScrapperAlreadyStartedException;
import fr.whimtrip.ext.jwhtscrapper.exception.ScrapperUnsupportedException;
import fr.whimtrip.ext.jwhtscrapper.impl.ScrappingStatsImpl;
import fr.whimtrip.ext.jwhtscrapper.service.base.HttpManagerClient;
import org.jetbrains.annotations.NotNull;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;
/**
* Part of project jwht-scrapper
* Created on 26/07/18
*
*
* This interface defines how a custom {@link AutomaticScrapperClient}
* should behave in case you'd like to provide your own implementation
* and plug it in.
*
*
* Any implementing class should be able to:
*
* - Start the scrapping operation asynchronously in a separate thread.
* - Add new objects to scrap during the operation if the scrap is not
*   terminated.
* - Return {@link ScrappingStats} describing the current state of the
*   scrapping.
* - Tell whether the scrap is finished or not.
* - Return the results of the scrapping (which is a list of the objects
*   returned by {@link ScrapperHelper#returnResult(Object, Object)}).
* - Terminate the scrapper while closing all currently running tasks.
*
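* A minimal usage sketch is shown below (the {@code Post} parent type and the
* {@code runScrap} wrapper are purely illustrative; how the client instance is
* obtained is left out on purpose):
*
* <pre>{@code
* // hypothetical wrapper showing the expected lifecycle; the client itself
* // would typically be created by the library's scrapper setup
* public List runScrap(AutomaticScrapperClient<Post> client, List<Post> morePosts)
*         throws ScrapperAlreadyStartedException, ScrapperAlreadyFinishedException, ScrapFailedException {
*     client.scrap();                              // start scrapping in a dedicated thread
*     client.add(morePosts);                       // queue more objects while it runs
*     List results = client.waitAndGetResults();   // block until the scrap ends
*     client.terminate();                          // free resources; the client cannot be reused
*     return results;
* }
* }</pre>
*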
* @param <P> the parent type whose instances will help initiate each scrap.
* @author Louis-wht
* @since 1.0.0
*/
public interface AutomaticScrapperClient<P>
{
/**
*
* Should start the scrapping in a dedicated Thread.
* Overriding methods should be {@code synchronized}
* to avoid several scrapping operations running concurrently.
*
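* A possible implementation sketch (the {@code scrapStarted} flag and the
* {@code runScrapping} helper are purely illustrative):
*
* <pre>{@code
* private boolean scrapStarted = false;
*
* public synchronized void scrap() throws ScrapperAlreadyStartedException {
*     if (scrapStarted) {
*         // constructor arguments depend on the actual exception class
*         throw new ScrapperAlreadyStartedException("scrap already started");
*     }
*     scrapStarted = true;
*     new Thread(this::runScrapping).start();   // runScrapping() would perform the actual scrap
* }
* }</pre>
*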
* @throws ScrapperAlreadyStartedException if another scrap is already
* running with the same instance
*/
void scrap() throws ScrapperAlreadyStartedException;
/**
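* Adds new objects to the queue of objects to be scrapped. A thread-safe
* implementation sketch is shown below (the {@code parentObjects} field is
* purely illustrative):
*
* <pre>{@code
* private final List<P> parentObjects = new ArrayList<>();
*
* public synchronized void add(List<P> l) throws ScrapperAlreadyFinishedException {
*     if (isCompleted()) {
*         // constructor arguments depend on the actual exception class
*         throw new ScrapperAlreadyFinishedException("scrap already finished");
*     }
*     parentObjects.addAll(l);
* }
* }</pre>
*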
* @param l the list of {@code P} objects to add to the queue of objects
* to be scrapped.
* This method must be synchronized and / or provide another thread-safe
* way to add elements, to avoid concurrency problems when multiple
* read and write operations are performed on the underlying master
* list.
* @throws ScrapperAlreadyFinishedException when the scrap is already terminated
* or the queue of scraps to run has been
* emptied.
*/
void add(List<P> l) throws ScrapperAlreadyFinishedException;
/**
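* Returns the results of the scrapping without waiting for it to end.
* A typical guarded call (the {@code client} variable is illustrative):
*
* <pre>{@code
* if (client.isCompleted()) {
*     // ScrapFailedException / ScrapNotFinishedException handling omitted
*     List results = client.getResults();   // safe: the scrap is already done
* }
* }</pre>
*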
* @return
* the results of the scrapping (which is a list of the objects
* returned by {@link ScrapperHelper#returnResult(Object, Object)}).
*
*
* @throws ScrapFailedException If the underlying {@link Callable#call()} throws
* an exception, or if the thread execution was
* interrupted.
*
* @throws ScrapNotFinishedException If this method is called while {@link #isCompleted()}
* still returns {@code false}.
*/
List getResults() throws ScrapFailedException, ScrapNotFinishedException;
/**
* @return
* the results of the scrapping (which is a list of the objects
* returned by {@link ScrapperHelper#returnResult(Object, Object)}).
*
*
* This method will wait synchronously for the scrap to end.
*
*
*
* @throws ScrapFailedException If the underlying {@link Callable#call()} throws
* an exception, or if the thread execution was
* interrupted.
*
*/
List waitAndGetResults() throws ScrapFailedException;
/**
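* Waits for the scrap to end for at most the given timeout, then returns the
* results. A usage sketch (the {@code client} variable and the 30-second
* timeout are illustrative):
*
* <pre>{@code
* try {
*     List results = client.getResults(30L, TimeUnit.SECONDS);
* } catch (ScrapNotFinishedException e) {
*     // the scrapper has been terminated under the hood; a new
*     // AutomaticScrapperClient must be created to scrap again
* } catch (ScrapFailedException e) {
*     // the underlying scrapping task itself failed
* }
* }</pre>
*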
* @param timeout
* the time that this method will wait for the underlying
* scrapping thread to end.
*
*
* If a null value is provided, it is equivalent to a timeout of 0:
* {@link ScrapNotFinishedException} will be thrown if {@link #isCompleted()}
* returns {@code false}.
*
* @param timeUnit the {@link TimeUnit} of the {@code timeout}.
* @return
* the results of the scrapping (which is a list of the objects
* returned by {@link ScrapperHelper#returnResult(Object, Object)}).
*
*
* @throws ScrapFailedException
*
* If the underlying {@link Callable#call()} throws an exception, or if
* the thread execution was interrupted.
*
*
* Under the hood, this method triggers {@link FutureTask#get()},
* which means it will wait for the underlying thread to end
* for at most the submitted {@code timeout}.
*
* @throws ScrapNotFinishedException
* if the scrapper is still running once the timeout is reached.
*
*
* Warning! Once this exception has been thrown,
* the scrapper won't run anymore and {@link #terminate()}
* will be called under the hood.
* To continue scrapping, you should instantiate a new
* {@link AutomaticScrapperClient}.
*
*
*/
List getResults(Long timeout, TimeUnit timeUnit) throws ScrapFailedException, ScrapNotFinishedException;
/**
* This method should close all running threads and keep the scrapper
* from opening new ones.
*
* Warning! Once this method has been called, this
* scrapper cannot be reused.
* To continue scrapping, you should instantiate a new
* {@link AutomaticScrapperClient}.
*
*/
void terminate();
/**
* @return {@code true} if the task is either completed or terminated,
* {@code false} otherwise.
*/
boolean isCompleted();
/**
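* Returns a snapshot of the current scrapping statistics. A simple monitoring
* loop sketch (the {@code client} variable, the polling interval and the
* {@code log} helper are illustrative):
*
* <pre>{@code
* while (!client.isCompleted()) {
*     // each call returns a fresh (immutable) snapshot
*     ScrappingStats stats = client.getScrappingStats();
*     log(stats);            // hypothetical logging helper
*     Thread.sleep(1_000);   // InterruptedException handling omitted for brevity
* }
* }</pre>
*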
* @return
* the current stats of the {@link AutomaticScrapperClient}.
* The default implementation returns a {@link ScrappingStatsImpl},
* which is an immutable implementation of {@link ScrappingStats}.
* Therefore, it won't be updated when the scrapping stats
* change; to get fresh stats, call this method again.
*
*
* If your implementation features a mutable implementation of
* {@link ScrappingStats} and this method always returns the
* same object reference, you can implement this method as a
* simple getter.
*
*/
ScrappingStats getScrappingStats();
/**
* @return the current {@link HttpMetrics} of the underlying
* {@link HttpManagerClient} instance.
* @throws ScrapperUnsupportedException if your implementation, or the underlying
* classes, does not support returning
* {@link HttpMetrics} (see {@link HttpManagerClient}
* and {@link HtmlAutoScrapper}).
*/
@NotNull
HttpMetrics getHttpMetrics() throws ScrapperUnsupportedException;
}