All Downloads are FREE. Search and download functionalities are using the official Maven repository.

fr.whimtrip.ext.jwhtscrapper.intfr.AutomaticScrapperClient Maven / Gradle / Ivy

The newest version!
package fr.whimtrip.ext.jwhtscrapper.intfr;

import fr.whimtrip.ext.jwhtscrapper.exception.ScrapFailedException;
import fr.whimtrip.ext.jwhtscrapper.exception.ScrapNotFinishedException;
import fr.whimtrip.ext.jwhtscrapper.exception.ScrapperAlreadyFinishedException;
import fr.whimtrip.ext.jwhtscrapper.exception.ScrapperAlreadyStartedException;
import fr.whimtrip.ext.jwhtscrapper.exception.ScrapperUnsupportedException;
import fr.whimtrip.ext.jwhtscrapper.impl.ScrappingStatsImpl;
import fr.whimtrip.ext.jwhtscrapper.service.base.HttpManagerClient;
import org.jetbrains.annotations.NotNull;

import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.FutureTask;
import java.util.concurrent.TimeUnit;

/**
 * Part of project jwht-scrapper.
 *
 * <p>Created on 26/07/18.</p>
 *
 * <p>This interface defines how a custom {@link AutomaticScrapperClient}
 * should behave in case you'd like to provide your custom implementation
 * and plug it somewhere.</p>
 *
 * <p>Any implementing class should be able to:</p>
 * <ul>
 *   <li>Start the scrapping operation asynchronously in a separate thread.</li>
 *   <li>Add new objects to scrap during the operation if the scrap is not
 *       terminated.</li>
 *   <li>Return {@link ScrappingStats} to describe the current state of the
 *       scrapping.</li>
 *   <li>Tell if the scrap is finished or not.</li>
 *   <li>Return the results of the scrapping (which is a list of the objects
 *       returned by {@link ScrapperHelper#returnResult(Object, Object)}).</li>
 *   <li>Terminate the scrapper while closing all current running tasks.</li>
 * </ul>
 *
 * @param <P> the parent type whose instances will help initiating each scrap.
 * @author Louis-wht
 * @since 1.0.0
 */
public interface AutomaticScrapperClient<P> {

    /**
     * Should start the scrapping in a dedicated Thread.
     * Overriding methods should be {@code synchronized}
     * to avoid several scraping being run concurrently.
     *
     * @throws ScrapperAlreadyStartedException if another scrap is already
     *         running with the same instance.
     */
    void scrap() throws ScrapperAlreadyStartedException;

    /**
     * Appends new elements to the queue of objects to be scrapped.
     *
     * <p>This method must be synchronized and / or provide a way to add
     * elements in a synchronous way to avoid concurrency problems
     * occurring when performing multiple read and write operations on
     * the subjacent master list.</p>
     *
     * @param l the list of {@code P} objects to add to the queue of objects
     *          to be scrapped.
     * @throws ScrapperAlreadyFinishedException when the scrap is already terminated
     *         or the queue of scraps to run has been emptied.
     */
    void add(List<P> l) throws ScrapperAlreadyFinishedException;

    /**
     * Returns the results of the scrapping (which is a list of the objects
     * returned by {@link ScrapperHelper#returnResult(Object, Object)}).
     *
     * <p>NOTE(review): the element type of the returned {@code List} was lost
     * in extraction — presumably the model type produced by the helpers;
     * confirm against the upstream sources.</p>
     *
     * @return the results of the scrapping.
     * @throws ScrapFailedException if the underlying {@link Callable#call()}
     *         throws an exception, or if the thread execution was interrupted.
     * @throws ScrapNotFinishedException if the method was called when
     *         {@link #isCompleted()} still returns {@code false}.
     */
    List getResults() throws ScrapFailedException, ScrapNotFinishedException;

    /**
     * Returns the results of the scrapping (which is a list of the objects
     * returned by {@link ScrapperHelper#returnResult(Object, Object)}).
     *
     * <p>This method will wait synchronously for the scrap to end.</p>
     *
     * @return the results of the scrapping.
     * @throws ScrapFailedException if the underlying {@link Callable#call()}
     *         throws an exception, or if the thread execution was interrupted.
     */
    List waitAndGetResults() throws ScrapFailedException;

    /**
     * Returns the results of the scrapping (which is a list of the objects
     * returned by {@link ScrapperHelper#returnResult(Object, Object)}).
     *
     * <p>This method will trigger under the hood {@link FutureTask#get()}
     * method which means that it will wait for the underlying thread
     * to end for the submitted {@code timeout}.</p>
     *
     * @param timeout the time that this method will be waiting for the
     *        underlying scrapping thread to end. If a null value is provided,
     *        it's equivalent to the timeout being equal to 0. Therefore
     *        {@link ScrapNotFinishedException} will be thrown if
     *        {@link #isCompleted()} returns {@code false}.
     * @param timeUnit the {@link TimeUnit} of the {@code timeout}.
     * @return the results of the scrapping.
     * @throws ScrapFailedException if the underlying {@link Callable#call()}
     *         throws an exception, or if the thread execution was interrupted.
     * @throws ScrapNotFinishedException if once the timeout is reached, the
     *         scrapper still runs.
     *         <p>Warning! Once this exception has been thrown once,
     *         the scrapper won't run anymore and will call under
     *         the hood {@link #terminate()}.
     *         To continue scrapping, you should instantiate a new
     *         {@link AutomaticScrapperClient}.</p>
     */
    List getResults(Long timeout, TimeUnit timeUnit) throws ScrapFailedException, ScrapNotFinishedException;

    /**
     * This method should close all running thread and keep the Scrapper
     * from opening new threads.
     *
     * <p>Warning! Once this method has been called once, this
     * scrapper cannot be reused.
     * To continue scrapping, you should instantiate a new
     * {@link AutomaticScrapperClient}.</p>
     */
    void terminate();

    /**
     * @return {@code true} if the task is either completed or terminated,
     *         {@code false} otherwise.
     */
    boolean isCompleted();

    /**
     * Returns the current stats of the {@link AutomaticScrapperClient}.
     *
     * <p>Default implementation will return an {@link ScrappingStatsImpl}
     * which is an immutable implementation of {@link ScrappingStats}.
     * Therefore, it won't be updated when the scrapping stats
     * will be modified. Rather than this, you should reuse this
     * method once again.</p>
     *
     * <p>If your implementation features mutable implementation of
     * {@link ScrappingStats} and if this method returns always the
     * same object reference, you can implement this method much in
     * a getter style way.</p>
     *
     * @return current stats of the {@link AutomaticScrapperClient}.
     */
    ScrappingStats getScrappingStats();

    /**
     * @return the current Http metrics {@link HttpMetrics} of the current
     *         instance of the HttpManagerClient.
     * @throws ScrapperUnsupportedException if your implementation does not
     *         support returning, or if underlying classes does not support
     *         returning HttpMetrics (see {@link HttpManagerClient} and
     *         {@link HtmlAutoScrapper}).
     */
    @NotNull
    HttpMetrics getHttpMetrics() throws ScrapperUnsupportedException;
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy