All downloads are free. Search and download functionality uses the official Maven repository.

mServer.crawler.sender.orf.tasks.OrfDayTask Maven / Gradle / Ivy

There is a newer version: 3.1.64
Show newest version
package mServer.crawler.sender.orf.tasks;

import java.util.concurrent.ConcurrentLinkedQueue;
import mServer.crawler.sender.MediathekReader;
import mServer.crawler.sender.orf.CrawlerUrlDTO;
import mServer.crawler.sender.orf.TopicUrlDTO;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

/**
 * Document task that collects the links found on a single page.
 *
 * <p>Every element matching {@code article.item > a} is turned into a
 * {@link TopicUrlDTO} built from the theme returned by
 * {@code OrfHelper.parseTheme} and the element's {@code href} attribute.
 * The resulting DTOs are appended to {@code taskResults}.
 */
public class OrfDayTask extends AbstractDocumentTask {

  /** CSS selector for the anchor elements to collect. */
  private static final String ITEM_SELECTOR = "article.item > a";

  /** Name of the attribute holding the link target. */
  private static final String ATTRIBUTE_HREF = "href";

  /**
   * Creates the task.
   *
   * @param aCrawler the crawler this task belongs to; handed to the superclass
   * @param aUrlToCrawlDTOs queue of URL DTOs to process; handed to the superclass
   */
  public OrfDayTask(
      final MediathekReader aCrawler, final ConcurrentLinkedQueue aUrlToCrawlDTOs) {
    super(aCrawler, aUrlToCrawlDTOs);
  }

  /**
   * Adds one {@link TopicUrlDTO} to {@code taskResults} for each element of
   * {@code aDocument} that matches {@link #ITEM_SELECTOR}.
   *
   * @param aUrlDTO the URL DTO the document was loaded for (not read here)
   * @param aDocument the parsed document to scan
   */
  @Override
  protected void processDocument(CrawlerUrlDTO aUrlDTO, Document aDocument) {
    // The theme is resolved before the href is read, for each matched anchor.
    aDocument
        .select(ITEM_SELECTOR)
        .forEach(
            link ->
                taskResults.add(
                    new TopicUrlDTO(OrfHelper.parseTheme(link), link.attr(ATTRIBUTE_HREF))));
  }

  /**
   * Creates another {@code OrfDayTask} that shares this task's crawler.
   *
   * @param aURLsToCrawl queue of URL DTOs for the new instance
   * @return a new task bound to the same crawler and the given queue
   */
  @Override
  protected AbstractUrlTask createNewOwnInstance(ConcurrentLinkedQueue aURLsToCrawl) {
    return new OrfDayTask(crawler, aURLsToCrawl);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy