
/*
 * mServer.crawler.sender.orf.tasks.OrfDayTask
 * (MServer artifact; available for Maven / Gradle / Ivy).
 * MServer is the crawler for mediathekview/MediathekView.
 */
package mServer.crawler.sender.orf.tasks;
import java.util.concurrent.ConcurrentLinkedQueue;
import mServer.crawler.sender.MediathekReader;
import mServer.crawler.sender.orf.CrawlerUrlDTO;
import mServer.crawler.sender.orf.TopicUrlDTO;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
/**
 * Document task that collects topic links from a fetched page.
 *
 * <p>Every element matching {@code article.item > a} is turned into a
 * {@link TopicUrlDTO} (theme plus the element's {@code href} value) and
 * appended to {@code taskResults}.
 *
 * <p>NOTE(review): {@code ConcurrentLinkedQueue} and the superclass are used
 * as raw types here; the type arguments may have been lost in extraction —
 * confirm against {@code AbstractDocumentTask}.
 */
public class OrfDayTask extends AbstractDocumentTask {

  /** CSS selector for the anchor elements that carry the topic links. */
  private static final String ITEM_SELECTOR = "article.item > a";

  /** Name of the attribute holding the link target. */
  private static final String ATTRIBUTE_HREF = "href";

  /**
   * Creates the task; both arguments are passed unchanged to the superclass.
   *
   * @param aCrawler the crawler this task belongs to
   * @param aUrlToCrawlDTOs queue of URL DTOs to process
   */
  public OrfDayTask(final MediathekReader aCrawler,
      final ConcurrentLinkedQueue aUrlToCrawlDTOs) {
    super(aCrawler, aUrlToCrawlDTOs);
  }

  /**
   * Adds one {@link TopicUrlDTO} to {@code taskResults} for each element of
   * the document that matches {@code ITEM_SELECTOR}.
   *
   * @param aUrlDTO the URL DTO the document was loaded for (not used here)
   * @param aDocument the parsed document to search for topic links
   */
  @Override
  protected void processDocument(CrawlerUrlDTO aUrlDTO, Document aDocument) {
    final Elements links = aDocument.select(ITEM_SELECTOR);
    // The theme is resolved before the href is read, for every matched link.
    links.forEach(link -> taskResults.add(
        new TopicUrlDTO(OrfHelper.parseTheme(link), link.attr(ATTRIBUTE_HREF))));
  }

  /**
   * Builds another {@code OrfDayTask} that uses this task's crawler and the
   * given queue.
   *
   * @param aURLsToCrawl queue of URL DTOs for the new task
   * @return a new {@code OrfDayTask} instance
   */
  @Override
  protected AbstractUrlTask createNewOwnInstance(ConcurrentLinkedQueue aURLsToCrawl) {
    final OrfDayTask sibling = new OrfDayTask(crawler, aURLsToCrawl);
    return sibling;
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy