
mServer.crawler.sender.MediathekWdr Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of MServer Show documentation
Show all versions of MServer Show documentation
The crawler for mediathekview/MediathekView
/*
* MediathekView
* Copyright (C) 2008 - 2012 W. Xaver
* & thausherr
*
* W.Xaver[at]googlemail.com
* http://zdfmediathk.sourceforge.net/
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package mServer.crawler.sender;
import java.text.SimpleDateFormat;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.LinkedList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import de.mediathekview.mlib.Config;
import de.mediathekview.mlib.Const;
import de.mediathekview.mlib.daten.ListeFilme;
import de.mediathekview.mlib.tool.Log;
import de.mediathekview.mlib.tool.MSStringBuilder;
import mServer.crawler.FilmeSuchen;
import mServer.crawler.GetUrl;
import mServer.crawler.sender.wdr.WdrDayPageCallable;
import mServer.crawler.sender.wdr.WdrLetterPageCallable;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
public class MediathekWdr extends MediathekReader {
public final static String SENDERNAME = Const.WDR;
private final LinkedList dayUrls = new LinkedList<>();
private final LinkedList letterPageUrls = new LinkedList<>();
private MSStringBuilder seite_1 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
Collection> futureFilme = new ArrayList<>();
private static final Logger LOG = LogManager.getLogger(MediathekWdr.class);
public MediathekWdr(FilmeSuchen ssearch, int startPrio) {
super(ssearch, SENDERNAME,/* threads */ 3, /* urlWarten */ 100, startPrio);
}
//===================================
// public
//===================================
/**
 * Runs one complete pass: resets the URL lists, calls meldungStart(),
 * collects the page URLs and, unless a stop was requested or no URL was
 * found, starts the page tasks and adds the resulting films.
 * meldungThreadUndFertig() is called at the end in every case.
 */
@Override
public synchronized void addToList() {
    clearLists();
    meldungStart();
    fillLists();

    // Same evaluation order as before: the stop flag is checked first,
    // the lists are only inspected when no stop was requested.
    final boolean hasWork = !Config.getStop()
            && !(letterPageUrls.isEmpty() && dayUrls.isEmpty());
    if (hasWork) {
        final int pageCount = letterPageUrls.size() + dayUrls.size();
        meldungAddMax(pageCount);
        startLetterPages();
        startDayPages();
        addFilms();
    }
    meldungThreadUndFertig();
}
/**
 * Waits for each entry in futureFilme and passes every non-null film of
 * its result to addFilm().
 *
 * A failure of one task is logged and does not prevent the remaining
 * tasks from being read. If the waiting thread is interrupted, the
 * interrupt status is restored and waiting stops, so the caller can
 * still observe the interruption.
 */
private void addFilms() {
    for (Future<ListeFilme> pending : futureFilme) {
        try {
            final ListeFilme filmList = pending.get();
            if (filmList != null) {
                filmList.forEach(film -> {
                    if (film != null) {
                        addFilm(film);
                    }
                });
            }
        } catch (InterruptedException interrupted) {
            // get() clears the interrupt flag when it throws; set it again
            // instead of silently swallowing the interruption.
            Thread.currentThread().interrupt();
            LOG.error("Es ist ein Fehler beim lesen der WDR Filme aufgetreten.", interrupted);
            return;
        } catch (Exception exception) {
            // Deliberately broad: one broken page must not abort the others.
            LOG.error("Es ist ein Fehler beim lesen der WDR Filme aufgetreten.", exception);
        }
    }
}
/**
 * Collects the page URLs for this run by calling addLetterPages()
 * and then addDayPages().
 */
private void fillLists() {
addLetterPages();
addDayPages();
}
/**
 * Resets all per-run state: both URL lists and the collected task results.
 *
 * futureFilme has to be emptied as well, otherwise addFilms() would read
 * the results of an earlier run again and add those films a second time.
 */
private void clearLists() {
    letterPageUrls.clear();
    dayUrls.clear();
    futureFilme.clear();
}
/**
 * Submits one WdrLetterPageCallable per letter page URL, stores the
 * resulting future in futureFilme and calls meldungProgress() for the URL.
 *
 * All tasks share a single executor that is shut down once everything is
 * submitted. shutdown() lets the already submitted tasks finish, so the
 * stored futures still complete, but the pool's threads are released
 * afterwards instead of leaking one never-closed pool per URL.
 */
private void startLetterPages() {
    // NOTE(review): the pool is unbounded, as before - one thread per page
    // may be started; confirm whether a limit is wanted.
    final ExecutorService executor = Executors.newCachedThreadPool();
    try {
        letterPageUrls.forEach(url -> {
            futureFilme.add(executor.submit(new WdrLetterPageCallable(url)));
            meldungProgress(url);
        });
    } finally {
        executor.shutdown();
    }
}
/**
 * Submits one WdrDayPageCallable per day page URL, stores the resulting
 * future in futureFilme and calls meldungProgress() for the URL.
 *
 * All tasks share a single executor that is shut down once everything is
 * submitted. shutdown() lets the already submitted tasks finish, so the
 * stored futures still complete, but the pool's threads are released
 * afterwards instead of leaking one never-closed pool per URL.
 */
private void startDayPages() {
    // NOTE(review): the pool is unbounded, as before - one thread per page
    // may be started; confirm whether a limit is wanted.
    final ExecutorService executor = Executors.newCachedThreadPool();
    try {
        dayUrls.forEach(url -> {
            futureFilme.add(executor.submit(new WdrDayPageCallable(url)));
            meldungProgress(url);
        });
    } finally {
        executor.shutdown();
    }
}
/**
 * Adds the "Sendung verpasst" (missed broadcast) page URL of today and of
 * each of the 13 preceding calendar days to dayUrls.
 *
 * The dates are derived with calendar-day arithmetic. Subtracting fixed
 * 24-hour blocks of milliseconds skips or repeats a day when a daylight
 * saving switch lies in the covered range.
 */
private void addDayPages() {
    // "Sendung verpasst": contains some films that are not part of any "Sendung".
    // The URLs are built after this pattern:
    // http://www1.wdr.de/mediathek/video/sendungverpasst/sendung-verpasst-100~_tag-27022016.html
    final String urlPrefix = "http://www1.wdr.de/mediathek/video/sendungverpasst/sendung-verpasst-100~_tag-";
    final int dayCount = 14;
    final DateTimeFormatter dayFormat = DateTimeFormatter.ofPattern("ddMMyyyy");
    // Read the date once so all URLs refer to the same "today".
    final LocalDate today = LocalDate.now();
    for (int daysBack = 0; daysBack < dayCount; ++daysBack) {
        final String tag = today.minusDays(daysBack).format(dayFormat);
        dayUrls.add(urlPrefix + tag + ".html");
    }
}
/**
 * Scans the page held in seite_1 for links to the A-Z letter pages and
 * adds their absolute URLs to letterPageUrls.
 *
 * Each occurrence of MUSTER_URL is followed up to the next double quote;
 * the text in between is taken as the relative page name. "index.html" is
 * skipped, an empty name is reported through Log.errorLog, everything else
 * is prefixed with the A-Z base URL and stored. Scanning ends when no
 * further occurrence is found or Config.getStop() returns true.
 */
private void addLetterPages() {
// http://www1.wdr.de/mediathek/video/sendungen/abisz-b100.html
// search for the topics
final String URL = "http://www1.wdr.de/mediathek/video/sendungen-a-z/index.html";
// NOTE(review): the next line looks truncated - the value of MUSTER_URL,
// the loading of the page into seite_1 and the declarations of pos1, pos2
// and url are not visible here. Restore them from the upstream source.
final String MUSTER_URL = "A");
while (!Config.getStop() && (pos1 = seite_1.indexOf(MUSTER_URL, pos1)) != -1) {
// continue searching behind the current match
pos1 += MUSTER_URL.length();
if ((pos2 = seite_1.indexOf("\"", pos1)) != -1) {
url = seite_1.substring(pos1, pos2);
if (url.equals("index.html")) {
continue;
}
if (url.isEmpty()) {
Log.errorLog(995122047, "keine URL");
} else {
url = "http://www1.wdr.de/mediathek/video/sendungen-a-z/" + url;
letterPageUrls.add(url);
}
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy