
mServer.crawler.sender.MediathekBr Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of MServer Show documentation
Show all versions of MServer Show documentation
The crawler for mediathekview/MediathekView
/*
* MediathekView
* Copyright (C) 2008 W. Xaver
* W.Xaver[at]googlemail.com
* http://zdfmediathk.sourceforge.net/
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package mServer.crawler.sender;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.LinkedList;
import java.util.Locale;
import org.apache.commons.lang3.StringEscapeUtils;
import de.mediathekview.mlib.Config;
import de.mediathekview.mlib.Const;
import de.mediathekview.mlib.daten.DatenFilm;
import de.mediathekview.mlib.tool.Log;
import de.mediathekview.mlib.tool.MSStringBuilder;
import mServer.crawler.CrawlerTool;
import mServer.crawler.FilmeSuchen;
import mServer.crawler.GetUrl;
import mServer.tool.MserverDaten;
public class MediathekBr extends MediathekReader {
// Sender name used by this crawler (thread names, page requests); taken from Const.BR.
public final static String SENDERNAME = Const.BR;
// Formatter for timestamps of the form "dd.MM.yyyy, HH:mm", for example: 08.11.2013, 18:00
// NOTE(review): SimpleDateFormat is not thread-safe and this class starts several worker
// threads — confirm that the three formatters below are never used concurrently.
private final SimpleDateFormat sdf = new SimpleDateFormat("dd.MM.yyyy, HH:mm", Locale.ENGLISH);//08.11.2013, 18:00
// Formatter for a time of day ("HH:mm:ss").
private final SimpleDateFormat sdfOutTime = new SimpleDateFormat("HH:mm:ss");
// Formatter for a calendar day ("dd.MM.yyyy").
private final SimpleDateFormat sdfOutDay = new SimpleDateFormat("dd.MM.yyyy");
// URL list for the daily programme pages; cleared at the start of getTage() and
// checked for emptiness in addToList().
private final LinkedListUrl listeTage = new LinkedListUrl();
// Search markers; their use is not visible in this section.
// NOTE(review): all PATTERN_* values except PATTERN_END are empty strings in this copy —
// presumably markup literals that were lost; verify against the upstream source.
private static final String PATTERN_VERY_SMALL = "";
private static final String PATTERN_SMALL = "";
private static final String PATTERN_NORMAL = "";
private static final String PATTERN_HD = "";
private static final String PATTERN_DLURL = "";
private static final String PATTERN_END = "<";
// Topic names found by getTheman(); each name is added at most once.
// NOTE(review): the three lists below are declared with the raw type LinkedList;
// the element type is presumably String — confirm against the upstream source.
private final LinkedList listeAlleThemen = new LinkedList<>();
// Use not visible in this section.
private final LinkedList listeAlleThemenCount = new LinkedList<>();
// Passed to mlibFilmeSuchen.listeFilmeAlt.getThema(...) at the start of addToList().
private final LinkedList listeAlleThemenCount_ = new LinkedList<>();
/**
 * Creates the crawler for the sender named by {@link #SENDERNAME}.
 *
 * @param ssearch   forwarded unchanged to the superclass constructor
 * @param startPrio forwarded unchanged to the superclass constructor
 */
public MediathekBr(FilmeSuchen ssearch, int startPrio) {
// NOTE(review): 4 and 100 are presumably the maximum number of worker threads and the
// wait time between page loads — confirm against the MediathekReader constructor.
super(ssearch, SENDERNAME, 4, 100, startPrio);
}
/**
 * Starts the archive crawler threads.
 *
 * When {@code CrawlerTool.loadLongMax()} is true, four {@code ArchivLaden} threads are
 * started for the inclusive ranges 1-50, 51-100, 101-150 and 151-200. When
 * {@code CrawlerTool.loadMax()} is true, four more are started for 201-250, 251-300,
 * 301-350 and 351-400. The two conditions are checked independently of each other.
 * The ranges are presumably archive page numbers — confirm against {@code ArchivLaden}.
 */
private void startArchiveThreads() {
    if (CrawlerTool.loadLongMax()) {
        // Search the archive: ranges 1-50 ... 151-200
        for (int first = 1; first <= 151; first += 50) {
            final Thread archiveWorker = new ArchivLaden(first, first + 49);
            archiveWorker.start();
        }
    }
    if (CrawlerTool.loadMax()) {
        // Search the archive: ranges 201-250 ... 351-400
        for (int first = 201; first <= 351; first += 50) {
            final Thread archiveWorker = new ArchivLaden(first, first + 49);
            archiveWorker.start();
        }
    }
}
/**
 * Creates one {@code KlassikLaden} thread, names it {@code SENDERNAME + "-Klassik"}
 * and starts it.
 */
private void startKlassikThread() {
    final Thread klassikWorker = new KlassikLaden();
    final String workerName = SENDERNAME + "-Klassik";
    klassikWorker.setName(workerName);
    klassikWorker.start();
}
/**
 * Starts the {@code ThemaLaden} worker threads, named {@code SENDERNAME + index}
 * with the index counting up from 0.
 *
 * NOTE(review): the bound is inclusive, so {@code getMaxThreadLaufen() + 1} threads are
 * started — confirm whether the extra thread is intended.
 */
private void startCrawlerThreads() {
    int index = 0;
    // The limit is re-read on every pass, exactly as a for-loop condition would do.
    while (index <= getMaxThreadLaufen()) {
        final Thread worker = new ThemaLaden();
        worker.setName(SENDERNAME + index);
        worker.start();
        ++index;
    }
}
/**
 * Entry point of the crawl: collects the topic and day pages and, if there is anything
 * to do, starts the worker threads.
 *
 * Order of work: hand {@code listeAlleThemenCount_} to
 * {@code mlibFilmeSuchen.listeFilmeAlt.getThema(...)}, call {@code meldungStart()},
 * then run {@code getTheman()} and {@code getTage()}. If a stop was requested or both
 * URL lists are empty, only {@code meldungThreadUndFertig()} is called. Otherwise the
 * combined list size is reported via {@code meldungAddMax(...)} and the archive,
 * Klassik and crawler threads are started.
 */
@Override
protected void addToList() {
    mlibFilmeSuchen.listeFilmeAlt.getThema(getSendername(), listeAlleThemenCount_);
    meldungStart();

    getTheman(); // look for topics
    getTage(); // scan the programme of the last days

    final boolean nothingToDo = Config.getStop()
            || (listeThemen.isEmpty() && listeTage.isEmpty());
    if (nothingToDo) {
        meldungThreadUndFertig();
        return;
    }

    final int total = listeThemen.size() + listeTage.size();
    meldungAddMax(total);
    // Start archive and Klassik only here; otherwise the crawler terminates / searches anyway!
    startArchiveThreads();
    startKlassikThread();
    startCrawlerThreads();
}
/**
 * Scans a page for topic links and queues one entry per accepted link in listeThemen.
 *
 * For every occurrence of MUSTER_URL in seite, the text up to the next double quote is
 * taken as the relative URL and a topic name is extracted from the text that follows.
 * Every topic name is remembered once in listeAlleThemen; only links whose URL starts
 * with MUSTER_URL_1 or MUSTER_URL_2 are added to listeThemen.
 *
 * NOTE(review): the head of this method is damaged in this copy. The MUSTER_URL literal
 * is fused with a later condition, and the declarations of seite, pos1, pos2, url,
 * MUSTER_URL_1 and MUSTER_URL_2 as well as the code that loads the page (presumably
 * from ADRESSE) are not visible. Restore them from the upstream source before building.
 */
private void getTheman() {
final String ADRESSE = "http://www.br.de/mediathek/video/sendungen/index.html";
final String MUSTER_URL = "")) != -1) {
// Walk through all occurrences of the link marker.
while ((pos1 = seite.indexOf(MUSTER_URL, pos1)) != -1) {
// Abort as soon as a stop has been requested.
if (Config.getStop()) {
break;
}
try {
// Move past the marker so the next indexOf() call cannot match the same spot again.
pos1 += MUSTER_URL.length();
// The URL runs up to the next double quote; if there is none, url keeps the value
// it had before this iteration.
if ((pos2 = seite.indexOf("\"", pos1)) != -1) {
url = seite.substring(pos1, pos2);
}
// NOTE(review): the first argument is an empty string in this copy — presumably a
// markup literal that was lost; verify against the upstream source.
String thema = seite.extract("", "<", pos1);
// Decode XML entities first, then HTML 4 entities.
thema = StringEscapeUtils.unescapeXml(thema.trim());
thema = StringEscapeUtils.unescapeHtml4(thema.trim());
// Record the topic name once; this happens before the URL filter below, so names of
// skipped links are recorded as well.
if (!listeAlleThemen.contains(thema)) {
listeAlleThemen.add(thema);
}
// Skip links without a URL or with a URL that matches neither accepted prefix.
if (url.isEmpty()
|| (!url.startsWith(MUSTER_URL_1) && !url.startsWith(MUSTER_URL_2))) {
continue;
}
/// BR is a bit too slow for this????
// // add to the list
// String[] add;
// if (MSearchConfig.senderAllesLaden) {
// add = new String[]{"http://www.br.de/mediathek/video/sendungen/" + url + "#seriesMoreCount=10", ""};
// } else {
// add = new String[]{"http://www.br.de/mediathek/video/sendungen/" + url, ""};
// }
// add to the list: element 0 is the absolute URL, element 1 the topic name
String[] add = new String[]{"http://www.br.de/mediathek/video/" + url, thema};
listeThemen.addUrl(add);
} catch (Exception ex) {
// Log the failure under this error number and carry on with the next link.
Log.errorLog(821213698, ex);
}
}
}
}
private void getTage() {
//
String date;
final String ADRESSE = "http://www.br.de/mediathek/video/programm/index.html";
final String MUSTER = "http://www.br.de/mediathek/video/programm/mediathek-programm-100~_date-";
listeTage.clear();
MSStringBuilder seite1 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
MSStringBuilder seite2 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
ArrayList al = new ArrayList<>();
try {
//seite1 = getUrlIo.getUri_Utf(SENDERNAME, ADRESSE, seite1, "");
GetUrl getUrlIo = new GetUrl(getWartenSeiteLaden());
seite1 = getUrlIo.getUri(SENDERNAME, ADRESSE, StandardCharsets.UTF_8, 5 /* versuche */, seite1, "");
String url;
int max_;
if (CrawlerTool.loadLongMax()) {
max_ = 21;
} else {
max_ = 7;
}
for (int i = 0; i < max_; ++i) {
if ((Config.getStop())) {
break;
}
date = new SimpleDateFormat("yyyy-MM-dd").format(new Date().getTime() - i * (1000 * 60 * 60 * 24));
url = seite1.extract("/mediathek/video/programm/mediathek-programm-100~_date-" + date, "\"");
if (url.isEmpty()) {
continue;
}
// in die Liste eintragen
url = MUSTER + date + url;
seite2 = getUrlIo.getUri_Utf(SENDERNAME, url, seite2, "");
// public void extractList(String abMuster, String bisMuster, String musterStart, String musterEnde, String addUrl, ArrayList result) {
seite2.extractList("