
mServer.crawler.sender.MediathekArd Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of MServer Show documentation
Show all versions of MServer Show documentation
The crawler for mediathekview/MediathekView
/*
* MediathekView Copyright (C) 2008 W. Xaver W.Xaver[at]googlemail.com
* http://zdfmediathk.sourceforge.net/
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU General Public License as published by the Free Software Foundation, either version 3 of the
* License, or any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with this program. If
* not, see <http://www.gnu.org/licenses/>.
*/
package mServer.crawler.sender;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import de.mediathekview.mlib.Config;
import de.mediathekview.mlib.Const;
import de.mediathekview.mlib.daten.DatenFilm;
import de.mediathekview.mlib.tool.Log;
import de.mediathekview.mlib.tool.MSStringBuilder;
import mServer.crawler.CrawlerTool;
import mServer.crawler.FilmeSuchen;
import mServer.crawler.GetUrl;
import mServer.crawler.sender.ard.ArdVideoDTO;
import mServer.crawler.sender.ard.ArdVideoDeserializer;
import mServer.crawler.sender.newsearch.Qualities;
import mServer.tool.M3U8Utils;
public class MediathekArd extends MediathekReader {
private class ThemaLaden extends Thread {
// Regex for a run of decimal digits. addTage() compiles it, applies it to the extracted
// duration text and multiplies the first match by 60 (presumably minutes -> seconds; confirm).
private static final String DAUER_REGEX_PATERN = "\\d+";
// Topic-name constant; not referenced in the visible portion of this class.
private static final String THEMA_ALPHA_CENTAURI = "alpha-Centauri";
// Marker that addTage() searches for in the downloaded day page and then skips over by its length.
// NOTE(review): the literal is empty in this view. If MSStringBuilder.indexOf behaves like
// StringBuilder.indexOf, an empty marker matches at every position and the scan position never
// advances. The original marker text was probably lost; verify against the upstream source.
private static final String MUSTER_ADD_TAGE = "";
// Marker presumably used by filmSuchen1 (its body is not visible here).
// NOTE(review): also empty in this view; likely lost the same way as MUSTER_ADD_TAGE. Verify.
private static final String MUSTER_FILM_SUCHEN1 = "";
// Text marker; its use is not visible in this portion of the file.
private static final String MUSTER_START_FILM_SUCHEN1 = "Beiträge der Sendung";
// Per-thread list; not used in the visible portion.
// NOTE(review): declared with a raw ArrayList type. The element type is not visible here;
// confirm the intended type parameter upstream.
private final ArrayList liste = new ArrayList<>();
// Page buffers created with the initial capacity Const.STRING_BUFFER_START_BUFFER.
// addTage() reassigns seite1 with the result of GetUrl.getUri for each requested page;
// seite2 and seite3 are not used in the visible portion.
private MSStringBuilder seite1 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
private MSStringBuilder seite2 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
private MSStringBuilder seite3 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
/**
 * Worker loop of this thread.
 *
 * <p>Announces the thread via {@code meldungAddThread()}, then repeatedly takes the next entry
 * from {@code listeThemen} until either a stop was requested ({@code Config.getStop()}) or no
 * entry is returned. Each entry is reported via {@code meldungProgress} and dispatched to
 * {@code addTage()} or {@code filmSuchen1}. Any exception ends the loop and is logged; the
 * thread is reported as finished afterwards in either case.
 */
@Override
public void run() {
  try {
    meldungAddThread();
    while (!Config.getStop()) {
      final String[] eintrag = listeThemen.getListeThemen();
      if (eintrag == null) {
        // getListeThemen() returned null: nothing more to process
        break;
      }
      final String themaUrl = eintrag[0];
      meldungProgress(themaUrl);
      if (!themaUrl.equals(THEMA_TAGE)) {
        // regular entry: element 0 is the URL, element 1 is handed on unchanged
        filmSuchen1(themaUrl, eintrag[1], true);
      } else {
        // marker entry: process the per-day pages instead of a single topic URL
        addTage();
      }
    }
  } catch (final Exception ex) {
    Log.errorLog(487326921, ex);
  }
  meldungThreadUndFertig();
}
private void addTage() {
// http://www.ardmediathek.de/tv/sendungVerpasst?tag=0 ... 6
for (int i = 0; i <= 6; ++i) {
if (Config.getStop()) {
break;
}
final String urlTage = "http://www.ardmediathek.de/tv/sendungVerpasst?tag=" + i;
final GetUrl getUrl = new GetUrl(getWartenSeiteLaden());
seite1 = getUrl.getUri(SENDERNAME, urlTage, StandardCharsets.UTF_8, 2, seite1, "");
if (seite1.length() == 0) {
Log.errorLog(765323214, "Leere Seite: " + urlTage);
return;
}
int pos = 0;
String url, datum, zeit = "", titel, dauer, urlSendung, thema;
long d = 0;
while (!Config.getStop() && (pos = seite1.indexOf(MUSTER_ADD_TAGE, pos)) != -1) {
zeit = seite1.extract("", "<", pos) + ":00";
pos += MUSTER_ADD_TAGE.length();
url = seite1.extract("documentId=", "&", pos);
if (url.contains("\"")) {
url = url.substring(0, url.indexOf('\"'));
}
if (!url.isEmpty()) {
url = url.replace("&", "&");
thema = seite1.extract("", "<", pos);
if (thema.endsWith("Uhr") && thema.contains(",")) {
// tagesschau, 09:00 Uhr
thema = thema.substring(0, thema.indexOf(','));
}
datum = seite1
.extract("Videos (TV-Sendungen) des Senders Das Erste vom", "- ARD").trim();
titel = seite1.extract("", "<", pos);
dauer = seite1.extract("
", "<", pos);
try {
final Matcher dauerMatcher = Pattern.compile(DAUER_REGEX_PATERN).matcher(dauer);
if (dauerMatcher.find()) {
d = Long.parseLong(dauerMatcher.group()) * 60;
}
} catch (final Exception ignored) {
}
if (d == 0) {
Log.errorLog(915263621, "Dauer==0: " + urlTage);
}
urlSendung = seite1.extract("",
"
© 2015 - 2025 Weber Informatics LLC | Privacy Policy