
mServer.crawler.sender.MediathekNdr Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of MServer Show documentation
Show all versions of MServer Show documentation
The crawler for mediathekview/MediathekView
/*
* MediathekView
* Copyright (C) 2008 W. Xaver
* W.Xaver[at]googlemail.com
* http://zdfmediathk.sourceforge.net/
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package mServer.crawler.sender;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;
import org.apache.commons.lang3.time.FastDateFormat;
import de.mediathekview.mlib.Config;
import de.mediathekview.mlib.Const;
import de.mediathekview.mlib.daten.DatenFilm;
import de.mediathekview.mlib.tool.Log;
import de.mediathekview.mlib.tool.MSStringBuilder;
import mServer.crawler.CrawlerTool;
import mServer.crawler.FilmeSuchen;
import mServer.crawler.GetUrl;
import mServer.tool.MserverDaten;
public class MediathekNdr extends MediathekReader implements Runnable {
/** Sender name of this crawler, taken from {@code Const.NDR}; also used as the thread-name prefix. */
public static final String SENDERNAME = Const.NDR;
/** Reusable page buffer that receives the page loaded in {@code alleSeiteSuchen}. */
private MSStringBuilder seiteAlle = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
/**
 * Creates the NDR crawler and registers it with the base reader under {@link #SENDERNAME}.
 *
 * @param ssearch   the search coordinator, passed through to {@code MediathekReader}
 * @param startPrio the start priority, passed through to {@code MediathekReader}
 */
public MediathekNdr(FilmeSuchen ssearch, int startPrio) {
    super(
            ssearch,
            SENDERNAME,
            2,  // threads
            50, // urlWarten
            startPrio);
}
//-> erste Seite:
// Nordmagazin
/**
 * Fills {@code listeThemen} with the pages to crawl and starts the worker threads.
 *
 * <p>Two kinds of entries are queued, each as a {@code {url, label}} pair:
 * <ul>
 *   <li>one search page per broadcast ("mediatheksuche105_broadcast-…"), with the broadcast
 *       name as label; when {@code CrawlerTool.loadLongMax()} is set, {@code alleSeiteSuchen}
 *       is asked first to queue the paginated result pages instead,</li>
 *   <li>one "Sendung verpasst" page per day for the last 20 days (30 when
 *       {@code CrawlerTool.loadLongMax()} is set), with the day formatted as
 *       {@code dd.MM.yyyy} as label.</li>
 * </ul>
 *
 * <p>If a stop was requested or nothing was queued, the reader is reported as finished;
 * otherwise {@code getMaxThreadLaufen()} {@code ThemaLaden} threads are started.
 */
@Override
protected void addToList() {
    // 45 Min
    final String ADRESSE = "https://www.ndr.de/mediathek/sendungen_a-z/index.html";
    // NOTE(review): the statement below is cut off in this copy of the file (the text between
    // the string start and ", pos)" is missing, together with the page download, the variable
    // declarations and the opening of the loop/try that L72-75 close). It is kept exactly as found;
    // restore it from the upstream source before building.
    final String MUSTER_URL1 = "", pos);
            pos2 = seite.indexOf("<", pos);
            if (pos1 != -1 && pos2 != -1 && pos1 < pos2) {
                thema = seite.substring(pos1 + 1, pos2);
            }
            if (url.isEmpty()) {
                Log.errorLog(210367600, "keine Url");
                continue;
            }
            String url_ = "https://www.ndr.de/mediathek/mediatheksuche105_broadcast-" + url;
            String[] add = new String[]{url_, thema};
            if (CrawlerTool.loadLongMax()) {
                if (!alleSeiteSuchen(url_, thema)) {
                    // no pagination found: fall back to the plain search page
                    listeThemen.addUrl(add);
                }
            } else {
                listeThemen.addUrl(add);
            }
        } catch (Exception ex) {
            Log.errorLog(332945670, ex);
        }
    }
    // Also queue the "Sendung verpasst" (missed broadcasts) pages of the last days, e.g.
    // https://www.ndr.de/mediathek/sendung_verpasst/epg1490_date-2014-05-17.html
    // https://www.ndr.de/mediathek/sendung_verpasst/epg1490_date-2014-05-17_display-onlyvideo.html
    // https://www.ndr.de/mediathek/sendung_verpasst/epg1490_date-2015-09-05_display-all.html
    final FastDateFormat urlDateFormat = FastDateFormat.getInstance("yyyy-MM-dd");
    final FastDateFormat labelDateFormat = FastDateFormat.getInstance("dd.MM.yyyy");
    final String URL = "https://www.ndr.de/mediathek/sendung_verpasst/epg1490_date-";
    // Must be a long: as an int product, 86_400_000 * i overflows for i >= 25 and the
    // resulting negative offset moved the last days of a 30-day run into the future.
    final long millisPerDay = 24L * 60 * 60 * 1000;
    // Sampled once so the URL date and the label of one entry always describe the same day.
    final long now = System.currentTimeMillis();
    final int maxTage = CrawlerTool.loadLongMax() ? 30 : 20;
    for (int i = 0; i < maxTage; ++i) {
        final long day = now - millisPerDay * i;
        final String tag = urlDateFormat.format(day);
        final String date = labelDateFormat.format(day);
        // "_display-onlyvideo.html" would be the narrower page, but it is not always correct
        final String urlString = URL + tag + "_display-all.html";
        listeThemen.addUrl(new String[]{urlString, date});
    }
    if (Config.getStop() || listeThemen.isEmpty()) {
        // stopped or nothing to do: report this reader as finished
        meldungThreadUndFertig();
    } else {
        meldungAddMax(listeThemen.size());
        for (int t = 0; t < getMaxThreadLaufen(); ++t) {
            Thread th = new ThemaLaden();
            th.setName(SENDERNAME + t);
            th.start();
        }
    }
}
/**
 * Loads a broadcast's search page, reads the highest page number from its pagination
 * links and queues the follow-up pages 2..n (at most up to page 10) in {@code listeThemen}.
 *
 * <p>Example: {@code …/mediatheksuche105_broadcast-30.html} becomes
 * {@code …/mediatheksuche105_broadcast-30_page-3.html}.
 *
 * <p>NOTE(review): the visible caller ({@code addToList}) queues {@code strUrlFeed} itself
 * only when this method returns {@code false}, so when pagination is found the first page
 * appears not to be queued at all — confirm whether that is intended.
 *
 * @param strUrlFeed URL of the first search page of the broadcast
 * @param tthema     broadcast name, stored as the label of every queued page
 * @return {@code true} if at least one follow-up page was queued, {@code false} otherwise
 */
private boolean alleSeiteSuchen(String strUrlFeed, String tthema) {
    // pagination links carry the page number in their title, e.g. title="Zeige Seite 5"
    // https://www.ndr.de/mediathek/mediatheksuche105_broadcast-30_page-1.html
    final String WEITER = " title=\"Zeige Seite ";
    final int maxSeite = 10;
    boolean ret = false;
    GetUrl getUrlIo = new GetUrl(getWartenSeiteLaden());
    seiteAlle = getUrlIo.getUri(SENDERNAME, strUrlFeed, StandardCharsets.UTF_8, 3 /* versuche */, seiteAlle, "Thema: " + tthema/* meldung */);
    int pos1 = 0;
    int pos2;
    int anz2 = 0; // highest page number seen so far
    try {
        while ((pos1 = seiteAlle.indexOf(WEITER, pos1)) != -1) {
            pos1 += WEITER.length();
            if ((pos2 = seiteAlle.indexOf("\"", pos1)) != -1) {
                String anz = seiteAlle.substring(pos1, pos2);
                try {
                    anz2 = Math.max(anz2, Integer.parseInt(anz));
                } catch (NumberFormatException ex) {
                    // keep scanning, but log the cause instead of only the URL
                    Log.errorLog(643208979, ex, strUrlFeed);
                }
            }
        }
        // starts at 2 because strUrlFeed itself is the first page
        for (int i = 2; i <= anz2 && i <= maxSeite; ++i) {
            String url_ = strUrlFeed.replace(".html", "_page-" + i + ".html");
            listeThemen.addUrl(new String[]{url_, tthema});
            ret = true;
        }
    } catch (Exception ex) {
        // log the cause instead of only the URL
        Log.errorLog(913047821, ex, strUrlFeed);
    }
    return ret;
}
private class ThemaLaden extends Thread {
// Page loader of this worker thread, created with the wait time from getWartenSeiteLaden().
private final GetUrl getUrl = new GetUrl(getWartenSeiteLaden());
// Reusable page buffers. seite1 is scanned in feedEinerSeiteSuchen, seite2 receives the
// film page in filmSuchen_1; the use of seite3 is not visible in this part of the file.
private MSStringBuilder seite1 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
private MSStringBuilder seite2 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
private MSStringBuilder seite3 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
// NOTE(review): raw type — the element type was probably lost when this copy was extracted;
// restore it from the upstream source. The use of this list is not visible here.
private final ArrayList liste = new ArrayList<>();
/**
 * Worker loop: takes {@code {url, label}} entries from {@code listeThemen} until the list
 * is exhausted or a stop was requested, and crawls each one with
 * {@code feedEinerSeiteSuchen}. A failure on one entry is logged and does not end the loop.
 * The thread reports itself as finished when the loop is left.
 */
@Override
public void run() {
    try {
        meldungAddThread();
        for (;;) {
            if (Config.getStop()) {
                break;
            }
            final String[] eintrag = listeThemen.getListeThemen();
            if (eintrag == null) {
                break;
            }
            try {
                meldungProgress(eintrag[1]);
                feedEinerSeiteSuchen(eintrag[0], eintrag[1] /* thema */);
            } catch (Exception ex) {
                Log.errorLog(336901211, ex);
            }
        }
    } catch (Exception ex) {
        Log.errorLog(554632590, ex);
    }
    meldungThreadUndFertig();
}
/**
 * Converts a colon-separated duration such as {@code 29:59} or {@code 1:02:03} into seconds.
 *
 * <p>The fields are summed from right to left, each weighted with the next power of 60.
 * If a field cannot be parsed, the sum of the fields to its right is returned; the
 * {@code NumberFormatException} is logged only when {@code MserverDaten.debug} is set.
 *
 * @param duration   the duration text; an empty string yields 0
 * @param strUrlFeed URL of the page being crawled, used in log entries only
 * @return the duration in seconds
 */
private long convertDuration(final String duration, final String strUrlFeed) {
    long seconds = 0;
    try {
        if (duration.isEmpty()) {
            return seconds;
        }
        final String[] fields = duration.split(":");
        long weight = 1;
        int idx = fields.length;
        // right to left: seconds first, then minutes, then hours
        while (--idx >= 0) {
            seconds += Long.parseLong(fields[idx]) * weight;
            weight *= 60;
        }
    } catch (NumberFormatException ex) {
        if (MserverDaten.debug) {
            Log.errorLog(369015497, ex, strUrlFeed);
        }
    } catch (Exception ex) {
        Log.errorLog(369015497, ex, strUrlFeed);
    }
    return seconds;
}
// Scans the page held in seite1 for film entries and hands each one to filmSuchen_1.
// Two page layouts are handled, selected by the flag "tage":
//  - tage == true:  topic and title are read from the link, the date is taken from tthema
//                   (addToList stores the dd.MM.yyyy day as label of the "verpasst" pages)
//                   and the time is parsed as HH:mm from the text at the match position;
//  - tage == false: the title comes from the title attribute, date and time from parseDateTime.
// strUrlFeed is the URL of the page; it is also used in log entries.
// Any exception ends the scan of this page and is logged with the page URL only.
private void feedEinerSeiteSuchen(String strUrlFeed, String tthema) {
// NOTE(review): the statement below is cut off in this copy of the file — the pattern text,
// the variable declarations, the page download and the opening of the try/if are missing.
// The two "muster" literals below and the first extract() pattern further down look emptied
// in the same way. Restore them from the upstream source.
final String MUSTER_URL = "") != -1) {
muster = "";
tage = true;
} else {
muster = "";
}
// one iteration per occurrence of muster; stops early when a stop was requested
while (!Config.getStop() && (pos = seite1.indexOf(muster, pos)) != -1) {
pos += muster.length();
url = seite1.extract(MUSTER_URL, "\"", pos);
if (url.isEmpty()) {
Log.errorLog(659210274, "keine Url feedEinerSeiteSuchen" + strUrlFeed);
continue;
}
// make site-relative links absolute
if (!url.startsWith("http")) {
url = "https://www.ndr.de" + url;
}
if (tage) {
// Rote Rosen (1725)
thema = seite1.extract(MUSTER_URL, " title=\"", "\"", pos, 0, "");
titel = seite1.extract(MUSTER_URL, ">", "<", pos, 0, "");
if (titel.contains("(Wdh.)")) {
// a rerun: the broadcast should already be in the list
continue;
}
if (thema.equals(titel)) {
if (thema.contains(" - ")) {
// "topic - title": the part before the first " - " becomes the topic, the rest the title
thema = thema.substring(0, thema.indexOf(" - ")).trim();
titel = titel.substring(titel.indexOf(" - "));
titel = titel.replace(" - ", "").trim();
} else {
// no separator: use the subtitle element as title when it is not empty
String subtitle = seite1.extract("class=\"subtitle\">", "<", pos).trim();
if (!subtitle.isEmpty()) {
titel = subtitle;
}
}
}
} else {
titel = seite1.extract(" title=\"", "\"", pos);
titel = titel.replace("Zum Video:", "").trim();
}
if (tage) {
// text from the match position up to the next tag, parsed as HH:mm below
tmp = seite1.substring(pos, seite1.indexOf("<", pos));
datum = tthema;
try {
Date filmDate = FastDateFormat.getInstance("HH:mm").parse(tmp);
zeit = FastDateFormat.getInstance("HH:mm:ss").format(filmDate);
} catch (Exception ex) {
Log.errorLog(795623017, "convertDatum: " + strUrlFeed);
}
} else {
tmp = seite1.extract("", "<", pos);
String[] dateValues = parseDateTime(tmp, strUrlFeed);
datum = dateValues[0];
zeit = dateValues[1];
}
if (tage) {
// e.g. 29:59
String duration = seite1.extract("\"Länge\">", "<", pos).trim();
durationInSeconds = convertDuration(duration, strUrlFeed);
} else {
// text between "Video (" and ")", with "min" removed
String duration = seite1.extract("Video (", ")", pos);
duration = duration.replace("min", "").trim();
durationInSeconds = convertDuration(duration, strUrlFeed);
}
filmSuchen_1(strUrlFeed, thema, titel, url, datum, zeit, durationInSeconds);
}
} catch (Exception ex) {
Log.errorLog(693219870, strUrlFeed);
}
}
/**
 * Splits a date-time text of the form {@code dd.MM.yyyy HH:mm} (an optional "Uhr" is
 * removed first) into a date and a time string.
 *
 * @param dateTimeValue the text to parse
 * @param strUrlFeed    URL of the page being crawled, used in the log entry only
 * @return a two-element array: index 0 is the date as {@code dd.MM.yyyy}, index 1 the time
 *         as {@code HH:mm:ss}; both are empty strings when the text is blank, and a
 *         parse failure is logged
 */
private String[] parseDateTime(String dateTimeValue, String strUrlFeed) {
    final String[] result = {"", ""};
    final String cleaned = dateTimeValue.replace("Uhr", "").trim();
    if (cleaned.isEmpty()) {
        return result;
    }
    try {
        final Date parsed = FastDateFormat.getInstance("dd.MM.yyyy HH:mm").parse(cleaned);
        result[0] = FastDateFormat.getInstance("dd.MM.yyyy").format(parsed);
        result[1] = FastDateFormat.getInstance("HH:mm:ss").format(parsed);
    } catch (Exception ex) {
        Log.errorLog(623657941, "convertDatum: " + strUrlFeed);
    }
    return result;
}
private void filmSuchen_1(String strUrlThema, String thema, String titel, String filmWebsite, String datum, String zeit,
long durationInSeconds) {
//playlist: [
//{
//1: {src:'https://hds.ndr.de/z/2013/0419/TV-20130419-1010-0801.,hi,hq,.mp4.csmil/manifest.f4m', type:"application/f4m+xml"},
//2: {src:'https://hls.ndr.de/i/2013/0419/TV-20130419-1010-0801.,lo,hi,hq,.mp4.csmil/master.m3u8', type:"application/x-mpegURL"},
//3: {src:'https://media.ndr.de/progressive/2013/0419/TV-20130419-1010-0801.hi.mp4', type:"video/mp4"},
// https://media.ndr.de/progressive/2012/0820/TV-20120820-2300-0701.hi.mp4
// rtmpt://cp160844.edgefcs.net/ondemand/mp4:flashmedia/streams/ndr/2012/0820/TV-20120820-2300-0701.hq.mp4
seite2 = getUrl.getUri_Utf(SENDERNAME, filmWebsite, seite2, "strUrlThema: " + strUrlThema);
String description = extractDescription(seite2);
//String[] keywords = extractKeywords(seite2);
String subtitle = seite2.extract(",tracks: [{ src: \"", "\""); //,tracks: [{ src: "/fernsehen/sendungen/45_min/video-podcast/ut20448.xml", srclang:"de"}]
if (!subtitle.isEmpty()) {
subtitle = "https://www.ndr.de" + subtitle;
// } else {
// System.out.println("Test");
}
meldung(filmWebsite);
int pos1;
try {
// src="/fernsehen/hallondsopplatt162-player_image-2c09ece0-0508-49bf-b4d6-afff2be2115c_theme-ndrde.html"
// https://www.ndr.de/fernsehen/hallondsopplatt162-ppjson_image-2c09ece0-0508-49bf-b4d6-afff2be2115c.json
// id="pp_hallondsopplatt162"
if (datum.isEmpty()) {
String tmp = seite2.extract("")) != -1) {
tmp = tmp.substring(pos1 + 1, tmp.length());
String[] dateValues = parseDateTime(tmp, strUrlThema);
datum = dateValues[0];
zeit = dateValues[1];
}
}
String json = seite2.extract("
© 2015 - 2025 Weber Informatics LLC | Privacy Policy