
mServer.crawler.sender.MediathekRbb Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of MServer Show documentation
Show all versions of MServer Show documentation
The crawler for mediathekview/MediathekView
/*
* MediathekView
* Copyright (C) 2008 - 2012 W. Xaver
* & thausherr
*
* W.Xaver[at]googlemail.com
* http://zdfmediathk.sourceforge.net/
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package mServer.crawler.sender;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import de.mediathekview.mlib.Config;
import de.mediathekview.mlib.Const;
import de.mediathekview.mlib.daten.DatenFilm;
import de.mediathekview.mlib.tool.Log;
import de.mediathekview.mlib.tool.MSStringBuilder;
import mServer.crawler.CrawlerTool;
import mServer.crawler.FilmeSuchen;
import mServer.crawler.GetUrl;
public class MediathekRbb extends MediathekReader {
/**
 * Sender identifier of this crawler, taken from {@code Const.RBB}. It is handed
 * to the superclass constructor and used when pages are fetched and film
 * entries are created.
 */
public static final String SENDERNAME = Const.RBB;
//final static String ROOTADR = "http://mediathek.rbb-online.de";
/**
 * Creates the RBB reader and forwards its fixed settings to the superclass.
 *
 * @param ssearch the search coordinator handed through to the superclass
 * @param startPrio the start priority handed through to the superclass
 */
public MediathekRbb(FilmeSuchen ssearch, int startPrio) {
    super(
            ssearch,
            SENDERNAME,
            /* threads */ 2,
            /* urlWarten */ 100,
            startPrio);
}
/**
 * Fetches an overview page, follows every link that matches MUSTER_URL by
 * passing it to addFilme, and then looks for pagination markers.
 *
 * NOTE(review): this body is not valid Java as listed. URL is declared twice,
 * seite1, url and weiter are never declared here, and the trailing catch has
 * no matching try. It looks like several original methods were merged and
 * parts of the text were lost - restore from the upstream source before
 * changing any logic.
 */
@Override
protected void addToList() {
// NOTE(review): seite and liste are not used anywhere in the visible body.
MSStringBuilder seite = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
//
ArrayList liste = new ArrayList<>();
// The two halves (a-k, l-z) of the "Sendungen A-Z" overview.
final String ADRESSE_1 = "http://mediathek.rbb-online.de/tv/sendungen-a-z?cluster=a-k";
final String ADRESSE_2 = "http://mediathek.rbb-online.de/tv/sendungen-a-z?cluster=l-z";
// NOTE(review): the search patterns below are empty strings and URL is
// declared twice; presumably markup-like literals were stripped - confirm.
final String URL = "7 Tage Rückblick";
final String MUSTER_URL = "";
final String URL = "";
GetUrl getUrlIo = new GetUrl(getWartenSeiteLaden());
// Load the page behind url into the seite1 buffer.
seite1 = getUrlIo.getUri_Utf(SENDERNAME, url, seite1, "");
int pos1 = seite1.indexOf("");
// Walk over every occurrence of MUSTER_URL until the crawler is asked to stop.
while (!Config.getStop() && (pos1 = seite1.indexOf(MUSTER_URL, pos1)) != -1) {
pos1 += MUSTER_URL.length();
String urlSeite = seite1.extract(URL, "\"", pos1);
if (!urlSeite.isEmpty()) {
// NOTE(review): as written this replaces "&" with "&" and changes nothing;
// presumably the pattern was the HTML entity for an ampersand - confirm.
urlSeite = urlSeite.replaceAll("&", "&");
urlSeite = "http://mediathek.rbb-online.de/tv/" + urlSeite;
addFilme(urlSeite);
} else {
Log.errorLog(751203697, "keine URL für: " + url);
}
}
// look for further pages as well
if (weiter && CrawlerTool.loadLongMax()) {
// Only the markers for pages 2 through 9 are probed.
for (int i = 2; i < 10; ++i) {
if (seite1.indexOf("mcontents=page." + i) != -1) {
// then there are further pages
addThema(url + "&mcontents=page." + i, false);
}
}
}
} catch (Exception ex) {
Log.errorLog(541236987, ex);
}
}
/**
 * Loads one film page, resolves its stream and subtitle URLs and registers
 * the resulting film entry.
 *
 * Steps visible here: fetch urlSeite into seite2, read the description, parse
 * date and time, build the media URL from the documentId parameter, fetch it
 * into seite3, pick the low and normal quality streams (https first, then
 * http), and finally create a DatenFilm and pass it to addFilm. Any exception
 * is logged and not rethrown.
 *
 * NOTE(review): sub, thema, title and durationInSeconds are read but never
 * assigned in the visible body, and duration is never declared. The
 * statements that set them appear to be missing from this listing - restore
 * from the upstream source.
 *
 * @param urlSeite address of the film page; expected to contain "documentId="
 */
private void addFilme(String urlSeite) {
try {
meldung(urlSeite);
String datum = "", zeit = "", thema, title, description, durationInSeconds;
GetUrl getUrlIo = new GetUrl(getWartenSeiteLaden());
seite2 = getUrlIo.getUri_Utf(SENDERNAME, urlSeite, seite2, "");
// NOTE(review): the start pattern is an empty string; presumably a
// markup-like literal was stripped from this listing - confirm.
description = seite2.extract("", "<");
// Date/time parsing: only the text in front of the '|' separator is used.
if (sub.contains("|")) {
datum = sub.substring(0, sub.indexOf('|') - 1);
// Drop everything up to the first space, then split the rest at the
// next space into a date part and a time part.
datum = datum.substring(datum.indexOf(' ')).trim();
zeit = datum.substring(datum.indexOf(' ')).trim();
// A five-character time (presumably hh:mm) gets seconds appended.
if (zeit.length() == 5) {
zeit = zeit + ":00";
}
datum = datum.substring(0, datum.indexOf(' ')).trim();
// An eight-character date (presumably dd.mm.yy) gets "20" inserted at
// index 6 to form a four-digit year.
if (datum.length() == 8) {
datum = datum.substring(0, 6) + "20" + datum.substring(6);
}
}
// NOTE(review): if "documentId=" is absent, indexOf returns -1 and the
// substring starts at offset 10 of urlSeite rather than failing.
String urlFilm = urlSeite.substring(urlSeite.indexOf("documentId=") + "documentId=".length());
// http://mediathek.rbb-online.de/play/media/24938774?devicetype=pc&features=hls
urlFilm = "http://mediathek.rbb-online.de/play/media/" + urlFilm + "?devicetype=pc&features=hls";
seite3 = getUrlIo.getUri_Utf(SENDERNAME, urlFilm, seite3, "");
String urlNormal = "", urlLow = "";
// Low quality: try https first and fall back to http. The helper result is
// used without its scheme, so the matching scheme is prepended here.
urlLow = getUrlLow("https");
if(urlLow.isEmpty())
{
urlLow =getUrlLow("http");
if(!urlLow.isEmpty())
{
urlLow = "http://" + urlLow;
}
}else {
urlLow = "https://" + urlLow;
}
// Normal quality: same https-then-http lookup as above.
urlNormal = getUrlNormal("https");
if(urlNormal.isEmpty())
{
urlNormal =getUrlNormal("http");
if(!urlNormal.isEmpty())
{
urlNormal = "http://" + urlNormal;
}
}else {
urlNormal = "https://" + urlNormal;
}
//http://http-stream.rbb-online.de/rbb/rbbreporter/rbbreporter_20151125_solange_ich_tanze_lebe_ich_WEB_L_16_9_960x544.mp4?url=5
// Cut off a trailing "?url=..." suffix (see the example above).
if (urlLow.contains("?url=")) {
urlLow = urlLow.substring(0, urlLow.indexOf("?url="));
}
if (urlNormal.contains("?url=")) {
urlNormal = urlNormal.substring(0, urlNormal.indexOf("?url="));
}
// If only the low quality stream exists, use it as the normal URL instead.
if (urlNormal.isEmpty()) {
if (!urlLow.isEmpty()) {
urlNormal = urlLow;
urlLow = "";
}
}
// ,"_subtitleUrl":"/subtitle/19088","_subtitleOffset":0,
// http://mediathek.rbb-online.de/subtitle/19088
String subtitle = seite3.extract("subtitleUrl\":\"", "\"");
// A subtitle value that does not start with "http" gets the host prepended.
if (!subtitle.isEmpty()) {
if (!subtitle.startsWith("http")) {
subtitle = "http://mediathek.rbb-online.de" + subtitle;
}
}
// Missing metadata is only logged; the film is still added below as long
// as a stream URL exists.
if (datum.isEmpty() || zeit.isEmpty() || thema.isEmpty() || title.isEmpty() || description.isEmpty() || durationInSeconds.isEmpty()) {
Log.errorLog(912012036, "empty für: " + urlSeite);
}
if (!urlNormal.isEmpty()) {
DatenFilm film = new DatenFilm(SENDERNAME, thema, urlSeite, title, urlNormal, "" /*urlRtmp*/,
datum, zeit/* time */, duration, description);
addFilm(film);
if (!urlLow.isEmpty()) {
CrawlerTool.addUrlKlein(film, urlLow, "");
}
if (!subtitle.isEmpty()) {
CrawlerTool.addUrlSubtitle(film, subtitle);
}
} else {
Log.errorLog(302014569, "keine URL für: " + urlSeite);
}
} catch (Exception ex) {
Log.errorLog(541236987, ex);
}
}
/**
 * Looks up the quality-3 stream entry for the given protocol in seite3.
 *
 * Three patterns are tried in order: the akamai CDN entry, the default CDN
 * entry searched in two steps, and the default CDN entry as one contiguous
 * pattern. The first non-empty hit is returned; otherwise the result of the
 * last attempt is returned.
 *
 * @param aProtocol scheme to look for, e.g. "https" or "http"
 * @return whatever extract yields for the first matching pattern; the caller
 *         prepends the scheme again, so this is presumably the address
 *         without the scheme - confirm against MSStringBuilder.extract
 */
private String getUrlNormal(String aProtocol) {
    final String streamStart = "\"_stream\":\"" + aProtocol + "://";
    final String terminator = "\"";

    // 1st attempt: akamai CDN, stream key searched after the CDN marker.
    String hit = seite3.extract("\"_quality\":3,\"_server\":\"\",\"_cdn\":\"akamai\",", streamStart, terminator);
    if (!hit.isEmpty()) {
        return hit;
    }

    // 2nd attempt: default CDN, stream key searched after the CDN marker.
    hit = seite3.extract("\"_quality\":3,\"_server\":\"\",\"_cdn\":\"default\"", streamStart, terminator);
    if (!hit.isEmpty()) {
        return hit;
    }

    // 3rd attempt: default CDN with the stream key directly adjacent.
    return seite3.extract("\"_quality\":3,\"_server\":\"\",\"_cdn\":\"default\",\"_stream\":\"" + aProtocol + "://", terminator);
}
/**
 * Looks up the quality-1 stream entry for the given protocol in seite3.
 *
 * The akamai CDN entry is preferred. If it yields nothing, the default CDN
 * entry is used.
 *
 * @param aProtocol scheme to look for, e.g. "https" or "http"
 * @return whatever extract yields for the first matching pattern; the caller
 *         prepends the scheme again, so this is presumably the address
 *         without the scheme - confirm against MSStringBuilder.extract
 */
private String getUrlLow(String aProtocol) {
    final String fromAkamai = seite3.extract("\"_quality\":1,\"_server\":\"\",\"_cdn\":\"akamai\",\"_stream\":\"" + aProtocol + "://", "\"");
    return fromAkamai.isEmpty()
            ? seite3.extract("\"_quality\":1,\"_server\":\"\",\"_cdn\":\"default\",\"_stream\":\"" + aProtocol + "://", "\"")
            : fromAkamai;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy