
mServer.crawler.sender.MediathekMdr Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of MServer Show documentation
Show all versions of MServer Show documentation
The crawler for mediathekview/MediathekView
/*
* MediathekView
* Copyright (C) 2008 W. Xaver
* W.Xaver[at]googlemail.com
* http://zdfmediathk.sourceforge.net/
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package mServer.crawler.sender;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.LinkedList;
import de.mediathekview.mlib.Config;
import de.mediathekview.mlib.Const;
import de.mediathekview.mlib.daten.DatenFilm;
import de.mediathekview.mlib.tool.Log;
import de.mediathekview.mlib.tool.MSStringBuilder;
import mServer.crawler.CrawlerTool;
import mServer.crawler.FilmeSuchen;
import mServer.crawler.GetUrl;
public class MediathekMdr extends MediathekReader {
/** Sender name; passed to the superclass, to the URL loader and to every created film. */
public final static String SENDERNAME = Const.MDR;
/** Pending String entries, consumed head-first by {@code getListeTage()}. */
private final LinkedList<String> listeTage = new LinkedList<>();
/**
 * Films already seen, one array per film in the order {thema, titel, datum, zeit};
 * read and extended by {@code existiertSchon(...)}.
 * The element types are spelled out because the raw {@code LinkedList} form does not
 * type-check against {@code for (String[] k : listeGesucht)} or a {@code String}-returning
 * {@code pollFirst()}.
 */
private final LinkedList<String[]> listeGesucht = new LinkedList<>();
/**
* Creates the MDR crawler.
* <p>
* Forwards both arguments to the superclass constructor together with
* {@link #SENDERNAME}, a thread count of 3 and a "urlWarten" value of 200
* (presumably a wait time between URL requests — confirm in {@code MediathekReader}).
*
* @param ssearch the {@code FilmeSuchen} instance handed through to the superclass
* @param startPrio the start priority handed through to the superclass
*/
public MediathekMdr(FilmeSuchen ssearch, int startPrio) {
super(ssearch, SENDERNAME, /* threads */ 3, /* urlWarten */ 200, startPrio);
}
/**
* Loads a feed page, walks over every occurrence of {@code MUSTER} in it and passes
* the URL found after each occurrence to {@code addSendugen(...)}.
* <p>
* NOTE(review): this listing looks damaged. {@code MUSTER}, {@code seite1} and
* {@code strUrlFeed} are used but not declared in the visible method, and {@code thema}
* is never assigned before it is passed on. The search strings that contained markup
* appear to have been stripped. Restore the method from the upstream source before
* relying on it.
*/
@Override
public void addToList() {
// NOTE(review): remnant of a line whose content was lost (presumably a pattern declaration) — confirm.
int pos = 0;
String thema, url = "";
try {
// Fetch the feed page into seite1 (2 attempts, per the inline marker).
seite1 = getUrl.getUri(SENDERNAME, strUrlFeed, StandardCharsets.UTF_8, 2 /* versuche */, seite1, "");
// Continue until a stop is requested or no further MUSTER occurrence exists.
while (!Config.getStop() && (pos = seite1.indexOf(MUSTER, pos)) != -1) {
// Step past the match so the next search starts behind it.
pos += MUSTER.length();
// Presumably reads the text from pos up to the next "<"; the start pattern is empty here — confirm.
url = seite1.extract("", "<", pos);
if (url.isEmpty()) {
Log.errorLog(952136547, "keine URL: " + strUrlFeed);
} else {
meldung(url);
// Turn the extracted fragment into an absolute address.
url = "http://www.mdr.de/mediathek/fernsehen/" + url;
addSendugen(strUrlFeed, thema, url);
}
}
// Also reached when the loop never ran: url still holds its initial empty value.
if (url.isEmpty()) {
Log.errorLog(766250249, "keine URL: " + strUrlFeed);
}
} catch (Exception ex) {
// Any failure is logged and otherwise ignored.
Log.errorLog(316874602, ex);
}
}
/**
* Loads {@code urlSeite}, walks over every occurrence of {@code MUSTER} in it and
* passes the URL found after each occurrence to {@code addSendug(...)}.
* <p>
* NOTE(review): this listing looks damaged. {@code MUSTER} is an empty string (its
* content was presumably stripped), {@code seiteTage} and {@code addSendug} are not
* declared in the visible source, and {@code thema} is never assigned before use.
*
* @param urlSeite address of the page to scan; also used in the error log messages
*/
private void addTage(String urlSeite) {
// NOTE(review): empty pattern — with length 0 the "pos += MUSTER.length()" below does not advance pos.
final String MUSTER = "";
int pos = 0;
String thema, url = "";
try {
// Fetch the page into seiteTage (2 attempts, per the inline marker).
seiteTage = getUrl.getUri(SENDERNAME, urlSeite, StandardCharsets.UTF_8, 2 /* versuche */, seiteTage, "");
// Continue until a stop is requested or no further MUSTER occurrence exists.
while (!Config.getStop() && (pos = seiteTage.indexOf(MUSTER, pos)) != -1) {
pos += MUSTER.length();
// Presumably reads the text from pos up to the next "<"; the start pattern is empty here — confirm.
url = seiteTage.extract("", "<", pos);
if (url.isEmpty()) {
Log.errorLog(975401478, "keine URL: " + urlSeite);
} else {
meldung(url);
// Turn the extracted fragment into an absolute address.
url = "http://www.mdr.de/mediathek/" + url;
addSendug(urlSeite, thema, url);
}
}
// Also reached when the loop never ran: url still holds its initial empty value.
if (url.isEmpty()) {
Log.errorLog(930215470, "keine URL: " + urlSeite);
}
} catch (Exception ex) {
// Any failure is logged and otherwise ignored.
Log.errorLog(102540897, ex);
}
}
/**
* Loads the page {@code urlThema} into {@code seite2} and iterates over the entries
* marked by a pattern chosen from the page content.
* <p>
* NOTE(review): this listing is damaged. The statement that starts with
* {@code url = seite2.extract(} is cut off and runs into the tail of what appears to be
* a different method: {@code stop}, {@code MUSTER_XML}, {@code MUSTER_ADD}, {@code pos1},
* {@code pos2}, {@code seite3} and {@code urlSendung} are not declared anywhere in the
* visible source. Restore both methods from the upstream source before changing anything.
*
* @param strUrlFeed address of the feed; only used in error log messages in the visible code
* @param thema topic name; used for the loader message and in error log messages
* @param urlThema address of the topic page that is loaded
*/
private void addSendugen(String strUrlFeed, String thema, String urlThema) {
// Fetch the topic page into seite2 (2 attempts, per the inline marker).
seite2 = getUrl.getUri(SENDERNAME, urlThema, StandardCharsets.UTF_8, 2 /* versuche */, seite2, "Thema: " + thema);
final String muster;
// Use the "media mediaA" marker when the page contains it.
if (seite2.indexOf("div class=\"media mediaA \">") != -1) {
muster = "div class=\"media mediaA \">";
} else {
// NOTE(review): the fallback pattern is empty — its content was presumably stripped.
muster = "";
}
int pos = 0, count = 0;
String url = "";
while ((pos = seite2.indexOf(muster, pos)) != -1) {
++count;
// Unless CrawlerTool.loadLongMax() is true, stop after five entries.
if (!CrawlerTool.loadLongMax()) {
if (count > 5) {
return;
}
}
pos += muster.length();
// NOTE(review): the next line is truncated; everything after it belongs to lost code.
url = seite2.extract(" 0 && pos > stop) {
break;
}
pos += MUSTER_XML.length();
pos1 = pos;
// Take the text from pos up to the next single quote as the URL.
if ((pos2 = seite3.indexOf("'", pos)) != -1) {
url = seite3.substring(pos1, pos2);
}
if (url.isEmpty()) {
Log.errorLog(256987304, new String[]{"keine URL: " + urlSendung, "Thema: " + thema, "UrlFeed: " + strUrlFeed});
} else {
// Drop backslashes, prepend MUSTER_ADD, then process the resulting XML address.
url = url.replace("\\", "");
url = MUSTER_ADD + url;
addXml(strUrlFeed, thema, url, urlSendung);
}
}
// Also reached when the loop never ran: url still holds its initial empty value.
if (url.isEmpty()) {
Log.errorLog(256987304, new String[]{"keine URL: " + urlSendung, "Thema: " + thema, "UrlFeed: " + strUrlFeed});
}
}
/**
* Loads the XML description {@code xmlSite} into {@code seite4}, reads the film's
* metadata and video URLs from it and, when a usable URL exists and the film is not
* yet known to {@code existiertSchon(...)}, creates a {@code DatenFilm} and adds it.
* <p>
* NOTE(review): all search patterns that are empty strings below (including
* {@code MUSTER_URL_MP4}) presumably contained markup that was stripped from this
* listing; {@code seite4} is not declared in the visible source. Confirm against the
* upstream source.
*
* @param strUrlFeed address of the feed; only used in the error log message
* @param thema topic of the film; used for the loader message, the duplicate check and the film
* @param xmlSite address of the XML description that is loaded
* @param filmSite fallback for the film's web page when the XML yields none
*/
private void addXml(String strUrlFeed, String thema, String xmlSite, String filmSite) {
final String MUSTER_URL_MP4 = "";
String titel, datum, zeit, urlMp4, urlMp4_klein, urlHD, urlSendung, description;
long duration;
try {
seite4 = getUrl.getUri_Utf(SENDERNAME, xmlSite, seite4, "Thema: " + thema);
// Nothing was loaded: log the address and give up on this film.
if (seite4.length() == 0) {
Log.errorLog(903656532, xmlSite);
return;
}
duration = 0;
try {
String d = seite4.extract("", "<");
if (!d.isEmpty()) {
// Colon-separated parts are summed right to left, each weighted 60 times the previous
// one (1, 60, 3600, ...) — i.e. seconds if the value is [hh:]mm:ss (format assumed).
String[] parts = d.split(":");
duration = 0;
long power = 1;
for (int i = parts.length - 1; i >= 0; i--) {
duration += Long.parseLong(parts[i]) * power;
power *= 60;
}
}
} catch (Exception ex) {
// A malformed duration is logged; processing continues with the value accumulated so far.
Log.errorLog(313698749, ex, xmlSite);
}
titel = seite4.extract("", "<");
description = seite4.extract("", "<");
String subtitle = seite4.extract("", "<");
// Up to three attempts to obtain a date (presumably different tags in the original).
datum = seite4.extract("", "<");
if (datum.isEmpty()) {
datum = seite4.extract("", "<");
}
if (datum.isEmpty()) {
datum = seite4.extract("", "<");
}
// Derive the time first: convertDatumXml overwrites datum with the date-only form.
zeit = convertZeitXml(datum);
datum = convertDatumXml(datum);
urlSendung = seite4.extract("", "<");
if (urlSendung.isEmpty()) {
urlSendung = filmSite;
}
// Look up the film URLs
urlHD = seite4.extract("| MP4 Web XL |", MUSTER_URL_MP4, "<");
urlMp4 = seite4.extract("| MP4 Web L |", MUSTER_URL_MP4, "<");
if (urlMp4.isEmpty()) {
urlMp4 = seite4.extract("| MP4 Web L+ |", MUSTER_URL_MP4, "<");
}
urlMp4_klein = seite4.extract("| MP4 Web M |", MUSTER_URL_MP4, "<");
// No "L"/"L+" variant: use the "M" variant as the main URL and leave the small one empty.
if (urlMp4.isEmpty()) {
urlMp4 = urlMp4_klein;
urlMp4_klein = "";
}
if (urlMp4.isEmpty()) {
Log.errorLog(326541230, new String[]{"keine URL: " + xmlSite, "Thema: " + thema, " UrlFeed: " + strUrlFeed});
} else if (!existiertSchon(thema, titel, datum, zeit)) {
// existiertSchon also records the film, so each film is added only once.
meldung(urlMp4);
DatenFilm film = new DatenFilm(SENDERNAME, thema, urlSendung, titel, urlMp4, ""/*rtmpUrl*/, datum, zeit, duration, description);
CrawlerTool.addUrlKlein(film, urlMp4_klein, "");
CrawlerTool.addUrlHd(film, urlHD, "");
CrawlerTool.addUrlSubtitle(film, subtitle);
addFilm(film);
}
} catch (Exception ex) {
// Any other failure is logged and the film is skipped.
Log.errorLog(446286970, ex);
}
}
}
/**
 * Reduces a timestamp such as {@code 23.08.2012 22:05} to its date part
 * ({@code dd.MM.yyyy}).
 *
 * @param datum timestamp text in the form {@code dd.MM.yyyy HH:mm}
 * @return the formatted date, or {@code datum} unchanged when it cannot be parsed
 *         (the failure is logged)
 */
private String convertDatumXml(String datum) {
    String result = datum;
    try {
        final Date parsed = new SimpleDateFormat("dd.MM.yyyy HH:mm").parse(datum);
        result = new SimpleDateFormat("dd.MM.yyyy").format(parsed);
    } catch (Exception ex) {
        Log.errorLog(435209987, ex);
    }
    return result;
}
/**
 * Reduces a timestamp such as {@code 23.08.2012 22:05} to its time part
 * ({@code HH:mm:ss}).
 *
 * @param datum timestamp text in the form {@code dd.MM.yyyy HH:mm}
 * @return the formatted time, or {@code datum} unchanged when it cannot be parsed
 *         (the failure is logged)
 */
private String convertZeitXml(String datum) {
    String result = datum;
    try {
        final Date parsed = new SimpleDateFormat("dd.MM.yyyy HH:mm").parse(datum);
        result = new SimpleDateFormat("HH:mm:ss").format(parsed);
    } catch (Exception ex) {
        Log.errorLog(102658736, ex);
    }
    return result;
}
/**
* Removes and returns the first entry of {@code listeTage}; synchronized on this instance.
*
* @return the first entry, or {@code null} when the list is empty
*/
private synchronized String getListeTage() {
return listeTage.pollFirst();
}
/**
 * Checks whether a film is already recorded in {@code listeGesucht} and records it
 * when it is not. All four values are compared case-insensitively.
 *
 * @param thema topic of the film
 * @param titel title of the film
 * @param datum date of the film
 * @param zeit  time of the film
 * @return {@code true} when the film was already in the list, {@code false} when it
 *         has just been added
 */
private synchronized boolean existiertSchon(String thema, String titel, String datum, String zeit) {
    boolean known = false;
    // The whole list is scanned; a later match does not change the outcome.
    for (final String[] seen : listeGesucht) {
        final boolean sameFilm = seen[0].equalsIgnoreCase(thema)
                && seen[1].equalsIgnoreCase(titel)
                && seen[2].equalsIgnoreCase(datum)
                && seen[3].equalsIgnoreCase(zeit);
        known |= sameFilm;
    }
    if (known) {
        return true;
    }
    listeGesucht.add(new String[]{thema, titel, datum, zeit});
    return false;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy