
mServer.crawler.sender.MediathekZdfTivi Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of MServer Show documentation
Show all versions of MServer Show documentation
The crawler for mediathekview/MediathekView
/*
* MediathekView
* Copyright (C) 2008 W. Xaver
* W.Xaver[at]googlemail.com
* http://zdfmediathk.sourceforge.net/
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package mServer.crawler.sender;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.commons.lang3.time.FastDateFormat;
import de.mediathekview.mlib.Config;
import de.mediathekview.mlib.Const;
import de.mediathekview.mlib.daten.DatenFilm;
import de.mediathekview.mlib.tool.Log;
import de.mediathekview.mlib.tool.MSStringBuilder;
import mServer.crawler.FilmeSuchen;
import mServer.crawler.GetUrl;
import mServer.tool.MserverDaten;
public class MediathekZdfTivi extends MediathekReader {
public final static String SENDERNAME = Const.ZDF_TIVI;
private final SimpleDateFormat sdfIn = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSSXXX");
private final FastDateFormat sdfOut_date = FastDateFormat.getInstance("dd.MM.yyyy");
private final FastDateFormat sdfOut_time = FastDateFormat.getInstance("HH:mm:ss");
private final LinkedListUrl listeThemen_3 = new LinkedListUrl();
public MediathekZdfTivi(FilmeSuchen ssearch, int startPrio) {
super(ssearch, SENDERNAME, 2 /* threads */, 50 /* urlWarten */, startPrio);
}
@Override
public synchronized void addToList() {
//Theman suchen
listeThemen.clear();
meldungStart();
add_1();
add_2();
add_3();
if (Config.getStop()) {
meldungThreadUndFertig();
} else if (listeThemen.isEmpty() && listeThemen_3.isEmpty()) {
meldungThreadUndFertig();
} else {
meldungAddMax(listeThemen.size() + listeThemen_3.size());
for (int t = 0; t < getMaxThreadLaufen(); ++t) {
Thread th = new ThemaLaden();
th.setName(SENDERNAME + t);
th.start();
}
}
}
private void add_1() {
//Nachrichten
///tiviVideos/contentblob/2063212/tivi9teaserbild/9050138
///tiviVideos/beitrag/pur%2B+Sendungen/895212/2063212?view=flashXml
//Ich will die Wahrheit!
final String MUSTER_URL = "/tiviVideos";
MSStringBuilder seiteTivi_1 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
GetUrl getUrlIo = new GetUrl(getWartenSeiteLaden());
seiteTivi_1 = getUrlIo.getUri(SENDERNAME, "http://www.tivi.de/tiviVideos/rueckblick?view=flashXml", StandardCharsets.UTF_8, 6 /* versuche */, seiteTivi_1, "" /* Meldung */);
if (seiteTivi_1.length() == 0) {
Log.errorLog(732323698, "Leere Seite");
}
int pos = 0;
int pos1;
int pos2;
String url;
try {
while ((pos = seiteTivi_1.indexOf(MUSTER_URL, pos)) != -1) {
url = "";
pos += MUSTER_URL.length();
pos1 = pos;
if ((pos2 = seiteTivi_1.indexOf("<", pos1)) != -1) {
url = seiteTivi_1.substring(pos1, pos2);
if (url.contains("%2F")) {
url = url.replace("%2F", "/");
}
// url = URLDecoder.decode(url, "UTF-8");
}
if (url.isEmpty()) {
Log.errorLog(309075109, "keine URL");
} else {
url = "http://www.tivi.de/tiviVideos" + url;
listeThemen.addUrl(new String[]{url});
}
}
} catch (Exception ex) {
Log.errorLog(302010498, ex);
}
}
private void add_2() {
//Nachrichten
///tiviVideos/contentblob/2063212/tivi9teaserbild/9050138
///tiviVideos/beitrag/pur%2B+Sendungen/895212/2063212?view=flashXml
//Ich will die Wahrheit!
final String MUSTER_URL = "/tiviVideos/beitrag";
MSStringBuilder seiteTivi_1 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
GetUrl getUrlIo = new GetUrl(getWartenSeiteLaden());
seiteTivi_1 = getUrlIo.getUri(SENDERNAME, "http://www.tivi.de/tiviVideos/?view=flashXml", StandardCharsets.UTF_8, 6 /* versuche */, seiteTivi_1, "" /* Meldung */);
///seiteTivi_1 = getUrl.getUri(nameSenderMReader, "http://www.tivi.de/tiviVideos/?view=xml", MSearchConst.KODIERUNG_UTF, 6 /* versuche */, seiteTivi_1, "" /* Meldung */);
if (seiteTivi_1.length() == 0) {
Log.errorLog(645121326, "Leere Seite");
}
int pos = 0;
int pos1;
int pos2;
String url;
try {
while ((pos = seiteTivi_1.indexOf(MUSTER_URL, pos)) != -1) {
url = "";
pos += MUSTER_URL.length();
pos1 = pos;
if ((pos2 = seiteTivi_1.indexOf("<", pos1)) != -1) {
url = seiteTivi_1.substring(pos1, pos2);
if (url.contains("%2F")) {
url = url.replace("%2F", "/");
}
// url = URLDecoder.decode(url, "UTF-8");
}
if (url.isEmpty()) {
Log.errorLog(915263985, "keine URL");
} else {
url = "http://www.tivi.de/tiviVideos/beitrag" + url;
listeThemen.addUrl(new String[]{url});
}
}
} catch (Exception ex) {
Log.errorLog(730169702, ex);
}
}
private void add_3() {
final String MUSTER_URL = "type=\"broadcast\">";
MSStringBuilder seiteTivi_1 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
GetUrl getUrlIo = new GetUrl(getWartenSeiteLaden());
seiteTivi_1 = getUrlIo.getUri(SENDERNAME, "http://www.tivi.de/tiviVideos/navigation?view=flashXml", StandardCharsets.UTF_8, 6 /* versuche */, seiteTivi_1, "" /* Meldung */);
if (seiteTivi_1.length() == 0) {
Log.errorLog(195623078, "Leere Seite");
}
int pos = 0;
int pos1;
int pos2;
String url;
try {
while ((pos = seiteTivi_1.indexOf(MUSTER_URL, pos)) != -1) {
url = "";
pos += MUSTER_URL.length();
pos1 = pos;
if ((pos2 = seiteTivi_1.indexOf("<", pos1)) != -1) {
url = seiteTivi_1.substring(pos1, pos2);
if (url.contains("%2F")) {
url = url.replace("%2F", "/");
}
// url = URLDecoder.decode(url, "UTF-8");
}
if (url.isEmpty()) {
Log.errorLog(152378787, "keine URL");
} else {
url = "http://www.tivi.de" + url;
listeThemen_3.addUrl(new String[]{url});
}
}
} catch (Exception ex) {
Log.errorLog(906037912, ex);
}
}
private class ThemaLaden extends Thread {
private final GetUrl getUrl = new GetUrl(getWartenSeiteLaden());
private MSStringBuilder seite1 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
private MSStringBuilder seite2 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
@Override
public void run() {
try {
meldungAddThread();
String[] link;
while (!Config.getStop() && (link = listeThemen.getListeThemen()) != null) {
addTivi_(link[0] /* url */);
meldungProgress(link[0]);
}
while (!Config.getStop() && (link = listeThemen_3.getListeThemen()) != null) {
add_(link[0] /* url */);
meldungProgress(link[0]);
}
} catch (Exception ex) {
Log.errorLog(731214569, ex);
}
meldungThreadUndFertig();
}
private void add_(String url_) {
final String MUSTER_START = "";
GetUrl getUrlIo = new GetUrl(getWartenSeiteLaden());
seite2 = getUrlIo.getUri(SENDERNAME, url_, StandardCharsets.UTF_8, 1 /* versuche */, seite2, "" /* Meldung */);
if (seite2.length() == 0) {
Log.errorLog(302010698, "Leere Seite");
}
int pos = 0;
String url;
try {
while (!Config.getStop() && (pos = seite2.indexOf(MUSTER_START, pos)) != -1) {
pos += MUSTER_START.length();
url = seite2.extract("", "<", pos);
// url = URLDecoder.decode(url, "UTF-8");
if (url.isEmpty()) {
Log.errorLog(732698720, "keine URL");
} else {
if (url.contains("%2F")) {
url = url.replace("%2F", "/");
}
url = "http://www.tivi.de" + url;
addTivi_(url);
}
}
} catch (Exception ex) {
Log.errorLog(701212145, ex);
}
}
private void addTivi_(String url) {
int pos3;
long dauerL;
String titel, thema, urlFilm, datum, zeit, bild, website, text;
try {
urlFilm = "";
// Film laden
meldung(url);
seite1 = getUrl.getUri_Utf(SENDERNAME, url, seite1, "" /* Meldung */);
if (seite1.length() == 0) {
Log.errorLog(301649897, "Leere Seite Tivi-2: " + url);
return;
}
thema = seite1.extract("", "<");
titel = seite1.extract("", "<");
text = seite1.extract("", "<");
bild = seite1.extract("", "<");
if (!bild.isEmpty()) {
bild = "http://www.tivi.de" + bild;
}
website = seite1.extract("", "<");
dauerL = parseDauer(seite1, url);
zeit = "";
datum = seite1.extract("", "<");
//2014-01-19T08:35:00.000+01:00
if (!datum.isEmpty()) {
try {
Date filmDate = sdfIn.parse(datum);
datum = sdfOut_date.format(filmDate);
zeit = sdfOut_time.format(filmDate);
} catch (NumberFormatException ex) {
if (MserverDaten.debug)
Log.errorLog(649600299, ex, "Datum: " + url);
}
}
pos3 = 0;
while ((pos3 = seite1.indexOf("veryhigh ", pos3)) != -1) {
pos3 += 5;
urlFilm = seite1.extract("", "<", pos3);
if (urlFilm.startsWith("http") && urlFilm.endsWith("mp4") && !urlFilm.contains("metafilegenerator")) {
break;
}
}
if (urlFilm.isEmpty()) {
Log.errorLog(159876234, "kein Film: " + url);
} else {
if (urlFilm.startsWith("http://tvdl.zdf.de")) {
urlFilm = urlFilm.replace("http://tvdl.zdf.de", "http://nrodl.zdf.de");
}
DatenFilm film = new DatenFilm(SENDERNAME, thema, website, titel, urlFilm, "" /*urlRtmp*/,
datum, zeit,
dauerL, text);
// jetzt noch manuell die Auflösung hochsetzen
MediathekZdf.urlTauschen(film, url, mlibFilmeSuchen);
addFilm(film);
}
} catch (Exception ex) {
Log.errorLog(454123698, ex);
}
}
private long parseDauer(MSStringBuilder seite, String url) {
long dauer = 0;
try {
String duration = seite.extract("", "<");
if (!duration.isEmpty()) {
dauer = parseDuration(duration);
}
if (dauer == 0) {
String length = seite.extract("", "<");
dauer = Integer.parseInt(length);
}
} catch (NumberFormatException ex) {
Log.errorLog(349761012, ex, "Dauer: " + url);
}
return dauer;
}
// Parst Längenangaben im Format P0Y0M0DT0H22M6.000S
private long parseDuration(String duration) {
String time = duration.replace("P0Y0M0DT", "");
String h = time.substring(0, time.indexOf('H'));
int ih = Integer.parseInt(h);
String m = time.substring(time.indexOf('H') + 1, time.indexOf('M'));
int im = Integer.parseInt(m);
String s = time.substring(time.indexOf('M') + 1, time.indexOf('.'));
int is = Integer.parseInt(s);
return ih * 60 * 60 + im * 60 + is;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy