All Downloads are FREE. Search and download functionalities are using the official Maven repository.

mServer.crawler.sender.MediathekMdr Maven / Gradle / Ivy

There is a newer version: 3.1.64
Show newest version
/*
 * MediathekView
 * Copyright (C) 2008 W. Xaver
 * W.Xaver[at]googlemail.com
 * http://zdfmediathk.sourceforge.net/
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package mServer.crawler.sender;

import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.LinkedList;

import de.mediathekview.mlib.Config;
import de.mediathekview.mlib.Const;
import de.mediathekview.mlib.daten.DatenFilm;
import de.mediathekview.mlib.tool.Log;
import de.mediathekview.mlib.tool.MSStringBuilder;
import mServer.crawler.CrawlerTool;
import mServer.crawler.FilmeSuchen;
import mServer.crawler.GetUrl;

public class MediathekMdr extends MediathekReader {

    /** Sender name passed to the superclass, to the page requests and to every created DatenFilm. */
    public static final String SENDERNAME = Const.MDR;
    /** Queue of String entries; getListeTage() removes and returns them one at a time. */
    private final LinkedList<String> listeTage = new LinkedList<>();
    /** Films already seen; each entry is {thema, titel, datum, zeit}, as compared in existiertSchon. */
    private final LinkedList<String[]> listeGesucht = new LinkedList<>();

    /**
     * Creates the reader for the sender {@link #SENDERNAME}.
     * Both arguments are forwarded unchanged to the superclass constructor,
     * together with the fixed settings visible in the call: 3 threads and a
     * {@code urlWarten} value of 200 (presumably a wait time between requests;
     * confirm against MediathekReader).
     *
     * @param ssearch   forwarded to the superclass constructor
     * @param startPrio forwarded to the superclass constructor
     */
    public MediathekMdr(FilmeSuchen ssearch, int startPrio) {
        super(ssearch, SENDERNAME, /* threads */ 3, /* urlWarten */ 200, startPrio);
    }

    /**
     * Overrides the inherited {@code addToList}.
     *
     * As visible here, the body loads {@code strUrlFeed} into {@code seite1}
     * (2 attempts), scans the page for {@code MUSTER} until {@code Config.getStop()}
     * is set, and passes every non-empty URL found, prefixed with
     * {@code http://www.mdr.de/mediathek/fernsehen/}, to {@code addSendugen}.
     * Empty URLs and exceptions are reported through {@code Log.errorLog}.
     *
     * NOTE(review): this body looks damaged. {@code seite1}, {@code MUSTER} and
     * {@code strUrlFeed} are not declared anywhere in the visible source, and
     * {@code thema} is passed on without ever being assigned. The deeper
     * indentation of the statements suggests they belong to a different method
     * whose header was lost; restore this method from upstream.
     */
    @Override
    public void addToList() {
        // NOTE(review): the next line is the remainder of a longer comment/statement that was lost.
        // ";

            int pos = 0;
            // NOTE(review): thema is declared here but never assigned before use below.
            String thema, url = "";
            try {
                seite1 = getUrl.getUri(SENDERNAME, strUrlFeed, StandardCharsets.UTF_8, 2 /* attempts */, seite1, "");
                while (!Config.getStop() && (pos = seite1.indexOf(MUSTER, pos)) != -1) {
                    // continue the search behind the current match
                    pos += MUSTER.length();
                    // NOTE(review): an empty start marker looks like lost text - confirm upstream
                    url = seite1.extract("", "<", pos);
                    if (url.isEmpty()) {
                        Log.errorLog(952136547, "keine URL: " + strUrlFeed);
                    } else {
                        meldung(url);
                        url = "http://www.mdr.de/mediathek/fernsehen/" + url;
                        addSendugen(strUrlFeed, thema, url);
                    }
                }
                // url is still empty if nothing was found, or if the last match yielded no URL
                if (url.isEmpty()) {
                    Log.errorLog(766250249, "keine URL: " + strUrlFeed);
                }
            } catch (Exception ex) {
                Log.errorLog(316874602, ex);
            }
        }

        private void addTage(String urlSeite) {
            final String MUSTER = "
"; int pos = 0; String thema, url = ""; try { seiteTage = getUrl.getUri(SENDERNAME, urlSeite, StandardCharsets.UTF_8, 2 /* versuche */, seiteTage, ""); while (!Config.getStop() && (pos = seiteTage.indexOf(MUSTER, pos)) != -1) { pos += MUSTER.length(); url = seiteTage.extract("", "<", pos); if (url.isEmpty()) { Log.errorLog(975401478, "keine URL: " + urlSeite); } else { meldung(url); url = "http://www.mdr.de/mediathek/" + url; addSendug(urlSeite, thema, url); } } if (url.isEmpty()) { Log.errorLog(930215470, "keine URL: " + urlSeite); } } catch (Exception ex) { Log.errorLog(102540897, ex); } } private void addSendugen(String strUrlFeed, String thema, String urlThema) { seite2 = getUrl.getUri(SENDERNAME, urlThema, StandardCharsets.UTF_8, 2 /* versuche */, seite2, "Thema: " + thema); final String muster; if (seite2.indexOf("div class=\"media mediaA \">") != -1) { muster = "div class=\"media mediaA \">"; } else { muster = ""; } int pos = 0, count = 0; String url = ""; while ((pos = seite2.indexOf(muster, pos)) != -1) { ++count; if (!CrawlerTool.loadLongMax()) { if (count > 5) { return; } } pos += muster.length(); url = seite2.extract(" 0 && pos > stop) { break; } pos += MUSTER_XML.length(); pos1 = pos; if ((pos2 = seite3.indexOf("'", pos)) != -1) { url = seite3.substring(pos1, pos2); } if (url.isEmpty()) { Log.errorLog(256987304, new String[]{"keine URL: " + urlSendung, "Thema: " + thema, "UrlFeed: " + strUrlFeed}); } else { url = url.replace("\\", ""); url = MUSTER_ADD + url; addXml(strUrlFeed, thema, url, urlSendung); } } if (url.isEmpty()) { Log.errorLog(256987304, new String[]{"keine URL: " + urlSendung, "Thema: " + thema, "UrlFeed: " + strUrlFeed}); } } private void addXml(String strUrlFeed, String thema, String xmlSite, String filmSite) { final String MUSTER_URL_MP4 = ""; String titel, datum, zeit, urlMp4, urlMp4_klein, urlHD, urlSendung, description; long duration; try { seite4 = getUrl.getUri_Utf(SENDERNAME, xmlSite, seite4, "Thema: " + thema); if 
(seite4.length() == 0) { Log.errorLog(903656532, xmlSite); return; } duration = 0; try { String d = seite4.extract("", "<"); if (!d.isEmpty()) { String[] parts = d.split(":"); duration = 0; long power = 1; for (int i = parts.length - 1; i >= 0; i--) { duration += Long.parseLong(parts[i]) * power; power *= 60; } } } catch (Exception ex) { Log.errorLog(313698749, ex, xmlSite); } titel = seite4.extract("", "<"); description = seite4.extract("<teaserText>", "<"); String subtitle = seite4.extract("<videoSubtitleUrl>", "<"); datum = seite4.extract("<broadcastStartDate>", "<"); if (datum.isEmpty()) { datum = seite4.extract("<datetimeOfBroadcasting>", "<"); } if (datum.isEmpty()) { datum = seite4.extract("<webTime>", "<"); } zeit = convertZeitXml(datum); datum = convertDatumXml(datum); urlSendung = seite4.extract("<htmlUrl>", "<"); if (urlSendung.isEmpty()) { urlSendung = filmSite; } // Film-URLs suchen urlHD = seite4.extract("| MP4 Web XL |", MUSTER_URL_MP4, "<"); urlMp4 = seite4.extract("| MP4 Web L |", MUSTER_URL_MP4, "<"); if (urlMp4.isEmpty()) { urlMp4 = seite4.extract("| MP4 Web L+ |", MUSTER_URL_MP4, "<"); } urlMp4_klein = seite4.extract("| MP4 Web M |", MUSTER_URL_MP4, "<"); if (urlMp4.isEmpty()) { urlMp4 = urlMp4_klein; urlMp4_klein = ""; } if (urlMp4.isEmpty()) { Log.errorLog(326541230, new String[]{"keine URL: " + xmlSite, "Thema: " + thema, " UrlFeed: " + strUrlFeed}); } else if (!existiertSchon(thema, titel, datum, zeit)) { meldung(urlMp4); DatenFilm film = new DatenFilm(SENDERNAME, thema, urlSendung, titel, urlMp4, ""/*rtmpUrl*/, datum, zeit, duration, description); CrawlerTool.addUrlKlein(film, urlMp4_klein, ""); CrawlerTool.addUrlHd(film, urlHD, ""); CrawlerTool.addUrlSubtitle(film, subtitle); addFilm(film); } } catch (Exception ex) { Log.errorLog(446286970, ex); } } } private String convertDatumXml(String datum) { //<broadcastStartDate>23.08.2012 22:05</broadcastStartDate> try { SimpleDateFormat sdfIn = new SimpleDateFormat("dd.MM.yyyy HH:mm"); Date 
filmDate = sdfIn.parse(datum); SimpleDateFormat sdfOut; sdfOut = new SimpleDateFormat("dd.MM.yyyy"); datum = sdfOut.format(filmDate); } catch (Exception ex) { Log.errorLog(435209987, ex); } return datum; } private String convertZeitXml(String datum) { //<broadcastStartDate>23.08.2012 22:05</broadcastStartDate> try { SimpleDateFormat sdfIn = new SimpleDateFormat("dd.MM.yyyy HH:mm"); Date filmDate = sdfIn.parse(datum); SimpleDateFormat sdfOut; sdfOut = new SimpleDateFormat("HH:mm:ss"); datum = sdfOut.format(filmDate); } catch (Exception ex) { Log.errorLog(102658736, ex); } return datum; } private synchronized String getListeTage() { return listeTage.pollFirst(); } private synchronized boolean existiertSchon(String thema, String titel, String datum, String zeit) { // liefert true wenn schon in der Liste, ansonsten fügt es ein boolean gefunden = false; for (String[] k : listeGesucht) { if (k[0].equalsIgnoreCase(thema) && k[1].equalsIgnoreCase(titel) && k[2].equalsIgnoreCase(datum) && k[3].equalsIgnoreCase(zeit)) { gefunden = true; } } if (!gefunden) { listeGesucht.add(new String[]{thema, titel, datum, zeit}); } return gefunden; } } </code></pre> <br/> <br/> <div class='clear'></div> </main> </div> <br/><br/> <div class="align-center">© 2015 - 2025 <a href="/legal-notice.php">Weber Informatics LLC</a> | <a href="/data-protection.php">Privacy Policy</a></div> <br/><br/><br/><br/><br/><br/> </body> </html>