All downloads are free. The search and download functionality uses the official Maven repository.

mServer.crawler.sender.MediathekNdr Maven / Gradle / Ivy

There is a newer version: 3.1.64
Show newest version
/*    
 *    MediathekView
 *    Copyright (C) 2008   W. Xaver
 *    W.Xaver[at]googlemail.com
 *    http://zdfmediathk.sourceforge.net/
 *    
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package mServer.crawler.sender;

import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Date;

import org.apache.commons.lang3.time.FastDateFormat;

import de.mediathekview.mlib.Config;
import de.mediathekview.mlib.Const;
import de.mediathekview.mlib.daten.DatenFilm;
import de.mediathekview.mlib.tool.Log;
import de.mediathekview.mlib.tool.MSStringBuilder;
import mServer.crawler.CrawlerTool;
import mServer.crawler.FilmeSuchen;
import mServer.crawler.GetUrl;
import mServer.tool.MserverDaten;

public class MediathekNdr extends MediathekReader implements Runnable {

    /** Sender name of this crawler, taken from {@code Const.NDR}; handed to the base reader and to page downloads. */
    public static final String SENDERNAME = Const.NDR;

    /** Page buffer that {@code alleSeiteSuchen} passes to {@code GetUrl.getUri} and reassigns from its result. */
    private MSStringBuilder seiteAlle = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);

    /**
     * Creates the NDR reader and forwards its fixed settings to the {@code MediathekReader} constructor.
     *
     * @param ssearch   forwarded unchanged to the base-class constructor
     * @param startPrio forwarded unchanged to the base-class constructor
     */
    public MediathekNdr(FilmeSuchen ssearch, int startPrio) {
        super(
                ssearch,
                SENDERNAME,
                2,   // threads
                50,  // urlWarten (wait value for URL loading; unit is not visible in this file)
                startPrio);
    }

    //-> first page:
    // (the HTML markup of this sample entry is missing here; the remaining link text is:)
    // Nordmagazin
@Override protected void addToList() { //45 Min final String ADRESSE = "https://www.ndr.de/mediathek/sendungen_a-z/index.html"; final String MUSTER_URL1 = "
  • ", pos); pos2 = seite.indexOf("<", pos); if (pos1 != -1 && pos2 != -1 && pos1 < pos2) { thema = seite.substring(pos1 + 1, pos2); } if (url.isEmpty()) { Log.errorLog(210367600, "keine Url"); continue; } String url_ = "https://www.ndr.de/mediathek/mediatheksuche105_broadcast-" + url; String[] add = new String[]{url_, thema}; if (CrawlerTool.loadLongMax()) { if (!alleSeiteSuchen(url_, thema)) { // dann halt so versuchen listeThemen.addUrl(add); } } else { listeThemen.addUrl(add); } } catch (Exception ex) { Log.errorLog(332945670, ex); } } // noch "Verpasst" für die letzten Tage einfügen // https://www.ndr.de/mediathek/sendung_verpasst/epg1490_date-2014-05-17.html // https://www.ndr.de/mediathek/sendung_verpasst/epg1490_date-2014-05-17_display-onlyvideo.html FastDateFormat formatter1 = FastDateFormat.getInstance("yyyy-MM-dd"); FastDateFormat formatter2 = FastDateFormat.getInstance("dd.MM.yyyy"); final int maxTage = CrawlerTool.loadLongMax() ? 30 : 20; for (int i = 0; i < maxTage; ++i) { // https://www.ndr.de/mediathek/sendung_verpasst/epg1490_date-2015-09-05_display-all.html final String URL = "https://www.ndr.de/mediathek/sendung_verpasst/epg1490_date-"; final String tag = formatter1.format(new Date().getTime() - (1000 * 60 * 60 * 24 * i)); final String date = formatter2.format(new Date().getTime() - (1000 * 60 * 60 * 24 * i)); //String urlString = URL + tag + "_display-onlyvideo.html"; --> stimmt leider nicht immer final String urlString = URL + tag + "_display-all.html"; listeThemen.addUrl(new String[]{urlString, date}); } if (Config.getStop()) { meldungThreadUndFertig(); } else if (listeThemen.isEmpty()) { meldungThreadUndFertig(); } else { meldungAddMax(listeThemen.size()); for (int t = 0; t < getMaxThreadLaufen(); ++t) { Thread th = new ThemaLaden(); th.setName(SENDERNAME + t); th.start(); } } } private boolean alleSeiteSuchen(String strUrlFeed, String tthema) { boolean ret = false; GetUrl getUrlIo = new GetUrl(getWartenSeiteLaden()); seiteAlle = 
getUrlIo.getUri(SENDERNAME, strUrlFeed, StandardCharsets.UTF_8, 3 /* versuche */, seiteAlle, "Thema: " + tthema/* meldung */); int pos1 = 0, pos2, anz1, anz2 = 0; try { // // https://www.ndr.de/mediathek/mediatheksuche105_broadcast-30_page-1.html final String WEITER = " title=\"Zeige Seite "; while ((pos1 = seiteAlle.indexOf(WEITER, pos1)) != -1) { pos1 += WEITER.length(); if ((pos2 = seiteAlle.indexOf("\"", pos1)) != -1) { String anz = seiteAlle.substring(pos1, pos2); try { anz1 = Integer.parseInt(anz); if (anz2 < anz1) { anz2 = anz1; } } catch (Exception ex) { Log.errorLog(643208979, strUrlFeed); } } } for (int i = 2; i <= anz2 && i <= 10; ++i) { // geht bei 2 los da das ja schon die erste Seite ist! //das: https://www.ndr.de/mediathek/mediatheksuche105_broadcast-30.html // wird: https://www.ndr.de/mediathek/mediatheksuche105_broadcast-30_page-3.html String url_ = strUrlFeed.replace(".html", "_page-" + i + ".html"); listeThemen.addUrl(new String[]{url_, tthema}); ret = true; } } catch (Exception ex) { Log.errorLog(913047821, strUrlFeed); } return ret; } private class ThemaLaden extends Thread { private final GetUrl getUrl = new GetUrl(getWartenSeiteLaden()); private MSStringBuilder seite1 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER); private MSStringBuilder seite2 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER); private MSStringBuilder seite3 = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER); private final ArrayList liste = new ArrayList<>(); @Override public void run() { try { meldungAddThread(); String[] link; while (!Config.getStop() && (link = listeThemen.getListeThemen()) != null) { try { meldungProgress(link[1]); feedEinerSeiteSuchen(link[0], link[1] /* thema */); } catch (Exception ex) { Log.errorLog(336901211, ex); } } } catch (Exception ex) { Log.errorLog(554632590, ex); } meldungThreadUndFertig(); } private long convertDuration(final String duration, final String strUrlFeed) { long durationInSeconds = 0; try { if 
(!duration.isEmpty()) { final String[] parts = duration.split(":"); long power = 1; durationInSeconds = 0; for (int i = parts.length - 1; i >= 0; i--) { durationInSeconds += Long.parseLong(parts[i]) * power; power *= 60; } } } catch (NumberFormatException ex) { if (MserverDaten.debug) Log.errorLog(369015497, ex, strUrlFeed); } catch (Exception ex) { Log.errorLog(369015497, ex, strUrlFeed); } return durationInSeconds; } private void feedEinerSeiteSuchen(String strUrlFeed, String tthema) { final String MUSTER_URL = "") != -1) { muster = ""; tage = true; } else { muster = ""; } while (!Config.getStop() && (pos = seite1.indexOf(muster, pos)) != -1) { pos += muster.length(); url = seite1.extract(MUSTER_URL, "\"", pos); if (url.isEmpty()) { Log.errorLog(659210274, "keine Url feedEinerSeiteSuchen" + strUrlFeed); continue; } if (!url.startsWith("http")) { url = "https://www.ndr.de" + url; } if (tage) { //

    // example entry (surrounding markup missing): Rote Rosen (1725)

    thema = seite1.extract(MUSTER_URL, " title=\"", "\"", pos, 0, ""); titel = seite1.extract(MUSTER_URL, ">", "<", pos, 0, ""); if (titel.contains("(Wdh.)")) { // dann sollte der Beitrag schon in der Liste sein continue; } if (thema.equals(titel)) { if (thema.contains(" - ")) { thema = thema.substring(0, thema.indexOf(" - ")).trim(); titel = titel.substring(titel.indexOf(" - ")); titel = titel.replace(" - ", "").trim(); } else { String subtitle = seite1.extract("class=\"subtitle\">", "<", pos).trim(); if (!subtitle.isEmpty()) { titel = subtitle; } } } } else { titel = seite1.extract(" title=\"", "\"", pos); titel = titel.replace("Zum Video:", "").trim(); } if (tage) { tmp = seite1.substring(pos, seite1.indexOf("<", pos)); datum = tthema; try { Date filmDate = FastDateFormat.getInstance("HH:mm").parse(tmp); zeit = FastDateFormat.getInstance("HH:mm:ss").format(filmDate); } catch (Exception ex) { Log.errorLog(795623017, "convertDatum: " + strUrlFeed); } } else { tmp = seite1.extract("
    ", "<", pos); String[] dateValues = parseDateTime(tmp, strUrlFeed); datum = dateValues[0]; zeit = dateValues[1]; } if (tage) { //29:59
    String duration = seite1.extract("\"Länge\">", "<", pos).trim(); durationInSeconds = convertDuration(duration, strUrlFeed); } else { String duration = seite1.extract("Video (", ")", pos); duration = duration.replace("min", "").trim(); durationInSeconds = convertDuration(duration, strUrlFeed); } filmSuchen_1(strUrlFeed, thema, titel, url, datum, zeit, durationInSeconds); } } catch (Exception ex) { Log.errorLog(693219870, strUrlFeed); } } private String[] parseDateTime(String dateTimeValue, String strUrlFeed) { String[] dateValues = new String[2]; dateValues[0] = ""; dateValues[1] = ""; String dateTime = dateTimeValue.replace("Uhr", "").trim(); if (!dateTime.isEmpty()) { try { Date filmDate = FastDateFormat.getInstance("dd.MM.yyyy HH:mm").parse(dateTime); dateValues[0] = FastDateFormat.getInstance("dd.MM.yyyy").format(filmDate); dateValues[1] = FastDateFormat.getInstance("HH:mm:ss").format(filmDate); } catch (Exception ex) { Log.errorLog(623657941, "convertDatum: " + strUrlFeed); } } return dateValues; } private void filmSuchen_1(String strUrlThema, String thema, String titel, String filmWebsite, String datum, String zeit, long durationInSeconds) { //playlist: [ //{ //1: {src:'https://hds.ndr.de/z/2013/0419/TV-20130419-1010-0801.,hi,hq,.mp4.csmil/manifest.f4m', type:"application/f4m+xml"}, //2: {src:'https://hls.ndr.de/i/2013/0419/TV-20130419-1010-0801.,lo,hi,hq,.mp4.csmil/master.m3u8', type:"application/x-mpegURL"}, //3: {src:'https://media.ndr.de/progressive/2013/0419/TV-20130419-1010-0801.hi.mp4', type:"video/mp4"}, // https://media.ndr.de/progressive/2012/0820/TV-20120820-2300-0701.hi.mp4 // rtmpt://cp160844.edgefcs.net/ondemand/mp4:flashmedia/streams/ndr/2012/0820/TV-20120820-2300-0701.hq.mp4 seite2 = getUrl.getUri_Utf(SENDERNAME, filmWebsite, seite2, "strUrlThema: " + strUrlThema); String description = extractDescription(seite2); //String[] keywords = extractKeywords(seite2); String subtitle = seite2.extract(",tracks: [{ src: \"", "\""); //,tracks: [{ src: 
"/fernsehen/sendungen/45_min/video-podcast/ut20448.xml", srclang:"de"}] if (!subtitle.isEmpty()) { subtitle = "https://www.ndr.de" + subtitle; // } else { // System.out.println("Test"); } meldung(filmWebsite); int pos1; try { // src="/fernsehen/hallondsopplatt162-player_image-2c09ece0-0508-49bf-b4d6-afff2be2115c_theme-ndrde.html" // https://www.ndr.de/fernsehen/hallondsopplatt162-ppjson_image-2c09ece0-0508-49bf-b4d6-afff2be2115c.json // id="pp_hallondsopplatt162" if (datum.isEmpty()) { String tmp = seite2.extract("")) != -1) { tmp = tmp.substring(pos1 + 1, tmp.length()); String[] dateValues = parseDateTime(tmp, strUrlThema); datum = dateValues[0]; zeit = dateValues[1]; } } String json = seite2.extract("