
mServer.crawler.sender.MediathekSwr Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of MServer Show documentation
Show all versions of MServer Show documentation
The crawler for mediathekview/MediathekView
/*
* MediathekView
* Copyright (C) 2008 W. Xaver
* W.Xaver[at]googlemail.com
* http://zdfmediathk.sourceforge.net/
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package mServer.crawler.sender;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.StringEscapeUtils;
import de.mediathekview.mlib.Config;
import de.mediathekview.mlib.Const;
import de.mediathekview.mlib.daten.DatenFilm;
import de.mediathekview.mlib.tool.Log;
import de.mediathekview.mlib.tool.MSStringBuilder;
import mServer.crawler.CrawlerTool;
import mServer.crawler.FilmeSuchen;
import mServer.crawler.GetUrl;
import mServer.tool.MserverDaten;
public class MediathekSwr extends MediathekReader {
/**
 * Shorter wait value. It is not referenced in the part of the class visible here;
 * presumably milliseconds - TODO confirm against the users of this constant.
 */
private static final int wartenKurz = 2000;

/**
 * Longer wait value. The constructor hands it to the superclass as its urlWarten argument;
 * presumably milliseconds - TODO confirm against MediathekReader.
 */
private static final int wartenLang = 4000;

/** Sender name of this crawler, taken from {@code Const.SWR}. */
public static final String SENDERNAME = Const.SWR;
/**
 * Creates the crawler for the sender named by {@link #SENDERNAME}.
 *
 * <p>All work is delegated to the {@code MediathekReader} constructor, which receives the
 * fixed thread count 2 and {@code wartenLang} as the URL wait value.
 *
 * @param ssearch   handed through unchanged to the superclass constructor
 * @param startPrio handed through unchanged to the superclass constructor
 */
public MediathekSwr(FilmeSuchen ssearch, int startPrio) {
    super(
            ssearch,
            SENDERNAME,
            2,          // threads
            wartenLang, // urlWarten
            startPrio);
}
//===================================
// public
//===================================
/**
 * Rebuilds the topic list and starts the threads that work through it.
 *
 * <p>{@code listeThemen} is cleared and refilled by {@code addToList__()}; when
 * {@code CrawlerTool.loadLongMax()} is true, {@code addToList_verpasst()} is run as well.
 * If a stop was requested or the list ended up empty, only
 * {@code meldungThreadUndFertig()} is called and no threads are started. Otherwise the
 * list size is reported through {@code meldungAddMax} and {@code ThemaLaden} threads are
 * started for as long as the index is below {@code getMaxThreadLaufen()}, each named
 * {@code SENDERNAME} followed by its index.
 */
@Override
public synchronized void addToList() {
    meldungStart();

    // search for topics, starting from an empty list
    listeThemen.clear();
    addToList__();
    if (CrawlerTool.loadLongMax()) {
        // not really needed and takes too long, hence only in this case
        addToList_verpasst();
    }

    // stop requested or nothing found: report and leave without starting any thread
    if (Config.getStop() || listeThemen.isEmpty()) {
        meldungThreadUndFertig();
        return;
    }

    meldungAddMax(listeThemen.size());
    int threadIndex = 0;
    while (threadIndex < getMaxThreadLaufen()) {
        final Thread worker = new ThemaLaden();
        worker.setName(SENDERNAME + threadIndex);
        worker.start();
        ++threadIndex;
    }
}
//===================================
// private
//===================================
/**
 * Adds one entry per extracted topic to listeThemen. Each entry is a two-element array
 * holding the show page URL (fixed prefix plus the extracted value) and the topic title
 * after trimming and HTML unescaping.
 *
 * NOTE(review): this copy of the method looks damaged by text extraction. The pattern
 * literals at the top are unterminated, and the declarations of strSeite, pos, stop, url,
 * thema and MUSTER_THEMA as well as the loop header are not present here. Presumably HTML
 * markup inside the literals was stripped - restore the method from the upstream
 * repository before compiling or changing it.
 */
private void addToList__() {
// search for topics
final String MUSTER_START = "";
final String MUSTER_STOPP = "OFT GESUCHT
";
final String MUSTER_URL = " 0 && pos > stop) {
break;
}
pos += MUSTER_START.length();
url = strSeite.extract(MUSTER_URL, "\"", pos);
thema = strSeite.extract(MUSTER_THEMA, "\"", pos);
thema = StringEscapeUtils.unescapeHtml4(thema.trim()); // used right away, so it has to be correct already
// an empty topic is only logged; the entry is still added below when a URL was found
if (thema.isEmpty()) {
Log.errorLog(915263078, "kein Thema");
}
// without a URL nothing is added, the problem is only logged
if (url.isEmpty()) {
Log.errorLog(163255009, "keine URL");
} else {
//url = url.replace("&", "&");
// entry layout: [show page URL, topic title]
String[] add = new String[]{"http://swrmediathek.de/tvshow.htm?show=" + url, thema};
listeThemen.addUrl(add);
}
}
}
private void addToList_verpasst() {
//Theman suchen
MSStringBuilder strSeite = new MSStringBuilder(Const.STRING_BUFFER_START_BUFFER);
GetUrl getUrlIo = new GetUrl(getWartenSeiteLaden());
strSeite = getUrlIo.getUriWithDelay(SENDERNAME, "http://swrmediathek.de/sendungverpasst.htm", StandardCharsets.UTF_8, 2, strSeite, "", 4, TimeUnit.SECONDS);
ArrayList list = new ArrayList<>();
strSeite.extractList("", "
© 2015 - 2025 Weber Informatics LLC | Privacy Policy