
me.vertretungsplan.parser.UntisMonitorParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of parser Show documentation
Show all versions of parser Show documentation
Java library for parsing schools' substitution schedules. Supports multiple different systems mainly used in the German-speaking countries.
/*
* substitution-schedule-parser - Java library for parsing schools' substitution schedules
* Copyright (c) 2016 Johan v. Forstner
*
* This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
* If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
package me.vertretungsplan.parser;
import me.vertretungsplan.exception.CredentialInvalidException;
import me.vertretungsplan.objects.SubstitutionSchedule;
import me.vertretungsplan.objects.SubstitutionScheduleData;
import me.vertretungsplan.objects.SubstitutionScheduleDay;
import org.apache.http.client.HttpResponseException;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Parser for substitution schedules in HTML format created by the Untis software
 * using the "Monitor-Vertretungsplan" layout.
 * <p>
 * Example: Lornsenschule Schleswig
 * <p>
 * This parser can be accessed using <code>"untis-monitor"</code> for
 * {@link SubstitutionScheduleData#setApi(String)}.
 *
 * <h4>Configuration parameters</h4>
 * These parameters can be supplied in {@link SubstitutionScheduleData#setData(JSONObject)} to configure the parser:
 *
 * <dl>
 * <dt><code>urls</code> (Array of JSONObjects, required)</dt>
 * <dd>The URLs of the HTML files of the schedule. There is one file for each day. Each JSONObject has a
 * <code>url</code> parameter specifying the URL and a <code>following</code> parameter to set if the parser
 * should follow HTML <code>meta</code> tag redirects to load multiple pages. If you are using
 * {@link LoginHandler} for a HTTP POST login, the <code>url</code> parameter can also be set to
 * <code>"loginResponse"</code>.</dd>
 *
 * <dt><code>encoding</code> (String, required)</dt>
 * <dd>The charset of the HTML files. It's probably either UTF-8 or ISO-8859-1.</dd>
 *
 * <dt><code>classes</code> (Array of Strings, required)</dt>
 * <dd>The list of all classes, as they can appear in the schedule.</dd>
 *
 * <dt><code>website</code> (String, recommended)</dt>
 * <dd>The URL of a website where the substitution schedule can be seen online.</dd>
 *
 * <dt><code>lastChangeSelector</code> (String, optional)</dt>
 * <dd>When this is specified, the date of last change is read from the first HTML element that matches this CSS
 * selector. The CSS selector syntax is supported as specified by JSoup.</dd>
 *
 * <dt><code>embeddedContentSelector</code> (String, optional)</dt>
 * <dd>When the Untis schedule is embedded in another HTML file using server-side code, you can use this to
 * specify which HTML elements should be considered as the containers for the Untis schedule. The CSS selector
 * syntax is supported as specified by JSoup.</dd>
 * </dl>
 *
 * Additionally, this parser supports the parameters specified in {@link LoginHandler} for login-protected schedules
 * and those specified in {@link UntisCommonParser}.
 */
public class UntisMonitorParser extends UntisCommonParser {
private static final int MAX_RECURSION_DEPTH = 30;
private static final String PARAM_URLS = "urls";
private static final String PARAM_ENCODING = "encoding";
private static final String PARAM_EMBEDDED_CONTENT_SELECTOR = "embeddedContentSelector";
private static final String PARAM_LAST_CHANGE_SELECTOR = "lastChangeSelector";
private static final String PARAM_WEBSITE = "website";
private static final String SUBPARAM_FOLLOWING = "following";
private static final String SUBPARAM_URL = "url";
private static final String VALUE_URL_LOGIN_RESPONSE = "loginResponse";
private String loginResponse;
public UntisMonitorParser(SubstitutionScheduleData scheduleData, CookieProvider cookieProvider) {
super(scheduleData, cookieProvider);
}
public SubstitutionSchedule getSubstitutionSchedule() throws IOException, JSONException, CredentialInvalidException {
loginResponse = new LoginHandler(scheduleData, credential, cookieProvider)
.handleLoginWithResponse(executor, cookieStore);
SubstitutionSchedule v = SubstitutionSchedule.fromData(scheduleData);
JSONArray urls = scheduleData.getData().getJSONArray(PARAM_URLS);
String encoding = scheduleData.getData().getString(PARAM_ENCODING);
List docs = new ArrayList<>();
for (int i = 0; i < urls.length(); i++) {
JSONObject url = urls.getJSONObject(i);
loadUrl(url.getString(SUBPARAM_URL), encoding, url.getBoolean(SUBPARAM_FOLLOWING), docs);
}
for (Document doc : docs) {
if (scheduleData.getData().has(PARAM_EMBEDDED_CONTENT_SELECTOR)) {
for (Element part : doc.select(scheduleData.getData().getString(PARAM_EMBEDDED_CONTENT_SELECTOR))) {
SubstitutionScheduleDay day = parseMonitorDay(part, scheduleData.getData());
v.addDay(day);
}
} else if (doc.title().contains("Untis")) {
SubstitutionScheduleDay day = parseMonitorDay(doc, scheduleData.getData());
v.addDay(day);
}
// else Error
if (scheduleData.getData().has(PARAM_LAST_CHANGE_SELECTOR)
&& doc.select(scheduleData.getData().getString(PARAM_LAST_CHANGE_SELECTOR)).size() > 0) {
String text = doc.select(scheduleData.getData().getString(PARAM_LAST_CHANGE_SELECTOR)).first().text();
String lastChange;
Pattern pattern = Pattern.compile("\\d\\d\\.\\d\\d\\.\\d\\d\\d\\d,? \\d\\d:\\d\\d");
Matcher matcher = pattern.matcher(text);
if (matcher.find()) {
lastChange = matcher.group();
} else {
lastChange = text;
}
v.setLastChangeString(lastChange);
v.setLastChange(ParserUtils.parseDateTime(lastChange));
}
}
if (scheduleData.getData().has(PARAM_WEBSITE)) {
v.setWebsite(scheduleData.getData().getString(PARAM_WEBSITE));
} else if (urls.length() == 1) {
v.setWebsite(urls.getJSONObject(0).getString("url"));
}
v.setClasses(getAllClasses());
v.setTeachers(getAllTeachers());
return v;
}
private void loadUrl(String url, String encoding, boolean following, List docs, String startUrl,
int recursionDepth) throws IOException, CredentialInvalidException {
String html;
if (url.equals(VALUE_URL_LOGIN_RESPONSE)) {
html = loginResponse;
} else {
try {
html = httpGet(url, encoding).replace(" ", "");
} catch (HttpResponseException e) {
if (docs.size() == 0) {
throw e;
} else {
return; // ignore if first page was loaded and redirect didn't work
}
}
}
Document doc = Jsoup.parse(html);
doc.setBaseUri(url);
if (doc.select(".mon_title").size() == 0) {
// We have a problem - there seems to be no substitution schedule. Maybe it is hiding
// inside a frame?
if (doc.select("frameset frame[name").size() > 0) {
for (Element frame : doc.select("frameset frame")) {
if (frame.attr("src").matches(".*subst_\\d\\d\\d.html?") && recursionDepth < MAX_RECURSION_DEPTH) {
String frameUrl = frame.absUrl("src");
loadUrl(frame.absUrl("src"), encoding, following, docs, frameUrl, recursionDepth + 1);
}
}
} else if (doc.text().contains("registriert")) {
throw new CredentialInvalidException();
} else {
if (docs.size() == 0) {
// ignore if first page was loaded and redirect didn't work
throw new IOException("Could not find .mon-title, seems like there is no Untis " +
"schedule here");
}
}
} else {
findSubDocs(docs, html, doc);
if (following && doc.select("meta[http-equiv=refresh]").size() > 0) {
Element meta = doc.select("meta[http-equiv=refresh]").first();
String attr = meta.attr("content").toLowerCase();
String redirectUrl = url.substring(0, url.lastIndexOf("/") + 1) + attr.substring(attr.indexOf("url=") + 4);
if (!redirectUrl.equals(startUrl) && recursionDepth < MAX_RECURSION_DEPTH) {
loadUrl(redirectUrl, encoding, true, docs, startUrl, recursionDepth + 1);
}
}
}
}
static void findSubDocs(List docs, String html, Document doc) {
// Some schools concatenate multiple HTML files for multiple days
Pattern pattern = Pattern.compile("(.*?)", Pattern.DOTALL);
Matcher matcher = pattern.matcher(html);
List subHtmls = new ArrayList<>();
while (matcher.find()) {
subHtmls.add(matcher.group());
}
if (subHtmls.size() > 1) {
for (String subHtml : subHtmls) {
docs.add(Jsoup.parse(subHtml));
}
} else {
docs.add(doc);
}
}
private void loadUrl(String url, String encoding, boolean following, List docs) throws IOException, CredentialInvalidException {
loadUrl(url, encoding, following, docs, url, 0);
}
public List getAllTeachers() {
return null;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy