All Downloads are FREE. Search and download functionalities are using the official Maven repository.

me.vertretungsplan.parser.IndiwareParser Maven / Gradle / Ivy

/*
 * substitution-schedule-parser - Java library for parsing schools' substitution schedules
 * Copyright (c) 2016 Johan v. Forstner
 *
 * This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
 * If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

package me.vertretungsplan.parser;

import me.vertretungsplan.exception.CredentialInvalidException;
import me.vertretungsplan.objects.Substitution;
import me.vertretungsplan.objects.SubstitutionSchedule;
import me.vertretungsplan.objects.SubstitutionScheduleData;
import me.vertretungsplan.objects.SubstitutionScheduleDay;
import org.apache.http.NameValuePair;
import org.apache.http.message.BasicNameValuePair;
import org.jetbrains.annotations.NotNull;
import org.joda.time.format.DateTimeFormat;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Parser;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Parser for substitution schedules in XML or HTML format created by the Indiware
 * software.
 * 

 * <p>
 * This parser can be accessed using <code>"indiware"</code> for {@link SubstitutionScheduleData#setApi(String)}.
 *
 * <h4>Configuration parameters</h4>
 * These parameters can be supplied in {@link SubstitutionScheduleData#setData(JSONObject)} to configure the parser:
 *
 * <dl>
 * <dt><code>urls</code> (Array of Strings, required)</dt>
 * <dd>The URLs of the XML files of the schedule. There is one file for each day. If the filenames themselves
 * contain the date, you can use something like <code>{date(yyyy-MM-dd)}</code> in the URL. This placeholder will then
 * be replaced with the dates of the next 7 days.</dd>
 *
 * <dt><code>encoding</code> (String, required)</dt>
 * <dd>The charset of the XML files. It's probably either UTF-8 or ISO-8859-1.</dd>
 *
 * <dt><code>classes</code> (Array of Strings, required)</dt>
 * <dd>The list of all classes, as they can appear in the schedule</dd>
 *
 * <dt><code>embeddedContentSelector</code> (String, optional)</dt>
 * <dd>When the Indiware schedule is embedded in another HTML file using server-side code, you can use this to
 * specify which HTML elements should be considered as the containers for the Indiware HTML schedule. The CSS selector
 * syntax is supported as specified by
 * <a href="https://jsoup.org/cookbook/extracting-data/selector-syntax">JSoup</a>.</dd>
 *
 * <dt><code>splitTeachers</code> (boolean, optional, default: true)</dt>
 * <dd>Whether strings with a comma in the teacher column denote multiple teachers</dd>
 * </dl>
* Additionally, this parser supports the parameters specified in {@link LoginHandler} for login-protected schedules.
*/
public class IndiwareParser extends BaseParser {
    private static final String PARAM_URLS = "urls";
    private static final String PARAM_ENCODING = "encoding";
    private static final String PARAM_EMBEDDED_CONTENT_SELECTOR = "embeddedContentSelector";
    private static final String PARAM_SPLIT_TEACHERS = "splitTeachers";

    /** Parser configuration, taken from {@link SubstitutionScheduleData#getData()} in the constructor. */
    protected JSONObject data;

    private static final int MAX_DAYS = 7;

    /** Matches a date such as "Montag, 1. Januar 2016" inside the schedule title. */
    static final Pattern datePattern = Pattern.compile("\\w+, \\d\\d?\\. \\w+ \\d{4}",
            Pattern.UNICODE_CHARACTER_CLASS);
    /** Matches a timestamp such as "01.01.2016, 12:00". */
    static final Pattern lastChangePattern = Pattern.compile("\\d\\d?\\.\\d\\d?\\.\\d{4}, \\d\\d?\\:\\d\\d");
    /** "für SUBJECT TEACHER[, description]": group 1 = subject, group 2 = teacher, group 3 = rest. */
    static final Pattern substitutionPattern = Pattern.compile(
            "für ([^\\s]+) ((?:(?! ,|Frau|Herr).)+|(?:Herr|Frau) [^\\s]+) ?,? ?(.*)");
    /**
     * "SUBJECT [TEACHER] fällt [leider] aus": group 1 = subject, group 2 = optional teacher (or class).
     * The "leider " group is non-capturing; the original spelled it {@code (:?leider )?}, which is a
     * capturing group with an optional literal colon.
     */
    static final Pattern cancelPattern = Pattern.compile(
            "((?!verlegt|statt)[^\\s]+) (?:(.+) )?fällt (?:leider )?aus");
    /** "SUBJECT TEACHER verlegt nach ...": group 3 is the whole "verlegt nach ..." text. */
    static final Pattern delayPattern = Pattern.compile("([^\\s]+) ([^\\s]+) (verlegt nach .*)");
    static final Pattern selfPattern = Pattern.compile("selbst\\. ?,? ?(.*)");
    /** "CLASSES/ COURSE": group 1 = classes, group 2 = course. */
    static final Pattern coursePattern = Pattern.compile("(.*)/ (.*)");
    /** A value that is entirely wrapped in parentheses; group 1 is the content. */
    static final Pattern bracesPattern = Pattern.compile("^\\((.*)\\)$");
    static final Pattern takeOverPattern = Pattern.compile(
            "((?:(?! ,|Frau|Herr).)+|(?:Herr|Frau) [^\\s]+) übernimmt mit");
    static final Pattern newPattern = Pattern.compile("^neu(?:, )?(.*)$");
    static final Pattern examPattern = Pattern.compile("^Prüfung(?:; )?(.*)$");

    public IndiwareParser(SubstitutionScheduleData scheduleData, CookieProvider cookieProvider) {
        super(scheduleData, cookieProvider);
        data = scheduleData.getData();
    }

    /**
     * Logs in, downloads every configured schedule page and parses each of them into one schedule.
     * <p>
     * Download and parse failures are tolerated per page: an {@link IOException} is only rethrown when
     * not a single page could be downloaded, or not a single downloaded page could be parsed.
     *
     * @return the schedule built from all pages that could be parsed
     * @throws IOException                if every download or every parse attempt failed
     * @throws JSONException              if the configuration lacks a required entry
     * @throws CredentialInvalidException declared for the login step
     */
    @Override
    public SubstitutionSchedule getSubstitutionSchedule()
            throws IOException, JSONException, CredentialInvalidException {
        new LoginHandler(scheduleData, credential, cookieProvider).handleLogin(executor, cookieStore);

        JSONArray urls = data.getJSONArray(PARAM_URLS);
        String encoding = data.optString(PARAM_ENCODING, null);
        List<String> docs = new ArrayList<>();

        SubstitutionSchedule v = SubstitutionSchedule.fromData(scheduleData);

        int successfulSchedules = 0;
        IOException lastException = null;
        for (int i = 0; i < urls.length(); i++) {
            if (urls.optJSONObject(i) != null) {
                // Entry of the form {"url": ..., "postData": {...}}
                try {
                    JSONObject obj = urls.getJSONObject(i);
                    String url = obj.getString("url");
                    // NOTE(review): an object entry without "postData" is silently skipped, as in the original.
                    if (obj.has("postData")) {
                        JSONObject postParams = obj.getJSONObject("postData");
                        List<NameValuePair> nvps = new ArrayList<>();
                        // getNames() returns null (not an empty array) for an empty object.
                        String[] names = JSONObject.getNames(postParams);
                        if (names != null) {
                            for (String name : names) {
                                nvps.add(new BasicNameValuePair(name, postParams.getString(name)));
                            }
                        }
                        docs.add(httpPost(url, encoding, nvps));
                        successfulSchedules++;
                    }
                } catch (IOException e) {
                    lastException = e;
                }
            } else {
                // Plain string entry; handleUrl may expand it into several URLs.
                for (String url : ParserUtils.handleUrl(urls.getString(i))) {
                    try {
                        docs.add(httpGet(url, encoding));
                        successfulSchedules++;
                    } catch (IOException e) {
                        lastException = e;
                    }
                }
            }
        }
        if (successfulSchedules == 0 && lastException != null) {
            throw lastException;
        }

        successfulSchedules = 0;
        lastException = null;
        for (String response : docs) {
            try {
                parseIndiwarePage(v, response);
                successfulSchedules++;
            } catch (IOException e) {
                lastException = e;
            }
        }
        if (successfulSchedules == 0 && lastException != null) {
            throw lastException;
        }

        v.setWebsite(urls.optString(0, data.optString("website", null)));
        v.setClasses(getAllClasses());
        v.setTeachers(getAllTeachers());
        return v;
    }

    /**
     * Parses one downloaded page, which may contain one or several days, and adds the days to {@code v}.
     * <p>
     * NOTE(review): the copy of this file that was reviewed had every {@code <...>} sequence stripped, which
     * destroyed the beginning of this method (format detection, embedded-content selection, XML parsing) and
     * the three split/wrapper literals below. Those parts are reconstructed from the surviving code
     * ({@code PARAM_EMBEDDED_CONTENT_SELECTOR}, the {@code Parser} import, the {@code vp}/{@code kopf}
     * selectors) and must be confirmed against the upstream repository before release.
     *
     * @param v        schedule that receives the parsed days
     * @param response raw page content (HTML or XML)
     * @throws IOException   if the page cannot be interpreted
     * @throws JSONException if reading the configuration fails
     */
    void parseIndiwarePage(SubstitutionSchedule v, String response) throws JSONException, IOException {
        boolean html;
        Element doc;
        if (response.contains("<html") || response.contains("<table")) { // NOTE(review): reconstructed
            html = true;
            doc = Jsoup.parse(response);
            if (data.has(PARAM_EMBEDDED_CONTENT_SELECTOR)) {
                String selector = data.getString(PARAM_EMBEDDED_CONTENT_SELECTOR);
                Elements elems = doc.select(selector);
                if (elems.isEmpty()) {
                    throw new IOException("No elements found using " + selector);
                }
                doc = elems.first();
            }
        } else {
            html = false;
            doc = Jsoup.parse(response, "", Parser.xmlParser());
        }

        if (html && doc.select(".vpfuer").size() > 1) { // NOTE(review): selector reconstructed
            // multiple schedules after each other on one page
            String[] htmls = doc.html().split("<span class=\"vpfuer\">"); // NOTE(review): reconstructed
            // Index 0 is whatever precedes the first schedule, so start at 1.
            for (int i = 1; i < htmls.length; i++) {
                Document splitDoc = Jsoup.parse(htmls[i]);
                v.addDay(parseIndiwareDay(splitDoc, true));
            }
        } else if (!html && doc.select("kopf").size() > 1) {
            String[] xmls = doc.html().split("<kopf>"); // NOTE(review): reconstructed
            for (int i = 1; i < xmls.length; i++) {
                String xml = "<vp><kopf>" + xmls[i]; // NOTE(review): reconstructed
                if (i < xmls.length - 1) {
                    xml += "</vp>"; // NOTE(review): reconstructed
                }
                Document splitDoc = Jsoup.parse(xml);
                v.addDay(parseIndiwareDay(splitDoc, false));
            }
        } else {
            v.addDay(parseIndiwareDay(doc, html));
        }
    }

    /** Uniform access to the parts of a schedule, independent of whether it is XML or HTML. */
    private interface DataSource {
        Element titel();

        Element datum();

        Elements kopfinfos();

        Element fuss();

        Elements fusszeilen();

        Element aufsichten();

        Elements aufsichtzeilen();

        Elements aktionen();
    }

    /** Reads the schedule parts from the XML format ({@code vp > kopf / haupt / fuss / aufsichten}). */
    private static final class XMLDataSource implements DataSource {
        private final Element vp;
        private final Element kopf;

        /**
         * @throws IOException if the document has no {@code vp} or {@code kopf} element (previously this
         *                     surfaced as a NullPointerException, which callers do not tolerate per page)
         */
        XMLDataSource(Element doc) throws IOException {
            vp = doc.select("vp").first();
            if (vp == null) {
                throw new IOException("malformed schedule: missing <vp> element");
            }
            kopf = vp.select("kopf").first();
            if (kopf == null) {
                throw new IOException("malformed schedule: missing <kopf> element");
            }
        }

        @Override
        public Element titel() {
            return kopf.select("titel").first();
        }

        @Override
        public Element datum() {
            return kopf.select("datum").first();
        }

        @Override
        public Elements kopfinfos() {
            return kopf.select("kopfinfo > *");
        }

        @Override
        public Element fuss() {
            return vp.select("fuss").first();
        }

        /** Only call when {@link #fuss()} is non-null. */
        @Override
        public Elements fusszeilen() {
            return fuss().select("fusszeile fussinfo");
        }

        @Override
        public Element aufsichten() {
            return vp.select("aufsichten").first();
        }

        /** Only call when {@link #aufsichten()} is non-null. */
        @Override
        public Elements aufsichtzeilen() {
            return aufsichten().select("aufsichtzeile aufsichtinfo");
        }

        @Override
        public Elements aktionen() {
            return vp.select("haupt > aktion");
        }
    }

    /** Reads the schedule parts from the HTML format, using its CSS class names. */
    private static final class HTMLDataSource implements DataSource {
        private final Element doc;

        HTMLDataSource(Element doc) {
            this.doc = doc;
        }

        @Override
        public Element titel() {
            return doc.select(".vpfuerdatum").first();
        }

        @Override
        public Element datum() {
            return doc.select(".vpdatum").first();
        }

        @Override
        public Elements kopfinfos() {
            return doc.select("table:has(th[class^=thkopf]) tr");
        }

        @Override
        public Element fuss() {
            // First table that is neither the header table, the substitution table nor the "Aufsichten" table.
            return doc.select("table:not(:has(th[class^=thkopf])):not(:has(.tdaktionen))"
                    + ":not(span:contains(Aufsichten) + table)").first();
        }

        /** Only call when {@link #fuss()} is non-null. */
        @Override
        public Elements fusszeilen() {
            return fuss().select("tr td");
        }

        @Override
        public Element aufsichten() {
            return doc.select("span:contains(Aufsichten) + table").first();
        }

        /** Only call when {@link #aufsichten()} is non-null. */
        @Override
        public Elements aufsichtzeilen() {
            return aufsichten().select("tr td");
        }

        @Override
        public Elements aktionen() {
            // tr:gt(0) skips the header row.
            return doc.select("table:has(.tdaktionen) tr:gt(0)");
        }

        /** Header cells of the substitution table. */
        public Elements headers() {
            return doc.select("table:has(.tdaktionen) th");
        }
    }

    /**
     * Parses a single day (date, last change, messages and substitutions) from an XML or HTML document.
     *
     * @param doc  document or element containing exactly one day
     * @param html {@code true} for the HTML format, {@code false} for the XML format
     * @return the parsed day
     * @throws IOException   if required elements are missing or the dates cannot be recognised
     * @throws JSONException if reading the configuration fails
     */
    SubstitutionScheduleDay parseIndiwareDay(Element doc, boolean html) throws IOException, JSONException {
        SubstitutionScheduleDay day = new SubstitutionScheduleDay();

        DataSource ds = html ? new HTMLDataSource(doc) : new XMLDataSource(doc);

        String titleText = requireElement(ds.titel(), "title").text();
        Matcher matcher = datePattern.matcher(titleText);
        if (!matcher.find()) {
            throw new IOException("malformed date: " + titleText);
        }
        day.setDate(DateTimeFormat.forPattern("EEEE, dd. MMMM yyyy")
                .withLocale(Locale.GERMAN).parseLocalDate(matcher.group()));

        String datumText = requireElement(ds.datum(), "last change").text();
        matcher = lastChangePattern.matcher(datumText);
        if (!matcher.find()) {
            throw new IOException("malformed date: " + datumText);
        }
        day.setLastChange(DateTimeFormat.forPattern("dd.MM.yyyy, HH:mm")
                .withLocale(Locale.GERMAN).parseLocalDateTime(matcher.group()));

        addMessages(day, ds, html);

        List<String> columnTypes = html ? readColumnTypes((HTMLDataSource) ds) : null;
        // Loop-invariant, so read the option once per day instead of once per row.
        boolean splitTeachers = data.optBoolean(PARAM_SPLIT_TEACHERS, true);
        for (Element aktion : ds.aktionen()) {
            day.addSubstitution(parseSubstitution(aktion, html, columnTypes, splitTeachers));
        }

        return day;
    }

    /** Returns {@code element}, or throws an {@link IOException} naming the missing part when it is null. */
    private static Element requireElement(Element element, String what) throws IOException {
        if (element == null) {
            throw new IOException("malformed schedule: missing " + what + " element");
        }
        return element;
    }

    /**
     * Adds the header infos, the footer and the changed supervisions of a day as messages.
     * <p>
     * NOTE(review): the {@code <b>}, {@code </b>} and {@code <br>} literals were stripped from the reviewed
     * copy (they appeared as empty strings / raw line breaks) and are reconstructed here; confirm against
     * the upstream repository.
     */
    private static void addMessages(SubstitutionScheduleDay day, DataSource ds, boolean html) {
        for (Element kopfinfo : ds.kopfinfos()) {
            String title;
            if (html) {
                title = kopfinfo.select("th").text();
            } else {
                // Appending ":" to a null title used to produce the literal text "null:".
                String known = kopfinfoTitle(kopfinfo.tagName());
                title = known != null ? known + ":" : null;
            }

            StringBuilder message = new StringBuilder();
            if (title != null && !title.isEmpty()) {
                message.append("<b>").append(title).append("</b>").append(" ");
            }
            message.append(html ? kopfinfo.select("td").text() : kopfinfo.text());
            day.addMessage(message.toString());
        }

        if (ds.fuss() != null) {
            StringBuilder message = new StringBuilder();
            boolean first = true;
            for (Element fusszeile : ds.fusszeilen()) {
                if (first) {
                    first = false;
                } else {
                    message.append("<br>\n");
                }
                message.append(fusszeile.text());
            }
            day.addMessage(message.toString());
        }

        if (ds.aufsichten() != null) {
            StringBuilder message = new StringBuilder();
            message.append("<b>").append("Geänderte Aufsichten:").append("</b>");
            for (Element aufsicht : ds.aufsichtzeilen()) {
                message.append("<br>\n");
                message.append(aufsicht.text());
            }
            day.addMessage(message.toString());
        }
    }

    /**
     * Derives the column type of each header cell of the HTML substitution table from its CSS class
     * (e.g. {@code thplanklasse} becomes {@code klasse}). Header cells without a matching class add no entry.
     */
    private static List<String> readColumnTypes(HTMLDataSource ds) {
        List<String> columnTypes = new ArrayList<>();
        for (Element th : ds.headers()) {
            Set<String> classNames = th.classNames();
            for (String className : classNames) {
                if (className.contains("thplan") || className.contains("thlplan")) {
                    columnTypes.add(className.replace("thplan", "")
                            .replace("thlplan", "")
                            .replace("_scheuler", "")); // sic! -> http://www.hildebrand-gymnasium.de/index.php/klasse-5.html
                    break;
                }
            }
        }
        return columnTypes;
    }

    /**
     * Converts one row (HTML) or {@code aktion} element (XML) into a {@link Substitution}.
     *
     * @param aktion        the row / element whose children are the individual columns
     * @param html          whether the HTML format is being parsed
     * @param columnTypes   column types by index for HTML, {@code null} for XML (the tag name is used instead)
     * @param splitTeachers whether comma-separated teacher values denote several teachers
     */
    private Substitution parseSubstitution(Element aktion, boolean html, List<String> columnTypes,
                                           boolean splitTeachers) throws JSONException {
        Substitution substitution = new Substitution();
        String course = null;
        int i = 0;
        for (Element info : aktion.children()) {
            String value = info.text().replace("\u00a0", "");
            if (value.equals("---")) {
                // Placeholder for "no value": skip the column but keep the index in step.
                i++;
                continue;
            }

            final String columnType = html ? columnTypes.get(i) : info.tagName();
            Matcher bracesMatcher = bracesPattern.matcher(value);

            switch (columnType) {
                case "klasse":
                    ClassAndCourse cac = new ClassAndCourse(value, data);
                    course = cac.course;
                    substitution.setClasses(cac.classes);
                    break;
                case "stunde":
                    substitution.setLesson(value);
                    break;
                case "fach":
                    String subject = subjectAndCourse(course, value);
                    // When a separate "vfach" column exists, "fach" holds the previous subject.
                    if (html ? columnTypes.contains("vfach") : aktion.getElementsByTag("vfach").size() > 0) {
                        substitution.setPreviousSubject(subject);
                    } else {
                        substitution.setSubject(subject);
                    }
                    break;
                case "vfach":
                    substitution.setSubject(subjectAndCourse(course, value));
                    break;
                case "lehrer":
                    if (bracesMatcher.matches()) {
                        value = bracesMatcher.group(1);
                        substitution.setPreviousTeachers(splitTeachers(value, splitTeachers));
                    } else if (html ? columnTypes.contains("vlehrer")
                            : aktion.getElementsByTag("vlehrer").size() > 0) {
                        substitution.setPreviousTeachers(splitTeachers(value, splitTeachers));
                    } else {
                        substitution.setTeachers(splitTeachers(value, splitTeachers));
                    }
                    break;
                case "vlehrer":
                    if (bracesMatcher.matches()) {
                        value = bracesMatcher.group(1);
                        substitution.setPreviousTeachers(splitTeachers(value, splitTeachers));
                    } else {
                        substitution.setTeachers(splitTeachers(value, splitTeachers));
                    }
                    break;
                case "raum":
                    if (columnTypes != null && columnTypes.contains("vraum")) {
                        substitution.setPreviousRoom(value);
                    } else {
                        substitution.setRoom(value);
                    }
                    break;
                case "vraum":
                    substitution.setRoom(value);
                    // The original had no break here and fell through into "info",
                    // so the room value was additionally parsed as a description.
                    break;
                case "info":
                    handleDescription(substitution, value);
                    break;
            }
            i++;
        }
        if (substitution.getType() == null) {
            substitution.setType("Vertretung");
        }
        substitution.setColor(colorProvider.getColor(substitution.getType()));
        if (course != null && substitution.getSubject() == null) {
            substitution.setSubject(course);
        }
        return substitution;
    }

    /**
     * Turns a teacher cell into a set of teachers.
     *
     * @param value the cell text
     * @param split if {@code true}, the text is split at {@code ", "}; otherwise it is kept as one entry
     */
    @NotNull
    static HashSet<String> splitTeachers(String value, boolean split) {
        if (split) {
            return new HashSet<>(Arrays.asList(value.split(", ")));
        } else {
            return new HashSet<>(Collections.singletonList(value));
        }
    }

    /** Same as {@link #handleDescription(Substitution, String, boolean)} with {@code teacher = false}. */
    static void handleDescription(Substitution substitution, String value) {
        handleDescription(substitution, value, false);
    }

    /**
     * Interprets the free-text info column and fills type, description, previous subject/teacher etc.
     * of {@code substitution} accordingly. Does nothing when {@code value} is null.
     *
     * @param substitution the substitution to update
     * @param value        text of the info column
     * @param teacher      if {@code true}, the optional second part of a "... fällt aus" text is stored as
     *                     the class instead of the previous teacher
     */
    static void handleDescription(Substitution substitution, String value, boolean teacher) {
        if (value == null) return;

        // A leading "neu" / "neu, " is dropped before the remaining text is analysed.
        Matcher newMatcher = newPattern.matcher(value);
        if (newMatcher.matches()) {
            value = newMatcher.group(1);
        }

        Matcher examMatcher = examPattern.matcher(value);
        Matcher substitutionMatcher = substitutionPattern.matcher(value);
        Matcher cancelMatcher = cancelPattern.matcher(value);
        Matcher delayMatcher = delayPattern.matcher(value);
        Matcher selfMatcher = selfPattern.matcher(value);
        if (examMatcher.matches()) {
            substitution.setType("Prüfung");
            substitution.setDesc(examMatcher.group(1));
        } else if (substitutionMatcher.matches()) {
            substitution.setPreviousSubject(substitutionMatcher.group(1));
            substitution.setPreviousTeacher(substitutionMatcher.group(2));
            if (!substitutionMatcher.group(3).isEmpty()) {
                substitution.setDesc(substitutionMatcher.group(3));
            }
        } else if (cancelMatcher.matches()) {
            substitution.setType("Entfall");
            substitution.setPreviousSubject(cancelMatcher.group(1));
            // groupCount() is a property of the pattern and therefore always > 1 here;
            // what matters is whether the optional group actually took part in the match.
            String who = cancelMatcher.group(2);
            if (who != null) {
                if (teacher) {
                    substitution.setClasses(Collections.singleton(who));
                } else {
                    substitution.setPreviousTeacher(who);
                }
            }
        } else if (delayMatcher.matches()) {
            substitution.setType("Verlegung");
            substitution.setPreviousSubject(delayMatcher.group(1));
            substitution.setPreviousTeacher(delayMatcher.group(2));
            substitution.setDesc(delayMatcher.group(3));
        } else if (selfMatcher.matches()) {
            substitution.setType("selbst.");
            if (!selfMatcher.group(1).isEmpty()) substitution.setDesc(selfMatcher.group(1));
        } else if (value.equals("fällt aus") || value.equals("Klausur") || value.equals("Aufg.")) {
            substitution.setType(value);
        } else {
            substitution.setDesc(value);
        }

        // "X übernimmt mit" inside the description names the teacher who takes over.
        if (substitution.getDesc() != null) {
            Matcher takeOverMatcher = takeOverPattern.matcher(substitution.getDesc());
            if (takeOverMatcher.find()) {
                substitution.setTeacher(takeOverMatcher.group(1));
            }
        }
    }

    /** Returns {@code subject}, followed by a space and {@code course} when a course is known. */
    @NotNull
    private String subjectAndCourse(String course, String subject) {
        if (course == null) {
            return subject;
        }
        return subject + " " + course;
    }

    /** Maps an XML header tag name to its display title, or {@code null} for unknown tags. */
    private static String kopfinfoTitle(String type) {
        switch (type) {
            case "abwesendl":
                return "Abwesende Lehrer";
            case "abwesendk":
                return "Abwesende Klassen";
            case "abwesendr":
                return "Nicht verfügbare Räume";
            case "aenderungl":
                return "Lehrer mit Änderung";
            case "aenderungk":
                return "Klassen mit Änderung";
            default:
                return null;
        }
    }

    @Override
    public List<String> getAllClasses() throws IOException, JSONException {
        return getClassesFromJson();
    }

    /** This parser does not provide a teacher list; it returns {@code null}. */
    @Override
    public List<String> getAllTeachers() throws IOException, JSONException {
        return null;
    }

    /** Splits a class cell of the form "CLASSES/ COURSE" into its classes and the optional course. */
    static class ClassAndCourse {
        /** The course part after "/ ", or {@code null} when the value contains none. */
        public String course = null;
        public Set<String> classes;

        public ClassAndCourse(String value, JSONObject data) throws JSONException {
            String classesString;
            Matcher courseMatcher = coursePattern.matcher(value);
            if (courseMatcher.matches()) {
                classesString = courseMatcher.group(1);
                course = courseMatcher.group(2);
            } else {
                classesString = value;
            }

            classes = new HashSet<>(Arrays.asList(classesString.split(",")));
            classes = BaseParser.handleClassRanges(classes, data);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy