
me.vertretungsplan.parser.UntisCommonParser Maven / Gradle / Ivy
Show all versions of parser Show documentation
/*
* substitution-schedule-parser - Java library for parsing schools' substitution schedules
* Copyright (c) 2016 Johan v. Forstner
*
* This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
* If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
package me.vertretungsplan.parser;
import me.vertretungsplan.objects.Substitution;
import me.vertretungsplan.objects.SubstitutionSchedule;
import me.vertretungsplan.objects.SubstitutionScheduleData;
import me.vertretungsplan.objects.SubstitutionScheduleDay;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.*;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Enthält gemeinsam genutzte Funktionen für die Parser für
* Untis-Vertretungspläne
*
*/
public abstract class UntisCommonParser extends BaseParser {
private static final String[] EXCLUDED_CLASS_NAMES = new String[]{"-----"};
public UntisCommonParser(SubstitutionScheduleData scheduleData, CookieProvider cookieProvider) {
super(scheduleData, cookieProvider);
}
static String findLastChange(Element doc, SubstitutionScheduleData scheduleData) {
String lastChange = null;
if (doc.select("table.mon_head").size() > 0) {
Element monHead = doc.select("table.mon_head").first();
lastChange = findLastChangeFromMonHeadTable(monHead);
} else if (scheduleData != null && scheduleData.getData().optBoolean("stand_links", false)) {
lastChange = doc.select("body").html().substring(0, doc.select("body").html().indexOf("") - 1);
} else {
List childNodes;
if (doc instanceof Document) {
childNodes = ((Document) doc).body().childNodes();
} else {
childNodes = doc.childNodes();
}
for (Node node : childNodes) {
if (node instanceof Comment) {
Comment comment = (Comment) node;
if (comment.getData().contains("")) {
Document commentedDoc = Jsoup.parse(comment.getData());
Element monHead = commentedDoc.select("table.mon_head").first();
lastChange = findLastChangeFromMonHeadTable(monHead);
break;
}
}
}
}
return lastChange;
}
private static String findLastChangeFromMonHeadTable(Element monHead) {
if (monHead.select("td[align=right]").size() == 0) return null;
String lastChange = null;
Pattern pattern = Pattern.compile("\\d\\d\\.\\d\\d\\.\\d\\d\\d\\d \\d\\d:\\d\\d");
Matcher matcher = pattern.matcher(monHead.select("td[align=right]").first().text());
if (matcher.find()) {
lastChange = matcher.group();
} else if (monHead.text().contains("Stand: ")) {
lastChange = monHead.text().substring(monHead.text().indexOf("Stand:") + "Stand:".length()).trim();
}
return lastChange;
}
private static boolean equalsOrNull(String a, String b) {
return a == null || b == null || a.equals(b);
}
/**
* Parst eine Vertretungstabelle eines Untis-Vertretungsplans
*
* @param table
* das table
-Element des HTML-Dokuments, das geparst
* werden soll
* @param data
* Daten von der Schule (aus Schule.getData()
)
* @param day
* der {@link SubstitutionScheduleDay} in dem die Vertretungen
* gespeichert werden sollen
* @throws JSONException
*/
protected void parseVertretungsplanTable(Element table, JSONObject data,
SubstitutionScheduleDay day) throws JSONException {
if (data.optBoolean("class_in_extra_line")) {
for (Element element : table.select("td.inline_header")) {
String className = getClassName(element.text(), data);
if (isValidClass(className)) {
Element zeile = null;
try {
zeile = element.parent().nextElementSibling();
if (zeile.select("td") == null) {
zeile = zeile.nextElementSibling();
}
int skipLines = 0;
while (zeile != null
&& !zeile.select("td").attr("class")
.equals("list inline_header")) {
if (skipLines > 0) {
skipLines --;
zeile = zeile.nextElementSibling();
continue;
}
Substitution v = new Substitution();
int i = 0;
for (Element spalte : zeile.select("td")) {
String text = spalte.text();
if (isEmpty(text)) {
i++;
continue;
}
int skipLinesForThisColumn = 0;
Element nextLine = zeile.nextElementSibling();
boolean continueSkippingLines = true;
while (continueSkippingLines) {
if (nextLine != null && nextLine.children().size() == zeile.children().size()) {
Element columnInNextLine = nextLine.child(spalte
.elementSiblingIndex());
if (columnInNextLine.text().replaceAll("\u00A0", "").trim().equals(
nextLine.text().replaceAll("\u00A0", "").trim())) {
// Continued in the next line
text += " " + columnInNextLine.text();
skipLinesForThisColumn++;
nextLine = nextLine.nextElementSibling();
} else {
continueSkippingLines = false;
}
} else {
continueSkippingLines = false;
}
}
if (skipLinesForThisColumn > skipLines) skipLines = skipLinesForThisColumn;
String type = data.getJSONArray("columns")
.getString(i);
switch (type) {
case "lesson":
v.setLesson(text);
break;
case "subject":
handleSubject(v, spalte);
break;
case "previousSubject":
v.setPreviousSubject(text);
break;
case "type":
v.setType(text);
v.setColor(colorProvider.getColor(text));
break;
case "type-entfall":
if (text.equals("x")) {
v.setType("Entfall");
v.setColor(colorProvider.getColor("Entfall"));
} else {
v.setType("Vertretung");
v.setColor(colorProvider.getColor("Vertretung"));
}
break;
case "room":
handleRoom(v, spalte);
break;
case "teacher":
handleTeacher(v, spalte);
break;
case "previousTeacher":
v.setPreviousTeacher(text);
break;
case "desc":
v.setDesc(text);
break;
case "desc-type":
v.setDesc(text);
String recognizedType = recognizeType(text);
v.setType(recognizedType);
v.setColor(colorProvider.getColor(recognizedType));
break;
case "previousRoom":
v.setPreviousRoom(text);
break;
}
i++;
}
if (v.getType() == null) {
v.setType("Vertretung");
v.setColor(colorProvider.getColor("Vertretung"));
}
v.getClasses().add(className);
if (v.getLesson() != null && !v.getLesson().equals("")) {
day.addSubstitution(v);
}
zeile = zeile.nextElementSibling();
}
} catch (Throwable e) {
e.printStackTrace();
}
}
}
} else {
boolean hasType = false;
for (int i = 0; i < data.getJSONArray("columns").length(); i++) {
if (data.getJSONArray("columns").getString(i).equals("type"))
hasType = true;
}
Substitution previousSubstitution = null;
int skipLines = 0;
for (Element zeile : table
.select("tr.list.odd:not(:has(td.inline_header)), "
+ "tr.list.even:not(:has(td.inline_header)), "
+ "tr:has(td[align=center]):gt(0)")) {
if (skipLines > 0) {
skipLines --;
continue;
}
Substitution v = new Substitution();
String klassen = "";
int i = 0;
for (Element spalte : zeile.select("td")) {
String text = spalte.text();
if (isEmpty(text)) {
i++;
continue;
}
int skipLinesForThisColumn = 0;
Element nextLine = zeile.nextElementSibling();
boolean continueSkippingLines = true;
while (continueSkippingLines) {
if (nextLine != null && nextLine.children().size() == zeile.children().size()) {
Element columnInNextLine = nextLine.child(spalte
.elementSiblingIndex());
if (columnInNextLine.text().replaceAll("\u00A0", "").trim().equals(
nextLine.text().replaceAll("\u00A0", "").trim())) {
// Continued in the next line
text += " " + columnInNextLine.text();
skipLinesForThisColumn++;
nextLine = nextLine.nextElementSibling();
} else {
continueSkippingLines = false;
}
} else {
continueSkippingLines = false;
}
}
if (skipLinesForThisColumn > skipLines) skipLines = skipLinesForThisColumn;
String type = data.getJSONArray("columns").getString(i);
switch (type) {
case "lesson":
v.setLesson(text);
break;
case "subject":
handleSubject(v, spalte);
break;
case "previousSubject":
v.setPreviousSubject(text);
break;
case "type":
v.setType(text);
v.setColor(colorProvider.getColor(text));
break;
case "type-entfall":
if (text.equals("x")) {
v.setType("Entfall");
v.setColor(colorProvider.getColor("Entfall"));
} else if (!hasType) {
v.setType("Vertretung");
v.setColor(colorProvider.getColor("Vertretung"));
}
break;
case "room":
handleRoom(v, spalte);
break;
case "previousRoom":
v.setPreviousRoom(text);
break;
case "desc":
v.setDesc(text);
break;
case "desc-type":
v.setDesc(text);
String recognizedType = recognizeType(text);
v.setType(recognizedType);
v.setColor(colorProvider.getColor(recognizedType));
break;
case "teacher":
handleTeacher(v, spalte);
break;
case "previousTeacher":
v.setPreviousTeacher(text);
break;
case "class":
klassen = getClassName(text, data);
break;
}
i++;
}
if (v.getLesson() == null || v.getLesson().equals("")) {
continue;
}
if (v.getType() == null) {
if ((zeile.select("strike").size() > 0 && equalsOrNull(v.getSubject(), v.getPreviousSubject()) &&
equalsOrNull(v.getTeacher(), v.getPreviousTeacher()))
|| (v.getSubject() == null && v.getRoom() == null && v
.getTeacher() == null && v.getPreviousSubject() != null)) {
v.setType("Entfall");
v.setColor(colorProvider.getColor("Entfall"));
} else {
v.setType("Vertretung");
v.setColor(colorProvider.getColor("Vertretung"));
}
}
List affectedClasses;
// Detect things like "5-12"
Pattern pattern = Pattern.compile("(\\d+) ?- ?(\\d+)");
Matcher matcher = pattern.matcher(klassen);
if (matcher.find()) {
affectedClasses = new ArrayList();
int min = Integer.parseInt(matcher.group(1));
int max = Integer.parseInt(matcher.group(2));
try {
for (String klasse : getAllClasses()) {
Pattern pattern2 = Pattern.compile("\\d+");
Matcher matcher2 = pattern2.matcher(klasse);
if (matcher2.find()) {
int num = Integer.parseInt(matcher2.group());
if (min <= num && num <= max)
affectedClasses.add(klasse);
}
}
} catch (IOException e) {
e.printStackTrace();
}
} else {
if (data.optBoolean("classes_separated", true)) {
affectedClasses = Arrays.asList(klassen.split(", "));
} else {
affectedClasses = new ArrayList();
try {
for (String klasse : getAllClasses()) { // TODO:
// Gibt es
// eine
// bessere
// Möglichkeit?
StringBuilder regex = new StringBuilder();
for (char character : klasse.toCharArray()) {
regex.append(character);
regex.append(".*");
}
if (klassen.matches(regex.toString()))
affectedClasses.add(klasse);
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
for (String klasse : affectedClasses) {
if (isValidClass(klasse)) {
v.getClasses().add(klasse);
}
}
day.addSubstitution(v);
previousSubstitution = v;
}
}
}
private void handleTeacher(Substitution subst, Element cell) {
if (cell.select("s").size() > 0) {
subst.setPreviousTeacher(cell.select("s").text());
if (cell.ownText().length() > 0) {
subst.setTeacher(cell.ownText().replaceFirst("^\\?", ""));
}
} else {
subst.setTeacher(cell.text());
}
}
private void handleRoom(Substitution subst, Element cell) {
if (cell.select("s").size() > 0) {
subst.setPreviousRoom(cell.select("s").text());
if (cell.ownText().length() > 0) {
subst.setRoom(cell.ownText().replaceFirst("^\\?", ""));
}
} else {
subst.setRoom(cell.text());
}
}
private void handleSubject(Substitution subst, Element cell) {
if (cell.select("s").size() > 0) {
subst.setPreviousSubject(cell.select("s").text());
if (cell.ownText().length() > 0) {
subst.setSubject(cell.ownText().replaceFirst("^\\?", ""));
}
} else {
subst.setSubject(cell.text());
}
}
private boolean isEmpty(String text) {
return text.trim().equals("") || text.trim().equals("---");
}
/**
* Parst eine "Nachrichten zum Tag"-Tabelle aus Untis-Vertretungsplänen
* @param table
* das table
-Element des HTML-Dokuments, das geparst
* werden soll
* @param day
* der {@link SubstitutionScheduleDay} in dem die Nachrichten
*/
protected void parseMessages(Element table, SubstitutionScheduleDay day) {
Elements zeilen = table
.select("tr:not(:contains(Nachrichten zum Tag))");
for (Element i : zeilen) {
Elements spalten = i.select("td");
String info = "";
for (Element b : spalten) {
info += "\n"
+ TextNode.createFromEncoded(b.html(), null)
.getWholeText();
}
info = info.substring(1); // remove first \n
day.addMessage(info);
}
}
protected SubstitutionScheduleDay parseMonitorVertretungsplanTag(Element doc, JSONObject data) throws
JSONException {
SubstitutionScheduleDay day = new SubstitutionScheduleDay();
String date = doc.select(".mon_title").first().text().replaceAll(" \\(Seite \\d+ / \\d+\\)", "");
day.setDateString(date);
day.setDate(ParserUtils.parseDate(date));
if (!scheduleData.getData().has("lastChangeSelector")) {
String lastChange = findLastChange(doc, scheduleData);
day.setLastChangeString(lastChange);
day.setLastChange(ParserUtils.parseDateTime(lastChange));
}
// NACHRICHTEN
if (doc.select("table.info").size() > 0) {
parseMessages(doc.select("table.info").first(), day);
}
// VERTRETUNGSPLAN
if (doc.select("table:has(tr.list)").size() > 0)
parseVertretungsplanTable(doc.select("table:has(tr.list)").first(), data, day);
return day;
}
private boolean isValidClass(String klasse) throws JSONException {
return klasse != null && !Arrays.asList(EXCLUDED_CLASS_NAMES).contains(klasse) &&
!(scheduleData.getData().has("exclude_classes") &&
contains(scheduleData.getData().getJSONArray("exclude_classes"), klasse));
}
@Override
public List getAllClasses() throws IOException, JSONException {
return getClassesFromJson();
}
protected void parseDay(SubstitutionScheduleDay day, Element next, SubstitutionSchedule v) throws JSONException {
if (next.className().equals("subst")) {
//Vertretungstabelle
if (next.text().contains("Vertretungen sind nicht freigegeben")) {
return;
}
parseVertretungsplanTable(next, scheduleData.getData(), day);
} else {
//Nachrichten
parseMessages(next, day);
next = next.nextElementSibling().nextElementSibling();
parseVertretungsplanTable(next, scheduleData.getData(), day);
}
v.addDay(day);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy