All downloads are free. Search and download functionality uses the official Maven repository.

me.vertretungsplan.parser.DaVinciParser Maven / Gradle / Ivy

Go to download

Java library for parsing schools' substitution schedules. Supports multiple different systems mainly used in the German-speaking countries.

There is a newer version: 1.0.0-beta356
Show newest version
/*
 * substitution-schedule-parser - Java library for parsing schools' substitution schedules
 * Copyright (c) 2016 Johan v. Forstner
 *
 * This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
 * If a copy of the MPL was not distributed with this file, You can obtain one at https://mozilla.org/MPL/2.0/.
 */

package me.vertretungsplan.parser;

import me.vertretungsplan.exception.CredentialInvalidException;
import me.vertretungsplan.objects.Substitution;
import me.vertretungsplan.objects.SubstitutionSchedule;
import me.vertretungsplan.objects.SubstitutionScheduleData;
import me.vertretungsplan.objects.SubstitutionScheduleDay;
import org.jetbrains.annotations.NotNull;
import org.joda.time.LocalDateTime;
import org.joda.time.format.DateTimeFormat;
import org.json.JSONArray;
import org.json.JSONException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.net.URL;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class DaVinciParser extends BaseParser {
    // Character encoding passed to httpGet for every page this parser fetches.
    private static final String ENCODING = "UTF-8";

    /**
     * Creates a DaVinci parser by handing both arguments unchanged to the superclass constructor.
     *
     * @param scheduleData   schedule configuration, forwarded to the superclass
     * @param cookieProvider cookie provider, forwarded to the superclass
     */
    public DaVinciParser(final SubstitutionScheduleData scheduleData, final CookieProvider cookieProvider) {
        super(scheduleData, cookieProvider);
    }

    /**
     * Parses a DaVinci table into {@code day} without forcing a fixed class onto the parsed rows.
     * Delegates to the four-argument overload, passing {@code null} as the class.
     *
     * @param table         table element to read
     * @param day           day that receives the parsed substitutions
     * @param colorProvider passed through to the four-argument overload
     */
    public static void parseDaVinciTable(Element table, SubstitutionScheduleDay day, ColorProvider colorProvider) {
        final String noFixedClass = null;
        parseDaVinciTable(table, day, noFixedClass, colorProvider);
    }

    /**
     * Reads the data rows of a DaVinci substitution table and adds one {@link Substitution} per row to {@code day}.
     * <p>
     * Column meaning is taken from the header cells ({@code thead tr th}, or {@code td} cells with
     * {@code bgcolor=#9999FF}); each data cell is interpreted according to the header text at the same index.
     * Cells whose header is not handled below are ignored. An empty "Klasse" or "Pos" cell reuses the value of
     * the most recent row that had one, and an empty "Art"/"Merkmal" cell yields the type "Vertretung".
     * Values of the form {@code +new (old)} in the "Lehrer", "Fach" and "Raum" columns are split into the
     * current and the previous value.
     * <p>
     * Rows that contain fewer {@code td} cells than there are headers cannot be mapped to columns and are skipped.
     *
     * @param table         table element to read
     * @param day           day that receives the parsed substitutions
     * @param klasse        if non-null, every parsed substitution is assigned exactly this class, replacing
     *                      whatever the "Klasse" column contained
     * @param colorProvider supplies the color of each substitution from its type
     */
    public static void parseDaVinciTable(Element table, SubstitutionScheduleDay day, String klasse, ColorProvider colorProvider) {
        List<String> headers = new ArrayList<>();
        for (Element header : table.select("thead tr th, tr td[bgcolor=#9999FF]")) {
            headers.add(header.text());
        }

        // These two variables carry over from row to row: a row whose "Klasse" or "Pos" cell is empty
        // inherits the value of the last row that had one.
        Set<String> classes = new HashSet<>();
        String lesson = null;

        // Matches "+current (previous)".
        Pattern previousCurrentPattern = Pattern.compile("\\+([^\\s]+) \\(([^)]+)\\)");

        for (Element row : table.select("tr:not(thead tr, tr:has(td[bgcolor=#9999FF]))")) {
            Elements columns = row.select("td");
            if (columns.size() < headers.size()) {
                // Not enough cells to line up with the headers (would otherwise fail in columns.get(i)).
                continue;
            }

            Substitution subst = new Substitution();
            for (int i = 0; i < headers.size(); i++) {
                String value = columns.get(i).text().replace("\u00a0", "");
                String header = headers.get(i);

                if (value.isEmpty()) {
                    if (header.equals("Klasse")) subst.setClasses(classes);
                    if (header.equals("Pos")) subst.setLesson(lesson);
                    if (header.equals("Art") || header.equals("Merkmal")) subst.setType("Vertretung");
                    continue;
                }

                Matcher matcher = previousCurrentPattern.matcher(value);

                switch (header) {
                    case "Klasse":
                        // Split on the comma together with surrounding blanks so "5a, 5b" yields "5a" and "5b".
                        classes = new HashSet<>(Arrays.asList(value.split("\\s*,\\s*")));
                        subst.setClasses(classes);
                        break;
                    case "Pos":
                        lesson = value;
                        subst.setLesson(lesson);
                        break;
                    case "VLehrer Kürzel":
                    case "VLehrer":
                    case "Vertreter":
                        // Values starting with "*" are ignored; "Raumänderung" in this column is a type, not a teacher.
                        if (!value.startsWith("*")) {
                            if (value.equals("Raumänderung")) {
                                subst.setType(value);
                            } else {
                                subst.setTeacher(value);
                            }
                        }
                        break;
                    case "Lehrer":
                    case "Lehrer Kürzel":
                        if (matcher.find()) {
                            subst.setTeacher(matcher.group(1));
                            subst.setPreviousTeacher(matcher.group(2));
                        } else {
                            subst.setPreviousTeacher(value);
                        }
                        break;
                    case "VFach":
                        subst.setSubject(value);
                        break;
                    case "Fach":
                        if (matcher.find()) {
                            subst.setSubject(matcher.group(1));
                            subst.setPreviousSubject(matcher.group(2));
                        } else {
                            subst.setPreviousSubject(value);
                        }
                        break;
                    case "VRaum":
                        subst.setRoom(value);
                        break;
                    case "Raum":
                        if (matcher.find()) {
                            subst.setRoom(matcher.group(1));
                            subst.setPreviousRoom(matcher.group(2));
                        } else {
                            subst.setPreviousRoom(value);
                        }
                        break;
                    case "Art":
                    case "Merkmal":
                        subst.setType(value);
                        break;
                    case "Info":
                    case "Mitteilung":
                        subst.setDesc(value);
                        break;
                }
            }
            if (klasse != null) {
                // A fixed class overrides anything read from the "Klasse" column.
                Set<String> fixedClasses = new HashSet<>();
                fixedClasses.add(klasse);
                subst.setClasses(fixedClasses);
            }
            if (subst.getType() == null) {
                // No type column matched: try to derive one from the description, else fall back to "Vertretung".
                String recognizedType = null;
                if (subst.getDesc() != null) recognizedType = recognizeType(subst.getDesc());
                subst.setType(recognizedType != null ? recognizedType : "Vertretung");
            }
            subst.setColor(colorProvider.getColor(subst.getType()));
            day.addSubstitution(subst);
        }
    }

    /**
     * Downloads the page configured under the "url" key and builds the schedule from it.
     * <p>
     * The start page is handled in one of four ways, checked in this order:
     * a list of classes ({@code ul.classes}), a calendar view ({@code ul.month}), a list of days
     * ({@code ul.day-index}), or otherwise a single day. For the three list forms every linked page is
     * fetched and parsed as a day; calendar entries whose {@code onclick} contains no target URL are skipped.
     * Afterwards the website, class list and teacher list are set on the schedule.
     *
     * @return the populated schedule
     */
    @Override
    public SubstitutionSchedule getSubstitutionSchedule() throws IOException, JSONException, CredentialInvalidException {
        final SubstitutionSchedule result = SubstitutionSchedule.fromData(scheduleData);

        final String url = scheduleData.getData().getString("url");
        final Document startPage = Jsoup.parse(httpGet(url, ENCODING));

        if (!startPage.select("ul.classes").isEmpty()) {
            // Start page links to one page per class.
            for (Element classLink : startPage.select("ul.classes li a")) {
                final URL target = new URL(new URL(url), classLink.attr("href"));
                final String html = httpGet(target.toString(), ENCODING);
                result.addDay(parseDay(Jsoup.parse(html)));
            }
        } else if (!startPage.select("ul.month").isEmpty()) {
            // Calendar view: the target of each day is hidden in the onclick handler.
            for (Element dayButton : startPage.select("ul.month li input[onclick]")) {
                final String relative = urlFromOnclick(dayButton.attr("onclick"));
                if (relative != null) {
                    final URL target = new URL(new URL(url), relative);
                    final String html = httpGet(target.toString(), ENCODING);
                    result.addDay(parseDay(Jsoup.parse(html)));
                }
            }
        } else if (!startPage.select("ul.day-index").isEmpty()) {
            // List view: one plain link per day.
            for (Element dayLink : startPage.select("ul.day-index li a")) {
                final URL target = new URL(new URL(url), dayLink.attr("href"));
                final String html = httpGet(target.toString(), ENCODING);
                result.addDay(parseDay(Jsoup.parse(html)));
            }
        } else {
            // The start page itself is the only day.
            result.addDay(parseDay(startPage));
        }

        result.setWebsite(url);
        result.setClasses(getAllClasses());
        result.setTeachers(getAllTeachers());

        return result;
    }

    /**
     * Extracts the target of a {@code window.location.href='...'} assignment from an onclick handler.
     *
     * @param onclick content of the onclick attribute
     * @return the text between the single quotes, or {@code null} if the handler contains no such assignment
     */
    private String urlFromOnclick(String onclick) {
        final Matcher hrefAssignment = Pattern.compile("window\\.location\\.href='([^']+)'").matcher(onclick);
        return hrefAssignment.find() ? hrefAssignment.group(1) : null;
    }

    /**
     * Builds a {@link SubstitutionScheduleDay} from a single DaVinci page.
     * <p>
     * The {@code h1.list-table-caption} element holds either the date or a class name; in the latter case
     * the date is read from the element following the caption and the class name is applied to every
     * substitution of the page. The last-change time is taken from the copyright row when it is present and
     * matches the expected format. The substitutions are read from the first {@code .list-table} element.
     *
     * @param doc parsed page
     * @return the parsed day, never {@code null}
     * @throws IOException if the caption or the date cannot be found, or if the page has no table although
     *                     its callout does not contain "Es liegen keine"
     */
    @NotNull
    private SubstitutionScheduleDay parseDay(Document doc) throws IOException {
        // NOTE(review): the '.' before the year is unescaped and therefore matches any character — confirm
        // whether that is intended before tightening it.
        final String dateRegex = "\\w+ \\d+\\.\\d+.\\d{4}";

        SubstitutionScheduleDay day = new SubstitutionScheduleDay();

        Element caption = doc.select("h1.list-table-caption").first();
        if (caption == null) {
            throw new IOException("Could not find table caption");
        }
        String title = caption.text();
        String klasse = null;
        // title can either be date or class
        if (title.matches(dateRegex)) {
            day.setDateString(title);
            day.setDate(ParserUtils.parseDate(title));
        } else {
            klasse = title;
            // The caption may be the last element of its parent, in which case there is no date to read.
            Element dateElement = caption.nextElementSibling();
            String nextText = dateElement != null ? dateElement.text() : null;
            if (nextText != null && nextText.matches(dateRegex)) {
                day.setDateString(nextText);
                day.setDate(ParserUtils.parseDate(nextText));
            } else {
                throw new IOException("Could not find date");
            }
        }

        // The last-change stamp is optional: skip it when the copyright row is missing or has another format.
        Element copyright = doc.select(".row.copyright div").first();
        if (copyright != null) {
            Pattern pattern = Pattern.compile("(\\d{2}-\\d{2}-\\d{4} \\d{2}:\\d{2}) \\|");
            Matcher matcher = pattern.matcher(copyright.ownText());
            if (matcher.find()) {
                LocalDateTime lastChangeTime =
                        DateTimeFormat.forPattern("dd-MM-yyyy HH:mm").parseLocalDateTime(matcher.group(1));
                day.setLastChange(lastChangeTime);
            }
        }

        Element table = doc.select(".list-table").first();
        if (table != null) {
            parseDaVinciTable(table, day, klasse, colorProvider);
        } else if (!doc.select(".callout").text().contains("Es liegen keine")) {
            // No table and no "Es liegen keine" callout: report it instead of dereferencing a null table.
            throw new IOException("Could not find substitution table");
        }
        return day;
    }

    /**
     * Returns the class names configured for this schedule.
     * <p>
     * If the schedule data has a "classesSource" entry, that URL is fetched and the text of every
     * {@code li.Class} element is returned. Otherwise, if it has a "classes" array, its string entries are
     * returned.
     *
     * @return the class names, or {@code null} when neither "classesSource" nor "classes" is configured
     */
    @Override
    public List<String> getAllClasses() throws IOException, JSONException {
        if (scheduleData.getData().has("classesSource")) {
            Document doc = Jsoup.parse(httpGet(scheduleData.getData().getString("classesSource"), ENCODING));
            List<String> classes = new ArrayList<>();
            for (Element li : doc.select("li.Class")) {
                classes.add(li.text());
            }
            return classes;
        } else if (scheduleData.getData().has("classes")) {
            JSONArray classesJson = scheduleData.getData().getJSONArray("classes");
            List<String> classes = new ArrayList<>();
            for (int i = 0; i < classesJson.length(); i++) {
                classes.add(classesJson.getString(i));
            }
            return classes;
        } else {
            return null;
        }
    }

    /**
     * Always returns {@code null}; this parser does not supply a list of teachers.
     *
     * @return {@code null}
     */
    @Override
    public List<String> getAllTeachers() {
        return null;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy