com.conveyal.gtfs.loader.JdbcGtfsExporter (gtfs-lib)
A library to load and index GTFS feeds of arbitrary size using disk-backed storage
package com.conveyal.gtfs.loader;
import com.conveyal.gtfs.GTFSFeed;
import com.conveyal.gtfs.model.Calendar;
import com.conveyal.gtfs.model.CalendarDate;
import com.conveyal.gtfs.model.ScheduleException;
import com.conveyal.gtfs.model.Service;
import org.postgresql.copy.CopyManager;
import org.postgresql.core.BaseConnection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.sql.DataSource;
import java.io.FileOutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.Connection;
import java.sql.SQLException;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
/**
* Handles exporting a feed contained in the database to a GTFS zip file.
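 * <p>
 * A minimal usage sketch (the feed namespace, output path, and DataSource below are illustrative placeholders,
 * not values defined by this library):
 * <pre>{@code
 *     JdbcGtfsExporter exporter = new JdbcGtfsExporter("abcd_1234", "/tmp/feed.zip", dataSource, false);
 *     FeedLoadResult result = exporter.exportTables();
 * }</pre>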
*/
public class JdbcGtfsExporter {
private static final Logger LOG = LoggerFactory.getLogger(JdbcGtfsExporter.class);
private final String outFile;
private final DataSource dataSource;
private final boolean fromEditor;
    // These fields will be filled in once the feed export begins.
private Connection connection;
private ZipOutputStream zipOutputStream;
// The reference feed ID (namespace) to copy.
private final String feedIdToExport;
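    // Zip entry names for tables that exported zero rows; these entries are removed from the zip after export completes.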
    private List<String> emptyTableList = new ArrayList<>();
public JdbcGtfsExporter(String feedId, String outFile, DataSource dataSource, boolean fromEditor) {
this.feedIdToExport = feedId;
this.outFile = outFile;
this.dataSource = dataSource;
this.fromEditor = fromEditor;
}
/**
* Export primary entity tables as well as Pattern and PatternStops tables.
*
* FIXME: This should probably only export main GTFS tables... unless there is a benefit to keeping editor fields or
* tables intact for future editing. Maybe this could be an option provided to the function (e.g., specTablesOnly).
*/
public FeedLoadResult exportTables() {
// This result object will be returned to the caller to summarize the feed and report any critical errors.
// FIXME: use feed export result object?
FeedLoadResult result = new FeedLoadResult();
try {
zipOutputStream = new ZipOutputStream(new FileOutputStream(outFile));
long startTime = System.currentTimeMillis();
// We get a single connection object and share it across several different methods.
// This ensures that actions taken in one method are visible to all subsequent SQL statements.
// If we create a schema or table on one connection, then access it in a separate connection, we have no
// guarantee that it exists when the accessing statement is executed.
connection = dataSource.getConnection();
if (!connection.getMetaData().getDatabaseProductName().equals("PostgreSQL")) {
// This code path currently requires the Postgres text "copy to" format.
throw new RuntimeException("Export from SQL database not implemented for non-PostgreSQL databases.");
}
// Construct where clause for routes table to filter only "approved" routes (and entities related to routes)
// if exporting a feed/schema that represents an editor snapshot.
String whereRouteIsApproved = String.format("where %s.%s.status = 2", feedIdToExport, Table.ROUTES.name);
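            // For example, with a hypothetical feed namespace of "abcd_1234" this yields: where abcd_1234.routes.status = 2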
// Export each table in turn (by placing entry in zip output stream).
result.agency = export(Table.AGENCY);
result.calendar = export(Table.CALENDAR);
if (fromEditor) {
// Export schedule exceptions in place of calendar dates if exporting a feed/schema that represents an editor snapshot.
GTFSFeed feed = new GTFSFeed();
// FIXME: The below table readers should probably just share a connection with the exporter.
                JDBCTableReader<ScheduleException> exceptionsReader =
                        new JDBCTableReader(Table.SCHEDULE_EXCEPTIONS, dataSource, feedIdToExport + ".",
                                EntityPopulator.SCHEDULE_EXCEPTION);
                JDBCTableReader<Calendar> calendarsReader =
                        new JDBCTableReader(Table.CALENDAR, dataSource, feedIdToExport + ".",
                                EntityPopulator.CALENDAR);
                Iterable<Calendar> calendars = calendarsReader.getAll();
                Iterable<ScheduleException> exceptionsIterator = exceptionsReader.getAll();
                List<ScheduleException> exceptions = new ArrayList<>();
// FIXME: Doing this causes the connection to stay open, but it is closed in the finalizer so it should
// not be a big problem.
for (ScheduleException exception : exceptionsIterator) {
exceptions.add(exception);
}
int calendarDateCount = 0;
for (Calendar cal : calendars) {
Service service = new Service(cal.service_id);
service.calendar = cal;
for (ScheduleException ex : exceptions) {
if (ex.exemplar.equals(ScheduleException.ExemplarServiceDescriptor.SWAP) &&
(!ex.addedService.contains(cal.service_id) && !ex.removedService.contains(cal.service_id))) {
// Skip swap exception if cal is not referenced by added or removed service.
// This is not technically necessary, but the output is cleaner/more intelligible.
continue;
}
for (LocalDate date : ex.dates) {
if (date.isBefore(cal.start_date) || date.isAfter(cal.end_date)) {
// No need to write dates that do not apply
continue;
}
CalendarDate calendarDate = new CalendarDate();
calendarDate.date = date;
calendarDate.service_id = cal.service_id;
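                            // Per the GTFS spec, exception_type 1 means service is added for the date, 2 means it is removed.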
calendarDate.exception_type = ex.serviceRunsOn(cal) ? 1 : 2;
LOG.info("Adding exception {} (type={}) for calendar {} on date {}", ex.name, calendarDate.exception_type, cal.service_id, date.toString());
if (service.calendar_dates.containsKey(date))
throw new IllegalArgumentException("Duplicate schedule exceptions on " + date.toString());
service.calendar_dates.put(date, calendarDate);
calendarDateCount += 1;
}
}
feed.services.put(cal.service_id, service);
}
if (calendarDateCount == 0) {
LOG.info("No calendar dates found. Skipping table.");
} else {
LOG.info("Writing {} calendar dates from schedule exceptions", calendarDateCount);
new CalendarDate.Writer(feed).writeTable(zipOutputStream);
}
} else {
// Otherwise, simply export the calendar dates as they were loaded in.
result.calendarDates = export(Table.CALENDAR_DATES);
}
result.fareAttributes = export(Table.FARE_ATTRIBUTES);
result.fareRules = export(Table.FARE_RULES);
result.feedInfo = export(Table.FEED_INFO);
// Only write frequencies for "approved" routes using COPY TO with results of select query
String frequencySelectSql = null;
if (fromEditor) {
// Generate filter SQL for trips if exporting a feed/schema that represents an editor snapshot.
// The filter clause for frequencies requires two joins to reach the routes table and a where filter on
// route status.
String scopedStartTime = qualifyField(Table.FREQUENCIES, "start_time");
String scopedEndTime = qualifyField(Table.FREQUENCIES, "end_time");
// FIXME Replace with string literal query instead of clause generators
frequencySelectSql = String.join(" ",
// Convert start_time and end_time values from seconds to time format (HH:MM:SS).
Table.FREQUENCIES.generateSelectSql(feedIdToExport, Requirement.OPTIONAL)
.replace(scopedStartTime, convertSecondsToTime(scopedStartTime, "start_time"))
.replace(scopedEndTime, convertSecondsToTime(scopedEndTime, "end_time")),
Table.FREQUENCIES.generateJoinSql(Table.TRIPS, feedIdToExport),
Table.TRIPS.generateJoinSql(Table.ROUTES, feedIdToExport, "route_id", false),
whereRouteIsApproved);
}
result.frequencies = export(Table.FREQUENCIES, frequencySelectSql);
// Only write "approved" routes using COPY TO with results of select query
String routeSelectSql = null;
if (fromEditor) {
// The filter clause for routes is simple. We're just checking that the route is APPROVED.
routeSelectSql = String.join(" ",
Table.ROUTES.generateSelectSql(feedIdToExport, Requirement.OPTIONAL),
whereRouteIsApproved);
}
result.routes = export(Table.ROUTES, routeSelectSql);
// FIXME: Find some place to store errors encountered on export for patterns and pattern stops.
// FIXME: Is there a need to export patterns or pattern stops? Should these be iterated over to ensure that
// frequency-based pattern travel times match stop time arrivals/departures?
// export(Table.PATTERNS);
// export(Table.PATTERN_STOP);
// Only write shapes for "approved" routes using COPY TO with results of select query
String shapeSelectSql = null;
if (fromEditor) {
// Generate filter SQL for shapes if exporting a feed/schema that represents an editor snapshot.
// The filter clause for shapes requires joining to trips and then to routes table and a where filter on
// route status.
// FIXME: I'm not sure that shape_id is indexed for the trips table. This could cause slow downs.
// FIXME: this is exporting point_type, which is not a GTFS field, but its presence shouldn't hurt.
String shapeFieldsToExport = Table.commaSeparatedNames(
Table.SHAPES.specFields(), String.join(".", feedIdToExport, Table.SHAPES.name + "."));
                // NOTE: The below substitution uses relative indexing. All values "%<s" reference the same argument
                // as the previous format specifier (i.e., feedIdToExport).
                shapeSelectSql = String.format(
                        "select %s " +
                                "from (select distinct %s.trips.shape_id " +
                                "from %<s.trips, %<s.routes " +
                                "where %<s.trips.route_id = %<s.routes.route_id and %<s.routes.status = 2) as unique_approved_shape_ids, " +
                                "%<s.shapes where unique_approved_shape_ids.shape_id = %<s.shapes.shape_id",
                        shapeFieldsToExport,
                        feedIdToExport);
            }
            result.shapes = export(Table.SHAPES, shapeSelectSql);
            result.stops = export(Table.STOPS);
            // Only write stop times and trips for "approved" routes using COPY TO with results of select queries.
            String stopTimesSelectSql = null;
            String tripSelectSql = null;
            if (fromEditor) {
                // The filter clauses for stop times and trips join through to the routes table and check route status.
                stopTimesSelectSql = String.join(" ",
                        Table.STOP_TIMES.generateSelectSql(feedIdToExport, Requirement.OPTIONAL),
                        Table.STOP_TIMES.generateJoinSql(Table.TRIPS, feedIdToExport),
                        Table.TRIPS.generateJoinSql(Table.ROUTES, feedIdToExport, "route_id", false),
                        whereRouteIsApproved);
                tripSelectSql = String.join(" ",
                        Table.TRIPS.generateSelectSql(feedIdToExport, Requirement.OPTIONAL),
                        Table.TRIPS.generateJoinSql(Table.ROUTES, feedIdToExport, "route_id", false),
                        whereRouteIsApproved);
            }
            result.stopTimes = export(Table.STOP_TIMES, stopTimesSelectSql);
            result.transfers = export(Table.TRANSFERS);
            result.trips = export(Table.TRIPS, tripSelectSql);
            // Close the zip output stream, then reopen the file to strip out any empty tables.
            zipOutputStream.close();
            cleanUpZipFile();
            result.completionTime = System.currentTimeMillis();
            result.loadTimeMillis = result.completionTime - startTime;
            LOG.info("Exported feed {} to zip file {} in {} ms", feedIdToExport, outFile, result.loadTimeMillis);
        } catch (Exception ex) {
            // Exceptions thrown while exporting individual tables are caught and stored in their TableLoadResults;
            // anything caught here is fatal to the export as a whole.
            LOG.error("Exception while exporting feed", ex);
            result.fatalException = ex.toString();
        }
        return result;
    }

    /**
     * Removes any empty entries (tables that exported zero rows) from the output zip file.
     */
    private void cleanUpZipFile() {
        long startTime = System.currentTimeMillis();
        // Define ZIP File System properties in a HashMap.
        Map<String, String> zip_properties = new HashMap<>();
// We want to read an existing ZIP File, so we set this to False
zip_properties.put("create", "false");
// Specify the path to the ZIP File that you want to read as a File System
URI zip_disk = URI.create("jar:file://" + outFile);
// Create ZIP file System
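        // The "jar:" URI scheme mounts the zip with Java's built-in zip file system provider, so entries can be
        // deleted in place without manually rewriting the archive.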
try (FileSystem fileSystem = FileSystems.newFileSystem(zip_disk, zip_properties)) {
// Get the Path inside ZIP File to delete the ZIP Entry
for (String fileName : emptyTableList) {
Path filePath = fileSystem.getPath(fileName);
// Execute Delete
Files.delete(filePath);
LOG.info("Empty file {} successfully deleted", fileName);
}
} catch (IOException e) {
LOG.error("Could not remove empty zip files");
e.printStackTrace();
}
LOG.info("Deleted {} empty files in {} ms", emptyTableList.size(), System.currentTimeMillis() - startTime);
}
private TableLoadResult export (Table table) {
if (fromEditor) {
// Default behavior for exporting editor snapshot tables is to select only the spec fields.
return export(table, table.generateSelectSql(feedIdToExport, Requirement.OPTIONAL));
} else {
return export(table, null);
}
}
private TableLoadResult export (Table table, String filterSql) {
long startTime = System.currentTimeMillis();
TableLoadResult tableLoadResult = new TableLoadResult();
try {
// Create entry for table
String textFileName = table.name + ".txt";
ZipEntry zipEntry = new ZipEntry(textFileName);
zipOutputStream.putNextEntry(zipEntry);
            // Wrap the zip output stream so the copy operation cannot close it; entries are closed explicitly below.
OutputStream protectedOut = new FilterOutputStream(zipOutputStream);
if (filterSql == null) {
// If there is no filter SQL specified, simply copy out the whole table.
filterSql = String.format("%s.%s", feedIdToExport, table.name);
} else {
// Surround filter SQL in parentheses.
filterSql = String.format("(%s)", filterSql);
}
String copySql = String.format("copy %s to STDOUT DELIMITER ',' CSV HEADER", filterSql);
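            // The resulting statement resembles "copy abcd_1234.agency to STDOUT DELIMITER ',' CSV HEADER" for a whole
            // table, or "copy (select ...) to STDOUT DELIMITER ',' CSV HEADER" when filter SQL is supplied
            // (the feed namespace "abcd_1234" is a hypothetical example).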
LOG.info(copySql);
// Our connection pool wraps the Connection objects, so we need to unwrap the Postgres connection interface.
CopyManager copyManager = new CopyManager(connection.unwrap(BaseConnection.class));
tableLoadResult.rowCount = (int) copyManager.copyOut(copySql, protectedOut);
if (tableLoadResult.rowCount == 0) {
// If no rows were exported, keep track of table name for later removal.
emptyTableList.add(textFileName);
}
zipOutputStream.closeEntry();
LOG.info("Copied {} {} in {} ms.", tableLoadResult.rowCount, table.name, System.currentTimeMillis() - startTime);
connection.commit();
} catch (SQLException | IOException e) {
            // Roll back the connection so that a fatal exception does not impact the export of other tables.
try {
connection.rollback();
} catch (SQLException ex) {
ex.printStackTrace();
}
tableLoadResult.fatalException = e.getMessage();
LOG.error("Exception while exporting tables", e);
}
return tableLoadResult;
}
}