package com.conveyal.gtfs.loader;
import com.conveyal.gtfs.GTFSFeed;
import com.conveyal.gtfs.model.Calendar;
import com.conveyal.gtfs.model.CalendarDate;
import com.conveyal.gtfs.model.ScheduleException;
import com.conveyal.gtfs.model.Service;
import org.apache.commons.dbutils.DbUtils;
import org.postgresql.copy.CopyManager;
import org.postgresql.core.BaseConnection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.sql.DataSource;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.Connection;
import java.sql.SQLException;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
/**
* Handles exporting a feed contained in the database to a GTFS zip file.
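*
* A minimal usage sketch (illustrative only; the feed namespace, output path, and DataSource wiring depend on the
* calling application):
* <pre>{@code
* DataSource dataSource = ...; // connection pool pointing at the database that contains the loaded feed
* JdbcGtfsExporter exporter = new JdbcGtfsExporter("abcd_efgh", "/tmp/feed.zip", dataSource, false);
* FeedLoadResult result = exporter.exportTables();
* }</pre>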
*/
public class JdbcGtfsExporter {
private static final Logger LOG = LoggerFactory.getLogger(JdbcGtfsExporter.class);
private final String outFile;
private final DataSource dataSource;
private final boolean fromEditor;
// These fields will be filled in once feed snapshot begins.
private Connection connection;
private ZipOutputStream zipOutputStream;
// The reference feed ID (namespace) to copy.
private final String feedIdToExport;
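// Zip entry names of tables that produced zero rows on export; these entries are removed from the zip afterwards.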
private List<String> emptyTableList = new ArrayList<>();
public JdbcGtfsExporter(String feedId, String outFile, DataSource dataSource, boolean fromEditor) {
this.feedIdToExport = feedId;
this.outFile = outFile;
this.dataSource = dataSource;
this.fromEditor = fromEditor;
}
/**
* Export primary entity tables as well as Pattern and PatternStops tables.
*
* FIXME: This should probably only export main GTFS tables... unless there is a benefit to keeping editor fields or
* tables intact for future editing. Maybe this could be an option provided to the function (e.g., specTablesOnly).
*/
public FeedLoadResult exportTables() {
// This result object will be returned to the caller to summarize the feed and report any critical errors.
// FIXME: use feed export result object?
FeedLoadResult result = new FeedLoadResult();
try {
zipOutputStream = new ZipOutputStream(new FileOutputStream(outFile));
long startTime = System.currentTimeMillis();
// We get a single connection object and share it across several different methods.
// This ensures that actions taken in one method are visible to all subsequent SQL statements.
// If we create a schema or table on one connection, then access it in a separate connection, we have no
// guarantee that it exists when the accessing statement is executed.
connection = dataSource.getConnection();
if (!connection.getMetaData().getDatabaseProductName().equals("PostgreSQL")) {
// This code path currently requires the Postgres text "copy to" format.
throw new RuntimeException("Export from SQL database not implemented for non-PostgreSQL databases.");
}
// Construct where clause for routes table to filter only "approved" routes (and entities related to routes)
// if exporting a feed/schema that represents an editor snapshot.
String whereRouteIsApproved = String.format("where %s.%s.status = 2", feedIdToExport, Table.ROUTES.name);
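// For example, with a feed namespace of "abcd_efgh" this renders as: where abcd_efgh.routes.status = 2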
// Export each table in turn (by placing entry in zip output stream).
result.agency = export(Table.AGENCY, connection);
if (fromEditor) {
// only export calendar entries that have at least one day of service set
// this could happen in cases where a feed was imported that only had calendar_dates.txt
result.calendar = export(
Table.CALENDAR,
String.join(
" ",
Table.CALENDAR.generateSelectSql(feedIdToExport, Requirement.OPTIONAL),
"WHERE monday=1 OR tuesday=1 OR wednesday=1 OR thursday=1 OR friday=1 OR saturday=1 OR sunday=1"
)
);
} else {
result.calendar = export(Table.CALENDAR, connection);
}
if (fromEditor) {
// Export schedule exceptions in place of calendar dates if exporting a feed/schema that represents an editor snapshot.
GTFSFeed feed = new GTFSFeed();
// FIXME: The below table readers should probably just share a connection with the exporter.
JDBCTableReader<ScheduleException> exceptionsReader =
new JDBCTableReader<>(Table.SCHEDULE_EXCEPTIONS, dataSource, feedIdToExport + ".",
EntityPopulator.SCHEDULE_EXCEPTION);
JDBCTableReader<Calendar> calendarsReader =
new JDBCTableReader<>(Table.CALENDAR, dataSource, feedIdToExport + ".",
EntityPopulator.CALENDAR);
Iterable<Calendar> calendars = calendarsReader.getAll();
Iterable<ScheduleException> exceptionsIterator = exceptionsReader.getAll();
List<ScheduleException> exceptions = new ArrayList<>();
// FIXME: Doing this causes the connection to stay open, but it is closed in the finalizer so it should
// not be a big problem.
for (ScheduleException exception : exceptionsIterator) {
exceptions.add(exception);
}
// Check whether the feed is organized in a format with the calendar.txt file.
if (calendarsReader.getRowCount() > 0) {
// Feed does have a calendar.txt file; continue export with the strategy of matching exceptions
// to calendars in order to output calendar_dates.txt.
int calendarDateCount = 0;
for (Calendar cal : calendars) {
Service service = new Service(cal.service_id);
service.calendar = cal;
for (ScheduleException ex : exceptions) {
if (ex.exemplar.equals(ScheduleException.ExemplarServiceDescriptor.SWAP) &&
(!ex.addedService.contains(cal.service_id) && !ex.removedService.contains(cal.service_id))) {
// Skip swap exception if cal is not referenced by added or removed service.
// This is not technically necessary, but the output is cleaner/more intelligible.
continue;
}
for (LocalDate date : ex.dates) {
if (date.isBefore(cal.start_date) || date.isAfter(cal.end_date)) {
// No need to write dates that do not apply
continue;
}
CalendarDate calendarDate = new CalendarDate();
calendarDate.date = date;
calendarDate.service_id = cal.service_id;
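// GTFS exception_type semantics: 1 = service added on this date, 2 = service removed.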
calendarDate.exception_type = ex.serviceRunsOn(cal) ? 1 : 2;
LOG.info("Adding exception {} (type={}) for calendar {} on date {}", ex.name, calendarDate.exception_type, cal.service_id, date.toString());
if (service.calendar_dates.containsKey(date))
throw new IllegalArgumentException("Duplicate schedule exceptions on " + date.toString());
service.calendar_dates.put(date, calendarDate);
calendarDateCount += 1;
}
}
feed.services.put(cal.service_id, service);
}
if (calendarDateCount == 0) {
LOG.info("No calendar dates found. Skipping table.");
} else {
LOG.info("Writing {} calendar dates from schedule exceptions", calendarDateCount);
new CalendarDate.Writer(feed).writeTable(zipOutputStream);
}
} else {
// No calendar records exist, export calendar_dates as is and hope for the best.
// This situation will occur in at least 2 scenarios:
// 1. A GTFS has been loaded into the editor that had only the calendar_dates.txt file
// and no further edits were made before exporting to a snapshot
// 2. A new GTFS has been created from scratch and calendar information has yet to be added.
// This will result in an invalid GTFS, but it was what the user wanted so ¯\_(ツ)_/¯
result.calendarDates = export(Table.CALENDAR_DATES, connection);
}
} else {
// Otherwise, simply export the calendar dates as they were loaded in.
result.calendarDates = export(Table.CALENDAR_DATES, connection);
}
result.fareAttributes = export(Table.FARE_ATTRIBUTES, connection);
result.fareRules = export(Table.FARE_RULES, connection);
result.feedInfo = export(Table.FEED_INFO, connection);
// Only write frequencies for "approved" routes using COPY TO with results of select query
if (fromEditor) {
// Generate filter SQL for trips if exporting a feed/schema that represents an editor snapshot.
// The filter clause for frequencies requires two joins to reach the routes table and a where filter on
// route status.
// FIXME Replace with string literal query instead of clause generators
result.frequencies = export(
Table.FREQUENCIES,
String.join(
" ",
// Convert start_time and end_time values from seconds to time format (HH:MM:SS).
Table.FREQUENCIES.generateSelectSql(feedIdToExport, Requirement.OPTIONAL),
Table.FREQUENCIES.generateJoinSql(Table.TRIPS, feedIdToExport),
Table.TRIPS.generateJoinSql(
Table.ROUTES,
feedIdToExport,
"route_id",
false
),
whereRouteIsApproved
)
);
} else {
result.frequencies = export(Table.FREQUENCIES, connection);
}
// Only write "approved" routes using COPY TO with results of select query
if (fromEditor) {
// The filter clause for routes is simple. We're just checking that the route is APPROVED.
result.routes = export(
Table.ROUTES,
String.join(
" ",
Table.ROUTES.generateSelectSql(feedIdToExport, Requirement.OPTIONAL),
whereRouteIsApproved
)
);
} else {
result.routes = export(Table.ROUTES, connection);
}
// FIXME: Find some place to store errors encountered on export for patterns and pattern stops.
// FIXME: Is there a need to export patterns or pattern stops? Should these be iterated over to ensure that
// frequency-based pattern travel times match stop time arrivals/departures?
// export(Table.PATTERNS);
// export(Table.PATTERN_STOP);
// Only write shapes for "approved" routes using COPY TO with results of select query
if (fromEditor) {
// Generate filter SQL for shapes if exporting a feed/schema that represents an editor snapshot.
// The filter clause for shapes requires joining to trips and then to routes table and a where filter on
// route status.
// FIXME: I'm not sure that shape_id is indexed for the trips table. This could cause slowdowns.
// FIXME: this is exporting point_type, which is not a GTFS field, but its presence shouldn't hurt.
String shapeFieldsToExport = Table.commaSeparatedNames(
Table.SHAPES.specFields(),
String.join(".", feedIdToExport, Table.SHAPES.name + "."),
true
);
// NOTE: The below substitution uses relative indexing: each "%<s" specifier refers to the same argument
// as the previous format specifier (i.e., feedIdToExport).
result.shapes = export(
Table.SHAPES,
String.format(
"select %s " +
"from (select distinct %s.trips.shape_id " +
"from %<s.trips, %<s.routes " +
"where %<s.trips.route_id = %<s.routes.route_id and " +
"%<s.routes.status = 2) as unique_approved_shape_ids, " +
"%<s.shapes where unique_approved_shape_ids.shape_id = %<s.shapes.shape_id",
shapeFieldsToExport,
feedIdToExport
)
);
} else {
result.shapes = export(Table.SHAPES, connection);
}
result.stops = export(Table.STOPS, connection);
// Only write stop_times for "approved" routes using COPY TO with results of select query
if (fromEditor) {
// The filter clause for stop times requires two joins to reach the routes table and a where filter on
// route status.
result.stopTimes = export(
Table.STOP_TIMES,
String.join(
" ",
Table.STOP_TIMES.generateSelectSql(feedIdToExport, Requirement.OPTIONAL),
Table.STOP_TIMES.generateJoinSql(Table.TRIPS, feedIdToExport),
Table.TRIPS.generateJoinSql(
Table.ROUTES,
feedIdToExport,
"route_id",
false
),
whereRouteIsApproved
)
);
} else {
result.stopTimes = export(Table.STOP_TIMES, connection);
}
result.transfers = export(Table.TRANSFERS, connection);
// Only write trips for "approved" routes using COPY TO with results of select query
if (fromEditor) {
result.trips = export(
Table.TRIPS,
String.join(
" ",
Table.TRIPS.generateSelectSql(feedIdToExport, Requirement.OPTIONAL),
Table.TRIPS.generateJoinSql(
Table.ROUTES,
feedIdToExport,
"route_id",
false
),
whereRouteIsApproved
)
);
} else {
result.trips = export(Table.TRIPS, connection);
}
zipOutputStream.close();
// Run clean up on the resulting zip file to remove entries for any empty tables.
cleanUpZipFile();
result.completionTime = System.currentTimeMillis();
result.loadTimeMillis = result.completionTime - startTime;
LOG.info("Exporting tables took {} sec", result.loadTimeMillis / 1000);
LOG.info("Exported feed {} to zip file: {}", feedIdToExport, outFile);
} catch (Exception ex) {
// Note: exceptions that occur during individual table exports are caught separately and stored in each TableLoadResult.
LOG.error("Exception while creating snapshot: {}", ex.toString());
ex.printStackTrace();
result.fatalException = ex.toString();
} finally {
if (connection != null) DbUtils.closeQuietly(connection);
}
return result;
}
/**
* Removes zip entries for tables that were exported with zero rows from the final zip file.
*/
private void cleanUpZipFile() {
long startTime = System.currentTimeMillis();
// Define ZIP file system properties so the finished zip can be reopened as a FileSystem.
Map<String, String> zip_properties = new HashMap<>();
// We want to read an existing ZIP File, so we set this to False
zip_properties.put("create", "false");
// Specify the path to the ZIP File that you want to read as a File System
// (File#toURI allows this to work across different operating systems, including Windows)
URI zip_disk = URI.create("jar:" + new File(outFile).toURI());
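// For an outFile of /tmp/feed.zip this resolves to something like jar:file:/tmp/feed.zip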
// Create ZIP file System
try (FileSystem fileSystem = FileSystems.newFileSystem(zip_disk, zip_properties)) {
// Get the Path inside ZIP File to delete the ZIP Entry
for (String fileName : emptyTableList) {
Path filePath = fileSystem.getPath(fileName);
// Execute Delete
Files.delete(filePath);
LOG.info("Empty file {} successfully deleted", fileName);
}
} catch (IOException e) {
LOG.error("Could not remove empty zip files");
e.printStackTrace();
}
LOG.info("Deleted {} empty files in {} ms", emptyTableList.size(), System.currentTimeMillis() - startTime);
}
private TableLoadResult export (Table table, Connection connection) {
if (fromEditor) {
// Default behavior for exporting editor snapshot tables is to select only the spec fields.
return export(table, table.generateSelectSql(feedIdToExport, Requirement.OPTIONAL));
} else {
String existingFieldsSelect = null;
try {
existingFieldsSelect = table.generateSelectAllExistingFieldsSql(connection, feedIdToExport);
} catch (SQLException e) {
LOG.error("failed to generate select statement for existing fields");
TableLoadResult tableLoadResult = new TableLoadResult();
tableLoadResult.fatalException = e.toString();
e.printStackTrace();
return tableLoadResult;
}
return export(table, existingFieldsSelect);
}
}
/**
* Export a table to the zipOutputStream to be written to the GTFS.
*/
private TableLoadResult export (Table table, String filterSql) {
long startTime = System.currentTimeMillis();
TableLoadResult tableLoadResult = new TableLoadResult();
try {
if (filterSql == null) {
throw new IllegalArgumentException("filterSql argument cannot be null");
} else {
// Surround filter SQL in parentheses: Postgres COPY accepts a query (rather than a table name) only when
// it is parenthesized.
filterSql = String.format("(%s)", filterSql);
}
// Create entry for table
String textFileName = table.name + ".txt";
ZipEntry zipEntry = new ZipEntry(textFileName);
zipOutputStream.putNextEntry(zipEntry);
// don't let CSVWriter close the stream when it is garbage-collected
OutputStream protectedOut = new FilterOutputStream(zipOutputStream);
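// The generated statement looks something like: copy (select ... from <namespace>.agency) to STDOUT DELIMITER ',' CSV HEADER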
String copySql = String.format("copy %s to STDOUT DELIMITER ',' CSV HEADER", filterSql);
LOG.info(copySql);
// Our connection pool wraps the Connection objects, so we need to unwrap the Postgres connection interface.
CopyManager copyManager = new CopyManager(connection.unwrap(BaseConnection.class));
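// copyOut streams the COPY output directly into the provided OutputStream and returns the number of rows copied.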
tableLoadResult.rowCount = (int) copyManager.copyOut(copySql, protectedOut);
if (tableLoadResult.rowCount == 0) {
// If no rows were exported, keep track of table name for later removal.
emptyTableList.add(textFileName);
}
zipOutputStream.closeEntry();
LOG.info("Copied {} {} in {} ms.", tableLoadResult.rowCount, table.name, System.currentTimeMillis() - startTime);
connection.commit();
} catch (SQLException | IOException | IllegalArgumentException e) {
// Rollback connection so that fatal exception does not impact loading of other tables.
try {
connection.rollback();
} catch (SQLException ex) {
ex.printStackTrace();
}
tableLoadResult.fatalException = e.toString();
LOG.error("Exception while exporting tables", e);
}
return tableLoadResult;
}
}