com.conveyal.gtfs.loader.JdbcGtfsExporter (gtfs-lib)
A library to load and index GTFS feeds of arbitrary size using disk-backed storage
package com.conveyal.gtfs.loader;
import com.conveyal.gtfs.GTFSFeed;
import com.conveyal.gtfs.model.Calendar;
import com.conveyal.gtfs.model.CalendarDate;
import com.conveyal.gtfs.model.ScheduleException;
import com.conveyal.gtfs.model.Service;
import org.postgresql.copy.CopyManager;
import org.postgresql.core.BaseConnection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.sql.DataSource;
import java.io.FileOutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.Connection;
import java.sql.SQLException;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
/**
* Handles exporting a feed contained in the database to a GTFS zip file.
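 * <p>
 * A minimal usage sketch (the feed namespace, output path, and DataSource below are illustrative placeholders,
 * not values defined by this library):
 * <pre>{@code
 *     JdbcGtfsExporter exporter = new JdbcGtfsExporter("abcd_1234", "/tmp/feed.zip", dataSource, false);
 *     FeedLoadResult result = exporter.exportTables();
 * }</pre>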
*/
public class JdbcGtfsExporter {
private static final Logger LOG = LoggerFactory.getLogger(JdbcGtfsExporter.class);
private final String outFile;
private final DataSource dataSource;
private final boolean fromEditor;
    // These fields will be filled in once the feed export begins.
private Connection connection;
private ZipOutputStream zipOutputStream;
// The reference feed ID (namespace) to copy.
private final String feedIdToExport;
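    // Zip entry names for tables that exported zero rows; these entries are removed from the zip after export completes.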
    private List<String> emptyTableList = new ArrayList<>();
public JdbcGtfsExporter(String feedId, String outFile, DataSource dataSource, boolean fromEditor) {
this.feedIdToExport = feedId;
this.outFile = outFile;
this.dataSource = dataSource;
this.fromEditor = fromEditor;
}
/**
* Export primary entity tables as well as Pattern and PatternStops tables.
*
* FIXME: This should probably only export main GTFS tables... unless there is a benefit to keeping editor fields or
* tables intact for future editing. Maybe this could be an option provided to the function (e.g., specTablesOnly).
*/
public FeedLoadResult exportTables() {
// This result object will be returned to the caller to summarize the feed and report any critical errors.
// FIXME: use feed export result object?
FeedLoadResult result = new FeedLoadResult();
try {
zipOutputStream = new ZipOutputStream(new FileOutputStream(outFile));
long startTime = System.currentTimeMillis();
// We get a single connection object and share it across several different methods.
// This ensures that actions taken in one method are visible to all subsequent SQL statements.
// If we create a schema or table on one connection, then access it in a separate connection, we have no
// guarantee that it exists when the accessing statement is executed.
connection = dataSource.getConnection();
if (!connection.getMetaData().getDatabaseProductName().equals("PostgreSQL")) {
// This code path currently requires the Postgres text "copy to" format.
throw new RuntimeException("Export from SQL database not implemented for non-PostgreSQL databases.");
}
// Construct where clause for routes table to filter only "approved" routes (and entities related to routes)
// if exporting a feed/schema that represents an editor snapshot.
String whereRouteIsApproved = String.format("where %s.%s.status = 2", feedIdToExport, Table.ROUTES.name);
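            // For example, with a hypothetical feed namespace of "abcd_1234" this yields: where abcd_1234.routes.status = 2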
// Export each table in turn (by placing entry in zip output stream).
result.agency = export(Table.AGENCY);
result.calendar = export(Table.CALENDAR);
if (fromEditor) {
// Export schedule exceptions in place of calendar dates if exporting a feed/schema that represents an editor snapshot.
GTFSFeed feed = new GTFSFeed();
// FIXME: The below table readers should probably just share a connection with the exporter.
                JDBCTableReader<ScheduleException> exceptionsReader =
                        new JDBCTableReader(Table.SCHEDULE_EXCEPTIONS, dataSource, feedIdToExport + ".",
                                EntityPopulator.SCHEDULE_EXCEPTION);
                JDBCTableReader<Calendar> calendarsReader =
                        new JDBCTableReader(Table.CALENDAR, dataSource, feedIdToExport + ".",
                                EntityPopulator.CALENDAR);
                Iterable<Calendar> calendars = calendarsReader.getAll();
                Iterable<ScheduleException> exceptionsIterator = exceptionsReader.getAll();
                List<ScheduleException> exceptions = new ArrayList<>();
// FIXME: Doing this causes the connection to stay open, but it is closed in the finalizer so it should
// not be a big problem.
for (ScheduleException exception : exceptionsIterator) {
exceptions.add(exception);
}
int calendarDateCount = 0;
for (Calendar cal : calendars) {
Service service = new Service(cal.service_id);
service.calendar = cal;
for (ScheduleException ex : exceptions) {
if (ex.exemplar.equals(ScheduleException.ExemplarServiceDescriptor.SWAP) &&
(!ex.addedService.contains(cal.service_id) && !ex.removedService.contains(cal.service_id))) {
// Skip swap exception if cal is not referenced by added or removed service.
// This is not technically necessary, but the output is cleaner/more intelligible.
continue;
}
for (LocalDate date : ex.dates) {
if (date.isBefore(cal.start_date) || date.isAfter(cal.end_date)) {
// No need to write dates that do not apply
continue;
}
CalendarDate calendarDate = new CalendarDate();
calendarDate.date = date;
calendarDate.service_id = cal.service_id;
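                            // Per the GTFS spec, exception_type 1 means service is added for the date, 2 means it is removed.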
calendarDate.exception_type = ex.serviceRunsOn(cal) ? 1 : 2;
LOG.info("Adding exception {} (type={}) for calendar {} on date {}", ex.name, calendarDate.exception_type, cal.service_id, date.toString());
if (service.calendar_dates.containsKey(date))
throw new IllegalArgumentException("Duplicate schedule exceptions on " + date.toString());
service.calendar_dates.put(date, calendarDate);
calendarDateCount += 1;
}
}
feed.services.put(cal.service_id, service);
}
if (calendarDateCount == 0) {
LOG.info("No calendar dates found. Skipping table.");
} else {
LOG.info("Writing {} calendar dates from schedule exceptions", calendarDateCount);
new CalendarDate.Writer(feed).writeTable(zipOutputStream);
}
} else {
// Otherwise, simply export the calendar dates as they were loaded in.
result.calendarDates = export(Table.CALENDAR_DATES);
}
result.fareAttributes = export(Table.FARE_ATTRIBUTES);
result.fareRules = export(Table.FARE_RULES);
result.feedInfo = export(Table.FEED_INFO);
// Only write frequencies for "approved" routes using COPY TO with results of select query
String frequencySelectSql = null;
if (fromEditor) {
// Generate filter SQL for trips if exporting a feed/schema that represents an editor snapshot.
// The filter clause for frequencies requires two joins to reach the routes table and a where filter on
// route status.
String scopedStartTime = qualifyField(Table.FREQUENCIES, "start_time");
String scopedEndTime = qualifyField(Table.FREQUENCIES, "end_time");
// FIXME Replace with string literal query instead of clause generators
frequencySelectSql = String.join(" ",
// Convert start_time and end_time values from seconds to time format (HH:MM:SS).
Table.FREQUENCIES.generateSelectSql(feedIdToExport, Requirement.OPTIONAL)
.replace(scopedStartTime, convertSecondsToTime(scopedStartTime, "start_time"))
.replace(scopedEndTime, convertSecondsToTime(scopedEndTime, "end_time")),
Table.FREQUENCIES.generateJoinSql(Table.TRIPS, feedIdToExport),
Table.TRIPS.generateJoinSql(Table.ROUTES, feedIdToExport, "route_id", false),
whereRouteIsApproved);
}
result.frequencies = export(Table.FREQUENCIES, frequencySelectSql);
// Only write "approved" routes using COPY TO with results of select query
String routeSelectSql = null;
if (fromEditor) {
// The filter clause for routes is simple. We're just checking that the route is APPROVED.
routeSelectSql = String.join(" ",
Table.ROUTES.generateSelectSql(feedIdToExport, Requirement.OPTIONAL),
whereRouteIsApproved);
}
result.routes = export(Table.ROUTES, routeSelectSql);
// FIXME: Find some place to store errors encountered on export for patterns and pattern stops.
// FIXME: Is there a need to export patterns or pattern stops? Should these be iterated over to ensure that
// frequency-based pattern travel times match stop time arrivals/departures?
// export(Table.PATTERNS);
// export(Table.PATTERN_STOP);
// Only write shapes for "approved" routes using COPY TO with results of select query
String shapeSelectSql = null;
if (fromEditor) {
// Generate filter SQL for shapes if exporting a feed/schema that represents an editor snapshot.
// The filter clause for shapes requires joining to trips and then to routes table and a where filter on
// route status.
// FIXME: I'm not sure that shape_id is indexed for the trips table. This could cause slow downs.
// FIXME: this is exporting point_type, which is not a GTFS field, but its presence shouldn't hurt.
String shapeFieldsToExport = Table.commaSeparatedNames(
Table.SHAPES.specFields(), String.join(".", feedIdToExport, Table.SHAPES.name + "."));
                // NOTE: The below substitution uses relative indexing. All values "%<s" reference the same argument
                // as the previous format specifier (i.e., feedIdToExport).
                shapeSelectSql = String.format(
                        "select %s " +
                                "from (select distinct %s.trips.shape_id " +
                                "from %<s.trips, %<s.routes " +
                                "where %<s.trips.route_id = %<s.routes.route_id and %<s.routes.status = 2) as unique_approved_shape_ids, " +
                                "%<s.shapes where unique_approved_shape_ids.shape_id = %<s.shapes.shape_id",
                        shapeFieldsToExport,
                        feedIdToExport);
            }
            result.shapes = export(Table.SHAPES, shapeSelectSql);
            result.stops = export(Table.STOPS);
            // Only write stop times and trips for "approved" routes using COPY TO with results of select queries.
            String stopTimesSelectSql = null;
            String tripSelectSql = null;
            if (fromEditor) {
                // The filter clauses for stop times and trips join through to the routes table and check route status.
                stopTimesSelectSql = String.join(" ",
                        Table.STOP_TIMES.generateSelectSql(feedIdToExport, Requirement.OPTIONAL),
                        Table.STOP_TIMES.generateJoinSql(Table.TRIPS, feedIdToExport),
                        Table.TRIPS.generateJoinSql(Table.ROUTES, feedIdToExport, "route_id", false),
                        whereRouteIsApproved);
                tripSelectSql = String.join(" ",
                        Table.TRIPS.generateSelectSql(feedIdToExport, Requirement.OPTIONAL),
                        Table.TRIPS.generateJoinSql(Table.ROUTES, feedIdToExport, "route_id", false),
                        whereRouteIsApproved);
            }
            result.stopTimes = export(Table.STOP_TIMES, stopTimesSelectSql);
            result.transfers = export(Table.TRANSFERS);
            result.trips = export(Table.TRIPS, tripSelectSql);
            // Close the zip output stream, then reopen the file to strip out any empty tables.
            zipOutputStream.close();
            cleanUpZipFile();
            result.completionTime = System.currentTimeMillis();
            result.loadTimeMillis = result.completionTime - startTime;
            LOG.info("Exported feed {} to zip file {} in {} ms", feedIdToExport, outFile, result.loadTimeMillis);
        } catch (Exception ex) {
            // Exceptions thrown while exporting individual tables are caught and stored in their TableLoadResults;
            // anything caught here is fatal to the export as a whole.
            LOG.error("Exception while exporting feed", ex);
            result.fatalException = ex.toString();
        }
        return result;
    }

    /**
     * Removes any empty entries (tables that exported zero rows) from the output zip file.
     */
    private void cleanUpZipFile() {
        long startTime = System.currentTimeMillis();
        // Define ZIP File System properties in a HashMap.
        Map<String, String> zip_properties = new HashMap<>();
// We want to read an existing ZIP File, so we set this to False
zip_properties.put("create", "false");
// Specify the path to the ZIP File that you want to read as a File System
URI zip_disk = URI.create("jar:file://" + outFile);
// Create ZIP file System
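        // The "jar:" URI scheme mounts the zip with Java's built-in zip file system provider, so entries can be
        // deleted in place without manually rewriting the archive.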
try (FileSystem fileSystem = FileSystems.newFileSystem(zip_disk, zip_properties)) {
// Get the Path inside ZIP File to delete the ZIP Entry
for (String fileName : emptyTableList) {
Path filePath = fileSystem.getPath(fileName);
// Execute Delete
Files.delete(filePath);
LOG.info("Empty file {} successfully deleted", fileName);
}
} catch (IOException e) {
LOG.error("Could not remove empty zip files");
e.printStackTrace();
}
LOG.info("Deleted {} empty files in {} ms", emptyTableList.size(), System.currentTimeMillis() - startTime);
}
private TableLoadResult export (Table table) {
if (fromEditor) {
// Default behavior for exporting editor snapshot tables is to select only the spec fields.
return export(table, table.generateSelectSql(feedIdToExport, Requirement.OPTIONAL));
} else {
return export(table, null);
}
}
private TableLoadResult export (Table table, String filterSql) {
long startTime = System.currentTimeMillis();
TableLoadResult tableLoadResult = new TableLoadResult();
try {
// Create entry for table
String textFileName = table.name + ".txt";
ZipEntry zipEntry = new ZipEntry(textFileName);
zipOutputStream.putNextEntry(zipEntry);
            // Wrap the zip output stream so the copy operation cannot close it; entries are closed explicitly below.
OutputStream protectedOut = new FilterOutputStream(zipOutputStream);
if (filterSql == null) {
// If there is no filter SQL specified, simply copy out the whole table.
filterSql = String.format("%s.%s", feedIdToExport, table.name);
} else {
// Surround filter SQL in parentheses.
filterSql = String.format("(%s)", filterSql);
}
String copySql = String.format("copy %s to STDOUT DELIMITER ',' CSV HEADER", filterSql);
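            // The resulting statement resembles "copy abcd_1234.agency to STDOUT DELIMITER ',' CSV HEADER" for a whole
            // table, or "copy (select ...) to STDOUT DELIMITER ',' CSV HEADER" when filter SQL is supplied
            // (the feed namespace "abcd_1234" is a hypothetical example).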
LOG.info(copySql);
// Our connection pool wraps the Connection objects, so we need to unwrap the Postgres connection interface.
CopyManager copyManager = new CopyManager(connection.unwrap(BaseConnection.class));
tableLoadResult.rowCount = (int) copyManager.copyOut(copySql, protectedOut);
if (tableLoadResult.rowCount == 0) {
// If no rows were exported, keep track of table name for later removal.
emptyTableList.add(textFileName);
}
zipOutputStream.closeEntry();
LOG.info("Copied {} {} in {} ms.", tableLoadResult.rowCount, table.name, System.currentTimeMillis() - startTime);
connection.commit();
} catch (SQLException | IOException e) {
            // Roll back the connection so that a fatal exception does not impact the export of other tables.
try {
connection.rollback();
} catch (SQLException ex) {
ex.printStackTrace();
}
tableLoadResult.fatalException = e.getMessage();
LOG.error("Exception while exporting tables", e);
}
return tableLoadResult;
}
}