com.conveyal.gtfs.validator.ServiceValidator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of gtfs-lib Show documentation
Show all versions of gtfs-lib Show documentation
A library to load and index GTFS feeds of arbitrary size using disk-backed storage
package com.conveyal.gtfs.validator;
import com.conveyal.gtfs.error.NewGTFSError;
import com.conveyal.gtfs.error.NewGTFSErrorType;
import com.conveyal.gtfs.error.SQLErrorStorage;
import com.conveyal.gtfs.loader.BatchTracker;
import com.conveyal.gtfs.loader.DateField;
import com.conveyal.gtfs.loader.Feed;
import com.conveyal.gtfs.loader.Table;
import com.conveyal.gtfs.model.Calendar;
import com.conveyal.gtfs.model.CalendarDate;
import com.conveyal.gtfs.model.Entity;
import com.conveyal.gtfs.model.Route;
import com.conveyal.gtfs.model.Stop;
import com.conveyal.gtfs.model.StopTime;
import com.conveyal.gtfs.model.Trip;
import com.conveyal.gtfs.storage.StorageException;
import gnu.trove.map.hash.TIntIntHashMap;
import org.apache.commons.dbutils.DbUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.time.DayOfWeek;
import java.time.LocalDate;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static com.conveyal.gtfs.error.NewGTFSErrorType.TRIP_OVERLAP_IN_BLOCK;
/**
* This will validate that service date information is coherent, and attempt to deduce or validate the range of dates
* covered by a GTFS feed.
*
* It turns the GTFS system of repeating weekly calendars and exceptions (calendar dates) into a single large table
* listing which services run on which days. This in turn allows us to build a histogram of service duration on each
* day.
*
* As an intermediate result it builds a table of service duration by service ID and mode of transport.
*
* Makes one object representing each service ID.
* That object will contain a calendar (for repeating service on specific days of the week)
* and potentially multiple CalendarDates defining exceptions to the base calendar.
* TODO build histogram of stop times, check against calendar and declared feed validity dates
*/
public class ServiceValidator extends TripValidator {
private static final Logger LOG = LoggerFactory.getLogger(ServiceValidator.class);
private HashMap> blockIntervals = new HashMap<>();
private Map serviceInfoForServiceId = new HashMap<>();
private Map dateInfoForDate = new HashMap<>();
public ServiceValidator(Feed feed, SQLErrorStorage errorStorage) {
super(feed, errorStorage);
}
@Override
public void validateTrip(Trip trip, Route route, List stopTimes, List stops) {
if (trip.block_id != null) {
// If the trip has a block_id, add a new block interval to the map.
BlockInterval blockInterval = new BlockInterval();
blockInterval.trip = trip;
StopTime firstStopTime = stopTimes.get(0);
blockInterval.startTime = firstStopTime.departure_time;
blockInterval.firstStop = firstStopTime;
blockInterval.lastStop = stopTimes.get(stopTimes.size() - 1);
// Construct new list of intervals if none exists for encountered block_id.
blockIntervals
.computeIfAbsent(trip.block_id, k -> new ArrayList<>())
.add(blockInterval);
}
int firstStopDeparture = stopTimes.get(0).departure_time;
int lastStopArrival = stopTimes.get(stopTimes.size() - 1).arrival_time;
if (firstStopDeparture == Entity.INT_MISSING || lastStopArrival == Entity.INT_MISSING) {
// ERR
return;
}
int tripDurationSeconds = lastStopArrival - firstStopDeparture;
if (tripDurationSeconds <= 0) {
// ERR
return;
}
// Get the map from modes to service durations in seconds for this trip's service ID.
// Create a new empty map if it doesn't yet exist.
ServiceInfo serviceInfo = serviceInfoForServiceId.computeIfAbsent(trip.service_id, ServiceInfo::new);
if (route != null) {
// Increment the service duration for this trip's transport mode and service ID.
serviceInfo.durationByRouteType.adjustOrPutValue(route.route_type, tripDurationSeconds, tripDurationSeconds);
}
// Record which trips occur on each service_id.
serviceInfo.tripIds.add(trip.trip_id);
// TODO validate mode codes
}
/**
* You'd think we'd want to do this during the loading phase. But during the loading phase we don't have a reading
* connection to the entity tables in the database. Rather than make the Feed object read-write, we want to leave
* it completely read-only.
*
* @param validationResult can be written into
*/
@Override
public void complete(ValidationResult validationResult) {
validateServiceInfo(validationResult);
validateBlocks();
}
private void validateServiceInfo(ValidationResult validationResult) {
LOG.info("Merging calendars and calendar_dates...");
// First handle the calendar entries, which define repeating weekly schedules.
for (Calendar calendar : feed.calendars) {
try {
LocalDate endDate = calendar.end_date;
// Loop over all days in this calendar entry, recording on which ones it is active.
for (LocalDate date = calendar.start_date; date.isBefore(endDate) || date.isEqual(endDate); date = date.plusDays(1)) {
DayOfWeek dayOfWeek = date.getDayOfWeek();
if ( (dayOfWeek == DayOfWeek.MONDAY && calendar.monday > 0) ||
(dayOfWeek == DayOfWeek.TUESDAY && calendar.tuesday > 0) ||
(dayOfWeek == DayOfWeek.WEDNESDAY && calendar.wednesday > 0) ||
(dayOfWeek == DayOfWeek.THURSDAY && calendar.thursday > 0) ||
(dayOfWeek == DayOfWeek.FRIDAY && calendar.friday > 0) ||
(dayOfWeek == DayOfWeek.SATURDAY && calendar.saturday > 0) ||
(dayOfWeek == DayOfWeek.SUNDAY && calendar.sunday > 0)) {
// Service is active on this date.
serviceInfoForServiceId.computeIfAbsent(calendar.service_id, ServiceInfo::new).datesActive.add(date);
}
}
} catch (Exception ex) {
LOG.error("Error validating service entries (merging calendars and calendar_dates)", ex);
// Continue on to next calendar entry.
}
}
// Next handle the calendar_dates, which specify exceptions to the repeating weekly schedules.
for (CalendarDate calendarDate : feed.calendarDates) {
ServiceInfo serviceInfo = serviceInfoForServiceId.computeIfAbsent(calendarDate.service_id, ServiceInfo::new);
if (calendarDate.exception_type == 1) {
// Service added, add to set for this date.
serviceInfo.datesActive.add(calendarDate.date);
} else if (calendarDate.exception_type == 2) {
// Service removed, remove from Set for this date.
serviceInfo.datesActive.remove(calendarDate.date);
}
// Otherwise exception_type is out of range. This should already have been caught during the loading phase.
}
/*
A view that is similar to ServiceInfo class, but doesn't deal well with missing IDs in either subquery:
select durations.service_id, duration_seconds, days_active from (
(select service_id, sum(duration_seconds) as duration_seconds
from elwp_qhqsgzufnpvwnxtdbwcthn.service_durations group by service_id) as durations
join
(select service_id, count(service_date) as days_active
from elwp_qhqsgzufnpvwnxtdbwcthn.service_dates group by service_id) as days
on durations.service_id = days.service_id
);
*/
// Check for incoherent or erroneous services.
for (ServiceInfo serviceInfo : serviceInfoForServiceId.values()) {
if (serviceInfo.datesActive.isEmpty()) {
// This service must have been referenced by trips but is never active on any day.
registerError(NewGTFSError.forFeed(NewGTFSErrorType.SERVICE_NEVER_ACTIVE, serviceInfo.serviceId));
for (String tripId : serviceInfo.tripIds) {
registerError(
NewGTFSError.forTable(Table.TRIPS, NewGTFSErrorType.TRIP_NEVER_ACTIVE)
.setEntityId(tripId)
.setBadValue(tripId));
}
}
if (serviceInfo.tripIds.isEmpty()) {
registerError(NewGTFSError.forFeed(NewGTFSErrorType.SERVICE_UNUSED, serviceInfo.serviceId));
}
}
// Accumulate info about services into each date that they are active.
for (ServiceInfo serviceInfo : serviceInfoForServiceId.values()) {
for (LocalDate date : serviceInfo.datesActive) {
dateInfoForDate.computeIfAbsent(date, DateInfo::new).add(serviceInfo);
}
}
// Check for dates that have no service within full range of dates with defined service.
// Sum up service duration by mode for each day within that range.
if (dateInfoForDate.isEmpty()) {
registerError(NewGTFSError.forFeed(NewGTFSErrorType.NO_SERVICE, null));
} else {
LocalDate firstDate = LocalDate.MAX;
LocalDate lastDate = LocalDate.MIN;
for (LocalDate date : dateInfoForDate.keySet()) {
// If the date is invalid, skip.
if (date == null) {
LOG.error("Encountered null date. Did something go wrong with computeIfAbsent?");
continue;
}
if (date.isBefore(firstDate)) firstDate = date;
if (date.isAfter(lastDate)) lastDate = date;
}
// Copy some useful information into the ValidationResult object to return to the caller.
// These variables are actually not directly tied to data in the calendar_dates.txt file. Instead, they
// represent the first and last date respectively of any entry in the calendar.txt and calendar_dates.txt
// files.
validationResult.firstCalendarDate = firstDate;
validationResult.lastCalendarDate = lastDate;
// Is this any different? firstDate.until(lastDate, ChronoUnit.DAYS);
// If no days were found in the dateInfoForDate, nDays is a very large negative number, so we default to 0.
int nDays = Math.max(0, (int) ChronoUnit.DAYS.between(firstDate, lastDate) + 1);
validationResult.dailyBusSeconds = new int[nDays];
validationResult.dailyTramSeconds = new int[nDays];
validationResult.dailyMetroSeconds = new int[nDays];
validationResult.dailyRailSeconds = new int[nDays];
validationResult.dailyTotalSeconds = new int[nDays];
validationResult.dailyTripCounts = new int[nDays];
for (int d = 0; d < nDays; d++) {
LocalDate date = firstDate.plusDays(d); // current date being processed
// Add one value per day. Trove map returns zero for missing keys.
DateInfo dateInfo = dateInfoForDate.get(date);
if (dateInfo == null) {
dateInfo = new DateInfo(date); // new empty object to get empty durations map.
}
validationResult.dailyBusSeconds[d] = dateInfo.durationByRouteType.get(3);
validationResult.dailyTramSeconds[d] = dateInfo.durationByRouteType.get(0);
validationResult.dailyMetroSeconds[d] = dateInfo.durationByRouteType.get(1);
validationResult.dailyRailSeconds[d] = dateInfo.durationByRouteType.get(2);
validationResult.dailyTotalSeconds[d] = dateInfo.getTotalServiceDurationSeconds();
validationResult.dailyTripCounts[d] = dateInfo.tripCount;
if (dateInfo.getTotalServiceDurationSeconds() <= 0) {
// Check for low or zero service, which seems to happen even when services are defined.
// This will also catch cases where dateInfo was null and the new instance contains no service.
registerError(NewGTFSError.forFeed(NewGTFSErrorType.DATE_NO_SERVICE,
DateField.GTFS_DATE_FORMATTER.format(date)));
}
}
}
// Now write all these calendar-date relations out to the database.
Connection connection = null;
try {
connection = feed.getConnection();
Statement statement = connection.createStatement();
// Create a table summarizing all known service IDs.
// This is almost just a view joining two sub-selects:
// select * from
// (select service_id, count(service_date) from x.service_dates group by service_id) as days
// join
// (select service_id, sum(duration_seconds) from x.service_durations group by service_id) as durations
// on days.service_id = durations.service_id;
// Except that some service IDs may have no trips on them, or may not be referenced in any calendar or
// calendar exception, which would keep them from appearing in either of those tables. So we just create
// this somewhat redundant materialized view to serve as a master list of all services.
String servicesTableName = feed.tablePrefix + "services";
String sql = String.format("create table %s (service_id varchar, n_days_active integer, duration_seconds integer, n_trips integer)", servicesTableName);
LOG.info(sql);
statement.execute(sql);
sql = String.format("insert into %s values (?, ?, ?, ?)", servicesTableName);
PreparedStatement serviceStatement = connection.prepareStatement(sql);
final BatchTracker serviceTracker = new BatchTracker("services", serviceStatement);
for (ServiceInfo serviceInfo : serviceInfoForServiceId.values()) {
serviceStatement.setString(1, serviceInfo.serviceId);
serviceStatement.setInt(2, serviceInfo.datesActive.size());
serviceStatement.setInt(3, serviceInfo.getTotalServiceDurationSeconds());
serviceStatement.setInt(4, serviceInfo.tripIds.size());
serviceTracker.addBatch();
}
serviceTracker.executeRemaining();
// Create a table that shows on which dates each service is active.
String serviceDatesTableName = feed.tablePrefix + "service_dates";
sql = String.format("create table %s (service_date varchar, service_id varchar)", serviceDatesTableName);
LOG.info(sql);
statement.execute(sql);
sql = String.format("insert into %s values (?, ?)", serviceDatesTableName);
PreparedStatement serviceDateStatement = connection.prepareStatement(sql);
final BatchTracker serviceDateTracker = new BatchTracker("service_dates", serviceDateStatement);
for (ServiceInfo serviceInfo : serviceInfoForServiceId.values()) {
for (LocalDate date : serviceInfo.datesActive) {
if (date == null) continue; // TODO ERR? Can happen with bad data (unparseable dates).
try {
serviceDateStatement.setString(1, date.format(DateField.GTFS_DATE_FORMATTER));
serviceDateStatement.setString(2, serviceInfo.serviceId);
serviceDateTracker.addBatch();
} catch (SQLException ex) {
throw new StorageException(ex);
}
}
}
serviceDateTracker.executeRemaining();
LOG.info("Indexing...");
statement.execute(String.format("create index service_dates_service_date on %s (service_date)", serviceDatesTableName));
statement.execute(String.format("create index service_dates_service_id on %s (service_id)", serviceDatesTableName));
// Create a table containing the total trip durations per service_id and per transit mode.
// Using this table you can get total service duration by mode (route_type) per day, joining tables:
// select service_date, route_type, sum(duration_seconds)
// from x.service_dates as dates, x.service_durations as durations
// where dates.service_id = durations.service_id
// group by service_date, route_type order by service_date, route_type;
String serviceDurationsTableName = feed.tablePrefix + "service_durations";
sql = String.format("create table %s (service_id varchar, route_type integer, " +
"duration_seconds integer, primary key (service_id, route_type))", serviceDurationsTableName);
LOG.info(sql);
statement.execute(sql);
sql = String.format("insert into %s values (?, ?, ?)", serviceDurationsTableName);
PreparedStatement serviceDurationStatement = connection.prepareStatement(sql);
final BatchTracker serviceDurationTracker = new BatchTracker(
"service_durations",
serviceDurationStatement
);
for (ServiceInfo serviceInfo : serviceInfoForServiceId.values()) {
serviceInfo.durationByRouteType.forEachEntry((routeType, serviceDurationSeconds) -> {
try {
serviceDurationStatement.setString(1, serviceInfo.serviceId);
serviceDurationStatement.setInt(2, routeType);
serviceDurationStatement.setInt(3, serviceDurationSeconds);
serviceDurationTracker.addBatch();
} catch (SQLException ex) {
throw new StorageException(ex);
}
return true; // Iteration continues
});
}
serviceDurationTracker.executeRemaining();
// No need to build indexes because (service_id, route_type) is already the primary key of this table.
connection.commit();
} catch (SQLException e) {
e.printStackTrace();
throw new RuntimeException(e);
} finally {
DbUtils.closeQuietly(connection);
}
LOG.info("Done.");
}
static class ServiceInfo {
final String serviceId;
TIntIntHashMap durationByRouteType = new TIntIntHashMap();
Set datesActive = new HashSet<>();
Set tripIds = new HashSet<>();
public ServiceInfo(String serviceId) {
this.serviceId = serviceId;
}
public int getTotalServiceDurationSeconds() {
return Arrays.stream(durationByRouteType.values()).sum();
}
}
static class DateInfo {
final LocalDate date;
TIntIntHashMap durationByRouteType = new TIntIntHashMap();
int tripCount = 0; // Trip count could also in theory be broken down by route type.
Set servicesActive = new HashSet<>();
public DateInfo(LocalDate date) {
this.date = date;
}
public int getTotalServiceDurationSeconds() {
return Arrays.stream(durationByRouteType.values()).sum();
}
public void add (ServiceInfo serviceInfo) {
servicesActive.add(serviceInfo.serviceId);
serviceInfo.durationByRouteType.forEachEntry((routeType, serviceDurationSeconds) -> {
durationByRouteType.adjustOrPutValue(routeType, serviceDurationSeconds, serviceDurationSeconds);
return true; // Continue iteration.
});
tripCount += serviceInfo.tripIds.size();
}
}
/**
* Checks that trips which run on the same block (i.e., share a block_id) do not overlap. The block_id
* represents a vehicle in service, so there must not be any trips on the same block interval that start while another
* block trip is running.
*
* NOTE: This validation check happens in the {@link ServiceValidator} because it depends on information derived
* about which service calendars operate on which feed dates ({@link #serviceInfoForServiceId}).
*/
private void validateBlocks () {
// Iterate over each block and determine if there are any trips that overlap one another.
for (String blockId : blockIntervals.keySet()) {
List intervals = blockIntervals.get(blockId);
intervals.sort(Comparator.comparingInt(i -> i.startTime));
// Iterate over each interval (except for the last) comparing it to every other interval (so the last interval
// is handled through the course of iteration).
// FIXME this has complexity of n^2, there has to be a better way.
for (int n = 0; n < intervals.size() - 1; n++) {
BlockInterval interval1 = intervals.get(n);
// Compare the interval at position N with all other intervals at position N+1 to the end of the list.
for (BlockInterval interval2 : intervals.subList(n + 1, intervals.size())) {
if (interval1.lastStop.departure_time <= interval2.firstStop.arrival_time || interval2.lastStop.departure_time <= interval1.firstStop.arrival_time) {
continue;
}
// If either trip's last departure occurs after the other's first arrival, they overlap. We still
// need to determine if they operate on the same day though.
if (interval1.trip.service_id.equals(interval2.trip.service_id)) {
// If the overlapping trips share a service_id, record an error.
registerError(interval1.trip, TRIP_OVERLAP_IN_BLOCK, interval2.trip.trip_id);
} else {
// Trips overlap but don't have the same service_id.
// Check to see if service days fall on the same days of the week.
ServiceValidator.ServiceInfo info1 = serviceInfoForServiceId.get(interval1.trip.service_id);
ServiceValidator.ServiceInfo info2 = serviceInfoForServiceId.get(interval2.trip.service_id);
Set overlappingDates = new HashSet<>(info1.datesActive); // use the copy constructor
overlappingDates.retainAll(info2.datesActive);
if (overlappingDates.size() > 0) {
registerError(interval1.trip, TRIP_OVERLAP_IN_BLOCK, interval2.trip.trip_id);
}
}
}
}
}
}
/**
* A simple class used during validation to store details the run interval for a block trip.
*/
private class BlockInterval {
Trip trip;
Integer startTime;
StopTime firstStop;
StopTime lastStop;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy