All downloads are free. Search and download functionality uses the official Maven repository.

com.conveyal.gtfs.validator.PatternFinderValidator Maven / Gradle / Ivy

Go to download

A library to load and index GTFS feeds of arbitrary size using disk-backed storage

There is a newer version: 6.2.0
Show newest version
package com.conveyal.gtfs.validator;

import com.conveyal.gtfs.PatternFinder;
import com.conveyal.gtfs.TripPatternKey;
import com.conveyal.gtfs.error.SQLErrorStorage;
import com.conveyal.gtfs.loader.BatchTracker;
import com.conveyal.gtfs.loader.Feed;
import com.conveyal.gtfs.loader.Requirement;
import com.conveyal.gtfs.loader.Table;
import com.conveyal.gtfs.model.Pattern;
import com.conveyal.gtfs.model.PatternStop;
import com.conveyal.gtfs.model.Route;
import com.conveyal.gtfs.model.Stop;
import com.conveyal.gtfs.model.StopTime;
import com.conveyal.gtfs.model.Trip;
import org.apache.commons.dbutils.DbUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static com.conveyal.gtfs.loader.JdbcGtfsLoader.copyFromFile;
import static com.conveyal.gtfs.model.Entity.INT_MISSING;
import static com.conveyal.gtfs.model.Entity.setDoubleParameter;
import static com.conveyal.gtfs.model.Entity.setIntParameter;

/**
 * Groups trips together into "patterns" that share the same sequence of stops.
 * This is not a normal validator in the sense that it does not check for bad data.
 * It's taking advantage of the fact that we're already iterating over the trips one by one to build up the patterns.
 */
public class PatternFinderValidator extends TripValidator {

    private static final Logger LOG = LoggerFactory.getLogger(PatternFinderValidator.class);

    PatternFinder patternFinder;
    private File tempPatternForTripsTextFile;
    private PrintStream patternForTripsFileStream;
    private String tempPatternForTripsTable;

    public PatternFinderValidator(Feed feed, SQLErrorStorage errorStorage) {
        super(feed, errorStorage);
        patternFinder = new PatternFinder();
    }

    @Override
    public void validateTrip (Trip trip, Route route, List stopTimes, List stops) {
        // As we hit each trip, accumulate them into the wrapped PatternFinder object.
        patternFinder.processTrip(trip, stopTimes);
    }

    /**
     * Store patterns and pattern stops in the database. Also, update the trips table with a pattern_id column.
     */
    @Override
    public void complete(ValidationResult validationResult) {
        LOG.info("Finding patterns...");
        // FIXME: There may be a better way to handle getting the full list of stops
        Map stopById = new HashMap<>();
        for (Stop stop : feed.stops) {
            stopById.put(stop.stop_id, stop);
        }
        // FIXME In the editor we need patterns to exist separately from and before trips themselves, so me make another table.
        Map patterns = patternFinder.createPatternObjects(stopById, errorStorage);
        Connection connection = null;
        try {
            // TODO this assumes gtfs-lib is using an SQL database and not a MapDB.
            //   Maybe we should just create patterns in a separate step, but that would mean iterating over the
            //   stop_times twice.
            LOG.info("Creating pattern and pattern stops tables.");
            connection = feed.getConnection();
            Statement statement = connection.createStatement();
            String tripsTableName = feed.tablePrefix + "trips";
            String patternsTableName = feed.tablePrefix + "patterns";
            String patternStopsTableName = feed.tablePrefix + "pattern_stops";
            statement.execute(String.format("alter table %s add column pattern_id varchar", tripsTableName));
            // FIXME: Here we're creating a pattern table that has an integer ID field (similar to the other GTFS tables)
            //   AND a varchar pattern_id with essentially the same value cast to a string. Perhaps the pattern ID should
            //   be a UUID or something, just to better distinguish it from the int ID?
            Table patternsTable = new Table(patternsTableName, Pattern.class, Requirement.EDITOR, Table.PATTERNS.fields);
            Table patternStopsTable = new Table(patternStopsTableName, PatternStop.class, Requirement.EDITOR,
                    Table.PATTERN_STOP.fields);
            // Create pattern and pattern stops table, each with serial ID fields.
            patternsTable.createSqlTable(connection, null, true);
            patternStopsTable.createSqlTable(connection, null, true);
            // Generate prepared statements for inserts.
            String insertPatternSql = patternsTable.generateInsertSql(true);
            String insertPatternStopSql = patternStopsTable.generateInsertSql(true);
            PreparedStatement insertPatternStatement = connection.prepareStatement(insertPatternSql);
            BatchTracker patternTracker = new BatchTracker("pattern", insertPatternStatement);
            PreparedStatement insertPatternStopStatement = connection.prepareStatement(insertPatternStopSql);
            BatchTracker patternStopTracker = new BatchTracker("pattern stop", insertPatternStopStatement);
            int currentPatternIndex = 0;
            LOG.info("Storing patterns and pattern stops");
            // If using Postgres, load pattern to trips mapping into temp table for quick updating.
            boolean postgresText = (connection.getMetaData().getDatabaseProductName().equals("PostgreSQL"));
            if (postgresText) {
                // NOTE: temp table name must NOT be prefixed with schema because temp tables are prefixed with their own
                // connection-unique schema.
                tempPatternForTripsTable = "pattern_for_trips";
                tempPatternForTripsTextFile = File.createTempFile(tempPatternForTripsTable, "text");
                LOG.info("Loading via temporary text file at {}", tempPatternForTripsTextFile.getAbsolutePath());
                // Create temp table for updating trips with pattern IDs to be dropped at the end of the transaction.
                String createTempSql = String.format("create temp table %s(trip_id varchar, pattern_id varchar) on commit drop", tempPatternForTripsTable);
                LOG.info(createTempSql);
                statement.execute(createTempSql);
                patternForTripsFileStream = new PrintStream(new BufferedOutputStream(new FileOutputStream(tempPatternForTripsTextFile)));
            }
            for (Map.Entry entry : patterns.entrySet()) {
                Pattern pattern = entry.getValue();
                LOG.debug("Batching pattern {}", pattern.pattern_id);
                TripPatternKey key = entry.getKey();
                pattern.setStatementParameters(insertPatternStatement, true);
                patternTracker.addBatch();
                // Construct pattern stops based on values in trip pattern key.
                // FIXME: Use pattern stops table here?
                int lastValidDeparture = key.departureTimes.get(0);
                for (int i = 0; i < key.stops.size(); i++) {
                    int travelTime = 0;
                    String stopId = key.stops.get(i);
                    int arrival = key.arrivalTimes.get(i);
                    if (i > 0) {
                        int prevDeparture = key.departureTimes.get(i - 1);
                        // Set travel time for all stops except the first.
                        if (prevDeparture != INT_MISSING) {
                            // Update the previous departure if it's not missing. Otherwise, base travel time based on the
                            // most recent valid departure.
                            lastValidDeparture = prevDeparture;
                        }
                        travelTime = arrival == INT_MISSING || lastValidDeparture == INT_MISSING
                            ? INT_MISSING
                            : arrival - lastValidDeparture;
                    }
                    int departure = key.departureTimes.get(i);
                    int dwellTime = arrival == INT_MISSING || departure == INT_MISSING
                        ? INT_MISSING
                        : departure - arrival;

                    insertPatternStopStatement.setString(1, pattern.pattern_id);
                    // Stop sequence is zero-based.
                    setIntParameter(insertPatternStopStatement, 2, i);
                    insertPatternStopStatement.setString(3, stopId);
                    setIntParameter(insertPatternStopStatement,4, travelTime);
                    setIntParameter(insertPatternStopStatement,5, dwellTime);
                    setIntParameter(insertPatternStopStatement,6, key.dropoffTypes.get(i));
                    setIntParameter(insertPatternStopStatement,7, key.pickupTypes.get(i));
                    setDoubleParameter(insertPatternStopStatement, 8, key.shapeDistances.get(i));
                    setIntParameter(insertPatternStopStatement,9, key.timepoints.get(i));
                    patternStopTracker.addBatch();
                }
                // Finally, update all trips on this pattern to reference this pattern's ID.
                String questionMarks = String.join(", ", Collections.nCopies(pattern.associatedTrips.size(), "?"));
                PreparedStatement updateTripStatement = connection.prepareStatement(
                        String.format("update %s set pattern_id = ? where trip_id in (%s)", tripsTableName, questionMarks));
                int oneBasedIndex = 1;
                updateTripStatement.setString(oneBasedIndex++, pattern.pattern_id);
                // Prepare each trip in pattern to update trips table.
                for (String tripId : pattern.associatedTrips) {
                    if (postgresText) {
                        // Add line to temp csv file if using postgres.
                        // No need to worry about null trip IDs because the trips have already been processed.
                        String[] strings = new String[]{tripId, pattern.pattern_id};
                        // Print a new line in the standard postgres text format:
                        // https://www.postgresql.org/docs/9.1/static/sql-copy.html#AEN64380
                        patternForTripsFileStream.println(String.join("\t", strings));
                    } else {
                        // Otherwise, set statement parameter.
                        updateTripStatement.setString(oneBasedIndex++, tripId);
                    }
                }
                if (!postgresText) {
                    // Execute trip update statement if not using temp text file.
                    LOG.info("Updating {} trips with pattern ID {} (%d/%d)", pattern.associatedTrips.size(), pattern.pattern_id, currentPatternIndex, patterns.size());
                    updateTripStatement.executeUpdate();
                }
                currentPatternIndex += 1;
            }
            // Send any remaining prepared statement calls to the database backend.
            patternTracker.executeRemaining();
            patternStopTracker.executeRemaining();
            LOG.info("Done storing patterns and pattern stops.");
            if (postgresText) {
                // Finally, copy the pattern for trips text file into a table, create an index on trip IDs, and update
                // the trips table.
                LOG.info("Updating trips with pattern IDs");
                patternForTripsFileStream.close();
                // Copy file contents into temp pattern for trips table.
                copyFromFile(connection, tempPatternForTripsTextFile, tempPatternForTripsTable);
                // Before updating the trips with pattern IDs, index the table on trip_id.
                String patternForTripsIndexSql = String.format("create index temp_trips_pattern_id_idx on %s (trip_id)", tempPatternForTripsTable);
                LOG.info(patternForTripsIndexSql);
                statement.execute(patternForTripsIndexSql);
                // Finally, execute the update statement.
                String updateTripsSql = String.format("update %s set pattern_id = %s.pattern_id from %s where %s.trip_id = %s.trip_id", tripsTableName, tempPatternForTripsTable, tempPatternForTripsTable, tripsTableName, tempPatternForTripsTable);
                LOG.info(updateTripsSql);
                statement.executeUpdate(updateTripsSql);
                // Delete temp file. Temp table will be dropped after the transaction is committed.
                tempPatternForTripsTextFile.delete();
                LOG.info("Updating trips complete");
            }
            LOG.info("Creating index on patterns");
            statement.executeUpdate(String.format("alter table %s add primary key (pattern_id)", patternsTableName));
            LOG.info("Creating index on pattern stops");
            statement.executeUpdate(String.format("alter table %s add primary key (pattern_id, stop_sequence)", patternStopsTableName));
            // Index new pattern_id column on trips. The other tables are already indexed because they have primary keys.
            LOG.info("Indexing trips on pattern id.");
            statement.execute(String.format("create index trips_pattern_id_idx on %s (pattern_id)", tripsTableName));
            LOG.info("Done indexing.");
            connection.commit();
        } catch (SQLException | IOException e) {
            // Rollback transaction if failure occurs on creating patterns.
            DbUtils.rollbackAndCloseQuietly(connection);
            // This exception will be stored as a validator failure.
            throw new RuntimeException(e);
        } finally {
            // Close transaction finally.
            if (connection != null) DbUtils.closeQuietly(connection);
        }

    }

}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy