com.conveyal.gtfs.error.SQLErrorStorage Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of gtfs-lib Show documentation
Show all versions of gtfs-lib Show documentation
A library to load and index GTFS feeds of arbitrary size using disk-backed storage
package com.conveyal.gtfs.error;
import com.conveyal.gtfs.storage.StorageException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.sql.DataSource;
import java.sql.*;
import java.util.Map;
/**
* This is an abstraction for something that stores GTFS loading and validation errors one by one.
* Currently there's only one implementation, which uses SQL tables.
* We used to store the errors in plain old Lists, and could make an alternative implementation to do so.
* We may need to in order to output JSON reports.
*/
public class SQLErrorStorage {
private static final Logger LOG = LoggerFactory.getLogger(SQLErrorStorage.class);
// TODO Look into pooling prepared statements.
// It is debatable whether we should be holding a single connection from a pool open.
// Fetching a pooled connection might slow things down in sections where many thousands of errors are saved.
// By reusing the exact same connection as the GTFS table loader, we ensure that the newly created schema is
// visible to the connection when it creates the error tables, but we have to be careful where in the code we
// record errors.
private Connection connection;
private int errorCount; // This serves as a unique ID, so it must persist across multiple validator runs.
private PreparedStatement insertError;
private PreparedStatement insertInfo;
// A string to prepend to all table names. This is a unique identifier for the particular feed that is being loaded.
// Should include any dot or other separator. May also be the empty string if you want no prefix added.
private String tablePrefix;
// How many errors to insert at a time in a batch, for efficiency.
private static final long INSERT_BATCH_SIZE = 500;
public SQLErrorStorage (Connection connection, String tablePrefix, boolean createTables) {
// TablePrefix should always be internally generated so doesn't need to be sanitized.
this.tablePrefix = tablePrefix == null ? "" : tablePrefix;
errorCount = 0;
this.connection = connection;
if (createTables) createErrorTables();
else reconnectErrorTables();
createPreparedStatements();
}
public void storeError (NewGTFSError error) {
try {
// Insert one row for the error itself
insertError.setInt(1, errorCount);
insertError.setString(2, error.errorType.name());
// Using SetObject to allow null values, do all target DBs support this?
insertError.setObject(3, error.entityType == null ? null : error.entityType.getSimpleName());
insertError.setObject(4, error.lineNumber);
insertError.setObject(5, error.entityId);
insertError.setObject(6, error.entitySequenceNumber);
insertError.setObject(7, error.badValue);
insertError.addBatch();
// Insert all key-value info pairs for the error
for (Map.Entry entry : error.errorInfo.entrySet()) {
insertInfo.setInt(1, errorCount);
insertInfo.setString(2, entry.getKey());
insertInfo.setString(3, entry.getValue());
insertInfo.addBatch();
}
if (errorCount % INSERT_BATCH_SIZE == 0) {
insertError.executeBatch();
insertInfo.executeBatch();
}
errorCount += 1;
} catch (SQLException ex) {
throw new StorageException(ex);
}
}
public int getErrorCount () {
return errorCount;
}
/**
* This commits any remaining inserts, commits the transaction, and closes the connection permanently.
* commitAndClose() should only be called when access to SQLErrorStorage is no longer needed.
*/
public void commitAndClose() {
try {
// Execute any remaining batch inserts and commit the transaction.
insertError.executeBatch();
insertInfo.executeBatch();
connection.commit();
// Close the connection permanently (should be called only after errorStorage instance no longer needed).
connection.close();
} catch (SQLException ex) {
throw new StorageException(ex);
}
}
private void createErrorTables() {
try {
Statement statement = connection.createStatement();
// If tables are dropped, order matters because of foreign keys.
// TODO add foreign key constraint on info table?
statement.execute(String.format("create table %serrors (error_id integer primary key, error_type varchar, " +
"entity_type varchar, line_number integer, entity_id varchar, entity_sequence integer, " +
"bad_value varchar)", tablePrefix));
statement.execute(String.format("create table %serror_info (error_id integer, key varchar, value varchar)",
tablePrefix));
connection.commit();
// Keep connection open, closing would null the wrapped connection and return it to the pool.
} catch (SQLException ex) {
throw new StorageException(ex);
}
}
private void createPreparedStatements () {
try {
insertError = connection.prepareStatement(
String.format("insert into %serrors values (?, ?, ?, ?, ?, ?, ?)", tablePrefix));
insertInfo = connection.prepareStatement(
String.format("insert into %serror_info values (?, ?, ?)", tablePrefix));
} catch (SQLException ex) {
throw new StorageException(ex);
}
}
private void reconnectErrorTables () {
try {
Statement statement = connection.createStatement();
statement.execute(String.format("select max(error_id) from %serrors", tablePrefix));
ResultSet resultSet = statement.getResultSet();
resultSet.next();
errorCount = resultSet.getInt(1);
LOG.info("Reconnected to errors table, max error ID is {}.", errorCount);
errorCount += 1; // Error count is zero based, add one to avoid duplicate error key
} catch (SQLException ex) {
throw new StorageException("Could not connect to errors table.", ex);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy