com.conveyal.gtfs.error.SQLErrorStorage Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of gtfs-lib Show documentation
Show all versions of gtfs-lib Show documentation
A library to load and index GTFS feeds of arbitrary size using disk-backed storage
package com.conveyal.gtfs.error;
import com.conveyal.gtfs.storage.StorageException;
import com.conveyal.gtfs.util.InvalidNamespaceException;
import org.apache.commons.dbutils.DbUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Map;
import java.util.Set;
import static com.conveyal.gtfs.util.Util.ensureValidNamespace;
/**
* This is an abstraction for something that stores GTFS loading and validation errors one by one.
* Currently there's only one implementation, which uses SQL tables.
* We used to store the errors in plain old Lists, and could make an alternative implementation to do so.
* We may need to in order to output JSON reports.
*/
public class SQLErrorStorage {
private static final Logger LOG = LoggerFactory.getLogger(SQLErrorStorage.class);
// TODO Look into pooling prepared statements.
// It is debatable whether we should be holding a single connection from a pool open.
// Fetching a pooled connection might slow things down in sections where many thousands of errors are saved.
// By reusing the exact same connection as the GTFS table loader, we ensure that the newly created schema is
// visible to the connection when it creates the error tables, but we have to be careful where in the code we
// record errors.
private Connection connection;
private PreparedStatement insertError;
private PreparedStatement insertInfo;
// A string to prepend to all table names. This is a unique identifier for the particular feed that is being loaded.
// Should include any dot or other separator. May also be the empty string if you want no prefix added.
private String tablePrefix;
// This serves as a unique ID, so it must persist across multiple validator runs. It is, however, distinct from the
// count, which should only be determined with a SQL query.
private int errorId;
// How many errors to insert at a time in a batch, for efficiency.
private static final long INSERT_BATCH_SIZE = 500;
public SQLErrorStorage (Connection connection, String tablePrefix, boolean createTables) throws InvalidNamespaceException {
ensureValidNamespace(tablePrefix);
this.tablePrefix = tablePrefix == null ? "" : tablePrefix;
errorId = 0;
this.connection = connection;
if (createTables) createErrorTables();
else reconnectErrorTables();
createPreparedStatements();
}
public void storeError (NewGTFSError error) {
try {
// Insert one row for the error itself
insertError.setInt(1, errorId);
insertError.setString(2, error.errorType.name());
// Using SetObject to allow null values, do all target DBs support this?
insertError.setObject(3, error.entityType == null ? null : error.entityType.getSimpleName());
insertError.setObject(4, error.lineNumber);
insertError.setObject(5, error.entityId);
insertError.setObject(6, error.entitySequenceNumber);
insertError.setObject(7, error.badValue);
insertError.addBatch();
// Insert all key-value info pairs for the error
for (Map.Entry entry : error.errorInfo.entrySet()) {
insertInfo.setInt(1, errorId);
insertInfo.setString(2, entry.getKey());
insertInfo.setString(3, entry.getValue());
insertInfo.addBatch();
}
if (errorId % INSERT_BATCH_SIZE == 0) {
insertError.executeBatch();
insertInfo.executeBatch();
}
errorId += 1;
} catch (SQLException ex) {
throw new StorageException(ex);
}
}
public void storeErrors (Set errors) {
for (NewGTFSError error : errors) {
storeError(error);
}
}
/**
* Commits any outstanding error inserts and returns the error count via a SQL query.
*/
public int getErrorCount () {
try {
// Ensure any outstanding inserts are committed so that count is accurate.
this.commit();
Statement statement = connection.createStatement();
statement.execute(String.format("select count(*) from %serrors", tablePrefix));
ResultSet resultSet = statement.getResultSet();
resultSet.next();
int count = resultSet.getInt(1);
return count;
} catch (SQLException ex) {
throw new StorageException(ex);
}
}
/**
* This executes any remaining inserts and commits the transaction.
*/
private void commit() {
try {
// Execute any remaining batch inserts and commit the transaction.
insertError.executeBatch();
insertInfo.executeBatch();
connection.commit();
} catch (SQLException ex) {
throw new StorageException(ex);
}
}
/**
* This executes any remaining inserts, commits the transaction, and closes the connection permanently.
* commitAndClose() should only be called when access to SQLErrorStorage is no longer needed.
*/
public void commitAndClose() {
LOG.info("Committing errors and closing SQL connection.");
this.commit();
// Close the connection permanently (should be called only after errorStorage instance no longer needed).
DbUtils.closeQuietly(connection);
}
private void createErrorTables() {
try {
Statement statement = connection.createStatement();
// If tables are dropped, order matters because of foreign keys.
// TODO add foreign key constraint on info table?
String createErrorsSql = String.format("create table %serrors (error_id integer primary key, error_type varchar, " +
"entity_type varchar, line_number integer, entity_id varchar, entity_sequence integer, " +
"bad_value varchar)", tablePrefix);
LOG.info(createErrorsSql);
statement.execute(createErrorsSql);
String createErrorInfoSql = String.format("create table %serror_info (error_id integer, key varchar, value varchar)",
tablePrefix);
LOG.info(createErrorInfoSql);
statement.execute(createErrorInfoSql);
connection.commit();
// Keep connection open, closing would null the wrapped connection and return it to the pool.
} catch (SQLException ex) {
throw new StorageException(ex);
}
}
private void createPreparedStatements () {
try {
insertError = connection.prepareStatement(
String.format("insert into %serrors values (?, ?, ?, ?, ?, ?, ?)", tablePrefix));
insertInfo = connection.prepareStatement(
String.format("insert into %serror_info values (?, ?, ?)", tablePrefix));
} catch (SQLException ex) {
throw new StorageException(ex);
}
}
private void reconnectErrorTables () {
try {
Statement statement = connection.createStatement();
statement.execute(String.format("select max(error_id) from %serrors", tablePrefix));
ResultSet resultSet = statement.getResultSet();
resultSet.next();
errorId = resultSet.getInt(1);
LOG.info("Reconnected to errors table, max error ID is {}.", errorId);
errorId += 1; // Error count is zero based, add one to avoid duplicate error key
} catch (SQLException ex) {
throw new StorageException(ex);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy