// Listing retrieved from a Maven repository browser (JAR search and dependency download).
// Class: com.googlecode.fascinator.harvester.filesystem.DerbyCache
// Artifact: plugin-harvester-filesystem
/*
* The Fascinator - Plugin - File System Harvester - Derby Cache
* Copyright (C) 2011 University of Southern Queensland
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package com.googlecode.fascinator.harvester.filesystem;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Timestamp;
import java.util.HashSet;
import java.util.Properties;
import java.util.Set;
import org.apache.commons.codec.binary.Hex;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.googlecode.fascinator.common.JsonSimpleConfig;
/**
 * <p>
 * This class is designed to encapsulate all logic required to run a cache
 * backed into a derby database in support of the file system harvester. There
 * are two caches available, selected by the configuration value
 * {@code harvester / file-system / caching}:
 * </p>
 *
 * <ul>
 * <li><b>basic</b>: The file is considered 'cached' if its last modified date
 * is not newer than the database entry. On some operating systems (like linux)
 * this can provide a minimum of around 2 seconds of granularity. For most
 * purposes this is sufficient, and this cache is the most efficient.</li>
 * <li><b>hashed</b>: The entire contents of the file are SHA hashed and the
 * hash is stored in the database. The file is considered cached if the old
 * hash matches the new hash. This approach will only trigger a harvest if the
 * contents of the file really change, but it is quite slow across large data
 * sets and large files.</li>
 * </ul>
 *
 * <p>
 * Some form of caching must be enabled to support deletion detection for this
 * plugin. Every row carries a {@code changeFlag}: it is cleared by
 * {@link #resetFlags()}, set again whenever {@link #hasChanged(String, File)}
 * sees the object, and rows still cleared afterwards are reported by
 * {@link #getUnsetFlags()} and removed by {@link #purgeUnsetFlags()}.
 * </p>
 *
 * @author Greg Pendlebury
 */
public class DerbyCache {

    /** Logging */
    private static final Logger log = LoggerFactory.getLogger(DerbyCache.class);

    /** JDBC Driver */
    private static final String DERBY_DRIVER = "org.apache.derby.jdbc.EmbeddedDriver";

    /** Connection string prefix */
    private static final String DERBY_PROTOCOL = "jdbc:derby:";

    /** Database name */
    private static final String DATABASE_NAME = "fsHarvestCache";

    /** Basic table */
    private static final String BASIC_TABLE = "basic";

    /** Hash table */
    private static final String HASH_TABLE = "hashed";

    /** Configuration value selecting the last-modified based cache */
    private static final String CACHE_BASIC = "basic";

    /** Configuration value selecting the content-hash based cache */
    private static final String CACHE_HASHED = "hashed";

    /** Marker returned by the basic cache lookup when no usable entry exists */
    private static final long NOT_CACHED = -1L;

    /** Size of the read buffer used when hashing files */
    private static final int BUFFER_SIZE = 1024;

    /** Database home directory */
    private String derbyHome;

    /** Database connection */
    private Connection connection;

    /** Are we using the database cache */
    private boolean useCache;

    /** Configured cache type ('basic' or 'hashed' when caching is enabled) */
    private final String cacheType;

    /** Configured ID used to separate the rows of different harvests */
    private final String cacheId;

    /**
     * Read the cache configuration and, when it is valid, prepare the
     * database. If the cache type or ID is missing or unknown, or no database
     * home directory can be determined, the problem is logged and the cache
     * is left disabled.
     *
     * @param config The configuration to read 'caching', 'cacheId' and
     * 'derbyHome' from.
     * @throws Exception if the database home is unusable or the tables could
     * not be prepared.
     */
    public DerbyCache(JsonSimpleConfig config) throws Exception {
        cacheType = config.getString(null, "harvester", "file-system",
                "caching");
        cacheId = config.getString(null, "harvester", "file-system", "cacheId");

        boolean knownType = CACHE_BASIC.equals(cacheType)
                || CACHE_HASHED.equals(cacheType);
        if (knownType && cacheId != null) {
            // Only enable the cache if the database was really prepared
            useCache = startDatabase(config);
        } else {
            log.error("Caching is either disabled or not configured properly:");
            log.error("Cache Type: '{}'", cacheType);
            log.error("Cache ID: '{}'", cacheId);
        }
    }

    /**
     * Resolve Derby's data directory and make sure both cache tables exist.
     *
     * @param config The configuration holding 'database-service/derbyHome'.
     * @return boolean : True if the database is ready for use, false if no
     * home directory is configured anywhere (the cache must stay disabled).
     * @throws Exception if the configured home is invalid, or the tables
     * could not be found or created.
     */
    private boolean startDatabase(JsonSimpleConfig config) throws Exception {
        // Find data directory
        derbyHome = config.getString(null, "database-service", "derbyHome");
        String oldHome = System.getProperty("derby.system.home");

        if (oldHome != null) {
            // Derby's data directory has already been configured
            if (derbyHome != null) {
                // Use the existing one, but throw a warning
                log.warn("Using previously specified data directory:"
                        + " '{}', provided value has been ignored: '{}'",
                        oldHome, derbyHome);
            } else {
                // This is ok, no configuration conflicts
                log.info("Using existing data directory: '{}'", oldHome);
            }
        } else {
            // We don't have one, config MUST have one
            if (derbyHome == null) {
                // Without prepared tables every lookup would fail and every
                // file would be reported as unchanged, so refuse to cache.
                log.error("No database home directory configured!");
                return false;
            }

            // Establish its validity and existence, create if necessary
            File home = new File(derbyHome);
            if (home.exists()) {
                if (!home.isDirectory()) {
                    throw new Exception("Database home '" + derbyHome
                            + "' is not a directory!");
                }
            } else if (!home.mkdirs() && !home.isDirectory()) {
                throw new Exception("Database home '" + derbyHome
                        + "' does not exist and could not be created!");
            }
            System.setProperty("derby.system.home", derbyHome);
        }

        // Database prep work
        try {
            checkTable(BASIC_TABLE);
            checkTable(HASH_TABLE);
        } catch (SQLException ex) {
            log.error("Error during database preparation:", ex);
            throw new Exception("Error during database preparation:", ex);
        }
        return true;
    }

    /**
     * Return the shared database connection, (re)creating it if it has never
     * been opened or is no longer valid.
     *
     * @return Connection : A connection to the cache database.
     * @throws SQLException if the driver cannot be loaded or the connection
     * cannot be established.
     */
    private Connection connection() throws SQLException {
        if (connection == null || !connection.isValid(1)) {
            // At least try to close if not null... even though its not valid
            if (connection != null) {
                log.error("!!! Database connection has failed, recreating.");
                try {
                    connection.close();
                } catch (SQLException ex) {
                    log.error("Error closing invalid connection, ignoring: {}",
                            ex.getMessage());
                }
            }

            // Load the JDBC driver
            try {
                Class.forName(DERBY_DRIVER).newInstance();
            } catch (Exception ex) {
                log.error("Driver load failed: ", ex);
                throw new SQLException("Driver load failed: ", ex);
            }

            // Establish a database connection, creating the database if new
            connection = DriverManager.getConnection(DERBY_PROTOCOL
                    + DATABASE_NAME + ";create=true", new Properties());
        }
        return connection;
    }

    /**
     * Name of the table backing the configured cache type. Only meaningful
     * while the cache is enabled, when the type is known to be one of the two
     * supported values.
     *
     * @return String : The table name for the active cache.
     */
    private String activeTable() {
        return CACHE_HASHED.equals(cacheType) ? HASH_TABLE : BASIC_TABLE;
    }

    /**
     * <p>
     * Check whether or not the file has changed according to the configured
     * cache. The response from this method should be used as an indicator on
     * whether or not to proceed with harvesting the file. In that context:
     * </p>
     *
     * <ul>
     * <li>If the file is null or does not exist, or there are any errors or
     * exceptions while consulting the cache, the return value will be
     * <b>false</b>.</li>
     * <li>If no caches are configured at all, the return value will be
     * <b>true</b>.</li>
     * <li>If any cache is turned on and the file appears to have changed, the
     * return value will be <b>true</b>.</li>
     * <li>If any cache is turned on and the file does not appear to have
     * changed, the return value will be <b>false</b>.</li>
     * </ul>
     *
     * <p>
     * As a side effect the cache entry for the object is inserted or updated
     * and its change flag is set.
     * </p>
     *
     * @param oid : The object ID the file is cached under.
     * @param file : The file to test.
     * @return boolean : True if the harvest should proceed, otherwise false.
     */
    public boolean hasChanged(String oid, File file) {
        // Sanity check
        if (file == null || !file.exists()) {
            return false;
        }

        // No cache, just approve it
        if (!useCache) {
            return true;
        }

        try {
            if (CACHE_BASIC.equals(cacheType)) {
                return checkBasicCache(oid, file);
            }
            if (CACHE_HASHED.equals(cacheType)) {
                return checkHashedCache(oid, file);
            }
        } catch (Exception ex) {
            log.error("Error during cache process: ", ex);
            return false;
        }

        // Fallback, just approve it
        return true;
    }

    /**
     * Used to support deletion detection. Should be called at the beginning of
     * a harvest to reset the flags in the database for this cache ID. Errors
     * are logged, not thrown. Does nothing if the cache is disabled.
     */
    public void resetFlags() {
        if (!useCache) {
            return;
        }

        PreparedStatement sql = null;
        try {
            sql = connection().prepareStatement(
                    "UPDATE " + activeTable() + " SET changeFlag = 0"
                            + " WHERE cacheId = ?");
            sql.setString(1, cacheId);
            sql.executeUpdate();
        } catch (Exception ex) {
            log.error("Error updating cache to reset flags: ", ex);
        } finally {
            close(sql);
        }
    }

    /**
     * Used to support deletion detection. This method is called after the
     * harvest, and will return all object IDs for this cache ID whose flag is
     * still unset. Errors are logged, not thrown.
     *
     * @return Set&lt;String&gt; : All object IDs in the cache which have not
     * been 'touched'. Never null; empty if the cache is disabled, nothing
     * matched, or the query failed before returning any rows.
     */
    public Set<String> getUnsetFlags() {
        Set<String> response = new HashSet<String>();
        if (!useCache) {
            return response;
        }

        PreparedStatement sql = null;
        ResultSet result = null;
        try {
            sql = connection().prepareStatement(
                    "SELECT oid FROM " + activeTable()
                            + " WHERE changeFlag = 0"
                            + " AND cacheId = ?");
            sql.setString(1, cacheId);

            // Build response
            result = sql.executeQuery();
            while (result.next()) {
                String oid = result.getString("oid");
                if (oid != null) {
                    response.add(oid);
                }
            }
        } catch (Exception ex) {
            log.error("Error querying cache for unset flags: ", ex);
        } finally {
            close(result);
            close(sql);
        }
        return response;
    }

    /**
     * Used to support deletion detection. This method is just for cleanup
     * after the process completes: it deletes every row for this cache ID
     * whose flag is still unset. Errors are logged, not thrown. Does nothing
     * if the cache is disabled.
     */
    public void purgeUnsetFlags() {
        if (!useCache) {
            return;
        }

        PreparedStatement sql = null;
        try {
            sql = connection().prepareStatement(
                    "DELETE FROM " + activeTable()
                            + " WHERE changeFlag = 0"
                            + " AND cacheId = ?");
            sql.setString(1, cacheId);
            sql.executeUpdate();
        } catch (Exception ex) {
            log.error("Error updating cache to delete data: ", ex);
        } finally {
            close(sql);
        }
    }

    /**
     * Compare the file's last modified time against the basic cache, then
     * record the current time and set the change flag.
     *
     * @param oid The object ID to look up.
     * @param file The file to test.
     * @return boolean : True if there was no usable cache entry or the file
     * is newer than the cached time, otherwise false.
     * @throws Exception if the cache could not be written.
     */
    private boolean checkBasicCache(String oid, File file) throws Exception {
        // Read the file time once so the comparison and the stored value agree
        long lastCached = getLastModified(oid);
        long lastModified = file.lastModified();

        if (lastCached == NOT_CACHED) {
            // First time... insert and return true
            insertLastModified(oid, lastModified);
            return true;
        }

        // Force an update... even if unchanged, the flag avoids deletes
        updateLastModified(oid, lastModified);
        return lastModified > lastCached;
    }

    /**
     * Compare a hash of the file's content against the hashed cache, then
     * record the current hash and set the change flag.
     *
     * @param oid The object ID to look up.
     * @param file The file to test.
     * @return boolean : True if there was no usable cache entry or the hash
     * differs from the cached hash, otherwise false.
     * @throws Exception if the file could not be read or the cache could not
     * be written.
     */
    private boolean checkHashedCache(String oid, File file) throws Exception {
        String cachedHash = getHash(oid);
        String currentHash = hashFile(file);

        if (cachedHash == null) {
            // First time... insert and return true
            insertHash(oid, currentHash);
            return true;
        }

        // Force an update... even if unchanged, the flag avoids deletes
        updateHash(oid, currentHash);
        return !currentHash.equals(cachedHash);
    }

    /**
     * Shutdown the cache database (but not the Derby engine) and close our
     * connection.
     *
     * @throws Exception if there are errors closing the connection
     */
    public void shutdown() throws Exception {
        try {
            // Shutdown just this database (but not the engine)
            DriverManager.getConnection(DERBY_PROTOCOL + DATABASE_NAME
                    + ";shutdown=true");
        } catch (SQLException ex) {
            // The shutdown request answers with an exception; these values
            // are the ones used if the engine is NOT shutdown.
            boolean expected = ex.getErrorCode() == 45000
                    && "08006".equals(ex.getSQLState());
            if (!expected) {
                // Deliberately best-effort (e.g. another thread already shut
                // the database down), but leave a trace for diagnosis.
                log.debug("Ignoring unexpected response to database"
                        + " shutdown: {}", ex.getMessage());
            }
        } finally {
            // Forget the connection even if closing it fails
            Connection stale = connection;
            connection = null;
            if (stale != null) {
                try {
                    stale.close();
                } catch (SQLException ex) {
                    throw new Exception("Error closing connection:", ex);
                }
            }
        }
    }

    /**
     * Look up the cached last modified time for an object in the basic cache.
     *
     * @param oid The object ID to look up.
     * @return long : The cached time in milliseconds, or -1 if there is no
     * entry or the query failed (the failure is logged).
     */
    private long getLastModified(String oid) {
        PreparedStatement sql = null;
        ResultSet result = null;
        try {
            sql = connection().prepareStatement(
                    "SELECT lastModified FROM " + BASIC_TABLE
                            + " WHERE oid = ? AND cacheId = ?");
            sql.setString(1, oid);
            sql.setString(2, cacheId);
            result = sql.executeQuery();

            if (result.next()) {
                Timestamp cached = result.getTimestamp("lastModified");
                if (cached != null) {
                    return cached.getTime();
                }
            }
            return NOT_CACHED;
        } catch (SQLException ex) {
            log.error("Error querying last modified date: ", ex);
            return NOT_CACHED;
        } finally {
            close(result);
            close(sql);
        }
    }

    /**
     * Insert a new row into the basic cache with the change flag set.
     *
     * @param oid The object ID to store.
     * @param lastModified The last modified time in milliseconds.
     * @throws Exception if the insert failed.
     */
    private void insertLastModified(String oid, long lastModified)
            throws Exception {
        PreparedStatement sql = connection().prepareStatement(
                "INSERT INTO " + BASIC_TABLE
                        + " (oid, cacheId, lastModified, changeFlag)"
                        + " VALUES (?, ?, ?, 1)");
        try {
            sql.setString(1, oid);
            sql.setString(2, cacheId);
            sql.setTimestamp(3, new Timestamp(lastModified));
            sql.executeUpdate();
        } finally {
            close(sql);
        }
    }

    /**
     * Update an existing row in the basic cache and set its change flag.
     *
     * @param oid The object ID to update.
     * @param lastModified The last modified time in milliseconds.
     * @throws Exception if the update failed.
     */
    private void updateLastModified(String oid, long lastModified)
            throws Exception {
        PreparedStatement sql = connection().prepareStatement(
                "UPDATE " + BASIC_TABLE
                        + " SET lastModified = ?, changeFlag = 1"
                        + " WHERE oid = ? AND cacheId = ?");
        try {
            sql.setTimestamp(1, new Timestamp(lastModified));
            sql.setString(2, oid);
            sql.setString(3, cacheId);
            sql.executeUpdate();
        } finally {
            close(sql);
        }
    }

    /**
     * Look up the cached content hash for an object in the hashed cache.
     *
     * @param oid The object ID to look up.
     * @return String : The cached hash, or null if there is no entry or the
     * query failed (the failure is logged).
     */
    private String getHash(String oid) {
        PreparedStatement sql = null;
        ResultSet result = null;
        try {
            sql = connection().prepareStatement(
                    "SELECT hash FROM " + HASH_TABLE
                            + " WHERE oid = ? AND cacheId = ?");
            sql.setString(1, oid);
            sql.setString(2, cacheId);
            result = sql.executeQuery();

            return result.next() ? result.getString("hash") : null;
        } catch (SQLException ex) {
            log.error("Error querying last hash: ", ex);
            return null;
        } finally {
            close(result);
            close(sql);
        }
    }

    /**
     * Insert a new row into the hashed cache with the change flag set.
     *
     * @param oid The object ID to store.
     * @param hash The content hash to store.
     * @throws Exception if the insert failed.
     */
    private void insertHash(String oid, String hash) throws Exception {
        PreparedStatement sql = connection().prepareStatement(
                "INSERT INTO " + HASH_TABLE
                        + " (oid, cacheId, hash, changeFlag)"
                        + " VALUES (?, ?, ?, 1)");
        try {
            sql.setString(1, oid);
            sql.setString(2, cacheId);
            sql.setString(3, hash);
            sql.executeUpdate();
        } finally {
            close(sql);
        }
    }

    /**
     * Update an existing row in the hashed cache and set its change flag.
     *
     * @param oid The object ID to update.
     * @param hash The content hash to store.
     * @throws Exception if the update failed.
     */
    private void updateHash(String oid, String hash) throws Exception {
        PreparedStatement sql = connection().prepareStatement(
                "UPDATE " + HASH_TABLE + " SET hash = ?, changeFlag = 1"
                        + " WHERE oid = ? AND cacheId = ?");
        try {
            sql.setString(1, hash);
            sql.setString(2, oid);
            sql.setString(3, cacheId);
            sql.executeUpdate();
        } finally {
            close(sql);
        }
    }

    /**
     * Check for the existence of a table and arrange for its creation if not
     * found.
     *
     * @param table The table to look for and create.
     * @throws SQLException if there was an error.
     */
    private void checkTable(String table) throws SQLException {
        if (findTable(table)) {
            return;
        }

        // Create the table since we couldn't find it
        log.debug("Table '{}' not found, creating now!", table);
        createTable(table);

        // Double check it was created
        if (!findTable(table)) {
            log.error("Unknown error creating table '{}'", table);
            throw new SQLException("Could not find or create table '"
                    + table + "'");
        }
    }

    /**
     * Check if the given table exists in the database. Names are compared
     * ignoring case.
     *
     * @param table The table to look for
     * @return boolean flag if the table was found or not
     * @throws SQLException if there was an error accessing the database
     */
    private boolean findTable(String table) throws SQLException {
        DatabaseMetaData meta = connection().getMetaData();
        ResultSet result = meta.getTables(null, null, null, null);
        try {
            while (result.next()) {
                if (result.getString("TABLE_NAME").equalsIgnoreCase(table)) {
                    return true;
                }
            }
            return false;
        } finally {
            close(result);
        }
    }

    /**
     * Create the given table in the database.
     *
     * @param table The table to create
     * @throws SQLException if there was an error during creation, or an unknown
     * table was specified.
     */
    private void createTable(String table) throws SQLException {
        // The two tables differ only in the column holding the cached value
        String valueColumn;
        if (BASIC_TABLE.equals(table)) {
            valueColumn = "lastModified TIMESTAMP NOT NULL, ";
        } else if (HASH_TABLE.equals(table)) {
            valueColumn = "hash VARCHAR(50) NOT NULL, ";
        } else {
            throw new SQLException("Unknown table '" + table + "' requested!");
        }

        Statement sql = connection().createStatement();
        try {
            sql.execute("CREATE TABLE " + table
                    + "(oid VARCHAR(255) NOT NULL, "
                    + "cacheId VARCHAR(255) NOT NULL, "
                    + valueColumn
                    + "changeFlag SMALLINT NOT NULL, "
                    + "PRIMARY KEY (oid, cacheId))");
        } finally {
            close(sql);
        }
    }

    /**
     * Attempt to close a ResultSet. Basic wrapper for exception catching and
     * logging
     *
     * @param resultSet The ResultSet to try and close, may be null.
     */
    private void close(ResultSet resultSet) {
        if (resultSet != null) {
            try {
                resultSet.close();
            } catch (SQLException ex) {
                log.error("Error closing result set: ", ex);
            }
        }
    }

    /**
     * Attempt to close a Statement. Basic wrapper for exception catching and
     * logging
     *
     * @param statement The Statement to try and close, may be null.
     */
    private void close(Statement statement) {
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException ex) {
                log.error("Error closing statement: ", ex);
            }
        }
    }

    /**
     * Compute the hex encoded SHA digest of a file's content.
     *
     * Sourced (and adapted) from commons-codec-1.4, required since 1.4
     * contains bug which affects httpclient request headers
     *
     * @param file The file to hash.
     * @return String : The digest as a hexadecimal string.
     * @throws IOException if the file could not be opened or read.
     */
    private String hashFile(File file) throws IOException {
        InputStream data = new FileInputStream(file);
        try {
            MessageDigest digest = MessageDigest.getInstance("SHA");
            byte[] buffer = new byte[BUFFER_SIZE];
            int read;
            while ((read = data.read(buffer, 0, BUFFER_SIZE)) > -1) {
                digest.update(buffer, 0, read);
            }
            return new String(Hex.encodeHex(digest.digest()));
        } catch (NoSuchAlgorithmException nsae) {
            // Keep the original message, but do not lose the cause
            throw new RuntimeException(nsae.getMessage(), nsae);
        } finally {
            // Always release the file handle; a failure here must not mask
            // the result or an earlier exception.
            try {
                data.close();
            } catch (IOException ex) {
                log.error("Error closing file after hashing: ", ex);
            }
        }
    }
}