// org.dspace.ctask.replicate.METSReplicateConsumer (Maven / Gradle / Ivy)
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.ctask.replicate;
import java.sql.SQLException;
import java.io.FileReader;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.*;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.*;
import org.dspace.core.Context;
import org.dspace.core.Constants;
import org.dspace.core.factory.CoreServiceFactory;
import org.dspace.core.service.PluginService;
import org.dspace.curate.Curator;
import org.dspace.curate.TaskQueue;
import org.dspace.curate.TaskQueueEntry;
import org.dspace.eperson.EPerson;
import org.dspace.event.Consumer;
import org.dspace.event.Event;
import org.dspace.pack.Packer;
import org.dspace.pack.bagit.CatalogPacker;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;
// for readability
import static org.dspace.event.Event.*;
/**
* METSReplicateConsumer is an event consumer that tracks events relevant to
* replication synchronization when using METS AIPs. In response to deletions,
* it creates and transmits a catalog of deleted objects (so they may be restored if
* deletion was an error). For new or changed objects, it queues a request
* to perform the configured curation tasks, or directly performs the task
* if so indicated.
*
* This replicate consumer performs the following special actions:
*
* - If a Group/EPerson is changed/added/removed, this is considered a modification of the SITE object
*
* - When a child object is added/removed, this is also considered a modification of its parent object
*
* - Similar to other ReplicateConsumers, it also just performs the configured tasks on any object
* that is modified/added/removed
*
*
* This Consumer should be used with the settings similar to the following in your
* dspace.cfg file:
*
* # consumer to manage content replication (Replication Task Suite add-on)
* event.consumer.replicate.class = org.dspace.ctask.replicate.METSReplicateConsumer
* event.consumer.replicate.filters = Community|Collection|Item|Group|EPerson+All
*
* @author tdonohue
* @author richardrodgers
*/
public class METSReplicateConsumer implements Consumer {
private Logger log = Logger.getLogger(METSReplicateConsumer.class);
private ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService();
private PluginService pluginService = CoreServiceFactory.getInstance().getPluginService();
private SiteService siteService = ContentServiceFactory.getInstance().getSiteService();
private CommunityService communityService = ContentServiceFactory.getInstance().getCommunityService();
private CollectionService collectionService = ContentServiceFactory.getInstance().getCollectionService();
private ItemService itemService = ContentServiceFactory.getInstance().getItemService();
private ReplicaManager repMan = null;
private TaskQueue taskQueue = null;
private String queueName = null;
// list and sense for id filtering
private List idFilter = null;
private boolean idExclude = true;
// map of task names to id sets
private Map> taskQMap = null;
private Map> taskPMap = null;
private String delObjId = null;
private String delOwnerId = null;
private List delMemIds = null;
// tasks to queue upon add events
private List addQTasks = null;
// tasks to perform immediately upon add events
private List addPTasks = null;
// tasks to queue upon modify events
private List modQTasks = null;
// tasks to perform immediately upon modify events
private List modPTasks = null;
// tasks to queue upon delete events
private List delTasks = null;
// create deletion catalogs?
private boolean catalogDeletes = false;
// Group where all AIPs are stored
private final String storeGroupName = configurationService.getProperty("replicate.group.aip.name");
// Group where object deletion catalog/records are stored
private final String deleteGroupName = configurationService.getProperty("replicate.group.delete.name");
@Override
public void initialize() throws Exception
{
repMan = ReplicaManager.instance();
taskQueue = (TaskQueue) pluginService.getSinglePlugin(TaskQueue.class);
queueName = configurationService.getProperty("replicate.consumer.queue");
// look for and load any idFilter files - excludes trump includes
// An "idFilter" is an actual textual file named "exclude" or "include"
// which contains a list of handles to filter from the Consumer
if (! loadIdFilter("exclude"))
{
if (loadIdFilter("include"))
{
idExclude = false;
}
}
taskQMap = new HashMap>();
taskPMap = new HashMap>();
parseTasks("add");
parseTasks("mod");
delMemIds = new ArrayList();
parseTasks("del");
}
/**
 * Consume a content event. At a high level, 2 sorts of actions are
 * performed: first, for all new or modified objects, the object handle
 * is added to a set of objects to be processed. When a Curator batch
 * next runs, this list will be read and whatever tasks are configured to
 * be performed will be. Typically, a new AIP will be generated and
 * uploaded to the replication service. Second, for deletions, the event
 * stream is parsed to construct a 'delete catalog' containing an enumeration
 * of the objects that are being deleted. This also is uploaded to the
 * replication service, and can be used either to recover from mistaken
 * deletions, or purge the replica store when desired.
 * <p>
 * Events whose subject can no longer be resolved, or whose object has no
 * handle, are skipped rather than recorded, since tasks are keyed by handle.
 *
 * @param ctx Context
 * @param event Event
 * @throws Exception if error
 */
@Override
public void consume(Context ctx, Event event) throws Exception
{
    int evType = event.getEventType();
    int subjType = event.getSubjectType();
    //In this situation the "id" is actually the Object Handle
    String id = null;
    //Special processing specific to Group & EPerson events
    if (subjType == Constants.GROUP || subjType == Constants.EPERSON)
    {
        // ANY changes to a Group/EPerson are essentially modifications
        // to the DSpace System (Site), as they are site-wide changes
        Site site = siteService.findSite(ctx);
        id = site == null ? null : site.getHandle();
        // make sure we have a handle and are supposed to process this object
        if (id != null && acceptId(id, event, ctx))
        {
            // add it to the master lists of modified objects
            // for which we need to perform tasks
            mapId(taskQMap, modQTasks, id);
            mapId(taskPMap, modPTasks, id);
        }
        return;
    }

    // process all other object types
    switch (evType)
    {
        //ADD = Adding an object to a container or group
        case ADD:
            //If mapping/adding an Item to a Collection
            if (subjType == Constants.COLLECTION)
            {
                DSpaceObject collection = event.getSubject(ctx);
                if (collection == null)
                {
                    warnSubjectMissing(event);
                    break;
                }
                //First, get Handle of collection that was modified
                id = collection.getHandle();
                // make sure we are supposed to process this Collection
                if (id != null && acceptId(id, event, ctx))
                {
                    // add Collection to the master lists of modified objects
                    // for which we need to perform tasks
                    mapId(taskQMap, modQTasks, id);
                    mapId(taskPMap, modPTasks, id);
                    //now, get Handle of Item that was mapped/added
                    id = event.getDetail();
                    // the Item may not have a handle; only record it if it does
                    if (id != null)
                    {
                        mapId(taskQMap, modQTasks, id);
                        mapId(taskPMap, modPTasks, id);
                    }
                }
            }
            //IGNORE all other "ADD" events. Currently it's not possible to map
            //Collections or SubCommunities to multiple parents.
            break;
        case CREATE: //CREATE = Create a new object.
        case INSTALL: //INSTALL = Install an object (exits workflow/workspace). Only used for Items.
            // For CREATE & INSTALL, the Handle of object being created is found in Event Detail
            id = event.getDetail();
            // if NOT (Create & Item)
            // (i.e. We don't want to replicate items UNTIL they are Installed)
            if (!(subjType == Constants.ITEM && evType == CREATE))
            {
                if (id != null && acceptId(id, event, ctx))
                {
                    // add it to the master lists of added/new objects
                    // for which we need to perform tasks
                    mapId(taskQMap, addQTasks, id);
                    mapId(taskPMap, addPTasks, id);
                }
                // get parent of this newly created object & mark it as modified
                DSpaceObject subject = event.getSubject(ctx);
                if (subject == null)
                {
                    warnSubjectMissing(event);
                    break;
                }
                DSpaceObject parent = ContentServiceFactory.getInstance().getDSpaceObjectService(subject).getParentObject(ctx, subject);
                if (parent != null)
                {
                    id = parent.getHandle();
                    if (id != null && acceptId(id, event, ctx))
                    {
                        // add it to the master lists of modified objects
                        // for which we need to perform tasks
                        mapId(taskQMap, modQTasks, id);
                        mapId(taskPMap, modPTasks, id);
                    }
                }
            }
            break;
        case MODIFY: //MODIFY = modify an object
        case MODIFY_METADATA: //MODIFY_METADATA = just modify an object's metadata
            DSpaceObject modified = event.getSubject(ctx);
            // If subject of event is null, this means the object was likely deleted
            if (modified == null)
            {
                warnSubjectMissing(event);
                break;
            }
            //For MODIFY events, the Handle of modified object needs to be obtained from the Subject
            id = modified.getHandle();
            // make sure handle resolves - these could be events
            // for a newly created item that hasn't been assigned a handle
            if (id != null && acceptId(id, event, ctx))
            {
                // add it to the master lists of modified objects
                // for which we need to perform tasks
                mapId(taskQMap, modQTasks, id);
                mapId(taskPMap, modPTasks, id);
            }
            break;
        case REMOVE: //REMOVE = Remove an object from a container or group
        case DELETE: //DELETE = Delete an object (actually destroy it)
            // For REMOVE & DELETE, the Handle of object being deleted is found in Event Detail
            id = event.getDetail();
            // objects without a handle cannot be catalogued; otherwise
            // make sure we are supposed to process this object
            if (id != null && acceptId(id, event, ctx))
            {   // analyze & process the deletion/removal event
                deleteEvent(ctx, id, event);
            }
            break;
        default:
            break;
    }//end switch
}

/**
 * Log a warning that the subject of an event could not be resolved.
 *
 * @param event the event whose subject was not found
 */
private void warnSubjectMissing(Event event)
{
    log.warn(event.getEventTypeAsString() + " event, could not get object for "
             + event.getSubjectTypeAsString() + " id="
             + String.valueOf(event.getSubjectID())
             + ", perhaps it has been deleted.");
}
/**
 * Flush everything collected by {@code consume}: build queue entries for
 * the queueable tasks, run the "perform immediately" tasks through a
 * Curator, record any deletion that is still pending, and finally enqueue
 * the collected entries.
 *
 * @param ctx Context
 * @throws Exception if error
 */
@Override
public void end(Context ctx) throws Exception
{
    // if there are any pending objectIds, pass them to the curation
    // system to queue for later processing, or perform immediately
    EPerson ep = ctx.getCurrentUser();
    String name = (ep != null) ? ep.getName() : "unknown";
    long stamp = System.currentTimeMillis();
    // first the queueables
    Set<TaskQueueEntry> entrySet = new HashSet<TaskQueueEntry>();
    if (taskQMap.size() > 0)
    {
        for (Map.Entry<String, Set<String>> queued : taskQMap.entrySet())
        {
            // a fresh single-task list per task, so entries never share a mutable list
            List<String> taskList = new ArrayList<String>();
            taskList.add(queued.getKey());
            for (String id : queued.getValue())
            {
                entrySet.add(new TaskQueueEntry(name, stamp, taskList, id));
            }
        }
        taskQMap.clear();
    }
    // now the performables
    if (taskPMap.size() > 0)
    {
        Curator curator = new Curator();
        for (Map.Entry<String, Set<String>> performed : taskPMap.entrySet())
        {
            curator.addTask(performed.getKey());
            // ids must come from the performable map; the queue map has
            // different keys and has already been cleared above
            for (String id : performed.getValue())
            {
                curator.curate(ctx, id);
            }
            curator.clear();
        }
        taskPMap.clear();
    }
    // if there any uncommitted deletions, record them now
    if (delObjId != null)
    {
        if (delTasks != null)
        {
            entrySet.add(new TaskQueueEntry(name, stamp, delTasks, delObjId));
        }
        processDelete();
    }
    if (entrySet.size() > 0)
    {
        taskQueue.enqueue(queueName, entrySet);
    }
}
/**
 * Intentionally does nothing: this implementation has no work to perform
 * in {@code finish}.
 *
 * @param ctx Context (unused)
 * @throws Exception declared in the signature; never thrown by this implementation
 */
@Override
public void finish(Context ctx) throws Exception
{
// no-op
}
/**
 * Check to see if an object ID (Handle) is allowed to be processed by
 * this consumer. Individual Objects may be filtered out of consumer
 * processing by using a filter file (a textual file with a list of
 * handles to either include or exclude).
 * <p>
 * When the event's subject is an Item, the handle of its owning collection
 * is checked instead of the given id. If the Item (or its owning
 * collection) cannot be found, the given id itself is checked.
 *
 * @param id Object ID to check
 * @param event Event that was performed on the Object
 * @param ctx Current DSpace Context
 * @return true if this consumer should process this object event, false if it should not
 * @throws SQLException if database error occurs
 */
private boolean acceptId(String id, Event event, Context ctx) throws SQLException
{
    // always accept if not filtering
    if (idFilter == null)
    {
        return true;
    }
    // filter supports only container ids - so if id is for an item,
    // find its owning collection
    String id2check = id;
    if (event.getSubjectType() == Constants.ITEM)
    {
        // NB: Item should be available from context cache - should
        // not incur a performance hit here
        Item item = itemService.find(ctx, event.getSubjectID());
        // the item may no longer exist (e.g. it was just deleted), in which
        // case there is no owning collection to look up
        Collection coll = (item != null) ? item.getOwningCollection() : null;
        if (coll != null)
        {
            id2check = coll.getHandle();
        }
    }
    boolean onList = idFilter.contains(id2check);
    return idExclude ? ! onList : onList;
}
/**
 * Process a DELETE (destroy object) or REMOVE (remove object from container) event.
 * For a DELETE, record all objects that were deleted (parent & possible child objects)
 * For a REMOVE, if this was preceded by deletion of the same object, determine its
 * owner, mark that owner as modified and record a deletion catalog. A REMOVE that
 * does not match the deletion in progress (or arrives when none is in progress)
 * is ignored.
 *
 * @param ctx current DSpace Context
 * @param id Object on which the delete/remove event was triggered
 * @param event event that was triggered
 * @throws Exception if error
 */
private void deleteEvent(Context ctx, String id, Event event) throws Exception
{
    int type = event.getEventType();
    if (DELETE == type)
    {
        // either marks start of new deletion or a member of enclosing one
        if (delObjId == null)
        {
            //Start of a new deletion
            delObjId = id;
        }
        else
        {
            // just add to list of deleted members
            delMemIds.add(id);
        }
    }
    else if (REMOVE == type)
    {
        // either marks end of current deletion or is member of
        // enclosing one: ignore if latter. Also ignore when no deletion
        // is in progress at all (delObjId is null in that case).
        if (delObjId != null && delObjId.equals(id))
        {
            // determine owner and write out deletion catalog
            if (Constants.COLLECTION == event.getSubjectType())
            {
                // my owner is a collection
                Collection ownColl = collectionService.find(ctx, event.getSubjectID());
                if (ownColl != null)
                {
                    delOwnerId = ownColl.getHandle();
                }
            }
            else if (Constants.COMMUNITY == event.getSubjectType())
            {
                // my owner is a community
                Community comm = communityService.find(ctx, event.getSubjectID());
                if (comm != null)
                {
                    delOwnerId = comm.getHandle();
                }
            }
            // If the parent/owner was found, mark that parent as having been modified
            // (This ensures that a fresh AIP will be generated for the parent object)
            if (delOwnerId != null && acceptId(delOwnerId, event, ctx))
            {
                // add parent to the master lists of modified objects
                // for which we need to perform tasks
                mapId(taskQMap, modQTasks, delOwnerId);
                mapId(taskPMap, modPTasks, delOwnerId);
            }
            //Record the deletion catalog for the deleted object (as needed)
            processDelete();
        }
    }
}
/**
 * Process a deletion event by recording a deletion catalog if configured.
 * The tracked deletion state (deleted object, owner, members) is always
 * reset afterwards, even when writing the catalog fails, so a failed
 * deletion cannot leak into the handling of later events.
 *
 * @throws IOException if the catalog cannot be created or transferred
 *         (authorization and database errors are wrapped as the cause)
 */
private void processDelete() throws IOException
{
    try
    {
        // write out deletion catalog if defined
        if (catalogDeletes)
        {
            //First, check if this object has an AIP in storage
            boolean found = repMan.objectExists(storeGroupName, delObjId);
            // If the object has an AIP, then create a deletion catalog
            // If there's no AIP, then there's no need for a deletion
            // catalog as the object isn't backed up & cannot be restored!
            if (found)
            {
                //Create a deletion catalog (in BagIt format) of all deleted objects
                Packer packer = new CatalogPacker(delObjId, delOwnerId, delMemIds);
                try
                {
                    // Create a new deletion catalog (with default file extension / format)
                    // and store it in the deletion group store
                    String catID = repMan.deletionCatalogId(delObjId, null);
                    File packDir = repMan.stage(deleteGroupName, catID);
                    File archive = packer.pack(packDir);
                    // Create a deletion catalog in deletion archive location.
                    repMan.transferObject(deleteGroupName, archive);
                }
                catch (AuthorizeException authE)
                {
                    throw new IOException(authE);
                }
                catch (SQLException sqlE)
                {
                    throw new IOException(sqlE);
                }
            }
        }
    }
    finally
    {
        // reset for next events
        delObjId = delOwnerId = null;
        delMemIds.clear();
    }
}
/**
 * Load the ID filter file of the given name. This is a textual file in
 * the directory named by the "replicate.base.dir" property which contains
 * a list of handles (one per line) to include/exclude from this consumer.
 * Surrounding whitespace is trimmed and blank lines are skipped.
 * On a read error the failure is logged and no filter is left installed.
 *
 * @param filterName the name of the textual filter file
 * @return true if filter file was loaded successfully, false otherwise
 */
private boolean loadIdFilter(String filterName)
{
    File filterFile = new File(configurationService.getProperty("replicate.base.dir"), filterName);
    if (! filterFile.exists())
    {
        return false;
    }
    List<String> handles = new ArrayList<String>();
    BufferedReader reader = null;
    try
    {
        reader = new BufferedReader(new FileReader(filterFile));
        String line = null;
        while ((line = reader.readLine()) != null)
        {
            String handle = line.trim();
            if (handle.length() > 0)
            {
                handles.add(handle);
            }
        }
        idFilter = handles;
        return true;
    }
    catch (IOException ioE)
    {
        log.error("Unable to read filter file '" + filterName + "'", ioE);
        idFilter = null;
        return false;
    }
    finally
    {
        if (reader != null)
        {
            try
            {
                reader.close();
            }
            catch (IOException ioE)
            {
                // nothing more can be done; the outcome of the read is already decided
                log.warn("Unable to close filter file '" + filterName + "'", ioE);
            }
        }
    }
}
/**
* Record the given object tasklist in the given "map". This is essentially
* providing a master list (map) of tasks to perform for particular objects.
* NOTE: if this object and task already exist in the master list, it will
* NOT be duplicated.
* @param map Master task list to add to (String task, Set ids)
* @param tasks Tasks to be performed
* @param id Object for which the tasks should be performed.
*/
private void mapId(Map> map, List tasks, String id)
{
if (tasks != null)
{
for (String task : tasks)
{
Set ids = map.get(task);
if (ids == null)
{
ids = new HashSet();
map.put(task, ids);
}
ids.add(id);
}
}
}
/**
 * Parse the list of Consumer tasks to perform for one kind of event. The
 * comma-separated list is read from the property
 * "replicate.consumer.tasks." + propName. A task name ending in "+p" is to
 * be performed immediately (the suffix is stripped); any other task is
 * queued for later processing. For "del", the only immediate task
 * recognized is "catalog", which switches on deletion catalogs.
 * Empty entries (e.g. from a stray comma) are ignored.
 *
 * @param propName property name suffix: "add", "mod" or "del"
 */
private void parseTasks(String propName)
{
    String taskStr = configurationService.getProperty("replicate.consumer.tasks." + propName);
    if (taskStr == null || taskStr.length() == 0)
    {
        return;
    }
    for (String task : taskStr.split(","))
    {
        task = task.trim();
        if (task.length() == 0)
        {
            continue;
        }
        //If the task in question does NOT end in "+p",
        // then it should be queued for later processing
        if (! task.endsWith("+p"))
        {
            if ("add".equals(propName))
            {
                if (addQTasks == null)
                {
                    addQTasks = new ArrayList<String>();
                }
                addQTasks.add(task);
            }
            else if ("mod".equals(propName))
            {
                if (modQTasks == null)
                {
                    modQTasks = new ArrayList<String>();
                }
                modQTasks.add(task);
            }
            else if ("del".equals(propName))
            {
                if (delTasks == null)
                {
                    delTasks = new ArrayList<String>();
                }
                delTasks.add(task);
            }
        }
        //Otherwise (if the task ends in "+p"),
        // it should be added to the list of tasks to perform immediately
        else
        {
            String sTask = task.substring(0, task.lastIndexOf("+p"));
            if (sTask.length() == 0)
            {
                // a bare "+p" names no task
                continue;
            }
            if ("add".equals(propName))
            {
                if (addPTasks == null)
                {
                    addPTasks = new ArrayList<String>();
                }
                addPTasks.add(sTask);
            }
            else if ("mod".equals(propName))
            {
                if (modPTasks == null)
                {
                    modPTasks = new ArrayList<String>();
                }
                // must go on the modify list (not the add list)
                modPTasks.add(sTask);
            }
            else if ("del".equals(propName))
            {
                // just test for special case of deletion catalogs.
                if ("catalog".equals(sTask))
                {
                    catalogDeletes = true;
                }
            }
        }
    }
}
}