// org.dspace.content.packager.AbstractPackageIngester
// (source listing taken from the dspace-api artifact)
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.content.packager;
import java.io.File;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.apache.logging.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Collection;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.crosswalk.CrosswalkException;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.CollectionService;
import org.dspace.content.service.ItemService;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.LogHelper;
import org.dspace.handle.factory.HandleServiceFactory;
import org.dspace.handle.service.HandleService;
import org.dspace.workflow.WorkflowException;
/**
* An abstract implementation of a DSpace Package Ingester, which
* implements a few helper/utility methods that most (all?) PackageIngesters
* may find useful.
*
* First, implements recursive functionality in ingestAll() and replaceAll()
* methods of the PackageIngester interface. These methods are setup to
* recursively call ingest() and replace() respectively.
*
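* Finally, the utility methods createDSpaceObject(), finishCreateItem() and
* updateDSpaceObject() allow ingesters to easily create/update objects without
* having to worry too much about normal DSpace submission workflows (which are
* taken care of in those utility methods). NOTE: those methods are not defined
* in this class itself; the helpers defined here (addPackageReference(),
* getPackageReferences(), addToIngestedMap(), getIngestedMap() and
* getIngestedList()) track package references and already-ingested packages.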
*
* All Package ingesters should either extend this abstract class
* or implement {@code PackageIngester} directly, to better suit their needs.
*
* @author Tim Donohue
* @see PackageIngester
* @see org.dspace.core.service.PluginService
*/
public abstract class AbstractPackageIngester
implements PackageIngester {
/**
* log4j category
*/
private static Logger log = org.apache.logging.log4j.LogManager.getLogger(AbstractPackageIngester.class);
protected final CollectionService collectionService = ContentServiceFactory.getInstance().getCollectionService();
protected final ItemService itemService = ContentServiceFactory.getInstance().getItemService();
protected final HandleService handleService = HandleServiceFactory.getInstance().getHandleService();
/**
* References to other packages -- these are the next packages to ingest recursively
* Key = DSpace Object just ingested, Value = List of all packages relating to a DSpaceObject
**/
private Map> packageReferences = new HashMap>();
/**
* Map of all successfully ingested/replaced DSpace objects for current
* import process (used by ingestAll()/replaceAll()).
* The key is the package file (which was used to create the object),
* and the value is the Identifier (i.e. Handle) of the DSpaceObject created/replaced.
**/
private Map pkgIngestedMap = new LinkedHashMap();
/**
* Recursively create one or more DSpace Objects out of the contents
* of the ingested package (and all other referenced packages).
* The initial object is created under the indicated parent. All other
* objects are created based on their relationship to the initial object.
*
* For example, a scenario may be to create a Collection based on a
* collection-level package, and also create an Item for every item-level
* package referenced by the collection-level package.
*
* The output of this method is one or more newly created DSpaceObject Identifiers
* (i.e. Handles).
*
* The packager may choose not to implement ingestAll
,
* or simply forward the call to ingest
if it is unable to support
* recursive ingestion.
*
* The deposit license (Only significant for Item) is passed
* explicitly as a string since there is no place for it in many
* package formats. It is optional and may be given as
* null
.
*
* @param context DSpace context.
* @param parent parent under which to create the initial object
* (may be null -- in which case ingester must determine parent from package
* or throw an error).
* @param pkgFile The initial package file to ingest
* @param params Properties-style list of options (interpreted by each packager).
* @param license may be null, which takes default license.
* @return List of DSpaceObjects created
* @throws PackageValidationException if initial package (or any referenced package)
* is unacceptable or there is a fatal error in creating a DSpaceObject
* @throws UnsupportedOperationException if this packager does not
* implement ingestAll
* @throws CrosswalkException if crosswalk error
* @throws IOException if IO error
* @throws SQLException if database error
* @throws AuthorizeException if authorization error
* @throws WorkflowException if workflow error
*/
@Override
public List ingestAll(Context context, DSpaceObject parent, File pkgFile,
PackageParameters params, String license)
throws PackageException, UnsupportedOperationException,
CrosswalkException, AuthorizeException,
SQLException, IOException, WorkflowException {
//If unset, make sure the Parameters specifies this is a recursive ingest
if (!params.recursiveModeEnabled()) {
params.setRecursiveModeEnabled(true);
}
//Initial DSpace Object to ingest
DSpaceObject dso = null;
// If we have not previously parsed/ingested this package file
// NOTE: This ensures we don't accidentally ingest the same package
// TWICE, e.g. an Item's package may be referenced from multiple
// Collection packages (if Item is mapped to multiple Collections)
if (!getIngestedMap().containsKey(pkgFile)) {
try {
//actually ingest pkg using provided PackageIngester
dso = ingest(context, parent, pkgFile, params, license);
} catch (IllegalStateException ie) {
// NOTE: if we encounter an IllegalStateException, this means the
// handle is already in use and this object already exists.
//if we are skipping over (i.e. keeping) existing objects
if (params.keepExistingModeEnabled()) {
log.warn(LogHelper.getHeader(context, "skip_package_ingest",
"Object already exists, package-skipped=" + pkgFile.getName()));
} else {
// Pass this exception on -- which essentially causes a full rollback of all changes (this is
// the default)
throw ie;
}
}
} else {
log.info(LogHelper.getHeader(context, "skip_package_ingest",
"Object was already ingested, package-skipped=" + pkgFile.getName()));
}
// As long as an object was successfully created from this package
if (dso != null) {
// Add to map of successfully ingested packages/objects (if not already added)
addToIngestedMap(pkgFile, dso);
//We can only recursively ingest non-Item packages
//(NOTE: Items have no children, as Bitstreams/Bundles are created from Item packages)
if (dso.getType() != Constants.ITEM) {
//Check if we found child package references when ingesting this latest DSpaceObject
List childPkgRefs = getPackageReferences(dso);
//we can only recursively ingest child packages
//if we have references to them
if (childPkgRefs != null && !childPkgRefs.isEmpty()) {
//Recursively ingest each child package, using this current object as the parent DSpace Object
for (String childPkgRef : childPkgRefs) {
//Assume package reference is relative to current (parent) package location
File childPkg = new File(pkgFile.getAbsoluteFile().getParent(), childPkgRef);
// fun, it's recursive! -- ingested referenced package
// NOTE: we are passing "null" as the Parent object, since we want to restore to the
// Parent object specified in the child Package.
// (Just in case this child is only *mapped* to the current Collection)
ingestAll(context, null, childPkg, params, license);
// A Collection can map to Items that it does not "own".
// If a Collection package has an Item as a child, it
// should be mapped regardless of ownership.
if (Constants.COLLECTION == dso.getType()) {
// If this newly ingested parent object was a Collection,
// lookup the newly ingested child Item and make sure
// it is mapped to this Collection.
String childHandle = getIngestedMap().get(childPkg);
if (childHandle != null) {
Item childItem = (Item) handleService.resolveToObject(context, childHandle);
// Ensure Item is mapped to Collection that referenced it
Collection collection = (Collection) dso;
if (childItem != null && !itemService.isIn(childItem, collection)) {
collectionService.addItem(context, collection, childItem);
}
}
}
}
} //end if child pkgs
} //end if not an Item
} //end if DSpaceObject not null
//Return list of all objects ingested
return getIngestedList();
}
/**
* Recursively replace one or more DSpace Objects out of the contents
* of the ingested package (and all other referenced packages).
* The initial object to replace is indicated by dso
. All other
* objects are replaced based on information provided in the referenced packages.
*
* For example, a scenario may be to replace a Collection based on a
* collection-level package, and also replace *every* Item in that collection
* based on the item-level packages referenced by the collection-level package.
*
* Please note that since the dso
input only specifies the
* initial object to replace, any additional objects to replace must be
* determined based on the referenced packages (or initial package itself).
*
* The output of this method is one or more replaced DSpaceObject Identifiers
* (i.e. Handles).
*
* The packager may choose not to implement replaceAll
,
* since it somewhat contradicts the archival nature of DSpace. It also
* may choose to forward the call to replace
if it is unable to
* support recursive replacement.
*
* @param context DSpace context.
* @param dso initial existing DSpace Object to be replaced, may be null
* if object to replace can be determined from package
* @param pkgFile The package file to ingest.
* @param params Properties-style list of options specific to this packager
* @return List of Identifiers of DSpaceObjects replaced
* @throws PackageValidationException if initial package (or any referenced package)
* is unacceptable or there is a fatal error in creating a DSpaceObject
* @throws UnsupportedOperationException if this packager does not
* implement replaceAll
* @throws CrosswalkException if crosswalk error
* @throws IOException if IO error
* @throws SQLException if database error
* @throws AuthorizeException if authorization error
* @throws WorkflowException if workflow error
*/
@Override
public List replaceAll(Context context, DSpaceObject dso,
File pkgFile, PackageParameters params)
throws PackageException, UnsupportedOperationException,
CrosswalkException, AuthorizeException,
SQLException, IOException, WorkflowException {
//If unset, make sure the Parameters specifies this is a recursive replace
if (!params.recursiveModeEnabled()) {
params.setRecursiveModeEnabled(true);
}
//Initial DSpace Object to replace
DSpaceObject replacedDso = null;
// If we have not previously parsed/ingested this package file
// NOTE: This ensures we don't accidentally ingest the same package
// TWICE, e.g. an Item's package may be referenced from multiple
// Collection packages (if Item is mapped to multiple Collections)
if (!getIngestedMap().containsKey(pkgFile)) {
//Actually ingest pkg using provided PackageIngester, and replace object
//NOTE: 'dso' may be null! If it is null, the PackageIngester must determine
// the object to be replaced from the package itself.
replacedDso = replace(context, dso, pkgFile, params);
} else {
log.info(LogHelper.getHeader(context, "skip_package_replace",
"Object was already replaced, package-skipped=" + pkgFile.getName()));
}
// As long as an object was successfully replaced from this package
if (replacedDso != null) {
// Add to map of successfully ingested packages/objects (if not already added)
addToIngestedMap(pkgFile, replacedDso);
//We can only recursively ingest non-Item packages
//(NOTE: Items have no children, as Bitstreams/Bundles are created from Item packages)
if (replacedDso.getType() != Constants.ITEM) {
//Check if we found child package references when replacing this latest DSpaceObject
List childPkgRefs = getPackageReferences(replacedDso);
//we can only recursively ingest child packages
//if we have references to them
if (childPkgRefs != null && !childPkgRefs.isEmpty()) {
//Recursively replace each child package
for (String childPkgRef : childPkgRefs) {
//Assume package reference is relative to current package location
File childPkg = new File(pkgFile.getAbsoluteFile().getParent(), childPkgRef);
//fun, it's recursive! -- replaced referenced package as a child of current object
// Pass object to replace as 'null', as we don't know which object to replace.
// (it will therefore be looked up in the package itself)
replaceAll(context, null, childPkg, params);
// A Collection can map to Items that it does not "own".
// If a Collection package has an Item as a child, it
// should be mapped regardless of ownership.
if (Constants.COLLECTION == replacedDso.getType()) {
// If this newly ingested parent object was a Collection,
// lookup the newly ingested child Item and make sure
// it is mapped to this Collection.
String childHandle = getIngestedMap().get(childPkg);
if (childHandle != null) {
Item childItem = (Item) handleService.resolveToObject(context, childHandle);
// Ensure Item is mapped to Collection that referenced it
Collection collection = (Collection) replacedDso;
if (childItem != null && !itemService.isIn(childItem, collection)) {
collectionService.addItem(context, collection, childItem);
}
}
}
}
} //end if child pkgs
} //end if not an Item
} //end if DSpaceObject not null
//Return list of all objects replaced
return getIngestedList();
}
/**
* During ingestion process, some submission information packages (SIPs)
* may reference other packages to be ingested (recursively).
*
* This method collects all references to other packages, so that we
* can choose to recursively ingest them, as necessary, alongside the
* DSpaceObject created from the original SIP.
*
* References are collected based on the DSpaceObject created from the SIP
* (this way we keep the context of these references).
*
* @param dso DSpaceObject whose SIP referenced another package
* @param packageRef A reference to another package, which can be ingested after this one
*/
public void addPackageReference(DSpaceObject dso, String packageRef) {
List packageRefValues = null;
// Check if we already have an entry for packages reference by this object
if (packageReferences.containsKey(dso)) {
packageRefValues = packageReferences.get(dso);
} else {
//Create a new empty list of references
packageRefValues = new ArrayList();
}
//add this package reference to existing list and save
packageRefValues.add(packageRef);
packageReferences.put(dso, packageRefValues);
}
/**
* Return a list of known SIP references from a newly created DSpaceObject.
*
* These references should detail where another package exists which
* should be ingested alongside the current DSpaceObject.
*
* The AbstractPackageIngester
or an equivalent SIP handler is expected
* to understand how to deal with these package references.
*
* @param dso DSpaceObject whose SIP referenced other SIPs
* @return List of Strings which are the references to external submission ingestion packages
* (may be null if no SIPs were referenced)
*/
public List getPackageReferences(DSpaceObject dso) {
return packageReferences.get(dso);
}
/**
* Add parsed package and resulting DSpaceObject to list of successfully
* ingested/replaced objects.
*
* @param pkgFile the package file that was used to create the object
* @param dso the DSpaceObject created/replaced
*/
protected void addToIngestedMap(File pkgFile, DSpaceObject dso) {
// Add to list of successfully ingested packages
if (!pkgIngestedMap.containsKey(pkgFile)) {
pkgIngestedMap.put(pkgFile, dso.getHandle());
}
}
/**
* Return Map of all packages ingested and the DSpaceObjects which have been
* created/replaced by this instance of the Ingester.
*
*
* The Map "key" is the package file which was parsed, and the "value"
* is the Identifier (i.e. Handle) of the DSpaceObject which was created/replaced.
*
* @return Map of DSpaceObjects which have been created/replaced.
*/
protected Map getIngestedMap() {
return pkgIngestedMap;
}
/**
* Return List of all DSpaceObject Identifiers which have been ingested/replaced by
* this instance of the Ingester.
*
* This list can be useful in reporting back to the user what content has
* been added or replaced. It's used by ingestAll() and replaceAll() to
* return this list of everything that was ingested/replaced.
*
* @return List of Identifiers for DSpaceObjects which have been added/replaced
*/
protected List getIngestedList() {
// We have the list of ingested objects in our IngestedMap.
// So, we simply have to convert that Collection to a List
java.util.Collection coll = pkgIngestedMap.values();
if (coll instanceof List) {
return (List) coll;
} else {
return new ArrayList(coll);
}
}
}