All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dspace.content.packager.AbstractPackageIngester Maven / Gradle / Ivy

There is a newer version: 8.0
Show newest version
/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.content.packager;

import java.io.File;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.logging.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Collection;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.crosswalk.CrosswalkException;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.CollectionService;
import org.dspace.content.service.ItemService;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.LogHelper;
import org.dspace.handle.factory.HandleServiceFactory;
import org.dspace.handle.service.HandleService;
import org.dspace.workflow.WorkflowException;

/**
 * An abstract implementation of a DSpace Package Ingester, which
 * implements a few helper/utility methods that most (all?) PackageIngesters
 * may find useful.
 * 

* First, implements recursive functionality in ingestAll() and replaceAll() * methods of the PackageIngester interface. These methods are setup to * recursively call ingest() and replace() respectively. *

* Finally, it also implements several utility methods (createDSpaceObject(), * finishCreateItem(), updateDSpaceObject()) which subclasses may find useful. * This classes will allow subclasses to easily create/update objects without * having to worry too much about normal DSpace submission workflows (which is * taken care of in these utility methods). *

* All Package ingesters should either extend this abstract class * or implement PackageIngester to better suit their needs. * * @author Tim Donohue * @see PackageIngester * @see org.dspace.core.service.PluginService */ public abstract class AbstractPackageIngester implements PackageIngester { /** * log4j category */ private static Logger log = org.apache.logging.log4j.LogManager.getLogger(AbstractPackageIngester.class); protected final CollectionService collectionService = ContentServiceFactory.getInstance().getCollectionService(); protected final ItemService itemService = ContentServiceFactory.getInstance().getItemService(); protected final HandleService handleService = HandleServiceFactory.getInstance().getHandleService(); /** * References to other packages -- these are the next packages to ingest recursively * Key = DSpace Object just ingested, Value = List of all packages relating to a DSpaceObject **/ private Map> packageReferences = new HashMap>(); /** * Map of all successfully ingested/replaced DSpace objects for current * import process (used by ingestAll()/replaceAll()). * The key is the package file (which was used to create the object), * and the value is the Identifier (i.e. Handle) of the DSpaceObject created/replaced. **/ private Map pkgIngestedMap = new LinkedHashMap(); /** * Recursively create one or more DSpace Objects out of the contents * of the ingested package (and all other referenced packages). * The initial object is created under the indicated parent. All other * objects are created based on their relationship to the initial object. *

* For example, a scenario may be to create a Collection based on a * collection-level package, and also create an Item for every item-level * package referenced by the collection-level package. *

* The output of this method is one or more newly created DSpaceObject Identifiers * (i.e. Handles). *

* The packager may choose not to implement ingestAll, * or simply forward the call to ingest if it is unable to support * recursive ingestion. *

* The deposit license (Only significant for Item) is passed * explicitly as a string since there is no place for it in many * package formats. It is optional and may be given as * null. * * @param context DSpace context. * @param parent parent under which to create the initial object * (may be null -- in which case ingester must determine parent from package * or throw an error). * @param pkgFile The initial package file to ingest * @param params Properties-style list of options (interpreted by each packager). * @param license may be null, which takes default license. * @return List of DSpaceObjects created * @throws PackageValidationException if initial package (or any referenced package) * is unacceptable or there is a fatal error in creating a DSpaceObject * @throws UnsupportedOperationException if this packager does not * implement ingestAll * @throws CrosswalkException if crosswalk error * @throws IOException if IO error * @throws SQLException if database error * @throws AuthorizeException if authorization error * @throws WorkflowException if workflow error */ @Override public List ingestAll(Context context, DSpaceObject parent, File pkgFile, PackageParameters params, String license) throws PackageException, UnsupportedOperationException, CrosswalkException, AuthorizeException, SQLException, IOException, WorkflowException { //If unset, make sure the Parameters specifies this is a recursive ingest if (!params.recursiveModeEnabled()) { params.setRecursiveModeEnabled(true); } //Initial DSpace Object to ingest DSpaceObject dso = null; // If we have not previously parsed/ingested this package file // NOTE: This ensures we don't accidentally ingest the same package // TWICE, e.g. an Item's package may be referenced from multiple // Collection packages (if Item is mapped to multiple Collections) if (!getIngestedMap().containsKey(pkgFile)) { try { //actually ingest pkg using provided PackageIngester dso = ingest(context, parent, pkgFile, params, license); } catch (IllegalStateException ie) { // NOTE: if we encounter an IllegalStateException, this means the // handle is already in use and this object already exists. //if we are skipping over (i.e. keeping) existing objects if (params.keepExistingModeEnabled()) { log.warn(LogHelper.getHeader(context, "skip_package_ingest", "Object already exists, package-skipped=" + pkgFile.getName())); } else { // Pass this exception on -- which essentially causes a full rollback of all changes (this is // the default) throw ie; } } } else { log.info(LogHelper.getHeader(context, "skip_package_ingest", "Object was already ingested, package-skipped=" + pkgFile.getName())); } // As long as an object was successfully created from this package if (dso != null) { // Add to map of successfully ingested packages/objects (if not already added) addToIngestedMap(pkgFile, dso); //We can only recursively ingest non-Item packages //(NOTE: Items have no children, as Bitstreams/Bundles are created from Item packages) if (dso.getType() != Constants.ITEM) { //Check if we found child package references when ingesting this latest DSpaceObject List childPkgRefs = getPackageReferences(dso); //we can only recursively ingest child packages //if we have references to them if (childPkgRefs != null && !childPkgRefs.isEmpty()) { //Recursively ingest each child package, using this current object as the parent DSpace Object for (String childPkgRef : childPkgRefs) { //Assume package reference is relative to current (parent) package location File childPkg = new File(pkgFile.getAbsoluteFile().getParent(), childPkgRef); // fun, it's recursive! -- ingested referenced package // NOTE: we are passing "null" as the Parent object, since we want to restore to the // Parent object specified in the child Package. // (Just in case this child is only *mapped* to the current Collection) ingestAll(context, null, childPkg, params, license); // A Collection can map to Items that it does not "own". // If a Collection package has an Item as a child, it // should be mapped regardless of ownership. if (Constants.COLLECTION == dso.getType()) { // If this newly ingested parent object was a Collection, // lookup the newly ingested child Item and make sure // it is mapped to this Collection. String childHandle = getIngestedMap().get(childPkg); if (childHandle != null) { Item childItem = (Item) handleService.resolveToObject(context, childHandle); // Ensure Item is mapped to Collection that referenced it Collection collection = (Collection) dso; if (childItem != null && !itemService.isIn(childItem, collection)) { collectionService.addItem(context, collection, childItem); } } } } } //end if child pkgs } //end if not an Item } //end if DSpaceObject not null //Return list of all objects ingested return getIngestedList(); } /** * Recursively replace one or more DSpace Objects out of the contents * of the ingested package (and all other referenced packages). * The initial object to replace is indicated by dso. All other * objects are replaced based on information provided in the referenced packages. *

* For example, a scenario may be to replace a Collection based on a * collection-level package, and also replace *every* Item in that collection * based on the item-level packages referenced by the collection-level package. *

* Please note that since the dso input only specifies the * initial object to replace, any additional objects to replace must be * determined based on the referenced packages (or initial package itself). *

* The output of this method is one or more replaced DSpaceObject Identifiers * (i.e. Handles). *

* The packager may choose not to implement replaceAll, * since it somewhat contradicts the archival nature of DSpace. It also * may choose to forward the call to replace if it is unable to * support recursive replacement. * * @param context DSpace context. * @param dso initial existing DSpace Object to be replaced, may be null * if object to replace can be determined from package * @param pkgFile The package file to ingest. * @param params Properties-style list of options specific to this packager * @return List of Identifiers of DSpaceObjects replaced * @throws PackageValidationException if initial package (or any referenced package) * is unacceptable or there is a fatal error in creating a DSpaceObject * @throws UnsupportedOperationException if this packager does not * implement replaceAll * @throws CrosswalkException if crosswalk error * @throws IOException if IO error * @throws SQLException if database error * @throws AuthorizeException if authorization error * @throws WorkflowException if workflow error */ @Override public List replaceAll(Context context, DSpaceObject dso, File pkgFile, PackageParameters params) throws PackageException, UnsupportedOperationException, CrosswalkException, AuthorizeException, SQLException, IOException, WorkflowException { //If unset, make sure the Parameters specifies this is a recursive replace if (!params.recursiveModeEnabled()) { params.setRecursiveModeEnabled(true); } //Initial DSpace Object to replace DSpaceObject replacedDso = null; // If we have not previously parsed/ingested this package file // NOTE: This ensures we don't accidentally ingest the same package // TWICE, e.g. an Item's package may be referenced from multiple // Collection packages (if Item is mapped to multiple Collections) if (!getIngestedMap().containsKey(pkgFile)) { //Actually ingest pkg using provided PackageIngester, and replace object //NOTE: 'dso' may be null! If it is null, the PackageIngester must determine // the object to be replaced from the package itself. replacedDso = replace(context, dso, pkgFile, params); } else { log.info(LogHelper.getHeader(context, "skip_package_replace", "Object was already replaced, package-skipped=" + pkgFile.getName())); } // As long as an object was successfully replaced from this package if (replacedDso != null) { // Add to map of successfully ingested packages/objects (if not already added) addToIngestedMap(pkgFile, replacedDso); //We can only recursively ingest non-Item packages //(NOTE: Items have no children, as Bitstreams/Bundles are created from Item packages) if (replacedDso.getType() != Constants.ITEM) { //Check if we found child package references when replacing this latest DSpaceObject List childPkgRefs = getPackageReferences(replacedDso); //we can only recursively ingest child packages //if we have references to them if (childPkgRefs != null && !childPkgRefs.isEmpty()) { //Recursively replace each child package for (String childPkgRef : childPkgRefs) { //Assume package reference is relative to current package location File childPkg = new File(pkgFile.getAbsoluteFile().getParent(), childPkgRef); //fun, it's recursive! -- replaced referenced package as a child of current object // Pass object to replace as 'null', as we don't know which object to replace. // (it will therefore be looked up in the package itself) replaceAll(context, null, childPkg, params); // A Collection can map to Items that it does not "own". // If a Collection package has an Item as a child, it // should be mapped regardless of ownership. if (Constants.COLLECTION == replacedDso.getType()) { // If this newly ingested parent object was a Collection, // lookup the newly ingested child Item and make sure // it is mapped to this Collection. String childHandle = getIngestedMap().get(childPkg); if (childHandle != null) { Item childItem = (Item) handleService.resolveToObject(context, childHandle); // Ensure Item is mapped to Collection that referenced it Collection collection = (Collection) replacedDso; if (childItem != null && !itemService.isIn(childItem, collection)) { collectionService.addItem(context, collection, childItem); } } } } } //end if child pkgs } //end if not an Item } //end if DSpaceObject not null //Return list of all objects replaced return getIngestedList(); } /** * During ingestion process, some submission information packages (SIPs) * may reference other packages to be ingested (recursively). *

* This method collects all references to other packages, so that we * can choose to recursively ingest them, as necessary, alongside the * DSpaceObject created from the original SIP. *

* References are collected based on the DSpaceObject created from the SIP * (this way we keep the context of these references). * * @param dso DSpaceObject whose SIP referenced another package * @param packageRef A reference to another package, which can be ingested after this one */ public void addPackageReference(DSpaceObject dso, String packageRef) { List packageRefValues = null; // Check if we already have an entry for packages reference by this object if (packageReferences.containsKey(dso)) { packageRefValues = packageReferences.get(dso); } else { //Create a new empty list of references packageRefValues = new ArrayList(); } //add this package reference to existing list and save packageRefValues.add(packageRef); packageReferences.put(dso, packageRefValues); } /** * Return a list of known SIP references from a newly created DSpaceObject. *

* These references should detail where another package exists which * should be ingested alongside the current DSpaceObject. *

* The AbstractPackageIngester or an equivalent SIP handler is expected * to understand how to deal with these package references. * * @param dso DSpaceObject whose SIP referenced other SIPs * @return List of Strings which are the references to external submission ingestion packages * (may be null if no SIPs were referenced) */ public List getPackageReferences(DSpaceObject dso) { return packageReferences.get(dso); } /** * Add parsed package and resulting DSpaceObject to list of successfully * ingested/replaced objects. * * @param pkgFile the package file that was used to create the object * @param dso the DSpaceObject created/replaced */ protected void addToIngestedMap(File pkgFile, DSpaceObject dso) { // Add to list of successfully ingested packages if (!pkgIngestedMap.containsKey(pkgFile)) { pkgIngestedMap.put(pkgFile, dso.getHandle()); } } /** * Return Map of all packages ingested and the DSpaceObjects which have been * created/replaced by this instance of the Ingester. * *

* The Map "key" is the package file which was parsed, and the "value" * is the Identifier (i.e. Handle) of the DSpaceObject which was created/replaced. * * @return Map of DSpaceObjects which have been created/replaced. */ protected Map getIngestedMap() { return pkgIngestedMap; } /** * Return List of all DSpaceObject Identifiers which have been ingested/replaced by * this instance of the Ingester. *

* This list can be useful in reporting back to the user what content has * been added or replaced. It's used by ingestAll() and replaceAll() to * return this list of everything that was ingested/replaced. * * @return List of Identifiers for DSpaceObjects which have been added/replaced */ protected List getIngestedList() { // We have the list of ingested objects in our IngestedMap. // So, we simply have to convert that Collection to a List java.util.Collection coll = pkgIngestedMap.values(); if (coll instanceof List) { return (List) coll; } else { return new ArrayList(coll); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy