// org.dspace.content.packager.InternalDSpaceAIPIngester
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.content.packager;
import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.storage.bitstore.BitstreamStorageManager;
import org.dspace.content.*;
import org.dspace.content.crosswalk.CrosswalkException;
import org.dspace.content.crosswalk.MetadataValidationException;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.LogManager;
import org.dspace.handle.HandleManager;
import org.dspace.storage.rdbms.TableRow;
import org.jdom.Element;
import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.sql.SQLException;
import java.util.Iterator;
import java.util.List;
/**
* InternalDSpaceAIPIngester will be used to restore previous versions of Items from Assetstore.
* Overrides existing AIP behavior, but must be used in "manifestOnly" mode.
*/
public class InternalDSpaceAIPIngester extends AbstractInternalMETSIngester {
/** log4j logger shared by every instance of this ingester; never reassigned. */
private static final Logger log = Logger.getLogger(InternalDSpaceAIPIngester.class);
/**
 * Verifies that the manifest declares the METS profile written by the
 * complementary AIP disseminator.
 *
 * @param manifest the parsed METS manifest to validate
 * @throws MetadataValidationException if the PROFILE attribute is absent,
 *         or is anything other than {@code DSpaceAIPDisseminator.PROFILE_1_0}
 */
@Override
void checkManifest(METSManifest manifest)
    throws MetadataValidationException
{
    final String declaredProfile = manifest.getProfile();

    // Guard 1: a manifest without any profile is rejected outright.
    if (declaredProfile == null)
    {
        throw new MetadataValidationException("Cannot accept METS with no PROFILE attribute!");
    }

    // Guard 2: only the AIP 1.0 profile is understood by this ingester.
    if (!declaredProfile.equals(DSpaceAIPDisseminator.PROFILE_1_0))
    {
        throw new MetadataValidationException("METS has unacceptable PROFILE attribute, profile=" + declaredProfile);
    }
}
/**
 * Choose the descriptive metadata (DMD) section(s) to crosswalk onto the object.
 *
 * The algorithm is:
 * 1. Use whatever the <code>dmd</code> parameter specifies as the primary DMD.
 * 2. If (1) is unspecified or matches nothing, use a DIM section (preferred)
 *    or else a MODS section as the primary DMD. Type names are compared
 *    case-insensitively, and when several sections match, the last one wins.
 * 3. If a primary DMD was found, crosswalk it. If it has a GROUPID, also
 *    crosswalk every section that has a different, non-null GROUPID.
 * 4. If no primary DMD was found, crosswalk only the first section, because
 *    each crosswalk _adds_ metadata and could otherwise add duplicate fields.
 *
 * @param context DSpace Context
 * @param dso object receiving the crosswalked metadata
 * @param manifest the parsed METS manifest
 * @param callback MdrefManager handed through to the crosswalk
 * @param dmds the candidate dmdSec elements
 * @param params packager parameters (may be null)
 * @throws MetadataValidationException if there are no DMD sections at all
 */
@Override
public void crosswalkObjectDmd(Context context, DSpaceObject dso,
                               METSManifest manifest,
                               MdrefManager callback,
                               Element dmds[], PackageParameters params)
    throws CrosswalkException, PackageValidationException,
           AuthorizeException, SQLException, IOException
{
    // What dmdSec type, if any, did the user ask for via the 'dmd' parameter?
    final String requestedType = (params == null) ? null : params.getProperty("dmd");

    int primary = -1;
    if (requestedType != null && requestedType.length() > 0)
    {
        primary = lastDmdIndexOfType(manifest, dmds, requestedType);
    }
    // DIM is preferred for AIPs when the user's choice gave nothing...
    if (primary == -1)
    {
        primary = lastDmdIndexOfType(manifest, dmds, "DIM");
    }
    // ...and MODS is acceptable otherwise.
    if (primary == -1)
    {
        primary = lastDmdIndexOfType(manifest, dmds, "MODS");
    }

    if (primary < 0)
    {
        // it's an error if there is nothing to crosswalk:
        if (dmds.length == 0)
        {
            throw new MetadataValidationException("DSpaceAIPIngester: Could not find an acceptable object-wide DMD section in manifest.");
        }
        // No preferred section: take the first one only.
        manifest.crosswalkItemDmd(context, params, dso, dmds[0], callback);
        return;
    }

    final Element primaryDmd = dmds[primary];
    manifest.crosswalkItemDmd(context, params, dso, primaryDmd, callback);

    final String primaryGroup = primaryDmd.getAttributeValue("GROUPID");
    if (primaryGroup == null)
    {
        return;
    }

    // Sections sharing the primary's GROUPID (including the primary itself)
    // and sections without a GROUPID are skipped.
    for (Element candidate : dmds)
    {
        final String candidateGroup = candidate.getAttributeValue("GROUPID");
        if (candidateGroup != null && !candidateGroup.equals(primaryGroup))
        {
            manifest.crosswalkItemDmd(context, params, dso, candidate, callback);
        }
    }
}

/**
 * Scans every dmdSec and returns the index of the LAST one whose metadata
 * type equals the given type, ignoring case. The METS standard says the
 * type should be all uppercase, but we are deliberately more forgiving.
 *
 * @return index into {@code dmds}, or -1 when no section matches
 */
private static int lastDmdIndexOfType(METSManifest manifest, Element[] dmds, String mdType)
    throws CrosswalkException
{
    int match = -1;
    for (int i = 0; i < dmds.length; ++i)
    {
        if (mdType.equalsIgnoreCase(manifest.getMdType(dmds[i])))
        {
            match = i;
        }
    }
    return match;
}
/**
 * Adds a deposit license to the item when one is required.
 *
 * A license is added when either of these holds:
 * - the packager is not in restore/replace mode, so the AIP is treated
 *   like a SIP and every new SIP needs a new license; or
 * - this is not a "manifestOnly" ingest and the item has no deposit
 *   license yet.
 * For a manifest-only restore the license is expected to already be a
 * bitstream in the AIP, so the item is not even checked.
 *
 * @param context DSpace Context
 * @param item item that may receive the license
 * @param license license text handed to PackageUtils.addDepositLicense
 * @param collection collection handed to PackageUtils.addDepositLicense
 * @param params packager parameters
 */
@Override
public void addLicense(Context context, Item item, String license,
                       Collection collection, PackageParameters params)
    throws PackageValidationException,
           AuthorizeException, SQLException, IOException
{
    // Not restoring/replacing => behave like a SIP ingest.
    final boolean treatAsNewSubmission = !params.restoreModeEnabled();

    // Real (non manifest-only) ingest of an item that has no license yet.
    final boolean lacksLicense = !params.getBooleanProperty("manifestOnly", false)
            && PackageUtils.findDepositLicense(context, item) == null;

    if (treatAsNewSubmission || lacksLicense)
    {
        PackageUtils.addDepositLicense(context, license, item, collection);
    }
}
/**
 * Last chance to fix up a DSpace Object.
 *
 * For an Item, and unless the 'createMetadataFields' option has been set
 * to false (it defaults to true), makes sure every metadata field that was
 * crosswalked onto the Item exists in the database, creating missing ones.
 * With the option set to false nothing is done here, so an ingest that
 * refers to a missing metadata field is left to fail later.
 *
 * @param context DSpace Context
 * @param dso DSpace object
 * @param params Packager Parameters
 */
@Override
public void finishObject(Context context, DSpaceObject dso, PackageParameters params)
    throws PackageValidationException, CrosswalkException,
           AuthorizeException, SQLException, IOException
{
    final boolean isItem = (dso.getType() == Constants.ITEM);

    // The option is only consulted for Items.
    if (isItem && params.getBooleanProperty("createMetadataFields", true))
    {
        createMissingMetadataFields(context, (Item) dso);
    }
}
/**
 * Post-ingest hook for a single bitstream; intentionally a no-op here.
 *
 * @param context DSpace Context (unused)
 * @param bs the bitstream that was just attached (unused)
 * @param mfile the manifest file element describing it (unused)
 * @param manifest the METS manifest (unused)
 * @param params packager parameters (unused)
 */
@Override
public void finishBitstream(Context context, Bitstream bs, Element mfile,
                            METSManifest manifest, PackageParameters params)
    throws MetadataValidationException, SQLException, AuthorizeException, IOException
{
    // Deliberately empty: this ingester has no extra bitstream processing.
}
/**
 * Determines the type of DSpaceObject contained in this package from the
 * TYPE attribute of the mets:mets element. A leading "DSpace " prefix on
 * the attribute value is dropped before the name is looked up.
 *
 * @param manifest the parsed METS manifest
 * @return the type ID reported by Constants.getTypeID for the type name
 * @throws PackageValidationException if the TYPE attribute is missing,
 *         empty, or does not name a known type
 */
@Override
public int getObjectType(METSManifest manifest)
    throws PackageValidationException
{
    final String declaredType = manifest.getMets().getAttributeValue("TYPE");
    if (declaredType == null || declaredType.isEmpty())
    {
        throw new PackageValidationException("Manifest is missing the required mets@TYPE attribute.");
    }

    // e.g. "DSpace ITEM" -> "ITEM"
    final String prefix = "DSpace ";
    final String typeName = declaredType.startsWith(prefix)
            ? declaredType.substring(prefix.length())
            : declaredType;

    final int typeID = Constants.getTypeID(typeName);
    if (typeID < 0)
    {
        throw new PackageValidationException("Manifest has unrecognized value in mets@TYPE attribute: " + typeName);
    }
    return typeID;
}
/**
 * Returns the name that identifies this subclass's entries in the DSpace
 * configuration.
 *
 * @return the constant string "dspaceAIP"
 */
@Override
public String getConfigurationName()
{
    final String configurationName = "dspaceAIP";
    return configurationName;
}
/**
 * Checks every metadata value currently attached to the Item and makes
 * sure its metadata field exists in the database, creating the field
 * (without a scope note) when it does not.
 *
 * Per the original author's note, this must run *before* item.update(),
 * because update() fails with a SQLException for fields that are not yet
 * registered.
 *
 * A missing Metadata Schema (e.g. "dc") is NOT created, since the
 * namespace needed to create one is not known here.
 *
 * @param context - DSpace Context
 * @param item - Item whose unsaved metadata fields we are testing
 * @throws AuthorizeException if a field must be created and the current
 *         user may not create it (per the original note, only Administrators can)
 * @throws PackageValidationException if a metadata schema doesn't exist,
 *         or could not be looked up
 * @throws SQLException if creating the field reports it as non-unique
 */
protected static void createMissingMetadataFields(Context context, Item item)
    throws PackageValidationException, AuthorizeException, IOException, SQLException
{
    // Every metadata value currently on the Item, saved or not.
    final Metadatum[] itemMetadata = item.getMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY);

    for (int idx = 0; idx < itemMetadata.length; idx++)
    {
        final Metadatum value = itemMetadata[idx];
        MetadataSchema schema = null;
        MetadataField field = null;

        try
        {
            schema = MetadataSchema.find(context, value.schema);
            // The field can only be looked up once its schema is known.
            field = (schema == null)
                    ? null
                    : MetadataField.findByElement(context, schema.getSchemaID(), value.element, value.qualifier);
        }
        catch (SQLException lookupFailure)
        {
            // A failed lookup is treated as "field does not exist".
            field = null;
        }

        // We cannot create a Schema automatically -- its Namespace is unknown.
        if (schema == null)
        {
            throw new PackageValidationException("Unknown Metadata Schema encountered (" + value.schema + ") when attempting to ingest an Item. You will need to create this Metadata Schema in DSpace Schema Registry before the Item can be ingested.");
        }

        if (field != null)
        {
            continue; // already registered, nothing to do
        }

        try
        {
            // Register the field (no scope note).
            field = new MetadataField(schema, value.element, value.qualifier, null);
            field.create(context);
            log.info("Located a missing metadata field (schema:'" + schema.getName() +"', element:'"+ value.element +"', qualifier:'"+ value.qualifier +"') while ingesting Item. This missing field has been created in the DSpace Metadata Field Registry.");
        }
        catch (NonUniqueMetadataException duplicate)
        {
            // Not expected, because the field was just looked up and not found.
            throw new SQLException("Unable to create Metadata Field (element='" + value.element + "', qualifier='" + value.qualifier + "') in Schema "+ schema.getName() +".", duplicate);
        }
    }
}
/**
 * Returns a user help string which should describe the
 * additional valid command-line options that this packager
 * implementation will accept when using the <code>-o</code> or
 * <code>--option</code> flags with the Packager script.
 *
 * The text is the superclass help followed by the two options handled by
 * this class: <code>createMetadataFields</code> and <code>dmd</code>.
 *
 * @return a string describing additional command-line options available
 *         with this packager
 */
@Override
public String getParameterHelp()
{
    String parentHelp = super.getParameterHelp();

    // Superclass help, plus the extra parameters/options this class supports.
    // Each sentence fragment ends with a space so the joined text reads correctly.
    return parentHelp +
        "\n\n" +
        "* createMetadataFields=[boolean] " +
        "If true, ingest attempts to create any missing metadata fields. " +
        "If false, ingest will fail if a metadata field is encountered which doesn't already exist. (default = true)" +
        "\n\n" +
        "* dmd=[dmdSecType] " +
        "Type of the METS which should be used to restore item metadata (defaults to DIM, then MODS)";
}
/**
 * Attach Bitstreams to an Item, based on the files listed in the METS Manifest.
 *
 * Unlike a regular package ingest, no file content is read from the package:
 * each manifest file entry is resolved to an already existing Bitstream via
 * {@link #resolveBitstream}, which is then added to the bundle named in the
 * manifest (the bundle is created if the Item does not have it yet).
 *
 * @param context
 *            DSpace Context
 * @param item
 *            DSpace Item
 * @param manifest
 *            METS Manifest
 * @param pkgFile
 *            the package file; only used here to name the file in a
 *            warning message
 * @param params
 *            Ingestion Parameters
 * @param mdRefCallback
 *            MdrefManager storing info about mdRefs in manifest
 * @throws SQLException
 * @throws IOException
 * @throws AuthorizeException
 * @throws org.dspace.content.crosswalk.MetadataValidationException
 * @throws CrosswalkException
 * @throws PackageValidationException
 *             if a manifest file element has no ID attribute, or if no
 *             existing Bitstream could be resolved for it
 */
protected void addBitstreams(Context context, Item item,
        METSManifest manifest, File pkgFile, PackageParameters params,
        MdrefManager mdRefCallback) throws SQLException, IOException,
        AuthorizeException, MetadataValidationException,
        CrosswalkException, PackageValidationException
{
    // Step 1 -- find the ID of the primary or Logo bitstream in manifest
    String primaryID = null;
    Element primaryFile = manifest.getPrimaryOrLogoBitstream();
    if (primaryFile != null)
    {
        primaryID = primaryFile.getAttributeValue("ID");
        if (log.isDebugEnabled())
        {
            log.debug("Got primary bitstream file ID=\"" + primaryID + "\"");
        }
    }

    // Step 2 -- find list of all content files from manifest
    // Loop through these files, and add them one by one to Item
    List<Element> manifestContentFiles = manifest.getContentFiles();
    boolean setPrimaryBitstream = false;
    BitstreamFormat unknownFormat = BitstreamFormat.findUnknown(context);

    for (Element mfile : manifestContentFiles)
    {
        // basic validation -- check that it has an ID attribute
        String mfileID = mfile.getAttributeValue("ID");
        if (mfileID == null)
        {
            throw new PackageValidationException(
                    "Invalid METS Manifest: file element without ID attribute.");
        }

        // retrieve path/name of file in manifest
        String path = METSManifest.getFileName(mfile);

        // Hook up the existing Bitstream the manifest refers to, instead of
        // extracting a file input stream from the package.
        Bitstream bitstream = resolveBitstream(context, mfile, params, path);
        if (bitstream == null)
        {
            // resolveBitstream() yields null when it cannot map the path to an
            // existing Bitstream; fail with a clear validation error rather
            // than passing null on to the bundle.
            throw new PackageValidationException(
                    "Could not resolve an existing Bitstream for manifest file ID=\""
                    + mfileID + "\", path=" + path);
        }

        // retrieve bundle name from manifest
        String bundleName = METSManifest.getBundleName(mfile);

        // Find or create the bundle where bitstream should be attached
        Bundle bundle;
        Bundle bns[] = item.getBundles(bundleName);
        if (bns != null && bns.length > 0)
        {
            bundle = bns[0];
        }
        else
        {
            bundle = item.createBundle(bundleName);
        }

        // Attach the existing bitstream to the bundle
        //TODO SET Bitream.deleted(false);
        bundle.addBitstream(bitstream);

        // Set bitstream sequence id, if known
        //TODO check if we need to do this
        String seqID = mfile.getAttributeValue("SEQ");
        if (seqID != null && !seqID.isEmpty())
        {
            bitstream.setSequenceID(Integer.parseInt(seqID));
        }

        // crosswalk this bitstream's administrative metadata located in
        // METS manifest (or referenced externally)
        //TODO check if we need to do this
        manifest.crosswalkBitstream(context, params, bitstream, mfileID,
                mdRefCallback);

        // is this the primary bitstream?
        if (primaryID != null && mfileID.equals(primaryID))
        {
            bundle.setPrimaryBitstreamID(bitstream.getID());
            bundle.update();
            setPrimaryBitstream = true;
        }

        // Run any finishing activities -- this allows subclasses to
        // change default bitstream information
        //TODO see if we need to do this as well.
        finishBitstream(context, bitstream, mfile, manifest, params);

        // Last-ditch attempt to divine the format, if crosswalk failed to
        // set it:
        // 1. attempt to guess from MIME type
        // 2. if that fails, guess from "name" extension.
        //TODO see if we need to do this as well.
        if (bitstream.getFormat().equals(unknownFormat))
        {
            if (log.isDebugEnabled())
            {
                log.debug("Guessing format of Bitstream left un-set: "
                        + bitstream.toString());
            }
            String mimeType = mfile.getAttributeValue("MIMETYPE");
            BitstreamFormat bf = (mimeType == null) ? null
                    : BitstreamFormat.findByMIMEType(context, mimeType);
            if (bf == null)
            {
                bf = FormatIdentifier.guessFormat(context, bitstream);
            }
            bitstream.setFormat(bf);
        }
        bitstream.update();
    }// end for each manifest file

    // Step 3 -- Sanity checks
    // sanity check for primary bitstream
    if (primaryID != null && !setPrimaryBitstream)
    {
        log.warn("Could not find primary bitstream file ID=\"" + primaryID
                + "\" in manifest file \"" + pkgFile.getAbsolutePath()
                + "\"");
    }
}
/**
 * Interpret a Bitstream URI in the manifest.
 *
 * When the "internal" packager option is true, the path is parsed as a URI
 * and dereferenced through the BitstreamStorageManager to a bitstream row,
 * from which the existing Bitstream is loaded. Deleted bitstreams are
 * accepted on purpose, since manifests of older versions may refer to them.
 *
 * When the "internal" option is false (the default), nothing is resolved
 * and null is returned; resolving a path relative to the package is not
 * implemented here.
 *
 * @param context DSpace Context
 * @param mfile manifest file element (not used by this implementation)
 * @param params packager parameters; only the "internal" option is read
 * @param path the file reference taken from the manifest
 * @return the existing Bitstream, or null when the "internal" option is off
 * @throws SQLException passed on from the database lookups
 * @throws PackageValidationException if the path is not a valid URI or
 *         does not lead to an existing Bitstream
 */
public Bitstream resolveBitstream(Context context, Element mfile,
        PackageParameters params, String path)
    throws SQLException, PackageValidationException
{
    // Only "internal" AIPs are handled: hook up the bitstreams referenced
    // by bitstream URIs.
    if (!params.getBooleanProperty("internal", false))
    {
        return null;
    }

    try
    {
        TableRow row = BitstreamStorageManager.dereferenceAbsoluteURI(context, new URI(path));

        // NOTE(review): whether dereferenceAbsoluteURI can return null is not
        // visible from here; guard anyway so a miss is reported as a
        // validation error rather than a NullPointerException.
        Bitstream result = (row == null)
                ? null
                : Bitstream.find(context, row.getIntColumn("bitstream_id"));
        if (result == null)
        {
            throw new PackageValidationException("Package refers to a Bitstream that cannot be found (check assetstore), URI="+path);
        }

        //TODO MRD: no isDeleted() check here -- we will have deleted
        // bitstreams referenced in manifests due to versioning.
        return result;
    }
    catch (URISyntaxException e)
    {
        // Keep the stack trace in the log, then reject the package: a null
        // return here would only fail later, and less clearly, in the caller.
        log.error("bad bitstream path URI: ", e);
        throw new PackageValidationException("Package refers to a Bitstream using a malformed URI, URI="+path);
    }
}
/**
 * Replace an existing DSpace object with the contents of a METS-based
 * package. All contents are dictated by the METS manifest. Package is a ZIP
 * archive (or optionally bare manifest XML document). In a Zip, all files
 * relative to top level and the manifest (as per spec) in mets.xml.
 *
 * This method is similar to ingest(), except that if the object already
 * exists in DSpace, it is replaced in place through replaceObject(). If no
 * existing object can be determined, the package is ingested as a new
 * object instead (i.e. a "restore").
 *
 * Any SQLException is passed on unchanged; per the original author's note,
 * no cleanup is needed because the transaction rollback restores the
 * previous state.
 *
 * @param context
 *            DSpace Context
 * @param dsoToReplace
 *            DSpace Object to be replaced (may be null if it will be
 *            specified in the METS manifest itself)
 * @param pkgFile
 *            The package file to ingest
 * @param params
 *            Parameters passed from the packager script
 * @return DSpaceObject created or replaced by the ingest; null if the
 *         underlying ingest/replace returned no object
 * @throws PackageValidationException
 *             if package is unacceptable or there is a fatal error turning
 *             it into a DSpace Object.
 * @throws IOException
 * @throws SQLException
 * @throws AuthorizeException
 * @throws CrosswalkException
 */
@Override
public DSpaceObject replace(Context context, DSpaceObject dsoToReplace,
        File pkgFile, PackageParameters params)
    throws PackageValidationException, CrosswalkException,
           AuthorizeException, SQLException, IOException
{
    log.info(LogManager.getHeader(context, "package_parse",
            "Parsing package for replace, file=" + pkgFile.getName()));

    // Parse our ingest package, extracting out the METS manifest in the
    // package
    METSManifest manifest = parsePackage(context, pkgFile, params);

    // must have a METS Manifest to replace anything
    if (manifest == null)
    {
        throw new PackageValidationException(
                "No METS Manifest found (filename="
                + METSManifest.MANIFEST_FILE
                + ").  Package is unacceptable!");
    }

    // It's possible that the object to replace will be passed in as
    // null. Let's determine the handle of the object to replace.
    if (dsoToReplace == null)
    {
        // since we don't know what we are replacing, we'll have to
        // try to determine it from the parsed manifest.
        // Handle of object described by METS should be in OBJID
        String handleURI = manifest.getObjID();
        String handle = decodeHandleURN(handleURI);
        try
        {
            // Attempt to resolve this handle to an existing object
            dsoToReplace = HandleManager.resolveToObject(context, handle);
        }
        catch (IllegalStateException ignored)
        {
            // We don't care if this errors out -- we can continue
            // whether or not an object exists with this handle.
        }
    }

    // NOTE: At this point, it's still possible we don't have an object
    // to replace. This could happen when there is actually no existing
    // object in DSpace using that handle. (In which case, we're
    // actually just doing a "restore" -- so we aren't going to throw an
    // error or complain.)
    DSpaceObject dso;
    if (dsoToReplace == null)
    {
        // As this object doesn't already exist, we will perform an
        // ingest of a new object in order to restore it.
        // NOTE: passing 'null' as parent object in order to force
        // ingestObject() method to determine parent using manifest.
        dso = ingestObject(context, null, manifest, pkgFile, params, null);

        if (dso != null)
        {
            log.info(LogManager.getHeader(context, "package_replace",
                    "Created new Object, type="
                    + Constants.typeText[dso.getType()]
                    + ", handle=" + dso.getHandle() + ", dbID="
                    + String.valueOf(dso.getID())));
        }
    }
    else
    {
        // We found the DSpaceObject to replace -- so, replace it!
        // NOTE: This will perform an in-place replace of all metadata
        // and files currently associated with the object.
        dso = replaceObject(context, dsoToReplace, manifest, pkgFile,
                params, null);

        // Same null guard as the restore branch above, so that a missing
        // result is returned as null instead of failing while logging.
        if (dso != null)
        {
            log.info(LogManager.getHeader(context, "package_replace",
                    "Replaced Object, type="
                    + Constants.typeText[dso.getType()]
                    + ", handle=" + dso.getHandle() + ", dbID="
                    + String.valueOf(dso.getID())));
        }
    }

    // If ingest/restore/replace was successful and the Packager is running
    // recursively, register every child package named in the manifest.
    // NOTE: The AbstractPackageIngester itself will perform the
    // recursive ingest call, based on these child pkg references.
    if (dso != null && params.recursiveModeEnabled())
    {
        String[] childFilePaths = manifest.getChildMetsFilePaths();
        for (String childFilePath : childFilePaths)
        {
            addPackageReference(dso, childFilePath);
        }
    }

    return dso;
}
}