All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dspace.content.packager.InternalDSpaceAIPIngester Maven / Gradle / Ivy

There is a newer version: 5.4.2
Show newest version
/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.content.packager;

import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.storage.bitstore.BitstreamStorageManager;
import org.dspace.content.*;
import org.dspace.content.crosswalk.CrosswalkException;
import org.dspace.content.crosswalk.MetadataValidationException;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.LogManager;
import org.dspace.handle.HandleManager;
import org.dspace.storage.rdbms.TableRow;
import org.jdom.Element;

import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.sql.SQLException;
import java.util.Iterator;
import java.util.List;

/**
 * InternalDSpaceAIPIngester will be used to restore previous versions of Items from Assetstore.
 * Overrides existing AIP behavior, but must be used in "manifestOnly" mode.
 */
public class InternalDSpaceAIPIngester extends AbstractInternalMETSIngester {

    /** log4j log */
    private static Logger log = Logger.getLogger(InternalDSpaceAIPIngester.class);


    /**
     * Ensure it's an AIP generated by the complementary AIP disseminator.
     */
    @Override
    void checkManifest(METSManifest manifest)
            throws MetadataValidationException
    {
        String profile = manifest.getProfile();
        if (profile == null)
        {
            throw new MetadataValidationException("Cannot accept METS with no PROFILE attribute!");
        }
        else if (!profile.equals(DSpaceAIPDisseminator.PROFILE_1_0))
        {
            throw new MetadataValidationException("METS has unacceptable PROFILE attribute, profile=" + profile);
        }
    }


    /**
     * Choose DMD section(s) to crosswalk.
     * 

* The algorithm is:
* 1. Use whatever the dmd parameter specifies as the primary DMD.
* 2. If (1) is unspecified, find DIM (preferably) or MODS as primary DMD.
* 3. If (1) or (2) succeeds, crosswalk it and ignore all other DMDs with * same GROUPID
* 4. Crosswalk remaining DMDs not eliminated already. */ @Override public void crosswalkObjectDmd(Context context, DSpaceObject dso, METSManifest manifest, MdrefManager callback, Element dmds[], PackageParameters params) throws CrosswalkException, PackageValidationException, AuthorizeException, SQLException, IOException { int found = -1; // Check to see what dmdSec the user specified in the 'dmd' parameter String userDmd = null; if (params != null) { userDmd = params.getProperty("dmd"); } if (userDmd != null && userDmd.length() > 0) { for (int i = 0; i < dmds.length; ++i) { if (userDmd.equalsIgnoreCase(manifest.getMdType(dmds[i]))) { found = i; } } } // DIM is preferred, if nothing specified by user if (found == -1) { // DIM is preferred for AIP for (int i = 0; i < dmds.length; ++i) { //NOTE: METS standard actually says this should be DIM (all uppercase). But, // just in case, we're going to be a bit more forgiving. if ("DIM".equalsIgnoreCase(manifest.getMdType(dmds[i]))) { found = i; } } } // MODS is acceptable otehrwise.. if (found == -1) { for (int i = 0; i < dmds.length; ++i) { //NOTE: METS standard actually says this should be MODS (all uppercase). But, // just in case, we're going to be a bit more forgiving. if ("MODS".equalsIgnoreCase(manifest.getMdType(dmds[i]))) { found = i; } } } String groupID = null; if (found >= 0) { manifest.crosswalkItemDmd(context, params, dso, dmds[found], callback); groupID = dmds[found].getAttributeValue("GROUPID"); if (groupID != null) { for (int i = 0; i < dmds.length; ++i) { String g = dmds[i].getAttributeValue("GROUPID"); if (g != null && !g.equals(groupID)) { manifest.crosswalkItemDmd(context, params, dso, dmds[i], callback); } } } } // otherwise take the first. Don't xwalk more than one because // each xwalk _adds_ metadata, and could add duplicate fields. else if (dmds.length > 0) { manifest.crosswalkItemDmd(context, params, dso, dmds[0], callback); } // it's an error if there is nothing to crosswalk: else { throw new MetadataValidationException("DSpaceAIPIngester: Could not find an acceptable object-wide DMD section in manifest."); } } /** * Ignore license when restoring an manifest-only AIP, since it should * be a bitstream in the AIP already. * Otherwise: Check item for license first; then, take deposit * license supplied by explicit argument next, else use collection's * default deposit license. * Normally the rightsMD crosswalks should provide a license. */ @Override public void addLicense(Context context, Item item, String license, Collection collection, PackageParameters params) throws PackageValidationException, AuthorizeException, SQLException, IOException { boolean newLicense = false; if(!params.restoreModeEnabled()) { //AIP is not being restored/replaced, so treat it like a SIP -- every new SIP needs a new license newLicense = true; } // Add deposit license if there isn't one in the object, // and it's not a restoration of an "manifestOnly" AIP: if (!params.getBooleanProperty("manifestOnly", false) && PackageUtils.findDepositLicense(context, item) == null) { newLicense = true; } if(newLicense) { PackageUtils.addDepositLicense(context, license, item, collection); } } /** * Last change to fix up a DSpace Object. *

* For AIPs, if the object is an Item, we may want to make sure all of its * metadata fields already exist in the database (otherwise, the database * will throw errors when we attempt to save/update the Item) * * @param context DSpace Context * @param dso DSpace object * @param params Packager Parameters */ @Override public void finishObject(Context context, DSpaceObject dso, PackageParameters params) throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException { if(dso.getType()==Constants.ITEM) { // Check if 'createMetadataFields' option is enabled (default=true) // This defaults to true as by default we should attempt to restore as much metadata as we can. // When 'createMetadataFields' is set to false, an ingest will fail if it attempts to ingest content to a missing metadata field. if (params.getBooleanProperty("createMetadataFields", true)) { // We want to verify that all the Metadata Fields we've crosswalked // actually *exist* in the DB. If not, we'll try to create them createMissingMetadataFields(context, (Item) dso); } } } /** * Nothing extra to do to bitstream after ingestion. */ @Override public void finishBitstream(Context context, Bitstream bs, Element mfile, METSManifest manifest, PackageParameters params) throws MetadataValidationException, SQLException, AuthorizeException, IOException { // nothing to do. } /** * Return the type of DSpaceObject in this package; it is * in the TYPE attribute of the mets:mets element. */ @Override public int getObjectType(METSManifest manifest) throws PackageValidationException { Element mets = manifest.getMets(); String typeStr = mets.getAttributeValue("TYPE"); if (typeStr == null || typeStr.length() == 0) { throw new PackageValidationException("Manifest is missing the required mets@TYPE attribute."); } if (typeStr.startsWith("DSpace ")) { typeStr = typeStr.substring(7); } int type = Constants.getTypeID(typeStr); if (type < 0) { throw new PackageValidationException("Manifest has unrecognized value in mets@TYPE attribute: " + typeStr); } return type; } /** * Name used to distinguish DSpace Configuration entries for this subclass. */ @Override public String getConfigurationName() { return "dspaceAIP"; } /** * Verifies that all the unsaved, crosswalked metadata fields that have * been added to an Item actually exist in our Database. If they don't * exist, they are created within the proper database tables. *

* This method must be called *before* item.update(), as the call to update() * will throw a SQLException when attempting to save any fields which * don't already exist in the database. *

* NOTE: This will NOT create a missing Metadata Schema (e.g. "dc" schema), * as we do not have enough info to create schemas on the fly. * * @param context - DSpace Context * @param item - Item whose unsaved metadata fields we are testing * @throws AuthorizeException if a metadata field doesn't exist and current user is not authorized to create it (i.e. not an Admin) * @throws PackageValidationException if a metadata schema doesn't exist, as we cannot autocreate a schema */ protected static void createMissingMetadataFields(Context context, Item item) throws PackageValidationException, AuthorizeException, IOException, SQLException { // Get all metadata fields/values currently added to this Item Metadatum allMD[] = item.getMetadata(Item.ANY, Item.ANY, Item.ANY, Item.ANY); // For each field, we'll check if it exists. If not, we'll create it. for(Metadatum md : allMD) { MetadataSchema mdSchema = null; MetadataField mdField = null; try { //Try to access this Schema mdSchema = MetadataSchema.find(context, md.schema); //If Schema found, try to locate field from database if(mdSchema!=null) { mdField = MetadataField.findByElement(context, mdSchema.getSchemaID(), md.element, md.qualifier); } } catch(SQLException se) { //If a SQLException error is thrown, then this field does NOT exist in DB //Set field to null, so we know we need to create it mdField = null; } // If our Schema was not found, we have a problem // We cannot easily create a Schema automatically -- as we don't know its Namespace if(mdSchema==null) { throw new PackageValidationException("Unknown Metadata Schema encountered (" + md.schema + ") when attempting to ingest an Item. You will need to create this Metadata Schema in DSpace Schema Registry before the Item can be ingested."); } // If our Metadata Field is null, we will attempt to create it in the proper Schema if(mdField==null) { try { //initialize field (but don't set a scope note) & create it mdField = new MetadataField(mdSchema, md.element, md.qualifier, null); // NOTE: Only Adminstrators can create Metadata Fields -- create() will throw an AuthorizationException for non-Admins mdField.create(context); //log that field was created log.info("Located a missing metadata field (schema:'" + mdSchema.getName() +"', element:'"+ md.element +"', qualifier:'"+ md.qualifier +"') while ingesting Item. This missing field has been created in the DSpace Metadata Field Registry."); } catch(NonUniqueMetadataException ne) { // This exception should never happen, as we already checked to make sure the field doesn't exist. // But, we'll catch it anyways so that the Java compiler doesn't get upset throw new SQLException("Unable to create Metadata Field (element='" + md.element + "', qualifier='" + md.qualifier + "') in Schema "+ mdSchema.getName() +".", ne); } } } } /** * Returns a user help string which should describe the * additional valid command-line options that this packager * implementation will accept when using the -o or * --option flags with the Packager script. * * @return a string describing additional command-line options available * with this packager */ @Override public String getParameterHelp() { String parentHelp = super.getParameterHelp(); //Return superclass help info, plus the extra parameters/options that this class supports return parentHelp + "\n\n" + "* createMetadataFields=[boolean] " + "If true, ingest attempts to create any missing metadata fields." + "If false, ingest will fail if a metadata field is encountered which doesn't already exist. (default = true)" + "\n\n" + "* dmd=[dmdSecType] " + "Type of the METS which should be used to restore item metadata (defaults to DIM, then MODS)"; } /** * Add Bitstreams to an Item, based on the files listed in the METS Manifest * * @param context * DSpace Context * @param item * DSpace Item * @param manifest * METS Manifest * @param pkgFile * the full package file (which may include content files if a * zip) * @param params * Ingestion Parameters * @param mdRefCallback * MdrefManager storing info about mdRefs in manifest * @throws SQLException * @throws IOException * @throws AuthorizeException * @throws org.dspace.content.crosswalk.MetadataValidationException * @throws CrosswalkException * @throws PackageValidationException */ protected void addBitstreams(Context context, Item item, METSManifest manifest, File pkgFile, PackageParameters params, MdrefManager mdRefCallback) throws SQLException, IOException, AuthorizeException, MetadataValidationException, CrosswalkException, PackageValidationException { // Step 1 -- find the ID of the primary or Logo bitstream in manifest String primaryID = null; Element primaryFile = manifest.getPrimaryOrLogoBitstream(); if (primaryFile != null) { primaryID = primaryFile.getAttributeValue("ID"); if (log.isDebugEnabled()) { log.debug("Got primary bitstream file ID=\"" + primaryID + "\""); } } // Step 2 -- find list of all content files from manifest // Loop through these files, and add them one by one to Item List manifestContentFiles = manifest .getContentFiles(); boolean setPrimaryBitstream = false; BitstreamFormat unknownFormat = BitstreamFormat.findUnknown(context); for (Iterator mi = manifestContentFiles.iterator(); mi .hasNext();) { Element mfile = mi.next(); // basic validation -- check that it has an ID attribute String mfileID = mfile.getAttributeValue("ID"); if (mfileID == null) { throw new PackageValidationException( "Invalid METS Manifest: file element without ID attribute."); } // retrieve path/name of file in manifest String path = METSManifest.getFileName(mfile); //TODO : THIS MUST BE ALTERED TO SUPPORT ATTACHING EXISTING BITSTREAMS FROM ASSETSTORE // extract the file input stream from package (or retrieve // externally, if it is an externally referenced file) // TODO replace with Bitstream b = Bitstream.dereferenceAbsoluteURI(context, path) // InputStream fileStream = getFileInputStream(pkgFile, params, path); Bitstream bitstream = resolveBitstream(context, mfile, params, path); // retrieve bundle name from manifest String bundleName = METSManifest.getBundleName(mfile); // Find or create the bundle where bitstream should be attached Bundle bundle; Bundle bns[] = item.getBundles(bundleName); if (bns != null && bns.length > 0) { bundle = bns[0]; } else { bundle = item.createBundle(bundleName); } // Create the bitstream in the bundle & initialize its name //TODO REPLACE WITH...bundle.addBitstream(b); bundle.addBitstream(bitstream); //TODO SET Bitream.deleted(false); // TODO stop doing this //Bitstream bitstream = bundle.createBitstream(fileStream); //bitstream.setName(path); // Set bitstream sequence id, if known //TODO check if we need to do this String seqID = mfile.getAttributeValue("SEQ"); if(seqID!=null && !seqID.isEmpty()) bitstream.setSequenceID(Integer.parseInt(seqID)); // crosswalk this bitstream's administrative metadata located in // METS manifest (or referenced externally) //TODO check if we need to do this manifest.crosswalkBitstream(context, params, bitstream, mfileID, mdRefCallback); // is this the primary bitstream? if (primaryID != null && mfileID.equals(primaryID)) { bundle.setPrimaryBitstreamID(bitstream.getID()); bundle.update(); setPrimaryBitstream = true; } // Run any finishing activities -- this allows subclasses to // change default bitstream information //TODO see if we need to do this as well. finishBitstream(context, bitstream, mfile, manifest, params); // Last-ditch attempt to divine the format, if crosswalk failed to // set it: // 1. attempt to guess from MIME type // 2. if that fails, guess from "name" extension. //TODO see if we need to do this as well. if (bitstream.getFormat().equals(unknownFormat)) { if (log.isDebugEnabled()) { log.debug("Guessing format of Bitstream left un-set: " + bitstream.toString()); } String mimeType = mfile.getAttributeValue("MIMETYPE"); BitstreamFormat bf = (mimeType == null) ? null : BitstreamFormat.findByMIMEType(context, mimeType); if (bf == null) { bf = FormatIdentifier.guessFormat(context, bitstream); } bitstream.setFormat(bf); } bitstream.update(); }// end for each manifest file // Step 3 -- Sanity checks // sanity check for primary bitstream if (primaryID != null && !setPrimaryBitstream) { log.warn("Could not find primary bitstream file ID=\"" + primaryID + "\" in manifest file \"" + pkgFile.getAbsolutePath() + "\""); } } /** * Interpret a Bitstream URI in the manifest. If this is an * "internal" AIP, it refers directly to a file in the assetstore. * Otherwise, it will be match the relative path of an entry in the package. */ public Bitstream resolveBitstream(Context context, Element mfile, PackageParameters params, String path) throws SQLException, PackageValidationException { // to ingest an internal AIP, just hook up the bitstreams // referenced by bitstream URIs -- if (params.getBooleanProperty("internal", false)) { try { TableRow row = BitstreamStorageManager.dereferenceAbsoluteURI(context, new URI(path)); Bitstream result = Bitstream.find(context,row.getIntColumn("bitstream_id")); if (result == null) throw new PackageValidationException("Package refers to a Bitstream that cannot be found (check assetstore), URI="+path); //TODO MRD: commented out, we will have deleted bitstreams referenced in manifests due to versioning. // else if (result.isDeleted()) // throw new PackageValidationException("Package refers to a Bitstream that has been deleted: "+result); return result; } catch (URISyntaxException e) { log.error("bad bitstream path URI: ", e); return null; } } return null; } /** * Replace an existing DSpace object with the contents of a METS-based * package. All contents are dictated by the METS manifest. Package is a ZIP * archive (or optionally bare manifest XML document). In a Zip, all files * relative to top level and the manifest (as per spec) in mets.xml. *

* This method is similar to ingest(), except that if the object already * exists in DSpace, it is emptied of files and metadata. The METS-based * package is then used to ingest new values for these. * * @param context * DSpace Context * @param dsoToReplace * DSpace Object to be replaced (may be null if it will be * specified in the METS manifest itself) * @param pkgFile * The package file to ingest * @param params * Parameters passed from the packager script * @return DSpaceObject created by ingest. * @throws PackageValidationException * if package is unacceptable or there is a fatal error turning * it into a DSpace Object. * @throws IOException * @throws SQLException * @throws AuthorizeException * @throws CrosswalkException */ @Override public DSpaceObject replace(Context context, DSpaceObject dsoToReplace, File pkgFile, PackageParameters params) throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException { // parsed out METS Manifest from the file. METSManifest manifest = null; // resulting DSpace Object DSpaceObject dso = null; try { log.info(LogManager.getHeader(context, "package_parse", "Parsing package for replace, file=" + pkgFile.getName())); // Parse our ingest package, extracting out the METS manifest in the // package manifest = parsePackage(context, pkgFile, params); // must have a METS Manifest to replace anything if (manifest == null) { throw new PackageValidationException( "No METS Manifest found (filename=" + METSManifest.MANIFEST_FILE + "). Package is unacceptable!"); } // It's possible that the object to replace will be passed in as // null. Let's determine the handle of the object to replace. if (dsoToReplace == null) { // since we don't know what we are replacing, we'll have to // try to determine it from the parsed manifest // Handle of object described by METS should be in OBJID String handleURI = manifest.getObjID(); String handle = decodeHandleURN(handleURI); try { // Attempt to resolve this handle to an existing object dsoToReplace = HandleManager.resolveToObject(context, handle); } catch (IllegalStateException ie) { // We don't care if this errors out -- we can continue // whether or not an object exists with this handle. } } // NOTE: At this point, it's still possible we don't have an object // to replace. This could happen when there is actually no existing // object in DSpace using that handle. (In which case, we're // actually just doing a "restore" -- so we aren't going to throw an // error or complain.) // If we were unable to find the object to replace, then assume we // are restoring it if (dsoToReplace == null) { // As this object doesn't already exist, we will perform an // ingest of a new object in order to restore it // NOTE: passing 'null' as parent object in order to force // ingestObject() method to determine parent using manifest. dso = ingestObject(context, null, manifest, pkgFile, params, null); //if ingestion was successful if(dso!=null) { // Log that we created an object log.info(LogManager.getHeader(context, "package_replace", "Created new Object, type=" + Constants.typeText[dso.getType()] + ", handle=" + dso.getHandle() + ", dbID=" + String.valueOf(dso.getID()))); } } else // otherwise, we found the DSpaceObject to replace -- so, replace // it! { // Actually replace the object described by the METS Manifest. // NOTE: This will perform an in-place replace of all metadata // and files currently associated with the object. dso = replaceObject(context, dsoToReplace, manifest, pkgFile, params, null); // Log that we replaced an object log.info(LogManager.getHeader(context, "package_replace", "Replaced Object, type=" + Constants.typeText[dso.getType()] + ", handle=" + dso.getHandle() + ", dbID=" + String.valueOf(dso.getID()))); } //if ingest/restore/replace successful if(dso!=null) { // Check if the Packager is currently running recursively. // If so, this means the Packager will attempt to recursively // replace all referenced child packages. if (params.recursiveModeEnabled()) { // Retrieve list of all Child object METS file paths from the // current METS manifest. // This is our list of known child packages. String[] childFilePaths = manifest.getChildMetsFilePaths(); // Save this list to our AbstractPackageIngester (and note which // DSpaceObject the pkgs relate to) // NOTE: The AbstractPackageIngester itself will perform the // recursive ingest call, based on these child pkg references. for (int i = 0; i < childFilePaths.length; i++) { addPackageReference(dso, childFilePaths[i]); } } } return dso; } catch (SQLException se) { // no need to really clean anything up, // transaction rollback will get rid of it anyway, and will also // restore everything to previous state. dso = null; // Pass this exception on to the next handler. throw se; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy