org.dspace.content.packager.METSManifest Maven / Gradle / Ivy
Show all versions of dspace-api Show documentation
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.content.packager;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.codec.binary.Base64;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream;
import org.dspace.content.Bundle;
import org.dspace.content.DSpaceObject;
import org.dspace.content.crosswalk.AbstractPackagerWrappingCrosswalk;
import org.dspace.content.crosswalk.CrosswalkException;
import org.dspace.content.crosswalk.CrosswalkObjectNotSupported;
import org.dspace.content.crosswalk.IngestionCrosswalk;
import org.dspace.content.crosswalk.MetadataValidationException;
import org.dspace.content.crosswalk.StreamIngestionCrosswalk;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.factory.CoreServiceFactory;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.jdom.Content;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
import org.jdom.xpath.XPath;
/**
*
* Manage the METS manifest document for METS importer classes,
* such as the package importer org.dspace.content.packager.MetsSubmission
* and the federated importer org.dspace.app.mets.FederatedMETSImport
*
*
* It can parse the METS document, build an internal model, and give the importers
* access to that model. It also crosswalks
* all of the descriptive and administrative metadata in the METS
* manifest into the target DSpace Item, under control of the importer.
*
*
*
* It reads the following DSpace Configuration entries:
*
*
* - Local XML schema (XSD) declarations, in the general format:
*
mets.xsd.identifier = namespace xsd-URL
*
e.g. mets.xsd.dc = http://purl.org/dc/elements/1.1/ dc.xsd
*
Add a separate configuration entry for each schema.
*
* - Crosswalk plugin mappings:
* These tell it the name of the crosswalk plugin to invoke for metadata sections
* with a particular value of
MDTYPE
(or OTHERMDTYPE
)
* By default, the crosswalk mechanism will look for a plugin with the
* same name as the metadata type (e.g. "MODS"
,
* "DC"
). This example line invokes the QDC
* plugin when MDTYPE="DC"
*
mets.submission.crosswalk.DC = QDC
*
general format is:
*
mets.submission.crosswalk.mdType = pluginName
*
*
*
* @author Robert Tansley
* @author WeiHua Huang
* @author Rita Lee
* @author Larry Stone
*/
public class METSManifest {
/**
* Callback interface to retrieve data streams in mdRef elements.
* "Package" or file reader returns an input stream for the
* given relative path, e.g. to dereference mdRef
elements.
*/
public interface Mdref {
/**
* Make the contents of an external resource mentioned in
* an mdRef
element available as an InputStream
.
* The implementation must use the information in the
* mdRef
element, and the state in the object that
* implements this interface, to find the actual metadata content.
*
* For example, an implementation that ingests a directory of
* files on the local filesystem would get a relative pathname
* out of the mdRef
and open that file.
*
* @param mdRef JDOM element of mdRef in the METS manifest.
* @return stream containing the metadata mentioned in mdRef.
* @throws MetadataValidationException if the mdRef is unacceptable or missing required information.
* @throws PackageValidationException if package validation error
* @throws IOException if IO error
* @throws SQLException if database error
* @throws AuthorizeException if authorization error
*/
public InputStream getInputStream(Element mdRef)
throws MetadataValidationException, PackageValidationException,
IOException, SQLException, AuthorizeException;
}
/**
* log4j category
*/
private static final Logger log = LogManager.getLogger(METSManifest.class);
private static final ConfigurationService configurationService
= DSpaceServicesFactory.getInstance().getConfigurationService();
/**
* Canonical filename of METS manifest within a package or as a bitstream.
*/
public static final String MANIFEST_FILE = "mets.xml";
/**
* Prefix of DSpace configuration lines that map METS metadata type to
* crosswalk plugin names.
*/
public static final String CONFIG_METS_PREFIX = "mets.";
/**
* prefix of configuration lines identifying local XML Schema (XSD) files
*/
protected static final String CONFIG_XSD_PREFIX = CONFIG_METS_PREFIX + "xsd.";
/**
* Dublin core element namespace
*/
protected static final Namespace dcNS = Namespace
.getNamespace("http://purl.org/dc/elements/1.1/");
/**
* Dublin core term namespace (for qualified DC)
*/
protected static final Namespace dcTermNS = Namespace
.getNamespace("http://purl.org/dc/terms/");
/**
* METS namespace -- includes "mets" prefix for use in XPaths
*/
public static final Namespace metsNS = Namespace
.getNamespace("mets", "http://www.loc.gov/METS/");
/**
* XLink namespace -- includes "xlink" prefix prefix for use in XPaths
*/
public static final Namespace xlinkNS = Namespace
.getNamespace("xlink", "http://www.w3.org/1999/xlink");
/**
* root element of the current METS manifest.
*/
protected Element mets = null;
/**
* all mdRef elements in the manifest
*/
protected List mdFiles = null;
/**
* {@code } elements in "original" file group (bundle)
*/
protected List contentFiles = null;
protected List bundleFiles = null;
/**
* builder to use for mdRef streams, inherited from create()
*/
protected SAXBuilder parser = null;
/**
* name of packager who created this manifest object, for looking up configuration entries.
*/
protected String configName;
// Create list of local schemas at load time, since it depends only
// on the DSpace configuration.
protected static String localSchemas;
static {
String dspace_dir = configurationService.getProperty("dspace.dir");
File xsdPath1 = new File(dspace_dir + "/config/schemas/");
File xsdPath2 = new File(dspace_dir + "/config/");
List configKeys = configurationService.getPropertyKeys(CONFIG_XSD_PREFIX);
StringBuilder result = new StringBuilder();
for (String key : configKeys) {
// config lines have the format:
// mets.xsd.{identifier} = {namespace} {xsd-URL}
// e.g.
// mets.xsd.dc = http://purl.org/dc/elements/1.1/ dc.xsd
// (filename is relative to {dspace_dir}/config/schemas/)
String spec = configurationService.getProperty(key);
String val[] = spec.trim().split("\\s+");
if (val.length == 2) {
File xsd = new File(xsdPath1, val[1]);
if (!xsd.exists()) {
xsd = new File(xsdPath2, val[1]);
}
if (!xsd.exists()) {
log.warn("Schema file not found for config entry=\"{}\"", spec);
} else {
try {
String u = xsd.toURI().toURL().toString();
if (result.length() > 0) {
result.append(" ");
}
result.append(val[0]).append(" ").append(u);
} catch (java.net.MalformedURLException e) {
log.warn("Skipping badly formed XSD URL: {}", () -> e.toString());
}
}
} else {
log.warn("Schema config entry has wrong format, entry=\"{}\"", spec);
}
}
log.debug("Got local schemas = \"{}\"", () -> result.toString());
}
/**
* Default constructor, only called internally.
*
* @param builder XML parser (for parsing mdRef'd files and binData)
* @param mets parsed METS document
* @param configName configuration name
*/
protected METSManifest(SAXBuilder builder, Element mets, String configName) {
super();
this.mets = mets;
this.parser = builder;
this.configName = configName;
}
/**
* Create a new manifest object from a serialized METS XML document.
* Parse document read from the input stream, optionally validating.
*
* @param is input stream containing serialized XML
* @param validate if true, enable XML validation using schemas
* in document. Also validates any sub-documents.
* @param configName config name
* @return new METSManifest object.
* @throws IOException if IO error
* @throws MetadataValidationException if there is any error parsing
* or validating the METS.
*/
public static METSManifest create(InputStream is, boolean validate, String configName)
throws IOException,
MetadataValidationException {
SAXBuilder builder = new SAXBuilder(validate);
builder.setIgnoringElementContentWhitespace(true);
// Set validation feature
if (validate) {
builder.setFeature("http://apache.org/xml/features/validation/schema", true);
// Tell the parser where local copies of schemas are, to speed up
// validation & avoid XXE attacks from remote schemas. Local XSDs are identified in the configuration file.
if (localSchemas.length() > 0) {
builder.setProperty("http://apache.org/xml/properties/schema/external-schemaLocation", localSchemas);
}
} else {
// disallow DTD parsing to ensure no XXE attacks can occur.
// See https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html
builder.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
}
// Parse the METS file
Document metsDocument;
try {
metsDocument = builder.build(is);
/*** XXX leave commented out except if needed for
*** viewing the METS document that actually gets read.
*
* XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat());
* log.debug("Got METS DOCUMENT:");
* log.debug(outputPretty.outputString(metsDocument));
****/
} catch (JDOMException je) {
throw new MetadataValidationException("Error validating METS in "
+ is.toString(), je);
}
return new METSManifest(builder, metsDocument.getRootElement(), configName);
}
/**
* Gets name of the profile to which this METS document conforms.
*
* @return value the PROFILE attribute of mets element, or null if none.
*/
public String getProfile() {
return mets.getAttributeValue("PROFILE");
}
/**
* Return the OBJID attribute of the METS manifest.
* This is where the Handle URI/URN of the object can be found.
*
* @return OBJID attribute of METS manifest
*/
public String getObjID() {
return mets.getAttributeValue("OBJID");
}
/**
* Gets all file
elements which make up
* the item's content.
*
* @return a List of Element
s.
* @throws MetadataValidationException if validation error
*/
public List getBundleFiles()
throws MetadataValidationException {
if (bundleFiles != null) {
return bundleFiles;
}
bundleFiles = new ArrayList<>();
Element fileSec = mets.getChild("fileSec", metsNS);
if (fileSec != null) {
Iterator fgi = fileSec.getChildren("fileGrp", metsNS).iterator();
while (fgi.hasNext()) {
Element fg = (Element) fgi.next();
bundleFiles.add(fg);
}
}
return bundleFiles;
}
public List getContentFiles()
throws MetadataValidationException {
if (contentFiles != null) {
return contentFiles;
}
contentFiles = new ArrayList<>();
Element fileSec = mets.getChild("fileSec", metsNS);
if (fileSec != null) {
Iterator fgi = fileSec.getChildren("fileGrp", metsNS).iterator();
while (fgi.hasNext()) {
Element fg = (Element) fgi.next();
Iterator fi = fg.getChildren("file", metsNS).iterator();
while (fi.hasNext()) {
Element f = (Element) fi.next();
contentFiles.add(f);
}
}
}
return contentFiles;
}
/**
* Gets list of all mdRef
elements in the METS
* document. Used by ingester to e.g. check that all
* required files are present.
*
* @return a List of Element
s.
* @throws MetadataValidationException if validation error
*/
public List getMdFiles()
throws MetadataValidationException {
if (mdFiles == null) {
try {
// Use a special namespace with known prefix
// so we get the right prefix.
XPath xpath = XPath.newInstance("descendant::mets:mdRef");
xpath.addNamespace(metsNS);
mdFiles = xpath.selectNodes(mets);
} catch (JDOMException je) {
throw new MetadataValidationException("Failed while searching for mdRef elements in manifest: ", je);
}
}
return mdFiles;
}
/**
* Get the "original" file element for a derived file.
* Finds the original from which this was derived by matching the GROUPID
* attribute that binds it to its original. For instance, the file for
* a thumbnail image would have the same GROUPID as its full-size version.
*
* NOTE: This pattern of relating derived files through the GROUPID
* attribute is peculiar to the DSpace METS SIP profile, and may not be
* generally useful with other sorts of METS documents.
*
* @param file METS file element of derived file
* @return file path of original or null if none found.
*/
public String getOriginalFilePath(Element file) {
String groupID = file.getAttributeValue("GROUPID");
if (groupID == null || groupID.equals("")) {
return null;
}
try {
XPath xpath = XPath.newInstance(
"mets:fileSec/mets:fileGrp[@USE=\"CONTENT\"]/mets:file[@GROUPID=\"" + groupID + "\"]");
xpath.addNamespace(metsNS);
List oFiles = xpath.selectNodes(mets);
if (oFiles.size() > 0) {
if (log.isDebugEnabled()) {
log.debug("Got ORIGINAL file for derived=" + file.toString());
}
Element flocat = ((Element) oFiles.get(0)).getChild("FLocat", metsNS);
if (flocat != null) {
return flocat.getAttributeValue("href", xlinkNS);
}
}
return null;
} catch (JDOMException je) {
log.warn("Got exception on XPATH looking for Original file, " + je.toString());
return null;
}
}
// translate bundle name from METS to DSpace; METS may be "CONTENT"
// or "ORIGINAL" for the DSpace "ORIGINAL", rest are left alone.
protected static String normalizeBundleName(String in) {
if (in.equals("CONTENT")) {
return Constants.CONTENT_BUNDLE_NAME;
} else if (in.equals("MANIFESTMD")) {
return Constants.METADATA_BUNDLE_NAME;
}
return in;
}
/**
* Get the DSpace bundle name corresponding to the USE
* attribute of the file group enclosing this file
element.
*
* @param file file element
* @return DSpace bundle name
* @throws MetadataValidationException when there is no USE attribute on the enclosing fileGrp.
*/
public static String getBundleName(Element file)
throws MetadataValidationException {
return getBundleName(file, true);
}
/**
* Get the DSpace bundle name corresponding to the USE
* attribute of the file group enclosing this file
element.
*
* @param file file element
* @param getParent parent flag
* @return DSpace bundle name
* @throws MetadataValidationException when there is no USE attribute on the enclosing fileGrp.
*/
public static String getBundleName(Element file, boolean getParent)
throws MetadataValidationException {
Element fg = file;
if (getParent) {
fg = file.getParentElement();
}
String fgUse = fg.getAttributeValue("USE");
if (fgUse == null) {
throw new MetadataValidationException(
"Invalid METS Manifest: every fileGrp element must have a USE attribute.");
}
return normalizeBundleName(fgUse);
}
/**
* Get the "local" file name of this file
or mdRef
element.
* By "local" we mean the reference to the actual resource containing
* the data for this file, e.g. a relative path within a Zip or tar archive
* if the METS is serving as a manifest for that sort of package.
*
* @param file file element
* @return "local" file name (i.e. relative to package or content
* directory) corresponding to this file
or mdRef
element.
* @throws MetadataValidationException when there is not enough information to find a resource identifier.
*/
public static String getFileName(Element file)
throws MetadataValidationException {
Element ref;
if (file.getName().equals("file")) {
ref = file.getChild("FLocat", metsNS);
if (ref == null) {
// check for forbidden FContent child first:
if (file.getChild("FContent", metsNS) == null) {
throw new MetadataValidationException(
"Invalid METS Manifest: Every file element must have FLocat child.");
} else {
throw new MetadataValidationException(
"Invalid METS Manifest: file element has forbidden FContent child, only FLocat is allowed.");
}
}
} else if (file.getName().equals("mdRef")) {
ref = file;
} else {
throw new MetadataValidationException(
"getFileName() called with recognized element type: " + file.toString());
}
String loctype = ref.getAttributeValue("LOCTYPE");
if (loctype != null && loctype.equals("URL")) {
String result = ref.getAttributeValue("href", xlinkNS);
if (result == null) {
throw new MetadataValidationException(
"Invalid METS Manifest: FLocat/mdRef is missing the required xlink:href attribute.");
}
return result;
}
throw new MetadataValidationException(
"Invalid METS Manifest: FLocat/mdRef does not have LOCTYPE=\"URL\" attribute.");
}
/**
* Returns file element corresponding to primary bitstream.
* There is ONLY a primary bitstream if the first {@code div} under
* first {@code structMap} has an {@code fptr}.
*
* @return file element of Item's primary bitstream, or null if there is none.
* @throws MetadataValidationException if validation error
*/
public Element getPrimaryOrLogoBitstream()
throws MetadataValidationException {
Element objDiv = getObjStructDiv();
Element fptr = objDiv.getChild("fptr", metsNS);
if (fptr == null) {
return null;
}
String id = fptr.getAttributeValue("FILEID");
if (id == null) {
throw new MetadataValidationException(
"fptr for Primary Bitstream is missing the required FILEID attribute.");
}
Element result = getElementByXPath("descendant::mets:file[@ID=\"" + id + "\"]", false);
if (result == null) {
throw new MetadataValidationException(
"Cannot find file element for Primary Bitstream: looking for ID=" + id);
}
return result;
}
/**
* Get the metadata type from within a *mdSec element.
*
* @param mdSec mdSec element
* @return metadata type name.
* @throws MetadataValidationException if validation error
*/
public String getMdType(Element mdSec)
throws MetadataValidationException {
Element md = mdSec.getChild("mdRef", metsNS);
if (md == null) {
md = mdSec.getChild("mdWrap", metsNS);
}
if (md == null) {
throw new MetadataValidationException(
"Invalid METS Manifest: ?mdSec element has neither mdRef nor mdWrap child.");
}
String result = md.getAttributeValue("MDTYPE");
if (result != null && result.equals("OTHER")) {
result = md.getAttributeValue("OTHERMDTYPE");
}
if (result == null) {
throw new MetadataValidationException(
"Invalid METS Manifest: " + md.getName() + " has no MDTYPE or OTHERMDTYPE attribute.");
}
return result;
}
/**
* Returns MIME type of metadata content, if available.
*
* @param mdSec mdSec element
* @return MIMEtype word, or null if none is available.
* @throws MetadataValidationException if validation error
*/
public String getMdContentMimeType(Element mdSec)
throws MetadataValidationException {
Element mdWrap = mdSec.getChild("mdWrap", metsNS);
if (mdWrap != null) {
String mimeType = mdWrap.getAttributeValue("MIMETYPE");
if (mimeType == null && mdWrap.getChild("xmlData", metsNS) != null) {
mimeType = "text/xml";
}
return mimeType;
}
Element mdRef = mdSec.getChild("mdRef", metsNS);
if (mdRef != null) {
return mdRef.getAttributeValue("MIMETYPE");
}
return null;
}
/**
* Return contents of *md element as List of XML Element objects.
* Gets content, dereferencing mdRef if necessary, or decoding and parsing
* a binData that contains XML.
*
* @param mdSec mdSec element
* @param callback mdref callback
* @return contents of metadata section, or empty list if no XML content is available.
* @throws MetadataValidationException if METS is invalid, or there is an error parsing the XML.
* @throws PackageValidationException if invalid package
* @throws IOException if IO error
* @throws SQLException if database error
* @throws AuthorizeException if authorization error
*/
private List getMdContentAsXml(Element mdSec, Mdref callback)
throws MetadataValidationException, PackageValidationException,
IOException, SQLException, AuthorizeException {
try {
// XXX sanity check: if this has more than one child, consider it
// an error since we cannot deal with more than one mdRef|mdWrap
// child. This may be considered a bug and need to be fixed,
// so it's best to bring it to the attention of users.
List mdc = mdSec.getChildren();
if (mdc.size() > 1) {
// XXX scaffolding for debugging diagnosis; at least one
// XML parser stupidly includes newlines in prettyprinting
// as text content objects..
String id = mdSec.getAttributeValue("ID");
StringBuilder sb = new StringBuilder();
for (Iterator mi = mdc.iterator(); mi.hasNext(); ) {
sb.append(", ").append(((Content) mi.next()).toString());
}
throw new MetadataValidationException("Cannot parse METS with " + mdSec
.getQualifiedName() + " element that contains more than one child, size=" + String
.valueOf(mdc.size()) + ", ID=" + id + "Kids=" + sb.toString());
}
Element mdRef = null;
Element mdWrap = mdSec.getChild("mdWrap", metsNS);
if (mdWrap != null) {
Element xmlData = mdWrap.getChild("xmlData", metsNS);
if (xmlData == null) {
Element bin = mdWrap.getChild("binData", metsNS);
if (bin == null) {
throw new MetadataValidationException(
"Invalid METS Manifest: mdWrap element with neither xmlData nor binData child.");
} else {
// if binData is actually XML, return it; otherwise ignore.
String mimeType = mdWrap.getAttributeValue("MIMETYPE");
if (mimeType != null && mimeType.equalsIgnoreCase("text/xml")) {
byte value[] = Base64.decodeBase64(bin.getText().getBytes(StandardCharsets.UTF_8));
Document mdd = parser.build(new ByteArrayInputStream(value));
List result = new ArrayList<>(1);
result.add(mdd.getRootElement());
return result;
} else {
log.warn("Ignoring binData section because MIMETYPE is not XML, but: " + mimeType);
return new ArrayList<>(0);
}
}
} else {
return xmlData.getChildren();
}
} else {
mdRef = mdSec.getChild("mdRef", metsNS);
if (mdRef != null) {
String mimeType = mdRef.getAttributeValue("MIMETYPE");
if (mimeType != null && mimeType.equalsIgnoreCase("text/xml")) {
// This next line triggers a false-positive XXE warning from LGTM, even though we disallow DTD
// parsing during initialization of parser in create()
Document mdd = parser.build(callback.getInputStream(mdRef)); // lgtm [java/xxe]
List result = new ArrayList<>(1);
result.add(mdd.getRootElement());
return result;
} else {
log.warn("Ignoring mdRef section because MIMETYPE is not XML, but: " + mimeType);
return new ArrayList<>(0);
}
} else {
throw new MetadataValidationException(
"Invalid METS Manifest: ?mdSec element with neither mdRef nor mdWrap child.");
}
}
} catch (JDOMException je) {
throw new MetadataValidationException(
"Error parsing or validating metadata section in mdRef or binData within " + mdSec.toString(), je);
}
}
/**
* Return contents of *md element as stream.
* Gets content, dereferencing mdRef if necessary, or decoding
* a binData element if necessary.
*
* @param mdSec mdSec element
* @param callback mdref callback
* @return Stream containing contents of metadata section. Never returns null.
* @throws MetadataValidationException if METS format does not contain any metadata.
* @throws PackageValidationException if invalid package
* @throws IOException if IO error
* @throws SQLException if database error
* @throws AuthorizeException if authorization error
*/
public InputStream getMdContentAsStream(Element mdSec, Mdref callback)
throws MetadataValidationException, PackageValidationException,
IOException, SQLException, AuthorizeException {
Element mdRef = null;
Element mdWrap = mdSec.getChild("mdWrap", metsNS);
if (mdWrap != null) {
Element xmlData = mdWrap.getChild("xmlData", metsNS);
if (xmlData == null) {
Element bin = mdWrap.getChild("binData", metsNS);
if (bin == null) {
throw new MetadataValidationException(
"Invalid METS Manifest: mdWrap element with neither xmlData nor binData child.");
} else {
byte value[] = Base64.decodeBase64(bin.getText().getBytes(StandardCharsets.UTF_8));
return new ByteArrayInputStream(value);
}
} else {
XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat());
return new ByteArrayInputStream(
outputPretty.outputString(xmlData.getChildren()).getBytes(StandardCharsets.UTF_8));
}
} else {
mdRef = mdSec.getChild("mdRef", metsNS);
if (mdRef != null) {
return callback.getInputStream(mdRef);
} else {
throw new MetadataValidationException(
"Invalid METS Manifest: ?mdSec element with neither mdRef nor mdWrap child.");
}
}
}
/**
* Return the {@code } which describes this DSpace Object (and its contents)
* from the {@code }. In all cases, this is the first {@code }
* in the first {@code }.
*
* @return Element which is the DSpace Object Contents {@code }
* @throws MetadataValidationException if metadata validation error
*/
public Element getObjStructDiv()
throws MetadataValidationException {
//get first
Element sm = mets.getChild("structMap", metsNS);
if (sm == null) {
throw new MetadataValidationException("METS document is missing the required structMap element.");
}
//get first
Element result = sm.getChild("div", metsNS);
if (result == null) {
throw new MetadataValidationException(
"METS document is missing the required first div element in first structMap.");
}
if (log.isDebugEnabled()) {
log.debug("Got getObjStructDiv result=" + result.toString());
}
return (Element) result;
}
/**
* Get an array of child object {@code }s from the METS Manifest {@code }.
* These {@code }s reference the location of any child objects METS manifests.
*
* @return a List of {@code Element}s, each a {@code }. May be empty but NOT null.
* @throws MetadataValidationException if metadata validation error
*/
public List getChildObjDivs()
throws MetadataValidationException {
//get the in which describes the current object's contents
Element objDiv = getObjStructDiv();
//get the child s -- these should reference the child METS manifest
return objDiv.getChildren("div", metsNS);
}
/**
* Retrieve the file paths for the children objects' METS Manifest files.
* These file paths are located in the {@code } where @LOCTYPE=URL
*
* @return a list of Strings, corresponding to relative file paths of children METS manifests
* @throws MetadataValidationException if metadata validation error
*/
public String[] getChildMetsFilePaths()
throws MetadataValidationException {
//get our child object s
List childObjDivs = getChildObjDivs();
List childPathList = new ArrayList<>();
if (childObjDivs != null && !childObjDivs.isEmpty()) {
Iterator childIterator = childObjDivs.iterator();
//For each Div, we want to find the underlying with @LOCTYPE=URL
while (childIterator.hasNext()) {
Element childDiv = (Element) childIterator.next();
//get all child 's
List childMptrs = childDiv.getChildren("mptr", metsNS);
if (childMptrs != null && !childMptrs.isEmpty()) {
Iterator mptrIterator = childMptrs.iterator();
//For each mptr, we want to find the one with @LOCTYPE=URL
while (mptrIterator.hasNext()) {
Element mptr = (Element) mptrIterator.next();
String locType = mptr.getAttributeValue("LOCTYPE");
//if @LOCTYPE=URL, then capture @xlink:href as the METS Manifest file path
if (locType != null && locType.equals("URL")) {
String filePath = mptr.getAttributeValue("href", xlinkNS);
if (filePath != null && filePath.length() > 0) {
childPathList.add(filePath);
}
}
} //end loop
} //end if 's exist
} //end child loop
} //end if child s exist
String[] childPaths = new String[childPathList.size()];
childPaths = (String[]) childPathList.toArray(childPaths);
return childPaths;
}
/**
* Return the reference to the Parent Object from the "Parent" {@code }.
* This parent object is the owner of current object.
*
* @return Link to the Parent Object (this is the Handle of that Parent)
* @throws MetadataValidationException if metadata validation error
*/
public String getParentOwnerLink()
throws MetadataValidationException {
//get a list of our structMaps
List childStructMaps = mets.getChildren("structMap", metsNS);
Element parentStructMap = null;
// find the
if (!childStructMaps.isEmpty()) {
for (Element structMap : childStructMaps) {
String label = structMap.getAttributeValue("LABEL");
if (label != null && label.equalsIgnoreCase("Parent")) {
parentStructMap = structMap;
break;
}
}
}
if (parentStructMap == null) {
throw new MetadataValidationException(
"METS document is missing the required structMap[@LABEL='Parent'] element.");
}
//get first
Element linkDiv = parentStructMap.getChild("div", metsNS);
if (linkDiv == null) {
throw new MetadataValidationException(
"METS document is missing the required first div element in structMap[@LABEL='Parent'].");
}
//the link is in the in the @xlink:href attribute
Element mptr = linkDiv.getChild("mptr", metsNS);
if (mptr != null) {
return mptr.getAttributeValue("href", xlinkNS);
}
//return null if we couldn't find the link
return null;
}
// return a single Element node found by one-off path.
// use only when path varies each time you call it.
protected Element getElementByXPath(String path, boolean nullOk)
throws MetadataValidationException {
try {
XPath xpath = XPath.newInstance(path);
xpath.addNamespace(metsNS);
xpath.addNamespace(xlinkNS);
Object result = xpath.selectSingleNode(mets);
if (result == null && nullOk) {
return null;
} else if (result instanceof Element) {
return (Element) result;
} else {
throw new MetadataValidationException("METSManifest: Failed to resolve XPath, path=\"" + path + "\"");
}
} catch (JDOMException je) {
throw new MetadataValidationException("METSManifest: Failed to resolve XPath, path=\"" + path + "\"", je);
}
}
// Find crosswalk for the indicated metadata type (e.g. "DC", "MODS")
protected Object getCrosswalk(String type, Class clazz) {
/**
* Allow DSpace Config to map the metadata type to a
* different crosswalk name either per-packager or for METS
* in general. First, look for config key like:
* mets..ingest.crosswalk.MDNAME = XWALKNAME
* then try
* mets.default.ingest.crosswalk.MDNAME = XWALKNAME
*/
String xwalkName = configurationService.getProperty(
CONFIG_METS_PREFIX + configName + ".ingest.crosswalk." + type);
if (xwalkName == null) {
xwalkName = configurationService.getProperty(
CONFIG_METS_PREFIX + "default.ingest.crosswalk." + type);
if (xwalkName == null) {
xwalkName = type;
}
}
return CoreServiceFactory.getInstance().getPluginService().getNamedPlugin(clazz, xwalkName);
}
/**
* Gets all dmdSec elements containing metadata for the DSpace Item.
*
* @return array of Elements, each a dmdSec. May be empty but NOT null.
* @throws MetadataValidationException if the METS is missing a reference to item-wide
* DMDs in the correct place.
*/
public Element[] getItemDmds()
throws MetadataValidationException {
// div@DMDID is actually IDREFS, a space-separated list of IDs:
Element objDiv = getObjStructDiv();
String dmds = objDiv.getAttributeValue("DMDID");
if (dmds == null) {
throw new MetadataValidationException(
"Invalid METS: Missing reference to Item descriptive metadata, first div on first structmap must have" +
" a DMDID attribute.");
}
return getDmdElements(dmds);
}
/**
* Gets all dmdSec elements from a space separated list
*
* @param dmdList space-separated list of DMDIDs
* @return array of Elements, each a dmdSec. May be empty but NOT null.
* @throws MetadataValidationException if the METS is missing a reference to item-wide
* DMDs in the correct place.
*/
public Element[] getDmdElements(String dmdList)
throws MetadataValidationException {
if (dmdList != null && !dmdList.isEmpty()) {
String dmdID[] = dmdList.split("\\s+");
Element result[] = new Element[dmdID.length];
for (int i = 0; i < dmdID.length; ++i) {
result[i] = getElementByXPath("mets:dmdSec[@ID=\"" + dmdID[i] + "\"]", false);
}
return result;
} else {
return new Element[0];
}
}
/**
* Return rights metadata section(s) relevant to item as a whole.
*
* @return array of rightsMd elements, possibly empty but never null.
* @throws MetadataValidationException if METS is invalid, e.g. referenced amdSec is missing.
*/
public Element[] getItemRightsMD()
throws MetadataValidationException {
// div@ADMID is actually IDREFS, a space-separated list of IDs:
Element objDiv = getObjStructDiv();
String amds = objDiv.getAttributeValue("ADMID");
if (amds == null) {
if (log.isDebugEnabled()) {
log.debug("getItemRightsMD: No ADMID references found.");
}
return new Element[0];
}
String amdID[] = amds.split("\\s+");
List resultList = new ArrayList<>();
for (int i = 0; i < amdID.length; ++i) {
List rmds = getElementByXPath("mets:amdSec[@ID=\"" + amdID[i] + "\"]", false).
getChildren("rightsMD",
metsNS);
if (rmds.size() > 0) {
resultList.addAll(rmds);
}
}
return resultList.toArray(new Element[resultList.size()]);
}
/**
* Invokes appropriate crosswalks on Item-wide descriptive metadata.
*
* @param context context
* @param callback mdref callback
* @param dso DSpaceObject
* @param params package params
* @param dmdSec dmdSec element
* @throws MetadataValidationException if METS error
* @throws CrosswalkException if crosswalk error
* @throws PackageValidationException if invalid package
* @throws IOException if IO error
* @throws SQLException if database error
* @throws AuthorizeException if authorization error
*/
public void crosswalkItemDmd(Context context, PackageParameters params,
DSpaceObject dso,
Element dmdSec, Mdref callback)
throws MetadataValidationException, PackageValidationException,
CrosswalkException, IOException, SQLException, AuthorizeException {
crosswalkXmd(context, params, dso, dmdSec, callback, false);
}
/**
* Crosswalk all technical and source metadata sections that belong
* to the whole object.
*
* @param context context
* @param callback mdref callback
* @param params package params
* @param dso DSpaceObject
* @throws MetadataValidationException if METS is invalid, e.g. referenced amdSec is missing.
* @throws PackageValidationException if invalid package
* @throws IOException if IO error
* @throws SQLException if database error
* @throws AuthorizeException if authorization error
*/
public void crosswalkObjectOtherAdminMD(Context context, PackageParameters params,
DSpaceObject dso, Mdref callback)
throws MetadataValidationException, PackageValidationException,
CrosswalkException, IOException, SQLException, AuthorizeException {
for (String amdID : getAmdIDs()) {
Element amdSec = getElementByXPath("mets:amdSec[@ID=\"" + amdID + "\"]", false);
for (Iterator ti = amdSec.getChildren("techMD", metsNS).iterator(); ti.hasNext(); ) {
crosswalkXmd(context, params, dso, (Element) ti.next(), callback, false);
}
for (Iterator ti = amdSec.getChildren("digiprovMD", metsNS).iterator(); ti.hasNext(); ) {
crosswalkXmd(context, params, dso, (Element) ti.next(), callback, false);
}
for (Iterator ti = amdSec.getChildren("rightsMD", metsNS).iterator(); ti.hasNext(); ) {
crosswalkXmd(context, params, dso, (Element) ti.next(), callback, false);
}
}
}
/**
* Just crosswalk the sourceMD sections; used to set the handle and parent of AIP.
*
* @param context context
* @param callback mdref callback
* @param params package params
* @param dso DSpaceObject
* @return true if any metadata section was actually crosswalked, false otherwise
* @throws MetadataValidationException if METS is invalid, e.g. referenced amdSec is missing.
* @throws PackageValidationException if invalid package
* @throws IOException if IO error
* @throws SQLException if database error
* @throws AuthorizeException if authorization error
* @throws CrosswalkException if crosswalk error
*/
public boolean crosswalkObjectSourceMD(Context context, PackageParameters params,
DSpaceObject dso, Mdref callback)
throws MetadataValidationException, PackageValidationException,
CrosswalkException, IOException, SQLException, AuthorizeException {
boolean result = false;
for (String amdID : getAmdIDs()) {
Element amdSec = getElementByXPath("mets:amdSec[@ID=\"" + amdID + "\"]", false);
for (Iterator ti = amdSec.getChildren("sourceMD", metsNS).iterator(); ti.hasNext(); ) {
crosswalkXmd(context, params, dso, (Element) ti.next(), callback, false);
result = true;
}
}
return result;
}
/**
* Get an array of all AMDID values for this object
*
* @return array of all AMDID values for this object
* @throws MetadataValidationException if metadata validation error
*/
protected String[] getAmdIDs()
throws MetadataValidationException {
// div@ADMID is actually IDREFS, a space-separated list of IDs:
Element objDiv = getObjStructDiv();
String amds = objDiv.getAttributeValue("ADMID");
if (amds == null) {
if (log.isDebugEnabled()) {
log.debug("crosswalkObjectTechMD: No ADMID references found.");
}
return new String[0];
}
return amds.split("\\s+");
}
// Crosswalk *any* kind of metadata section - techMD, rightsMD, etc.
protected void crosswalkXmd(Context context, PackageParameters params,
DSpaceObject dso,
Element xmd, Mdref callback, boolean createMissingMetadataFields)
throws MetadataValidationException, PackageValidationException,
CrosswalkException, IOException, SQLException, AuthorizeException {
String type = getMdType(xmd);
//First, try to find the IngestionCrosswalk to use
IngestionCrosswalk xwalk = (IngestionCrosswalk) getCrosswalk(type, IngestionCrosswalk.class);
// If metadata is not simply applicable to object,
// let it go with a warning.
try {
// If we found the IngestionCrosswalk, crosswalk our XML-based content
if (xwalk != null) {
// Check if our Crosswalk actually wraps another Packager Plugin
if (xwalk instanceof AbstractPackagerWrappingCrosswalk) {
// If this crosswalk wraps another Packager Plugin, we can pass it our Packaging Parameters
// (which essentially allow us to customize the ingest process of the crosswalk)
AbstractPackagerWrappingCrosswalk wrapper = (AbstractPackagerWrappingCrosswalk) xwalk;
wrapper.setPackagingParameters(params);
}
xwalk.ingest(context, dso, getMdContentAsXml(xmd, callback), false);
} else {
// Otherwise, try stream-based crosswalk
StreamIngestionCrosswalk sxwalk =
(StreamIngestionCrosswalk) getCrosswalk(type, StreamIngestionCrosswalk.class);
if (sxwalk != null) {
// Check if our Crosswalk actually wraps another Packager Plugin
if (sxwalk instanceof AbstractPackagerWrappingCrosswalk) {
// If this crosswalk wraps another Packager Plugin, we can pass it our Packaging Parameters
// (which essentially allow us to customize the ingest process of the crosswalk)
AbstractPackagerWrappingCrosswalk wrapper = (AbstractPackagerWrappingCrosswalk) sxwalk;
wrapper.setPackagingParameters(params);
}
// If we found a Stream-based crosswalk that matches, we now want to
// locate the stream we are crosswalking. This stream should be
// references in METS via an element
// (which is how METS references external files)
Element mdRef = xmd.getChild("mdRef", metsNS);
if (mdRef != null) {
InputStream in = null;
try {
in = callback.getInputStream(mdRef);
sxwalk.ingest(context, dso, in,
mdRef.getAttributeValue("MIMETYPE"));
} finally {
if (in != null) {
in.close();
}
}
} else {
// If we couldn't find an , then we'll try an
// with a element instead.
// (this is how METS wraps embedded base64-encoded content streams)
Element mdWrap = xmd.getChild("mdWrap", metsNS);
if (mdWrap != null) {
Element bin = mdWrap.getChild("binData", metsNS);
if (bin == null) {
throw new MetadataValidationException(
"Invalid METS Manifest: mdWrap element for streaming crosswalk without binData " +
"child.");
} else {
byte value[] = Base64.decodeBase64(bin.getText().getBytes(StandardCharsets.UTF_8));
sxwalk.ingest(context, dso,
new ByteArrayInputStream(value),
mdWrap.getAttributeValue("MIMETYPE"));
}
} else {
throw new MetadataValidationException("Cannot process METS Manifest: " +
"Metadata of type=" + type + " requires a " +
"reference to a stream (mdRef), which was not " +
"found in " + xmd
.getName());
}
}
} else {
throw new MetadataValidationException("Cannot process METS Manifest: " +
"No crosswalk found for contents of " + xmd
.getName() + " element, MDTYPE=" + type);
}
}
} catch (CrosswalkObjectNotSupported e) {
log.warn("Skipping metadata section " + xmd
.getName() + ", type=" + type + " inappropriate for this type of object: Object=" + dso
.toString() + ", error=" + e.toString());
}
}
/**
* Crosswalk the metadata associated with a particular file
* element into the bitstream it corresponds to.
*
* @param context a dspace context.
* @param params any PackageParameters which may affect how bitstreams are crosswalked
* @param bitstream bitstream target of the crosswalk
* @param fileId value of ID attribute in the file element responsible
* for the contents of that bitstream.
* @param callback mdref callback
* @throws MetadataValidationException if METS is invalid, e.g. referenced amdSec is missing.
* @throws PackageValidationException if invalid package
* @throws IOException if IO error
* @throws SQLException if database error
* @throws AuthorizeException if authorization error
* @throws CrosswalkException if crosswalk error
*/
public void crosswalkBitstream(Context context, PackageParameters params,
Bitstream bitstream,
String fileId, Mdref callback)
throws MetadataValidationException, PackageValidationException,
CrosswalkException, IOException, SQLException, AuthorizeException {
Element file = getElementByXPath("descendant::mets:file[@ID=\"" + fileId + "\"]", false);
if (file == null) {
throw new MetadataValidationException(
"Failed in Bitstream crosswalk, Could not find file element with ID=" + fileId);
}
// In DSpace METS SIP spec, admin metadata is only "highly
// recommended", not "required", so it is OK if there is no ADMID.
String amds = file.getAttributeValue("ADMID");
if (amds == null) {
log.warn("Got no bitstream ADMID, file@ID=" + fileId);
return;
}
String amdID[] = amds.split("\\s+");
for (int i = 0; i < amdID.length; ++i) {
Element amdSec = getElementByXPath("mets:amdSec[@ID=\"" + amdID[i] + "\"]", false);
for (Iterator ti = amdSec.getChildren("techMD", metsNS).iterator(); ti.hasNext(); ) {
crosswalkXmd(context, params, bitstream, (Element) ti.next(), callback, false);
}
for (Iterator ti = amdSec.getChildren("sourceMD", metsNS).iterator(); ti.hasNext(); ) {
crosswalkXmd(context, params, bitstream, (Element) ti.next(), callback, false);
}
for (Iterator ti = amdSec.getChildren("rightsMD", metsNS).iterator(); ti.hasNext(); ) {
crosswalkXmd(context, params, bitstream, (Element) ti.next(), callback, false);
}
}
}
public void crosswalkBundle(Context context, PackageParameters params,
Bundle bundle,
String fileId, Mdref callback)
throws MetadataValidationException, PackageValidationException,
CrosswalkException, IOException, SQLException, AuthorizeException {
Element file = getElementByXPath("descendant::mets:fileGrp[@ADMID=\"" + fileId + "\"]", false);
if (file == null) {
throw new MetadataValidationException(
"Failed in Bitstream crosswalk, Could not find file element with ID=" + fileId);
}
// In DSpace METS SIP spec, admin metadata is only "highly
// recommended", not "required", so it is OK if there is no ADMID.
String amds = file.getAttributeValue("ADMID");
if (amds == null) {
log.warn("Got no bitstream ADMID, file@ID=" + fileId);
return;
}
String amdID[] = amds.split("\\s+");
for (int i = 0; i < amdID.length; ++i) {
Element amdSec = getElementByXPath("mets:amdSec[@ID=\"" + amdID[i] + "\"]", false);
for (Iterator ti = amdSec.getChildren("techMD", metsNS).iterator(); ti.hasNext(); ) {
crosswalkXmd(context, params, bundle, (Element) ti.next(), callback, false);
}
for (Iterator ti = amdSec.getChildren("sourceMD", metsNS).iterator(); ti.hasNext(); ) {
crosswalkXmd(context, params, bundle, (Element) ti.next(), callback, false);
}
for (Iterator ti = amdSec.getChildren("rightsMD", metsNS).iterator(); ti.hasNext(); ) {
crosswalkXmd(context, params, bundle, (Element) ti.next(), callback, false);
}
}
}
/**
* @return root element of METS document.
*/
public Element getMets() {
return mets;
}
/**
* Return entire METS document as an inputStream
*
* @return entire METS document as a stream
*/
public InputStream getMetsAsStream() {
XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat());
return new ByteArrayInputStream(
outputPretty.outputString(mets).getBytes(StandardCharsets.UTF_8));
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy