
org.dspace.content.crosswalk.MODSDisseminationCrosswalk Maven / Gradle / Ivy
Show all versions of dspace-api Show documentation
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.content.crosswalk;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.MetadataValue;
import org.dspace.content.Site;
import org.dspace.content.dto.MetadataValueDTO;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.CollectionService;
import org.dspace.content.service.CommunityService;
import org.dspace.content.service.ItemService;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.SelfNamedPlugin;
import org.dspace.handle.factory.HandleServiceFactory;
import org.dspace.handle.service.HandleService;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.jdom2.Attribute;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.Namespace;
import org.jdom2.Text;
import org.jdom2.Verifier;
import org.jdom2.filter.Filters;
import org.jdom2.input.SAXBuilder;
import org.jdom2.output.XMLOutputter;
import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory;
/**
* Configurable MODS Crosswalk
*
* This class supports multiple dissemination crosswalks from DSpace
* internal data to the MODS XML format
* (see http://www.loc.gov/standards/mods/.)
*
* It registers multiple Plugin names, which it reads from
* the DSpace configuration as follows:
*
*
* <h3>Configuration</h3>
* Every key starting with {@code "crosswalk.mods.properties."} describes a
* MODS crosswalk. Everything after the last period is the plugin name,
* and the value is the pathname (relative to {@code dspace.dir/config})
* of the crosswalk configuration file.
*
* You can have two names point to the same crosswalk,
* just add two configuration entries with the same value, e.g.
*
* crosswalk.mods.properties.MODS = crosswalks/mods.properties
* crosswalk.mods.properties.default = crosswalks/mods.properties
*
* The first line creates a plugin with the name "MODS"
* which is configured from the file dspace-dir/config/crosswalks/mods.properties.
*
* Since there is significant overhead in reading the properties file to
* configure the crosswalk, and a crosswalk instance may be used any number
* of times, we recommend caching one instance of the crosswalk for each
* name and simply reusing those instances. The PluginService does this
* by default.
*
* @author Larry Stone
* @author Scott Phillips
*/
public class MODSDisseminationCrosswalk extends SelfNamedPlugin
implements DisseminationCrosswalk {
/**
* log4j category
*/
private static final Logger log = LogManager.getLogger(MODSDisseminationCrosswalk.class);
/**
* Prefix of the DSpace configuration keys that declare MODS crosswalks;
* the remainder of each key is used as a plugin name.
*/
private static final String CONFIG_PREFIX = "crosswalk.mods.properties.";
// Content and handle services, obtained once per instance from their factories.
protected final CommunityService communityService = ContentServiceFactory.getInstance().getCommunityService();
protected final CollectionService collectionService = ContentServiceFactory.getInstance().getCollectionService();
protected final ItemService itemService = ContentServiceFactory.getInstance().getItemService();
protected final HandleService handleService = HandleServiceFactory.getInstance().getHandleService();
// Static because the class-level initializer that builds the alias table reads it.
protected static final ConfigurationService configurationService
= DSpaceServicesFactory.getInstance().getConfigurationService();
/**
 * Plugin alias table, filled in once at class-load time from the DSpace
 * configuration entries that name configuration files for flavors of
 * MODS crosswalk. Each alias is the part of a configuration key that
 * follows {@code CONFIG_PREFIX}.
 */
private static String aliases[] = null;

static {
    List<String> aliasList = new ArrayList<>();
    List<String> keys = configurationService.getPropertyKeys(CONFIG_PREFIX);
    for (String key : keys) {
        // Strip the common prefix; what remains is the plugin name.
        aliasList.add(key.substring(CONFIG_PREFIX.length()));
    }
    aliases = aliasList.toArray(new String[aliasList.size()]);
}
/**
 * Lists the plugin names this class registers under.
 *
 * @return a copy of the alias table, so callers cannot modify the
 *         class-level array
 */
public static String[] getPluginNames() {
    final String[] pluginNames = (String[]) ArrayUtils.clone(aliases);
    return pluginNames;
}
/**
* MODS namespace.
*/
public static final Namespace MODS_NS =
Namespace.getNamespace("mods", "http://www.loc.gov/mods/v3");
/**
* XLink namespace; passed alongside MODS_NS when compiling mapping XPaths.
*/
private static final Namespace XLINK_NS =
Namespace.getNamespace("xlink", "http://www.w3.org/1999/xlink");
/**
* Namespaces reported by getNamespaces() (callers receive a copy).
*/
private static final Namespace namespaces[] = {MODS_NS, XLINK_NS};
/**
* URL of MODS XML Schema
*/
public static final String MODS_XSD =
"http://www.loc.gov/standards/mods/v3/mods-3-1.xsd";
/**
* Value for the schemaLocation attribute: the MODS namespace URI, a space,
* then the schema URL.
*/
private static final String schemaLocation =
MODS_NS.getURI() + " " + MODS_XSD;
// NOTE(review): not referenced in the visible part of this file — confirm it is still used.
private static final XMLOutputter outputUgly = new XMLOutputter();
// Parser used by modsTriple.create() to turn configured XML fragments into Elements.
// NOTE(review): one instance is shared by all crosswalk instances — confirm thread-safety.
private static final SAXBuilder builder = new SAXBuilder();
/**
 * Mapping table for this crosswalk instance: qualified DC field name
 * (e.g. "dc.contributor.author") to its MODS mapping. Stays null until
 * initMap() has loaded the instance's properties file.
 */
private Map<String, modsTriple> modsMap = null;
/**
* Container for crosswalk mapping: expressed as "triple" of:
* 1. QDC field name (really field.qualifier).
* 2. XML subtree to add to MODS record.
* 3. XPath expression showing places to plug in the value.
*/
static class modsTriple {
public String qdc = null;
public Element xml = null;
public XPathExpression xpath = null;
/**
* Initialize from text versions of QDC, XML and XPath.
* The DC stays a string; parse the XML with appropriate
* namespaces; "compile" the XPath.
*/
public static modsTriple create(String qdc, String xml, String xpath) {
modsTriple result = new modsTriple();
final String prolog = "";
final String postlog = " ";
try {
result.qdc = qdc;
result.xpath =
XPathFactory.instance()
.compile(xpath, Filters.fpassthrough(), null, MODS_NS, XLINK_NS);
Document d = builder.build(new StringReader(prolog + xml + postlog));
result.xml = (Element) d.getRootElement().getContent(0);
} catch (JDOMException | IOException je) {
log.error("Error initializing modsTriple(\"" + qdc + "\",\"" + xml + "\",\"" + xpath + "\"): got " + je
.toString());
return null;
}
return result;
}
}
/**
 * Initialize Crosswalk table from a properties file
 * which itself is the value of the DSpace configuration property
 * "crosswalk.mods.properties.X", where "X" is the alias name of this instance.
 * Each instance may be configured with a separate mapping table.
 * Does nothing if the table has already been loaded.
 *
 * The MODS crosswalk configuration properties follow the format:
 *
 * {field-name} = {XML-fragment} | {XPath}
 *
 * 1. qualified DC field name is of the form
 * {MDschema}.{element}.{qualifier}
 *
 * e.g. dc.contributor.author
 *
 * 2. XML fragment is prototype of metadata element, with empty or "%s"
 * placeholders for value(s). NOTE: Leave the %s's in because
 * it's much easier then to see if something is broken.
 *
 * 3. XPath expression listing point(s) in the above XML where
 * the value is to be inserted. Context is the element itself.
 *
 * Example properties line:
 *
 * {@code dc.description.abstract = <mods:abstract>%s</mods:abstract> | text()}
 *
 * Lines whose value does not contain the " | " separator, or whose
 * XML/XPath cannot be processed, are logged and skipped.
 *
 * @throws CrosswalkInternalException if this instance has no plugin name,
 *         no properties file is configured for that name, or the file
 *         cannot be opened or read
 */
private void initMap()
    throws CrosswalkInternalException {
    if (modsMap != null) {
        return;
    }

    String myAlias = getPluginInstanceName();
    if (myAlias == null) {
        // Without a name there is no way to find the mapping file; fail here
        // rather than leave the table null for the caller to trip over.
        String msg =
            "Must use PluginService to instantiate MODSDisseminationCrosswalk so the class knows its name.";
        log.error(msg);
        throw new CrosswalkInternalException(msg);
    }

    String cmPropName = CONFIG_PREFIX + myAlias;
    String propsFilename = configurationService.getProperty(cmPropName);
    if (propsFilename == null) {
        String msg = "MODS crosswalk missing " +
            "configuration file for crosswalk named \"" + myAlias + "\"";
        log.error(msg);
        throw new CrosswalkInternalException(msg);
    }

    // The configured path is relative to {dspace.dir}/config.
    String parent = configurationService.getProperty("dspace.dir") +
        File.separator + "config" + File.separator;
    File propsFile = new File(parent, propsFilename);

    Properties modsConfig = new Properties();
    try (FileInputStream pfs = new FileInputStream(propsFile)) {
        modsConfig.load(pfs);
    } catch (IOException e) {
        log.error(
            "Error opening or reading MODS properties file: " + propsFile.toString() + ": " + e.toString());
        throw new CrosswalkInternalException("MODS crosswalk cannot " +
                                                 "open config file: " + e.toString(), e);
    }

    // Build into a local map and publish it only when complete, so the
    // field never refers to a half-filled table.
    Map<String, modsTriple> loaded = new HashMap<>();
    for (String qdc : modsConfig.stringPropertyNames()) {
        String val = modsConfig.getProperty(qdc);
        // Split "{XML-fragment} | {XPath}" at the first whitespace-delimited bar.
        String pair[] = val.split("\\s+\\|\\s+", 2);
        if (pair.length < 2) {
            log.warn("Illegal MODS mapping in " + propsFile.toString() + ", line = " +
                         qdc + " = " + val);
        } else {
            modsTriple trip = modsTriple.create(qdc, pair[0], pair[1]);
            if (trip != null) {
                loaded.put(qdc, trip);
            }
        }
    }
    modsMap = loaded;
}
/**
 * Reports the XML namespaces used by this crosswalk's output.
 *
 * @return a copy of the namespace table (MODS and XLink)
 */
@Override
public Namespace[] getNamespaces() {
    final Namespace[] copy = (Namespace[]) ArrayUtils.clone(namespaces);
    return copy;
}
/**
 * Reports the schema location of the MODS output.
 *
 * @return the MODS namespace URI and the MODS schema URL, separated by a space
 */
@Override
public String getSchemaLocation() {
    final String location = schemaLocation;
    return location;
}
/**
 * Returns object's metadata in MODS format, as List of XML structure nodes.
 * Each returned element carries its own schemaLocation attribute, since
 * there is no enclosing root element to hold it.
 *
 * @param context context
 * @param dso     object whose metadata is to be disseminated
 * @throws CrosswalkException if crosswalk error
 * @throws IOException        if IO error
 * @throws SQLException       if database error
 * @throws AuthorizeException if authorization error
 * @return List of Elements
 */
@Override
public List<Element> disseminateList(Context context, DSpaceObject dso)
    throws CrosswalkException,
    IOException, SQLException, AuthorizeException {
    return disseminateListInternal(dso, true);
}
/**
 * Disseminate an Item, Collection, or Community to MODS, wrapped in a
 * single {@code mods} root element that carries the schemaLocation
 * attribute.
 *
 * @param context context
 * @param dso     object whose metadata is to be disseminated
 * @return the {@code mods} root element containing the crosswalked metadata
 * @throws CrosswalkException if crosswalk error
 * @throws IOException        if IO error
 * @throws SQLException       if database error
 * @throws AuthorizeException if authorization error
 */
@Override
public Element disseminateElement(Context context, DSpaceObject dso)
    throws CrosswalkException, IOException, SQLException, AuthorizeException {
    // The children are built without per-element schemaLocation because
    // the root element already declares it.
    return new Element("mods", MODS_NS)
        .setAttribute("schemaLocation", schemaLocation, XSI_NS)
        .addContent(disseminateListInternal(dso, false));
}
private List disseminateListInternal(DSpaceObject dso, boolean addSchema)
throws CrosswalkException, IOException, SQLException, AuthorizeException {
List dcvs = null;
if (dso.getType() == Constants.ITEM) {
dcvs = item2Metadata((Item) dso);
} else if (dso.getType() == Constants.COLLECTION) {
dcvs = collection2Metadata((Collection) dso);
} else if (dso.getType() == Constants.COMMUNITY) {
dcvs = community2Metadata((Community) dso);
} else if (dso.getType() == Constants.SITE) {
dcvs = site2Metadata((Site) dso);
} else {
throw new CrosswalkObjectNotSupported(
"MODSDisseminationCrosswalk can only crosswalk Items, Collections, or Communities");
}
initMap();
List result = new ArrayList<>(dcvs.size());
for (MetadataValueDTO dcv : dcvs) {
String qdc = dcv.getSchema() + "." + dcv.getElement();
if (dcv.getQualifier() != null) {
qdc += "." + dcv.getQualifier();
}
String value = dcv.getValue();
modsTriple trip = modsMap.get(qdc);
if (trip == null) {
log.warn("WARNING: " + getPluginInstanceName() + ": No MODS mapping for \"" + qdc + "\"");
} else {
Element me = (Element) trip.xml.clone();
if (addSchema) {
me.setAttribute("schemaLocation", schemaLocation, XSI_NS);
}
List