// Source listing of org.dspace.content.crosswalk.QDCCrosswalk (dspace-api artifact; Maven / Gradle / Ivy).
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.content.crosswalk;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.MetadataField;
import org.dspace.content.MetadataSchema;
import org.dspace.content.MetadataSchemaEnum;
import org.dspace.content.MetadataValue;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.ItemService;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.SelfNamedPlugin;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;
/**
 * Configurable QDC Crosswalk
 * <p>
 * This class supports multiple dissemination crosswalks from DSpace
 * internal data to the Qualified Dublin Core XML format
 * (see <a href="http://dublincore.org/">http://dublincore.org/</a>).
 * <p>
 * It registers multiple Plugin names, which it reads from
 * the DSpace configuration as follows:
 *
 * <h3>Configuration</h3>
 * Every key starting with <code>"crosswalk.qdc.properties."</code> describes a
 * QDC crosswalk. Everything after the last period is the plugin instance,
 * and the value is the pathname (relative to <code><em>dspace.dir</em>/config</code>)
 * of the crosswalk configuration file.
 * <p>
 * You can have two aliases point to the same crosswalk,
 * just add two configuration entries with the same value, e.g.
 * <pre>
 *   crosswalk.qdc.properties.QDC = xwalk/qdc.properties
 *   crosswalk.qdc.properties.default = xwalk/qdc.properties
 * </pre>
 * The first line creates a plugin with the name <code>"QDC"</code>
 * which is configured from the file
 * <code><em>dspace-dir</em>/config/xwalk/qdc.properties</code>.
 * <p>
 * Since there is significant overhead in reading the properties file to
 * configure the crosswalk, and a crosswalk instance may be used any number
 * of times, we recommend caching one instance of the crosswalk for each
 * alias and simply reusing those instances. The PluginService does
 * this by default.
 * <p>
 * Each named crosswalk has two other types of configuration lines:
 * <p>
 * XML Namespaces: all XML namespace prefixes used in the XML fragments below
 * <em>must</em> be defined in the configuration as follows. Add a line of
 * the form:
 * <pre>
 *   crosswalk.qdc.namespace.{NAME}.{prefix} = {namespace-URI}
 * </pre>
 * e.g. for the namespaces <code>dc</code> and <code>dcterms</code>
 * in the plugin named <code>QDC</code>, add these lines:
 * <pre>
 *   crosswalk.qdc.namespace.QDC.dc = http://purl.org/dc/elements/1.1/
 *   crosswalk.qdc.namespace.QDC.dcterms = http://purl.org/dc/terms/
 * </pre>
 * <p>
 * Finally, you need to declare an XML Schema URI for the plugin, with
 * a line of the form
 * <pre>
 *   crosswalk.qdc.schemaLocation.{NAME} = {schema-URI}
 * </pre>
 * for example,
 * <pre>
 *   crosswalk.qdc.schemaLocation.QDC = \
 *     http://purl.org/dc/terms/ \
 *     http://dublincore.org/schemas/xmls/qdc/2003/04/02/qualifieddc.xsd
 * </pre>
 *
 * @author Larry Stone
 */
public class QDCCrosswalk extends SelfNamedPlugin
implements DisseminationCrosswalk, IngestionCrosswalk {
/**
* log4j category
*/
private static final Logger log = LogManager.getLogger(QDCCrosswalk.class);
// map of qdc to JDOM Element
private final Map qdc2element = new HashMap<>();
// map of JDOM Element to qdc Metadatum
private final Map element2qdc = new HashMap<>();
// the XML namespaces from config file for this name.
private Namespace namespaces[] = null;
private static final Namespace DCTERMS_NS =
Namespace.getNamespace("dcterms", "http://purl.org/dc/terms/");
// sentinal: done init?
private boolean inited = false;
// my plugin name
private String myName = null;
// prefix of all DSpace Configuration entries.
private static final String CONFIG_PREFIX = "crosswalk.qdc";
// XML schemaLocation fragment for this crosswalk, from config.
private String schemaLocation = null;
private static final SAXBuilder builder = new SAXBuilder();
protected ItemService itemService = ContentServiceFactory.getInstance().getItemService();
protected static final ConfigurationService configurationService
= DSpaceServicesFactory.getInstance().getConfigurationService();
private final CrosswalkMetadataValidator metadataValidator = new CrosswalkMetadataValidator();
/**
 * Plugin-name table, filled in from the DSpace configuration entries
 * that name the configuration files for flavors of QDC crosswalk:
 * every key starting with "crosswalk.qdc.properties." contributes the
 * remainder of its key as one alias.
 */
private static final String[] aliases;

static {
    String propname = CONFIG_PREFIX + ".properties.";
    List<String> configKeys = configurationService.getPropertyKeys(propname);
    List<String> aliasList = new ArrayList<>(configKeys.size());
    for (String key : configKeys) {
        // strip the common prefix; what is left is the plugin instance name
        aliasList.add(key.substring(propname.length()));
    }
    aliases = aliasList.toArray(new String[0]);
}
/**
 * Reports the plugin names under which this crosswalk is registered.
 *
 * @return a copy of the alias table, so callers cannot modify the original
 */
public static String[] getPluginNames() {
    final String[] copyOfAliases = (String[]) ArrayUtils.clone(aliases);
    return copyOfAliases;
}
/**
 * Utility: return the "fully qualified" name of an XML element, used as a
 * hashtable key when ingesting elements.
 * <p>
 * Format is {@code {prefix:}name{type}}: the namespace prefix (with its
 * colon) is only emitted when the element actually has a non-empty prefix,
 * and the value of the element's {@code xsi:type} attribute, or failing
 * that its un-namespaced {@code type} attribute, is appended when present.
 *
 * @param element the element to build a key for
 * @return the lookup key for {@code element}
 */
private String makeQualifiedTagName(Element element) {
    StringBuilder tagName = new StringBuilder();

    // An element without a namespace reports an empty prefix rather than
    // null, so test the prefix itself to avoid emitting a bare ":".
    Namespace ns = element.getNamespace();
    if (ns != null && !ns.getPrefix().isEmpty()) {
        tagName.append(ns.getPrefix()).append(':');
    }
    tagName.append(element.getName());

    // Prefer xsi:type; accept a plain "type" attribute as fallback.
    String qualifier = element.getAttributeValue("type", DisseminationCrosswalk.XSI_NS);
    if (qualifier == null || qualifier.isEmpty()) {
        qualifier = element.getAttributeValue("type");
    }
    if (qualifier != null && !qualifier.isEmpty()) {
        tagName.append(qualifier);
    }
    return tagName.toString();
}
/**
 * Initialize Crosswalk table from a properties file
 * which itself is the value of the DSpace configuration property
 * "crosswalk.qdc.properties.X", where "X" is the alias name of this instance.
 * Each instance may be configured with a separate mapping table.
 * <p>
 * The QDC crosswalk configuration properties follow the format:
 * <pre>
 *   {qdc-element} = {XML-fragment}
 * </pre>
 * <ol>
 * <li>qualified DC field name is of the form (qualifier is optional)
 * {@code {MDschema}.{element}.{qualifier}},
 * e.g. {@code dc.contributor.author} or {@code dc.title}</li>
 * <li>XML fragment is prototype of metadata element, with empty
 * placeholders for value.</li>
 * </ol>
 * Example properties line:
 * <pre>
 *   dc.coverage.temporal = &lt;dcterms:temporal /&gt;
 * </pre>
 * Each fragment is parsed inside a temporary {@code <wrapper>} element that
 * declares every configured namespace, so the prefixes used in the fragment
 * resolve. Calling this method again after the first call is a no-op.
 *
 * @throws CrosswalkException if the plugin name is unknown, no properties
 *                            file is configured, or a fragment is not
 *                            well-formed XML
 * @throws IOException        if the properties file cannot be read
 */
private void init()
    throws CrosswalkException, IOException {
    if (inited) {
        return;
    }
    // NOTE(review): the flag is raised before the work is done, so a failed
    // initialization is not retried on later calls - confirm this is intended.
    inited = true;

    myName = getPluginInstanceName();
    if (myName == null) {
        throw new CrosswalkInternalException("Cannot determine plugin name. " +
            "You must use PluginService to instantiate QDCCrosswalk so the " +
            "instance knows its name.");
    }

    // grovel DSpace configuration for namespaces
    List<Namespace> nsList = new ArrayList<>();
    String propname = CONFIG_PREFIX + ".namespace." + myName + ".";
    List<String> configKeys = configurationService.getPropertyKeys(propname);
    for (String key : configKeys) {
        nsList.add(Namespace.getNamespace(key.substring(propname.length()),
                                          configurationService.getProperty(key)));
    }
    nsList.add(Namespace.XML_NAMESPACE);
    namespaces = nsList.toArray(new Namespace[0]);

    // get XML schemaLocation fragment from config
    schemaLocation = configurationService.getProperty(CONFIG_PREFIX + ".schemaLocation." + myName);

    // read properties
    String cmPropName = CONFIG_PREFIX + ".properties." + myName;
    String propsFilename = configurationService.getProperty(cmPropName);
    if (propsFilename == null) {
        throw new CrosswalkInternalException("Configuration error: " +
            "No properties file configured for QDC crosswalk named \"" +
            myName + "\"");
    }
    String parent = configurationService.getProperty("dspace.dir") +
        File.separator + "config" + File.separator;
    File propsFile = new File(parent, propsFilename);
    Properties qdcProps = new Properties();
    try (FileInputStream pfs = new FileInputStream(propsFile)) {
        qdcProps.load(pfs);
    }

    // grovel properties to initialize qdc->element and element->qdc maps.
    // evaluate the XML fragment with a wrapper including namespaces.
    String postlog = "</wrapper>";
    StringBuilder prologb = new StringBuilder("<wrapper");
    for (Namespace ns : namespaces) {
        prologb.append(" xmlns:")
               .append(ns.getPrefix())
               .append("=\"")
               .append(ns.getURI())
               .append("\"");
    }
    prologb.append(">");
    String prolog = prologb.toString();

    Enumeration<?> qdcKeys = qdcProps.propertyNames();
    while (qdcKeys.hasMoreElements()) {
        String qdc = (String) qdcKeys.nextElement();
        String val = qdcProps.getProperty(qdc);
        try {
            Document d = builder.build(new StringReader(prolog + val + postlog));
            // the root is the wrapper; its first child is the configured fragment
            Element element = (Element) d.getRootElement().getContent(0);
            qdc2element.put(qdc, element);
            element2qdc.put(makeQualifiedTagName(element), qdc);
            log.debug("Building Maps: qdc=\"{}\", element=\"{}\"", qdc, element);
        } catch (org.jdom.JDOMException je) {
            throw new CrosswalkInternalException(
                "Failed parsing XML fragment in properties file: \"" + prolog + val + postlog + "\": "
                    + je.toString(), je);
        }
    }
}
/**
 * Returns the XML namespaces configured for this crosswalk instance.
 * <p>
 * Initialization is attempted first; this method declares no checked
 * exceptions, so a failure is logged and whatever namespaces were loaded
 * so far (possibly none) are returned.
 *
 * @return a copy of the namespace array, or null if none were loaded
 */
@Override
public Namespace[] getNamespaces() {
    try {
        init();
    } catch (IOException | CrosswalkException e) {
        // cannot propagate from here; report instead of hiding the failure
        log.error("Failed to initialize QDC crosswalk \"{}\"", myName, e);
    }
    return (Namespace[]) ArrayUtils.clone(namespaces);
}
/**
 * Returns the XML schemaLocation fragment configured for this crosswalk
 * instance.
 * <p>
 * Initialization is attempted first; this method declares no checked
 * exceptions, so a failure is logged and the current value is returned.
 *
 * @return the configured schemaLocation value, or null if none is set
 */
@Override
public String getSchemaLocation() {
    try {
        init();
    } catch (IOException | CrosswalkException e) {
        // cannot propagate from here; report instead of hiding the failure
        log.error("Failed to initialize QDC crosswalk \"{}\"", myName, e);
    }
    return schemaLocation;
}
/**
 * Returns object's metadata in Qualified Dublin Core format, as a list of
 * XML elements. When a schemaLocation is configured, every returned
 * element carries it as an {@code xsi:schemaLocation} attribute.
 *
 * @param context context
 * @param dso     the object to crosswalk; must be an Item
 * @return one element per metadata value that has a configured QDC mapping
 * @throws CrosswalkException if crosswalk error
 * @throws IOException        if IO error
 * @throws SQLException       if database error
 * @throws AuthorizeException if authorization error
 */
@Override
public List<Element> disseminateList(Context context, DSpaceObject dso)
    throws CrosswalkException,
    IOException, SQLException, AuthorizeException {
    return disseminateListInternal(dso, true);
}
/**
 * Crosswalks every metadata value of an Item into a copy of the XML
 * element configured for its field.
 * <p>
 * Values whose field has no configured mapping are skipped; a warning is
 * logged only when the field belongs to the DC schema.
 *
 * @param dso       the object to crosswalk; must be an Item
 * @param addSchema whether to put the configured schemaLocation (if any)
 *                  on each generated element
 * @return the generated elements
 * @throws CrosswalkException if {@code dso} is not an Item, or on
 *                            initialization failure
 * @throws IOException        if IO error
 * @throws SQLException       if database error
 * @throws AuthorizeException if authorization error
 */
private List<Element> disseminateListInternal(DSpaceObject dso, boolean addSchema)
    throws CrosswalkException,
    IOException, SQLException, AuthorizeException {
    if (dso.getType() != Constants.ITEM) {
        throw new CrosswalkObjectNotSupported("QDCCrosswalk can only crosswalk an Item.");
    }
    Item item = (Item) dso;
    init();

    List<MetadataValue> dc = itemService.getMetadata(item, Item.ANY, Item.ANY, Item.ANY, Item.ANY);
    List<Element> result = new ArrayList<>(dc.size());
    for (MetadataValue metadataValue : dc) {
        MetadataField metadataField = metadataValue.getMetadataField();
        MetadataSchema metadataSchema = metadataField.getMetadataSchema();

        // Compose qualified DC name - schema.element[.qualifier]
        // e.g. "dc.title", "dc.subject.lcc", "lom.Classification.Keyword"
        String qdc = metadataSchema.getName() + "." +
            ((metadataField.getQualifier() == null) ? metadataField.getElement()
                : (metadataField.getElement() + "." + metadataField.getQualifier()));

        Element elt = (Element) qdc2element.get(qdc);
        if (elt == null) {
            // only complain about missing elements in the DC schema:
            if (MetadataSchemaEnum.DC.getName().equals(metadataSchema.getName())) {
                log.warn("WARNING: {}: No QDC mapping for \"{}\"", myName, qdc);
            }
            continue;
        }

        // work on a copy so the configured prototype stays untouched
        Element qe = (Element) elt.clone();
        qe.setText(metadataValue.getValue());
        if (addSchema && schemaLocation != null) {
            qe.setAttribute("schemaLocation", schemaLocation, XSI_NS);
        }
        if (metadataValue.getLanguage() != null) {
            qe.setAttribute("lang", metadataValue.getLanguage(), Namespace.XML_NAMESPACE);
        }
        result.add(qe);
    }
    return result;
}
/**
 * Produces a single {@code dcterms:qualifieddc} element wrapping the
 * crosswalked metadata of the given object. The configured schemaLocation,
 * when present, is placed on the wrapper only, not on its children.
 *
 * @param context context
 * @param dso     the object to crosswalk
 * @return the populated wrapper element
 * @throws CrosswalkException if crosswalk error
 * @throws IOException        if IO error
 * @throws SQLException       if database error
 * @throws AuthorizeException if authorization error
 */
@Override
public Element disseminateElement(Context context, DSpaceObject dso)
    throws CrosswalkException,
    IOException, SQLException, AuthorizeException {
    init();

    final Element qualifiedDc = new Element("qualifieddc", DCTERMS_NS);
    final String location = schemaLocation;
    if (location != null) {
        qualifiedDc.setAttribute("schemaLocation", location, XSI_NS);
    }

    // children are generated without their own schemaLocation attribute
    qualifiedDc.addContent(disseminateListInternal(dso, false));
    return qualifiedDc;
}
/**
 * Reports whether this crosswalk can disseminate the given object.
 * This implementation answers yes for every object without inspecting it.
 *
 * @param dso the candidate object (not examined)
 * @return always true
 */
@Override
public boolean canDisseminate(DSpaceObject dso) {
    // NOTE(review): the dissemination methods reject anything that is not
    // an Item, yet this still answers true for such objects - confirm.
    final boolean supported = true;
    return supported;
}
/**
 * Ingests a Qualified DC document given its root element, by handing the
 * root's children to the list-based ingest.
 *
 * @param context                     context
 * @param dso                         the object receiving the metadata
 * @param root                        root element; must be named "qualifieddc"
 * @param createMissingMetadataFields passed through to the list-based ingest
 * @throws CrosswalkException if the root element has the wrong name, or on
 *                            crosswalk error
 * @throws IOException        if IO error
 * @throws SQLException       if database error
 * @throws AuthorizeException if authorization error
 */
@Override
public void ingest(Context context, DSpaceObject dso, Element root, boolean createMissingMetadataFields)
    throws CrosswalkException, IOException, SQLException, AuthorizeException {
    init();

    // NOTE: don't bother comparing namespace on root element
    // because DCMI doesn't specify one, and every app uses its
    // own.. just give up in the face of this madness and accept
    // anything with the right name.
    final boolean isQualifiedDc = root.getName().equals("qualifieddc");
    if (isQualifiedDc) {
        ingest(context, dso, root.getChildren(), createMissingMetadataFields);
        return;
    }
    throw new MetadataValidationException("Wrong root element for Qualified DC: " + root.toString());
}
/**
 * Ingests a list of Qualified DC elements as metadata of an Item.
 * <p>
 * Each element is looked up by its qualified tag name; when a mapping
 * exists, the element's text is added to the item under the mapped field,
 * with the language taken from {@code xml:lang} or, failing that, from a
 * plain {@code lang} attribute. Elements without a mapping are logged and
 * skipped. A "qualifieddc" element found in the list is treated as a
 * wrapper and its children are ingested recursively.
 *
 * @param context                     context
 * @param dso                         the object receiving the metadata; must be an Item
 * @param ml                          the elements to ingest
 * @param createMissingMetadataFields passed to the metadata validator for
 *                                    every mapped field
 * @throws CrosswalkException if {@code dso} is not an Item, or a configured
 *                            field name is not of the form
 *                            schema.element[.qualifier]
 * @throws IOException        if IO error
 * @throws SQLException       if database error
 * @throws AuthorizeException if authorization error
 */
@Override
public void ingest(Context context, DSpaceObject dso, List<Element> ml, boolean createMissingMetadataFields)
    throws CrosswalkException, IOException, SQLException, AuthorizeException {
    init();

    // for now, forget about any targets but item.
    if (dso.getType() != Constants.ITEM) {
        throw new CrosswalkInternalException(
            "Wrong target object type, QDCCrosswalk can only crosswalk to an Item.");
    }
    Item item = (Item) dso;

    for (Element me : ml) {
        // if the root element gets passed here, recurse:
        if ("qualifieddc".equals(me.getName())) {
            @SuppressWarnings("unchecked") // children of an element are elements
            List<Element> children = me.getChildren();
            ingest(context, dso, children, createMissingMetadataFields);
            continue;
        }

        String key = makeQualifiedTagName(me);
        String mapped = (String) element2qdc.get(key);
        if (mapped == null) {
            log.warn("WARNING: {}: No mapping for Element=\"{}\" to qdc.", myName, key);
            continue;
        }

        // schema.element[.qualifier]
        String[] qdc = mapped.split("\\.");
        MetadataField metadataField;
        if (qdc.length == 3) {
            metadataField = metadataValidator
                .checkMetadata(context, qdc[0], qdc[1], qdc[2], createMissingMetadataFields);
        } else if (qdc.length == 2) {
            metadataField = metadataValidator
                .checkMetadata(context, qdc[0], qdc[1], null, createMissingMetadataFields);
        } else {
            throw new CrosswalkInternalException(
                "Unrecognized format in QDC element identifier for key=\"" + key + "\", qdc=\""
                    + mapped + "\"");
        }

        // get language - prefer xml:lang, accept lang.
        String lang = me.getAttributeValue("lang", Namespace.XML_NAMESPACE);
        if (lang == null) {
            lang = me.getAttributeValue("lang");
        }
        itemService.addMetadata(context, item, metadataField, lang, me.getText());
    }
}
/**
 * States which dissemination form this crosswalk prefers.
 *
 * @return always true, i.e. the list form is preferred
 */
@Override
public boolean preferList() {
    final boolean listPreferred = true;
    return listPreferred;
}
}