All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dspace.content.crosswalk.QDCCrosswalk Maven / Gradle / Ivy

There is a newer version: 8.0
Show newest version
/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.content.crosswalk;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.MetadataField;
import org.dspace.content.MetadataSchema;
import org.dspace.content.MetadataSchemaEnum;
import org.dspace.content.MetadataValue;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.ItemService;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.SelfNamedPlugin;
import org.dspace.services.ConfigurationService;
import org.dspace.services.factory.DSpaceServicesFactory;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;

/**
 * Configurable QDC Crosswalk
 * 

* This class supports multiple dissemination crosswalks from DSpace * internal data to the Qualified Dublin Core XML format * (see http://dublincore.org/). *

* It registers multiple Plugin names, which it reads from * the DSpace configuration as follows: * *

Configuration

* Every key starting with "crosswalk.qdc.properties." describes a * QDC crosswalk. Everything after the last period is the plugin instance, * and the value is the pathname (relative to dspace.dir/config) * of the crosswalk configuration file. *

* You can have two aliases point to the same crosswalk, * just add two configuration entries with the same value, e.g. *

 *    crosswalk.qdc.properties.QDC = xwalk/qdc.properties
 *    crosswalk.qdc.properties.default = xwalk/qdc.properties
 * 
* The first line creates a plugin with the name "QDC" * which is configured from the file dspace-dir/xwalk/qdc.properties. *

* Since there is significant overhead in reading the properties file to * configure the crosswalk, and a crosswalk instance may be used any number * of times, we recommend caching one instance of the crosswalk for each * alias and simply reusing those instances. The PluginService does * this by default. *

* Each named crosswalk has two other types of configuration lines: *

* XML Namespaces: all XML namespace prefixes used in the XML fragments below * must be defined in the configuration as follows. Add a line of * the form:

 *  crosswalk.qdc.namespace.{NAME}.{prefix} = {namespace-URI}
* e.g. for the namespaces dc and dcterms * in the plugin named QDC, add these lines: *
crosswalk.qdc.namespace.QDC.dc = http://purl.org/dc/elements/1.1/
 * crosswalk.qdc.namespace.QDC.dcterms = http://purl.org/dc/terms/
* *

* Finally, you need to declare an XML Schema URI for the plugin, with * a line of the form

 *  crosswalk.qdc.schema.{NAME} = {schema-URI}
* for example, *
crosswalk.qdc.schemaLocation.QDC  = \
 *  http://purl.org/dc/terms/ \
 *  http://dublincore.org/schemas/xmls/qdc/2003/04/02/qualifieddc.xsd
* * @author Larry Stone */ public class QDCCrosswalk extends SelfNamedPlugin implements DisseminationCrosswalk, IngestionCrosswalk { /** * log4j category */ private static final Logger log = LogManager.getLogger(QDCCrosswalk.class); // map of qdc to JDOM Element private final Map qdc2element = new HashMap<>(); // map of JDOM Element to qdc Metadatum private final Map element2qdc = new HashMap<>(); // the XML namespaces from config file for this name. private Namespace namespaces[] = null; private static final Namespace DCTERMS_NS = Namespace.getNamespace("dcterms", "http://purl.org/dc/terms/"); // sentinal: done init? private boolean inited = false; // my plugin name private String myName = null; // prefix of all DSpace Configuration entries. private static final String CONFIG_PREFIX = "crosswalk.qdc"; // XML schemaLocation fragment for this crosswalk, from config. private String schemaLocation = null; private static final SAXBuilder builder = new SAXBuilder(); protected ItemService itemService = ContentServiceFactory.getInstance().getItemService(); protected static final ConfigurationService configurationService = DSpaceServicesFactory.getInstance().getConfigurationService(); private final CrosswalkMetadataValidator metadataValidator = new CrosswalkMetadataValidator(); /** * Fill in the plugin-name table from DSpace configuration entries * for configuration files for flavors of QDC crosswalk: */ private static String aliases[] = null; static { List aliasList = new ArrayList<>(); String propname = CONFIG_PREFIX + ".properties."; List configKeys = configurationService.getPropertyKeys(propname); for (String key : configKeys) { aliasList.add(key.substring(propname.length())); } aliases = (String[]) aliasList.toArray(new String[aliasList.size()]); } public static String[] getPluginNames() { return (String[]) ArrayUtils.clone(aliases); } // utility: return "fully qualified" name of XML element, for a // hashtable key to use on ingesting elements. // Format is {prefix:}name where prefix is optional. private String makeQualifiedTagName(Element element) { String prefix = ""; Namespace ns = element.getNamespace(); if (ns != null) { prefix = ns.getPrefix() + ":"; } String tagName; String nsQualifier = element.getAttributeValue("type", DisseminationCrosswalk.XSI_NS); if (nsQualifier == null || nsQualifier.length() < 1) { String qualifier = element.getAttributeValue("type"); if (qualifier == null || qualifier.length() < 1) { tagName = prefix + element.getName(); } else { tagName = prefix + element.getName() + qualifier; } } else { tagName = prefix + element.getName() + nsQualifier; } return tagName; } /** * Initialize Crosswalk table from a properties file * which itself is the value of the DSpace configuration property * "crosswalk.qdc.properties.X", where "X" is the alias name of this instance. * Each instance may be configured with a separate mapping table. * * The QDC crosswalk configuration properties follow the format: * * {qdc-element} = {XML-fragment} * * 1. qualified DC field name is of the form (qualifier is optional) * {MDschema}.{element}.{qualifier} * * e.g. dc.contributor.author * dc.title * * 2. XML fragment is prototype of metadata element, with empty * placeholders for value). * * Example properties line: * * dc.coverage.temporal = */ private void init() throws CrosswalkException, IOException { if (inited) { return; } inited = true; myName = getPluginInstanceName(); if (myName == null) { throw new CrosswalkInternalException("Cannot determine plugin name. " + "You must use PluginService to instantiate QDCCrosswalk so the " + "instance knows its name."); } // grovel DSpace configuration for namespaces List nsList = new ArrayList<>(); String propname = CONFIG_PREFIX + ".namespace." + myName + "."; List configKeys = configurationService.getPropertyKeys(propname); for (String key : configKeys) { nsList.add(Namespace.getNamespace(key.substring(propname.length()), configurationService.getProperty(key))); } nsList.add(Namespace.XML_NAMESPACE); namespaces = (Namespace[]) nsList.toArray(new Namespace[nsList.size()]); // get XML schemaLocation fragment from config schemaLocation = configurationService.getProperty(CONFIG_PREFIX + ".schemaLocation." + myName); // read properties String cmPropName = CONFIG_PREFIX + ".properties." + myName; String propsFilename = configurationService.getProperty(cmPropName); if (propsFilename == null) { throw new CrosswalkInternalException("Configuration error: " + "No properties file configured for QDC crosswalk named \"" + myName + "\""); } String parent = configurationService.getProperty("dspace.dir") + File.separator + "config" + File.separator; File propsFile = new File(parent, propsFilename); Properties qdcProps = new Properties(); FileInputStream pfs = null; try { pfs = new FileInputStream(propsFile); qdcProps.load(pfs); } finally { if (pfs != null) { try { pfs.close(); } catch (IOException ioe) { // ignore } } } // grovel properties to initialize qdc->element and element->qdc maps. // evaluate the XML fragment with a wrapper including namespaces. String postlog = ""; StringBuilder prologb = new StringBuilder(""); String prolog = prologb.toString(); Enumeration qdcKeys = (Enumeration) qdcProps.propertyNames(); while (qdcKeys.hasMoreElements()) { String qdc = qdcKeys.nextElement(); String val = qdcProps.getProperty(qdc); try { Document d = builder.build(new StringReader(prolog + val + postlog)); Element element = (Element) d.getRootElement().getContent(0); qdc2element.put(qdc, element); element2qdc.put(makeQualifiedTagName(element), qdc); log.debug("Building Maps: qdc=\"" + qdc + "\", element=\"" + element.toString() + "\""); } catch (org.jdom.JDOMException je) { throw new CrosswalkInternalException( "Failed parsing XML fragment in properties file: \"" + prolog + val + postlog + "\": " + je .toString(), je); } } } @Override public Namespace[] getNamespaces() { try { init(); } catch (IOException | CrosswalkException e) { // ignore } return (Namespace[]) ArrayUtils.clone(namespaces); } @Override public String getSchemaLocation() { try { init(); } catch (IOException | CrosswalkException e) { // ignore } return schemaLocation; } /** * Returns object's metadata in MODS format, as XML structure node. * * @param context context * @throws CrosswalkException if crosswalk error * @throws IOException if IO error * @throws SQLException if database error * @throws AuthorizeException if authorization error */ @Override public List disseminateList(Context context, DSpaceObject dso) throws CrosswalkException, IOException, SQLException, AuthorizeException { return disseminateListInternal(dso, true); } private List disseminateListInternal(DSpaceObject dso, boolean addSchema) throws CrosswalkException, IOException, SQLException, AuthorizeException { if (dso.getType() != Constants.ITEM) { throw new CrosswalkObjectNotSupported("QDCCrosswalk can only crosswalk an Item."); } Item item = (Item) dso; init(); List dc = itemService.getMetadata(item, Item.ANY, Item.ANY, Item.ANY, Item.ANY); List result = new ArrayList<>(dc.size()); for (int i = 0; i < dc.size(); i++) { MetadataValue metadataValue = dc.get(i); MetadataField metadataField = metadataValue.getMetadataField(); MetadataSchema metadataSchema = metadataField.getMetadataSchema(); // Compose qualified DC name - schema.element[.qualifier] // e.g. "dc.title", "dc.subject.lcc", "lom.Classification.Keyword" String qdc = metadataSchema.getName() + "." + ((metadataField.getQualifier() == null) ? metadataField.getElement() : (metadataField.getElement() + "." + metadataField.getQualifier())); Element elt = qdc2element.get(qdc); // only complain about missing elements in the DC schema: if (elt == null) { if (metadataField.getMetadataSchema().getName().equals(MetadataSchemaEnum.DC.getName())) { log.warn("WARNING: " + myName + ": No QDC mapping for \"" + qdc + "\""); } } else { Element qe = (Element) elt.clone(); qe.setText(metadataValue.getValue()); if (addSchema && schemaLocation != null) { qe.setAttribute("schemaLocation", schemaLocation, XSI_NS); } if (metadataValue.getLanguage() != null) { qe.setAttribute("lang", metadataValue.getLanguage(), Namespace.XML_NAMESPACE); } result.add(qe); } } return result; } @Override public Element disseminateElement(Context context, DSpaceObject dso) throws CrosswalkException, IOException, SQLException, AuthorizeException { init(); Element root = new Element("qualifieddc", DCTERMS_NS); if (schemaLocation != null) { root.setAttribute("schemaLocation", schemaLocation, XSI_NS); } root.addContent(disseminateListInternal(dso, false)); return root; } @Override public boolean canDisseminate(DSpaceObject dso) { return true; } @Override public void ingest(Context context, DSpaceObject dso, Element root, boolean createMissingMetadataFields) throws CrosswalkException, IOException, SQLException, AuthorizeException { init(); // NOTE: don't bother comparing namespace on root element // because DCMI doesn't specify one, and every app uses its // own.. just give up in the face of this madness and accept // anything with the right name. if (!(root.getName().equals("qualifieddc"))) { throw new MetadataValidationException("Wrong root element for Qualified DC: " + root.toString()); } ingest(context, dso, root.getChildren(), createMissingMetadataFields); } @Override public void ingest(Context context, DSpaceObject dso, List ml, boolean createMissingMetadataFields) throws CrosswalkException, IOException, SQLException, AuthorizeException { init(); // for now, forget about any targets but item. if (dso.getType() != Constants.ITEM) { throw new CrosswalkInternalException( "Wrong target object type, QDCCrosswalk can only crosswalk to an Item."); } Item item = (Item) dso; for (Element me : ml) { String key = makeQualifiedTagName(me); // if the root element gets passed here, recurse: if ("qualifieddc".equals(me.getName())) { ingest(context, dso, me.getChildren(), createMissingMetadataFields); } else if (element2qdc.containsKey(key)) { String qdc[] = (element2qdc.get(key)).split("\\."); MetadataField metadataField; if (qdc.length == 3) { metadataField = metadataValidator .checkMetadata(context, qdc[0], qdc[1], qdc[2], createMissingMetadataFields); } else if (qdc.length == 2) { metadataField = metadataValidator .checkMetadata(context, qdc[0], qdc[1], null, createMissingMetadataFields); } else { throw new CrosswalkInternalException( "Unrecognized format in QDC element identifier for key=\"" + key + "\", qdc=\"" + element2qdc .get(key) + "\""); } // get language - prefer xml:lang, accept lang. String lang = me.getAttributeValue("lang", Namespace.XML_NAMESPACE); if (lang == null) { lang = me.getAttributeValue("lang"); } itemService.addMetadata(context, item, metadataField, lang, me.getText()); } else { log.warn("WARNING: " + myName + ": No mapping for Element=\"" + key + "\" to qdc."); } } } @Override public boolean preferList() { return true; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy