All downloads are free. Search and download functionality uses the official Maven repository.

com.googlecode.fascinator.redbox.VitalTransformer Maven / Gradle / Ivy

There is a newer version: 1.10.1
Show newest version
/*
 * ReDBox - VITAL Transformer
 * Copyright (C) 2011 University of Southern Queensland
 * Copyright (C) 2011 Queensland Cyber Infrastructure Foundation (http://www.qcif.edu.au/)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
package com.googlecode.fascinator.redbox;

import com.googlecode.fascinator.api.PluginDescription;
import com.googlecode.fascinator.api.PluginException;
import com.googlecode.fascinator.api.PluginManager;
import com.googlecode.fascinator.api.indexer.Indexer;
import com.googlecode.fascinator.api.indexer.SearchRequest;
import com.googlecode.fascinator.api.storage.DigitalObject;
import com.googlecode.fascinator.api.storage.Payload;
import com.googlecode.fascinator.api.storage.Storage;
import com.googlecode.fascinator.api.storage.StorageException;
import com.googlecode.fascinator.api.transformer.Transformer;
import com.googlecode.fascinator.api.transformer.TransformerException;
import com.googlecode.fascinator.common.JsonObject;
import com.googlecode.fascinator.common.JsonSimple;
import com.googlecode.fascinator.common.JsonSimpleConfig;
import com.googlecode.fascinator.common.messaging.MessagingException;
import com.googlecode.fascinator.common.messaging.MessagingServices;
import com.googlecode.fascinator.common.solr.SolrDoc;
import com.googlecode.fascinator.common.solr.SolrResult;

import fedora.client.FedoraClient;
import fedora.server.management.FedoraAPIM;
import fedora.server.types.gen.Datastream;
import fedora.server.types.gen.DatastreamDef;
import fedora.server.types.gen.UserInfo;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.StringWriter;

import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.io.IOUtils;
import org.json.simple.JSONArray;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A Transformer to notify VITAL of completed objects in ReDBox.
 * 
 * @author Greg Pendlebury
 */
public class VitalTransformer implements Transformer {
    /** Logging */
    private final Logger log = LoggerFactory.getLogger(VitalTransformer.class);

    /** Messaging */
    private MessagingServices messaging;
    private String emailQueue;
    private List emailAddresses;
    private String emailSubject;
    private String emailTemplate;

    /** Fascinator plugins */
    private Storage storage;
    private Indexer indexer;

    /** Fedora */
    private String fedoraUrl;
    private String fedoraUsername;
    private String fedoraPassword;
    private String fedoraNamespace;
    // Template for log entries
    private String fedoraMessageTemplate;
    private int fedoraTimeout;

    /** Valid instantiation */
    boolean valid = false;

    /** VITAL integration config */
    private Map pids;
    private String attachDs;
    private String attachStatusField;
    private Map attachStatuses;
    private String attachLabelField;
    private Map attachLabels;
    private String attachControlGroup;
    private boolean attachVersionable;
    private boolean attachRetainIds;
    private Map> attachAltIds;
    private File foxmlTemplate;

    /** Temp directory */
    private File tmpDir;

    /** Wait conditions */
    private List waitProperties;

    /**
     * Returns the short identifier of this plugin type.
     *
     * @return the plugin type id, always "vital"
     */
    @Override
    public String getId() {
        return "vital";
    }

    /**
     * Returns the human readable name of this plugin.
     *
     * @return the descriptive plugin name, always "VITAL Transformer"
     */
    @Override
    public String getName() {
        return "VITAL Transformer";
    }

    /**
     * Builds a PluginDescription for this plugin instance.
     *
     * @return a new PluginDescription constructed from this plugin
     */
    @Override
    public PluginDescription getPluginDetails() {
        return new PluginDescription(this);
    }

    /**
     * Initializes the plugin from a JSON configuration supplied as a String.
     *
     * @param jsonString JSON configuration string
     * @throws TransformerException if the configuration cannot be read, or
     *      if applying it fails
     */
    @Override
    public void init(String jsonString) throws TransformerException {
        JsonSimpleConfig parsed;
        try {
            parsed = new JsonSimpleConfig(jsonString);
        } catch (IOException ioe) {
            throw new TransformerException(ioe);
        }
        // Configuration errors surface as TransformerException directly
        setConfig(parsed);
    }

    /**
     * Initializes the plugin from a JSON configuration file.
     *
     * @param jsonFile JSON configuration file
     * @throws TransformerException if the configuration cannot be read, or
     *      if applying it fails
     */
    @Override
    public void init(File jsonFile) throws TransformerException {
        JsonSimpleConfig parsed;
        try {
            parsed = new JsonSimpleConfig(jsonFile);
        } catch (IOException ex) {
            throw new TransformerException(ex);
        }
        // Configuration errors surface as TransformerException directly
        setConfig(parsed);
    }

    /**
     * Initialization of plugin
     * 
     * @param config The configuration to use
     * @throws TransformerException if fails to initialize
     */
    private void setConfig(JsonSimpleConfig config)
            throws TransformerException {
        // Test our Fedora connection... things are kind of useless without it
        fedoraUrl = config.getString(null,
                Strings.CONFIG_SERVER, "url");
        fedoraNamespace = config.getString(null,
                Strings.CONFIG_SERVER, "namespace");
        fedoraUsername = config.getString(null,
                Strings.CONFIG_SERVER, "username");
        fedoraPassword = config.getString(null,
                Strings.CONFIG_SERVER, "password");
        fedoraTimeout = config.getInteger(15,
                Strings.CONFIG_SERVER, "timeout");
        if (fedoraUrl == null || fedoraNamespace == null ||
                fedoraUsername == null || fedoraPassword == null) {
            throw new TransformerException(
                    "Valid fedora configuration is missing!");
        }
        // This will throw the TransformerException for
        // us if there's something wrong
        fedoraConnect(true);
        fedoraMessageTemplate = config.getString(Strings.DEFAULT_VITAL_MESSAGE,
                Strings.CONFIG_SERVER, "message");

        // Temp space
        boolean success = false;
        String tempPath = config.getString(
                System.getProperty("java.io.tmpdir"), "tempDir");
        if (tempPath != null) {
            tmpDir = new File(tempPath);

            // Make sure it exists
            if (!tmpDir.exists()) {
                success = tmpDir.mkdirs();
            } else {
                // And it's a directory
                success = tmpDir.isDirectory();
            }
            // Now make sure it's writable
            if (success) {
                File file = new File(tmpDir, "creation.test");
                try {
                    file.createNewFile();
                    file.delete();
                    success = !file.exists();
                } catch (IOException ex) {
                    success = false;
                }

            }
        }
        if (tmpDir == null || !success) {
            throw new TransformerException(
                    "Cannot find a valid (and writable) TEMP directory!");
        }

        // Get the list of pids we are sending to VITAL
        pids = config.getJsonSimpleMap("dataStreams");
        if (pids == null || pids.isEmpty()) {
            throw new TransformerException(
                    "No datastreams configured to export!");
        }
        // And attachment handling
        JsonSimple attachmentsConfig = new JsonSimple(
                config.getObject("attachments"));
        attachDs = attachmentsConfig.getString("ATTACHMENT%02d", "dsID");
        Pattern p = Pattern.compile("%\\d*d");
        Matcher m = p.matcher(attachDs);
        if (!m.find()) {
            throw new TransformerException(
                    "'*/attachments/dsId' must have a format placeholder for incrementing integer, eg. '%d' or '%02d'. The value provided ('"
                    + attachDs + "') is invalid");
        }
        attachStatusField = attachmentsConfig.getString(null, "statusField");
        attachStatuses = getStringMap(attachmentsConfig, "status");
        attachLabelField = attachmentsConfig.getString(null, "labelField");
        attachLabels = getStringMap(attachmentsConfig, "label");
        attachControlGroup = attachmentsConfig.getString(null, "controlGroup");
        attachVersionable = attachmentsConfig.getBoolean(false, "versionable");
        attachRetainIds = attachmentsConfig.getBoolean(true, "retainIds");
        // To make life easier we're going to use the new JSON Library here
        attachAltIds = new LinkedHashMap>();
        JsonSimple json;
        try {
            json = new JsonSimple(config.toString());
        } catch (IOException ex) {
            throw new TransformerException("Error parsing attachment JSON", ex);
        }
        // Use the base object for iteration
        JsonObject objAltIds = json.getObject("attachments", "altIds");
        // And the library for access methods
        JsonSimple altIds = new JsonSimple(objAltIds);
        for (Object oKey : objAltIds.keySet()) {
            String key = (String) oKey;
            List ids = altIds.getStringList(key);
            if (ids.isEmpty()) {
                log.warn("WARNING: '{}' has no altIds configured.", key);
            } else {
                attachAltIds.put(key, ids);
            }
        }
        // Make sure 'default' exists, even if empty
        if (!attachAltIds.containsKey(Strings.LITERAL_DEFAULT)) {
            attachAltIds.put(Strings.LITERAL_DEFAULT, new ArrayList());
        }

        // Are we sending emails on errors?
        emailQueue = config.getString(null,
                Strings.CONFIG_FAILURE, "emailQueue");
        if (emailQueue != null) {
            emailAddresses = config.getStringList(
                    Strings.CONFIG_FAILURE, "emailAddress");
            if (emailAddresses != null && !emailAddresses.isEmpty()) {
                emailSubject = config.getString(Strings.DEFAULT_EMAIL_SUBJECT,
                        Strings.CONFIG_FAILURE, "emailSubject");
                emailTemplate = config.getString(Strings.DEFAULT_EMAIL_TEMPLATE,
                        Strings.CONFIG_FAILURE, "emailTemplate");
            } else {
                log.error("No email address provided! Reverting to errors using log files");
                emailQueue = null;
            }
        } else {
            log.warn("No email queue provided. Errors will only be logged");
        }

        // Ensure we have access to messaging services
        if (emailQueue != null) {
            try {
                messaging = MessagingServices.getInstance();
            } catch (MessagingException ex) {
                throw new TransformerException(
                        "Error starting Messaging Services", ex);
            }
        }

        // Need our config file to instantiate plugins
        File sysFile = null;
        try {
            sysFile = JsonSimpleConfig.getSystemFile();
        } catch (IOException ioe) {
            log.error("Failed to read configuration: {}", ioe.getMessage());
            throw new TransformerException("Failed to read configuration", ioe);
        }

        // Start our storage layer
        try {
            storage = PluginManager.getStorage(
                    config.getString("file-system", "storage", "type"));
            storage.init(sysFile);
        } catch (PluginException pe) {
            log.error("Failed to initialise plugin: {}", pe.getMessage());
            throw new TransformerException("Failed to initialise storage", pe);
        }

        // Instantiate an indexer for searching
        try {
            indexer = PluginManager.getIndexer(
                    config.getString("solr", "indexer", "type"));
            indexer.init(sysFile);
        } catch (PluginException pe) {
            log.error("Failed to initialise plugin: {}", pe.getMessage());
            throw new TransformerException("Failed to initialise indexer", pe);
        }

        // Do we have a template?
        String templatePath = config.getString(null, "foxmlTemplate");
        if (templatePath != null) {
            foxmlTemplate = new File(templatePath);
            if (!foxmlTemplate.exists()) {
                foxmlTemplate = null;
                throw new TransformerException(
                        "The new object template provided does not exist: '" +
                        templatePath + "'");
            }
        }

        // Wait conditions
        waitProperties = new ArrayList();
        Map waitConditions =
                getStringMap(config, "waitConditions");
        if (waitConditions != null) {
            for (String type : waitConditions.keySet()) {
                String value = waitConditions.get(type);
                if (value == null) {
                    continue;
                }
                // We only support properties at this stage
                if (type.equals("property")) {
                    log.info("New wait condition: Property '{}'.", value);
                    waitProperties.add(value);
                }
            }
        }

        valid = true;
    }

    /**
     * Trivial wrapper on the JsonSimple getObject() method, copying every
     * entry whose value is a String into a new map. Entries holding any
     * other kind of value are left out. Iteration order of the underlying
     * object is preserved.
     * 
     * @param json The json object to query.
     * @param path The path on which the map is found.
     * @return Map: The String entries found at the path, or null if there
     *      is no object at that path
     */
    private Map<String, String> getStringMap(JsonSimple json, String... path) {
        JsonObject object = json.getObject((Object[]) path);
        if (object == null) {
            // Callers use null to tell 'not configured' from 'empty'
            return null;
        }
        Map<String, String> response = new LinkedHashMap<String, String>();
        for (Object key : object.keySet()) {
            Object value = object.get(key);
            if (value instanceof String) {
                response.put((String) key, (String) value);
            }
        }
        return response;
    }

    /**
     * Convenience overload that connects to Fedora for anything other than
     * the first connection. It simply delegates to
     * fedoraConnect(boolean) with the 'firstConnection' flag switched off.
     * 
     * @return FedoraClient The client used to connect to the API
     * @throws TransformerException if there was an error
     */
    private FedoraClient fedoraConnect() throws TransformerException {
        return fedoraConnect(false);
    }

    /**
     * Establish a connection to Fedora's management API (API-M) to confirm
     * credentials, then return the instantiated fedora client used to connect.
     * 
     * @param firstConnection If this is the first connection (ie. from
     *      plugin configuration), set this flag. Some logging will occur,
     *      and a basic API call will be triggered to test genuine
     *      connectivity with regards to the network and the credentials
     *      supplied.
     * @return FedoraClient The client used to connect to the API
     * @throws TransformerException if the URL is invalid, the server cannot
     *      be reached, the server version is not supported, or the
     *      management API cannot be accessed
     */
    private FedoraClient fedoraConnect(boolean firstConnection)
            throws TransformerException {
        FedoraClient fedora = null;
        try {
            // Connect to the server
            fedora = new FedoraClient(
                    fedoraUrl, fedoraUsername, fedoraPassword);
            fedora.SOCKET_TIMEOUT_SECONDS = fedoraTimeout;
            if (firstConnection) {
                log.info("Connected to FEDORA : '{}'", fedoraUrl);
            }
            // Make sure we can get the server version
            String version = fedora.getServerVersion();
            // Version cutout. A missing version is treated as unsupported
            // rather than being left to fail with a NullPointerException
            if (version == null ||
                    !version.startsWith(Strings.FEDORA_VERSION_TEST)) {
                throw new TransformerException(
                        "Error; this plugin is designed to work with Fedora versions 2.x");
            }

            if (firstConnection) {
                log.info("FEDORA version: '{}'", version);
            }
            // And that we have appropriate access to the management API
            FedoraAPIM apim = fedora.getAPIM();
            if (firstConnection) {
                log.info("API-M access testing... {} second timeout",
                        fedoraTimeout);
                UserInfo user = apim.describeUser(fedoraUsername);
                log.info("API-M access confirmed: User '{}', ID: '{}'",
                        fedoraUsername, user.getId());
            }
        } catch (TransformerException ex) {
            // Our own (version) failure; do not let the generic handler
            // below relabel it as a management API problem
            throw ex;
        } catch (MalformedURLException ex) {
            throw new TransformerException("Server URL is Invalid (?) : ", ex);
        } catch (IOException ex) {
            throw new TransformerException("Error connecting to VITAL! : ", ex);
        } catch (Exception ex) {
            throw new TransformerException(
                    "Error accesing management API! : ", ex);
        }
        return fedora;
    }

    /**
     * Shuts down the plugin by shutting down the storage and indexer
     * plugins it started. Both shutdowns are always attempted, so a failing
     * storage layer does not leave the indexer running; the first failure
     * encountered is the one reported to the caller.
     * 
     * @throws TransformerException if there was an error during shutdown
     */
    @Override
    public void shutdown() throws TransformerException {
        TransformerException failure = null;
        if (storage != null) {
            try {
                storage.shutdown();
            } catch (PluginException pe) {
                log.error("Failed to shutdown storage: {}", pe.getMessage());
                failure = new TransformerException(
                        "Failed to shutdown storage", pe);
            }
        }
        if (indexer != null) {
            try {
                indexer.shutdown();
            } catch (PluginException pe) {
                log.error("Failed to shutdown indexer: {}", pe.getMessage());
                // Keep the earlier storage failure if there was one
                if (failure == null) {
                    failure = new TransformerException(
                            "Failed to shutdown indexer", pe);
                }
            }
        }
        if (failure != null) {
            throw failure;
        }
    }

    /**
     * Transform method. Reports an error if the plugin was never
     * successfully configured, then hands the object on for processing.
     * 
     * @param in DigitalObject to be transformed
     * @param jsonConfig String containing configuration for this item
     * @return DigitalObject The object after being transformed
     * @throws TransformerException if processing the object fails
     */
    @Override
    public DigitalObject transform(DigitalObject in, String jsonConfig)
            throws TransformerException {
        if (!valid) {
            // NOTE(review): error() is defined outside this excerpt;
            // confirm it throws, otherwise processing continues below
            error("Instantiation did not complete.");
        }
        String oid = in.getId();
        log.debug("Received OID '{}'", oid);
        return process(in, jsonConfig);
    }

    /**
     * Top level wrapping method for a processing an object.
     * 
     * This method first performs all the basic checks whether this Object is
     * technically ready to go to VITAL (no matter what the workflow says):
     * the workflow payload must be readable, the workflow step must be
     * 'live' and the form data must hold a title. Objects that are not live
     * are returned untouched.
     * 
     * @param in The object to process
     * @param jsonConfig Configuration for this item (not used here)
     * @return DigitalObject The object after processing
     * @throws TransformerException if the object cannot be processed
     */
    private DigitalObject process(DigitalObject in, String jsonConfig)
            throws TransformerException {
        String oid = in.getId();

        // Workflow payload
        JsonSimple workflow = null;
        try {
            Payload workflowPayload = in.getPayload("workflow.metadata");
            try {
                workflow = new JsonSimple(workflowPayload.open());
            } finally {
                // Release the payload even when parsing fails
                workflowPayload.close();
            }
        } catch (StorageException ex) {
            error("Error accessing workflow data from Object!\nOID: '"
                    + oid + "'", ex);
        } catch (IOException ex) {
            error("Error parsing workflow data from Object!\nOID: '"
                    + oid + "'", ex);
        }

        // NOTE(review): error() is defined outside this excerpt; the code
        // below assumes it threw if 'workflow' could not be read - confirm

        // Make sure it is live
        String step = workflow.getString(null, "step");
        if (step == null || !step.equals("live")) {
            log.warn("Object is not live! '{}'", oid);
            return in;
        }

        // Make sure we have a title
        String title = workflow.getString(null, Strings.NODE_FORMDATA, "title");
        if (title == null) {
            error("No title provided in Object form data!\nOID: '" + oid + "'");
        }

        // Object metadata
        Properties metadata = null;
        try {
            metadata = in.getMetadata();
        } catch (StorageException ex) {
            error("Error reading Object metadata!\nOID: '" + oid + "'", ex);
        }

        // Now that we have all the data we need, go do the real work
        return processObject(in, workflow, metadata);
    }

    /**
     * Middle level wrapping method for processing objects. Works out what
     * actually needs doing for this object: reuse the VITAL object recorded
     * in its metadata or create a new one, honour any configured wait
     * conditions, activate the object in Fedora if that has not been
     * recorded yet, and finally send the datastreams.
     * 
     * @param object The Object in question
     * @param workflow The workflow data for the object
     * @param metadata The Object's metadata
     * @return DigitalObject The object that was passed in
     * @throws TransformerException if reported by the error handling
     */
    private DigitalObject processObject(DigitalObject object,
            JsonSimple workflow, Properties metadata)
            throws TransformerException {
        final String oid = object.getId();
        final String title =
                workflow.getString(null, Strings.NODE_FORMDATA, "title");

        FedoraClient fedora = null;
        try {
            fedora = fedoraConnect();
        } catch (TransformerException ex) {
            error("Error connecting to VITAL", ex, oid, title);
        }

        // Has this object been sent to VITAL on an earlier run?
        String vitalPid = metadata.getProperty(Strings.PROP_VITAL_KEY);
        if (vitalPid != null) {
            log.debug("Existing VITAL object: '{}'", vitalPid);
            // The DC datastream is used as the existence test
            boolean stillThere = datastreamExists(fedora, vitalPid, "DC");
            if (!stillThere) {
                // It has gone missing, so tell someone and start over
                error(" !!! WARNING !!! The expected VITAL object '"
                        + vitalPid
                        + "' was not found. A new object will be created instead!",
                        null, oid, title);
                vitalPid = null;
            }
        }

        // No usable PID means we need a brand new VITAL object
        if (vitalPid == null) {
            try {
                vitalPid = createNewObject(fedora, object.getId());
                log.debug("New VITAL object created: '{}'", vitalPid);
                metadata.setProperty(Strings.PROP_VITAL_KEY, vitalPid);
                // Closing the object triggers a save of its metadata
                object.close();
            } catch (Exception ex) {
                error("Failed to create object in VITAL", ex, oid, title);
            }
        }

        // Wait conditions: at least one listed property must be present
        if (!waitProperties.isEmpty()) {
            int satisfied = 0;
            for (String propertyName : waitProperties) {
                if (metadata.getProperty(propertyName) != null) {
                    log.info("Wait condition '{}' found.", propertyName);
                    satisfied++;
                }
            }
            if (satisfied == 0) {
                log.info("No wait conditions have been met, processing halted");
                return object;
            }
        }

        // Activate the object, unless we have recorded doing so before
        try {
            if (metadata.getProperty(Strings.PROP_VITAL_ACTIVE) == null) {
                log.info("Activating object in fedora: '{}'", oid);
                // Titles longer than 250 characters are truncated
                String cutTitle = title.length() > 250
                        ? title.substring(0, 250) + "..."
                        : title;
                fedora.getAPIM().modifyObject(vitalPid, "A", cutTitle, null,
                        "ReDBox activating object: '" + oid + "'");
                // Remember this so the activation is not repeated
                metadata.setProperty(Strings.PROP_VITAL_ACTIVE, "true");
                object.close();
            }
        } catch (Exception ex) {
            error("Failed to activate object in VITAL", ex, oid, title);
        }

        // Finally, push every configured payload across
        try {
            processDatastreams(fedora, object, vitalPid);
        } catch (Exception ex) {
            error("Failed to send object to VITAL", ex, oid, title);
        }
        return object;
    }

    /**
     * Create a new VITAL object and return the PID. The object is ingested
     * from a FOXML template: the user provided template file when one is
     * configured, otherwise the '/foxml_template.xml' resource.
     * 
     * @param fedora An instantiated fedora client
     * @param oid The ID of the ReDBox object we will store here. For logging
     * @return String The new VITAL PID that was just created
     * @throws Exception if the template cannot be read, or the ingest fails
     */
    private String createNewObject(FedoraClient fedora, String oid)
            throws Exception {
        InputStream in = null;
        byte[] template = null;
        // Start by reading our FOXML template into memory
        try {
            if (foxmlTemplate != null) {
                // We have a user provided template
                in = new FileInputStream(foxmlTemplate);
            } else {
                // Use the built in template
                in = getClass().getResourceAsStream("/foxml_template.xml");
                if (in == null) {
                    // A missing resource yields null rather than an error
                    throw new IOException(
                            "Built in template '/foxml_template.xml' was not found");
                }
            }
            template = IOUtils.toByteArray(in);
        } catch (IOException ex) {
            // Keep the cause so the underlying problem can be diagnosed
            throw new Exception(
                    "Error accessing FOXML Template, please check system configuration!",
                    ex);
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ex) {
                    // The template has been read (or a more useful error is
                    // already on its way), so a failed close is only logged
                    log.warn("Failed to close FOXML template stream: {}",
                            ex.getMessage());
                }
            }
        }

        String vitalPid = fedora.getAPIM().ingest(template,
                Strings.FOXML_VERSION,
                "ReDBox creating new object: '" + oid + "'");
        log.info("New VITAL PID: '{}'", vitalPid);
        return vitalPid;
    }

    /**
     * Method responsible for arranging submissions to VITAL to store our
     * datastreams. Every configured payload is sent in turn, and once they
     * have all gone across the attachments are handled. A failure part way
     * through is reported with a partial upload message.
     * 
     * @param fedora An instantiated fedora client
     * @param object The Object to submit
     * @param vitalPid The VITAL PID to use
     * @throws Exception on any errors
     */
    private void processDatastreams(FedoraClient fedora, DigitalObject object,
            String vitalPid) throws Exception {
        int uploaded = 0;

        // Walk every configured payload
        for (Map.Entry<String, JsonSimple> entry : pids.entrySet()) {
            final String configuredPid = entry.getKey();

            // Fascinator packages have unpredictable names, so they are
            // configured by extension only and resolved per object
            // eg. 'e6e174fe-3508-4c8a-8530-1d6bb644d10a.tfpackage'
            String payloadId = configuredPid;
            if (configuredPid.equals(".tfpackage")) {
                payloadId = getPackagePid(object);
                if (payloadId == null) {
                    String message = partialUploadErrorMessage(configuredPid,
                            uploaded, pids.size(), vitalPid);
                    throw new Exception(message + "\n\nPackage not found.");
                }
            }
            log.info("Processing PID to send to VITAL: '{}'", configuredPid);

            // Per datastream settings, each falling back to a default
            final JsonSimple dsConfig = entry.getValue();
            final String dsId = dsConfig.getString(payloadId, "dsID");
            final String label = dsConfig.getString(dsId, "label");
            final String status = dsConfig.getString("A", "status");
            final String controlGroup = dsConfig.getString("X", "controlGroup");
            final boolean versionable = dsConfig.getBoolean(true, "versionable");
            final boolean retainIds = dsConfig.getBoolean(true, "retainIds");

            // Carry across alt IDs already on an existing datastream
            String[] altIds = {};
            if (retainIds && datastreamExists(fedora, vitalPid, dsId)) {
                altIds = getAltIds(fedora, vitalPid, dsId);
                for (String retained : altIds) {
                    log.debug("Retaining alt ID: '{}' => {}'", dsId, retained);
                }
            }

            // The payload is only needed here to find the MIME type
            Payload payload;
            try {
                payload = object.getPayload(payloadId);
            } catch (StorageException ex) {
                String message = partialUploadErrorMessage(payloadId, uploaded,
                        pids.size(), vitalPid);
                throw new Exception(message + "\n\nError accessing payload '"
                        + payloadId + "' : ", ex);
            }
            final String declaredType = payload.getContentType();
            // Unknown content is treated as binary data
            final String mimeType = (declaredType != null)
                    ? declaredType : "application/octet-stream";

            try {
                sendToVital(fedora, object, payloadId, vitalPid, dsId, altIds,
                        label, mimeType, controlGroup, status, versionable);
            } catch (Exception ex) {
                String message = partialUploadErrorMessage(payloadId, uploaded,
                        pids.size(), vitalPid);
                throw new Exception(message, ex);
            }

            // One more payload safely across
            uploaded++;
        }

        // Datastreams are taken care of, now handle attachments
        try {
            processAttachments(fedora, object, vitalPid);
        } catch (Exception ex) {
            throw new Exception("Error processing attachments: ", ex);
        }
    }

    /**
     * Similar to sendToVital(), but this method is specifically looking for
     * attachments distributed throughout the system.
     * 
     * @param fedora An instantiated fedora client
     * @param object The Object to submit
     * @param vitalPid The VITAL PID to use
     * @throws Exception on any errors
     */
    private void processAttachments(FedoraClient fedora, DigitalObject object,
            String vitalPid) throws Exception {
        ByteArrayOutputStream out = null;
        ByteArrayInputStream in = null;
        SolrResult result;

        // Search for attachments to this object
        String oid = object.getId();
        SearchRequest req = new SearchRequest("attached_to:\"" + oid + "\"");
        req.setParam("rows", "1000");

        // Get our search results
        try {
            out = new ByteArrayOutputStream();
            indexer.search(req, out);
            in = new ByteArrayInputStream(out.toByteArray());
            result = new SolrResult(in);
        } catch (Exception ex) {
            throw new Exception("Error searching for attachments : ", ex);
        } finally {
            close(out);
            close(in);
        }

        // Make sure there were even results
        if (result.getNumFound() == 0) {
            log.info("No attachments found for '{}'", oid);
            return;
        }

        // Do a *first* pre-pass establishing which IDs to use
        Map> idMap = new HashMap>();
        List usedIds = new ArrayList();
        for (SolrDoc item : result.getResults()) {
            // Has it been to VITAL before?
            String aOid = item.getFirst("id");
            DigitalObject attachment = storage.getObject(aOid);
            Properties metadata = attachment.getMetadata();
            String vitalDsId = metadata.getProperty(Strings.PROP_VITAL_DSID);
            String vitalOrder = metadata.getProperty(Strings.PROP_VITAL_ORDER);

            // Record what we know
            Map map = new HashMap();
            if (vitalDsId != null) {
                map.put("hasId", "true");
                map.put(Strings.PROP_VITAL_DSID, vitalDsId);
                map.put(Strings.PROP_VITAL_ORDER, vitalOrder);
                usedIds.add(vitalDsId);
            } else {
                map.put("hasId", "false");
            }
            idMap.put(aOid, map);
        }

        // Another pass, now that we know all the used IDs
        int dsIdSuffix = 1;
        for (SolrDoc item : result.getResults()) {
            String aOid = item.getFirst("id");
            boolean hasId = Boolean.parseBoolean(idMap.get(aOid).get("hasId"));
            // This record needs a new ID
            if (!hasId) {
                String newId = String.format(attachDs, dsIdSuffix);
                // Make sure it's not in use already either by us
                while (usedIds.contains(newId) || // or by VITAL
                        datastreamExists(fedora, vitalPid, newId)) {
                    dsIdSuffix++;
                    newId = String.format(attachDs, dsIdSuffix);
                }
                // 'Use' it
                idMap.get(aOid).put(Strings.PROP_VITAL_DSID, newId);
                idMap.get(aOid).put(Strings.PROP_VITAL_ORDER,
                        String.valueOf(dsIdSuffix));
                usedIds.add(newId);
                dsIdSuffix++;
            }
        }

        // Now, the real work. Loop through each attachment
        for (SolrDoc item : result.getResults()) {
            String aOid = item.getFirst("id");
            log.info("Processing Attachment: '{}'", aOid);

            // Get the object from storage
            DigitalObject attachment = storage.getObject(aOid);

            // Find our workflow/form data
            Payload wfPayload = attachment.getPayload("workflow.metadata");
            JsonSimple workflow = null;
            try {
                workflow = new JsonSimple(wfPayload.open());
            } catch (Exception ex) {
                throw ex;
            } finally {
                wfPayload.close();
            }

            // Find our payload
            String pid = workflow.getString(attachment.getSourceId(),
                    Strings.NODE_FORMDATA, "filename");
            log.info(" === Attachment PID: '{}'", pid);
            Payload payload = attachment.getPayload(pid);

            // MIME Type - Default to binary data
            String mimeType = payload.getContentType();
            if (mimeType == null) {
                mimeType = "application/octet-stream";
            }

            // Get our VITAL config
            String dsId = idMap.get(aOid).get(Strings.PROP_VITAL_DSID);
            String vitalOrder = idMap.get(aOid).get(Strings.PROP_VITAL_ORDER);
            String label = dsId; // Default
            String labelData = workflow.getString(null,
                    Strings.NODE_FORMDATA, attachLabelField);
            if (attachLabels.containsKey(labelData)) {
                // We found a real value
                label = attachLabels.get(labelData);
            }
            String status = "A"; // Default
            String statusData = workflow.getString(null,
                    Strings.NODE_FORMDATA, attachStatusField);
            if (attachStatuses.containsKey(statusData)) {
                // We found a real value
                status = attachStatuses.get(statusData);
            }
            // Check for Alt IDs that already exist... if configured to
            String[] altIds = {};
            if (attachRetainIds && datastreamExists(fedora, vitalPid, dsId)) {
                altIds = getAltIds(fedora, vitalPid, dsId);
                for (String altId : altIds) {
                    log.debug("Retaining alt ID: '{}' => {}'", dsId, altId);
                }
            }
            altIds = resolveAltIds(altIds, mimeType,
                    Integer.valueOf(vitalOrder));

            try {
                sendToVital(fedora, attachment, pid, vitalPid, dsId, altIds,
                        label, mimeType, attachControlGroup, status,
                        attachVersionable);
            } catch (Exception ex) {
                // Throw error
                throw new Exception("Error uploading attachment '" + aOid
                        + "' : ", ex);
            }

            // The submission was successful, store the dsId if not already
            boolean hasId = Boolean.parseBoolean(idMap.get(aOid).get("hasId"));
            if (!hasId) {
                Properties metadata = attachment.getMetadata();
                metadata.setProperty(Strings.PROP_VITAL_DSID, dsId);
                metadata.setProperty(Strings.PROP_VITAL_ORDER, vitalOrder);
                attachment.close();
            }
        } // End for loop
    }

    /**
     * Locate the Fascinator package payload held by the given digital object.
     * The first payload ID ending in '.tfpackage' is the one returned.
     * 
     * @param object The object with a package
     * @return String The payload ID of the package, NULL if not found
     * @throws Exception if any errors occur
     */
    private String getPackagePid(DigitalObject object) throws Exception {
        String packagePid = null;
        for (String candidate : object.getPayloadIdList()) {
            if (candidate.endsWith(".tfpackage")) {
                // First hit wins, stop looking
                packagePid = candidate;
                break;
            }
        }
        return packagePid;
    }

    /**
     * For the given mime type, ensure that the array of alternate identifiers
     * is correct. If identifiers are missing they will be added to the array.
     * 
     * The configured identifier list is selected in this order of preference:
     * an entry keyed by the exact mime type, then an entry keyed by a broad
     * group (a key ending in '/', matched as a prefix), then the 'default'
     * entry. If no entry applies at all the array is returned unchanged.
     * 
     * @param oldArray The old array of identifiers
     * @param mimeType The mime type of the datastream
     * @param count The attachment count, to use in the format call
     * @return String[] An array containing all of the old IDs with any that
     *      were missing for the mime type
     */
    private String[] resolveAltIds(String[] oldArray, String mimeType,
            int count) {
        // First, find the valid list we want. Track exact and group matches
        // separately so the outcome does not depend on key iteration order.
        String exactKey = null;
        String groupKey = null;
        for (String mimeTest : attachAltIds.keySet()) {
            // Ignore 'default'
            if (mimeTest.equals(Strings.LITERAL_DEFAULT)) {
                continue;
            }
            if (mimeTest.endsWith("/")) {
                // A broad group
                if (mimeType.startsWith(mimeTest)) {
                    groupKey = mimeTest;
                }
            } else if (mimeType.equals(mimeTest)) {
                // A specific mime type
                exactKey = mimeTest;
            }
        }

        // Exact match first, then group, then 'default'
        String key = Strings.LITERAL_DEFAULT;
        if (exactKey != null) {
            key = exactKey;
        } else if (groupKey != null) {
            key = groupKey;
        }

        // Nothing configured for the chosen key (eg. no 'default' entry)
        if (attachAltIds.get(key) == null) {
            return oldArray;
        }

        // Loop through the ids we're going to use
        for (String newId : attachAltIds.get(key)) {
            // If there is a format requirement, use it
            String formatted = String.format(newId, count);
            // Modify our array (if it's not there)
            oldArray = growArray(oldArray, formatted);
        }
        return oldArray;
    }

    /**
     * Return an array guaranteed to contain the requested element. When the
     * element is already present the array passed in is returned as-is,
     * otherwise a copy one slot longer is built with the element appended.
     * 
     * @param oldArray The old array of data
     * @param newElement The new element we want
     * @return String[] An array containing all of the old data, plus the new
     *      element if it was missing
     */
    private String[] growArray(String[] oldArray, String newElement) {
        final int oldLength = oldArray.length;

        // Already present? Then there is nothing to do
        for (int i = 0; i < oldLength; i++) {
            if (oldArray[i].equals(newElement)) {
                return oldArray;
            }
        }
        log.debug("Adding ID: '{}'", newElement);

        // Build the longer array: old contents first, new element last
        String[] grown = new String[oldLength + 1];
        System.arraycopy(oldArray, 0, grown, 0, oldLength);
        grown[oldLength] = newElement;
        return grown;
    }

    /**
     * Take care of the actual transmission to VITAL. This method will select
     * the appropriate transmission method based on:
     * 
     * 1) If VITAL has already seen the datastream before
     * 2) If the data is XML or not
     * 
     * An existing 'text/xml' datastream is modified by value. Everything else
     * is cached to a temp file and uploaded, then either modifies the
     * existing datastream by reference or is added as a new datastream. The
     * temp file is removed before returning.
     * 
     * @param fedora The fedora client to use in transmission
     * @param ourObject The DigitalObject in storage
     * @param ourPid The payload in the object to send
     * @param vitalPid The object in fedora we are targeting
     * @param dsId The datastream ID in fedora to create or overwrite
     * @param altIds The alternate identifiers to send with the datastream
     * @param label The label to use
     * @param mimeType The mime type of the content we are sending
     * @param controlGroup The control group value to use if the object is new
     * @param status The status to use in fedora if the object is new
     * @param versionable The versionable flag to use if the object is new
     * @throws Exception if any errors occur
     */
    private void sendToVital(FedoraClient fedora, DigitalObject ourObject,
            String ourPid, String vitalPid, String dsId, String[] altIds,
            String label, String mimeType, String controlGroup, String status,
            boolean versionable) throws Exception {
        // We might need to cleanup a file upload if things go wrong
        File tempFile = null;
        String tempURI = null;

        try {
            // Find out if it has been sent before
            final boolean exists = datastreamExists(fedora, vitalPid, dsId);
            if (exists) {
                log.info("Updating existing datastream: '{}'", dsId);
            } else {
                log.info("Creating new datastream: '{}'", dsId);
            }
            log.debug("LABEL: '" + label + "', STATUS: '" + status
                    + "', GROUP: '" + controlGroup + "'");

            if (exists && mimeType.equals("text/xml")) {
                /**********************************
                 * EXISTING datastream, XML content:
                 * updates on inline XML must be by value
                 */
                byte[] data = getBytes(ourObject, ourPid);
                fedora.getAPIM().modifyDatastreamByValue(
                        vitalPid, // Object PID in VITAL
                        dsId,     // The dsID we have configured
                        altIds,   // Alt IDs
                        label,    // Label
                        mimeType, // MIME type
                        null,     // Format URI
                        data,     // Our XML data
                        null,     // ChecksumType
                        null,     // Checksum
                        fedoraLogEntry(ourObject, ourPid), // Log message
                        true);    // Force update

            } else {
                // Every other case goes by reference: cache the payload
                // to disk and upload it first
                try {
                    tempFile = getTempFile(ourObject, ourPid);
                } catch (Exception ex) {
                    throw new Exception("Error caching file to disk '"
                            + ourObject.getId() + "' : ", ex);
                }
                tempURI = fedora.uploadFile(tempFile);

                if (exists) {
                    /**********************************
                     * EXISTING datastream, not XML:
                     * must be modified by reference
                     */
                    fedora.getAPIM().modifyDatastreamByReference(
                            vitalPid, // Object PID in VITAL
                            dsId,     // The dsID we have configured
                            altIds,   // Alt IDs
                            label,    // Label
                            mimeType, // MIME type
                            null,     // Format URI
                            tempURI,  // Datastream Location
                            null,     // ChecksumType
                            null,     // Checksum
                            fedoraLogEntry(ourObject, ourPid), // Log message
                            true);    // Force update
                } else {
                    /**********************************
                     * NEW datastream in VITAL
                     */
                    fedora.getAPIM().addDatastream(
                            vitalPid,     // Object PID in VITAL
                            dsId,         // The dsID we have configured
                            altIds,       // Alt IDs
                            label,        // Label
                            versionable,  // Versionable
                            mimeType,     // MIME type
                            null,         // Format URI
                            tempURI,      // Datastream Location
                            controlGroup, // Control Group
                            status,       // State
                            null,         // ChecksumType
                            null,         // Checksum
                            fedoraLogEntry(ourObject, ourPid)); // Log message
                }
            }

        } catch (Exception ex) {
            // Throw error
            throw new Exception("Error submitting datastream '"
                    + ourObject.getId() + "' : ", ex);
        } finally {
            // Remove the cached copy, whatever the outcome
            if (tempFile != null && tempFile.exists()) {
                tempFile.delete();
            }
        }
    }

    /**
     * Quietly close a Closeable. Null references are ignored, as is any
     * exception raised by the close itself (eg. already closed).
     * 
     * Typically this would be a Stream, either in or out.
     * 
     * @param toClose The object to close
     */
    private void close(Closeable toClose) {
        // Never instantiated, nothing to do
        if (toClose == null) {
            return;
        }
        try {
            toClose.close();
        } catch (Exception ignored) {
            // Already closed
        }
    }

    /**
     * Ask VITAL whether the object already holds a datastream with the given
     * ID. Any error raised by the query is logged and reported as 'not found'.
     * 
     * @param fedora An instantiated fedora client
     * @param vitalPid The VITAL PID to use
     * @param dsPid The datastream ID on the object
     * @return boolean True if found, False if not found or there are errors
     */
    private boolean datastreamExists(FedoraClient fedora, String vitalPid,
            String dsPid) {
        boolean found = false;
        try {
            // Some options:
            // * getAPIA().listDatastreams... seems best
            // * getAPIM().getDatastream... causes Exceptions against new IDs
            // * getAPIM().getDatastreams... is limited to a single state
            DatastreamDef[] defs = fedora.getAPIA().listDatastreams(
                    vitalPid, null);
            // Scan until the first matching ID
            for (int i = 0; i < defs.length && !found; i++) {
                found = defs[i].getID().equals(dsPid);
            }
        } catch (Exception ex) {
            log.error("API Query error: ", ex);
        }
        return found;
    }

    /**
     * Find and return any alternate identifiers already in use in fedora for
     * the given datastream.
     * 
     * @param fedora An instantiated fedora client
     * @param vitalPid The VITAL PID to use
     * @param dsPid The datastream ID on the object
     * @return String[] An array of String identifiers, never null. Will be
     *      empty if the datastream could not be retrieved or it reports no
     *      alternate identifiers.
     */
    private String[] getAltIds(FedoraClient fedora, String vitalPid,
            String dsPid) {
        Datastream ds = getDatastream(fedora, vitalPid, dsPid);
        if (ds == null) {
            return new String[0];
        }
        // Callers iterate the result, so never hand back a null array
        String[] altIds = ds.getAltIDs();
        if (altIds == null) {
            return new String[0];
        }
        return altIds;
    }

    /**
     * Fetch the indicated datastream from VITAL. The datastream is expected
     * to exist; call datastreamExists() first to confirm. Errors raised by
     * the query are logged rather than thrown.
     * 
     * @param fedora An instantiated fedora client
     * @param vitalPid The VITAL PID to use
     * @param dsPid The datastream ID on the object
     * @return Datastream The datastream requested, null if the query failed
     */
    private Datastream getDatastream(FedoraClient fedora, String vitalPid,
            String dsPid) {
        Datastream requested = null;
        try {
            requested = fedora.getAPIM().getDatastream(vitalPid, dsPid, null);
        } catch (Exception ex) {
            log.error("API Query error: ", ex);
        }
        return requested;
    }

    /**
     * Build a Log entry to use in Fedora by filling in the placeholders of
     * the configured message template: '[[PID]]' first, then '[[OID]]'.
     * 
     * @param object The Object being submitted
     * @param pid The PID in our system
     * @return String The log message with both placeholders replaced
     */
    private String fedoraLogEntry(DigitalObject object, String pid) {
        return fedoraMessageTemplate
                .replace("[[PID]]", pid)
                .replace("[[OID]]", object.getId());
    }

    /**
     * Build an error message detailing an interrupted upload. Some (or none) of
     * the intended list of payloads did not transfer to VITAL correctly.
     * 
     * @param pid The PID in our system for which the failure occurred.
     * @param count The number of successful PIDs sent before the failure.
     * @param total The total number of PIDs that were intended to be sent.
     * @param vitalPid The PID for the entire object in VITAL.
     * @return String The assembled error message
     */
    private String partialUploadErrorMessage(String pid, int count, int total,
            String vitalPid) {
        StringBuilder text = new StringBuilder();
        // What failed
        text.append("Error submitting payload '").append(pid)
                .append("' to VITAL. ");
        // How far we got
        text.append(count).append(" of ").append(total)
                .append(" payloads where successfully");
        text.append(" sent to VITAL before this error occurred.");
        // Where it was going
        text.append(" The VITAL PID is '").append(vitalPid).append("'.");
        return text.toString();
    }

    /**
     * Stream the data out of storage to our temp directory.
     * 
     * The payload and the output stream are released on every exit path. If
     * the copy does not complete, the partially written file is deleted
     * before the failure propagates.
     * 
     * @param object Our digital object.
     * @param pid The payload ID to retrieve.
     * @return File The file created in the temp directory
     * @throws Exception on any errors
     */
    private File getTempFile(DigitalObject object, String pid)
            throws Exception {
        // Create file in temp space, use OID in path for uniqueness
        // NOTE(review): pid is used as a relative path under the temp
        // directory; confirm upstream that it cannot contain '..' segments.
        File directory = new File(tmpDir, object.getId());
        File target = new File(directory, pid);
        if (!target.exists()) {
            target.getParentFile().mkdirs();
            target.createNewFile();
        }

        // These can happily throw exceptions higher
        Payload payload = object.getPayload(pid);
        InputStream in = payload.open();
        FileOutputStream out = null;

        // From here on the payload must always receive a close
        boolean copied = false;
        try {
            out = new FileOutputStream(target);
            IOUtils.copyLarge(in, out);
            copied = true;
        } finally {
            // The stream has to be closed before the file can be deleted
            close(out);
            if (!copied) {
                target.delete();
            }
            payload.close();
        }

        return target;
    }

    /**
     * Read an entire payload out of storage into memory.
     * 
     * @param object Our digital object.
     * @param pid The payload ID to retrieve.
     * @return byte[] The byte array containing payload data
     * @throws Exception on any errors
     */
    private byte[] getBytes(DigitalObject object, String pid) throws Exception {
        // Failures here need no cleanup, nothing is open yet
        Payload source = object.getPayload(pid);
        InputStream stream = source.open();

        // Once opened, the payload is closed whether or not the read works
        try {
            return IOUtils.toByteArray(stream);
        } finally {
            source.close();
        }
    }

    /**
     * Error handling methods. Every variant logs the error and then throws a
     * TransformerException carrying the message. An email is also queued when
     * emailing is configured and the data provided indicates it is warranted.
     * 
     * If an OID and Title are provided it indicates an Object we are confident
     * should have been sent to VITAL, so emails will be sent out (if
     * configured).
     * 
     * @param message Our own error message
     * @param ex Any exception that has been thrown (OPTIONAL)
     * @param oid The OID of our Object (OPTIONAL)
     * @param title The title of our Object (OPTIONAL)
     * @throws TransformerException always, built from the message
     */
    private void error(String message) throws TransformerException {
        // No exception to report, and no object details
        error(message, (Exception) null);
    }

    private void error(String message, Exception ex)
            throws TransformerException {
        // No object details known, so this variant never triggers an email
        final String noOid = null;
        final String noTitle = null;
        error(message, ex, noOid, noTitle);
    }

    /**
     * Report an error: optionally queue a notification email, always log, and
     * always finish by throwing a TransformerException.
     * 
     * An email is queued only when an email queue is configured and both the
     * OID and title are provided. The exception thrown carries the message,
     * and has the supplied exception (if any) attached as its cause.
     * 
     * @param message Our own error message
     * @param ex Any exception that has been thrown (OPTIONAL)
     * @param oid The OID of our Object (OPTIONAL)
     * @param title The title of our Object (OPTIONAL)
     * @throws TransformerException always
     */
    private void error(String message, Exception ex, String oid, String title)
            throws TransformerException {
        // We are only sending emails when we are configured to, and when a
        // complete and correct document fails to go to VITAL
        if (emailQueue != null && oid != null && title != null) {
            JsonSimple messageJson = new JsonSimple();
            JSONArray to = messageJson.writeArray("to");
            for (String email : emailAddresses) {
                to.add(email);
            }
            JsonObject json = messageJson.getJsonObject();
            json.put("subject", emailSubject);
            // Emails require an Object ID... not sure why
            json.put("oid", oid);

            // Grab the template and replace each placeholder
            String body = emailTemplate.replace("[[OID]]", oid);
            body = body.replace("[[TITLE]]", title);
            body = body.replace("[[MESSAGE]]", message);

            // Did we have a genuine exception?
            if (ex != null) {
                // Render the stack trace into the email body
                StringWriter sw = new StringWriter();
                ex.printStackTrace(new PrintWriter(sw));
                body = body.replace("[[ERROR]]", sw.toString());
            } else {
                body = body.replace(
                        "[[ERROR]]", "{No error stacktrace provide}");
            }
            json.put("body", body);

            // Send the message
            log.debug("Error, sending email:\n{}",
                    messageJson.toString(true));
            try {
                messaging.queueMessage(emailQueue, messageJson.toString());
            } catch (MessagingException mex) {
                // A broken mail queue must not hide the original error
                log.error("Cannot access message system to send email!!",
                        mex);
            }
        }

        // Always log errors at least
        if (ex != null) {
            log.error("Error: {}", message, ex);
            log.error("STACK TRACE:\n", ex);
        } else {
            log.error("Error: {}", message);
        }

        // Keep the original exception reachable from the one we throw
        TransformerException failure = new TransformerException(message);
        if (ex != null) {
            failure.initCause(ex);
        }
        throw failure;
    }

    /****
     * Avoid use of duplicate String literals. All values are constants and
     * are declared final so they cannot be reassigned at runtime.
     * 
     */
    private static class Strings
            extends com.googlecode.fascinator.common.Strings {
        // Config file nodes
        public static final String CONFIG_FAILURE = "failure";
        public static final String CONFIG_SERVER = "server";
        // Default values
        public static final String FEDORA_VERSION_TEST = "2.";
        public static final String FOXML_VERSION = "foxml1.0";
        public static final String DEFAULT_EMAIL_SUBJECT =
                "VITAL Transformer error";
        public static final String DEFAULT_EMAIL_TEMPLATE =
                "VITAL Transformer error: [[MESSAGE]]\n\n====\n\n[[ERROR]]";
        public static final String DEFAULT_VITAL_MESSAGE =
                "Datastream update from ReDBox '[[OID]]' => '[[PID]]'";
        // Basic literals... generally free of context
        public static final String LITERAL_DEFAULT = "default";
        // Used for traversing JSON nodes of importance
        public static final String NODE_FORMDATA = "formData";
        // Key metadata properties
        public static final String PROP_VITAL_ACTIVE = "vitalActive";
        public static final String PROP_VITAL_DSID = "vitalDsId";
        public static final String PROP_VITAL_ORDER = "vitalOrder";
        public static final String PROP_VITAL_KEY = "vitalPid";
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy