All downloads are free. Search and download functionality uses the official Maven repository.

portal.default.redbox.scripts.workflows.dataset.py Maven / Gradle / Ivy

There is a newer version: 1.10.2
Show newest version
from com.googlecode.fascinator.api.indexer import SearchRequest
from com.googlecode.fascinator.api.storage import PayloadType
from com.googlecode.fascinator.api.storage import StorageException
from com.googlecode.fascinator.common import FascinatorHome
from com.googlecode.fascinator.common import JsonObject
from com.googlecode.fascinator.common import JsonSimple
from com.googlecode.fascinator.common.messaging import MessagingServices
from com.googlecode.fascinator.common.solr import SolrResult
from com.googlecode.fascinator.messaging import TransactionManagerQueueConsumer

from java.io import ByteArrayInputStream
from java.io import ByteArrayOutputStream
from java.lang import Exception
from java.lang import String

from org.apache.commons.lang import StringEscapeUtils
from org.json.simple import JSONArray


class DatasetData:
    def __init__(self):
        """Grab the shared messaging service that sendMessage() queues onto."""
        messagingServices = MessagingServices.getInstance()
        self.messaging = messagingServices

    def __activate__(self, context):
        self.velocityContext = context
        self.log = self.vc("log")
        ##self.log.debug("**** dataset.py")

        # We use these here in __activate__()
        formData = self.vc("formData")
        #print "context=%s" % formData
        #self.log.debug("formData: {}", repr(formData))
        response = self.vc("response")
        request = self.vc("request")
        func = formData.get("func", "")
        id = formData.get("id")
        # This needs to remain None unless an AJAX event is happening
        result = None

        # We need these later
        self.__formData = formData
        self.isAjax = bool(formData.get("ajax"))
        self.__object = None
        self.__oid = formData.get("_oid") or formData.get("oid")
        self.Services = self.vc("Services")
        self.page = self.vc("page")
        
 

        # These cache responses from methods
        self.__manifest = None
        self.__solrMetadata = None
        self.__tfpackage = None
        self.__wfMetadata = None

        # Allow for URL GET paramters
        if self.vc("request").method == "GET" and func != "":
            func = ""
        if func == "" and request.getParameter("func"):
            func = request.getParameter("func")

        config = self._getDataConfig()     
        self.presentationConfig = config.getObject("presentation-settings")
        

        self.log.debug("func='%s', oid='%s', id='%s'" % (func, self.__oid, id))
        try:
            if func == "file-upload":
                ##self.log.debug("**************\n  file-upload\n**************")
                result = JsonObject()
                result.put("ok", "file-upload")
                result.put("oid", self.__oid)

            # If we have an OID, ensure our data is accessible first
            if self.__oid is not None:
                # Are we updating the package whilst here?
                if func == "update-package-meta":
                    result = JsonObject()
                    result = self._updatePackageMetadata()
                elif func == "update-package-meta-deposit":
                    result = JsonObject()
                    result = self._updatePackageMetadata(True)

        except Exception, e:
            self.log.error("Failed to load manifest", e)
            result = JsonObject()
            result.put("status", "error")
            result.put("message", str(e))

        # Close our object... if we update properties this triggers a save
        if self.__object is not None:
            self.__object.close()
            self.__object=None

        if result is not None:
            writer = response.getPrintWriter("text/plain; charset=UTF-8")
            writer.println(result.toString())
            writer.close()
            


    # Get from velocity context
    def vc(self, index):
        if self.velocityContext[index] is not None:
            return self.velocityContext[index]
        else:
            self.log.error("ERROR: Requested context entry '" + index + "' doesn't exist")
            return None

    # Some basic wrappers to long calls
    def userRoles(self):
        return self.vc("page").authentication.get_roles_list()

    def username(self):
        return self.vc("page").authentication.get_username()

    def getCurrentStep(self):
        return self._getWorkflowMetadata().getString("", ["step"])

    def getCurrentStepLabel(self):
        return self._getWorkflowMetadata().getString("", ["label"])

    ### Supports form rendering, not involved in AJAX
    def getHandleUri(self):
        pid = None
        pidProperty = self.vc("systemConfig").getString(None, ["curation", "pidProperty"])
        if pidProperty is None:
            self.log.error("No configuration found for persistent IDs!")
        else:
            try:
                pid = self._getObject().getMetadata().getProperty(pidProperty)
                #self.log.info("****Persistent ID = '{}'", pidProperty)
            except Exception,e:
                self.log.warn("Failed to get Persistent ID from storage!", e)
        return pid or ""

    def getDoiMetadata(self):
        propName = self.vc("systemConfig").getString(None, ["andsDoi", "doiProperty"])
        try:
            doi = self._getObject().getMetadata().getProperty(propName)
            self.log.debug("Getting DOI from storage = '{}'", doi)
        except Exception,e:
            self.log.error("Failed to get DOI from storage!", e)
            return "{\"error\": \"Error accessing DOI in storage, please see system logs.\"}"

        if doi is not None:
            return "{\"doi\": \""+doi+"\"}"
        else:
            return "{}"

    ### Supports form rendering, not involved in AJAX
    def getNextStepAcceptMessage(self):
        step = self.getCurrentStep()
        msg = "?"
        if step == "inbox":
            msg = "This record is ready for the '''Investigation''' stage."
        elif step == "investigation":
            msg = "This record is ready for the '''Metadata Review''' stage."
        elif step == "metadata-review":
            msg = "This record is ready for the '''Final Review''' stage."
        elif step == "final-review":
            msg = "This record is ready to be '''Published'''."
        elif step == "live":
            msg = "This record has already been '''Published'''."
        elif step == "retired":
            msg = "This record has been '''Retired'''."
        return msg

    ### Supports form rendering, not involved in AJAX
    def getNextStepAcceptValidationErrorMessage(self):
        step = self.getCurrentStep()
        msg = "?"
        if step == "pending":
            msg = "You must accept responsibility and accountability for the" + \
            " accuracy and completeness of the information provided before" + \
            " you can submit this item!"
        elif step == "reviewing":
            msg = "You must check the 'Make record live' checkbox!"
        elif step == "live":
            msg = "[Live]"
        return msg

    ### Supports form rendering, not involved in AJAX
    def getJsonMetadata(self):
        """Return the package as a JSON string prepared for form rendering.

        Ensures 'dc:title' and 'dc:abstract' are present (filled from 'title' /
        'description' when those exist, otherwise from their own current
        values) and replaces raw newlines in top-level string values with the
        two-character sequence backslash-n.
        """
        package = self._getTFPackage()
        ## Look for a title: 'title' wins, 'dc:title' is the fallback
        title = package.getString("", ["dc:title"])
        title = package.getString(title, ["title"])
        ## And a description: 'description' wins, 'dc:abstract' is the fallback
        description = package.getString("", ["dc:abstract"])
        description = package.getString(description, ["description"])
        ## Make sure we have the fields we need
        json = package.getJsonObject()
        json.put("dc:title", title)
        json.put("dc:abstract", description)
        ## fix newlines
        ignoreFields = ["metaList", "relationships", "responses"]
        for key in json:
            if key in ignoreFields:
                continue
            value = json.get(key)
            # Only plain strings are rewritten; nested objects, arrays and
            # numbers are left alone. Membership is tested with 'in' because
            # str.find() returns -1 (truthy) when absent and 0 (falsy) for a
            # leading newline, which made the old check miss those values.
            if isinstance(value, basestring) and "\n" in value:
                json.put(key, value.replace("\n", "\\n"))
        return package.toString(True)

    ### Supports form rendering, not involved in AJAX
    def getAttachedFiles(self):
        """Return a JSON array (string) describing files attached to this object.

        Searches the index for documents whose 'attached_to' field is this
        object's OID (up to 1000 rows) and reports filename, attachment type,
        access rights and OID for each hit.
        """
        # (output key, solr field) pairs, in the order they are emitted
        fieldPairs = (
            ("filename", "filename"),
            ("attachment_type", "attachment_type"),
            ("access_rights", "access_rights"),
            ("oid", "id"),
        )

        # Build and run the search
        query = SearchRequest("attached_to:%s" % self.__oid)
        query.setParam("rows", "1000")
        buffer = ByteArrayOutputStream()
        self.Services.getIndexer().search(query, buffer)
        solrResult = SolrResult(ByteArrayInputStream(buffer.toByteArray()))

        # Convert each hit into a small JSON object
        attachments = JSONArray()
        for doc in solrResult.getResults():
            entry = JsonObject()
            for jsonKey, solrField in fieldPairs:
                entry.put(jsonKey, doc.getFirst(solrField))
            attachments.add(entry)
        return attachments.toString()

    ### Supports form rendering, not involved in AJAX
    def getFormData(self, field):
        """Return the HTML-escaped form value for 'field' ("" when absent)."""
        rawValue = self.vc("formData").get(field, "")
        return StringEscapeUtils.escapeHtml(rawValue)
    
    def getPresentationConfig(self, field):
        presentationConfig = self.presentationConfig
        #print "********** getPresentationConfig '%s'" % (presentationConfig.get(field) )
        if presentationConfig is None or presentationConfig.get(field) is None:
            return ''
        
        return StringEscapeUtils.escapeHtml(presentationConfig.get(field))

    ### Supports form rendering, not involved in AJAX
    def getOid(self):
        self.log.debug("getOid() = '{}'", self.__oid)
        return self.__oid

    # Retrieve and cache the current object
    def _getObject(self):
        if not self.__object:
            try:
                self.__object = self.Services.storage.getObject(self.__oid)
            except StorageException, e:
                self.log.error("Failed to retrieve object : ", e)
        return self.__object

           
    # Retrieve and parse the Fascinator Package from storage
    def _getTFPackage(self):
        if self.__tfpackage is None:
            payload = None
            inStream = None

            # We don't need to worry about close() calls here
            try:
                object = self._getObject()
                sourceId = object.getSourceId()
                payload = None
                if sourceId is None or not sourceId.endswith(".tfpackage"):
                    # The package is not the source... look for it
                    for pid in object.getPayloadIdList():
                        if pid.endswith(".tfpackage"):
                            payload = object.getPayload(pid)
                            payload.setType(PayloadType.Source)
                else:
                    payload = object.getPayload(sourceId)
                inStream = payload.open()
            except Exception, e:
                self.log.error("Error during package access", e)
                return None

            # The input stream has now been opened, it MUST be closed
            try:
                self.__tfpackage = JsonSimple(inStream)
            except Exception, e:
                self.log.error("Error parsing package contents", e)
            payload.close()
        return self.__tfpackage

    # Get the manifest from the Fascinator package and wrap in JSON Library
    def _getManifest(self):
        if self.__manifest is None:
            package = self._getTFPackage()
            if package is None:
                return None
            manifestJson = package.writeObject(["manifest"])
            self.__manifest = JsonSimple(manifestJson)
        return self.__manifest

    # Save the provided package to disk
    def _saveTFPackage(self, tfpackage):
        """Write 'tfpackage' back to the object's '.tfpackage' payload as UTF-8 JSON.

        Also records the payload ID that was written in self.packagePid (the
        last ID in the payload list ending in '.tfpackage').
        """
        storedObject = self._getObject()
        packageBytes = String(tfpackage.toString(True)).getBytes("UTF-8")

        # Reset first, then keep the last matching payload ID
        self.packagePid = None
        for pid in self.__object.getPayloadIdList():
            if pid.endswith(".tfpackage"):
                self.packagePid = pid

        storedObject.updatePayload(self.packagePid, ByteArrayInputStream(packageBytes))

    # Save the current package
    def _savePackage(self):
        """Persist the currently cached/loaded package via _saveTFPackage()."""
        currentPackage = self._getTFPackage()
        self._saveTFPackage(currentPackage)

    # Get our object's workflow metadata from storage
    def _getWorkflowMetadata(self):
        if self.__wfMetadata is None:
            payload = None
            inStream = None

            # We don't need to worry about close() calls here
            try:
                object = self._getObject()
                payload = object.getPayload("workflow.metadata")
                inStream = payload.open()
            except Exception, e:
                self.log.error("Error during metadata access", e)
                return None

            # The input stream has now been opened, it MUST be closed
            try:
                self.__wfMetadata = JsonSimple(inStream)
            except Exception, e:
                self.log.error("Error during metadata access", e)
            payload.close()
        return self.__wfMetadata

    # Save the workflow metadata provided to disk
    def _saveWorkflowMetadata(self, wfMetadata):
        """Write 'wfMetadata' to the object's 'workflow.metadata' payload as UTF-8 JSON."""
        storedObject = self._getObject()
        metadataBytes = String(wfMetadata.toString(True)).getBytes("UTF-8")
        storedObject.updatePayload("workflow.metadata", ByteArrayInputStream(metadataBytes))

    # Update and save the package metadata
    def _updatePackageMetadata(self, progressStep = False):
        result = JsonObject()
        self.log.debug("** dataset update-package-meta **")
        result.put("error", "unknown")

        currentStep = self.getCurrentStep()
        targetStep = None
        self.log.debug("  currentStep='%s'" % currentStep)

        # A security check
        try:
            # Find our workflow configuraion
            systemConfig = self.vc("systemConfig")
            jsonConfigFile = systemConfig.getObject(["portal", "packageTypes", "dataset"]).get("jsonconfig")
            jsonConfigFile = FascinatorHome.getPathFile(
                    "harvest/workflows/" + jsonConfigFile)
            config = JsonSimple()
            try:
                config = JsonSimple(jsonConfigFile)
            except:
                self.log.error("Error accessing config", e)
                result.put("error", "Error accessing config: '%s'" % str(e))
                return result
            stages = config.getJsonSimpleList(["stages"])

            # Currently indexed metadata
            solr = self.__getSolrData()
            if solr is None:
                result.put("error", "Solr document unavailable!")
                return result

            # Find were we are in the workflow
            currentStage = None
            nextStage = None
            nextStep = None
            for stage in stages:
                # This executes on loop AFTER we found current stage
                if currentStage is not None:
                    nextStage = stage
                    nextStep = nextStage.getString(None, ["name"])
                    break
                # Find the current stage
                stageName = stage.getString(None, ["name"])
                if stageName is not None and stageName == currentStep:
                    currentStage = stage

            # Get user data
            username = self.username()
            userRoles = self.userRoles()
            owner = solr.getFirst("owner")

            # Print some debug data
            #self.log.debug(" === username = '%s'" % username)
            #self.log.debug(" === userRoles = '%s'" % userRoles)
            #self.log.debug(" === nextStep = '%s'" % nextStep)
            #self.log.debug(" === owner = '%s'" % owner)

            # Now do the security check
            workflowSecurity = solr.getList("workflow_security")
            if workflowSecurity is None:
                # Use with care... this is no longer a Java List
                workflowSecurity = []
            if progressStep:
                targetStep = nextStep
                self.log.debug(" === targetStep = '%s'" % targetStep)
            # Let owners or admins user the 'pending' step
            if (currentStep == "pending") and (owner != username) and \
                    ("admin" not in userRoles):
                message = "Only the owner or admin can do this!"
                self.log.error(message)
                result.put("error", message)
                return result
            # Otherwise, normal workflow security applies, with admin always allowed
            else:
                if ("admin" not in userRoles) and \
                        (not set(userRoles).intersection(workflowSecurity)):
                    message = "Not an admin or you do not have the correct role"
                    self.log.error(message)
                    result.put("error", message)
                    return result
        except Exception, e:
            self.log.error("Error in _updatePackageMetadata():", e)
            result.put("error", str(e))
            return result

        # Update our data
        formData = self.__formData
        tfpackage = self._getTFPackage()
        packageJson = tfpackage.getJsonObject()
        try:
            # Update all of our data fields
            metaList = list(formData.getValues("metaList"))
            storedList = tfpackage.getStringList(["metaList"])
            if storedList is None:
                # Use with care... this is no longer a Java List
                storedList = []
            removedSet = set(storedList).difference(metaList)
            try:
                # Add the actual data
                for metaName in metaList:
                    value = formData.get(metaName)
                    packageJson.put(metaName, value)
                # We are overwriting this list
                tfMetaList = tfpackage.writeArray(["metaList"])
                tfMetaList.clear()
                tfMetaList.addAll(metaList)
                # Remove any old data
                for metaName in removedSet:
                    if metaName != "relationships":
                        packageJson.remove(metaName)
            except Exception, e:
                self.log.error("Error updating package data", e)
                result.put("error", "Error updating package data")
                return result

            # Copy core Fascinator data
            title = tfpackage.getString("", ["dc:title"])
            description = tfpackage.getString("", ["dc:description"])
            packageJson.put("title", title)
            packageJson.put("description", description)
            if not title:
                self.log.error("Object has no title!")
                result.put("error", "no title")
                return result

            # Update our worflow data
            try:
                wfMeta = self._getWorkflowMetadata()
                wfJson = wfMeta.getJsonObject()
                if targetStep is not None:
                    wfJson.put("targetStep", targetStep)

                self.log.debug("title = '%s'" % title)
                self.log.debug("wfMeta = '%s'" % wfMeta)

                formJson = wfMeta.writeObject(["formData"])
                formJson.put("title", title)
                formJson.put("description", description)
                self._saveWorkflowMetadata(wfMeta)
            except Exception, e:
                self.log.error("Error updating workflow data", e)

            # Save & re-index
            self._saveTFPackage(tfpackage)
            self._reIndex(targetStep)
            result.remove("error")
            result.put("ok", "updated ok")
            result.put("oid", self.__oid)
        except Exception, e:
            self.log.error("Error updating data", e)
            result.put("error", str(e))

        return result

    # Re-index the current object
    def _reIndex(self, step):
        """Notify the curation manager about this object via sendMessage().

        step -- forwarded unchanged; None requests a plain re-index.
        """
        oid = self._getObject().getId()
        self.sendMessage(oid, step)

    # Get the solr document for this object
    def __getSolrData(self):
        if self.__solrMetadata is None:
            object = self._getObject()
            if object is None:
                return None
            oid = object.getId()

            try:
                # Build our query
                query = 'id:"%s"' % oid
                req = SearchRequest(query)
                req.addParam("fq", 'item_type:"object"')
                out = ByteArrayOutputStream()
                # Search and parse
                self.Services.getIndexer().search(req, out)
                result = SolrResult(ByteArrayInputStream(out.toByteArray()))
                # Check results
                if result.getNumFound() == 0:
                    self.log.error("No solr document found for OID: '{}'", oid)
                    return None
                if result.getNumFound() > 1:
                    self.log.warn("WARNING: Found {} solr documents for OID '{}', expected 0!", result.getNumFound(), oid)
                # The first result is all we care about
                self.__solrMetadata = result.getResults().get(0)
            except Exception, e:
                self.log.error("Error in __getSolrData(): ", e)
        return self.__solrMetadata

    # Send an event notification to the curation manager
    def sendMessage(self, oid, step):
        """Queue a workflow event for 'oid' on the transaction manager queue.

        step -- None sends a 'ReIndex' event; any other value sends a
                'NewStep : <step>' event that also carries 'newStep'.
        """
        event = JsonObject()
        event.put("oid", oid)
        if step is not None:
            event.put("eventType", "NewStep : %s" % step)
            event.put("newStep", step)
        else:
            event.put("eventType", "ReIndex")
        currentUser = self.vc("page").authentication.get_username()
        event.put("username", currentUser)
        event.put("context", "Workflow")
        event.put("task", "workflow")

        listenerId = TransactionManagerQueueConsumer.LISTENER_ID
        self.messaging.queueMessage(listenerId, event.toString())
                
    def _getDataConfig(self):
        """Load and return the dataset workflow configuration as JsonSimple.

        The file name is read from the system config entry
        'portal' / 'packageTypes' / 'dataset' / 'jsonconfig' and resolved under
        'harvest/workflows/' via FascinatorHome. Errors are not handled here
        and propagate to the caller.
        """
        systemConfig = self.vc("systemConfig")
        datasetSettings = systemConfig.getObject(["portal", "packageTypes", "dataset"])
        jsonConfigFileString = datasetSettings.get("jsonconfig")

        jsonConfigFile = FascinatorHome.getPathFile(
            "harvest/workflows/" + jsonConfigFileString)
        # Parse the file directly; the empty placeholder instance that used to
        # be created first was immediately discarded
        return JsonSimple(jsonConfigFile)





© 2015 - 2025 Weber Informatics LLC | Privacy Policy