org.dspace.app.sherpa.v2.SHERPAResponse Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of dspace-api Show documentation
DSpace core data model and service APIs.
The newest version!
/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.app.sherpa.v2;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import com.fasterxml.jackson.annotation.JsonIgnore;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONTokener;

/**
 * Model class for the SHERPAv2 API (JSON) response for a publication (journal) search
 * The structure and approached used is quite different to the simple v1 API used previously
 * The structure is based on journal data, which in turn contains data about publishers and policies
 *
 * @see SHERPAJournal
 *
 * @author Kim Shepherd
 *
 */
public class SHERPAResponse implements Serializable {

    private static final long serialVersionUID = 2732963970169240597L;

    // Is this response to be treated as an error?
    private boolean error;

    // Error message
    private String message;

    // Parsed system metadata from search results
    private SHERPASystemMetadata metadata;

    // List of parsed journal results
    private List journals;

    // Internal Sherpa ID
    private int id;

    // SHERPA URI (the human page version of this API response)
    private String uri;

    @JsonIgnore
    private Instant retrievalTime = Instant.now();

    // Format enum - currently only JSON is supported
    public enum SHERPAFormat {
        JSON, XML
    };

    private static Logger log = LogManager.getLogger();

    /**
     * Parse SHERPA v2 API for a given format
     * @param input - input stream from the HTTP response content
     * @param format - requested format
     * @throws IOException
     */
    public SHERPAResponse(InputStream input, SHERPAFormat format) throws IOException {
        if (format == SHERPAFormat.JSON) {
            parseJSON(input);
        }
    }

    /**
     * Create an empty SHERPAResponse representation
     */
    public SHERPAResponse() {}

    /**
     * Parse the SHERPA v2 API JSON and construct Romeo policy data for display
     * This method does not return a value, but rather populates the metadata and journals objects
     * with data parsed from the JSON.
     * @param jsonData - the JSON input stream from the API result response body
     */
    private void parseJSON(InputStream jsonData) throws IOException {
        InputStreamReader streamReader = new InputStreamReader(jsonData, StandardCharsets.UTF_8);
        JSONTokener jsonTokener = new JSONTokener(streamReader);
        JSONObject httpResponse;
        try {
            httpResponse = new JSONObject(jsonTokener);
            if (httpResponse.has("items")) {
                JSONArray items = httpResponse.getJSONArray("items");

                // items array is search results, *not* journals or publishers - they are listed for each item
                // - however, we only ever want one result since we're passing an "equals ISSN" query
                if (items.length() > 0) {
                    metadata = new SHERPASystemMetadata();
                    this.journals = new ArrayList<>();
                    // Iterate search result items
                    for (int itemIndex = 0; itemIndex < items.length(); itemIndex++) {
                        List sherpaPublishers = new ArrayList<>();
                        List policies = new ArrayList<>();
                        SHERPAPublisher sherpaPublisher = new SHERPAPublisher();
                        SHERPAJournal sherpaJournal = new SHERPAJournal();

                        JSONObject item = items.getJSONObject(itemIndex);

                        // Parse system metadata (per-item / result information)
                        if (item.has("system_metadata")) {
                            JSONObject systemMetadata = item.getJSONObject("system_metadata");
                            metadata = parseSystemMetadata(systemMetadata);
                        }

                        // Parse "publisher policy"
                        // note - most of the information that was previously under 'publisher' is now under here
                        if (item.has("publisher_policy")) {

                            // Parse main publisher policies node
                            JSONArray publisherPolicies = item.getJSONArray("publisher_policy");
                            for (int i = 0; i < publisherPolicies.length(); i++) {

                                JSONObject policy = publisherPolicies.getJSONObject(i);

                                // Special case - quickly check the policy for the 'paid access' option
                                // and continue if found, then parse the rest of the policy
                                String moniker = null;
                                if (policy.has("internal_moniker")) {
                                    moniker = policy.getString("internal_moniker");
                                }
                                // This seems to be usually policy(ies) for the journal proper
                                // and then an "Open access option" which contains some of the info
                                // that the 'paidaccess' node in the old API used to contain
                                // Look for: internal_moniker = "Open access option"
                                // Check if this is OA options (Paid Access) or not
                                if ("Open access option".equalsIgnoreCase(moniker)) {
                                    log.debug("This is the Open access options policy - a special case");
                                    if (policy.has("urls")) {
                                        JSONArray urls = policy.getJSONArray("urls");
                                        for (int u = 0; u < urls.length(); u++) {
                                            JSONObject url = urls.getJSONObject(u);
                                            if (url.has("description") &&
                                                "Open Access".equalsIgnoreCase(url.getString("description"))) {
                                                log.debug("Found OA paid access url: " + url.getString("url"));
                                                sherpaPublisher.setPaidAccessDescription(url.getString("description"));
                                                sherpaPublisher.setPaidAccessUrl(url.getString("url"));
                                                break;
                                            }
                                        }
                                    }
                                    // Continue the loop here - this "policy" is a bit different and we
                                    // don't want to add irrelevant conditions to the policy
                                    continue;
                                }

                                // Parse the main publisher policy object and add to the list
                                SHERPAPublisherPolicy sherpaPublisherPolicy = parsePublisherPolicy(policy);
                                policies.add(sherpaPublisherPolicy);
                            }

                            // set publisher name - note we're only looking for the first name here
                            // as per previous functionality (for simple display)
                            if (item.has("publishers")) {
                                JSONArray publishers = item.getJSONArray("publishers");
                                if (publishers.length() > 0) {
                                    JSONObject publisherElement = publishers.getJSONObject(0);
                                    if (publisherElement.has("publisher")) {
                                        JSONObject publisher = publisherElement.getJSONObject("publisher");
                                        sherpaPublisher.setName(parsePublisherName(publisher));
                                        sherpaPublisher.setUri(parsePublisherURL(publisher));
                                    }
                                }
                            }

                            // Parse journal data
                            sherpaJournal = parseJournal(item, sherpaPublisher.getName());
                        }

                        sherpaPublishers.add(sherpaPublisher);
                        sherpaJournal.setPublisher(sherpaPublisher);
                        sherpaJournal.setPublishers(sherpaPublishers);
                        sherpaJournal.setPolicies(policies);
                        this.journals.add(sherpaJournal);
                    }

                } else {
                    error = true;
                    message = "No results found";
                }
            } else {
                error = true;
                message = "No results found";
            }

        } catch (JSONException e) {
            log.error("Failed to parse SHERPA response", e);
            error = true;
        } finally {
            streamReader.close();
        }
    }

    /**
     * Parse system metadata and return populated SHERPASystemMetadata object
     * @param systemMetadata
     */
    private SHERPASystemMetadata parseSystemMetadata(JSONObject systemMetadata) {

        SHERPASystemMetadata metadata = new SHERPASystemMetadata();

        if (systemMetadata.has("uri")) {
            this.uri = systemMetadata.getString("uri");
            metadata.setUri(this.uri);
        } else {
            log.error("SHERPA URI missing for API response item");
        }
        if (systemMetadata.has("id")) {
            this.id = systemMetadata.getInt("id");
            metadata.setId(this.id);
        } else {
            log.error("SHERPA internal ID missing for API response item");
        }
        // Get date created and added - DSpace expects this in the publisher object, though
        if (systemMetadata.has("date_created")) {
            metadata.setDateCreated(systemMetadata.getString("date_created"));
        }
        if (systemMetadata.has("date_modified")) {
            metadata.setDateModified(systemMetadata.getString("date_modified"));
        }
        // Is this item publicly visible?
        if (systemMetadata.has("publicly_visible")) {
            metadata.setPubliclyVisible ("yes".equals(systemMetadata
                .getString("publicly_visible")));
        }
        // Is this item listed in the DOAJ?
        if (systemMetadata.has("listed_in_doaj")) {
            metadata.setPubliclyVisible ("yes".equals(systemMetadata
                .getString("listed_in_doaj")));
        }

        return metadata;
    }

    /**
     * Parse journal JSON data and return populated bean
     * This method also takes publisherName as a string to help construct some
     * legacy labels
     * @param item - the main result item JSON (which is the closest thing to an actual 'journal')
     * @param publisherName - the parsed publisher name
     * @return
     */
    private SHERPAJournal parseJournal(JSONObject item, String publisherName) {

        SHERPAJournal sherpaJournal = new SHERPAJournal();

        // set journal title
        if (item.has("title")) {
            JSONArray titles = item.getJSONArray("title");
            if (titles.length() > 0) {
                List titleList = new ArrayList<>();
                for (int t = 0; t < titles.length(); t++) {
                    JSONObject title = titles.getJSONObject(t);
                    if (title.has("title")) {
                        titleList.add(title.getString("title").trim());
                    }
                }
                sherpaJournal.setTitles(titleList);
                if (titleList.size() > 0) {
                    // Faking this a bit based on what I'd seen - not in the API v2 data
                    sherpaJournal.setRomeoPub(publisherName + ": "
                        + titleList.get(0));
                    sherpaJournal.setZetoPub(publisherName + ": "
                        + titleList.get(0));
                    log.debug("Found journal title: " + titleList.get(0));
                }
            }
        }

        // Journal URL
        if (item.has("url")) {
            sherpaJournal.setUrl(item.getString("url"));
        }

        // set ISSNs
        if (item.has("issns")) {
            JSONArray issns = item.getJSONArray("issns");
            // just get first - DSpace data model only allows for one
            List issnList = new ArrayList<>();
            for (int ii = 0; ii < issns.length(); ii++) {
                JSONObject issn = issns.getJSONObject(ii);
                issnList.add(issn.getString("issn").trim());
            }
            sherpaJournal.setIssns(issnList);
        }

        // Is the item in DOAJ?
        if (item.has("listed_in_doaj")) {
            sherpaJournal.setInDOAJ("yes".equals(item.getString("listed_in_doaj")));
        }

        return sherpaJournal;
    }

    /**
     * Parse a publisher_policy JSON data and return a populated bean
     * @param policy - each publisher policy node in the JSON array
     * @return populated SHERPAPublisherPolicy object
     */
    private SHERPAPublisherPolicy parsePublisherPolicy(JSONObject policy) {

        SHERPAPublisherPolicy sherpaPublisherPolicy = new SHERPAPublisherPolicy();

        // Get and set monikers
        String moniker = null;
        if (policy.has("internal_moniker")) {
            moniker = policy.getString("internal_moniker");
            sherpaPublisherPolicy.setInternalMoniker(moniker);
        }

        // URLs (used to be Copyright Links)
        if (policy.has("urls")) {
            JSONArray urls = policy.getJSONArray("urls");
            Map copyrightLinks = new TreeMap<>();
            for (int u = 0; u < urls.length(); u++) {
                JSONObject url = urls.getJSONObject(u);
                if (url.has("description") && url.has("url")) {
                    log.debug("Setting copyright URL: " + url.getString("url"));
                    copyrightLinks.put(url.getString("url"), url.getString("description"));
                }
            }
            sherpaPublisherPolicy.setUrls(copyrightLinks);
        }

        // Permitted OA options
        int submittedOption = 0;
        int acceptedOption = 0;
        int publishedOption = 0;
        int currentOption = 0;
        if (policy.has("permitted_oa")) {
            List allowed = new ArrayList<>();
            JSONArray permittedOA = policy.getJSONArray("permitted_oa");
            List permittedVersions = new ArrayList<>();

            // Iterate each permitted OA version / option. The permitted_oa node is also known as a 'pathway' --
            // essentially "a way to get a work into a repository". Each pathway could refer to one article version
            // like a pre-print, or multiple versions might have the same acceptable locations and conditions.
            // As described below, where multiple versions are referenced in a single permitted_oa pathway, they will
            // be split out and treated separately. This keeps processing simple, especially later in display or
            // compliance checking when it is preferred to group / indicate rules by the article version
            for (int p = 0; p < permittedOA.length(); p++) {
                JSONObject permitted = permittedOA.getJSONObject(p);
                // Although it adds redundancy, we will treat each 'article version' within
                // the permitted_oa ("pathway") node as a separate version altogether to keep the rest of our display
                // handled nicely. This was confirmed as an appropriate approach by JISC
                if (permitted.has("article_version")) {
                    JSONArray versions = permitted.getJSONArray("article_version");
                    for (int v = 0; v < versions.length(); v++) {
                        // Parse this permitted_oa node but specifically looking for the article_version 'v'
                        SHERPAPermittedVersion permittedVersion = parsePermittedVersion(permitted, v);

                        // To determine which option # we are, inspect article versions and set
                        allowed.add(permittedVersion.getArticleVersion());
                        if ("submitted".equals(permittedVersion.getArticleVersion())) {
                            submittedOption++;
                            currentOption = submittedOption;
                        } else if ("accepted".equals(permittedVersion.getArticleVersion())) {
                            acceptedOption++;
                            currentOption = acceptedOption;
                        } else if ("published".equals(permittedVersion.getArticleVersion())) {
                            publishedOption++;
                            currentOption = publishedOption;
                        }
                        permittedVersion.setOption(currentOption);
                        permittedVersions.add(permittedVersion);
                    }
                }

                // Populate the old indicators into the publisher policy object
                if (allowed.contains("submitted")) {
                    sherpaPublisherPolicy.setPreArchiving("can");
                }
                if (allowed.contains("accepted")) {
                    sherpaPublisherPolicy.setPostArchiving("can");
                }
                if (allowed.contains("published")) {
                    sherpaPublisherPolicy.setPubArchiving("can");
                }

            }
            sherpaPublisherPolicy.setPermittedVersions(permittedVersions);
        }

        return sherpaPublisherPolicy;
    }

    /**
     * Parse permitted version JSON and populate new bean from the data
     * @param permitted - each 'permitted_oa' node in the JSON array
     * @return populated SHERPAPermittedVersion object
     */
    private SHERPAPermittedVersion parsePermittedVersion(JSONObject permitted, int index) {

        SHERPAPermittedVersion permittedVersion = new SHERPAPermittedVersion();

        // Get the article version, which is ultimately used for the ticks / crosses
        // in the UI display. My assumptions around translation:
        // submitted = preprint
        // accepted = postprint
        // published = pdfversion
        // These strings can be used to construct i18n messages.
        String articleVersion = "unknown";

        // Each 'permitted OA' can actually refer to multiple versions
        if (permitted.has("article_version")) {
            JSONArray versions = permitted.getJSONArray("article_version");

            // Get one particular article version to return as a PermittedVersion. The outer loop calling this
            // is iterating all permitted_oa and permitted_oa->article_version array members
            articleVersion = versions.getString(index);
            permittedVersion.setArticleVersion(articleVersion);
            log.debug("Added allowed version: " + articleVersion + " to list");
        }

        // These are now child arrays, in old API they were explicit like
        // "preprint restrictions", etc., and just contained text rather than data
        if (permitted.has("conditions")) {
            List conditionList = new ArrayList<>();
            JSONArray conditions = permitted.getJSONArray("conditions");
            for (int c = 0; c < conditions.length(); c++) {
                conditionList.add(conditions.getString(c).trim());
            }
            permittedVersion.setConditions(conditionList);
        }

        // Any prerequisites for this option (eg required by funder)
        List prerequisites = new ArrayList<>();
        if (permitted.has("prerequisites")) {
            JSONObject prereqs = permitted.getJSONObject("prerequisites");
            if (prereqs.has("prerequisites_phrases")) {
                JSONArray phrases = prereqs.getJSONArray("prerequisites_phrases");
                for (int pp = 0; pp < phrases.length(); pp++) {
                    JSONObject phrase = phrases.getJSONObject(pp);
                    if (phrase.has("phrase")) {
                        prerequisites.add(phrase.getString("phrase").trim());
                    }
                }
            }
        }
        permittedVersion.setPrerequisites(prerequisites);

        // Locations where this version / option may be archived
        List sherpaLocations = new ArrayList<>();
        if (permitted.has("location")) {
            JSONObject locations = permitted.getJSONObject("location");
            if (locations.has("location_phrases")) {
                JSONArray locationPhrases = locations.getJSONArray("location_phrases");
                if (locationPhrases.length() > 0) {
                    for (int l = 0; l < locationPhrases.length(); l++) {
                        JSONObject locationPhrase = locationPhrases.getJSONObject(l);
                        if (locationPhrase.has("phrase")) {
                            sherpaLocations.add(locationPhrase.getString("phrase").trim());
                        }
                    }
                }
            }
        }
        permittedVersion.setLocations(sherpaLocations);

        List sherpaLicenses = new ArrayList<>();
        // required licences
        if (permitted.has("license")) {
            JSONArray licences = permitted.getJSONArray("license");
            for (int l = 0; l < licences.length(); l++) {
                JSONObject licence = licences.getJSONObject(l);
                if (licence.has("license_phrases")) {
                    JSONArray phrases = licence.getJSONArray("license_phrases");
                    for (int ll = 0; ll < phrases.length(); ll++) {
                        JSONObject phrase = phrases.getJSONObject(ll);
                        if (phrase.has("phrase")) {
                            sherpaLicenses.add(phrase.getString("phrase").trim());
                        }
                    }
                }
            }
        }
        permittedVersion.setLicenses(sherpaLicenses);

        if (permitted.has("embargo")) {
            JSONObject embargo = permitted.getJSONObject("embargo");
            SHERPAEmbargo SHERPAEmbargo = new SHERPAEmbargo(embargo.getInt("amount"), embargo.getString("units"));
            permittedVersion.setEmbargo(SHERPAEmbargo);
        }

        return permittedVersion;
    }

    /**
     * Parse publisher array and return the first name string found
     * @param publisher - array of publisher JSON data
     * @return first publisher name found (trimmed String)
     */
    private String parsePublisherName(JSONObject publisher) {
        String name = null;
        if (publisher.has("name")) {
            JSONArray publisherNames = publisher.getJSONArray("name");
            if (publisherNames.length() > 0) {
                JSONObject publisherName = publisherNames.getJSONObject(0);
                if (publisherName.has("name")) {
                    name = publisherName.getString("name").trim();
                }
            }
        }
        return name;
    }


    /**
     * Parse publisher URL from the json data
     * @param publisher - publisher object (from JSON array)
     * @return publisher URL as string
     */
    private String parsePublisherURL(JSONObject publisher) {
        if (publisher.has("url")) {
            return publisher.getString("url");
        }
        return null;
    }

    /**
     * Create new response object to be handled as an error
     * @param message - the message to render in logs or error pages
     */
    public SHERPAResponse(String message) {
        this.message = message;
        this.error = true;
    }

    public boolean isError() {
        return error;
    }

    public String getMessage() {
        return message;
    }

    public void setMessage(String message) {
        this.message = message;
    }

    public List getJournals() {
        return journals;
    }

    public SHERPASystemMetadata getMetadata() {
        return metadata;
    }

    public Instant getRetrievalTime() {
        return retrievalTime;
    }
}