All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.vivoweb.webapp.createandlink.crossref.CrossrefResolverAPI Maven / Gradle / Ivy

The newest version!
/* $This file is distributed under the terms of the license in /doc/license.txt$ */

package org.vivoweb.webapp.createandlink.crossref;

import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import edu.cornell.mannlib.vitro.webapp.utils.http.HttpClientFactory;
import edu.cornell.mannlib.vitro.webapp.web.URLEncoder;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.vivoweb.webapp.createandlink.Citation;
import org.vivoweb.webapp.createandlink.CreateAndLinkUtils;
import org.vivoweb.webapp.createandlink.ResourceModel;
import org.vivoweb.webapp.createandlink.utils.HttpReader;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

/**
 * Interface to the CrossRef resolver.
 *
 * <p>Requests CSL / CiteProc JSON for a DOI from {@code https://doi.org/} using content
 * negotiation, and maps the response onto either a {@link Citation} or a {@link ResourceModel}.
 */
public class CrossrefResolverAPI {
    protected final Log logger = LogFactory.getLog(getClass());

    // Base URL for the resolver
    private static final String CROSSREF_RESOLVER = "https://doi.org/";

    // Shared JSON mapper: configured once here and only used for reading afterwards,
    // so it does not need to be rebuilt for every request.
    private static final ObjectMapper OBJECT_MAPPER =
            new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

    /**
     * Find the DOI in CrossRef, filling the citation object.
     *
     * @param id       the DOI to resolve
     * @param citation the citation to populate from the resolver response
     * @return the raw JSON returned by the resolver, or {@code null} if the arguments are
     *         empty, nothing could be read, the response describes a different DOI, or an
     *         error occurred while resolving / parsing
     */
    public String findInExternal(String id, Citation citation) {
        // Nothing to resolve or nothing to fill - avoid a pointless network round trip
        if (StringUtils.isEmpty(id) || citation == null) {
            return null;
        }

        try {
            // Read JSON from the resolver
            String json = readJSON(CROSSREF_RESOLVER + URLEncoder.encode(id));
            if (StringUtils.isEmpty(json)) {
                return null;
            }

            CrossrefCiteprocJSONModel jsonModel = OBJECT_MAPPER.readValue(json, CrossrefCiteprocJSONModel.class);
            if (jsonModel == null) {
                return null;
            }

            // Ensure that we have the correct resource
            if (!id.equalsIgnoreCase(jsonModel.DOI)) {
                return null;
            }

            // Map the fields of the resolver response to the citation object
            citation.DOI = id;
            citation.type = normalizeType(jsonModel.type);
            citation.title = jsonModel.title;
            citation.journal = jsonModel.containerTitle;

            if (jsonModel.author != null) {
                List<Citation.Name> authors = new ArrayList<>(jsonModel.author.length);
                for (CrossrefCiteprocJSONModel.NameField author : jsonModel.author) {
                    // A null entry carries no name; skip it rather than failing the whole lookup
                    if (author == null) {
                        continue;
                    }
                    splitNameLiteral(author);
                    Citation.Name citationAuthor = new Citation.Name();
                    citationAuthor.name = CreateAndLinkUtils.formatAuthorString(author.family, author.given);
                    authors.add(citationAuthor);
                }
                citation.authors = authors.toArray(new Citation.Name[0]);
            }

            citation.volume = jsonModel.volume;
            citation.issue = jsonModel.issue;

            // Fall back to the article number when there is no page range
            citation.pagination = jsonModel.page;
            if (citation.pagination == null) {
                citation.pagination = jsonModel.articleNumber;
            }

            // Prefer the print publication year, falling back to the online one
            citation.publicationYear = extractYearFromDateField(jsonModel.publishedPrint);
            if (citation.publicationYear == null) {
                citation.publicationYear = extractYearFromDateField(jsonModel.publishedOnline);
            }

            return json;
        } catch (Exception e) {
            // Pass the exception itself so the stack trace is not lost
            logger.error("[CREF] Error resolving DOI " + id + ", cause "+ e.getMessage(), e);
            return null;
        }
    }

    /**
     * Extract the year from the crossref JSON model.
     *
     * @param date the date field to read; may be {@code null}
     * @return the first part of the first date, or {@code null} if the field has no date
     *         parts or the year is missing / not a number
     */
    private Integer extractYearFromDateField(CrossrefCiteprocJSONModel.DateField date) {
        if (date == null) {
            return null;
        }

        // Both the outer list of dates and the first date itself may be missing or empty
        if (ArrayUtils.isEmpty(date.dateParts) || ArrayUtils.isEmpty(date.dateParts[0])) {
            return null;
        }

        return parseIntOrNull(date.dateParts[0][0]);
    }

    /**
     * Build a resource model from the JSON previously returned by the resolver.
     *
     * @param externalResource CiteProc JSON, as returned by {@link #findInExternal}
     * @return the populated resource model, or {@code null} if the JSON is empty, cannot be
     *         parsed, or does not contain a DOI
     */
    public ResourceModel makeResourceModel(String externalResource) {
        if (StringUtils.isEmpty(externalResource)) {
            return null;
        }

        CrossrefCiteprocJSONModel jsonModel = null;
        try {
            jsonModel = OBJECT_MAPPER.readValue(externalResource, CrossrefCiteprocJSONModel.class);
        } catch (IOException e) {
            logger.error("Unable to read JSON", e);
        }
        if (jsonModel == null) {
            return null;
        }

        if (StringUtils.isEmpty(jsonModel.DOI)) {
            return null;
        }

        // Map the fields of the Java object to the resource model

        ResourceModel model = new ResourceModel();

        model.DOI = jsonModel.DOI;
        model.PubMedID = jsonModel.PMID;
        model.PubMedCentralID = jsonModel.PMCID;
        model.ISSN = jsonModel.ISSN;
        model.ISBN = stripIsbnPrefixes(jsonModel.ISBN);
        model.URL = jsonModel.URL;

        model.author = convertNameFields(jsonModel.author);
        model.editor = convertNameFields(jsonModel.editor);
        model.translator = convertNameFields(jsonModel.translator);

        model.containerTitle = jsonModel.containerTitle;

        model.issue = jsonModel.issue;

        if (!StringUtils.isEmpty(jsonModel.page)) {
            // Split "start-end" on the first hyphen; a value without a hyphen is only a start page
            int hyphen = jsonModel.page.indexOf('-');
            if (hyphen > -1) {
                model.pageStart = jsonModel.page.substring(0, hyphen);
                model.pageEnd = jsonModel.page.substring(hyphen + 1);
            } else {
                model.pageStart = jsonModel.page;
            }
        } else if (!StringUtils.isEmpty(jsonModel.articleNumber)) {
            model.pageStart = jsonModel.articleNumber;
        }

        // Prefer the print publication date, falling back to the online one
        model.publicationDate = convertDateField(jsonModel.publishedPrint);
        if (model.publicationDate == null) {
            model.publicationDate = convertDateField(jsonModel.publishedOnline);
        }

        model.publisher = jsonModel.publisher;
        model.subject = jsonModel.subject;
        model.title = jsonModel.title;
        model.type = normalizeType(jsonModel.type);
        model.volume = jsonModel.volume;

        model.status = jsonModel.status;
        model.presentedAt = jsonModel.event;
        model.abstractText = jsonModel.abstractText;

        return model;
    }

    /**
     * Copy a list of ISBNs, keeping only the text after the last '/' of each entry.
     *
     * @param isbns the ISBN values from the resolver; may be {@code null}
     * @return a new array of the same length, or {@code null} if the input was {@code null};
     *         {@code null} entries are carried over unchanged
     */
    private String[] stripIsbnPrefixes(String[] isbns) {
        if (isbns == null) {
            return null;
        }

        String[] stripped = new String[isbns.length];
        for (int isbnIdx = 0; isbnIdx < isbns.length; isbnIdx++) {
            String isbn = isbns[isbnIdx];
            if (isbn != null) {
                int slash = isbn.lastIndexOf('/');
                if (slash > -1) {
                    isbn = isbn.substring(slash + 1);
                }
            }
            stripped[isbnIdx] = isbn;
        }

        return stripped;
    }

    /**
     * Convert CiteProc name fields into resource model name fields.
     *
     * @param nameFields the names to convert; may be {@code null}
     * @return an array of the same length as the input ({@code null} entries stay
     *         {@code null}), or {@code null} if the input was {@code null}
     */
    private ResourceModel.NameField[] convertNameFields(CrossrefCiteprocJSONModel.NameField[] nameFields) {
        if (nameFields == null) {
            return null;
        }

        ResourceModel.NameField[] destNameFields = new ResourceModel.NameField[nameFields.length];

        for (int nameIdx = 0; nameIdx < nameFields.length; nameIdx++) {
            CrossrefCiteprocJSONModel.NameField source = nameFields[nameIdx];
            if (source != null) {
                splitNameLiteral(source);
                ResourceModel.NameField dest = new ResourceModel.NameField();
                dest.family = source.family;
                dest.given = source.given;
                destNameFields[nameIdx] = dest;
            }
        }

        return destNameFields;
    }

    /**
     * Map non-standard publication types into the CiteProc types.
     *
     * @param type the type reported by the resolver; may be {@code null}
     * @return the mapped CiteProc type, or the input unchanged if there is no mapping
     */
    private String normalizeType(String type) {
        if (type != null) {
            // Locale.ROOT keeps the comparison independent of the server's default locale
            switch (type.toLowerCase(Locale.ROOT)) {
                case "journal-article":
                    return "article-journal";

                case "book-chapter":
                    return "chapter";

                case "proceedings-article":
                    return "paper-conference";

                default:
                    break;
            }
        }

        return type;
    }

    /**
     * Split a name literal into first and last names.
     *
     * <p>Only acts when the family name is empty. A literal containing a comma is read as
     * "family, given"; otherwise the text after the last space is the family name and the
     * text before it the given name; a single word is used as the family name. Surrounding
     * whitespace is removed from each part. An existing given name is never overwritten.
     *
     * @param author the name field to update in place; ignored if {@code null}
     */
    private void splitNameLiteral(CrossrefCiteprocJSONModel.NameField author) {
        if (author == null || !StringUtils.isEmpty(author.family)) {
            return;
        }

        String given = null;
        // Trim first so that leading / trailing whitespace is not mistaken for a separator
        String literal = StringUtils.trimToNull(author.literal);
        if (literal != null) {
            int comma = literal.indexOf(',');
            int lastSpace = literal.lastIndexOf(' ');
            if (comma > -1) {
                author.family = literal.substring(0, comma).trim();
                given = literal.substring(comma + 1).trim();
            } else if (lastSpace > -1) {
                author.family = literal.substring(lastSpace + 1);
                given = literal.substring(0, lastSpace).trim();
            } else {
                author.family = literal;
            }
        }

        if (StringUtils.isEmpty(author.given)) {
            author.given = given;
        }
    }

    /**
     * Convert a CiteProc date field to resource model date field.
     *
     * <p>Only the first date of the field is used; its parts are read as year, month, day.
     * Parts that are missing or cannot be interpreted are left unset.
     *
     * @param dateField the date to convert; may be {@code null}
     * @return the converted date (possibly with no parts set), or {@code null} if the input
     *         was {@code null}
     */
    private ResourceModel.DateField convertDateField(CrossrefCiteprocJSONModel.DateField dateField) {
        if (dateField == null) {
            return null;
        }

        ResourceModel.DateField resourceDate = new ResourceModel.DateField();
        if (ArrayUtils.isEmpty(dateField.dateParts) || ArrayUtils.isEmpty(dateField.dateParts[0])) {
            return resourceDate;
        }

        // date-parts is a list of dates, each a list of [year, month, day]; the number of
        // parts available is therefore the length of the first date, not of the outer list
        String[] parts = dateField.dateParts[0];

        Integer year = parseIntOrNull(parts[0]);
        if (year != null) {
            resourceDate.year = year;
        }

        if (parts.length > 1) {
            Integer month = parseMonth(parts[1]);
            if (month != null) {
                resourceDate.month = month;
            }
        }

        if (parts.length > 2) {
            Integer day = parseIntOrNull(parts[2]);
            if (day != null) {
                resourceDate.day = day;
            }
        }

        return resourceDate;
    }

    /**
     * Parse a decimal integer without throwing.
     *
     * @param value the text to parse; may be {@code null}
     * @return the parsed value, or {@code null} if the text is {@code null} or not a number
     */
    private static Integer parseIntOrNull(String value) {
        if (value == null) {
            return null;
        }

        try {
            return Integer.parseInt(value, 10);
        } catch (NumberFormatException nfe) {
            return null;
        }
    }

    /**
     * Interpret a month given either as a number or as an English month name / three letter
     * abbreviation (case insensitive).
     *
     * @param month the month text; may be {@code null}
     * @return the numeric value as given, 1-12 for a recognised name, or {@code null} if the
     *         text cannot be interpreted
     */
    private static Integer parseMonth(String month) {
        Integer numeric = parseIntOrNull(month);
        if (numeric != null || month == null) {
            return numeric;
        }

        // Locale.ROOT keeps the comparison independent of the server's default locale
        switch (month.toLowerCase(Locale.ROOT)) {
            case "jan":
            case "january":
                return 1;

            case "feb":
            case "february":
                return 2;

            case "mar":
            case "march":
                return 3;

            case "apr":
            case "april":
                return 4;

            case "may":
                return 5;

            case "jun":
            case "june":
                return 6;

            case "jul":
            case "july":
                return 7;

            case "aug":
            case "august":
                return 8;

            case "sep":
            case "september":
                return 9;

            case "oct":
            case "october":
                return 10;

            case "nov":
            case "november":
                return 11;

            case "dec":
            case "december":
                return 12;

            default:
                return null;
        }
    }

    /**
     * Read JSON from the URL.
     *
     * @param url the URL to request
     * @return the response body as read by {@link HttpReader#fromResponse}, or {@code null}
     *         if an I/O error occurred
     */
    private String readJSON(String url) {
        try {
            HttpClient client = HttpClientFactory.getHttpClient();
            HttpGet request = new HttpGet(url);

            // Content negotiate for csl / citeproc JSON
            request.setHeader("Accept", "application/vnd.citationstyles.csl+json;q=1.0");

            HttpResponse response = client.execute(request);
            return HttpReader.fromResponse(response);
        } catch (IOException e) {
            // Report the failure instead of silently returning nothing
            logger.error("[CREF] Unable to read JSON from " + url, e);
        }

        return null;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy