All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.vivoweb.webapp.createandlink.pubmed.PubMedCreateAndLinkResourceProvider Maven / Gradle / Ivy

The newest version!
/* $This file is distributed under the terms of the license in /doc/license.txt$ */

package org.vivoweb.webapp.createandlink.pubmed;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
import edu.cornell.mannlib.vitro.webapp.utils.http.HttpClientFactory;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.vivoweb.webapp.createandlink.Citation;
import org.vivoweb.webapp.createandlink.CreateAndLinkResourceProvider;
import org.vivoweb.webapp.createandlink.ExternalIdentifiers;
import org.vivoweb.webapp.createandlink.ResourceModel;
import org.vivoweb.webapp.createandlink.utils.HttpReader;
import org.vivoweb.webapp.createandlink.utils.StringArrayDeserializer;

import java.io.IOException;

public class PubMedCreateAndLinkResourceProvider implements CreateAndLinkResourceProvider {
    /** Logger obtained from commons-logging for the runtime class of this instance. */
    protected final Log logger = LogFactory.getLog(getClass());

    /**
     * Base URL of the ID conversion service, requesting JSON output. The identifier to convert is
     * appended directly after {@code ids=} by {@code allExternalIDsForFind}.
     */
    public final static String PUBMED_ID_API = "http://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/?format=json&ids=";
    /**
     * Base URL of the PubMed summary service, requesting JSON output. The PubMed ID is appended
     * directly after {@code id=} by {@code findInExternal}.
     * NOTE(review): the {@code tool} and {@code email} query values look like placeholders - confirm
     * whether they should identify the actual installation.
     */
    public final static String PUBMED_SUMMARY_API = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&retmode=json&tool=my_tool&email=my_email@example.com&id=";

    /**
     * Normalizes an identifier by stripping leading and trailing whitespace.
     *
     * @param id the identifier as supplied by the caller; must not be {@code null}
     * @return the identifier without surrounding whitespace
     */
    @Override
    public String normalize(String id) {
        final String trimmed = id.trim();
        return trimmed;
    }

    /**
     * Returns the label used for this provider's identifier type.
     *
     * @return the constant string {@code "PubMed ID"}
     */
    @Override
    public String getLabel() {
        final String label = "PubMed ID";
        return label;
    }

    /**
     * Collects the identifiers known for a PubMed ID.
     *
     * The PubMed ID itself is always recorded. The ID conversion service is then queried and, when
     * it answers with at least one record, the DOI and PubMed Central ID of the first record are
     * copied over. A failed request or unreadable response leaves those two values unset.
     *
     * @param externalId the PubMed ID to look up
     * @return the identifiers found; never {@code null}
     */
    @Override
    public ExternalIdentifiers allExternalIDsForFind(String externalId) {
        final ExternalIdentifiers identifiers = new ExternalIdentifiers();
        identifiers.PubMedID = externalId;

        final String body = readUrl(PUBMED_ID_API + externalId);
        if (StringUtils.isEmpty(body)) {
            // Nothing came back from the conversion service
            return identifiers;
        }

        PubMedIDResponse converted;
        try {
            final ObjectMapper mapper = new ObjectMapper();
            mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
            converted = mapper.readValue(body, PubMedIDResponse.class);
        } catch (IOException e) {
            logger.error("Unable to read JSON", e);
            return identifiers;
        }

        if (converted == null || ArrayUtils.isEmpty(converted.records)) {
            return identifiers;
        }

        // Only the first record of the response is consulted
        final PubMedIDResponse.PubMedIDRecord first = converted.records[0];
        identifiers.DOI = first.doi;
        identifiers.PubMedCentralID = first.pmcid;
        return identifiers;
    }

    /**
     * Fetches the PubMed summary for {@code id} and copies its bibliographic details into
     * {@code citation}.
     *
     * The summary JSON is scanned for the field whose name equals {@code id}; the value of that
     * field is bound to a {@link PubMedSummaryResponse}. All derived values are computed before
     * {@code citation} is touched, so a failure part-way through never leaves it half-populated.
     *
     * @param id       the PubMed ID to resolve
     * @param citation the citation to fill in; only modified when the lookup succeeds
     * @return the raw JSON document returned by the summary service, or {@code null} when the
     *         request failed, the ID was not present in the response, or the record was unusable
     */
    @Override
    public String findInExternal(String id, Citation citation) {
        try {
            String json = readUrl(PUBMED_SUMMARY_API + id);
            if (StringUtils.isEmpty(json)) {
                return null;
            }

            // JsonParser is Closeable; try-with-resources releases it on every exit path
            try (JsonParser parser = new JsonFactory().createParser(json)) {
                // Advance until the parser is positioned on the field named after the requested ID
                while (!parser.isClosed() && !id.equals(parser.getCurrentName())) {
                    parser.nextToken();
                }

                if (parser.isClosed()) {
                    // Ran off the end of the document without finding the ID
                    return null;
                }

                // We have reached the field for our ID, but we need to be on the next token for the mapper to work
                parser.nextToken();
                ObjectMapper objectMapper = new ObjectMapper();
                objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
                PubMedSummaryResponse response = objectMapper.readValue(parser, PubMedSummaryResponse.class);

                // A record without an author list is treated as unusable (presumably an error entry
                // for an unknown ID - verify against the service). Previously this surfaced as a
                // NullPointerException after the title had already been written to the citation.
                if (response == null || response.authors == null) {
                    logger.warn("[PMID] No usable summary record for PMID " + id);
                    return null;
                }

                Citation.Name[] names = new Citation.Name[response.authors.length];
                for (int idx = 0; idx < response.authors.length; idx++) {
                    names[idx] = new Citation.Name();
                    names[idx].name = normalizeAuthorName(response.authors[idx].name);
                }

                // The publication year is taken from the first four characters of the date;
                // a non-numeric prefix means "no year" rather than a failed lookup.
                Integer year = null;
                if (!StringUtils.isEmpty(response.pubdate) && response.pubdate.length() >= 4) {
                    try {
                        year = Integer.parseInt(response.pubdate.substring(0, 4), 10);
                    } catch (NumberFormatException e) {
                        logger.warn("[PMID] Unparseable publication date '" + response.pubdate + "' for PMID " + id);
                    }
                }

                String type = getCiteprocTypeForPubType(response.pubtype);

                // Everything was derived successfully - now populate the citation
                citation.title = response.title;
                citation.authors = names;
                citation.journal = response.fulljournalname;
                citation.volume = response.volume;
                citation.issue = response.issue;
                citation.pagination = response.pages;
                if (year != null) {
                    citation.publicationYear = year;
                }
                citation.type = type;

                return json;
            }
        } catch (Exception e) {
            // Pass the exception itself so the stack trace is not lost
            logger.error("[PMID] Error resolving PMID " + id + ", cause "+ e.getMessage(), e);
            return null;
        }
    }

    /**
     * Builds a {@link ResourceModel} from a PubMed summary JSON document.
     *
     * The document is scanned for the field whose name equals {@code externalId}; the value of that
     * field is bound to a {@link PubMedSummaryResponse} and its contents are copied into the model.
     * Missing optional parts of the record (authors, pages, a parseable year, article IDs) are
     * skipped instead of aborting the conversion.
     *
     * @param externalId       the PubMed ID the document was fetched for
     * @param externalResource the summary JSON, as returned by {@code findInExternal}
     * @return the populated model, or {@code null} when either argument is missing, the ID is not
     *         present in the document, or the JSON cannot be read
     */
    @Override
    public ResourceModel makeResourceModel(String externalId, String externalResource) {
        // Without an ID or a document there is nothing to locate or parse
        if (externalId == null || StringUtils.isEmpty(externalResource)) {
            return null;
        }

        // JsonParser is Closeable; try-with-resources releases it on every exit path
        try (JsonParser parser = new JsonFactory().createParser(externalResource)) {
            // Advance until the parser is positioned on the field named after the requested ID
            while (!parser.isClosed() && !externalId.equals(parser.getCurrentName())) {
                parser.nextToken();
            }

            if (parser.isClosed()) {
                return null;
            }

            // We have reached the field for our ID, but we need to be on the next token for the mapper to work
            parser.nextToken();
            ObjectMapper objectMapper = new ObjectMapper();
            objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
            PubMedSummaryResponse response = objectMapper.readValue(parser, PubMedSummaryResponse.class);
            if (response == null) {
                return null;
            }

            ResourceModel resourceModel = new ResourceModel();
            resourceModel.PubMedID = externalId;
            resourceModel.title = response.title;

            // A record without an author list yields an empty author array, same as zero authors
            PubMedSummaryResponse.NameField[] authors = response.authors != null
                    ? response.authors
                    : new PubMedSummaryResponse.NameField[0];
            resourceModel.author = new ResourceModel.NameField[authors.length];
            for (int idx = 0; idx < authors.length; idx++) {
                resourceModel.author[idx] = new ResourceModel.NameField();
                String name = authors[idx] != null ? authors[idx].name : null;
                if (name == null) {
                    continue;
                }

                // Everything before the last space is the family name, the remainder the given name
                int lastSpace = name.lastIndexOf(' ');
                if (lastSpace > 0) {
                    resourceModel.author[idx].family = name.substring(0, lastSpace);
                    resourceModel.author[idx].given = name.substring(lastSpace + 1);
                } else {
                    resourceModel.author[idx].family = name;
                }
            }

            resourceModel.containerTitle = response.fulljournalname;
            // Prefer the ISSN; fall back to the electronic ISSN when it is the only one present
            if (!StringUtils.isEmpty(response.issn)) {
                resourceModel.ISSN = new String[] { response.issn };
            } else if (!StringUtils.isEmpty(response.eissn)) {
                resourceModel.ISSN = new String[] { response.eissn };
            }

            resourceModel.volume = response.volume;
            resourceModel.issue = response.issue;
            if (response.pages != null) {
                // A page range is split at its first hyphen; otherwise the value is the start page
                int hyphen = response.pages.indexOf('-');
                if (hyphen >= 0) {
                    resourceModel.pageStart = response.pages.substring(0, hyphen);
                    resourceModel.pageEnd = response.pages.substring(hyphen + 1);
                } else {
                    resourceModel.pageStart = response.pages;
                }
            }

            if (!StringUtils.isEmpty(response.pubdate) && response.pubdate.length() >= 4) {
                try {
                    // Parse first, so an unparseable date leaves publicationDate unset
                    int year = Integer.parseInt(response.pubdate.substring(0, 4), 10);
                    resourceModel.publicationDate = new ResourceModel.DateField();
                    resourceModel.publicationDate.year = year;
                } catch (NumberFormatException e) {
                    logger.warn("Unparseable publication date '" + response.pubdate + "' for PMID " + externalId);
                }
            }

            if (response.articleids != null) {
                // Matches any value that contains a PMC identifier such as PMC1234567
                final String pmcPattern = ".*(PMC[0-9]+).*";
                for (PubMedSummaryResponse.ArticleID articleID : response.articleids) {
                    if (articleID == null || StringUtils.isEmpty(articleID.value)) {
                        continue;
                    }

                    if ("doi".equalsIgnoreCase(articleID.idtype)) {
                        resourceModel.DOI = articleID.value.trim();
                    } else if ("pmc".equalsIgnoreCase(articleID.idtype)) {
                        resourceModel.PubMedCentralID = articleID.value.trim();
                    } else if ("pmcid".equalsIgnoreCase(articleID.idtype)
                            && StringUtils.isEmpty(resourceModel.PubMedCentralID)) {
                        // replaceAll returns its input unchanged when the pattern does not match,
                        // so only extract the identifier when the value really contains one
                        if (articleID.value.matches(pmcPattern)) {
                            resourceModel.PubMedCentralID = articleID.value.replaceAll(pmcPattern, "$1");
                        }
                    }
                }
            }

            resourceModel.type = getCiteprocTypeForPubType(response.pubtype);
            resourceModel.publisher = response.publishername;
            resourceModel.status = response.pubstatus;

            // ResourceModel fields not populated here:
            //   created, subject, presentedAt, keyword, abstractText
            return resourceModel;
        } catch (IOException e) {
            logger.error("Unable to read JSON", e);
        }

        return null;
    }

    /**
     * Rewrites a "Family Initials" style name as "Family, Initials".
     *
     * A name that already contains a comma, or that contains no space at all, is returned
     * unchanged. Otherwise a comma is inserted before the last space; any run of spaces directly
     * preceding that last space is dropped.
     *
     * @param name the author name; must not be {@code null}
     * @return the name with a comma separating the leading part from the final word
     */
    private String normalizeAuthorName(String name) {
        final boolean alreadyHasComma = name.indexOf(',') >= 0;
        final int finalSpace = name.lastIndexOf(' ');
        if (alreadyHasComma || finalSpace < 0) {
            return name;
        }

        // Walk back over any additional spaces that sit directly before the final one
        int familyEnd = finalSpace;
        while (familyEnd > 0 && name.charAt(familyEnd - 1) == ' ') {
            familyEnd--;
        }

        final String leading = name.substring(0, familyEnd);
        final String trailing = name.substring(finalSpace);
        return leading + "," + trailing;
    }

    /**
     * Maps a list of PubMed publication types to a single citeproc type.
     *
     * The types are examined in order and the first one with a known mapping decides the result.
     * When the list is {@code null}, empty, or contains no known type, {@code "article-journal"}
     * is returned.
     *
     * @param pubTypes the publication types of a record; may be {@code null}
     * @return the citeproc type name; never {@code null}
     */
    private String getCiteprocTypeForPubType(String[] pubTypes) {
        if (pubTypes == null) {
            return "article-journal";
        }

        for (int i = 0; i < pubTypes.length; i++) {
            final String candidate = pubTypes[i];

            if (candidate.equals("Journal Article")) {
                return "article-journal";
            } else if (candidate.equals("Incunabula")
                    || candidate.equals("Monograph")
                    || candidate.equals("Textbooks")) {
                return "book";
            } else if (candidate.equals("Dataset")) {
                return "dataset";
            } else if (candidate.equals("Legal Cases")) {
                return "legal_case";
            } else if (candidate.equals("Legislation")) {
                return "legislation";
            } else if (candidate.equals("Manuscripts")) {
                return "manuscript";
            } else if (candidate.equals("Maps")) {
                return "map";
            } else if (candidate.equals("Meeting Abstracts")) {
                return "paper-conference";
            } else if (candidate.equals("Patents")) {
                return "patent";
            } else if (candidate.equals("Letter")) {
                return "personal_communication";
            } else if (candidate.equals("Blogs")) {
                return "post-weblog";
            } else if (candidate.equals("Review")) {
                return "review";
            } else if (candidate.equals("Academic Dissertations")) {
                return "thesis";
            }
            // Unknown type: keep looking at the remaining entries
        }

        return "article-journal";
    }

    /**
     * Performs an HTTP GET on {@code url} and returns the response body as read by
     * {@code HttpReader.fromResponse}.
     *
     * This is a best-effort read: failures are logged and reported to the caller as {@code null}
     * rather than thrown.
     *
     * @param url the absolute URL to fetch
     * @return the response content, or {@code null} when the URL is malformed or the request fails
     */
    private String readUrl(String url) {
        try {
            HttpClient client = HttpClientFactory.getHttpClient();
            HttpGet request = new HttpGet(url);
            HttpResponse response = client.execute(request);
            return HttpReader.fromResponse(response);
        } catch (IOException e) {
            // Previously swallowed silently, which made failed lookups impossible to diagnose
            logger.error("Unable to read from " + url, e);
        } catch (IllegalArgumentException e) {
            // HttpGet rejects strings that are not valid URIs (e.g. an ID containing a space)
            logger.error("Invalid URL " + url, e);
        }

        return null;
    }

    /**
     * Data-binding target for the JSON answer of the ID conversion service, populated by Jackson
     * in {@code allExternalIDsForFind}. Properties of the answer that have no matching field here
     * are ignored by the mapper configuration used there.
     */
    private static class PubMedIDResponse {
        public String status;
        public String responseDate;
        public String request;
        public String warning;

        /** One entry per converted identifier; only the first entry is read by this provider. */
        public PubMedIDRecord[] records;

        /** The identifiers reported for a single article. */
        public static class PubMedIDRecord {
            // These fields must be public: with a default ObjectMapper, Jackson only auto-detects
            // public fields, so package-private fields without setters are never populated and
            // the DOI / PubMed Central ID would always come back as null.
            public String pmcid;
            public String pmid;
            public String doi;

            // Don't need versions
        }
    }

    /**
     * Data-binding target for a single record of the PubMed summary JSON. Instances are created by
     * Jackson in {@code findInExternal} and {@code makeResourceModel}, which position the parser on
     * the value of the field named after the PubMed ID before binding.
     *
     * The commented-out declarations list further properties that are currently not bound; the
     * mappers used by this provider are configured to ignore properties without a matching field.
     */
    private static class PubMedSummaryResponse {
        public String uid;
        // Only the first four characters are used by this provider, parsed as the publication year
        public String pubdate;
        //public String epubdate;
        public String source;
        public NameField[] authors;
        //public String lastauthor;
        public String title;
        //public String sorttitle;
        public String volume;
        public String issue;
        // Split at the first hyphen into start / end page by makeResourceModel
        public String pages;
        @JsonDeserialize(using = StringArrayDeserializer.class)
        public String[] lang;
        //public String nlmuniqueid;
        public String issn;
        // Used as the ISSN only when issn is empty
        public String eissn;
        // Mapped to a citeproc type by getCiteprocTypeForPubType
        @JsonDeserialize(using = StringArrayDeserializer.class)
        public String[] pubtype;
        //public String recordstatus;
        public String pubstatus;
        public ArticleID[] articleids;
        public History[] history;
        //public String[] references;
        @JsonDeserialize(using = StringArrayDeserializer.class)
        public String[] attributes;
        //public Integer pmcrefcount;
        public String fulljournalname;
        //public String elocationid;
        //public Integer viewcount;
        //public String doctype;
        //public String[] srccontriblist;
        //public String booktitle;
        //public String medium;
        //public String edition;
        //public String publisherlocation;
        public String publishername;
        //public String srcdate;
        //public String reportnumber;
        //public String availablefromurl;
        //public String locationlabel;
        //public String[] doccontriblist;
        //public String docdate;
        //public String bookname;
        public String chapter;
        //public String sortpubdate;
        //public String sortfirstauthor;
        //public String vernaculartitle;

        /** A single author entry; only the display name is bound. */
        public static class NameField {
            public String name;
            //public String authtype;
            //public String clusterid;
        }

        /**
         * An additional identifier of the article. makeResourceModel inspects the id types
         * "doi", "pmc" and "pmcid" (compared case-insensitively).
         */
        public static class ArticleID {
            public String idtype;
            //public Integer idtypen;
            public String value;
        }

        /** A publication-status / date pair from the record's history list. */
        public static class History {
            public String pubstatus;
            public String date;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy