All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.eclipse.pass.entrez.PmidLookup Maven / Gradle / Ivy

There is a newer version: 1.3.0
Show newest version
/*
 * Copyright 2023 Johns Hopkins University
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.eclipse.pass.entrez;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.TimeUnit;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Service to retrieve PMID records from Entrez. If you prefer to avoid dealing with JSON,
 * there is an option to retrieve a PubMedEntrezRecord object where you can use standard getters to
 * retrieve article details.
 *
 * @author Karen Hanson
 * @version $Id$
 */
public class PmidLookup {

    private static final Logger LOG = LoggerFactory.getLogger(PmidLookup.class);

    /**
     * The default Entrez path does not include an API key. This property can be overridden with the
     * {@code entrez.pmid.path} System Property; the value is used as a format string and must contain a
     * single {@code %s} placeholder for the PMID.
     * Note that as of May 2018, Entrez supports 3 requests per second without an API key, above this will
     * result in delayed responses.
     * https://www.ncbi.nlm.nih.gov/books/NBK25497/
     */
    private static final String DEFAULT_ENTREZ_PATH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary" +
                                                      ".fcgi?db=pubmed&retmode=json&rettype=abstract&id=%s";

    private static final String ENTREZ_PATH_KEY = "entrez.pmid.path";

    private static final String JSON_ERROR_KEY = "error";
    private static final String JSON_RESULT_KEY = "result";

    /** Pause, in milliseconds, before the single application-level retry of a failed lookup. */
    private static final long RETRY_PAUSE_MILLIS = 400L;

    /** Number of times the HTTP client itself retries a request after an I/O failure. */
    private static final int HTTP_RETRY_COUNT = 3;

    private final String entrezPath;

    /**
     * Default constructor uses the default Entrez path, unless the {@code entrez.pmid.path}
     * System Property is set, in which case that value is used instead.
     */
    public PmidLookup() {
        entrezPath = System.getProperty(ENTREZ_PATH_KEY, DEFAULT_ENTREZ_PATH);
    }

    /**
     * Retrieve PubMedEntrezRecord object for PMID record from NIH's Entrez API service.
     *
     * @param pmid pub med id
     * @return the record, or null if no record could be retrieved for the pmid
     * @throws IllegalArgumentException if pmid is null
     * @throws RuntimeException         if communication with the service failed
     */
    public PubMedEntrezRecord retrievePubMedRecord(String pmid) {
        JSONObject jsonObj = retrievePubMedRecordAsJson(pmid);
        return jsonObj != null ? new PubMedEntrezRecord(jsonObj) : null;
    }

    /**
     * Retrieve JSON for PMID record from NIH's Entrez API service. Returns JSON object containing the record
     * or null if no match found. Note that "no match found" means there is no record for that pmid, whereas
     * a RuntimeException means communication with the service failed, and the client app can decide what
     * to do about those.
     * <p>
     * If the first attempt yields no record, the call pauses briefly and tries exactly once more to allow
     * for API rate limiting.
     *
     * @param pmid pub med id
     * @return the record as a JSON object, or null if no record could be retrieved
     * @throws IllegalArgumentException if pmid is null
     * @throws RuntimeException         if communication with the service failed, or the thread was
     *                                  interrupted while waiting to retry (the interrupt flag is restored)
     */
    public JSONObject retrievePubMedRecordAsJson(String pmid) {
        if (pmid == null) {
            throw new IllegalArgumentException("pmid cannot be null");
        }

        JSONObject jsonRecord = retrieveJsonFromApi(pmid);
        if (jsonRecord == null) {
            // pause and retry once to allow for API limitations
            LOG.info("Pausing before trying to pull PMID {} from Entrez again", pmid);
            try {
                TimeUnit.MILLISECONDS.sleep(RETRY_PAUSE_MILLIS);
            } catch (InterruptedException e) {
                // restore the interrupt flag so callers further up the stack can still observe it
                Thread.currentThread().interrupt();
                throw new RuntimeException("A problem occurred while waiting to retry Entrez API call", e);
            }
            jsonRecord = retrieveJsonFromApi(pmid);
        }

        return jsonRecord;
    }

    /**
     * Calls the Entrez API and finds the root of the JSON record. A new HTTP client is created for the
     * call and is always closed afterwards so its connection resources are released.
     *
     * @param pmid pub med id
     * @return the root JSON record, or null if the service returned an error message or an empty response
     * @throws RuntimeException if the path is not a valid URI or the request fails
     */
    private JSONObject retrieveJsonFromApi(String pmid) {
        String path = String.format(entrezPath, pmid);
        try (CloseableHttpClient client = HttpClientBuilder
            .create()
            .setRetryHandler(new DefaultHttpRequestRetryHandler(HTTP_RETRY_COUNT, false))
            .build()) {
            return fetchRecord(client, new HttpGet(new URI(path)), pmid);
        } catch (URISyntaxException e) {
            throw new RuntimeException("Could not convert convert path to URL: " + path, e);
        } catch (IllegalStateException | IOException e) {
            // the exception is passed as the trailing throwable argument so the logger records its stack trace
            LOG.warn("Could not retrieve PMID {} from Entrez.", pmid, e);
            throw new RuntimeException("Error while retrieving content from Entrez at URL: " + path, e);
        }
    }

    /**
     * Executes the request with the supplied client and extracts the JSON record from the response body.
     *
     * @param client  the HTTP client used to execute the request; the caller is responsible for closing it
     * @param request the GET request for the Entrez record
     * @param pmid    pub med id, used to locate the record within the response and for logging
     * @return the root JSON record, or null if the response had no body or contained an error key
     * @throws IOException if executing the request or reading the response body fails
     */
    private JSONObject fetchRecord(HttpClient client, HttpGet request, String pmid) throws IOException {
        HttpResponse response = client.execute(request);
        HttpEntity entity = response.getEntity();

        if (entity == null) {
            LOG.warn("Could not retrieve PMID {} from Entrez. Returned empty value.", pmid);
            return null;
        }

        String result = EntityUtils.toString(entity, StandardCharsets.UTF_8);
        JSONObject root = walkToJsonRoot(result, pmid);

        if (root.has(JSON_ERROR_KEY)) {
            //if there is an error key, something went wrong. Log error and return null.
            String error = root.getString(JSON_ERROR_KEY);
            LOG.warn("Could not retrieve PMID {} from Entrez. Error: {}", pmid, error);
            return null;
        }

        return root;
    }

    /**
     * Converts to JSONObject then walks to the root of the JSON content. This could be a PMID record
     * or an error message. If the content has a "result" object, that object is descended into, and if
     * it in turn has an entry keyed by the pmid, that entry is returned; otherwise the deepest object
     * reached is returned.
     *
     * @param jsonEntrezRecord as string
     * @param pmid             pub med id
     * @return the root JSON object
     */
    private JSONObject walkToJsonRoot(String jsonEntrezRecord, String pmid) {
        JSONObject root = new JSONObject(jsonEntrezRecord);

        if (root.has(JSON_RESULT_KEY)) {
            root = root.getJSONObject(JSON_RESULT_KEY);
            if (root.has(pmid)) {
                root = root.getJSONObject(pmid);
            }
        }

        return root;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy