All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hfg.citation.ncbi.NCBI_eFetch Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.citation.ncbi;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.hfg.bio.seq.format.SeqCitation;
import com.hfg.citation.CitationRetriever;
import com.hfg.citation.Journal;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.StringUtil;
import com.hfg.util.User;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.io.HTTPUtil;
import com.hfg.xml.XMLTag;

/**
 * {@link CitationRetriever} that looks up a citation via the NCBI E-utilities
 * eFetch service (PubMed database) and parses the XML response into a
 * {@link MedlineCitation}.
 * <p>
 * If the query data has a DOI but no PubMed id, the NCBI ID converter service
 * is queried first to try to resolve the DOI to a PubMed id.
 * </p>
 */
public class NCBI_eFetch implements CitationRetriever
{
   // User whose email address is sent along with each request (when available).
   private final User mUser;

   // Example query:
   // https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=22368089&tool=my_tool&email=my_email@example.com
   private String mBaseURL = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi";

   // Example query:
   // https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/?tool=my_tool&email=my_email@example.com&ids=10.1093/nar/gks1195
   private final String mIDConvertBaseURL = "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/";


   // Ex: Science. 2002 Nov 8;298(5596):1248-51
   // Groups: 1 = journal abbreviation, 2 = year, 3 = volume, 4 = issue (optional), 5 = pages
   private static final Pattern JOURNAL_CITATION_PATTERN = Pattern.compile("([^\\.]+)\\.\\s+(\\d{4})[^\\;]+\\;(\\d+)(?:\\((\\d+)\\))?\\:([\\d\\-]+)");

   //---------------------------------------------------------------------------
   /**
    * @param inUser the user on whose behalf requests are made; the user's email
    *               address (if set) is included in each request
    */
   public NCBI_eFetch(User inUser)
   {
      mUser = inUser;
   }

   //###########################################################################
   // PUBLIC METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    * @return the base URL (without query parameters) used for eFetch requests
    */
   public String getBaseQueryURL()
   {
      return mBaseURL;
   }

   //---------------------------------------------------------------------------
   /**
    * Overrides the base URL used for eFetch requests.
    *
    * @param inValue the base URL (without query parameters)
    * @return this object, to allow method chaining
    */
   public NCBI_eFetch setBaseQueryURL(String inValue)
   {
      mBaseURL = inValue;
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    * Retrieves the PubMed record for the specified citation.
    * <p>
    * If the query data has no PubMed id but does have a DOI, the DOI is first
    * resolved to a PubMed id (which is stored back on {@code inQueryData}).
    * </p>
    *
    * @param inQueryData the citation to look up; must provide a PubMed id or a DOI
    * @return the parsed citation (including its reference list when the response
    *         contains one), or {@code null} if no PubMed id is available or the
    *         service did not respond with HTTP 200
    * @throws IOException if communication with the service fails
    */
   @Override
   public MedlineCitation fetch(SeqCitation inQueryData)
         throws IOException
   {
      if (! StringUtil.isSet(inQueryData.getPubMedId())
          && StringUtil.isSet(inQueryData.getDOI()))
      {
         lookupPubMedIdFromDOI(inQueryData);
      }

      // Without a PubMed id there is nothing to query for. Bail out instead of
      // sending a request containing "id=null".
      if (! StringUtil.isSet(inQueryData.getPubMedId()))
      {
         return null;
      }

      MedlineCitation citation = null;

      HttpURLConnection conn = HTTPUtil.openConnection(composeQueryURL(inQueryData));
      try
      {
         if (HttpURLConnection.HTTP_OK == conn.getResponseCode())
         {
            // All tag access is kept inside the try block so that the stream is
            // still open for as long as the parsed XML is being used.
            try (InputStream stream = new BufferedInputStream(conn.getInputStream()))
            {
               XMLTag xmlTag = new XMLTag(stream);

               XMLTag pubmedArticleTag = xmlTag.getRequiredSubtagByName(PubmedXML.PUBMED_ARTICLE);
               XMLTag medlineCitationTag = pubmedArticleTag.getRequiredSubtagByName(PubmedXML.MEDLINE_CITATION);

               citation = new MedlineCitation(medlineCitationTag);

               // References
               XMLTag pubmedDataTag = pubmedArticleTag.getOptionalSubtagByName(PubmedXML.PUBMED_DATA);
               if (pubmedDataTag != null)
               {
                  XMLTag refListTag = pubmedDataTag.getOptionalSubtagByName(PubmedXML.REFERENCE_LIST);
                  if (refListTag != null)
                  {
                     citation.setReferences(parseReferences(refListTag));
                  }
               }
            }
         }
      }
      finally
      {
         conn.disconnect();
      }

      return citation;
   }

   //###########################################################################
   // PRIVATE METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    * Builds the eFetch request URL for the PubMed id of the specified citation.
    * The caller is expected to have verified that the PubMed id is set.
    */
   private String composeQueryURL(SeqCitation inQueryData)
         throws IOException
   {
      StringBuilderPlus url = startQueryURL(getBaseQueryURL())
            .delimitedAppend("db=pubmed")
            .delimitedAppend("format=xml")
            .delimitedAppend("id=" + urlEncode(inQueryData.getPubMedId()));

      return url.toString();
   }

   //---------------------------------------------------------------------------
   /**
    * Starts a query URL with the parameters common to all requests: the tool
    * name and, when a user with an email address is available, the email.
    */
   private StringBuilderPlus startQueryURL(String inBaseURL)
         throws IOException
   {
      StringBuilderPlus url = new StringBuilderPlus(inBaseURL).setDelimiter("&")
            .append("?")
            .append("tool=com_hfg");

      String email = (mUser != null ? mUser.getEmail() : null);
      if (StringUtil.isSet(email))
      {
         url.delimitedAppend("email=" + urlEncode(email));
      }

      return url;
   }

   //---------------------------------------------------------------------------
   /**
    * URL-encodes a query parameter value as UTF-8 so that characters such as
    * '&', '#', '<' or ';' (which can occur in DOIs) cannot corrupt the URL.
    */
   private static String urlEncode(String inValue)
         throws IOException
   {
      return URLEncoder.encode(inValue, "UTF-8");
   }

   //---------------------------------------------------------------------------
   /**
    * Queries the NCBI ID converter with the DOI of the specified citation and,
    * if the first returned record carries a "pmid" attribute, stores that value
    * as the citation's PubMed id. The caller is expected to have verified that
    * the DOI is set.
    *
    * @throws IOException if communication with the service fails
    */
   private void lookupPubMedIdFromDOI(SeqCitation inQueryData)
         throws IOException
   {
      StringBuilderPlus url = startQueryURL(mIDConvertBaseURL)
            .delimitedAppend("ids=" + urlEncode(inQueryData.getDOI()));

      HttpURLConnection conn = HTTPUtil.openConnection(url.toString());
      try (InputStream stream = new BufferedInputStream(conn.getInputStream()))
      {
         XMLTag xmlTag = new XMLTag(stream);

         /*
         Example response (approximate; reconstructed from the NCBI ID converter
         documentation, so attribute values are illustrative only):

         <pmcids status="ok">
           <request idtype="doi" pmcids="" versions="yes" showaiid="no">
             <echo>
               tool=my_tool;email=my_email%40example.com;ids=10.1093%2Fnar%2Fgks1195
             </echo>
           </request>
           <record requested-id="10.1093/nar/gks1195" pmcid="PMC3531190" pmid="23193287" doi="10.1093/nar/gks1195">
             <versions>
               <version pmcid="PMC3531190.1" current="true"/>
             </versions>
           </record>
         </pmcids>

          */
         List<XMLTag> recordTags = xmlTag.getSubtagsByName("record");
         if (CollectionUtil.hasValues(recordTags))
         {
            String pubMedId = recordTags.get(0).getAttributeValue("pmid");
            // Only record an id if the service actually supplied one.
            if (StringUtil.isSet(pubMedId))
            {
               inQueryData.setPubMedId(pubMedId);
            }
         }
      }
      finally
      {
         conn.disconnect();
      }
   }

   //---------------------------------------------------------------------------
   /**
    * Converts the reference subtags of a reference list tag into citations.
    * <p>
    * For each reference, the text of its citation subtag (if present) is stored
    * as the raw content and, if that text matches the journal citation pattern,
    * the journal abbreviation, year, volume, issue, and pages are filled in.
    * A PubMed id is set if the reference's article id list contains an article
    * id whose id type is "pubmed".
    * </p>
    *
    * @return one citation per reference tag, or {@code null} if the list tag
    *         has no reference subtags
    */
   private List<MedlineCitation> parseReferences(XMLTag inRefListTag)
   {
      List<MedlineCitation> references = null;

      List<XMLTag> referenceTags = inRefListTag.getSubtagsByName(PubmedXML.REFERENCE);
      if (CollectionUtil.hasValues(referenceTags))
      {
         references = new ArrayList<>(referenceTags.size());
         for (XMLTag referenceTag : referenceTags)
         {
            MedlineCitation citation = new MedlineCitation();
            references.add(citation);

            XMLTag citationTag = referenceTag.getOptionalSubtagByName(PubmedXML.CITATION);
            if (citationTag != null)
            {
               String rawContent = citationTag.getContent();
               if (rawContent != null)
               {
                  rawContent = rawContent.trim();
                  citation.setRawContent(rawContent);

                  // Match against the raw citation text itself rather than the
                  // citation object's toString(), whose output is not defined here.
                  Matcher m = JOURNAL_CITATION_PATTERN.matcher(rawContent);
                  if (m.matches())
                  {
                     Journal journal = new Journal().setAbbrev(m.group(1));
                     citation.setJournal(journal);

                     citation.setYear(Integer.parseInt(m.group(2)));
                     citation.setVolume(m.group(3));
                     citation.setIssue(m.group(4)); // null when the citation has no issue
                     citation.setPages(m.group(5));
                  }
               }
            }

            XMLTag articleIdListTag = referenceTag.getOptionalSubtagByName(PubmedXML.ARTICLE_ID_LIST);
            if (articleIdListTag != null)
            {
               // The article id tags are looked up on the article id list tag
               // (not on the enclosing reference tag).
               List<XMLTag> articleIdTags = articleIdListTag.getSubtagsByName(PubmedXML.ARTICLE_ID);
               if (CollectionUtil.hasValues(articleIdTags))
               {
                  for (XMLTag articleIdTag : articleIdTags)
                  {
                     // Literal first so that a missing id type attribute cannot cause a NullPointerException.
                     if ("pubmed".equalsIgnoreCase(articleIdTag.getAttributeValue(PubmedXML.ID_TYPE_ATT)))
                     {
                        String articleId = articleIdTag.getContent();
                        if (articleId != null)
                        {
                           citation.setPubMedId(articleId.trim());
                        }
                     }
                  }
               }
            }
         }
      }

      return references;
   }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy